Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release updates #8378

Merged
merged 33 commits into from
Feb 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
37ac5a3
[tutorial] fixed missing RIR scripts file. (#8257)
XuesongYang Jan 29, 2024
7b2415a
add values to en tts dict (#7879)
mgrafu Jan 30, 2024
5b2ffb6
mcore ds fix
Jan 31, 2024
37284d3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 31, 2024
8c99a7f
Merge branch 'r1.23.0' into dpykhtar/mcore_ds_fix
dimapihtar Jan 31, 2024
6732410
Merge branch 'r1.23.0' into dpykhtar/mcore_ds_fix
dimapihtar Jan 31, 2024
12bc3cc
update mcore
dimapihtar Jan 31, 2024
35e1024
revert asr files
dimapihtar Jan 31, 2024
bec85bb
add comments
dimapihtar Jan 31, 2024
29ff2bd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 31, 2024
c84afa1
Merge branch 'r1.23.0' into dpykhtar/mcore_ds_fix
dimapihtar Feb 1, 2024
0edd229
add support for mcore mock dataset
dimapihtar Feb 2, 2024
4098e53
update mcore version
dimapihtar Feb 2, 2024
a4630bf
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 2, 2024
1c3f327
update gpt cfg
dimapihtar Feb 2, 2024
565565b
Merge branch 'r1.23.0' into dpykhtar/mcore_ds_fix
dimapihtar Feb 2, 2024
e1ae18a
Merge branch 'r1.23.0' into dpykhtar/mcore_ds_fix
dimapihtar Feb 5, 2024
a502a57
Merge branch 'r1.23.0' into dpykhtar/mcore_ds_fix
dimapihtar Feb 6, 2024
ccaceb4
update mcore commit
dimapihtar Feb 6, 2024
95159a0
fix Bert unit tests
dimapihtar Feb 7, 2024
842b77b
update bert tests
dimapihtar Feb 7, 2024
ea5443e
Merge branch 'r1.23.0' into dpykhtar/mcore_ds_fix
pablo-garay Feb 7, 2024
ed69105
fix bert mcore test
dimapihtar Feb 8, 2024
0db10f6
Merge branch 'r1.23.0' into dpykhtar/mcore_ds_fix
dimapihtar Feb 8, 2024
9de1ff8
fix gpt jenkins tests
dimapihtar Feb 8, 2024
9fa950e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 8, 2024
423b053
add support for dict data input type
dimapihtar Feb 8, 2024
be99fa7
Merge branch 'dpykhtar/mcore_ds_fix' into dpykhtar/release_updates
dimapihtar Feb 8, 2024
9622eff
add mock ds test
dimapihtar Feb 8, 2024
ff09c20
add test for dict data input type
dimapihtar Feb 8, 2024
69c572e
Merge branch 'r1.23.0' into dpykhtar/release_updates
dimapihtar Feb 9, 2024
9eabd27
mcore ds fix
dimapihtar Feb 9, 2024
4125716
data input fix
dimapihtar Feb 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 28 additions & 28 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -3595,7 +3595,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"'''
model.activations_checkpoint_method='block' \
model.activations_checkpoint_granularity='full' \
model.activations_checkpoint_num_layers=1 \
model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \
model.data.data_prefix='{train:[1.0,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document],validation:[/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document],test:[/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document]}' \
model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings"
// commented out to save time on github ci @adithyare
//sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
Expand Down Expand Up @@ -5097,34 +5097,34 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"'''
}
}
failFast true
//parallel {
//stage('MockGPTDataset') {
// steps {
// sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
// trainer.max_steps=10 \
// trainer.limit_val_batches=7 \
// trainer.val_check_interval=10 \
// exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
// model.data.data_impl=mock \
// model.data.data_prefix=[] \
// "
// sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results"
// }
//}
//stage('MockT5Dataset') {
steps {
sh "python examples/nlp/language_modeling/megatron_t5_pretraining.py \
trainer.max_steps=10 \
trainer.limit_val_batches=3 \
trainer.val_check_interval=10 \
exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
model.data.data_impl=mock \
model.data.data_prefix=[] \
"
sh "rm -rf examples/nlp/language_modeling/t5_pretrain_results"
parallel {
stage('MockGPTDataset') {
steps {
sh "python examples/nlp/language_modeling/megatron_gpt_pretraining.py \
trainer.max_steps=10 \
trainer.limit_val_batches=7 \
trainer.val_check_interval=10 \
exp_manager.exp_dir=examples/nlp/language_modeling/gpt_pretrain_results \
model.data.data_impl=mock \
model.data.data_prefix=[] \
"
sh "rm -rf examples/nlp/language_modeling/gpt_pretrain_results"
}
}
stage('MockT5Dataset') {
steps {
sh "python examples/nlp/language_modeling/megatron_t5_pretraining.py \
trainer.max_steps=10 \
trainer.limit_val_batches=3 \
trainer.val_check_interval=10 \
exp_manager.exp_dir=examples/nlp/language_modeling/t5_pretrain_results \
model.data.data_impl=mock \
model.data.data_prefix=[] \
"
sh "rm -rf examples/nlp/language_modeling/t5_pretrain_results"
}
}
}
//}
//}
}

stage('L2: TTS Fast dev runs 1') {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1205,13 +1205,11 @@ def build_train_valid_test_datasets(self):
1
] = 1 # This is to make sure we only have one epoch on every validation iteration

mock_dataset = self.cfg.data.get("mock_dataset", False)
mock_dataset = True if self.cfg.data.get("data_impl", "mmap") == "mock" else False
kwargs = {
"is_built_on_rank": is_dataset_built_on_rank,
"random_seed": self.cfg.seed,
"sequence_length": self.cfg.data.seq_length,
"blend": self.cfg.data.data_prefix,
"split": self.cfg.data.splits_string,
"path_to_cache": self.cfg.data.index_mapping_dir,
"tokenizer": self.tokenizer,
"reset_position_ids": self.reset_position_ids,
Expand All @@ -1220,6 +1218,14 @@ def build_train_valid_test_datasets(self):
"mock": mock_dataset,
}

# support for dict data input type
if isinstance(self.cfg.data.data_prefix, DictConfig):
_pref = self.cfg.data.data_prefix
kwargs['blend_per_split'] = [_pref['train'], _pref['validation'], _pref['test']]
else:
kwargs['blend'] = self.cfg.data.data_prefix
kwargs["split"] = self.cfg.data.splits_string

if self.cfg.data.get('add_fim', False):
dataset_config = GPTFIMDatasetConfig(self.tokenizer, self.cfg.data.fim, **kwargs)

Expand Down
Loading