diff --git a/examples/advanced/bionemo/downstream/downstream_nvflare.ipynb b/examples/advanced/bionemo/downstream/downstream_nvflare.ipynb index 2d2f72fc22..efd6109bcc 100644 --- a/examples/advanced/bionemo/downstream/downstream_nvflare.ipynb +++ b/examples/advanced/bionemo/downstream/downstream_nvflare.ipynb @@ -75,7 +75,7 @@ "source": [ "### Download Model Checkpoints\n", "\n", - "In order to download pretrained models from the NGC registry, **please ensure that you have installed and configured the NGC CLI**, check the [Quickstart Guide](https://docs.nvidia.com/bionemo-framework/latest/quickstart-fw.html) for more info. The following code will download the pretrained model `esm2nv_650M_converted.nemo` from the NGC registry." + "In order to download pretrained models from the NGC registry, **please ensure that you have installed and configured the NGC CLI**, check the [Quickstart Guide](https://docs.nvidia.com/bionemo-framework/latest) for more info. The following code will download the pretrained model `esm2nv_650M_converted.nemo` from the NGC registry." ] }, { diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/base_config.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml index fc3e3d12e0..fd9507cf80 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 0.0 limit_val_batches: 0.0 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 0.0 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -54,7 +54,7 @@ model: target_column: ["Y"] #["3state"np.sum(test_df['Y']==0), "resolved"] # names of label columns in csv file target_sizes: [2] # number of classes in each label for classifications or 1 for regression num_classes: 2 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/pretrain_small.yaml index 46b45021f0..4599d92bcc 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/central_sabdab_esm1nv/app/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/config/config_fed_server.conf b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/config/config_fed_server.conf index 6c020f494f..f13692a589 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/config/config_fed_server.conf +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/config/config_fed_server.conf @@ -35,7 +35,7 @@ min_clients = 6 # number of global round of the training. - num_rounds = 50 + num_rounds = 20 # starting round is 0-based start_round = 0 diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/base_config.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml index 6382d12ce5..6488ad5205 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml @@ -54,7 +54,7 @@ model: target_column: ["Y"] #["3state"np.sum(test_df['Y']==0), "resolved"] # names of label columns in csv file target_sizes: [2] # number of classes in each label for classifications or 1 for regression num_classes: 2 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/pretrain_small.yaml index 46b45021f0..4599d92bcc 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/fedavg_sabdab_esm1nv/app/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/base_config.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml index 752eed84eb..b21438510e 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/downstream_flip_sabdab.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 0.0 limit_val_batches: 0.0 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 0.0 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -54,7 +54,7 @@ model: target_column: ["Y"] #["3state"np.sum(test_df['Y']==0), "resolved"] # names of label columns in csv file target_sizes: [2] # number of classes in each label for classifications or 1 for regression num_classes: 2 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/pretrain_small.yaml index 46b45021f0..4599d92bcc 100644 --- a/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/sabdab/jobs/local_sabdab_esm1nv/app/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py b/examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py index 0c9a14a669..1c48035c36 100644 --- a/examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py +++ b/examples/advanced/bionemo/downstream/sabdab/run_sim_sabdab.py @@ -14,15 +14,14 @@ from nvflare import SimulatorRunner -n_clients = 6 - # Choose from one of the available jobs job_name = "central_sabdab_esm1nv" -# job_name = "local_sabdab_esm1nv" -# job_name = "fedavg_sabdab_esm1nv" +n_clients = 1 +# job_name = "local_sabdab_esm1nv"; n_clients = 6 +# job_name = "fedavg_sabdab_esm1nv"; n_clients = 6 simulator = SimulatorRunner( - job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=1 + job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=n_clients ) run_status = simulator.run() print("Simulator finished with run_status", run_status) diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/base_config.yaml b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/base_config.yaml index 98873f04c5..ae3fd5c67e 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/base_config.yaml @@ -140,7 +140,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -239,7 +239,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: ${oc.env:BIONEMO_HOME}/data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/downstream_flip_scl.yaml b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/downstream_flip_scl.yaml index 039924eb03..35ad8ba402 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/downstream_flip_scl.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app1/custom/downstream_flip_scl.yaml @@ -47,7 +47,7 @@ model: target_column: ["TARGET"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [10] # number of classes in each label for classifications or 1 for regression num_classes: 10 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/base_config.yaml b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/base_config.yaml index 98873f04c5..ae3fd5c67e 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/base_config.yaml @@ -140,7 +140,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -239,7 +239,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: ${oc.env:BIONEMO_HOME}/data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/downstream_flip_scl.yaml b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/downstream_flip_scl.yaml index 0dd9c89af1..bb1371eb49 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/downstream_flip_scl.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app2/custom/downstream_flip_scl.yaml @@ -47,7 +47,7 @@ model: target_column: ["TARGET"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [10] # number of classes in each label for classifications or 1 for regression num_classes: 10 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/base_config.yaml b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/base_config.yaml index 98873f04c5..ae3fd5c67e 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/base_config.yaml @@ -140,7 +140,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -239,7 +239,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: ${oc.env:BIONEMO_HOME}/data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/downstream_flip_scl.yaml b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/downstream_flip_scl.yaml index 8c46d683cd..c4dcc7625f 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/downstream_flip_scl.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/fedavg_scl_finetune_esm2nv/app3/custom/downstream_flip_scl.yaml @@ -47,7 +47,7 @@ model: target_column: ["TARGET"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [10] # number of classes in each label for classifications or 1 for regression num_classes: 10 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/base_config.yaml b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/base_config.yaml index 98873f04c5..ae3fd5c67e 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/base_config.yaml @@ -140,7 +140,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -239,7 +239,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: ${oc.env:BIONEMO_HOME}/data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/downstream_flip_scl.yaml b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/downstream_flip_scl.yaml index a29d83e33c..eb15d16e72 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/downstream_flip_scl.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app1/custom/downstream_flip_scl.yaml @@ -47,7 +47,7 @@ model: target_column: ["TARGET"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [10] # number of classes in each label for classifications or 1 for regression num_classes: 10 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/base_config.yaml b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/base_config.yaml index 98873f04c5..ae3fd5c67e 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/base_config.yaml @@ -140,7 +140,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -239,7 +239,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: ${oc.env:BIONEMO_HOME}/data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/downstream_flip_scl.yaml b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/downstream_flip_scl.yaml index 3f06c7eb18..7b6836a4c3 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/downstream_flip_scl.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app2/custom/downstream_flip_scl.yaml @@ -47,7 +47,7 @@ model: target_column: ["TARGET"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [10] # number of classes in each label for classifications or 1 for regression num_classes: 10 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/base_config.yaml b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/base_config.yaml index 98873f04c5..ae3fd5c67e 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/base_config.yaml @@ -140,7 +140,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -239,7 +239,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: ${oc.env:BIONEMO_HOME}/data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/downstream_flip_scl.yaml b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/downstream_flip_scl.yaml index 1102374859..6ef437e83b 100644 --- a/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/downstream_flip_scl.yaml +++ b/examples/advanced/bionemo/downstream/scl/jobs/local_scl_finetune_esm2nv/app3/custom/downstream_flip_scl.yaml @@ -47,7 +47,7 @@ model: target_column: ["TARGET"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [10] # number of classes in each label for classifications or 1 for regression num_classes: 10 - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/scl/run_sim_scl.py b/examples/advanced/bionemo/downstream/scl/run_sim_scl.py index 1e8e577322..b421c565be 100644 --- a/examples/advanced/bionemo/downstream/scl/run_sim_scl.py +++ b/examples/advanced/bionemo/downstream/scl/run_sim_scl.py @@ -21,7 +21,7 @@ # job_name = "fedavg_scl_finetune_esm2nv" simulator = SimulatorRunner( - job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=1 + job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=n_clients ) run_status = simulator.run() print("Simulator finished with run_status", run_status) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/downstream_flip_tap.yaml index d6163a2fa8..7030bd3130 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/downstream_flip_tap.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 1 limit_val_batches: 1000 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 1000 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PSH"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app1/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/base_config.yaml index d09d54534e..757e2d81ae 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/downstream_flip_tap.yaml index f19075b60e..7dbf71bed3 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/downstream_flip_tap.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 1 limit_val_batches: 1000 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 1000 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PPC"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app2/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/downstream_flip_tap.yaml index 9a8534fd91..bcc6cbed00 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/downstream_flip_tap.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 1 limit_val_batches: 1000 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 1000 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PNC"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app3/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/downstream_flip_tap.yaml index 92770a1842..3d0f8c0b7f 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/downstream_flip_tap.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 1 limit_val_batches: 1000 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 1000 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["SFvCSP"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/central_tap_esm1nv/app4/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/downstream_flip_tap.yaml index e78ae40870..3acc1ec205 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/downstream_flip_tap.yaml @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PSH"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app1/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/downstream_flip_tap.yaml index 65c57fc76f..8ad382c2a8 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/downstream_flip_tap.yaml @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PPC"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app2/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/downstream_flip_tap.yaml index 9feda674d3..552341f7b7 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/downstream_flip_tap.yaml @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PNC"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app3/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/downstream_flip_tap.yaml index e4380f10ac..53fc6cf405 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/downstream_flip_tap.yaml @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["SFvCSP"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/app4/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/server/config/config_fed_server.conf b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/server/config/config_fed_server.conf index c0d92b02f1..bacb3476a2 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/server/config/config_fed_server.conf +++ b/examples/advanced/bionemo/downstream/tap/jobs/fedavg_tap_esm1nv/server/config/config_fed_server.conf @@ -35,7 +35,7 @@ min_clients = 5 # number of global round of the training. - num_rounds = 300 + num_rounds = 20 # starting round is 0-based start_round = 0 diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/downstream_flip_tap.yaml index 543c7e0d7f..2c8c40480b 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/downstream_flip_tap.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 1 limit_val_batches: 1000 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 1000 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PSH"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app1/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/downstream_flip_tap.yaml index 585fa11522..f66bfb8473 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/downstream_flip_tap.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 1 limit_val_batches: 1000 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 1000 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PPC"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app2/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/downstream_flip_tap.yaml index 08aead11c8..0417474b9a 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/downstream_flip_tap.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 1 limit_val_batches: 1000 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 1000 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["PNC"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app3/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/base_config.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/base_config.yaml index b88f272d15..5c1d4686ad 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/base_config.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/base_config.yaml @@ -100,7 +100,7 @@ model: ngc_registry_target: uniref50_2022_05 ngc_registry_version: v23.06 data_prefix: "" # must be null or "" - num_workers: 8 + num_workers: 2 dataloader_type: single # cyclic reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token @@ -154,7 +154,7 @@ model: batch_size: 128 num_epochs: 10 shuffle: True - num_workers: 8 + num_workers: 2 task_name: secondary_structure dataset_path: /data/FLIP/${model.dwnstr_task_validation.dataset.task_name} dataset: diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/downstream_flip_tap.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/downstream_flip_tap.yaml index 7b0197b21d..125c68a82d 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/downstream_flip_tap.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/downstream_flip_tap.yaml @@ -13,7 +13,7 @@ encoder_frozen: False trainer: devices: 1 # number of GPUs or CPUs num_nodes: 1 - max_epochs: 200 + max_epochs: 20 val_check_interval: 1 limit_val_batches: 1000 # number of batches in validation step, use fraction for fraction of data, 0 to disable limit_test_batches: 1000 # number of batches in test step, use fraction for fraction of data, 0 to disable @@ -53,7 +53,7 @@ model: sequence_column: "Antibody" # name of column with protein sequence in csv file target_column: ["SFvCSP"] #["3state", "resolved"] # names of label columns in csv file target_sizes: [1] # number of classes in each label for classifications or 1 for regression - num_workers: 8 + num_workers: 2 shuffle: True # shuffle training dataset max_seq_length: ${model.seq_length} emb_batch_size: ${model.micro_batch_size} diff --git a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/pretrain_small.yaml b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/pretrain_small.yaml index d03110f0e8..59528f7ff5 100644 --- a/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/pretrain_small.yaml +++ b/examples/advanced/bionemo/downstream/tap/jobs/local_tap_esm1nv/app4/custom/pretrain_small.yaml @@ -15,7 +15,7 @@ model: test: x[000..049] val: x[000..049] micro_batch_size: ${model.micro_batch_size} - num_workers: 10 + num_workers: 2 # Supported kwargs (with default values): # text_mmap (newline_int=10, header_lines=0, workers=None, sort_dataset_paths=True) diff --git a/examples/advanced/bionemo/downstream/tap/run_sim_tap.py b/examples/advanced/bionemo/downstream/tap/run_sim_tap.py index 15d8ca4fce..54888d7385 100644 --- a/examples/advanced/bionemo/downstream/tap/run_sim_tap.py +++ b/examples/advanced/bionemo/downstream/tap/run_sim_tap.py @@ -22,7 +22,7 @@ # job_name = "fedavg_tap_esm1nv" simulator = SimulatorRunner( - job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=1 + job_folder=f"jobs/{job_name}", workspace=f"/tmp/nvflare/results/{job_name}", n_clients=n_clients, threads=n_clients ) run_status = simulator.run() print("Simulator finished with run_status", run_status) diff --git a/examples/advanced/bionemo/task_fitting/task_fitting.ipynb b/examples/advanced/bionemo/task_fitting/task_fitting.ipynb index bbc12cac32..22859d2482 100644 --- a/examples/advanced/bionemo/task_fitting/task_fitting.ipynb +++ b/examples/advanced/bionemo/task_fitting/task_fitting.ipynb @@ -160,7 +160,7 @@ "source": [ "### Download Model Checkpoints\n", "\n", - "In order to download pretrained models from the NGC registry, **please ensure that you have installed and configured the NGC CLI**, check the [Quickstart Guide](https://docs.nvidia.com/bionemo-framework/latest/quickstart-fw.html) for more info. The following code will download the pretrained model `esm2nv_650M_converted.nemo` from the NGC registry." + "In order to download pretrained models from the NGC registry, **please ensure that you have installed and configured the NGC CLI**, check the [Quickstart Guide](https://docs.nvidia.com/bionemo-framework/latest) for more info. The following code will download the pretrained model `esm2nv_650M_converted.nemo` from the NGC registry." ] }, {