Enable Sequence Packing and Pipeline Parallel in NeVA (#8957)
* temp save
* temp save 2
* update code
* enable seq packing
* fix neva and clip
* Enable parallel seq packing algo and few other fixes
* Pipeline parallel support
* Update data preprocess
* fix few pp issues
* enable sequence packing w/ PP
* Fix cu_seqlens in inputs
* add assert
* Depend on PP to decide whether do padding
* [pre-commit.ci] auto fixes from pre-commit.com hooks (see https://pre-commit.ci)
* Add docstring
* Fix few evaluation issues
* Fix few PP evaluation issues
* Address comments
* [pre-commit.ci] auto fixes from pre-commit.com hooks (see https://pre-commit.ci)
* address comments
* Fix license
* Few neva bugs
* [pre-commit.ci] auto fixes from pre-commit.com hooks (see https://pre-commit.ci)
* Few neva bugs

---------

Signed-off-by: yaoyu-33 <yaoyu.094@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Signed-off-by: Ao Tang <aot@nvidia.com>
2 people authored and suiyoubi committed May 2, 2024
1 parent 987b3ad commit 6066774
Showing 10 changed files with 627 additions and 68 deletions.
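
For context on the headline feature: sequence packing concatenates several variable-length examples into one buffer and records their boundaries in a cumulative-lengths tensor (`cu_seqlens`, which the "Fix cu_seqlens in inputs" commit above touches), so attention kernels can respect example boundaries without padding tokens. The sketch below illustrates the general idea only; the helper name is hypothetical and this is not the NeVA implementation:

    import torch

    def pack_sequences(seqs):
        """Concatenate 1-D token tensors and build cu_seqlens.

        cu_seqlens[i] is the offset where sequence i starts in the packed
        buffer; the final entry is the total packed length.
        """
        lengths = torch.tensor([s.numel() for s in seqs])
        cu_seqlens = torch.zeros(len(seqs) + 1, dtype=torch.int32)
        cu_seqlens[1:] = torch.cumsum(lengths, dim=0)
        return torch.cat(seqs), cu_seqlens

    # Three sequences of lengths 3, 5, and 2 pack into a length-10 buffer
    # with cu_seqlens == [0, 3, 8, 10].
    packed, cu_seqlens = pack_sequences([torch.ones(3), torch.ones(5), torch.ones(2)])
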
@@ -181,6 +181,7 @@ model:
     additional_special_tokens: null # ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>"]
 
   data:
+    packed_sequence: False
     num_workers: 8
     dataloader_type: cyclic
     data_path:
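
The new `packed_sequence` flag defaults to False, so existing configs keep their behavior. To opt in, one might flip it programmatically; a minimal sketch assuming the stock NeVA config layout (the paths below are illustrative):

    from omegaconf import OmegaConf

    # Path is illustrative; point this at the NeVA config you actually train with.
    cfg = OmegaConf.load("examples/multimodal/multimodal_llm/neva/conf/neva_config.yaml")
    cfg.model.data.packed_sequence = True  # flag introduced by this commit (default: False)
    OmegaConf.save(cfg, "neva_config_packed.yaml")
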
42 changes: 24 additions & 18 deletions examples/multimodal/multimodal_llm/neva/eval/vqa_science.py
@@ -79,7 +79,8 @@ def eval_model(args):
     cfg.base_model_file = args.model_base
     cfg.inference.images_base_path = args.image_folder
     cfg.tensor_model_parallel_size = args.tp
-    cfg.trainer.devices = args.tp
+    cfg.pipeline_model_parallel_size = args.pp
+    cfg.trainer.devices = args.tp * args.pp
 
     model, image_processor = create_neva_model_and_processor(cfg)
     length_params: LengthParam = {
@@ -102,7 +103,8 @@ def eval_model(args):
     questions = get_chunk(questions, args.num_chunks, args.chunk_idx)
     answers_file = os.path.expanduser(args.answers_file)
     os.makedirs(os.path.dirname(answers_file), exist_ok=True)
-    ans_file = open(answers_file, "w")
+    if is_global_rank_zero():
+        ans_file = open(answers_file, "w")
     for i, line in enumerate(tqdm(questions, disable=(not is_global_rank_zero()))):
         idx = line["id"]
         question = line['conversations'][0]
@@ -123,7 +125,8 @@ def eval_model(args):
             sampling_params=sampling_params,
             inference_config=cfg,
         )
-        # import pdb; pdb.set_trace()
+        if responses is None:
+            continue
         outputs = responses[0]["clean_response"]
 
         # prompt for answer
@@ -139,22 +142,24 @@ def eval_model(args):
         outputs = responses[0]["clean_response"]
         outputs = outputs_reasoning + '\n The answer is ' + outputs
 
-        ans_id = shortuuid.uuid()
-        ans_file.write(
-            json.dumps(
-                {
-                    "question_id": idx,
-                    "prompt": cur_prompt,
-                    "text": outputs,
-                    "answer_id": ans_id,
-                    "model_id": args.model_path,
-                    "metadata": {},
-                }
-            )
-            + "\n"
-        )
-        ans_file.flush()
-    ans_file.close()
+        if is_global_rank_zero():
+            ans_id = shortuuid.uuid()
+            ans_file.write(
+                json.dumps(
+                    {
+                        "question_id": idx,
+                        "prompt": cur_prompt,
+                        "text": outputs,
+                        "answer_id": ans_id,
+                        "model_id": args.model_path,
+                        "metadata": {},
+                    }
+                )
+                + "\n"
+            )
+            ans_file.flush()
+    if is_global_rank_zero():
+        ans_file.close()
 
 
 if __name__ == "__main__":
@@ -166,6 +171,7 @@ def eval_model(args):
     parser.add_argument("--answers-file", type=str, default="answer.jsonl")
     parser.add_argument("--conv-mode", type=str, default="llava_v0")
    parser.add_argument("--tp", type=int, default=1)
+    parser.add_argument("--pp", type=int, default=1)
     parser.add_argument("--num-chunks", type=int, default=1)
     parser.add_argument("--chunk-idx", type=int, default=0)
     parser.add_argument("--temperature", type=float, default=0.2)
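
Two details in this file are worth spelling out. First, `cfg.trainer.devices = args.tp * args.pp`: one model replica now spans tensor-parallel times pipeline-parallel GPUs, so the trainer must see the product rather than the TP size alone (e.g. tp=4, pp=2 requires 8 devices). Second, every rank runs the generation loop, but only global rank zero opens and writes the answers file, and `responses` is None on pipeline stages that produce no text, hence the `continue`. A generic version of the rank-zero guard, sketched with plain torch.distributed (NeMo ships its own `is_global_rank_zero` in `nemo.utils.get_rank`; this is not its exact implementation):

    import torch.distributed as dist

    def is_global_rank_zero() -> bool:
        # Treat single-process (non-distributed) runs as rank zero.
        if not dist.is_available() or not dist.is_initialized():
            return True
        return dist.get_rank() == 0
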
38 changes: 22 additions & 16 deletions examples/multimodal/multimodal_llm/neva/neva_evaluation.py
@@ -20,6 +20,7 @@
 from nemo.collections.multimodal.parts.utils import create_neva_model_and_processor
 from nemo.collections.nlp.modules.common.transformer.text_generation import LengthParam, SamplingParam
 from nemo.core.config import hydra_runner
+from nemo.utils.get_rank import is_global_rank_zero
 
 
 try:
@@ -121,22 +122,27 @@ def forward_loop():
         )
     # ============== Quantization End =========================
 
-    results = []
-    for response, prompt in zip(responses, final_prompts):
-        prompt['full_text'] = response["clean_text"]
-        prompt['text'] = response["clean_response"]
-        prompt['model_id'] = cfg.neva_model_file
-        if 'image_path' in prompt:
-            prompt['image'] = prompt.pop('image_path')
-        if 'answer_id' not in prompt:
-            prompt['answer_id'] = 0
-        if 'metadata' not in prompt:
-            prompt['metadata'] = {}
-        results.append(prompt)
-
-    with open(cfg.output_file, 'w') as f:
-        for result in results:
-            f.write(json.dumps(result) + '\n')
+    # PP middle stages do not yield any responses
+    if responses is None:
+        return
+
+    if is_global_rank_zero():
+        results = []
+        for response, prompt in zip(responses, final_prompts):
+            prompt['full_text'] = response["clean_text"]
+            prompt['text'] = response["clean_response"]
+            prompt['model_id'] = cfg.neva_model_file
+            if 'image_path' in prompt:
+                prompt['image'] = prompt.pop('image_path')
+            if 'answer_id' not in prompt:
+                prompt['answer_id'] = 0
+            if 'metadata' not in prompt:
+                prompt['metadata'] = {}
+            results.append(prompt)
+
+        with open(cfg.output_file, 'w') as f:
+            for result in results:
+                f.write(json.dumps(result) + '\n')
 
 
 if __name__ == '__main__':
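
The early `return` encodes a pipeline-parallel invariant: only the last PP stage produces token outputs, so first and middle stages receive `responses is None` and skip the writing path entirely. A schematic of that control flow (illustrative only; the function and arguments are hypothetical, not NeMo's API):

    def collect_responses(model_output, pp_rank, pp_size):
        """Only the final pipeline stage holds generated text; every other
        stage forwards activations and ends up with None here."""
        if pp_rank != pp_size - 1:
            return None          # mirrors `if responses is None: return` above
        return model_output
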
(Diffs for the remaining 7 changed files are not shown.)
