Commit 0956ec5: TP unsupported models and assertions (microsoft#2810)

Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
2 people authored and CodeSkull-1 committed Mar 6, 2023
1 parent bf0b929 commit 0956ec5

Showing 2 changed files with 17 additions and 2 deletions.
15 changes: 14 additions & 1 deletion deepspeed/module_inject/auto_tp.py
```diff
@@ -27,13 +27,25 @@ def get_module_list(model):
         return mlist

     def supported(model):
-        unsupported = ['bloom', 'codegen', 'flaubert', 'xlm']
+        unsupported = [
+            'bloom',
+            'codegen',
+            'deberta',
+            'flaubert',
+            'fsmt',
+            'gpt2',
+            'led',
+            'longformer',
+            'xlm',
+            'xlnet'
+        ]
         model = str(model)
         key = re.search(r": (.*?)Model", model)
         if key is None:
             key = re.search(r": (.*?)Stack", model)
         if key is None:
             key = re.match(r"(.*?)Model", model)
+        assert key is not None, "Not able to determine model policy automatically. Please provide policy."
         if key.group(1).lower() in unsupported:
             return False
         return True
```
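The detection logic in this hunk can be exercised in isolation. Below is a minimal standalone sketch of the same regex-based family check; the function name `family_supported` and the sample model strings are illustrative (in DeepSpeed the regexes run against `str(model)` of an actual module), but the patterns and the unsupported list mirror the diff above:

```python
import re

# Model families auto-TP rejects (mirrors the list added in this commit).
UNSUPPORTED = {'bloom', 'codegen', 'deberta', 'flaubert', 'fsmt',
               'gpt2', 'led', 'longformer', 'xlm', 'xlnet'}

def family_supported(model_repr: str) -> bool:
    """Extract the model family from a printed module and check it.

    `model_repr` stands in for str(model), whose first line typically
    looks like "GPT2Model(" or contains "...: T5Stack(".
    """
    key = re.search(r": (.*?)Model", model_repr)
    if key is None:
        key = re.search(r": (.*?)Stack", model_repr)
    if key is None:
        key = re.match(r"(.*?)Model", model_repr)
    # The new assertion: fail loudly instead of crashing later on key=None.
    assert key is not None, \
        "Not able to determine model policy automatically. Please provide policy."
    return key.group(1).lower() not in UNSUPPORTED

print(family_supported("BertModel("))  # bert is not in the unsupported list
print(family_supported("GPT2Model("))  # gpt2 was added to the unsupported list
```

A string that matches none of the three patterns now raises an `AssertionError` with an actionable message rather than an opaque `AttributeError` on `key.group(1)`.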
```diff
@@ -91,4 +103,5 @@ def tp_parser(model):
             gem_list = list(set(gem_list))
             policy_list = AutoTP.update_policy_list(policy_list, module, gem_list)
             gem_list = []
+        assert len(policy_list), "Not able to determine model policy automatically. Please provide policy."
         return policy_list
```
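The second assertion applies the same fail-fast idea one level up: if the parser walked the whole model and collected no policies, returning an empty list would only fail later and more obscurely. A toy sketch of the pattern (the function and input shape are hypothetical, not the real `tp_parser` signature):

```python
def parse_policies(modules):
    """Collect (module, gems) pairs; fail loudly if nothing was found.

    `modules` is a hypothetical list of (name, gem_list) pairs standing in
    for the layers tp_parser walks.
    """
    policy_list = []
    for module, gem_list in modules:
        if gem_list:
            # Deduplicate, as the real code does with list(set(gem_list)).
            policy_list.append((module, sorted(set(gem_list))))
    assert len(policy_list), \
        "Not able to determine model policy automatically. Please provide policy."
    return policy_list
```

Called on a model where no layer yields a policy, this raises immediately with the same message as the `supported()` check, pointing the user to the manual-policy escape hatch.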
4 changes: 3 additions & 1 deletion docs/_tutorials/automatic-tensor-parallelism.md
```diff
@@ -88,13 +88,15 @@ deepspeed --num_gpus <num_gpus> DeepSpeedExamples/inference/huggingface/text-gen
 The following results were collected using V100 SXM2 32GB GPUs.

 ### Max New Tokens = 50
+
 | Test       | Memory Allocated per GPU   | Max Batch Size   | Max Throughput per GPU   |
 | ---------- | -------------------------- | ---------------- | ------------------------ |
 | No TP      | 23.94 GB                   | 64               | 18.84 TFlops             |
 | 2 GPU TP   | 12.23 GB                   | 320              | 27.17 TFlops             |
 | 4 GPU TP   | 6.36 GB                    | 664              | 27.63 TFlops             |

 ### Max New Tokens = 1024
+
 | Test       | Memory Allocated per GPU   | Max Batch Size   | Max Throughput per GPU   |
 | ---------- | -------------------------- | ---------------- | ------------------------ |
 | No TP      | 23.94 GB                   | 2                | 1.65 TFlops              |
```
```diff
@@ -113,7 +115,6 @@ The following model families have been successfully tested with automatic tensor
 - electra
 - ernie
 - esm
-- gpt2
 - gpt-j
 - gpt-neo
 - gpt-neox
```
```diff
@@ -146,6 +147,7 @@ The following models are not currently supported with automatic tensor parallelism
 - deberta
 - flaubert
 - fsmt
+- gpt2
 - led
 - longformer
 - xlm
```
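For families on this unsupported list, the assertion message above ("Please provide policy.") points at the manual alternative: passing an explicit injection policy to DeepSpeed inference. The sketch below only builds the policy mapping; the `GPT2Block` stand-in class and the projection-layer names are illustrative assumptions (check `print(model)` for the real module names), and the commented-out `init_inference` call shows where the mapping would be used:

```python
# Hypothetical sketch: for a family auto-TP rejects (e.g. gpt2), supply an
# explicit injection policy instead of relying on automatic detection.

class GPT2Block:
    # Stand-in for transformers.models.gpt2.modeling_gpt2.GPT2Block;
    # defined locally only so this sketch is self-contained.
    pass

# Maps a transformer-block class to the output linear layers whose
# results must be all-reduced after tensor-parallel sharding.
# Layer names are assumptions based on the HF GPT-2 module layout.
injection_policy = {GPT2Block: ('attn.c_proj', 'mlp.c_proj')}

# With DeepSpeed and a real model, this mapping would be passed as:
#   engine = deepspeed.init_inference(model, mp_size=2,
#                                     dtype=torch.float16,
#                                     injection_policy=injection_policy)
```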