Skip to content

Commit 355436f

Browse files

Committed (author name lost in page extraction)

Commit message: add support for torch.Tensor.size

1 parent 6793994 · commit 355436f

File tree

3 files changed

+10
-4
lines changed

3 files changed

+10
-4
lines changed

vllm/model_executor/model_optimizer/fused_op_generator_utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ def arg_schema_type(n: torch.fx.node.Argument,
7272
ty = n.type.__name__
7373
elif n.meta.get('type') and n.meta.get('type').__name__ != 'FakeTensor':
7474
ty = n.meta.get('type').__name__
75-
print(f"meta type {ty}")
7675
if ty == 'Size':
7776
return 'std::vector<int64_t>' if add_prefix else 'int[]'
7877
else:
@@ -84,7 +83,6 @@ def arg_schema_type(n: torch.fx.node.Argument,
8483
if add_prefix and ty in builtin_types:
8584
return builtin_types[ty]
8685

87-
print(f"arg_schema_type {ty}")
8886
if ty == "SymInt" and add_prefix:
8987
return "int64_t"
9088

vllm/model_executor/model_optimizer/naive_fused_op_generator.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,6 @@ def make_fused_op(
320320
f"{arg_schema_type(inp, True)}" for inp in inputs.values()
321321
]
322322
logger.debug("fused op argument types: %s", arg_types)
323-
print(f"fused op argument types: {str(arg_types)}")
324323
for i, name in enumerate(inputs.keys()):
325324
# Don't use const refs here so inputs can be deleted when no
326325
# longer needed.
@@ -370,6 +369,11 @@ def make_fused_op(
370369

371370
for n, fn in zip(nodes, fn_names):
372371
return_type = extract_node_type(n)
372+
373+
# Total hack
374+
if n.op == 'call_method':
375+
return_type = "Size"
376+
373377
input_types = [argument_type_str(inp) for inp in n.args]
374378
comment_str = f" // ({', '.join(input_types)}) -> {return_type}"
375379

@@ -388,7 +392,10 @@ def make_fused_op(
388392
f"{self.sanitize(n.args[0].name, '::')}.")
389393
first_arg = 1
390394

391-
if node_function_target(n).startswith("torch.ops._C"):
395+
# First check is total hack here
396+
if fn == 'size':
397+
call_str = call_str + "sizes("
398+
elif node_function_target(n).startswith("torch.ops._C"):
392399
call_str = call_str + f"{self.sanitize(fn, '::')}.call("
393400
else:
394401
call_str = call_str + f"{self.sanitize(fn, '::')}("

vllm/model_executor/model_optimizer/register.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ def register_defaults():
103103
logger.debug("REGISTER DEFAULTS")
104104
# Note: methods need to be supported via function object and not name.
105105
register_fusable(torch.Tensor.to)
106+
register_fusable(torch.Tensor.size, is_trivial=True)
106107
register_fusable(torch.Tensor.transpose, is_trivial=True)
107108
register_fusable(torch.Tensor.numel, is_trivial=True)
108109
register_fusable('_operator.add')

0 commit comments

Comments (0)