
Commit 60ef042

add comments for the meta device registration
Signed-off-by: ganyi <pleaplusone.gy@gmail.com>
1 parent 6e40e45 commit 60ef042

File tree

2 files changed: +67 −0 lines

csrc/torch_binding_meta.cpp

Lines changed: 27 additions & 0 deletions
@@ -5,6 +5,33 @@
 #include <torch_npu/csrc/framework/OpCommand.h>
 #include <torch_npu/csrc/npu/Module.h>
 #include "utils.h"
+/*
+ * How to write a meta implementation for a custom operator (meta kernel):
+ *
+ * Meta implementations are used for shape and dtype inference, tracing, and export.
+ * They do NOT perform any real computation or allocate device memory.
+ * Instead, they return empty tensors with the correct shapes, dtypes, and device types.
+ *
+ * Steps to write a meta implementation:
+ * 1. The function signature should match the operator's schema, but only use the arguments
+ *    necessary to infer output shapes and dtypes.
+ * 2. Use input tensor shapes, dtypes, and any relevant arguments to compute the output shapes.
+ * 3. Return empty tensors (e.g., at::empty_symint, at::empty_like) with the correct shape and dtype.
+ * 4. Do NOT perform any real computation or data movement.
+ * 5. Register the meta implementation with the "Meta" dispatch key using TORCH_LIBRARY_IMPL or similar.
+ *
+ * Example:
+ *   std::tuple<at::Tensor, at::Tensor> my_op_meta(
+ *       at::Tensor &input, int64_t some_param) {
+ *     // Infer output shape based on input and parameters
+ *     auto out_shape = ...;
+ *     at::Tensor out = at::empty_symint(out_shape, input.options());
+ *     // Return empty tensor(s) with correct shape/dtype
+ *     return {out, ...};
+ *   }
+ *
+ * See below for real examples.
+ */
 
 namespace vllm_ascend {
 namespace meta {
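The five C++ steps in the comment above have a direct Python analogue. Below is a minimal, hedged sketch: the namespace `demo_meta` and the op `split_halves` are hypothetical examples invented for illustration, not ops from this commit.

```python
import torch
from torch.library import Library

# Hypothetical op in a scratch namespace, used only to illustrate the steps.
demo_lib = Library("demo_meta", "DEF")
demo_lib.define("split_halves(Tensor input) -> (Tensor, Tensor)")

def split_halves_meta(input: torch.Tensor):
    # Step 2: infer output shapes from the input; step 4: no real computation.
    half = input.shape[-1] // 2
    shape = (*input.shape[:-1], half)
    # Step 3: return empty tensors with the inferred shape and dtype.
    out0 = torch.empty(shape, dtype=input.dtype, device=input.device)
    out1 = torch.empty(shape, dtype=input.dtype, device=input.device)
    return out0, out1

# Step 5: register under the "Meta" dispatch key.
demo_lib.impl("split_halves", split_halves_meta, "Meta")

# Calling the op on meta tensors runs only the meta kernel: shapes are
# inferred, but no memory is allocated on any real device.
x = torch.empty(2, 6, device="meta")
a, b = torch.ops.demo_meta.split_halves(x)
print(a.shape, b.shape)  # torch.Size([2, 3]) torch.Size([2, 3])
```

Because the meta kernel never touches data, the same function also serves `torch.compile` tracing, where only shapes and dtypes flow through the graph.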

vllm_ascend/ops/meta_registration.py

Lines changed: 40 additions & 0 deletions
@@ -1,8 +1,48 @@
 import torch
 from torch.library import Library
 
+# This file provides a template and registration utilities for writing "meta" implementations
+# of custom operators in Python for the vllm_ascend project.
+#
+# We offer two ways to implement meta implementations for custom ops:
+# 1. Python meta implementation (as shown in this file): Write a Python function that
+#    takes the same arguments as your operator and returns empty tensors with the correct
+#    shapes and dtypes. This is useful for rapid prototyping and for ops that are only
+#    used in Python.
+# 2. C++ meta implementation: You can also implement the meta function in C++ for better
+#    performance or to match the C++ op logic more closely. See `torch_binding_meta.cpp`
+#    for examples of C++ meta implementations and how to register them.
+#
+# Both approaches enable tracing, export, and shape inference in PyTorch and vLLM, which
+# is essential for supporting `torch.compile` and aclgraph.
+
+
+# How to add a new meta implementation in Python:
+# -----------------------------------------------
+# 1. Write a Python function that takes the same arguments as your operator, and returns
+#    empty tensors (using torch.empty_like, torch.empty, etc.) with the correct shapes
+#    and dtypes. Do NOT perform any real computation or allocate device memory.
+#
+# 2. Register your meta function using `register_meta_if_necessary`, providing:
+#    - The namespace (usually "_C" for custom ops)
+#    - The operator name (as registered in C++)
+#    - The Python meta function
+#    - (Optional) The overload name, if your op has overloads
+#
+# 3. The registration utility will check if a meta implementation already exists for your
+#    op, and only register if necessary. This avoids duplicate registrations.
+#
+# 4. Example meta implementations are provided below for rotary_embedding and
+#    get_masked_input_and_mask.
+#
+# 5. When developing new custom ops, always provide a meta implementation: it enables
+#    tracing, export, and shape inference in PyTorch and vLLM, which in turn lets
+#    `torch.compile` and aclgraph capture the op.
+#
+# For more details, see: https://pytorch.org/docs/stable/notes/extending.html#meta-tensors
 
 lib = Library("_C", "IMPL")
 
+
 def register_meta_if_necessary(ns: str, op_name: str, fn, overload: str = ""):
     if overload != "":
         op_name = op_name + "." + overload
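The steps above can be sketched end to end. This is a plausible reconstruction, not the file's actual code: the utility here takes an explicit `lib` argument (the real `register_meta_if_necessary` uses the module-level `lib`), the namespace `demo_reg` and op `scale_rows` are hypothetical, and the already-registered check via `torch._C._dispatch_has_kernel_for_dispatch_key` is an assumption about the internal API used.

```python
import torch
from torch.library import Library

# Scratch namespace so the sketch does not touch the real "_C" ops.
demo_lib = Library("demo_reg", "DEF")
demo_lib.define("scale_rows(Tensor input, float factor) -> Tensor")

def register_meta_if_necessary(lib: Library, ns: str, op_name: str, fn,
                               overload: str = ""):
    if overload != "":
        op_name = op_name + "." + overload
    # Step 3: skip if a Meta kernel already exists (internal API; an assumption).
    if torch._C._dispatch_has_kernel_for_dispatch_key(f"{ns}::{op_name}", "Meta"):
        return
    lib.impl(op_name, fn, "Meta")

def scale_rows_meta(input: torch.Tensor, factor: float):
    # Step 1: shape/dtype inference only; the output matches the input exactly.
    return torch.empty_like(input)

# Step 2: register the meta function; a second call is a no-op rather than
# a duplicate-registration error.
register_meta_if_necessary(demo_lib, "demo_reg", "scale_rows", scale_rows_meta)
register_meta_if_necessary(demo_lib, "demo_reg", "scale_rows", scale_rows_meta)

out = torch.ops.demo_reg.scale_rows(torch.empty(4, 8, device="meta"), 2.0)
print(out.shape)  # torch.Size([4, 8])
```

Guarding the registration matters because `Library.impl` raises if a kernel for the same dispatch key is registered twice, which can happen when both the C++ binding and the Python module provide a meta implementation.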
