 # @tilelang.jit(target="cuda")
 # target currently can be "cuda" or "hip" or "cpu".
 # if not specified, it will be inferred from the input tensors during compile time
-@tilelang.jit(execution_backend="tvm_ffi", pass_configs={
-    tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True,
-    tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True,
-})
+@tilelang.jit
 def matmul(M, N, K, block_M, block_N, block_K, dtype="float16", accum_dtype="float"):

     @T.prim_func
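The comments above note that target can be "cuda", "hip", or "cpu", and is otherwise inferred from the input tensors at compile time. If the removed pass configuration is still needed (for example while debugging TMA lowering or warp specialization on a particular GPU), it can be combined with an explicit target. A hedged sketch that reuses only the identifiers visible in the removed lines; whether these flags are actually required depends on the target:

# Sketch (assumption: target, execution_backend and pass_configs can be combined in one decorator call).
@tilelang.jit(
    target="cuda",  # or "hip" / "cpu"; omit to infer from the inputs at compile time
    execution_backend="tvm_ffi",
    pass_configs={
        tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True,
        tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True,
    },
)
def matmul(M, N, K, block_M, block_N, block_K, dtype="float16", accum_dtype="float"):
    ...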
@@ -51,7 +48,7 @@ def matmul_relu_kernel(
     return matmul_relu_kernel


-M = T.dynamic("m")  # M = T.dynamic("m") if you want to use dynamic shape
+M = 1024  # M = T.dynamic("m") if you want to use dynamic shape
 N = 1024
 K = 1024
 block_M = 128
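The inline comment above keeps the dynamic-shape option visible. A minimal sketch of that variant, assuming T.dynamic("m") marks M as a symbolic dimension and that the jitted kernel then accepts inputs with any runtime row count (not verified here):

# Sketch: compile once with a symbolic M, reuse the kernel for any row count at run time.
M_dyn = T.dynamic("m")
dyn_matmul_relu_kernel = matmul(M_dyn, N, K, block_M, block_N, block_K)

The rest of the example would proceed unchanged, with the first input allocated as torch.randn(m, K, ...) for whatever m is used at run time.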
@@ -60,11 +57,10 @@ def matmul_relu_kernel(

 # 1. Define the kernel (matmul) and compile/lower it into an executable module
 matmul_relu_kernel = matmul(M, N, K, block_M, block_N, block_K)
-
+print(matmul_relu_kernel.get_kernel_source())
 # 3. Test the kernel in Python with PyTorch data
 import torch

-M = 0
 # Create random input tensors on the GPU
 a = torch.randn(M, K, device="cuda", dtype=torch.float16)
 b = torch.randn(K, N, device="cuda", dtype=torch.float16)
@@ -81,3 +77,13 @@ def matmul_relu_kernel(
 torch.testing.assert_close(c, ref_c, rtol=1e-2, atol=1e-2)
 print("Kernel output matches PyTorch reference.")

+# 4. Retrieve and inspect the generated CUDA source (optional)
+# cuda_source = matmul_relu_kernel.get_kernel_source()
+# print("Generated CUDA kernel:\n", cuda_source)
+
+# 5. Profile latency with the kernel's profiler
+profiler = matmul_relu_kernel.get_profiler(tensor_supply_type=tilelang.TensorSupplyType.Normal)
+
+latency = profiler.do_bench()
+
+print(f"Latency: {latency} ms")