Merge branch 'main' into main

LeiWang1999 · web-flow · commit 73d866e94b9c · 2025-05-10T17:06:22.000+08:00
diff --git a/examples/convolution/example_convolution.py b/examples/convolution/example_convolution.py
@@ -232,28 +232,7 @@ def main(
     return main
 
 
-def main(n=128, c=128, h=64, w=64, f=128, k=3, s=1, d=1, p=1, use_autotune=True, with_roller=True):
-    N, C, H, W, F, K, S, D, P = n, c, h, w, f, k, s, d, p
-    a = torch.randn(N, H, W, C).cuda().half()
-    b = torch.randn(K, K, C, F).cuda().half()
-    use_autotune = use_autotune
-    with_roller = with_roller
-    if use_autotune:
-        result = get_best_config(N, C, H, W, F, K, S, D, P, with_roller)
-        print(f"best latency {result.latency}")
-        kernel = result.kernel
-    else:
-        kernel = tilelang.compile(
-            convolution(N, C, H, W, F, K, S, D, P, 64, 128, 32, 3, 256), out_idx=[2])
-
-    out_c = kernel(a, b)
-    ref_c = ref_program(S, P, D)(a, b)
-    print(out_c)
-    print(ref_c)
-    # torch.testing.assert_close(out_c, ref_c, rtol=1e-2, atol=1e-2)
-
-
-if __name__ == "__main__":
+def main(argv=None):
     parser = argparse.ArgumentParser()
     parser.add_argument('--n', type=int, default=128, help='n')
     parser.add_argument('--c', type=int, default=128, help='c')
@@ -274,6 +253,25 @@ def main(n=128, c=128, h=64, w=64, f=128, k=3, s=1, d=1, p=1, use_autotune=True,
         action="store_true",
         default=True,
         help="Whether to enable BitBLAS roller for search space")
-    args = parser.parse_args()
-    main(args.n, args.c, args.h, args.w, args.f, args.k, args.s, args.d, args.p, args.use_autotune,
-         args.with_roller)
+
+    args = parser.parse_args(argv)
+    N, C, H, W, F, K, S, D, P = args.n, args.c, args.h, args.w, args.f, args.k, args.s, args.d, args.p
+    a = torch.randn(N, H, W, C).cuda().half()
+    b = torch.randn(K, K, C, F).cuda().half()
+    use_autotune = args.use_autotune
+    with_roller = args.with_roller
+    if use_autotune:
+        result = get_best_config(N, C, H, W, F, K, S, D, P, with_roller)
+        print(f"best latency {result.latency}")
+        kernel = result.kernel
+    else:
+        kernel = tilelang.compile(
+            convolution(N, C, H, W, F, K, S, D, P, 64, 128, 32, 3, 256), out_idx=[2])
+
+    out_c = kernel(a, b)
+    ref_c = ref_program(S, P, D)(a, b)
+    torch.testing.assert_close(out_c, ref_c, rtol=1e-2, atol=1e-2)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/convolution/test_example_convolution.py b/examples/convolution/test_example_convolution.py
@@ -7,7 +7,7 @@
 
 @tilelang.testing.requires_cuda
 def test_example_convolution():
-    example_convolution.main()
+    example_convolution.main([])
 
 
 if __name__ == "__main__":