From e0c8af1989a3a3ec233955c4d58e9ce40567e772 Mon Sep 17 00:00:00 2001 From: Hu Shenwei Date: Tue, 29 Jul 2025 19:37:05 +0800 Subject: [PATCH 1/2] fix(gemm.py): delete nvprof_nvtx_pop() --- python/paddle/incubate/fp8/deep_gemm/jit_kernels/gemm.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/paddle/incubate/fp8/deep_gemm/jit_kernels/gemm.py b/python/paddle/incubate/fp8/deep_gemm/jit_kernels/gemm.py index 6bf3e39e54465d..a84fbad6e30348 100644 --- a/python/paddle/incubate/fp8/deep_gemm/jit_kernels/gemm.py +++ b/python/paddle/incubate/fp8/deep_gemm/jit_kernels/gemm.py @@ -255,7 +255,6 @@ def gemm_fp8_fp8_bf16_nt( # NOTES: `get_tma_aligned_lhs_scales` may launch a kernel if not processed by previous kernels lhs_scales = get_col_major_tma_aligned_tensor(lhs_scales) assert rhs_scales.is_contiguous() - paddle.base.core.nvprof_nvtx_pop() # Do nothing if `m` is zero if m == 0: @@ -275,4 +274,3 @@ def gemm_fp8_fp8_bf16_nt( # Run the kernel. runtime(*args) - paddle.base.core.nvprof_nvtx_pop() From e8dc443fb4cadbf4df3f0d5395640a497c2cc880 Mon Sep 17 00:00:00 2001 From: Hu Shenwei Date: Wed, 30 Jul 2025 17:58:24 +0800 Subject: [PATCH 2/2] fix(compiler.py): delete nvprof_nvtx_pop() --- python/paddle/incubate/fp8/deep_gemm/jit/compiler.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/paddle/incubate/fp8/deep_gemm/jit/compiler.py b/python/paddle/incubate/fp8/deep_gemm/jit/compiler.py index 1a9e7943453395..2d6e27707e726b 100644 --- a/python/paddle/incubate/fp8/deep_gemm/jit/compiler.py +++ b/python/paddle/incubate/fp8/deep_gemm/jit/compiler.py @@ -23,8 +23,6 @@ import subprocess import uuid -import paddle - from ..utils import get_cuda_home from . import interleave_ffma from .runtime import Runtime, RuntimeCache @@ -158,7 +156,6 @@ def build(name: str, arg_defs: tuple, code: str) -> Runtime: if os.getenv("DG_JIT_DEBUG", None): print(f"Using cached JIT runtime {name} during build") return runtime_cache[path] - paddle.base.core.nvprof_nvtx_pop() # Write the code os.makedirs(path, exist_ok=True) args_path = f"{path}/kernel.args"