File tree Expand file tree Collapse file tree 1 file changed +16
-0
lines changed Expand file tree Collapse file tree 1 file changed +16
-0
lines changed Original file line number Diff line number Diff line change 8989else :
9090 xgr = LazyLoader ("xgr" , globals (), "xgrammar" )
9191
92+ import torch_npu
9293import vllm .envs as envs_vllm
9394
9495import vllm_ascend .envs as envs_ascend
9596
97+ if is_310p ():
98+ torch_npu .npu .set_compile_mode (jit_compile = False )
99+
96100
97101@dataclass
98102class GraphCaptureContext :
@@ -1991,6 +1995,18 @@ def load_model(self) -> None:
19911995
19921996 with DeviceMemoryProfiler () as m : # noqa: SIM117
19931997 self .model = get_model (vllm_config = self .vllm_config )
1998+
1999+ if is_310p ():
2000+ from vllm .model_executor .layers .linear import (
2001+ MergedColumnParallelLinear , QKVParallelLinear ,
2002+ RowParallelLinear )
2003+ for module in self .model .modules ():
2004+ if isinstance (module ,
2005+ (MergedColumnParallelLinear ,
2006+ QKVParallelLinear , RowParallelLinear )):
2007+ module .weight .data = torch_npu .npu_format_cast (
2008+ module .weight .data , ACL_FORMAT_FRACTAL_NZ )
2009+
19942010 try :
19952011 # For version compatibility, remove this after we abort vllm v0.9.1 support
19962012 from vllm .model_executor .models .interfaces import \
You can’t perform that action at this time.
0 commit comments