@@ -301,30 +301,13 @@ def patched_inline_call(parent, func, args, kwargs):
301301 logger .debug (
302302 "enable_cpp_symbolic_shape_guards config not available" )
303303
304- with patch .object (InliningInstructionTranslator , "inline_call" ,
305- patched_inline_call ), torch ._dynamo .config .patch (
306- ** dynamo_config_patches
307- << < << << HEAD
308- ), maybe_use_cudagraph_partition_wrapper (
309- self .vllm_config ):
310- from vllm .model_executor .parameter import (
311- BasevLLMParameter , ModelWeightParameter , RowvLLMParameter ,
312- _ColumnvLLMParameter )
313- with (
314- torch ._dynamo .config .patch (
315- "traceable_tensor_subclasses" , [
316- BasevLLMParameter , ModelWeightParameter ,
317- _ColumnvLLMParameter , RowvLLMParameter
318- ]),
319- patch (
320- "torch._dynamo.variables.torch.can_dispatch_torch_function" , # noqa: E501
321- return_false )):
322- output = self .compiled_callable (* args , ** kwargs )
323- == == == =
324- ), _torch27_patch_tensor_subclasses ():
325-
304+ with patch .object (
305+ InliningInstructionTranslator , "inline_call" ,
306+ patched_inline_call ), torch ._dynamo .config .patch (
307+ ** dynamo_config_patches
308+ ), maybe_use_cudagraph_partition_wrapper (
309+ self .vllm_config ), _torch27_patch_tensor_subclasses ():
326310 output = self .compiled_callable (* args , ** kwargs )
327- >> >> >> > 9 adfb5582 (break out function , gate torch )
328311 return output
329312
330313 # usually, capturing the model once is enough, and then we can
@@ -338,7 +321,6 @@ def patched_inline_call(parent, func, args, kwargs):
338321 return cls
339322
340323
341- < << << << HEAD
342324@contextlib .contextmanager
343325def maybe_use_cudagraph_partition_wrapper (vllm_config : VllmConfig ):
344326 """
@@ -386,8 +368,9 @@ def customized_cudagraph_wrapper(f,
386368 if (compilation_config .cudagraph_mode != CUDAGraphMode .NONE
387369 and compilation_config .use_inductor_graph_partition ):
388370 torch ._inductor .utils .set_customized_partition_wrappers (None )
389- == == == =
390- @contextlib .contextmanger
371+
372+
373+ @contextlib .contextmanager
391374def _torch27_patch_tensor_subclasses ():
392375 from vllm .model_executor .parameter import (BasevLLMParameter ,
393376 ModelWeightParameter ,
@@ -411,4 +394,3 @@ def return_false(*args, **kwargs):
411394 "torch._dynamo.variables.torch.can_dispatch_torch_function" , # noqa: E501
412395 return_false )):
413396 yield
414- > >> >> >> 9 adfb5582 (break out function , gate torch )
0 commit comments