@@ -110,6 +110,27 @@ def configure(self, config: VllmConfig):
110110 self .post_cleanup = PostCleanupPass (config )
111111 self .fix_functionalization = FixFunctionalizationPass (config )
112112
113+ # [HACK: Bug with Inductor graph partition and torch.compile cache]
114+ # In PyTorch 2.9, torch.compile has a bug where the graph
115+ # partition is not taken into account during caching.
116+ # Because vLLM's Mode.VLLM_COMPILE is the only mode that uses
117+ # Inductor graph partition, and VLLM_COMPILE implies there
118+ # is a PostGradPassManager, we put the list of operators to graph
119+ # partition into the PostGradPassManager's uuid (which
120+ # then gets incorporated into Inductor's FX graph cache key).
121+ # Remove this hack whenever torch.compile fixes it.
122+
123+ # This is the list of operators that vLLM asks Inductor to split.
124+ self .inductor_splitting_ops = []
125+ if (
126+ config .compilation_config .use_inductor_graph_partition
127+ and config .compilation_config .splitting_ops is not None
128+ ):
129+ # Sort them so we're not dependent on the ordering.
130+ self .inductor_splitting_ops = sorted (
131+ config .compilation_config .splitting_ops
132+ )
133+
113134 def add (self , pass_ : InductorPass ):
114135 assert isinstance (pass_ , InductorPass )
115136 self .passes .append (pass_ )
@@ -120,8 +141,16 @@ def uuid(self):
120141 affects compilation caching. Its uuid depends on the UUIDs of all
121142 dependent passes and the pass config. See InductorPass for more info.
122143 """
123- state = {"pass_config" : self .pass_config .uuid (), "passes" : []}
144+ state = {
145+ "pass_config" : self .pass_config .uuid (),
146+ "passes" : [],
147+ "inductor_splitting_ops" : [],
148+ }
124149 for pass_ in self .passes :
125150 state ["passes" ].append (pass_ .uuid ())
126151 state ["passes" ].append (self .fix_functionalization .uuid ())
152+
153+ # See [HACK: Bug with Inductor graph partition and torch.compile cache]
154+ state ["inductor_splitting_ops" ].extend (self .inductor_splitting_ops )
155+
127156 return InductorPass .hash_dict (state )
0 commit comments