@@ -2479,14 +2479,14 @@ def initialize_weights(self):
         it correctly handles composite models. Indeed, depth-first recursion fails with composite models as it will usually
         initialize the basic blocks (e.g. nn.Linear, nn.Embedding, etc) first, which will cause them to be initialized according
         to the `_init_weights` of the outer-most model instead of the given sub-model.
-        This function dynamically dispatches the correct `init_weights` function to the modules as we advance in the 
+        This function dynamically dispatches the correct `init_weights` function to the modules as we advance in the
         module graph along the recursion. It can handle an arbitrary number of sub-models.
 
         Note that the `torch.no_grad()` decorator is very important as well, as most of our `_init_weights` do not use
         `torch.nn.init` functions (which are all no_grad by default), but simply do in-place ops such as `module.weight.data.zero_()`.
         """
         if not hasattr(torch.nn.Module, "smart_apply"):
-            # This function is equivalent to `torch.nn.Module.apply`, except that it dynamically adjusts the function 
+            # This function is equivalent to `torch.nn.Module.apply`, except that it dynamically adjusts the function
             # to apply as we go down the graph
             def smart_apply(self, fn):
                 for module in self.children():
@@ -2497,7 +2497,7 @@ def smart_apply(self, fn):
                         module.smart_apply(fn)
                 fn(self)
                 return self
-
+
             torch.nn.Module.smart_apply = smart_apply
 
         # Let the magic happen with this simple call
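
For readers unfamiliar with the pattern this diff touches (the change itself only strips trailing whitespace): the dispatch trick described in the docstring can be illustrated with a small standalone sketch. The snippet below is not the transformers implementation; `InnerModel`, `OuterModel`, and the free function `dispatching_apply` are hypothetical stand-ins for the monkey-patched `smart_apply` method, used only to show why swapping the init function at each sub-model boundary keeps basic blocks initialized by their closest enclosing model.

```python
import torch
import torch.nn as nn


# Hypothetical toy classes for illustration only (not the real transformers models):
# each "model" class carries its own `_init_weights`, mimicking a composite model
# that nests a sub-model with different initialization rules.
class InnerModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(4, 4)

    def _init_weights(self, module):
        # The inner model fills its Linear weights with ones
        if isinstance(module, nn.Linear):
            module.weight.data.fill_(1.0)
            module.bias.data.zero_()


class OuterModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.inner = InnerModel()
        self.head = nn.Linear(4, 2)

    def _init_weights(self, module):
        # The outer model zeroes its Linear weights
        if isinstance(module, nn.Linear):
            module.weight.data.zero_()
            module.bias.data.zero_()


def dispatching_apply(module, fn, model_types):
    # Depth-first recursion that swaps `fn` whenever it enters a child that is
    # itself a "model" carrying its own `_init_weights`, so basic blocks are
    # always initialized by the closest enclosing model, not the outer-most one.
    for child in module.children():
        if isinstance(child, model_types):
            dispatching_apply(child, child._init_weights, model_types)
        else:
            dispatching_apply(child, fn, model_types)
    fn(module)
    return module


model = OuterModel()
with torch.no_grad():
    dispatching_apply(model, model._init_weights, (InnerModel, OuterModel))

print(model.inner.proj.weight[0, 0].item())  # 1.0 -> came from InnerModel._init_weights
print(model.head.weight[0, 0].item())        # 0.0 -> came from OuterModel._init_weights
```

With a plain depth-first `model.apply(model._init_weights)`, both Linear layers would be zeroed by the outer rule; the dispatching recursion is what lets the nested model keep its own initialization.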