From 6c722d32139c45f8015c3ebe026758a0d8d5e4e9 Mon Sep 17 00:00:00 2001
From: Elias Ellison <eellison@fb.com>
Date: Tue, 7 Apr 2020 09:39:56 -0700
Subject: [PATCH] [JIT] Optimize before inlining (#35562)

Summary:
Resubmit of https://github.com/pytorch/pytorch/pull/35424, only this time I run optimizations in the right order so the PR description is actually true.

This speeds up the inlining pass for the FairSeq model from 180s to 13s, and for the MaskRCNN model from 5s to 1.5s.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/35562

Differential Revision: D20738922

Pulled By: eellison

fbshipit-source-id: 1439cf9d1f0bc780e2d64a744694f8b3b7ba4b70
---
 .../TestTensorBoard.test_pytorch_graph.expect | 46 ++++++++-----------
 test/test_jit.py                              | 14 +++++-
 torch/csrc/jit/api/function_impl.cpp          | 12 ++++-
 3 files changed, 41 insertions(+), 31 deletions(-)

diff --git a/test/expect/TestTensorBoard.test_pytorch_graph.expect b/test/expect/TestTensorBoard.test_pytorch_graph.expect
index b13e4a80c381a..52d232c98778e 100644
--- a/test/expect/TestTensorBoard.test_pytorch_graph.expect
+++ b/test/expect/TestTensorBoard.test_pytorch_graph.expect
@@ -26,7 +26,7 @@ node {
 node {
   name: "output/output.1"
   op: "IO Node"
-  input: "myLinear/Linear[l]/22"
+  input: "myLinear/Linear[l]/21"
   attr {
     key: "_output_shapes"
     value {
@@ -50,7 +50,17 @@ node {
   }
 }
 node {
-  name: "myLinear/Linear[l]/bias/17"
+  name: "myLinear/Linear[l]/17"
+  op: "prim::Constant"
+  attr {
+    key: "attr"
+    value {
+      s: "{ value : 1}"
+    }
+  }
+}
+node {
+  name: "myLinear/Linear[l]/bias/18"
   op: "prim::GetAttr"
   input: "myLinear/Linear[l]/weight/14"
   attr {
@@ -61,7 +71,7 @@ node {
   }
 }
 node {
-  name: "myLinear/Linear[l]/weight/18"
+  name: "myLinear/Linear[l]/weight/19"
   op: "prim::GetAttr"
   input: "myLinear/Linear[l]/weight/14"
   attr {
@@ -72,9 +82,9 @@ node {
   }
 }
 node {
-  name: "myLinear/Linear[l]/19"
+  name: "myLinear/Linear[l]/20"
   op: "aten::t"
-  input: "myLinear/Linear[l]/weight/18"
+  input: "myLinear/Linear[l]/weight/19"
   attr {
     key: "_output_shapes"
     value {
@@ -97,34 +107,14 @@ node {
     }
   }
 }
-node {
-  name: "myLinear/Linear[l]/20"
-  op: "prim::Constant"
-  attr {
-    key: "attr"
-    value {
-      s: "{ value : 1}"
-    }
-  }
-}
 node {
   name: "myLinear/Linear[l]/21"
-  op: "prim::Constant"
-  attr {
-    key: "attr"
-    value {
-      s: "{ value : 1}"
-    }
-  }
-}
-node {
-  name: "myLinear/Linear[l]/22"
   op: "aten::addmm"
-  input: "myLinear/Linear[l]/bias/17"
+  input: "myLinear/Linear[l]/bias/18"
   input: "input/input"
-  input: "myLinear/Linear[l]/19"
   input: "myLinear/Linear[l]/20"
-  input: "myLinear/Linear[l]/21"
+  input: "myLinear/Linear[l]/17"
+  input: "myLinear/Linear[l]/17"
   attr {
     key: "_output_shapes"
     value {
diff --git a/test/test_jit.py b/test/test_jit.py
index 82538a96d3c4a..bca71e629e4fb 100644
--- a/test/test_jit.py
+++ b/test/test_jit.py
@@ -5089,7 +5089,7 @@ def fn2(x):
         with self.capture_stdout():
             traced = torch.jit.trace(fn, [torch.ones(2, 2)])
 
-        FileCheck().check("goodbye").check("hello").run(traced.graph)
+        FileCheck().check("goodbye").run(traced.graph)
 
     def test_big_int_literals(self):
         def ok():
@@ -11360,6 +11360,18 @@ def check(mod):
         imported = self.getExportImportCopy(traced)
         check(imported)
 
+    def test_inlining_cleanup(self):
+        def foo(x):
+            return F.linear(x, x)
+
+        @torch.jit.script
+        def fee(x):
+            return foo(x)
+
+        # the cleanup run with inlining should have removed linear's if statement
+        self.run_pass("inline", fee.graph)
+        FileCheck().check_not("prim::If").run(fee.graph)
+
     def test_trace_export_fns_recursive(self):
         class Foo(torch.nn.Module):
             def __init__(self):
diff --git a/torch/csrc/jit/api/function_impl.cpp b/torch/csrc/jit/api/function_impl.cpp
index c4eabda759ab1..712f501d43eda 100644
--- a/torch/csrc/jit/api/function_impl.cpp
+++ b/torch/csrc/jit/api/function_impl.cpp
@@ -2,6 +2,9 @@
 #include <torch/csrc/jit/passes/inliner.h>
 
 #include <torch/csrc/jit/frontend/error_report.h>
+#include <torch/csrc/jit/passes/constant_pooling.h>
+#include <torch/csrc/jit/passes/constant_propagation.h>
+#include <torch/csrc/jit/passes/peephole.h>
 
 namespace torch {
 namespace jit {
@@ -66,9 +69,14 @@ const c10::FunctionSchema& GraphFunction::getSchema() const {
 }
 
 void preoptimizeGraph(std::shared_ptr<Graph>& graph) {
-  // TODO: Invoke cleanup passes before and after inlining to reduce amount of
-  // code we're copying.
   Inline(*graph);
+  // Peephole Optimize cleans up many "is None" checks and creates constant prop
+  // opportunities
+  PeepholeOptimize(graph);
+  // AliasDb construction can be slow, so run constant propagation just on
+  // immutable types to clean up constant Ifs & other easy wins
+  ConstantPropagationImmutableTypes(graph);
+  ConstantPooling(graph);
 }
 
 } // namespace jit