Commit 9aae82b

Ailing Zhang authored and facebook-github-bot committed
Improvements for current AD (pytorch#17187)
Summary: This PR removes a few `self` sizes that were passed from the forward pass to the backward pass in cases where `self` is already required by the backward pass. This could be the cause of the potential slowdown in pytorch#16689. I will attach a few perf numbers I got in a comment (they are still a bit volatile across runs, though).

Pull Request resolved: pytorch#17187

Differential Revision: D14179512

Pulled By: ailzhang

fbshipit-source-id: 5f3b1f6f26a3fef6dec15623b940380cc13656fa
1 parent e422b27 commit 9aae82b

16 files changed: +837 additions, −478 deletions
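
The gist of the change, as a rough Python sketch (a hypothetical illustration only; the commit itself works at the level of the JIT's TorchScript autodiff formulas, not Python autograd): when a tensor is already saved for the backward pass, its sizes can be recomputed there instead of being emitted and captured as extra forward outputs.

```python
import torch

# Hypothetical sketch only; the names MulBefore/MulAfter are illustrative and
# not part of this commit.
class MulBefore(torch.autograd.Function):
    @staticmethod
    def forward(ctx, a, b):
        # "Before": the size of `a` is stashed as an extra saved value,
        # even though `a` and `b` themselves are already saved.
        ctx.save_for_backward(a, b)
        ctx.a_size = a.size()
        return a * b

    @staticmethod
    def backward(ctx, grad):
        a, b = ctx.saved_tensors
        return (grad * b).sum_to_size(ctx.a_size), (grad * a).sum_to_size(b.size())

class MulAfter(torch.autograd.Function):
    @staticmethod
    def forward(ctx, a, b):
        # "After": only the tensors cross the forward/backward boundary;
        # sizes are recomputed from them inside the backward.
        ctx.save_for_backward(a, b)
        return a * b

    @staticmethod
    def backward(ctx, grad):
        a, b = ctx.saved_tensors
        return (grad * b).sum_to_size(a.size()), (grad * a).sum_to_size(b.size())
```

In the expect-file diffs below, this shows up as `aten::size` calls moving out of the forward graph (where their `int[]` results had to be captured) and into the backward graph, which shrinks the list of captured outputs.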

aten/src/ATen/native/ReduceOps.cpp

Lines changed: 0 additions & 61 deletions
@@ -335,42 +335,6 @@ Tensor& sum_out(Tensor& result, const Tensor& self, IntArrayRef dim, ScalarType
   return at::native::sum_out(result, self, dim, false, dtype);
 }
 
-int64_t _safe_size(IntArrayRef sizes, IntArrayRef dim) {
-  int64_t size = 1;
-  if (sizes.size() == 0) {
-    return 1;
-  }
-  for (auto d : dim) {
-    d = at::maybe_wrap_dim(d, sizes.size());
-    size *= sizes[d];
-  }
-  return size;
-}
-
-Tensor unsqueeze_multiple(const Tensor & t, IntArrayRef dim, size_t n_dims) {
-  auto dims_to_unsqueeze = at::dim_list_to_bitset(dim, n_dims);
-  Tensor res = t;
-  for (size_t i = 0; i < n_dims; i++){
-    if (dims_to_unsqueeze[i]) {
-      res = res.unsqueeze(i);
-    }
-  }
-  return res;
-}
-
-Tensor sum_backward(const Tensor & grad, IntArrayRef sizes, IntArrayRef dims, bool keepdim) {
-  if (!keepdim && sizes.size() > 0) {
-    if (dims.size()==1) {
-      return grad.unsqueeze(dims[0]).expand(sizes);
-    } else {
-      Tensor res = unsqueeze_multiple(grad, dims, sizes.size());
-      return res.expand(sizes);
-    }
-  } else {
-    return grad.expand(sizes);
-  }
-}
-
 Tensor& prod_out(Tensor& result, const Tensor& self, int64_t dim, bool keepdim, ScalarType dtype) {
   return at::native::prod_out(
       result, self, dim, keepdim, c10::optional<ScalarType>(dtype));
@@ -452,16 +416,6 @@ Tensor logsumexp(const Tensor &self, IntArrayRef dims, bool keepdim) {
   return at::native::logsumexp_out(result, self, dims, keepdim);
 }
 
-Tensor logsumexp_backward(const Tensor& grad, const Tensor & self, const Tensor& res, IntArrayRef dim, bool keepdim) {
-  Tensor grad_input = grad;
-  Tensor fwd_res = res;
-  if (!keepdim && self.dim() != 0) {
-    grad_input = unsqueeze_multiple(grad, dim, self.sizes().size());
-    fwd_res = unsqueeze_multiple(res, dim, self.sizes().size());
-  }
-  return grad_input * (self - fwd_res).exp();
-}
-
 static Tensor& norm_out(Tensor &result, const Tensor &self, optional<Scalar> opt_p,
                         IntArrayRef dim, bool keepdim, optional<ScalarType> opt_dtype) {
   auto p = opt_p.value_or(2.0);
@@ -674,21 +628,6 @@ Tensor &var_out(Tensor &result, const Tensor &self, IntArrayRef dim, bool unbias
   return std_var_out(result, self, dim, unbiased, keepdim, false);
 }
 
-Tensor var_backward(const Tensor & grad, const Tensor & self, bool unbiased) {
-  return (2.0 / (self.numel() - unbiased)) * grad * (self - self.mean());
-}
-
-Tensor var_backward(const Tensor & grad, const Tensor & self, IntArrayRef dim, bool unbiased, bool keepdim) {
-  if (self.dim() == 0) {
-    return at::var_backward(grad, self, unbiased);
-  }
-  Tensor unsqueezed_grad = grad;
-  if (!keepdim && self.dim() > 1) {
-    unsqueezed_grad = unsqueeze_multiple(grad, dim, self.sizes().size());
-  }
-  return (2.0 / (at::_safe_size(self.sizes(), dim) - unbiased)) * unsqueezed_grad * (self - self.mean(dim, true));
-}
-
 Tensor std(const Tensor& self, bool unbiased) {
   AT_CHECK(self.type().backend() == Backend::CPU || self.type().backend() == Backend::CUDA,
            "std only supports CPU AND CUDA backend, got: ", toString(self.type().backend()));

aten/src/ATen/native/TensorCompare.cpp

Lines changed: 0 additions & 8 deletions
@@ -34,14 +34,6 @@ namespace at { namespace native {
 DEFINE_DISPATCH(max_kernel);
 DEFINE_DISPATCH(min_kernel);
 
-Tensor index_select_backward(const Tensor& grad, int64_t dim, const Tensor& indices, IntArrayRef sizes, bool keepdim) {
-  Tensor res = at::zeros(sizes, grad.options());
-  if (!keepdim && sizes.size() > 0) {
-    return res.scatter_(dim, indices.unsqueeze(dim), grad.unsqueeze(dim));
-  }
-  return res.scatter_(dim, indices, grad);
-}
-
 bool allclose(const Tensor& self, const Tensor& other, double rtol, double atol, bool equal_nan) {
   return at::isclose(self, other, rtol, atol, equal_nan).all().item<uint8_t>();
 }

aten/src/ATen/native/TensorShape.cpp

Lines changed: 0 additions & 44 deletions
@@ -384,16 +384,6 @@ Tensor permute(const Tensor& self, IntArrayRef dims) {
   return self.as_strided(newSizes, newStrides);
 }
 
-Tensor permute_backwards(const Tensor & grad, IntArrayRef fwd_dims) {
-  // invert the permutation
-  auto ndims = fwd_dims.size();
-  std::vector<int64_t> dims(ndims);
-  for (size_t i = 0; i < ndims; i++) {
-    dims[at::maybe_wrap_dim(fwd_dims[i], ndims)] = i;
-  }
-  return grad.permute(dims);
-}
-
 Tensor repeat(const Tensor& self, IntArrayRef repeats) {
   AT_CHECK(repeats.size() >= (size_t)self.dim(),
            "Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor");
@@ -461,12 +451,6 @@ Tensor select(const Tensor& self, int64_t dim, int64_t index) {
   return self.as_strided(sizes, strides, storage_offset);
 }
 
-Tensor select_backward(const Tensor& grad, IntArrayRef input_sizes, int64_t dim, int64_t index) {
-  auto grad_input = at::zeros(input_sizes, grad.options());
-  grad_input.select(dim, index).copy_(grad);
-  return grad_input;
-}
-
 Tensor slice(const Tensor& self, int64_t dim, int64_t start, int64_t end, int64_t step) {
   int64_t ndim = self.dim();
   if (ndim == 0) {
@@ -500,12 +484,6 @@ Tensor slice(const Tensor& self, int64_t dim, int64_t start, int64_t end, int64_
   return self.as_strided(sizes, strides, storage_offset);
 }
 
-Tensor slice_backward(const Tensor& grad, IntArrayRef input_sizes, int64_t dim, int64_t start, int64_t end, int64_t step) {
-  auto grad_input = at::zeros(input_sizes, grad.options());
-  grad_input.slice(dim, start, end, step).copy_(grad);
-  return grad_input;
-}
-
 std::vector<Tensor> split(const Tensor& self, int64_t split_size, int64_t dim) {
   AT_CHECK(self.dim() != 0, "split expects at least a 1-dimensional tensor");
   AT_CHECK(split_size >= 0, "split expects split_size be non-negative, but got split_size=", split_size);
@@ -712,28 +690,6 @@ Tensor squeeze(const Tensor& self) {
   return self.as_strided(std::get<0>(g), std::get<1>(g));
 }
 
-Tensor unsqueeze_to(const Tensor & self, IntArrayRef sizes) {
-  auto result = self;
-
-  int64_t nDims = sizes.size();
-  for (int64_t dim = 0; dim < nDims; dim++) {
-    if (sizes[dim] == 1) {
-      result = result.unsqueeze(dim);
-    }
-  }
-  return result;
-}
-
-Tensor unsqueeze_to(const Tensor & self, int64_t dim, IntArrayRef sizes) {
-  dim = at::maybe_wrap_dim(dim, sizes.size());
-  // in NumPy it's not an error to unsqueeze a scalar, but we still need to avoided
-  // unsqueezing in the backward.
-  if (sizes.size() > 0 && sizes[dim] == 1) {
-    return self.unsqueeze(dim);
-  }
-  return self;
-}
-
 Tensor squeeze(const Tensor& self, int64_t dim) {
   int64_t dims = self.dim();
   dim = maybe_wrap_dim(dim, dims);
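
The removed `permute_backwards` helper applies the inverse permutation to the incoming gradient; a rough Python equivalent (for reference only, not part of this commit):

```python
import torch

# Rough Python rendering of the removed permute_backwards helper.
def permute_backwards(grad, fwd_dims):
    ndims = len(fwd_dims)
    inverse = [0] * ndims
    for i, d in enumerate(fwd_dims):
        inverse[d % ndims] = i  # invert the permutation (negative dims wrap)
    return grad.permute(inverse)
```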

aten/src/ATen/native/native_functions.yaml

Lines changed: 0 additions & 28 deletions
@@ -59,10 +59,6 @@
   dispatch:
     CUDA: _cudnn_init_dropout_state
 
-- func: index_select_backward(Tensor grad, int64_t dim, Tensor indices, int[] sizes, bool keepdim) -> Tensor
-
-- func: select_backward(Tensor grad, int[] input_sizes, int64_t dim, int64_t index) -> Tensor
-
 - func: _fused_dropout(Tensor self, float p, Generator? generator=None) -> (Tensor, Tensor)
   matches_jit_signature: True
   variants: function
@@ -1335,9 +1331,6 @@
 - func: logsumexp(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   matches_jit_signature: True
 
-- func: logsumexp_backward(Tensor grad, Tensor self, Tensor res, int[1] dim, bool keepdim) -> Tensor
-  matches_jit_signature: True
-
 - func: margin_ranking_loss(Tensor input1, Tensor input2, Tensor target, float margin=0.0, int reduction=Mean) -> Tensor
   matches_jit_signature: True
 
@@ -1410,9 +1403,6 @@
 - func: mean(Tensor self, int[1] dim, *, ScalarType dtype, Tensor(a!) out) -> Tensor(a!)
   matches_jit_signature: True
 
-- func: sum_backward(Tensor grad, int[] sizes, int[] dims, bool keepdim) -> Tensor
-  matches_jit_signature: True
-
 - func: median(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   matches_jit_signature: True
   variants: function, method
@@ -1633,9 +1623,6 @@
   matches_jit_signature: True
   variants: method # This is method-only to match the previous tensor API. In the future we could make this a function too.
 
-- func: permute_backwards(Tensor grad, int[] fwd_dims) -> Tensor
-  matches_jit_signature: True
-
 - func: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
   matches_jit_signature: True
 
@@ -1910,15 +1897,11 @@
   variants: function, method
   device_guard: False
 
-- func: _safe_size(int[] sizes, int[] dim) -> int64_t
-
 - func: slice(Tensor(a) self, int dim=0, int start=0, int end=9223372036854775807, int step=1) -> Tensor(a)
   matches_jit_signature: True
   variants: function, method
   device_guard: False
 
-- func: slice_backward(Tensor grad, int[] input_sizes, int64_t dim, int64_t start, int64_t end, int64_t step) -> Tensor
-
 - func: slogdet(Tensor self) -> (Tensor, Tensor)
   matches_jit_signature: True
   variants: function, method
@@ -2009,11 +1992,6 @@
   variants: function, method
   device_guard: False
 
-- func: unsqueeze_to(Tensor self, int[] sizes) -> Tensor
-  matches_jit_signature: True
-
-- func: unsqueeze_to(Tensor self, int64_t dim, int[] sizes) -> Tensor
-
 - func: squeeze_(Tensor(a!) self) -> Tensor(a!)
   matches_jit_signature: True
   variants: method
@@ -2310,12 +2288,6 @@
 - func: var(Tensor self, int[1] dim, bool unbiased=True, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   matches_jit_signature: True
 
-- func: var_backward(Tensor grad, Tensor self, bool unbiased) -> Tensor
-  matches_jit_signature: True
-
-- func: var_backward(Tensor grad, Tensor self, int[] dim, bool unbiased, bool keepdim) -> Tensor
-  matches_jit_signature: True
-
 - func: view_as(Tensor self, Tensor other) -> Tensor
   matches_jit_signature: True
   variants: method

test/cpp/jit/test_misc.h

Lines changed: 3 additions & 37 deletions
@@ -848,24 +848,7 @@ void testDifferentiate(std::ostream& out = std::cout) {
 
   auto grad_spec = differentiate(graph);
   std::vector<size_t> expected_captured_inputs = {0, 1};
-  // With add/mul implemented using torchscript, we passes sizes of
-  // self & other instead passing the tensors themselve.
-  // The forward graph is now
-  //graph(%0 : Float(2, 3, 4)
-  //      %1 : Float(2, 3, 4)) {
-  //  %2 : Float(2, 3, 4) = aten::mul(%0, %1)
-  //  %self_size.4 : int[] = aten::size(%0)
-  //  %other_size.4 : int[] = aten::size(%1)
-  //  %3 : Float(2, 3, 4) = aten::mul(%2, %0)
-  //  %self_size.2 : int[] = aten::size(%2)
-  //  %4 : int = prim::Constant[value=1]()
-  //  %7 : int[] = aten::size(%3)
-  //  %5 : Float(2, 3, 4) = aten::add(%3, %1, %4)
-  //  return (%5, %2, %self_size.4, %other_size.4, %self_size.2, %7);
-  //}
-  // Thus all the sizes info added in forward outputs are saved
-  // in grad_spec.df_input_caputered_outputs.
-  std::vector<size_t> expected_captured_outputs = {1, 2, 3, 4, 5};
+  std::vector<size_t> expected_captured_outputs = {1, 2};
   std::vector<size_t> expected_input_vjps = {0, 1};
   std::vector<size_t> expected_output_vjps = {0, 1};
   ASSERT_EQ(grad_spec.f_real_outputs, 1);
@@ -897,29 +880,12 @@ void testDifferentiateWithRequiresGrad(std::ostream& out = std::cout) {
   PropagateInputShapes(graph);
   PropagateRequiresGrad(graph);
 
-  // With add/mul implemented using torchscript, we passes sizes of
-  // self & other instead passing the tensors themselve.
-  // The forward graph is now
-  // graph(%0 : Float(*)
-  //       %1 : Float(*)) {
-  //   %2 : Float(*) = aten::mul(%1, %1)
-  //   %3 : int = prim::Constant[value=1]()
-  //   %4 : Float(*) = aten::add(%2, %1, %3)
-  //   %39 : int[] = aten::size(%0)
-  //   %6 : Float(*) = aten::add(%4, %0, %3)
-  //   %7 : Float(*) = aten::mul(%6, %0)
-  //   %self_size.2 : int[] = aten::size(%6)
-  //   %11 : int[] = aten::size(%7)
-  //   %9 : Float(*) = aten::add(%7, %1, %3)
-  //   return (%4, %9, %39, %6, %self_size.2, %11);
-  // }
-
   auto grad_spec = differentiate(graph);
-  std::vector<size_t> expected_input_vjps = {1, 3}; // for e and %6 = (d + a)
+  std::vector<size_t> expected_input_vjps = {1, 2}; // for e and %4 = (d + a)
   std::vector<size_t> expected_output_vjps = {0}; // only a requires grad
   ASSERT_EQ(grad_spec.f_real_outputs, 2);
   ASSERT_EQ(grad_spec.df_input_captured_inputs, std::vector<size_t>({0}));
-  ASSERT_EQ(grad_spec.df_input_captured_outputs, std::vector<size_t>({2, 3, 4, 5}));
+  ASSERT_EQ(grad_spec.df_input_captured_outputs, std::vector<size_t>({2, 3}));
   ASSERT_EQ(grad_spec.df_input_vjps, expected_input_vjps);
   ASSERT_EQ(grad_spec.df_output_vjps, expected_output_vjps);
   out << "testDifferentiateWithRequiresGrad\n";

test/expect/TestFuser.test_lstm_cuda-backward.expect

Lines changed: 20 additions & 20 deletions
@@ -22,35 +22,35 @@ graph(%0 : Float(*, *),
       %forgetgate : Float(*, *),
       %cellgate : Float(*, *),
       %outgate : Float(*, *),
-      %self_size.5 : int[],
-      %other_size.5 : int[],
-      %self_size.3 : int[],
-      %other_size.3 : int[],
-      %28 : int[],
-      %29 : int[],
-      %30 : Float(*, *),
-      %self_size.1 : int[],
-      %other_size.1 : int[]):
-  %33 : int = prim::Constant[value=1]()
-  %34 : Tensor = prim::FusionGroup_0(%outgate, %0, %30, %self_size.1)
-  %grad_other.5 : Tensor, %36 : Tensor, %37 : Tensor, %38 : Tensor = prim::FusionGroup_1(%forgetgate, %9, %ingate, %cellgate, %1, %30, %0, %outgate, %other_size.5, %self_size.5, %28, %other_size.3, %self_size.3, %29, %other_size.1)
+      %24 : int[],
+      %25 : int[],
+      %26 : Float(*, *)):
+  %27 : int = prim::Constant[value=1]()
+  %28 : int[] = aten::size(%outgate)
+  %29 : int[] = aten::size(%26)
+  %30 : int[] = aten::size(%ingate)
+  %31 : int[] = aten::size(%cellgate)
+  %32 : int[] = aten::size(%forgetgate)
+  %33 : int[] = aten::size(%9)
+  %34 : Tensor = prim::FusionGroup_0(%outgate, %0, %26, %28)
+  %grad_other.5 : Tensor, %36 : Tensor, %37 : Tensor, %38 : Tensor = prim::FusionGroup_1(%forgetgate, %9, %ingate, %cellgate, %1, %26, %0, %outgate, %33, %32, %24, %31, %30, %25, %29)
   %39 : Tensor[] = prim::ListConstruct(%38, %36, %37, %34)
-  %40 : Tensor = aten::cat(%39, %33)
+  %40 : Tensor = aten::cat(%39, %27)
   %41 : Tensor = aten::_grad_sum_to_size(%40, %19)
   %42 : Tensor = aten::_grad_sum_to_size(%40, %17)
   %43 : Tensor = aten::_grad_sum_to_size(%40, %14)
   %44 : Tensor = aten::_grad_sum_to_size(%40, %15)
   %45 : Float(*, *) = aten::t(%13)
-  %46 : Float(*, *) = aten::mm(%44, %45)
+  %grad_self.7 : Float(*, *) = aten::mm(%44, %45)
   %47 : Float(*, *) = aten::t(%10)
-  %48 : Float(*, *) = aten::mm(%47, %44)
-  %grad_self.7 : Float(*, *) = aten::t(%48)
+  %grad_mat2.1 : Float(*, *) = aten::mm(%47, %44)
+  %grad_self.9 : Float(*, *) = aten::t(%grad_mat2.1)
   %50 : Float(*, *) = aten::t(%12)
-  %51 : Float(*, *) = aten::mm(%43, %50)
+  %grad_self.11 : Float(*, *) = aten::mm(%43, %50)
   %52 : Float(*, *) = aten::t(%11)
-  %53 : Float(*, *) = aten::mm(%52, %43)
-  %grad_self.9 : Float(*, *) = aten::t(%53)
-  return (%grad_other.5, %41, %42, %46, %grad_self.7, %51, %grad_self.9)
+  %grad_mat2.3 : Float(*, *) = aten::mm(%52, %43)
+  %grad_self.13 : Float(*, *) = aten::t(%grad_mat2.3)
+  return (%grad_other.5, %41, %42, %grad_self.7, %grad_self.9, %grad_self.11, %grad_self.13)
 with prim::FusionGroup_0 = graph(%0 : Float(*, *),
       %1 : Float(*, *),
       %2 : Float(*, *),

test/expect/TestFuser.test_lstm_cuda-forward.expect

Lines changed: 8 additions & 10 deletions
@@ -28,16 +28,14 @@ with prim::DifferentiableGraph_0 = graph(%0 : Float(*, *),
   %17 : Tensor, %18 : Tensor, %19 : Tensor, %20 : Tensor = prim::ListUnpack(%16)
   %21 : int[] = prim::BroadcastSizes(%11, %12)
   %22 : int[] = prim::BroadcastSizes(%21, %13)
-  %other_size.6 : int[] = aten::size(%0)
-  %hy : Float(*, *), %25 : Float(*, *), %cy : Float(*, *), %outgate.1 : Float(*, *), %cellgate.1 : Float(*, *), %forgetgate.1 : Float(*, *), %ingate.1 : Float(*, *) = prim::FusionGroup_0(%0, %20, %19, %18, %17)
-  %31 : int[] = aten::size(%25)
-  %32 : int[] = aten::size(%outgate.1)
-  %33 : int[] = aten::size(%cellgate.1)
-  %34 : int[] = aten::size(%forgetgate.1)
-  %35 : int[] = aten::size(%ingate.1)
-  %36 : int[] = prim::BroadcastSizes(%34, %other_size.6)
-  %37 : int[] = prim::BroadcastSizes(%35, %33)
-  return (%hy, %cy, %7, %9, %11, %12, %21, %13, %22, %14, %ingate.1, %forgetgate.1, %cellgate.1, %outgate.1, %34, %other_size.6, %35, %33, %36, %37, %25, %32, %31)
+  %hy : Float(*, *), %24 : Float(*, *), %cy : Float(*, *), %outgate.1 : Float(*, *), %cellgate.1 : Float(*, *), %forgetgate.1 : Float(*, *), %ingate.1 : Float(*, *) = prim::FusionGroup_0(%0, %20, %19, %18, %17)
+  %30 : int[] = aten::size(%0)
+  %31 : int[] = aten::size(%cellgate.1)
+  %32 : int[] = aten::size(%forgetgate.1)
+  %33 : int[] = aten::size(%ingate.1)
+  %34 : int[] = prim::BroadcastSizes(%32, %30)
+  %35 : int[] = prim::BroadcastSizes(%33, %31)
+  return (%hy, %cy, %7, %9, %11, %12, %21, %13, %22, %14, %ingate.1, %forgetgate.1, %cellgate.1, %outgate.1, %34, %35, %24)
 with prim::FusionGroup_0 = graph(%0 : Float(*, *),
       %1 : Tensor,
       %2 : Tensor,
