Skip to content

Commit

Permalink
Reduction rand like patch (#2031)
Browse files Browse the repository at this point in the history
*_like operations are not filtering out reduction domains on their inputs. This resulted in the output differing in shape from the input. We ran into this issue on a Hugging Face benchmark with the Python stack.

1. updated the operation to filter input domain with noReduction;
2. added a test case to verify the breakage and fix;
  • Loading branch information
jjsjann123 committed Oct 5, 2022
1 parent bc77266 commit 40e2703
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 8 deletions.
18 changes: 10 additions & 8 deletions torch/csrc/jit/codegen/cuda/arith.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,17 +471,18 @@ TensorView* uniform(
return out;
}

// Creates a new TensorView filled with uniform random values whose shape
// matches the *logical* (non-reduction) shape of `tv`.
//
// Reduction axes must be filtered out here: a consumer of a reduction
// output sees only the non-reduction domain, so including reduction IDs
// would produce an output whose shape differs from what callers observe
// on the input. Expanded (broadcast) extents are preserved via
// getMaybeExpandedExtent().
//
// Precondition: tv has a floating-point dtype (checked below).
TensorView* rand_like(TensorView* tv) {
  TORCH_CHECK(
      isFloatingPointType(tv->dtype()),
      "input must have floating point type, but got ",
      tv->dtype());
  std::vector<Val*> shape;
  // Drop reduction IterDomains so the generated tensor matches the
  // shape seen by consumers of tv.
  auto dom = TensorDomain::noReductions(tv->getMaybeRFactorDomain());
  shape.reserve(dom.size());
  for (auto id : dom) {
    shape.emplace_back(id->getMaybeExpandedExtent());
  }
  return rand(shape, tv->dtype());
}

Val* rand_like(Val* v) {
Expand All @@ -505,8 +506,9 @@ TensorView* full(

TensorView* full_like(TensorView* tv, Val* fill_value) {
std::vector<Val*> shape;
shape.reserve(tv->getMaybeRFactorDomain().size());
for (auto id : tv->getMaybeRFactorDomain()) {
auto dom = TensorDomain::noReductions(tv->getMaybeRFactorDomain());
shape.reserve(dom.size());
for (auto id : dom) {
shape.emplace_back(id->getMaybeExpandedExtent());
}
return full(shape, fill_value, tv->dtype());
Expand Down
30 changes: 30 additions & 0 deletions torch/csrc/jit/codegen/cuda/test/test_gpu_rng.cu
Original file line number Diff line number Diff line change
Expand Up @@ -365,5 +365,35 @@ TEST_F(NVFuserTest, FusionUniform_CUDA) {
}
}
// Regression test: rand_like applied to a reduction output must produce a
// tensor with the reduction axis filtered out. Previously the reduction
// domain leaked into the generated shape, so the fused output's shape was
// wrong. Here tv1 = sum(tv0, {0}) reduces a [2, 3] input to logical shape
// [3]; rand_like(tv1) must therefore generate 3 random values.
TEST_F(NVFuserTest, FusionRandLikeReduction_CUDA) {
auto dtype = kFloat;
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
auto fusion = fusion_ptr.get();
FusionGuard fg(fusion);
// 2-D symbolic input; axis 0 is reduced below.
TensorView* tv0 = makeSymbolicTensor(2, aten_to_data_type(dtype));
fusion->addInput(tv0);
auto tv1 = sum(tv0, {0});
// The op under test: random tensor shaped like the reduction output.
auto tv2 = rand_like(tv1);
auto tv3 = add(tv1, tv2);
fusion->addOutput(tv3);
FusionExecutorCache fec(std::move(fusion_ptr));
auto options = at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
at::Tensor t0 = at::zeros({2, 3}, options);
// Seed before the fused run so its RNG draws can be reproduced below.
at::manual_seed(0);
auto cg_outputs = fec.runFusionWithInputs({t0});
auto out = cg_outputs[0];
// Re-seed so the eager reference consumes the same RNG sequence as the
// fused execution above.
at::manual_seed(0);
auto t1 = t0.sum(0);
// generate_uniform presumably mirrors the fusion's RNG draws (3 values,
// matching the non-reduction extent) -- defined elsewhere in this test
// file; expand_as is a no-op reshape check against t1's shape [3].
auto t2 = generate_uniform(3, dtype).expand_as(t1);
auto t3 = t1.add(t2);
testValidate(fec.fusion(), {out}, {t0}, {t3}, __LINE__, __FILE__);
}
} // namespace jit
} // namespace torch

0 comments on commit 40e2703

Please sign in to comment.