Skip to content

Commit

Permalink
Reduction rand like patch (#2031)
Browse files Browse the repository at this point in the history
*_like operations are not filtering out reduction domains on their inputs. This resulted in the output differing in shape from the input. We ran into this issue on a Hugging Face benchmark with the Python stack.

1. updated the operation to filter input domain with noReduction;
2. added a test case to verify the breakage and fix;
  • Loading branch information
jjsjann123 committed Oct 5, 2022
1 parent bc77266 commit 40e2703
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 8 deletions.
18 changes: 10 additions & 8 deletions torch/csrc/jit/codegen/cuda/arith.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,17 +471,18 @@ TensorView* uniform(
return out;
}

// Creates a new TensorView filled with uniform random values whose shape
// matches the *logical* (non-reduction) shape of `tv`.
//
// Reduction axes must be filtered out here: a consumer of a reduction
// output sees only the non-reduction domain, so including reduction IDs
// would produce an output whose shape differs from what callers observe
// on the input. Expanded (broadcast) extents are preserved via
// getMaybeExpandedExtent().
//
// Precondition: tv has a floating-point dtype (checked below).
TensorView* rand_like(TensorView* tv) {
  TORCH_CHECK(
      isFloatingPointType(tv->dtype()),
      "input must have floating point type, but got ",
      tv->dtype());
  std::vector<Val*> shape;
  // Drop reduction IterDomains so the generated tensor matches the
  // shape seen by consumers of tv.
  auto dom = TensorDomain::noReductions(tv->getMaybeRFactorDomain());
  shape.reserve(dom.size());
  for (auto id : dom) {
    shape.emplace_back(id->getMaybeExpandedExtent());
  }
  return rand(shape, tv->dtype());
}

Val* rand_like(Val* v) {
Expand All @@ -505,8 +506,9 @@ TensorView* full(

TensorView* full_like(TensorView* tv, Val* fill_value) {
std::vector<Val*> shape;
shape.reserve(tv->getMaybeRFactorDomain().size());
for (auto id : tv->getMaybeRFactorDomain()) {
auto dom = TensorDomain::noReductions(tv->getMaybeRFactorDomain());
shape.reserve(dom.size());
for (auto id : dom) {
shape.emplace_back(id->getMaybeExpandedExtent());
}
return full(shape, fill_value, tv->dtype());
Expand Down
30 changes: 30 additions & 0 deletions torch/csrc/jit/codegen/cuda/test/test_gpu_rng.cu
Original file line number Diff line number Diff line change
Expand Up @@ -365,5 +365,35 @@ TEST_F(NVFuserTest, FusionUniform_CUDA) {
}
}
// Regression test: rand_like applied to a reduction output must produce a
// tensor with the reduction axis filtered out. Previously the reduction
// domain leaked into the generated shape, so the fused output's shape was
// wrong. Here tv1 = sum(tv0, {0}) reduces a [2, 3] input to logical shape
// [3]; rand_like(tv1) must therefore generate 3 random values.
TEST_F(NVFuserTest, FusionRandLikeReduction_CUDA) {
auto dtype = kFloat;
std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
auto fusion = fusion_ptr.get();
FusionGuard fg(fusion);
// 2-D symbolic input; axis 0 is reduced below.
TensorView* tv0 = makeSymbolicTensor(2, aten_to_data_type(dtype));
fusion->addInput(tv0);
auto tv1 = sum(tv0, {0});
// The op under test: random tensor shaped like the reduction output.
auto tv2 = rand_like(tv1);
auto tv3 = add(tv1, tv2);
fusion->addOutput(tv3);
FusionExecutorCache fec(std::move(fusion_ptr));
auto options = at::TensorOptions().dtype(dtype).device(at::kCUDA, 0);
at::Tensor t0 = at::zeros({2, 3}, options);
// Seed before the fused run so its RNG draws can be reproduced below.
at::manual_seed(0);
auto cg_outputs = fec.runFusionWithInputs({t0});
auto out = cg_outputs[0];
// Re-seed so the eager reference consumes the same RNG sequence as the
// fused execution above.
at::manual_seed(0);
auto t1 = t0.sum(0);
// generate_uniform presumably mirrors the fusion's RNG draws (3 values,
// matching the non-reduction extent) -- defined elsewhere in this test
// file; expand_as is a no-op reshape check against t1's shape [3].
auto t2 = generate_uniform(3, dtype).expand_as(t1);
auto t3 = t1.add(t2);
testValidate(fec.fusion(), {out}, {t0}, {t3}, __LINE__, __FILE__);
}
} // namespace jit
} // namespace torch

0 comments on commit 40e2703

Please sign in to comment.