Add TensorViewBuilder::shape(std::vector<Val*> shape) (#1884)
zasdfgbnm committed Aug 5, 2022
1 parent 7cfb779 commit 1617373
Showing 5 changed files with 110 additions and 18 deletions.
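
In brief: `TensorViewBuilder::shape` previously accepted only concrete `int64_t` sizes; this commit adds an overload taking `std::vector<Val*>`, so extents can be symbolic values that are bound and checked at run time. A minimal sketch of the new usage, adapted from the test added at the bottom of this commit (it assumes an active `Fusion`/`FusionGuard` and the usual nvFuser IR headers):

```cpp
// Sketch adapted from FusionCheckedSymbolicShape_CUDA below.
Val* s1 = IrBuilder::create<Int>(); // symbolic extent, bound at run time
Val* s2 = IrBuilder::create<Int>();
auto builder = TensorViewBuilder().shape(std::vector<Val*>{s1, s2});
TensorView* tv0 = builder.build(); // two inputs built from the same builder
TensorView* tv1 = builder.build(); // share s1/s2, so their sizes must match
```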
2 changes: 1 addition & 1 deletion torch/csrc/jit/codegen/cuda/executor_utils.cpp
@@ -799,7 +799,7 @@ kir::ExpressionEvaluator bindKernelInputs(
             extent->toString(),
             " to ",
             value,
-            "but it's already set to ",
+            " but it's already set to ",
             *prev_value);
         should_bind = false;
       }
5 changes: 3 additions & 2 deletions torch/csrc/jit/codegen/cuda/ir_interface_nodes.h
@@ -572,7 +572,8 @@ class TORCH_CUDA_CU_API TensorViewBuilder {
   TensorViewBuilder& contiguity(std::vector<bool> contiguity);
 
   //! Set the shape (default 0 dimensional, ie. scalar)
-  TensorViewBuilder& shape(std::vector<int64_t> shape);
+  TensorViewBuilder& shape(std::vector<Val*> shape);
+  TensorViewBuilder& shape(const std::vector<int64_t>& shape);
 
   //! Creates a new TensorView with the specified options
   TensorView* build() const;
@@ -581,7 +582,7 @@ class TORCH_CUDA_CU_API TensorViewBuilder {
   size_t ndims_ = 0;
   DataType dtype_ = DataType::Float;
   std::vector<bool> contiguity_;
-  std::vector<int64_t> shape_;
+  std::vector<Val*> shape_;
 };
 
 } // namespace cuda
8 changes: 4 additions & 4 deletions torch/csrc/jit/codegen/cuda/ops/normalization.cpp
@@ -529,8 +529,8 @@ ForwardNormResult batch_norm(
     auto invstd_bcast = broadcast(unbiased_invstd, broadcast_mask);
 
     // During inference, mean/invstd output are empty tensors
-    mean = TensorViewBuilder().shape({0}).build();
-    invstd = TensorViewBuilder().shape({0}).build();
+    mean = TensorViewBuilder().shape(std::vector<int64_t>{0}).build();
+    invstd = TensorViewBuilder().shape(std::vector<int64_t>{0}).build();
     y = mul(x_sub_mean, invstd_bcast);
   }
 
@@ -782,8 +782,8 @@ ForwardNormResult instance_norm(
         broadcast(unbiased_invstd, channels_only_broadcast_mask);
 
     // During inference, mean/invstd output are empty tensors
-    mean = TensorViewBuilder().shape({0}).build();
-    invstd = TensorViewBuilder().shape({0}).build();
+    mean = TensorViewBuilder().shape(std::vector<int64_t>{0}).build();
+    invstd = TensorViewBuilder().shape(std::vector<int64_t>{0}).build();
     y = mul(x_sub_mean, invstd_bcast);
   }
 
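A note on why these call sites changed: once both `shape(std::vector<Val*>)` and `shape(const std::vector<int64_t>&)` exist, a braced list such as `{0}` is ambiguous, because the literal `0` is also a null pointer constant and can seed either vector type. Spelling out the vector resolves the overload. An illustrative (non-repository) snippet:

```cpp
TensorViewBuilder builder;
// builder.shape({0});  // ambiguous now: {0} could form either
//                      // std::vector<int64_t>{0} or std::vector<Val*>{nullptr}
builder.shape(std::vector<int64_t>{0}); // explicit: a 0-extent (empty) tensor
```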
39 changes: 28 additions & 11 deletions torch/csrc/jit/codegen/cuda/tensor_view.cpp
@@ -1193,7 +1193,29 @@ TensorViewBuilder& TensorViewBuilder::contiguity(std::vector<bool> contiguity) {
   return *this;
 }
 
-TensorViewBuilder& TensorViewBuilder::shape(std::vector<int64_t> shape) {
+TensorViewBuilder& TensorViewBuilder::shape(const std::vector<int64_t>& shape) {
+  TORCH_CHECK(shape_.empty(), "Attempting to reset shape");
+  if (!shape.empty()) {
+    TORCH_CHECK(ndims_ == 0 || ndims_ == shape.size());
+    ndims_ = shape.size();
+  }
+  shape_.clear();
+  shape_.reserve(shape.size());
+  for (int64_t i : shape) {
+    if (i == -1) {
+      shape_.emplace_back(IrBuilder::create<Int>());
+    } else {
+      TORCH_CHECK(
+          i >= 0,
+          "Invalid extent value. ",
+          "For a tensor representing a single scalar use ndims = 0 with no sizes set.");
+      shape_.emplace_back(IrBuilder::create<Int>(i));
+    }
+  }
+  return *this;
+}
+
+TensorViewBuilder& TensorViewBuilder::shape(std::vector<Val*> shape) {
   TORCH_CHECK(shape_.empty(), "Attempting to reset shape");
   if (!shape.empty()) {
     TORCH_CHECK(ndims_ == 0 || ndims_ == shape.size());
@@ -1207,28 +1229,23 @@ TensorView* TensorViewBuilder::build() const {
   // Build the domain
   std::vector<IterDomain*> domain(ndims_, nullptr);
   for (const auto i : c10::irange(ndims_)) {
-    if (shape_.empty() || shape_[i] == -1) {
+    if (shape_.empty()) {
       domain[i] =
           IterDomainBuilder(
               FusionGuard::getCurFusion()->zeroVal(), IrBuilder::create<Int>())
               .build();
     } else {
-      TORCH_CHECK(
-          shape_[i] >= 0,
-          "Invalid extent value. ",
-          "For a tensor representing a single scalar use ndims = 0 with no sizes set.");
-      if (shape_[i] == 1) {
+      if (shape_[i]->isOneInt()) {
         // If size is known to be 1, assume it needs to be broadcasted.
         domain[i] = IterDomainBuilder(
                         FusionGuard::getCurFusion()->zeroVal(),
                         FusionGuard::getCurFusion()->oneVal())
                         .iter_type(IterType::Broadcast)
                         .build();
       } else {
-        domain[i] = IterDomainBuilder(
-                        FusionGuard::getCurFusion()->zeroVal(),
-                        IrBuilder::create<Int>(shape_[i]))
-                        .build();
+        domain[i] =
+            IterDomainBuilder(FusionGuard::getCurFusion()->zeroVal(), shape_[i])
+                .build();
       }
     }
   }
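As the diff above shows, the `int64_t` overload now converts sizes eagerly (`-1` becomes a fresh symbolic `Int`, other values become concrete `Int`s), so `build()` only inspects `Val*` extents, with a known extent of 1 still yielding a Broadcast domain. A small sketch of the two spellings that should now be equivalent (illustrative sizes; assumes an active `Fusion`/`FusionGuard`):

```cpp
// One symbolic extent plus a concrete extent of 128, written both ways.
TensorView* tv_a = TensorViewBuilder()
                       .shape(std::vector<int64_t>{-1, 128}) // -1 => fresh Int
                       .build();
TensorView* tv_b = TensorViewBuilder()
                       .shape(std::vector<Val*>{
                           IrBuilder::create<Int>(), IrBuilder::create<Int>(128)})
                       .build();
```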
74 changes: 74 additions & 0 deletions torch/csrc/jit/codegen/cuda/test/test_gpu.cpp
@@ -1,4 +1,5 @@
 #if defined(USE_CUDA)
+#include <gmock/gmock-matchers.h>
 #include <gtest/gtest.h>
 
 #include <torch/csrc/jit/codegen/cuda/arith.h>
@@ -25420,6 +25421,79 @@ TEST_F(NVFuserTest, FusionPrint_CUDA) {
 }
 }
 
+TEST_F(NVFuserTest, FusionCheckedSymbolicShape_CUDA) {
+  const auto options =
+      at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+
+  at::Tensor a = at::randn({123, 456}, options);
+  at::Tensor b = at::randn({123, 456}, options);
+  at::Tensor c = at::randn({321, 654}, options);
+
+  using return_t =
+      std::pair<std::unique_ptr<FusionExecutorCache>, std::vector<at::Tensor>>;
+  auto matched_add = [](at::Tensor a, at::Tensor b) -> return_t {
+    auto fusion = std::make_unique<Fusion>();
+    FusionGuard fg(fusion.get());
+
+    Val* s1 = IrBuilder::create<Int>();
+    Val* s2 = IrBuilder::create<Int>();
+    auto builder = TensorViewBuilder().shape(std::vector<Val*>{s1, s2});
+    TensorView* tv0 = builder.build();
+    TensorView* tv1 = builder.build();
+
+    fusion->addInput(tv0);
+    fusion->addInput(tv1);
+
+    auto tv2 = add(tv0, tv1);
+
+    fusion->addOutput(tv2);
+
+    auto executor_cache =
+        std::make_unique<FusionExecutorCache>(std::move(fusion));
+    auto cg_outputs = executor_cache->runFusionWithInputs({a, b});
+    return {std::move(executor_cache), std::move(cg_outputs)};
+  };
+
+  {
+    auto ret1 = matched_add(a, b);
+    testValidate(
+        ret1.first->fusion(), ret1.second, {a, b}, {a + b}, __LINE__, __FILE__);
+  }
+
+  {
+    EXPECT_THAT(
+        [&]() { matched_add(a, c); },
+        ::testing::ThrowsMessage<c10::Error>(
+            ::testing::HasSubstr("Attempting to bind")));
+  }
+}
+
+TEST_F(NVFuserTest, FusionSizeDependentData_CUDA) {
+  auto fusion = std::make_unique<Fusion>();
+  FusionGuard fg(fusion.get());
+
+  Val* s1 = IrBuilder::create<Int>();
+  auto builder = TensorViewBuilder().shape(std::vector<Val*>{s1});
+  TensorView* tv0 = builder.build();
+
+  fusion->addInput(tv0);
+
+  auto tv1 = add(tv0, s1);
+
+  fusion->addOutput(tv1);
+
+  const auto options =
+      at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+
+  at::Tensor a = at::zeros({123}, options);
+
+  FusionExecutorCache executor_cache(std::move(fusion));
+  auto cg_outputs = executor_cache.runFusionWithInputs({a});
+
+  testValidate(
+      executor_cache.fusion(), cg_outputs, {a}, {a + 123}, __LINE__, __FILE__);
+}
+
 } // namespace jit
 } // namespace torch
 #endif // #if defined(USE_CUDA)
