Skip to content

feat: Add support for dynamic zeros_like and ones_like #1847

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions core/conversion/evaluators/aten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,63 @@ auto aten_registrations TORCHTRT_UNUSED =
auto out_tensor = torch::ones(args.at(n->input(0)).unwrapToIntList().vec(), options);
return out_tensor;
}})
.evaluator(
    {c10::Symbol::fromQualString("aten::new_zeros"),
     // Schema: aten::new_zeros(Tensor self, int[] size, *, int? dtype=None, int? layout=None,
     //                         Device? device=None, bool? pin_memory=None) -> (Tensor)
     // newTensorImplementation resolves the requested size list and the tensor options;
     // this evaluator only chooses the zero fill.
     [](ConversionCtx* ctx, const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
       const auto size_and_options = newTensorImplementation(n, args);
       const auto& requested_size = size_and_options.first;
       const auto& tensor_options = size_and_options.second;
       return torch::zeros(requested_size, tensor_options);
     }})
.evaluator(
    {c10::Symbol::fromQualString("aten::new_ones"),
     // Schema: aten::new_ones(Tensor self, int[] size, *, int? dtype=None, int? layout=None,
     //                        Device? device=None, bool? pin_memory=None) -> (Tensor)
     // newTensorImplementation resolves the requested size list and the tensor options;
     // this evaluator only chooses the one fill.
     [](ConversionCtx* ctx, const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
       const auto size_and_options = newTensorImplementation(n, args);
       const auto& requested_size = size_and_options.first;
       const auto& tensor_options = size_and_options.second;
       return torch::ones(requested_size, tensor_options);
     }})
.evaluator(
    {c10::Symbol::fromQualString("aten::zeros_like"),
     // Schema: aten::zeros_like(Tensor self, *, int? dtype=None, int? layout=None,
     //                          Device? device=None, bool? pin_memory=None, int? memory_format=None) -> (Tensor)
     // Shape/dtype resolution is delegated to newTensorLikeImplementation; only the
     // fill routine is supplied here.
     [](ConversionCtx* ctx, const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
       const auto make_zeros = [](const std::vector<int64_t>& shape, const torch::TensorOptions& opts) {
         return torch::zeros(shape, opts);
       };
       return newTensorLikeImplementation(ctx, n, args, make_zeros);
     }})
.evaluator(
    {c10::Symbol::fromQualString("aten::ones_like"),
     // Schema: aten::ones_like(Tensor self, *, int? dtype=None, int? layout=None,
     //                         Device? device=None, bool? pin_memory=None, int? memory_format=None) -> (Tensor)
     // Shape/dtype resolution is delegated to newTensorLikeImplementation; only the
     // fill routine is supplied here.
     [](ConversionCtx* ctx, const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
       const auto make_ones = [](const std::vector<int64_t>& shape, const torch::TensorOptions& opts) {
         return torch::ones(shape, opts);
       };
       return newTensorLikeImplementation(ctx, n, args, make_ones);
     }})
.evaluator(
    {c10::Symbol::fromQualString("aten::fill_"),
     // Schema: aten::fill_.Scalar(Tensor(a!) self, Scalar value) -> (Tensor(a!))
     // Evaluates to a new strided CUDA tensor that has self's shape and dtype and
     // holds `value` in every element.
     [](ConversionCtx* ctx, const torch::jit::Node* n, kwargs& args) -> c10::optional<torch::jit::IValue> {
       auto self_var = args.at(n->input(0));
       auto fill_options = torch::TensorOptions().layout(torch::kStrided).device(torch::kCUDA);
       std::vector<int64_t> shape;

       // self is either a Torch tensor or a TensorRT ITensor; read dtype and shape
       // from whichever representation is present.
       if (!self_var.isITensor()) {
         auto self_tensor = self_var.unwrapToTensor();
         fill_options = fill_options.dtype(self_tensor.dtype());
         shape = self_tensor.sizes().vec();
       } else {
         auto self_itensor = self_var.ITensor();
         auto scalar_type = util::TRTDataTypeToScalarType(self_itensor->getType());
         fill_options = fill_options.dtype(scalar_type);
         shape = util::toVec(self_itensor->getDimensions());
       }

       auto fill_value = args.at(n->input(1)).unwrapToScalar();
       return torch::full(shape, fill_value, fill_options);
     }})
.evaluator(
{c10::Symbol::fromQualString("aten::full"),
// aten::full(int[] size, Scalar fill_value, *, int? dtype=None, int? layout=None,
Expand Down
71 changes: 71 additions & 0 deletions core/conversion/evaluators/eval_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,77 @@ at::Tensor createTensorFromList(
return tensor;
}

// Resolves the arguments shared by the aten::new_* evaluators.
//
// Returns a pair of:
//   first  - the size list taken from node input 1
//   second - TensorOptions with strided layout on CUDA, whose dtype is node input 2
//            when that input is not None, otherwise the dtype of self (node input 0).
std::pair<std::vector<int64_t>, torch::TensorOptions> newTensorImplementation(const torch::jit::Node* n, kwargs& args) {
  auto tensor_options = torch::TensorOptions().layout(torch::kStrided).device(torch::kCUDA);

  // Input 2 is the dtype
  auto& dtype_arg = args.at(n->input(2));
  const bool dtype_given = !dtype_arg.isNone() && !dtype_arg.IValue()->isNone();

  if (dtype_given) {
    tensor_options = tensor_options.dtype(c10::ScalarType(dtype_arg.unwrapToInt()));
  } else {
    // No explicit dtype: inherit it from self, which may be an ITensor or a Torch tensor.
    auto self_var = args.at(n->input(0));
    if (!self_var.isITensor()) {
      auto self_tensor = self_var.unwrapToTensor();
      tensor_options = tensor_options.dtype(self_tensor.dtype());
    } else {
      auto self_itensor = self_var.ITensor();
      auto scalar_type = util::TRTDataTypeToScalarType(self_itensor->getType());
      tensor_options = tensor_options.dtype(scalarTypeToTypeMeta(scalar_type));
    }
  }

  // Input 1 is the requested size
  return std::make_pair(args.at(n->input(1)).unwrapToIntList().vec(), tensor_options);
}

// Shared implementation for the aten::*_like evaluators.
//
// tensor_builder creates the filled tensor for a given shape and options.
//
// Static path: returns tensor_builder(shape of self, options).
// Dynamic path (ctx->settings.allow_shape_tensors && ctx->input_is_dynamic): adds
// layers to ctx->net that broadcast a single-element constant to the runtime shape
// of self, associates the node output with the resulting ITensor and returns an
// empty optional.
c10::optional<torch::jit::IValue> newTensorLikeImplementation(
    ConversionCtx* ctx,
    const torch::jit::Node* n,
    kwargs& args,
    const std::function<torch::Tensor(const std::vector<int64_t>&, const torch::TensorOptions&)>& tensor_builder) {
  auto self_var = args.at(n->input(0));
  auto options = torch::TensorOptions().layout(torch::kStrided).device(torch::kCUDA);

  // Default dtype and the build-time shape both come from self.
  std::vector<int64_t> static_shape;
  if (self_var.isITensor()) {
    auto self_itensor = self_var.ITensor();
    options = options.dtype(util::TRTDataTypeToScalarType(self_itensor->getType()));
    static_shape = util::toVec(self_itensor->getDimensions());
  } else {
    auto self_tensor = self_var.unwrapToTensor();
    options = options.dtype(self_tensor.dtype());
    static_shape = self_tensor.sizes().vec();
  }

  // Input 1 is the dtype; when present it overrides the dtype inherited from self.
  auto& dtype_arg = args.at(n->input(1));
  if (!dtype_arg.isNone() && !dtype_arg.IValue()->isNone()) {
    options = options.dtype(static_cast<c10::ScalarType>(dtype_arg.unwrapToInt()));
  }

  const bool use_shape_tensors = ctx->settings.allow_shape_tensors && ctx->input_is_dynamic;
  if (!use_shape_tensors) {
    return tensor_builder(static_shape, options);
  }

  auto self = args.at(n->input(0)).ITensorOrFreeze(ctx);
  const auto rank = self->getDimensions().nbDims;

  // Constant with extent 1 in every dimension, holding the fill value.
  std::vector<int64_t> unit_shape(rank, 1);
  auto unit_constant = tensor_builder(unit_shape, options);
  auto unit_constant_itensor = converters::tensor_to_const(ctx, unit_constant);

  // All-zero dims, passed to addSlice twice below (second and fourth arguments).
  std::vector<int64_t> zero_vec(rank, 0);
  auto zero_dims = util::toDims(c10::IntArrayRef(zero_vec));

  auto shape_layer = ctx->net->addShape(*self);
  TORCHTRT_CHECK(shape_layer, "Unable to create shape layer from node: " << *n);
  shape_layer->setName((util::node_info(n) + "_shape").c_str());

  // slice implements expand: the layer's input 2 is fed from the shape layer's output.
  auto slice_layer = ctx->net->addSlice(*unit_constant_itensor, zero_dims, self->getDimensions(), zero_dims);
  TORCHTRT_CHECK(slice_layer, "Unable to create slice layer from node: " << *n);
  slice_layer->setInput(2, *shape_layer->getOutput(0));
  slice_layer->setName((util::node_info(n) + "_slice").c_str());

  auto result_itensor = ctx->AssociateValueAndTensor(n->outputs()[0], slice_layer->getOutput(0));
  LOG_DEBUG("Output tensor shape: " << result_itensor->getDimensions());

  // The output is already bound to an ITensor, so there is no IValue to hand back.
  return {};
}

} // namespace evaluators
} // namespace conversion
} // namespace core
Expand Down
8 changes: 8 additions & 0 deletions core/conversion/evaluators/eval_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "core/conversion/evaluators/evaluators.h"
#include "torch/csrc/jit/ir/ir.h"
#include "torch/torch.h"

namespace torch_tensorrt {
namespace core {
Expand All @@ -26,6 +27,13 @@ int64_t normalizeIndex(int64_t idx, int64_t list_size);

at::Tensor scalar_to_tensor(const at::Scalar& s, const at::Device device = at::kCPU);

// Resolves the size list and TensorOptions for an aten::new_* node: the size comes from
// node input 1, the dtype from node input 2 when it is not None and otherwise from self
// (node input 0). The options use strided layout on the CUDA device.
std::pair<std::vector<int64_t>, torch::TensorOptions> newTensorImplementation(const torch::jit::Node* n, kwargs& args);
// Shared implementation for aten::*_like nodes. tensor_builder produces the filled tensor
// for a given shape and options. Returns the built tensor on the static path; when
// ctx->settings.allow_shape_tensors and ctx->input_is_dynamic are both set, it instead adds
// layers to ctx->net, associates the node output with the resulting ITensor and returns an
// empty optional.
c10::optional<torch::jit::IValue> newTensorLikeImplementation(
ConversionCtx* ctx,
const torch::jit::Node* n,
kwargs& args,
const std::function<torch::Tensor(const std::vector<int64_t>&, const torch::TensorOptions&)>& tensor_builder);

} // namespace evaluators
} // namespace conversion
} // namespace core
Expand Down
190 changes: 190 additions & 0 deletions tests/core/conversion/evaluators/test_aten_evaluators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,196 @@ TEST(Evaluators, ZerosDataTypeEvaluatesCorrectly) {
ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, NewZerosEvaluatesCorrectly) {
  // aten::new_zeros with every optional argument left as None.
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : None = prim::Constant() # :0:0
%3 : int[] = aten::size(%x.1) # <string>:7:9
%z.1 : Tensor = aten::new_zeros(%x.1, %3, %2, %2, %2, %2)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});

  // Both evaluation paths must produce the same tensor.
  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});
  auto trt_results = torch_tensorrt::tests::util::EvaluateGraph(parsed_graph->block(), {input});

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, NewZerosDataTypeEvaluatesCorrectly) {
  // aten::new_zeros with an explicit dtype constant (5, annotated Float16 in the IR).
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : int = prim::Constant[value=5]() # :0:0 (Float16)
%3 : None = prim::Constant() # :0:0
%4 : int[] = aten::size(%x.1) # <string>:7:9
%z.1 : Tensor = aten::new_zeros(%x.1, %4, %2, %3, %3, %3)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});

  // Both evaluation paths must produce the same tensor.
  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});
  auto trt_results = torch_tensorrt::tests::util::EvaluateGraph(parsed_graph->block(), {input});

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, NewOnesEvaluatesCorrectly) {
  // aten::new_ones with every optional argument left as None.
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : None = prim::Constant() # :0:0
%3 : int[] = aten::size(%x.1) # <string>:7:9
%z.1 : Tensor = aten::new_ones(%x.1, %3, %2, %2, %2, %2)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});

  // Both evaluation paths must produce the same tensor.
  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});
  auto trt_results = torch_tensorrt::tests::util::EvaluateGraph(parsed_graph->block(), {input});

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, NewOnesDataTypeEvaluatesCorrectly) {
  // aten::new_ones with an explicit dtype constant (5, annotated Float16 in the IR).
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : int = prim::Constant[value=5]() # :0:0 (Float16)
%3 : None = prim::Constant() # :0:0
%4 : int[] = aten::size(%x.1) # <string>:7:9
%z.1 : Tensor = aten::new_ones(%x.1, %4, %2, %3, %3, %3)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});

  // Both evaluation paths must produce the same tensor.
  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});
  auto trt_results = torch_tensorrt::tests::util::EvaluateGraph(parsed_graph->block(), {input});

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, ZerosLikeEvaluatesCorrectly) {
  // aten::zeros_like with every optional argument left as None.
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : None = prim::Constant() # :0:0
%z.1 : Tensor = aten::zeros_like(%x.1, %2, %2, %2, %2, %2)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});

  // Both evaluation paths must produce the same tensor.
  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});
  auto trt_results = torch_tensorrt::tests::util::EvaluateGraph(parsed_graph->block(), {input});

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, ZerosLikeDataTypeEvaluatesCorrectly) {
  // aten::zeros_like with an explicit dtype constant (5, annotated Float16 in the IR).
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : int = prim::Constant[value=5]() # :0:0 (Float16)
%3 : None = prim::Constant()
%z.1 : Tensor = aten::zeros_like(%x.1, %2, %3, %3, %3, %3)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});

  // Both evaluation paths must produce the same tensor.
  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});
  auto trt_results = torch_tensorrt::tests::util::EvaluateGraph(parsed_graph->block(), {input});

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, ZerosLikeDynamic) {
  // aten::zeros_like with an explicit dtype constant, run through a built engine
  // instead of the evaluator-only helper.
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : int = prim::Constant[value=5]() # :0:0 (Float16)
%3 : None = prim::Constant()
%z.1 : Tensor = aten::zeros_like(%x.1, %2, %3, %3, %3, %3)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {23, 17, 5, 29}, {at::kCUDA});

  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});

  auto static_params = torch_tensorrt::core::ir::get_static_params(parsed_graph->inputs(), {});
  // NOTE(review): the two trailing `true` flags presumably enable dynamic batch and
  // shape tensors - confirm against the RunGraphEngineDynamic declaration.
  auto trt_results =
      torch_tensorrt::tests::util::RunGraphEngineDynamic(parsed_graph, static_params, {input}, true, true);

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0]));
}

TEST(Evaluators, OnesLikeEvaluatesCorrectly) {
  // aten::ones_like with every optional argument left as None.
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : None = prim::Constant() # :0:0
%z.1 : Tensor = aten::ones_like(%x.1, %2, %2, %2, %2, %2)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});

  // Both evaluation paths must produce the same tensor.
  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});
  auto trt_results = torch_tensorrt::tests::util::EvaluateGraph(parsed_graph->block(), {input});

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, OnesLikeDataTypeEvaluatesCorrectly) {
  // aten::ones_like with an explicit dtype constant (5, annotated Float16 in the IR).
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : int = prim::Constant[value=5]() # :0:0 (Float16)
%3 : None = prim::Constant()
%z.1 : Tensor = aten::ones_like(%x.1, %2, %3, %3, %3, %3)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {1, 5, 5, 5}, {at::kCUDA});

  // Both evaluation paths must produce the same tensor.
  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});
  auto trt_results = torch_tensorrt::tests::util::EvaluateGraph(parsed_graph->block(), {input});

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0].toTensor()));
}

TEST(Evaluators, OnesLikeDynamic) {
  // aten::ones_like with an explicit dtype constant, run through a built engine
  // instead of the evaluator-only helper.
  const auto ir_source = R"IR(
graph(%x.1 : Tensor):
%2 : int = prim::Constant[value=5]() # :0:0 (Float16)
%3 : None = prim::Constant()
%z.1 : Tensor = aten::ones_like(%x.1, %2, %3, %3, %3, %3)
return (%z.1))IR";

  auto parsed_graph = std::make_shared<torch::jit::Graph>();
  torch::jit::parseIR(ir_source, parsed_graph.get());

  auto input = at::randint(1, 10, {3, 6}, {at::kCUDA});

  auto jit_results = torch_tensorrt::tests::util::EvaluateGraphJIT(parsed_graph, {input});

  auto static_params = torch_tensorrt::core::ir::get_static_params(parsed_graph->inputs(), {});
  // NOTE(review): the two trailing `true` flags presumably enable dynamic batch and
  // shape tensors - confirm against the RunGraphEngineDynamic declaration.
  auto trt_results =
      torch_tensorrt::tests::util::RunGraphEngineDynamic(parsed_graph, static_params, {input}, true, true);

  ASSERT_TRUE(at::equal(jit_results[0].toTensor().to(at::kCUDA), trt_results[0]));
}

TEST(Evaluators, ATenArangeIntEvaluatesCorrectly) {
const auto graph = R"IR(
graph():
Expand Down
1 change: 1 addition & 0 deletions tests/core/partitioning/test_loop_fallback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ TEST(Partitioning, CheckLoopFallbackNoEvalCompilesCorrectly) {

std::vector<torch_tensorrt::core::ir::Input> input_ranges{torch_tensorrt::core::ir::Input({1, 10})};
torch_tensorrt::core::CompileSpec cfg(input_ranges);
cfg.partitioning_info.forced_fallback_operators.push_back("aten::ones_like");
cfg.partitioning_info.enabled = true;

auto jit_results = mod.forward(jit_inputs_ivalues).toTensor();
Expand Down