diff --git a/python/tvm/dlight/gpu/general_reduction.py b/python/tvm/dlight/gpu/general_reduction.py
index a068e732b986..9b5c4c16153a 100644
--- a/python/tvm/dlight/gpu/general_reduction.py
+++ b/python/tvm/dlight/gpu/general_reduction.py
@@ -90,7 +90,10 @@ def f_layout_mapping(*iters):
             sch.transform_block_layout(block_infos[-1].block_rv, index_map)
 
         try:
-            # TODO: fix num_leading_s = 0 case
+            # Handle the case where num_leading_s = 0
+            if num_leading_s == 0:
+                num_leading_s = 1  # Use at least one spatial dimension for blockIdx.x
+
             assert num_trailing_r > 0
             for block in block_infos[1:-1]:
                 assert block.dom_kind() == dom_kind
@@ -100,7 +103,13 @@ def f_layout_mapping(*iters):
             return None
 
         loops = sch.get_loops(block_infos[-1].block_rv)
-        bx = sch.fuse(*loops[:num_leading_s])
+
+        # Ensure we have at least one spatial dimension for blockIdx.x
+        if num_leading_s > 0:
+            bx = sch.fuse(*loops[:num_leading_s])
+        else:
+            bx = loops[0]  # Use the first loop as blockIdx.x
+
         r_loop, tx = sch.split(loops[-1], [None, len_tx])
         sch.reorder(tx, r_loop)
         sch.bind(bx, "blockIdx.x")
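The guard above matters because a pure-reduction kernel has no leading spatial loops: `loops[:num_leading_s]` is then empty and there is nothing to fuse for `blockIdx.x`. A minimal sketch of the patched selection logic, with a plain list standing in for the loop RVs returned by `sch.get_loops()` (the helper name is hypothetical, not part of the patch):

```python
# Hedged sketch of the fallback added above. `loops` is a plain Python list
# here, standing in for TVM schedule loop RVs; this is not the real dlight code.
def pick_blockidx_loops(loops, num_leading_s):
    """Mirror the patched logic: always yield at least one loop for blockIdx.x."""
    if num_leading_s == 0:
        # Pure reduction: no spatial loop exists, so fall back to the first
        # loop instead of fusing an empty list of loops.
        num_leading_s = 1
    return loops[:num_leading_s]

print(pick_blockidx_loops(["k0", "k1"], 0))     # ['k0'] -- fallback path
print(pick_blockidx_loops(["i", "j", "k"], 2))  # ['i', 'j'] -- unchanged path
```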
diff --git a/python/tvm/relax/frontend/torch/base_fx_graph_translator.py b/python/tvm/relax/frontend/torch/base_fx_graph_translator.py
index 003ceebec6ff..2f9f53146d2d 100644
--- a/python/tvm/relax/frontend/torch/base_fx_graph_translator.py
+++ b/python/tvm/relax/frontend/torch/base_fx_graph_translator.py
@@ -227,9 +227,54 @@ def _round(self, node: fx.Node) -> relax.Expr:
         return self.block_builder.emit(relax.op.round(arg))
 
     def _softmax(self, node: fx.Node) -> relax.Var:
+        """
+        For large tensors with non-last-dimension softmax, we transpose to move
+        the softmax dimension to the end, apply softmax, and then transpose
+        back to the original shape.
+        """
         x = self.env[node.args[0]]
         dim = node.args[1] if len(node.args) > 1 else node.kwargs.get("dim", -1)
-        return self.block_builder.emit(relax.op.nn.softmax(x, dim))
+        input_shape = x.struct_info.shape
+        input_ndim = len(input_shape)
+
+        if dim < 0:
+            # Ensure dim is expressed as a positive index
+            dim = input_ndim + dim
+
+        # Check whether this is a non-last-dimension softmax on a large
+        # tensor (> 1024). The smallest power of 2 that fails on an NVIDIA
+        # GeForce RTX 4090 is 8192; 1024 is used here to be safe.
+        is_large_non_last_dim = False
+        large_size_threshold = 1024
+
+        if dim != input_ndim - 1:  # Not the last dimension
+            # Check if any dimension is large
+            for size in input_shape:
+                if hasattr(size, "value") and size.value > large_size_threshold:
+                    is_large_non_last_dim = True
+                    break
+
+        if is_large_non_last_dim:
+            # Special handling for large tensors with non-last-dimension softmax
+
+            # Get the dimension ordering for the transpose
+            dims = list(range(input_ndim))
+            dims.append(dims.pop(dim))
+
+            # Transpose
+            x_transposed = self.block_builder.emit(relax.op.permute_dims(x, dims))
+
+            # Apply softmax on the last dimension
+            softmax_result = self.block_builder.emit(relax.op.nn.softmax(x_transposed, -1))
+
+            # Transpose back to the original shape
+            inv_dims = [-1] * len(dims)
+            for i, d in enumerate(dims):
+                inv_dims[d] = i
+            return self.block_builder.emit(relax.op.permute_dims(softmax_result, inv_dims))
+        else:
+            # Regular softmax
+            return self.block_builder.emit(relax.op.nn.softmax(x, dim))
 
     def _selu(self, node: fx.Node) -> relax.Var:
         x = self.env[node.args[0]]
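The rewrite relies on `inv_dims` being the inverse of the `dims` permutation. A quick equivalence check using PyTorch only, independent of the translator (the shapes and `dim` here are illustrative):

```python
# Numerically verify that permute -> softmax over the last axis -> inverse
# permute matches softmax over the original axis, mirroring the code above.
import torch

x = torch.randn(2, 8, 3, 5)
dim, ndim = 1, x.ndim

dims = list(range(ndim))
dims.append(dims.pop(dim))      # move `dim` to the end: [0, 2, 3, 1]

inv_dims = [-1] * ndim
for i, d in enumerate(dims):    # invert the permutation
    inv_dims[d] = i

rewritten = torch.softmax(x.permute(dims), -1).permute(inv_dims)
assert torch.allclose(rewritten, torch.softmax(x, dim))
```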
+ """ + raw_data_for_tvm = raw_data.copy() # In case the data is modified + torch_data = torch.from_numpy(raw_data) + example_args = (torch_data,) + + with torch.no_grad(): + exported_program = export(torch_module, example_args) + mod_from_torch = from_exported_program(exported_program, keep_params_as_input=True) + + tvm_mod, tvm_params = relax.frontend.detach_params(mod_from_torch) + + relax_pipeline = relax.get_default_pipeline(tvm.target.Target.from_device(tvm.cuda())) + ex = relax.build(tvm_mod, target=target, relax_pipeline=relax_pipeline) + vm = relax.VirtualMachine(ex, dev) + + gpu_data = tvm.nd.array(raw_data_for_tvm, dev) + gpu_params = [tvm.nd.array(p, dev) for p in tvm_params["main"]] + gpu_out = vm["main"](gpu_data, *gpu_params) + + pytorch_out = torch_module(torch_data).detach().numpy() + actual = gpu_out[0].numpy() + desired = pytorch_out + np.testing.assert_allclose(actual=actual, desired=desired, rtol=1e-5, atol=1e-5) + + +@tvm.testing.parametrize_targets("cuda") +def test_softmax_non_last_dim_large_tensor(target, dev): + """ + Tests ingesting a PyTorch exported model that uses softmax on a large + tensor, with the softmax dimension not being that last dimension, and + running it with CUDA. + """ + torch_module = Softmax(dim=2).eval() + raw_data = np.random.rand(10, 4, 32, 16384).astype("float32") + assert_torch_output_vs_tvm_from_exported_to_cuda(raw_data, torch_module, target, dev) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/relax/test_frontend_from_exported_program.py b/tests/python/relax/test_frontend_from_exported_program.py index 8ca335c2fe7a..399739146359 100644 --- a/tests/python/relax/test_frontend_from_exported_program.py +++ b/tests/python/relax/test_frontend_from_exported_program.py @@ -82,7 +82,7 @@ def forward(self, input): class expected: @R.function def main( - input_1: R.Tensor((1, 3, 10, 10), dtype="float32") + input_1: R.Tensor((1, 3, 10, 10), dtype="float32"), ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")): with R.dataflow(): lv: R.Tensor((1, 3, 10, 10), dtype="float32") = relax_op(input_1) @@ -112,7 +112,7 @@ def forward(self, input): class expected: @R.function def main( - input_1: R.Tensor((1, 3, 10, 10), dtype="float32") + input_1: R.Tensor((1, 3, 10, 10), dtype="float32"), ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="bool")): with R.dataflow(): lv: R.Tensor((1, 3, 10, 10), dtype="bool") = relax_op(input_1) @@ -135,7 +135,7 @@ def forward(self, input): class expected_clamp: @R.function def main( - input_1: R.Tensor((1, 3, 10, 10), dtype="float32") + input_1: R.Tensor((1, 3, 10, 10), dtype="float32"), ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")): # block 0 with R.dataflow(): @@ -163,7 +163,7 @@ def forward(self, input): class expected_dropout: @R.function def main( - input_1: R.Tensor((1, 3, 10, 10), dtype="float32") + input_1: R.Tensor((1, 3, 10, 10), dtype="float32"), ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")): # block 0 with R.dataflow(): @@ -191,7 +191,7 @@ def forward(self, input): class expected_gelu: @R.function def main( - input_1: R.Tensor((1, 3, 10, 10), dtype="float32") + input_1: R.Tensor((1, 3, 10, 10), dtype="float32"), ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")): # block 0 with R.dataflow(): @@ -220,7 +220,7 @@ def forward(self, input): class expected_hardsigmoid: @R.function def main( - inp_0: R.Tensor((1, 3, 10, 10), dtype="float32") + inp_0: R.Tensor((1, 3, 10, 10), dtype="float32"), ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")): with 
diff --git a/tests/python/relax/test_frontend_from_exported_program.py b/tests/python/relax/test_frontend_from_exported_program.py
index 8ca335c2fe7a..399739146359 100644
--- a/tests/python/relax/test_frontend_from_exported_program.py
+++ b/tests/python/relax/test_frontend_from_exported_program.py
@@ -82,7 +82,7 @@ def forward(self, input):
     class expected:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((1, 3, 10, 10), dtype="float32") = relax_op(input_1)
@@ -112,7 +112,7 @@ def forward(self, input):
     class expected:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="bool")):
             with R.dataflow():
                 lv: R.Tensor((1, 3, 10, 10), dtype="bool") = relax_op(input_1)
@@ -135,7 +135,7 @@ def forward(self, input):
     class expected_clamp:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -163,7 +163,7 @@ def forward(self, input):
     class expected_dropout:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -191,7 +191,7 @@ def forward(self, input):
     class expected_gelu:
         @R.function
        def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -220,7 +220,7 @@ def forward(self, input):
     class expected_hardsigmoid:
         @R.function
         def main(
-            inp_0: R.Tensor((1, 3, 10, 10), dtype="float32")
+            inp_0: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((1, 3, 10, 10), dtype="float32") = R.add(inp_0, R.const(3, "float32"))
@@ -252,7 +252,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            inp_0: R.Tensor((1, 3, 10, 10), dtype="float32")
+            inp_0: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((1, 3, 10, 10), dtype="float32") = R.add(inp_0, R.const(3, "float32"))
@@ -294,7 +294,7 @@ def forward(self, input):
     class expected_relu:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -323,7 +323,7 @@ def forward(self, input):
     class expected_sigmoid:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -352,7 +352,7 @@ def forward(self, input):
     class expected_silu:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -388,7 +388,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            inp_0: R.Tensor((1, 3, 10, 10), dtype="float32")
+            inp_0: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((1, 3, 10, 10), dtype="float32") = R.clip(
@@ -425,7 +425,7 @@ def forward(self, input):
     class expected:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -456,7 +456,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -487,7 +487,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -512,7 +512,7 @@ def forward(self, input):
     class expected_tril:
         @R.function
         def main(
-            input_1: R.Tensor((10, 10), dtype="float32")
+            input_1: R.Tensor((10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -531,7 +531,7 @@ def forward(self, input):
     class expected_triu:
         @R.function
         def main(
-            input_1: R.Tensor((10, 10), dtype="float32")
+            input_1: R.Tensor((10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -795,7 +795,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -883,7 +883,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -1580,7 +1580,7 @@ def forward(self, x, y):
     class Expected1:
         @R.function
         def main(
-            inp_0: R.Tensor((4, 4), dtype="float32")
+            inp_0: R.Tensor((4, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((), dtype="float32") = R.einsum((inp_0,), subscripts="ii")
@@ -1827,7 +1827,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -1856,7 +1856,7 @@ def forward(self, input):
     class expected2:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 4, 4), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -1885,7 +1885,7 @@ def forward(self, input):
     class expected3:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 6, 6), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2102,7 +2102,7 @@ def forward(self, input):
     class expected_bilinear:
         @R.function
         def main(
-            input: R.Tensor((1, 3, 112, 112), dtype="float32")
+            input: R.Tensor((1, 3, 112, 112), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 224, 224), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2131,7 +2131,7 @@ def forward(self, input):
     class expected_nearest:
         @R.function
         def main(
-            input: R.Tensor((1, 3, 112, 112), dtype="float32")
+            input: R.Tensor((1, 3, 112, 112), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 224, 224), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2170,7 +2170,7 @@ def forward(self, input: torch.Tensor):
     class Expected1:
         @R.function
         def main(
-            inp_0: R.Tensor((256, 256), dtype="float32")
+            inp_0: R.Tensor((256, 256), dtype="float32"),
         ) -> R.Tuple(R.Tensor((256,), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((256,), dtype="float32") = R.mean(inp_0, axis=[-1], keepdims=False)
@@ -2182,7 +2182,7 @@ def main(
     class Expected2:
         @R.function
         def main(
-            inp_0: R.Tensor((256, 256), dtype="float32")
+            inp_0: R.Tensor((256, 256), dtype="float32"),
         ) -> R.Tuple(R.Tensor((256, 1), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((256, 1), dtype="float32") = R.mean(inp_0, axis=[-1], keepdims=True)
@@ -2204,7 +2204,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            inp_0: R.Tensor((1, 2, 3, 4), dtype="float32")
+            inp_0: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 4), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2238,7 +2238,7 @@ def forward(self, input):
     class expected_argmax1:
         @R.function
         def main(
-            inp_0: R.Tensor((256, 256), dtype="float32")
+            inp_0: R.Tensor((256, 256), dtype="float32"),
         ) -> R.Tuple(R.Tensor((256,), dtype="int64")):
             with R.dataflow():
                 lv: R.Tensor((256,), dtype="int64") = R.argmax(inp_0, axis=-1, keepdims=False)
@@ -2250,7 +2250,7 @@ def main(
     class expected_argmax2:
         @R.function
         def main(
-            inp_0: R.Tensor((256, 256), dtype="float32")
+            inp_0: R.Tensor((256, 256), dtype="float32"),
         ) -> R.Tuple(R.Tensor((256, 1), dtype="int64")):
             with R.dataflow():
                 lv: R.Tensor((256, 1), dtype="int64") = R.argmax(inp_0, axis=-1, keepdims=True)
@@ -2279,7 +2279,7 @@ def forward(self, input):
     class expected_argmin1:
         @R.function
         def main(
-            inp_0: R.Tensor((256, 256), dtype="float32")
+            inp_0: R.Tensor((256, 256), dtype="float32"),
         ) -> R.Tuple(R.Tensor((), dtype="int64")):
             with R.dataflow():
                 lv: R.Tensor((), dtype="int64") = R.argmin(inp_0, axis=None, keepdims=False)
@@ -2291,7 +2291,7 @@ def main(
     class expected_argmin2:
         @R.function
         def main(
-            inp_0: R.Tensor((256, 256), dtype="float32")
+            inp_0: R.Tensor((256, 256), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 1), dtype="int64")):
             with R.dataflow():
                 lv: R.Tensor((1, 1), dtype="int64") = R.argmin(inp_0, axis=None, keepdims=True)
@@ -2362,7 +2362,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            input_1: R.Tensor((1, 2, 3, 4), dtype="float32")
+            input_1: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 2, 3, 4), dtype="int32")):
             # block 0
             with R.dataflow():
@@ -2388,7 +2388,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3, 4), dtype="float32")
+            x: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((4, 2, 3, 4), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2419,7 +2419,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 100), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2445,7 +2445,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3, 4), dtype="float32")
+            x: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 4, 3, 2), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2483,7 +2483,7 @@ def main(x: R.Tensor((3,), dtype="float32")) -> R.Tuple(R.Tensor((6,), dtype="fl
     class expected2:
         @R.function
         def main(
-            x: R.Tensor((1, 3), dtype="float32")
+            x: R.Tensor((1, 3), dtype="float32"),
         ) -> R.Tuple(R.Tensor((4, 6), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2511,7 +2511,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3, 4), dtype="float32")
+            x: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((2, 12), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2533,7 +2533,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            x: R.Tensor((1, 3, 10, 10), dtype="float32")
+            x: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 10, 3), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2574,7 +2574,7 @@ def forward(self, x):
     class expected2:
         @R.function
         def main(
-            x: R.Tensor((8, 16), dtype="float32")
+            x: R.Tensor((8, 16), dtype="float32"),
         ) -> R.Tuple(R.Tensor((8, 1, 1, 16, 1), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((8, 16), dtype="float32") = R.strided_slice(
@@ -2749,7 +2749,7 @@ def forward(self, input):
     class Expected1:
         @R.function
         def main(
-            inp_0: R.Tensor((3, 1, 4, 1), dtype="float32")
+            inp_0: R.Tensor((3, 1, 4, 1), dtype="float32"),
         ) -> R.Tuple(R.Tensor((3, 4, 1), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((3, 4, 1), dtype="float32") = R.squeeze(inp_0, axis=[1])
@@ -2765,7 +2765,7 @@ def forward(self, input):
     class Expected2:
         @R.function
         def main(
-            inp_0: R.Tensor((3, 1, 4, 1), dtype="float32")
+            inp_0: R.Tensor((3, 1, 4, 1), dtype="float32"),
         ) -> R.Tuple(R.Tensor((3, 4), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((3, 4), dtype="float32") = R.squeeze(inp_0, axis=None)
@@ -2796,7 +2796,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            x: R.Tensor((1, 3), dtype="float32")
+            x: R.Tensor((1, 3), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 6), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2809,7 +2809,7 @@ def main(
     class expected2:
         @R.function
         def main(
-            x: R.Tensor((1, 3), dtype="float32")
+            x: R.Tensor((1, 3), dtype="float32"),
         ) -> R.Tuple(R.Tensor((4, 6), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2833,7 +2833,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3, 4), dtype="float32")
+            x: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 4, 3, 2), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2855,7 +2855,7 @@ def forward(self, input):
     class expected1:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 1, 3, 10, 10), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2872,7 +2872,7 @@ def forward(self, input):
     class expected2:
         @R.function
         def main(
-            input_1: R.Tensor((1, 3, 10, 10), dtype="float32")
+            input_1: R.Tensor((1, 3, 10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 3, 10, 10, 1), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2896,7 +2896,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3, 4), dtype="float32")
+            x: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((2, 12), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -2918,7 +2918,7 @@ def forward(self, input):
     class Expected:
         @R.function
         def main(
-            input: R.Tensor((10, 10), dtype="float32")
+            input: R.Tensor((10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((20,), dtype="int32")):
             with R.dataflow():
                 lv: R.Tensor((20,), dtype="int32") = R.arange(0, 20, 1, dtype="int32")
@@ -2939,7 +2939,7 @@ def forward(self, input):
     class Expected:
         @R.function
         def main(
-            input: R.Tensor((10, 10), dtype="float32")
+            input: R.Tensor((10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((10, 10), dtype="float32")):
             with R.dataflow():
                 gv: R.Tuple(R.Tensor((10, 10), dtype="float32")) = (input,)
@@ -2959,7 +2959,7 @@ def forward(self, input):
     class Expected:
         @R.function
         def main(
-            inp_0: R.Tensor((10, 10), dtype="float32")
+            inp_0: R.Tensor((10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((10, 10), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((10, 10), dtype="float32") = R.zeros(
@@ -2982,7 +2982,7 @@ def forward(self, input: torch.Tensor):
     class Expected:
         @R.function
         def main(
-            inp_0: R.Tensor((10, 10), dtype="float32")
+            inp_0: R.Tensor((10, 10), dtype="float32"),
         ) -> R.Tuple(R.Tensor((10, 10), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((10, 10), dtype="float32") = R.full(
@@ -3005,7 +3005,7 @@ def forward(self, x):
     class expected1:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3), dtype="float32")
+            x: R.Tensor((1, 2, 3), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 2, 3), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -3034,7 +3034,7 @@ def forward(self, x):
     class expected_float:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3, 4), dtype="float32")
+            x: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 2, 3, 4), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -3052,7 +3052,7 @@ def forward(self, x):
     class expected_half:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3, 4), dtype="float32")
+            x: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 2, 3, 4), dtype="float16")):
             # block 0
             with R.dataflow():
@@ -3070,7 +3070,7 @@ def forward(self, x):
     class expected_type:
         @R.function
         def main(
-            x: R.Tensor((1, 2, 3, 4), dtype="float32")
+            x: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 2, 3, 4), dtype="float32")):
             # block 0
             with R.dataflow():
@@ -3086,7 +3086,7 @@ def forward(self, input):
     class expected_to1:
         @R.function
         def main(
-            inp_0: R.Tensor((1, 2, 3, 4), dtype="float32")
+            inp_0: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 2, 3, 4), dtype="float16")):
             with R.dataflow():
                 lv: R.Tensor((1, 2, 3, 4), dtype="float16") = R.astype(inp_0, dtype="float16")
@@ -3102,7 +3102,7 @@ def forward(self, input):
     class expected_to2:
         @R.function
         def main(
-            inp_0: R.Tensor((1, 2, 3, 4), dtype="float32")
+            inp_0: R.Tensor((1, 2, 3, 4), dtype="float32"),
         ) -> R.Tuple(R.Tensor((1, 2, 3, 4), dtype="float32")):
             with R.dataflow():
                 lv: R.Tensor((1, 2, 3, 4), dtype="float32") = R.astype(inp_0, dtype="float32")
@@ -3187,7 +3187,7 @@ def forward(self, x):
     class Expected:
         @R.function
         def main(
-            inp_0: R.Tensor((256, 256), dtype="float32")
+            inp_0: R.Tensor((256, 256), dtype="float32"),
         ) -> R.Tensor((256, 256), dtype="float32"):
             with R.dataflow():
                 gv: R.Tensor((256, 256), dtype="float32") = inp_0