From 161e366f1a9114271ba00acce9f0286d96497f80 Mon Sep 17 00:00:00 2001
From: Jeremy Kun
Date: Mon, 8 Apr 2024 12:26:03 -0700
Subject: [PATCH] upgrade gx_kernel and roberts_cross to 64x64

---
NOTE(review): free-text notes for reviewers; this area sits between the
"---" separator and the first "diff --git" line and is not part of any hunk.

  * Adds two 64x64 lit tests for --heir-simd-vectorizer (both operate on a
    flattened tensor<4096xi16>) and registers size/tag overrides for them
    in the BUILD file.
  * roberts_cross_64x64.mlir: the block commented "fetch img[x-1][y]" adds
    %c-1 to %y before forming the index, so it reads (x-1, y-1). The CHECK
    lines (v4 = v1 - v3, where v1 is the rotation by 4031 = 4096 - 65)
    match the code as written. Left unchanged here; confirm against the
    upstream HECO input before altering either side.
  * The "From:" line in this copy carries no e-mail address; presumably it
    was stripped in transit. Restore it before feeding this file to git am.
  * Leading whitespace of the BUILD context lines was reconstructed as
    4-space indentation; verify against the target file if the hunk does
    not apply cleanly.

 tests/heir_simd_vectorizer/BUILD              |  9 ++
 .../heir_simd_vectorizer/gx_kernel_64x64.mlir | 50 +++++++++++
 .../roberts_cross_64x64.mlir                  | 82 +++++++++++++++++++
 3 files changed, 141 insertions(+)
 create mode 100644 tests/heir_simd_vectorizer/gx_kernel_64x64.mlir
 create mode 100644 tests/heir_simd_vectorizer/roberts_cross_64x64.mlir

diff --git a/tests/heir_simd_vectorizer/BUILD b/tests/heir_simd_vectorizer/BUILD
index c9cee5978..105f06abc 100644
--- a/tests/heir_simd_vectorizer/BUILD
+++ b/tests/heir_simd_vectorizer/BUILD
@@ -11,6 +11,15 @@ glob_lit_tests(
     driver = "@heir//tests:run_lit.sh",
     size_override = {
         "box_blur_64x64.mlir": "large",
+        "roberts_cross_64x64.mlir": "enormous",
+        "gx_kernel_64x64.mlir": "large",
+    },
+    tags_override = {
+        "gx_kernel_64x64.mlir": [
+            "nofastbuild",
+            "notap",
+            "manual",
+        ],
     },
     test_file_exts = ["mlir"],
 )
diff --git a/tests/heir_simd_vectorizer/gx_kernel_64x64.mlir b/tests/heir_simd_vectorizer/gx_kernel_64x64.mlir
new file mode 100644
index 000000000..6a453417a
--- /dev/null
+++ b/tests/heir_simd_vectorizer/gx_kernel_64x64.mlir
@@ -0,0 +1,50 @@
+// Ported from https://github.com/MarbleHE/HECO/blob/ba027a51f4e0a376a19ca05b1dcc7ab76da78a3e/evaluation/comparison/heco_input/gxkernel_64x64.mlir
+
+// RUN: heir-opt --secretize=entry-function=gx_kernel --wrap-generic --canonicalize --cse \
+// RUN:   --heir-simd-vectorizer %s | FileCheck %s
+
+// CHECK-LABEL: @gx_kernel
+// CHECK: secret.generic
+// CHECK-COUNT-6: tensor_ext.rotate
+// CHECK-NOT: tensor_ext.rotate
+func.func @gx_kernel(%arg0: tensor<4096xi16>) -> tensor<4096xi16> {
+  %c4096 = arith.constant 4096 : index
+  %c64 = arith.constant 64 : index
+  %c1_index = arith.constant 1 : index
+  %c0_si16 = arith.constant 0 : i16
+  %c0 = arith.constant 0 : i16
+  %c1 = arith.constant 1 : i16
+  %c2 = arith.constant 2 : i16
+  %cm1= arith.constant -1 : i16
+  %cm2 = arith.constant -2 : i16
+  %weight_matrix = tensor.from_elements %c1, %cm1, %c2, %cm2, %c1, %cm1, %c0, %c0, %c0 : tensor<3x3xi16>
+  %0 = affine.for %x = 0 to 64 iter_args(%arg0_x = %arg0) -> (tensor<4096xi16>) {
+    %1 = affine.for %y = 0 to 64 iter_args(%arg0_y = %arg0_x) -> (tensor<4096xi16>) {
+      %2 = affine.for %j = -1 to 2 iter_args(%value_j = %c0_si16) -> (i16) {
+        %6 = affine.for %i = -1 to 2 iter_args(%value_i = %value_j) -> (i16) {
+          %7 = arith.addi %x, %i : index
+          %8 = arith.muli %7, %c64 : index
+          %9 = arith.addi %y, %j : index
+          %10 = arith.addi %8, %9 : index
+          %11 = arith.remui %10, %c4096 : index
+          %12 = tensor.extract %arg0[%11] : tensor<4096xi16>
+          // Get the weight from the weight matrix!
+          %ip = arith.addi %i,%c1_index : index
+          %jp = arith.addi %j,%c1_index : index
+          %w = tensor.extract %weight_matrix[%ip,%jp] : tensor<3x3xi16>
+          %mul = arith.muli %12, %w : i16
+          %13 = arith.addi %value_i, %mul : i16
+          affine.yield %13 : i16
+        }
+        affine.yield %6 : i16
+      }
+      %3 = arith.muli %c64, %x : index
+      %4 = arith.addi %3, %y : index
+      %5 = arith.remui %4, %c4096 : index
+      %6 = tensor.insert %2 into %arg0_y[%5] : tensor<4096xi16>
+      affine.yield %6 : tensor<4096xi16>
+    }
+    affine.yield %1 : tensor<4096xi16>
+  }
+  return %0 : tensor<4096xi16>
+}
diff --git a/tests/heir_simd_vectorizer/roberts_cross_64x64.mlir b/tests/heir_simd_vectorizer/roberts_cross_64x64.mlir
new file mode 100644
index 000000000..4fd1b015e
--- /dev/null
+++ b/tests/heir_simd_vectorizer/roberts_cross_64x64.mlir
@@ -0,0 +1,82 @@
+// Ported from https://github.com/MarbleHE/HECO/blob/3e13744233ab0c09030a41ef98b4e061b6fa2eac/evaluation/benchmark/heco_input/robertscross_64x64.mlir
+
+// RUN: heir-opt --secretize=entry-function=roberts_cross --wrap-generic --canonicalize --cse \
+// RUN:   --heir-simd-vectorizer %s | FileCheck %s
+
+module{
+  // CHECK-LABEL: @roberts_cross
+  // CHECK-SAME: (%[[arg0:.*]]: !secret.secret<tensor<4096xi16>>) -> !secret.secret<tensor<4096xi16>> {
+  // CHECK-NEXT: %[[cMinusOne:.*]] = arith.constant 4095 : index
+  // CHECK-NEXT: %[[cMinusRow:.*]] = arith.constant 4031 : index
+  // CHECK-NEXT: secret.generic ins(%[[arg0]] : !secret.secret<tensor<4096xi16>>) {
+  // CHECK-NEXT: ^bb0(%[[arg1:.*]]: tensor<4096xi16>):
+  // CHECK-NEXT: %[[v1:.*]] = tensor_ext.rotate %[[arg1]], %[[cMinusRow]]
+  // CHECK-NEXT: %[[v2:.*]] = arith.subi %[[v1]], %[[arg1]]
+  // CHECK-NEXT: %[[v3:.*]] = tensor_ext.rotate %[[arg1]], %[[cMinusOne]]
+  // CHECK-NEXT: %[[v4:.*]] = arith.subi %[[v1]], %[[v3]]
+  // CHECK-DAG: %[[v5:.*]] = arith.muli %[[v2]], %[[v2]]
+  // CHECK-DAG: %[[v6:.*]] = arith.muli %[[v4]], %[[v4]]
+  // CHECK-NEXT: %[[v7:.*]] = arith.addi %[[v5]], %[[v6]]
+  func.func @roberts_cross(%img: tensor<4096xi16>) -> tensor<4096xi16> {
+    %c4096 = arith.constant 4096 : index
+    %c64 = arith.constant 64 : index
+    %c1 = arith.constant 1 : index
+    %c0 = arith.constant 0 : index
+    %c-1 = arith.constant -1 : index
+
+    // Each point p = img[x][y], where x is row and y is column, in the new image will equal:
+    // (img[x-1][y-1] - img[x][y])^2 + (img[x-1][y] - img[x][y-1])^2
+    %r = affine.for %x = 0 to 64 iter_args(%imgx = %img) -> tensor<4096xi16> {
+      %1 = affine.for %y = 0 to 64 iter_args(%imgy = %imgx) -> tensor<4096xi16> {
+
+        // fetch img[x-1][y-1]
+        %4 = arith.addi %x, %c-1 : index
+        %5 = arith.muli %4, %c64 : index
+        %6 = arith.addi %y, %c-1 : index
+        %7 = arith.addi %5, %6 : index
+        %8 = arith.remui %7, %c4096 : index
+        %9 = tensor.extract %img[%8] : tensor<4096xi16>
+
+        // fetch img[x][y]
+        %10 = arith.muli %x, %c64 : index
+        %11 = arith.addi %10, %y : index
+        %12 = arith.remui %11, %c4096 : index
+        %13 = tensor.extract %img[%12] : tensor<4096xi16>
+
+        // subtract those two
+        %14 = arith.subi %9, %13 : i16
+
+        // fetch img[x-1][y]
+        %15 = arith.addi %x, %c-1 : index
+        %16 = arith.muli %15, %c64 : index
+        %17 = arith.addi %y, %c-1 : index
+        %18 = arith.addi %16, %17 : index
+        %19 = arith.remui %18, %c4096 : index
+        %20 = tensor.extract %img[%19] : tensor<4096xi16>
+
+        // fetch img[x][y-1]
+        %21 = arith.muli %x, %c64 : index
+        %22 = arith.addi %y, %c-1 : index
+        %23 = arith.addi %21, %22 : index
+        %24 = arith.remui %23, %c4096 : index
+        %25 = tensor.extract %img[%24] : tensor<4096xi16>
+
+        // subtract those two
+        %26 = arith.subi %20, %25 : i16
+
+        // square each difference
+        %27 = arith.muli %14, %14 : i16
+        %28 = arith.muli %26, %26 : i16
+
+        // add the squares
+        %29 = arith.addi %27, %28 : i16
+
+        // save to result[x][y]
+        %30 = tensor.insert %29 into %imgy[%12] : tensor<4096xi16>
+        affine.yield %30: tensor<4096xi16>
+      }
+      affine.yield %1 : tensor<4096xi16>
+    }
+    return %r : tensor<4096xi16>
+  }
+}