// -----// IR Dump Before AssignTargetDevicesPass (iree-hal-assign-target-devices) //----- // module { func.func @conv_2d_nhwc_hwcf(%arg0: tensor<2x14x14x32xi32>, %arg1: tensor<3x3x32x64xi32>) -> tensor<2x12x12x64xi32> { %c0_i32 = arith.constant 0 : i32 %0 = tensor.empty() : tensor<2x12x12x64xi32> %1 = linalg.fill ins(%c0_i32 : i32) outs(%0 : tensor<2x12x12x64xi32>) -> tensor<2x12x12x64xi32> %2 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x14x14x32xi32>, tensor<3x3x32x64xi32>) outs(%1 : tensor<2x12x12x64xi32>) -> tensor<2x12x12x64xi32> return %2 : tensor<2x12x12x64xi32> } } ... // -----// IR Dump Before FoldMemRefAliasOps (fold-memref-alias-ops) //----- // module { func.func @conv_2d_nhwc_hwcf_dispatch_0_conv_2d_nhwc_hwcf_2x12x12x64x3x3x32_i32() attributes {translation_info = #iree_codegen.translation_info} { %c8 = arith.constant 8 : index %c32 = arith.constant 32 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<2x14x14x32xi32> memref.assume_alignment %0, 64 : memref<2x14x14x32xi32> %1 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<3x3x32x64xi32> memref.assume_alignment %1, 64 : memref<3x3x32x64xi32> %2 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<2x12x12x64xi32> memref.assume_alignment %2, 64 : memref<2x12x12x64xi32> scf.forall (%arg0, %arg1, %arg2) = (0, 0, 0) to (12, 12, 64) step (4, 4, 4) { %subview = memref.subview %0[0, %arg0, %arg1, 0] [2, 6, 6, 32] [1, 1, 1, 1] : memref<2x14x14x32xi32> to memref<2x6x6x32xi32, strided<[6272, 448, 32, 1], offset: ?>> %subview_0 = memref.subview %1[0, 0, 0, %arg2] [3, 3, 32, 4] [1, 1, 1, 1] : memref<3x3x32x64xi32> to memref<3x3x32x4xi32, strided<[6144, 2048, 64, 1], offset: ?>> %subview_1 = memref.subview %2[0, %arg0, %arg1, %arg2] [2, 4, 4, 4] [1, 1, 1, 1] : memref<2x12x12x64xi32> to memref<2x4x4x4xi32, strided<[9216, 768, 64, 1], offset: ?>> %alloc = memref.alloc() : memref<2x6x6x32xi32, 1 : i32> linalg.copy ins(%subview : memref<2x6x6x32xi32, strided<[6272, 448, 32, 1], offset: ?>>) outs(%alloc : memref<2x6x6x32xi32, 1 : i32>) %alloc_2 = memref.alloc() : memref<3x3x32x4xi32, 1 : i32> linalg.copy ins(%subview_0 : memref<3x3x32x4xi32, strided<[6144, 2048, 64, 1], offset: ?>>) outs(%alloc_2 : memref<3x3x32x4xi32, 1 : i32>) %alloc_3 = memref.alloc() : memref<2x4x4x4xi32, 1 : i32> scf.forall (%arg3, %arg4) in (2, 4) { %subview_4 = memref.subview %alloc[%arg3, %arg4, 0, 0] [1, 3, 6, 32] [1, 1, 1, 1] : memref<2x6x6x32xi32, 1 : i32> to memref<1x3x6x32xi32, strided<[1152, 192, 32, 1], offset: ?>, 1 : i32> %subview_5 = memref.subview %alloc_3[%arg3, %arg4, 0, 0] [1, 1, 4, 4] [1, 1, 1, 1] : memref<2x4x4x4xi32, 1 : i32> to memref<1x1x4x4xi32, strided<[64, 16, 4, 1], offset: ?>, 1 : i32> %alloc_6 = memref.alloc() : memref<1x1x4x4xi32, 2 : i32> linalg.fill ins(%c0_i32 : i32) outs(%alloc_6 : memref<1x1x4x4xi32, 2 : i32>) scf.for %arg5 = %c0 to %c3 step %c1 { scf.for %arg6 = %c0 to %c3 step %c1 { scf.for %arg7 = %c0 to %c32 step %c8 { %subview_7 = memref.subview %subview_4[0, %arg5, %arg6, %arg7] [1, 1, 4, 8] [1, 1, 1, 1] : memref<1x3x6x32xi32, strided<[1152, 192, 32, 1], offset: ?>, 1 : i32> to memref<1x1x4x8xi32, strided<[1152, 192, 32, 1], offset: ?>, 1 : i32> %subview_8 = memref.subview %alloc_2[%arg5, %arg6, %arg7, 0] [1, 1, 8, 4] [1, 1, 1, 1] : memref<3x3x32x4xi32, 1 : i32> to memref<1x1x8x4xi32, strided<[384, 128, 4, 1], offset: ?>, 1 : i32> %alloc_9 = memref.alloc() : memref<1x1x4x8xi32, 2 : i32> linalg.copy ins(%subview_7 : memref<1x1x4x8xi32, strided<[1152, 192, 32, 1], offset: ?>, 1 : i32>) outs(%alloc_9 : memref<1x1x4x8xi32, 2 : i32>) %alloc_10 = memref.alloc() : memref<1x1x8x4xi32, 2 : i32> linalg.copy ins(%subview_8 : memref<1x1x8x4xi32, strided<[384, 128, 4, 1], offset: ?>, 1 : i32>) outs(%alloc_10 : memref<1x1x8x4xi32, 2 : i32>) %subview_11 = memref.subview %alloc_9[0, 0, 0, 0] [1, 1, 4, 8] [1, 1, 1, 1] : memref<1x1x4x8xi32, 2 : i32> to memref<1x4x8xi32, strided<[32, 8, 1]>, 2 : i32> %subview_12 = memref.subview %alloc_10[0, 0, 0, 0] [1, 1, 8, 4] [1, 1, 1, 1] : memref<1x1x8x4xi32, 2 : i32> to memref<1x8x4xi32, strided<[32, 4, 1]>, 2 : i32> %subview_13 = memref.subview %alloc_6[0, 0, 0, 0] [1, 1, 4, 4] [1, 1, 1, 1] : memref<1x1x4x4xi32, 2 : i32> to memref<1x4x4xi32, strided<[16, 4, 1]>, 2 : i32> linalg.conv_1d_nwc_wcf {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>} ins(%subview_11, %subview_12 : memref<1x4x8xi32, strided<[32, 8, 1]>, 2 : i32>, memref<1x8x4xi32, strided<[32, 4, 1]>, 2 : i32>) outs(%subview_13 : memref<1x4x4xi32, strided<[16, 4, 1]>, 2 : i32>) memref.dealloc %alloc_9 : memref<1x1x4x8xi32, 2 : i32> memref.dealloc %alloc_10 : memref<1x1x8x4xi32, 2 : i32> } } } linalg.copy ins(%alloc_6 : memref<1x1x4x4xi32, 2 : i32>) outs(%subview_5 : memref<1x1x4x4xi32, strided<[64, 16, 4, 1], offset: ?>, 1 : i32>) memref.dealloc %alloc_6 : memref<1x1x4x4xi32, 2 : i32> } linalg.copy ins(%alloc_3 : memref<2x4x4x4xi32, 1 : i32>) outs(%subview_1 : memref<2x4x4x4xi32, strided<[9216, 768, 64, 1], offset: ?>>) memref.dealloc %alloc : memref<2x6x6x32xi32, 1 : i32> memref.dealloc %alloc_2 : memref<3x3x32x4xi32, 1 : i32> memref.dealloc %alloc_3 : memref<2x4x4x4xi32, 1 : i32> } return } } ... // -----// IR Dump Before AMDAIENormalizeLoopBounds (iree-amdaie-normalize-loop-bounds) //----- // module { func.func @conv_2d_nhwc_hwcf_dispatch_0_conv_2d_nhwc_hwcf_2x12x12x64x3x3x32_i32() attributes {translation_info = #iree_codegen.translation_info} { %c768 = arith.constant 768 : index %c9216 = arith.constant 9216 : index %c16 = arith.constant 16 : index %c128 = arith.constant 128 : index %c384 = arith.constant 384 : index %c192 = arith.constant 192 : index %c1152 = arith.constant 1152 : index %c64 = arith.constant 64 : index %c2048 = arith.constant 2048 : index %c6144 = arith.constant 6144 : index %c4 = arith.constant 4 : index %c448 = arith.constant 448 : index %c6272 = arith.constant 6272 : index %c6 = arith.constant 6 : index %c2 = arith.constant 2 : index %c8 = arith.constant 8 : index %c32 = arith.constant 32 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<2x14x14x32xi32> %1 = amdaie.logicalobjectfifo.from_memref %0, {} : memref<2x14x14x32xi32> -> !amdaie.logicalobjectfifo> memref.assume_alignment %0, 64 : memref<2x14x14x32xi32> %2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<3x3x32x64xi32> %3 = amdaie.logicalobjectfifo.from_memref %2, {} : memref<3x3x32x64xi32> -> !amdaie.logicalobjectfifo> memref.assume_alignment %2, 64 : memref<3x3x32x64xi32> %4 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<2x12x12x64xi32> %5 = amdaie.logicalobjectfifo.from_memref %4, {} : memref<2x12x12x64xi32> -> !amdaie.logicalobjectfifo> memref.assume_alignment %4, 64 : memref<2x12x12x64xi32> scf.forall (%arg0, %arg1, %arg2) = (0, 0, 0) to (12, 12, 64) step (4, 4, 4) { %alloc = memref.alloc() : memref<2x6x6x32xi32, 1 : i32> %6 = amdaie.logicalobjectfifo.from_memref %alloc, {} : memref<2x6x6x32xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %7 = amdaie.logicalobjectfifo.from_memref %alloc, {} : memref<2x6x6x32xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %8 = amdaie.dma_cpy_nd(%6[] [] [], %1[%c0, %arg0, %arg1, %c0] [%c2, %c6, %c6, %c32] [%c6272, %c448, %c32, %c1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %alloc_0 = memref.alloc() : memref<3x3x32x4xi32, 1 : i32> %9 = amdaie.logicalobjectfifo.from_memref %alloc_0, {} : memref<3x3x32x4xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %10 = amdaie.logicalobjectfifo.from_memref %alloc_0, {} : memref<3x3x32x4xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %11 = amdaie.dma_cpy_nd(%9[] [] [], %3[%c0, %c0, %c0, %arg2] [%c3, %c3, %c32, %c4] [%c6144, %c2048, %c64, %c1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %alloc_1 = memref.alloc() : memref<2x4x4x4xi32, 1 : i32> %12 = amdaie.logicalobjectfifo.from_memref %alloc_1, {} : memref<2x4x4x4xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %13 = amdaie.logicalobjectfifo.from_memref %alloc_1, {} : memref<2x4x4x4xi32, 1 : i32> -> !amdaie.logicalobjectfifo> scf.forall (%arg3, %arg4) in (2, 4) { %alloc_2 = memref.alloc() : memref<1x1x4x4xi32, 2 : i32> %15 = amdaie.logicalobjectfifo.from_memref %alloc_2, {} : memref<1x1x4x4xi32, 2 : i32> -> !amdaie.logicalobjectfifo> linalg.fill ins(%c0_i32 : i32) outs(%alloc_2 : memref<1x1x4x4xi32, 2 : i32>) scf.for %arg5 = %c0 to %c3 step %c1 { scf.for %arg6 = %c0 to %c3 step %c1 { scf.for %arg7 = %c0 to %c32 step %c8 { %17 = affine.apply affine_map<()[s0, s1] -> (s0 + s1)>()[%arg4, %arg5] %alloc_3 = memref.alloc() : memref<1x1x4x8xi32, 2 : i32> %18 = amdaie.logicalobjectfifo.from_memref %alloc_3, {} : memref<1x1x4x8xi32, 2 : i32> -> !amdaie.logicalobjectfifo> %19 = amdaie.dma_cpy_nd(%18[] [] [], %7[%arg3, %17, %arg6, %arg7] [%c1, %c1, %c4, %c8] [%c1152, %c192, %c32, %c1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %alloc_4 = memref.alloc() : memref<1x1x8x4xi32, 2 : i32> %20 = amdaie.logicalobjectfifo.from_memref %alloc_4, {} : memref<1x1x8x4xi32, 2 : i32> -> !amdaie.logicalobjectfifo> %21 = amdaie.dma_cpy_nd(%20[] [] [], %10[%arg5, %arg6, %arg7, %c0] [%c1, %c1, %c8, %c4] [%c384, %c128, %c4, %c1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %subview = memref.subview %alloc_3[0, 0, 0, 0] [1, 1, 4, 8] [1, 1, 1, 1] : memref<1x1x4x8xi32, 2 : i32> to memref<1x4x8xi32, strided<[32, 8, 1]>, 2 : i32> %subview_5 = memref.subview %alloc_4[0, 0, 0, 0] [1, 1, 8, 4] [1, 1, 1, 1] : memref<1x1x8x4xi32, 2 : i32> to memref<1x8x4xi32, strided<[32, 4, 1]>, 2 : i32> %subview_6 = memref.subview %alloc_2[0, 0, 0, 0] [1, 1, 4, 4] [1, 1, 1, 1] : memref<1x1x4x4xi32, 2 : i32> to memref<1x4x4xi32, strided<[16, 4, 1]>, 2 : i32> linalg.conv_1d_nwc_wcf {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>} ins(%subview, %subview_5 : memref<1x4x8xi32, strided<[32, 8, 1]>, 2 : i32>, memref<1x8x4xi32, strided<[32, 4, 1]>, 2 : i32>) outs(%subview_6 : memref<1x4x4xi32, strided<[16, 4, 1]>, 2 : i32>) memref.dealloc %alloc_3 : memref<1x1x4x8xi32, 2 : i32> memref.dealloc %alloc_4 : memref<1x1x8x4xi32, 2 : i32> } } } %16 = amdaie.dma_cpy_nd(%12[%arg3, %arg4, %c0, %c0] [%c1, %c1, %c4, %c4] [%c64, %c16, %c4, %c1], %15[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) memref.dealloc %alloc_2 : memref<1x1x4x4xi32, 2 : i32> } %14 = amdaie.dma_cpy_nd(%5[%c0, %arg0, %arg1, %arg2] [%c2, %c4, %c4, %c4] [%c9216, %c768, %c64, %c1], %13[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) memref.dealloc %alloc : memref<2x6x6x32xi32, 1 : i32> memref.dealloc %alloc_0 : memref<3x3x32x4xi32, 1 : i32> memref.dealloc %alloc_1 : memref<2x4x4x4xi32, 1 : i32> } return } } ... // -----// IR Dump Before AMDAIEInsertCores (iree-amdaie-insert-cores) //----- // module { func.func @conv_2d_nhwc_hwcf_dispatch_0_conv_2d_nhwc_hwcf_2x12x12x64x3x3x32_i32() attributes {translation_info = #iree_codegen.translation_info} { %c768 = arith.constant 768 : index %c9216 = arith.constant 9216 : index %c16 = arith.constant 16 : index %c128 = arith.constant 128 : index %c384 = arith.constant 384 : index %c192 = arith.constant 192 : index %c1152 = arith.constant 1152 : index %c64 = arith.constant 64 : index %c2048 = arith.constant 2048 : index %c6144 = arith.constant 6144 : index %c4 = arith.constant 4 : index %c448 = arith.constant 448 : index %c6272 = arith.constant 6272 : index %c6 = arith.constant 6 : index %c2 = arith.constant 2 : index %c8 = arith.constant 8 : index %c32 = arith.constant 32 : index %c1 = arith.constant 1 : index %c3 = arith.constant 3 : index %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %0 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<2x14x14x32xi32> %1 = amdaie.logicalobjectfifo.from_memref %0, {} : memref<2x14x14x32xi32> -> !amdaie.logicalobjectfifo> memref.assume_alignment %0, 64 : memref<2x14x14x32xi32> %2 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%c0) flags(ReadOnly) : memref<3x3x32x64xi32> %3 = amdaie.logicalobjectfifo.from_memref %2, {} : memref<3x3x32x64xi32> -> !amdaie.logicalobjectfifo> memref.assume_alignment %2, 64 : memref<3x3x32x64xi32> %4 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%c0) : memref<2x12x12x64xi32> %5 = amdaie.logicalobjectfifo.from_memref %4, {} : memref<2x12x12x64xi32> -> !amdaie.logicalobjectfifo> memref.assume_alignment %4, 64 : memref<2x12x12x64xi32> %c3_0 = arith.constant 3 : index %c1_1 = arith.constant 1 : index %c3_2 = arith.constant 3 : index %c1_3 = arith.constant 1 : index %c16_4 = arith.constant 16 : index %c1_5 = arith.constant 1 : index scf.forall (%arg0, %arg1, %arg2) in (3, 3, 16) { %6 = affine.apply affine_map<(d0) -> (d0 * 4)>(%arg2) %7 = affine.apply affine_map<(d0) -> (d0 * 4)>(%arg1) %8 = affine.apply affine_map<(d0) -> (d0 * 4)>(%arg0) %alloc = memref.alloc() : memref<2x6x6x32xi32, 1 : i32> %9 = amdaie.logicalobjectfifo.from_memref %alloc, {} : memref<2x6x6x32xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %10 = amdaie.logicalobjectfifo.from_memref %alloc, {} : memref<2x6x6x32xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %11 = amdaie.dma_cpy_nd(%9[] [] [], %1[%c0, %8, %7, %c0] [%c2, %c6, %c6, %c32] [%c6272, %c448, %c32, %c1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %alloc_6 = memref.alloc() : memref<3x3x32x4xi32, 1 : i32> %12 = amdaie.logicalobjectfifo.from_memref %alloc_6, {} : memref<3x3x32x4xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %13 = amdaie.logicalobjectfifo.from_memref %alloc_6, {} : memref<3x3x32x4xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %14 = amdaie.dma_cpy_nd(%12[] [] [], %3[%c0, %c0, %c0, %6] [%c3, %c3, %c32, %c4] [%c6144, %c2048, %c64, %c1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %alloc_7 = memref.alloc() : memref<2x4x4x4xi32, 1 : i32> %15 = amdaie.logicalobjectfifo.from_memref %alloc_7, {} : memref<2x4x4x4xi32, 1 : i32> -> !amdaie.logicalobjectfifo> %16 = amdaie.logicalobjectfifo.from_memref %alloc_7, {} : memref<2x4x4x4xi32, 1 : i32> -> !amdaie.logicalobjectfifo> scf.forall (%arg3, %arg4) in (2, 4) { %alloc_8 = memref.alloc() : memref<1x1x4x4xi32, 2 : i32> %18 = amdaie.logicalobjectfifo.from_memref %alloc_8, {} : memref<1x1x4x4xi32, 2 : i32> -> !amdaie.logicalobjectfifo> linalg.fill ins(%c0_i32 : i32) outs(%alloc_8 : memref<1x1x4x4xi32, 2 : i32>) scf.for %arg5 = %c0 to %c3 step %c1 { scf.for %arg6 = %c0 to %c3 step %c1 { %c4_9 = arith.constant 4 : index %c1_10 = arith.constant 1 : index scf.for %arg7 = %c0 to %c4_9 step %c1_10 { %20 = affine.apply affine_map<(d0) -> (d0 * 8)>(%arg7) %21 = affine.apply affine_map<()[s0, s1] -> (s0 + s1)>()[%arg4, %arg5] %alloc_11 = memref.alloc() : memref<1x1x4x8xi32, 2 : i32> %22 = amdaie.logicalobjectfifo.from_memref %alloc_11, {} : memref<1x1x4x8xi32, 2 : i32> -> !amdaie.logicalobjectfifo> %23 = amdaie.dma_cpy_nd(%22[] [] [], %10[%arg3, %21, %arg6, %20] [%c1, %c1, %c4, %c8] [%c1152, %c192, %c32, %c1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %alloc_12 = memref.alloc() : memref<1x1x8x4xi32, 2 : i32> %24 = amdaie.logicalobjectfifo.from_memref %alloc_12, {} : memref<1x1x8x4xi32, 2 : i32> -> !amdaie.logicalobjectfifo> %25 = amdaie.dma_cpy_nd(%24[] [] [], %13[%arg5, %arg6, %20, %c0] [%c1, %c1, %c8, %c4] [%c384, %c128, %c4, %c1]) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) %subview = memref.subview %alloc_11[0, 0, 0, 0] [1, 1, 4, 8] [1, 1, 1, 1] : memref<1x1x4x8xi32, 2 : i32> to memref<1x4x8xi32, strided<[32, 8, 1]>, 2 : i32> %subview_13 = memref.subview %alloc_12[0, 0, 0, 0] [1, 1, 8, 4] [1, 1, 1, 1] : memref<1x1x8x4xi32, 2 : i32> to memref<1x8x4xi32, strided<[32, 4, 1]>, 2 : i32> %subview_14 = memref.subview %alloc_8[0, 0, 0, 0] [1, 1, 4, 4] [1, 1, 1, 1] : memref<1x1x4x4xi32, 2 : i32> to memref<1x4x4xi32, strided<[16, 4, 1]>, 2 : i32> linalg.conv_1d_nwc_wcf {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>} ins(%subview, %subview_13 : memref<1x4x8xi32, strided<[32, 8, 1]>, 2 : i32>, memref<1x8x4xi32, strided<[32, 4, 1]>, 2 : i32>) outs(%subview_14 : memref<1x4x4xi32, strided<[16, 4, 1]>, 2 : i32>) memref.dealloc %alloc_11 : memref<1x1x4x8xi32, 2 : i32> memref.dealloc %alloc_12 : memref<1x1x8x4xi32, 2 : i32> } } } %19 = amdaie.dma_cpy_nd(%15[%arg3, %arg4, %c0, %c0] [%c1, %c1, %c4, %c4] [%c64, %c16, %c4, %c1], %18[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) memref.dealloc %alloc_8 : memref<1x1x4x4xi32, 2 : i32> } %17 = amdaie.dma_cpy_nd(%5[%c0, %8, %7, %6] [%c2, %c4, %c4, %c4] [%c9216, %c768, %c64, %c1], %16[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) memref.dealloc %alloc : memref<2x6x6x32xi32, 1 : i32> memref.dealloc %alloc_6 : memref<3x3x32x4xi32, 1 : i32> memref.dealloc %alloc_7 : memref<2x4x4x4xi32, 1 : i32> } return } } ... 0 libIREECompiler.so 0x00007fedda9998b7 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) + 39 1 libIREECompiler.so 0x00007fedda997af0 llvm::sys::RunSignalHandlers() + 80 2 libIREECompiler.so 0x00007fedda999f7a 3 libc.so.6 0x00007fedd3e42520 4 libIREECompiler.so 0x00007fedda9ebfb3 mlir::ArrayAttr::getValue() const + 3 5 libIREECompiler.so 0x00007feddbe92dc9 6 libIREECompiler.so 0x00007fedda8dea5e 7 libIREECompiler.so 0x00007fedda8dea5e 8 libIREECompiler.so 0x00007fedda8dea5e 9 libIREECompiler.so 0x00007feddbe92c1d 10 libIREECompiler.so 0x00007feddab40115 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) + 629 11 libIREECompiler.so 0x00007feddab40898 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) + 328 12 libIREECompiler.so 0x00007feddab42105 mlir::detail::OpToOpPassAdaptor::runOnOperationImpl(bool) + 421 13 libIREECompiler.so 0x00007feddab402b7 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) + 1047 14 libIREECompiler.so 0x00007feddab40898 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) + 328 15 libIREECompiler.so 0x00007feddab44d61 16 libIREECompiler.so 0x00007feddc53231d 17 libIREECompiler.so 0x00007feddab40115 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) + 629 18 libIREECompiler.so 0x00007feddab40898 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) + 328 19 libIREECompiler.so 0x00007feddab42105 mlir::detail::OpToOpPassAdaptor::runOnOperationImpl(bool) + 421 20 libIREECompiler.so 0x00007feddab402b7 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) + 1047 21 libIREECompiler.so 0x00007feddab40898 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) + 328 22 libIREECompiler.so 0x00007feddab44d61 23 libIREECompiler.so 0x00007feddc532e1a 24 libIREECompiler.so 0x00007feddab40115 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) + 629 25 libIREECompiler.so 0x00007feddab40898 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) + 328 26 libIREECompiler.so 0x00007feddab42105 mlir::detail::OpToOpPassAdaptor::runOnOperationImpl(bool) + 421 27 libIREECompiler.so 0x00007feddab402b7 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) + 1047 28 libIREECompiler.so 0x00007feddab40898 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) + 328 29 libIREECompiler.so 0x00007feddab42c69 mlir::PassManager::run(mlir::Operation*) + 985 30 libIREECompiler.so 0x00007fedda8ede00 ireeCompilerInvocationPipeline + 3408 31 libIREECompiler.so 0x00007feddab06648 32 libIREECompiler.so 0x00007feddab05ed1 33 libc.so.6 0x00007fedd3e29d90 34 libc.so.6 0x00007fedd3e29e40 __libc_start_main + 128 35 iree-compile 0x000055a1626e36d5 Traceback (most recent call last): File "/proj/gdba/jamesn/workspace/iree-amd-aie/build_tools/ci/cpu_comparison/./run_test.py", line 743, in all_tests( File "/proj/gdba/jamesn/workspace/iree-amd-aie/build_tools/ci/cpu_comparison/./run_test.py", line 616, in all_tests aie_vs_llvm_cpu( File "/proj/gdba/jamesn/workspace/iree-amd-aie/build_tools/ci/cpu_comparison/./run_test.py", line 492, in aie_vs_llvm_cpu aie_vs_baseline( File "/proj/gdba/jamesn/workspace/iree-amd-aie/build_tools/ci/cpu_comparison/./run_test.py", line 428, in aie_vs_baseline aie_artefacts = generate_aie_artefacts( File "/proj/gdba/jamesn/workspace/iree-amd-aie/build_tools/ci/cpu_comparison/./run_test.py", line 143, in generate_aie_artefacts shell_out(compilation_flags, config.output_dir, config.verbose) File "/proj/gdba/jamesn/workspace/iree-amd-aie/build_tools/ci/cpu_comparison/./run_test.py", line 84, in shell_out raise RuntimeError(