Skip to content

Commit

Permalink
Merge branch 'main' into strided_access_on_target_new
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhang93 authored Oct 2, 2024
2 parents 91aa862 + b7f8fc4 commit 721e7b9
Show file tree
Hide file tree
Showing 37 changed files with 688 additions and 223 deletions.
2 changes: 2 additions & 0 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ def generate_aie_vmfb(
f"--iree-amd-aie-vitis-install-dir={config.vitis_dir}",
f"--iree-hal-dump-executable-files-to={config.output_dir}",
"--iree-scheduling-optimize-bindings=false",
"--iree-hal-memoization=false",
"--iree-hal-indirect-command-buffers=false",
f"--mlir-disable-threading",
"--mlir-elide-resource-strings-if-larger=10",
]
Expand Down
4 changes: 4 additions & 0 deletions build_tools/ci/run_matmul_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,8 @@ function run_matmul_test() {
--iree-amd-aie-enable-chess=${use_chess} \
--iree-amdaie-enable-packet-flow=${enable_packet_flow} \
--iree-hal-dump-executable-files-to=$PWD \
--iree-hal-memoization=false \
--iree-hal-indirect-command-buffers=false \
--mlir-elide-resource-strings-if-larger=10 \
--iree-amd-aie-show-invoked-commands"

Expand All @@ -416,6 +418,8 @@ function run_matmul_test() {
set +e

echo "**** Generating matmul .vmfb file for ${name} ****"
${IREE_COMPILE_EXE} "${matmul_ir}" \
${compilation_flags} --compile-to=vm -o "${matmul_vmfb}.vm"
${IREE_COMPILE_EXE} "${matmul_ir}" \
${compilation_flags} -o "${matmul_vmfb}"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@ def AMDAIE_ConnectionType: I32EnumAttr<"ConnectionType",
]
> {
let cppNamespace = "mlir::iree_compiler::AMDAIE";
let genSpecializedAttr = 0;
}

// Dialect attribute wrapping the `AMDAIE_ConnectionType` enum; registered on
// `AMDAIE_Dialect` with the mnemonic `connection_type`.
def AMDAIE_ConnectionTypeAttr
: EnumAttr<AMDAIE_Dialect, AMDAIE_ConnectionType, "connection_type">;

def AMDAIE_CopyOpOperateOn: I32EnumAttr<"CopyOpOperateOn",
"Enables templated functions that operate on either source or target of "
"copy/dma operations",
Expand Down
21 changes: 19 additions & 2 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,13 +445,14 @@ void CircularDmaCpyNdOp::getCanonicalizationPatterns(RewritePatternSet &results,

// Convenience builder taking only the target and the source. It forwards to a
// builder overload with more parameters, passing empty lists (`{}`) for the
// target/source channels and `nullptr` for the remaining trailing arguments.
void ConnectionOp::build(mlir::OpBuilder &b, mlir::OperationState &result,
Value target, Value source) {
build(b, result, target, {}, source, {}, nullptr);
build(b, result, target, {}, source, {}, nullptr, nullptr);
}

// Convenience builder taking the target/source together with their channel
// lists. It forwards to a builder overload with more parameters, passing
// `nullptr` for the remaining trailing arguments.
void ConnectionOp::build(mlir::OpBuilder &b, mlir::OperationState &result,
Value target, ValueRange targetChannels, Value source,
ValueRange sourceChannels) {
build(b, result, target, targetChannels, source, sourceChannels, nullptr);
build(b, result, target, targetChannels, source, sourceChannels, nullptr,
nullptr);
}

FailureOr<AMDAIE::NpuCircularDmaCpyNdOp>
Expand All @@ -469,6 +470,22 @@ ConnectionOp::getNpuCircularDmaCpyNdUser() {
return npuDmaUsers[0];
}

std::optional<FlowOp> ConnectionOp::getFlowOp() {
return dyn_cast_if_present<AMDAIE::FlowOp>(getFlow().getDefiningOp());
}

//===----------------------------------------------------------------------===//
// AMDAIE_FlowOp
//===----------------------------------------------------------------------===//

/// Verifies the shape of the flow: having more than one source and more than
/// one target at the same time is rejected; every other combination is
/// accepted.
LogicalResult FlowOp::verify() {
  const bool hasManySources = getSources().size() > 1;
  const bool hasManyTargets = getTargets().size() > 1;
  // Anything that is not many-to-many is valid.
  if (!hasManySources || !hasManyTargets) return success();
  return emitOpError()
         << "multiple source and multiple targets is unsupported";
}

//===----------------------------------------------------------------------===//
// AMDAIE_LockOp
//===----------------------------------------------------------------------===//
Expand Down
53 changes: 47 additions & 6 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,42 @@ def AMDAIE_EndOp: AMDAIE_Op<"end", [Terminator]> {
let assemblyFormat = [{ attr-dict }];
}

// `amdaie.flow`: routing description between a set of source channels and a
// set of target channels, producing a single `index` result.
// `AttrSizedOperandSegments` is needed because the op has two variadic operand
// groups (`sources` and `targets`).
def AMDAIE_FlowOp: AMDAIE_Op<"flow", [AttrSizedOperandSegments]>,
Results<(outs Index)> {
let summary = "The data connection between a set of source and target "
"channels.";
let description = [{
This operation represents a connection between source and target channels.
This is used to describe a logical data routing configuration between
channels, to be solved by the router for actual stream switch
configurations that implements it. The multiple sources and targets can
describe different connection patterns:
- Single source and multiple targets describes a data broadcasting pattern.
- Multiple sources and single target describes a data merge pattern.
- Multiple sources and multiple targets is not supported.

Example:

```mlir
%channel = amdaie.channel(%tile_0_0, 0, port_type = DMA)
%channel_1 = amdaie.channel(%tile_0_1, 0, port_type = DMA)
%channel_2 = amdaie.channel(%tile_1_1, 0, port_type = DMA)
%0 = amdaie.flow({%channel} -> {%channel_1, %channel_2})
{is_packet_flow = true, packet_id = 0 : ui8}
```
}];

// `is_packet_flow` is a required attribute; `packet_id` is optional and may
// be left unset when the op is created.
let arguments = (
ins Variadic<Index>:$sources,
Variadic<Index>:$targets,
BoolAttr:$is_packet_flow,
OptionalAttr<UI8Attr>:$packet_id
);

let assemblyFormat = [{ `(` `{` $sources `}` `->` `{` $targets `}` `)` attr-dict }];
// Enables the C++ `FlowOp::verify()` hook for this op.
let hasVerifier = 1;
}

def AMDAIE_TileOp: AMDAIE_Op<"tile", [
Pure,
DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>
Expand Down Expand Up @@ -319,20 +355,23 @@ def AMDAIE_ChannelOp: AMDAIE_Op<"channel", [

```mlir
%tile = amdaie.tile(%c0, %c0)
%channel = amdaie.channel(%tile, 0)
%channel = amdaie.channel(%tile, 0, port_type = DMA)
```
}];

let arguments = (
ins Index:$tile,
ConfinedAttr<I8Attr, [IntMinValue<0>]>:$value
ConfinedAttr<I8Attr, [IntMinValue<0>]>:$value,
StrmSwPortTypeAttr:$port_type
);

let extraClassDeclaration = [{
TileOp getTileOp();
}];

let assemblyFormat = [{ `(` $tile `,` $value `)` attr-dict }];
let assemblyFormat = [{
`(` $tile `,` $value `,` `port_type` `=` $port_type `)` attr-dict
}];
}

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -733,7 +772,8 @@ def AMDAIE_ConnectionOp: AMDAIE_Op<"connection",
Variadic<Index>:$target_channels,
AnyAMDAIELogicalObjectFifoType:$source,
Variadic<Index>:$source_channels,
OptionalAttr<AMDAIE_ConnectionType>:$connection_type
OptionalAttr<AMDAIE_ConnectionTypeAttr>:$connection_type,
Optional<Index>:$flow

);
let results = (outs Index:$result);
Expand All @@ -745,8 +785,8 @@ def AMDAIE_ConnectionOp: AMDAIE_Op<"connection",
`,`
$source
( ` ` `{` $source_channels^ `}` )?
( `,` `connection_type` `=` $connection_type^ )?
`)`
( `,` `flow` `=` $flow^ )?
`)`
attr-dict
`:` `(` type($target) `,` type($source) `)`
}];
Expand All @@ -758,6 +798,7 @@ def AMDAIE_ConnectionOp: AMDAIE_Op<"connection",
];

let extraClassDeclaration = [{
std::optional<FlowOp> getFlowOp();
Value getSourceMemref() { return getSource(); }
Value getTargetMemref() { return getTarget(); }
Type getSourceType() { return getSource().getType(); }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,29 @@ func.func @dma_cpy_nd_mixed(%arg0: !amdaie.logicalobjectfifo<memref<1x1x8x16xi32

// -----

// Exercises `amdaie.flow` in its two visible forms: a non-packet flow carrying
// only `is_packet_flow = false`, and a packet flow that additionally carries an
// explicit `packet_id` of type `ui8`. The expected output mirrors the input
// text, with SSA names replaced by FileCheck captures.
// CHECK-LABEL: func.func @flow
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[C1:.*]] = arith.constant 1 : index
// CHECK: %[[TILE_0_0:.*]] = amdaie.tile(%[[C0]], %[[C0]])
// CHECK: %[[TILE_0_1:.*]] = amdaie.tile(%[[C0]], %[[C1]])
// CHECK: %[[CHANNEL:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = DMA)
// CHECK: %[[CHANNEL_1:.*]] = amdaie.channel(%[[TILE_0_1]], 0, port_type = DMA)
// CHECK: amdaie.flow({%[[CHANNEL]]} -> {%[[CHANNEL_1]]}) {is_packet_flow = false}
// CHECK: amdaie.flow({%[[CHANNEL]]} -> {%[[CHANNEL_1]]}) {is_packet_flow = true, packet_id = 1 : ui8}
func.func @flow() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%tile_0_0 = amdaie.tile(%c0, %c0)
%tile_0_1 = amdaie.tile(%c0, %c1)
%channel = amdaie.channel(%tile_0_0, 0, port_type = DMA)
%channel_1 = amdaie.channel(%tile_0_1, 0, port_type = DMA)
%0 = amdaie.flow({%channel} -> {%channel_1}) {is_packet_flow = false}
%1 = amdaie.flow({%channel} -> {%channel_1}) {is_packet_flow = true, packet_id = 1 : ui8}
return
}

// -----

// CHECK-LABEL: func.func @lock
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[TILE_0:.*]] = amdaie.tile(%[[C0]], %[[C0]])
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// This pipeline is obtained by going into Passes.cpp, and dumping the pass pipeline (at the end of addAMDAIEObjectFifoLoweringPasses) using `passManager.dump()`. This test is included, as it can be useful to have a reference in IR of all the passes that are run.

// RUN: iree-opt --pass-pipeline="builtin.module(fold-memref-alias-ops,iree-amdaie-convert-to-dma,iree-amdaie-normalize-loop-bounds,iree-amdaie-insert-cores,iree-amdaie-localize-logicalobjectfifo,cse,iree-amdaie-distribute-cores-and-objectfifos,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-split-logical-objectfifos-for-connection-reuse,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-dma-to-circular-dma,func.func(iree-amdaie-create-aie-workgroup),cse,iree-amdaie-dma-cse,iree-amdaie-hoist-logical-objectfifo,iree-amdaie-canonicalize-doubly-strided-op{fold-single-dims=false},iree-amdaie-flatten-logicalobjectfifo,iree-amdaie-assign-logical-objectfifo-depth{l1-buffer-depth=2 l2-buffer-depth=2 l3-buffer-depth=1},iree-amdaie-access-to-acquire-release,iree-amdaie-none-access-to-temporary-buffer,iree-amdaie-assign-connection-types,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-dma-composition{only-zero-stride-on-outer-dim=true},cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-dma-cse,iree-amdaie-assign-npu-dma-bd-ids,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-controlcode-loop-unroll,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-dma-cse,iree-amdaie-canonicalize-doubly-strided-op{fold-single-dims=false},canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-convert-core-forall-to-for,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false 
top-down=true},iree-amdaie-assign-channels,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-objfifo-bufferization,iree-amdaie-acquire-release-to-use-lock,iree-amdaie-canonicalize-npu-dma-cpy-nd{nb-dimensions=4},canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-sink-into-core,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-lower-to-aie,iree-amdaie-remove-memoryspace)" --split-input-file %s | FileCheck %s
// RUN: iree-opt --pass-pipeline="builtin.module(fold-memref-alias-ops,iree-amdaie-convert-to-dma,iree-amdaie-normalize-loop-bounds,iree-amdaie-insert-cores,iree-amdaie-localize-logicalobjectfifo,cse,iree-amdaie-distribute-cores-and-objectfifos,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-split-logical-objectfifos-for-connection-reuse,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-dma-to-circular-dma,func.func(iree-amdaie-create-aie-workgroup),cse,iree-amdaie-dma-cse,iree-amdaie-hoist-logical-objectfifo,iree-amdaie-canonicalize-doubly-strided-op{fold-single-dims=false},iree-amdaie-flatten-logicalobjectfifo,iree-amdaie-assign-logical-objectfifo-depth{l1-buffer-depth=2 l2-buffer-depth=2 l3-buffer-depth=1},iree-amdaie-access-to-acquire-release,iree-amdaie-none-access-to-temporary-buffer,iree-amdaie-assign-connection-types,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-dma-composition{only-zero-stride-on-outer-dim=true},cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-dma-cse,iree-amdaie-assign-npu-dma-bd-ids,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-controlcode-loop-unroll,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-dma-cse,iree-amdaie-canonicalize-doubly-strided-op{fold-single-dims=false},canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-convert-core-forall-to-for,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false 
top-down=true},iree-amdaie-assign-channels,cse,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-objfifo-bufferization,iree-amdaie-connection-to-flow,iree-amdaie-assign-packet-ids,iree-amdaie-acquire-release-to-use-lock,iree-amdaie-canonicalize-npu-dma-cpy-nd{nb-dimensions=4},canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-sink-into-core,canonicalize{ max-iterations=10 max-num-rewrites=-1 region-simplify=normal test-convergence=false top-down=true},iree-amdaie-lower-to-aie,iree-amdaie-remove-memoryspace)" --split-input-file %s | FileCheck %s



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,20 @@ LogicalResult assignChannels(AMDAIE::WorkgroupOp workgroupOp) {
for (Value tile : sourceLogicalObjFifo.getTiles()) {
uint8_t channel = generator.getProducerDMAChannel(tile);
auto channelOp = rewriter.create<AMDAIE::ChannelOp>(
rewriter.getUnknownLoc(), tile, channel);
rewriter.getUnknownLoc(), tile, channel, StrmSwPortType::DMA);
sourceChannels.push_back(channelOp.getResult());
}
SmallVector<Value> targetChannels;
for (Value tile : targetLogicalObjFifo.getTiles()) {
uint8_t channel = generator.getConsumerDMAChannel(tile);
auto channelOp = rewriter.create<AMDAIE::ChannelOp>(
rewriter.getUnknownLoc(), tile, channel);
rewriter.getUnknownLoc(), tile, channel, StrmSwPortType::DMA);
targetChannels.push_back(channelOp.getResult());
}
rewriter.replaceOpWithNewOp<AMDAIE::ConnectionOp>(
connectionOp, connectionOp.getTarget(), targetChannels,
connectionOp.getSource(), sourceChannels,
connectionOp.getConnectionTypeAttr());
connectionOp.getConnectionTypeAttr(), /*flow*/ nullptr);
}
return success();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ void AMDAIEAssignConnectionTypesPass::runOnOperation() {
rewriter.replaceOpWithNewOp<AMDAIE::ConnectionOp>(
connectionOp, connectionOp.getTarget(),
connectionOp.getTargetChannels(), connectionOp.getSource(),
connectionOp.getSourceChannels(), connectionTypeAttr);
connectionOp.getSourceChannels(), connectionTypeAttr, /*flow*/ nullptr);
return WalkResult::advance();
});
if (res.wasInterrupted()) return signalPassFailure();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree-amd-aie/IR/AMDAIEOps.h"
#include "iree-amd-aie/Transforms/AMDAIEUtils.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree-amd-aie/Transforms/Transforms.h"

#define DEBUG_TYPE "iree-amdaie-assign-packet-ids"

namespace mlir::iree_compiler::AMDAIE {

namespace {

/// Pass that assigns a `packet_id` to the `amdaie.flow` ops marked as packet
/// flows. The implementation lives in `runOnOperation`.
class AMDAIEAssignPacketIdsPass
    : public impl::AMDAIEAssignPacketIdsBase<AMDAIEAssignPacketIdsPass> {
 public:
  /// Entry point invoked by the pass manager.
  void runOnOperation() override;

  /// The pass creates AMDAIE ops, so the AMDAIE dialect must be registered.
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<AMDAIEDialect>();
  }
};

/// Assigns packet IDs to all `amdaie.flow` ops nested under the pass root.
///
/// Every flow op is re-created with the same sources, targets and
/// `is_packet_flow` flag. Packet flows receive consecutive `ui8` packet IDs
/// starting at 0, in walk order; non-packet flows are re-created without a
/// `packet_id`.
///
/// The pass fails when:
///  - no AMDAIE device can be found in the target attribute configuration, or
///  - a packet flow needs an ID beyond `deviceModel.getPacketIdMaxIdx()`.
void AMDAIEAssignPacketIdsPass::runOnOperation() {
  Operation *parentOp = getOperation();
  IRRewriter rewriter(parentOp->getContext());
  auto targetAttr = IREE::HAL::ExecutableTargetAttr::lookup(parentOp);
  std::optional<AMDAIEDevice> maybeDevice = getConfigAMDAIEDevice(targetAttr);
  if (!maybeDevice) {
    parentOp->emitOpError()
        << "has no AMDAIEDevice in the target attribute configuration. This "
           "device-specific information is required to assign packet IDs "
           "within the resource constraints";
    return signalPassFailure();
  }
  AMDAIE::AMDAIEDeviceModel deviceModel =
      AMDAIE::getDeviceModel(maybeDevice.value());
  auto ui8ty =
      IntegerType::get(rewriter.getContext(), 8, IntegerType::Unsigned);
  // Next packet ID to hand out; only incremented for packet flows.
  int pktFlowIndex{0};
  WalkResult res = parentOp->walk([&](AMDAIE::FlowOp flowOp) {
    const bool isPacketFlow = flowOp.getIsPacketFlow();
    // Only packet flows consume an ID. Checking exhaustion for circuit flows
    // as well would reject valid IR in which a circuit flow happens to be
    // visited after the last available ID has been handed out.
    if (isPacketFlow && pktFlowIndex > deviceModel.getPacketIdMaxIdx()) {
      flowOp.emitOpError() << "ran out of packet IDs to assign";
      return WalkResult::interrupt();
    }
    rewriter.setInsertionPoint(flowOp);
    IntegerAttr pktIdAttr =
        isPacketFlow ? IntegerAttr::get(ui8ty, pktFlowIndex++) : nullptr;
    rewriter.replaceOpWithNewOp<AMDAIE::FlowOp>(
        flowOp, flowOp.getSources(), flowOp.getTargets(), isPacketFlow,
        pktIdAttr);
    return WalkResult::advance();
  });
  if (res.wasInterrupted()) return signalPassFailure();
}

} // namespace

/// Factory returning a fresh instance of the packet ID assignment pass.
std::unique_ptr<Pass> createAMDAIEAssignPacketIdsPass() {
  std::unique_ptr<Pass> pass = std::make_unique<AMDAIEAssignPacketIdsPass>();
  return pass;
}

} // namespace mlir::iree_compiler::AMDAIE
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree-amd-aie/IR/AMDAIEOps.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree-amd-aie/Transforms/Transforms.h"

#define DEBUG_TYPE "iree-amdaie-connection-to-flow"

namespace mlir::iree_compiler::AMDAIE {

namespace {

/// Pass that creates an `amdaie.flow` op for every `amdaie.connection` op and
/// attaches it to the connection. The implementation lives in
/// `runOnOperation`.
class AMDAIEConnectionToFlowPass
    : public impl::AMDAIEConnectionToFlowBase<AMDAIEConnectionToFlowPass> {
 public:
  /// Entry point invoked by the pass manager.
  void runOnOperation() override;

  /// The pass creates AMDAIE ops, so the AMDAIE dialect must be registered.
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<AMDAIEDialect>();
  }
};

void AMDAIEConnectionToFlowPass::runOnOperation() {
Operation *parentOp = getOperation();
IRRewriter rewriter(parentOp->getContext());
// TODO(jornt): currently, don't delete connections as they are still
// needed for lowering to AIE dialect dma_bds. This will be changed in the
// future.
WalkResult res = parentOp->walk([&](AMDAIE::ConnectionOp connectionOp) {
rewriter.setInsertionPoint(connectionOp);
std::optional<AMDAIE::ConnectionType> connectionType =
connectionOp.getConnectionType();
bool isPacketFlow = connectionType && connectionType.value() ==
AMDAIE::ConnectionType::Packet;
auto flowOp = rewriter.create<AMDAIE::FlowOp>(
rewriter.getUnknownLoc(), connectionOp.getSourceChannels(),
connectionOp.getTargetChannels(), isPacketFlow, /*packetId*/ nullptr);
rewriter.replaceOpWithNewOp<AMDAIE::ConnectionOp>(
connectionOp, connectionOp.getTarget(),
connectionOp.getTargetChannels(), connectionOp.getSource(),
connectionOp.getSourceChannels(), connectionOp.getConnectionTypeAttr(),
flowOp);
return WalkResult::advance();
});
if (res.wasInterrupted()) return signalPassFailure();
}

} // namespace

/// Factory returning a fresh instance of the connection-to-flow pass.
std::unique_ptr<Pass> createAMDAIEConnectionToFlowPass() {
  std::unique_ptr<Pass> pass = std::make_unique<AMDAIEConnectionToFlowPass>();
  return pass;
}

} // namespace mlir::iree_compiler::AMDAIE
Loading

0 comments on commit 721e7b9

Please sign in to comment.