From f59995b047ac9d4690531271120ade6155eef4e5 Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Fri, 30 Aug 2024 22:16:51 -0500 Subject: [PATCH] [WIP] use peano for kernels fixes https://github.com/nod-ai/iree-amd-aie/issues/637 --- build_tools/ci/run_matmul_test.sh | 26 +++--- build_tools/download_peano.sh | 2 +- .../AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp | 87 +++++++++++-------- 3 files changed, 66 insertions(+), 49 deletions(-) diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh index 5328eb8d3..a24f40126 100755 --- a/build_tools/ci/run_matmul_test.sh +++ b/build_tools/ci/run_matmul_test.sh @@ -182,7 +182,7 @@ function run_matmul_test() { local amd_aie_install_path="${IREE_INSTALL_DIR}" - local vitis_path="${VITIS}" + local vitis_path="" local use_chess="false" @@ -540,16 +540,15 @@ run_matmul_test \ # MLIR-AIR Matmul tests ################################################################### -if [ -d "$VITIS" ]; then - run_matmul_test \ - --name_prefix "ukern" \ - --lower_to_aie_pipeline "air" \ - --tile_pipeline "pad-pack" \ - --lhs_rhs_type "bf16" \ - --acc_type "f32" \ - --m "256" --k "256" --n "256" \ - --use_ukernel "1" -fi +run_matmul_test \ + --name_prefix "ukern" \ + --lower_to_aie_pipeline "air" \ + --tile_pipeline "pad-pack" \ + --lhs_rhs_type "bf16" \ + --acc_type "f32" \ + --m "256" --k "256" --n "256" \ + --vitis_path "${VITIS}" \ + --use_ukernel "1" # Example of a run with a group of 2+ matmuls. Currently this test is passed # the flag '--num_repeat_runs 0" as there is currently an issue with the runtime if @@ -720,6 +719,7 @@ if [ -d "$VITIS" ]; then --lhs_rhs_type "bf16" \ --acc_type "f32" \ --num_repeat_runs "2" \ + --vitis_path "${VITIS}" \ --use_ukernel "1" run_matmul_test_on_shapes ${bf16_ukernel_shapes_medium[@]} \ @@ -729,6 +729,7 @@ if [ -d "$VITIS" ]; then --lhs_rhs_type "bf16" \ --acc_type "f32" \ --num_repeat_runs "2" \ + --vitis_path "${VITIS}" \ --use_ukernel "1" fi @@ -746,6 +747,7 @@ if [ -d "$VITIS" ]; then --n "32" \ --k "32" \ --use_chess "1" \ + --vitis_path "${VITIS}" \ --num_repeat_runs "10" run_matmul_test \ @@ -757,6 +759,7 @@ if [ -d "$VITIS" ]; then --k "64" \ --use_chess "1" \ --num_repeat_runs "10" \ + --vitis_path "${VITIS}" \ --use_ukernel "1" run_matmul_test \ @@ -769,6 +772,7 @@ if [ -d "$VITIS" ]; then --n "32" \ --k "32" \ --use_chess "1" \ + --vitis_path "${VITIS}" \ --num_repeat_runs "10" fi diff --git a/build_tools/download_peano.sh b/build_tools/download_peano.sh index 70c8693f5..d5001a215 100644 --- a/build_tools/download_peano.sh +++ b/build_tools/download_peano.sh @@ -1,5 +1,5 @@ #!/bin/bash -RELEASE=19.0.0.2024082221+90abe71b +RELEASE=19.0.0.2024083101+42158757 pip download llvm_aie==$RELEASE -f https://github.com/Xilinx/llvm-aie/releases/expanded_assets/nightly unzip llvm_aie*whl diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp index 5de16906f..0dd180bca 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp @@ -96,7 +96,7 @@ namespace { FailureOr getTargetDir(const std::string &npuVersion) { if (npuVersion == "npu1") return std::string{"target_aie_ml"}; if (npuVersion == "npu4") return std::string{"target_aie2p"}; - llvm::errs() << "unsupported NPUVersion: " << npuVersion; + llvm::errs() << "unsupported NPUVersion: " << npuVersion << "\n"; return failure(); } @@ -152,7 +152,8 @@ FailureOr findVitis(std::optional &vitisDir, return failure(); } if (!std::filesystem::exists(licenseFile)) { - llvm::errs() << "ERROR: license file" << licenseFile << " does not exist"; + llvm::errs() << "ERROR: license file" << licenseFile << " does not exist" + << "\n"; return failure(); } } @@ -215,7 +216,7 @@ std::pair> makeChessArgs( archVersion = "21"; modelDir = "aie2p"; } else { - llvm::errs() << "unsupported NPU version: " << npuVersion; + llvm::errs() << "unsupported NPU version: " << npuVersion << "\n"; llvm::report_fatal_error("unsupported NPU version"); } @@ -475,7 +476,7 @@ static FailureOr assembleStringUsing( if (auto maybeErr = dumpStrToDisk(inputFileStr, inputFile.string()); maybeErr.has_value()) { llvm::errs() << "Failed to dump to disk " << inputFile.string() - << " because: " << maybeErr; + << " because: " << maybeErr << "\n"; return failure(); } @@ -487,7 +488,8 @@ static FailureOr assembleStringUsing( } if (failed(assembler(inputFile.string(), outputFile.string(), extraArgs, workDir, toolDir, npuVersion, verbose))) { - llvm::errs() << "Failed to assemble " << outputFileName << ".o"; + llvm::errs() << "Failed to assemble " << outputFileName << ".o" + << "\n"; return failure(); } return outputFile; @@ -532,22 +534,31 @@ static LogicalResult generateCoreElfFiles( Path cwd = std::filesystem::current_path(); FailureOr mmObjectFilePath; if (ukernel && (ukernel == "mm" || ukernel == "all")) { - FailureOr maybeVitisDir = findVitis(vitisDir, npuVersion); - if (failed(maybeVitisDir)) { - llvm::errs() << "compiling ukernels currently requires chess (even if " - "you're using peano)"; - return failure(); - } if (!std::filesystem::exists(cwd / "mm.o")) { - mmObjectFilePath = assembleStringUsingChess( - /*inputFileStr=*/_MM_CC, - /*inputFileName=*/"mm.cc", - /*outputFileName=*/"mm.o", - /*outputDir=*/cwd, - /*extraArgs*/ std::vector{}, - /*workDir=*/tempDir, - /*vitisDir=*/*maybeVitisDir, - /*npuVersion*/ npuVersion, verbose); + if (useChess) { + if (verbose) llvm::outs() << "using chess for ukernel codegen\n"; + FailureOr maybeVitisDir = findVitis(vitisDir, npuVersion); + mmObjectFilePath = assembleStringUsingChess( + /*inputFileStr=*/_MM_CC, + /*inputFileName=*/"mm.cc", + /*outputFileName=*/"mm.o", + /*outputDir=*/cwd, + /*extraArgs*/ std::vector{}, + /*workDir=*/tempDir, + /*vitisDir=*/*maybeVitisDir, + /*npuVersion*/ npuVersion, verbose); + } else { + if (verbose) llvm::outs() << "using peano for ukernel codegen\n"; + mmObjectFilePath = assembleStringUsingPeano( + /*inputFileStr=*/_MM_CC, + /*inputFileName=*/"mm.cc", + /*outputFileName=*/"mm.o", + /*outputDir=*/cwd, + /*extraArgs*/ std::vector{}, + /*workDir=*/tempDir, + /*peanoDir=*/peanoDir, + /*npuVersion*/ npuVersion, verbose); + } if (failed(mmObjectFilePath)) return failure(); } else { mmObjectFilePath = cwd / "mm.o"; @@ -579,13 +590,14 @@ static LogicalResult generateCoreElfFiles( { auto bcfOutput = openOutputFile(bcfPath.string(), &errorMessage); if (!bcfOutput) { - llvm::errs() << "failed to open bcf file because: " << errorMessage; + llvm::errs() << "failed to open bcf file because: " << errorMessage + << "\n"; return failure(); } if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToBCF( deviceOp, bcfOutput->os(), col, row))) { - llvm::errs() << "Failed to generate BCF"; + llvm::errs() << "Failed to generate BCF\n"; return failure(); } bcfOutput->keep(); @@ -614,7 +626,7 @@ static LogicalResult generateCoreElfFiles( openOutputFile(ldscriptPath.string(), &errorMessage); if (!ldscriptOutput) { llvm::errs() << "Failed to open ldscript file because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToLdScript( @@ -654,7 +666,7 @@ static LogicalResult generateCDO(MLIRContext *context, AIE::DeviceOp deviceOp, deviceOp = *copy.getOps().begin(); if (failed(mlir::iree_compiler::AMDAIE::AIETranslateToCDODirect( deviceOp, tempDir.string()))) { - llvm::errs() << "failed to emit CDO"; + llvm::errs() << "failed to emit CDO\n"; return failure(); } copy->erase(); @@ -750,7 +762,7 @@ static LogicalResult generateXCLBin( dumpStrToDisk(memTopologyData, memTopologyJsonFile.string()); maybeErr.has_value()) { llvm::errs() << "failed to dump to disk mem_topology.json because: " - << *maybeErr; + << *maybeErr << "\n"; return failure(); } } @@ -797,7 +809,7 @@ static LogicalResult generateXCLBin( dumpStrToDisk(aiePartitionJsonData, aiePartitionJsonFile.string()); maybeErr.has_value()) { llvm::errs() << "failed to dump to disk aie_partition.json because: " - << *maybeErr; + << *maybeErr << "\n"; return failure(); } } @@ -816,7 +828,7 @@ static LogicalResult generateXCLBin( if (auto maybeErr = dumpStrToDisk(kernelStr, kernelsJsonFile.string()); maybeErr.has_value()) { llvm::errs() << "failed to dump to disk kernels.json because: " - << *maybeErr; + << *maybeErr << "\n"; return failure(); } } @@ -825,7 +837,8 @@ static LogicalResult generateXCLBin( { auto designBifOut = openOutputFile(designBifFile.string(), &errorMessage); if (!designBifOut) { - llvm::errs() << "failed to open design.bif because: " << errorMessage; + llvm::errs() << "failed to open design.bif because: " << errorMessage + << "\n"; return failure(); } @@ -868,7 +881,7 @@ static LogicalResult generateXCLBin( } if (iree_aie_bootgen_main(cstrings.size(), const_cast(&cstrings[0]))) { - llvm::errs() << "failed to execute bootgen"; + llvm::errs() << "failed to execute bootgen\n"; return failure(); } } @@ -892,14 +905,14 @@ static LogicalResult generateXCLBin( "--force", "--input", *inputXclbin}; if (failed(runTool(xclbinutilBin.value().string(), inputFlags, verbose))) { - llvm::errs() << "failed to execute xclbinutil"; + llvm::errs() << "failed to execute xclbinutil\n"; return failure(); } auto aieInputPartitionOut = openInputFile(aieInputPartitionJsonFile.string(), &errorMessage); if (!aieInputPartitionOut) { llvm::errs() << "failed to open aie_input_partition.json because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } Expected aieInputPartitionOutValue = @@ -913,7 +926,7 @@ static LogicalResult generateXCLBin( if (!aiePartitionOut) { llvm::errs() << "failed to open aie aie_input_partition.json for " "output because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } llvm::Expected aiePartitionOutValue = @@ -931,7 +944,7 @@ static LogicalResult generateXCLBin( maybeErr.has_value()) { llvm::errs() << "failed to dump to disk aie_input_partition.json because: " - << errorMessage; + << errorMessage << "\n"; return failure(); } flags.insert(flags.end(), {"--input", *inputXclbin}); @@ -1040,14 +1053,14 @@ static LogicalResult generateUnifiedObject( } if (failed(pm.run(moduleOpCopy))) { - llvm::errs() << "Failed to lower to LLVM"; + llvm::errs() << "Failed to lower to LLVM\n"; return failure(); } llvm::LLVMContext llvmContext; auto llvmModule = translateModuleToLLVMIR(moduleOpCopy, llvmContext); if (!llvmModule) { - llvm::errs() << "Failed to translate module to LLVMIR"; + llvm::errs() << "Failed to translate module to LLVMIR\n"; return failure(); } @@ -1081,7 +1094,7 @@ static LogicalResult generateUnifiedObject( if (auto maybeErr = dumpStrToDisk(inputLLStr, LLVMIRFile.string()); maybeErr.has_value()) { llvm::errs() << "Failed to dump to disk input.ll" - << " because: " << maybeErr; + << " because: " << maybeErr << "\n"; return failure(); } Path peanoOptBin = peanoDir / "bin" / "opt"; @@ -1096,7 +1109,7 @@ static LogicalResult generateUnifiedObject( args.reserve(args.size() + peanoArgs.size()); args.insert(args.end(), peanoArgs.begin(), peanoArgs.end()); if (failed(runTool(peanoOptBin.string(), args, verbose))) { - llvm::errs() << "Failed to optimize ll with peano"; + llvm::errs() << "Failed to optimize ll with peano\n"; return failure(); }