From 82bcfdad163bc02b6e9750f6b7c9db404a791a9e Mon Sep 17 00:00:00 2001 From: yuhao <1171760467@qq.com> Date: Tue, 11 Apr 2023 01:27:33 +0000 Subject: [PATCH 01/12] refactor cast ofops to signless --- .../OneFlow/Conversion/OneFlowToTosa.h | 2 +- oneflow/ir/include/OneFlow/OneFlowPasses.td | 6 ++-- .../lib/OneFlow/Conversion/OneFlowToTosa.cpp | 36 ++++++++++++++----- oneflow/ir/oneflow-opt/oneflow-opt.cpp | 2 +- .../lib/OneFlow/MLIROneFlowTranslation.cpp | 2 +- .../OneFlow/auto_nhwc/test_nhwc_resnet.py | 1 + .../test_cast_ops_to_signless.mlir | 7 ++++ 7 files changed, 41 insertions(+), 15 deletions(-) create mode 100644 oneflow/ir/test/OneFlow/cuda_code_gen/test_cast_ops_to_signless.mlir diff --git a/oneflow/ir/include/OneFlow/Conversion/OneFlowToTosa.h b/oneflow/ir/include/OneFlow/Conversion/OneFlowToTosa.h index bd8be270d55..212aae49128 100644 --- a/oneflow/ir/include/OneFlow/Conversion/OneFlowToTosa.h +++ b/oneflow/ir/include/OneFlow/Conversion/OneFlowToTosa.h @@ -26,7 +26,7 @@ namespace oneflow { std::unique_ptr createLowerOneFlowToTosaPass(); std::unique_ptr createLowerOneFlowToLinalgPass(); std::unique_ptr createConvertToSignlessForTosaPass(); -std::unique_ptr createCastOneFlowInputToSignlessPass(); +std::unique_ptr createCastOneFlowOpsToSignlessPass(); } // namespace oneflow diff --git a/oneflow/ir/include/OneFlow/OneFlowPasses.td b/oneflow/ir/include/OneFlow/OneFlowPasses.td index 88816c0eb98..3cd78a12af0 100644 --- a/oneflow/ir/include/OneFlow/OneFlowPasses.td +++ b/oneflow/ir/include/OneFlow/OneFlowPasses.td @@ -35,9 +35,9 @@ def OneFlowJobToFuncPass : Pass<"ofjob-to-func", "ModuleOp"> { let dependentDialects = ["mlir::func::FuncDialect"]; } -def CastOneFlowInputToSignlessPass : Pass<"cast-ofinput-to-signless", "ModuleOp"> { - let summary = "cast oneflow input to singless"; - let constructor = "mlir::oneflow::createCastOneFlowInputToSignlessPass()"; +def CastOneFlowOpsToSignlessPass : Pass<"cast-ofops-to-signless", "ModuleOp"> { + let summary = "cast oneflow ops to singless"; + let constructor = "mlir::oneflow::createCastOneFlowOpsToSignlessPass()"; let dependentDialects = ["mlir::func::FuncDialect", "mlir::BuiltinDialect"]; } diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index 76e3cc42176..96febdb23b3 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -623,29 +623,47 @@ struct CastInputConversion final : public OpRewritePattern { if (isSignLessTensorOrOther(cast.getResult(0).getType())) { return failure(); } } } - LOG(ERROR) << "ok4"; InputOp cloned = rewriter.create(op->getLoc(), op.getResultTypes(), op->getOperands(), op->getAttrs()); - auto m = op->getParentOp(); - m->dump(); rewriter.replaceOpWithNewOp( op, convertToSignless(getContext(), op.getOutput().getType()), cloned.getOutput()); - m->dump(); + return success(); + } +}; + +struct CastVariableConversion final : public OpRewritePattern { + public: + explicit CastVariableConversion(mlir::MLIRContext* context) + : OpRewritePattern(context, /*benefit=*/0) {} + mlir::LogicalResult matchAndRewrite(VariableOp op, + mlir::PatternRewriter& rewriter) const override { + auto outType = op.getOutput().getType(); + if (isSignLessTensorOrOther(outType)) { return failure(); } + if (op->hasOneUse()) { + if (auto cast = + llvm::dyn_cast(op.getOutput().use_begin()->getOwner())) { + if (isSignLessTensorOrOther(cast.getResult(0).getType())) { return failure(); } + } + } + VariableOp cloned = rewriter.create(op->getLoc(), op.getResultTypes(), op->getOperands(), + op->getAttrs()); + rewriter.replaceOpWithNewOp( + op, convertToSignless(getContext(), op.getOutput().getType()), cloned.getOutput()); return success(); } }; namespace { -class CastOneFlowInputToSignlessPass - : public CastOneFlowInputToSignlessPassBase { +class CastOneFlowOpsToSignlessPass + : public CastOneFlowOpsToSignlessPassBase { void getDependentDialects(::mlir::DialectRegistry& registry) const override { registry.insert(); } void runOnOperation() override { Operation* op = getOperation(); RewritePatternSet patterns(&getContext()); - patterns.add(op->getContext()); + patterns.add(op->getContext()); (void)applyPatternsAndFoldGreedily(op, std::move(patterns)); } @@ -768,8 +786,8 @@ void ConvertToSignlessForTosaPass::runOnOperation() { (void)applyPatternsAndFoldGreedily(op, std::move(patterns)); } -std::unique_ptr createCastOneFlowInputToSignlessPass() { - return std::make_unique(); +std::unique_ptr createCastOneFlowOpsToSignlessPass() { + return std::make_unique(); } } // namespace oneflow diff --git a/oneflow/ir/oneflow-opt/oneflow-opt.cpp b/oneflow/ir/oneflow-opt/oneflow-opt.cpp index cfc245ef0e7..c2a893245fa 100644 --- a/oneflow/ir/oneflow-opt/oneflow-opt.cpp +++ b/oneflow/ir/oneflow-opt/oneflow-opt.cpp @@ -67,7 +67,7 @@ int32_t main(int32_t argc, char** argv) { mlir::registerBufferHostRegisterPassPass(); mlir::registerGpuCopyArgPassPass(); mlir::registerOneFlowJobToFuncPassPass(); - mlir::registerCastOneFlowInputToSignlessPassPass(); + mlir::registerCastOneFlowOpsToSignlessPassPass(); mlir::registerFuncToOneFlowJobPassPass(); #ifdef WITH_MLIR_CUDA_CODEGEN mlir::oneflow::registerGpuSerializeToCubinPass(); diff --git a/oneflow/ir/oneflow-translate/lib/OneFlow/MLIROneFlowTranslation.cpp b/oneflow/ir/oneflow-translate/lib/OneFlow/MLIROneFlowTranslation.cpp index 4785e797692..233c33d6f02 100644 --- a/oneflow/ir/oneflow-translate/lib/OneFlow/MLIROneFlowTranslation.cpp +++ b/oneflow/ir/oneflow-translate/lib/OneFlow/MLIROneFlowTranslation.cpp @@ -813,7 +813,7 @@ LogicalResult ApplyRoundTripPatterns(RoundTripOneFlowJobWrapperInterface& job_wr if (job_wrapper.IsLastIRPass() && ::oneflow::ParseBooleanFromEnv("ONEFLOW_MLIR_ENABLE_CODEGEN_FUSERS", false)) { pm.addPass(oneflow::createOneFlowJobToFuncPass()); - pm.addPass(oneflow::createCastOneFlowInputToSignlessPass()); + pm.addPass(oneflow::createCastOneFlowOpsToSignlessPass()); auto toTosa = oneflow::createLowerOneFlowToTosaPass(); CHECK(toTosa->initializeOptions("full=0 lower-job=0").succeeded()); pm.addPass(std::move(toTosa)); diff --git a/oneflow/ir/test/OneFlow/auto_nhwc/test_nhwc_resnet.py b/oneflow/ir/test/OneFlow/auto_nhwc/test_nhwc_resnet.py index a52e13bf4bd..a39feac4b1c 100644 --- a/oneflow/ir/test/OneFlow/auto_nhwc/test_nhwc_resnet.py +++ b/oneflow/ir/test/OneFlow/auto_nhwc/test_nhwc_resnet.py @@ -23,6 +23,7 @@ os.environ["ONEFLOW_MLIR_ENABLE_ROUND_TRIP"] = "1" os.environ["ONEFLOW_MLIR_PREFER_NHWC"] = "1" +os.environ["ONEFLOW_MLIR_ENABLE_CODEGEN_FUSERS"] = "1" import oneflow as flow import oneflow.unittest diff --git a/oneflow/ir/test/OneFlow/cuda_code_gen/test_cast_ops_to_signless.mlir b/oneflow/ir/test/OneFlow/cuda_code_gen/test_cast_ops_to_signless.mlir new file mode 100644 index 00000000000..23961e0f75a --- /dev/null +++ b/oneflow/ir/test/OneFlow/cuda_code_gen/test_cast_ops_to_signless.mlir @@ -0,0 +1,7 @@ +// RUN: oneflow-opt %s -cast-ofops-to-signless | FileCheck %s +// CHECK: unrealized_conversion_cast +func.func @Cast_289__FUSE__ScalarMulByTensor_290() -> tensor<512x2048x1x1xf32> { + %output_299 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cuda", hierarchy = [1], op_name = "resnet.layer4.2.conv1.weight", output_lbns = ["resnet.layer4.2.conv1.weight/out"], parallel = #sbp.parallel<[] -> [[#sbp.B]]>, scope_symbol_id = 1995 : i64, shape = [512 : si64, 2048 : si64, 1 : si64, 1 : si64]} : () -> tensor<512x2048x1x1xsi64> + %0 = "oneflow.cast"(%output_299) {device_name = ["0:0"], device_tag = "cpu", dtype = 2 : i32, hierarchy = [1], op_name = "Cast_1", op_type_name = "cast", scope_symbol_id = 4611686018427416574 : i64} : (tensor<512x2048x1x1xsi64>) -> tensor<512x2048x1x1xf32> + func.return %0 : tensor<512x2048x1x1xf32> +} \ No newline at end of file From c4fcdac00042629b54a31ee667bf5d02cd6a1bbc Mon Sep 17 00:00:00 2001 From: yuhao <1171760467@qq.com> Date: Tue, 11 Apr 2023 07:22:37 +0000 Subject: [PATCH 02/12] fix --- .../ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index 96febdb23b3..ced09f605bf 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -645,8 +645,9 @@ struct CastVariableConversion final : public OpRewritePattern { if (isSignLessTensorOrOther(cast.getResult(0).getType())) { return failure(); } } } - VariableOp cloned = rewriter.create(op->getLoc(), op.getResultTypes(), op->getOperands(), - op->getAttrs()); + if (op.getOutput().getUses().empty()) { return failure(); } + VariableOp cloned = rewriter.create(op->getLoc(), op.getResultTypes(), + op->getOperands(), op->getAttrs()); rewriter.replaceOpWithNewOp( op, convertToSignless(getContext(), op.getOutput().getType()), cloned.getOutput()); return success(); @@ -711,10 +712,12 @@ void OneFlowLoweringToTosaPass::runOnOperation() { const auto mgr = ::oneflow::Singleton<::oneflow::VariableTensorMgr>::Get(); // check if the pass is triggered by python based on the presence of variable tensor manger - if (mgr) { - patterns.add(typeConverter, context); - } else { - patterns.add(typeConverter, context, this->variableAsConstant); + if (fullyConvert) { + if (mgr) { + patterns.add(typeConverter, context); + } else { + patterns.add(typeConverter, context, this->variableAsConstant); + } } patterns.add Date: Tue, 11 Apr 2023 07:40:04 +0000 Subject: [PATCH 03/12] fix --- .../lib/OneFlow/Conversion/OneFlowToTosa.cpp | 38 ++++--------------- 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index ced09f605bf..1448c3af06d 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -473,26 +473,15 @@ struct NormalizationInferenceOpLowering final using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NormalizationInferenceOp op, OpAdaptor adaptor, ConversionPatternRewriter& rewriter) const override { - auto reshape_dim = [&](Type type, Value value) -> Value { - RankedTensorType in_type = value.getType().dyn_cast(); - RankedTensorType out_type = type.cast(); - SmallVector new_shape = {in_type.getShape()[0]}; - for (auto i = 2; i < out_type.getRank(); ++i) new_shape.push_back(1); - auto new_type = RankedTensorType::get(new_shape, out_type.getElementType()); - return rewriter.create(op->getLoc(), new_type, value, - rewriter.getDenseI64ArrayAttr(new_shape)); - }; - auto loc = op->getLoc(); - const auto out_type = op.getY().getType(); const auto epsilon_type = RankedTensorType::get({}, rewriter.getF32Type()); auto epsilon = rewriter.create( loc, epsilon_type, DenseElementsAttr::get(epsilon_type, op.getEpsilon())); - auto mean = reshape_dim(out_type, op.getMovingMean()); - auto variance = reshape_dim(out_type, op.getMovingVariance()); - auto gamma = reshape_dim(out_type, op.getGamma()); - auto beta = reshape_dim(out_type, op.getBeta()); + auto mean = op.getMovingMean(); + auto variance = op.getMovingVariance(); + auto gamma = op.getGamma(); + auto beta = op.getBeta(); auto output = op.getY(); auto x = op.getX(); @@ -508,31 +497,20 @@ struct NormalizationOpLowering final : public OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(NormalizationOp op, OpAdaptor adaptor, ConversionPatternRewriter& rewriter) const override { - auto reshape_dim = [&](Type type, Value value) -> Value { - const RankedTensorType in_type = value.getType().dyn_cast(); - const RankedTensorType out_type = type.cast(); - SmallVector new_shape = {in_type.getShape()[0]}; - for (auto i = 2; i < out_type.getRank(); ++i) new_shape.push_back(1); - const auto new_type = RankedTensorType::get(new_shape, out_type.getElementType()); - return rewriter.create(op->getLoc(), new_type, value, - rewriter.getDenseI64ArrayAttr(new_shape)); - }; - auto loc = op->getLoc(); - const auto out_type = op.getY().getType(); const auto epsilon_type = RankedTensorType::get({}, rewriter.getF32Type()); // epsilon = reshape(epsilon, shape_1) auto epsilon = rewriter.create( loc, epsilon_type, DenseElementsAttr::get(epsilon_type, op.getEpsilon())); // mean = reshape(mean, shape_0) - auto mean = reshape_dim(out_type, op.getMovingMean()); + auto mean = op.getMovingMean(); // variance= reshape(variance, shape_0) - auto variance = reshape_dim(out_type, op.getMovingVariance()); + auto variance = op.getMovingVariance(); // scale = reshape(scale, shape_0) - auto gamma = reshape_dim(out_type, op.getGamma()); + auto gamma = op.getGamma(); // beta = reshape(beta, shape_0) - auto beta = reshape_dim(out_type, op.getBeta()); + auto beta = op.getBeta(); auto output = op.getY(); auto x = op.getX(); From 64ca3eeaa4f10724eba0722dcb67212a74aaa932 Mon Sep 17 00:00:00 2001 From: yuhao <1171760467@qq.com> Date: Tue, 11 Apr 2023 11:38:15 +0000 Subject: [PATCH 04/12] fix --- .../lib/OneFlow/Conversion/OneFlowToTosa.cpp | 57 +++++++++++++------ 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index 1448c3af06d..7376ec652c5 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -365,18 +365,27 @@ struct MaxPool2DOpLowering final : public OpConversionPattern { auto pad_pairs = get_pair_int64_from_array(op.getPadding()); auto loc = op.getLoc(); - auto perms = {0, 2, 3, 1}; const auto kernel = rewriter.getDenseI64ArrayAttr({kernel_pairs.first, kernel_pairs.second}); const auto stride = rewriter.getDenseI64ArrayAttr({stride_pairs.first, stride_pairs.second}); const auto pad = rewriter.getDenseI64ArrayAttr( {pad_pairs.first, pad_pairs.second, pad_pairs.first, pad_pairs.second}); - auto input = CreateTransposeValue(loc, rewriter, op.getX(), perms); - auto output = CreateTransposeType(op.getY().getType().cast(), perms); - - auto max_pool2d = rewriter.create(loc, output, input, kernel, stride, pad); - auto y = CreateTransposeValue(loc, rewriter, max_pool2d, {0, 3, 1, 2}); + auto input = op.getX(); + auto out_type = op.getY().getType().cast(); + + Value y; + if (op.IsNCHW()) { + auto perms = {0, 2, 3, 1}; + auto reverse_perms = {0, 3, 1, 2}; + input = CreateTransposeValue(loc, rewriter, input, perms); + out_type = CreateTransposeType(out_type, perms); + auto max_pool2d = + rewriter.create(loc, out_type, input, kernel, stride, pad); + y = CreateTransposeValue(loc, rewriter, max_pool2d, reverse_perms); + } else { + y = rewriter.create(loc, out_type, input, kernel, stride, pad); + } auto indice_output = convertToSignless(op->getContext(), op.getIndice().getType()); auto value = DenseElementsAttr::get(indice_output, rewriter.getZeroAttr(rewriter.getI64Type())); @@ -548,23 +557,37 @@ struct Conv2DOpLowering final : public OpConversionPattern { auto loc = op.getLoc(); if (!bias) { const auto output_shape = op.getOut().getType().cast(); - const auto output_channels = output_shape.getDimSize(1); + // support nhwc + const auto output_channels = output_shape.getDimSize(op.IsNCHW() ? 1 : 3); const auto bias_elem_type = output_shape.getElementType(); const auto type = RankedTensorType::get(output_channels, bias_elem_type); bias = rewriter.create( op.getLoc(), type, DenseElementsAttr::get(type, rewriter.getZeroAttr(bias_elem_type))); } - auto perms = {0, 2, 3, 1}; - auto in = CreateTransposeValue(loc, rewriter, op.getIn(), perms); - auto weight = CreateTransposeValue(loc, rewriter, op.getWeight(), perms); - const auto output = CreateTransposeType(op.getOut().getType().cast(), perms); - - auto conv2d = - rewriter.create(loc, output, in, weight, bias, pad, stride, dilation); - - auto res = CreateTransposeValue(loc, rewriter, conv2d, {0, 3, 1, 2}); - rewriter.replaceOp(op, {res}); + auto in = op.getIn(); + auto weight = op.getWeight(); + auto out_type = op.getOut().getType().cast(); + if (out_type.getRank() != 4) { + LOG(FATAL) << "Failed to lowering oneflow op"; + op->dump(); + } + // support nhwc + if (op.IsNCHW()) { + const auto perms = {0, 2, 3, 1}; + const auto reverse_perms = {0, 3, 1, 2}; + in = CreateTransposeValue(loc, rewriter, in, perms); + weight = CreateTransposeValue(loc, rewriter, weight, perms); + out_type = CreateTransposeType(out_type, perms); + auto conv2d = + rewriter.create(loc, out_type, in, weight, bias, pad, stride, dilation); + + auto res = CreateTransposeValue(loc, rewriter, conv2d, reverse_perms); + rewriter.replaceOp(op, {res}); + } else { + rewriter.replaceOpWithNewOp(op, out_type, in, weight, bias, pad, stride, + dilation); + } return success(); } }; From 03010883825e1428c608e9fc9327f33bba60450a Mon Sep 17 00:00:00 2001 From: yuhao <1171760467@qq.com> Date: Wed, 12 Apr 2023 03:01:06 +0000 Subject: [PATCH 05/12] fix --- .../lib/OneFlow/Conversion/OneFlowToTosa.cpp | 22 ++++++++++- oneflow/ir/oneflow-opt/oneflow-opt.cpp | 1 + oneflow/ir/test/Frontend/OneFlowToIree.mlir | 1 + .../OneFlow/conversion/OneFlowToTosa.mlir | 37 +++++++------------ 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index 7376ec652c5..8929dabcf21 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -122,7 +122,21 @@ RankedTensorType CreateTransposeType(ShapedType output, ArrayRef perms) Value CreateBNOp(Location loc, ConversionPatternRewriter& rewriter, Value output, Value x, Value mean, Value variance, Value epsilon, Value gamma, Value beta) { - const auto output_type = output.getType(); + auto output_type = output.getType(); + RankedTensorType mean_type, x_type; + if (!(mean_type = llvm::dyn_cast_or_null(mean.getType())) + || !(x_type = llvm::dyn_cast_or_null(x.getType())) + || mean_type.getRank() != 1 || x_type.getRank() != 4) { + LOG(FATAL) << "failec to create bn op"; + } + // nhwc + bool isNhwc = x_type.getDimSize(3) == mean_type.getDimSize(0); + if (!isNhwc) { + if (x_type.getDimSize(1) == mean_type.getDimSize(0)) { LOG(FATAL) << "failec to create bn op"; } + auto perms = {0, 2, 3, 1}; + x = CreateTransposeValue(loc, rewriter, x, perms); + output_type = CreateTransposeType(output_type, perms); + } // sub_op = sub(input, mean) auto sub_op0 = rewriter.create(loc, output_type, x, mean); // add_op0 = add(var, epsilon) @@ -134,7 +148,11 @@ Value CreateBNOp(Location loc, ConversionPatternRewriter& rewriter, Value output // op5 = mul(mul_op0, gamma) auto mul_op1 = rewriter.create(loc, output_type, mul_op0, gamma, 0); // op6 = add(mul_op1, beta) - auto batch_norm = rewriter.create(loc, output_type, mul_op1, beta); + Value batch_norm = rewriter.create(loc, output_type, mul_op1, beta); + if (!isNhwc) { + auto reverse_perms = {0, 3, 1, 2}; + batch_norm = CreateTransposeValue(loc, rewriter, batch_norm, reverse_perms); + } return batch_norm; }; diff --git a/oneflow/ir/oneflow-opt/oneflow-opt.cpp b/oneflow/ir/oneflow-opt/oneflow-opt.cpp index c2a893245fa..77f29a32e9b 100644 --- a/oneflow/ir/oneflow-opt/oneflow-opt.cpp +++ b/oneflow/ir/oneflow-opt/oneflow-opt.cpp @@ -69,6 +69,7 @@ int32_t main(int32_t argc, char** argv) { mlir::registerOneFlowJobToFuncPassPass(); mlir::registerCastOneFlowOpsToSignlessPassPass(); mlir::registerFuncToOneFlowJobPassPass(); + mlir::registerAutoNhwcPass(); #ifdef WITH_MLIR_CUDA_CODEGEN mlir::oneflow::registerGpuSerializeToCubinPass(); #endif // WITH_MLIR_CUDA_CODEGEN diff --git a/oneflow/ir/test/Frontend/OneFlowToIree.mlir b/oneflow/ir/test/Frontend/OneFlowToIree.mlir index 95dd260a2a5..60de1f5acca 100644 --- a/oneflow/ir/test/Frontend/OneFlowToIree.mlir +++ b/oneflow/ir/test/Frontend/OneFlowToIree.mlir @@ -1,5 +1,6 @@ // RUN: oneflow-opt %s \ // RUN: -split-input-file \ +// RUN: -auto-nhwc \ // RUN: -lower-oneflow-to-tosa \ // RUN: -tosa-make-broadcastable \ // RUN: -verify-diagnostics -o - \ diff --git a/oneflow/ir/test/OneFlow/conversion/OneFlowToTosa.mlir b/oneflow/ir/test/OneFlow/conversion/OneFlowToTosa.mlir index f018f0eb72c..feebd646600 100644 --- a/oneflow/ir/test/OneFlow/conversion/OneFlowToTosa.mlir +++ b/oneflow/ir/test/OneFlow/conversion/OneFlowToTosa.mlir @@ -1,5 +1,6 @@ // RUN: oneflow-opt %s \ // RUN: -split-input-file \ +// RUN: -auto-nhwc \ // RUN: -lower-oneflow-to-tosa \ // RUN: -verify-diagnostics -o - \ // RUN: | FileCheck %s @@ -246,18 +247,12 @@ oneflow.job @test_relu(%arg0: tensor<1xf32>) -> tensor<1xf32> { } //CHECK-LABEL: test_bn -//CHECK: [[V0:%.+]] = "tosa.const"() {value = dense<9.99999974E-6> : tensor} : () -> tensor -//CHECK: [[V1:%.+]] = "tosa.reshape"(%arg1) {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V2:%.+]] = "tosa.reshape"(%arg2) {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V3:%.+]] = "tosa.reshape"(%arg3) {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V4:%.+]] = "tosa.reshape"(%arg4) {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V5:%.+]] = "tosa.sub"(%arg0, [[V1]]) : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) -> tensor<1x64x112x112xf32> -//CHECK: [[V6:%.+]] = "tosa.add"([[V2]], [[V0]]) : (tensor<64x1x1xf32>, tensor) -> tensor<64x1x1xf32> -//CHECK: [[V7:%.+]] = "tosa.rsqrt"([[V6]]) : (tensor<64x1x1xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V8:%.+]] = "tosa.mul"([[V5]], [[V7]]) {shift = 0 : i32} : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) -> tensor<1x64x112x112xf32> -//CHECK: [[V9:%.+]] = "tosa.mul"([[V8]], [[V3]]) {shift = 0 : i32} : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) -> tensor<1x64x112x112xf32> -//CHECK: [[V10:%.+]] = "tosa.add"([[V9]], [[V4]]) : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) -> tensor<1x64x112x112xf32> -//CHECK: return [[V10]] : tensor<1x64x112x112xf32> +//CHECK: "tosa.sub" +//CHECK: "tosa.add" +//CHECK: "tosa.rsqrt" +//CHECK: "tosa.mul" +//CHECK: "tosa.mul" +//CHECK: "tosa.add" oneflow.job @test_bn( %x: tensor<1x64x112x112xf32>, %moving_mean: tensor<64xf32>, @@ -284,18 +279,12 @@ oneflow.job @test_bn( } //CHECK-LABEL: test_bn_infer -//CHECK: [[V0:%.+]] = "tosa.const"() {value = dense<9.99999974E-6> : tensor} : () -> tensor -//CHECK: [[V1:%.+]] = "tosa.reshape"(%arg1) {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V2:%.+]] = "tosa.reshape"(%arg2) {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V3:%.+]] = "tosa.reshape"(%arg3) {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V4:%.+]] = "tosa.reshape"(%arg4) {new_shape = array} : (tensor<64xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V5:%.+]] = "tosa.sub"(%arg0, [[V1]]) : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) -> tensor<1x64x112x112xf32> -//CHECK: [[V6:%.+]] = "tosa.add"([[V2]], [[V0]]) : (tensor<64x1x1xf32>, tensor) -> tensor<64x1x1xf32> -//CHECK: [[V7:%.+]] = "tosa.rsqrt"([[V6]]) : (tensor<64x1x1xf32>) -> tensor<64x1x1xf32> -//CHECK: [[V8:%.+]] = "tosa.mul"([[V5]], [[V7]]) {shift = 0 : i32} : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) -> tensor<1x64x112x112xf32> -//CHECK: [[V9:%.+]] = "tosa.mul"([[V8]], [[V3]]) {shift = 0 : i32} : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) -> tensor<1x64x112x112xf32> -//CHECK: [[V10:%.+]] = "tosa.add"([[V9]], [[V4]]) : (tensor<1x64x112x112xf32>, tensor<64x1x1xf32>) -> tensor<1x64x112x112xf32> -//CHECK: return [[V10]] : tensor<1x64x112x112xf32> +//CHECK: "tosa.sub" +//CHECK: "tosa.add" +//CHECK: "tosa.rsqrt" +//CHECK: "tosa.mul" +//CHECK: "tosa.mul" +//CHECK: "tosa.add" oneflow.job @test_bn_infer( %x: tensor<1x64x112x112xf32>, %moving_mean: tensor<64xf32>, From 1c951635f4a3a6ac2608bd6d51dda9ae6ec24654 Mon Sep 17 00:00:00 2001 From: yuhao <1171760467@qq.com> Date: Wed, 12 Apr 2023 03:02:47 +0000 Subject: [PATCH 06/12] fix --- oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index 8929dabcf21..9a79189a053 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -132,7 +132,6 @@ Value CreateBNOp(Location loc, ConversionPatternRewriter& rewriter, Value output // nhwc bool isNhwc = x_type.getDimSize(3) == mean_type.getDimSize(0); if (!isNhwc) { - if (x_type.getDimSize(1) == mean_type.getDimSize(0)) { LOG(FATAL) << "failec to create bn op"; } auto perms = {0, 2, 3, 1}; x = CreateTransposeValue(loc, rewriter, x, perms); output_type = CreateTransposeType(output_type, perms); From 79307aab8ab08dd726a985fea146d13b019a65fe Mon Sep 17 00:00:00 2001 From: jackalcooper Date: Wed, 12 Apr 2023 12:08:48 +0800 Subject: [PATCH 07/12] minor refine --- oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index 9a79189a053..484c0e01bd4 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -728,9 +728,9 @@ void OneFlowLoweringToTosaPass::runOnOperation() { }); RewritePatternSet patterns(context); - const auto mgr = ::oneflow::Singleton<::oneflow::VariableTensorMgr>::Get(); // check if the pass is triggered by python based on the presence of variable tensor manger if (fullyConvert) { + const auto mgr = ::oneflow::Singleton<::oneflow::VariableTensorMgr>::Get(); if (mgr) { patterns.add(typeConverter, context); } else { From d57a6a82b30090a40a46e3836a55cfe08efcac5c Mon Sep 17 00:00:00 2001 From: jackalcooper Date: Wed, 12 Apr 2023 12:09:36 +0800 Subject: [PATCH 08/12] rm var --- oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index 484c0e01bd4..252327ccb6e 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -730,8 +730,7 @@ void OneFlowLoweringToTosaPass::runOnOperation() { // check if the pass is triggered by python based on the presence of variable tensor manger if (fullyConvert) { - const auto mgr = ::oneflow::Singleton<::oneflow::VariableTensorMgr>::Get(); - if (mgr) { + if (::oneflow::Singleton<::oneflow::VariableTensorMgr>::Get()) { patterns.add(typeConverter, context); } else { patterns.add(typeConverter, context, this->variableAsConstant); From 914bf9effb4e9bede8570d715ed3c8086cf75ade Mon Sep 17 00:00:00 2001 From: yuhao <1171760467@qq.com> Date: Wed, 12 Apr 2023 05:31:19 +0000 Subject: [PATCH 09/12] fix --- oneflow/ir/include/OneFlow/OneFlowOps.td | 2 +- .../lib/OneFlow/Conversion/OneFlowToTosa.cpp | 55 +++++++++---------- .../ir/lib/OneFlow/Transform/AutoNHWCOps.cpp | 29 ++++++++++ 3 files changed, 57 insertions(+), 29 deletions(-) diff --git a/oneflow/ir/include/OneFlow/OneFlowOps.td b/oneflow/ir/include/OneFlow/OneFlowOps.td index f8a24175e31..27af8694297 100644 --- a/oneflow/ir/include/OneFlow/OneFlowOps.td +++ b/oneflow/ir/include/OneFlow/OneFlowOps.td @@ -206,7 +206,7 @@ def OneFlow_ReturnOp : Op]> { +def OneFlow_NormalizationInferenceOp : OneFlow_NormalizationBaseOp<"normalization_infer", [DeclareOpInterfaceMethods, DeclareOpInterfaceMethods]> { let output = (outs OneFlow_Tensor:$y ); diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp index 9a79189a053..54a6a4074e4 100644 --- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp +++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp @@ -120,22 +120,8 @@ RankedTensorType CreateTransposeType(ShapedType output, ArrayRef perms) return RankedTensorType::get(ranked_type, output.getElementType()); }; -Value CreateBNOp(Location loc, ConversionPatternRewriter& rewriter, Value output, Value x, +Value CreateBNOp(Location loc, ConversionPatternRewriter& rewriter, Type output_type, Value x, Value mean, Value variance, Value epsilon, Value gamma, Value beta) { - auto output_type = output.getType(); - RankedTensorType mean_type, x_type; - if (!(mean_type = llvm::dyn_cast_or_null(mean.getType())) - || !(x_type = llvm::dyn_cast_or_null(x.getType())) - || mean_type.getRank() != 1 || x_type.getRank() != 4) { - LOG(FATAL) << "failec to create bn op"; - } - // nhwc - bool isNhwc = x_type.getDimSize(3) == mean_type.getDimSize(0); - if (!isNhwc) { - auto perms = {0, 2, 3, 1}; - x = CreateTransposeValue(loc, rewriter, x, perms); - output_type = CreateTransposeType(output_type, perms); - } // sub_op = sub(input, mean) auto sub_op0 = rewriter.create(loc, output_type, x, mean); // add_op0 = add(var, epsilon) @@ -148,10 +134,6 @@ Value CreateBNOp(Location loc, ConversionPatternRewriter& rewriter, Value output auto mul_op1 = rewriter.create(loc, output_type, mul_op0, gamma, 0); // op6 = add(mul_op1, beta) Value batch_norm = rewriter.create(loc, output_type, mul_op1, beta); - if (!isNhwc) { - auto reverse_perms = {0, 3, 1, 2}; - batch_norm = CreateTransposeValue(loc, rewriter, batch_norm, reverse_perms); - } return batch_norm; }; @@ -508,11 +490,22 @@ struct NormalizationInferenceOpLowering final auto variance = op.getMovingVariance(); auto gamma = op.getGamma(); auto beta = op.getBeta(); - auto output = op.getY(); + auto output_type = op.getY().getType(); auto x = op.getX(); + if (op.IsNCHW()) { + const auto perms = {0, 2, 3, 1}; + x = CreateTransposeValue(loc, rewriter, x, perms); + output_type = CreateTransposeType(output_type, perms); + } + auto batch_norm = - oneflow::CreateBNOp(loc, rewriter, output, x, mean, variance, epsilon, gamma, beta); + oneflow::CreateBNOp(loc, rewriter, output_type, x, mean, variance, epsilon, gamma, beta); + + if (op.IsNCHW()) { + const auto reverse_perms = {0, 3, 1, 2}; + batch_norm = CreateTransposeValue(loc, rewriter, batch_norm, reverse_perms); + } rewriter.replaceOp(op, {batch_norm}); return success(); } @@ -526,22 +519,28 @@ struct NormalizationOpLowering final : public OpConversionPatterngetLoc(); const auto epsilon_type = RankedTensorType::get({}, rewriter.getF32Type()); - // epsilon = reshape(epsilon, shape_1) auto epsilon = rewriter.create( loc, epsilon_type, DenseElementsAttr::get(epsilon_type, op.getEpsilon())); - // mean = reshape(mean, shape_0) auto mean = op.getMovingMean(); - // variance= reshape(variance, shape_0) auto variance = op.getMovingVariance(); - // scale = reshape(scale, shape_0) auto gamma = op.getGamma(); - // beta = reshape(beta, shape_0) auto beta = op.getBeta(); - auto output = op.getY(); + auto output_type = op.getY().getType(); auto x = op.getX(); + if (op.IsNCHW()) { + const auto perms = {0, 2, 3, 1}; + x = CreateTransposeValue(loc, rewriter, x, perms); + output_type = CreateTransposeType(output_type, perms); + } + auto batch_norm = - oneflow::CreateBNOp(loc, rewriter, output, x, mean, variance, epsilon, gamma, beta); + oneflow::CreateBNOp(loc, rewriter, output_type, x, mean, variance, epsilon, gamma, beta); + + if (op.IsNCHW()) { + const auto reverse_perms = {0, 3, 1, 2}; + batch_norm = CreateTransposeValue(loc, rewriter, batch_norm, reverse_perms); + } auto moving_mean = op.getMovingMean(); auto moving_variance = op.getMovingVariance(); diff --git a/oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp b/oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp index 931517f5ce8..07444470437 100644 --- a/oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp +++ b/oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp @@ -95,10 +95,16 @@ llvm::SmallVector BroadcastAddOp::NchwToNhwc(llvm::SmallVectorgetAxisAttr().getValue().getSExtValue() == 1; } +bool NormalizationInferenceOp::IsNCHW() { return this->getAxisAttr().getValue().getSExtValue() == 1; } + llvm::DenseSet NormalizationOp::OperandsToTranspose() { return {this->getX()}; } +llvm::DenseSet NormalizationInferenceOp::OperandsToTranspose() { return {this->getX()}; } + llvm::DenseSet NormalizationOp::ResultsToTranspose() { return {this->getY()}; } +llvm::DenseSet NormalizationInferenceOp::ResultsToTranspose() { return {this->getY()}; } + llvm::SmallVector NormalizationOp::NchwToNhwc(llvm::SmallVector value, PatternRewriter& rewriter) { auto normalization_op = *this; @@ -122,6 +128,29 @@ llvm::SmallVector NormalizationOp::NchwToNhwc(llvm::SmallVector NormalizationInferenceOp::NchwToNhwc(llvm::SmallVector value, + PatternRewriter& rewriter) { + auto normalization_op = *this; + SmallVector operands; + operands.push_back(value[0]); + if (normalization_op.getMovingMean()) operands.push_back(normalization_op.getMovingMean()); + if (normalization_op.getMovingVariance()) + operands.push_back(normalization_op.getMovingVariance()); + operands.push_back(normalization_op.getGamma()); + operands.push_back(normalization_op.getBeta()); + if (normalization_op.get_addToOutput()) operands.push_back(normalization_op.get_addToOutput()); + NamedAttrList attributes = normalization_op->getAttrs(); + attributes.set(normalization_op.getAxisAttrName(), rewriter.getSI32IntegerAttr(3)); + auto res = + rewriter + .create( + normalization_op.getLoc(), getNHWCResultTypes(normalization_op), operands, attributes) + ->getResults(); + llvm::SmallVector results; + results.push_back(res[0]); + return results; +} + bool MaxPool2DOp::IsNCHW() { return this->getDataFormat().str() == "channels_first"; } llvm::DenseSet MaxPool2DOp::OperandsToTranspose() { return {this->getX()}; } From c2ffed070e80dc397527eb66ada7d917cec284d7 Mon Sep 17 00:00:00 2001 From: jackalcooper Date: Mon, 17 Apr 2023 10:43:34 +0800 Subject: [PATCH 10/12] minor fix --- oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp b/oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp index 1621f24f4ed..a421f43f4e1 100644 --- a/oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp +++ b/oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp @@ -80,7 +80,6 @@ ::mlir::LogicalResult inferReturnTypesWithOpTypeName( std::unordered_map> lbi2logical_blob_desc_; auto operand_ids = user_op::ArgIds(op_type_name, operands.size(), attributes); - auto operand_index = 0; for (const auto& idOperand : llvm::zip(operand_ids, operands)) { const auto& arg_name = std::get<0>(idOperand).first; const auto& arg_id = std::get<0>(idOperand).second; @@ -111,7 +110,7 @@ ::mlir::LogicalResult inferReturnTypesWithOpTypeName( }; ::oneflow::ParallelConf parallel_conf = user_op::getParallelConfFromAttrDictionary(attributes); ::oneflow::ParallelDesc parallel_desc{parallel_conf}; - op->FillOpParallelDesc(parallel_desc); + CHECK_JUST(op->FillOpParallelDesc(parallel_desc)); CHECK_JUST(op->InferLogicalOutBlobDescs(GetLogicalBlobDesc4BnInOp, parallel_desc)); for (const auto& result_id : result_ids) { const auto& arg_name = result_id.first; From 5d5ee042514fcbe04931a58a78fea57532500025 Mon Sep 17 00:00:00 2001 From: jackalcooper Date: Mon, 17 Apr 2023 10:44:09 +0800 Subject: [PATCH 11/12] fix --- oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp b/oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp index a421f43f4e1..2756bfc67f0 100644 --- a/oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp +++ b/oneflow/ir/lib/OneFlow/OneFlowInferReturnTypes.cpp @@ -80,6 +80,7 @@ ::mlir::LogicalResult inferReturnTypesWithOpTypeName( std::unordered_map> lbi2logical_blob_desc_; auto operand_ids = user_op::ArgIds(op_type_name, operands.size(), attributes); + auto operand_index = 0; for (const auto& idOperand : llvm::zip(operand_ids, operands)) { const auto& arg_name = std::get<0>(idOperand).first; const auto& arg_id = std::get<0>(idOperand).second; From b5cbf681823ae05c8c6cbe4809235e56a34c5131 Mon Sep 17 00:00:00 2001 From: oneflow-ci-bot Date: Mon, 17 Apr 2023 02:52:21 +0000 Subject: [PATCH 12/12] auto format by CI --- oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp b/oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp index 07444470437..1c763027b37 100644 --- a/oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp +++ b/oneflow/ir/lib/OneFlow/Transform/AutoNHWCOps.cpp @@ -95,7 +95,9 @@ llvm::SmallVector BroadcastAddOp::NchwToNhwc(llvm::SmallVectorgetAxisAttr().getValue().getSExtValue() == 1; } -bool NormalizationInferenceOp::IsNCHW() { return this->getAxisAttr().getValue().getSExtValue() == 1; } +bool NormalizationInferenceOp::IsNCHW() { + return this->getAxisAttr().getValue().getSExtValue() == 1; +} llvm::DenseSet NormalizationOp::OperandsToTranspose() { return {this->getX()}; } @@ -129,7 +131,7 @@ llvm::SmallVector NormalizationOp::NchwToNhwc(llvm::SmallVector NormalizationInferenceOp::NchwToNhwc(llvm::SmallVector value, - PatternRewriter& rewriter) { + PatternRewriter& rewriter) { auto normalization_op = *this; SmallVector operands; operands.push_back(value[0]);