From 3ce1f781d2e72fb1befbfaa5768b2dde8b627c87 Mon Sep 17 00:00:00 2001 From: Akira Saitoh Date: Wed, 31 Aug 2022 13:03:59 +0900 Subject: [PATCH] AArch64: Inline Math.fma Implement the inlined version of java/lang/Math.fma. Signed-off-by: Akira Saitoh --- .../aarch64/codegen/J9CodeGenerator.cpp | 6 ++- .../aarch64/codegen/J9TreeEvaluator.cpp | 47 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp b/runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp index e897becbf68..ccfe0be0ab9 100644 --- a/runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp +++ b/runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp @@ -216,7 +216,11 @@ bool J9::ARM64::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod method) { if (method == TR::java_lang_Math_min_F || - method == TR::java_lang_Math_max_F) + method == TR::java_lang_Math_max_F || + method == TR::java_lang_Math_fma_D || + method == TR::java_lang_Math_fma_F || + method == TR::java_lang_StrictMath_fma_D || + method == TR::java_lang_StrictMath_fma_F) { return true; } diff --git a/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp b/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp index fc930fb1143..7d99fdc9359 100644 --- a/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp @@ -5146,6 +5146,43 @@ VMinlineMathMinMax(TR::Node *node, bool isMax, bool isDouble, TR::CodeGenerator return resReg; } +/** + * @brief Generates instruction sequence for inlining 3-children fp operation: evaluates the three children of the node, emits a single Trg1Src3 instruction with the given opcode taking the child registers as sources in child order, sets the result register on the node and decrements the reference counts of the children + * + * @param[in] node: node with exactly 3 children whose data type is TR::Float or TR::Double (both conditions are enforced by fatal assertions) + * @param[in] op: opcode of the 3-source instruction to generate (the callers visible in this patch pass fmaddd or fmadds) + * @param[in] cg: CodeGenerator + * @return the result register, newly allocated (single precision register for TR::Float, TR_FPR register otherwise) and also set as the register of the node + */ +static TR::Register *inlineFPTrg1Src3(TR::Node *node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator *cg) + { + TR_ASSERT_FATAL_WITH_NODE(node, node->getNumChildren() == 3, "In function inlineFPTrg1Src3, the node at address %p should have exactly 3 children, but got %u 
instead", node, node->getNumChildren()); + + TR::DataType type = node->getDataType(); + TR_ASSERT_FATAL_WITH_NODE(node, type == TR::Float || type == TR::Double, "In function inlineFPTrg1Src3, the node at address %p should be either TR::Float or TR::Double", node); + + TR::Node *firstChild = node->getFirstChild(); + TR::Node *secondChild = node->getSecondChild(); + TR::Node *thirdChild = node->getThirdChild(); + TR::Register *src1Register = cg->evaluate(firstChild); + TR::Register *src2Register = cg->evaluate(secondChild); + TR::Register *src3Register = cg->evaluate(thirdChild); + TR::Register *targetRegister; + + if (type == TR::Float) + targetRegister = cg->allocateSinglePrecisionRegister(); + else + targetRegister = cg->allocateRegister(TR_FPR); + + generateTrg1Src3Instruction(cg, op, node, targetRegister, src1Register, src2Register, src3Register); + + node->setRegister(targetRegister); + cg->decReferenceCount(firstChild); + cg->decReferenceCount(secondChild); + cg->decReferenceCount(thirdChild); + return targetRegister; + } + bool J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&resultReg) { @@ -5208,6 +5245,16 @@ J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result return true; } + case TR::java_lang_Math_fma_D: + case TR::java_lang_StrictMath_fma_D: + resultReg = inlineFPTrg1Src3(node, TR::InstOpCode::fmaddd, cg); + return true; + + case TR::java_lang_Math_fma_F: + case TR::java_lang_StrictMath_fma_F: + resultReg = inlineFPTrg1Src3(node, TR::InstOpCode::fmadds, cg); + return true; + case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z: { // In Java9 and newer this can be either the jdk.internal JNI method or the sun.misc Java wrapper.