Skip to content

Commit

Permalink
Merge pull request #15845 from Akira1Saitoh/aarch64InlineMathFma
Browse files Browse the repository at this point in the history
AArch64: Inline Math.fma
  • Loading branch information
knn-k committed Sep 9, 2022
2 parents 6bb8f5a + 3ce1f78 commit 85d5e63
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 1 deletion.
6 changes: 5 additions & 1 deletion runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,11 @@ bool
J9::ARM64::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod method)
{
if (method == TR::java_lang_Math_min_F ||
method == TR::java_lang_Math_max_F)
method == TR::java_lang_Math_max_F ||
method == TR::java_lang_Math_fma_D ||
method == TR::java_lang_Math_fma_F ||
method == TR::java_lang_StrictMath_fma_D ||
method == TR::java_lang_StrictMath_fma_F)
{
return true;
}
Expand Down
47 changes: 47 additions & 0 deletions runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5146,6 +5146,43 @@ VMinlineMathMinMax(TR::Node *node, bool isMax, bool isDouble, TR::CodeGenerator
return resReg;
}

/**
 * @brief Emits a single floating-point instruction that takes three source
 *        registers and writes one target register, using the three children
 *        of the given node as the sources.
 *
 * The children are evaluated in order (first, second, third) and passed to the
 * instruction in that same order. A fresh target register is allocated whose
 * precision matches the node's data type (single precision for TR::Float,
 * a TR_FPR register otherwise), set as the node's register, and returned.
 *
 * @param[in] node: node with exactly 3 children and a Float or Double data type
 * @param[in] op: opcode of the 3-source instruction to generate
 * @param[in] cg: CodeGenerator
 * @return the register holding the result
 */
static TR::Register *inlineFPTrg1Src3(TR::Node *node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator *cg)
   {
   // Validate the shape of the node before touching any of its children.
   TR_ASSERT_FATAL_WITH_NODE(node, node->getNumChildren() == 3, "In function inlineFPTrg1Src3, the node at address %p should have exactly 3 children, but got %u instead", node, node->getNumChildren());

   TR::DataType nodeType = node->getDataType();
   TR_ASSERT_FATAL_WITH_NODE(node, nodeType == TR::Float || nodeType == TR::Double, "In function inlineFPTrg1Src3, the node at address %p should be either TR::Float or TR::Double", node);

   // Evaluate the operands strictly in child order.
   TR::Node *const childA = node->getFirstChild();
   TR::Node *const childB = node->getSecondChild();
   TR::Node *const childC = node->getThirdChild();

   TR::Register *const regA = cg->evaluate(childA);
   TR::Register *const regB = cg->evaluate(childB);
   TR::Register *const regC = cg->evaluate(childC);

   // The result register is allocated only after all sources have been evaluated.
   TR::Register *const resultRegister = (nodeType == TR::Float)
      ? cg->allocateSinglePrecisionRegister()
      : cg->allocateRegister(TR_FPR);

   generateTrg1Src3Instruction(cg, op, node, resultRegister, regA, regB, regC);

   node->setRegister(resultRegister);

   // This node is done with its operands; release one reference on each child.
   cg->decReferenceCount(childA);
   cg->decReferenceCount(childB);
   cg->decReferenceCount(childC);

   return resultRegister;
   }

bool
J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&resultReg)
{
Expand Down Expand Up @@ -5208,6 +5245,16 @@ J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
return true;
}

case TR::java_lang_Math_fma_D:
case TR::java_lang_StrictMath_fma_D:
resultReg = inlineFPTrg1Src3(node, TR::InstOpCode::fmaddd, cg);
return true;

case TR::java_lang_Math_fma_F:
case TR::java_lang_StrictMath_fma_F:
resultReg = inlineFPTrg1Src3(node, TR::InstOpCode::fmadds, cg);
return true;

case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
{
// In Java9 and newer this can be either the jdk.internal JNI method or the sun.misc Java wrapper.
Expand Down

0 comments on commit 85d5e63

Please sign in to comment.