From e376fd9dce59e4c0109c87043eec09096127c3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavol=20=C5=BD=C3=A1=C4=8Dik?= Date: Mon, 9 Oct 2023 15:39:41 +0200 Subject: [PATCH] Add a pattern which matches reordered binary operations During compilation to LLVM IR, binary operations which are both commutative and associative may sometimes get reordered. This commit enables effective matching of such reordered instructions. Simply explained, when the operands differ in a compared reorderable binary operation and all its users are operations of the same type, the binary operation is skipped. The complete set of operands is then matched during the comparison of the user. --- diffkemp/cli.py | 3 +- diffkemp/config.py | 4 + diffkemp/simpll/Config.h | 1 + .../simpll/DifferentialFunctionComparator.cpp | 85 +++++++++++++ .../simpll/DifferentialFunctionComparator.h | 14 ++ diffkemp/simpll/SimpLL.cpp | 7 +- diffkemp/simpll/Utils.cpp | 20 ++- diffkemp/simpll/Utils.h | 5 + diffkemp/simpll/library/FFI.cpp | 1 + diffkemp/simpll/library/FFI.h | 1 + .../DifferentialFunctionComparatorTest.cpp | 120 ++++++++++++++++++ 11 files changed, 258 insertions(+), 3 deletions(-) diff --git a/diffkemp/cli.py b/diffkemp/cli.py index b58b927b2..50c188734 100644 --- a/diffkemp/cli.py +++ b/diffkemp/cli.py @@ -151,7 +151,8 @@ def make_argument_parser(): "relocations", "type-casts", "control-flow-only", - "inverse-conditions"] + "inverse-conditions", + "reordered-bin-ops"] # Semantic patterns options. compare_ap.add_argument("--enable-pattern", diff --git a/diffkemp/config.py b/diffkemp/config.py index 954ceae34..c26f2751b 100644 --- a/diffkemp/config.py +++ b/diffkemp/config.py @@ -22,6 +22,7 @@ def __init__( type_casts=False, control_flow_only=False, inverse_conditions=True, + reordered_bin_ops=True, ): """ Create a configuration of built-in patterns. @@ -39,6 +40,7 @@ def __init__( :param type_casts: Changes in type casts. :param control_flow_only: Consider control-flow changes only. :param inverse_conditions: Inverted branch conditions. + :param reordered_bin_ops: Match reordered binary operations. """ self.settings = { "struct-alignment": struct_alignment, @@ -51,6 +53,7 @@ def __init__( "type-casts": type_casts, "control-flow-only": control_flow_only, "inverse-conditions": inverse_conditions, + "reordered-bin-ops": reordered_bin_ops, } self.resolve_dependencies() @@ -97,6 +100,7 @@ def as_ffi_struct(self): ffi_struct.TypeCasts = self.settings["type-casts"] ffi_struct.ControlFlowOnly = self.settings["control-flow-only"] ffi_struct.InverseConditions = self.settings["inverse-conditions"] + ffi_struct.ReorderedBinOps = self.settings["reordered-bin-ops"] return ffi_struct diff --git a/diffkemp/simpll/Config.h b/diffkemp/simpll/Config.h index 2007c0b26..34ccbb72e 100644 --- a/diffkemp/simpll/Config.h +++ b/diffkemp/simpll/Config.h @@ -37,6 +37,7 @@ struct BuiltinPatterns { bool TypeCasts = false; bool ControlFlowOnly = false; bool InverseConditions = true; + bool ReorderedBinOps = true; }; /// Tool configuration parsed from CLI options. diff --git a/diffkemp/simpll/DifferentialFunctionComparator.cpp b/diffkemp/simpll/DifferentialFunctionComparator.cpp index 106d63add..26ae4a443 100644 --- a/diffkemp/simpll/DifferentialFunctionComparator.cpp +++ b/diffkemp/simpll/DifferentialFunctionComparator.cpp @@ -505,6 +505,11 @@ bool DifferentialFunctionComparator::maySkipInstruction( ignoredInstructions.insert(Inst); return true; } + if (config.Patterns.ReorderedBinOps && isReorderableBinaryOp(Inst) + && maySkipReorderableBinaryOp(Inst)) { + ignoredInstructions.insert(Inst); + return true; + } if (isCast(Inst)) { if (config.Patterns.TypeCasts) { replacedInstructions.insert({Inst, Inst->getOperand(0)}); @@ -660,6 +665,17 @@ bool DifferentialFunctionComparator::maySkipLoad(const LoadInst *Load) const { return false; } +/// Check whether the given reorderable binary operator can be skipped. +/// It can only be skipped if all its users are binary operations +/// of the same kind. +bool DifferentialFunctionComparator::maySkipReorderableBinaryOp( + const Instruction *Op) const { + return std::all_of(Op->user_begin(), Op->user_end(), [Op](auto user) { + auto userOp = dyn_cast(user); + return userOp && userOp->getOpcode() == Op->getOpcode(); + }); +} + bool mayIgnoreMacro(std::string macro) { return ignoredMacroList.find(macro) != ignoredMacroList.end(); } @@ -896,6 +912,30 @@ int DifferentialFunctionComparator::cmpBasicBlocks( continue; } + // If both instructions are reorderable binary operators, they may + // have skipped operands. Try to recursively collect all operands + // and compare their sets. + if (config.Patterns.ReorderedBinOps + && isReorderableBinaryOp(&*InstL) + && isReorderableBinaryOp(&*InstR)) { + Instruction::BinaryOps OpcodeL = + dyn_cast(&*InstL)->getOpcode(); + Instruction::BinaryOps OpcodeR = + dyn_cast(&*InstR)->getOpcode(); + std::multiset SnSetL, SnSetR; + std::multiset ConstantsSetL, ConstantsSetR; + if (OpcodeL == OpcodeR + && collectBinaryOperands( + &*InstL, OpcodeL, SnSetL, ConstantsSetL, sn_mapL) + && collectBinaryOperands( + &*InstR, OpcodeR, SnSetR, ConstantsSetR, sn_mapR) + && SnSetL == SnSetR && ConstantsSetL == ConstantsSetR) { + InstL++; + InstR++; + continue; + } + } + // If one of the instructions is a logical not, it is possible that // it will be used in an inverse condition. Hence, we skip it here // and mark that it may be inverse-matching the condition that @@ -1874,3 +1914,48 @@ bool DifferentialFunctionComparator::isDependingOnReloc( return false; } + +/// Recursively collect operands of reorderable binary operators. +/// Leafs must be constants or already synchronized values. +/// Return false if a non-synchronized non-constant leaf is found. +bool DifferentialFunctionComparator::collectBinaryOperands( + const Value *Val, + Instruction::BinaryOps Opcode, + std::multiset &SNs, + std::multiset &Constants, + DenseMap &sn_map) const { + // Substitute an ignored value with its replacement when necessary + auto replacementIt = replacedInstructions.find(Val); + const Value *newVal = replacementIt == replacedInstructions.end() + ? Val + : replacementIt->second; + // Constant leaf + if (auto Const = dyn_cast(newVal)) { + Constants.insert(Const->getSExtValue()); + return true; + } + // Non-leaf; try to collect its operands recursively + auto BinOp = dyn_cast(newVal); + if (BinOp && BinOp->getOpcode() == Opcode) { + std::multiset tmpSNs; + std::multiset tmpConstants; + bool leftSuccess = collectBinaryOperands( + BinOp->getOperand(0), Opcode, tmpSNs, tmpConstants, sn_map); + bool rightSuccess = collectBinaryOperands( + BinOp->getOperand(1), Opcode, tmpSNs, tmpConstants, sn_map); + if (leftSuccess && rightSuccess) { + SNs.merge(tmpSNs); + Constants.merge(tmpConstants); + return true; + } + } + // Already synchronized leaf; do not match if it is the last value inserted + // into the synchronization map - it has just been compared as not equal + auto snMapIt = sn_map.find(newVal); + if (snMapIt != sn_map.end() + && (unsigned)snMapIt->second != sn_map.size() - 1) { + SNs.insert(snMapIt->second); + return true; + } + return false; +} diff --git a/diffkemp/simpll/DifferentialFunctionComparator.h b/diffkemp/simpll/DifferentialFunctionComparator.h index f95822ed2..e066c4b8e 100644 --- a/diffkemp/simpll/DifferentialFunctionComparator.h +++ b/diffkemp/simpll/DifferentialFunctionComparator.h @@ -230,6 +230,11 @@ class DifferentialFunctionComparator : public FunctionComparator { /// ignorable loads are stored inside the ignored instructions map. bool maySkipLoad(const LoadInst *Load) const; + /// Check whether the given reorderable binary operator can be skipped. + /// It can only be skipped if all its users are binary operations + /// of the same kind. + bool maySkipReorderableBinaryOp(const Instruction *Op) const; + /// Retrive the replacement for the given value from the ignored /// instructions map. Try to generate the replacement if a bitcast is given. const Value * @@ -265,6 +270,15 @@ class DifferentialFunctionComparator : public FunctionComparator { /// (any instruction within it) access the same pointer and one of the /// accesses is a store and the other one is a load. bool isDependingOnReloc(const Instruction &Inst) const; + + /// Recursively collect operands of reorderable binary operators. + /// Leafs must be constants or already synchronized values. + /// Return false if a non-synchronized non-constant leaf is found. + bool collectBinaryOperands(const Value *Val, + Instruction::BinaryOps Opcode, + std::multiset &SNs, + std::multiset &Constants, + DenseMap &sn_map) const; }; #endif // DIFFKEMP_SIMPLL_DIFFERENTIALFUNCTIONCOMPARATOR_H diff --git a/diffkemp/simpll/SimpLL.cpp b/diffkemp/simpll/SimpLL.cpp index 09b6136ad..fbccc6cb8 100644 --- a/diffkemp/simpll/SimpLL.cpp +++ b/diffkemp/simpll/SimpLL.cpp @@ -111,6 +111,10 @@ cl::opt InverseConditionsOpt("inverse-conditions", cl::desc("Enable inverse conditions pattern."), cl::cat(BuiltinPatternsCategory)); +cl::opt ReorderedBinOpsOpt( + "reordered-bin-ops", + cl::desc("Enable reordered binary operations pattern."), + cl::cat(BuiltinPatternsCategory)); /// Add suffix to the file name. /// \param File Original file name. @@ -157,7 +161,8 @@ int main(int argc, const char **argv) { .Relocations = RelocationsOpt, .TypeCasts = TypeCastsOpt, .ControlFlowOnly = ControlFlowOnlyOpt, - .InverseConditions = InverseConditionsOpt}; + .InverseConditions = InverseConditionsOpt, + .ReorderedBinOps = ReorderedBinOpsOpt}; // Parse --fun option auto FunName = parseFunOption(); diff --git a/diffkemp/simpll/Utils.cpp b/diffkemp/simpll/Utils.cpp index 5aee7ba2f..17596b1b1 100644 --- a/diffkemp/simpll/Utils.cpp +++ b/diffkemp/simpll/Utils.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -248,7 +249,7 @@ bool isLogicalNot(const Instruction *Inst) { return false; if (auto *BinOp = dyn_cast(Inst)) { - if (BinOp->getOpcode() != llvm::Instruction::Xor) + if (BinOp->getOpcode() != Instruction::Xor) return false; if (auto constOp = dyn_cast(BinOp->getOperand(1))) @@ -257,6 +258,23 @@ bool isLogicalNot(const Instruction *Inst) { return false; } +/// Returns true if the given instruction is a reorderable binary operation, +/// i.e., it is commutative and associative. Note that IEEE 754 floating-point +/// addition/multiplication is NOT associative. +bool isReorderableBinaryOp(const Instruction *Inst) { + if (auto *BinOp = dyn_cast(Inst)) { + static std::set ReorderableOps = { + Instruction::Xor, + Instruction::Add, + Instruction::And, + Instruction::Or, + Instruction::Mul, + }; + return (ReorderableOps.count(BinOp->getOpcode()) != 0); + } + return false; +} + /// Get value of the given constant as a string std::string valueAsString(const Constant *Val) { if (auto *IntVal = dyn_cast(Val)) { diff --git a/diffkemp/simpll/Utils.h b/diffkemp/simpll/Utils.h index 686a7220b..a2e298486 100644 --- a/diffkemp/simpll/Utils.h +++ b/diffkemp/simpll/Utils.h @@ -106,6 +106,11 @@ bool isZeroGEP(const Value *Val); /// Returns true if the given instruction is a boolean negation operation bool isLogicalNot(const Instruction *Inst); +/// Returns true if the given instruction is a reorderable binary operation, +/// i.e., it is commutative and associative. Note that IEEE 754 floating-point +/// addition/multiplication is NOT associative. +bool isReorderableBinaryOp(const Instruction *Inst); + /// Run simplification passes on the function /// - simplify CFG /// - dead code elimination diff --git a/diffkemp/simpll/library/FFI.cpp b/diffkemp/simpll/library/FFI.cpp index ed8af61fd..dc154b142 100644 --- a/diffkemp/simpll/library/FFI.cpp +++ b/diffkemp/simpll/library/FFI.cpp @@ -72,6 +72,7 @@ BuiltinPatterns BuiltinPatternsFromC(builtin_patterns PatternsC) { .TypeCasts = (bool)PatternsC.TypeCasts, .ControlFlowOnly = (bool)PatternsC.ControlFlowOnly, .InverseConditions = (bool)PatternsC.InverseConditions, + .ReorderedBinOps = (bool)PatternsC.ReorderedBinOps, }; } diff --git a/diffkemp/simpll/library/FFI.h b/diffkemp/simpll/library/FFI.h index 4f9b75b71..98caa3114 100644 --- a/diffkemp/simpll/library/FFI.h +++ b/diffkemp/simpll/library/FFI.h @@ -34,6 +34,7 @@ struct builtin_patterns { int TypeCasts; int ControlFlowOnly; int InverseConditions; + int ReorderedBinOps; }; struct config { diff --git a/tests/unit_tests/simpll/DifferentialFunctionComparatorTest.cpp b/tests/unit_tests/simpll/DifferentialFunctionComparatorTest.cpp index 63d704b61..51b188033 100644 --- a/tests/unit_tests/simpll/DifferentialFunctionComparatorTest.cpp +++ b/tests/unit_tests/simpll/DifferentialFunctionComparatorTest.cpp @@ -1926,3 +1926,123 @@ TEST_F(DifferentialFunctionComparatorTest, ReorderedPHIs) { ReturnInst::Create(CtxL, AltResL, BBL); ASSERT_EQ(DiffComp->compare(), 1); } + +TEST_F(DifferentialFunctionComparatorTest, ReorderedBinaryOperationSimple) { + BasicBlock *BBL = BasicBlock::Create(CtxL, "", FL); + BasicBlock *BBR = BasicBlock::Create(CtxR, "", FR); + + // Operands for the binary operation + Constant *ConstL1 = ConstantInt::get(Type::getInt8Ty(CtxL), 0); + Constant *ConstL2 = ConstantInt::get(Type::getInt8Ty(CtxL), 1); + Constant *ConstR1 = ConstantInt::get(Type::getInt8Ty(CtxR), 0); + Constant *ConstR2 = ConstantInt::get(Type::getInt8Ty(CtxR), 1); + + // Commutative binary operations with reversed operands + auto ResL = BinaryOperator::Create( + BinaryOperator::Add, ConstL1, ConstL2, "", BBL); + auto ResR = BinaryOperator::Create( + BinaryOperator::Add, ConstR2, ConstR1, "", BBR); + + // Return the result of the operation + auto RetL = ReturnInst::Create(CtxL, ResL, BBL); + auto RetR = ReturnInst::Create(CtxR, ResR, BBR); + + ASSERT_EQ(DiffComp->compare(), 0); + + RetL->eraseFromParent(); + RetR->eraseFromParent(); + ResL->eraseFromParent(); + ResR->eraseFromParent(); + + // Not a commutative operation + ResL = BinaryOperator::Create( + BinaryOperator::Sub, ConstL1, ConstL2, "", BBL); + ResR = BinaryOperator::Create( + BinaryOperator::Sub, ConstR2, ConstR1, "", BBR); + + RetL = ReturnInst::Create(CtxL, ResL, BBL); + RetR = ReturnInst::Create(CtxR, ResR, BBR); + + ASSERT_EQ(DiffComp->compare(), 1); + + RetL->eraseFromParent(); + RetR->eraseFromParent(); + ResL->eraseFromParent(); + ResR->eraseFromParent(); + + // Different operands + ResL = BinaryOperator::Create( + BinaryOperator::Add, ConstL1, ConstL1, "", BBL); + ResR = BinaryOperator::Create( + BinaryOperator::Add, ConstR2, ConstR1, "", BBR); + + RetL = ReturnInst::Create(CtxL, ResL, BBL); + RetR = ReturnInst::Create(CtxR, ResR, BBR); + + ASSERT_EQ(DiffComp->compare(), 1); +} + +TEST_F(DifferentialFunctionComparatorTest, ReorderedBinaryOperationComplex) { + BasicBlock *BBL = BasicBlock::Create(CtxL, "", FL); + BasicBlock *BBR = BasicBlock::Create(CtxR, "", FR); + + Constant *ConstL1 = ConstantInt::get(Type::getInt8Ty(CtxL), 1); + Constant *ConstL2 = ConstantInt::get(Type::getInt8Ty(CtxL), 2); + auto VarL = new AllocaInst(Type::getInt8Ty(CtxL), 0, "var", BBL); + auto LoadL = new LoadInst(Type::getInt8Ty(CtxL), VarL, "load", BBL); + + Constant *ConstR1 = ConstantInt::get(Type::getInt8Ty(CtxR), 1); + Constant *ConstR2 = ConstantInt::get(Type::getInt8Ty(CtxR), 2); + auto VarR = new AllocaInst(Type::getInt8Ty(CtxR), 0, "var", BBR); + auto LoadR = new LoadInst(Type::getInt8Ty(CtxR), VarR, "load", BBR); + + // This operation should be skipped, and the operands collected later + auto FirstOpL = BinaryOperator::Create( + BinaryOperator::Add, ConstL1, ConstL2, "", BBL); + auto FirstOpR = BinaryOperator::Create( + BinaryOperator::Add, ConstR1, LoadR, "", BBR); + + // Here, the operands should be collected and matched + auto ResL = BinaryOperator::Create( + BinaryOperator::Add, FirstOpL, LoadL, "", BBL); + auto ResR = BinaryOperator::Create( + BinaryOperator::Add, FirstOpR, ConstR2, "", BBR); + + auto RetL = ReturnInst::Create(CtxL, ResL, BBL); + auto RetR = ReturnInst::Create(CtxR, ResR, BBR); + + ASSERT_EQ(DiffComp->compare(), 0); +} + +TEST_F(DifferentialFunctionComparatorTest, ReorderedBinaryOperationNeedLeaf) { + BasicBlock *BBL = BasicBlock::Create(CtxL, "", FL); + BasicBlock *BBR = BasicBlock::Create(CtxR, "", FR); + + Constant *ConstL1 = ConstantInt::get(Type::getInt8Ty(CtxL), 1); + Constant *ConstL2 = ConstantInt::get(Type::getInt8Ty(CtxL), 2); + Constant *ConstR1 = ConstantInt::get(Type::getInt8Ty(CtxR), 1); + Constant *ConstR2 = ConstantInt::get(Type::getInt8Ty(CtxR), 2); + + // Equal operations, should not be skipped + auto FirstOpL = BinaryOperator::Create( + BinaryOperator::Add, ConstL1, ConstL2, "", BBL); + auto FirstOpR = BinaryOperator::Create( + BinaryOperator::Add, ConstR1, ConstR2, "", BBR); + + // Same as before, but only on one side - should be skipped + auto SameOpL = BinaryOperator::Create( + BinaryOperator::Add, ConstL1, ConstL2, "", BBL); + + // These equal, but they do not use the synchronized operands, + // we must check the leafs + auto ResL = BinaryOperator::Create( + BinaryOperator::Add, SameOpL, ConstL1, "", BBL); + auto ResR = BinaryOperator::Create( + BinaryOperator::Add, FirstOpR, ConstR1, "", BBR); + + // Return the result of the operation + auto RetL = ReturnInst::Create(CtxL, ResL, BBL); + auto RetR = ReturnInst::Create(CtxR, ResR, BBR); + + ASSERT_EQ(DiffComp->compare(), 0); +}