From 6588a9e1379bb400272762cc8f5bf38ab51f2658 Mon Sep 17 00:00:00 2001 From: Axel Cohen Date: Thu, 30 Sep 2021 11:45:59 +0200 Subject: [PATCH 1/5] Add a string obfuscation pass dev-commits from corporate branch 13.x were squashed --- CMakeLists.txt | 2 + Plugin.cpp | 10 +- README.md | 3 + string/CMakeLists.txt | 26 ++++ string/StringObfuscation.cpp | 249 +++++++++++++++++++++++++++++++++++ string/StringObfuscation.h | 53 ++++++++ string/decode.c | 7 + string/generate_ir_header.sh | 13 ++ 8 files changed, 362 insertions(+), 1 deletion(-) create mode 100644 string/CMakeLists.txt create mode 100644 string/StringObfuscation.cpp create mode 100644 string/StringObfuscation.h create mode 100644 string/decode.c create mode 100755 string/generate_ir_header.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index c8f3979..08c2f87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,3 +55,5 @@ add_subdirectory(bogus) add_subdirectory(flattening) add_subdirectory(split) add_subdirectory(substitution) + +add_subdirectory(string) diff --git a/Plugin.cpp b/Plugin.cpp index 458f353..46d2634 100644 --- a/Plugin.cpp +++ b/Plugin.cpp @@ -11,6 +11,8 @@ #include "substitution/Substitution.h" #include "utils/CryptoUtils.h" +#include "string/StringObfuscation.h" + static const char PassesDelimiter = ','; static const std::string EnvVarPrefix = "LLVM_OBF_"; @@ -38,7 +40,13 @@ bool addPassWithName(FunctionPassManager &FPM, StringRef &passName) { } bool addPassWithName(ModulePassManager &MPM, StringRef &passName) { - return false; + if (passName == "string-encryption") { + MPM.addPass(StringObfuscatorPass()); + } else { + return false; + } + + return true; } template diff --git a/README.md b/README.md index 503742b..183020b 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,9 @@ You can chose to insert passes in the optimization pipeline by setting the follo For instance if you want to run the flattening, bogus and substitution passes in that order, you can do: `export 
LLVM_OBF_SCALAROPTIMIZERLATE_PASSES="flattening, bogus, substitution, split-basic-blocks"` +Or you can run the string encryption pass with: +`export LLVM_OBF_OPTIMIZERLASTEP_PASSES="string-encryption"` + Refer to the llvm::PassBuilder documentation for more information on each insertion point. ### With opt diff --git a/string/CMakeLists.txt b/string/CMakeLists.txt new file mode 100644 index 0000000..9025150 --- /dev/null +++ b/string/CMakeLists.txt @@ -0,0 +1,26 @@ +find_program(XXD xxd) + +# If defined, use the target compiler. +# For cross-compilation the LLVM-IR generated is still arch specific. +if (NOT ${TARGET_C_COMPILER}) + set(CLANG ${TARGET_C_COMPILER}) +else() + set(CLANG "${LLVM_TOOLS_BINARY_DIR}/clang") +endif() + +# Compile the string decode function into IR bitcode and embed it as a c array +ADD_CUSTOM_COMMAND( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/decode.h + COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/generate_ir_header.sh + ${CLANG} ${XXD} + ${CMAKE_CURRENT_SOURCE_DIR}/decode.c + ${CMAKE_CURRENT_BINARY_DIR} decode.h + DEPENDS decode.c +) + +add_custom_target(generateDecodeIRHeader ALL + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/decode.h +) + +add_dependencies(LLVMObfuscator generateDecodeIRHeader) +target_sources(LLVMObfuscator PRIVATE StringObfuscation.cpp) diff --git a/string/StringObfuscation.cpp b/string/StringObfuscation.cpp new file mode 100644 index 0000000..ce68c01 --- /dev/null +++ b/string/StringObfuscation.cpp @@ -0,0 +1,249 @@ +#include "StringObfuscation.h" +#include "string/decode.h" +#include "utils/Utils.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include +#include + +#include "utils/CryptoUtils.h" + +static const unsigned int 
RandomNameMinSize = 5; +static const unsigned int RandomMaxNameSize = 15; +static const char ALPHANUM[] = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +using namespace llvm; + +namespace llvm { +ConstantDataArray *StringObfuscatorPass::encodeStringDataArray(LLVMContext &ctx, + const char *str, + size_t size, + uint8_t key) { + // Check this is a valid string (not containing zeros) + if (str[size - 1] == '\0') { + if (strnlen(str, size) != size - 1) + return nullptr; + } else { + if (strnlen(str, size) != size) + return nullptr; + } + + // Encode the data + char *encodedStr = (char *)malloc(size); + for (unsigned int i = 0; i < size; i++) { + encodedStr[i] = str[i] ^ key; + } + + // Update the value + auto encodedRef = StringRef(encodedStr, size); + + // Return a new ConstantDataArray + return static_cast( + ConstantDataArray::getString(ctx, encodedRef, false)); +} + +void StringObfuscatorPass::encodeGlobalString(LLVMContext &ctx, + GlobalVariable *gv, + ConstantDataArray *array) { + StringRef ref = array->getAsString(); + const char *str = ref.data(); + const unsigned int size = ref.size(); + + uint8_t key = cryptoutils->get_uint8_t(); + auto encodedArray = encodeStringDataArray(ctx, str, size, key); + if (encodedArray != nullptr) { + gv->setInitializer(encodedArray); + gv->setConstant(false); + this->globalStrings.push_back( + GlobalStringVariable(gv, size, 0, false, key)); + } +} + +void StringObfuscatorPass::encodeStructString(LLVMContext &ctx, + GlobalVariable *gv, + ConstantStruct *cs, + ConstantDataArray *array, + unsigned int index) { + StringRef ref = array->getAsString(); + const char *str = ref.data(); + const unsigned int size = ref.size(); + + uint8_t key = llvm::cryptoutils->get_uint8_t(); + auto encodedArray = encodeStringDataArray(ctx, str, size, key); + if (encodedArray != nullptr) { + cs->setOperand(index, encodedArray); + gv->setConstant(false); + this->globalStrings.push_back( + GlobalStringVariable(gv, size, index, true, 
key)); + } +} + +StringObfuscatorPass::StringObfuscatorPass() {} + +bool StringObfuscatorPass::encodeAllStrings(Module &M) { + auto &ctx = M.getContext(); + + // For each global variable + for (GlobalVariable &gv : M.globals()) { + if (!gv.isConstant() // constant + || !gv.hasInitializer() // unitialized + || gv.hasExternalLinkage() // external + || gv.getSection() == "llvm.metadata") { // Intrinsic Global Variables + //|| gv.getSection().find("__objc_methname") != string::npos) { // TODO : + // is this necessary ? + continue; + } + + // Get the variable value + Constant *initializer = gv.getInitializer(); + + // Encode the value and update the variable + if (isa(initializer)) { // Global variable + auto array = cast(initializer); + if (array->isString()) { + encodeGlobalString(ctx, &gv, array); + } + } else if (isa(initializer)) { // Variable in a struct + auto cs = cast(initializer); + for (unsigned int i = 0; i < initializer->getNumOperands(); i++) { + auto operand = cs->getOperand(i); + if (isa(operand)) { + auto array = cast(operand); + if (array->isString()) { + encodeStructString(ctx, &gv, cs, array, i); + } + } + } + } + } + + return !this->globalStrings.empty(); +} + +std::string StringObfuscatorPass::generateRandomName() { + std::string name = ""; + auto charsetSize = strlen(ALPHANUM) - 1; + + auto size = + MIN(cryptoutils->get_uint8_t() + RandomNameMinSize, RandomMaxNameSize); + for (unsigned int i = 0; i < size; i++) { + auto index = cryptoutils->get_range(charsetSize); + name += ALPHANUM[index]; + } + + return name; +} + +Function *StringObfuscatorPass::addDecodeFunction(Module &M) { + auto &ctx = M.getContext(); + + // Parse the bitcode from the header (creates a new module which contains + // the decode function) + SMDiagnostic err; + auto buf = MemoryBuffer::getMemBuffer( + StringRef(reinterpret_cast(decode_c_bc), decode_c_bc_len), + "", false); + std::unique_ptr decodeModule = + parseIR(buf->getMemBufferRef(), err, ctx); + Function *loadedFunction 
= decodeModule->getFunction("decodeString"); + + // Declare the decode function in M with the same signature as the loaded + // function + auto functionName = generateRandomName(); + M.getOrInsertFunction(functionName, loadedFunction->getFunctionType()); + Function *declaredFunction = M.getFunction(functionName); + + // Map the declared and loaded functions arguments + ValueToValueMapTy vmap; + auto larg = loadedFunction->arg_begin(); + for (auto darg = declaredFunction->arg_begin(); + darg != declaredFunction->arg_end(); darg++) { + vmap[&*larg] = &*darg; + larg++; + } + + // Copy the loaded function into the empty declared function (in the proper + // module) + SmallVector returns; + ClonedCodeInfo codeInfo; + CloneFunctionInto(declaredFunction, loadedFunction, vmap, +#if LLVM_VERSION_MAJOR < 13 + true, +#else + CloneFunctionChangeType::DifferentModule, +#endif + returns, "", &codeInfo); + + return declaredFunction; +} + +void StringObfuscatorPass::addDecodeAllStringsFunction( + Module &M, Function *decodeFunction) { + auto &ctx = M.getContext(); + + FunctionCallee callee = + M.getOrInsertFunction(generateRandomName(), Type::getVoidTy(ctx)); + Function *decodeAllStrings = cast(callee.getCallee()); + + decodeAllStrings->setCallingConv(CallingConv::C); + + BasicBlock *decodeBlock = + BasicBlock::Create(ctx, "decodeBlock", decodeAllStrings); + + // Insert function calls to decodeFunction to decrypt each encrypted string + // in the main + IRBuilder<> builder(decodeBlock); + for (auto str : this->globalStrings) { + Value *array = str.var; + + // If this is a struct we need to get a pointer to the array + // at the field index + if (str.isStruct) { + array = builder.CreateStructGEP( + str.var->getType()->getPointerElementType(), str.var, str.index); + } + + // Get a pointer to the first element of the array (start of the string) + auto ptr = builder.CreateConstInBoundsGEP2_32( + array->getType()->getPointerElementType(), array, 0, 0); + + // Get the size of the string 
+ auto size = ConstantInt::get(IntegerType::getInt32Ty(ctx), str.size); + + auto key = ConstantInt::get(IntegerType::getInt8Ty(ctx), str.key); + + // Call the decode function + builder.CreateCall(decodeFunction, {ptr, size, key}); + } + + builder.CreateRetVoid(); + + // Add the function to global constructors + llvm::appendToGlobalCtors(M, decodeAllStrings, 0); +} + +PreservedAnalyses StringObfuscatorPass::run(Module &M, + ModuleAnalysisManager &MAM) { + // Encode all the global strings + if (!encodeAllStrings(M)) { + return PreservedAnalyses::all(); + } + + // Insert a function to decode a string + Function *decodeFunction = addDecodeFunction(M); + + // Insert a function decoding all the strings in global constructors + addDecodeAllStringsFunction(M, decodeFunction); + + return PreservedAnalyses::none(); +} +} // namespace llvm diff --git a/string/StringObfuscation.h b/string/StringObfuscation.h new file mode 100644 index 0000000..678150e --- /dev/null +++ b/string/StringObfuscation.h @@ -0,0 +1,53 @@ +#ifndef _STRING_OBFUSCATION_INCLUDES_ +#define _STRING_OBFUSCATION_INCLUDES_ + +// LLVM include +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" // For DemoteRegToStack and DemotePHIToStack + +struct GlobalStringVariable { + llvm::GlobalVariable *var; + size_t size; + unsigned int index; + bool isStruct; + uint8_t key; + + GlobalStringVariable(llvm::GlobalVariable *var, size_t size, + unsigned int index, bool isStruct, uint8_t key) { + this->var = var; + this->size = size; + this->index = index; + this->isStruct = isStruct; + this->key = key; + } +}; + +namespace llvm { +struct StringObfuscatorPass : public PassInfoMixin { + std::vector globalStrings; + + StringObfuscatorPass(); + ConstantDataArray 
*encodeStringDataArray(LLVMContext &ctx, const char *str, + size_t size, uint8_t key); + void encodeStructString(LLVMContext &ctx, GlobalVariable *gv, + ConstantStruct *cs, ConstantDataArray *array, + unsigned int index); + void encodeGlobalString(LLVMContext &ctx, GlobalVariable *gv, + ConstantDataArray *array); + bool encodeAllStrings(Module &M); + std::string generateRandomName(); + Function *addDecodeFunction(Module &M); + void addDecodeAllStringsFunction(Module &M, Function *decodeFunction); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +} // namespace llvm + +#endif diff --git a/string/decode.c b/string/decode.c new file mode 100644 index 0000000..a982deb --- /dev/null +++ b/string/decode.c @@ -0,0 +1,7 @@ +#include + +void decodeString(char *str, int length, unsigned char key) { + for (int i = 0; i < length; i++) { + str[i] ^= key; + } +} diff --git a/string/generate_ir_header.sh b/string/generate_ir_header.sh new file mode 100755 index 0000000..392f225 --- /dev/null +++ b/string/generate_ir_header.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +CC=${1} +XXD=${2} +INPUT_FILE=${3} +OUTPUT_DIR=${4} +OUTPUT_FILENAME=${5} + +BC_FILENAME="$(basename ${INPUT_FILE}).bc" + +cd ${OUTPUT_DIR} +${CC} -Os -emit-llvm -c "${INPUT_FILE}" -o "${BC_FILENAME}" +${XXD} -i "${BC_FILENAME}" > "${OUTPUT_FILENAME}" From ade2f040c94b55304f6e7874bf44843b054ab9a5 Mon Sep 17 00:00:00 2001 From: Patrice Blin Date: Mon, 24 Jul 2023 15:38:15 +0200 Subject: [PATCH 2/5] string: fix missing IR/Constants.h (llvm 15+) --- string/StringObfuscation.h | 1 + 1 file changed, 1 insertion(+) diff --git a/string/StringObfuscation.h b/string/StringObfuscation.h index 678150e..46f7254 100644 --- a/string/StringObfuscation.h +++ b/string/StringObfuscation.h @@ -3,6 +3,7 @@ // LLVM include #include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" From fdc10536c75aa79e5a6d636bf6b212133e00160e 
Mon Sep 17 00:00:00 2001 From: Patrice Blin Date: Mon, 24 Jul 2023 15:54:05 +0200 Subject: [PATCH 3/5] string: Fix deprecation of getElementType() (like 25ad72bf) --- string/StringObfuscation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string/StringObfuscation.cpp b/string/StringObfuscation.cpp index ce68c01..e2df4f6 100644 --- a/string/StringObfuscation.cpp +++ b/string/StringObfuscation.cpp @@ -209,7 +209,7 @@ void StringObfuscatorPass::addDecodeAllStringsFunction( // at the field index if (str.isStruct) { array = builder.CreateStructGEP( - str.var->getType()->getPointerElementType(), str.var, str.index); + str.var->getValueType(), str.var, str.index); } // Get a pointer to the first element of the array (start of the string) From 069552f870a2ac6f089c0ddeb28a15c1964d082a Mon Sep 17 00:00:00 2001 From: Patrice Blin Date: Mon, 24 Jul 2023 15:54:18 +0200 Subject: [PATCH 4/5] string: Fix deprecation of getElementType() --- string/StringObfuscation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/string/StringObfuscation.cpp b/string/StringObfuscation.cpp index e2df4f6..e5fd79b 100644 --- a/string/StringObfuscation.cpp +++ b/string/StringObfuscation.cpp @@ -214,7 +214,7 @@ void StringObfuscatorPass::addDecodeAllStringsFunction( // Get a pointer to the first element of the array (start of the string) auto ptr = builder.CreateConstInBoundsGEP2_32( - array->getType()->getPointerElementType(), array, 0, 0); + array->getType(), array, 0, 0); // Get the size of the string auto size = ConstantInt::get(IntegerType::getInt32Ty(ctx), str.size); From f5376b2521c20399ee3a121ad2d6e1d8b22173c5 Mon Sep 17 00:00:00 2001 From: Georges Gagnerot Date: Fri, 5 Apr 2024 17:10:48 +0200 Subject: [PATCH 5/5] Removing linklibrary by default --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 08c2f87..b28e283 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,8 @@ 
add_library(LLVMObfuscator SHARED Plugin.cpp) target_include_directories(LLVMObfuscator PRIVATE ${CMAKE_SOURCE_DIR}) target_include_directories(LLVMObfuscator PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -target_link_libraries(LLVMObfuscator LLVMCore LLVMSupport) +# Not linked by default; uncomment if LLVMCore/LLVMSupport must be linked explicitly +#target_link_libraries(LLVMObfuscator LLVMCore LLVMSupport) option(BUILD_DUMMY "Build dummy plugin" OFF) if(BUILD_DUMMY)