diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8f3979..b28e283 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -42,7 +42,8 @@ add_library(LLVMObfuscator SHARED Plugin.cpp)
 target_include_directories(LLVMObfuscator PRIVATE ${CMAKE_SOURCE_DIR})
 target_include_directories(LLVMObfuscator PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
 
-target_link_libraries(LLVMObfuscator LLVMCore LLVMSupport)
+#Add if needed
+#target_link_libraries(LLVMObfuscator LLVMCore LLVMSupport)
 
 option(BUILD_DUMMY "Build dummy plugin" OFF)
 if(BUILD_DUMMY)
@@ -55,3 +56,5 @@ add_subdirectory(bogus)
 add_subdirectory(flattening)
 add_subdirectory(split)
 add_subdirectory(substitution)
+
+add_subdirectory(string)
diff --git a/Plugin.cpp b/Plugin.cpp
index 458f353..46d2634 100644
--- a/Plugin.cpp
+++ b/Plugin.cpp
@@ -11,6 +11,8 @@
 #include "substitution/Substitution.h"
 #include "utils/CryptoUtils.h"
 
+#include "string/StringObfuscation.h"
+
 static const char PassesDelimiter = ',';
 static const std::string EnvVarPrefix = "LLVM_OBF_";
 
@@ -38,7 +40,13 @@ bool addPassWithName(FunctionPassManager &FPM, StringRef &passName) {
 }
 
 bool addPassWithName(ModulePassManager &MPM, StringRef &passName) {
-  return false;
+  if (passName == "string-encryption") {
+    MPM.addPass(StringObfuscatorPass());
+  } else {
+    return false;
+  }
+
+  return true;
 }
 
 template
diff --git a/README.md b/README.md
index 503742b..183020b 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,9 @@ You can chose to insert passes in the optimization pipeline by setting the follo
 For instance if you want to run the flattening, bogus and substitution passes in that order, you can do:
 `export LLVM_OBF_SCALAROPTIMIZERLATE_PASSES="flattening, bogus, substitution, split-basic-blocks"`
 
+Or you can run the string encryption pass with:
+`export LLVM_OBF_OPTIMIZERLASTEP_PASSES="string-encryption"`
+
 Refer to the llvm::PassBuilder documentation for more information on each insertion point.
 
 ### With opt
diff --git a/string/CMakeLists.txt b/string/CMakeLists.txt
new file mode 100644
index 0000000..9025150
--- /dev/null
+++ b/string/CMakeLists.txt
@@ -0,0 +1,31 @@
+find_program(XXD xxd)
+if (NOT XXD)
+  message(FATAL_ERROR "xxd is required to generate decode.h")
+endif()
+
+# If defined, use the target compiler.
+# For cross-compilation the LLVM-IR generated is still arch specific.
+if (TARGET_C_COMPILER)
+  set(CLANG ${TARGET_C_COMPILER})
+else()
+  set(CLANG "${LLVM_TOOLS_BINARY_DIR}/clang")
+endif()
+
+# Compile the string decode function into IR bitcode and embed it as a c array.
+# The header is regenerated when the C source or the generator script changes.
+ADD_CUSTOM_COMMAND(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/decode.h
+  COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/generate_ir_header.sh
+    ${CLANG} ${XXD}
+    ${CMAKE_CURRENT_SOURCE_DIR}/decode.c
+    ${CMAKE_CURRENT_BINARY_DIR} decode.h
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/decode.c
+          ${CMAKE_CURRENT_SOURCE_DIR}/generate_ir_header.sh
+)
+
+add_custom_target(generateDecodeIRHeader ALL
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/decode.h
+)
+
+add_dependencies(LLVMObfuscator generateDecodeIRHeader)
+target_sources(LLVMObfuscator PRIVATE StringObfuscation.cpp)
diff --git a/string/StringObfuscation.cpp b/string/StringObfuscation.cpp
new file mode 100644
index 0000000..e5fd79b
--- /dev/null
+++ b/string/StringObfuscation.cpp
@@ -0,0 +1,285 @@
+#include "StringObfuscation.h"
+#include "string/decode.h"
+#include "utils/Utils.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+#include <cstring>
+#include <string>
+
+#include "utils/CryptoUtils.h"
+
+static const unsigned int RandomNameMinSize = 5;
+static const unsigned int RandomMaxNameSize = 15;
+static const char ALPHANUM[] =
+    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+using namespace llvm;
+
+namespace llvm {
+// Draws a random XOR key, rejecting zero because XOR-ing with zero would
+// leave the string unchanged.
+static uint8_t generateNonZeroKey() {
+  uint8_t key;
+  do {
+    key = cryptoutils->get_uint8_t();
+  } while (key == 0);
+  return key;
+}
+
+// XORs the `size` bytes at `str` with `key` and returns them as a constant
+// array. Returns nullptr when the input is empty, contains a NUL byte
+// anywhere but the last position, or encodes to all-zero bytes.
+ConstantDataArray *
+StringObfuscatorPass::encodeStringDataArray(LLVMContext &ctx, const char *str,
+                                            size_t size, uint8_t key) {
+  // Nothing to encode; this also keeps `str[size - 1]` below in bounds.
+  if (str == nullptr || size == 0)
+    return nullptr;
+
+  // Check this is a valid string (no zero byte except an optional terminator)
+  const size_t expectedLength = (str[size - 1] == '\0') ? size - 1 : size;
+  if (strnlen(str, size) != expectedLength)
+    return nullptr;
+
+  // Encode the data into a scratch buffer that is released on return
+  std::string encoded(str, size);
+  for (char &c : encoded)
+    c = static_cast<char>(c ^ key);
+
+  // getString() yields a ConstantAggregateZero instead of a
+  // ConstantDataArray when every byte is zero, so check the dynamic type.
+  return dyn_cast<ConstantDataArray>(
+      ConstantDataArray::getString(ctx, encoded, false));
+}
+
+// Replaces the initializer of `gv` with the XOR-encoded copy of `array` and
+// records the variable so that a decode call can be emitted for it.
+void StringObfuscatorPass::encodeGlobalString(LLVMContext &ctx,
+                                              GlobalVariable *gv,
+                                              ConstantDataArray *array) {
+  StringRef ref = array->getAsString();
+  const size_t size = ref.size();
+
+  uint8_t key = generateNonZeroKey();
+  auto encodedArray = encodeStringDataArray(ctx, ref.data(), size, key);
+  if (encodedArray != nullptr) {
+    gv->setInitializer(encodedArray);
+    // The string is decoded in place at startup, so it must be writable
+    gv->setConstant(false);
+    this->globalStrings.push_back(
+        GlobalStringVariable(gv, size, 0, false, key));
+  }
+}
+
+// Same as encodeGlobalString() for a string stored in field `index` of the
+// struct initializer `cs` of `gv`.
+void StringObfuscatorPass::encodeStructString(LLVMContext &ctx,
+                                              GlobalVariable *gv,
+                                              ConstantStruct *cs,
+                                              ConstantDataArray *array,
+                                              unsigned int index) {
+  StringRef ref = array->getAsString();
+  const size_t size = ref.size();
+
+  uint8_t key = generateNonZeroKey();
+  auto encodedArray = encodeStringDataArray(ctx, ref.data(), size, key);
+  if (encodedArray != nullptr) {
+    // NOTE(review): this mutates an existing constant in place; LLVM
+    // constants are normally treated as immutable - confirm this is safe.
+    cs->setOperand(index, encodedArray);
+    gv->setConstant(false);
+    this->globalStrings.push_back(
+        GlobalStringVariable(gv, size, index, true, key));
+  }
+}
+
+StringObfuscatorPass::StringObfuscatorPass() {}
+
+// Encodes every eligible string found in the globals of `M`. Returns true
+// when at least one string was encoded.
+bool StringObfuscatorPass::encodeAllStrings(Module &M) {
+  auto &ctx = M.getContext();
+
+  // Drop entries recorded for a previously processed module
+  this->globalStrings.clear();
+
+  // For each global variable
+  for (GlobalVariable &gv : M.globals()) {
+    if (!gv.isConstant()                         // not constant
+        || !gv.hasInitializer()                  // uninitialized
+        || gv.hasExternalLinkage()               // external
+        || gv.getSection() == "llvm.metadata") { // Intrinsic Global Variables
+      //|| gv.getSection().find("__objc_methname") != string::npos) { // TODO :
+      // is this necessary ?
+      continue;
+    }
+
+    // Get the variable value
+    Constant *initializer = gv.getInitializer();
+
+    // Encode the value and update the variable
+    if (auto *array = dyn_cast<ConstantDataArray>(initializer)) {
+      // Global variable
+      if (array->isString()) {
+        encodeGlobalString(ctx, &gv, array);
+      }
+    } else if (auto *cs = dyn_cast<ConstantStruct>(initializer)) {
+      // Variable in a struct
+      for (unsigned int i = 0; i < cs->getNumOperands(); i++) {
+        auto *field = dyn_cast<ConstantDataArray>(cs->getOperand(i));
+        if (field != nullptr && field->isString()) {
+          encodeStructString(ctx, &gv, cs, field, i);
+        }
+      }
+    }
+  }
+
+  return !this->globalStrings.empty();
+}
+
+// Builds a random alphanumeric identifier whose length is at least
+// RandomNameMinSize and at most RandomMaxNameSize.
+std::string StringObfuscatorPass::generateRandomName() {
+  std::string name = "";
+  auto charsetSize = strlen(ALPHANUM) - 1;
+
+  // Draw the random length once before clamping it.
+  const unsigned int randomSize =
+      cryptoutils->get_uint8_t() + RandomNameMinSize;
+  const unsigned int size = std::min(randomSize, RandomMaxNameSize);
+  for (unsigned int i = 0; i < size; i++) {
+    auto index = cryptoutils->get_range(charsetSize);
+    name += ALPHANUM[index];
+  }
+
+  return name;
+}
+
+// Loads the decode function from the embedded bitcode and clones it into `M`
+// under a random name. Reports a fatal error if the bitcode is unusable.
+Function *StringObfuscatorPass::addDecodeFunction(Module &M) {
+  auto &ctx = M.getContext();
+
+  // Parse the bitcode from the header (creates a new module which contains
+  // the decode function)
+  SMDiagnostic err;
+  auto buf = MemoryBuffer::getMemBuffer(
+      StringRef(reinterpret_cast<const char *>(decode_c_bc), decode_c_bc_len),
+      "", false);
+  std::unique_ptr<Module> decodeModule =
+      parseIR(buf->getMemBufferRef(), err, ctx);
+  if (!decodeModule) {
+    report_fatal_error(
+        "string-encryption: unable to parse the embedded decode bitcode");
+  }
+  Function *loadedFunction = decodeModule->getFunction("decodeString");
+  if (loadedFunction == nullptr) {
+    report_fatal_error(
+        "string-encryption: decodeString is missing from the decode bitcode");
+  }
+
+  // Declare the decode function in M with the same signature as the loaded
+  // function
+  auto functionName = generateRandomName();
+  M.getOrInsertFunction(functionName, loadedFunction->getFunctionType());
+  Function *declaredFunction = M.getFunction(functionName);
+
+  // Map the declared and loaded functions arguments
+  ValueToValueMapTy vmap;
+  auto larg = loadedFunction->arg_begin();
+  for (auto darg = declaredFunction->arg_begin();
+       darg != declaredFunction->arg_end(); darg++) {
+    vmap[&*larg] = &*darg;
+    larg++;
+  }
+
+  // Copy the loaded function into the empty declared function (in the proper
+  // module)
+  SmallVector<ReturnInst *, 8> returns;
+  ClonedCodeInfo codeInfo;
+  CloneFunctionInto(declaredFunction, loadedFunction, vmap,
+#if LLVM_VERSION_MAJOR < 13
+                    true,
+#else
+                    CloneFunctionChangeType::DifferentModule,
+#endif
+                    returns, "", &codeInfo);
+
+  return declaredFunction;
+}
+
+// Emits a function that calls `decodeFunction` on every recorded string and
+// registers it in the global constructors of `M`.
+void StringObfuscatorPass::addDecodeAllStringsFunction(
+    Module &M, Function *decodeFunction) {
+  auto &ctx = M.getContext();
+
+  FunctionCallee callee =
+      M.getOrInsertFunction(generateRandomName(), Type::getVoidTy(ctx));
+  Function *decodeAllStrings = cast<Function>(callee.getCallee());
+
+  decodeAllStrings->setCallingConv(CallingConv::C);
+
+  BasicBlock *decodeBlock =
+      BasicBlock::Create(ctx, "decodeBlock", decodeAllStrings);
+
+  // Insert function calls to decodeFunction to decrypt each encrypted string
+  // in the main
+  IRBuilder<> builder(decodeBlock);
+  for (const auto &str : this->globalStrings) {
+    Value *array = str.var;
+    // Type the pointer in `array` points to; GEPs index into this type
+    Type *arrayType = str.var->getValueType();
+
+    // If this is a struct we need to get a pointer to the array
+    // at the field index
+    if (str.isStruct) {
+      auto *structType = cast<StructType>(arrayType);
+      array = builder.CreateStructGEP(structType, str.var, str.index);
+      arrayType = structType->getElementType(str.index);
+    }
+
+    // Get a pointer to the first element of the array (start of the string)
+    auto ptr = builder.CreateConstInBoundsGEP2_32(arrayType, array, 0, 0);
+
+    // Get the size of the string
+    auto size = ConstantInt::get(IntegerType::getInt32Ty(ctx), str.size);
+
+    auto key = ConstantInt::get(IntegerType::getInt8Ty(ctx), str.key);
+
+    // Call the decode function
+    builder.CreateCall(decodeFunction, {ptr, size, key});
+  }
+
+  builder.CreateRetVoid();
+
+  // Add the function to global constructors
+  llvm::appendToGlobalCtors(M, decodeAllStrings, 0);
+}
+
+PreservedAnalyses StringObfuscatorPass::run(Module &M,
+                                            ModuleAnalysisManager &MAM) {
+  // Encode all the global strings
+  if (!encodeAllStrings(M)) {
+    return PreservedAnalyses::all();
+  }
+
+  // Insert a function to decode a string
+  Function *decodeFunction = addDecodeFunction(M);
+
+  // Insert a function decoding all the strings in global constructors
+  addDecodeAllStringsFunction(M, decodeFunction);
+
+  return PreservedAnalyses::none();
+}
+} // namespace llvm
diff --git a/string/StringObfuscation.h b/string/StringObfuscation.h
new file mode 100644
index 0000000..46f7254
--- /dev/null
+++ b/string/StringObfuscation.h
@@ -0,0 +1,57 @@
+#ifndef STRING_OBFUSCATION_INCLUDES_H
+#define STRING_OBFUSCATION_INCLUDES_H
+
+// LLVM include
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h" // For DemoteRegToStack and DemotePHIToStack
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+// Bookkeeping for one encoded string: where it lives and how to decode it.
+struct GlobalStringVariable {
+  llvm::GlobalVariable *var; // global holding the encoded bytes
+  size_t size;               // number of encoded bytes
+  unsigned int index;        // struct field index (used when isStruct is set)
+  bool isStruct;             // true when the string is a field of a struct
+  uint8_t key;               // XOR key the bytes were encoded with
+
+  GlobalStringVariable(llvm::GlobalVariable *var, size_t size,
+                       unsigned int index, bool isStruct, uint8_t key)
+      : var(var), size(size), index(index), isStruct(isStruct), key(key) {}
+};
+
+namespace llvm {
+// Module pass that XOR-encodes string globals and adds a global constructor
+// decoding them.
+struct StringObfuscatorPass : public PassInfoMixin<StringObfuscatorPass> {
+  std::vector<GlobalStringVariable> globalStrings;
+
+  StringObfuscatorPass();
+  ConstantDataArray *encodeStringDataArray(LLVMContext &ctx, const char *str,
+                                           size_t size, uint8_t key);
+  void encodeStructString(LLVMContext &ctx, GlobalVariable *gv,
+                          ConstantStruct *cs, ConstantDataArray *array,
+                          unsigned int index);
+  void encodeGlobalString(LLVMContext &ctx, GlobalVariable *gv,
+                          ConstantDataArray *array);
+  bool encodeAllStrings(Module &M);
+  std::string generateRandomName();
+  Function *addDecodeFunction(Module &M);
+  void addDecodeAllStringsFunction(Module &M, Function *decodeFunction);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/string/decode.c b/string/decode.c
new file mode 100644
index 0000000..a982deb
--- /dev/null
+++ b/string/decode.c
@@ -0,0 +1,8 @@
+#include <stdint.h>
+
+/* XORs the first `length` bytes of `str` with `key`, in place. */
+void decodeString(char *str, int length, unsigned char key) {
+  for (int i = 0; i < length; i++) {
+    str[i] ^= key;
+  }
+}
diff --git a/string/generate_ir_header.sh b/string/generate_ir_header.sh
new file mode 100755
index 0000000..392f225
--- /dev/null
+++ b/string/generate_ir_header.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Compiles a C file to LLVM bitcode and embeds the bitcode in a C header.
+# Usage: generate_ir_header.sh CC XXD INPUT_FILE OUTPUT_DIR OUTPUT_FILENAME
+
+set -euo pipefail
+
+if [ "$#" -ne 5 ]; then
+  echo "usage: $0 CC XXD INPUT_FILE OUTPUT_DIR OUTPUT_FILENAME" >&2
+  exit 1
+fi
+
+CC=${1}
+XXD=${2}
+INPUT_FILE=${3}
+OUTPUT_DIR=${4}
+OUTPUT_FILENAME=${5}
+
+BC_FILENAME="$(basename "${INPUT_FILE}").bc"
+
+# xxd -i names the generated array after its input path, so run it from the
+# output directory with a bare file name.
+cd "${OUTPUT_DIR}"
+# CC and XXD are left unquoted so that they may carry extra arguments.
+${CC} -Os -emit-llvm -c "${INPUT_FILE}" -o "${BC_FILENAME}"
+# Write to a temporary file first so a failed run never leaves a truncated
+# header behind.
+${XXD} -i "${BC_FILENAME}" > "${OUTPUT_FILENAME}.tmp"
+mv "${OUTPUT_FILENAME}.tmp" "${OUTPUT_FILENAME}"