diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index e9acc86bae6..e86fae410bd 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -86,6 +86,11 @@ llvm_map_components_to_libnames(LLVM_LIBRARIES core mcjit nativecodegen native) include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) list(APPEND Peloton_LINKER_LIBS ${LLVM_LIBRARIES}) +# --[ FFI +find_package(Libffi) +include_directories(SYSTEM ${LIBFFI_INCLUDE_DIRS}) +list(APPEND Peloton_LINKER_LIBS ${LIBFFI_LIBRARIES}) + # --[ IWYU # Generate clang compilation database diff --git a/cmake/Modules/FindLibffi.cmake b/cmake/Modules/FindLibffi.cmake new file mode 100644 index 00000000000..8e9883967bc --- /dev/null +++ b/cmake/Modules/FindLibffi.cmake @@ -0,0 +1,39 @@ +# - Try to find Libffi +# +# A Portable Foreign Function Interface Library (https://sourceware.org/libffi) +# +# Usage: +# LIBFFI_INCLUDE_DIRS, location of header files +# LIBFFI_LIBRARIES, location of library +# LIBFFI_FOUND, indicates if libffi was found + +# Look for the header file. +execute_process(COMMAND brew --prefix libffi OUTPUT_VARIABLE LIBFFI_BREW_PREFIX) + +find_library(LIBFFI_LIBRARY NAMES ffi libffi + PATHS /usr /usr/local /opt/local + PATH_SUFFIXES lib lib64 x86_64-linux-gnu lib/x86_64-linux-gnu + ) + +find_path(LIBFFI_INCLUDE_DIR ffi.h + PATHS /usr /usr/local /opt/local /usr/include/ffi + PATH_SUFFIXES include include/ffi include/x86_64-linux-gnu x86_64-linux-gnu + HINT LIBFFI_BREW_PREFIX + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LIBFFI DEFAULT_MSG LIBFFI_LIBRARY LIBFFI_INCLUDE_DIR) + + +# Copy the results to the output variables. +IF(LIBFFI_FOUND) + SET(LIBFFI_LIBRARIES ${LIBFFI_LIBRARY}) + SET(LIBFFI_INCLUDE_DIRS ${LIBFFI_INCLUDE_DIR}) +ELSE(LIBFFI_FOUND) + SET(LIBFFI_LIBRARIES) + SET(LIBFFI_INCLUDE_DIRS) +ENDIF(LIBFFI_FOUND) + +MARK_AS_ADVANCED(LIBFFI_INCLUDE_DIRS LIBFFI_LIBRARIES) + +message(STATUS "Found Libffi (include: ${LIBFFI_INCLUDE_DIRS}, library: ${LIBFFI_LIBRARIES})") \ No newline at end of file diff --git a/script/installation/packages.sh b/script/installation/packages.sh index dec631fab98..1dff718b7d6 100755 --- a/script/installation/packages.sh +++ b/script/installation/packages.sh @@ -176,6 +176,8 @@ if [ "$DISTRO" = "UBUNTU" ]; then libedit-dev \ libssl-dev \ postgresql-client \ + libffi6 \ + libffi-dev \ libtbb-dev \ python3-pip \ curl \ @@ -219,6 +221,7 @@ elif [ "$DISTRO" = "DARWIN" ]; then brew install libedit brew install llvm@3.7 brew install postgresql + brew install libffi brew install tbb brew install curl brew install wget diff --git a/src/codegen/code_context.cpp b/src/codegen/code_context.cpp index cffdd6e22f2..14731e4fdc1 100644 --- a/src/codegen/code_context.cpp +++ b/src/codegen/code_context.cpp @@ -44,8 +44,9 @@ namespace { class PelotonMemoryManager : public llvm::SectionMemoryManager { public: explicit PelotonMemoryManager( - const std::unordered_map &symbols) - : symbols_(symbols) {} + const std::unordered_map> &builtins) + : builtins_(builtins) {} #if LLVM_VERSION_GE(4, 0) #define RET_TYPE llvm::JITSymbol @@ -56,8 +57,6 @@ class PelotonMemoryManager : public llvm::SectionMemoryManager { #define BUILD_RET_TYPE(addr) \ (RET_TYPE{(uint64_t)addr, llvm::JITSymbolFlags::Exported}) #endif - - /// Find the address of the function with the given name RET_TYPE findSymbol(const std::string &name) override { LOG_TRACE("Looking up symbol '%s' ...", name.c_str()); if (auto *builtin = LookupSymbol(name)) { @@ -68,23 +67,22 @@ class PelotonMemoryManager : public 
llvm::SectionMemoryManager {
     LOG_TRACE("--> Not builtin, use fallback resolution ...");
     return llvm::SectionMemoryManager::findSymbol(name);
   }
-
 #undef RET_TYPE
 #undef BUILD_RET_TYPE
 
  private:
   void *LookupSymbol(const std::string &name) const {
     // Check for a builtin with the exact name
-    auto symbol_iter = symbols_.find(name);
-    if (symbol_iter != symbols_.end()) {
-      return symbol_iter->second;
+    auto symbol_iter = builtins_.find(name);
+    if (symbol_iter != builtins_.end()) {
+      return symbol_iter->second.second;
     }
 
     // Check for a builtin with the leading '_' removed
     if (!name.empty() && name[0] == '_') {
-      symbol_iter = symbols_.find(name.substr(1));
-      if (symbol_iter != symbols_.end()) {
-        return symbol_iter->second;
+      symbol_iter = builtins_.find(name.substr(1));
+      if (symbol_iter != builtins_.end()) {
+        return symbol_iter->second.second;
       }
     }
 
@@ -94,7 +92,9 @@ class PelotonMemoryManager : public llvm::SectionMemoryManager {
  private:
   // The registered builtins, mapped by name
-  const std::unordered_map<std::string, CodeContext::FuncPtr> &symbols_;
+  const std::unordered_map<
+      std::string, std::pair<llvm::Function *, CodeContext::FuncPtr>>
+      &builtins_;
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -177,7 +177,8 @@ CodeContext::CodeContext()
       func_(nullptr),
       udf_func_ptr_(nullptr),
       pass_manager_(nullptr),
-      engine_(nullptr) {
+      engine_(nullptr),
+      is_verified_(false) {
   // Initialize JIT stuff
   llvm::InitializeNativeTarget();
   llvm::InitializeNativeTargetAsmPrinter();
@@ -200,8 +201,7 @@ CodeContext::CodeContext()
   engine_.reset(
       llvm::EngineBuilder(std::move(m))
           .setEngineKind(llvm::EngineKind::JIT)
-          .setMCJITMemoryManager(
-              llvm::make_unique<PelotonMemoryManager>(function_symbols_))
+          .setMCJITMemoryManager(llvm::make_unique<PelotonMemoryManager>(builtins_))
           .setMCPU(llvm::sys::getHostCPUName())
           .setErrorStr(&err_str_)
           .create());
@@ -223,6 +223,7 @@ CodeContext::CodeContext()
   int32_type_ = llvm::Type::getInt32Ty(*context_);
   int64_type_ = llvm::Type::getInt64Ty(*context_);
   double_type_ = llvm::Type::getDoubleTy(*context_);
+  float_type_ = llvm::Type::getFloatTy(*context_);
   void_type_ = llvm::Type::getVoidTy(*context_);
   void_ptr_type_ = llvm::Type::getInt8PtrTy(*context_);
   char_ptr_type_ = llvm::Type::getInt8PtrTy(*context_);
@@ -251,14 +252,13 @@ void CodeContext::RegisterExternalFunction(llvm::Function *func_decl,
   PELOTON_ASSERT(func_impl != nullptr && "The function pointer cannot be NULL");
   functions_.emplace_back(func_decl, func_impl);
 
-  // Register the builtin symbol by name
-  function_symbols_[func_decl->getName()] = func_impl;
+  builtins_[func_decl->getName()] = std::make_pair(func_decl, func_impl);
 }
 
 void CodeContext::RegisterBuiltin(llvm::Function *func_decl,
                                   CodeContext::FuncPtr func_impl) {
   const auto name = func_decl->getName();
-  if (LookupBuiltin(name) != nullptr) {
+  if (LookupBuiltin(name).first != nullptr) {
     LOG_DEBUG("Builtin '%s' already registered, skipping ...", name.data());
     return;
   }
@@ -268,36 +268,50 @@ void CodeContext::RegisterBuiltin(llvm::Function *func_decl,
                  func_decl->isDeclaration() &&
                  "You cannot provide a function definition for a builtin function");
 
-  // Register the builtin function
-  builtins_[name] = func_decl;
-
-  // Register the builtin symbol by name
-  function_symbols_[name] = func_impl;
+  // Register the builtin function with type and implementation
+  builtins_[name] = std::make_pair(func_decl, func_impl);
 }
 
-llvm::Function *CodeContext::LookupBuiltin(const std::string &name) const {
+std::pair<llvm::Function *, CodeContext::FuncPtr> CodeContext::LookupBuiltin(
+    const std::string &name) const {
   auto iter = builtins_.find(name);
-  return (iter == builtins_.end() ?
nullptr : iter->second);
+  return (iter == builtins_.end()
+              ? std::make_pair<llvm::Function *, CodeContext::FuncPtr>(nullptr, nullptr)
+              : iter->second);
 }
 
-/// Optimize and JIT compile all the functions that were created in this context
-bool CodeContext::Compile() {
+/// Verify all the functions that were created in this context
+void CodeContext::Verify() {
   // Verify the module is okay
   llvm::raw_ostream &errors = llvm::errs();
   if (llvm::verifyModule(*module_, &errors)) {
-    // There is an error in the module that failed compilation.
+    // There is an error in the module.
     // Dump the crappy IR to the log ...
    LOG_ERROR("ERROR IN MODULE:\n%s\n", GetIR().c_str());
-    return false;
+
+    throw Exception("The generated LLVM code contains errors.");
   }
 
+  // All is well
+  is_verified_ = true;
+}
+
+/// Optimize all the functions that were created in this context
+void CodeContext::Optimize() {
+  // Make sure the code is verified
+  if (!is_verified_) Verify();
+
   // Run the optimization passes over each function in this module
   pass_manager_->doInitialization();
   for (auto &func_iter : functions_) {
     pass_manager_->run(*func_iter.first);
   }
   pass_manager_->doFinalization();
+}
+
+/// JIT compile all the functions that were created in this context
+void CodeContext::Compile() {
+  // Make sure the code is verified
+  if (!is_verified_) Verify();
+
   // Print some IR stats
   if (settings::SettingsManager::GetBool(settings::SettingId::print_ir_stats)) {
     char name[] = "inst count";
     InstructionCounts inst_count(*name);
@@ -305,7 +319,7 @@ bool CodeContext::Compile() {
     inst_count.DumpStats();
   }
 
-  // Functions and module have been optimized, now JIT compile the module
+  // JIT compile the module
   engine_->finalizeObject();
 
   // Pull out the compiled function implementations
@@ -314,14 +328,34 @@ bool CodeContext::Compile() {
   }
 
   // Log the module
+  LOG_TRACE("%s\n", GetIR().c_str());
   if (settings::SettingsManager::GetBool(settings::SettingId::dump_ir)) {
     LOG_DEBUG("%s\n", GetIR().c_str());
   }
+}
 
-  // All is well
-  return true;
+size_t CodeContext::GetTypeSize(llvm::Type *type) const {
+  auto size = GetDataLayout().getTypeSizeInBits(type) / 8;
+  return size != 0 ? size : 1;
+}
+
+size_t CodeContext::GetTypeSizeInBits(llvm::Type *type) const {
+  return GetDataLayout().getTypeSizeInBits(type);
+}
+
+size_t CodeContext::GetTypeAllocSize(llvm::Type *type) const {
+  return GetDataLayout().getTypeAllocSize(type);
+}
+
+size_t CodeContext::GetTypeAllocSizeInBits(llvm::Type *type) const {
+  return GetDataLayout().getTypeAllocSizeInBits(type);
+}
+
+size_t CodeContext::GetStructElementOffset(llvm::StructType *type,
+                                           size_t index) const {
+  return GetDataLayout().getStructLayout(type)->getElementOffset(index);
+}
 
+// TODO(marcel) same as LookupBuiltin?
CodeContext::FuncPtr CodeContext::GetRawFunctionPointer(
    llvm::Function *fn) const {
  for (const auto &iter : functions_) {
    if (iter.first == fn) {
      return iter.second;
    }
  }
@@ -334,6 +368,7 @@ CodeContext::FuncPtr CodeContext::GetRawFunctionPointer(
   return nullptr;
 }
 
+/// Get the module's layout
 const llvm::DataLayout &CodeContext::GetDataLayout() const {
   return module_->getDataLayout();
 }
diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index b810fd4c092..9b93049cfe1 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -150,7 +150,7 @@ llvm::Value *CodeGen::CallFunc(llvm::Value *fn,
 llvm::Value *CodeGen::Printf(const std::string &format,
                              const std::vector<llvm::Value *> &args) {
-  auto *printf_fn = LookupBuiltin("printf");
+  auto *printf_fn = LookupBuiltin("printf").first;
   if (printf_fn == nullptr) {
 #if GCC_AT_LEAST_6
     // In newer GCC versions (i.e., GCC 6+), function attributes are part of the
@@ -183,7 +183,7 @@ llvm::Value *CodeGen::Printf(const std::string &format,
 llvm::Value *CodeGen::Memcmp(llvm::Value *ptr1, llvm::Value *ptr2,
                              llvm::Value *len) {
   static constexpr char kMemcmpFnName[] = "memcmp";
-  auto *memcmp_fn = LookupBuiltin(kMemcmpFnName);
+  auto *memcmp_fn = LookupBuiltin(kMemcmpFnName).first;
   if (memcmp_fn == nullptr) {
 #if GCC_AT_LEAST_6
     // In newer GCC versions (i.e., GCC 6+), function attributes are part of the
@@ -311,7 +311,7 @@ llvm::Function *CodeGen::RegisterBuiltin(const std::string &fn_name,
                                          llvm::FunctionType *fn_type,
                                          void *func_impl) {
   // Check if this is already registered as a builtin; quit early if so
-  auto *builtin = LookupBuiltin(fn_name);
+  auto *builtin = LookupBuiltin(fn_name).first;
   if (builtin != nullptr) {
     return builtin;
   }
@@ -332,6 +332,10 @@ llvm::Type *CodeGen::LookupType(const std::string &name) const {
   return GetModule().getTypeByName(name);
 }
 
+std::pair<llvm::Function *, CodeContext::FuncPtr> CodeGen::LookupBuiltin(
+    const std::string &name) const {
+  return code_context_.LookupBuiltin(name);
+}
+
 llvm::Value *CodeGen::GetState() const {
   auto *func_builder = code_context_.GetCurrentFunction();
   PELOTON_ASSERT(func_builder != nullptr);
@@ -346,6 +350,20 @@ uint64_t CodeGen::SizeOf(llvm::Type *type) const {
   return size != 0 ?
size : 1; } +std::string CodeGen::Dump(const llvm::Value *value) { + std::string string; + llvm::raw_string_ostream llvm_stream(string); + llvm_stream << *value; + return llvm_stream.str(); +} + +std::string CodeGen::Dump(llvm::Type *type) { + std::string string; + llvm::raw_string_ostream llvm_stream(string); + llvm_stream << *type; + return llvm_stream.str(); +} + uint64_t CodeGen::ElementOffset(llvm::Type *type, uint32_t element_idx) const { PELOTON_ASSERT(llvm::isa(type)); auto &data_layout = code_context_.GetDataLayout(); diff --git a/src/codegen/compilation_context.cpp b/src/codegen/compilation_context.cpp index d7f65dafcda..82cd32b38d9 100644 --- a/src/codegen/compilation_context.cpp +++ b/src/codegen/compilation_context.cpp @@ -97,17 +97,13 @@ void CompilationContext::GeneratePlan(Query &query, } // Next, we prepare the query statement with the functions we've generated - Query::QueryFunctions funcs = { - .init_func = init, .plan_func = plan, .tear_down_func = tear_down}; - bool prepared = query.Prepare(funcs); - if (!prepared) { - throw Exception{"There was an error preparing the compiled query"}; - } + Query::LLVMFunctions funcs = {init, plan, tear_down}; + query.Prepare(funcs); // We're done if (stats != nullptr) { timer.Stop(); - stats->jit_ms = timer.GetDuration(); + stats->optimize_ms = timer.GetDuration(); } } diff --git a/src/codegen/interpreter/bytecode_builder.cpp b/src/codegen/interpreter/bytecode_builder.cpp new file mode 100644 index 00000000000..57295da2567 --- /dev/null +++ b/src/codegen/interpreter/bytecode_builder.cpp @@ -0,0 +1,1885 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// bytecode_builder.cpp +// +// Identification: src/codegen/interpreter/bytecode_builder.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "codegen/interpreter/bytecode_builder.h" + +#include +#include + +#include "codegen/codegen.h" +#include "common/exception.h" +#include "util/math_util.h" + +namespace peloton { +namespace codegen { +namespace interpreter { + +BytecodeBuilder::BytecodeBuilder(const CodeContext &code_context, + const llvm::Function *function) + : bytecode_function_(function->getName().str()), + number_value_slots_(0), + number_temporary_value_slots_(0), + rpo_traversal_(function), + code_context_(code_context), + llvm_function_(function) {} + +BytecodeFunction BytecodeBuilder::CreateBytecodeFunction( + const CodeContext &code_context, const llvm::Function *function, + bool use_naive_register_allocator) { + BytecodeBuilder builder(code_context, function); + builder.AnalyseFunction(); + + if (use_naive_register_allocator) { + builder.PerformNaiveRegisterAllocation(); + } else { + builder.PerformGreedyRegisterAllocation(); + } + + builder.TranslateFunction(); + builder.Finalize(); + + return std::move(builder.bytecode_function_); +} + +Opcode BytecodeBuilder::GetOpcodeForTypeAllTypes(Opcode untyped_op, + llvm::Type *type) const { + index_t id = BytecodeFunction::GetOpcodeId(untyped_op); + + // This function highly depends on the macros in bytecode_instructions.def! 
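+  //
+  // A sketch of the assumed layout (opcode names illustrative, not taken
+  // verbatim from bytecode_instructions.def): each typed opcode family is
+  // expanded contiguously in the order i8, i16, i32, i64, float, double, e.g.
+  //   add_i8, add_i16, add_i32, add_i64, add_float, add_double
+  // so for an i32 operand, GetOpcodeFromId(id + 2) selects add_i32.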
+ + if (type == code_context_.bool_type_ || type == code_context_.int8_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 0); + } else if (type == code_context_.int16_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 1); + } else if (type == code_context_.int32_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 2); + } else if (type == code_context_.int64_type_ || + type == code_context_.char_ptr_type_ || type->isPointerTy()) { + return BytecodeFunction::GetOpcodeFromId(id + 3); + } else if (type == code_context_.float_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 4); + } else if (type == code_context_.double_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 5); + } else { + throw NotSupportedException("llvm type not supported: " + + CodeGen::Dump(type)); + } +} + +Opcode BytecodeBuilder::GetOpcodeForTypeIntTypes(Opcode untyped_op, + llvm::Type *type) const { + index_t id = BytecodeFunction::GetOpcodeId(untyped_op); + + // This function highly depends on the macros in bytecode_instructions.def! + + if (type == code_context_.bool_type_ || type == code_context_.int8_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 0); + } else if (type == code_context_.int16_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 1); + } else if (type == code_context_.int32_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 2); + } else if (type == code_context_.int64_type_ || + type == code_context_.char_ptr_type_ || type->isPointerTy()) { + return BytecodeFunction::GetOpcodeFromId(id + 3); + } else { + throw NotSupportedException("llvm type not supported: " + + CodeGen::Dump(type)); + } +} + +Opcode BytecodeBuilder::GetOpcodeForTypeFloatTypes(Opcode untyped_op, + llvm::Type *type) const { + index_t id = BytecodeFunction::GetOpcodeId(untyped_op); + + // This function highly depends on the macros in bytecode_instructions.def! + + // float is missing! + if (type == code_context_.float_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 0); + } else if (type == code_context_.double_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 1); + } else { + throw NotSupportedException("llvm type not supported: " + + CodeGen::Dump(type)); + } +} + +Opcode BytecodeBuilder::GetOpcodeForTypeSizeIntTypes(Opcode untyped_op, + llvm::Type *type) const { + index_t id = BytecodeFunction::GetOpcodeId(untyped_op); + + // This function highly depends on the macros in bytecode_instructions.def! 
+
+  switch (code_context_.GetTypeSize(type)) {
+    case 1:
+      return BytecodeFunction::GetOpcodeFromId(id + 0);
+
+    case 2:
+      return BytecodeFunction::GetOpcodeFromId(id + 1);
+
+    case 4:
+      return BytecodeFunction::GetOpcodeFromId(id + 2);
+
+    case 8:
+      return BytecodeFunction::GetOpcodeFromId(id + 3);
+
+    default:
+      throw NotSupportedException("llvm type size not supported: " +
+                                  CodeGen::Dump(type));
+  }
+}
+
+Instruction &BytecodeBuilder::InsertBytecodeInstruction(
+    const llvm::Instruction *llvm_instruction, Opcode opcode,
+    const std::vector<index_t> &args) {
+  PELOTON_ASSERT(opcode != Opcode::undefined);
+
+  // calculate number of required instruction slots
+  // args.size() + 1 because of the Opcode
+  const size_t number_instruction_slots = MathUtil::DivRoundUp(
+      sizeof(uint16_t) * (1 + args.size()), sizeof(instr_slot_t));
+
+  bytecode_function_.bytecode_.insert(bytecode_function_.bytecode_.end(),
+                                      number_instruction_slots, 0);
+  Instruction &instruction = *reinterpret_cast<Instruction *>(
+      &*(bytecode_function_.bytecode_.end() - number_instruction_slots));
+  instruction.op = opcode;
+  for (size_t i = 0; i < args.size(); i++) instruction.args[i] = args[i];
+
+  AddInstructionToTrace(llvm_instruction, number_instruction_slots);
+
+  return instruction;
+}
+
+Instruction &BytecodeBuilder::InsertBytecodeInstruction(
+    const llvm::Instruction *llvm_instruction, Opcode opcode,
+    const std::vector<const llvm::Value *> &args) {
+  PELOTON_ASSERT(opcode != Opcode::undefined);
+
+  std::vector<index_t> args_transformed(args.size());
+  std::transform(
+      args.begin(), args.end(), args_transformed.begin(),
+      [this](const llvm::Value *value) { return GetValueSlot(value); });
+
+  return InsertBytecodeInstruction(llvm_instruction, opcode, args_transformed);
+}
+
+ExternalCallInstruction &BytecodeBuilder::InsertBytecodeExternalCallInstruction(
+    const llvm::Instruction *llvm_instruction, index_t call_context,
+    void *function) {
+  // calculate number of required instruction slots and assert it is 2
+  // (this way we recognize any unintended size changes)
+  const size_t number_instruction_slots = MathUtil::DivRoundUp(
+      sizeof(ExternalCallInstruction), sizeof(instr_slot_t));
+  PELOTON_ASSERT(number_instruction_slots == 2);
+
+  bytecode_function_.bytecode_.insert(bytecode_function_.bytecode_.end(),
+                                      number_instruction_slots, 0);
+
+  ExternalCallInstruction instruction = {
+      Opcode::call_external, call_context,
+      reinterpret_cast<void (*)(void)>(function)};
+
+  instr_slot_t *instruction_slot =
+      &*(bytecode_function_.bytecode_.end() - number_instruction_slots);
+  ExternalCallInstruction *call_instruction_slot =
+      reinterpret_cast<ExternalCallInstruction *>(instruction_slot);
+  *call_instruction_slot = instruction;
+
+  AddInstructionToTrace(llvm_instruction, number_instruction_slots);
+
+  return reinterpret_cast<ExternalCallInstruction &>(
+      bytecode_function_.bytecode_[bytecode_function_.bytecode_.size() -
+                                   number_instruction_slots]);
+}
+
+InternalCallInstruction &BytecodeBuilder::InsertBytecodeInternalCallInstruction(
+    const llvm::Instruction *llvm_instruction, index_t sub_function,
+    index_t dest_slot, size_t number_arguments) {
+  // calculate number of required instruction slots
+  // number_arguments + 4 because of the number of fixed arguments
+  // (see structure of InternalCallInstruction)
+  const size_t number_instruction_slots = MathUtil::DivRoundUp(
+      sizeof(uint16_t) * (4 + number_arguments), sizeof(instr_slot_t));
+
+  bytecode_function_.bytecode_.insert(bytecode_function_.bytecode_.end(),
+                                      number_instruction_slots, 0);
+  InternalCallInstruction &instruction =
+      *reinterpret_cast<InternalCallInstruction *>(
&*(bytecode_function_.bytecode_.end() - number_instruction_slots)); + instruction.op = Opcode::call_internal; + instruction.sub_function = sub_function; + instruction.dest_slot = dest_slot; + instruction.number_args = static_cast(number_arguments); + + PELOTON_ASSERT( + &instruction.args[number_arguments - 1] < + reinterpret_cast(&bytecode_function_.bytecode_.back() + 1)); + + AddInstructionToTrace(llvm_instruction, number_instruction_slots); + + return reinterpret_cast( + *(bytecode_function_.bytecode_.end() - number_instruction_slots)); +} + +#ifndef NDEBUG +void BytecodeBuilder::AddInstructionToTrace( + const llvm::Instruction *llvm_instruction, + size_t number_instruction_slots) { + bytecode_function_.instruction_trace_.insert( + bytecode_function_.instruction_trace_.end(), number_instruction_slots, + llvm_instruction); +} +#endif + +BytecodeBuilder::value_index_t BytecodeBuilder::GetValueIndex( + const llvm::Value *value) { + auto result = value_mapping_.find(value); + + // If the index already exists, just return it + if (result != value_mapping_.end()) { + return result->second; + } + + // Otherwise create a new index + + // Special case for constants + if (auto *llvm_constant = llvm::dyn_cast(value)) { + return GetConstantIndex(llvm_constant); + } + + value_index_t value_index = value_liveness_.size(); + value_mapping_[value] = value_index; + value_liveness_.emplace_back(std::numeric_limits::max(), + std::numeric_limits::max()); + return value_index; +} + +BytecodeBuilder::value_index_t BytecodeBuilder::CreateValueAlias( + const llvm::Value *alias, value_index_t value_index) { + PELOTON_ASSERT(value_mapping_.find(alias) == value_mapping_.end()); + value_mapping_[alias] = value_index; + + return value_index; +} + +value_t BytecodeBuilder::GetConstantValue( + const llvm::Constant *constant) const { + llvm::Type *type = constant->getType(); + + if (constant->isNullValue() || constant->isZeroValue() || llvm::isa(constant)) { + return 0; + } else { + switch (type->getTypeID()) { + case llvm::Type::IntegerTyID: { + int64_t value_signed = + llvm::cast(constant)->getSExtValue(); + return *reinterpret_cast(&value_signed); + } + + case llvm::Type::FloatTyID: { + float value_float = llvm::cast(constant) + ->getValueAPF() + .convertToFloat(); + return *reinterpret_cast(&value_float); + } + + case llvm::Type::DoubleTyID: { + double value_double = llvm::cast(constant) + ->getValueAPF() + .convertToDouble(); + + return *reinterpret_cast(&value_double); + } + + case llvm::Type::PointerTyID: { + if (constant->getNumOperands() > 0) { + if (auto *constant_int = + llvm::dyn_cast(constant->getOperand(0))) { + return reinterpret_cast(constant_int->getZExtValue()); + } + } + + PELOTON_FALLTHROUGH; + } + + default: + throw NotSupportedException("unsupported constant type: " + + CodeGen::Dump(constant->getType())); + } + } +} + +BytecodeBuilder::value_index_t BytecodeBuilder::GetConstantIndex( + const llvm::Constant *constant) { + auto value_mapping_result = value_mapping_.find(constant); + if (value_mapping_result != value_mapping_.end()) { + return value_mapping_result->second; + } + + value_t value = GetConstantValue(constant); + value_index_t value_index; + + // We merge all constants that share the same value (not the type!) 
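+  // (Illustrative example of this merging: the i32 constant 0, the i64
+  // constant 0 and a null pointer all map to the value_t 0, so they end up
+  // sharing a single constants_ entry and a single value slot.)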
+ + // Check if entry with this value already exists + auto constant_result = std::find(bytecode_function_.constants_.begin(), + bytecode_function_.constants_.end(), value); + + if (constant_result == bytecode_function_.constants_.end()) { + // create new constant with that value + value_index = value_liveness_.size(); + value_mapping_[constant] = value_index; + value_liveness_.emplace_back(0, 0); // constant liveness starts at 0 + + bytecode_function_.constants_.push_back(value); + constant_value_indexes_.push_back(value_index); + + // constants liveness starts at program start + value_liveness_[value_index].first = 0; + } else { + // value already exists, create alias + auto constant_index = + constant_result - bytecode_function_.constants_.begin(); + value_index = constant_value_indexes_[constant_index]; + CreateValueAlias(constant, value_index); + } + + return value_index; +}; + +index_t BytecodeBuilder::GetValueSlot(const llvm::Value *value) const { + auto result = value_mapping_.find(value); + PELOTON_ASSERT(result != value_mapping_.end()); + + return value_slots_[result->second]; +} + +void BytecodeBuilder::ExtendValueLiveness( + const llvm::Value *llvm_value, instruction_index_t instruction_index) { + value_index_t value_index = GetValueIndex(llvm_value); + + // Special case if no liveness information is available yet + if (value_liveness_[value_index].first == + std::numeric_limits::max()) { + value_liveness_[value_index].first = instruction_index; + value_liveness_[value_index].second = instruction_index; + return; + } + + if (instruction_index < value_liveness_[value_index].first) { + value_liveness_[value_index].first = instruction_index; + } else if (instruction_index > value_liveness_[value_index].second) { + value_liveness_[value_index].second = instruction_index; + } +} + +index_t BytecodeBuilder::GetTemporaryValueSlot(const llvm::BasicBlock *bb) { + // we basically count the number of additional value slots that are + // requested per basic block + + // new entry in map is created automatically if necessary + number_temporary_values_[bb]++; + + number_temporary_value_slots_ = + std::max(number_temporary_value_slots_, + static_cast(number_temporary_values_[bb])); + return number_value_slots_ + number_temporary_values_[bb] - 1; +} + +ffi_type *BytecodeBuilder::GetFFIType(llvm::Type *type) const { + if (type->isVoidTy()) { + return &ffi_type_void; + } else if (type->isPointerTy()) { + return &ffi_type_pointer; + } else if (type == code_context_.double_type_) { + return &ffi_type_double; + } + + // exact type not necessary, only size is important + switch (code_context_.GetTypeSize(type)) { + case 1: + return &ffi_type_uint8; + + case 2: + return &ffi_type_uint16; + + case 4: + return &ffi_type_uint32; + + case 8: + return &ffi_type_uint64; + + default: + throw NotSupportedException( + std::string("can't find a ffi_type for type: ") + + CodeGen::Dump(type)); + } +} + +bool BytecodeBuilder::IsConstantValue(const llvm::Value *value) const { + auto *constant = llvm::dyn_cast(value); + return (constant != nullptr); +} + +int64_t BytecodeBuilder::GetConstantIntegerValueSigned( + llvm::Value *constant) const { + return llvm::cast(constant)->getSExtValue(); +} + +uint64_t BytecodeBuilder::GetConstantIntegerValueUnsigned( + llvm::Value *constant) const { + return llvm::cast(constant)->getZExtValue(); +} + +bool BytecodeBuilder::BasicBlockIsRPOSucc(const llvm::BasicBlock *bb, + const llvm::BasicBlock *succ) const { + // walk the vector where we saved the basic block pointers in R + // 
reverse post order (RPO)
+  for (size_t i = 0; i < bb_reverse_post_order_.size() - 1; i++) {
+    if (bb_reverse_post_order_[i] == bb &&
+        bb_reverse_post_order_[i + 1] == succ) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void BytecodeBuilder::AnalyseFunction() {
+  std::unordered_map<const llvm::BasicBlock *,
+                     std::pair<instruction_index_t, instruction_index_t>>
+      bb_instruction_index_range;
+
+  /* The analyse pass does:
+   * - determine the liveness of all values
+   * - merge values of instructions that translate to nop
+   * - merge constants and create list of constants
+   * - extract some additional information, e.g. for overflow aware operations
+   */
+
+  // Process function arguments
+  for (auto &argument : llvm_function_->args()) {
+    // DEF: function arguments are already defined at function start
+    ExtendValueLiveness(&argument, 0);
+  }
+
+  instruction_index_t instruction_index = 0;
+  for (llvm::ReversePostOrderTraversal<const llvm::Function *>::rpo_iterator
+           traversal_iterator = rpo_traversal_.begin();
+       traversal_iterator != rpo_traversal_.end(); ++traversal_iterator) {
+    const llvm::BasicBlock *bb = *traversal_iterator;
+
+    // Add this basic block to the rpo vector for pred/succ lookups
+    bb_reverse_post_order_.push_back(bb);
+
+    bb_instruction_index_range[bb].first = instruction_index;
+
+    // Iterate all instructions to collect the liveness information
+    // There are exceptions for several instructions,
+    // which are labeled and explained below.
+    for (llvm::BasicBlock::const_iterator instr_iterator = bb->begin();
+         instr_iterator != bb->end(); ++instr_iterator, ++instruction_index) {
+      const llvm::Instruction *instruction = instr_iterator;
+
+      bool is_non_zero_gep = false;
+      if (instruction->getOpcode() == llvm::Instruction::GetElementPtr &&
+          !llvm::cast<llvm::GetElementPtrInst>(instruction)
+               ->hasAllZeroIndices()) {
+        is_non_zero_gep = true;
+      }
+
+      // PHI handling:
+      // We do not process the PHI instructions directly; at the end of a
+      // basic block, we process all PHI instructions of the successor blocks
+      // that refer to the current basic block. This is the position where we
+      // will insert the mov instructions when we resolve the PHIs later.
+
+      // Skip PHI instructions
+      if (instruction->getOpcode() == llvm::Instruction::PHI) {
+        continue;
+      }
+
+      // If the instruction is a terminator, process the PHIs of the
+      // succeeding basic blocks first
+      if (llvm::isa<llvm::TerminatorInst>(instruction)) {
+        bool found_back_edge = false;
+
+        // For all successor basic blocks
+        for (auto succ_iterator = llvm::succ_begin(bb);
+             succ_iterator != llvm::succ_end(bb); ++succ_iterator) {
+          // Iterate phi instructions
+          for (llvm::BasicBlock::const_iterator instr_iterator =
+                   succ_iterator->begin();
+               auto *phi_instruction =
+                   llvm::dyn_cast<const llvm::PHINode>(&*instr_iterator);
+               ++instr_iterator) {
+            // extend lifetime of the phi value itself
+            ExtendValueLiveness(phi_instruction, instruction_index);
+
+            // extend lifetime of its operand
+            llvm::Value *phi_operand =
+                phi_instruction->getIncomingValueForBlock(bb);
+            // Similar to Exception 4, we extend the lifetime by one to ensure
+            // the other phi operations do not overwrite the operand
+            ExtendValueLiveness(phi_operand, instruction_index + 1);
+          }  // end iterate phi instructions
+
+          // We also use the iteration over basic block successors to find
+          // back edges: if we have seen a successor basic block before, it
+          // must be a back edge.
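+          // (Example: for a loop bb1 -> bb2 -> bb1, bb1 is traversed before
+          // bb2, so seeing bb1 again as a successor of bb2 reveals the back
+          // edge; every value live across it must keep its slot for the
+          // whole loop body, which the extension below ensures.)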
+ if (!found_back_edge) { + auto instruction_index_range = + bb_instruction_index_range.find(*succ_iterator); + if (instruction_index_range != bb_instruction_index_range.end()) { + index_t back_edge_instruction_index = + instruction_index_range->second.first; + + // For all values that are live at that time... + for (auto &liveness : value_liveness_) { + if (liveness.first < back_edge_instruction_index && + liveness.second >= back_edge_instruction_index) { + // ...extend lifetime of this value to survive back edge + // instruction_index + 1 is the index of the last + // instruction in this basic block + liveness.second = instruction_index + 1; + } + } + + found_back_edge = true; + } + } + } // end iterate successor basic blocks + + instruction_index++; + + // fall through (continue with terminator instruction) + } + + // Exception 1: Skip the ExtractValue instructions we already + // processed in Exception 6 + if (instruction->getOpcode() == llvm::Instruction::ExtractValue) { + auto *extractvalue_instruction = + llvm::cast(instruction); + + // Check if this extract refers to a overflow call instruction + auto result = overflow_results_mapping_.find( + llvm::cast(instruction->getOperand(0))); + if (result != overflow_results_mapping_.end() && + (result->second.first == extractvalue_instruction || + result->second.second == extractvalue_instruction)) { + continue; + } + + // fall through + } + + // USE: Iterate operands of instruction and extend their liveness + for (llvm::Instruction::const_op_iterator op_iterator = + instruction->op_begin(); + op_iterator != instruction->op_end(); ++op_iterator) { + llvm::Value *operand = op_iterator->get(); + + // constant operands + if (IsConstantValue(operand)) { + // Exception 2: the called function in a CallInst is also a constant + // but we want to skip this one + auto *call_instruction = llvm::dyn_cast(instruction); + if (call_instruction != nullptr && + call_instruction->getCalledFunction() == &*operand) { + continue; + } + + // Exception 3: constant operands from GEP and extractvalue are not + // needed, as they get encoded in the instruction itself + if (instruction->getOpcode() == llvm::Instruction::GetElementPtr || + instruction->getOpcode() == llvm::Instruction::ExtractValue) { + continue; + } + + // USE: extend liveness of constant value + ExtendValueLiveness(operand, instruction_index); + + // Exception 4: We extend the lifetime of GEP operands of GEPs + // that don't translate to nop, by one, to make sure that the operands + // don't get overridden when we split the GEP into several + // instructions. + } else if (is_non_zero_gep) { + ExtendValueLiveness(operand, instruction_index + 1); // extended! 
+ + // A BasicBlock may be a label operand, but we don't need to track + // them + } else if (!llvm::isa(operand)) { + ExtendValueLiveness(operand, instruction_index); + } + } + + // Exception 5: For some instructions we know in advance that they will + // produce a nop, so we merge their value and their operand here + if (instruction->getOpcode() == llvm::Instruction::BitCast || + instruction->getOpcode() == llvm::Instruction::Trunc || + instruction->getOpcode() == llvm::Instruction::PtrToInt || + (instruction->getOpcode() == llvm::Instruction::GetElementPtr && + llvm::cast(instruction) + ->hasAllZeroIndices())) { + // merge operand resulting value + CreateValueAlias(instruction, + GetValueIndex(instruction->getOperand(0))); + continue; + } + + // Exception 6: Call instructions to any overflow aware operation + // have to be tracked, because we save their results directly in + // the destination slots of the ExtractValue instructions referring + // to them. + if (instruction->getOpcode() == llvm::Instruction::Call) { + // Check if the call instruction calls a overflow aware operation + // (unfortunately there is no better way to check this) + auto *call_instruction = llvm::cast(instruction); + llvm::Function *function = call_instruction->getCalledFunction(); + if (function->isDeclaration()) { + std::string function_name = function->getName().str(); + + if (function_name.size() >= 13 && + function_name.substr(10, 13) == "with.overflow") { + // create entry for this call + overflow_results_mapping_[call_instruction] = + std::make_pair(nullptr, nullptr); + + // Find the first ExtractValue instruction referring to this call + // instruction for result and overflow each and put it in the + // value_liveness vector here. The liveness of those + // instructions has to be extended to the definition of the call + // instruction, and this way we ensure that the vector is sorted + // by lifetime start index and we avoid sorting it later. + for (auto *user : call_instruction->users()) { + auto *extract_instruction = + llvm::cast(user); + size_t extract_index = *extract_instruction->idx_begin(); + + if (extract_index == 0) { + PELOTON_ASSERT( + overflow_results_mapping_[call_instruction].first == + nullptr); + overflow_results_mapping_[call_instruction].first = + extract_instruction; + + } else if (extract_index == 1) { + PELOTON_ASSERT( + overflow_results_mapping_[call_instruction].second == + nullptr); + overflow_results_mapping_[call_instruction].second = + extract_instruction; + } + + ExtendValueLiveness(extract_instruction, instruction_index); + } + + // Do not process the result of this instruction, + // as this value (the overflow result struct) doesn't exist + // later in the bytecode. 
+ + continue; + } + } + } + + // DEF: save the instruction index as the liveness starting point + if (!instruction->getType()->isVoidTy()) { + ExtendValueLiveness(instruction, instruction_index); + } + } + + bb_instruction_index_range[bb].second = instruction_index - 1; + } +} + +void BytecodeBuilder::PerformNaiveRegisterAllocation() { + // assign a value slot to every liveness range in value_liveness_ + value_slots_.resize(value_liveness_.size(), 0); + index_t reg = 0; + + // process constants + for (auto &constant_value_index : constant_value_indexes_) { + value_slots_[constant_value_index] = reg++ + 1; + } + + // process function arguments + for (auto &argument : llvm_function_->args()) { + value_index_t argument_value_index = GetValueIndex(&argument); + value_slots_[argument_value_index] = reg++ + 1; + } + + // iterate over other entries, which are already sorted + for (value_index_t i = 0; i < value_liveness_.size(); ++i) { + // skip values that are never used (get assigned to dummy slot) + if (value_liveness_[i].first == value_liveness_[i].second) { + continue; + } + + // some values (constants, function arguments) are processed already + if (value_slots_[i] == 0) { + value_slots_[i] = reg++ + 1; // + 1 because 0 is dummy slot + } + } + + number_value_slots_ = reg + 1; +} + +void BytecodeBuilder::PerformGreedyRegisterAllocation() { + // assign a value slot to every liveness range in value_liveness_ + + value_slots_.resize(value_liveness_.size(), 0); + std::vector registers(constant_value_indexes_.size() + + llvm_function_->arg_size()); + index_t reg = 0; + + auto findEmptyRegister = [®isters](ValueLiveness liveness) { + for (index_t i = 0; i < registers.size(); ++i) { + if (registers[i].second <= liveness.first) { + registers[i] = liveness; + return i; + } + } + + // no empty register found, create new one + registers.push_back(liveness); + return static_cast(registers.size() - 1); + }; + + // process constants + for (auto &constant_value_index : constant_value_indexes_) { + registers[reg] = value_liveness_[constant_value_index]; + value_slots_[constant_value_index] = + reg++ + 1; // + 1 because 0 is dummy slot + } + + // process function arguments + for (auto &argument : llvm_function_->args()) { + value_index_t argument_value_index = GetValueIndex(&argument); + registers[reg] = value_liveness_[argument_value_index]; + value_slots_[argument_value_index] = + reg++ + 1; // + 1 because 0 is dummy slot + } + + PELOTON_ASSERT(registers.size() == reg); + +// The vector value_liveness_ is already sorted by lifetime start index +// except for the constant values, which are already processed + +#ifndef NDEBUG + // additional check in debug mode, to ensure that our assertion that the + // vector is already sorted by lifetime start index (except zero) is correct + instruction_index_t instruction_index = 1; + + for (value_index_t i = 0; i < value_liveness_.size(); ++i) { + if (value_liveness_[i].first != 0) { + PELOTON_ASSERT(value_liveness_[i].first >= instruction_index); + instruction_index = value_liveness_[i].first; + } + } +#endif + + // iterate over other entries, which are already sorted + for (value_index_t i = 0; i < value_liveness_.size(); ++i) { + // skip values that are never used + if (value_liveness_[i].first == value_liveness_[i].second) { + continue; + } + + if (value_slots_[i] == 0) { + value_slots_[i] = findEmptyRegister(value_liveness_[i]) + + 1; // + 1 because 0 is dummy slot + } + } + + number_value_slots_ = registers.size() + 1; // + 1 because 0 is dummy slot +} + +void 
BytecodeBuilder::TranslateFunction() {
+  // Map every basic block to an index in the resulting bytecode stream. This
+  // is needed to perform the relocations in the branch instructions.
+  std::unordered_map<const llvm::BasicBlock *, index_t> bb_mapping;
+
+  // Collect all bytecode relocations that have to be performed after
+  // translation, when the mapping information in bb_mapping is complete.
+  std::vector<BytecodeRelocation> bytecode_relocations;
+
+  // Iterate the basic blocks in reverse post order (RPO)
+  // Linear scan register allocation requires RPO traversal
+  // Initializing the RPO traversal is expensive, so we initialize it once
+  // for the BytecodeBuilder object and reuse it.
+  for (llvm::ReversePostOrderTraversal<const llvm::Function *>::rpo_iterator
+           traversal_iterator = rpo_traversal_.begin();
+       traversal_iterator != rpo_traversal_.end(); ++traversal_iterator) {
+    const llvm::BasicBlock *bb = *traversal_iterator;
+
+    // add basic block mapping
+    bb_mapping[bb] = bytecode_function_.bytecode_.size();
+
+    // Iterate all instructions in the basic block
+    for (llvm::BasicBlock::const_iterator instr_iterator = bb->begin();
+         instr_iterator != bb->end(); ++instr_iterator) {
+      const llvm::Instruction *instruction = instr_iterator;
+
+      // Dispatch to the respective translator function
+      switch (instruction->getOpcode()) {
+        // Terminators
+        case llvm::Instruction::Br:
+          ProcessPHIsForBasicBlock(bb);
+          TranslateBranch(instruction, bytecode_relocations);
+          break;
+
+        case llvm::Instruction::Ret:
+          ProcessPHIsForBasicBlock(bb);
+          TranslateReturn(instruction);
+          break;
+
+        // Standard binary operators
+        // Logical operators
+        case llvm::Instruction::Add:
+        case llvm::Instruction::Sub:
+        case llvm::Instruction::Mul:
+        case llvm::Instruction::UDiv:
+        case llvm::Instruction::SDiv:
+        case llvm::Instruction::URem:
+        case llvm::Instruction::SRem:
+        case llvm::Instruction::Shl:
+        case llvm::Instruction::LShr:
+        case llvm::Instruction::And:
+        case llvm::Instruction::Or:
+        case llvm::Instruction::Xor:
+        case llvm::Instruction::AShr:
+        case llvm::Instruction::FAdd:
+        case llvm::Instruction::FSub:
+        case llvm::Instruction::FMul:
+        case llvm::Instruction::FDiv:
+        case llvm::Instruction::FRem:
+          TranslateBinaryOperator(instruction);
+          break;
+
+        // Memory instructions
+        case llvm::Instruction::Load:
+          TranslateLoad(instruction);
+          break;
+
+        case llvm::Instruction::Store:
+          TranslateStore(instruction);
+          break;
+
+        case llvm::Instruction::Alloca:
+          TranslateAlloca(instruction);
+          break;
+
+        case llvm::Instruction::GetElementPtr:
+          TranslateGetElementPtr(instruction);
+          break;
+
+        // Cast instructions
+        case llvm::Instruction::BitCast:
+          // bit casts translate to nop
+          // values got already merged in analysis pass
+          break;
+
+        case llvm::Instruction::SExt:
+        case llvm::Instruction::ZExt:
+        case llvm::Instruction::IntToPtr:
+          TranslateIntExt(instruction);
+          break;
+
+        case llvm::Instruction::Trunc:
+        case llvm::Instruction::PtrToInt:
+          // trunc translates to nop
+          // values got already merged in analysis pass
+          break;
+
+        case llvm::Instruction::FPExt:
+        case llvm::Instruction::FPTrunc:
+          TranslateFloatTruncExt(instruction);
+          break;
+
+        case llvm::Instruction::UIToFP:
+        case llvm::Instruction::SIToFP:
+        case llvm::Instruction::FPToUI:
+        case llvm::Instruction::FPToSI:
+          TranslateFloatIntCast(instruction);
+          break;
+
+        // Other instructions
+        case llvm::Instruction::ICmp:
+        case llvm::Instruction::FCmp:
+          TranslateCmp(instruction);
+          break;
+
+        case llvm::Instruction::PHI:
+          // PHIs are handled before every terminating instruction
+          break;
+
+        case llvm::Instruction::Call:
+          TranslateCall(instruction);
+          break;
+
+        case llvm::Instruction::Select:
+          TranslateSelect(instruction);
+          break;
+
+        case llvm::Instruction::ExtractValue:
+          TranslateExtractValue(instruction);
+          break;
+
+        case llvm::Instruction::Unreachable:
+          // nop
+          break;
+
+        // Instruction is not supported
+        default: { throw NotSupportedException("instruction not supported"); }
+      }
+    }
+  }
+
+  // apply the relocations required by the placed branch instructions
+  for (auto &relocation : bytecode_relocations) {
+    reinterpret_cast<Instruction *>(
+        &bytecode_function_.bytecode_[relocation.instruction_slot])
+        ->args[relocation.argument] = bb_mapping[relocation.bb];
+  }
+}
+
+void BytecodeBuilder::Finalize() {
+  // calculate final number of value slots during runtime
+  bytecode_function_.number_values_ =
+      number_value_slots_ + number_temporary_value_slots_;
+
+  // check if the number of values exceeds the available bit range
+  // (unrealistic)
+  if (bytecode_function_.number_values_ >=
+      std::numeric_limits<index_t>::max()) {
+    throw NotSupportedException("number of values exceeds max number of bits");
+  }
+
+  // prepare arguments
+  bytecode_function_.number_function_arguments_ = llvm_function_->arg_size();
+}
+
+void BytecodeBuilder::ProcessPHIsForBasicBlock(const llvm::BasicBlock *bb) {
+  struct AdditionalMove {
+    const llvm::Instruction *instruction;
+    index_t dest;
+    index_t src;
+  };
+
+  // Keeps track of additional moves (due to the PHI swap problem) that have
+  // to be applied after all PHI nodes have been processed.
+  std::vector<AdditionalMove> additional_moves;
+
+  for (auto succ_iterator = llvm::succ_begin(bb);
+       succ_iterator != llvm::succ_end(bb); ++succ_iterator) {
+    // If the basic block is its own successor, we risk running into the PHI
+    // swap problem (lost-copy problem). To avoid this, we move the values
+    // into temporary registers and move them to their destination after
+    // processing all other PHI nodes.
+    if (*succ_iterator == bb) {
+      for (auto instruction_iterator = succ_iterator->begin();
+           auto *phi_node =
+               llvm::dyn_cast<const llvm::PHINode>(&*instruction_iterator);
+           ++instruction_iterator) {
+        index_t temp_slot = GetTemporaryValueSlot(bb);
+
+        InsertBytecodeInstruction(
+            phi_node, Opcode::phi_mov,
+            {temp_slot, GetValueSlot(phi_node->getIncomingValueForBlock(bb))});
+        additional_moves.push_back(
+            {phi_node, GetValueSlot(phi_node), temp_slot});
+      }
+
+      // Common case: create mov instruction to destination slot
+    } else {
+      for (auto instruction_iterator = succ_iterator->begin();
+           auto *phi_node =
+               llvm::dyn_cast<const llvm::PHINode>(&*instruction_iterator);
+           ++instruction_iterator) {
+        if (GetValueSlot(phi_node) ==
+            GetValueSlot(phi_node->getIncomingValueForBlock(bb))) {
+          continue;
+        }
+
+        InsertBytecodeInstruction(
+            phi_node, Opcode::phi_mov,
+            {phi_node, phi_node->getIncomingValueForBlock(bb)});
+      }
+    }
+  }
+
+  // Place additional moves if needed
+  for (auto &entry : additional_moves) {
+    InsertBytecodeInstruction(entry.instruction, Opcode::phi_mov,
+                              {entry.dest, entry.src});
+  }
+}
+
+void BytecodeBuilder::TranslateBranch(
+    const llvm::Instruction *instruction,
+    std::vector<BytecodeRelocation> &bytecode_relocations) {
+  auto *branch_instruction = llvm::cast<llvm::BranchInst>(&*instruction);
+
+  // conditional branch
+  if (branch_instruction->isConditional()) {
+    // The first operand in the IR is the false branch, while the second one
+    // is the true one (printed llvm assembly is the other way round).
+    // To be consistent, we use the order of the memory representation
+    // in our bytecode.
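+    // (Illustrative IR: for "br i1 %cond, label %then, label %else", the
+    // in-memory operand order is %cond, %else, %then, i.e. operand 1 is the
+    // false target and operand 2 the true target.)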
+
+    // If the false branch is the next basic block, we can use a fall-through
+    // branch
+    if (BasicBlockIsRPOSucc(
+            branch_instruction->getParent(),
+            llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(1)))) {
+      InsertBytecodeInstruction(
+          instruction, Opcode::branch_cond_ft,
+          std::vector<index_t>{GetValueSlot(branch_instruction->getOperand(0)),
+                               0});
+
+      BytecodeRelocation relocation_false{
+          static_cast<index_t>(bytecode_function_.bytecode_.size() - 1), 1,
+          llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(2))};
+
+      // add a relocation entry to insert the missing destination information
+      // later
+      bytecode_relocations.push_back(relocation_false);
+
+      // no fall through
+    } else {
+      InsertBytecodeInstruction(
+          instruction, Opcode::branch_cond,
+          {GetValueSlot(branch_instruction->getOperand(0)), 0, 0});
+
+      BytecodeRelocation relocation_false{
+          static_cast<index_t>(bytecode_function_.bytecode_.size() - 1), 1,
+          llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(1))};
+
+      // add a relocation entry to insert the missing destination information
+      // later
+      bytecode_relocations.push_back(relocation_false);
+
+      BytecodeRelocation relocation_true{
+          static_cast<index_t>(bytecode_function_.bytecode_.size() - 1), 2,
+          llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(2))};
+
+      // add a relocation entry to insert the missing destination information
+      // later
+      bytecode_relocations.push_back(relocation_true);
+    }
+
+    // unconditional branch
+  } else {
+    // If the unconditional branch points to the next basic block,
+    // we can omit the branch instruction
+    if (!BasicBlockIsRPOSucc(
+            branch_instruction->getParent(),
+            llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(0)))) {
+      InsertBytecodeInstruction(instruction, Opcode::branch_uncond,
+                                std::vector<index_t>{0});
+
+      BytecodeRelocation relocation{
+          static_cast<index_t>(bytecode_function_.bytecode_.size() - 1), 0,
+          llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(0))};
+
+      // add a relocation entry to insert the missing destination information
+      // later
+      bytecode_relocations.push_back(relocation);
+    }
+  }
+}
+
+void BytecodeBuilder::TranslateReturn(const llvm::Instruction *instruction) {
+  auto *return_instruction = llvm::cast<llvm::ReturnInst>(&*instruction);
+
+  // We only have one ret bytecode instruction. If the function returns void,
+  // the instruction will return the value of the dummy value slot zero,
+  // but no one will ever pick up that value.
+ + index_t return_slot = 0; + if (return_instruction->getNumOperands() > 0) { + return_slot = GetValueSlot(return_instruction->getOperand(0)); + } + + InsertBytecodeInstruction(instruction, Opcode::ret, + std::vector{return_slot}); +} + +void BytecodeBuilder::TranslateBinaryOperator( + const llvm::Instruction *instruction) { + auto *binary_operator = llvm::cast(&*instruction); + auto *type = binary_operator->getType(); + Opcode opcode; + + switch (binary_operator->getOpcode()) { + case llvm::Instruction::Add: + case llvm::Instruction::FAdd: + opcode = GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::add), type); + break; + + case llvm::Instruction::Sub: + case llvm::Instruction::FSub: + opcode = GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::sub), type); + break; + + case llvm::Instruction::Mul: + case llvm::Instruction::FMul: + opcode = GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::mul), type); + break; + + case llvm::Instruction::UDiv: + case llvm::Instruction::FDiv: + opcode = GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::div), type); + break; + + case llvm::Instruction::SDiv: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::sdiv), type); + break; + + case llvm::Instruction::URem: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::urem), type); + break; + + case llvm::Instruction::FRem: + opcode = + GetOpcodeForTypeFloatTypes(GET_FIRST_FLOAT_TYPES(Opcode::frem), type); + break; + + case llvm::Instruction::SRem: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::srem), type); + break; + + case llvm::Instruction::Shl: + opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::shl), type); + break; + + case llvm::Instruction::LShr: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::lshr), type); + break; + + case llvm::Instruction::AShr: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::ashr), type); + break; + + case llvm::Instruction::And: + opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::and), type); + break; + + case llvm::Instruction::Or: + opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode:: or), type); + break; + + case llvm::Instruction::Xor: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode:: xor), type); + break; + + default: + throw NotSupportedException("binary operation not supported"); + } + + InsertBytecodeInstruction(instruction, opcode, + {binary_operator, binary_operator->getOperand(0), + binary_operator->getOperand(1)}); +} + +void BytecodeBuilder::TranslateAlloca(const llvm::Instruction *instruction) { + auto *alloca_instruction = llvm::cast(&*instruction); + Opcode opcode; + + // get type to allocate + llvm::Type *type = alloca_instruction->getAllocatedType(); + + // get type size in bytes + size_t type_size = code_context_.GetTypeSize(type); + + if (alloca_instruction->isArrayAllocation()) { + index_t array_size = GetValueSlot(alloca_instruction->getArraySize()); + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::alloca_array), + alloca_instruction->getArraySize()->getType()); + + // type size is immediate value! + InsertBytecodeInstruction(instruction, opcode, + {GetValueSlot(alloca_instruction), + static_cast(type_size), array_size}); + } else { + opcode = Opcode::alloca; + // type size is immediate value! 
+ InsertBytecodeInstruction( + instruction, opcode, + {GetValueSlot(alloca_instruction), static_cast(type_size)}); + } +} + +void BytecodeBuilder::TranslateLoad(const llvm::Instruction *instruction) { + auto *load_instruction = llvm::cast(&*instruction); + + Opcode opcode = GetOpcodeForTypeSizeIntTypes( + GET_FIRST_INT_TYPES(Opcode::load), load_instruction->getType()); + InsertBytecodeInstruction( + instruction, opcode, + {load_instruction, load_instruction->getPointerOperand()}); +} + +void BytecodeBuilder::TranslateStore(const llvm::Instruction *instruction) { + auto *store_instruction = llvm::cast(&*instruction); + + Opcode opcode = + GetOpcodeForTypeSizeIntTypes(GET_FIRST_INT_TYPES(Opcode::store), + store_instruction->getOperand(0)->getType()); + InsertBytecodeInstruction( + instruction, opcode, + std::vector{store_instruction->getPointerOperand(), + store_instruction->getValueOperand()}); +} + +void BytecodeBuilder::TranslateGetElementPtr( + const llvm::Instruction *instruction) { + auto *gep_instruction = llvm::cast(&*instruction); + int64_t overall_offset = 0; + + // If the GEP translates to a nop, the values have been already merged + // during the analysis pass + if (gep_instruction->hasAllZeroIndices()) { + return; + } + + // The offset is an immediate constant, not a slot index + // instruction is created here, but offset will be filled in later, + // because we may merge it with constant array accesses + auto &gep_offset_bytecode_instruction_ref = InsertBytecodeInstruction( + gep_instruction, Opcode::gep_offset, + {GetValueSlot(gep_instruction), + GetValueSlot(gep_instruction->getPointerOperand()), 0}); + size_t gep_offset_bytecode_instruction_index = + bytecode_function_.GetIndexFromIP(&gep_offset_bytecode_instruction_ref); + + // First index operand of the instruction is the array index for the + // source type + + // Get type of struct/array which will be processed + llvm::Type *type = gep_instruction->getSourceElementType(); + + if (IsConstantValue(gep_instruction->getOperand(1))) { + overall_offset += + code_context_.GetTypeSize(type) * + GetConstantIntegerValueSigned(gep_instruction->getOperand(1)); + } else { + index_t index = GetValueSlot(instruction->getOperand(1)); + Opcode opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::gep_array), + instruction->getOperand(1)->getType()); + + // size of array element is an immediate constant, not a slot index! + InsertBytecodeInstruction( + gep_instruction, opcode, + {GetValueSlot(gep_instruction), index, + static_cast(code_context_.GetTypeSize(type))}); + } + + // Iterate remaining Indexes + for (unsigned int operand_index = 2; + operand_index < instruction->getNumOperands(); ++operand_index) { + auto *operand = instruction->getOperand(operand_index); + + if (auto *array_type = llvm::dyn_cast(type)) { + if (IsConstantValue(operand)) { + overall_offset += + code_context_.GetTypeSize(array_type->getElementType()) * + GetConstantIntegerValueSigned(operand); + } else { + index_t index = GetValueSlot(operand); + Opcode opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::gep_array), operand->getType()); + + // size of array element is an immediate constant, not a slot index! 
+        InsertBytecodeInstruction(
+            gep_instruction, opcode,
+            {GetValueSlot(gep_instruction), index,
+             static_cast<index_t>(
+                 code_context_.GetTypeSize(array_type->getElementType()))});
+      }
+
+      // get inner type for next iteration
+      type = array_type->getElementType();
+
+    } else if (auto *struct_type = llvm::dyn_cast<llvm::StructType>(type)) {
+      uint64_t index = GetConstantIntegerValueUnsigned(operand);
+      PELOTON_ASSERT(index < struct_type->getNumElements());
+
+      // get element offset
+      overall_offset +=
+          code_context_.GetStructElementOffset(struct_type, index);
+
+      // get inner type for next iteration
+      type = struct_type->getElementType(index);
+
+    } else {
+      throw NotSupportedException(
+          "unexpected type in getelementptr instruction");
+    }
+  }
+
+  // make sure that the resulting type is correct
+  PELOTON_ASSERT(type == gep_instruction->getResultElementType());
+
+  // fill in the calculated overall offset in the previously placed gep_offset
+  // bytecode instruction
+  // (use the index instead of a reference, as the vector may have been
+  // relocated!)
+  reinterpret_cast<Instruction *>(
+      &bytecode_function_.bytecode_[gep_offset_bytecode_instruction_index])
+      ->args[2] = static_cast<index_t>(overall_offset);
+}
+
+void BytecodeBuilder::TranslateFloatIntCast(
+    const llvm::Instruction *instruction) {
+  auto *cast_instruction = llvm::dyn_cast<llvm::CastInst>(&*instruction);
+
+  // These instructions exist from every integer type to every floating
+  // point type and the other way round.
+  // We can only expand instructions in one dimension, so we expand the
+  // integer dimension and create the floating point instances manually
+  // (float and double)
+
+  Opcode opcode = Opcode::undefined;
+
+  if (instruction->getOpcode() == llvm::Instruction::FPToSI) {
+    if (cast_instruction->getOperand(0)->getType() ==
+        code_context_.float_type_) {
+      opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::floattosi),
+                                        cast_instruction->getType());
+    } else if (cast_instruction->getOperand(0)->getType() ==
+               code_context_.double_type_) {
+      opcode = GetOpcodeForTypeIntTypes(
+          GET_FIRST_INT_TYPES(Opcode::doubletosi), cast_instruction->getType());
+    } else {
+      throw NotSupportedException("unsupported cast instruction");
+    }
+
+  } else if (instruction->getOpcode() == llvm::Instruction::FPToUI) {
+    if (cast_instruction->getOperand(0)->getType() ==
+        code_context_.float_type_) {
+      opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::floattoui),
+                                        cast_instruction->getType());
+    } else if (cast_instruction->getOperand(0)->getType() ==
+               code_context_.double_type_) {
+      opcode = GetOpcodeForTypeIntTypes(
+          GET_FIRST_INT_TYPES(Opcode::doubletoui), cast_instruction->getType());
+    } else {
+      throw NotSupportedException("unsupported cast instruction");
+    }
+
+  } else if (instruction->getOpcode() == llvm::Instruction::SIToFP) {
+    if (cast_instruction->getType() == code_context_.float_type_) {
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::sitofloat),
+                                   cast_instruction->getOperand(0)->getType());
+    } else if (cast_instruction->getType() == code_context_.double_type_) {
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::sitodouble),
+                                   cast_instruction->getOperand(0)->getType());
+    } else {
+      throw NotSupportedException("unsupported cast instruction");
+    }
+
+  } else if (instruction->getOpcode() == llvm::Instruction::UIToFP) {
+    if (cast_instruction->getType() == code_context_.float_type_) {
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::uitofloat),
+                                   cast_instruction->getOperand(0)->getType());
+    } else if (cast_instruction->getType() ==
code_context_.double_type_) { + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::uitodouble), + cast_instruction->getOperand(0)->getType()); + } else { + throw NotSupportedException("unsupported cast instruction"); + } + + } else { + throw NotSupportedException("unsupported cast instruction"); + } + + InsertBytecodeInstruction( + cast_instruction, opcode, + {cast_instruction, cast_instruction->getOperand(0)}); +} + +void BytecodeBuilder::TranslateIntExt(const llvm::Instruction *instruction) { + auto *cast_instruction = llvm::dyn_cast(&*instruction); + + size_t src_type_size = + code_context_.GetTypeSize(cast_instruction->getSrcTy()); + size_t dest_type_size = + code_context_.GetTypeSize(cast_instruction->getDestTy()); + + if (src_type_size == dest_type_size) { + if (GetValueSlot(instruction) != GetValueSlot(instruction->getOperand(0))) + InsertBytecodeInstruction(instruction, Opcode::nop_mov, + {instruction, instruction->getOperand(0)}); + return; + } + + Opcode opcode = Opcode::undefined; + + if (instruction->getOpcode() == llvm::Instruction::SExt) { + if (src_type_size == 1 && dest_type_size == 2) { + opcode = Opcode::sext_i8_i16; + + } else if (src_type_size == 1 && dest_type_size == 4) { + opcode = Opcode::sext_i8_i32; + + } else if (src_type_size == 1 && dest_type_size == 8) { + opcode = Opcode::sext_i8_i64; + + } else if (src_type_size == 2 && dest_type_size == 4) { + opcode = Opcode::sext_i16_i32; + + } else if (src_type_size == 2 && dest_type_size == 8) { + opcode = Opcode::sext_i16_i64; + + } else if (src_type_size == 4 && dest_type_size == 8) { + opcode = Opcode::sext_i32_i64; + + } else { + throw NotSupportedException("unsupported sext instruction"); + } + + } else if (instruction->getOpcode() == llvm::Instruction::ZExt || + instruction->getOpcode() == llvm::Instruction::IntToPtr) { + if (src_type_size == 1 && dest_type_size == 2) { + opcode = Opcode::zext_i8_i16; + + } else if (src_type_size == 1 && dest_type_size == 4) { + opcode = Opcode::zext_i8_i32; + + } else if (src_type_size == 1 && dest_type_size == 8) { + opcode = Opcode::zext_i8_i64; + + } else if (src_type_size == 2 && dest_type_size == 4) { + opcode = Opcode::zext_i16_i32; + + } else if (src_type_size == 2 && dest_type_size == 8) { + opcode = Opcode::zext_i16_i64; + + } else if (src_type_size == 4 && dest_type_size == 8) { + opcode = Opcode::zext_i32_i64; + + } else { + throw NotSupportedException("unsupported zext instruction"); + } + + } else { + throw NotSupportedException("unexpected ext instruction"); + } + + InsertBytecodeInstruction( + cast_instruction, opcode, + {cast_instruction, cast_instruction->getOperand(0)}); +} + +void BytecodeBuilder::TranslateFloatTruncExt( + const llvm::Instruction *instruction) { + auto *cast_instruction = llvm::dyn_cast(&*instruction); + + auto src_type = cast_instruction->getSrcTy(); + auto dest_type = cast_instruction->getDestTy(); + + if (src_type == dest_type) { + if (GetValueSlot(instruction) != GetValueSlot(instruction->getOperand(0))) { + InsertBytecodeInstruction(instruction, Opcode::nop_mov, + {instruction, instruction->getOperand(0)}); + } + return; + } + + if (src_type == code_context_.double_type_ && + dest_type == code_context_.float_type_) { + InsertBytecodeInstruction( + cast_instruction, Opcode::doubletofloat, + {cast_instruction, cast_instruction->getOperand(0)}); + } else if (src_type == code_context_.float_type_ && + dest_type == code_context_.double_type_) { + InsertBytecodeInstruction( + cast_instruction, Opcode::floattodouble, + 
{cast_instruction, cast_instruction->getOperand(0)});
+  } else {
+    throw NotSupportedException("unsupported FPTrunc/FPExt instruction");
+  }
+}
+
+void BytecodeBuilder::TranslateCmp(const llvm::Instruction *instruction) {
+  auto *cmp_instruction = llvm::cast<llvm::CmpInst>(&*instruction);
+  auto *type = cmp_instruction->getOperand(0)->getType();
+  Opcode opcode = Opcode::undefined;
+
+  switch (cmp_instruction->getPredicate()) {
+    case llvm::CmpInst::Predicate::ICMP_EQ:
+    case llvm::CmpInst::Predicate::FCMP_OEQ:
+    case llvm::CmpInst::Predicate::FCMP_UEQ:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_eq), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_NE:
+    case llvm::CmpInst::Predicate::FCMP_ONE:
+    case llvm::CmpInst::Predicate::FCMP_UNE:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_ne), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_UGT:
+    case llvm::CmpInst::Predicate::FCMP_OGT:
+    case llvm::CmpInst::Predicate::FCMP_UGT:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_gt), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_UGE:
+    case llvm::CmpInst::Predicate::FCMP_OGE:
+    case llvm::CmpInst::Predicate::FCMP_UGE:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_ge), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_ULT:
+    case llvm::CmpInst::Predicate::FCMP_OLT:
+    case llvm::CmpInst::Predicate::FCMP_ULT:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_lt), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_ULE:
+    case llvm::CmpInst::Predicate::FCMP_OLE:
+    case llvm::CmpInst::Predicate::FCMP_ULE:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_le), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_SGT:
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::cmp_sgt), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_SGE:
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::cmp_sge), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_SLT:
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::cmp_slt), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_SLE:
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::cmp_sle), type);
+      break;
+
+    default:
+      throw NotSupportedException("comparison predicate not supported");
+  }
+
+  InsertBytecodeInstruction(cmp_instruction, opcode,
+                            {cmp_instruction, cmp_instruction->getOperand(0),
+                             cmp_instruction->getOperand(1)});
+}
+
+void BytecodeBuilder::TranslateCall(const llvm::Instruction *instruction) {
+  auto *call_instruction = llvm::cast<llvm::CallInst>(&*instruction);
+
+  llvm::Function *function = call_instruction->getCalledFunction();
+
+  if (function->isDeclaration()) {
+    // The only way to find out about the called function (even if it's an
+    // intrinsic) is to check the function name string
+    std::string function_name = function->getName().str();
+
+    if (function_name.find("llvm.memcpy") == 0) {
+      if (call_instruction->getOperand(2)->getType() !=
+          code_context_.int64_type_) {
+        throw NotSupportedException(
+            "memcpy with different size type than i64 not supported");
+      }
+
+      InsertBytecodeInstruction(
+          call_instruction, Opcode::llvm_memcpy,
+          {call_instruction->getOperand(0), call_instruction->getOperand(1),
+           call_instruction->getOperand(2)});
+
+    } else if (function_name.find("llvm.memmove") == 0) {
+      if (call_instruction->getOperand(2)->getType() !=
+          code_context_.int64_type_)
+        throw NotSupportedException(
+            "memmove with different size type 
than i64 not supported"); + + InsertBytecodeInstruction( + call_instruction, Opcode::llvm_memmove, + {call_instruction->getOperand(0), call_instruction->getOperand(1), + call_instruction->getOperand(2)}); + + } else if (function_name.find("llvm.memset") == 0) { + if (call_instruction->getOperand(2)->getType() != + code_context_.int64_type_) + throw NotSupportedException( + "memset with different size type than i64 not supported"); + + InsertBytecodeInstruction( + call_instruction, Opcode::llvm_memset, + {call_instruction->getOperand(0), call_instruction->getOperand(1), + call_instruction->getOperand(2)}); + + } else if (function_name.find("with.overflow") == 10) { + index_t result = 0; + index_t overflow = 0; + auto *type = call_instruction->getOperand(0)->getType(); + Opcode opcode = Opcode::undefined; + + // The destination slots have been already prepared from the analysis pass + PELOTON_ASSERT(overflow_results_mapping_.find(call_instruction) != + overflow_results_mapping_.end()); + + if (overflow_results_mapping_[call_instruction].first != nullptr) { + result = + GetValueSlot(overflow_results_mapping_[call_instruction].first); + } + + if (overflow_results_mapping_[call_instruction].second != nullptr) { + overflow = + GetValueSlot(overflow_results_mapping_[call_instruction].second); + } + + if (function_name.substr(5, 4) == "uadd") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_uadd_overflow), type); + } else if (function_name.substr(5, 4) == "sadd") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_sadd_overflow), type); + } else if (function_name.substr(5, 4) == "usub") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_usub_overflow), type); + } else if (function_name.substr(5, 4) == "ssub") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_ssub_overflow), type); + } else if (function_name.substr(5, 4) == "umul") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_umul_overflow), type); + } else if (function_name.substr(5, 4) == "smul") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_smul_overflow), type); + } else { + throw NotSupportedException( + "the requested operation with overflow is not supported"); + } + + InsertBytecodeInstruction( + call_instruction, opcode, + {result, overflow, GetValueSlot(call_instruction->getOperand(0)), + GetValueSlot(call_instruction->getOperand(1))}); + + } else if (function_name.find("llvm.x86.sse42.crc32") == 0) { + if (call_instruction->getType() != code_context_.int64_type_) { + throw NotSupportedException( + "sse42.crc32 with different size type than i64 not supported"); + } + + InsertBytecodeInstruction( + call_instruction, Opcode::llvm_sse42_crc32, + {call_instruction, call_instruction->getOperand(0), + call_instruction->getOperand(1)}); + + } else { + Opcode opcode = + BytecodeFunction::GetExplicitCallOpcodeByString(function_name); + + // call explicit instantiation of this function if available + if (opcode != Opcode::undefined) { + std::vector args; + args.reserve(call_instruction->getNumArgOperands()); + + if (!instruction->getType()->isVoidTy()) { + args.push_back(call_instruction); + } + + for (unsigned int i = 0; i < call_instruction->getNumArgOperands(); + i++) { + args.push_back(call_instruction->getArgOperand(i)); + } + + InsertBytecodeInstruction(call_instruction, opcode, args); + + } else { + // Function is not available in IR context, so we have to make an + // external function call + 
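+        // Illustrative sketch of what happens at runtime for such a call
+        // (see InitializeActivationRecord() in bytecode_interpreter.cpp):
+        // roughly,
+        //   ffi_prep_cif(&cif, FFI_DEFAULT_ABI, nargs, dest_type, arg_types);
+        //   ffi_call(&cif, FFI_FN(raw_pointer), ret_slot_ptr, arg_slot_ptrs);
+        // with the pointers taken from the value slots collected below.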
+        // lookup function pointer in code context
+        void *raw_pointer = code_context_.LookupBuiltin(function_name).second;
+
+        if (raw_pointer == nullptr) {
+          throw NotSupportedException("could not find external function: " +
+                                      function_name);
+        }
+
+        // libffi is used for external function calls
+        // Here we collect all the information that will be needed at runtime
+        // (function activation time) to create the libffi call interface.
+
+        // Hint that an explicit wrapper could be created for this function
+        LOG_DEBUG(
+            "The interpreter will call the C++ function '%s' via libffi. "
+            "Consider adding an explicit wrapper for this function in "
+            "bytecode_instructions.def\n",
+            function_name.c_str());
+
+        index_t dest_slot = 0;
+        if (!instruction->getType()->isVoidTy()) {
+          dest_slot = GetValueSlot(call_instruction);
+        }
+
+        size_t arguments_num = call_instruction->getNumArgOperands();
+        ExternalCallContext call_context{
+            dest_slot, GetFFIType(instruction->getType()),
+            std::vector<index_t>(arguments_num),
+            std::vector<ffi_type *>(arguments_num)};
+
+        for (unsigned int i = 0; i < call_instruction->getNumArgOperands();
+             i++) {
+          call_context.args[i] =
+              GetValueSlot(call_instruction->getArgOperand(i));
+          call_context.arg_types[i] =
+              GetFFIType(call_instruction->getArgOperand(i)->getType());
+        }
+
+        // add call context to bytecode function
+        bytecode_function_.external_call_contexts_.push_back(call_context);
+
+        // insert bytecode instruction referring to this call context
+        InsertBytecodeExternalCallInstruction(
+            call_instruction,
+            static_cast<index_t>(
+                bytecode_function_.external_call_contexts_.size() - 1),
+            raw_pointer);
+      }
+    }
+  } else {
+    // Internal function call to another IR function in this code context
+
+    index_t dest_slot = 0;
+    if (!instruction->getType()->isVoidTy()) {
+      dest_slot = GetValueSlot(call_instruction);
+    }
+
+    // Translate the bytecode function we want to call
+    index_t sub_function_index;
+    const auto result = sub_function_mapping_.find(function);
+    if (result != sub_function_mapping_.end()) {
+      sub_function_index = result->second;
+    } else {
+      auto sub_function =
+          BytecodeBuilder::CreateBytecodeFunction(code_context_, function);
+
+      bytecode_function_.sub_functions_.push_back(std::move(sub_function));
+      sub_function_index = bytecode_function_.sub_functions_.size() - 1;
+      sub_function_mapping_[function] = sub_function_index;
+    }
+
+    InternalCallInstruction &bytecode_instruction =
+        InsertBytecodeInternalCallInstruction(
+            call_instruction, sub_function_index, dest_slot,
+            call_instruction->getNumArgOperands());
+
+    for (unsigned int i = 0; i < call_instruction->getNumArgOperands(); i++) {
+      bytecode_instruction.args[i] =
+          GetValueSlot(call_instruction->getArgOperand(i));
+
+      // just to be safe, we check that no function argument is larger
+      // than 8 bytes
+      if (code_context_.GetTypeSize(
+              call_instruction->getArgOperand(i)->getType()) > 8) {
+        throw NotSupportedException("argument for internal call too big");
+      }
+    }
+  }
+}
+
+void BytecodeBuilder::TranslateSelect(const llvm::Instruction *instruction) {
+  auto *select_instruction = llvm::cast<llvm::SelectInst>(&*instruction);
+
+  InsertBytecodeInstruction(
+      select_instruction, Opcode::select,
+      {select_instruction, select_instruction->getCondition(),
+       select_instruction->getTrueValue(),
+       select_instruction->getFalseValue()});
+}
+
+void BytecodeBuilder::TranslateExtractValue(
+    const llvm::Instruction *instruction) {
+  auto *extract_instruction =
+      llvm::cast<llvm::ExtractValueInst>(&*instruction);
+
+  // Skip if this ExtractValue instruction belongs to an overflow operation
+  auto call_result = overflow_results_mapping_.find(
+      llvm::cast<llvm::CallInst>(instruction->getOperand(0)));
+  if (call_result != overflow_results_mapping_.end()) {
+    return;
+  }
+
+  // Get value type
+  llvm::Type *type = extract_instruction->getAggregateOperand()->getType();
+  size_t offset_bits = 0;
+
+  // make sure the result type fits in a value_t
+  if (code_context_.GetTypeSize(instruction->getType()) > sizeof(value_t)) {
+    throw NotSupportedException("extracted value too big for register size");
+  }
+
+  // Iterate indexes
+  for (auto index_it = extract_instruction->idx_begin(),
+            index_end = extract_instruction->idx_end();
+       index_it != index_end; index_it++) {
+    uint32_t index = *index_it;
+
+    if (auto *array_type = llvm::dyn_cast<llvm::ArrayType>(type)) {
+      // Advance offset
+      offset_bits +=
+          code_context_.GetTypeAllocSizeInBits(array_type->getElementType()) *
+          index;
+
+      // get inner type for next iteration
+      type = array_type->getElementType();
+    } else if (auto *struct_type = llvm::dyn_cast<llvm::StructType>(type)) {
+      PELOTON_ASSERT(index < struct_type->getNumElements());
+
+      // get element offset
+      offset_bits +=
+          code_context_.GetStructElementOffset(struct_type, index) * 8;
+
+      // get inner type for next iteration
+      type = struct_type->getElementType(index);
+    } else {
+      throw NotSupportedException(
+          "unexpected type in extractvalue instruction");
+    }
+  }
+
+  // ensure that the resulting type is correct
+  PELOTON_ASSERT(type == extract_instruction->getType());
+
+  // number of bits to shift is an immediate value!
+  InsertBytecodeInstruction(
+      extract_instruction, Opcode::extractvalue,
+      {GetValueSlot(extract_instruction),
+       GetValueSlot(extract_instruction->getAggregateOperand()),
+       static_cast<index_t>(offset_bits)});
+}
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/codegen/interpreter/bytecode_function.cpp b/src/codegen/interpreter/bytecode_function.cpp
new file mode 100644
index 00000000000..2e29fe47e55
--- /dev/null
+++ b/src/codegen/interpreter/bytecode_function.cpp
@@ -0,0 +1,302 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_function.cpp
+//
+// Identification: src/codegen/interpreter/bytecode_function.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/interpreter/bytecode_function.h"
+
+#include <fstream>
+#include <iomanip>
+#include <sstream>
+#include <string>
+
+#include "codegen/codegen.h"
+
+// Includes for explicit function calls
+#include "codegen/bloom_filter_accessor.h"
+#include "codegen/util/bloom_filter.h"
+#include "codegen/buffering_consumer.h"
+#include "codegen/deleter.h"
+#include "codegen/inserter.h"
+#include "codegen/query_parameters.h"
+#include "codegen/runtime_functions.h"
+#include "codegen/transaction_runtime.h"
+#include "codegen/updater.h"
+#include "codegen/util/oa_hash_table.h"
+#include "codegen/util/hash_table.h"
+#include "codegen/util/sorter.h"
+#include "codegen/values_runtime.h"
+#include "executor/executor_context.h"
+#include "function/date_functions.h"
+#include "function/numeric_functions.h"
+#include "function/string_functions.h"
+#include "function/timestamp_functions.h"
+#include "planner/project_info.h"
+#include "storage/data_table.h"
+#include "storage/storage_manager.h"
+#include "storage/tile_group.h"
+#include "storage/zone_map_manager.h"
+#include "codegen/util/buffer.h"
+
+namespace peloton {
+namespace codegen {
+namespace interpreter {
+
+/**
+ *
This lambda function serves as an init function to fill the const mapping + * of function names to opcodes. + */ +const std::unordered_map + BytecodeFunction::explicit_call_opcode_mapping_ = []() { + std::unordered_map mapping; + +#define HANDLE_INST(op) +#define HANDLE_EXPLICIT_CALL_INST(op, func) mapping[#func] = Opcode::op; + +#include "codegen/interpreter/bytecode_instructions.def" + + return mapping; + }(); + +const char *BytecodeFunction::GetOpcodeString(Opcode opcode) { + switch (opcode) { +#define HANDLE_INST(opcode) \ + case Opcode::opcode: \ + return #opcode; + +#include "codegen/interpreter/bytecode_instructions.def" + + default: + return "(invalid)"; + } +} + +#ifndef NDEBUG +const llvm::Instruction *BytecodeFunction::GetIRInstructionFromIP( + index_t instr_slot) const { + return instruction_trace_.at(instr_slot); +} +#endif + +size_t BytecodeFunction::GetInstructionSlotSize( + const Instruction *instruction) { + switch (instruction->op) { +#define HANDLE_INST(op) \ + case Opcode::op: \ + return 1; +#define HANDLE_EXTERNAL_CALL_INST(op) \ + case Opcode::op: \ + return 2; +#define HANDLE_INTERNAL_CALL_INST(op) \ + case Opcode::op: \ + return GetInteralCallInstructionSlotSize( \ + reinterpret_cast(instruction)); +#define HANDLE_SELECT_INST(op) \ + case Opcode::op: \ + return 2; +#define HANDLE_OVERFLOW_TYPED_INST(op, type) \ + case Opcode::op##_##type: \ + return 2; +#define HANDLE_EXPLICIT_CALL_INST(op, func) \ + case Opcode::op: \ + return GetExplicitCallInstructionSlotSize( \ + GetFunctionRequiredArgSlotsNum(&func)); + +#include "codegen/interpreter/bytecode_instructions.def" + + default: + PELOTON_ASSERT(false); + return 0; + } +} + +Opcode BytecodeFunction::GetExplicitCallOpcodeByString( + std::string function_name) { + auto result = explicit_call_opcode_mapping_.find(function_name); + + if (result != explicit_call_opcode_mapping_.end()) + return result->second; + else + return Opcode::undefined; +} + +void BytecodeFunction::DumpContents() const { + std::ofstream output; + output.open(function_name_ + ".bf"); + +#ifndef NDEBUG + const llvm::BasicBlock *bb; +#endif + + // Print Bytecode + output << "Bytecode:" << std::endl; + for (index_t i = 0; i < bytecode_.size();) { + auto *instruction = GetIPFromIndex(i); + +#ifndef NDEBUG + const llvm::Instruction *llvm_instruction = GetIRInstructionFromIP(i); + if (llvm_instruction->getOpcode() != llvm::Instruction::PHI) { + if (i > 0 && bb != llvm_instruction->getParent()) { + output << llvm_instruction->getParent()->getName().str() << ":" + << std::endl; + } + bb = llvm_instruction->getParent(); + } +#endif + + output << Dump(instruction) << std::endl; + i += GetInstructionSlotSize(instruction); + } + + // Print Constants + if (constants_.size() > 0) output << "Constants:" << std::endl; + for (size_t i = 0; i < constants_.size(); i++) { + output << "[" << std::setw(3) << std::dec << (i + 1) + << "] = " << *reinterpret_cast(&constants_[i]) + << " 0x" << std::hex << constants_[i] << std::endl; + } + + output << std::endl; + + output.close(); +} + +std::string BytecodeFunction::Dump(const Instruction *instruction) const { + std::ostringstream output; + output << "[" << std::setw(3) << GetIndexFromIP(instruction) << "] "; + output << std::setw(18) << GetOpcodeString(instruction->op) << " "; + + switch (instruction->op) { +#define HANDLE_INST(opcode) \ + case Opcode::opcode: \ + output << "[" << std::setw(3) << instruction->args[0] << "] "; \ + output << "[" << std::setw(3) << instruction->args[1] << "] "; \ + output << "[" << 
std::setw(3) << instruction->args[2] << "] "; \
+    break;
+
+#ifndef NDEBUG
+#define HANDLE_EXTERNAL_CALL_INST(opcode) \
+  case Opcode::opcode: \
+    output \
+        << "[" << std::setw(3) \
+        << external_call_contexts_ \
+               [reinterpret_cast<const ExternalCallInstruction *>( \
+                    instruction) \
+                    ->external_call_context] \
+               .dest_slot \
+        << "] "; \
+    for (auto arg : external_call_contexts_[instruction->args[0]].args) { \
+      output << "[" << std::setw(3) << arg << "] "; \
+    } \
+    output << "(" \
+           << static_cast<const llvm::CallInst *>( \
+                  instruction_trace_[GetIndexFromIP(instruction)]) \
+                  ->getCalledFunction() \
+                  ->getName() \
+                  .str() \
+           << ") "; \
+    break;
+#else
+#define HANDLE_EXTERNAL_CALL_INST(opcode) \
+  case Opcode::opcode: \
+    output << "[" << std::setw(3) \
+           << external_call_contexts_ \
+                  [reinterpret_cast<const ExternalCallInstruction *>( \
+                       instruction) \
+                       ->external_call_context] \
+                  .dest_slot \
+           << "] "; \
+    for (auto arg : external_call_contexts_[instruction->args[0]].args) { \
+      output << "[" << std::setw(3) << arg << "] "; \
+    } \
+    break;
+#endif
+
+#ifndef NDEBUG
+#define HANDLE_INTERNAL_CALL_INST(opcode) \
+  case Opcode::opcode: \
+    output << "[" << std::setw(3) \
+           << reinterpret_cast<const InternalCallInstruction *>(instruction) \
+                  ->dest_slot \
+           << "] "; \
+    for (size_t i = 0; \
+         i < reinterpret_cast<const InternalCallInstruction *>(instruction) \
+                 ->number_args; \
+         i++) { \
+      output << "[" << std::setw(3) \
+             << reinterpret_cast<const InternalCallInstruction *>( \
+                    instruction) \
+                    ->args[i] \
+             << "] "; \
+    } \
+    output << "(" \
+           << static_cast<const llvm::CallInst *>( \
+                  instruction_trace_[GetIndexFromIP(instruction)]) \
+                  ->getCalledFunction() \
+                  ->getName() \
+                  .str() \
+           << ") "; \
+    break;
+#else
+#define HANDLE_INTERNAL_CALL_INST(opcode) \
+  case Opcode::opcode: \
+    output << "[" << std::setw(3) \
+           << reinterpret_cast<const InternalCallInstruction *>(instruction) \
+                  ->dest_slot \
+           << "] "; \
+    for (size_t i = 0; \
+         i < reinterpret_cast<const InternalCallInstruction *>(instruction) \
+                 ->number_args; \
+         i++) { \
+      output << "[" << std::setw(3) \
+             << reinterpret_cast<const InternalCallInstruction *>( \
+                    instruction) \
+                    ->args[i] \
+             << "] "; \
+    } \
+    break;
+#endif
+
+#define HANDLE_SELECT_INST(opcode) \
+  case Opcode::opcode: \
+    output << "[" << std::setw(3) << instruction->args[0] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[1] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[2] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[3] << "] "; \
+    break;
+
+#define HANDLE_OVERFLOW_TYPED_INST(op, type) \
+  case Opcode::op##_##type: \
+    output << "[" << std::setw(3) << instruction->args[0] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[1] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[2] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[3] << "] "; \
+    break;
+
+#define HANDLE_EXPLICIT_CALL_INST(opcode, func) \
+  case Opcode::opcode: \
+    for (size_t i = 0; i < GetFunctionRequiredArgSlotsNum(&func); i++) \
+      output << "[" << std::setw(3) << instruction->args[i] << "] "; \
+    break;
+
+#include "codegen/interpreter/bytecode_instructions.def"
+
+    default:
+      break;
+  }
+
+#ifndef NDEBUG
+  output << "("
+         << CodeGen::Dump(GetIRInstructionFromIP(GetIndexFromIP(instruction)))
+         << ")";
+#endif
+
+  return output.str();
+}
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/codegen/interpreter/bytecode_interpreter.cpp b/src/codegen/interpreter/bytecode_interpreter.cpp
new file mode 100644
index 00000000000..ccd51f495d8
--- /dev/null
+++ b/src/codegen/interpreter/bytecode_interpreter.cpp
@@ -0,0 +1,190 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_interpreter.cpp
+//
+// Identification: src/codegen/interpreter/bytecode_interpreter.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/interpreter/bytecode_interpreter.h"
+#include "codegen/interpreter/bytecode_function.h"
+
+namespace peloton {
+namespace codegen {
+namespace interpreter {
+
+/** This is the actual dispatch code: it looks up the destination handler
+ * address in the label_pointers_ array and performs a direct jump there.
+ */
+#define INTERPRETER_DISPATCH_GOTO(ip) \
+  goto *(label_pointers_[BytecodeFunction::GetOpcodeId( \
+      reinterpret_cast<const Instruction *>(ip)->op)])
+
+/**
+ * The array with the label pointers has to be zero-initialized to make sure
+ * that we fill it with the actual values on the first execution.
+ */
+void *
+    BytecodeInterpreter::label_pointers_[BytecodeFunction::GetNumberOpcodes()] =
+        {nullptr};
+
+BytecodeInterpreter::BytecodeInterpreter(
+    const BytecodeFunction &bytecode_function)
+    : bytecode_function_(bytecode_function) {}
+
+value_t BytecodeInterpreter::ExecuteFunction(
+    const BytecodeFunction &bytecode_function,
+    const std::vector<value_t> &arguments) {
+  BytecodeInterpreter interpreter(bytecode_function);
+  interpreter.ExecuteFunction(arguments);
+
+  return interpreter.GetReturnValue<value_t>();
+}
+
+void BytecodeInterpreter::ExecuteFunction(
+    const BytecodeFunction &bytecode_function, char *param) {
+  BytecodeInterpreter interpreter(bytecode_function);
+  interpreter.ExecuteFunction({reinterpret_cast<value_t>(param)});
+}
+
+NEVER_INLINE NO_CLONE void BytecodeInterpreter::ExecuteFunction(
+    const std::vector<value_t> &arguments) {
+  // Fill the label_pointers_ array with the handler addresses at first
+  // startup. (This can't be done outside of this function, as the labels are
+  // not visible there.)
+  if (label_pointers_[0] == nullptr) {
+#define HANDLE_INST(op) \
+  label_pointers_[BytecodeFunction::GetOpcodeId(Opcode::op)] = &&_##op;
+
+#include "codegen/interpreter/bytecode_instructions.def"
+  }
+
+  InitializeActivationRecord(arguments);
+
+  // Get initial instruction pointer
+  const Instruction *bytecode =
+      reinterpret_cast<const Instruction *>(&bytecode_function_.bytecode_[0]);
+  const Instruction *ip = bytecode;
+
+  // Start execution with first instruction
+  INTERPRETER_DISPATCH_GOTO(ip);
+
+//--------------------------------------------------------------------------//
+// Dispatch area
+//
+// This is the actual dispatch area of the interpreter. Because we use
+// threaded interpretation, this is not a dispatch loop, but a long list of
+// labels, and the control flow jumps from one handler to the next with
+// gotos -> INTERPRETER_DISPATCH_GOTO(ip)
+//
+// The whole dispatch area gets generated using the bytecode_instructions.def
+// file. All instruction handlers from query_interpreter.h will get inlined
+// here for all their types. Even though the function looks small here,
+// it will be over 13kB in the resulting binary!
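+//
+// Illustrative expansion (a sketch, using a hypothetical opcode add_i32):
+// HANDLE_TYPED_INST(add, i32) below would roughly produce
+//
+//   _add_i32 : TRACE_CODE_PRE;
+//   ip = addHandler(ip);
+//   INTERPRETER_DISPATCH_GOTO(ip);
+//
+// i.e. every handler tail-jumps directly to the next instruction's label
+// instead of returning to a central dispatch loop.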
+//--------------------------------------------------------------------------//
+
+#ifdef LOG_TRACE_ENABLED
+#define TRACE_CODE_PRE LOG_TRACE("%s", bytecode_function_.Dump(ip).c_str())
+#else
+#define TRACE_CODE_PRE
+#endif
+
+#define HANDLE_RET_INST(op) \
+  _ret: \
+  TRACE_CODE_PRE; \
+  GetValueReference<value_t>(0) = GetValue<value_t>(ip->args[0]); \
+  return;
+
+#define HANDLE_TYPED_INST(op, type) \
+  _##op##_##type : TRACE_CODE_PRE; \
+  ip = op##Handler(ip); \
+  INTERPRETER_DISPATCH_GOTO(ip);
+
+#define HANDLE_INST(op) \
+  _##op : TRACE_CODE_PRE; \
+  ip = op##Handler(ip); \
+  INTERPRETER_DISPATCH_GOTO(ip);
+
+#define HANDLE_EXPLICIT_CALL_INST(op, func) \
+  _##op : TRACE_CODE_PRE; \
+  ip = explicit_callHandler(ip, &func); \
+  INTERPRETER_DISPATCH_GOTO(ip);
+
+#include "codegen/interpreter/bytecode_instructions.def"
+
+  //--------------------------------------------------------------------------//
+}
+
+template <typename type_t>
+type_t BytecodeInterpreter::GetReturnValue() {
+  // the ret instruction saves the return value in value slot 0 by definition
+  return GetValue<type_t>(0);
+}
+
+void BytecodeInterpreter::InitializeActivationRecord(
+    const std::vector<value_t> &arguments) {
+  // resize vector to required number of value slots
+  values_.resize(bytecode_function_.number_values_);
+
+  index_t value_slot = 1;
+
+  // fill in constants
+  for (auto &constant : bytecode_function_.constants_) {
+    SetValue(value_slot++, constant);
+  }
+
+  // check if the provided number of arguments matches the number required by
+  // the function
+  if (bytecode_function_.number_function_arguments_ != arguments.size()) {
+    throw Exception(
+        "llvm function called through interpreter with wrong number of "
+        "arguments");
+  }
+
+  // fill in function arguments
+  for (auto &argument : arguments) {
+    SetValue(value_slot++, argument);
+  }
+
+  // prepare call activations
+  call_activations_.resize(bytecode_function_.external_call_contexts_.size());
+  for (size_t i = 0; i < bytecode_function_.external_call_contexts_.size();
+       i++) {
+    auto &call_context = bytecode_function_.external_call_contexts_[i];
+    auto &call_activation = call_activations_[i];
+
+    // initialize libffi call interface
+    if (ffi_prep_cif(&call_activation.call_interface, FFI_DEFAULT_ABI,
+                     call_context.args.size(), call_context.dest_type,
+                     const_cast<ffi_type **>(call_context.arg_types.data())) !=
+        FFI_OK) {
+      throw Exception("initializing ffi call interface failed");
+    }
+
+    // save the pointers to the value slots in the contiguous arrays
+    for (const auto &arg : call_context.args) {
+      call_activation.value_pointers.push_back(&values_[arg]);
+    }
+    call_activation.return_pointer = &values_[call_context.dest_slot];
+  }
+}
+
+uintptr_t BytecodeInterpreter::AllocateMemory(size_t number_bytes) {
+  // allocate memory
+  std::unique_ptr<char[]> pointer =
+      std::unique_ptr<char[]>(new char[number_bytes]);
+
+  // get raw pointer before moving pointer object!
+ auto raw_pointer = reinterpret_cast(pointer.get()); + + allocations_.emplace_back(std::move(pointer)); + return raw_pointer; +} + +} // namespace interpreter +} // namespace codegen +} // namespace peloton \ No newline at end of file diff --git a/src/codegen/query.cpp b/src/codegen/query.cpp index 87ed5ab572d..c69601b3b7f 100644 --- a/src/codegen/query.cpp +++ b/src/codegen/query.cpp @@ -11,11 +11,15 @@ //===----------------------------------------------------------------------===// #include "codegen/query.h" -#include "codegen/execution_consumer.h" +#include "codegen/interpreter/bytecode_builder.h" +#include "codegen/interpreter/bytecode_interpreter.h" +#include "codegen/query_compiler.h" #include "common/timer.h" #include "executor/plan_executor.h" +#include "codegen/execution_consumer.h" #include "executor/executor_context.h" #include "storage/storage_manager.h" +#include "settings/settings_manager.h" namespace peloton { namespace codegen { @@ -31,36 +35,103 @@ void Query::Execute(executor::ExecutorContext &executor_context, llvm::Type *query_state_type = query_state_.GetType(); size_t parameter_size = codegen.SizeOf(query_state_type); PELOTON_ASSERT((parameter_size % 8 == 0) && - "parameter size not multiple of 8"); + "parameter size not multiple of 8"); // Allocate some space for the function arguments std::unique_ptr param_data{new char[parameter_size]}; char *param = param_data.get(); PELOTON_MEMSET(param, 0, parameter_size); - // We use this handy class to avoid complex casting and pointer manipulation - struct FunctionArguments { - executor::ExecutorContext *executor_context; - char *consumer_arg; - char rest[0]; - } PACKED; - // Set up the function arguments auto *func_args = reinterpret_cast(param_data.get()); func_args->executor_context = &executor_context; func_args->consumer_arg = consumer.GetConsumerState(); + bool force_interpreter = settings::SettingsManager::GetBool( + settings::SettingId::codegen_interpreter); + + if (is_compiled_ && !force_interpreter) { + ExecuteNative(func_args, stats); + } else { + try { + ExecuteInterpreter(func_args, stats); + } catch (interpreter::NotSupportedException e) { + LOG_ERROR("query not supported by interpreter: %s", e.what()); + } + } +} + +void Query::Prepare(const LLVMFunctions &query_funcs) { + llvm_functions_ = query_funcs; + + // verify the functions + // will also be done by Optimize() or Compile() if not done before, + // but we do not want to mix up the timings, so do it here + code_context_.Verify(); + + // optimize the functions + // TODO(marcel): add switch to enable/disable optimization + // TODO(marcel): add timer to measure time used for optimization (see + // RuntimeStats) + code_context_.Optimize(); + + is_compiled_ = false; +} + +void Query::Compile(CompileStats *stats) { // Timer Timer timer; - timer.Start(); + if (stats != nullptr) { + timer.Start(); + } + + // Compile all functions in context + LOG_TRACE("Starting Query compilation ..."); + code_context_.Compile(); + + // Get pointers to the JITed functions + compiled_functions_.init_func = + (compiled_function_t)code_context_.GetRawFunctionPointer( + llvm_functions_.init_func); + PELOTON_ASSERT(compiled_functions_.init_func != nullptr); + + compiled_functions_.plan_func = + (compiled_function_t)code_context_.GetRawFunctionPointer( + llvm_functions_.plan_func); + PELOTON_ASSERT(compiled_functions_.plan_func != nullptr); + + compiled_functions_.tear_down_func = + (compiled_function_t)code_context_.GetRawFunctionPointer( + llvm_functions_.tear_down_func); + 
PELOTON_ASSERT(compiled_functions_.tear_down_func != nullptr); + + is_compiled_ = true; + + LOG_TRACE("Compilation finished."); + + // Timer for JIT compilation + if (stats != nullptr) { + timer.Stop(); + stats->compile_ms = timer.GetDuration(); + timer.Reset(); + } +} + +void Query::ExecuteNative(FunctionArguments *function_arguments, + RuntimeStats *stats) { + // Start timer + Timer timer; + if (stats != nullptr) { + timer.Start(); + } // Call init LOG_TRACE("Calling query's init() ..."); try { - init_func_(param); + compiled_functions_.init_func(function_arguments); } catch (...) { // Cleanup if an exception is encountered - tear_down_func_(param); + compiled_functions_.tear_down_func(function_arguments); throw; } @@ -75,10 +146,10 @@ void Query::Execute(executor::ExecutorContext &executor_context, // Execute the query! LOG_TRACE("Calling query's plan() ..."); try { - plan_func_(param); + compiled_functions_.plan_func(function_arguments); } catch (...) { // Cleanup if an exception is encountered - tear_down_func_(param); + compiled_functions_.tear_down_func(function_arguments); throw; } @@ -92,7 +163,7 @@ void Query::Execute(executor::ExecutorContext &executor_context, // Clean up LOG_TRACE("Calling query's tearDown() ..."); - tear_down_func_(param); + compiled_functions_.tear_down_func(function_arguments); // No need to cleanup if we get an exception while cleaning up... if (stats != nullptr) { @@ -101,33 +172,82 @@ void Query::Execute(executor::ExecutorContext &executor_context, } } -bool Query::Prepare(const QueryFunctions &query_funcs) { - LOG_TRACE("Going to JIT the query ..."); +void Query::ExecuteInterpreter(FunctionArguments *function_arguments, + RuntimeStats *stats) { + LOG_INFO("Using codegen interpreter to execute plan"); - // Compile the code - if (!code_context_.Compile()) { - return false; + // Timer + Timer timer; + if (stats != nullptr) { + timer.Start(); } - LOG_TRACE("Setting up Query ..."); + // Create Bytecode + interpreter::BytecodeFunction init_bytecode = + interpreter::BytecodeBuilder::CreateBytecodeFunction( + code_context_, llvm_functions_.init_func); + interpreter::BytecodeFunction plan_bytecode = + interpreter::BytecodeBuilder::CreateBytecodeFunction( + code_context_, llvm_functions_.plan_func); + interpreter::BytecodeFunction tear_down_bytecode = + interpreter::BytecodeBuilder::CreateBytecodeFunction( + code_context_, llvm_functions_.tear_down_func); - // Get pointers to the JITed functions - init_func_ = (compiled_function_t)code_context_.GetRawFunctionPointer( - query_funcs.init_func); - PELOTON_ASSERT(init_func_ != nullptr); + // Time initialization + if (stats != nullptr) { + timer.Stop(); + stats->interpreter_prepare_ms = timer.GetDuration(); + timer.Reset(); + timer.Start(); + } - plan_func_ = (compiled_function_t)code_context_.GetRawFunctionPointer( - query_funcs.plan_func); - PELOTON_ASSERT(plan_func_ != nullptr); + // Call init + LOG_TRACE("Calling query's init() ..."); + try { + interpreter::BytecodeInterpreter::ExecuteFunction( + init_bytecode, reinterpret_cast(function_arguments)); + } catch (...) { + interpreter::BytecodeInterpreter::ExecuteFunction( + tear_down_bytecode, reinterpret_cast(function_arguments)); + throw; + } + + if (stats != nullptr) { + timer.Stop(); + stats->init_ms = timer.GetDuration(); + timer.Reset(); + timer.Start(); + } + + // Execute the query! + LOG_TRACE("Calling query's plan() ..."); + try { + interpreter::BytecodeInterpreter::ExecuteFunction( + plan_bytecode, reinterpret_cast(function_arguments)); + } catch (...) 
{ + interpreter::BytecodeInterpreter::ExecuteFunction( + tear_down_bytecode, reinterpret_cast(function_arguments)); + throw; + } - tear_down_func_ = (compiled_function_t)code_context_.GetRawFunctionPointer( - query_funcs.tear_down_func); - PELOTON_ASSERT(tear_down_func_ != nullptr); + // Timer plan execution + if (stats != nullptr) { + timer.Stop(); + stats->plan_ms = timer.GetDuration(); + timer.Reset(); + timer.Start(); + } - LOG_TRACE("Query has been setup ..."); + // Clean up + LOG_TRACE("Calling query's tearDown() ..."); + interpreter::BytecodeInterpreter::ExecuteFunction( + tear_down_bytecode, reinterpret_cast(function_arguments)); - // All is well - return true; + // No need to cleanup if we get an exception while cleaning up... + if (stats != nullptr) { + timer.Stop(); + stats->tear_down_ms = timer.GetDuration(); + } } } // namespace codegen diff --git a/src/codegen/updateable_storage.cpp b/src/codegen/updateable_storage.cpp index f911f13a8a1..ba699aeea25 100644 --- a/src/codegen/updateable_storage.cpp +++ b/src/codegen/updateable_storage.cpp @@ -16,6 +16,7 @@ #include "codegen/lang/if.h" #include "codegen/type/sql_type.h" +#include "util/math_util.h" namespace peloton { namespace codegen { @@ -248,7 +249,7 @@ UpdateableStorage::NullBitmap::NullBitmap(CodeGen &codegen, bitmap_ptr_ = codegen->CreateConstInBoundsGEP2_32( storage.GetNullBitmapType(), bitmap_arr, 0, 0); } - uint32_t num_bytes = (storage_.GetNumElements() + 7) >> 3; + uint32_t num_bytes = MathUtil::DivRoundUp(storage_.GetNumElements(), 8); bytes_.resize(num_bytes, nullptr); dirty_.resize(num_bytes, false); } diff --git a/src/executor/plan_executor.cpp b/src/executor/plan_executor.cpp index 6226e3a26cf..a945c46bc59 100644 --- a/src/executor/plan_executor.cpp +++ b/src/executor/plan_executor.cpp @@ -56,10 +56,10 @@ static void CompileAndExecutePlan( // Check if we have a cached compiled plan already codegen::Query *query = codegen::QueryCache::Instance().Find(plan); if (query == nullptr) { - // Cached plan doesn't exist, let's compile the query codegen::QueryCompiler compiler; auto compiled_query = compiler.Compile( *plan, executor_context.GetParams().GetQueryParametersMap(), consumer); + compiled_query->Compile(); // Grab an instance to the plan query = compiled_query.get(); diff --git a/src/include/codegen/code_context.h b/src/include/codegen/code_context.h index be41f2f536d..2c6c1b97a5d 100644 --- a/src/include/codegen/code_context.h +++ b/src/include/codegen/code_context.h @@ -34,6 +34,10 @@ namespace codegen { class FunctionBuilder; +namespace interpreter { +class BytecodeBuilder; +} // namespace interpreter + //===----------------------------------------------------------------------===// // The context where all generated LLVM query code resides. We create a context // instance for every query we see. 
We keep instances of these around in the @@ -43,6 +47,7 @@ class FunctionBuilder; class CodeContext { friend class CodeGen; friend class FunctionBuilder; + friend class interpreter::BytecodeBuilder; public: using FuncPtr = void *; @@ -63,7 +68,7 @@ class CodeContext { void RegisterBuiltin(llvm::Function *func_decl, FuncPtr func_impl); /// Lookup a builtin function that has been registered in this context - llvm::Function *LookupBuiltin(const std::string &name) const; + std::pair LookupBuiltin(const std::string &name) const; /// Return the LLVM function for UDF that has been registered in this context llvm::Function *GetUDF() const { return udf_func_ptr_; } @@ -71,13 +76,37 @@ class CodeContext { /// Sets UDF function ptr void SetUDF(llvm::Function *func_ptr) { udf_func_ptr_ = func_ptr; } + /// Verify all the code contained in this context + void Verify(); + + /// Optimize all the code contained in this context + void Optimize(); + /// Compile all the code contained in this context - bool Compile(); + void Compile(); /// Retrieve the raw function pointer to the provided compiled LLVM function FuncPtr GetRawFunctionPointer(llvm::Function *fn) const; + /// Get the number of bytes that are needed to store this type + size_t GetTypeSize(llvm::Type *type) const; + + /// Get the number of bits that are needed to store this type + size_t GetTypeSizeInBits(llvm::Type *type) const; + + /// Get the number of bytes between two elements of this type + /// This also includes the padding + size_t GetTypeAllocSize(llvm::Type *type) const; + + /// Get the number of bits between two elements of this type + /// This also includes the padding + size_t GetTypeAllocSizeInBits(llvm::Type *type) const; + + /// Get the offset of element inside a struct in byte + size_t GetStructElementOffset(llvm::StructType *type, size_t index) const; + /// Dump the contents of all the code in this context + /// Attention: this function may change the IR! void DumpContents() const; ////////////////////////////////////////////////////////////////////////////// @@ -143,20 +172,23 @@ class CodeContext { llvm::Type *int16_type_; llvm::Type *int32_type_; llvm::Type *int64_type_; + llvm::Type *float_type_; llvm::Type *double_type_; llvm::Type *void_type_; llvm::Type *void_ptr_type_; llvm::PointerType *char_ptr_type_; // All C/C++ builtin functions and their implementations - std::unordered_map builtins_; + std::unordered_map> + builtins_; // The functions needed in this module, and their implementations. If the // function has not been compiled yet, the function pointer will be NULL. 
The // function pointers are populated in Compile() std::vector> functions_; - std::unordered_map function_symbols_; + // Shows if the Verify() has been run + bool is_verified_; }; } // namespace codegen diff --git a/src/include/codegen/codegen.h b/src/include/codegen/codegen.h index 9a56edf5dfd..07952d2c4a6 100644 --- a/src/include/codegen/codegen.h +++ b/src/include/codegen/codegen.h @@ -157,9 +157,7 @@ class CodeGen { // Function lookup and registration //===--------------------------------------------------------------------===// llvm::Type *LookupType(const std::string &name) const; - llvm::Function *LookupBuiltin(const std::string &fn_name) const { - return code_context_.LookupBuiltin(fn_name); - } + std::pair LookupBuiltin(const std::string &name) const; llvm::Function *RegisterBuiltin(const std::string &fn_name, llvm::FunctionType *fn_type, void *func_impl); @@ -182,6 +180,13 @@ class CodeGen { return code_context_.GetCurrentFunction(); } + //===--------------------------------------------------------------------===// + // DEBUG OUTPUT + //===--------------------------------------------------------------------===// + + static std::string Dump(const llvm::Value *value); + static std::string Dump(llvm::Type *type); + private: friend class Hash; friend class Value; diff --git a/src/include/codegen/interpreter/bytecode_builder.h b/src/include/codegen/interpreter/bytecode_builder.h new file mode 100644 index 00000000000..20571a0a248 --- /dev/null +++ b/src/include/codegen/interpreter/bytecode_builder.h @@ -0,0 +1,470 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// bytecode_builder.h +// +// Identification: src/include/codegen/interpreter/bytecode_builder.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "codegen/interpreter/bytecode_function.h" + +namespace llvm { +class Instruction; +class Function; +class Value; +class BasicBlock; +class Type; +class Constant; +class CallInst; +class ExtractValueInst; +} // namespace llvm + +namespace peloton { +namespace codegen { + +class CodeContext; + +namespace interpreter { + +class BytecodeBuilder { + public: + /** + * Static method to create a bytecode function from a code context. + * @param code_context CodeContext containing the LLVM function + * @param function LLVM function that shall be interpreted later + * @return A BytecodeFunction object that can be passed to the + * BytecodeInterpreter (several times). + */ + static BytecodeFunction CreateBytecodeFunction( + const CodeContext &code_context, const llvm::Function *function, + bool use_naive_register_allocator = false); + + private: + // These types definitions have the purpose to make the code better + // understandable. The bytecode builder creates indexes to identify the + // LLVM types, which usually are only accessed by raw pointers. + // Those types shall indicate which index is meant by a function. + // None of these indexes end up in the bytecode function! + using value_index_t = index_t; + using instruction_index_t = index_t; + + /** + * Describes a bytecode relocation that has to be applied to add the + * destination of a branch instruction once its value is available. + * It gets created by TranslateBranch() and is processed after + * TranslateFunction() processed all instructions. 
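+   *
+   * (For example: a forward branch records its instruction slot and which
+   * argument holds the jump target; once all basic blocks are translated,
+   * that argument is patched with the first instruction index of the
+   * destination block.)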
+   */
+  struct BytecodeRelocation {
+    index_t instruction_slot;
+    index_t argument;
+    const llvm::BasicBlock *bb;
+  };
+
+  /**
+   * Describes the liveness of a value by start and end instruction index.
+   */
+  using ValueLiveness = std::pair<instruction_index_t, instruction_index_t>;
+
+ private:
+  BytecodeBuilder(const CodeContext &code_context,
+                  const llvm::Function *function);
+
+  /**
+   * Analyses the function to collect values and constants and gathers
+   * value liveness information
+   */
+  void AnalyseFunction();
+
+  /**
+   * Naive register allocation that just assigns a unique value slot to
+   * every value
+   */
+  void PerformNaiveRegisterAllocation();
+
+  /**
+   * Greedy register allocation that, for each value, tries to find the next
+   * free value slot that is no longer occupied.
+   */
+  void PerformGreedyRegisterAllocation();
+
+  /**
+   * Translates all instructions into bytecode.
+   */
+  void TranslateFunction();
+
+  /**
+   * Do some final conversions to make the created BytecodeFunction usable.
+   */
+  void Finalize();
+
+ private:
+  //===--------------------------------------------------------------------===//
+  // Methods for Value Handling
+  //===--------------------------------------------------------------------===//
+
+  /**
+   * Gets the value index for a given LLVM value. If no value index exists
+   * for this LLVM value, a new one is created.
+   * @param value LLVM Value
+   * @return the value index that is mapped to this LLVM value
+   */
+  value_index_t GetValueIndex(const llvm::Value *value);
+
+  /**
+   * Maps a given LLVM value to the same value index as another LLVM Value.
+   * @param alias LLVM value
+   * @param value_index the value index to map the LLVM value to. The
+   * value index must already exist.
+   * @return the value index that was given as parameter
+   */
+  value_index_t CreateValueAlias(const llvm::Value *alias,
+                                 value_index_t value_index);
+
+  /**
+   * Returns the value_index for an LLVM constant.
+   * In LLVM several Constant objects with the same value can exist. This
+   * function tries to find an existing constant with the same value or creates
+   * a new one if necessary.
+   * @param constant LLVM constant
+   * @return a value index that refers to a constant with the same value. If
+   * no internal constant with this value existed before, a new value index
+   * is created.
+   */
+  value_index_t GetConstantIndex(const llvm::Constant *constant);
+
+  /**
+   * Returns the value slot (register) for a given LLVM value
+   * @param value LLVM value
+   * @return the value slot (register) assigned by the register allocation.
+   * This function must not be called before the analysis pass and the
+   * register allocation!
+   */
+  index_t GetValueSlot(const llvm::Value *value) const;
+
+  /**
+   * Extends the liveness range of a value to cover the given instruction
+   * index. The range will be extended to the "left" or the "right" if
+   * necessary, or not at all if it already covers this index.
+   * This function calls GetValueIndex, which may create a new value index.
+   * @param llvm_value LLVM value for which the liveness should be extended
+   * @param instruction_index position in the instruction stream that the
+   * liveness range must cover
+   */
+  void ExtendValueLiveness(const llvm::Value *llvm_value,
+                           instruction_index_t instruction_index);
+
+  /**
+   * Returns the index for an additional temporary value slot in that
+   * basic block. Due to the phi swap problem (lost copy) it can happen
+   * that during translation additional value slots are needed that have not
+   * been mapped by the register allocation.
The number of additional temporary + * value slots is tracked and added to the overall number of value + * slots during finalization. + * @param bb basic block the temporary value slot shall be created in + * @return a temporary value slot index, that can be used only in + * this basic block + */ + index_t GetTemporaryValueSlot(const llvm::BasicBlock *bb); + + //===--------------------------------------------------------------------===// + // Helper Functions (const) + //===--------------------------------------------------------------------===// + + /** + * Returns the matching FFI type for a given LLVM type + * @param type LLVM type + * @return FFI type + */ + ffi_type *GetFFIType(llvm::Type *type) const; + + /** + * Checks if a LLVM Value is a constant + * @param value LLVM Value + * @return true, if the given LLVM value is a constant + */ + bool IsConstantValue(const llvm::Value *value) const; + + /** + * Extracts the actual constant value of a LLVM constant + * @param constant LLVM constant + * @return the actual value of the constant, sign or zero extended to + * the size of value_t + */ + value_t GetConstantValue(const llvm::Constant *constant) const; + + /** + * Directly extracts the signed integer value of a integer constant + * @param constant LLVM Constant that is a instance of llvm::ConstantInt + * @return signed integer value of the LLVM constant + */ + int64_t GetConstantIntegerValueSigned(llvm::Value *constant) const; + + /** + * Directly extracts the unsigned integer value of a integer constant + * @param constant LLVM Constant that is a instance of llvm::ConstantInt + * @return unsigned integer value of the LLVM constant + */ + uint64_t GetConstantIntegerValueUnsigned(llvm::Value *constant) const; + + /** + * Checks if one basic block is the successor of another basic block + * when walking all basic blocks in reverse post order. + * (Because ->nextNode doesn't work then) + * @param bb current LLVM basic block + * @param succ LLVM basic block that shall be checked to be the successor + * @return true, if succ is the successor of bb + */ + bool BasicBlockIsRPOSucc(const llvm::BasicBlock *bb, + const llvm::BasicBlock *succ) const; + + /** + * Creates the typed opcode for a bytecode instruction that is defined for + * _all_ types + * @param untyped_op untyped opcode for a byte instruction, retrieved using + * GET_FIRST_ALL_TYPES(op), where op must be defined for all types. + * @param type LLVM type to take the type information from + * @return typed opcode _ + */ + Opcode GetOpcodeForTypeAllTypes(Opcode untyped_op, llvm::Type *type) const; + + /** + * Creates the typed opcode for a bytecode instruction that is defined only + * for _integer_ types + * @param untyped_op untyped opcode for a byte instruction, retrieved using + * GET_FIRST_INT_TYPES(op), where op must be defined only for integer types. + * @param type LLVM type to take the type information from + * @return typed opcode _ + */ + Opcode GetOpcodeForTypeIntTypes(Opcode untyped_op, llvm::Type *type) const; + + /** + * Creates the typed opcode for a bytecode instruction that is defined only + * for _floating point_ types + * @param untyped_op untyped opcode for a byte instruction, retrieved using + * GET_FIRST_FLOAT_TYPES(op), where op must be defined only for float types. 
+ * @param type LLVM type to take the type information from + * @return typed opcode _ + */ + Opcode GetOpcodeForTypeFloatTypes(Opcode untyped_op, llvm::Type *type) const; + + /** + * Creates the typed opcode for a bytecode instruction that is defined only + * for _integer_ types. In difference to the other function, this one only + * considers the type size to determine the opcode type. + * @param untyped_op untyped opcode for a byte instruction, retrieved using + * GET_FIRST_INT_TYPES(op), where op must be defined only for integer types. + * @param type LLVM type to take the size information from + * @return typed opcode _ + */ + Opcode GetOpcodeForTypeSizeIntTypes(Opcode untyped_op, + llvm::Type *type) const; + + //===--------------------------------------------------------------------===// + // Methods for creating Bytecode Instructions + //===--------------------------------------------------------------------===// + + /** + * Insert a bytecode instruction into the bytecode stream. + */ + Instruction &InsertBytecodeInstruction( + const llvm::Instruction *llvm_instruction, Opcode opcode, + const std::vector &args); + + /** + * Insert a bytecode instruction into the bytecode stream. + * Wrapper that automatically gets the value slots for the LLVM values + * provided. + */ + Instruction &InsertBytecodeInstruction( + const llvm::Instruction *llvm_instruction, Opcode opcode, + const std::vector &args); + + /** + * Insert a external call bytecode instruction into the bytecode stream. + * @param llvm_instruction LLVM function this instruction is created from. + * (Only needed for tracing information, not used in Release mode!) + * @param call_context index of the call context created for this external + * call instruction + * @param function function pointer to the external function + * @return Reference to the created instruction in the bytecode stream. + */ + ExternalCallInstruction &InsertBytecodeExternalCallInstruction( + const llvm::Instruction *llvm_instruction, index_t call_context, + void *function); + + /** + * Insert a internal call bytecode instruction into the bytecode stream. + * @param llvm_instruction LLVM function this instruction is created from. + * (Only needed for tracing information, not used in Release mode!) + * @param sub_function index to the sub function (bytecode function) for + * this LLVM function + * @param dest_slot Destination slot for the return value. Set zero if + * internal function returns void. + * @param number_arguments number of arguments provided in this function call. + * The internal call instruction has variadic size, depending on the number + * of arguments! + * @return Reference to the created instruction in the bytecode stream. + */ + InternalCallInstruction &InsertBytecodeInternalCallInstruction( + const llvm::Instruction *llvm_instruction, index_t sub_function, + index_t dest_slot, size_t number_arguments); + +/** + * Helper function, that adds the given instruction to the instruction trace. + * (Should only be called from InsertBytecode instructions) + * In Release mode this function compiles to a stub. 
+ * @param llvm_instruction LLVM instruction the just created bytecode + * instruction originates from + * @param number_instruction_slots size of the bytecode instruction + */ +#ifndef NDEBUG + void AddInstructionToTrace(const llvm::Instruction *llvm_instruction, + size_t number_instruction_slots = 1); +#else + void AddInstructionToTrace( + UNUSED_ATTRIBUTE const llvm::Instruction *llvm_instruction, + UNUSED_ATTRIBUTE size_t number_instruction_slots = 1) {} +#endif + + //===--------------------------------------------------------------------===// + // Methods for Translating LLVM Instructions (called by TranslateFunction()) + //===--------------------------------------------------------------------===// + + /** + * Resolves the PHI nodes referring to this basic block, by placing mov + * instructions. Must be called just before the terminating LLVM instruction + * in a basic block. If the PHI swap / lost copy problem can occur, the + * function creates additional mov instructions and value slots. + * @param bb current basic block + */ + void ProcessPHIsForBasicBlock(const llvm::BasicBlock *bb); + + void TranslateBranch(const llvm::Instruction *instruction, + std::vector &bytecode_relocations); + void TranslateReturn(const llvm::Instruction *instruction); + void TranslateBinaryOperator(const llvm::Instruction *instruction); + void TranslateAlloca(const llvm::Instruction *instruction); + void TranslateLoad(const llvm::Instruction *instruction); + void TranslateStore(const llvm::Instruction *instruction); + void TranslateGetElementPtr(const llvm::Instruction *instruction); + void TranslateIntExt(const llvm::Instruction *instruction); + void TranslateFloatTruncExt(const llvm::Instruction *instruction); + void TranslateFloatIntCast(const llvm::Instruction *instruction); + void TranslateCmp(const llvm::Instruction *instruction); + void TranslateCall(const llvm::Instruction *instruction); + void TranslateSelect(const llvm::Instruction *instruction); + void TranslateExtractValue(const llvm::Instruction *instruction); + + private: + /** + * The bytecode function that is created (and then moved). All other + * members are helping data structures that don't end up in the resulting + * bytecode function + */ + BytecodeFunction bytecode_function_; + + /** + * Mapping from Value* to internal value index (includes merged + * values/constants). The value index is used to access the vectors below. + */ + std::unordered_map value_mapping_; + + /** + * Holds the value liveness per value (after analysis) + */ + std::vector value_liveness_; + + /** + * Holds the assigned value slot per value (after register allocation) + */ + std::vector value_slots_; + + /** + * Overall number of value slots needed (from register allocation) + * without temporary value slots (added during translation) + */ + size_t number_value_slots_; + + /** + * Holds the value_index of the constants in bytecode_function_.constants_, + * accessed with the same index. + */ + std::vector constant_value_indexes_; + + /** + * Additional temporary value slots (created due to phi swap problem). + * Mapping from instruction index to number of temporary slots needed + * at that time (specified by instruction index). + */ + std::unordered_map + number_temporary_values_; + + /** + * Maximum number of temporary value slots needed at all time points. 
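+   * (That is, the maximum over all entries in number_temporary_values_.)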
+   */
+  size_t number_temporary_value_slots_;
+
+  /**
+   * Keeps track of all Call instructions that refer to an overflow-aware
+   * operation, as their results get saved directly in the destination slots
+   * of the ExtractValue instructions referring to them.
+   */
+  std::unordered_map<
+      const llvm::CallInst *,
+      std::pair>
+      overflow_results_mapping_;
+
+  /**
+   * Mapping of LLVM functions to bytecode functions to avoid duplicated
+   * functions in case an internal function is called several times
+   */
+  std::unordered_map sub_function_mapping_;
+
+  /**
+   * ReversePostOrderTraversal, which is used for all BB traversals.
+   * Initialization is very expensive, so we reuse it.
+   * Cannot be const, because the class doesn't provide const iterators.
+   */
+  llvm::ReversePostOrderTraversal rpo_traversal_;
+
+  /**
+   * A vector holding the basic block pointers in reverse post order.
+   * This vector is retrieved from the RPO traversal and is needed
+   * for predecessor lookups.
+   */
+  std::vector bb_reverse_post_order_;
+
+  /**
+   * Original code context the bytecode function is built from
+   */
+  const CodeContext &code_context_;
+
+  /**
+   * LLVM function that shall be translated
+   */
+  const llvm::Function *llvm_function_;
+};
+
+class NotSupportedException : public std::runtime_error {
+ public:
+  NotSupportedException(std::string message) : std::runtime_error(message) {}
+};
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/include/codegen/interpreter/bytecode_function.h b/src/include/codegen/interpreter/bytecode_function.h
new file mode 100644
index 00000000000..3bbb077e4f0
--- /dev/null
+++ b/src/include/codegen/interpreter/bytecode_function.h
@@ -0,0 +1,359 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_function.h
+//
+// Identification: src/include/codegen/interpreter/bytecode_function.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <ffi.h>
+
+#include <cstdint>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+
+#include "common/macros.h"
+#include "util/math_util.h"
+
+namespace llvm {
+class Instruction;
+}  // namespace llvm
+
+namespace peloton {
+namespace codegen {
+
+class CodeContext;
+
+namespace interpreter {
+
+class BytecodeInterpreter;
+class BytecodeBuilder;
+
+// Type definitions to match the LLVM terminology
+using i8 = uint8_t;
+using i16 = uint16_t;
+using i32 = uint32_t;
+using i64 = uint64_t;
+using value_t = uint64_t;
+using index_t = uint16_t;
+using instr_slot_t = uint64_t;
+
+// Type template that converts any type into the matching interpreter type
+template <typename type>
+using bytecode_type = typename std::conditional<
+    sizeof(type) == 1, i8,
+    typename std::conditional<
+        sizeof(type) == 2, i16,
+        typename std::conditional<sizeof(type) == 4, i32,
+                                  i64>::type>::type>::type;
+
+/**
+ * Enum holding all Opcodes for all instructions.
+ */
+enum class Opcode : index_t {
+  undefined,
+
+#define HANDLE_INST(opcode) opcode,
+#include "codegen/interpreter/bytecode_instructions.def"
+#undef HANDLE_INST
+
+  NUMBER_OPCODES
+};
+
+/**
+ * Struct to access a generic bytecode instruction.
+ * Every bytecode instruction starts with a 2-byte Opcode, followed by a
+ * variable number of 2-byte arguments. (Exception: ExternalCallInstruction)
+ *
+ * This struct is only for accessing Instructions, not for saving them!
+ * (sizeof returns a wrong value.) All bytecode instructions are saved in
+ * one or more 8-byte instruction slots (instr_slot_t) in the bytecode stream.
+ */
+struct Instruction {
+  Opcode op;
+  index_t args[];
+};
+
+/**
+ * Specialized struct for accessing an InternalCallInstruction. The number of
+ * arguments in .args[] is variable and must match the value in .number_args.
+ * GetInteralCallInstructionSlotSize uses this information to calculate the
+ * number of occupied instruction slots.
+ */
+struct InternalCallInstruction {
+  Opcode op;
+  index_t sub_function;
+  index_t dest_slot;
+  index_t number_args;
+  index_t args[];
+};
+
+/**
+ * Specialized struct for accessing an ExternalCallInstruction. It is the only
+ * instruction that contains a field larger than 2 bytes.
+ * Because libffi requires pointers to value slots of the current
+ * activation record, the instruction itself only contains an index for
+ * accessing the proper call context. During interpretation a call activation
+ * is created for every call context, holding the actual runtime pointers,
+ * which can be accessed with the same index.
+ */
+struct ExternalCallInstruction {
+  Opcode op;
+  index_t external_call_context;
+  void (*function)(void);
+};
+
+/**
+ * Call context holding the information needed to create a runtime call
+ * activation for an ExternalCallInstruction in the bytecode stream.
+ */
+struct ExternalCallContext {
+  index_t dest_slot;
+  ffi_type *dest_type;
+  std::vector<index_t> args;
+  std::vector<ffi_type *> arg_types;
+};
+
+/**
+ * A BytecodeFunction contains all information necessary to run an LLVM
+ * function in the interpreter and is completely independent of the
+ * CodeContext it was created from (except for the tracing information in debug
+ * mode). It can be moved and copied.
+ */
+class BytecodeFunction {
+ public:
+  /**
+   * Returns the Opcode enum for a given Opcode Id (to avoid plain casting)
+   * @param id Opcode Id
+   * @return Opcode enum
+   */
+  ALWAYS_INLINE inline static constexpr Opcode GetOpcodeFromId(index_t id) {
+    return static_cast<Opcode>(id);
+  }
+
+  /**
+   * Returns the Opcode Id for a given Opcode enum (to avoid plain casting)
+   * @param opcode Opcode enum
+   * @return Opcode Id
+   */
+  ALWAYS_INLINE inline static constexpr index_t GetOpcodeId(Opcode opcode) {
+    return static_cast<index_t>(opcode);
+  }
+
+  /**
+   * Returns a human-readable string for a given Opcode
+   * @param opcode Opcode enum
+   * @return string representation of the Opcode
+   */
+  static const char *GetOpcodeString(Opcode opcode);
+
+  /**
+   * Returns the overall number of existing Opcodes (not trivial, as the
+   * Opcodes are created with expanding macros)
+   * @return overall number of existing Opcodes
+   */
+  inline static constexpr size_t GetNumberOpcodes() {
+    return static_cast<size_t>(Opcode::NUMBER_OPCODES);
+  }
+
+  /**
+   * Returns the instruction pointer for a given instruction index (from this
+   * bytecode function)
+   * @param index instruction index
+   * @return pointer to the instruction at that index inside the bytecode
+   */
+  ALWAYS_INLINE inline const Instruction *GetIPFromIndex(index_t index) const {
+    return reinterpret_cast<const Instruction *>(
+        const_cast<instr_slot_t *>(bytecode_.data()) + index);
+  }
+
+  /**
+   * Returns the instruction index for a given instruction pointer (from this
+   * bytecode function)
+   * @param instruction pointer to a given instruction inside the bytecode
+   * @return index of the instruction the pointer is pointing to
+   */
+  ALWAYS_INLINE inline index_t GetIndexFromIP(
+      const Instruction *instruction) const {
+    index_t index =
+        reinterpret_cast<const instr_slot_t *>(instruction) - bytecode_.data();
+    return index;
+  }
+
+#ifndef NDEBUG
+  const llvm::Instruction *GetIRInstructionFromIP(index_t instr_slot) const;
+#endif
+
+  /**
+   * Returns the number of slots a given instruction occupies in the bytecode
+   * stream.
+   * @param instruction pointer to the instruction inside the bytecode
+   * @return number of slots (8 bytes each) that are used by this instruction
+   */
+  static size_t GetInstructionSlotSize(const Instruction *instruction);
+
+  /**
+   * Returns the number of slots a given internal call instruction occupies in
+   * the bytecode stream. Internal call instructions have a variable length,
+   * so the size has to be calculated.
+   * @param instruction pointer to an instruction of type internal call
+   * @return number of slots (8 bytes each) that are used by this instruction
+   */
+  static ALWAYS_INLINE inline size_t GetInteralCallInstructionSlotSize(
+      const InternalCallInstruction *instruction) {
+    const size_t number_slots =
+        MathUtil::DivRoundUp(sizeof(uint16_t) * (4 + instruction->number_args),
+                             sizeof(instr_slot_t));
+    PELOTON_ASSERT(number_slots > 0);
+    return number_slots;
+  }
+
+  /**
+   * Returns the number of slots an explicit call instruction occupies,
+   * given the number of argument slots. (The return value and/or object
+   * pointer also need a slot!)
+   * @param number_args number of needed argument slots
+   * @return number of slots (8 bytes each) that are used by this instruction
+   */
+  static constexpr ALWAYS_INLINE inline size_t
+  GetExplicitCallInstructionSlotSize(size_t number_args) {
+    return MathUtil::DivRoundUp(sizeof(uint16_t) * (1 + number_args),
+                                sizeof(instr_slot_t));
+  }
+
+  /**
+   * Returns the number of required argument slots that are needed in an
+   * explicit call bytecode instruction for this function.
+   * @param func pointer/reference to the function (declaration must be
+   * visible)
+   * @return number of required argument slots
+   * = arguments + return value + object pointer
+   */
+  template <typename return_type, typename... arg_types>
+  static constexpr ALWAYS_INLINE inline size_t GetFunctionRequiredArgSlotsNum(
+      UNUSED_ATTRIBUTE return_type (*func)(arg_types...)) {
+    return (std::is_void<return_type>::value) ? sizeof...(arg_types)
+                                              : sizeof...(arg_types) + 1;
+  }
+
+  template <typename return_type, typename class_type, typename... arg_types>
+  static constexpr ALWAYS_INLINE inline size_t GetFunctionRequiredArgSlotsNum(
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...)) {
+    return (std::is_void<return_type>::value) ? sizeof...(arg_types) + 1
+                                              : sizeof...(arg_types) + 2;
+  }
+
+  template <typename return_type, typename class_type, typename... arg_types>
+  static constexpr ALWAYS_INLINE inline size_t GetFunctionRequiredArgSlotsNum(
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...) const) {
+    return (std::is_void<return_type>::value) ? sizeof...(arg_types) + 1
+                                              : sizeof...(arg_types) + 2;
+  }
+
+  /**
+   * Returns the opcode for a given function name string (lookup in hash map).
+   * @param function_name string of the function name, including namespace
+   * @return the matching opcode, or Opcode::undefined
+   */
+  static Opcode GetExplicitCallOpcodeByString(std::string function_name);
+
+  /**
+   * Dumps the bytecode and the constants of this bytecode function to a
+   * file, identified by function name.
+   */
+  void DumpContents() const;
+
+  /**
+   * Gives a textual representation of the given instruction (and the
+   * LLVM instruction it originates from, if Debug mode is enabled).
+   * @param instruction instruction from this bytecode function
+   * @return string containing a textual representation of the instruction
+   */
+  std::string Dump(const Instruction *instruction) const;
+
+ private:
+  /**
+   * Creates a new empty BytecodeFunction object.
+   * @param function_name name of the function, usually inherited from the
+   * code context.
+   */
+  BytecodeFunction(std::string function_name) : function_name_(function_name) {}
+
+ private:
+  /**
+   * Function name of the original function (used only for output).
+   */
+  std::string function_name_;
+
+  /**
+   * Number of needed value slots at runtime.
+   */
+  size_t number_values_;
+
+  /**
+   * Number of function arguments (to check that the correct number is given
+   * to the interpreter)
+   */
+  size_t number_function_arguments_;
+
+  /**
+   * Constants needed during runtime.
+   */
+  std::vector<value_t> constants_;
+
+  /**
+   * This array of instruction slots holds the actual bytecode that is
+   * interpreted. Usually one instruction occupies one slot, but some
+   * instructions require several slots. Except for InternalCallInstruction,
+   * all instructions have a static size. The number of occupied instruction
+   * slots for an instruction can be obtained with GetInstructionSlotSize().
+   *
+   * The "Instruction" struct can be used to access every instruction in a
+   * generic way.
+   *
+   * It can be accessed by index (instruction index) or by a direct pointer
+   * to an instruction slot (IP).
+   */
+  std::vector<instr_slot_t> bytecode_;
+
+  /**
+   * Call contexts that belong to ExternalCallInstructions in the bytecode,
+   * accessed by index.
+   */
+  std::vector<ExternalCallContext> external_call_contexts_;
+
+  /**
+   * Hierarchical array of further bytecode functions belonging to
+   * internal function calls, accessed by index.
+   */
+  std::vector<BytecodeFunction> sub_functions_;
+
+  /**
+   * Constant map created at system startup that maps the function name
+   * strings of explicitly defined functions to their opcodes. This way the
+   * function name lookup is implicitly made with a hash table.
+   */
+  static const std::unordered_map<std::string, Opcode>
+      explicit_call_opcode_mapping_;
+
+#ifndef NDEBUG
+  /**
+   * In Debug mode: Maps every bytecode instruction slot to the
+   * LLVM instruction it was created from.
+   */
+  std::vector<const llvm::Instruction *> instruction_trace_;
+#endif
+
+ private:
+  friend BytecodeInterpreter;
+  friend BytecodeBuilder;
+};
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/include/codegen/interpreter/bytecode_instructions.def b/src/include/codegen/interpreter/bytecode_instructions.def
new file mode 100644
index 00000000000..83ec605ee0b
--- /dev/null
+++ b/src/include/codegen/interpreter/bytecode_instructions.def
@@ -0,0 +1,412 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_instructions.def
+//
+// Identification: src/include/codegen/interpreter/bytecode_instructions.def
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Instruction Definitions
+//
+// This file contains the definitions for all bytecode instructions.
+//
+// The definitions can be used by defining one of the HANDLE functions below
+// before including this definition file (see X-Macros). This way the
+// definitions can be used to generate the Opcode enum, the dispatch area, etc.
+//
+// Most instructions are automatically expanded to all their supported types.
+//
+// When adding a bytecode instruction here, the instruction at least needs a
+// Translate-function in the BytecodeBuilder and a Handler-function in the
+// BytecodeInterpreter.
+//----------------------------------------------------------------------------//
+
+#ifndef HANDLE_INST
+#define HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_TYPED_INST
+#define HANDLE_TYPED_INST(op, type) HANDLE_INST(op##_##type)
+#endif
+
+#ifndef HANDLE_OVERFLOW_TYPED_INST
+#define HANDLE_OVERFLOW_TYPED_INST(op, type) HANDLE_TYPED_INST(op, type)
+#endif
+
+#ifndef HANDLE_SELECT_INST
+#define HANDLE_SELECT_INST(op) HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_RET_INST
+#define HANDLE_RET_INST(op) HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_EXTERNAL_CALL_INST
+#define HANDLE_EXTERNAL_CALL_INST(op) HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_INTERNAL_CALL_INST
+#define HANDLE_INTERNAL_CALL_INST(op) HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_EXPLICIT_CALL_INST
+#define HANDLE_EXPLICIT_CALL_INST(op, func) HANDLE_INST(op)
+#endif
+
+// Takes a function and an opcode and calls the function for all type instances
+// of that opcode
+#define CREATE_FOR_ALL_TYPES(func, op)                                   \
+  func(op, i8) func(op, i16) func(op, i32) func(op, i64) func(op, float) \
+      func(op, double)
+
+// Returns the first type used when expanding to all types
+// (needed for use of GetOpcodeForTypeAllTypes)
+#define GET_FIRST_ALL_TYPES(op) (op##_i8)
+
+// Takes a function and an opcode and calls the function for all integer
+// instances of that opcode
+#define CREATE_FOR_INT_TYPES(func, op) \
+  func(op, i8) func(op, i16) func(op, i32) func(op, i64)
+
+// Returns the first type used when expanding to integer types
+// (needed for use of GetOpcodeForTypeIntTypes)
+#define GET_FIRST_INT_TYPES(op) (op##_i8)
+
+// Takes a function and an opcode and calls the function for all floating point
+// instances of that opcode
+#define CREATE_FOR_FLOAT_TYPES(func, op) func(op, float) func(op, double)
+
+// Returns the first type used when expanding
to floating point types +// (needed for use of GetOpcodeForTypeFloatTypes) +#define GET_FIRST_FLOAT_TYPES(op) (op##_float) + +//------ Bytecode Instruction Definitions ------// + +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, add) +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, sub) +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, mul) +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + div) // division for unsigned integer and floating point +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, sdiv) // division for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, urem) // remainder for unsigned integer +CREATE_FOR_FLOAT_TYPES(HANDLE_TYPED_INST, frem) // remainder for floating point +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, srem) // remainder for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, shl) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, lshr) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, ashr) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, and) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, or) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, xor) + +HANDLE_INST(extractvalue) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, load) +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, store) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, alloca_array) +HANDLE_INST(alloca) + +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_eq) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_ne) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_gt) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_lt) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_ge) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_le) // compare for unsigned integer and floating point +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, cmp_sgt) // compare for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, cmp_slt) // compare for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, cmp_sge) // compare for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, cmp_sle) // compare for signed integer + +HANDLE_INST(sext_i8_i16) // there is no handy way to expand this relationship +HANDLE_INST(sext_i8_i32) +HANDLE_INST(sext_i8_i64) +HANDLE_INST(sext_i16_i32) +HANDLE_INST(sext_i16_i64) +HANDLE_INST(sext_i32_i64) +HANDLE_INST(zext_i8_i16) +HANDLE_INST(zext_i8_i32) +HANDLE_INST(zext_i8_i64) +HANDLE_INST(zext_i16_i32) +HANDLE_INST(zext_i16_i64) +HANDLE_INST(zext_i32_i64) + +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, + doubletosi) // we can only expand in one dimension, so we + // expand the integer dimension and write down + // all floating point instances manually +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, doubletoui) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, sitodouble) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, uitodouble) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, floattosi) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, floattoui) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, sitofloat) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, uitofloat) +HANDLE_INST(doubletofloat) +HANDLE_INST(floattodouble) + +HANDLE_INST(gep_offset) // struct access of GEP instruction (accumulated) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, + gep_array) // array access of GEP instruction (inplace) +HANDLE_INST(phi_mov) +HANDLE_INST(nop_mov) +HANDLE_SELECT_INST(select) +HANDLE_EXTERNAL_CALL_INST(call_external) // external function call +HANDLE_INTERNAL_CALL_INST(call_internal) // internal function call + +HANDLE_RET_INST(ret) 
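+
+// Illustration (editorial sketch, not part of the instruction set): the
+// definitions above are consumed via X-Macros. A consumer that only needs
+// the opcode names defines HANDLE_INST before inclusion, the way
+// bytecode_function.h builds its Opcode enum:
+//
+//   #define HANDLE_INST(opcode) opcode,
+//   #include "codegen/interpreter/bytecode_instructions.def"
+//   #undef HANDLE_INST
+//
+// With that definition, CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, add) above
+// expands to the six enumerators add_i8, add_i16, add_i32, add_i64,
+// add_float and add_double.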
+HANDLE_INST(branch_uncond)
+HANDLE_INST(branch_cond)
+HANDLE_INST(branch_cond_ft)  // conditional branch with fall through
+
+HANDLE_INST(llvm_memcpy)
+HANDLE_INST(llvm_memmove)
+HANDLE_INST(llvm_memset)
+
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_uadd_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_sadd_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_usub_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_ssub_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_umul_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_smul_overflow)
+
+HANDLE_INST(llvm_sse42_crc32)
+
+//------ Explicit Call Instructions ------//
+//
+// Usually external functions are called using libffi.
+// However, for frequently used functions, an explicit bytecode instruction
+// for that specific function call can be created. For every function listed
+// below, a bytecode instruction _and_ a matching handler function are
+// generated automatically using heavy template-macro-magic.
+//
+// Further functions can easily be added here. The corresponding headers have
+// to be added to these files manually: bytecode_interpreter.h,
+// bytecode_function.cpp
+
+HANDLE_EXPLICIT_CALL_INST(
+    peloton_transactionruntime_performvectorizedread,
+    peloton::codegen::TransactionRuntime::PerformVectorizedRead)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_oahashtable_init,
+                          peloton::codegen::util::OAHashTable::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_oahashtable_storetuple,
+                          peloton::codegen::util::OAHashTable::StoreTuple)
+HANDLE_EXPLICIT_CALL_INST(peloton_oahashtable_destroy,
+                          peloton::codegen::util::OAHashTable::Destroy)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_deleter_init, peloton::codegen::Deleter::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_deleter_delete,
+                          peloton::codegen::Deleter::Delete)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_init, peloton::codegen::Updater::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_prepare,
+                          peloton::codegen::Updater::Prepare)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_preparepk,
+                          peloton::codegen::Updater::PreparePK)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_getpool,
+                          peloton::codegen::Updater::GetPool)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_update,
+                          peloton::codegen::Updater::Update)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_updatepk,
+                          peloton::codegen::Updater::UpdatePK)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_teardown,
+                          peloton::codegen::Updater::TearDown)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_init,
+                          peloton::codegen::Inserter::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_allocatetuplestorage,
+                          peloton::codegen::Inserter::AllocateTupleStorage)
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_getpool,
+                          peloton::codegen::Inserter::GetPool)
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_insert,
+                          peloton::codegen::Inserter::Insert)
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_teardown,
+                          peloton::codegen::Inserter::TearDown)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_init,
+                          peloton::codegen::util::Sorter::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_storeinputtuple,
+                          peloton::codegen::util::Sorter::StoreInputTuple)
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_sort,
+                          peloton::codegen::util::Sorter::Sort)
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_sortparallel,
+                          peloton::codegen::util::Sorter::SortParallel)
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_destroy,
+                          peloton::codegen::util::Sorter::Destroy)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_zonemap_shouldscantilegroup,
+
peloton::storage::ZoneMapManager::ShouldScanTileGroup) +HANDLE_EXPLICIT_CALL_INST(peloton_zonemap_getinstance, + peloton::storage::ZoneMapManager::GetInstance) + +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputboolean, + peloton::codegen::ValuesRuntime::OutputBoolean) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputtinyint, + peloton::codegen::ValuesRuntime::OutputTinyInt) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputsmallint, + peloton::codegen::ValuesRuntime::OutputSmallInt) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputinteger, + peloton::codegen::ValuesRuntime::OutputInteger) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputbigint, + peloton::codegen::ValuesRuntime::OutputBigInt) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputdate, + peloton::codegen::ValuesRuntime::OutputDate) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputtimestamp, + peloton::codegen::ValuesRuntime::OutputTimestamp) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputdecimal, + peloton::codegen::ValuesRuntime::OutputDecimal) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputvarchar, + peloton::codegen::ValuesRuntime::OutputVarchar) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputvarbinary, + peloton::codegen::ValuesRuntime::OutputVarbinary) + +HANDLE_EXPLICIT_CALL_INST(peloton_executorcontext_gettransaction, + peloton::executor::ExecutorContext::GetTransaction) + +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_ascii, + peloton::function::StringFunctions::Ascii) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_like, + peloton::function::StringFunctions::Like) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_length, + peloton::function::StringFunctions::Length) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_btrim, + peloton::function::StringFunctions::BTrim) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_trim, + peloton::function::StringFunctions::Trim) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_ltrim, + peloton::function::StringFunctions::LTrim) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_rtrim, + peloton::function::StringFunctions::RTrim) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_substr, + peloton::function::StringFunctions::Substr) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_repeat, + peloton::function::StringFunctions::Repeat) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_comparestrings, + peloton::function::StringFunctions::CompareStrings) + +HANDLE_EXPLICIT_CALL_INST(peloton_buffer_init, + peloton::codegen::util::Buffer::Init) +HANDLE_EXPLICIT_CALL_INST(peloton_buffer_append, + peloton::codegen::util::Buffer::Append) +HANDLE_EXPLICIT_CALL_INST(peloton_buffer_reset, + peloton::codegen::util::Buffer::Reset) +HANDLE_EXPLICIT_CALL_INST(peloton_buffer_destroy, + peloton::codegen::util::Buffer::Destroy) + +HANDLE_EXPLICIT_CALL_INST(peloton_numericfunctions_abs, + peloton::function::NumericFunctions::Abs) +HANDLE_EXPLICIT_CALL_INST(peloton_numericfunctions_floor, + peloton::function::NumericFunctions::Floor) +HANDLE_EXPLICIT_CALL_INST(peloton_numericfunctions_round, + peloton::function::NumericFunctions::Round) +HANDLE_EXPLICIT_CALL_INST(peloton_numericfunctions_ceil, + peloton::function::NumericFunctions::Ceil) + +HANDLE_EXPLICIT_CALL_INST(peloton_bloomfilteraccessor_init, + peloton::codegen::BloomFilterAccessor::Init) +HANDLE_EXPLICIT_CALL_INST(peloton_bloomfilteraccessor_destroy, + peloton::codegen::BloomFilterAccessor::Destroy) + +HANDLE_EXPLICIT_CALL_INST(peloton_bloomfilter_init, + 
peloton::codegen::util::BloomFilter::Init) +HANDLE_EXPLICIT_CALL_INST(peloton_bloomfilter_destroy, + peloton::codegen::util::BloomFilter::Destroy) + +HANDLE_EXPLICIT_CALL_INST(peloton_datatable_gettilegroupcount, + peloton::storage::DataTable::GetTileGroupCount) + +HANDLE_EXPLICIT_CALL_INST(peloton_datefunctions_now, + peloton::function::DateFunctions::Now) + +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_init, + peloton::codegen::util::HashTable::Init) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_insert, + peloton::codegen::util::HashTable::Insert) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_insertlazy, + peloton::codegen::util::HashTable::InsertLazy) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_buildlazy, + peloton::codegen::util::HashTable::BuildLazy) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_reservelazy, + peloton::codegen::util::HashTable::ReserveLazy) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_mergelazyunfinished, + peloton::codegen::util::HashTable::MergeLazyUnfinished) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_destroy, + peloton::codegen::util::HashTable::Destroy) + +HANDLE_EXPLICIT_CALL_INST(peloton_storagemanager_gettablewithoid, + peloton::storage::StorageManager::GetTableWithOid) + +HANDLE_EXPLICIT_CALL_INST(peloton_tilegroup_getnexttupleslot, + peloton::storage::TileGroup::GetNextTupleSlot) +HANDLE_EXPLICIT_CALL_INST(peloton_tilegroup_gettilegroupid, + peloton::storage::TileGroup::GetTileGroupId) + +HANDLE_EXPLICIT_CALL_INST(peloton_timestampfunctions_datetrunc, + peloton::function::TimestampFunctions::DateTrunc) +HANDLE_EXPLICIT_CALL_INST(peloton_timestampfunctions_datepart, + peloton::function::TimestampFunctions::DatePart) + +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getboolean, + peloton::codegen::QueryParameters::GetBoolean) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_gettinyint, + peloton::codegen::QueryParameters::GetTinyInt) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getsmallint, + peloton::codegen::QueryParameters::GetSmallInt) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getinteger, + peloton::codegen::QueryParameters::GetInteger) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getbigint, + peloton::codegen::QueryParameters::GetBigInt) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getdouble, + peloton::codegen::QueryParameters::GetDouble) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getdate, + peloton::codegen::QueryParameters::GetDate) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_gettimestamp, + peloton::codegen::QueryParameters::GetTimestamp) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getvarcharval, + peloton::codegen::QueryParameters::GetVarcharVal) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getvarcharlen, + peloton::codegen::QueryParameters::GetVarcharLen) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getvarbinaryval, + peloton::codegen::QueryParameters::GetVarbinaryVal) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getvarbinarylen, + peloton::codegen::QueryParameters::GetVarbinaryLen) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_isnull, + peloton::codegen::QueryParameters::IsNull) + +HANDLE_EXPLICIT_CALL_INST(peloton_runtimefunctions_hashcrc64, + peloton::codegen::RuntimeFunctions::HashCrc64) +HANDLE_EXPLICIT_CALL_INST(peloton_runtimefunctions_gettilegroup, + peloton::codegen::RuntimeFunctions::GetTileGroup) +HANDLE_EXPLICIT_CALL_INST( + peloton_runtimefunctions_gettilegrouplayout, + peloton::codegen::RuntimeFunctions::GetTileGroupLayout) +HANDLE_EXPLICIT_CALL_INST( + 
    peloton_runtimefunctions_fillpredicatearray,
+    peloton::codegen::RuntimeFunctions::FillPredicateArray)
+HANDLE_EXPLICIT_CALL_INST(
+    peloton_runtimefunctions_throwdividebyzeroexception,
+    peloton::codegen::RuntimeFunctions::ThrowDivideByZeroException)
+HANDLE_EXPLICIT_CALL_INST(
+    peloton_runtimefunctions_throwoverflowexception,
+    peloton::codegen::RuntimeFunctions::ThrowOverflowException)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_bufferingconsumer_buffertuple,
+                          peloton::codegen::BufferingConsumer::BufferTuple)
+
+// undefine all handlers
+#undef HANDLE_INST
+#undef HANDLE_TYPED_INST
+#undef HANDLE_OVERFLOW_TYPED_INST
+#undef HANDLE_SELECT_INST
+#undef HANDLE_RET_INST
+#undef HANDLE_EXTERNAL_CALL_INST
+#undef HANDLE_INTERNAL_CALL_INST
+#undef HANDLE_EXPLICIT_CALL_INST
\ No newline at end of file
diff --git a/src/include/codegen/interpreter/bytecode_interpreter.h b/src/include/codegen/interpreter/bytecode_interpreter.h
new file mode 100644
index 00000000000..16193efbc5c
--- /dev/null
+++ b/src/include/codegen/interpreter/bytecode_interpreter.h
@@ -0,0 +1,1260 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_interpreter.h
+//
+// Identification: src/include/codegen/interpreter/bytecode_interpreter.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "codegen/interpreter/bytecode_function.h"
+
+#include <ffi.h>
+
+#include "codegen/query.h"
+#include "common/exception.h"
+#include "common/overflow_builtins.h"
+
+// Includes for explicit function calls
+#include "codegen/bloom_filter_accessor.h"
+#include "codegen/util/bloom_filter.h"
+#include "codegen/buffering_consumer.h"
+#include "codegen/deleter.h"
+#include "codegen/inserter.h"
+#include "codegen/query_parameters.h"
+#include "codegen/runtime_functions.h"
+#include "codegen/transaction_runtime.h"
+#include "codegen/updater.h"
+#include "codegen/util/oa_hash_table.h"
+#include "codegen/util/hash_table.h"
+#include "codegen/util/sorter.h"
+#include "codegen/values_runtime.h"
+#include "executor/executor_context.h"
+#include "function/date_functions.h"
+#include "function/numeric_functions.h"
+#include "function/string_functions.h"
+#include "function/timestamp_functions.h"
+#include "planner/project_info.h"
+#include "storage/data_table.h"
+#include "storage/storage_manager.h"
+#include "storage/tile_group.h"
+#include "storage/zone_map_manager.h"
+#include "codegen/util/buffer.h"
+
+namespace peloton {
+namespace codegen {
+namespace interpreter {
+
+/**
+ * Holds the runtime information for an external function call. Because libffi
+ * requires pointers to the actual value slots, this information is different
+ * for every function activation and cannot be stored in the bytecode
+ * function.
+ */
+struct CallActivation {
+  ffi_cif call_interface;
+  std::vector<void *> value_pointers;
+  value_t *return_pointer;
+};
+
+//----------------------------------------------------------------------------//
+// Template Helper Functions                                                  //
+//----------------------------------------------------------------------------//
+
+/**
+ * The seq types make it possible to create a template sequence of integers,
+ * e.g. for indexed access.
+ * (std::integer_sequence is only available in C++14)
+ */
+template <size_t... indexes>
+struct seq {
+  using type = seq;
+};
+template <typename sequence1, typename sequence2>
+struct concat;
+template <size_t... indexes1, size_t... indexes2>
+struct concat<seq<indexes1...>, seq<indexes2...>>
+    : seq<indexes1..., (sizeof...(indexes1) + indexes2)...> {};
+
+template <size_t number>
+struct gen_seq;
+template <size_t number>
+struct gen_seq : concat<typename gen_seq<number / 2>::type,
+                        typename gen_seq<number - number / 2>::type>::type {};
+template <>
+struct gen_seq<0> : seq<> {};
+template <>
+struct gen_seq<1> : seq<0> {};
+
+/**
+ * This function converts pointers back into references to make value handling
+ * possible. The function is tagged with a bool type that indicates
+ * whether the type is a reference.
+ * Non-reference types are returned without changes.
+ */
+template <typename type_t>
+static ALWAYS_INLINE inline constexpr
+    typename std::remove_pointer<type_t>::type &
+    ConvertPointerToReference(type_t source,
+                              UNUSED_ATTRIBUTE std::true_type is_reference) {
+  return *source;
+};
+
+template <typename type_t>
+static ALWAYS_INLINE inline constexpr type_t ConvertPointerToReference(
+    type_t source, UNUSED_ATTRIBUTE std::false_type not_a_reference) {
+  return source;
+};
+
+class BytecodeInterpreter {
+ public:
+  /**
+   * Executes a translated function with the interpreter
+   * @param bytecode_function bytecode function that shall be executed
+   * @param arguments vector of function arguments (stored as value_t). The
+   * number of arguments must match the number expected by the executed
+   * function.
+   * @return return value of the LLVM function, or undefined if the function
+   * returns void.
+   */
+  static value_t ExecuteFunction(const BytecodeFunction &bytecode_function,
+                                 const std::vector<value_t> &arguments);
+
+  /**
+   * Executes a translated function with the interpreter
+   * (Wrapper for usage with a single char* argument)
+   * @param bytecode_function bytecode function that shall be executed, must
+   * expect one argument.
+   * @param param char pointer argument of the function.
+   */
+  static void ExecuteFunction(const BytecodeFunction &bytecode_function,
+                              char *param);
+
+ private:
+  explicit BytecodeInterpreter(const BytecodeFunction &bytecode_function);
+
+  /**
+   * Executes a function with the given arguments. The return value can
+   * afterwards be retrieved with GetReturnValue(). This function is also
+   * called for internal function calls during execution.
+   * @param arguments vector of function arguments (stored as value_t). The
+   * number of arguments must match the number expected by the executed
+   * function.
+   */
+  void ExecuteFunction(const std::vector<value_t> &arguments);
+
+  /**
+   * Initializes the activation record by allocating the value slots, placing
+   * function arguments and constants, and preparing the call contexts.
+   * @param arguments vector of function arguments (stored as value_t). The
+   * number of arguments must match the number expected by the executed
+   * function.
+   */
+  void InitializeActivationRecord(const std::vector<value_t> &arguments);
+
+  /**
+   * Returns the function return value _after_ execution.
+   * @tparam type_t expected return type
+   * @return return value of the executed function, or undefined if void.
+   */
+  template <typename type_t>
+  type_t GetReturnValue();
+
+  /**
+   * Get the current value of a value slot.
+ * @tparam type_t requested type + * @param index value slot index + * @return value as requested type + */ + template + ALWAYS_INLINE inline type_t GetValue(const index_t index) { + using type_noref_t = typename std::conditional< + std::is_reference::value, + typename std::remove_reference::type *, type_t>::type; + static_assert(sizeof(type_noref_t) <= sizeof(value_t), + "The interpreter can only handle values that fit in 8 bytes"); + + PELOTON_ASSERT(index >= 0 && index < bytecode_function_.number_values_); + return ConvertPointerToReference( + *reinterpret_cast(&values_[index]), + std::is_reference()); + } + + /** + * Get the reference to a value slot. Usually SetValue() should be used + * to set the values, but some use cases require pointers/references to the + * slots. + * @tparam type_t requested type + * @param index value slot index + * @return typed reference to the requested slot + */ + template + ALWAYS_INLINE inline type_t &GetValueReference(const index_t index) { + PELOTON_ASSERT(index >= 0 && index < bytecode_function_.number_values_); + return reinterpret_cast(values_[index]); + } + + /** + * Set the current value of a slot + * @tparam type_t requested type + * @param index value slot index + * @param value value of type type_t, that shall be set + */ + template + ALWAYS_INLINE inline void SetValue(const index_t index, const type_t value) { + using type_noref_t = typename std::conditional< + std::is_reference::value, + typename std::remove_reference::type *, type_t>::type; + + PELOTON_ASSERT(index >= 0 && index < bytecode_function_.number_values_); + *reinterpret_cast(&values_[index]) = value; + + DumpValue(index); + } + + /** + * Advance the instruction pointer by a compile-time value. + * @tparam number_instruction_slots size of current instruction + * @param instruction current instruction pointer + * @return new instruction pointer + */ + template + ALWAYS_INLINE inline const Instruction *AdvanceIP( + const Instruction *instruction) { + auto next = reinterpret_cast( + const_cast( + reinterpret_cast(instruction)) + + number_instruction_slots); + return next; + } + + /** + * Advance the instruction pointer by a run-time value. + * @tparam number_instruction_slots size of current instruction + * @param instruction current instruction pointer + * @return new instruction pointer + */ + ALWAYS_INLINE inline const Instruction *AdvanceIP( + const Instruction *instruction, size_t number_instruction_slots) { + auto next = reinterpret_cast( + const_cast( + reinterpret_cast(instruction)) + + number_instruction_slots); + return next; + } + + /** + * Allocate memory and return a pointer to it. (Memory is managed and gets + * freed after the interpreter exits) + * @param number_bytes number of bytes to allocate + * @return pointer to the allocated memory + */ + uintptr_t AllocateMemory(size_t number_bytes); + +/** + * Dump the value of the given as value slot for debug purposes. + * If LOG_TRACE is not enabled, this function compiles to a stub. 
+ * @param index value index of value slot to dump + */ +#ifdef LOG_TRACE_ENABLED + template + void DumpValue(const index_t index) { + std::ostringstream output; + output << " [" << std::dec << std::setw(3) << index + << "] <= " << GetValue>(index) << "/0x" + << std::hex << GetValue>(index); + LOG_TRACE("%s", output.str().c_str()); + } +#else + template + void DumpValue(UNUSED_ATTRIBUTE const index_t index) {} +#endif + + //--------------------------------------------------------------------------// + // Instruction Handlers + // + // - The following functions are the instruction handlers for the bytecode + // instructions defined in bytecode_instructions.def . + // - The signatures of those functions are not code style conform, as they are + // generated from the opcode mnemonic + // - If the instruction is marked as a typed instruction in the .def file, + // it has a templated handler. Some handlers only support floating point or + // integer types, some both. Static asserts ensure this. + // - Because all the handlers will get inlined in the dispatch area, their + // definition must be in this header file. + //--------------------------------------------------------------------------// + + template + ALWAYS_INLINE inline const Instruction *addHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) + + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *subHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) - + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *mulHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) * + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *divHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) / + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sdivHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) / + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *uremHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) % + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *fremHandler( + const Instruction *instruction) { + static_assert(std::is_floating_point::value, + "__func__ must only be used with floating point types"); + SetValue(instruction->args[0], + (std::fmod(GetValue(instruction->args[1]), + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sremHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename 
std::make_signed::type; + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) % + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *shlHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) + << GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *lshrHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) >> + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *ashrHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) >> + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *andHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) & + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *orHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) | + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *xorHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) ^ + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *extractvalueHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (GetValue(instruction->args[1]) >> instruction->args[2])); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *loadHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (*GetValue(instruction->args[1]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *storeHandler( + const Instruction *instruction) { + *GetValue(instruction->args[0]) = + GetValue(instruction->args[1]); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *alloca_arrayHandler( + const Instruction *instruction) { + size_t number_bytes = + instruction->args[1] * GetValue(instruction->args[2]); + SetValue(instruction->args[0], (AllocateMemory(number_bytes))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *allocaHandler( + const Instruction *instruction) { + size_t number_bytes = instruction->args[1]; + SetValue(instruction->args[0], (AllocateMemory(number_bytes))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction 
*cmp_eqHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) == + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_neHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) != + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_gtHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) > + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_ltHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) < + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_geHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) >= + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_leHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) <= + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_sgtHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) > + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_sltHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) < + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_sgeHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) >= + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_sleHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) <= + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i8_i16Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + 
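+  // Editorial sketch (not part of the patch): the sign and zero extension
+  // handlers differ only in the intermediate cast. Sign extending the i8
+  // value 0x80 (that is, -128) to i16 yields 0xFF80, while the zero
+  // extension handlers below yield 0x0080. Using the i8/i16 aliases from
+  // bytecode_function.h, the semantics of the handler above reduce to:
+  //
+  //   i8 raw = 0x80;
+  //   // widen through the signed types, then store the unsigned slot bits
+  //   i16 sext = static_cast<i16>(
+  //       static_cast<int16_t>(static_cast<int8_t>(raw)));  // 0xFF80
+  //   i16 zext = static_cast<i16>(raw);                     // 0x0080
+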
ALWAYS_INLINE inline const Instruction *sext_i8_i32Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i8_i64Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i16_i32Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i16_i64Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i32_i64Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i8_i16Handler( + const Instruction *instruction) { + using src_t = i8; + using dest_t = i16; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i8_i32Handler( + const Instruction *instruction) { + using src_t = i8; + using dest_t = i32; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i8_i64Handler( + const Instruction *instruction) { + using src_t = i8; + using dest_t = i64; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i16_i32Handler( + const Instruction *instruction) { + using src_t = i16; + using dest_t = i32; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i16_i64Handler( + const Instruction *instruction) { + using src_t = i16; + using dest_t = i64; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i32_i64Handler( + const Instruction *instruction) { + using src_t = i32; + using dest_t = i64; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + // The FP<>Int casts are created in a two-level hierarchy + // eg. 
the generated call to floattosiHandler is redirected to + // tosiHandler + + template + ALWAYS_INLINE inline const Instruction *tosiHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ dest_type must be an integer type"); + static_assert(std::is_floating_point::value, + "__func__ src_type must be a floating point type"); + using dest_type_signed_t = typename std::make_signed::type; + + SetValue( + instruction->args[0], (static_cast( + GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *touiHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ dest_type must be an integer type"); + static_assert(std::is_floating_point::value, + "__func__ src_type must be a floating point type"); + + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sitoHandler( + const Instruction *instruction) { + static_assert(std::is_floating_point::value, + "__func__ dest_type must be a floating point type"); + static_assert(std::is_integral::value, + "__func__ src_type must be an integer type"); + using src_type_signed_t = typename std::make_signed::type; + + SetValue(instruction->args[0], + (static_cast(GetValue( + instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *uitoHandler( + const Instruction *instruction) { + static_assert(std::is_floating_point::value, + "__func__ dest_type must be a floating point type"); + static_assert(std::is_integral::value, + "__func__ src_type must be an integer type"); + + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *floattosiHandler( + const Instruction *instruction) { + return tosiHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *floattouiHandler( + const Instruction *instruction) { + return touiHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sitofloatHandler( + const Instruction *instruction) { + return sitoHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *uitofloatHandler( + const Instruction *instruction) { + return uitoHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *doubletosiHandler( + const Instruction *instruction) { + return tosiHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *doubletouiHandler( + const Instruction *instruction) { + return touiHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sitodoubleHandler( + const Instruction *instruction) { + return sitoHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *uitodoubleHandler( + const Instruction *instruction) { + return uitoHandler(instruction); + } + + ALWAYS_INLINE inline const Instruction *doubletofloatHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *floattodoubleHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE 
inline const Instruction *gep_offsetHandler( + const Instruction *instruction) { + uintptr_t sum = GetValue(instruction->args[1]) + + static_cast(instruction->args[2]); + SetValue(instruction->args[0], (sum)); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *gep_arrayHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + uintptr_t product = + GetValue(instruction->args[1]) * instruction->args[2]; + SetValue(instruction->args[0], + (GetValue(instruction->args[0]) + product)); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *phi_movHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *selectHandler( + const Instruction *instruction) { + value_t result; + if (GetValue(instruction->args[1]) > 0) + result = GetValue(instruction->args[2]); + else + result = GetValue(instruction->args[3]); + + SetValue(instruction->args[0], (result)); + return AdvanceIP<2>(instruction); // bigger slot size! + } + + ALWAYS_INLINE inline const Instruction *call_externalHandler( + const Instruction *instruction) { + const ExternalCallInstruction *call_instruction = + reinterpret_cast(instruction); + CallActivation &call_activation = + call_activations_[call_instruction->external_call_context]; + + // call external function + ffi_call(&call_activation.call_interface, call_instruction->function, + call_activation.return_pointer, + reinterpret_cast(call_activation.value_pointers.data())); + + if (bytecode_function_ + .external_call_contexts_[call_instruction->external_call_context] + .dest_type != &ffi_type_void) { + DumpValue( + bytecode_function_ + .external_call_contexts_[call_instruction->external_call_context] + .dest_slot); + } + + return AdvanceIP<2>(instruction); // bigger slot size! 
+
+  ALWAYS_INLINE inline const Instruction *call_internalHandler(
+      const Instruction *instruction) {
+    const InternalCallInstruction *call_instruction =
+        reinterpret_cast<const InternalCallInstruction *>(instruction);
+
+    std::vector<value_t> arguments(call_instruction->number_args);
+    for (size_t i = 0; i < call_instruction->number_args; i++) {
+      arguments[i] = GetValue<value_t>(call_instruction->args[i]);
+    }
+
+    value_t result = ExecuteFunction(
+        bytecode_function_.sub_functions_[call_instruction->sub_function],
+        arguments);
+    SetValue<value_t>(call_instruction->dest_slot, result);
+
+    return AdvanceIP(
+        instruction,
+        bytecode_function_.GetInteralCallInstructionSlotSize(call_instruction));
+  }
+
+  ALWAYS_INLINE inline const Instruction *nop_movHandler(
+      const Instruction *instruction) {
+    SetValue<value_t>(instruction->args[0],
+                      (GetValue<value_t>(instruction->args[1])));
+    return AdvanceIP<1>(instruction);
+  }
+
+  ALWAYS_INLINE inline const Instruction *branch_uncondHandler(
+      const Instruction *instruction) {
+    return bytecode_function_.GetIPFromIndex(instruction->args[0]);
+  }
+
+  ALWAYS_INLINE inline const Instruction *branch_condHandler(
+      const Instruction *instruction) {
+    index_t next_bb;
+    if (GetValue<value_t>(instruction->args[0]) > 0)
+      next_bb = instruction->args[2];
+    else
+      next_bb = instruction->args[1];
+
+    return bytecode_function_.GetIPFromIndex(next_bb);
+  }
+
+  ALWAYS_INLINE inline const Instruction *branch_cond_ftHandler(
+      const Instruction *instruction) {
+    const Instruction *ip;
+    if ((GetValue<value_t>(instruction->args[0]) & 0x1) > 0)
+      ip = bytecode_function_.GetIPFromIndex(instruction->args[1]);
+    else
+      ip = AdvanceIP<1>(instruction);
+
+    return ip;
+  }
+
+  ALWAYS_INLINE inline const Instruction *llvm_memcpyHandler(
+      const Instruction *instruction) {
+    PELOTON_MEMCPY(GetValue<void *>(instruction->args[0]),
+                   GetValue<void *>(instruction->args[1]),
+                   GetValue<size_t>(instruction->args[2]));
+    return AdvanceIP<1>(instruction);
+  }
+
+  ALWAYS_INLINE inline const Instruction *llvm_memmoveHandler(
+      const Instruction *instruction) {
+    std::memmove(GetValue<void *>(instruction->args[0]),
+                 GetValue<void *>(instruction->args[1]),
+                 GetValue<size_t>(instruction->args[2]));
+    return AdvanceIP<1>(instruction);
+  }
+
+  ALWAYS_INLINE inline const Instruction *llvm_memsetHandler(
+      const Instruction *instruction) {
+    PELOTON_MEMSET(GetValue<void *>(instruction->args[0]),
+                   GetValue<int>(instruction->args[1]),
+                   GetValue<size_t>(instruction->args[2]));
+    return AdvanceIP<1>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_uadd_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    bool overflow = __builtin_add_overflow(
+        GetValue<type_t>(instruction->args[2]),
+        GetValue<type_t>(instruction->args[3]),
+        &GetValueReference<type_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_sadd_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    using type_signed_t = typename std::make_signed<type_t>::type;
+    bool overflow = __builtin_add_overflow(
+        GetValue<type_signed_t>(instruction->args[2]),
+        GetValue<type_signed_t>(instruction->args[3]),
+        &GetValueReference<type_signed_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
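Each of these handlers maps one llvm.*.with.overflow intrinsic onto the matching compiler builtin, writing the wrapped result and the overflow flag into two separate slots. In isolation, the unsigned-add case computes:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // what llvm_uadd_overflowHandler<uint32_t> boils down to: one builtin
      // yields both the wrapped sum and the overflow flag
      uint32_t result;
      bool overflow = __builtin_add_overflow(4000000000u, 500000000u, &result);
      std::printf("result=%u overflow=%d\n", result, overflow);
      // prints: result=205032704 overflow=1
      return 0;
    }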
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_usub_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    bool overflow = __builtin_sub_overflow(
+        GetValue<type_t>(instruction->args[2]),
+        GetValue<type_t>(instruction->args[3]),
+        &GetValueReference<type_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_ssub_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    using type_signed_t = typename std::make_signed<type_t>::type;
+    bool overflow = __builtin_sub_overflow(
+        GetValue<type_signed_t>(instruction->args[2]),
+        GetValue<type_signed_t>(instruction->args[3]),
+        &GetValueReference<type_signed_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_umul_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    bool overflow = __builtin_mul_overflow(
+        GetValue<type_t>(instruction->args[2]),
+        GetValue<type_t>(instruction->args[3]),
+        &GetValueReference<type_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_smul_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    using type_signed_t = typename std::make_signed<type_t>::type;
+    bool overflow = __builtin_mul_overflow(
+        GetValue<type_signed_t>(instruction->args[2]),
+        GetValue<type_signed_t>(instruction->args[3]),
+        &GetValueReference<type_signed_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  ALWAYS_INLINE inline const Instruction *llvm_sse42_crc32Handler(
+      const Instruction *instruction) {
+    SetValue<uint64_t>(
+        instruction->args[0],
+        (__builtin_ia32_crc32di(GetValue<uint64_t>(instruction->args[1]),
+                                GetValue<uint64_t>(instruction->args[2]))));
+    return AdvanceIP<1>(instruction);
+  }
+
+  // The handlers for explicit calls are generated using templates.
+  //
+  // The call arrives in explicit_callHandler(...), which is overloaded for
+  // 1. static functions,
+  // 2. class methods, and
+  // 3. const class methods,
+  // and is then forwarded to explicit_call_wrapperHandler(...), which is
+  // tagged with a bool type indicating whether the called function returns
+  // void or not. This yields six instances of that function overall.
+
+  // 1. static function
+  template <typename return_type, typename... arg_types>
+  ALWAYS_INLINE inline const Instruction *explicit_callHandler(
+      const Instruction *instruction, return_type (*func)(arg_types...)) {
+    // forward call depending on whether func returns void or not
+    return explicit_call_wrapperHandler(instruction, func,
+                                        gen_seq<sizeof...(arg_types)>(),
+                                        std::is_void<return_type>());
+  }
+
+  // 2. class method
+  template <typename return_type, typename class_type, typename... arg_types>
+  ALWAYS_INLINE inline const Instruction *explicit_callHandler(
+      const Instruction *instruction,
+      return_type (class_type::*func)(arg_types...)) {
+    // forward call depending on whether func returns void or not
+    return explicit_call_wrapperHandler(instruction, func,
+                                        gen_seq<sizeof...(arg_types)>(),
+                                        std::is_void<return_type>());
+  }
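seq and gen_seq are assumed to be the usual pre-C++14 index-sequence utility; expanding the indexes pack in lockstep with arg_types is what turns the flat slot array into a typed argument list. A self-contained sketch of that trick:

    #include <cstddef>
    #include <cstdio>

    template <size_t... indexes>
    struct seq {};
    template <size_t N, size_t... indexes>
    struct gen_seq : gen_seq<N - 1, N - 1, indexes...> {};
    template <size_t... indexes>
    struct gen_seq<0, indexes...> : seq<indexes...> {};

    static int arg_slots[] = {0, 7, 35};  // argument i lives in slot i + 1

    template <typename R, typename... As, size_t... Is>
    static R apply_from_slots(R (*fn)(As...), seq<Is...>) {
      // expands to fn(arg_slots[1], arg_slots[2], ...)
      return fn(static_cast<As>(arg_slots[Is + 1])...);
    }

    static int sub(int a, int b) { return a - b; }

    int main() {
      std::printf("%d\n", apply_from_slots(sub, gen_seq<2>()));  // prints -28
      return 0;
    }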
+
+  // 3. const class method
+  template <typename return_type, typename class_type, typename... arg_types>
+  ALWAYS_INLINE inline const Instruction *explicit_callHandler(
+      const Instruction *instruction,
+      return_type (class_type::*func)(arg_types...) const) {
+    // forward call depending on whether func returns void or not
+    return explicit_call_wrapperHandler(instruction, func,
+                                        gen_seq<sizeof...(arg_types)>(),
+                                        std::is_void<return_type>());
+  }
+
+  // 1. static function a) returns non-void
+  template <typename return_type, typename... arg_types, size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (*func)(arg_types...),
+      const seq<indexes...> &, UNUSED_ATTRIBUTE std::false_type returns_void) {
+    // call the actual function
+    auto ret = func(GetValue<arg_types>(instruction->args[indexes + 1])...);
+    SetValue<return_type>(instruction->args[0], ret);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 1. static function b) returns void
+  template <typename return_type, typename... arg_types, size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (*func)(arg_types...),
+      const seq<indexes...> &,
+      UNUSED_ATTRIBUTE std::true_type returns_void) {
+    // call the actual function
+    func(GetValue<arg_types>(instruction->args[indexes])...);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 2. class method a) returns non-void
+  template <typename return_type, typename class_type, typename... arg_types,
+            size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...),
+      const seq<indexes...> &, UNUSED_ATTRIBUTE std::false_type returns_void) {
+    // call the actual function
+    auto *obj = GetValue<class_type *>(instruction->args[1]);
+    return_type ret =
+        (obj->*func)(GetValue<arg_types>(instruction->args[indexes + 2])...);
+    SetValue<return_type>(instruction->args[0], ret);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 2. class method b) returns void
+  template <typename return_type, typename class_type, typename... arg_types,
+            size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...),
+      const seq<indexes...> &,
+      UNUSED_ATTRIBUTE std::true_type returns_void) {
+    // call the actual function
+    auto *obj = GetValue<class_type *>(instruction->args[0]);
+    (obj->*func)(GetValue<arg_types>(instruction->args[indexes + 1])...);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 3. const class method a) returns non-void
+  template <typename return_type, typename class_type, typename... arg_types,
+            size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...) const,
+      const seq<indexes...> &, UNUSED_ATTRIBUTE std::false_type returns_void) {
+    // call the actual function
+    auto *obj = GetValue<class_type *>(instruction->args[1]);
+    return_type ret =
+        (obj->*func)(GetValue<arg_types>(instruction->args[indexes + 2])...);
+    SetValue<return_type>(instruction->args[0], ret);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 3. const class method b) returns void
+  template <typename return_type, typename class_type, typename... arg_types,
+            size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...) const,
+      const seq<indexes...> &,
+      UNUSED_ATTRIBUTE std::true_type returns_void) {
+    // call the actual function
+    auto *obj = GetValue<class_type *>(instruction->args[0]);
+    (obj->*func)(GetValue<arg_types>(instruction->args[indexes + 1])...);
+
+    return AdvanceIP(instruction);
+  }
+
+  //--------------------------------------------------------------------------//
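The dispatch machinery referenced below (label_pointers_) is the classic threaded-interpreter pattern built on GCC/Clang's labels-as-values extension: each opcode indexes a table of label addresses, and every handler jumps straight to the next one. A minimal standalone sketch:

    #include <cstdio>

    int main() {
      static void *labels[] = {&&op_inc, &&op_dec, &&op_halt};
      int program[] = {0, 0, 1, 2};  // inc, inc, dec, halt
      int acc = 0;
      const int *ip = program;

      goto *labels[*ip];
    op_inc:
      ++acc;
      goto *labels[*++ip];
    op_dec:
      --acc;
      goto *labels[*++ip];
    op_halt:
      std::printf("%d\n", acc);  // prints 1
      return 0;
    }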
+
+ private:
+  /**
+   * This static array holds the goto-pointer to the dispatch area for each
+   * Opcode. It is filled once, when the interpreter is called for the
+   * first time.
+   */
+  static void *label_pointers_[BytecodeFunction::GetNumberOpcodes()];
+
+  /**
+   * Value slots (registers) for the current function activation.
+   * (Aligned to 64 bytes, which is most likely the cache line size.)
+   */
+  alignas(64) std::vector<value_t> values_;
+
+  /**
+   * Holds all allocations made with alloca. We do not need to access them,
+   * but the unique pointers ensure they are released at the end.
+   */
+  std::vector<std::unique_ptr<char[]>> allocations_;
+
+  /**
+   * Holds the call activation records for all external call instructions.
+   * (Created during initialization.)
+   */
+  std::vector<CallActivation> call_activations_;
+
+  /**
+   * Bytecode function used for execution.
+   */
+  const BytecodeFunction &bytecode_function_;
+
+ private:
+  // This class cannot be copy- or move-constructed
+  DISALLOW_COPY_AND_MOVE(BytecodeInterpreter);
+};
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/include/codegen/proxy/proxy.h b/src/include/codegen/proxy/proxy.h
index 8e647e9e2ac..ab518bd59ae 100644
--- a/src/include/codegen/proxy/proxy.h
+++ b/src/include/codegen/proxy/proxy.h
@@ -237,7 +237,7 @@ struct MemFn {
       ::peloton::codegen::CodeGen &codegen) {                                \
     static constexpr const char *kFnName = STR(NS::C::F);                    \
     /* If the function has already been defined, return it. */              \
-    if (::llvm::Function *func = codegen.LookupBuiltin(kFnName)) {           \
+    if (::llvm::Function *func = codegen.LookupBuiltin(kFnName).first) {     \
       return func;                                                          \
     }                                                                        \
                                                                             \
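The `.first` above reflects that a builtin lookup now yields a {declaration, implementation} pair instead of a bare llvm::Function *. Sketched with stand-in types (hypothetical names, not Peloton's actual declarations):

    #include <string>
    #include <unordered_map>
    #include <utility>

    struct Function;         // stand-in for llvm::Function
    using FuncPtr = void *;  // stand-in for the implementation pointer
    using Builtin = std::pair<Function *, FuncPtr>;

    static std::unordered_map<std::string, Builtin> builtins;

    // Returns {nullptr, nullptr} for unknown builtins, so call sites can keep
    // testing .first, exactly as the macro above does.
    static Builtin LookupBuiltin(const std::string &name) {
      auto it = builtins.find(name);
      return (it == builtins.end() ? Builtin{nullptr, nullptr} : it->second);
    }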
diff --git a/src/include/codegen/query.h b/src/include/codegen/query.h
index eecb5a175a5..141b5303b7d 100644
--- a/src/include/codegen/query.h
+++ b/src/include/codegen/query.h
@@ -13,9 +13,9 @@
 #pragma once
 
 #include "codegen/code_context.h"
+#include "codegen/parameter_cache.h"
 #include "codegen/query_parameters.h"
 #include "codegen/query_state.h"
-#include "codegen/parameter_cache.h"
 
 namespace peloton {
 
@@ -41,18 +41,39 @@ class ExecutionConsumer;
 //===----------------------------------------------------------------------===//
 class Query {
  public:
+  struct CompileStats {
+    double compile_ms = 0.0;
+  };
+
   struct RuntimeStats {
+    double interpreter_prepare_ms = 0.0;
     double init_ms = 0.0;
     double plan_ms = 0.0;
     double tear_down_ms = 0.0;
   };
 
-  struct QueryFunctions {
+  // We use this struct for the parameters to the LLVM functions
+  // to avoid complex casting and pointer manipulation
+  struct FunctionArguments {
+    executor::ExecutorContext *executor_context;
+    char *consumer_arg;
+    char rest[0];
+  } PACKED;
+
+  struct LLVMFunctions {
     llvm::Function *init_func;
     llvm::Function *plan_func;
     llvm::Function *tear_down_func;
   };
 
+  using compiled_function_t = void (*)(FunctionArguments *);
+
+  struct CompiledFunctions {
+    compiled_function_t init_func;
+    compiled_function_t plan_func;
+    compiled_function_t tear_down_func;
+  };
+
   /// This class cannot be copy or move-constructed
   DISALLOW_COPY_AND_MOVE(Query);
 
@@ -61,7 +82,10 @@ class Query {
    *
    * @param funcs The compiled functions that implement the logic of the query
    */
-  bool Prepare(const QueryFunctions &funcs);
+  void Prepare(const LLVMFunctions &funcs);
+
+  // Compiles the functions in this query to native code
+  void Compile(CompileStats *stats = nullptr);
 
   /**
    * @brief Executes the compiled query.
@@ -94,6 +118,14 @@ class Query {
   /// Constructor. Private so callers use the QueryCompiler class.
   explicit Query(const planner::AbstractPlan &query_plan);
 
+  // Execute the query as native code (must already be compiled)
+  void ExecuteNative(FunctionArguments *function_arguments,
+                     RuntimeStats *stats);
+
+  // Execute the query using the interpreter
+  void ExecuteInterpreter(FunctionArguments *function_arguments,
+                          RuntimeStats *stats);
+
 private:
   // The query plan
   const planner::AbstractPlan &query_plan_;
@@ -104,11 +136,14 @@ class Query {
   // The size of the parameter the functions take
   QueryState query_state_;
 
-  // The init(), plan() and tearDown() functions
-  typedef void (*compiled_function_t)(char *);
-  compiled_function_t init_func_;
-  compiled_function_t plan_func_;
-  compiled_function_t tear_down_func_;
+  // LLVM IR of the query functions
+  LLVMFunctions llvm_functions_;
+
+  // Pointers to the compiled query functions
+  CompiledFunctions compiled_functions_;
+
+  // Indicates whether the query has been compiled to native code
+  bool is_compiled_;
 };
 
 }  // namespace codegen
diff --git a/src/include/codegen/query_compiler.h b/src/include/codegen/query_compiler.h
index 377bafcada9..fd2c0b466fd 100644
--- a/src/include/codegen/query_compiler.h
+++ b/src/include/codegen/query_compiler.h
@@ -43,8 +43,8 @@ class QueryCompiler {
     // The time taken to generate all the IR for the plan
     double ir_gen_ms = 0.0;
 
-    // The time taken to perform JIT compilation
-    double jit_ms = 0.0;
+    // Time consumed by the LLVM optimizer
+    double optimize_ms = 0.0;
   };
 
   // Constructor
diff --git a/src/include/common/macros.h b/src/include/common/macros.h
index 96aaf6ab0d2..593619bffb4 100644
--- a/src/include/common/macros.h
+++ b/src/include/common/macros.h
@@ -29,7 +29,19 @@ namespace peloton {
 //===--------------------------------------------------------------------===//
 
 #define NEVER_INLINE __attribute__((noinline))
+
+#ifdef NDEBUG
 #define ALWAYS_INLINE __attribute__((always_inline))
+#else
+#define ALWAYS_INLINE
+#endif
+
+#ifdef __clang__
+#define NO_CLONE
+#else
+#define NO_CLONE __attribute__((noclone))
+#endif
+
 #define UNUSED_ATTRIBUTE __attribute__((unused))
 #define PACKED __attribute__((packed))
@@ -106,6 +118,19 @@ namespace peloton {
 #define GCC_AT_LEAST_6 0
 #endif
 
+#if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#define GCC_AT_LEAST_51 1
+#else
+#define GCC_AT_LEAST_51 0
+#endif
+
+// g++-5.0 does not support the overflow builtins
+#if GCC_AT_LEAST_51
+#define GCC_OVERFLOW_BUILTINS_DEFINED 1
+#else
+#define GCC_OVERFLOW_BUILTINS_DEFINED 0
+#endif
+
 //===--------------------------------------------------------------------===//
 // Port to OSX
 //===---------------------------
diff --git a/src/include/common/overflow_builtins.h b/src/include/common/overflow_builtins.h
new file mode 100644
index 00000000000..d77aae01002
--- /dev/null
+++ b/src/include/common/overflow_builtins.h
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// overflow_builtins.h
+//
+// Identification: src/include/common/overflow_builtins.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "common/macros.h"
+
+#include <limits>
+
+//----------------------------------------------------------------------------//
+// Fallback implementations if the gcc overflow builtins are not available.
+//
+// Documentation:
+// https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
+//----------------------------------------------------------------------------//
+
+namespace peloton {
+
+template <typename type_t>
+static inline bool builtin_add_overflow(type_t a, type_t b, type_t *res) {
+  *res = a + b;
+
+  if (a >= 0 && b >= 0 && std::numeric_limits<type_t>::max() - a < b)
+    return true;
+  else if (a < 0 && b < 0 && std::numeric_limits<type_t>::min() - a > b)
+    return true;
+
+  return false;
+}
+
+template <typename type_t>
+static inline bool builtin_sub_overflow(type_t a, type_t b, type_t *res) {
+  *res = a - b;
+
+  if (std::is_unsigned<type_t>::value)
+    return b > a;
+  else
+    return ((((a ^ b)) & (*res ^ a)) & std::numeric_limits<type_t>::min()) !=
+           0;
+}
+
+template <typename type_t>
+static inline bool builtin_mul_overflow(type_t a, type_t b, type_t *res) {
+  *res = a * b;
+
+  if (a != 0 && *res / a != b) return true;
+
+  return false;
+}
+
+#if !GCC_OVERFLOW_BUILTINS_DEFINED
+
+template <typename type_t>
+static inline bool __builtin_add_overflow(type_t a, type_t b, type_t *res) {
+  return builtin_add_overflow(a, b, res);
+}
+
+template <typename type_t>
+static inline bool __builtin_sub_overflow(type_t a, type_t b, type_t *res) {
+  return builtin_sub_overflow(a, b, res);
+}
+
+template <typename type_t>
+static inline bool __builtin_mul_overflow(type_t a, type_t b, type_t *res) {
+  return builtin_mul_overflow(a, b, res);
+}
+
+#endif
+
+}  // namespace peloton
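Like the GCC builtins they mirror, these fallbacks always store the (possibly wrapped) result and return true exactly when the operation overflowed. One caveat: for signed types the eager `*res = a + b` relies on two's-complement wraparound, which the C++ standard technically leaves undefined; the real builtins have no such caveat. A quick usage check (assuming it is compiled inside the Peloton tree):

    #include <cstdio>
    #include <limits>

    #include "common/overflow_builtins.h"

    int main() {
      unsigned int res;
      bool o1 = peloton::builtin_add_overflow(40u, 2u, &res);  // no overflow
      bool o2 = peloton::builtin_add_overflow(
          std::numeric_limits<unsigned int>::max(), 1u, &res);  // wraps to 0
      std::printf("%d %d %u\n", o1, o2, res);  // prints: 0 1 0
      return 0;
    }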
diff --git a/src/include/settings/settings.h b/src/include/settings/settings.h
index 757cc9043e6..a442fd151cf 100644
--- a/src/include/settings/settings.h
+++ b/src/include/settings/settings.h
@@ -200,6 +200,10 @@ SETTING_bool(codegen,
              true,
              true, true)
 
+SETTING_bool(codegen_interpreter,
+             "Force interpretation of generated llvm code (default: false)",
+             false, true, true)
+
 SETTING_bool(print_ir_stats,
              "Print statistics on generated IR (default: false)",
              false,
diff --git a/src/include/util/math_util.h b/src/include/util/math_util.h
new file mode 100644
index 00000000000..b4894421959
--- /dev/null
+++ b/src/include/util/math_util.h
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// math_util.h
+//
+// Identification: src/include/util/math_util.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "common/macros.h"
+
+namespace peloton {
+
+/**
+ * Math Utility Functions
+ */
+class MathUtil {
+ public:
+  /**
+   * Performs a division of two integer values and rounds up the result.
+   * The calculation exploits truncating integer division:
+   * (numerator + denominator - 1) / denominator.
+   */
+  static constexpr ALWAYS_INLINE inline size_t DivRoundUp(size_t numerator,
+                                                          size_t denominator) {
+    // division must be integer division
+    return (numerator + denominator - 1) / denominator;
+  }
+};
+
+}  // namespace peloton
diff --git a/test/codegen/bloom_filter_test.cpp b/test/codegen/bloom_filter_test.cpp
index 032a4ef2250..ef6bf168c05 100644
--- a/test/codegen/bloom_filter_test.cpp
+++ b/test/codegen/bloom_filter_test.cpp
@@ -165,7 +165,7 @@ TEST_F(BloomFilterCodegenTest, FalsePositiveRateTest) {
     func.ReturnAndFinish();
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef void (*ftype)(codegen::util::BloomFilter * bloom_filter, int *, int,
                         int *);
@@ -312,6 +312,7 @@ double BloomFilterCodegenTest::ExecuteJoin(std::string query,
       *plan, executor_context.GetParams().GetQueryParametersMap(), consumer);
 
   // Run
+  compiled_query->Compile();
   compiled_query->Execute(executor_context, consumer, &stats);
 
   LOG_INFO("Execution Time: %0.0f ms", stats.plan_ms);
diff --git a/test/codegen/bytecode_interpreter_test.cpp b/test/codegen/bytecode_interpreter_test.cpp
new file mode 100644
index 00000000000..9231a92c901
--- /dev/null
+++ b/test/codegen/bytecode_interpreter_test.cpp
@@ -0,0 +1,242 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_interpreter_test.cpp
+//
+// Identification: test/codegen/bytecode_interpreter_test.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/interpreter/bytecode_interpreter.h"
+#include "codegen/function_builder.h"
+#include "codegen/interpreter/bytecode_builder.h"
+#include "codegen/lang/loop.h"
+#include "codegen/proxy/runtime_functions_proxy.h"
+#include "common/harness.h"
+
+namespace peloton {
+namespace test {
+
+class BytecodeInterpreterTest : public PelotonTest {};
+
+TEST_F(BytecodeInterpreterTest, PHIResolveTest) {
+  // Create a loop that involves PHIs that have to be converted into move
+  // instructions.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+  codegen::FunctionBuilder main{
+      code_context, "main", cg.Int32Type(), {{"a", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *i = cg.Const32(0);
+
+    codegen::lang::Loop loop{cg, cg.ConstBool(true), {{"i", i}, {"a", a}}};
+    {
+      llvm::Value *i = loop.GetLoopVar(0);
+      llvm::Value *a = loop.GetLoopVar(1);
+
+      a = cg->CreateSub(a, cg.Const32(1));
+      i = cg->CreateAdd(i, cg.Const32(1));
+      loop.LoopEnd(cg->CreateICmpULT(i, cg.Const32(10)), {i, a});
+    }
+
+    std::vector<llvm::Value *> final;
+    loop.CollectFinalLoopVariables(final);
+
+    auto *ret = final[1];
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t arg = 44;
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {arg});
+  ASSERT_EQ(ret, arg - 10);
+}
+
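The next test provokes the classic swap hazard: the PHIs on a loop back edge form one parallel copy, so lowering them as naive sequential moves can overwrite a value that is still needed. The scalar analogue of what the bytecode builder must get right:

    #include <cassert>

    int main() {
      int a = 44, b = 0;

      // PHI semantics at the back edge are a parallel copy: (a, b) = (b, a).
      // Sequential lowering "a = b; b = a;" would lose the old a, which is
      // why an additional move gets inserted:
      int tmp = a;
      a = b;
      b = tmp;

      assert(a == 0 && b == 44);
      return 0;
    }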
+TEST_F(BytecodeInterpreterTest, PHISwapProblemTest) {
+  // Produce the PHI swap problem, where additional moves have to be inserted
+  // in order to retrieve the correct result.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+  codegen::FunctionBuilder main{
+      code_context, "main", cg.Int32Type(), {{"a", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *b = cg.Const32(0);
+    auto *i = cg.Const32(0);
+
+    codegen::lang::Loop loop{
+        cg, cg.ConstBool(true), {{"i", i}, {"a", a}, {"b", b}}};
+    {
+      llvm::Value *i = loop.GetLoopVar(0);
+      llvm::Value *a = loop.GetLoopVar(1);
+      llvm::Value *b = loop.GetLoopVar(2);
+
+      i = cg->CreateAdd(i, cg.Const32(1));
+      loop.LoopEnd(cg->CreateICmpULT(i, cg.Const32(2)), {i, b, a});
+    }
+
+    std::vector<llvm::Value *> final;
+    loop.CollectFinalLoopVariables(final);
+
+    auto *ret = final[1];
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t arg = 44;
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {arg});
+  ASSERT_EQ(ret, arg);
+}
+
+TEST_F(BytecodeInterpreterTest, OverflowIntrinsicsTest) {
+  // Use the overflow intrinsics and retrieve their output. During bytecode
+  // translation the extract instructions get omitted and the values are
+  // written directly to their destination value slot.
+
+  // We call the intrinsics several times and check the results statically
+  // right in the generated function. We merge all checks with AND and return
+  // the result to the test case at the end.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+  codegen::FunctionBuilder main{code_context,
+                                "main",
+                                cg.Int32Type(),
+                                {{"a", cg.Int32Type()}, {"b", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *b = main.GetArgumentByPosition(1);
+    llvm::Value *add_overflow, *sub_overflow;
+    llvm::Value *ret = cg.ConstBool(true);
+
+    auto *add_result = cg.CallAddWithOverflow(a, b, add_overflow);
+    auto *add_result_correct = cg->CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ,
+                                              add_result, cg.Const32(10));
+    ret = cg->CreateAnd(ret, add_result_correct);
+    auto *add_overflow_correct = cg->CreateNot(add_overflow);
+    ret = cg->CreateAnd(ret, add_overflow_correct);
+
+    auto *sub_result =
+        cg.CallSubWithOverflow(cg.Const32(2147483648), b, sub_overflow);
+    auto *sub_result_correct = cg->CreateICmp(
+        llvm::CmpInst::Predicate::ICMP_EQ, sub_result, cg.Const32(2147483642));
+    ret = cg->CreateAnd(ret, sub_result_correct);
+    ret = cg->CreateAnd(ret, sub_overflow);
+
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {4, 6});
+  ASSERT_EQ(ret, 1);
+}
+
+int f(int a, int b) { return a + b; }
+
+TEST_F(BytecodeInterpreterTest, ExternalCallTest) {
+  // Call an external function.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+
+  // create LLVM function declaration
+  auto *func_type = llvm::FunctionType::get(
+      cg.Int32Type(), {cg.Int32Type(), cg.Int32Type()}, false);
+  llvm::Function *func_decl =
+      llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, "f",
+                             &(cg.GetCodeContext().GetModule()));
+  code_context.RegisterExternalFunction(func_decl, (void *)f);
+
+  codegen::FunctionBuilder main{code_context,
+                                "main",
+                                cg.Int32Type(),
+                                {{"a", cg.Int32Type()}, {"b", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *b = main.GetArgumentByPosition(1);
+
+    auto *ret = cg.CallFunc(func_decl, {a, b});
+
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {4, 6});
+  ASSERT_EQ(ret, 10);
+}
+
+TEST_F(BytecodeInterpreterTest, InternalCallTest) {
+  // Call an internal function.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+
+  codegen::FunctionBuilder f{code_context,
+                             "f",
+                             cg.Int32Type(),
+                             {{"a", cg.Int32Type()}, {"b", cg.Int32Type()}}};
+  {
+    auto *a = f.GetArgumentByPosition(0);
+    auto *b = f.GetArgumentByPosition(1);
+
+    auto *ret = cg->CreateAdd(a, b);
+
+    f.ReturnAndFinish(ret);
+  }
+
+  codegen::FunctionBuilder main{code_context,
+                                "main",
+                                cg.Int32Type(),
+                                {{"a", cg.Int32Type()}, {"b", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *b = main.GetArgumentByPosition(1);
+
+    auto *ret = cg.CallFunc(f.GetFunction(), {a, b});
+
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {4, 6});
+  ASSERT_EQ(ret, 10);
+}
+
+}  // namespace test
+}  // namespace peloton
\ No newline at end of file
diff --git a/test/codegen/function_builder_test.cpp b/test/codegen/function_builder_test.cpp
index 1822e384f72..9235da51bb8 100644
--- a/test/codegen/function_builder_test.cpp
+++ b/test/codegen/function_builder_test.cpp
@@ -35,7 +35,7 @@ TEST_F(FunctionBuilderTest, ConstructSingleFunction) {
     func.ReturnAndFinish(cg.Const32(magic_num));
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*func_t)(void);
   func_t fn = (func_t) code_context.GetRawFunctionPointer(func.GetFunction());
@@ -80,7 +80,7 @@ TEST_F(FunctionBuilderTest, ConstructNestedFunction) {
   }
 
   // Make sure we can compile everything
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*func_t)(uint32_t);
   func_t fn = (func_t) code_context.GetRawFunctionPointer(main.GetFunction());
diff --git a/test/codegen/if_test.cpp b/test/codegen/if_test.cpp
index 4c198cea2d3..6c00153269a 100644
--- a/test/codegen/if_test.cpp
+++ b/test/codegen/if_test.cpp
@@ -59,7 +59,7 @@ TEST_F(IfTest, TestIfOnly) {
     func.ReturnAndFinish(cond.BuildPHI(va, vb).GetValue());
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*ftype)(int);
 
@@ -128,7 +128,7 @@ TEST_F(IfTest, TestIfInsideLoop) {
     func.ReturnAndFinish(final[1]);
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*ftype)(int);
 
@@ -174,7 +174,7 @@ TEST_F(IfTest, BreakTest) {
     func.ReturnAndFinish(final[0]);
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*ftype)(int);
 
@@ -235,7 +235,7 @@ TEST_F(IfTest, ComplexNestedIf) {
     func.ReturnAndFinish(cond.BuildPHI(vab, vc).GetValue());
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*ftype)(int);
 
diff --git a/test/codegen/testing_codegen_util.cpp b/test/codegen/testing_codegen_util.cpp
index a19598e33ed..b587aeac199 100644
--- a/test/codegen/testing_codegen_util.cpp
+++ b/test/codegen/testing_codegen_util.cpp
@@ -257,7 +257,7 @@ void PelotonCodeGenTest::CreateAndLoadTableWithLayout(
   txn_manager.CommitTransaction(txn);
 }
 
-codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecute(
+PelotonCodeGenTest::CodeGenStats PelotonCodeGenTest::CompileAndExecute(
     planner::AbstractPlan &plan, codegen::ExecutionConsumer &consumer) {
   codegen::QueryParameters parameters(plan, {});
 
@@ -266,15 +266,18 @@ codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecute(
   auto *txn = txn_manager.BeginTransaction();
 
   // Compile the query.
-  codegen::QueryCompiler::CompileStats stats;
+  CodeGenStats stats;
   auto query = codegen::QueryCompiler().Compile(
-      plan, parameters.GetQueryParametersMap(), consumer, &stats);
+      plan, parameters.GetQueryParametersMap(), consumer, &stats.compile_stats);
 
   // Executor context
   executor::ExecutorContext exec_ctx{txn, std::move(parameters)};
 
-  // Execute the query
-  query->Execute(exec_ctx, consumer);
+  // Compile the query to native code
+  query->Compile();
+
+  // Execute the query
+  query->Execute(exec_ctx, consumer, &stats.runtime_stats);
 
   // Commit the transaction.
   txn_manager.CommitTransaction(txn);
@@ -282,7 +285,7 @@ codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecute(
   return stats;
 }
 
-codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecuteCache(
+PelotonCodeGenTest::CodeGenStats PelotonCodeGenTest::CompileAndExecuteCache(
     std::shared_ptr<planner::AbstractPlan> plan,
    codegen::ExecutionConsumer &consumer, bool &cached,
    std::vector<type::Value> params) {
@@ -294,19 +297,20 @@ codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecuteCache(
       codegen::QueryParameters(*plan, params)};
 
   // Compile
-  codegen::QueryCompiler::CompileStats stats;
+  CodeGenStats stats;
   codegen::Query *query = codegen::QueryCache::Instance().Find(plan);
   cached = (query != nullptr);
 
   if (query == nullptr) {
     codegen::QueryCompiler compiler;
     auto compiled_query = compiler.Compile(
         *plan, exec_ctx.GetParams().GetQueryParametersMap(), consumer);
+    compiled_query->Compile();
     query = compiled_query.get();
     codegen::QueryCache::Instance().Add(plan, std::move(compiled_query));
   }
 
   // Execute the query.
-  query->Execute(exec_ctx, consumer);
+  query->Execute(exec_ctx, consumer, &stats.runtime_stats);
 
   // Commit the transaction.
   txn_manager.CommitTransaction(txn);
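The helpers now separate the three phases explicitly. Condensed from the diff above, the calling pattern looks like this (a sketch against the fixture types, not additional patch content):

    // 1) translate the plan to LLVM IR
    auto query = codegen::QueryCompiler().Compile(
        plan, parameters.GetQueryParametersMap(), consumer,
        &stats.compile_stats);

    // 2) JIT the IR to native code; queries that skip this step are
    //    presumably handled by the bytecode interpreter instead (cf. the
    //    codegen_interpreter setting and Query::ExecuteInterpreter above)
    query->Compile();

    // 3) run init/plan/tearDown and collect runtime stats
    query->Execute(exec_ctx, consumer, &stats.runtime_stats);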
diff --git a/test/codegen/value_integrity_test.cpp b/test/codegen/value_integrity_test.cpp
index 0057721352b..97dfeca23e4 100644
--- a/test/codegen/value_integrity_test.cpp
+++ b/test/codegen/value_integrity_test.cpp
@@ -67,7 +67,7 @@ void DivideByZeroTest(const codegen::type::Type &data_type, ExpressionType op) {
   }
 
   // Should be able to compile
-  EXPECT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef void (*func)(CType);
   func f = (func)code_context.GetRawFunctionPointer(function.GetFunction());
@@ -134,7 +134,7 @@ void OverflowTest(const codegen::type::Type &data_type, ExpressionType op) {
   }
 
   // Should be able to compile
-  EXPECT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef void (*func)(CType);
   func f = (func)code_context.GetRawFunctionPointer(function.GetFunction());
diff --git a/test/common/overflow_builtins_test.cpp b/test/common/overflow_builtins_test.cpp
new file mode 100644
index 00000000000..8abdd86fee9
--- /dev/null
+++ b/test/common/overflow_builtins_test.cpp
@@ -0,0 +1,212 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// overflow_builtins_test.cpp
+//
+// Identification: test/common/overflow_builtins_test.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "common/overflow_builtins.h"
+
+#include "common/harness.h"
+
+#include <limits>
+
+namespace peloton {
+namespace test {
+
+//===--------------------------------------------------------------------===//
+// Testing the fallback functions for overflow aware operations
+//===--------------------------------------------------------------------===//
+
+class OverflowBuiltinsTest : public PelotonTest {
+ public:
+  using unsigned_t = unsigned int;
+  using signed_t = int;
+};
+
+TEST_F(OverflowBuiltinsTest, UnsignedAddTest) {
+  unsigned_t max = std::numeric_limits<unsigned_t>::max();
+  unsigned_t min = std::numeric_limits<unsigned_t>::min();
+
+  unsigned_t c;
+  bool overflow;
+
+  overflow = builtin_add_overflow<unsigned_t>(0, 3, &c);
+  EXPECT_EQ(c, 3);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<unsigned_t>(0, 0, &c);
+  EXPECT_EQ(c, 0);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<unsigned_t>(max - 12, 3, &c);
+  EXPECT_EQ(c, max - 9);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<unsigned_t>(max - 12, 12, &c);
+  EXPECT_EQ(c, max);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<unsigned_t>(max - 12, 13, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_add_overflow<unsigned_t>(max - 12, 21, &c);
+  EXPECT_EQ(c, min + 8);
+  EXPECT_EQ(overflow, true);
+}
+
+TEST_F(OverflowBuiltinsTest, SignedAddTest) {
+  signed_t max = std::numeric_limits<signed_t>::max();
+  signed_t min = std::numeric_limits<signed_t>::min();
+
+  signed_t c;
+  bool overflow;
+
+  overflow = builtin_add_overflow<signed_t>(min, 3, &c);
+  EXPECT_EQ(c, min + 3);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<signed_t>(min, 0, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<signed_t>(0, -12, &c);
+  EXPECT_EQ(c, -12);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<signed_t>(min, -1, &c);
+  EXPECT_EQ(c, max);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_add_overflow<signed_t>(max, 1, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_add_overflow<signed_t>(0, -13, &c);
+  EXPECT_EQ(c, -13);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<signed_t>(-12, 13, &c);
+  EXPECT_EQ(c, 1);
+  EXPECT_EQ(overflow, false);
+}
+
+TEST_F(OverflowBuiltinsTest, UnsignedSubTest) {
+  unsigned_t max = std::numeric_limits<unsigned_t>::max();
+  unsigned_t min = std::numeric_limits<unsigned_t>::min();
+
+  unsigned_t c;
+  bool overflow;
+
+  overflow = builtin_sub_overflow<unsigned_t>(3, 3, &c);
+  EXPECT_EQ(c, 0);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<unsigned_t>(0, 0, &c);
+  EXPECT_EQ(c, 0);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<unsigned_t>(min + 12, 3, &c);
+  EXPECT_EQ(c, min + 9);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<unsigned_t>(min + 12, 12, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<unsigned_t>(min + 12, 13, &c);
+  EXPECT_EQ(c, max);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_sub_overflow<unsigned_t>(min + 12, 15, &c);
+  EXPECT_EQ(c, max - 2);
+  EXPECT_EQ(overflow, true);
+}
+
+TEST_F(OverflowBuiltinsTest, SignedSubTest) {
+  signed_t max = std::numeric_limits<signed_t>::max();
+  signed_t min = std::numeric_limits<signed_t>::min();
+
+  signed_t c;
+  bool overflow;
+
+  overflow = builtin_sub_overflow<signed_t>(min + 3, 3, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<signed_t>(min, 0, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<signed_t>(0, -12, &c);
+  EXPECT_EQ(c, 12);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<signed_t>(min, 1, &c);
+  EXPECT_EQ(c, max);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_sub_overflow<signed_t>(max, -1, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_sub_overflow<signed_t>(0, 13, &c);
+  EXPECT_EQ(c, -13);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<signed_t>(-12, -13, &c);
+  EXPECT_EQ(c, 1);
+  EXPECT_EQ(overflow, false);
+}
+
+TEST_F(OverflowBuiltinsTest, UnsignedMulTest) {
+  unsigned_t max = std::numeric_limits<unsigned_t>::max();
+
+  unsigned_t c;
+  bool overflow;
+
+  overflow = builtin_mul_overflow<unsigned_t>(3, 3, &c);
+  EXPECT_EQ(c, 9);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<unsigned_t>(0, 0, &c);
+  EXPECT_EQ(c, 0);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<unsigned_t>(max, 2, &c);
+  EXPECT_EQ(c, 4294967294);
+  EXPECT_EQ(overflow, true);
+}
+
+TEST_F(OverflowBuiltinsTest, SignedMulTest) {
+  signed_t max = std::numeric_limits<signed_t>::max();
+  // signed_t min = std::numeric_limits<signed_t>::min();
+
+  signed_t c;
+  bool overflow;
+
+  overflow = builtin_mul_overflow<signed_t>(-1, 2, &c);
+  EXPECT_EQ(c, -2);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<signed_t>(2, -4, &c);
+  EXPECT_EQ(c, -8);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<signed_t>(-4, -4, &c);
+  EXPECT_EQ(c, 16);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<signed_t>(max, -2, &c);
+  EXPECT_EQ(c, 2);
+  EXPECT_EQ(overflow, true);
+}
+
+}  // namespace test
+}  // namespace peloton
diff --git a/test/include/codegen/testing_codegen_util.h b/test/include/codegen/testing_codegen_util.h
index c61a47e67c2..fa9fcf852cd 100644
--- a/test/include/codegen/testing_codegen_util.h
+++ b/test/include/codegen/testing_codegen_util.h
@@ -20,8 +20,8 @@
 #include "codegen/execution_consumer.h"
 #include "codegen/value.h"
 #include "common/container_tuple.h"
-#include "expression/constant_value_expression.h"
 #include "expression/comparison_expression.h"
+#include "expression/constant_value_expression.h"
 #include "expression/tuple_value_expression.h"
 #include "planner/binding_context.h"
 #include "storage/data_table.h"
@@ -68,6 +68,11 @@ class PelotonCodeGenTest : public PelotonTest {
   PelotonCodeGenTest(oid_t tuples_per_tilegroup = DEFAULT_TUPLES_PER_TILEGROUP,
                      peloton::LayoutType layout_type = LayoutType::ROW);
 
+  struct CodeGenStats {
+    codegen::QueryCompiler::CompileStats compile_stats;
+    codegen::Query::RuntimeStats runtime_stats;
+  };
+
   virtual ~PelotonCodeGenTest();
 
   // Get the test database
@@ -103,10 +108,10 @@ class PelotonCodeGenTest : public PelotonTest {
                         bool is_inlined);
 
   // Compile and execute the given plan
-  codegen::QueryCompiler::CompileStats CompileAndExecute(
+  CodeGenStats CompileAndExecute(
       planner::AbstractPlan &plan, codegen::ExecutionConsumer &consumer);
 
-  codegen::QueryCompiler::CompileStats CompileAndExecuteCache(
+  CodeGenStats CompileAndExecuteCache(
       std::shared_ptr<planner::AbstractPlan> plan,
       codegen::ExecutionConsumer &consumer, bool &cached,
       std::vector<type::Value> params = {});