diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index e9acc86bae6..e86fae410bd 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -86,6 +86,11 @@ llvm_map_components_to_libnames(LLVM_LIBRARIES core mcjit nativecodegen native) include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) list(APPEND Peloton_LINKER_LIBS ${LLVM_LIBRARIES}) +# --[ FFI +find_package(Libffi) +include_directories(SYSTEM ${LIBFFI_INCLUDE_DIRS}) +list(APPEND Peloton_LINKER_LIBS ${LIBFFI_LIBRARIES}) + # --[ IWYU # Generate clang compilation database diff --git a/cmake/Modules/FindLibffi.cmake b/cmake/Modules/FindLibffi.cmake new file mode 100644 index 00000000000..8e9883967bc --- /dev/null +++ b/cmake/Modules/FindLibffi.cmake @@ -0,0 +1,39 @@ +# - Try to find Libffi +# +# A Portable Foreign Function Interface Library (https://sourceware.org/libffi) +# +# Usage: +# LIBFFI_INCLUDE_DIRS, location of header files +# LIBFFI_LIBRARIES, location of library +# LIBFFI_FOUND, indicates if libffi was found + +# Look for the header file. +execute_process(COMMAND brew --prefix libffi OUTPUT_VARIABLE LIBFFI_BREW_PREFIX) + +find_library(LIBFFI_LIBRARY NAMES ffi libffi + PATHS /usr /usr/local /opt/local + PATH_SUFFIXES lib lib64 x86_64-linux-gnu lib/x86_64-linux-gnu + ) + +find_path(LIBFFI_INCLUDE_DIR ffi.h + PATHS /usr /usr/local /opt/local /usr/include/ffi + PATH_SUFFIXES include include/ffi include/x86_64-linux-gnu x86_64-linux-gnu + HINT LIBFFI_BREW_PREFIX + ) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LIBFFI DEFAULT_MSG LIBFFI_LIBRARY LIBFFI_INCLUDE_DIR) + + +# Copy the results to the output variables. +IF(LIBFFI_FOUND) + SET(LIBFFI_LIBRARIES ${LIBFFI_LIBRARY}) + SET(LIBFFI_INCLUDE_DIRS ${LIBFFI_INCLUDE_DIR}) +ELSE(LIBFFI_FOUND) + SET(LIBFFI_LIBRARIES) + SET(LIBFFI_INCLUDE_DIRS) +ENDIF(LIBFFI_FOUND) + +MARK_AS_ADVANCED(LIBFFI_INCLUDE_DIRS LIBFFI_LIBRARIES) + +message(STATUS "Found Libffi (include: ${LIBFFI_INCLUDE_DIRS}, library: ${LIBFFI_LIBRARIES})") \ No newline at end of file diff --git a/script/installation/packages.sh b/script/installation/packages.sh index dec631fab98..1dff718b7d6 100755 --- a/script/installation/packages.sh +++ b/script/installation/packages.sh @@ -176,6 +176,8 @@ if [ "$DISTRO" = "UBUNTU" ]; then libedit-dev \ libssl-dev \ postgresql-client \ + libffi6 \ + libffi-dev \ libtbb-dev \ python3-pip \ curl \ @@ -219,6 +221,7 @@ elif [ "$DISTRO" = "DARWIN" ]; then brew install libedit brew install llvm@3.7 brew install postgresql + brew install libffi brew install tbb brew install curl brew install wget diff --git a/src/codegen/code_context.cpp b/src/codegen/code_context.cpp index cffdd6e22f2..14731e4fdc1 100644 --- a/src/codegen/code_context.cpp +++ b/src/codegen/code_context.cpp @@ -44,8 +44,9 @@ namespace { class PelotonMemoryManager : public llvm::SectionMemoryManager { public: explicit PelotonMemoryManager( - const std::unordered_map &symbols) - : symbols_(symbols) {} + const std::unordered_map> &builtins) + : builtins_(builtins) {} #if LLVM_VERSION_GE(4, 0) #define RET_TYPE llvm::JITSymbol @@ -56,8 +57,6 @@ class PelotonMemoryManager : public llvm::SectionMemoryManager { #define BUILD_RET_TYPE(addr) \ (RET_TYPE{(uint64_t)addr, llvm::JITSymbolFlags::Exported}) #endif - - /// Find the address of the function with the given name RET_TYPE findSymbol(const std::string &name) override { LOG_TRACE("Looking up symbol '%s' ...", name.c_str()); if (auto *builtin = LookupSymbol(name)) { @@ -68,23 +67,22 @@ class PelotonMemoryManager : public 
llvm::SectionMemoryManager {
     LOG_TRACE("--> Not builtin, use fallback resolution ...");
     return llvm::SectionMemoryManager::findSymbol(name);
   }
-
 #undef RET_TYPE
 #undef BUILD_RET_TYPE
 
  private:
   void *LookupSymbol(const std::string &name) const {
     // Check for a builtin with the exact name
-    auto symbol_iter = symbols_.find(name);
-    if (symbol_iter != symbols_.end()) {
-      return symbol_iter->second;
+    auto symbol_iter = builtins_.find(name);
+    if (symbol_iter != builtins_.end()) {
+      return symbol_iter->second.second;
     }
 
     // Check for a builtin with the leading '_' removed
     if (!name.empty() && name[0] == '_') {
-      symbol_iter = symbols_.find(name.substr(1));
-      if (symbol_iter != symbols_.end()) {
-        return symbol_iter->second;
+      symbol_iter = builtins_.find(name.substr(1));
+      if (symbol_iter != builtins_.end()) {
+        return symbol_iter->second.second;
       }
     }
 
@@ -94,7 +92,9 @@ class PelotonMemoryManager : public llvm::SectionMemoryManager {
  private:
   // The registered builtins, mapped by name
-  const std::unordered_map<std::string, CodeContext::FuncPtr> &symbols_;
+  const std::unordered_map<
+      std::string, std::pair<llvm::Function *, CodeContext::FuncPtr>>
+      &builtins_;
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -177,7 +177,8 @@ CodeContext::CodeContext()
       func_(nullptr),
       udf_func_ptr_(nullptr),
       pass_manager_(nullptr),
-      engine_(nullptr) {
+      engine_(nullptr),
+      is_verified_(false) {
   // Initialize JIT stuff
   llvm::InitializeNativeTarget();
   llvm::InitializeNativeTargetAsmPrinter();
@@ -200,8 +201,7 @@ CodeContext::CodeContext()
   engine_.reset(
       llvm::EngineBuilder(std::move(m))
           .setEngineKind(llvm::EngineKind::JIT)
-          .setMCJITMemoryManager(
-              llvm::make_unique<PelotonMemoryManager>(function_symbols_))
+          .setMCJITMemoryManager(llvm::make_unique<PelotonMemoryManager>(builtins_))
           .setMCPU(llvm::sys::getHostCPUName())
           .setErrorStr(&err_str_)
           .create());
@@ -223,6 +223,7 @@ CodeContext::CodeContext()
   int32_type_ = llvm::Type::getInt32Ty(*context_);
   int64_type_ = llvm::Type::getInt64Ty(*context_);
   double_type_ = llvm::Type::getDoubleTy(*context_);
+  float_type_ = llvm::Type::getFloatTy(*context_);
   void_type_ = llvm::Type::getVoidTy(*context_);
   void_ptr_type_ = llvm::Type::getInt8PtrTy(*context_);
   char_ptr_type_ = llvm::Type::getInt8PtrTy(*context_);
@@ -251,14 +252,13 @@ void CodeContext::RegisterExternalFunction(llvm::Function *func_decl,
   PELOTON_ASSERT(func_impl != nullptr && "The function pointer cannot be NULL");
   functions_.emplace_back(func_decl, func_impl);
 
-  // Register the builtin symbol by name
-  function_symbols_[func_decl->getName()] = func_impl;
+  builtins_[func_decl->getName()] = std::make_pair(func_decl, func_impl);
 }
 
 void CodeContext::RegisterBuiltin(llvm::Function *func_decl,
                                   CodeContext::FuncPtr func_impl) {
   const auto name = func_decl->getName();
-  if (LookupBuiltin(name) != nullptr) {
+  if (LookupBuiltin(name).first != nullptr) {
     LOG_DEBUG("Builtin '%s' already registered, skipping ...", name.data());
     return;
   }
@@ -268,36 +268,50 @@ void CodeContext::RegisterBuiltin(llvm::Function *func_decl,
                  func_decl->isDeclaration() &&
                  "You cannot provide a function definition for a builtin function");
 
-  // Register the builtin function
-  builtins_[name] = func_decl;
-
-  // Register the builtin symbol by name
-  function_symbols_[name] = func_impl;
+  // Register the builtin function with type and implementation
+  builtins_[name] = std::make_pair(func_decl, func_impl);
 }
 
-llvm::Function *CodeContext::LookupBuiltin(const std::string &name) const {
+std::pair<llvm::Function *, CodeContext::FuncPtr> CodeContext::LookupBuiltin(
+    const std::string &name) const {
   auto iter = builtins_.find(name);
-  return (iter == builtins_.end() ?
nullptr : iter->second);
+  return (iter == builtins_.end()
+              ? std::make_pair<llvm::Function *, CodeContext::FuncPtr>(nullptr, nullptr)
+              : iter->second);
 }
 
-/// Optimize and JIT compile all the functions that were created in this context
-bool CodeContext::Compile() {
+/// Verify all the functions that were created in this context
+void CodeContext::Verify() {
   // Verify the module is okay
   llvm::raw_ostream &errors = llvm::errs();
   if (llvm::verifyModule(*module_, &errors)) {
-    // There is an error in the module that failed compilation.
+    // There is an error in the module.
     // Dump the crappy IR to the log ...
    LOG_ERROR("ERROR IN MODULE:\n%s\n", GetIR().c_str());
-    return false;
+
+    throw Exception("The generated LLVM code contains errors.");
   }
 
+  // All is well
+  is_verified_ = true;
+}
+
+/// Optimize all the functions that were created in this context
+void CodeContext::Optimize() {
+  // Make sure the code is verified
+  if (!is_verified_) Verify();
+
   // Run the optimization passes over each function in this module
   pass_manager_->doInitialization();
   for (auto &func_iter : functions_) {
     pass_manager_->run(*func_iter.first);
   }
   pass_manager_->doFinalization();
+}
+
+/// JIT compile all the functions that were created in this context
+void CodeContext::Compile() {
+  // Make sure the code is verified
+  if (!is_verified_) Verify();
+
   // Print some IR stats
   if (settings::SettingsManager::GetBool(settings::SettingId::print_ir_stats)) {
     char name[] = "inst count";
     InstructionCounts inst_count(*name);
@@ -305,7 +319,7 @@ bool CodeContext::Compile() {
     inst_count.DumpStats();
   }
 
-  // Functions and module have been optimized, now JIT compile the module
+  // JIT compile the module
   engine_->finalizeObject();
 
   // Pull out the compiled function implementations
@@ -314,14 +328,34 @@ bool CodeContext::Compile() {
   }
 
   // Log the module
+  LOG_TRACE("%s\n", GetIR().c_str());
   if (settings::SettingsManager::GetBool(settings::SettingId::dump_ir)) {
     LOG_DEBUG("%s\n", GetIR().c_str());
   }
+}
 
-  // All is well
-  return true;
+size_t CodeContext::GetTypeSize(llvm::Type *type) const {
+  auto size = GetDataLayout().getTypeSizeInBits(type) / 8;
+  return size != 0 ? size : 1;
+}
+
+size_t CodeContext::GetTypeSizeInBits(llvm::Type *type) const {
+  return GetDataLayout().getTypeSizeInBits(type);
+}
+
+size_t CodeContext::GetTypeAllocSize(llvm::Type *type) const {
+  return GetDataLayout().getTypeAllocSize(type);
+}
+
+size_t CodeContext::GetTypeAllocSizeInBits(llvm::Type *type) const {
+  return GetDataLayout().getTypeAllocSizeInBits(type);
+}
+
+size_t CodeContext::GetStructElementOffset(llvm::StructType *type,
+                                           size_t index) const {
+  return GetDataLayout().getStructLayout(type)->getElementOffset(index);
+}
 
+// TODO(marcel) same as LookupBuiltin?
CodeContext::FuncPtr CodeContext::GetRawFunctionPointer(
    llvm::Function *fn) const {
  for (const auto &iter : functions_) {
    if (iter.first == fn) {
      return iter.second;
    }
  }
@@ -334,6 +368,7 @@ CodeContext::FuncPtr CodeContext::GetRawFunctionPointer(
   return nullptr;
 }
 
+/// Get the module's layout
 const llvm::DataLayout &CodeContext::GetDataLayout() const {
   return module_->getDataLayout();
 }
diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp
index b810fd4c092..9b93049cfe1 100644
--- a/src/codegen/codegen.cpp
+++ b/src/codegen/codegen.cpp
@@ -150,7 +150,7 @@ llvm::Value *CodeGen::CallFunc(llvm::Value *fn,
 llvm::Value *CodeGen::Printf(const std::string &format,
                              const std::vector<llvm::Value *> &args) {
-  auto *printf_fn = LookupBuiltin("printf");
+  auto *printf_fn = LookupBuiltin("printf").first;
   if (printf_fn == nullptr) {
 #if GCC_AT_LEAST_6
     // In newer GCC versions (i.e., GCC 6+), function attributes are part of the
@@ -183,7 +183,7 @@ llvm::Value *CodeGen::Printf(const std::string &format,
 llvm::Value *CodeGen::Memcmp(llvm::Value *ptr1, llvm::Value *ptr2,
                              llvm::Value *len) {
   static constexpr char kMemcmpFnName[] = "memcmp";
-  auto *memcmp_fn = LookupBuiltin(kMemcmpFnName);
+  auto *memcmp_fn = LookupBuiltin(kMemcmpFnName).first;
   if (memcmp_fn == nullptr) {
 #if GCC_AT_LEAST_6
     // In newer GCC versions (i.e., GCC 6+), function attributes are part of the
@@ -311,7 +311,7 @@ llvm::Function *CodeGen::RegisterBuiltin(const std::string &fn_name,
                                          llvm::FunctionType *fn_type,
                                          void *func_impl) {
   // Check if this is already registered as a builtin; quit early if so
-  auto *builtin = LookupBuiltin(fn_name);
+  auto *builtin = LookupBuiltin(fn_name).first;
   if (builtin != nullptr) {
     return builtin;
   }
@@ -332,6 +332,10 @@ llvm::Type *CodeGen::LookupType(const std::string &name) const {
   return GetModule().getTypeByName(name);
 }
 
+std::pair<llvm::Function *, CodeContext::FuncPtr> CodeGen::LookupBuiltin(
+    const std::string &name) const {
+  return code_context_.LookupBuiltin(name);
+}
+
 llvm::Value *CodeGen::GetState() const {
   auto *func_builder = code_context_.GetCurrentFunction();
   PELOTON_ASSERT(func_builder != nullptr);
@@ -346,6 +350,20 @@ uint64_t CodeGen::SizeOf(llvm::Type *type) const {
   return size != 0 ?
size : 1; } +std::string CodeGen::Dump(const llvm::Value *value) { + std::string string; + llvm::raw_string_ostream llvm_stream(string); + llvm_stream << *value; + return llvm_stream.str(); +} + +std::string CodeGen::Dump(llvm::Type *type) { + std::string string; + llvm::raw_string_ostream llvm_stream(string); + llvm_stream << *type; + return llvm_stream.str(); +} + uint64_t CodeGen::ElementOffset(llvm::Type *type, uint32_t element_idx) const { PELOTON_ASSERT(llvm::isa(type)); auto &data_layout = code_context_.GetDataLayout(); diff --git a/src/codegen/compilation_context.cpp b/src/codegen/compilation_context.cpp index d7f65dafcda..82cd32b38d9 100644 --- a/src/codegen/compilation_context.cpp +++ b/src/codegen/compilation_context.cpp @@ -97,17 +97,13 @@ void CompilationContext::GeneratePlan(Query &query, } // Next, we prepare the query statement with the functions we've generated - Query::QueryFunctions funcs = { - .init_func = init, .plan_func = plan, .tear_down_func = tear_down}; - bool prepared = query.Prepare(funcs); - if (!prepared) { - throw Exception{"There was an error preparing the compiled query"}; - } + Query::LLVMFunctions funcs = {init, plan, tear_down}; + query.Prepare(funcs); // We're done if (stats != nullptr) { timer.Stop(); - stats->jit_ms = timer.GetDuration(); + stats->optimize_ms = timer.GetDuration(); } } diff --git a/src/codegen/interpreter/bytecode_builder.cpp b/src/codegen/interpreter/bytecode_builder.cpp new file mode 100644 index 00000000000..57295da2567 --- /dev/null +++ b/src/codegen/interpreter/bytecode_builder.cpp @@ -0,0 +1,1885 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// bytecode_builder.cpp +// +// Identification: src/codegen/interpreter/bytecode_builder.cpp +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#include "codegen/interpreter/bytecode_builder.h" + +#include +#include + +#include "codegen/codegen.h" +#include "common/exception.h" +#include "util/math_util.h" + +namespace peloton { +namespace codegen { +namespace interpreter { + +BytecodeBuilder::BytecodeBuilder(const CodeContext &code_context, + const llvm::Function *function) + : bytecode_function_(function->getName().str()), + number_value_slots_(0), + number_temporary_value_slots_(0), + rpo_traversal_(function), + code_context_(code_context), + llvm_function_(function) {} + +BytecodeFunction BytecodeBuilder::CreateBytecodeFunction( + const CodeContext &code_context, const llvm::Function *function, + bool use_naive_register_allocator) { + BytecodeBuilder builder(code_context, function); + builder.AnalyseFunction(); + + if (use_naive_register_allocator) { + builder.PerformNaiveRegisterAllocation(); + } else { + builder.PerformGreedyRegisterAllocation(); + } + + builder.TranslateFunction(); + builder.Finalize(); + + return std::move(builder.bytecode_function_); +} + +Opcode BytecodeBuilder::GetOpcodeForTypeAllTypes(Opcode untyped_op, + llvm::Type *type) const { + index_t id = BytecodeFunction::GetOpcodeId(untyped_op); + + // This function highly depends on the macros in bytecode_instructions.def! 
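+  //
+  // A sketch of the assumed layout (opcode names illustrative, not taken
+  // verbatim from bytecode_instructions.def): each typed opcode family is
+  // expanded contiguously in the order i8, i16, i32, i64, float, double, e.g.
+  //   add_i8, add_i16, add_i32, add_i64, add_float, add_double
+  // so for an i32 operand, GetOpcodeFromId(id + 2) selects add_i32.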
+ + if (type == code_context_.bool_type_ || type == code_context_.int8_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 0); + } else if (type == code_context_.int16_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 1); + } else if (type == code_context_.int32_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 2); + } else if (type == code_context_.int64_type_ || + type == code_context_.char_ptr_type_ || type->isPointerTy()) { + return BytecodeFunction::GetOpcodeFromId(id + 3); + } else if (type == code_context_.float_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 4); + } else if (type == code_context_.double_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 5); + } else { + throw NotSupportedException("llvm type not supported: " + + CodeGen::Dump(type)); + } +} + +Opcode BytecodeBuilder::GetOpcodeForTypeIntTypes(Opcode untyped_op, + llvm::Type *type) const { + index_t id = BytecodeFunction::GetOpcodeId(untyped_op); + + // This function highly depends on the macros in bytecode_instructions.def! + + if (type == code_context_.bool_type_ || type == code_context_.int8_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 0); + } else if (type == code_context_.int16_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 1); + } else if (type == code_context_.int32_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 2); + } else if (type == code_context_.int64_type_ || + type == code_context_.char_ptr_type_ || type->isPointerTy()) { + return BytecodeFunction::GetOpcodeFromId(id + 3); + } else { + throw NotSupportedException("llvm type not supported: " + + CodeGen::Dump(type)); + } +} + +Opcode BytecodeBuilder::GetOpcodeForTypeFloatTypes(Opcode untyped_op, + llvm::Type *type) const { + index_t id = BytecodeFunction::GetOpcodeId(untyped_op); + + // This function highly depends on the macros in bytecode_instructions.def! + + // float is missing! + if (type == code_context_.float_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 0); + } else if (type == code_context_.double_type_) { + return BytecodeFunction::GetOpcodeFromId(id + 1); + } else { + throw NotSupportedException("llvm type not supported: " + + CodeGen::Dump(type)); + } +} + +Opcode BytecodeBuilder::GetOpcodeForTypeSizeIntTypes(Opcode untyped_op, + llvm::Type *type) const { + index_t id = BytecodeFunction::GetOpcodeId(untyped_op); + + // This function highly depends on the macros in bytecode_instructions.def! 
+
+  switch (code_context_.GetTypeSize(type)) {
+    case 1:
+      return BytecodeFunction::GetOpcodeFromId(id + 0);
+
+    case 2:
+      return BytecodeFunction::GetOpcodeFromId(id + 1);
+
+    case 4:
+      return BytecodeFunction::GetOpcodeFromId(id + 2);
+
+    case 8:
+      return BytecodeFunction::GetOpcodeFromId(id + 3);
+
+    default:
+      throw NotSupportedException("llvm type size not supported: " +
+                                  CodeGen::Dump(type));
+  }
+}
+
+Instruction &BytecodeBuilder::InsertBytecodeInstruction(
+    const llvm::Instruction *llvm_instruction, Opcode opcode,
+    const std::vector<index_t> &args) {
+  PELOTON_ASSERT(opcode != Opcode::undefined);
+
+  // calculate number of required instruction slots
+  // args.size() + 1 because of the Opcode
+  const size_t number_instruction_slots = MathUtil::DivRoundUp(
+      sizeof(uint16_t) * (1 + args.size()), sizeof(instr_slot_t));
+
+  bytecode_function_.bytecode_.insert(bytecode_function_.bytecode_.end(),
+                                      number_instruction_slots, 0);
+  Instruction &instruction = *reinterpret_cast<Instruction *>(
+      &*(bytecode_function_.bytecode_.end() - number_instruction_slots));
+  instruction.op = opcode;
+  for (size_t i = 0; i < args.size(); i++) instruction.args[i] = args[i];
+
+  AddInstructionToTrace(llvm_instruction, number_instruction_slots);
+
+  return instruction;
+}
+
+Instruction &BytecodeBuilder::InsertBytecodeInstruction(
+    const llvm::Instruction *llvm_instruction, Opcode opcode,
+    const std::vector<const llvm::Value *> &args) {
+  PELOTON_ASSERT(opcode != Opcode::undefined);
+
+  std::vector<index_t> args_transformed(args.size());
+  std::transform(
+      args.begin(), args.end(), args_transformed.begin(),
+      [this](const llvm::Value *value) { return GetValueSlot(value); });
+
+  return InsertBytecodeInstruction(llvm_instruction, opcode, args_transformed);
+}
+
+ExternalCallInstruction &BytecodeBuilder::InsertBytecodeExternalCallInstruction(
+    const llvm::Instruction *llvm_instruction, index_t call_context,
+    void *function) {
+  // calculate number of required instruction slots and assert it is 2
+  // (this way we recognize any unintended size changes)
+  const size_t number_instruction_slots = MathUtil::DivRoundUp(
+      sizeof(ExternalCallInstruction), sizeof(instr_slot_t));
+  PELOTON_ASSERT(number_instruction_slots == 2);
+
+  bytecode_function_.bytecode_.insert(bytecode_function_.bytecode_.end(),
+                                      number_instruction_slots, 0);
+
+  ExternalCallInstruction instruction = {
+      Opcode::call_external, call_context,
+      reinterpret_cast<void (*)(void)>(function)};
+
+  instr_slot_t *instruction_slot =
+      &*(bytecode_function_.bytecode_.end() - number_instruction_slots);
+  ExternalCallInstruction *call_instruction_slot =
+      reinterpret_cast<ExternalCallInstruction *>(instruction_slot);
+  *call_instruction_slot = instruction;
+
+  AddInstructionToTrace(llvm_instruction, number_instruction_slots);
+
+  return reinterpret_cast<ExternalCallInstruction &>(
+      bytecode_function_.bytecode_[bytecode_function_.bytecode_.size() -
+                                   number_instruction_slots]);
+}
+
+InternalCallInstruction &BytecodeBuilder::InsertBytecodeInternalCallInstruction(
+    const llvm::Instruction *llvm_instruction, index_t sub_function,
+    index_t dest_slot, size_t number_arguments) {
+  // calculate number of required instruction slots
+  // number_arguments + 4 because of the number of fixed arguments
+  // (see structure of InternalCallInstruction)
+  const size_t number_instruction_slots = MathUtil::DivRoundUp(
+      sizeof(uint16_t) * (4 + number_arguments), sizeof(instr_slot_t));
+
+  bytecode_function_.bytecode_.insert(bytecode_function_.bytecode_.end(),
+                                      number_instruction_slots, 0);
+  InternalCallInstruction &instruction =
+      *reinterpret_cast<InternalCallInstruction *>(
&*(bytecode_function_.bytecode_.end() - number_instruction_slots)); + instruction.op = Opcode::call_internal; + instruction.sub_function = sub_function; + instruction.dest_slot = dest_slot; + instruction.number_args = static_cast(number_arguments); + + PELOTON_ASSERT( + &instruction.args[number_arguments - 1] < + reinterpret_cast(&bytecode_function_.bytecode_.back() + 1)); + + AddInstructionToTrace(llvm_instruction, number_instruction_slots); + + return reinterpret_cast( + *(bytecode_function_.bytecode_.end() - number_instruction_slots)); +} + +#ifndef NDEBUG +void BytecodeBuilder::AddInstructionToTrace( + const llvm::Instruction *llvm_instruction, + size_t number_instruction_slots) { + bytecode_function_.instruction_trace_.insert( + bytecode_function_.instruction_trace_.end(), number_instruction_slots, + llvm_instruction); +} +#endif + +BytecodeBuilder::value_index_t BytecodeBuilder::GetValueIndex( + const llvm::Value *value) { + auto result = value_mapping_.find(value); + + // If the index already exists, just return it + if (result != value_mapping_.end()) { + return result->second; + } + + // Otherwise create a new index + + // Special case for constants + if (auto *llvm_constant = llvm::dyn_cast(value)) { + return GetConstantIndex(llvm_constant); + } + + value_index_t value_index = value_liveness_.size(); + value_mapping_[value] = value_index; + value_liveness_.emplace_back(std::numeric_limits::max(), + std::numeric_limits::max()); + return value_index; +} + +BytecodeBuilder::value_index_t BytecodeBuilder::CreateValueAlias( + const llvm::Value *alias, value_index_t value_index) { + PELOTON_ASSERT(value_mapping_.find(alias) == value_mapping_.end()); + value_mapping_[alias] = value_index; + + return value_index; +} + +value_t BytecodeBuilder::GetConstantValue( + const llvm::Constant *constant) const { + llvm::Type *type = constant->getType(); + + if (constant->isNullValue() || constant->isZeroValue() || llvm::isa(constant)) { + return 0; + } else { + switch (type->getTypeID()) { + case llvm::Type::IntegerTyID: { + int64_t value_signed = + llvm::cast(constant)->getSExtValue(); + return *reinterpret_cast(&value_signed); + } + + case llvm::Type::FloatTyID: { + float value_float = llvm::cast(constant) + ->getValueAPF() + .convertToFloat(); + return *reinterpret_cast(&value_float); + } + + case llvm::Type::DoubleTyID: { + double value_double = llvm::cast(constant) + ->getValueAPF() + .convertToDouble(); + + return *reinterpret_cast(&value_double); + } + + case llvm::Type::PointerTyID: { + if (constant->getNumOperands() > 0) { + if (auto *constant_int = + llvm::dyn_cast(constant->getOperand(0))) { + return reinterpret_cast(constant_int->getZExtValue()); + } + } + + PELOTON_FALLTHROUGH; + } + + default: + throw NotSupportedException("unsupported constant type: " + + CodeGen::Dump(constant->getType())); + } + } +} + +BytecodeBuilder::value_index_t BytecodeBuilder::GetConstantIndex( + const llvm::Constant *constant) { + auto value_mapping_result = value_mapping_.find(constant); + if (value_mapping_result != value_mapping_.end()) { + return value_mapping_result->second; + } + + value_t value = GetConstantValue(constant); + value_index_t value_index; + + // We merge all constants that share the same value (not the type!) 
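+  // (Illustrative example of this merging: the i32 constant 0, the i64
+  // constant 0 and a null pointer all map to the value_t 0, so they end up
+  // sharing a single constants_ entry and a single value slot.)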
+ + // Check if entry with this value already exists + auto constant_result = std::find(bytecode_function_.constants_.begin(), + bytecode_function_.constants_.end(), value); + + if (constant_result == bytecode_function_.constants_.end()) { + // create new constant with that value + value_index = value_liveness_.size(); + value_mapping_[constant] = value_index; + value_liveness_.emplace_back(0, 0); // constant liveness starts at 0 + + bytecode_function_.constants_.push_back(value); + constant_value_indexes_.push_back(value_index); + + // constants liveness starts at program start + value_liveness_[value_index].first = 0; + } else { + // value already exists, create alias + auto constant_index = + constant_result - bytecode_function_.constants_.begin(); + value_index = constant_value_indexes_[constant_index]; + CreateValueAlias(constant, value_index); + } + + return value_index; +}; + +index_t BytecodeBuilder::GetValueSlot(const llvm::Value *value) const { + auto result = value_mapping_.find(value); + PELOTON_ASSERT(result != value_mapping_.end()); + + return value_slots_[result->second]; +} + +void BytecodeBuilder::ExtendValueLiveness( + const llvm::Value *llvm_value, instruction_index_t instruction_index) { + value_index_t value_index = GetValueIndex(llvm_value); + + // Special case if no liveness information is available yet + if (value_liveness_[value_index].first == + std::numeric_limits::max()) { + value_liveness_[value_index].first = instruction_index; + value_liveness_[value_index].second = instruction_index; + return; + } + + if (instruction_index < value_liveness_[value_index].first) { + value_liveness_[value_index].first = instruction_index; + } else if (instruction_index > value_liveness_[value_index].second) { + value_liveness_[value_index].second = instruction_index; + } +} + +index_t BytecodeBuilder::GetTemporaryValueSlot(const llvm::BasicBlock *bb) { + // we basically count the number of additional value slots that are + // requested per basic block + + // new entry in map is created automatically if necessary + number_temporary_values_[bb]++; + + number_temporary_value_slots_ = + std::max(number_temporary_value_slots_, + static_cast(number_temporary_values_[bb])); + return number_value_slots_ + number_temporary_values_[bb] - 1; +} + +ffi_type *BytecodeBuilder::GetFFIType(llvm::Type *type) const { + if (type->isVoidTy()) { + return &ffi_type_void; + } else if (type->isPointerTy()) { + return &ffi_type_pointer; + } else if (type == code_context_.double_type_) { + return &ffi_type_double; + } + + // exact type not necessary, only size is important + switch (code_context_.GetTypeSize(type)) { + case 1: + return &ffi_type_uint8; + + case 2: + return &ffi_type_uint16; + + case 4: + return &ffi_type_uint32; + + case 8: + return &ffi_type_uint64; + + default: + throw NotSupportedException( + std::string("can't find a ffi_type for type: ") + + CodeGen::Dump(type)); + } +} + +bool BytecodeBuilder::IsConstantValue(const llvm::Value *value) const { + auto *constant = llvm::dyn_cast(value); + return (constant != nullptr); +} + +int64_t BytecodeBuilder::GetConstantIntegerValueSigned( + llvm::Value *constant) const { + return llvm::cast(constant)->getSExtValue(); +} + +uint64_t BytecodeBuilder::GetConstantIntegerValueUnsigned( + llvm::Value *constant) const { + return llvm::cast(constant)->getZExtValue(); +} + +bool BytecodeBuilder::BasicBlockIsRPOSucc(const llvm::BasicBlock *bb, + const llvm::BasicBlock *succ) const { + // walk the vector where we saved the basic block pointers in R + // 
reverse post order (RPO)
+  for (size_t i = 0; i < bb_reverse_post_order_.size() - 1; i++) {
+    if (bb_reverse_post_order_[i] == bb &&
+        bb_reverse_post_order_[i + 1] == succ) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void BytecodeBuilder::AnalyseFunction() {
+  std::unordered_map<const llvm::BasicBlock *,
+                     std::pair<instruction_index_t, instruction_index_t>>
+      bb_instruction_index_range;
+
+  /* The analyse pass does:
+   * - determine the liveness of all values
+   * - merge values of instructions that translate to nop
+   * - merge constants and create list of constants
+   * - extract some additional information, e.g. for overflow aware operations
+   */
+
+  // Process function arguments
+  for (auto &argument : llvm_function_->args()) {
+    // DEF: function arguments are already defined at function start
+    ExtendValueLiveness(&argument, 0);
+  }
+
+  instruction_index_t instruction_index = 0;
+  for (llvm::ReversePostOrderTraversal<const llvm::Function *>::rpo_iterator
+           traversal_iterator = rpo_traversal_.begin();
+       traversal_iterator != rpo_traversal_.end(); ++traversal_iterator) {
+    const llvm::BasicBlock *bb = *traversal_iterator;
+
+    // Add this basic block to the rpo vector for pred/succ lookups
+    bb_reverse_post_order_.push_back(bb);
+
+    bb_instruction_index_range[bb].first = instruction_index;
+
+    // Iterate all instructions to collect the liveness information
+    // There are exceptions for several instructions,
+    // which are labeled and explained below.
+    for (llvm::BasicBlock::const_iterator instr_iterator = bb->begin();
+         instr_iterator != bb->end(); ++instr_iterator, ++instruction_index) {
+      const llvm::Instruction *instruction = instr_iterator;
+
+      bool is_non_zero_gep = false;
+      if (instruction->getOpcode() == llvm::Instruction::GetElementPtr &&
+          !llvm::cast<llvm::GetElementPtrInst>(instruction)
+               ->hasAllZeroIndices()) {
+        is_non_zero_gep = true;
+      }
+
+      // PHI handling:
+      // We do not process the PHI instructions directly; at the end of a
+      // basic block, we process all PHI instructions of the successor blocks
+      // that refer to the current basic block. This is the position where we
+      // will insert the mov instructions when we resolve the PHIs later.
+
+      // Skip PHI instructions
+      if (instruction->getOpcode() == llvm::Instruction::PHI) {
+        continue;
+      }
+
+      // If the instruction is a terminator, process the PHIs of the
+      // succeeding basic blocks first
+      if (llvm::isa<llvm::TerminatorInst>(instruction)) {
+        bool found_back_edge = false;
+
+        // For all successor basic blocks
+        for (auto succ_iterator = llvm::succ_begin(bb);
+             succ_iterator != llvm::succ_end(bb); ++succ_iterator) {
+          // Iterate phi instructions
+          for (llvm::BasicBlock::const_iterator instr_iterator =
+                   succ_iterator->begin();
+               auto *phi_instruction =
+                   llvm::dyn_cast<const llvm::PHINode>(&*instr_iterator);
+               ++instr_iterator) {
+            // extend lifetime of the phi value itself
+            ExtendValueLiveness(phi_instruction, instruction_index);
+
+            // extend lifetime of its operand
+            llvm::Value *phi_operand =
+                phi_instruction->getIncomingValueForBlock(bb);
+            // Similar to Exception 4, we extend the lifetime by one to ensure
+            // the other phi operations do not overwrite the operand
+            ExtendValueLiveness(phi_operand, instruction_index + 1);
+          }  // end iterate phi instructions
+
+          // We also use the iteration over basic block successors to find
+          // back edges: if we have seen a successor basic block before, it
+          // must be a back edge.
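+          // (Example: for a loop bb1 -> bb2 -> bb1, bb1 is traversed before
+          // bb2, so seeing bb1 again as a successor of bb2 reveals the back
+          // edge; every value live across it must keep its slot for the
+          // whole loop body, which the extension below ensures.)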
+ if (!found_back_edge) { + auto instruction_index_range = + bb_instruction_index_range.find(*succ_iterator); + if (instruction_index_range != bb_instruction_index_range.end()) { + index_t back_edge_instruction_index = + instruction_index_range->second.first; + + // For all values that are live at that time... + for (auto &liveness : value_liveness_) { + if (liveness.first < back_edge_instruction_index && + liveness.second >= back_edge_instruction_index) { + // ...extend lifetime of this value to survive back edge + // instruction_index + 1 is the index of the last + // instruction in this basic block + liveness.second = instruction_index + 1; + } + } + + found_back_edge = true; + } + } + } // end iterate successor basic blocks + + instruction_index++; + + // fall through (continue with terminator instruction) + } + + // Exception 1: Skip the ExtractValue instructions we already + // processed in Exception 6 + if (instruction->getOpcode() == llvm::Instruction::ExtractValue) { + auto *extractvalue_instruction = + llvm::cast(instruction); + + // Check if this extract refers to a overflow call instruction + auto result = overflow_results_mapping_.find( + llvm::cast(instruction->getOperand(0))); + if (result != overflow_results_mapping_.end() && + (result->second.first == extractvalue_instruction || + result->second.second == extractvalue_instruction)) { + continue; + } + + // fall through + } + + // USE: Iterate operands of instruction and extend their liveness + for (llvm::Instruction::const_op_iterator op_iterator = + instruction->op_begin(); + op_iterator != instruction->op_end(); ++op_iterator) { + llvm::Value *operand = op_iterator->get(); + + // constant operands + if (IsConstantValue(operand)) { + // Exception 2: the called function in a CallInst is also a constant + // but we want to skip this one + auto *call_instruction = llvm::dyn_cast(instruction); + if (call_instruction != nullptr && + call_instruction->getCalledFunction() == &*operand) { + continue; + } + + // Exception 3: constant operands from GEP and extractvalue are not + // needed, as they get encoded in the instruction itself + if (instruction->getOpcode() == llvm::Instruction::GetElementPtr || + instruction->getOpcode() == llvm::Instruction::ExtractValue) { + continue; + } + + // USE: extend liveness of constant value + ExtendValueLiveness(operand, instruction_index); + + // Exception 4: We extend the lifetime of GEP operands of GEPs + // that don't translate to nop, by one, to make sure that the operands + // don't get overridden when we split the GEP into several + // instructions. + } else if (is_non_zero_gep) { + ExtendValueLiveness(operand, instruction_index + 1); // extended! 
+ + // A BasicBlock may be a label operand, but we don't need to track + // them + } else if (!llvm::isa(operand)) { + ExtendValueLiveness(operand, instruction_index); + } + } + + // Exception 5: For some instructions we know in advance that they will + // produce a nop, so we merge their value and their operand here + if (instruction->getOpcode() == llvm::Instruction::BitCast || + instruction->getOpcode() == llvm::Instruction::Trunc || + instruction->getOpcode() == llvm::Instruction::PtrToInt || + (instruction->getOpcode() == llvm::Instruction::GetElementPtr && + llvm::cast(instruction) + ->hasAllZeroIndices())) { + // merge operand resulting value + CreateValueAlias(instruction, + GetValueIndex(instruction->getOperand(0))); + continue; + } + + // Exception 6: Call instructions to any overflow aware operation + // have to be tracked, because we save their results directly in + // the destination slots of the ExtractValue instructions referring + // to them. + if (instruction->getOpcode() == llvm::Instruction::Call) { + // Check if the call instruction calls a overflow aware operation + // (unfortunately there is no better way to check this) + auto *call_instruction = llvm::cast(instruction); + llvm::Function *function = call_instruction->getCalledFunction(); + if (function->isDeclaration()) { + std::string function_name = function->getName().str(); + + if (function_name.size() >= 13 && + function_name.substr(10, 13) == "with.overflow") { + // create entry for this call + overflow_results_mapping_[call_instruction] = + std::make_pair(nullptr, nullptr); + + // Find the first ExtractValue instruction referring to this call + // instruction for result and overflow each and put it in the + // value_liveness vector here. The liveness of those + // instructions has to be extended to the definition of the call + // instruction, and this way we ensure that the vector is sorted + // by lifetime start index and we avoid sorting it later. + for (auto *user : call_instruction->users()) { + auto *extract_instruction = + llvm::cast(user); + size_t extract_index = *extract_instruction->idx_begin(); + + if (extract_index == 0) { + PELOTON_ASSERT( + overflow_results_mapping_[call_instruction].first == + nullptr); + overflow_results_mapping_[call_instruction].first = + extract_instruction; + + } else if (extract_index == 1) { + PELOTON_ASSERT( + overflow_results_mapping_[call_instruction].second == + nullptr); + overflow_results_mapping_[call_instruction].second = + extract_instruction; + } + + ExtendValueLiveness(extract_instruction, instruction_index); + } + + // Do not process the result of this instruction, + // as this value (the overflow result struct) doesn't exist + // later in the bytecode. 
+ + continue; + } + } + } + + // DEF: save the instruction index as the liveness starting point + if (!instruction->getType()->isVoidTy()) { + ExtendValueLiveness(instruction, instruction_index); + } + } + + bb_instruction_index_range[bb].second = instruction_index - 1; + } +} + +void BytecodeBuilder::PerformNaiveRegisterAllocation() { + // assign a value slot to every liveness range in value_liveness_ + value_slots_.resize(value_liveness_.size(), 0); + index_t reg = 0; + + // process constants + for (auto &constant_value_index : constant_value_indexes_) { + value_slots_[constant_value_index] = reg++ + 1; + } + + // process function arguments + for (auto &argument : llvm_function_->args()) { + value_index_t argument_value_index = GetValueIndex(&argument); + value_slots_[argument_value_index] = reg++ + 1; + } + + // iterate over other entries, which are already sorted + for (value_index_t i = 0; i < value_liveness_.size(); ++i) { + // skip values that are never used (get assigned to dummy slot) + if (value_liveness_[i].first == value_liveness_[i].second) { + continue; + } + + // some values (constants, function arguments) are processed already + if (value_slots_[i] == 0) { + value_slots_[i] = reg++ + 1; // + 1 because 0 is dummy slot + } + } + + number_value_slots_ = reg + 1; +} + +void BytecodeBuilder::PerformGreedyRegisterAllocation() { + // assign a value slot to every liveness range in value_liveness_ + + value_slots_.resize(value_liveness_.size(), 0); + std::vector registers(constant_value_indexes_.size() + + llvm_function_->arg_size()); + index_t reg = 0; + + auto findEmptyRegister = [®isters](ValueLiveness liveness) { + for (index_t i = 0; i < registers.size(); ++i) { + if (registers[i].second <= liveness.first) { + registers[i] = liveness; + return i; + } + } + + // no empty register found, create new one + registers.push_back(liveness); + return static_cast(registers.size() - 1); + }; + + // process constants + for (auto &constant_value_index : constant_value_indexes_) { + registers[reg] = value_liveness_[constant_value_index]; + value_slots_[constant_value_index] = + reg++ + 1; // + 1 because 0 is dummy slot + } + + // process function arguments + for (auto &argument : llvm_function_->args()) { + value_index_t argument_value_index = GetValueIndex(&argument); + registers[reg] = value_liveness_[argument_value_index]; + value_slots_[argument_value_index] = + reg++ + 1; // + 1 because 0 is dummy slot + } + + PELOTON_ASSERT(registers.size() == reg); + +// The vector value_liveness_ is already sorted by lifetime start index +// except for the constant values, which are already processed + +#ifndef NDEBUG + // additional check in debug mode, to ensure that our assertion that the + // vector is already sorted by lifetime start index (except zero) is correct + instruction_index_t instruction_index = 1; + + for (value_index_t i = 0; i < value_liveness_.size(); ++i) { + if (value_liveness_[i].first != 0) { + PELOTON_ASSERT(value_liveness_[i].first >= instruction_index); + instruction_index = value_liveness_[i].first; + } + } +#endif + + // iterate over other entries, which are already sorted + for (value_index_t i = 0; i < value_liveness_.size(); ++i) { + // skip values that are never used + if (value_liveness_[i].first == value_liveness_[i].second) { + continue; + } + + if (value_slots_[i] == 0) { + value_slots_[i] = findEmptyRegister(value_liveness_[i]) + + 1; // + 1 because 0 is dummy slot + } + } + + number_value_slots_ = registers.size() + 1; // + 1 because 0 is dummy slot +} + +void 
BytecodeBuilder::TranslateFunction() {
+  // Map every basic block to an index in the resulting bytecode stream. This
+  // is needed to perform the relocations in the branch instructions.
+  std::unordered_map<const llvm::BasicBlock *, index_t> bb_mapping;
+
+  // Collect all bytecode relocations that have to be performed after
+  // translation, when the mapping information in bb_mapping is complete.
+  std::vector<BytecodeRelocation> bytecode_relocations;
+
+  // Iterate the basic blocks in reverse post order (RPO)
+  // Linear scan register allocation requires RPO traversal
+  // Initializing the RPO traversal is expensive, so we initialize it once
+  // for the BytecodeBuilder object and reuse it.
+  for (llvm::ReversePostOrderTraversal<const llvm::Function *>::rpo_iterator
+           traversal_iterator = rpo_traversal_.begin();
+       traversal_iterator != rpo_traversal_.end(); ++traversal_iterator) {
+    const llvm::BasicBlock *bb = *traversal_iterator;
+
+    // add basic block mapping
+    bb_mapping[bb] = bytecode_function_.bytecode_.size();
+
+    // Iterate all instructions in the basic block
+    for (llvm::BasicBlock::const_iterator instr_iterator = bb->begin();
+         instr_iterator != bb->end(); ++instr_iterator) {
+      const llvm::Instruction *instruction = instr_iterator;
+
+      // Dispatch to the respective translator function
+      switch (instruction->getOpcode()) {
+        // Terminators
+        case llvm::Instruction::Br:
+          ProcessPHIsForBasicBlock(bb);
+          TranslateBranch(instruction, bytecode_relocations);
+          break;
+
+        case llvm::Instruction::Ret:
+          ProcessPHIsForBasicBlock(bb);
+          TranslateReturn(instruction);
+          break;
+
+        // Standard binary operators
+        // Logical operators
+        case llvm::Instruction::Add:
+        case llvm::Instruction::Sub:
+        case llvm::Instruction::Mul:
+        case llvm::Instruction::UDiv:
+        case llvm::Instruction::SDiv:
+        case llvm::Instruction::URem:
+        case llvm::Instruction::SRem:
+        case llvm::Instruction::Shl:
+        case llvm::Instruction::LShr:
+        case llvm::Instruction::And:
+        case llvm::Instruction::Or:
+        case llvm::Instruction::Xor:
+        case llvm::Instruction::AShr:
+        case llvm::Instruction::FAdd:
+        case llvm::Instruction::FSub:
+        case llvm::Instruction::FMul:
+        case llvm::Instruction::FDiv:
+        case llvm::Instruction::FRem:
+          TranslateBinaryOperator(instruction);
+          break;
+
+        // Memory instructions
+        case llvm::Instruction::Load:
+          TranslateLoad(instruction);
+          break;
+
+        case llvm::Instruction::Store:
+          TranslateStore(instruction);
+          break;
+
+        case llvm::Instruction::Alloca:
+          TranslateAlloca(instruction);
+          break;
+
+        case llvm::Instruction::GetElementPtr:
+          TranslateGetElementPtr(instruction);
+          break;
+
+        // Cast instructions
+        case llvm::Instruction::BitCast:
+          // bit casts translate to nop
+          // values got already merged in analysis pass
+          break;
+
+        case llvm::Instruction::SExt:
+        case llvm::Instruction::ZExt:
+        case llvm::Instruction::IntToPtr:
+          TranslateIntExt(instruction);
+          break;
+
+        case llvm::Instruction::Trunc:
+        case llvm::Instruction::PtrToInt:
+          // trunc translates to nop
+          // values got already merged in analysis pass
+          break;
+
+        case llvm::Instruction::FPExt:
+        case llvm::Instruction::FPTrunc:
+          TranslateFloatTruncExt(instruction);
+          break;
+
+        case llvm::Instruction::UIToFP:
+        case llvm::Instruction::SIToFP:
+        case llvm::Instruction::FPToUI:
+        case llvm::Instruction::FPToSI:
+          TranslateFloatIntCast(instruction);
+          break;
+
+        // Other instructions
+        case llvm::Instruction::ICmp:
+        case llvm::Instruction::FCmp:
+          TranslateCmp(instruction);
+          break;
+
+        case llvm::Instruction::PHI:
+          // PHIs are handled before every terminating instruction
+          break;
+
+        case llvm::Instruction::Call:
+          TranslateCall(instruction);
+          break;
+
+        case llvm::Instruction::Select:
+          TranslateSelect(instruction);
+          break;
+
+        case llvm::Instruction::ExtractValue:
+          TranslateExtractValue(instruction);
+          break;
+
+        case llvm::Instruction::Unreachable:
+          // nop
+          break;
+
+        // Instruction is not supported
+        default: { throw NotSupportedException("instruction not supported"); }
+      }
+    }
+  }
+
+  // apply the relocations required by the placed branch instructions
+  for (auto &relocation : bytecode_relocations) {
+    reinterpret_cast<Instruction *>(
+        &bytecode_function_.bytecode_[relocation.instruction_slot])
+        ->args[relocation.argument] = bb_mapping[relocation.bb];
+  }
+}
+
+void BytecodeBuilder::Finalize() {
+  // calculate final number of value slots during runtime
+  bytecode_function_.number_values_ =
+      number_value_slots_ + number_temporary_value_slots_;
+
+  // check if the number of values exceeds the available bit range
+  // (unrealistic)
+  if (bytecode_function_.number_values_ >=
+      std::numeric_limits<index_t>::max()) {
+    throw NotSupportedException("number of values exceeds max number of bits");
+  }
+
+  // prepare arguments
+  bytecode_function_.number_function_arguments_ = llvm_function_->arg_size();
+}
+
+void BytecodeBuilder::ProcessPHIsForBasicBlock(const llvm::BasicBlock *bb) {
+  struct AdditionalMove {
+    const llvm::Instruction *instruction;
+    index_t dest;
+    index_t src;
+  };
+
+  // Keeps track of additional moves (due to the PHI swap problem) that have
+  // to be applied after all PHI nodes have been processed.
+  std::vector<AdditionalMove> additional_moves;
+
+  for (auto succ_iterator = llvm::succ_begin(bb);
+       succ_iterator != llvm::succ_end(bb); ++succ_iterator) {
+    // If the basic block is its own successor, we risk running into the PHI
+    // swap problem (lost-copy problem). To avoid this, we move the values
+    // into temporary registers and move them to their destination after
+    // processing all other PHI nodes.
+    if (*succ_iterator == bb) {
+      for (auto instruction_iterator = succ_iterator->begin();
+           auto *phi_node =
+               llvm::dyn_cast<const llvm::PHINode>(&*instruction_iterator);
+           ++instruction_iterator) {
+        index_t temp_slot = GetTemporaryValueSlot(bb);
+
+        InsertBytecodeInstruction(
+            phi_node, Opcode::phi_mov,
+            {temp_slot, GetValueSlot(phi_node->getIncomingValueForBlock(bb))});
+        additional_moves.push_back(
+            {phi_node, GetValueSlot(phi_node), temp_slot});
+      }
+
+      // Common case: create mov instruction to destination slot
+    } else {
+      for (auto instruction_iterator = succ_iterator->begin();
+           auto *phi_node =
+               llvm::dyn_cast<const llvm::PHINode>(&*instruction_iterator);
+           ++instruction_iterator) {
+        if (GetValueSlot(phi_node) ==
+            GetValueSlot(phi_node->getIncomingValueForBlock(bb))) {
+          continue;
+        }
+
+        InsertBytecodeInstruction(
+            phi_node, Opcode::phi_mov,
+            {phi_node, phi_node->getIncomingValueForBlock(bb)});
+      }
+    }
+  }
+
+  // Place additional moves if needed
+  for (auto &entry : additional_moves) {
+    InsertBytecodeInstruction(entry.instruction, Opcode::phi_mov,
+                              {entry.dest, entry.src});
+  }
+}
+
+void BytecodeBuilder::TranslateBranch(
+    const llvm::Instruction *instruction,
+    std::vector<BytecodeRelocation> &bytecode_relocations) {
+  auto *branch_instruction = llvm::cast<llvm::BranchInst>(&*instruction);
+
+  // conditional branch
+  if (branch_instruction->isConditional()) {
+    // The first operand in the IR is the false branch, while the second one
+    // is the true one (printed llvm assembly is the other way round).
+    // To be consistent, we use the order of the memory representation
+    // in our bytecode.
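+    // (Illustrative IR: for "br i1 %cond, label %then, label %else", the
+    // in-memory operand order is %cond, %else, %then, i.e. operand 1 is the
+    // false target and operand 2 the true target.)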
+
+    // If the false branch is the next basic block, we can use a fall-through
+    // branch
+    if (BasicBlockIsRPOSucc(
+            branch_instruction->getParent(),
+            llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(1)))) {
+      InsertBytecodeInstruction(
+          instruction, Opcode::branch_cond_ft,
+          std::vector<index_t>{GetValueSlot(branch_instruction->getOperand(0)),
+                               0});
+
+      BytecodeRelocation relocation_false{
+          static_cast<index_t>(bytecode_function_.bytecode_.size() - 1), 1,
+          llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(2))};
+
+      // add a relocation entry to insert the missing destination information
+      // later
+      bytecode_relocations.push_back(relocation_false);
+
+      // no fall through
+    } else {
+      InsertBytecodeInstruction(
+          instruction, Opcode::branch_cond,
+          {GetValueSlot(branch_instruction->getOperand(0)), 0, 0});
+
+      BytecodeRelocation relocation_false{
+          static_cast<index_t>(bytecode_function_.bytecode_.size() - 1), 1,
+          llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(1))};
+
+      // add a relocation entry to insert the missing destination information
+      // later
+      bytecode_relocations.push_back(relocation_false);
+
+      BytecodeRelocation relocation_true{
+          static_cast<index_t>(bytecode_function_.bytecode_.size() - 1), 2,
+          llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(2))};
+
+      // add a relocation entry to insert the missing destination information
+      // later
+      bytecode_relocations.push_back(relocation_true);
+    }
+
+    // unconditional branch
+  } else {
+    // If the unconditional branch points to the next basic block,
+    // we can omit the branch instruction
+    if (!BasicBlockIsRPOSucc(
+            branch_instruction->getParent(),
+            llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(0)))) {
+      InsertBytecodeInstruction(instruction, Opcode::branch_uncond,
+                                std::vector<index_t>{0});
+
+      BytecodeRelocation relocation{
+          static_cast<index_t>(bytecode_function_.bytecode_.size() - 1), 0,
+          llvm::cast<llvm::BasicBlock>(branch_instruction->getOperand(0))};
+
+      // add a relocation entry to insert the missing destination information
+      // later
+      bytecode_relocations.push_back(relocation);
+    }
+  }
+}
+
+void BytecodeBuilder::TranslateReturn(const llvm::Instruction *instruction) {
+  auto *return_instruction = llvm::cast<llvm::ReturnInst>(&*instruction);
+
+  // We only have one ret bytecode instruction. If the function returns void,
+  // the instruction will return the value of the dummy value slot zero,
+  // but no one will ever pick up that value.
+ + index_t return_slot = 0; + if (return_instruction->getNumOperands() > 0) { + return_slot = GetValueSlot(return_instruction->getOperand(0)); + } + + InsertBytecodeInstruction(instruction, Opcode::ret, + std::vector{return_slot}); +} + +void BytecodeBuilder::TranslateBinaryOperator( + const llvm::Instruction *instruction) { + auto *binary_operator = llvm::cast(&*instruction); + auto *type = binary_operator->getType(); + Opcode opcode; + + switch (binary_operator->getOpcode()) { + case llvm::Instruction::Add: + case llvm::Instruction::FAdd: + opcode = GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::add), type); + break; + + case llvm::Instruction::Sub: + case llvm::Instruction::FSub: + opcode = GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::sub), type); + break; + + case llvm::Instruction::Mul: + case llvm::Instruction::FMul: + opcode = GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::mul), type); + break; + + case llvm::Instruction::UDiv: + case llvm::Instruction::FDiv: + opcode = GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::div), type); + break; + + case llvm::Instruction::SDiv: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::sdiv), type); + break; + + case llvm::Instruction::URem: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::urem), type); + break; + + case llvm::Instruction::FRem: + opcode = + GetOpcodeForTypeFloatTypes(GET_FIRST_FLOAT_TYPES(Opcode::frem), type); + break; + + case llvm::Instruction::SRem: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::srem), type); + break; + + case llvm::Instruction::Shl: + opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::shl), type); + break; + + case llvm::Instruction::LShr: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::lshr), type); + break; + + case llvm::Instruction::AShr: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::ashr), type); + break; + + case llvm::Instruction::And: + opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::and), type); + break; + + case llvm::Instruction::Or: + opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode:: or), type); + break; + + case llvm::Instruction::Xor: + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode:: xor), type); + break; + + default: + throw NotSupportedException("binary operation not supported"); + } + + InsertBytecodeInstruction(instruction, opcode, + {binary_operator, binary_operator->getOperand(0), + binary_operator->getOperand(1)}); +} + +void BytecodeBuilder::TranslateAlloca(const llvm::Instruction *instruction) { + auto *alloca_instruction = llvm::cast(&*instruction); + Opcode opcode; + + // get type to allocate + llvm::Type *type = alloca_instruction->getAllocatedType(); + + // get type size in bytes + size_t type_size = code_context_.GetTypeSize(type); + + if (alloca_instruction->isArrayAllocation()) { + index_t array_size = GetValueSlot(alloca_instruction->getArraySize()); + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::alloca_array), + alloca_instruction->getArraySize()->getType()); + + // type size is immediate value! + InsertBytecodeInstruction(instruction, opcode, + {GetValueSlot(alloca_instruction), + static_cast(type_size), array_size}); + } else { + opcode = Opcode::alloca; + // type size is immediate value! 
+ InsertBytecodeInstruction( + instruction, opcode, + {GetValueSlot(alloca_instruction), static_cast(type_size)}); + } +} + +void BytecodeBuilder::TranslateLoad(const llvm::Instruction *instruction) { + auto *load_instruction = llvm::cast(&*instruction); + + Opcode opcode = GetOpcodeForTypeSizeIntTypes( + GET_FIRST_INT_TYPES(Opcode::load), load_instruction->getType()); + InsertBytecodeInstruction( + instruction, opcode, + {load_instruction, load_instruction->getPointerOperand()}); +} + +void BytecodeBuilder::TranslateStore(const llvm::Instruction *instruction) { + auto *store_instruction = llvm::cast(&*instruction); + + Opcode opcode = + GetOpcodeForTypeSizeIntTypes(GET_FIRST_INT_TYPES(Opcode::store), + store_instruction->getOperand(0)->getType()); + InsertBytecodeInstruction( + instruction, opcode, + std::vector{store_instruction->getPointerOperand(), + store_instruction->getValueOperand()}); +} + +void BytecodeBuilder::TranslateGetElementPtr( + const llvm::Instruction *instruction) { + auto *gep_instruction = llvm::cast(&*instruction); + int64_t overall_offset = 0; + + // If the GEP translates to a nop, the values have been already merged + // during the analysis pass + if (gep_instruction->hasAllZeroIndices()) { + return; + } + + // The offset is an immediate constant, not a slot index + // instruction is created here, but offset will be filled in later, + // because we may merge it with constant array accesses + auto &gep_offset_bytecode_instruction_ref = InsertBytecodeInstruction( + gep_instruction, Opcode::gep_offset, + {GetValueSlot(gep_instruction), + GetValueSlot(gep_instruction->getPointerOperand()), 0}); + size_t gep_offset_bytecode_instruction_index = + bytecode_function_.GetIndexFromIP(&gep_offset_bytecode_instruction_ref); + + // First index operand of the instruction is the array index for the + // source type + + // Get type of struct/array which will be processed + llvm::Type *type = gep_instruction->getSourceElementType(); + + if (IsConstantValue(gep_instruction->getOperand(1))) { + overall_offset += + code_context_.GetTypeSize(type) * + GetConstantIntegerValueSigned(gep_instruction->getOperand(1)); + } else { + index_t index = GetValueSlot(instruction->getOperand(1)); + Opcode opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::gep_array), + instruction->getOperand(1)->getType()); + + // size of array element is an immediate constant, not a slot index! + InsertBytecodeInstruction( + gep_instruction, opcode, + {GetValueSlot(gep_instruction), index, + static_cast(code_context_.GetTypeSize(type))}); + } + + // Iterate remaining Indexes + for (unsigned int operand_index = 2; + operand_index < instruction->getNumOperands(); ++operand_index) { + auto *operand = instruction->getOperand(operand_index); + + if (auto *array_type = llvm::dyn_cast(type)) { + if (IsConstantValue(operand)) { + overall_offset += + code_context_.GetTypeSize(array_type->getElementType()) * + GetConstantIntegerValueSigned(operand); + } else { + index_t index = GetValueSlot(operand); + Opcode opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::gep_array), operand->getType()); + + // size of array element is an immediate constant, not a slot index! 
+        InsertBytecodeInstruction(
+            gep_instruction, opcode,
+            {GetValueSlot(gep_instruction), index,
+             static_cast<index_t>(
+                 code_context_.GetTypeSize(array_type->getElementType()))});
+      }
+
+      // get inner type for next iteration
+      type = array_type->getElementType();
+
+    } else if (auto *struct_type = llvm::dyn_cast<llvm::StructType>(type)) {
+      uint64_t index = GetConstantIntegerValueUnsigned(operand);
+      PELOTON_ASSERT(index < struct_type->getNumElements());
+
+      // get element offset
+      overall_offset +=
+          code_context_.GetStructElementOffset(struct_type, index);
+
+      // get inner type for next iteration
+      type = struct_type->getElementType(index);
+
+    } else {
+      throw NotSupportedException(
+          "unexpected type in getelementptr instruction");
+    }
+  }
+
+  // make sure that the resulting type is correct
+  PELOTON_ASSERT(type == gep_instruction->getResultElementType());
+
+  // fill in the calculated overall offset in the previously placed gep_offset
+  // bytecode instruction
+  // (use the index instead of a reference, as the vector may have been
+  // relocated!)
+  reinterpret_cast<Instruction *>(
+      &bytecode_function_.bytecode_[gep_offset_bytecode_instruction_index])
+      ->args[2] = static_cast<index_t>(overall_offset);
+}
+
+void BytecodeBuilder::TranslateFloatIntCast(
+    const llvm::Instruction *instruction) {
+  auto *cast_instruction = llvm::dyn_cast<llvm::CastInst>(&*instruction);
+
+  // These instructions exist from every integer type to every floating
+  // point type and the other way round.
+  // We can only expand instructions in one dimension, so we expand the
+  // integer dimension and create the floating point instances manually
+  // (float and double)
+
+  Opcode opcode = Opcode::undefined;
+
+  if (instruction->getOpcode() == llvm::Instruction::FPToSI) {
+    if (cast_instruction->getOperand(0)->getType() ==
+        code_context_.float_type_) {
+      opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::floattosi),
+                                        cast_instruction->getType());
+    } else if (cast_instruction->getOperand(0)->getType() ==
+               code_context_.double_type_) {
+      opcode = GetOpcodeForTypeIntTypes(
+          GET_FIRST_INT_TYPES(Opcode::doubletosi), cast_instruction->getType());
+    } else {
+      throw NotSupportedException("unsupported cast instruction");
+    }
+
+  } else if (instruction->getOpcode() == llvm::Instruction::FPToUI) {
+    if (cast_instruction->getOperand(0)->getType() ==
+        code_context_.float_type_) {
+      opcode = GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::floattoui),
+                                        cast_instruction->getType());
+    } else if (cast_instruction->getOperand(0)->getType() ==
+               code_context_.double_type_) {
+      opcode = GetOpcodeForTypeIntTypes(
+          GET_FIRST_INT_TYPES(Opcode::doubletoui), cast_instruction->getType());
+    } else {
+      throw NotSupportedException("unsupported cast instruction");
+    }
+
+  } else if (instruction->getOpcode() == llvm::Instruction::SIToFP) {
+    if (cast_instruction->getType() == code_context_.float_type_) {
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::sitofloat),
+                                   cast_instruction->getOperand(0)->getType());
+    } else if (cast_instruction->getType() == code_context_.double_type_) {
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::sitodouble),
+                                   cast_instruction->getOperand(0)->getType());
+    } else {
+      throw NotSupportedException("unsupported cast instruction");
+    }
+
+  } else if (instruction->getOpcode() == llvm::Instruction::UIToFP) {
+    if (cast_instruction->getType() == code_context_.float_type_) {
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::uitofloat),
+                                   cast_instruction->getOperand(0)->getType());
+    } else if (cast_instruction->getType() ==
code_context_.double_type_) { + opcode = + GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::uitodouble), + cast_instruction->getOperand(0)->getType()); + } else { + throw NotSupportedException("unsupported cast instruction"); + } + + } else { + throw NotSupportedException("unsupported cast instruction"); + } + + InsertBytecodeInstruction( + cast_instruction, opcode, + {cast_instruction, cast_instruction->getOperand(0)}); +} + +void BytecodeBuilder::TranslateIntExt(const llvm::Instruction *instruction) { + auto *cast_instruction = llvm::dyn_cast(&*instruction); + + size_t src_type_size = + code_context_.GetTypeSize(cast_instruction->getSrcTy()); + size_t dest_type_size = + code_context_.GetTypeSize(cast_instruction->getDestTy()); + + if (src_type_size == dest_type_size) { + if (GetValueSlot(instruction) != GetValueSlot(instruction->getOperand(0))) + InsertBytecodeInstruction(instruction, Opcode::nop_mov, + {instruction, instruction->getOperand(0)}); + return; + } + + Opcode opcode = Opcode::undefined; + + if (instruction->getOpcode() == llvm::Instruction::SExt) { + if (src_type_size == 1 && dest_type_size == 2) { + opcode = Opcode::sext_i8_i16; + + } else if (src_type_size == 1 && dest_type_size == 4) { + opcode = Opcode::sext_i8_i32; + + } else if (src_type_size == 1 && dest_type_size == 8) { + opcode = Opcode::sext_i8_i64; + + } else if (src_type_size == 2 && dest_type_size == 4) { + opcode = Opcode::sext_i16_i32; + + } else if (src_type_size == 2 && dest_type_size == 8) { + opcode = Opcode::sext_i16_i64; + + } else if (src_type_size == 4 && dest_type_size == 8) { + opcode = Opcode::sext_i32_i64; + + } else { + throw NotSupportedException("unsupported sext instruction"); + } + + } else if (instruction->getOpcode() == llvm::Instruction::ZExt || + instruction->getOpcode() == llvm::Instruction::IntToPtr) { + if (src_type_size == 1 && dest_type_size == 2) { + opcode = Opcode::zext_i8_i16; + + } else if (src_type_size == 1 && dest_type_size == 4) { + opcode = Opcode::zext_i8_i32; + + } else if (src_type_size == 1 && dest_type_size == 8) { + opcode = Opcode::zext_i8_i64; + + } else if (src_type_size == 2 && dest_type_size == 4) { + opcode = Opcode::zext_i16_i32; + + } else if (src_type_size == 2 && dest_type_size == 8) { + opcode = Opcode::zext_i16_i64; + + } else if (src_type_size == 4 && dest_type_size == 8) { + opcode = Opcode::zext_i32_i64; + + } else { + throw NotSupportedException("unsupported zext instruction"); + } + + } else { + throw NotSupportedException("unexpected ext instruction"); + } + + InsertBytecodeInstruction( + cast_instruction, opcode, + {cast_instruction, cast_instruction->getOperand(0)}); +} + +void BytecodeBuilder::TranslateFloatTruncExt( + const llvm::Instruction *instruction) { + auto *cast_instruction = llvm::dyn_cast(&*instruction); + + auto src_type = cast_instruction->getSrcTy(); + auto dest_type = cast_instruction->getDestTy(); + + if (src_type == dest_type) { + if (GetValueSlot(instruction) != GetValueSlot(instruction->getOperand(0))) { + InsertBytecodeInstruction(instruction, Opcode::nop_mov, + {instruction, instruction->getOperand(0)}); + } + return; + } + + if (src_type == code_context_.double_type_ && + dest_type == code_context_.float_type_) { + InsertBytecodeInstruction( + cast_instruction, Opcode::doubletofloat, + {cast_instruction, cast_instruction->getOperand(0)}); + } else if (src_type == code_context_.float_type_ && + dest_type == code_context_.double_type_) { + InsertBytecodeInstruction( + cast_instruction, Opcode::floattodouble, + 
{cast_instruction, cast_instruction->getOperand(0)});
+  } else {
+    throw NotSupportedException("unsupported FPTrunc/FPExt instruction");
+  }
+}
+
+void BytecodeBuilder::TranslateCmp(const llvm::Instruction *instruction) {
+  auto *cmp_instruction = llvm::cast<llvm::CmpInst>(&*instruction);
+  auto *type = cmp_instruction->getOperand(0)->getType();
+  Opcode opcode = Opcode::undefined;
+
+  switch (cmp_instruction->getPredicate()) {
+    case llvm::CmpInst::Predicate::ICMP_EQ:
+    case llvm::CmpInst::Predicate::FCMP_OEQ:
+    case llvm::CmpInst::Predicate::FCMP_UEQ:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_eq), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_NE:
+    case llvm::CmpInst::Predicate::FCMP_ONE:
+    case llvm::CmpInst::Predicate::FCMP_UNE:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_ne), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_UGT:
+    case llvm::CmpInst::Predicate::FCMP_OGT:
+    case llvm::CmpInst::Predicate::FCMP_UGT:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_gt), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_UGE:
+    case llvm::CmpInst::Predicate::FCMP_OGE:
+    case llvm::CmpInst::Predicate::FCMP_UGE:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_ge), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_ULT:
+    case llvm::CmpInst::Predicate::FCMP_OLT:
+    case llvm::CmpInst::Predicate::FCMP_ULT:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_lt), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_ULE:
+    case llvm::CmpInst::Predicate::FCMP_OLE:
+    case llvm::CmpInst::Predicate::FCMP_ULE:
+      opcode =
+          GetOpcodeForTypeAllTypes(GET_FIRST_ALL_TYPES(Opcode::cmp_le), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_SGT:
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::cmp_sgt), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_SGE:
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::cmp_sge), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_SLT:
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::cmp_slt), type);
+      break;
+
+    case llvm::CmpInst::Predicate::ICMP_SLE:
+      opcode =
+          GetOpcodeForTypeIntTypes(GET_FIRST_INT_TYPES(Opcode::cmp_sle), type);
+      break;
+
+    default:
+      throw NotSupportedException("comparison predicate not supported");
+  }
+
+  InsertBytecodeInstruction(cmp_instruction, opcode,
+                            {cmp_instruction, cmp_instruction->getOperand(0),
+                             cmp_instruction->getOperand(1)});
+}
+
+void BytecodeBuilder::TranslateCall(const llvm::Instruction *instruction) {
+  auto *call_instruction = llvm::cast<llvm::CallInst>(&*instruction);
+
+  llvm::Function *function = call_instruction->getCalledFunction();
+
+  if (function->isDeclaration()) {
+    // The only way to find out about the called function (even if it's an
+    // intrinsic) is to check the function name string
+    std::string function_name = function->getName().str();
+
+    if (function_name.find("llvm.memcpy") == 0) {
+      if (call_instruction->getOperand(2)->getType() !=
+          code_context_.int64_type_) {
+        throw NotSupportedException(
+            "memcpy with different size type than i64 not supported");
+      }
+
+      InsertBytecodeInstruction(
+          call_instruction, Opcode::llvm_memcpy,
+          {call_instruction->getOperand(0), call_instruction->getOperand(1),
+           call_instruction->getOperand(2)});
+
+    } else if (function_name.find("llvm.memmove") == 0) {
+      if (call_instruction->getOperand(2)->getType() !=
+          code_context_.int64_type_)
+        throw NotSupportedException(
+            "memmove with different size type 
than i64 not supported"); + + InsertBytecodeInstruction( + call_instruction, Opcode::llvm_memmove, + {call_instruction->getOperand(0), call_instruction->getOperand(1), + call_instruction->getOperand(2)}); + + } else if (function_name.find("llvm.memset") == 0) { + if (call_instruction->getOperand(2)->getType() != + code_context_.int64_type_) + throw NotSupportedException( + "memset with different size type than i64 not supported"); + + InsertBytecodeInstruction( + call_instruction, Opcode::llvm_memset, + {call_instruction->getOperand(0), call_instruction->getOperand(1), + call_instruction->getOperand(2)}); + + } else if (function_name.find("with.overflow") == 10) { + index_t result = 0; + index_t overflow = 0; + auto *type = call_instruction->getOperand(0)->getType(); + Opcode opcode = Opcode::undefined; + + // The destination slots have been already prepared from the analysis pass + PELOTON_ASSERT(overflow_results_mapping_.find(call_instruction) != + overflow_results_mapping_.end()); + + if (overflow_results_mapping_[call_instruction].first != nullptr) { + result = + GetValueSlot(overflow_results_mapping_[call_instruction].first); + } + + if (overflow_results_mapping_[call_instruction].second != nullptr) { + overflow = + GetValueSlot(overflow_results_mapping_[call_instruction].second); + } + + if (function_name.substr(5, 4) == "uadd") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_uadd_overflow), type); + } else if (function_name.substr(5, 4) == "sadd") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_sadd_overflow), type); + } else if (function_name.substr(5, 4) == "usub") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_usub_overflow), type); + } else if (function_name.substr(5, 4) == "ssub") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_ssub_overflow), type); + } else if (function_name.substr(5, 4) == "umul") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_umul_overflow), type); + } else if (function_name.substr(5, 4) == "smul") { + opcode = GetOpcodeForTypeIntTypes( + GET_FIRST_INT_TYPES(Opcode::llvm_smul_overflow), type); + } else { + throw NotSupportedException( + "the requested operation with overflow is not supported"); + } + + InsertBytecodeInstruction( + call_instruction, opcode, + {result, overflow, GetValueSlot(call_instruction->getOperand(0)), + GetValueSlot(call_instruction->getOperand(1))}); + + } else if (function_name.find("llvm.x86.sse42.crc32") == 0) { + if (call_instruction->getType() != code_context_.int64_type_) { + throw NotSupportedException( + "sse42.crc32 with different size type than i64 not supported"); + } + + InsertBytecodeInstruction( + call_instruction, Opcode::llvm_sse42_crc32, + {call_instruction, call_instruction->getOperand(0), + call_instruction->getOperand(1)}); + + } else { + Opcode opcode = + BytecodeFunction::GetExplicitCallOpcodeByString(function_name); + + // call explicit instantiation of this function if available + if (opcode != Opcode::undefined) { + std::vector args; + args.reserve(call_instruction->getNumArgOperands()); + + if (!instruction->getType()->isVoidTy()) { + args.push_back(call_instruction); + } + + for (unsigned int i = 0; i < call_instruction->getNumArgOperands(); + i++) { + args.push_back(call_instruction->getArgOperand(i)); + } + + InsertBytecodeInstruction(call_instruction, opcode, args); + + } else { + // Function is not available in IR context, so we have to make an + // external function call + 
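+        // Illustrative sketch of what happens at runtime for such a call
+        // (see InitializeActivationRecord() in bytecode_interpreter.cpp):
+        // roughly,
+        //   ffi_prep_cif(&cif, FFI_DEFAULT_ABI, nargs, dest_type, arg_types);
+        //   ffi_call(&cif, FFI_FN(raw_pointer), ret_slot_ptr, arg_slot_ptrs);
+        // with the pointers taken from the value slots collected below.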
+        // lookup function pointer in code context
+        void *raw_pointer = code_context_.LookupBuiltin(function_name).second;
+
+        if (raw_pointer == nullptr) {
+          throw NotSupportedException("could not find external function: " +
+                                      function_name);
+        }
+
+        // libffi is used for external function calls
+        // Here we collect all the information that will be needed at runtime
+        // (function activation time) to create the libffi call interface.
+
+        // Hint that an explicit wrapper could be created for this function
+        LOG_DEBUG(
+            "The interpreter will call the C++ function '%s' via libffi. "
+            "Consider adding an explicit wrapper for this function in "
+            "bytecode_instructions.def\n",
+            function_name.c_str());
+
+        index_t dest_slot = 0;
+        if (!instruction->getType()->isVoidTy()) {
+          dest_slot = GetValueSlot(call_instruction);
+        }
+
+        size_t arguments_num = call_instruction->getNumArgOperands();
+        ExternalCallContext call_context{
+            dest_slot, GetFFIType(instruction->getType()),
+            std::vector<index_t>(arguments_num),
+            std::vector<ffi_type *>(arguments_num)};
+
+        for (unsigned int i = 0; i < call_instruction->getNumArgOperands();
+             i++) {
+          call_context.args[i] =
+              GetValueSlot(call_instruction->getArgOperand(i));
+          call_context.arg_types[i] =
+              GetFFIType(call_instruction->getArgOperand(i)->getType());
+        }
+
+        // add call context to bytecode function
+        bytecode_function_.external_call_contexts_.push_back(call_context);
+
+        // insert bytecode instruction referring to this call context
+        InsertBytecodeExternalCallInstruction(
+            call_instruction,
+            static_cast<index_t>(
+                bytecode_function_.external_call_contexts_.size() - 1),
+            raw_pointer);
+      }
+    }
+  } else {
+    // Internal function call to another IR function in this code context
+
+    index_t dest_slot = 0;
+    if (!instruction->getType()->isVoidTy()) {
+      dest_slot = GetValueSlot(call_instruction);
+    }
+
+    // Translate the bytecode function we want to call
+    index_t sub_function_index;
+    const auto result = sub_function_mapping_.find(function);
+    if (result != sub_function_mapping_.end()) {
+      sub_function_index = result->second;
+    } else {
+      auto sub_function =
+          BytecodeBuilder::CreateBytecodeFunction(code_context_, function);
+
+      bytecode_function_.sub_functions_.push_back(std::move(sub_function));
+      sub_function_index = bytecode_function_.sub_functions_.size() - 1;
+      sub_function_mapping_[function] = sub_function_index;
+    }
+
+    InternalCallInstruction &bytecode_instruction =
+        InsertBytecodeInternalCallInstruction(
+            call_instruction, sub_function_index, dest_slot,
+            call_instruction->getNumArgOperands());
+
+    for (unsigned int i = 0; i < call_instruction->getNumArgOperands(); i++) {
+      bytecode_instruction.args[i] =
+          GetValueSlot(call_instruction->getArgOperand(i));
+
+      // just to be safe, we check that no function argument is larger
+      // than 8 bytes
+      if (code_context_.GetTypeSize(
+              call_instruction->getArgOperand(i)->getType()) > 8) {
+        throw NotSupportedException("argument for internal call too big");
+      }
+    }
+  }
+}
+
+void BytecodeBuilder::TranslateSelect(const llvm::Instruction *instruction) {
+  auto *select_instruction = llvm::cast<llvm::SelectInst>(&*instruction);
+
+  InsertBytecodeInstruction(
+      select_instruction, Opcode::select,
+      {select_instruction, select_instruction->getCondition(),
+       select_instruction->getTrueValue(),
+       select_instruction->getFalseValue()});
+}
+
+void BytecodeBuilder::TranslateExtractValue(
+    const llvm::Instruction *instruction) {
+  auto *extract_instruction =
+      llvm::cast<llvm::ExtractValueInst>(&*instruction);
+
+  // Skip if this ExtractValue instruction belongs to an overflow operation
+  auto call_result = overflow_results_mapping_.find(
+      llvm::cast<llvm::CallInst>(instruction->getOperand(0)));
+  if (call_result != overflow_results_mapping_.end()) {
+    return;
+  }
+
+  // Get value type
+  llvm::Type *type = extract_instruction->getAggregateOperand()->getType();
+  size_t offset_bits = 0;
+
+  // make sure the result type fits in a value_t
+  if (code_context_.GetTypeSize(instruction->getType()) > sizeof(value_t)) {
+    throw NotSupportedException("extracted value too big for register size");
+  }
+
+  // Iterate indexes
+  for (auto index_it = extract_instruction->idx_begin(),
+            index_end = extract_instruction->idx_end();
+       index_it != index_end; index_it++) {
+    uint32_t index = *index_it;
+
+    if (auto *array_type = llvm::dyn_cast<llvm::ArrayType>(type)) {
+      // Advance offset
+      offset_bits +=
+          code_context_.GetTypeAllocSizeInBits(array_type->getElementType()) *
+          index;
+
+      // get inner type for next iteration
+      type = array_type->getElementType();
+    } else if (auto *struct_type = llvm::dyn_cast<llvm::StructType>(type)) {
+      PELOTON_ASSERT(index < struct_type->getNumElements());
+
+      // get element offset
+      offset_bits +=
+          code_context_.GetStructElementOffset(struct_type, index) * 8;
+
+      // get inner type for next iteration
+      type = struct_type->getElementType(index);
+    } else {
+      throw NotSupportedException(
+          "unexpected type in extractvalue instruction");
+    }
+  }
+
+  // ensure that the resulting type is correct
+  PELOTON_ASSERT(type == extract_instruction->getType());
+
+  // number of bits to shift is an immediate value!
+  InsertBytecodeInstruction(
+      extract_instruction, Opcode::extractvalue,
+      {GetValueSlot(extract_instruction),
+       GetValueSlot(extract_instruction->getAggregateOperand()),
+       static_cast<index_t>(offset_bits)});
+}
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/codegen/interpreter/bytecode_function.cpp b/src/codegen/interpreter/bytecode_function.cpp
new file mode 100644
index 00000000000..2e29fe47e55
--- /dev/null
+++ b/src/codegen/interpreter/bytecode_function.cpp
@@ -0,0 +1,302 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_function.cpp
+//
+// Identification: src/codegen/interpreter/bytecode_function.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/interpreter/bytecode_function.h"
+
+#include <fstream>
+#include <iomanip>
+#include <sstream>
+#include <string>
+
+#include "codegen/codegen.h"
+
+// Includes for explicit function calls
+#include "codegen/bloom_filter_accessor.h"
+#include "codegen/util/bloom_filter.h"
+#include "codegen/buffering_consumer.h"
+#include "codegen/deleter.h"
+#include "codegen/inserter.h"
+#include "codegen/query_parameters.h"
+#include "codegen/runtime_functions.h"
+#include "codegen/transaction_runtime.h"
+#include "codegen/updater.h"
+#include "codegen/util/oa_hash_table.h"
+#include "codegen/util/hash_table.h"
+#include "codegen/util/sorter.h"
+#include "codegen/values_runtime.h"
+#include "executor/executor_context.h"
+#include "function/date_functions.h"
+#include "function/numeric_functions.h"
+#include "function/string_functions.h"
+#include "function/timestamp_functions.h"
+#include "planner/project_info.h"
+#include "storage/data_table.h"
+#include "storage/storage_manager.h"
+#include "storage/tile_group.h"
+#include "storage/zone_map_manager.h"
+#include "codegen/util/buffer.h"
+
+namespace peloton {
+namespace codegen {
+namespace interpreter {
+
+/**
+ *
This lambda function serves as an init function to fill the const mapping + * of function names to opcodes. + */ +const std::unordered_map + BytecodeFunction::explicit_call_opcode_mapping_ = []() { + std::unordered_map mapping; + +#define HANDLE_INST(op) +#define HANDLE_EXPLICIT_CALL_INST(op, func) mapping[#func] = Opcode::op; + +#include "codegen/interpreter/bytecode_instructions.def" + + return mapping; + }(); + +const char *BytecodeFunction::GetOpcodeString(Opcode opcode) { + switch (opcode) { +#define HANDLE_INST(opcode) \ + case Opcode::opcode: \ + return #opcode; + +#include "codegen/interpreter/bytecode_instructions.def" + + default: + return "(invalid)"; + } +} + +#ifndef NDEBUG +const llvm::Instruction *BytecodeFunction::GetIRInstructionFromIP( + index_t instr_slot) const { + return instruction_trace_.at(instr_slot); +} +#endif + +size_t BytecodeFunction::GetInstructionSlotSize( + const Instruction *instruction) { + switch (instruction->op) { +#define HANDLE_INST(op) \ + case Opcode::op: \ + return 1; +#define HANDLE_EXTERNAL_CALL_INST(op) \ + case Opcode::op: \ + return 2; +#define HANDLE_INTERNAL_CALL_INST(op) \ + case Opcode::op: \ + return GetInteralCallInstructionSlotSize( \ + reinterpret_cast(instruction)); +#define HANDLE_SELECT_INST(op) \ + case Opcode::op: \ + return 2; +#define HANDLE_OVERFLOW_TYPED_INST(op, type) \ + case Opcode::op##_##type: \ + return 2; +#define HANDLE_EXPLICIT_CALL_INST(op, func) \ + case Opcode::op: \ + return GetExplicitCallInstructionSlotSize( \ + GetFunctionRequiredArgSlotsNum(&func)); + +#include "codegen/interpreter/bytecode_instructions.def" + + default: + PELOTON_ASSERT(false); + return 0; + } +} + +Opcode BytecodeFunction::GetExplicitCallOpcodeByString( + std::string function_name) { + auto result = explicit_call_opcode_mapping_.find(function_name); + + if (result != explicit_call_opcode_mapping_.end()) + return result->second; + else + return Opcode::undefined; +} + +void BytecodeFunction::DumpContents() const { + std::ofstream output; + output.open(function_name_ + ".bf"); + +#ifndef NDEBUG + const llvm::BasicBlock *bb; +#endif + + // Print Bytecode + output << "Bytecode:" << std::endl; + for (index_t i = 0; i < bytecode_.size();) { + auto *instruction = GetIPFromIndex(i); + +#ifndef NDEBUG + const llvm::Instruction *llvm_instruction = GetIRInstructionFromIP(i); + if (llvm_instruction->getOpcode() != llvm::Instruction::PHI) { + if (i > 0 && bb != llvm_instruction->getParent()) { + output << llvm_instruction->getParent()->getName().str() << ":" + << std::endl; + } + bb = llvm_instruction->getParent(); + } +#endif + + output << Dump(instruction) << std::endl; + i += GetInstructionSlotSize(instruction); + } + + // Print Constants + if (constants_.size() > 0) output << "Constants:" << std::endl; + for (size_t i = 0; i < constants_.size(); i++) { + output << "[" << std::setw(3) << std::dec << (i + 1) + << "] = " << *reinterpret_cast(&constants_[i]) + << " 0x" << std::hex << constants_[i] << std::endl; + } + + output << std::endl; + + output.close(); +} + +std::string BytecodeFunction::Dump(const Instruction *instruction) const { + std::ostringstream output; + output << "[" << std::setw(3) << GetIndexFromIP(instruction) << "] "; + output << std::setw(18) << GetOpcodeString(instruction->op) << " "; + + switch (instruction->op) { +#define HANDLE_INST(opcode) \ + case Opcode::opcode: \ + output << "[" << std::setw(3) << instruction->args[0] << "] "; \ + output << "[" << std::setw(3) << instruction->args[1] << "] "; \ + output << "[" << 
std::setw(3) << instruction->args[2] << "] "; \
+    break;
+
+#ifndef NDEBUG
+#define HANDLE_EXTERNAL_CALL_INST(opcode) \
+  case Opcode::opcode: \
+    output \
+        << "[" << std::setw(3) \
+        << external_call_contexts_ \
+               [reinterpret_cast<const ExternalCallInstruction *>( \
+                    instruction) \
+                    ->external_call_context] \
+               .dest_slot \
+        << "] "; \
+    for (auto arg : external_call_contexts_[instruction->args[0]].args) { \
+      output << "[" << std::setw(3) << arg << "] "; \
+    } \
+    output << "(" \
+           << static_cast<const llvm::CallInst *>( \
+                  instruction_trace_[GetIndexFromIP(instruction)]) \
+                  ->getCalledFunction() \
+                  ->getName() \
+                  .str() \
+           << ") "; \
+    break;
+#else
+#define HANDLE_EXTERNAL_CALL_INST(opcode) \
+  case Opcode::opcode: \
+    output << "[" << std::setw(3) \
+           << external_call_contexts_ \
+                  [reinterpret_cast<const ExternalCallInstruction *>( \
+                       instruction) \
+                       ->external_call_context] \
+                  .dest_slot \
+           << "] "; \
+    for (auto arg : external_call_contexts_[instruction->args[0]].args) { \
+      output << "[" << std::setw(3) << arg << "] "; \
+    } \
+    break;
+#endif
+
+#ifndef NDEBUG
+#define HANDLE_INTERNAL_CALL_INST(opcode) \
+  case Opcode::opcode: \
+    output << "[" << std::setw(3) \
+           << reinterpret_cast<const InternalCallInstruction *>(instruction) \
+                  ->dest_slot \
+           << "] "; \
+    for (size_t i = 0; \
+         i < reinterpret_cast<const InternalCallInstruction *>(instruction) \
+                 ->number_args; \
+         i++) { \
+      output << "[" << std::setw(3) \
+             << reinterpret_cast<const InternalCallInstruction *>( \
+                    instruction) \
+                    ->args[i] \
+             << "] "; \
+    } \
+    output << "(" \
+           << static_cast<const llvm::CallInst *>( \
+                  instruction_trace_[GetIndexFromIP(instruction)]) \
+                  ->getCalledFunction() \
+                  ->getName() \
+                  .str() \
+           << ") "; \
+    break;
+#else
+#define HANDLE_INTERNAL_CALL_INST(opcode) \
+  case Opcode::opcode: \
+    output << "[" << std::setw(3) \
+           << reinterpret_cast<const InternalCallInstruction *>(instruction) \
+                  ->dest_slot \
+           << "] "; \
+    for (size_t i = 0; \
+         i < reinterpret_cast<const InternalCallInstruction *>(instruction) \
+                 ->number_args; \
+         i++) { \
+      output << "[" << std::setw(3) \
+             << reinterpret_cast<const InternalCallInstruction *>( \
+                    instruction) \
+                    ->args[i] \
+             << "] "; \
+    } \
+    break;
+#endif
+
+#define HANDLE_SELECT_INST(opcode) \
+  case Opcode::opcode: \
+    output << "[" << std::setw(3) << instruction->args[0] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[1] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[2] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[3] << "] "; \
+    break;
+
+#define HANDLE_OVERFLOW_TYPED_INST(op, type) \
+  case Opcode::op##_##type: \
+    output << "[" << std::setw(3) << instruction->args[0] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[1] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[2] << "] "; \
+    output << "[" << std::setw(3) << instruction->args[3] << "] "; \
+    break;
+
+#define HANDLE_EXPLICIT_CALL_INST(opcode, func) \
+  case Opcode::opcode: \
+    for (size_t i = 0; i < GetFunctionRequiredArgSlotsNum(&func); i++) \
+      output << "[" << std::setw(3) << instruction->args[i] << "] "; \
+    break;
+
+#include "codegen/interpreter/bytecode_instructions.def"
+
+    default:
+      break;
+  }
+
+#ifndef NDEBUG
+  output << "("
+         << CodeGen::Dump(GetIRInstructionFromIP(GetIndexFromIP(instruction)))
+         << ")";
+#endif
+
+  return output.str();
+}
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/codegen/interpreter/bytecode_interpreter.cpp b/src/codegen/interpreter/bytecode_interpreter.cpp
new file mode 100644
index 00000000000..ccd51f495d8
--- /dev/null
+++ b/src/codegen/interpreter/bytecode_interpreter.cpp
@@ -0,0 +1,190 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_interpreter.cpp
+//
+// Identification: src/codegen/interpreter/bytecode_interpreter.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/interpreter/bytecode_interpreter.h"
+#include "codegen/interpreter/bytecode_function.h"
+
+namespace peloton {
+namespace codegen {
+namespace interpreter {
+
+/** This is the actual dispatch code: it looks up the destination handler
+ * address in the label_pointers_ array and performs a direct jump there.
+ */
+#define INTERPRETER_DISPATCH_GOTO(ip) \
+  goto *(label_pointers_[BytecodeFunction::GetOpcodeId( \
+      reinterpret_cast<const Instruction *>(ip)->op)])
+
+/**
+ * The array with the label pointers has to be zero-initialized to make sure
+ * that we fill it with the actual values on the first execution.
+ */
+void *
+    BytecodeInterpreter::label_pointers_[BytecodeFunction::GetNumberOpcodes()] =
+        {nullptr};
+
+BytecodeInterpreter::BytecodeInterpreter(
+    const BytecodeFunction &bytecode_function)
+    : bytecode_function_(bytecode_function) {}
+
+value_t BytecodeInterpreter::ExecuteFunction(
+    const BytecodeFunction &bytecode_function,
+    const std::vector<value_t> &arguments) {
+  BytecodeInterpreter interpreter(bytecode_function);
+  interpreter.ExecuteFunction(arguments);
+
+  return interpreter.GetReturnValue<value_t>();
+}
+
+void BytecodeInterpreter::ExecuteFunction(
+    const BytecodeFunction &bytecode_function, char *param) {
+  BytecodeInterpreter interpreter(bytecode_function);
+  interpreter.ExecuteFunction({reinterpret_cast<value_t>(param)});
+}
+
+NEVER_INLINE NO_CLONE void BytecodeInterpreter::ExecuteFunction(
+    const std::vector<value_t> &arguments) {
+  // Fill the label_pointers_ array with the handler addresses at first
+  // startup. (This can't be done outside of this function, as the labels are
+  // not visible there.)
+  if (label_pointers_[0] == nullptr) {
+#define HANDLE_INST(op) \
+  label_pointers_[BytecodeFunction::GetOpcodeId(Opcode::op)] = &&_##op;
+
+#include "codegen/interpreter/bytecode_instructions.def"
+  }
+
+  InitializeActivationRecord(arguments);
+
+  // Get initial instruction pointer
+  const Instruction *bytecode =
+      reinterpret_cast<const Instruction *>(&bytecode_function_.bytecode_[0]);
+  const Instruction *ip = bytecode;
+
+  // Start execution with first instruction
+  INTERPRETER_DISPATCH_GOTO(ip);
+
+//--------------------------------------------------------------------------//
+// Dispatch area
+//
+// This is the actual dispatch area of the interpreter. Because we use
+// threaded interpretation, this is not a dispatch loop, but a long list of
+// labels, and the control flow jumps from one handler to the next with
+// gotos -> INTERPRETER_DISPATCH_GOTO(ip)
+//
+// The whole dispatch area gets generated using the bytecode_instructions.def
+// file. All instruction handlers from query_interpreter.h will get inlined
+// here for all their types. Even though the function looks small here,
+// it will be over 13kB in the resulting binary!
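+//
+// Illustrative expansion (a sketch, using a hypothetical opcode add_i32):
+// HANDLE_TYPED_INST(add, i32) below would roughly produce
+//
+//   _add_i32 : TRACE_CODE_PRE;
+//   ip = addHandler(ip);
+//   INTERPRETER_DISPATCH_GOTO(ip);
+//
+// i.e. every handler tail-jumps directly to the next instruction's label
+// instead of returning to a central dispatch loop.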
+//--------------------------------------------------------------------------//
+
+#ifdef LOG_TRACE_ENABLED
+#define TRACE_CODE_PRE LOG_TRACE("%s", bytecode_function_.Dump(ip).c_str())
+#else
+#define TRACE_CODE_PRE
+#endif
+
+#define HANDLE_RET_INST(op) \
+  _ret: \
+  TRACE_CODE_PRE; \
+  GetValueReference<value_t>(0) = GetValue<value_t>(ip->args[0]); \
+  return;
+
+#define HANDLE_TYPED_INST(op, type) \
+  _##op##_##type : TRACE_CODE_PRE; \
+  ip = op##Handler(ip); \
+  INTERPRETER_DISPATCH_GOTO(ip);
+
+#define HANDLE_INST(op) \
+  _##op : TRACE_CODE_PRE; \
+  ip = op##Handler(ip); \
+  INTERPRETER_DISPATCH_GOTO(ip);
+
+#define HANDLE_EXPLICIT_CALL_INST(op, func) \
+  _##op : TRACE_CODE_PRE; \
+  ip = explicit_callHandler(ip, &func); \
+  INTERPRETER_DISPATCH_GOTO(ip);
+
+#include "codegen/interpreter/bytecode_instructions.def"
+
+  //--------------------------------------------------------------------------//
+}
+
+template <typename type_t>
+type_t BytecodeInterpreter::GetReturnValue() {
+  // the ret instruction saves the return value in value slot 0 by definition
+  return GetValue<type_t>(0);
+}
+
+void BytecodeInterpreter::InitializeActivationRecord(
+    const std::vector<value_t> &arguments) {
+  // resize vector to required number of value slots
+  values_.resize(bytecode_function_.number_values_);
+
+  index_t value_slot = 1;
+
+  // fill in constants
+  for (auto &constant : bytecode_function_.constants_) {
+    SetValue(value_slot++, constant);
+  }
+
+  // check if the provided number of arguments matches the number required by
+  // the function
+  if (bytecode_function_.number_function_arguments_ != arguments.size()) {
+    throw Exception(
+        "llvm function called through interpreter with wrong number of "
+        "arguments");
+  }
+
+  // fill in function arguments
+  for (auto &argument : arguments) {
+    SetValue(value_slot++, argument);
+  }
+
+  // prepare call activations
+  call_activations_.resize(bytecode_function_.external_call_contexts_.size());
+  for (size_t i = 0; i < bytecode_function_.external_call_contexts_.size();
+       i++) {
+    auto &call_context = bytecode_function_.external_call_contexts_[i];
+    auto &call_activation = call_activations_[i];
+
+    // initialize libffi call interface
+    if (ffi_prep_cif(&call_activation.call_interface, FFI_DEFAULT_ABI,
+                     call_context.args.size(), call_context.dest_type,
+                     const_cast<ffi_type **>(call_context.arg_types.data())) !=
+        FFI_OK) {
+      throw Exception("initializing ffi call interface failed");
+    }
+
+    // save the pointers to the value slots in the contiguous arrays
+    for (const auto &arg : call_context.args) {
+      call_activation.value_pointers.push_back(&values_[arg]);
+    }
+    call_activation.return_pointer = &values_[call_context.dest_slot];
+  }
+}
+
+uintptr_t BytecodeInterpreter::AllocateMemory(size_t number_bytes) {
+  // allocate memory
+  std::unique_ptr<char[]> pointer =
+      std::unique_ptr<char[]>(new char[number_bytes]);
+
+  // get raw pointer before moving pointer object!
+ auto raw_pointer = reinterpret_cast(pointer.get()); + + allocations_.emplace_back(std::move(pointer)); + return raw_pointer; +} + +} // namespace interpreter +} // namespace codegen +} // namespace peloton \ No newline at end of file diff --git a/src/codegen/query.cpp b/src/codegen/query.cpp index 87ed5ab572d..c69601b3b7f 100644 --- a/src/codegen/query.cpp +++ b/src/codegen/query.cpp @@ -11,11 +11,15 @@ //===----------------------------------------------------------------------===// #include "codegen/query.h" -#include "codegen/execution_consumer.h" +#include "codegen/interpreter/bytecode_builder.h" +#include "codegen/interpreter/bytecode_interpreter.h" +#include "codegen/query_compiler.h" #include "common/timer.h" #include "executor/plan_executor.h" +#include "codegen/execution_consumer.h" #include "executor/executor_context.h" #include "storage/storage_manager.h" +#include "settings/settings_manager.h" namespace peloton { namespace codegen { @@ -31,36 +35,103 @@ void Query::Execute(executor::ExecutorContext &executor_context, llvm::Type *query_state_type = query_state_.GetType(); size_t parameter_size = codegen.SizeOf(query_state_type); PELOTON_ASSERT((parameter_size % 8 == 0) && - "parameter size not multiple of 8"); + "parameter size not multiple of 8"); // Allocate some space for the function arguments std::unique_ptr param_data{new char[parameter_size]}; char *param = param_data.get(); PELOTON_MEMSET(param, 0, parameter_size); - // We use this handy class to avoid complex casting and pointer manipulation - struct FunctionArguments { - executor::ExecutorContext *executor_context; - char *consumer_arg; - char rest[0]; - } PACKED; - // Set up the function arguments auto *func_args = reinterpret_cast(param_data.get()); func_args->executor_context = &executor_context; func_args->consumer_arg = consumer.GetConsumerState(); + bool force_interpreter = settings::SettingsManager::GetBool( + settings::SettingId::codegen_interpreter); + + if (is_compiled_ && !force_interpreter) { + ExecuteNative(func_args, stats); + } else { + try { + ExecuteInterpreter(func_args, stats); + } catch (interpreter::NotSupportedException e) { + LOG_ERROR("query not supported by interpreter: %s", e.what()); + } + } +} + +void Query::Prepare(const LLVMFunctions &query_funcs) { + llvm_functions_ = query_funcs; + + // verify the functions + // will also be done by Optimize() or Compile() if not done before, + // but we do not want to mix up the timings, so do it here + code_context_.Verify(); + + // optimize the functions + // TODO(marcel): add switch to enable/disable optimization + // TODO(marcel): add timer to measure time used for optimization (see + // RuntimeStats) + code_context_.Optimize(); + + is_compiled_ = false; +} + +void Query::Compile(CompileStats *stats) { // Timer Timer timer; - timer.Start(); + if (stats != nullptr) { + timer.Start(); + } + + // Compile all functions in context + LOG_TRACE("Starting Query compilation ..."); + code_context_.Compile(); + + // Get pointers to the JITed functions + compiled_functions_.init_func = + (compiled_function_t)code_context_.GetRawFunctionPointer( + llvm_functions_.init_func); + PELOTON_ASSERT(compiled_functions_.init_func != nullptr); + + compiled_functions_.plan_func = + (compiled_function_t)code_context_.GetRawFunctionPointer( + llvm_functions_.plan_func); + PELOTON_ASSERT(compiled_functions_.plan_func != nullptr); + + compiled_functions_.tear_down_func = + (compiled_function_t)code_context_.GetRawFunctionPointer( + llvm_functions_.tear_down_func); + 
PELOTON_ASSERT(compiled_functions_.tear_down_func != nullptr); + + is_compiled_ = true; + + LOG_TRACE("Compilation finished."); + + // Timer for JIT compilation + if (stats != nullptr) { + timer.Stop(); + stats->compile_ms = timer.GetDuration(); + timer.Reset(); + } +} + +void Query::ExecuteNative(FunctionArguments *function_arguments, + RuntimeStats *stats) { + // Start timer + Timer timer; + if (stats != nullptr) { + timer.Start(); + } // Call init LOG_TRACE("Calling query's init() ..."); try { - init_func_(param); + compiled_functions_.init_func(function_arguments); } catch (...) { // Cleanup if an exception is encountered - tear_down_func_(param); + compiled_functions_.tear_down_func(function_arguments); throw; } @@ -75,10 +146,10 @@ void Query::Execute(executor::ExecutorContext &executor_context, // Execute the query! LOG_TRACE("Calling query's plan() ..."); try { - plan_func_(param); + compiled_functions_.plan_func(function_arguments); } catch (...) { // Cleanup if an exception is encountered - tear_down_func_(param); + compiled_functions_.tear_down_func(function_arguments); throw; } @@ -92,7 +163,7 @@ void Query::Execute(executor::ExecutorContext &executor_context, // Clean up LOG_TRACE("Calling query's tearDown() ..."); - tear_down_func_(param); + compiled_functions_.tear_down_func(function_arguments); // No need to cleanup if we get an exception while cleaning up... if (stats != nullptr) { @@ -101,33 +172,82 @@ void Query::Execute(executor::ExecutorContext &executor_context, } } -bool Query::Prepare(const QueryFunctions &query_funcs) { - LOG_TRACE("Going to JIT the query ..."); +void Query::ExecuteInterpreter(FunctionArguments *function_arguments, + RuntimeStats *stats) { + LOG_INFO("Using codegen interpreter to execute plan"); - // Compile the code - if (!code_context_.Compile()) { - return false; + // Timer + Timer timer; + if (stats != nullptr) { + timer.Start(); } - LOG_TRACE("Setting up Query ..."); + // Create Bytecode + interpreter::BytecodeFunction init_bytecode = + interpreter::BytecodeBuilder::CreateBytecodeFunction( + code_context_, llvm_functions_.init_func); + interpreter::BytecodeFunction plan_bytecode = + interpreter::BytecodeBuilder::CreateBytecodeFunction( + code_context_, llvm_functions_.plan_func); + interpreter::BytecodeFunction tear_down_bytecode = + interpreter::BytecodeBuilder::CreateBytecodeFunction( + code_context_, llvm_functions_.tear_down_func); - // Get pointers to the JITed functions - init_func_ = (compiled_function_t)code_context_.GetRawFunctionPointer( - query_funcs.init_func); - PELOTON_ASSERT(init_func_ != nullptr); + // Time initialization + if (stats != nullptr) { + timer.Stop(); + stats->interpreter_prepare_ms = timer.GetDuration(); + timer.Reset(); + timer.Start(); + } - plan_func_ = (compiled_function_t)code_context_.GetRawFunctionPointer( - query_funcs.plan_func); - PELOTON_ASSERT(plan_func_ != nullptr); + // Call init + LOG_TRACE("Calling query's init() ..."); + try { + interpreter::BytecodeInterpreter::ExecuteFunction( + init_bytecode, reinterpret_cast(function_arguments)); + } catch (...) { + interpreter::BytecodeInterpreter::ExecuteFunction( + tear_down_bytecode, reinterpret_cast(function_arguments)); + throw; + } + + if (stats != nullptr) { + timer.Stop(); + stats->init_ms = timer.GetDuration(); + timer.Reset(); + timer.Start(); + } + + // Execute the query! + LOG_TRACE("Calling query's plan() ..."); + try { + interpreter::BytecodeInterpreter::ExecuteFunction( + plan_bytecode, reinterpret_cast(function_arguments)); + } catch (...) 
{ + interpreter::BytecodeInterpreter::ExecuteFunction( + tear_down_bytecode, reinterpret_cast(function_arguments)); + throw; + } - tear_down_func_ = (compiled_function_t)code_context_.GetRawFunctionPointer( - query_funcs.tear_down_func); - PELOTON_ASSERT(tear_down_func_ != nullptr); + // Timer plan execution + if (stats != nullptr) { + timer.Stop(); + stats->plan_ms = timer.GetDuration(); + timer.Reset(); + timer.Start(); + } - LOG_TRACE("Query has been setup ..."); + // Clean up + LOG_TRACE("Calling query's tearDown() ..."); + interpreter::BytecodeInterpreter::ExecuteFunction( + tear_down_bytecode, reinterpret_cast(function_arguments)); - // All is well - return true; + // No need to cleanup if we get an exception while cleaning up... + if (stats != nullptr) { + timer.Stop(); + stats->tear_down_ms = timer.GetDuration(); + } } } // namespace codegen diff --git a/src/codegen/updateable_storage.cpp b/src/codegen/updateable_storage.cpp index f911f13a8a1..ba699aeea25 100644 --- a/src/codegen/updateable_storage.cpp +++ b/src/codegen/updateable_storage.cpp @@ -16,6 +16,7 @@ #include "codegen/lang/if.h" #include "codegen/type/sql_type.h" +#include "util/math_util.h" namespace peloton { namespace codegen { @@ -248,7 +249,7 @@ UpdateableStorage::NullBitmap::NullBitmap(CodeGen &codegen, bitmap_ptr_ = codegen->CreateConstInBoundsGEP2_32( storage.GetNullBitmapType(), bitmap_arr, 0, 0); } - uint32_t num_bytes = (storage_.GetNumElements() + 7) >> 3; + uint32_t num_bytes = MathUtil::DivRoundUp(storage_.GetNumElements(), 8); bytes_.resize(num_bytes, nullptr); dirty_.resize(num_bytes, false); } diff --git a/src/executor/plan_executor.cpp b/src/executor/plan_executor.cpp index 6226e3a26cf..a945c46bc59 100644 --- a/src/executor/plan_executor.cpp +++ b/src/executor/plan_executor.cpp @@ -56,10 +56,10 @@ static void CompileAndExecutePlan( // Check if we have a cached compiled plan already codegen::Query *query = codegen::QueryCache::Instance().Find(plan); if (query == nullptr) { - // Cached plan doesn't exist, let's compile the query codegen::QueryCompiler compiler; auto compiled_query = compiler.Compile( *plan, executor_context.GetParams().GetQueryParametersMap(), consumer); + compiled_query->Compile(); // Grab an instance to the plan query = compiled_query.get(); diff --git a/src/include/codegen/code_context.h b/src/include/codegen/code_context.h index be41f2f536d..2c6c1b97a5d 100644 --- a/src/include/codegen/code_context.h +++ b/src/include/codegen/code_context.h @@ -34,6 +34,10 @@ namespace codegen { class FunctionBuilder; +namespace interpreter { +class BytecodeBuilder; +} // namespace interpreter + //===----------------------------------------------------------------------===// // The context where all generated LLVM query code resides. We create a context // instance for every query we see. 
We keep instances of these around in the @@ -43,6 +47,7 @@ class FunctionBuilder; class CodeContext { friend class CodeGen; friend class FunctionBuilder; + friend class interpreter::BytecodeBuilder; public: using FuncPtr = void *; @@ -63,7 +68,7 @@ class CodeContext { void RegisterBuiltin(llvm::Function *func_decl, FuncPtr func_impl); /// Lookup a builtin function that has been registered in this context - llvm::Function *LookupBuiltin(const std::string &name) const; + std::pair LookupBuiltin(const std::string &name) const; /// Return the LLVM function for UDF that has been registered in this context llvm::Function *GetUDF() const { return udf_func_ptr_; } @@ -71,13 +76,37 @@ class CodeContext { /// Sets UDF function ptr void SetUDF(llvm::Function *func_ptr) { udf_func_ptr_ = func_ptr; } + /// Verify all the code contained in this context + void Verify(); + + /// Optimize all the code contained in this context + void Optimize(); + /// Compile all the code contained in this context - bool Compile(); + void Compile(); /// Retrieve the raw function pointer to the provided compiled LLVM function FuncPtr GetRawFunctionPointer(llvm::Function *fn) const; + /// Get the number of bytes that are needed to store this type + size_t GetTypeSize(llvm::Type *type) const; + + /// Get the number of bits that are needed to store this type + size_t GetTypeSizeInBits(llvm::Type *type) const; + + /// Get the number of bytes between two elements of this type + /// This also includes the padding + size_t GetTypeAllocSize(llvm::Type *type) const; + + /// Get the number of bits between two elements of this type + /// This also includes the padding + size_t GetTypeAllocSizeInBits(llvm::Type *type) const; + + /// Get the offset of element inside a struct in byte + size_t GetStructElementOffset(llvm::StructType *type, size_t index) const; + /// Dump the contents of all the code in this context + /// Attention: this function may change the IR! void DumpContents() const; ////////////////////////////////////////////////////////////////////////////// @@ -143,20 +172,23 @@ class CodeContext { llvm::Type *int16_type_; llvm::Type *int32_type_; llvm::Type *int64_type_; + llvm::Type *float_type_; llvm::Type *double_type_; llvm::Type *void_type_; llvm::Type *void_ptr_type_; llvm::PointerType *char_ptr_type_; // All C/C++ builtin functions and their implementations - std::unordered_map builtins_; + std::unordered_map> + builtins_; // The functions needed in this module, and their implementations. If the // function has not been compiled yet, the function pointer will be NULL. 
The // function pointers are populated in Compile() std::vector> functions_; - std::unordered_map function_symbols_; + // Shows if the Verify() has been run + bool is_verified_; }; } // namespace codegen diff --git a/src/include/codegen/codegen.h b/src/include/codegen/codegen.h index 9a56edf5dfd..07952d2c4a6 100644 --- a/src/include/codegen/codegen.h +++ b/src/include/codegen/codegen.h @@ -157,9 +157,7 @@ class CodeGen { // Function lookup and registration //===--------------------------------------------------------------------===// llvm::Type *LookupType(const std::string &name) const; - llvm::Function *LookupBuiltin(const std::string &fn_name) const { - return code_context_.LookupBuiltin(fn_name); - } + std::pair LookupBuiltin(const std::string &name) const; llvm::Function *RegisterBuiltin(const std::string &fn_name, llvm::FunctionType *fn_type, void *func_impl); @@ -182,6 +180,13 @@ class CodeGen { return code_context_.GetCurrentFunction(); } + //===--------------------------------------------------------------------===// + // DEBUG OUTPUT + //===--------------------------------------------------------------------===// + + static std::string Dump(const llvm::Value *value); + static std::string Dump(llvm::Type *type); + private: friend class Hash; friend class Value; diff --git a/src/include/codegen/interpreter/bytecode_builder.h b/src/include/codegen/interpreter/bytecode_builder.h new file mode 100644 index 00000000000..20571a0a248 --- /dev/null +++ b/src/include/codegen/interpreter/bytecode_builder.h @@ -0,0 +1,470 @@ +//===----------------------------------------------------------------------===// +// +// Peloton +// +// bytecode_builder.h +// +// Identification: src/include/codegen/interpreter/bytecode_builder.h +// +// Copyright (c) 2015-2018, Carnegie Mellon University Database Group +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "codegen/interpreter/bytecode_function.h" + +namespace llvm { +class Instruction; +class Function; +class Value; +class BasicBlock; +class Type; +class Constant; +class CallInst; +class ExtractValueInst; +} // namespace llvm + +namespace peloton { +namespace codegen { + +class CodeContext; + +namespace interpreter { + +class BytecodeBuilder { + public: + /** + * Static method to create a bytecode function from a code context. + * @param code_context CodeContext containing the LLVM function + * @param function LLVM function that shall be interpreted later + * @return A BytecodeFunction object that can be passed to the + * BytecodeInterpreter (several times). + */ + static BytecodeFunction CreateBytecodeFunction( + const CodeContext &code_context, const llvm::Function *function, + bool use_naive_register_allocator = false); + + private: + // These types definitions have the purpose to make the code better + // understandable. The bytecode builder creates indexes to identify the + // LLVM types, which usually are only accessed by raw pointers. + // Those types shall indicate which index is meant by a function. + // None of these indexes end up in the bytecode function! + using value_index_t = index_t; + using instruction_index_t = index_t; + + /** + * Describes a bytecode relocation that has to be applied to add the + * destination of a branch instruction once its value is available. + * It gets created by TranslateBranch() and is processed after + * TranslateFunction() processed all instructions. 
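+   *
+   * (For example: a forward branch records its instruction slot and which
+   * argument holds the jump target; once all basic blocks are translated,
+   * that argument is patched with the first instruction index of the
+   * destination block.)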
+   */
+  struct BytecodeRelocation {
+    index_t instruction_slot;
+    index_t argument;
+    const llvm::BasicBlock *bb;
+  };
+
+  /**
+   * Describes the liveness of a value by start and end instruction index.
+   */
+  using ValueLiveness = std::pair<instruction_index_t, instruction_index_t>;
+
+ private:
+  BytecodeBuilder(const CodeContext &code_context,
+                  const llvm::Function *function);
+
+  /**
+   * Analyses the function to collect values and constants and gathers
+   * value liveness information
+   */
+  void AnalyseFunction();
+
+  /**
+   * Naive register allocation that just assigns a unique value slot to
+   * every value
+   */
+  void PerformNaiveRegisterAllocation();
+
+  /**
+   * Greedy register allocation that, for each value, tries to find the next
+   * free value slot that is no longer occupied.
+   */
+  void PerformGreedyRegisterAllocation();
+
+  /**
+   * Translates all instructions into bytecode.
+   */
+  void TranslateFunction();
+
+  /**
+   * Do some final conversions to make the created BytecodeFunction usable.
+   */
+  void Finalize();
+
+ private:
+  //===--------------------------------------------------------------------===//
+  // Methods for Value Handling
+  //===--------------------------------------------------------------------===//
+
+  /**
+   * Gets the value index for a given LLVM value. If no value index exists
+   * for this LLVM value, a new one is created.
+   * @param value LLVM Value
+   * @return the value index that is mapped to this LLVM value
+   */
+  value_index_t GetValueIndex(const llvm::Value *value);
+
+  /**
+   * Maps a given LLVM value to the same value index as another LLVM Value.
+   * @param alias LLVM value
+   * @param value_index the value index to map the LLVM value to. The
+   * value index must already exist.
+   * @return the value index that was given as parameter
+   */
+  value_index_t CreateValueAlias(const llvm::Value *alias,
+                                 value_index_t value_index);
+
+  /**
+   * Returns the value_index for an LLVM constant.
+   * In LLVM several Constant objects with the same value can exist. This
+   * function tries to find an existing constant with the same value or creates
+   * a new one if necessary.
+   * @param constant LLVM constant
+   * @return a value index that refers to a constant with the same value. If
+   * no internal constant with this value existed before, a new value index
+   * is created.
+   */
+  value_index_t GetConstantIndex(const llvm::Constant *constant);
+
+  /**
+   * Returns the value slot (register) for a given LLVM value
+   * @param value LLVM value
+   * @return the value slot (register) assigned by the register allocation.
+   * This function must not be called before the analysis pass and the
+   * register allocation!
+   */
+  index_t GetValueSlot(const llvm::Value *value) const;
+
+  /**
+   * Extends the liveness range of a value to cover the given instruction
+   * index. The range will be extended to the "left" or the "right" if
+   * necessary, or not at all if it already covers this index.
+   * This function calls GetValueIndex, which may create a new value index.
+   * @param llvm_value LLVM value for which the liveness should be extended
+   * @param instruction_index position in the instruction stream that the
+   * liveness range must cover
+   */
+  void ExtendValueLiveness(const llvm::Value *llvm_value,
+                           instruction_index_t instruction_index);
+
+  /**
+   * Returns the index for an additional temporary value slot in that
+   * basic block. Due to the phi swap problem (lost copy) it can happen
+   * that during translation additional value slots are needed that have not
+   * been mapped by the register allocation.
The number of additional temporary + * value slots is tracked and added to the overall number of value + * slots during finalization. + * @param bb basic block the temporary value slot shall be created in + * @return a temporary value slot index, that can be used only in + * this basic block + */ + index_t GetTemporaryValueSlot(const llvm::BasicBlock *bb); + + //===--------------------------------------------------------------------===// + // Helper Functions (const) + //===--------------------------------------------------------------------===// + + /** + * Returns the matching FFI type for a given LLVM type + * @param type LLVM type + * @return FFI type + */ + ffi_type *GetFFIType(llvm::Type *type) const; + + /** + * Checks if a LLVM Value is a constant + * @param value LLVM Value + * @return true, if the given LLVM value is a constant + */ + bool IsConstantValue(const llvm::Value *value) const; + + /** + * Extracts the actual constant value of a LLVM constant + * @param constant LLVM constant + * @return the actual value of the constant, sign or zero extended to + * the size of value_t + */ + value_t GetConstantValue(const llvm::Constant *constant) const; + + /** + * Directly extracts the signed integer value of a integer constant + * @param constant LLVM Constant that is a instance of llvm::ConstantInt + * @return signed integer value of the LLVM constant + */ + int64_t GetConstantIntegerValueSigned(llvm::Value *constant) const; + + /** + * Directly extracts the unsigned integer value of a integer constant + * @param constant LLVM Constant that is a instance of llvm::ConstantInt + * @return unsigned integer value of the LLVM constant + */ + uint64_t GetConstantIntegerValueUnsigned(llvm::Value *constant) const; + + /** + * Checks if one basic block is the successor of another basic block + * when walking all basic blocks in reverse post order. + * (Because ->nextNode doesn't work then) + * @param bb current LLVM basic block + * @param succ LLVM basic block that shall be checked to be the successor + * @return true, if succ is the successor of bb + */ + bool BasicBlockIsRPOSucc(const llvm::BasicBlock *bb, + const llvm::BasicBlock *succ) const; + + /** + * Creates the typed opcode for a bytecode instruction that is defined for + * _all_ types + * @param untyped_op untyped opcode for a byte instruction, retrieved using + * GET_FIRST_ALL_TYPES(op), where op must be defined for all types. + * @param type LLVM type to take the type information from + * @return typed opcode _ + */ + Opcode GetOpcodeForTypeAllTypes(Opcode untyped_op, llvm::Type *type) const; + + /** + * Creates the typed opcode for a bytecode instruction that is defined only + * for _integer_ types + * @param untyped_op untyped opcode for a byte instruction, retrieved using + * GET_FIRST_INT_TYPES(op), where op must be defined only for integer types. + * @param type LLVM type to take the type information from + * @return typed opcode _ + */ + Opcode GetOpcodeForTypeIntTypes(Opcode untyped_op, llvm::Type *type) const; + + /** + * Creates the typed opcode for a bytecode instruction that is defined only + * for _floating point_ types + * @param untyped_op untyped opcode for a byte instruction, retrieved using + * GET_FIRST_FLOAT_TYPES(op), where op must be defined only for float types. 
+ * @param type LLVM type to take the type information from + * @return typed opcode _ + */ + Opcode GetOpcodeForTypeFloatTypes(Opcode untyped_op, llvm::Type *type) const; + + /** + * Creates the typed opcode for a bytecode instruction that is defined only + * for _integer_ types. In difference to the other function, this one only + * considers the type size to determine the opcode type. + * @param untyped_op untyped opcode for a byte instruction, retrieved using + * GET_FIRST_INT_TYPES(op), where op must be defined only for integer types. + * @param type LLVM type to take the size information from + * @return typed opcode _ + */ + Opcode GetOpcodeForTypeSizeIntTypes(Opcode untyped_op, + llvm::Type *type) const; + + //===--------------------------------------------------------------------===// + // Methods for creating Bytecode Instructions + //===--------------------------------------------------------------------===// + + /** + * Insert a bytecode instruction into the bytecode stream. + */ + Instruction &InsertBytecodeInstruction( + const llvm::Instruction *llvm_instruction, Opcode opcode, + const std::vector &args); + + /** + * Insert a bytecode instruction into the bytecode stream. + * Wrapper that automatically gets the value slots for the LLVM values + * provided. + */ + Instruction &InsertBytecodeInstruction( + const llvm::Instruction *llvm_instruction, Opcode opcode, + const std::vector &args); + + /** + * Insert a external call bytecode instruction into the bytecode stream. + * @param llvm_instruction LLVM function this instruction is created from. + * (Only needed for tracing information, not used in Release mode!) + * @param call_context index of the call context created for this external + * call instruction + * @param function function pointer to the external function + * @return Reference to the created instruction in the bytecode stream. + */ + ExternalCallInstruction &InsertBytecodeExternalCallInstruction( + const llvm::Instruction *llvm_instruction, index_t call_context, + void *function); + + /** + * Insert a internal call bytecode instruction into the bytecode stream. + * @param llvm_instruction LLVM function this instruction is created from. + * (Only needed for tracing information, not used in Release mode!) + * @param sub_function index to the sub function (bytecode function) for + * this LLVM function + * @param dest_slot Destination slot for the return value. Set zero if + * internal function returns void. + * @param number_arguments number of arguments provided in this function call. + * The internal call instruction has variadic size, depending on the number + * of arguments! + * @return Reference to the created instruction in the bytecode stream. + */ + InternalCallInstruction &InsertBytecodeInternalCallInstruction( + const llvm::Instruction *llvm_instruction, index_t sub_function, + index_t dest_slot, size_t number_arguments); + +/** + * Helper function, that adds the given instruction to the instruction trace. + * (Should only be called from InsertBytecode instructions) + * In Release mode this function compiles to a stub. 
+ * @param llvm_instruction LLVM instruction the just created bytecode + * instruction originates from + * @param number_instruction_slots size of the bytecode instruction + */ +#ifndef NDEBUG + void AddInstructionToTrace(const llvm::Instruction *llvm_instruction, + size_t number_instruction_slots = 1); +#else + void AddInstructionToTrace( + UNUSED_ATTRIBUTE const llvm::Instruction *llvm_instruction, + UNUSED_ATTRIBUTE size_t number_instruction_slots = 1) {} +#endif + + //===--------------------------------------------------------------------===// + // Methods for Translating LLVM Instructions (called by TranslateFunction()) + //===--------------------------------------------------------------------===// + + /** + * Resolves the PHI nodes referring to this basic block, by placing mov + * instructions. Must be called just before the terminating LLVM instruction + * in a basic block. If the PHI swap / lost copy problem can occur, the + * function creates additional mov instructions and value slots. + * @param bb current basic block + */ + void ProcessPHIsForBasicBlock(const llvm::BasicBlock *bb); + + void TranslateBranch(const llvm::Instruction *instruction, + std::vector &bytecode_relocations); + void TranslateReturn(const llvm::Instruction *instruction); + void TranslateBinaryOperator(const llvm::Instruction *instruction); + void TranslateAlloca(const llvm::Instruction *instruction); + void TranslateLoad(const llvm::Instruction *instruction); + void TranslateStore(const llvm::Instruction *instruction); + void TranslateGetElementPtr(const llvm::Instruction *instruction); + void TranslateIntExt(const llvm::Instruction *instruction); + void TranslateFloatTruncExt(const llvm::Instruction *instruction); + void TranslateFloatIntCast(const llvm::Instruction *instruction); + void TranslateCmp(const llvm::Instruction *instruction); + void TranslateCall(const llvm::Instruction *instruction); + void TranslateSelect(const llvm::Instruction *instruction); + void TranslateExtractValue(const llvm::Instruction *instruction); + + private: + /** + * The bytecode function that is created (and then moved). All other + * members are helping data structures that don't end up in the resulting + * bytecode function + */ + BytecodeFunction bytecode_function_; + + /** + * Mapping from Value* to internal value index (includes merged + * values/constants). The value index is used to access the vectors below. + */ + std::unordered_map value_mapping_; + + /** + * Holds the value liveness per value (after analysis) + */ + std::vector value_liveness_; + + /** + * Holds the assigned value slot per value (after register allocation) + */ + std::vector value_slots_; + + /** + * Overall number of value slots needed (from register allocation) + * without temporary value slots (added during translation) + */ + size_t number_value_slots_; + + /** + * Holds the value_index of the constants in bytecode_function_.constants_, + * accessed with the same index. + */ + std::vector constant_value_indexes_; + + /** + * Additional temporary value slots (created due to phi swap problem). + * Mapping from instruction index to number of temporary slots needed + * at that time (specified by instruction index). + */ + std::unordered_map + number_temporary_values_; + + /** + * Maximum number of temporary value slots needed at all time points. 
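+   * (That is, the maximum over all entries in number_temporary_values_.)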
+   */
+  size_t number_temporary_value_slots_;
+
+  /**
+   * Keeps track of all Call instructions that refer to an overflow-aware
+   * operation, as their results get saved directly in the destination slots
+   * of the ExtractValue instructions referring to them.
+   */
+  std::unordered_map<
+      const llvm::CallInst *,
+      std::pair>
+      overflow_results_mapping_;
+
+  /**
+   * Mapping of LLVM functions to bytecode functions to avoid duplicated
+   * functions in case an internal function is called several times
+   */
+  std::unordered_map sub_function_mapping_;
+
+  /**
+   * ReversePostOrderTraversal, which is used for all BB traversals.
+   * Initialization is very expensive, so we reuse it.
+   * Cannot be const, because the class doesn't provide const iterators.
+   */
+  llvm::ReversePostOrderTraversal rpo_traversal_;
+
+  /**
+   * A vector holding the basic block pointers in reverse post order.
+   * This vector is retrieved from the RPO traversal and is needed
+   * for predecessor lookups.
+   */
+  std::vector bb_reverse_post_order_;
+
+  /**
+   * Original code context the bytecode function is built from
+   */
+  const CodeContext &code_context_;
+
+  /**
+   * LLVM function that shall be translated
+   */
+  const llvm::Function *llvm_function_;
+};
+
+class NotSupportedException : public std::runtime_error {
+ public:
+  NotSupportedException(std::string message) : std::runtime_error(message) {}
+};
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/include/codegen/interpreter/bytecode_function.h b/src/include/codegen/interpreter/bytecode_function.h
new file mode 100644
index 00000000000..3bbb077e4f0
--- /dev/null
+++ b/src/include/codegen/interpreter/bytecode_function.h
@@ -0,0 +1,359 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_function.h
+//
+// Identification: src/include/codegen/interpreter/bytecode_function.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <ffi.h>
+
+#include <cstdint>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+
+#include "common/macros.h"
+#include "util/math_util.h"
+
+namespace llvm {
+class Instruction;
+}  // namespace llvm
+
+namespace peloton {
+namespace codegen {
+
+class CodeContext;
+
+namespace interpreter {
+
+class BytecodeInterpreter;
+class BytecodeBuilder;
+
+// Type definitions to match the LLVM terminology
+using i8 = uint8_t;
+using i16 = uint16_t;
+using i32 = uint32_t;
+using i64 = uint64_t;
+using value_t = uint64_t;
+using index_t = uint16_t;
+using instr_slot_t = uint64_t;
+
+// Type template that converts any type into the matching interpreter type
+template <typename type>
+using bytecode_type = typename std::conditional<
+    sizeof(type) == 1, i8,
+    typename std::conditional<
+        sizeof(type) == 2, i16,
+        typename std::conditional<sizeof(type) == 4, i32,
+                                  i64>::type>::type>::type;
+
+/**
+ * Enum holding all Opcodes for all instructions.
+ */
+enum class Opcode : index_t {
+  undefined,
+
+#define HANDLE_INST(opcode) opcode,
+#include "codegen/interpreter/bytecode_instructions.def"
+#undef HANDLE_INST
+
+  NUMBER_OPCODES
+};
+
+/**
+ * Struct to access a generic bytecode instruction.
+ * Every bytecode instruction starts with a 2-byte Opcode, followed by a
+ * variable number of 2-byte arguments. (Exception: ExternalCallInstruction)
+ *
+ * This struct is only for accessing Instructions, not for saving them!
+ * (sizeof returns a wrong value.) All bytecode instructions are saved in
+ * one or more 8-byte instruction slots (instr_slot_t) in the bytecode stream.
+ */
+struct Instruction {
+  Opcode op;
+  index_t args[];
+};
+
+/**
+ * Specialized struct for accessing an InternalCallInstruction. The number of
+ * arguments in .args[] is variable and must match the value in .number_args.
+ * GetInteralCallInstructionSlotSize uses this information to calculate the
+ * number of occupied instruction slots.
+ */
+struct InternalCallInstruction {
+  Opcode op;
+  index_t sub_function;
+  index_t dest_slot;
+  index_t number_args;
+  index_t args[];
+};
+
+/**
+ * Specialized struct for accessing an ExternalCallInstruction. It is the only
+ * instruction that contains a field larger than 2 bytes.
+ * Because libffi requires pointers to value slots of the current
+ * activation record, the instruction itself only contains an index for
+ * accessing the proper call context. During interpretation a call activation
+ * is created for every call context, holding the actual runtime pointers,
+ * which can be accessed with the same index.
+ */
+struct ExternalCallInstruction {
+  Opcode op;
+  index_t external_call_context;
+  void (*function)(void);
+};
+
+/**
+ * Call context holding the information needed to create a runtime call
+ * activation for an ExternalCallInstruction in the bytecode stream.
+ */
+struct ExternalCallContext {
+  index_t dest_slot;
+  ffi_type *dest_type;
+  std::vector<index_t> args;
+  std::vector<ffi_type *> arg_types;
+};
+
+/**
+ * A BytecodeFunction contains all information necessary to run an LLVM
+ * function in the interpreter and is completely independent of the
+ * CodeContext it was created from (except for the tracing information in debug
+ * mode). It can be moved and copied.
+ */
+class BytecodeFunction {
+ public:
+  /**
+   * Returns the Opcode enum for a given Opcode Id (to avoid plain casting)
+   * @param id Opcode Id
+   * @return Opcode enum
+   */
+  ALWAYS_INLINE inline static constexpr Opcode GetOpcodeFromId(index_t id) {
+    return static_cast<Opcode>(id);
+  }
+
+  /**
+   * Returns the Opcode Id for a given Opcode enum (to avoid plain casting)
+   * @param opcode Opcode enum
+   * @return Opcode Id
+   */
+  ALWAYS_INLINE inline static constexpr index_t GetOpcodeId(Opcode opcode) {
+    return static_cast<index_t>(opcode);
+  }
+
+  /**
+   * Returns a human-readable string for a given Opcode
+   * @param opcode Opcode enum
+   * @return string representation of the Opcode
+   */
+  static const char *GetOpcodeString(Opcode opcode);
+
+  /**
+   * Returns the overall number of existing Opcodes (not trivial, as the
+   * Opcodes are created with expanding macros)
+   * @return overall number of existing Opcodes
+   */
+  inline static constexpr size_t GetNumberOpcodes() {
+    return static_cast<size_t>(Opcode::NUMBER_OPCODES);
+  }
+
+  /**
+   * Returns the instruction pointer for a given instruction index (from this
+   * bytecode function)
+   * @param index instruction index
+   * @return pointer to the instruction at that index inside the bytecode
+   */
+  ALWAYS_INLINE inline const Instruction *GetIPFromIndex(index_t index) const {
+    return reinterpret_cast<const Instruction *>(
+        const_cast<instr_slot_t *>(bytecode_.data()) + index);
+  }
+
+  /**
+   * Returns the instruction index for a given instruction pointer (from this
+   * bytecode function)
+   * @param instruction pointer to a given instruction inside the bytecode
+   * @return index of the instruction the pointer is pointing to
+   */
+  ALWAYS_INLINE inline index_t GetIndexFromIP(
+      const Instruction *instruction) const {
+    index_t index =
+        reinterpret_cast<const instr_slot_t *>(instruction) - bytecode_.data();
+    return index;
+  }
+
+#ifndef NDEBUG
+  const llvm::Instruction *GetIRInstructionFromIP(index_t instr_slot) const;
+#endif
+
+  /**
+   * Returns the number of slots a given instruction occupies in the bytecode
+   * stream.
+   * @param instruction pointer to the instruction inside the bytecode
+   * @return number of slots (8 bytes each) that are used by this instruction
+   */
+  static size_t GetInstructionSlotSize(const Instruction *instruction);
+
+  /**
+   * Returns the number of slots a given internal call instruction occupies in
+   * the bytecode stream. Internal call instructions have a variable length,
+   * so the size has to be calculated.
+   * @param instruction pointer to an instruction of type internal call
+   * @return number of slots (8 bytes each) that are used by this instruction
+   */
+  static ALWAYS_INLINE inline size_t GetInteralCallInstructionSlotSize(
+      const InternalCallInstruction *instruction) {
+    const size_t number_slots =
+        MathUtil::DivRoundUp(sizeof(uint16_t) * (4 + instruction->number_args),
+                             sizeof(instr_slot_t));
+    PELOTON_ASSERT(number_slots > 0);
+    return number_slots;
+  }
+
+  /**
+   * Returns the number of slots an explicit call instruction occupies,
+   * given the number of argument slots. (The return value and/or object
+   * pointer also need a slot!)
+   * @param number_args number of needed argument slots
+   * @return number of slots (8 bytes each) that are used by this instruction
+   */
+  static constexpr ALWAYS_INLINE inline size_t
+  GetExplicitCallInstructionSlotSize(size_t number_args) {
+    return MathUtil::DivRoundUp(sizeof(uint16_t) * (1 + number_args),
+                                sizeof(instr_slot_t));
+  }
+
+  /**
+   * Returns the number of required argument slots that are needed in an
+   * explicit call bytecode instruction for this function.
+   * @param func pointer/reference to the function (declaration must be
+   * visible)
+   * @return number of required argument slots
+   * = arguments + return value + object pointer
+   */
+  template <typename return_type, typename... arg_types>
+  static constexpr ALWAYS_INLINE inline size_t GetFunctionRequiredArgSlotsNum(
+      UNUSED_ATTRIBUTE return_type (*func)(arg_types...)) {
+    return (std::is_void<return_type>::value) ? sizeof...(arg_types)
+                                              : sizeof...(arg_types) + 1;
+  }
+
+  template <typename return_type, typename class_type, typename... arg_types>
+  static constexpr ALWAYS_INLINE inline size_t GetFunctionRequiredArgSlotsNum(
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...)) {
+    return (std::is_void<return_type>::value) ? sizeof...(arg_types) + 1
+                                              : sizeof...(arg_types) + 2;
+  }
+
+  template <typename return_type, typename class_type, typename... arg_types>
+  static constexpr ALWAYS_INLINE inline size_t GetFunctionRequiredArgSlotsNum(
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...) const) {
+    return (std::is_void<return_type>::value) ? sizeof...(arg_types) + 1
+                                              : sizeof...(arg_types) + 2;
+  }
+
+  /**
+   * Returns the opcode for a given function name string (lookup in hash map).
+   * @param function_name string of the function name, including namespace
+   * @return the matching opcode, or Opcode::undefined
+   */
+  static Opcode GetExplicitCallOpcodeByString(std::string function_name);
+
+  /**
+   * Dumps the bytecode and the constants of this bytecode function to a
+   * file, identified by function name.
+   */
+  void DumpContents() const;
+
+  /**
+   * Gives a textual representation of the given instruction (and the
+   * LLVM instruction it originates from, if Debug mode is enabled).
+   * @param instruction instruction from this bytecode function
+   * @return string containing a textual representation of the instruction
+   */
+  std::string Dump(const Instruction *instruction) const;
+
+ private:
+  /**
+   * Creates a new empty BytecodeFunction object.
+   * @param function_name name of the function, usually inherited from the
+   * code context.
+   */
+  BytecodeFunction(std::string function_name) : function_name_(function_name) {}
+
+ private:
+  /**
+   * Function name of the original function (used only for output).
+   */
+  std::string function_name_;
+
+  /**
+   * Number of needed value slots at runtime.
+   */
+  size_t number_values_;
+
+  /**
+   * Number of function arguments (to check that the correct number is given
+   * to the interpreter)
+   */
+  size_t number_function_arguments_;
+
+  /**
+   * Constants needed during runtime.
+   */
+  std::vector<value_t> constants_;
+
+  /**
+   * This array of instruction slots holds the actual bytecode that is
+   * interpreted. Usually one instruction occupies one slot, but some
+   * instructions require several slots. Except for InternalCallInstruction,
+   * all instructions have a static size. The number of occupied instruction
+   * slots for an instruction can be obtained with GetInstructionSlotSize().
+   *
+   * The "Instruction" struct can be used to access every instruction in a
+   * generic way.
+   *
+   * It can be accessed by index (instruction index) or by a direct pointer
+   * to an instruction slot (IP).
+   */
+  std::vector<instr_slot_t> bytecode_;
+
+  /**
+   * Call contexts that belong to ExternalCallInstructions in the bytecode,
+   * accessed by index.
+   */
+  std::vector<ExternalCallContext> external_call_contexts_;
+
+  /**
+   * Hierarchical array of further bytecode functions belonging to
+   * internal function calls, accessed by index.
+   */
+  std::vector<BytecodeFunction> sub_functions_;
+
+  /**
+   * Constant map created at system startup that maps the function name
+   * strings of explicitly defined functions to their opcodes. This way the
+   * function name lookup is implicitly made with a hash table.
+   */
+  static const std::unordered_map<std::string, Opcode>
+      explicit_call_opcode_mapping_;
+
+#ifndef NDEBUG
+  /**
+   * In Debug mode: Maps every bytecode instruction slot to the
+   * LLVM instruction it was created from.
+   */
+  std::vector<const llvm::Instruction *> instruction_trace_;
+#endif
+
+ private:
+  friend BytecodeInterpreter;
+  friend BytecodeBuilder;
+};
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
diff --git a/src/include/codegen/interpreter/bytecode_instructions.def b/src/include/codegen/interpreter/bytecode_instructions.def
new file mode 100644
index 00000000000..83ec605ee0b
--- /dev/null
+++ b/src/include/codegen/interpreter/bytecode_instructions.def
@@ -0,0 +1,412 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_instructions.def
+//
+// Identification: src/include/codegen/interpreter/bytecode_instructions.def
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Instruction Definitions
+//
+// This file contains the definitions for all bytecode instructions.
+//
+// The definitions can be used by defining one of the HANDLE functions below
+// before including this definition file (see X-Macros). This way the
+// definitions can be used to generate the Opcode enum, the dispatch area, etc.
+//
+// Most instructions are automatically expanded to all their supported types.
+//
+// When adding a bytecode instruction here, the instruction at least needs a
+// Translate-function in the BytecodeBuilder and a Handler-function in the
+// BytecodeInterpreter.
+//----------------------------------------------------------------------------//
+
+#ifndef HANDLE_INST
+#define HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_TYPED_INST
+#define HANDLE_TYPED_INST(op, type) HANDLE_INST(op##_##type)
+#endif
+
+#ifndef HANDLE_OVERFLOW_TYPED_INST
+#define HANDLE_OVERFLOW_TYPED_INST(op, type) HANDLE_TYPED_INST(op, type)
+#endif
+
+#ifndef HANDLE_SELECT_INST
+#define HANDLE_SELECT_INST(op) HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_RET_INST
+#define HANDLE_RET_INST(op) HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_EXTERNAL_CALL_INST
+#define HANDLE_EXTERNAL_CALL_INST(op) HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_INTERNAL_CALL_INST
+#define HANDLE_INTERNAL_CALL_INST(op) HANDLE_INST(op)
+#endif
+
+#ifndef HANDLE_EXPLICIT_CALL_INST
+#define HANDLE_EXPLICIT_CALL_INST(op, func) HANDLE_INST(op)
+#endif
+
+// Takes a function and an opcode and calls the function for all type instances
+// of that opcode
+#define CREATE_FOR_ALL_TYPES(func, op)                                   \
+  func(op, i8) func(op, i16) func(op, i32) func(op, i64) func(op, float) \
+      func(op, double)
+
+// Returns the first type used when expanding to all types
+// (needed for use of GetOpcodeForTypeAllTypes)
+#define GET_FIRST_ALL_TYPES(op) (op##_i8)
+
+// Takes a function and an opcode and calls the function for all integer
+// instances of that opcode
+#define CREATE_FOR_INT_TYPES(func, op) \
+  func(op, i8) func(op, i16) func(op, i32) func(op, i64)
+
+// Returns the first type used when expanding to integer types
+// (needed for use of GetOpcodeForTypeIntTypes)
+#define GET_FIRST_INT_TYPES(op) (op##_i8)
+
+// Takes a function and an opcode and calls the function for all floating point
+// instances of that opcode
+#define CREATE_FOR_FLOAT_TYPES(func, op) func(op, float) func(op, double)
+
+// Returns the first type used when expanding
to floating point types +// (needed for use of GetOpcodeForTypeFloatTypes) +#define GET_FIRST_FLOAT_TYPES(op) (op##_float) + +//------ Bytecode Instruction Definitions ------// + +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, add) +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, sub) +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, mul) +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + div) // division for unsigned integer and floating point +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, sdiv) // division for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, urem) // remainder for unsigned integer +CREATE_FOR_FLOAT_TYPES(HANDLE_TYPED_INST, frem) // remainder for floating point +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, srem) // remainder for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, shl) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, lshr) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, ashr) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, and) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, or) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, xor) + +HANDLE_INST(extractvalue) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, load) +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, store) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, alloca_array) +HANDLE_INST(alloca) + +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_eq) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_ne) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_gt) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_lt) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_ge) // compare for unsigned integer and floating point +CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, + cmp_le) // compare for unsigned integer and floating point +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, cmp_sgt) // compare for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, cmp_slt) // compare for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, cmp_sge) // compare for signed integer +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, cmp_sle) // compare for signed integer + +HANDLE_INST(sext_i8_i16) // there is no handy way to expand this relationship +HANDLE_INST(sext_i8_i32) +HANDLE_INST(sext_i8_i64) +HANDLE_INST(sext_i16_i32) +HANDLE_INST(sext_i16_i64) +HANDLE_INST(sext_i32_i64) +HANDLE_INST(zext_i8_i16) +HANDLE_INST(zext_i8_i32) +HANDLE_INST(zext_i8_i64) +HANDLE_INST(zext_i16_i32) +HANDLE_INST(zext_i16_i64) +HANDLE_INST(zext_i32_i64) + +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, + doubletosi) // we can only expand in one dimension, so we + // expand the integer dimension and write down + // all floating point instances manually +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, doubletoui) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, sitodouble) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, uitodouble) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, floattosi) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, floattoui) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, sitofloat) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, uitofloat) +HANDLE_INST(doubletofloat) +HANDLE_INST(floattodouble) + +HANDLE_INST(gep_offset) // struct access of GEP instruction (accumulated) +CREATE_FOR_INT_TYPES(HANDLE_TYPED_INST, + gep_array) // array access of GEP instruction (inplace) +HANDLE_INST(phi_mov) +HANDLE_INST(nop_mov) +HANDLE_SELECT_INST(select) +HANDLE_EXTERNAL_CALL_INST(call_external) // external function call +HANDLE_INTERNAL_CALL_INST(call_internal) // internal function call + +HANDLE_RET_INST(ret) 
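+
+// Illustration (editorial sketch, not part of the instruction set): the
+// definitions above are consumed via X-Macros. A consumer that only needs
+// the opcode names defines HANDLE_INST before inclusion, the way
+// bytecode_function.h builds its Opcode enum:
+//
+//   #define HANDLE_INST(opcode) opcode,
+//   #include "codegen/interpreter/bytecode_instructions.def"
+//   #undef HANDLE_INST
+//
+// With that definition, CREATE_FOR_ALL_TYPES(HANDLE_TYPED_INST, add) above
+// expands to the six enumerators add_i8, add_i16, add_i32, add_i64,
+// add_float and add_double.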
+HANDLE_INST(branch_uncond)
+HANDLE_INST(branch_cond)
+HANDLE_INST(branch_cond_ft)  // conditional branch with fall through
+
+HANDLE_INST(llvm_memcpy)
+HANDLE_INST(llvm_memmove)
+HANDLE_INST(llvm_memset)
+
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_uadd_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_sadd_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_usub_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_ssub_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_umul_overflow)
+CREATE_FOR_INT_TYPES(HANDLE_OVERFLOW_TYPED_INST, llvm_smul_overflow)
+
+HANDLE_INST(llvm_sse42_crc32)
+
+//------ Explicit Call Instructions ------//
+//
+// Usually external functions are called using libffi.
+// However, for frequently used functions, an explicit bytecode instruction
+// for that specific function call can be created. For every function listed
+// below, a bytecode instruction _and_ a matching handler function are
+// generated automatically using heavy template-macro-magic.
+//
+// Further functions can easily be added here. The corresponding headers have
+// to be added to these files manually: bytecode_interpreter.h,
+// bytecode_function.cpp
+
+HANDLE_EXPLICIT_CALL_INST(
+    peloton_transactionruntime_performvectorizedread,
+    peloton::codegen::TransactionRuntime::PerformVectorizedRead)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_oahashtable_init,
+                          peloton::codegen::util::OAHashTable::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_oahashtable_storetuple,
+                          peloton::codegen::util::OAHashTable::StoreTuple)
+HANDLE_EXPLICIT_CALL_INST(peloton_oahashtable_destroy,
+                          peloton::codegen::util::OAHashTable::Destroy)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_deleter_init, peloton::codegen::Deleter::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_deleter_delete,
+                          peloton::codegen::Deleter::Delete)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_init, peloton::codegen::Updater::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_prepare,
+                          peloton::codegen::Updater::Prepare)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_preparepk,
+                          peloton::codegen::Updater::PreparePK)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_getpool,
+                          peloton::codegen::Updater::GetPool)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_update,
+                          peloton::codegen::Updater::Update)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_updatepk,
+                          peloton::codegen::Updater::UpdatePK)
+HANDLE_EXPLICIT_CALL_INST(peloton_updater_teardown,
+                          peloton::codegen::Updater::TearDown)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_init,
+                          peloton::codegen::Inserter::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_allocatetuplestorage,
+                          peloton::codegen::Inserter::AllocateTupleStorage)
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_getpool,
+                          peloton::codegen::Inserter::GetPool)
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_insert,
+                          peloton::codegen::Inserter::Insert)
+HANDLE_EXPLICIT_CALL_INST(peloton_inserter_teardown,
+                          peloton::codegen::Inserter::TearDown)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_init,
+                          peloton::codegen::util::Sorter::Init)
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_storeinputtuple,
+                          peloton::codegen::util::Sorter::StoreInputTuple)
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_sort,
+                          peloton::codegen::util::Sorter::Sort)
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_sortparallel,
+                          peloton::codegen::util::Sorter::SortParallel)
+HANDLE_EXPLICIT_CALL_INST(peloton_sorter_destroy,
+                          peloton::codegen::util::Sorter::Destroy)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_zonemap_shouldscantilegroup,
+
peloton::storage::ZoneMapManager::ShouldScanTileGroup) +HANDLE_EXPLICIT_CALL_INST(peloton_zonemap_getinstance, + peloton::storage::ZoneMapManager::GetInstance) + +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputboolean, + peloton::codegen::ValuesRuntime::OutputBoolean) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputtinyint, + peloton::codegen::ValuesRuntime::OutputTinyInt) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputsmallint, + peloton::codegen::ValuesRuntime::OutputSmallInt) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputinteger, + peloton::codegen::ValuesRuntime::OutputInteger) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputbigint, + peloton::codegen::ValuesRuntime::OutputBigInt) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputdate, + peloton::codegen::ValuesRuntime::OutputDate) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputtimestamp, + peloton::codegen::ValuesRuntime::OutputTimestamp) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputdecimal, + peloton::codegen::ValuesRuntime::OutputDecimal) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputvarchar, + peloton::codegen::ValuesRuntime::OutputVarchar) +HANDLE_EXPLICIT_CALL_INST(peloton_valuesruntime_outputvarbinary, + peloton::codegen::ValuesRuntime::OutputVarbinary) + +HANDLE_EXPLICIT_CALL_INST(peloton_executorcontext_gettransaction, + peloton::executor::ExecutorContext::GetTransaction) + +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_ascii, + peloton::function::StringFunctions::Ascii) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_like, + peloton::function::StringFunctions::Like) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_length, + peloton::function::StringFunctions::Length) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_btrim, + peloton::function::StringFunctions::BTrim) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_trim, + peloton::function::StringFunctions::Trim) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_ltrim, + peloton::function::StringFunctions::LTrim) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_rtrim, + peloton::function::StringFunctions::RTrim) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_substr, + peloton::function::StringFunctions::Substr) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_repeat, + peloton::function::StringFunctions::Repeat) +HANDLE_EXPLICIT_CALL_INST(peloton_stringfunctions_comparestrings, + peloton::function::StringFunctions::CompareStrings) + +HANDLE_EXPLICIT_CALL_INST(peloton_buffer_init, + peloton::codegen::util::Buffer::Init) +HANDLE_EXPLICIT_CALL_INST(peloton_buffer_append, + peloton::codegen::util::Buffer::Append) +HANDLE_EXPLICIT_CALL_INST(peloton_buffer_reset, + peloton::codegen::util::Buffer::Reset) +HANDLE_EXPLICIT_CALL_INST(peloton_buffer_destroy, + peloton::codegen::util::Buffer::Destroy) + +HANDLE_EXPLICIT_CALL_INST(peloton_numericfunctions_abs, + peloton::function::NumericFunctions::Abs) +HANDLE_EXPLICIT_CALL_INST(peloton_numericfunctions_floor, + peloton::function::NumericFunctions::Floor) +HANDLE_EXPLICIT_CALL_INST(peloton_numericfunctions_round, + peloton::function::NumericFunctions::Round) +HANDLE_EXPLICIT_CALL_INST(peloton_numericfunctions_ceil, + peloton::function::NumericFunctions::Ceil) + +HANDLE_EXPLICIT_CALL_INST(peloton_bloomfilteraccessor_init, + peloton::codegen::BloomFilterAccessor::Init) +HANDLE_EXPLICIT_CALL_INST(peloton_bloomfilteraccessor_destroy, + peloton::codegen::BloomFilterAccessor::Destroy) + +HANDLE_EXPLICIT_CALL_INST(peloton_bloomfilter_init, + 
peloton::codegen::util::BloomFilter::Init) +HANDLE_EXPLICIT_CALL_INST(peloton_bloomfilter_destroy, + peloton::codegen::util::BloomFilter::Destroy) + +HANDLE_EXPLICIT_CALL_INST(peloton_datatable_gettilegroupcount, + peloton::storage::DataTable::GetTileGroupCount) + +HANDLE_EXPLICIT_CALL_INST(peloton_datefunctions_now, + peloton::function::DateFunctions::Now) + +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_init, + peloton::codegen::util::HashTable::Init) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_insert, + peloton::codegen::util::HashTable::Insert) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_insertlazy, + peloton::codegen::util::HashTable::InsertLazy) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_buildlazy, + peloton::codegen::util::HashTable::BuildLazy) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_reservelazy, + peloton::codegen::util::HashTable::ReserveLazy) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_mergelazyunfinished, + peloton::codegen::util::HashTable::MergeLazyUnfinished) +HANDLE_EXPLICIT_CALL_INST(peloton_hashtable_destroy, + peloton::codegen::util::HashTable::Destroy) + +HANDLE_EXPLICIT_CALL_INST(peloton_storagemanager_gettablewithoid, + peloton::storage::StorageManager::GetTableWithOid) + +HANDLE_EXPLICIT_CALL_INST(peloton_tilegroup_getnexttupleslot, + peloton::storage::TileGroup::GetNextTupleSlot) +HANDLE_EXPLICIT_CALL_INST(peloton_tilegroup_gettilegroupid, + peloton::storage::TileGroup::GetTileGroupId) + +HANDLE_EXPLICIT_CALL_INST(peloton_timestampfunctions_datetrunc, + peloton::function::TimestampFunctions::DateTrunc) +HANDLE_EXPLICIT_CALL_INST(peloton_timestampfunctions_datepart, + peloton::function::TimestampFunctions::DatePart) + +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getboolean, + peloton::codegen::QueryParameters::GetBoolean) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_gettinyint, + peloton::codegen::QueryParameters::GetTinyInt) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getsmallint, + peloton::codegen::QueryParameters::GetSmallInt) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getinteger, + peloton::codegen::QueryParameters::GetInteger) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getbigint, + peloton::codegen::QueryParameters::GetBigInt) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getdouble, + peloton::codegen::QueryParameters::GetDouble) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getdate, + peloton::codegen::QueryParameters::GetDate) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_gettimestamp, + peloton::codegen::QueryParameters::GetTimestamp) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getvarcharval, + peloton::codegen::QueryParameters::GetVarcharVal) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getvarcharlen, + peloton::codegen::QueryParameters::GetVarcharLen) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getvarbinaryval, + peloton::codegen::QueryParameters::GetVarbinaryVal) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_getvarbinarylen, + peloton::codegen::QueryParameters::GetVarbinaryLen) +HANDLE_EXPLICIT_CALL_INST(peloton_queryparameters_isnull, + peloton::codegen::QueryParameters::IsNull) + +HANDLE_EXPLICIT_CALL_INST(peloton_runtimefunctions_hashcrc64, + peloton::codegen::RuntimeFunctions::HashCrc64) +HANDLE_EXPLICIT_CALL_INST(peloton_runtimefunctions_gettilegroup, + peloton::codegen::RuntimeFunctions::GetTileGroup) +HANDLE_EXPLICIT_CALL_INST( + peloton_runtimefunctions_gettilegrouplayout, + peloton::codegen::RuntimeFunctions::GetTileGroupLayout) +HANDLE_EXPLICIT_CALL_INST( + 
    peloton_runtimefunctions_fillpredicatearray,
+    peloton::codegen::RuntimeFunctions::FillPredicateArray)
+HANDLE_EXPLICIT_CALL_INST(
+    peloton_runtimefunctions_throwdividebyzeroexception,
+    peloton::codegen::RuntimeFunctions::ThrowDivideByZeroException)
+HANDLE_EXPLICIT_CALL_INST(
+    peloton_runtimefunctions_throwoverflowexception,
+    peloton::codegen::RuntimeFunctions::ThrowOverflowException)
+
+HANDLE_EXPLICIT_CALL_INST(peloton_bufferingconsumer_buffertuple,
+                          peloton::codegen::BufferingConsumer::BufferTuple)
+
+// undefine all handlers
+#undef HANDLE_INST
+#undef HANDLE_TYPED_INST
+#undef HANDLE_OVERFLOW_TYPED_INST
+#undef HANDLE_SELECT_INST
+#undef HANDLE_RET_INST
+#undef HANDLE_EXTERNAL_CALL_INST
+#undef HANDLE_INTERNAL_CALL_INST
+#undef HANDLE_EXPLICIT_CALL_INST
\ No newline at end of file
diff --git a/src/include/codegen/interpreter/bytecode_interpreter.h b/src/include/codegen/interpreter/bytecode_interpreter.h
new file mode 100644
index 00000000000..16193efbc5c
--- /dev/null
+++ b/src/include/codegen/interpreter/bytecode_interpreter.h
@@ -0,0 +1,1260 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_interpreter.h
+//
+// Identification: src/include/codegen/interpreter/bytecode_interpreter.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "codegen/interpreter/bytecode_function.h"
+
+#include <ffi.h>
+
+#include "codegen/query.h"
+#include "common/exception.h"
+#include "common/overflow_builtins.h"
+
+// Includes for explicit function calls
+#include "codegen/bloom_filter_accessor.h"
+#include "codegen/util/bloom_filter.h"
+#include "codegen/buffering_consumer.h"
+#include "codegen/deleter.h"
+#include "codegen/inserter.h"
+#include "codegen/query_parameters.h"
+#include "codegen/runtime_functions.h"
+#include "codegen/transaction_runtime.h"
+#include "codegen/updater.h"
+#include "codegen/util/oa_hash_table.h"
+#include "codegen/util/hash_table.h"
+#include "codegen/util/sorter.h"
+#include "codegen/values_runtime.h"
+#include "executor/executor_context.h"
+#include "function/date_functions.h"
+#include "function/numeric_functions.h"
+#include "function/string_functions.h"
+#include "function/timestamp_functions.h"
+#include "planner/project_info.h"
+#include "storage/data_table.h"
+#include "storage/storage_manager.h"
+#include "storage/tile_group.h"
+#include "storage/zone_map_manager.h"
+#include "codegen/util/buffer.h"
+
+namespace peloton {
+namespace codegen {
+namespace interpreter {
+
+/**
+ * Holds the runtime information for an external function call. Because libffi
+ * requires pointers to the actual value slots, this information is different
+ * for every function activation and cannot be stored in the bytecode
+ * function.
+ */
+struct CallActivation {
+  ffi_cif call_interface;
+  std::vector<void *> value_pointers;
+  value_t *return_pointer;
+};
+
+//----------------------------------------------------------------------------//
+// Template Helper Functions                                                  //
+//----------------------------------------------------------------------------//
+
+/**
+ * The seq types make it possible to create a template sequence of integers,
+ * e.g. for indexed access.
+ * (std::integer_sequence is only available in C++14)
+ */
+template <size_t... indexes>
+struct seq {
+  using type = seq;
+};
+template <typename sequence1, typename sequence2>
+struct concat;
+template <size_t... indexes1, size_t... indexes2>
+struct concat<seq<indexes1...>, seq<indexes2...>>
+    : seq<indexes1..., (sizeof...(indexes1) + indexes2)...> {};
+
+template <size_t number>
+struct gen_seq;
+template <size_t number>
+struct gen_seq : concat<typename gen_seq<number / 2>::type,
+                        typename gen_seq<number - number / 2>::type>::type {};
+template <>
+struct gen_seq<0> : seq<> {};
+template <>
+struct gen_seq<1> : seq<0> {};
+
+/**
+ * This function converts pointers back into references to make value handling
+ * possible. The function is tagged with a bool type that indicates
+ * whether the type is a reference.
+ * Non-reference types are returned without changes.
+ */
+template <typename type_t>
+static ALWAYS_INLINE inline constexpr
+    typename std::remove_pointer<type_t>::type &
+    ConvertPointerToReference(type_t source,
+                              UNUSED_ATTRIBUTE std::true_type is_reference) {
+  return *source;
+};
+
+template <typename type_t>
+static ALWAYS_INLINE inline constexpr type_t ConvertPointerToReference(
+    type_t source, UNUSED_ATTRIBUTE std::false_type not_a_reference) {
+  return source;
+};
+
+class BytecodeInterpreter {
+ public:
+  /**
+   * Executes a translated function with the interpreter
+   * @param bytecode_function bytecode function that shall be executed
+   * @param arguments vector of function arguments (stored as value_t). The
+   * number of arguments must match the number expected by the executed
+   * function.
+   * @return return value of the LLVM function, or undefined if the function
+   * returns void.
+   */
+  static value_t ExecuteFunction(const BytecodeFunction &bytecode_function,
+                                 const std::vector<value_t> &arguments);
+
+  /**
+   * Executes a translated function with the interpreter
+   * (Wrapper for usage with a single char* argument)
+   * @param bytecode_function bytecode function that shall be executed, must
+   * expect one argument.
+   * @param param char pointer argument of the function.
+   */
+  static void ExecuteFunction(const BytecodeFunction &bytecode_function,
+                              char *param);
+
+ private:
+  explicit BytecodeInterpreter(const BytecodeFunction &bytecode_function);
+
+  /**
+   * Executes a function with the given arguments. The return value can
+   * afterwards be retrieved with GetReturnValue(). This function is also
+   * called for internal function calls during execution.
+   * @param arguments vector of function arguments (stored as value_t). The
+   * number of arguments must match the number expected by the executed
+   * function.
+   */
+  void ExecuteFunction(const std::vector<value_t> &arguments);
+
+  /**
+   * Initializes the activation record by allocating the value slots, placing
+   * function arguments and constants, and preparing the call contexts.
+   * @param arguments vector of function arguments (stored as value_t). The
+   * number of arguments must match the number expected by the executed
+   * function.
+   */
+  void InitializeActivationRecord(const std::vector<value_t> &arguments);
+
+  /**
+   * Returns the function return value _after_ execution.
+   * @tparam type_t expected return type
+   * @return return value of the executed function, or undefined if void.
+   */
+  template <typename type_t>
+  type_t GetReturnValue();
+
+  /**
+   * Get the current value of a value slot.
+ * @tparam type_t requested type + * @param index value slot index + * @return value as requested type + */ + template + ALWAYS_INLINE inline type_t GetValue(const index_t index) { + using type_noref_t = typename std::conditional< + std::is_reference::value, + typename std::remove_reference::type *, type_t>::type; + static_assert(sizeof(type_noref_t) <= sizeof(value_t), + "The interpreter can only handle values that fit in 8 bytes"); + + PELOTON_ASSERT(index >= 0 && index < bytecode_function_.number_values_); + return ConvertPointerToReference( + *reinterpret_cast(&values_[index]), + std::is_reference()); + } + + /** + * Get the reference to a value slot. Usually SetValue() should be used + * to set the values, but some use cases require pointers/references to the + * slots. + * @tparam type_t requested type + * @param index value slot index + * @return typed reference to the requested slot + */ + template + ALWAYS_INLINE inline type_t &GetValueReference(const index_t index) { + PELOTON_ASSERT(index >= 0 && index < bytecode_function_.number_values_); + return reinterpret_cast(values_[index]); + } + + /** + * Set the current value of a slot + * @tparam type_t requested type + * @param index value slot index + * @param value value of type type_t, that shall be set + */ + template + ALWAYS_INLINE inline void SetValue(const index_t index, const type_t value) { + using type_noref_t = typename std::conditional< + std::is_reference::value, + typename std::remove_reference::type *, type_t>::type; + + PELOTON_ASSERT(index >= 0 && index < bytecode_function_.number_values_); + *reinterpret_cast(&values_[index]) = value; + + DumpValue(index); + } + + /** + * Advance the instruction pointer by a compile-time value. + * @tparam number_instruction_slots size of current instruction + * @param instruction current instruction pointer + * @return new instruction pointer + */ + template + ALWAYS_INLINE inline const Instruction *AdvanceIP( + const Instruction *instruction) { + auto next = reinterpret_cast( + const_cast( + reinterpret_cast(instruction)) + + number_instruction_slots); + return next; + } + + /** + * Advance the instruction pointer by a run-time value. + * @tparam number_instruction_slots size of current instruction + * @param instruction current instruction pointer + * @return new instruction pointer + */ + ALWAYS_INLINE inline const Instruction *AdvanceIP( + const Instruction *instruction, size_t number_instruction_slots) { + auto next = reinterpret_cast( + const_cast( + reinterpret_cast(instruction)) + + number_instruction_slots); + return next; + } + + /** + * Allocate memory and return a pointer to it. (Memory is managed and gets + * freed after the interpreter exits) + * @param number_bytes number of bytes to allocate + * @return pointer to the allocated memory + */ + uintptr_t AllocateMemory(size_t number_bytes); + +/** + * Dump the value of the given as value slot for debug purposes. + * If LOG_TRACE is not enabled, this function compiles to a stub. 
+ * @param index value index of value slot to dump + */ +#ifdef LOG_TRACE_ENABLED + template + void DumpValue(const index_t index) { + std::ostringstream output; + output << " [" << std::dec << std::setw(3) << index + << "] <= " << GetValue>(index) << "/0x" + << std::hex << GetValue>(index); + LOG_TRACE("%s", output.str().c_str()); + } +#else + template + void DumpValue(UNUSED_ATTRIBUTE const index_t index) {} +#endif + + //--------------------------------------------------------------------------// + // Instruction Handlers + // + // - The following functions are the instruction handlers for the bytecode + // instructions defined in bytecode_instructions.def . + // - The signatures of those functions are not code style conform, as they are + // generated from the opcode mnemonic + // - If the instruction is marked as a typed instruction in the .def file, + // it has a templated handler. Some handlers only support floating point or + // integer types, some both. Static asserts ensure this. + // - Because all the handlers will get inlined in the dispatch area, their + // definition must be in this header file. + //--------------------------------------------------------------------------// + + template + ALWAYS_INLINE inline const Instruction *addHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) + + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *subHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) - + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *mulHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) * + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *divHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) / + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sdivHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) / + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *uremHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) % + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *fremHandler( + const Instruction *instruction) { + static_assert(std::is_floating_point::value, + "__func__ must only be used with floating point types"); + SetValue(instruction->args[0], + (std::fmod(GetValue(instruction->args[1]), + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sremHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename 
std::make_signed::type; + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) % + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *shlHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) + << GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *lshrHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) >> + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *ashrHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) >> + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *andHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) & + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *orHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) | + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *xorHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + SetValue(instruction->args[0], + (GetValue(instruction->args[1]) ^ + GetValue(instruction->args[2]))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *extractvalueHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (GetValue(instruction->args[1]) >> instruction->args[2])); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *loadHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (*GetValue(instruction->args[1]))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *storeHandler( + const Instruction *instruction) { + *GetValue(instruction->args[0]) = + GetValue(instruction->args[1]); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *alloca_arrayHandler( + const Instruction *instruction) { + size_t number_bytes = + instruction->args[1] * GetValue(instruction->args[2]); + SetValue(instruction->args[0], (AllocateMemory(number_bytes))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *allocaHandler( + const Instruction *instruction) { + size_t number_bytes = instruction->args[1]; + SetValue(instruction->args[0], (AllocateMemory(number_bytes))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction 
*cmp_eqHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) == + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_neHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) != + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_gtHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) > + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_ltHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) < + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_geHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) >= + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_leHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) <= + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_sgtHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) > + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_sltHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) < + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_sgeHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) >= + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *cmp_sleHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + using type_signed_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1]) <= + GetValue(instruction->args[2])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i8_i16Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + 
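+  // Editorial sketch (not part of the patch): the sign and zero extension
+  // handlers differ only in the intermediate cast. Sign extending the i8
+  // value 0x80 (that is, -128) to i16 yields 0xFF80, while the zero
+  // extension handlers below yield 0x0080. Using the i8/i16 aliases from
+  // bytecode_function.h, the semantics of the handler above reduce to:
+  //
+  //   i8 raw = 0x80;
+  //   // widen through the signed types, then store the unsigned slot bits
+  //   i16 sext = static_cast<i16>(
+  //       static_cast<int16_t>(static_cast<int8_t>(raw)));  // 0xFF80
+  //   i16 zext = static_cast<i16>(raw);                     // 0x0080
+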
ALWAYS_INLINE inline const Instruction *sext_i8_i32Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i8_i64Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i16_i32Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i16_i64Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *sext_i32_i64Handler( + const Instruction *instruction) { + using src_t = typename std::make_signed::type; + using dest_t = typename std::make_signed::type; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i8_i16Handler( + const Instruction *instruction) { + using src_t = i8; + using dest_t = i16; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i8_i32Handler( + const Instruction *instruction) { + using src_t = i8; + using dest_t = i32; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i8_i64Handler( + const Instruction *instruction) { + using src_t = i8; + using dest_t = i64; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i16_i32Handler( + const Instruction *instruction) { + using src_t = i16; + using dest_t = i32; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i16_i64Handler( + const Instruction *instruction) { + using src_t = i16; + using dest_t = i64; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *zext_i32_i64Handler( + const Instruction *instruction) { + using src_t = i32; + using dest_t = i64; + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + // The FP<>Int casts are created in a two-level hierarchy + // eg. 
the generated call to floattosiHandler is redirected to + // tosiHandler + + template + ALWAYS_INLINE inline const Instruction *tosiHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ dest_type must be an integer type"); + static_assert(std::is_floating_point::value, + "__func__ src_type must be a floating point type"); + using dest_type_signed_t = typename std::make_signed::type; + + SetValue( + instruction->args[0], (static_cast( + GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *touiHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ dest_type must be an integer type"); + static_assert(std::is_floating_point::value, + "__func__ src_type must be a floating point type"); + + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sitoHandler( + const Instruction *instruction) { + static_assert(std::is_floating_point::value, + "__func__ dest_type must be a floating point type"); + static_assert(std::is_integral::value, + "__func__ src_type must be an integer type"); + using src_type_signed_t = typename std::make_signed::type; + + SetValue(instruction->args[0], + (static_cast(GetValue( + instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *uitoHandler( + const Instruction *instruction) { + static_assert(std::is_floating_point::value, + "__func__ dest_type must be a floating point type"); + static_assert(std::is_integral::value, + "__func__ src_type must be an integer type"); + + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *floattosiHandler( + const Instruction *instruction) { + return tosiHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *floattouiHandler( + const Instruction *instruction) { + return touiHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sitofloatHandler( + const Instruction *instruction) { + return sitoHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *uitofloatHandler( + const Instruction *instruction) { + return uitoHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *doubletosiHandler( + const Instruction *instruction) { + return tosiHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *doubletouiHandler( + const Instruction *instruction) { + return touiHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *sitodoubleHandler( + const Instruction *instruction) { + return sitoHandler(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *uitodoubleHandler( + const Instruction *instruction) { + return uitoHandler(instruction); + } + + ALWAYS_INLINE inline const Instruction *doubletofloatHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *floattodoubleHandler( + const Instruction *instruction) { + SetValue( + instruction->args[0], + (static_cast(GetValue(instruction->args[1])))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE 
inline const Instruction *gep_offsetHandler( + const Instruction *instruction) { + uintptr_t sum = GetValue(instruction->args[1]) + + static_cast(instruction->args[2]); + SetValue(instruction->args[0], (sum)); + return AdvanceIP<1>(instruction); + } + + template + ALWAYS_INLINE inline const Instruction *gep_arrayHandler( + const Instruction *instruction) { + static_assert(std::is_integral::value, + "__func__ must only be used with integer types"); + uintptr_t product = + GetValue(instruction->args[1]) * instruction->args[2]; + SetValue(instruction->args[0], + (GetValue(instruction->args[0]) + product)); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *phi_movHandler( + const Instruction *instruction) { + SetValue(instruction->args[0], + (GetValue(instruction->args[1]))); + return AdvanceIP<1>(instruction); + } + + ALWAYS_INLINE inline const Instruction *selectHandler( + const Instruction *instruction) { + value_t result; + if (GetValue(instruction->args[1]) > 0) + result = GetValue(instruction->args[2]); + else + result = GetValue(instruction->args[3]); + + SetValue(instruction->args[0], (result)); + return AdvanceIP<2>(instruction); // bigger slot size! + } + + ALWAYS_INLINE inline const Instruction *call_externalHandler( + const Instruction *instruction) { + const ExternalCallInstruction *call_instruction = + reinterpret_cast(instruction); + CallActivation &call_activation = + call_activations_[call_instruction->external_call_context]; + + // call external function + ffi_call(&call_activation.call_interface, call_instruction->function, + call_activation.return_pointer, + reinterpret_cast(call_activation.value_pointers.data())); + + if (bytecode_function_ + .external_call_contexts_[call_instruction->external_call_context] + .dest_type != &ffi_type_void) { + DumpValue( + bytecode_function_ + .external_call_contexts_[call_instruction->external_call_context] + .dest_slot); + } + + return AdvanceIP<2>(instruction); // bigger slot size! 
+
+  ALWAYS_INLINE inline const Instruction *call_internalHandler(
+      const Instruction *instruction) {
+    const InternalCallInstruction *call_instruction =
+        reinterpret_cast<const InternalCallInstruction *>(instruction);
+
+    std::vector<value_t> arguments(call_instruction->number_args);
+    for (size_t i = 0; i < call_instruction->number_args; i++) {
+      arguments[i] = GetValue<value_t>(call_instruction->args[i]);
+    }
+
+    value_t result = ExecuteFunction(
+        bytecode_function_.sub_functions_[call_instruction->sub_function],
+        arguments);
+    SetValue<value_t>(call_instruction->dest_slot, result);
+
+    return AdvanceIP(
+        instruction,
+        bytecode_function_.GetInteralCallInstructionSlotSize(call_instruction));
+  }
+
+  ALWAYS_INLINE inline const Instruction *nop_movHandler(
+      const Instruction *instruction) {
+    SetValue<value_t>(instruction->args[0],
+                      (GetValue<value_t>(instruction->args[1])));
+    return AdvanceIP<1>(instruction);
+  }
+
+  ALWAYS_INLINE inline const Instruction *branch_uncondHandler(
+      const Instruction *instruction) {
+    return bytecode_function_.GetIPFromIndex(instruction->args[0]);
+  }
+
+  ALWAYS_INLINE inline const Instruction *branch_condHandler(
+      const Instruction *instruction) {
+    index_t next_bb;
+    if (GetValue<value_t>(instruction->args[0]) > 0)
+      next_bb = instruction->args[2];
+    else
+      next_bb = instruction->args[1];
+
+    return bytecode_function_.GetIPFromIndex(next_bb);
+  }
+
+  ALWAYS_INLINE inline const Instruction *branch_cond_ftHandler(
+      const Instruction *instruction) {
+    const Instruction *ip;
+    if ((GetValue<value_t>(instruction->args[0]) & 0x1) > 0)
+      ip = bytecode_function_.GetIPFromIndex(instruction->args[1]);
+    else
+      ip = AdvanceIP<1>(instruction);
+
+    return ip;
+  }
+
+  ALWAYS_INLINE inline const Instruction *llvm_memcpyHandler(
+      const Instruction *instruction) {
+    PELOTON_MEMCPY(GetValue<void *>(instruction->args[0]),
+                   GetValue<void *>(instruction->args[1]),
+                   GetValue<size_t>(instruction->args[2]));
+    return AdvanceIP<1>(instruction);
+  }
+
+  ALWAYS_INLINE inline const Instruction *llvm_memmoveHandler(
+      const Instruction *instruction) {
+    std::memmove(GetValue<void *>(instruction->args[0]),
+                 GetValue<void *>(instruction->args[1]),
+                 GetValue<size_t>(instruction->args[2]));
+    return AdvanceIP<1>(instruction);
+  }
+
+  ALWAYS_INLINE inline const Instruction *llvm_memsetHandler(
+      const Instruction *instruction) {
+    PELOTON_MEMSET(GetValue<void *>(instruction->args[0]),
+                   GetValue<int>(instruction->args[1]),
+                   GetValue<size_t>(instruction->args[2]));
+    return AdvanceIP<1>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_uadd_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    bool overflow = __builtin_add_overflow(
+        GetValue<type_t>(instruction->args[2]),
+        GetValue<type_t>(instruction->args[3]),
+        &GetValueReference<type_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_sadd_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    using type_signed_t = typename std::make_signed<type_t>::type;
+    bool overflow = __builtin_add_overflow(
+        GetValue<type_signed_t>(instruction->args[2]),
+        GetValue<type_signed_t>(instruction->args[3]),
+        &GetValueReference<type_signed_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
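Each of these handlers maps one llvm.*.with.overflow intrinsic onto the matching compiler builtin, writing the wrapped result and the overflow flag into two separate slots. In isolation, the unsigned-add case computes:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // what llvm_uadd_overflowHandler<uint32_t> boils down to: one builtin
      // yields both the wrapped sum and the overflow flag
      uint32_t result;
      bool overflow = __builtin_add_overflow(4000000000u, 500000000u, &result);
      std::printf("result=%u overflow=%d\n", result, overflow);
      // prints: result=205032704 overflow=1
      return 0;
    }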
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_usub_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    bool overflow = __builtin_sub_overflow(
+        GetValue<type_t>(instruction->args[2]),
+        GetValue<type_t>(instruction->args[3]),
+        &GetValueReference<type_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_ssub_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    using type_signed_t = typename std::make_signed<type_t>::type;
+    bool overflow = __builtin_sub_overflow(
+        GetValue<type_signed_t>(instruction->args[2]),
+        GetValue<type_signed_t>(instruction->args[3]),
+        &GetValueReference<type_signed_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_umul_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    bool overflow = __builtin_mul_overflow(
+        GetValue<type_t>(instruction->args[2]),
+        GetValue<type_t>(instruction->args[3]),
+        &GetValueReference<type_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  template <typename type_t>
+  ALWAYS_INLINE inline const Instruction *llvm_smul_overflowHandler(
+      const Instruction *instruction) {
+    static_assert(std::is_integral<type_t>::value,
+                  "__func__ must only be used with integer types");
+    using type_signed_t = typename std::make_signed<type_t>::type;
+    bool overflow = __builtin_mul_overflow(
+        GetValue<type_signed_t>(instruction->args[2]),
+        GetValue<type_signed_t>(instruction->args[3]),
+        &GetValueReference<type_signed_t>(instruction->args[0]));
+
+    DumpValue(instruction->args[0]);
+
+    SetValue<value_t>(instruction->args[1], (static_cast<value_t>(overflow)));
+    return AdvanceIP<2>(instruction);
+  }
+
+  ALWAYS_INLINE inline const Instruction *llvm_sse42_crc32Handler(
+      const Instruction *instruction) {
+    SetValue<uint64_t>(
+        instruction->args[0],
+        (__builtin_ia32_crc32di(GetValue<uint64_t>(instruction->args[1]),
+                                GetValue<uint64_t>(instruction->args[2]))));
+    return AdvanceIP<1>(instruction);
+  }
+
+  // The handlers for explicit calls are generated using templates.
+  //
+  // The call arrives in explicit_callHandler(...), which is overloaded for
+  // 1. static functions,
+  // 2. class methods, and
+  // 3. const class methods,
+  // and is then forwarded to explicit_call_wrapperHandler(...), which is
+  // tagged with a bool type indicating whether the called function returns
+  // void or not. This yields six instances of that function overall.
+
+  // 1. static function
+  template <typename return_type, typename... arg_types>
+  ALWAYS_INLINE inline const Instruction *explicit_callHandler(
+      const Instruction *instruction, return_type (*func)(arg_types...)) {
+    // forward call depending on whether func returns void or not
+    return explicit_call_wrapperHandler(instruction, func,
+                                        gen_seq<sizeof...(arg_types)>(),
+                                        std::is_void<return_type>());
+  }
+
+  // 2. class method
+  template <typename return_type, typename class_type, typename... arg_types>
+  ALWAYS_INLINE inline const Instruction *explicit_callHandler(
+      const Instruction *instruction,
+      return_type (class_type::*func)(arg_types...)) {
+    // forward call depending on whether func returns void or not
+    return explicit_call_wrapperHandler(instruction, func,
+                                        gen_seq<sizeof...(arg_types)>(),
+                                        std::is_void<return_type>());
+  }
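seq and gen_seq are assumed to be the usual pre-C++14 index-sequence utility; expanding the indexes pack in lockstep with arg_types is what turns the flat slot array into a typed argument list. A self-contained sketch of that trick:

    #include <cstddef>
    #include <cstdio>

    template <size_t... indexes>
    struct seq {};
    template <size_t N, size_t... indexes>
    struct gen_seq : gen_seq<N - 1, N - 1, indexes...> {};
    template <size_t... indexes>
    struct gen_seq<0, indexes...> : seq<indexes...> {};

    static int arg_slots[] = {0, 7, 35};  // argument i lives in slot i + 1

    template <typename R, typename... As, size_t... Is>
    static R apply_from_slots(R (*fn)(As...), seq<Is...>) {
      // expands to fn(arg_slots[1], arg_slots[2], ...)
      return fn(static_cast<As>(arg_slots[Is + 1])...);
    }

    static int sub(int a, int b) { return a - b; }

    int main() {
      std::printf("%d\n", apply_from_slots(sub, gen_seq<2>()));  // prints -28
      return 0;
    }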
+
+  // 3. const class method
+  template <typename return_type, typename class_type, typename... arg_types>
+  ALWAYS_INLINE inline const Instruction *explicit_callHandler(
+      const Instruction *instruction,
+      return_type (class_type::*func)(arg_types...) const) {
+    // forward call depending on whether func returns void or not
+    return explicit_call_wrapperHandler(instruction, func,
+                                        gen_seq<sizeof...(arg_types)>(),
+                                        std::is_void<return_type>());
+  }
+
+  // 1. static function a) returns non-void
+  template <typename return_type, typename... arg_types, size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (*func)(arg_types...),
+      const seq<indexes...> &, UNUSED_ATTRIBUTE std::false_type returns_void) {
+    // call the actual function
+    auto ret = func(GetValue<arg_types>(instruction->args[indexes + 1])...);
+    SetValue<return_type>(instruction->args[0], ret);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 1. static function b) returns void
+  template <typename return_type, typename... arg_types, size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (*func)(arg_types...),
+      const seq<indexes...> &,
+      UNUSED_ATTRIBUTE std::true_type returns_void) {
+    // call the actual function
+    func(GetValue<arg_types>(instruction->args[indexes])...);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 2. class method a) returns non-void
+  template <typename return_type, typename class_type, typename... arg_types,
+            size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...),
+      const seq<indexes...> &, UNUSED_ATTRIBUTE std::false_type returns_void) {
+    // call the actual function
+    auto *obj = GetValue<class_type *>(instruction->args[1]);
+    return_type ret =
+        (obj->*func)(GetValue<arg_types>(instruction->args[indexes + 2])...);
+    SetValue<return_type>(instruction->args[0], ret);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 2. class method b) returns void
+  template <typename return_type, typename class_type, typename... arg_types,
+            size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...),
+      const seq<indexes...> &,
+      UNUSED_ATTRIBUTE std::true_type returns_void) {
+    // call the actual function
+    auto *obj = GetValue<class_type *>(instruction->args[0]);
+    (obj->*func)(GetValue<arg_types>(instruction->args[indexes + 1])...);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 3. const class method a) returns non-void
+  template <typename return_type, typename class_type, typename... arg_types,
+            size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...) const,
+      const seq<indexes...> &, UNUSED_ATTRIBUTE std::false_type returns_void) {
+    // call the actual function
+    auto *obj = GetValue<class_type *>(instruction->args[1]);
+    return_type ret =
+        (obj->*func)(GetValue<arg_types>(instruction->args[indexes + 2])...);
+    SetValue<return_type>(instruction->args[0], ret);
+
+    return AdvanceIP(instruction);
+  }
+
+  // 3. const class method b) returns void
+  template <typename return_type, typename class_type, typename... arg_types,
+            size_t... indexes>
+  ALWAYS_INLINE inline const Instruction *explicit_call_wrapperHandler(
+      const Instruction *instruction,
+      UNUSED_ATTRIBUTE return_type (class_type::*func)(arg_types...) const,
+      const seq<indexes...> &,
+      UNUSED_ATTRIBUTE std::true_type returns_void) {
+    // call the actual function
+    auto *obj = GetValue<class_type *>(instruction->args[0]);
+    (obj->*func)(GetValue<arg_types>(instruction->args[indexes + 1])...);
+
+    return AdvanceIP(instruction);
+  }
+
+  //--------------------------------------------------------------------------//
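The dispatch machinery referenced below (label_pointers_) is the classic threaded-interpreter pattern built on GCC/Clang's labels-as-values extension: each opcode indexes a table of label addresses, and every handler jumps straight to the next one. A minimal standalone sketch:

    #include <cstdio>

    int main() {
      static void *labels[] = {&&op_inc, &&op_dec, &&op_halt};
      int program[] = {0, 0, 1, 2};  // inc, inc, dec, halt
      int acc = 0;
      const int *ip = program;

      goto *labels[*ip];
    op_inc:
      ++acc;
      goto *labels[*++ip];
    op_dec:
      --acc;
      goto *labels[*++ip];
    op_halt:
      std::printf("%d\n", acc);  // prints 1
      return 0;
    }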
+
+ private:
+  /**
+   * This static array holds the goto-pointer to the dispatch area for each
+   * Opcode. It is filled once, when the interpreter is called for the
+   * first time.
+   */
+  static void *label_pointers_[BytecodeFunction::GetNumberOpcodes()];
+
+  /**
+   * Value slots (registers) for the current function activation.
+   * (Aligned to 64 bytes, which is most likely the cache line size.)
+   */
+  alignas(64) std::vector<value_t> values_;
+
+  /**
+   * Holds all allocations made with alloca. We do not need to access them,
+   * but the unique pointers ensure they are released at the end.
+   */
+  std::vector<std::unique_ptr<char[]>> allocations_;
+
+  /**
+   * Holds the call activation records for all external call instructions.
+   * (Created during initialization.)
+   */
+  std::vector<CallActivation> call_activations_;
+
+  /**
+   * Bytecode function used for execution.
+   */
+  const BytecodeFunction &bytecode_function_;
+
+ private:
+  // This class cannot be copy- or move-constructed
+  DISALLOW_COPY_AND_MOVE(BytecodeInterpreter);
+};
+
+}  // namespace interpreter
+}  // namespace codegen
+}  // namespace peloton
\ No newline at end of file
diff --git a/src/include/codegen/proxy/proxy.h b/src/include/codegen/proxy/proxy.h
index 8e647e9e2ac..ab518bd59ae 100644
--- a/src/include/codegen/proxy/proxy.h
+++ b/src/include/codegen/proxy/proxy.h
@@ -237,7 +237,7 @@ struct MemFn {
       ::peloton::codegen::CodeGen &codegen) {                                \
     static constexpr const char *kFnName = STR(NS::C::F);                    \
     /* If the function has already been defined, return it. */              \
-    if (::llvm::Function *func = codegen.LookupBuiltin(kFnName)) {           \
+    if (::llvm::Function *func = codegen.LookupBuiltin(kFnName).first) {     \
       return func;                                                          \
     }                                                                        \
                                                                             \
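The `.first` above reflects that a builtin lookup now yields a {declaration, implementation} pair instead of a bare llvm::Function *. Sketched with stand-in types (hypothetical names, not Peloton's actual declarations):

    #include <string>
    #include <unordered_map>
    #include <utility>

    struct Function;         // stand-in for llvm::Function
    using FuncPtr = void *;  // stand-in for the implementation pointer
    using Builtin = std::pair<Function *, FuncPtr>;

    static std::unordered_map<std::string, Builtin> builtins;

    // Returns {nullptr, nullptr} for unknown builtins, so call sites can keep
    // testing .first, exactly as the macro above does.
    static Builtin LookupBuiltin(const std::string &name) {
      auto it = builtins.find(name);
      return (it == builtins.end() ? Builtin{nullptr, nullptr} : it->second);
    }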
diff --git a/src/include/codegen/query.h b/src/include/codegen/query.h
index eecb5a175a5..141b5303b7d 100644
--- a/src/include/codegen/query.h
+++ b/src/include/codegen/query.h
@@ -13,9 +13,9 @@
 #pragma once
 
 #include "codegen/code_context.h"
+#include "codegen/parameter_cache.h"
 #include "codegen/query_parameters.h"
 #include "codegen/query_state.h"
-#include "codegen/parameter_cache.h"
 
 namespace peloton {
 
@@ -41,18 +41,39 @@ class ExecutionConsumer;
 //===----------------------------------------------------------------------===//
 class Query {
  public:
+  struct CompileStats {
+    double compile_ms = 0.0;
+  };
+
   struct RuntimeStats {
+    double interpreter_prepare_ms = 0.0;
     double init_ms = 0.0;
     double plan_ms = 0.0;
     double tear_down_ms = 0.0;
   };
 
-  struct QueryFunctions {
+  // We use this struct for the parameters to the LLVM functions
+  // to avoid complex casting and pointer manipulation
+  struct FunctionArguments {
+    executor::ExecutorContext *executor_context;
+    char *consumer_arg;
+    char rest[0];
+  } PACKED;
+
+  struct LLVMFunctions {
     llvm::Function *init_func;
     llvm::Function *plan_func;
     llvm::Function *tear_down_func;
   };
 
+  using compiled_function_t = void (*)(FunctionArguments *);
+
+  struct CompiledFunctions {
+    compiled_function_t init_func;
+    compiled_function_t plan_func;
+    compiled_function_t tear_down_func;
+  };
+
   /// This class cannot be copy or move-constructed
   DISALLOW_COPY_AND_MOVE(Query);
 
@@ -61,7 +82,10 @@ class Query {
    *
    * @param funcs The compiled functions that implement the logic of the query
    */
-  bool Prepare(const QueryFunctions &funcs);
+  void Prepare(const LLVMFunctions &funcs);
+
+  // Compiles the functions in this query to native code
+  void Compile(CompileStats *stats = nullptr);
 
   /**
    * @brief Executes the compiled query.
@@ -94,6 +118,14 @@ class Query {
   /// Constructor. Private so callers use the QueryCompiler class.
   explicit Query(const planner::AbstractPlan &query_plan);
 
+  // Execute the query as native code (must already be compiled)
+  void ExecuteNative(FunctionArguments *function_arguments,
+                     RuntimeStats *stats);
+
+  // Execute the query using the interpreter
+  void ExecuteInterpreter(FunctionArguments *function_arguments,
+                          RuntimeStats *stats);
+
 private:
   // The query plan
   const planner::AbstractPlan &query_plan_;
@@ -104,11 +136,14 @@ class Query {
   // The size of the parameter the functions take
   QueryState query_state_;
 
-  // The init(), plan() and tearDown() functions
-  typedef void (*compiled_function_t)(char *);
-  compiled_function_t init_func_;
-  compiled_function_t plan_func_;
-  compiled_function_t tear_down_func_;
+  // LLVM IR of the query functions
+  LLVMFunctions llvm_functions_;
+
+  // Pointers to the compiled query functions
+  CompiledFunctions compiled_functions_;
+
+  // Indicates whether the query has been compiled to native code
+  bool is_compiled_;
 };
 
 }  // namespace codegen
diff --git a/src/include/codegen/query_compiler.h b/src/include/codegen/query_compiler.h
index 377bafcada9..fd2c0b466fd 100644
--- a/src/include/codegen/query_compiler.h
+++ b/src/include/codegen/query_compiler.h
@@ -43,8 +43,8 @@ class QueryCompiler {
     // The time taken to generate all the IR for the plan
     double ir_gen_ms = 0.0;
 
-    // The time taken to perform JIT compilation
-    double jit_ms = 0.0;
+    // Time consumed by the LLVM optimizer
+    double optimize_ms = 0.0;
   };
 
   // Constructor
diff --git a/src/include/common/macros.h b/src/include/common/macros.h
index 96aaf6ab0d2..593619bffb4 100644
--- a/src/include/common/macros.h
+++ b/src/include/common/macros.h
@@ -29,7 +29,19 @@ namespace peloton {
 //===--------------------------------------------------------------------===//
 
 #define NEVER_INLINE __attribute__((noinline))
+
+#ifdef NDEBUG
 #define ALWAYS_INLINE __attribute__((always_inline))
+#else
+#define ALWAYS_INLINE
+#endif
+
+#ifdef __clang__
+#define NO_CLONE
+#else
+#define NO_CLONE __attribute__((noclone))
+#endif
+
 #define UNUSED_ATTRIBUTE __attribute__((unused))
 #define PACKED __attribute__((packed))
@@ -106,6 +118,19 @@ namespace peloton {
 #define GCC_AT_LEAST_6 0
 #endif
 
+#if __GNUC__ > 5 || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#define GCC_AT_LEAST_51 1
+#else
+#define GCC_AT_LEAST_51 0
+#endif
+
+// g++-5.0 does not support the overflow builtins
+#if GCC_AT_LEAST_51
+#define GCC_OVERFLOW_BUILTINS_DEFINED 1
+#else
+#define GCC_OVERFLOW_BUILTINS_DEFINED 0
+#endif
+
 //===--------------------------------------------------------------------===//
 // Port to OSX
 //===---------------------------
diff --git a/src/include/common/overflow_builtins.h b/src/include/common/overflow_builtins.h
new file mode 100644
index 00000000000..d77aae01002
--- /dev/null
+++ b/src/include/common/overflow_builtins.h
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// overflow_builtins.h
+//
+// Identification: src/include/common/overflow_builtins.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "common/macros.h"
+
+#include <limits>
+
+//----------------------------------------------------------------------------//
+// Fallback implementations if the gcc overflow builtins are not available.
+//
+// Documentation:
+// https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
+//----------------------------------------------------------------------------//
+
+namespace peloton {
+
+template <typename type_t>
+static inline bool builtin_add_overflow(type_t a, type_t b, type_t *res) {
+  *res = a + b;
+
+  if (a >= 0 && b >= 0 && std::numeric_limits<type_t>::max() - a < b)
+    return true;
+  else if (a < 0 && b < 0 && std::numeric_limits<type_t>::min() - a > b)
+    return true;
+
+  return false;
+}
+
+template <typename type_t>
+static inline bool builtin_sub_overflow(type_t a, type_t b, type_t *res) {
+  *res = a - b;
+
+  if (std::is_unsigned<type_t>::value)
+    return b > a;
+  else
+    return ((((a ^ b)) & (*res ^ a)) & std::numeric_limits<type_t>::min()) !=
+           0;
+}
+
+template <typename type_t>
+static inline bool builtin_mul_overflow(type_t a, type_t b, type_t *res) {
+  *res = a * b;
+
+  if (a != 0 && *res / a != b) return true;
+
+  return false;
+}
+
+#if !GCC_OVERFLOW_BUILTINS_DEFINED
+
+template <typename type_t>
+static inline bool __builtin_add_overflow(type_t a, type_t b, type_t *res) {
+  return builtin_add_overflow(a, b, res);
+}
+
+template <typename type_t>
+static inline bool __builtin_sub_overflow(type_t a, type_t b, type_t *res) {
+  return builtin_sub_overflow(a, b, res);
+}
+
+template <typename type_t>
+static inline bool __builtin_mul_overflow(type_t a, type_t b, type_t *res) {
+  return builtin_mul_overflow(a, b, res);
+}
+
+#endif
+
+}  // namespace peloton
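Like the GCC builtins they mirror, these fallbacks always store the (possibly wrapped) result and return true exactly when the operation overflowed. One caveat: for signed types the eager `*res = a + b` relies on two's-complement wraparound, which the C++ standard technically leaves undefined; the real builtins have no such caveat. A quick usage check (assuming it is compiled inside the Peloton tree):

    #include <cstdio>
    #include <limits>

    #include "common/overflow_builtins.h"

    int main() {
      unsigned int res;
      bool o1 = peloton::builtin_add_overflow(40u, 2u, &res);  // no overflow
      bool o2 = peloton::builtin_add_overflow(
          std::numeric_limits<unsigned int>::max(), 1u, &res);  // wraps to 0
      std::printf("%d %d %u\n", o1, o2, res);  // prints: 0 1 0
      return 0;
    }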
diff --git a/src/include/settings/settings.h b/src/include/settings/settings.h
index 757cc9043e6..a442fd151cf 100644
--- a/src/include/settings/settings.h
+++ b/src/include/settings/settings.h
@@ -200,6 +200,10 @@ SETTING_bool(codegen,
              true,
              true, true)
 
+SETTING_bool(codegen_interpreter,
+             "Force interpretation of generated llvm code (default: false)",
+             false, true, true)
+
 SETTING_bool(print_ir_stats,
              "Print statistics on generated IR (default: false)",
              false,
diff --git a/src/include/util/math_util.h b/src/include/util/math_util.h
new file mode 100644
index 00000000000..b4894421959
--- /dev/null
+++ b/src/include/util/math_util.h
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// math_util.h
+//
+// Identification: src/include/util/math_util.h
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "common/macros.h"
+
+namespace peloton {
+
+/**
+ * Math Utility Functions
+ */
+class MathUtil {
+ public:
+  /**
+   * Performs a division of two integer values and rounds up the result.
+   * The calculation exploits truncating integer division:
+   * (numerator + denominator - 1) / denominator.
+   */
+  static constexpr ALWAYS_INLINE inline size_t DivRoundUp(size_t numerator,
+                                                          size_t denominator) {
+    // division must be integer division
+    return (numerator + denominator - 1) / denominator;
+  }
+};
+
+}  // namespace peloton
diff --git a/test/codegen/bloom_filter_test.cpp b/test/codegen/bloom_filter_test.cpp
index 032a4ef2250..ef6bf168c05 100644
--- a/test/codegen/bloom_filter_test.cpp
+++ b/test/codegen/bloom_filter_test.cpp
@@ -165,7 +165,7 @@ TEST_F(BloomFilterCodegenTest, FalsePositiveRateTest) {
     func.ReturnAndFinish();
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef void (*ftype)(codegen::util::BloomFilter * bloom_filter, int *, int,
                         int *);
@@ -312,6 +312,7 @@ double BloomFilterCodegenTest::ExecuteJoin(std::string query,
       *plan, executor_context.GetParams().GetQueryParametersMap(), consumer);
 
   // Run
+  compiled_query->Compile();
   compiled_query->Execute(executor_context, consumer, &stats);
 
   LOG_INFO("Execution Time: %0.0f ms", stats.plan_ms);
diff --git a/test/codegen/bytecode_interpreter_test.cpp b/test/codegen/bytecode_interpreter_test.cpp
new file mode 100644
index 00000000000..9231a92c901
--- /dev/null
+++ b/test/codegen/bytecode_interpreter_test.cpp
@@ -0,0 +1,242 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// bytecode_interpreter_test.cpp
+//
+// Identification: test/codegen/bytecode_interpreter_test.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "codegen/interpreter/bytecode_interpreter.h"
+#include "codegen/function_builder.h"
+#include "codegen/interpreter/bytecode_builder.h"
+#include "codegen/lang/loop.h"
+#include "codegen/proxy/runtime_functions_proxy.h"
+#include "common/harness.h"
+
+namespace peloton {
+namespace test {
+
+class BytecodeInterpreterTest : public PelotonTest {};
+
+TEST_F(BytecodeInterpreterTest, PHIResolveTest) {
+  // Create a loop that involves PHIs that have to be converted into move
+  // instructions.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+  codegen::FunctionBuilder main{
+      code_context, "main", cg.Int32Type(), {{"a", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *i = cg.Const32(0);
+
+    codegen::lang::Loop loop{cg, cg.ConstBool(true), {{"i", i}, {"a", a}}};
+    {
+      llvm::Value *i = loop.GetLoopVar(0);
+      llvm::Value *a = loop.GetLoopVar(1);
+
+      a = cg->CreateSub(a, cg.Const32(1));
+      i = cg->CreateAdd(i, cg.Const32(1));
+      loop.LoopEnd(cg->CreateICmpULT(i, cg.Const32(10)), {i, a});
+    }
+
+    std::vector<llvm::Value *> final;
+    loop.CollectFinalLoopVariables(final);
+
+    auto *ret = final[1];
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t arg = 44;
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {arg});
+  ASSERT_EQ(ret, arg - 10);
+}
+
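The next test provokes the classic swap hazard: the PHIs on a loop back edge form one parallel copy, so lowering them as naive sequential moves can overwrite a value that is still needed. The scalar analogue of what the bytecode builder must get right:

    #include <cassert>

    int main() {
      int a = 44, b = 0;

      // PHI semantics at the back edge are a parallel copy: (a, b) = (b, a).
      // Sequential lowering "a = b; b = a;" would lose the old a, which is
      // why an additional move gets inserted:
      int tmp = a;
      a = b;
      b = tmp;

      assert(a == 0 && b == 44);
      return 0;
    }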
+TEST_F(BytecodeInterpreterTest, PHISwapProblemTest) {
+  // Produce the PHI swap problem, where additional moves have to be inserted
+  // in order to retrieve the correct result.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+  codegen::FunctionBuilder main{
+      code_context, "main", cg.Int32Type(), {{"a", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *b = cg.Const32(0);
+    auto *i = cg.Const32(0);
+
+    codegen::lang::Loop loop{
+        cg, cg.ConstBool(true), {{"i", i}, {"a", a}, {"b", b}}};
+    {
+      llvm::Value *i = loop.GetLoopVar(0);
+      llvm::Value *a = loop.GetLoopVar(1);
+      llvm::Value *b = loop.GetLoopVar(2);
+
+      i = cg->CreateAdd(i, cg.Const32(1));
+      loop.LoopEnd(cg->CreateICmpULT(i, cg.Const32(2)), {i, b, a});
+    }
+
+    std::vector<llvm::Value *> final;
+    loop.CollectFinalLoopVariables(final);
+
+    auto *ret = final[1];
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t arg = 44;
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {arg});
+  ASSERT_EQ(ret, arg);
+}
+
+TEST_F(BytecodeInterpreterTest, OverflowIntrinsicsTest) {
+  // Use the overflow intrinsics and retrieve their output. During bytecode
+  // translation the extract instructions get omitted and the values are
+  // written directly to their destination value slot.
+
+  // We call the intrinsics several times and check the results statically
+  // right in the generated function. We merge all checks with AND and return
+  // the result to the test case at the end.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+  codegen::FunctionBuilder main{code_context,
+                                "main",
+                                cg.Int32Type(),
+                                {{"a", cg.Int32Type()}, {"b", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *b = main.GetArgumentByPosition(1);
+    llvm::Value *add_overflow, *sub_overflow;
+    llvm::Value *ret = cg.ConstBool(true);
+
+    auto *add_result = cg.CallAddWithOverflow(a, b, add_overflow);
+    auto *add_result_correct = cg->CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ,
+                                              add_result, cg.Const32(10));
+    ret = cg->CreateAnd(ret, add_result_correct);
+    auto *add_overflow_correct = cg->CreateNot(add_overflow);
+    ret = cg->CreateAnd(ret, add_overflow_correct);
+
+    auto *sub_result =
+        cg.CallSubWithOverflow(cg.Const32(2147483648), b, sub_overflow);
+    auto *sub_result_correct = cg->CreateICmp(
+        llvm::CmpInst::Predicate::ICMP_EQ, sub_result, cg.Const32(2147483642));
+    ret = cg->CreateAnd(ret, sub_result_correct);
+    ret = cg->CreateAnd(ret, sub_overflow);
+
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {4, 6});
+  ASSERT_EQ(ret, 1);
+}
+
+int f(int a, int b) { return a + b; }
+
+TEST_F(BytecodeInterpreterTest, ExternalCallTest) {
+  // Call an external function.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+
+  // create LLVM function declaration
+  auto *func_type = llvm::FunctionType::get(
+      cg.Int32Type(), {cg.Int32Type(), cg.Int32Type()}, false);
+  llvm::Function *func_decl =
+      llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, "f",
+                             &(cg.GetCodeContext().GetModule()));
+  code_context.RegisterExternalFunction(func_decl, (void *)f);
+
+  codegen::FunctionBuilder main{code_context,
+                                "main",
+                                cg.Int32Type(),
+                                {{"a", cg.Int32Type()}, {"b", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *b = main.GetArgumentByPosition(1);
+
+    auto *ret = cg.CallFunc(func_decl, {a, b});
+
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {4, 6});
+  ASSERT_EQ(ret, 10);
+}
+
+TEST_F(BytecodeInterpreterTest, InternalCallTest) {
+  // Call an internal function.
+
+  codegen::CodeContext code_context;
+  codegen::CodeGen cg{code_context};
+
+  codegen::FunctionBuilder f{code_context,
+                             "f",
+                             cg.Int32Type(),
+                             {{"a", cg.Int32Type()}, {"b", cg.Int32Type()}}};
+  {
+    auto *a = f.GetArgumentByPosition(0);
+    auto *b = f.GetArgumentByPosition(1);
+
+    auto *ret = cg->CreateAdd(a, b);
+
+    f.ReturnAndFinish(ret);
+  }
+
+  codegen::FunctionBuilder main{code_context,
+                                "main",
+                                cg.Int32Type(),
+                                {{"a", cg.Int32Type()}, {"b", cg.Int32Type()}}};
+  {
+    auto *a = main.GetArgumentByPosition(0);
+    auto *b = main.GetArgumentByPosition(1);
+
+    auto *ret = cg.CallFunc(f.GetFunction(), {a, b});
+
+    main.ReturnAndFinish(ret);
+  }
+
+  // create Bytecode
+  auto bytecode = codegen::interpreter::BytecodeBuilder::CreateBytecodeFunction(
+      code_context, main.GetFunction());
+
+  // run Bytecode
+  codegen::interpreter::value_t ret =
+      codegen::interpreter::BytecodeInterpreter::ExecuteFunction(bytecode,
+                                                                 {4, 6});
+  ASSERT_EQ(ret, 10);
+}
+
+}  // namespace test
+}  // namespace peloton
\ No newline at end of file
diff --git a/test/codegen/function_builder_test.cpp b/test/codegen/function_builder_test.cpp
index 1822e384f72..9235da51bb8 100644
--- a/test/codegen/function_builder_test.cpp
+++ b/test/codegen/function_builder_test.cpp
@@ -35,7 +35,7 @@ TEST_F(FunctionBuilderTest, ConstructSingleFunction) {
     func.ReturnAndFinish(cg.Const32(magic_num));
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*func_t)(void);
   func_t fn = (func_t) code_context.GetRawFunctionPointer(func.GetFunction());
@@ -80,7 +80,7 @@ TEST_F(FunctionBuilderTest, ConstructNestedFunction) {
   }
 
   // Make sure we can compile everything
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*func_t)(uint32_t);
   func_t fn = (func_t) code_context.GetRawFunctionPointer(main.GetFunction());
diff --git a/test/codegen/if_test.cpp b/test/codegen/if_test.cpp
index 4c198cea2d3..6c00153269a 100644
--- a/test/codegen/if_test.cpp
+++ b/test/codegen/if_test.cpp
@@ -59,7 +59,7 @@ TEST_F(IfTest, TestIfOnly) {
     func.ReturnAndFinish(cond.BuildPHI(va, vb).GetValue());
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*ftype)(int);
 
@@ -128,7 +128,7 @@ TEST_F(IfTest, TestIfInsideLoop) {
     func.ReturnAndFinish(final[1]);
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*ftype)(int);
 
@@ -174,7 +174,7 @@ TEST_F(IfTest, BreakTest) {
     func.ReturnAndFinish(final[0]);
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*ftype)(int);
 
@@ -235,7 +235,7 @@ TEST_F(IfTest, ComplexNestedIf) {
     func.ReturnAndFinish(cond.BuildPHI(vab, vc).GetValue());
   }
 
-  ASSERT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef int (*ftype)(int);
 
diff --git a/test/codegen/testing_codegen_util.cpp b/test/codegen/testing_codegen_util.cpp
index a19598e33ed..b587aeac199 100644
--- a/test/codegen/testing_codegen_util.cpp
+++ b/test/codegen/testing_codegen_util.cpp
@@ -257,7 +257,7 @@ void PelotonCodeGenTest::CreateAndLoadTableWithLayout(
   txn_manager.CommitTransaction(txn);
 }
 
-codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecute(
+PelotonCodeGenTest::CodeGenStats PelotonCodeGenTest::CompileAndExecute(
     planner::AbstractPlan &plan, codegen::ExecutionConsumer &consumer) {
   codegen::QueryParameters parameters(plan, {});
 
@@ -266,15 +266,18 @@ codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecute(
   auto *txn = txn_manager.BeginTransaction();
 
   // Compile the query.
-  codegen::QueryCompiler::CompileStats stats;
+  CodeGenStats stats;
   auto query = codegen::QueryCompiler().Compile(
-      plan, parameters.GetQueryParametersMap(), consumer, &stats);
+      plan, parameters.GetQueryParametersMap(), consumer, &stats.compile_stats);
 
   // Executor context
   executor::ExecutorContext exec_ctx{txn, std::move(parameters)};
 
-  // Execute the query
-  query->Execute(exec_ctx, consumer);
+  // Compile the query to native code
+  query->Compile();
+
+  // Execute the query
+  query->Execute(exec_ctx, consumer, &stats.runtime_stats);
 
   // Commit the transaction.
   txn_manager.CommitTransaction(txn);
@@ -282,7 +285,7 @@ codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecute(
   return stats;
 }
 
-codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecuteCache(
+PelotonCodeGenTest::CodeGenStats PelotonCodeGenTest::CompileAndExecuteCache(
     std::shared_ptr<planner::AbstractPlan> plan,
    codegen::ExecutionConsumer &consumer, bool &cached,
    std::vector<type::Value> params) {
@@ -294,19 +297,20 @@ codegen::QueryCompiler::CompileStats PelotonCodeGenTest::CompileAndExecuteCache(
       codegen::QueryParameters(*plan, params)};
 
   // Compile
-  codegen::QueryCompiler::CompileStats stats;
+  CodeGenStats stats;
   codegen::Query *query = codegen::QueryCache::Instance().Find(plan);
   cached = (query != nullptr);
 
   if (query == nullptr) {
     codegen::QueryCompiler compiler;
     auto compiled_query = compiler.Compile(
         *plan, exec_ctx.GetParams().GetQueryParametersMap(), consumer);
+    compiled_query->Compile();
     query = compiled_query.get();
     codegen::QueryCache::Instance().Add(plan, std::move(compiled_query));
   }
 
   // Execute the query.
-  query->Execute(exec_ctx, consumer);
+  query->Execute(exec_ctx, consumer, &stats.runtime_stats);
 
   // Commit the transaction.
   txn_manager.CommitTransaction(txn);
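The helpers now separate the three phases explicitly. Condensed from the diff above, the calling pattern looks like this (a sketch against the fixture types, not additional patch content):

    // 1) translate the plan to LLVM IR
    auto query = codegen::QueryCompiler().Compile(
        plan, parameters.GetQueryParametersMap(), consumer,
        &stats.compile_stats);

    // 2) JIT the IR to native code; queries that skip this step are
    //    presumably handled by the bytecode interpreter instead (cf. the
    //    codegen_interpreter setting and Query::ExecuteInterpreter above)
    query->Compile();

    // 3) run init/plan/tearDown and collect runtime stats
    query->Execute(exec_ctx, consumer, &stats.runtime_stats);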
diff --git a/test/codegen/value_integrity_test.cpp b/test/codegen/value_integrity_test.cpp
index 0057721352b..97dfeca23e4 100644
--- a/test/codegen/value_integrity_test.cpp
+++ b/test/codegen/value_integrity_test.cpp
@@ -67,7 +67,7 @@ void DivideByZeroTest(const codegen::type::Type &data_type, ExpressionType op) {
   }
 
   // Should be able to compile
-  EXPECT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef void (*func)(CType);
   func f = (func)code_context.GetRawFunctionPointer(function.GetFunction());
@@ -134,7 +134,7 @@ void OverflowTest(const codegen::type::Type &data_type, ExpressionType op) {
   }
 
   // Should be able to compile
-  EXPECT_TRUE(code_context.Compile());
+  code_context.Compile();
 
   typedef void (*func)(CType);
   func f = (func)code_context.GetRawFunctionPointer(function.GetFunction());
diff --git a/test/common/overflow_builtins_test.cpp b/test/common/overflow_builtins_test.cpp
new file mode 100644
index 00000000000..8abdd86fee9
--- /dev/null
+++ b/test/common/overflow_builtins_test.cpp
@@ -0,0 +1,212 @@
+//===----------------------------------------------------------------------===//
+//
+// Peloton
+//
+// overflow_builtins_test.cpp
+//
+// Identification: test/common/overflow_builtins_test.cpp
+//
+// Copyright (c) 2015-2018, Carnegie Mellon University Database Group
+//
+//===----------------------------------------------------------------------===//
+
+#include "common/overflow_builtins.h"
+
+#include "common/harness.h"
+
+#include <limits>
+
+namespace peloton {
+namespace test {
+
+//===--------------------------------------------------------------------===//
+// Testing the fallback functions for overflow aware operations
+//===--------------------------------------------------------------------===//
+
+class OverflowBuiltinsTest : public PelotonTest {
+ public:
+  using unsigned_t = unsigned int;
+  using signed_t = int;
+};
+
+TEST_F(OverflowBuiltinsTest, UnsignedAddTest) {
+  unsigned_t max = std::numeric_limits<unsigned_t>::max();
+  unsigned_t min = std::numeric_limits<unsigned_t>::min();
+
+  unsigned_t c;
+  bool overflow;
+
+  overflow = builtin_add_overflow<unsigned_t>(0, 3, &c);
+  EXPECT_EQ(c, 3);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<unsigned_t>(0, 0, &c);
+  EXPECT_EQ(c, 0);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<unsigned_t>(max - 12, 3, &c);
+  EXPECT_EQ(c, max - 9);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<unsigned_t>(max - 12, 12, &c);
+  EXPECT_EQ(c, max);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<unsigned_t>(max - 12, 13, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_add_overflow<unsigned_t>(max - 12, 21, &c);
+  EXPECT_EQ(c, min + 8);
+  EXPECT_EQ(overflow, true);
+}
+
+TEST_F(OverflowBuiltinsTest, SignedAddTest) {
+  signed_t max = std::numeric_limits<signed_t>::max();
+  signed_t min = std::numeric_limits<signed_t>::min();
+
+  signed_t c;
+  bool overflow;
+
+  overflow = builtin_add_overflow<signed_t>(min, 3, &c);
+  EXPECT_EQ(c, min + 3);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<signed_t>(min, 0, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<signed_t>(0, -12, &c);
+  EXPECT_EQ(c, -12);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<signed_t>(min, -1, &c);
+  EXPECT_EQ(c, max);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_add_overflow<signed_t>(max, 1, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_add_overflow<signed_t>(0, -13, &c);
+  EXPECT_EQ(c, -13);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_add_overflow<signed_t>(-12, 13, &c);
+  EXPECT_EQ(c, 1);
+  EXPECT_EQ(overflow, false);
+}
+
+TEST_F(OverflowBuiltinsTest, UnsignedSubTest) {
+  unsigned_t max = std::numeric_limits<unsigned_t>::max();
+  unsigned_t min = std::numeric_limits<unsigned_t>::min();
+
+  unsigned_t c;
+  bool overflow;
+
+  overflow = builtin_sub_overflow<unsigned_t>(3, 3, &c);
+  EXPECT_EQ(c, 0);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<unsigned_t>(0, 0, &c);
+  EXPECT_EQ(c, 0);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<unsigned_t>(min + 12, 3, &c);
+  EXPECT_EQ(c, min + 9);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<unsigned_t>(min + 12, 12, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<unsigned_t>(min + 12, 13, &c);
+  EXPECT_EQ(c, max);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_sub_overflow<unsigned_t>(min + 12, 15, &c);
+  EXPECT_EQ(c, max - 2);
+  EXPECT_EQ(overflow, true);
+}
+
+TEST_F(OverflowBuiltinsTest, SignedSubTest) {
+  signed_t max = std::numeric_limits<signed_t>::max();
+  signed_t min = std::numeric_limits<signed_t>::min();
+
+  signed_t c;
+  bool overflow;
+
+  overflow = builtin_sub_overflow<signed_t>(min + 3, 3, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<signed_t>(min, 0, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<signed_t>(0, -12, &c);
+  EXPECT_EQ(c, 12);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<signed_t>(min, 1, &c);
+  EXPECT_EQ(c, max);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_sub_overflow<signed_t>(max, -1, &c);
+  EXPECT_EQ(c, min);
+  EXPECT_EQ(overflow, true);
+
+  overflow = builtin_sub_overflow<signed_t>(0, 13, &c);
+  EXPECT_EQ(c, -13);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_sub_overflow<signed_t>(-12, -13, &c);
+  EXPECT_EQ(c, 1);
+  EXPECT_EQ(overflow, false);
+}
+
+TEST_F(OverflowBuiltinsTest, UnsignedMulTest) {
+  unsigned_t max = std::numeric_limits<unsigned_t>::max();
+
+  unsigned_t c;
+  bool overflow;
+
+  overflow = builtin_mul_overflow<unsigned_t>(3, 3, &c);
+  EXPECT_EQ(c, 9);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<unsigned_t>(0, 0, &c);
+  EXPECT_EQ(c, 0);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<unsigned_t>(max, 2, &c);
+  EXPECT_EQ(c, 4294967294);
+  EXPECT_EQ(overflow, true);
+}
+
+TEST_F(OverflowBuiltinsTest, SignedMulTest) {
+  signed_t max = std::numeric_limits<signed_t>::max();
+  // signed_t min = std::numeric_limits<signed_t>::min();
+
+  signed_t c;
+  bool overflow;
+
+  overflow = builtin_mul_overflow<signed_t>(-1, 2, &c);
+  EXPECT_EQ(c, -2);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<signed_t>(2, -4, &c);
+  EXPECT_EQ(c, -8);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<signed_t>(-4, -4, &c);
+  EXPECT_EQ(c, 16);
+  EXPECT_EQ(overflow, false);
+
+  overflow = builtin_mul_overflow<signed_t>(max, -2, &c);
+  EXPECT_EQ(c, 2);
+  EXPECT_EQ(overflow, true);
+}
+
+}  // namespace test
+}  // namespace peloton
diff --git a/test/include/codegen/testing_codegen_util.h b/test/include/codegen/testing_codegen_util.h
index c61a47e67c2..fa9fcf852cd 100644
--- a/test/include/codegen/testing_codegen_util.h
+++ b/test/include/codegen/testing_codegen_util.h
@@ -20,8 +20,8 @@
 #include "codegen/execution_consumer.h"
 #include "codegen/value.h"
 #include "common/container_tuple.h"
-#include "expression/constant_value_expression.h"
 #include "expression/comparison_expression.h"
+#include "expression/constant_value_expression.h"
 #include "expression/tuple_value_expression.h"
 #include "planner/binding_context.h"
 #include "storage/data_table.h"
@@ -68,6 +68,11 @@ class PelotonCodeGenTest : public PelotonTest {
   PelotonCodeGenTest(oid_t tuples_per_tilegroup = DEFAULT_TUPLES_PER_TILEGROUP,
                      peloton::LayoutType layout_type = LayoutType::ROW);
 
+  struct CodeGenStats {
+    codegen::QueryCompiler::CompileStats compile_stats;
+    codegen::Query::RuntimeStats runtime_stats;
+  };
+
   virtual ~PelotonCodeGenTest();
 
   // Get the test database
@@ -103,10 +108,10 @@ class PelotonCodeGenTest : public PelotonTest {
                         bool is_inlined);
 
   // Compile and execute the given plan
-  codegen::QueryCompiler::CompileStats CompileAndExecute(
+  CodeGenStats CompileAndExecute(
       planner::AbstractPlan &plan, codegen::ExecutionConsumer &consumer);
 
-  codegen::QueryCompiler::CompileStats CompileAndExecuteCache(
+  CodeGenStats CompileAndExecuteCache(
       std::shared_ptr<planner::AbstractPlan> plan,
       codegen::ExecutionConsumer &consumer, bool &cached,
       std::vector<type::Value> params = {});