diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..6fb30678 --- /dev/null +++ b/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM + diff --git a/.github/workflows/docker-archlinux.yml b/.github/workflows/docker-archlinux.yml new file mode 100644 index 00000000..da48f07b --- /dev/null +++ b/.github/workflows/docker-archlinux.yml @@ -0,0 +1,16 @@ +name: docker-archlinux + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Build the Docker image + run: docker build . --file Dockerfile_archlinux --tag llvm-tutor:llvm-14 diff --git a/.github/workflows/docker-fedora.yml b/.github/workflows/docker-fedora.yml new file mode 100644 index 00000000..2fef30e6 --- /dev/null +++ b/.github/workflows/docker-fedora.yml @@ -0,0 +1,16 @@ +name: docker-fedora + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Build the Docker image + run: docker build . --file Dockerfile_fedora --tag llvm-tutor:llvm-14 diff --git a/.github/workflows/docker-ubuntu.yml b/.github/workflows/docker-ubuntu.yml new file mode 100644 index 00000000..39b062f9 --- /dev/null +++ b/.github/workflows/docker-ubuntu.yml @@ -0,0 +1,16 @@ +name: docker-ubuntu + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Build the Docker image + run: docker build . 
--file Dockerfile_ubuntu --tag llvm-tutor:llvm-14 diff --git a/.github/workflows/docker-ubuntu-apt.yml b/.github/workflows/docker-ubuntu-apt.yml new file mode 100644 index 00000000..7ccb57eb --- /dev/null +++ b/.github/workflows/docker-ubuntu-apt.yml @@ -0,0 +1,16 @@ +name: docker-ubuntu-apt + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Build the Docker image + run: docker build . --file Dockerfile_ubuntu_apt --tag llvm-tutor:llvm-14 diff --git a/.github/workflows/x86-darwin-llvm-from-sources.yml b/.github/workflows/x86-darwin-llvm-from-sources.yml new file mode 100644 index 00000000..cbfab803 --- /dev/null +++ b/.github/workflows/x86-darwin-llvm-from-sources.yml @@ -0,0 +1,49 @@ +name: x86-Darwin-llvm-from-sources + +on: + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + +jobs: + build: + runs-on: macos-latest + steps: + - uses: actions/checkout@v1 + - name: Download Ninja + run: | + git clone https://github.com/ninja-build/ninja.git + pushd ninja + ./configure.py --bootstrap + popd + - name: Add Ninja to $PATH + run: | + echo "${GITHUB_WORKSPACE}/ninja" >> $GITHUB_PATH + - name: Clone llvm-project + run: | + git clone --depth 1 --single-branch --branch release/14.x https://github.com/llvm/llvm-project + - name: Build LLVM + run: | + cd llvm-project + mkdir build && cd build + cmake -G Ninja \ + -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="clang" \ + -DLLVM_TARGETS_TO_BUILD="X86" -DLLVM_OPTIMIZED_TABLEGEN=ON \ + ../llvm + # Note that only the required tools are built + ninja clang opt lli not FileCheck + - name: Install lit + run: | + sudo pip3 install lit + - name: Build HelloWorld + run: | + cd HelloWorld + mkdir build && cd build + cmake -DLT_LLVM_INSTALL_DIR="$GITHUB_WORKSPACE/llvm-project/build" ../ + make -j2 + - name: Build llvm-tutor + run tests + run: | + mkdir build && cd build + cmake 
-DLT_LLVM_INSTALL_DIR="$GITHUB_WORKSPACE/llvm-project/build" ../ + make -j2 + lit test/ diff --git a/.github/workflows/x86-darwin.yml b/.github/workflows/x86-darwin.yml new file mode 100644 index 00000000..85361714 --- /dev/null +++ b/.github/workflows/x86-darwin.yml @@ -0,0 +1,36 @@ +name: x86-Darwin + +on: + push: + pull_request: + branches: [ main ] + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + +jobs: + build: + runs-on: macos-latest + strategy: + matrix: + type: [Debug, Release] + steps: + - uses: actions/checkout@v1 + - name: Install Dependencies + run: | + brew update + brew install llvm@14 # pinned: LT_LLVM_INSTALL_DIR below points at llvm@14 + pip3 install lit + - name: Build HelloWorld + run: | + cd HelloWorld + mkdir build && cd build + cmake -DLT_LLVM_INSTALL_DIR="/usr/local/opt/llvm@14/" -DCMAKE_BUILD_TYPE=${{ matrix.type }} ../ + make -j2 + - name: Build llvm-tutor + run tests + run: | + cd $GITHUB_WORKSPACE + mkdir build && cd build + cmake -DLT_LLVM_INSTALL_DIR="/usr/local/opt/llvm@14/" -DCMAKE_BUILD_TYPE=${{ matrix.type }} ../ + make -j2 + lit test/ diff --git a/.github/workflows/x86-ubuntu-llvm-from-sources-static.yml b/.github/workflows/x86-ubuntu-llvm-from-sources-static.yml new file mode 100644 index 00000000..ad8074b1 --- /dev/null +++ b/.github/workflows/x86-ubuntu-llvm-from-sources-static.yml @@ -0,0 +1,50 @@ +name: x86-Ubuntu-llvm-from-sources-static + +on: + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - name: Download Ninja + run: | + git clone https://github.com/ninja-build/ninja.git + pushd ninja + ./configure.py --bootstrap + popd + - name: Add Ninja to $PATH + run: | + echo "${GITHUB_WORKSPACE}/ninja" >> $GITHUB_PATH + - name: Clone llvm-project + run: | + git clone --depth 1 --single-branch --branch release/14.x https://github.com/llvm/llvm-project + cd llvm-project + - name: Register MBASub statically + run: | + bash utils/static_registration.sh --llvm_project_dir 
"$GITHUB_WORKSPACE/llvm-project" + - name: Build LLVM + run: | + cd llvm-project + mkdir build && cd build + ln -s /usr/bin/x86_64-linux-gnu-ld.gold ld + cmake -G Ninja \ + -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="clang" \ + -DLLVM_TARGETS_TO_BUILD="host" -DLLVM_OPTIMIZED_TABLEGEN=ON \ + -DLLVM_BUILD_EXAMPLES=On -DLLVM_MBASUB_LINK_INTO_TOOLS=On \ + ../llvm + # Note that only the required tools are built + ninja + - name: Install lit + run: | + sudo apt-get install python3-setuptools + sudo pip3 install lit + - name: Run MBASub - sanity check + run: | + $GITHUB_WORKSPACE/llvm-project/build/bin/opt -passes=mba-sub -S $GITHUB_WORKSPACE/test/MBA_sub.ll + - name: Run MBASub tests + run: | + $GITHUB_WORKSPACE/llvm-project/build/bin/llvm-lit $GITHUB_WORKSPACE/llvm-project/llvm/test/Examples/MBASub/ diff --git a/.github/workflows/x86-ubuntu-llvm-from-sources.yml b/.github/workflows/x86-ubuntu-llvm-from-sources.yml new file mode 100644 index 00000000..ed4ee44e --- /dev/null +++ b/.github/workflows/x86-ubuntu-llvm-from-sources.yml @@ -0,0 +1,53 @@ +name: x86-Ubuntu-llvm-from-sources + +on: + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v1 + - name: Download Ninja + run: | + git clone https://github.com/ninja-build/ninja.git + pushd ninja + ./configure.py --bootstrap + # chmod +x ninja + ./ninja --version + popd + - name: Add Ninja to $PATH + run: | + echo "${GITHUB_WORKSPACE}/ninja" >> $GITHUB_PATH + - name: Clone llvm-project + run: | + git clone --depth 10 --single-branch --branch release/14.x https://github.com/llvm/llvm-project + - name: Build LLVM + run: | + cd llvm-project + mkdir build && cd build + ln -s /usr/bin/x86_64-linux-gnu-ld.gold ld + cmake -G Ninja \ + -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS="clang" \ + -DLLVM_TARGETS_TO_BUILD="X86" -DLLVM_OPTIMIZED_TABLEGEN=ON \ + -DBUILD_SHARED_LIBS=On ../llvm + # Note that only the required tools are built + ninja 
clang opt lli not FileCheck + - name: Install lit + run: | + sudo apt-get install python3-setuptools + sudo pip3 install lit + - name: Build HelloWorld + run: | + cd HelloWorld + mkdir build && cd build + cmake -DLT_LLVM_INSTALL_DIR="$GITHUB_WORKSPACE/llvm-project/build" ../ + make -j2 + - name: Build llvm-tutor + run tests + run: | + mkdir build && cd build + cmake -DLT_LLVM_INSTALL_DIR="$GITHUB_WORKSPACE/llvm-project/build" ../ + make -j2 + lit test/ diff --git a/.github/workflows/x86-ubuntu.yml b/.github/workflows/x86-ubuntu.yml new file mode 100644 index 00000000..c2239d50 --- /dev/null +++ b/.github/workflows/x86-ubuntu.yml @@ -0,0 +1,48 @@ +name: x86-Ubuntu + +on: + push: + pull_request: + branches: [ main ] # default branch; matches the other workflows and the README badges + schedule: + - cron: '0 0 * * *' + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-20.04 + strategy: + matrix: + compiler: + - { compiler: GNU, CC: gcc-10, CXX: g++-10 } + - { compiler: LLVM, CC: clang-10, CXX: clang++-10 } + type: [Debug, Release] + steps: + - uses: actions/checkout@v1 + - name: Install Dependencies + env: + CC: ${{ matrix.compiler.CC }} + CXX: ${{ matrix.compiler.CXX }} + run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + sudo apt-add-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-14 main" + sudo apt-get update + sudo apt-get install -y llvm-14 llvm-14-dev llvm-14-tools clang-14 + sudo apt-get install python3-setuptools + sudo pip3 install lit + - name: Build HelloWorld + env: + CC: ${{ matrix.compiler.CC }} + CXX: ${{ matrix.compiler.CXX }} + run: | + cd HelloWorld + mkdir build && cd build + cmake -DLT_LLVM_INSTALL_DIR="/usr/lib/llvm-14/" -DCMAKE_BUILD_TYPE=${{ matrix.type }} ../ + make -j2 + - name: Build llvm-tutor + run tests + run: | + cd $GITHUB_WORKSPACE + mkdir build && cd build + cmake -DLT_LLVM_INSTALL_DIR="/usr/lib/llvm-14/" -DCMAKE_BUILD_TYPE=${{ matrix.type }} ../ + make -j2 + lit -va test/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 
00000000..d176426f --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,113 @@ +cmake_minimum_required(VERSION 3.13.4) +project(llvm-tutor) + +#=============================================================================== +# 1. VERIFY LLVM INSTALLATION DIR +# This is just a bit of sanity checking. +#=============================================================================== +set(LT_LLVM_INSTALL_DIR "" CACHE PATH "LLVM installation directory") + +# 1.1 Check the "include" directory +set(LT_LLVM_INCLUDE_DIR "${LT_LLVM_INSTALL_DIR}/include/llvm") +if(NOT EXISTS "${LT_LLVM_INCLUDE_DIR}") +message(FATAL_ERROR + " LT_LLVM_INSTALL_DIR (${LT_LLVM_INSTALL_DIR}) is invalid: ${LT_LLVM_INCLUDE_DIR} does not exist.") +endif() + +# 1.2 Check that the LLVMConfig.cmake file exists (the location depends on the +# OS) +set(LT_VALID_INSTALLATION FALSE) + +# Ubuntu + Darwin +if(EXISTS "${LT_LLVM_INSTALL_DIR}/lib/cmake/llvm/LLVMConfig.cmake") + set(LT_VALID_INSTALLATION TRUE) +endif() + +# Fedora +if(EXISTS "${LT_LLVM_INSTALL_DIR}/lib64/cmake/llvm/LLVMConfig.cmake") + set(LT_VALID_INSTALLATION TRUE) +endif() + +if(NOT ${LT_VALID_INSTALLATION}) + message(FATAL_ERROR + "LLVM installation directory, (${LT_LLVM_INSTALL_DIR}), is invalid. Couldn't + find LLVMConfig.cmake.") +endif() + +#=============================================================================== +# 2. LOAD LLVM CONFIGURATION +# For more: http://llvm.org/docs/CMake.html#embedding-llvm-in-your-project +#=============================================================================== +# Add the location of LLVMConfig.cmake to CMake search paths (so that +# find_package can locate it) +# Note: On Fedora, when using the pre-compiled binaries installed with `dnf`, +# LLVMConfig.cmake is located in "/usr/lib64/cmake/llvm". But this path is +# among other paths that will be checked by default when using +# `find_package(llvm)`. So there's no need to add it here. 
+list(APPEND CMAKE_PREFIX_PATH "${LT_LLVM_INSTALL_DIR}/lib/cmake/llvm/") + +find_package(LLVM 14 REQUIRED CONFIG) + +# Another sanity check +if(NOT "14" VERSION_EQUAL "${LLVM_VERSION_MAJOR}") + message(FATAL_ERROR "Found LLVM ${LLVM_VERSION_MAJOR}, but need LLVM 14") +endif() + +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") +message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") + +message("LLVM STATUS: + Definitions ${LLVM_DEFINITIONS} + Includes ${LLVM_INCLUDE_DIRS} + Libraries ${LLVM_LIBRARY_DIRS} + Targets ${LLVM_TARGETS_TO_BUILD}" +) + +# Set the LLVM header and library paths +include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) +link_directories(${LLVM_LIBRARY_DIRS}) +add_definitions(${LLVM_DEFINITIONS}) + +#=============================================================================== +# 3. LLVM-TUTOR BUILD CONFIGURATION +#=============================================================================== +# Use the same C++ standard as LLVM does +set(CMAKE_CXX_STANDARD 14 CACHE STRING "") + +# Build type +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Debug CACHE + STRING "Build type (default Debug):" FORCE) +endif() + +# Compiler flags +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall\ + -fdiagnostics-color=always") + +# LLVM is normally built without RTTI. Be consistent with that. +if(NOT LLVM_ENABLE_RTTI) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") +endif() + +# -fvisibility-inlines-hidden is set when building LLVM and on Darwin warnings +# are triggered if llvm-tutor is built without this flag (though otherwise it +# builds fine). For consistency, add it here too. 
+include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) +if (SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) # test the variable itself: it is empty when the flag is unsupported + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") +endif() + +# Set the build directories +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin") +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib") + +#=============================================================================== +# 4. ADD SUB-TARGETS +# Doing this at the end so that all definitions and link/include paths are +# available for the sub-projects. +#=============================================================================== +add_subdirectory(lib) +add_subdirectory(tools) +add_subdirectory(test) +add_subdirectory(HelloWorld) diff --git a/Dockerfile_archlinux b/Dockerfile_archlinux new file mode 100644 index 00000000..8cf12cff --- /dev/null +++ b/Dockerfile_archlinux @@ -0,0 +1,47 @@ +# ============================================================================= +# An Arch Linux docker file for llvm-tutor. Clones and builds llvm-tutor, runs +# all tests. It uses the precompiled LLVM packages from Arch Linux. +# +# USAGE: +# ```bash +# wget https://raw.githubusercontent.com/banach-space/llvm-tutor/main/Dockerfile_archlinux +# docker build -t=llvm-tutor:llvm-14 -f Dockerfile_archlinux . +# docker run --rm -it --hostname=llvm-tutor llvm-tutor:llvm-14 /bin/bash +# ``` +# ============================================================================= + +FROM archlinux + +ENV LLVM_DIR /usr/ +ENV TUTOR_DIR /llvm-tutor + +# 1. INSTALL DEPENDENCIES +RUN pacman -Syu --noconfirm \ + git \ + cmake \ + ninja \ + gcc \ + llvm \ + clang \ + python-pip + +# 2. INSTALL LIT +RUN pip3 install lit + +# 3. CLONE LLVM-TUTOR +RUN git clone https://github.com/banach-space/llvm-tutor $TUTOR_DIR + +# 4. 
BUILD AND RUN HELLO-WORLD +RUN mkdir -p $TUTOR_DIR/hello-world-build \ + && cd $TUTOR_DIR/hello-world-build \ + && cmake -G Ninja -DLT_LLVM_INSTALL_DIR=$LLVM_DIR ../HelloWorld \ + && ninja +RUN cd $TUTOR_DIR/hello-world-build && $LLVM_DIR/bin/clang -S -O1 -emit-llvm ../inputs/input_for_hello.c -o input_for_hello.ll +RUN cd $TUTOR_DIR/hello-world-build && $LLVM_DIR/bin/opt -load-pass-plugin ./libHelloWorld.so -passes=hello-world -disable-output input_for_hello.ll 2>&1 | grep "(llvm-tutor) Hello from: foo" + +# 5. BUILD AND RUN LLVM-TUTOR +RUN mkdir -p $TUTOR_DIR/build \ + && cd $TUTOR_DIR/build \ + && cmake -G Ninja -DLT_LLVM_INSTALL_DIR=$LLVM_DIR ../ \ + && ninja \ + && lit test/ diff --git a/Dockerfile_fedora b/Dockerfile_fedora new file mode 100644 index 00000000..967f382d --- /dev/null +++ b/Dockerfile_fedora @@ -0,0 +1,50 @@ +# ============================================================================= +# A Fedora docker file for llvm-tutor. Clones and builds llvm-tutor, runs all +# tests. It uses the precompiled LLVM packages from Fedora. +# +# USAGE: +# ```bash +# wget https://raw.githubusercontent.com/banach-space/llvm-tutor/main/Dockerfile_fedora +# docker build -t=llvm-tutor:llvm-14 -f Dockerfile_fedora . +# docker run --rm -it --hostname=llvm-tutor llvm-tutor:llvm-14 /bin/bash +# ``` +# ============================================================================= + +FROM fedora:36 + +ENV LLVM_DIR /usr/ +ENV TUTOR_DIR /llvm-tutor + +# 1. INSTALL DEPENDENCIES +RUN dnf -y install \ + git \ + cmake \ + ninja-build \ + gcc \ + gcc-c++ \ + llvm-devel \ + clang \ + zlib \ + zlib-devel \ + python3-pip + +# 2. INSTALL LIT +RUN pip3 install lit + +# 3. CLONE LLVM-TUTOR +RUN git clone https://github.com/banach-space/llvm-tutor $TUTOR_DIR + +# 4. 
BUILD AND RUN HELLO-WORLD +RUN mkdir -p $TUTOR_DIR/hello-world-build \ + && cd $TUTOR_DIR/hello-world-build \ + && cmake -G Ninja -DLT_LLVM_INSTALL_DIR=$LLVM_DIR ../HelloWorld \ + && ninja +RUN cd $TUTOR_DIR/hello-world-build && $LLVM_DIR/bin/clang -S -O1 -emit-llvm ../inputs/input_for_hello.c -o input_for_hello.ll +RUN cd $TUTOR_DIR/hello-world-build && $LLVM_DIR/bin/opt -load-pass-plugin ./libHelloWorld.so -passes=hello-world -disable-output input_for_hello.ll 2>&1 | grep "(llvm-tutor) Hello from: foo" + +# 5. BUILD AND RUN LLVM-TUTOR +RUN mkdir -p $TUTOR_DIR/build \ + && cd $TUTOR_DIR/build \ + && cmake -DLT_LLVM_INSTALL_DIR=$LLVM_DIR ../ \ + && make -j $(nproc --all) \ + && lit test/ diff --git a/Dockerfile_ubuntu b/Dockerfile_ubuntu new file mode 100644 index 00000000..948a547b --- /dev/null +++ b/Dockerfile_ubuntu @@ -0,0 +1,50 @@ +# How to run: +# 1. Download the Dockerfile +# $ wget https://raw.githubusercontent.com/banach-space/llvm-tutor/main/Dockerfile_ubuntu +# 2. Build the Docker image +# $ docker build -t=llvm-tutor:llvm-14 -f Dockerfile_ubuntu . +# 3. Run the Docker container +# $ docker run --rm -it --hostname=llvm-tutor llvm-tutor:llvm-14 /bin/bash + +FROM debian:buster + +# Installing dependencies +RUN apt-get update && apt-get install -y \ + git \ + cmake \ + ninja-build \ + build-essential \ + python3-minimal python3-pip\ + && rm -rf /var/lib/apt/lists/* + +# Installing lit +# Note that lit's tests depend on 'not' and 'FileCheck', LLVM utilities. +# https://github.com/llvm/llvm-project/tree/master/llvm/utils/lit +# So, we need to add -DLLVM_INSTALL_UTILS=ON cmake flag when trying to build LLVM. 
+# https://llvm.org/docs/CMake.html +RUN pip3 install lit + +# Building LLVM+Clang (release/14.x) from source +ENV LLVM_DIR /opt/llvm +RUN git clone --branch release/14.x --depth 1 https://github.com/llvm/llvm-project \ + && mkdir -p $LLVM_DIR \ + && mkdir -p llvm-project/build \ + && cd llvm-project/build \ + && cmake -G Ninja \ + -DLLVM_ENABLE_PROJECTS=clang \ + -DLLVM_TARGETS_TO_BUILD=X86 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=$LLVM_DIR \ + -DLLVM_INSTALL_UTILS=ON \ + ../llvm \ + && cmake --build . --target install \ + && rm -r /llvm-project + +# Building llvm-tutor +ENV TUTOR_DIR /llvm-tutor +RUN git clone https://github.com/banach-space/llvm-tutor $TUTOR_DIR \ + && mkdir -p $TUTOR_DIR/build \ + && cd $TUTOR_DIR/build \ + && cmake -DLT_LLVM_INSTALL_DIR=$LLVM_DIR ../ \ + && make -j $(nproc --all) \ + && lit test/ diff --git a/Dockerfile_ubuntu_apt b/Dockerfile_ubuntu_apt new file mode 100644 index 00000000..aab32719 --- /dev/null +++ b/Dockerfile_ubuntu_apt @@ -0,0 +1,60 @@ +# ============================================================================= +# An Ubuntu docker file for llvm-tutor. Clones and builds llvm-tutor, runs all +# tests. It uses the precompiled LLVM packages from apt.llvm.org. +# +# USAGE: +# ```bash +# wget https://raw.githubusercontent.com/banach-space/llvm-tutor/main/Dockerfile_ubuntu_apt +# docker build -t=llvm-tutor:llvm-14 -f Dockerfile_ubuntu_apt . +# docker run --rm -it --hostname=llvm-tutor llvm-tutor:llvm-14 /bin/bash +# ``` +# ============================================================================= + +FROM ubuntu:20.04 + +ENV LLVM_DIR /usr/lib/llvm-14/ +ENV TUTOR_DIR /llvm-tutor + +# 1. INSTALL DEPENDENCIES +# As `tzdata` gets installed automatically (AFAIK, Python depends on it), it +# will interrupt the set-up with a question about the time-zone to use. However, +# this is a non-interactive session and that won't work. Work around it by +# setting the time-zone here. 
+ENV TZ=Europe/London +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +RUN apt-get update && apt-get install -y \ + git \ + cmake \ + ninja-build \ + build-essential \ + python3-minimal python3-pip\ + wget \ + software-properties-common \ + && rm -rf /var/lib/apt/lists/* + +RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - +RUN apt-add-repository "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-14 main" +RUN apt-get update +RUN apt-get install -y llvm-14 llvm-14-dev llvm-14-tools clang-14 +RUN apt-get install -y python3-setuptools + +# 2. INSTALL LIT +RUN pip3 install lit + +# 3. CLONE LLVM-TUTOR +RUN git clone https://github.com/banach-space/llvm-tutor $TUTOR_DIR + +# 4. BUILD AND RUN HELLO-WORLD +RUN mkdir -p $TUTOR_DIR/hello-world-build \ + && cd $TUTOR_DIR/hello-world-build \ + && cmake -G Ninja -DLT_LLVM_INSTALL_DIR=$LLVM_DIR ../HelloWorld \ + && ninja +RUN cd $TUTOR_DIR/hello-world-build && /usr/bin/clang-14 -S -O1 -emit-llvm ../inputs/input_for_hello.c -o input_for_hello.ll +RUN cd $TUTOR_DIR/hello-world-build && /usr/bin/opt-14 -load-pass-plugin ./libHelloWorld.so -passes=hello-world -disable-output input_for_hello.ll 2>&1 | grep "(llvm-tutor) Hello from: foo" + +# 5. BUILD AND RUN LLVM-TUTOR +RUN mkdir -p $TUTOR_DIR/build \ + && cd $TUTOR_DIR/build \ + && cmake -DLT_LLVM_INSTALL_DIR=$LLVM_DIR ../ \ + && make -j $(nproc --all) \ + && lit test/ diff --git a/HelloWorld/CMakeLists.txt b/HelloWorld/CMakeLists.txt new file mode 100644 index 00000000..555101d3 --- /dev/null +++ b/HelloWorld/CMakeLists.txt @@ -0,0 +1,39 @@ +cmake_minimum_required(VERSION 3.13.4) +project(llvm-tutor-hello-world) + +#=============================================================================== +# 1. 
LOAD LLVM CONFIGURATION +#=============================================================================== +# Set this to a valid LLVM installation dir +set(LT_LLVM_INSTALL_DIR "" CACHE PATH "LLVM installation directory") + +# Add the location of LLVMConfig.cmake to CMake search paths (so that +# find_package can locate it) +list(APPEND CMAKE_PREFIX_PATH "${LT_LLVM_INSTALL_DIR}/lib/cmake/llvm/") + +# FIXME: This is a workaround for #25. Remove once it is resolved. +find_package(LLVM 14 REQUIRED CONFIG) + +# HelloWorld includes headers from LLVM - update the include paths accordingly +include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) + +#=============================================================================== +# 2. LLVM-TUTOR BUILD CONFIGURATION +#=============================================================================== +# Use the same C++ standard as LLVM does +set(CMAKE_CXX_STANDARD 14 CACHE STRING "") + +# LLVM is normally built without RTTI. Be consistent with that. +if(NOT LLVM_ENABLE_RTTI) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") +endif() + +#=============================================================================== +# 3. ADD THE TARGET +#=============================================================================== +add_library(HelloWorld SHARED HelloWorld.cpp) + +# Allow undefined symbols in shared objects on Darwin (this is the default +# behaviour on Linux) +target_link_libraries(HelloWorld + "$<$<PLATFORM_ID:Darwin>:-undefined dynamic_lookup>") diff --git a/HelloWorld/HelloWorld.cpp b/HelloWorld/HelloWorld.cpp new file mode 100644 index 00000000..20426256 --- /dev/null +++ b/HelloWorld/HelloWorld.cpp @@ -0,0 +1,110 @@ +//============================================================================= +// FILE: +// HelloWorld.cpp +// +// DESCRIPTION: +// Visits all functions in a module, prints their names and the number of +// arguments via stderr. Strictly speaking, this is an analysis pass (i.e. +// the functions are not modified). 
However, in order to keep things simple +// there's no 'print' method here (every analysis pass should implement it). +// +// USAGE: +// 1. Legacy PM +// opt -load libHelloWorld.dylib -legacy-hello-world -disable-output `\` +// <input-llvm-file> +// 2. New PM +// opt -load-pass-plugin=libHelloWorld.dylib -passes="hello-world" `\` +// -disable-output <input-llvm-file> +// +// +// License: MIT +//============================================================================= +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Passes/PassPlugin.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//----------------------------------------------------------------------------- +// HelloWorld implementation +//----------------------------------------------------------------------------- +// No need to expose the internals of the pass to the outside world - keep +// everything in an anonymous namespace. +namespace { + +// This method implements what the pass does +void visitor(Function &F) { + errs() << "(llvm-tutor) Hello from: "<< F.getName() << "\n"; + errs() << "(llvm-tutor) number of arguments: " << F.arg_size() << "\n"; +} + +// New PM implementation (PassInfoMixin is a CRTP base, hence <HelloWorld>) +struct HelloWorld : PassInfoMixin<HelloWorld> { + // Main entry point, takes IR unit to run the pass on (&F) and the + // corresponding pass manager (to be queried if need be) + PreservedAnalyses run(Function &F, FunctionAnalysisManager &) { + visitor(F); + return PreservedAnalyses::all(); + } + + // Without isRequired returning true, this pass will be skipped for functions + // decorated with the optnone LLVM attribute. Note that clang -O0 decorates + // all functions with optnone. + static bool isRequired() { return true; } +}; + +// Legacy PM implementation +struct LegacyHelloWorld : public FunctionPass { + static char ID; + LegacyHelloWorld() : FunctionPass(ID) {} + // Main entry point - the name conveys what unit of IR this is to be run on. 
+ bool runOnFunction(Function &F) override { + visitor(F); + // Doesn't modify the input unit of IR, hence 'false' + return false; + } +}; +} // namespace + +//----------------------------------------------------------------------------- +// New PM Registration +//----------------------------------------------------------------------------- +llvm::PassPluginLibraryInfo getHelloWorldPluginInfo() { + return {LLVM_PLUGIN_API_VERSION, "HelloWorld", LLVM_VERSION_STRING, + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, FunctionPassManager &FPM, + ArrayRef<PassBuilder::PipelineElement>) { + if (Name == "hello-world") { + FPM.addPass(HelloWorld()); + return true; + } + return false; + }); + }}; +} + +// This is the core interface for pass plugins. It guarantees that 'opt' will +// be able to recognize HelloWorld when added to the pass pipeline on the +// command line, i.e. via '-passes=hello-world' +extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo +llvmGetPassPluginInfo() { + return getHelloWorldPluginInfo(); +} + +//----------------------------------------------------------------------------- +// Legacy PM Registration +//----------------------------------------------------------------------------- +// The address of this variable is used to uniquely identify the pass. The +// actual value doesn't matter. +char LegacyHelloWorld::ID = 0; + +// This is the core interface for pass plugins. It guarantees that 'opt' will +// recognize LegacyHelloWorld when added to the pass pipeline on the command +// line, i.e. 
via '--legacy-hello-world' +static RegisterPass<LegacyHelloWorld> + X("legacy-hello-world", "Hello World Pass", + true, // This pass doesn't modify the CFG => true + false // This pass is not a pure analysis pass => false + ); diff --git a/README.md b/README.md new file mode 100644 index 00000000..769d1047 --- /dev/null +++ b/README.md @@ -0,0 +1,1346 @@ +llvm-tutor +========= +[![Build Status](https://github.com/banach-space/llvm-tutor/workflows/x86-Ubuntu/badge.svg?branch=main)](https://github.com/banach-space/llvm-tutor/actions?query=workflow%3Ax86-Ubuntu+branch%3Amain) +[![Build Status](https://github.com/banach-space/llvm-tutor/workflows/x86-Darwin/badge.svg?branch=main)](https://github.com/banach-space/llvm-tutor/actions?query=workflow%3Ax86-Darwin+branch%3Amain) + + +Example LLVM passes - based on **LLVM 14** + +**llvm-tutor** is a collection of self-contained reference LLVM passes. It's a +tutorial that targets novice and aspiring LLVM developers. Key features: + +* **Out-of-tree** - builds against a binary LLVM installation (no need to build LLVM from sources) +* **Complete** - includes `CMake` build scripts, LIT tests, CI set-up and documentation +* **Modern** - based on the latest version of LLVM (and updated with every release) + +### Overview +LLVM implements a very rich, powerful and popular API. However, like many +complex technologies, it can be quite daunting and overwhelming to learn and +master. The goal of this LLVM tutorial is to showcase that LLVM can in fact be +easy and fun to work with. This is demonstrated through a range of self-contained, +testable LLVM passes, which are implemented using idiomatic LLVM. + +This document explains how to set-up your environment, build and run the +examples, and go about debugging. It contains a high-level overview of the +implemented examples and contains some background information on writing LLVM +passes. The source files, apart from the code itself, contain comments that +will guide you through the implementation. 
All examples are complemented with +[LIT](https://llvm.org/docs/TestingGuide.html) tests and reference [input +files](https://github.com/banach-space/llvm-tutor/blob/main/inputs). + +Visit [**clang-tutor**](https://github.com/banach-space/clang-tutor/) if you +are interested in a similar tutorial for Clang. + +### Table of Contents +* [HelloWorld: Your First Pass](#helloworld-your-first-pass) +* Part 1: **llvm-tutor** in more detail + * [Development Environment](#development-environment) + * [Building & Testing](#building--testing) + * [Overview of the Passes](#overview-of-the-passes) + * [Debugging](#debugging) +* Part 2: Passes In LLVM + * [About Pass Managers in LLVM](#about-pass-managers-in-llvm) + * [Analysis vs Transformation Pass](#analysis-vs-transformation-pass) + * [Dynamic vs Static Plugins](#dynamic-vs-static-plugins) + * [Optimisation Passes Inside LLVM](#optimisation-passes-inside-llvm) +* [References](#references) + + +HelloWorld: Your First Pass +=========================== +The **HelloWorld** pass from +[HelloWorld.cpp](https://github.com/banach-space/llvm-tutor/blob/main/HelloWorld/HelloWorld.cpp) +is a self-contained *reference example*. The corresponding +[CMakeLists.txt](https://github.com/banach-space/llvm-tutor/blob/main/HelloWorld/CMakeLists.txt) +implements the minimum set-up for an out-of-tree pass. + +For every function defined in the input module, **HelloWorld** prints its name +and the number of arguments that it takes. 
You can build it like this: + +```bash +export LLVM_DIR=<installation/dir/of/llvm/14> +mkdir build +cd build +cmake -DLT_LLVM_INSTALL_DIR=$LLVM_DIR <source/dir/llvm/tutor>/HelloWorld/ +make +``` + +Before you can test it, you need to prepare an input file: + +```bash +# Generate an LLVM test file +$LLVM_DIR/bin/clang -O1 -S -emit-llvm <source/dir/llvm/tutor>/inputs/input_for_hello.c -o input_for_hello.ll +``` + +Finally, run **HelloWorld** with +[**opt**](http://llvm.org/docs/CommandGuide/opt.html) (use `libHelloWorld.so` +on Linux and `libHelloWorld.dylib` on Mac OS): + +```bash +# Run the pass +$LLVM_DIR/bin/opt -load-pass-plugin ./libHelloWorld.{so|dylib} -passes=hello-world -disable-output input_for_hello.ll +# Expected output +(llvm-tutor) Hello from: foo +(llvm-tutor) number of arguments: 1 +(llvm-tutor) Hello from: bar +(llvm-tutor) number of arguments: 2 +(llvm-tutor) Hello from: fez +(llvm-tutor) number of arguments: 3 +(llvm-tutor) Hello from: main +(llvm-tutor) number of arguments: 2 +``` + +The **HelloWorld** pass doesn't modify the input module. The `-disable-output` +flag is used to prevent **opt** from printing the output bitcode file. + +Development Environment +======================= +## Platform Support And Requirements +This project has been tested on **Ubuntu 20.04** and **Mac OS X 10.14.4**. 
In +order to build **llvm-tutor** you will need: + * LLVM 14 + * C++ compiler that supports C++14 + * CMake 3.13.4 or higher + +In order to run the passes, you will need: + * **clang-14** (to generate input LLVM files) + * [**opt**](http://llvm.org/docs/CommandGuide/opt.html) (to run the passes) + +There are additional requirements for tests (these will be satisfied by +installing LLVM 14): + * [**lit**](https://llvm.org/docs/CommandGuide/lit.html) (aka **llvm-lit**, + LLVM tool for executing the tests) + * [**FileCheck**](https://llvm.org/docs/CommandGuide/FileCheck.html) (LIT + requirement, it's used to check whether tests generate the expected output) + +## Installing LLVM 14 on Mac OS X +On Darwin you can install LLVM 14 with [Homebrew](https://brew.sh/): + +```bash +brew install llvm@14 +``` + +If you already have an older version of LLVM installed, you can upgrade it to +LLVM 14 like this: + +```bash +brew upgrade llvm +``` + +Once the installation (or upgrade) is complete, all the required header files, +libraries and tools will be located in `/usr/local/opt/llvm/`. + +## Installing LLVM 14 on Ubuntu +On Ubuntu Bionic, you can [install modern +LLVM](https://blog.kowalczyk.info/article/k/how-to-install-latest-clang-6.0-on-ubuntu-16.04-xenial-wsl.html) +from the official [repository](http://apt.llvm.org/): + +```bash +wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - +sudo apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-14 main" +sudo apt-get update +sudo apt-get install -y llvm-14 llvm-14-dev llvm-14-tools clang-14 +``` +This will install all the required header files, libraries and tools in +`/usr/lib/llvm-14/`. + +## Building LLVM 14 From Sources +Building from sources can be slow and tricky to debug. It is not necessary, but +might be your preferred way of obtaining LLVM 14. 
The following steps will work +on Linux and Mac OS X: + +```bash +git clone https://github.com/llvm/llvm-project.git +cd llvm-project +git checkout release/14.x +mkdir build +cd build +cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD=host -DLLVM_ENABLE_PROJECTS=clang /llvm/ +cmake --build . +``` +For more details read the [official +documentation](https://llvm.org/docs/CMake.html). + +Building & Testing +=================== +## Building +You can build **llvm-tutor** (and all the provided pass plugins) as follows: + +```bash +cd +cmake -DLT_LLVM_INSTALL_DIR= +make +``` + +The `LT_LLVM_INSTALL_DIR` variable should be set to the root of either the +installation or build directory of LLVM 14. It is used to locate the +corresponding `LLVMConfig.cmake` script that is used to set the include and +library paths. + +## Testing +In order to run **llvm-tutor** tests, you need to install **llvm-lit** (aka +**lit**). It's not bundled with LLVM 14 packages, but you can install it with +**pip**: + +```bash +# Install lit - note that this installs lit globally +pip install lit +``` +Running the tests is as simple as: + +```bash +$ lit /test +``` +Voilà! You should see all tests passing. + +## LLVM Plugins as shared objecs +In **llvm-tutor** every LLVM pass is implemented in a separate shared object +(you can learn more about shared objects +[here](http://www.yolinux.com/TUTORIALS/LibraryArchives-StaticAndDynamic.html)). +These shared objects are essentially dynamically loadable plugins for **opt**. +All plugins are built in the `/lib` directory. + +Note that the extension of dynamically loaded shared objects differs between +Linux and Mac OS. For example, for the **HelloWorld** pass you will get: + +* `libHelloWorld.so` on Linux +* `libHelloWorld.dylib` on MacOS. + +For the sake of consistency, in this README.md file all examples use the `*.so` +extension. When working on Mac OS, use `*.dylib` instead. 
+ +Overview of The Passes +====================== +The available passes are categorised as either Analysis, Transformation or CFG. +The difference between Analysis and Transformation passes is rather +self-explanatory ([here](#analysis-vs-transformation-pass) is a more technical +breakdown). A CFG pass is simply a Transformation pass that modifies the Control +Flow Graph. This is frequently a bit more complex and requires some extra bookkeeping, +hence a dedicated category. + +In the following table the passes are grouped thematically and ordered by the +level of complexity. + +| Name | Description | Category | +|-----------|-----------------|------| +|[**HelloWorld**](#helloworld-your-first-pass) | visits all functions and prints their names | Analysis | +|[**OpcodeCounter**](#opcodecounter) | prints a summary of LLVM IR opcodes in the input module | Analysis | +|[**InjectFuncCall**](#injectfunccall) | instruments the input module by inserting calls to `printf` | Transformation | +|[**StaticCallCounter**](#staticcallcounter) | counts direct function calls at compile-time (static analysis) | Analysis | +|[**DynamicCallCounter**](#dynamiccallcounter) | counts direct function calls at run-time (dynamic analysis) | Transformation | +|[**MBASub**](#mbasub) | obfuscate integer `sub` instructions | Transformation | +|[**MBAAdd**](#mbaadd) | obfuscate 8-bit integer `add` instructions | Transformation | +|[**FindFCmpEq**](#findfcmpeq) | finds floating-point equality comparisons | Analysis | +|[**ConvertFCmpEq**](#convertfcmpeq) | converts direct floating-point equality comparisons to difference comparisons | Transformation | +|[**RIV**](#riv) | finds reachable integer values for each basic block | Analysis | +|[**DuplicateBB**](#duplicatebb) | duplicates basic blocks, requires **RIV** analysis results | CFG | +|[**MergeBB**](#mergebb) | merges duplicated basic blocks | CFG | + +Once you've [built](#building--testing) this project, you can experiment with +every pass 
separately. All passes, except for +[**HelloWorld**](#helloworld-your-first-pass), are described in more detail +below. + +LLVM passes work with LLVM IR files. You can generate one like this: + +```bash +export LLVM_DIR= +# Textual form +$LLVM_DIR/bin/clang -O1 -emit-llvm input.c -S -o out.ll +# Binary/bit-code form +$LLVM_DIR/bin/clang -O1 -emit-llvm input.c -c -o out.bc +``` +It doesn't matter whether you choose the binary, `*.bc` (default), or +textual/LLVM assembly form (`.ll`, requires the `-S` flag). Obviously, the +latter is more human-readable. Similar logic applies to **opt** - by default it +generates `*.bc` files. You can use `-S` to have the output written as `*.ll` +files instead. + +Note that `clang` adds the `optnone` [function +attribute](https://llvm.org/docs/LangRef.html#function-attributes) if either + +* no optimization level is specified, or +* `-O0` is specified. + +If you want to compile at `-O0`, you need to specify `-O0 -Xclang +-disable-O0-optnone` or define a static +[isRequired](https://llvm.org/docs/WritingAnLLVMNewPMPass.html#required-passes) +method in your pass. Alternatively, you can specify `-O1` or higher. +Otherwise the new pass manager will register the pass but your pass will not be +executed. + +As noted [earlier](#llvm-plugins-as-shared-objecs), all examples in this file +use the `*.so` extension for pass plugins. When working on Mac OS, use +`*.dylib` instead. + +## OpcodeCounter +**OpcodeCounter** is an Analysis pass that prints a summary of the [LLVM IR +opcodes](https://github.com/llvm/llvm-project/blob/release/14.x/llvm/lib/IR/Instruction.cpp#L347-L426) +encountered in every function in the input module. This pass can be [run +automatically](#auto-registration-with-optimisation-pipelines) with one of the +pre-defined optimisation pipelines. However, let's use our tried and tested method +first. 
+ +### Run the pass +We will use +[input_for_cc.c](https://github.com/banach-space/llvm-tutor/blob/main/inputs/input_for_cc.c) +to test **OpcodeCounter**. Since **OpcodeCounter** is an Analysis pass, we want +**opt** to print its results. There are two ways of achieving this. First, you +need to choose which pass manager you want to use (see +[here](#about-pass-managers-in-llvm) for more details). Next: + +* Legacy Pass Manager: use the `-analyze` command line option. This option is + used to instruct **opt** to print the results of the analysis pass that has + just been run. +* New Pass Manager: Simply use the [printing + pass](#printing-passes-for-the-new-pass-manager) that corresponds to + **OpcodeCounter**. This pass is called `print`. No extra + arguments are needed, but it's a good idea to add `-disable-output` (it is + not required when using `-analyze`). + +```bash +export LLVM_DIR= +# Generate an LLVM file to analyze +$LLVM_DIR/bin/clang -emit-llvm -c /inputs/input_for_cc.c -o input_for_cc.bc +# Run the pass through opt - Legacy PM +$LLVM_DIR/bin/opt -enable-new-pm=0 -load /lib/libOpcodeCounter.so -legacy-opcode-counter -analyze input_for_cc.bc +# Run the pass through opt - New PM +$LLVM_DIR/bin/opt -load-pass-plugin /lib/libOpcodeCounter.so --passes="print" -disable-output input_for_cc.bc +``` + +For `main`, **OpcodeCounter** prints the following summary (note that when running the pass, +a summary for other functions defined in `input_for_cc.bc` is also printed): + +``` +================================================= +LLVM-TUTOR: OpcodeCounter results for `main` +================================================= +OPCODE #N TIMES USED +------------------------------------------------- +load 2 +br 4 +icmp 1 +add 1 +ret 1 +alloca 2 +store 4 +call 4 +------------------------------------------------- +``` + +### Auto-registration with optimisation pipelines +You can run **OpcodeCounter** by simply specifying an optimisation level (e.g. +`-O{1|2|3|s}`). 
This is achieved through auto-registration with the existing +optimisation pass pipelines. Note that you still have to specify the plugin +file to be loaded: + +```bash +$LLVM_DIR/bin/opt -load /lib/libOpcodeCounter.so -O1 input_for_cc.bc +``` +In this example I used the Legacy Pass Manager (the plugin file was specified +with `-load` rather than `-load-pass-plugin`). The auto-registration also works +with the New Pass Manager: + +```bash +$LLVM_DIR/bin/opt -load-pass-plugin /lib/libOpcodeCounter.so --passes='default' input_for_cc.bc +``` + +This is implemented in +[OpcodeCounter.cpp](https://github.com/banach-space/llvm-tutor/blob/main/lib/OpcodeCounter.cpp), +on +[line 122](https://github.com/banach-space/llvm-tutor/blob/main/lib/OpcodeCounter.cpp#L122-L126) for the New PM, and on +[line 159](https://github.com/banach-space/llvm-tutor/blob/main/lib/OpcodeCounter.cpp#L159-L164) for the Legacy PM. +This [section](#about-pass-managers-in-llvm) contains more information about +the pass managers in LLVM. + +## InjectFuncCall +This pass is a _HelloWorld_ example for _code instrumentation_. For every function +defined in the input module, **InjectFuncCall** will add (_inject_) the following +call to [`printf`](https://en.cppreference.com/w/cpp/io/c/fprintf): + +```C +printf("(llvm-tutor) Hello from: %s\n(llvm-tutor) number of arguments: %d\n", FuncName, FuncNumArgs) +``` +This call is added at the beginning of each function (i.e. before any other +instruction). `FuncName` is the name of the function and `FuncNumArgs` is the +number of arguments that the function takes. 
+ +### Run the pass +We will use +[input_for_hello.c](https://github.com/banach-space/llvm-tutor/blob/main/inputs/input_for_hello.c) +to test **InjectFuncCall**: + +```bash +export LLVM_DIR= +# Generate an LLVM file to analyze +$LLVM_DIR/bin/clang -O0 -emit-llvm -c /inputs/input_for_hello.c -o input_for_hello.bc +# Run the pass through opt - Legacy PM +$LLVM_DIR/bin/opt -enable-new-pm=0 -load /lib/libInjectFuncCall.so -legacy-inject-func-call input_for_hello.bc -o instrumented.bin +# Run the pass through opt - New PM +$LLVM_DIR/bin/opt -load-pass-plugin /lib/libInjectFuncCall.so --passes="inject-func-call" input_for_hello.bc -o instrumented.bin +``` +This generates `instrumented.bin`, which is the instrumented version of +`input_for_hello.bc`. In order to verify that **InjectFuncCall** worked as +expected, you can either check the output file (and verify that it contains +extra calls to `printf`) or run it: + +``` +$LLVM_DIR/bin/lli instrumented.bin +(llvm-tutor) Hello from: main +(llvm-tutor) number of arguments: 2 +(llvm-tutor) Hello from: foo +(llvm-tutor) number of arguments: 1 +(llvm-tutor) Hello from: bar +(llvm-tutor) number of arguments: 2 +(llvm-tutor) Hello from: foo +(llvm-tutor) number of arguments: 1 +(llvm-tutor) Hello from: fez +(llvm-tutor) number of arguments: 3 +(llvm-tutor) Hello from: bar +(llvm-tutor) number of arguments: 2 +(llvm-tutor) Hello from: foo +(llvm-tutor) number of arguments: 1 +``` + +### InjectFuncCall vs HelloWorld +You might have noticed that **InjectFuncCall** is somewhat similar to +[**HelloWorld**](#helloworld-your-first-pass). In both cases the pass visits +all functions, prints their names and the number of arguments. The difference +between the two passes becomes quite apparent when you compare the output +generated for the same input file, e.g. `input_for_hello.c`. 
The number of +times `Hello from` is printed is either: +* once per every function call in the case of **InjectFuncCall**, or +* once per function definition in the case of **HelloWorld**. + +This makes perfect sense and hints at how different the two passes are. Whether to +print `Hello from` is determined at either: +* run-time for **InjectFuncCall**, or +* compile-time for **HelloWorld**. + +Also, note that in the case of **InjectFuncCall** we had to first run the pass +with **opt** and then execute the instrumented IR module in order to see the +output. For **HelloWorld** it was sufficient to run the pass with **opt**. + +## StaticCallCounter +The **StaticCallCounter** pass counts the number of _static_ function calls in +the input LLVM module. _Static_ refers to the fact that these function calls +are compile-time calls (i.e. visible during the compilation). This is in +contrast to _dynamic_ function calls, i.e. function calls encountered at +run-time (when the compiled module is run). The distinction becomes apparent +when analysing function calls within loops, e.g.: +```c + for (i = 0; i < 10; i++) + foo(); +``` +Although at run-time `foo` will be executed 10 times, **StaticCallCounter** +will report only 1 function call. + +This pass will only consider direct function calls. Function calls via +function pointers are not taken into account. 
+ +### Run the pass through **opt** +We will use +[input_for_cc.c](https://github.com/banach-space/llvm-tutor/blob/main/inputs/input_for_cc.c) +to test **StaticCallCounter**: + +```bash +export LLVM_DIR= +# Generate an LLVM file to analyze +$LLVM_DIR/bin/clang -emit-llvm -c /inputs/input_for_cc.c -o input_for_cc.bc +# Run the pass through opt - Legacy PM +$LLVM_DIR/bin/opt -enable-new-pm=0 -load /lib/libStaticCallCounter.so -legacy-static-cc -analyze input_for_cc.bc +``` +You should see the following output: + +``` +================================================= +LLVM-TUTOR: static analysis results +================================================= +NAME #N DIRECT CALLS +------------------------------------------------- +foo 3 +bar 2 +fez 1 +------------------------------------------------- +``` + +Note the extra command line option above: `-analyze`. It's required to inform +**opt** to print the results of the analysis to `stdout`. We discussed this +option in more detail [here](#run-the-pass). + +### Run the pass through `static` +You can run **StaticCallCounter** through a standalone tool called `static`. +`static` is an LLVM based tool implemented in +[StaticMain.cpp](https://github.com/banach-space/llvm-tutor/blob/main/tools/StaticMain.cpp). +It is a command line wrapper that allows you to run **StaticCallCounter** +without the need for **opt**: + +```bash +/bin/static input_for_cc.bc +``` +It is an example of a relatively basic static analysis tool. Its implementation +demonstrates how basic pass management in LLVM works (i.e. it handles that for +itself instead of relying on **opt**). + +## DynamicCallCounter +The **DynamicCallCounter** pass counts the number of _run-time_ (i.e. +encountered during the execution) function calls. It does so by inserting +call-counting instructions that are executed every time a function is called. +Only calls to functions that are _defined_ in the input module are counted. 
+This pass builds on top of ideas presented in +[**InjectFuncCall**](#injectfunccall). You may want to experiment with that +example first. + +### Run the pass +We will use +[input_for_cc.c](https://github.com/banach-space/llvm-tutor/blob/main/inputs/input_for_cc.c) +to test **DynamicCallCounter**: + +```bash +export LLVM_DIR= +# Generate an LLVM file to analyze +$LLVM_DIR/bin/clang -emit-llvm -c /inputs/input_for_cc.c -o input_for_cc.bc +# Instrument the input file +$LLVM_DIR/bin/opt -enable-new-pm=0 -load /lib/libDynamicCallCounter.so -legacy-dynamic-cc input_for_cc.bc -o instrumented_bin +``` +This generates `instrumented_bin`, which is the instrumented version of +`input_for_cc.bc`. In order to verify that **DynamicCallCounter** worked as +expected, you can either check the output file (and verify that it contains +new call-counting instructions) or run it: + +```bash +# Run the instrumented binary +$LLVM_DIR/bin/lli -jit-kind=mcjit ./instrumented_bin +``` +You will see the following output: + +``` +================================================= +LLVM-TUTOR: dynamic analysis results +================================================= +NAME #N DIRECT CALLS +------------------------------------------------- +foo 13 +bar 2 +fez 1 +main 1 +``` + +### DynamicCallCounter vs StaticCallCounter +The numbers of function calls reported by **DynamicCallCounter** and +**StaticCallCounter** are different, but both results are correct. They +correspond to _run-time_ and _compile-time_ function calls respectively. Note +also that for **StaticCallCounter** it was sufficient to run the pass through +**opt** to have the summary printed. For **DynamicCallCounter** we had to _run +the instrumented binary_ to see the output. This is similar to what we observed +when comparing [HelloWorld and InjectFuncCall](#injectfunccall-vs-helloworld). 
+ +## Mixed Boolean Arithmetic Transformations +These passes implement [mixed +boolean arithmetic](https://tel.archives-ouvertes.fr/tel-01623849/document) +transformations. Similar transformations are often used in code obfuscation (you +may also know them from [Hacker's +Delight](https://www.amazon.co.uk/Hackers-Delight-Henry-S-Warren/dp/0201914654)) +and are a great illustration of what LLVM passes can be used for, and how. + +Similar transformations are possible at the source-code level. The relevant +Clang plugins are available in +[**clang-tutor**](https://github.com/banach-space/clang-tutor#obfuscator). + +### MBASub +The **MBASub** pass implements this rather basic expression: + +``` +a - b == (a + ~b) + 1 +``` +Basically, it replaces all instances of integer `sub` according to the above +formula. The corresponding LIT tests verify that both the formula and the +implementation are correct. + +#### Run the pass +We will use +[input_for_mba_sub.c](https://github.com/banach-space/llvm-tutor/blob/main/inputs/input_for_mba_sub.c) +to test **MBASub**: + +```bash +export LLVM_DIR= +$LLVM_DIR/bin/clang -emit-llvm -S /inputs/input_for_mba_sub.c -o input_for_sub.ll +$LLVM_DIR/bin/opt -load /lib/libMBASub.so -legacy-mba-sub -S input_for_sub.ll -o out.ll +``` + +### MBAAdd +The **MBAAdd** pass implements a slightly more involved formula that is only +valid for 8-bit integers: + +``` +a + b == (((a ^ b) + 2 * (a & b)) * 39 + 23) * 151 + 111 +``` +Similarly to `MBASub`, it replaces all instances of integer `add` according to +the above identity, but only for 8-bit integers. The LIT tests verify that both +the formula and the implementation are correct. 
+ +#### Run the pass +We will use +[input_for_mba.c](https://github.com/banach-space/llvm-tutor/blob/main/inputs/input_for_mba.c) +to test **MBAAdd**: + +```bash +export LLVM_DIR= +$LLVM_DIR/bin/clang -O1 -emit-llvm -S /inputs/input_for_mba.c -o input_for_mba.ll +$LLVM_DIR/bin/opt -load /lib/libMBAAdd.so -legacy-mba-add -S input_for_mba.ll -o out.ll +``` +You can also specify the level of _obfuscation_ on a scale of `0.0` to `1.0`, with +`0` corresponding to no obfuscation and `1` meaning that all `add` instructions +are to be replaced with `(((a ^ b) + 2 * (a & b)) * 39 + 23) * 151 + 111`, e.g.: +```bash +$LLVM_DIR/bin/opt -load /lib/libMBAAdd.so -legacy-mba-add -mba-ratio=0.3 /inputs/input_for_mba.c -o out.ll +``` + +## RIV +**RIV** is an analysis pass that for each [basic +block](http://llvm.org/docs/ProgrammersManual.html#the-basicblock-class) BB in +the input function computes the set of reachable integer values, i.e. the integer +values that are visible (i.e. can be used) in BB. Since the pass operates on +the LLVM IR representation of the input file, it takes into account all values +that have [integer type](https://llvm.org/docs/LangRef.html#integer-type) in +the [LLVM IR](https://llvm.org/docs/LangRef.html) sense. In particular, since +at the LLVM IR level booleans are represented as 1-bit wide integers (i.e. +`i1`), you will notice that booleans are also included in the result. + +This pass demonstrates how to request results from other analysis passes in +LLVM. In particular, it relies on the [Dominator +Tree](https://en.wikipedia.org/wiki/Dominator_(graph_theory)) analysis pass +from LLVM, which is used to obtain the dominator tree for the basic blocks +in the input function. 
+ +### Run the pass +We will use +[input_for_riv.c](https://github.com/banach-space/llvm-tutor/blob/main/inputs/input_for_riv.c) +to test **RIV**: + +```bash +export LLVM_DIR= +# Generate an LLVM file to analyze +$LLVM_DIR/bin/clang -emit-llvm -S -O1 /inputs/input_for_riv.c -o input_for_riv.ll +# Run the pass through opt - Legacy PM +$LLVM_DIR/bin/opt -enable-new-pm=0 -load /lib/libRIV.so -legacy-riv -analyze input_for_riv.ll +``` +You will see the following output: + +``` +================================================= +LLVM-TUTOR: RIV analysis results +================================================= +BB id Reachable Ineger Values +------------------------------------------------- +BB %entry + i32 %a + i32 %b + i32 %c +BB %if.then + %add = add nsw i32 %a, 123 + %cmp = icmp sgt i32 %a, 0 + i32 %a + i32 %b + i32 %c +BB %if.end8 + %add = add nsw i32 %a, 123 + %cmp = icmp sgt i32 %a, 0 + i32 %a + i32 %b + i32 %c +BB %if.then2 + %mul = mul nsw i32 %b, %a + %div = sdiv i32 %b, %c + %cmp1 = icmp eq i32 %mul, %div + %add = add nsw i32 %a, 123 + %cmp = icmp sgt i32 %a, 0 + i32 %a + i32 %b + i32 %c +BB %if.else + %mul = mul nsw i32 %b, %a + %div = sdiv i32 %b, %c + %cmp1 = icmp eq i32 %mul, %div + %add = add nsw i32 %a, 123 + %cmp = icmp sgt i32 %a, 0 + i32 %a + i32 %b + i32 %c +``` + +Note the extra command line option above: `-analyze`. It's required to inform +**opt** to print the results of the analysis to `stdout`. We discussed this +option in more detail [here](#run-the-pass). + +## DuplicateBB +This pass will duplicate all basic blocks in a module, with the exception of +basic blocks for which there are no reachable integer values (identified through +the **RIV** pass). An example of such a basic block is the entry block in a +function that: +* takes no arguments and +* is embedded in a module that defines no global values. 
+ +Basic blocks are duplicated by first inserting an `if-then-else` construct and +then cloning all the instructions from the original basic block (with the +exception of [PHI +nodes](https://en.wikipedia.org/wiki/Static_single_assignment_form)) into two +new basic blocks (clones of the original basic block). The `if-then-else` +construct is introduced as a non-trivial mechanism that decides which of the +cloned basic blocks to branch to. This condition is equivalent to: + +```cpp +if (var == 0) + goto clone 1 +else + goto clone 2 +``` +in which: +* `var` is a randomly picked variable from the `RIV` set for the current basic + block +* `clone 1` and `clone 2` are labels for the cloned basic blocks. + +The complete transformation looks like this: + +```c +BEFORE: AFTER: +------- ------ + [ if-then-else ] + DuplicateBB / \ +[ BB ] ------------> [clone 1] [clone 2] + \ / + [ tail ] + +LEGEND: +------- +[BB] - the original basic block +[if-then-else] - a new basic block that contains the if-then-else statement (inserted by DuplicateBB) +[clone 1|2] - two new basic blocks that are clones of BB (inserted by DuplicateBB) +[tail] - the new basic block that merges [clone 1] and [clone 2] (inserted by DuplicateBB) +``` +As depicted above, **DuplicateBB** replaces qualifying basic blocks with 4 new +basic blocks. This is implemented through LLVM's +[SplitBlockAndInsertIfThenElse](https://github.com/llvm/llvm-project/blob/release/14.x/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h#L468). +**DuplicateBB** does all the necessary preparation and clean-up. In other +words, it's an elaborate wrapper for LLVM's `SplitBlockAndInsertIfThenElse`. + +### Run the pass +This pass depends on the **RIV** pass, which also needs to be loaded in order for +**DuplicateBB** to work. Let's use +[input_for_duplicate_bb.c](https://github.com/banach-space/llvm-tutor/blob/main/inputs/input_for_duplicate_bb.c) +as our sample input. 
First, generate the LLVM file: + +```bash +export LLVM_DIR= +$LLVM_DIR/bin/clang -emit-llvm -S -O1 /inputs/input_for_duplicate_bb.c -o input_for_duplicate_bb.ll +``` + +Function `foo` in `input_for_duplicate_bb.ll` should look like this (all metadata has been stripped): + +```llvm +define i32 @foo(i32) { + ret i32 1 +} +``` +Note that there's only one basic block (the _entry_ block) and that `foo` takes +one argument (this means that the result from **RIV** will be a non-empty set). +We will now apply **DuplicateBB** to `foo`: + +```bash +$LLVM_DIR/bin/opt -load /lib/libRIV.so -load /lib/libDuplicateBB.so -legacy-duplicate-bb -S input_for_duplicate_bb.ll -o duplicate.ll +``` +After the instrumentation `foo` will look like this (all metadata has been stripped): + +```llvm +define i32 @foo(i32) { +lt-if-then-else-0: + %2 = icmp eq i32 %0, 0 + br i1 %2, label %lt-if-then-0, label %lt-else-0 + +clone-1-0: + br label %lt-tail-0 + +clone-2-0: + br label %lt-tail-0 + +lt-tail-0: + ret i32 1 +} +``` +There are four basic blocks instead of one. All new basic blocks end with a +numeric id of the original basic block (`0` in this case). `lt-if-then-else-0` +contains the new `if-then-else` condition. `clone-1-0` and `clone-2-0` are +clones of the original basic block in `foo`. `lt-tail-0` is the extra basic +block that's required to merge `clone-1-0` and `clone-2-0`. + +## MergeBB +**MergeBB** will merge qualifying basic blocks that are identical. To some +extent, this pass reverts the transformations introduced by **DuplicateBB**. 
+This is illustrated below: + +```c +BEFORE: AFTER DuplicateBB: AFTER MergeBB: +------- ------------------ -------------- + [ if-then-else ] [ if-then-else* ] + DuplicateBB / \ MergeBB | +[ BB ] ------------> [clone 1] [clone 2] --------> [ clone ] + \ / | + [ tail ] [ tail* ] + +LEGEND: +------- +[BB] - the original basic block +[if-then-else] - a new basic block that contains the if-then-else statement (**DuplicateBB**) +[clone 1|2] - two new basic blocks that are clones of BB (**DuplicateBB**) +[tail] - the new basic block that merges [clone 1] and [clone 2] (**DuplicateBB**) +[clone] - [clone 1] and [clone 2] after merging, this block should be very similar to [BB] (**MergeBB**) +[label*] - [label] after being updated by **MergeBB** +``` +Recall that **DuplicateBB** replaces all qualifying basic block with four new +basic blocks, two of which are clones of the original block. **MergeBB** will +merge those two clones back together, but it will not remove the remaining two +blocks added by **DuplicateBB** (it will update them though). + +### Run the pass +Lets use the following IR implementation of `foo` as input. Note that basic +blocks 3 and 5 are identical and can safely be merged: + +```llvm +define i32 @foo(i32) { + %2 = icmp eq i32 %0, 19 + br i1 %2, label %3, label %5 + +;