Readme for ONNX Runtime. #137

Merged: 9 commits on Jan 31, 2024
35 changes: 35 additions & 0 deletions bench_onnxruntime/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# ONNX Runtime

[![GitHub Repo](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/microsoft/onnxruntime)


[ONNX (Open Neural Network Exchange) Runtime](https://github.com/microsoft/onnxruntime) is an open-source, cross-platform runtime that enables efficient execution of neural network models trained in various frameworks, promoting interoperability and flexibility when deploying machine learning models. This benchmark implementation uses [HuggingFace Optimum](https://github.com/huggingface/optimum), which supports running models under ONNX Runtime.

### 🚀 Running the ONNX Runtime Benchmark

You can run the ONNX Runtime benchmark using the following command:

```bash
./bench_onnxruntime/bench.sh \
--prompt <value> \ # Prompt string for the benchmark
--max_tokens <value> \ # Maximum number of tokens to generate
--repetitions <value> \ # Number of repetitions to run for the prompt
--log_file <file_path> \ # Path of the .log file to write results to
--device <cpu/cuda/metal> \ # Device on which to run the benchmark
--models_dir <path_to_models> # Directory containing the model weights
```

To get started quickly, you can simply run:

```bash
./bench_onnxruntime/bench.sh -d cuda
```
This will use the default values (see the [bench.sh](/bench_onnxruntime/bench.sh) file) and run the benchmarks. You can find all the benchmark results for ONNX Runtime [here](/docs/llama2.md).


### 👀 Some points to note:

1. ONNX Runtime requires the HuggingFace Llama2-7B weights and converts them into ONNX format using the [setup.sh](/bench_onnxruntime/setup.sh) script. Running this benchmark therefore assumes that you have already agreed to the required terms and conditions and are verified to download the weights.
2. ONNX Runtime on GPU supports only the Float16 precision format.
3. Running Llama 2 with ONNX Runtime on CPU/Metal is too memory intensive, so benchmarking is skipped for those devices.
4. You might not be able to run the current implementation of the ONNX benchmark, since it requires some specific dependencies and Anaconda support. A fix will be reflected in an upcoming version.
4 changes: 3 additions & 1 deletion bench_onnxruntime/bench.py
@@ -54,7 +54,9 @@ def benchmark(self, prompt, max_tokens, repetitions):


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="llama.cpp Benchmark Llama model.")
parser = argparse.ArgumentParser(
description="ONNX Runtime Benchmark for Llama model."
)
parser.add_argument(
"--prompt",
type=str,
58 changes: 37 additions & 21 deletions bench_onnxruntime/bench.sh
@@ -2,36 +2,36 @@

########################################################################################################
# Script: bench.sh
# Description: This script runs benchmarks onnxruntime llama benchmark.
# Description: This script runs the ONNX Runtime Llama-2 benchmark.
#
# Usage: ./bench.sh [OPTIONS]
# OPTIONS:
# -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')
# -r, --repetitions Number of repetitions for benchmarks (default: 2)
# -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)
# -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')
# -p, --prompt Prompt for benchmarks (default: 'Write an essay about the transformer model architecture')
# -r, --repetitions Number of repetitions for benchmarks (default: 10)
# -m, --max_tokens Maximum number of tokens for benchmarks (default: 512)
# -d, --device Device for benchmarks (possible values: 'metal', 'cuda', and 'cpu', default: 'cuda')
# -lf, --log_file Logging file name.
# -md, --models_dir Models directory.
# -h, --help Show this help message
########################################################################################################

set -euo pipefail

CURRENT_DIR="$(pwd)"
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

print_usage() {
echo "Usage: $0 [OPTIONS]"
echo "OPTIONS:"
echo " -p, --prompt Prompt for benchmarks (default: 'Explain what is a transformer')"
echo " -r, --repetitions Number of repetitions for benchmarks (default: 2)"
echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 100)"
echo " -d, --device Device for benchmarks (possible values: 'metal', 'gpu', and 'cpu', default: 'cpu')"
echo " -p, --prompt Prompt for benchmarks (default: 'Write an essay about the transformer model architecture')"
echo " -r, --repetitions Number of repetitions for benchmarks (default: 10)"
echo " -m, --max_tokens Maximum number of tokens for benchmarks (default: 512)"
echo " -d, --device Device for benchmarks (possible values: 'metal', 'cuda', and 'cpu', default: 'cuda')"
echo " -lf, --log_file Logging file name."
echo " -md, --models_dir Models directory."
echo " -h, --help Show this help message"
exit 1
}

check_cuda() {
if command -v nvcc &> /dev/null
then
@@ -57,16 +57,29 @@ check_platform() {
}

check_python() {
if command -v python &> /dev/null
then
echo -e "\nUsing $(python --version)."
if command -v python &> /dev/null; then
PYTHON_CMD="python"
elif command -v python3 &> /dev/null; then
PYTHON_CMD="python3"
else
echo -e "\nPython does not exist."
echo "Python is not installed."
exit 1
fi
}

setup() {

# Create the Logs folder if it does not already exist
LOGS_FOLDER="$CURRENT_DIR/Logs"

if [ -d "$LOGS_FOLDER" ]; then
echo "Folder '$LOGS_FOLDER' already exists. Skipping."
else
# Create the folder
mkdir "$LOGS_FOLDER"
echo "'$LOGS_FOLDER' created."
fi

echo -e "\nSetting up with $SCRIPT_DIR/setup.sh..."
bash "$SCRIPT_DIR"/setup.sh "$1"
}
Expand All @@ -82,7 +95,7 @@ run_benchmarks() {
# shellcheck disable=SC1091
source "$SCRIPT_DIR/venv/bin/activate"

python "$SCRIPT_DIR"/bench.py \
"$PYTHON_CMD" "$SCRIPT_DIR"/bench.py \
--prompt "$PROMPT" \
--repetitions "$REPETITIONS" \
--max_tokens "$MAX_TOKENS" \
@@ -147,15 +160,18 @@ while [ "$#" -gt 0 ]; do
;;
esac
done
# Set default values if not provided
PROMPT="${PROMPT:-"Explain what is a transformer"}"
REPETITIONS="${REPETITIONS:-10}"
MAX_TOKENS="${MAX_TOKENS:-100}"
DEVICE="${DEVICE:-'cpu'}"
LOG_FILENAME="${LOG_FILENAME:-"benchmark_$(date +'%Y%m%d%H%M%S').log"}"

MODELS_DIR="${MODELS_DIR:-"./models"}"

check_platform
check_python
setup "$MODELS_DIR"

# Set default values if not provided
PROMPT="${PROMPT:-"Write an essay about the transformer model architecture"}"
REPETITIONS="${REPETITIONS:-10}"
MAX_TOKENS="${MAX_TOKENS:-512}"
DEVICE="${DEVICE:-cuda}"
LOG_FILENAME="${LOG_FILENAME:-"$LOGS_FOLDER/benchmark_onnx_$(date +'%Y%m%d%H%M%S').log"}"

run_benchmarks "$PROMPT" "$REPETITIONS" "$MAX_TOKENS" "$DEVICE" "$LOG_FILENAME" "$MODELS_DIR"
15 changes: 14 additions & 1 deletion bench_onnxruntime/setup.sh
@@ -13,15 +13,28 @@ if [ "$#" -ne 1 ]; then
exit 1
fi

check_python() {
if command -v python &> /dev/null; then
PYTHON_CMD="python"
elif command -v python3 &> /dev/null; then
PYTHON_CMD="python3"
else
echo "Python is not installed."
exit 1
fi
}

# Define directory paths
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_DIR="$SCRIPT_DIR/venv"
MODELS_FOLDER="$1"
LLAMA_HF_MODEL_DIR="$MODELS_FOLDER/llama-2-7b-hf"
LLAMA_ONNX_MODEL_DIR="$MODELS_FOLDER/llama-2-7b-onnx"

check_python

if [ ! -d "$VENV_DIR" ]; then
python -m venv "$VENV_DIR"
"$PYTHON_CMD" -m venv "$VENV_DIR"
echo "Virtual environment '$VENV_DIR' created."
# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"