Merge remote-tracking branch 'ggerganov/master'
* ggerganov/master:
  whisper : Replace WHISPER_PRINT_DEBUG with WHISPER_LOG_DEBUG (ggerganov#1681)
  sync : ggml (ggml_scale, ggml_row_size, etc.) (ggerganov#1677)
  docker :  Dockerize whisper.cpp (ggerganov#1674)
  CI : Add coverage for talk-llama when WHISPER_CUBLAS=1 (ggerganov#1672)
  examples : Revert CMakeLists.txt for talk-llama (ggerganov#1669)
  cmake : set default CUDA architectures (ggerganov#1667)
bygreencn committed Dec 25, 2023
2 parents b63ec23 + 37a709f commit d8e9321
Showing 26 changed files with 3,716 additions and 1,646 deletions.
34 changes: 34 additions & 0 deletions .devops/main-cuda.Dockerfile
@@ -0,0 +1,34 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.3.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build
WORKDIR /app

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all
# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable cuBLAS
ENV WHISPER_CUBLAS=1

RUN apt-get update && \
apt-get install -y build-essential \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY . .
RUN make

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
WORKDIR /app

RUN apt-get update && \
apt-get install -y curl ffmpeg \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY --from=build /app /app
ENTRYPOINT [ "bash", "-c" ]
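
For reference, building this image locally might look like the following; the image tag and the `sm_86` value for `CUDA_DOCKER_ARCH` are illustrative assumptions, not part of this commit:

```shell
# Build the fat (all-architecture) CUDA image from the repository root.
docker build -t whisper.cpp:main-cuda -f .devops/main-cuda.Dockerfile .

# Optionally narrow the build to a single GPU architecture (example value,
# assuming the Makefile forwards CUDA_DOCKER_ARCH to nvcc as -arch):
docker build -t whisper.cpp:main-cuda \
  -f .devops/main-cuda.Dockerfile \
  --build-arg CUDA_DOCKER_ARCH=sm_86 .
```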
19 changes: 19 additions & 0 deletions .devops/main.Dockerfile
@@ -0,0 +1,19 @@
FROM ubuntu:22.04 AS build
WORKDIR /app

RUN apt-get update && \
apt-get install -y build-essential \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY . .
RUN make

FROM ubuntu:22.04 AS runtime
WORKDIR /app

RUN apt-get update && \
apt-get install -y curl ffmpeg \
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

COPY --from=build /app /app
ENTRYPOINT [ "bash", "-c" ]
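
Because the entrypoint is `bash -c`, the container takes the whole command as a single string argument. A minimal smoke test, assuming the image was built locally with the tag below:

```shell
# build the CPU image, then run the main binary's help text inside it
docker build -t whisper.cpp:main -f .devops/main.Dockerfile .
docker run --rm whisper.cpp:main "./main --help"
```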
10 changes: 5 additions & 5 deletions .github/workflows/build.yml
@@ -117,7 +117,6 @@ jobs:
-w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
set -e
apt update
- apt install -y clang
apt install -y clang build-essential cmake libsdl2-dev
cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
make
@@ -167,7 +166,7 @@ jobs:
s2arc: x64
jnaPath: win32-x86-64
- sdl2: ON
- s2ver: 2.26.0
+ s2ver: 2.28.5

steps:
- name: Clone
@@ -228,7 +227,7 @@ jobs:
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
s2arc: x64
- sdl2: ON
- s2ver: 2.26.0
+ s2ver: 2.28.5

steps:
- name: Clone
@@ -295,7 +294,7 @@ jobs:
- arch: x64
s2arc: x64
- sdl2: ON
- s2ver: 2.26.0
+ s2ver: 2.28.5

steps:
- name: Clone
@@ -321,7 +320,8 @@
run: >
cmake -S . -B ./build -A ${{ matrix.arch }}
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
- -DWHISPER_CUBLAS=1
+ -DWHISPER_CUBLAS=${{ matrix.cublas }}
+ -DWHISPER_SDL2=${{ matrix.sdl2 }}
- name: Build ${{ matrix.cuda-toolkit }}
run: |
57 changes: 57 additions & 0 deletions .github/workflows/docker.yml
@@ -0,0 +1,57 @@
name: Publish Docker image

on:
pull_request:
push:
branches:
- master

jobs:
push_to_registry:
name: Push Docker image to the GitHub Container Registry
if: github.event.pull_request.draft == false

runs-on: ubuntu-latest
env:
COMMIT_SHA: ${{ github.sha }}
strategy:
matrix:
config:
- { tag: "main", dockerfile: ".devops/main.Dockerfile", platforms: "linux/amd64,linux/arm64" }
- { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platforms: "linux/amd64" }

steps:
- name: Check out the repo
uses: actions/checkout@v3

- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Log in to the GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build and push Docker image (versioned)
if: github.event_name == 'push'
uses: docker/build-push-action@v5
with:
context: .
push: true
platforms: ${{ matrix.config.platforms }}
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
file: ${{ matrix.config.dockerfile }}

- name: Build and push Docker image (tagged)
uses: docker/build-push-action@v5
with:
context: .
push: ${{ github.event_name == 'push' }}
platforms: ${{ matrix.config.platforms }}
tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
file: ${{ matrix.config.dockerfile }}
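
Once this workflow has run on `master`, the published images should be pullable from GHCR under the tags defined in the matrix above; a sketch:

```shell
# pull the CPU and CUDA images published by the workflow
docker pull ghcr.io/ggerganov/whisper.cpp:main
docker pull ghcr.io/ggerganov/whisper.cpp:main-cuda
```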
8 changes: 7 additions & 1 deletion CMakeLists.txt
@@ -526,7 +526,13 @@ endif()

if (GGML_SOURCES_CUDA)
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
-    set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES OFF)
+    # Only configure ggml CUDA architectures if not globally set
+    if (NOT DEFINED GGML_CUDA_ARCHITECTURES)
+        # Not overridden by user, so set defaults
+        set(GGML_CUDA_ARCHITECTURES 52 61 70)
+    endif()
+    message(STATUS "Configuring GGML CUDA architectures: ${GGML_CUDA_ARCHITECTURES}")
+    set_property(TARGET whisper PROPERTY CUDA_ARCHITECTURES ${GGML_CUDA_ARCHITECTURES})
set_property(TARGET whisper PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
endif()
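
With this change the defaults (52, 61, 70) apply only when `GGML_CUDA_ARCHITECTURES` is not already defined, so it can be overridden at configure time; a sketch, assuming a cuBLAS-enabled build:

```shell
# target only Turing (75) and Ampere (86) instead of the defaults
cmake -S . -B build -DWHISPER_CUBLAS=1 -DGGML_CUDA_ARCHITECTURES="75;86"
cmake --build build --config Release
```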

31 changes: 31 additions & 0 deletions README.md
@@ -33,6 +33,7 @@ Supported platforms:
- [x] [WebAssembly](examples/whisper.wasm)
- [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
- [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
+ - [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)

The entire high-level implementation of the model is contained in [whisper.h](whisper.h) and [whisper.cpp](whisper.cpp).
The rest of the code is part of the [ggml](https://github.com/ggerganov/ggml) machine learning library.
@@ -448,6 +449,36 @@ make clean
WHISPER_OPENBLAS=1 make -j
```

## Docker

### Prerequisites

* Docker must be installed and running on your system.
* Create a folder to store large models and intermediate files (e.g. /whisper/models)

### Images

We have two Docker images available for this project:

1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)

### Usage
```shell
# download a model and persist it in a local folder
docker run -it --rm \
  -v path/to/models:/models \
  ghcr.io/ggerganov/whisper.cpp:main "./models/download-ggml-model.sh base /models"

# transcribe an audio file
docker run -it --rm \
  -v path/to/models:/models \
  -v path/to/audios:/audios \
  ghcr.io/ggerganov/whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"

# transcribe an audio file in the samples folder
docker run -it --rm \
  -v path/to/models:/models \
  ghcr.io/ggerganov/whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
```
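
The `main-cuda` image follows the same pattern but needs GPU access; a sketch, assuming the NVIDIA Container Toolkit is installed on the host:

```shell
# run the CUDA-enabled image with the host's GPUs exposed to the container
docker run -it --rm --gpus all \
  -v path/to/models:/models \
  ghcr.io/ggerganov/whisper.cpp:main-cuda "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
```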
## Limitations
- Inference only
26 changes: 5 additions & 21 deletions examples/talk-llama/CMakeLists.txt
@@ -1,30 +1,14 @@
if (WHISPER_SDL2)
    # talk-llama
    set(TARGET talk-llama)
-   #add_executable(${TARGET} talk-llama.cpp llama.cpp)
-   #target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
-   #target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
-
-   # TODO: this is temporary
-   # need to export ggml symbols for MSVC, but too lazy ..
-   add_executable(${TARGET}
-       talk-llama.cpp
-       llama.cpp
-       ../common.cpp
-       ../common-sdl.cpp
-       ../../ggml.c
-       ../../ggml-alloc.c
-       ../../ggml-backend.c
-       ../../ggml-quants.c
-       ../../whisper.cpp)
+   add_executable(${TARGET} talk-llama.cpp llama.cpp)
+   target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
+   target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})

    if(WIN32)
-       # It requires Windows 8.1 or later for PrefetchVirtualMemory
-       target_compile_definitions(${TARGET} PRIVATE -D_WIN32_WINNT=0x0602)
+       # It requires Windows 8.1 or later for PrefetchVirtualMemory
+       target_compile_definitions(${TARGET} PRIVATE -D_WIN32_WINNT=0x0602)
    endif()

-   target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS} ../../)
-   target_link_libraries(${TARGET} PRIVATE ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
-
    include(DefaultTargetOptions)
endif ()