Commit 264f1b5

zdnn: refactor codebase + add docs (#16178)
* zdnn: initial matmul refactor
* ggml-zdnn: rm static from funcs
* ggml-zdnn: update ggml-zdnn.h
* ggml-zdnn: change header files to hpp
* ggml-zdnn: switch to common.hpp
* ggml-zdnn: move mulmat forward around
* ggml-zdnn: rm inline from utils
* ggml-zdnn: code cleanup
* docs: add zDNN docs

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
1 parent 0bc7cc7 commit 264f1b5

File tree

11 files changed: +334 additions, −266 deletions


README.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -274,6 +274,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 | [Vulkan](docs/build.md#vulkan) | GPU |
 | [CANN](docs/build.md#cann) | Ascend NPU |
 | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU |
+| [IBM zDNN](docs/backend/zDNN.md) | IBM Z & LinuxONE |
 | [WebGPU [In Progress]](docs/build.md#webgpu) | All |
 | [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All |
```

docs/backend/zDNN.md

Lines changed: 61 additions & 0 deletions (new file)

# llama.cpp for IBM zDNN Accelerator

## Background

IBM zDNN (Z Deep Neural Network) is a hardware acceleration library designed specifically to leverage the IBM NNPA (Neural Network Processor Assist) accelerator located within IBM Telum I and II processors. It provides significant performance improvements for neural network inference operations.

### Llama.cpp + IBM zDNN

The llama.cpp zDNN backend is designed to enable llama.cpp on IBM z17 and later systems via the IBM zDNN hardware acceleration library.

## Software & Hardware Support

| Hardware Level       | Status        | Verified                   |
| -------------------- | ------------- | -------------------------- |
| IBM z17 / LinuxONE 5 | Supported     | RHEL 9.6, IBM z17, 40 IFLs |
| IBM z16 / LinuxONE 4 | Not Supported |                            |

## Data Types Supported

| Data Type | Status    |
| --------- | --------- |
| F32       | Supported |
| F16       | Supported |
| BF16      | Supported |

## CMake Options

The IBM zDNN backend has the following CMake options that control the behaviour of the backend.

| CMake Option | Default Value | Description                         |
| ------------ | ------------- | ----------------------------------- |
| `GGML_ZDNN`  | `OFF`         | Compile llama.cpp with zDNN support |
| `ZDNN_ROOT`  | `""`          | Override zDNN library lookup        |

## 1. Install zDNN Library

Note: Using the zDNN library provided via `apt` or `yum` may not work correctly, as reported in [#15772](https://github.com/ggml-org/llama.cpp/issues/15772). It is preferred that you compile from source.

```sh
git clone --recurse-submodules https://github.com/IBM/zDNN
cd zDNN

autoreconf .
./configure --prefix=/opt/zdnn-libs

make build
sudo make install
```

## 2. Build llama.cpp

```sh
git clone https://github.com/ggml-org/llama.cpp
cd llama.cpp

cmake -S . -G Ninja -B build \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_ZDNN=ON \
    -DZDNN_ROOT=/opt/zdnn-libs
cmake --build build --config Release -j$(nproc)
```

ggml/include/ggml-zdnn.h

Lines changed: 3 additions & 0 deletions

```diff
@@ -7,6 +7,9 @@
 extern "C" {
 #endif

+// device buffer
+GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void);
+
 GGML_BACKEND_API ggml_backend_reg_t ggml_backend_zdnn_reg(void);

 #ifdef __cplusplus
```

ggml/src/ggml-zdnn/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
zdnn.h

ggml/src/ggml-zdnn/common.hpp

Lines changed: 59 additions & 0 deletions (new file)

```cpp
#ifndef GGML_ZDNN_COMMON_HPP
#define GGML_ZDNN_COMMON_HPP

#include "ggml.h"
#include "ggml-impl.h"

#include "zdnn.h"

#include <vector>
#include <memory>

#define GGML_ZDNN_NAME    "zDNN"
#define GGML_ZDNN_VERSION ZDNN_VERNUM

#define ZDNN_CHECK(stmt)                \
    do {                                \
        zdnn_status status = (stmt);    \
        GGML_ASSERT(status == ZDNN_OK); \
    } while (0);

struct ggml_backend_zdnn_device_context {
    int zdnn_device;
    int zdnn_device_ref_count;

    bool has_parmblkformat_0;
    bool has_parmblkformat_1;  // checks for z17

    size_t max_size;

    char name[128];
};

struct ggml_backend_zdnn_context {
    int device;
    ggml_cgraph * gf;
};

struct ggml_backend_zdnn_buffer {
    void * data;
    ggml_backend_zdnn_buffer * extra;  // for bias, etc.
    size_t size;

    zdnn_tensor_desc pre_tfm_desc;
    zdnn_tensor_desc tfm_desc;
    zdnn_ztensor     ztensor;

    char name[GGML_MAX_NAME];
};

struct ggml_backend_zdnn_buffer_context {
    void * all_data;
    size_t all_size;
    bool   owned;

    int n_buffers;
    std::vector<std::unique_ptr<ggml_backend_zdnn_buffer>> buffers;
};

#endif  // GGML_ZDNN_COMMON_HPP
```

ggml/src/ggml-zdnn/ggml-zdnn-impl.h

Lines changed: 0 additions & 98 deletions
This file was deleted.
