From bc7a6faaa3733e88a8a863f9e6ae17049bbb70f1 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Tue, 18 Apr 2023 01:30:04 -0400
Subject: [PATCH] Add bindings for LoRA adapters. Closes #88

---
 examples/llama_cpp.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/examples/llama_cpp.py b/examples/llama_cpp.py
index 935017ab1e8ae..c2d1ace63ed05 100644
--- a/examples/llama_cpp.py
+++ b/examples/llama_cpp.py
@@ -114,7 +114,9 @@ class llama_context_params(Structure):
 LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3)  # except 1d tensors
-LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(4)  # tok_embeddings.weight and output.weight are F16
+LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
+    4
+)  # tok_embeddings.weight and output.weight are F16
 
 # Functions
 
@@ -177,6 +179,22 @@ def llama_model_quantize(
 _lib.llama_model_quantize.restype = c_int
 
 
+# Apply a LoRA adapter to a loaded model
+# path_base_model is the path to a higher quality model to use as a base for
+# the layers modified by the adapter. Can be NULL to use the current loaded model.
+# The model needs to be reloaded before applying a new adapter, otherwise the adapter
+# will be applied on top of the previous one
+# Returns 0 on success
+def llama_apply_lora_from_file(
+    ctx: llama_context_p, path_lora: bytes, path_base_model: bytes, n_threads: c_int
+) -> c_int:
+    return _lib.llama_apply_lora_from_file(ctx, path_lora, path_base_model, n_threads)
+
+
+_lib.llama_apply_lora_from_file.argtypes = [llama_context_p, c_char_p, c_char_p, c_int]
+_lib.llama_apply_lora_from_file.restype = c_int
+
+
 # Returns the KV cache that will contain the context for the
 # ongoing prediction with the model.
 def llama_get_kv_cache(ctx: llama_context_p):
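
A minimal usage sketch for the new binding follows. It assumes examples/llama_cpp.py also exposes llama_context_default_params, llama_init_from_file, and llama_free (the upstream llama.cpp C API of the same era), and that the module is importable as llama_cpp; the model and adapter paths are placeholders.

    import llama_cpp

    # Load a base model first; the adapter is applied to this context.
    params = llama_cpp.llama_context_default_params()
    ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model-q4_0.bin", params)

    # path_base_model may be None (NULL) to patch the currently loaded model,
    # or a path to a higher-quality (e.g. f16) model to use as the base.
    err = llama_cpp.llama_apply_lora_from_file(
        ctx,
        path_lora=b"./lora/ggml-adapter-model.bin",  # placeholder adapter path
        path_base_model=None,
        n_threads=4,
    )
    if err != 0:
        raise RuntimeError("llama_apply_lora_from_file failed")

    llama_cpp.llama_free(ctx)

As the comment in the patch notes, applying a second adapter requires reloading the model first; otherwise the new adapter is applied on top of the previous one.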