
Commit 35abf89

Add bindings for LoRA adapters. Closes ggml-org#88
1 parent 3f68e95 commit 35abf89

File tree

1 file changed: llama_cpp/llama_cpp.py (+19 −1 lines)

Diff for: llama_cpp/llama_cpp.py

+19-1
@@ -114,7 +114,9 @@ class llama_context_params(Structure):
 LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3)  # except 1d tensors
-LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(4)  # tok_embeddings.weight and output.weight are F16
+LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
+    4
+)  # tok_embeddings.weight and output.weight are F16

 # Functions

@@ -175,6 +177,22 @@ def llama_model_quantize(fname_inp: bytes, fname_out: bytes, itype: c_int) -> c_
 _lib.llama_model_quantize.restype = c_int


+# Apply a LoRA adapter to a loaded model
+# path_base_model is the path to a higher quality model to use as a base for
+# the layers modified by the adapter. Can be NULL to use the current loaded model.
+# The model needs to be reloaded before applying a new adapter, otherwise the adapter
+# will be applied on top of the previous one
+# Returns 0 on success
+def llama_apply_lora_from_file(
+    ctx: llama_context_p, path_lora: bytes, path_base_model: bytes, n_threads: c_int
+) -> c_int:
+    return _lib.llama_apply_lora_from_file(ctx, path_lora, path_base_model, n_threads)
+
+
+_lib.llama_apply_lora_from_file.argtypes = [llama_context_p, c_char_p, c_char_p, c_int]
+_lib.llama_apply_lora_from_file.restype = c_int
+
+
 # Returns the KV cache that will contain the context for the
 # ongoing prediction with the model.
 def llama_get_kv_cache(ctx: llama_context_p):
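
For reference, a minimal usage sketch of the new binding (not part of this diff). It assumes the llama_context_default_params, llama_init_from_file, and llama_free bindings already exposed elsewhere in llama_cpp.py; the model and adapter paths are placeholders.

# Minimal sketch: apply a LoRA adapter right after loading a model.
# Paths are placeholders; adjust to your local files.
import llama_cpp

params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model-q4_0.bin", params)

# Apply the adapter to the freshly loaded model. Passing None for
# path_base_model is converted to NULL by ctypes, so the currently
# loaded weights are used as the base.
ret = llama_cpp.llama_apply_lora_from_file(
    ctx,
    b"./models/lora/ggml-adapter-model.bin",
    None,
    4,  # n_threads
)
if ret != 0:
    raise RuntimeError("llama_apply_lora_from_file failed")

# ... run inference as usual, then free the context.
llama_cpp.llama_free(ctx)

Note that, per the comment in the diff, applying a second adapter requires reloading the model first; otherwise the new adapter is applied on top of the previous one.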
