From bc7a6faaa3733e88a8a863f9e6ae17049bbb70f1 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Tue, 18 Apr 2023 01:30:04 -0400
Subject: [PATCH] Add bindings for LoRA adapters. Closes #88

---
 examples/llama_cpp.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/examples/llama_cpp.py b/examples/llama_cpp.py
index 935017ab1e8ae..c2d1ace63ed05 100644
--- a/examples/llama_cpp.py
+++ b/examples/llama_cpp.py
@@ -114,7 +114,9 @@ class llama_context_params(Structure):
 LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3)  # except 1d tensors
-LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(4)  # tok_embeddings.weight and output.weight are F16
+LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
+    4
+)  # tok_embeddings.weight and output.weight are F16
 
 # Functions
 
@@ -177,6 +179,22 @@ def llama_model_quantize(
 _lib.llama_model_quantize.restype = c_int
 
 
+# Apply a LoRA adapter to a loaded model
+# path_base_model is the path to a higher quality model to use as a base for
+# the layers modified by the adapter. Can be NULL to use the current loaded model.
+# The model needs to be reloaded before applying a new adapter, otherwise the adapter
+# will be applied on top of the previous one
+# Returns 0 on success
+def llama_apply_lora_from_file(
+    ctx: llama_context_p, path_lora: bytes, path_base_model: bytes, n_threads: c_int
+) -> c_int:
+    return _lib.llama_apply_lora_from_file(ctx, path_lora, path_base_model, n_threads)
+
+
+_lib.llama_apply_lora_from_file.argtypes = [llama_context_p, c_char_p, c_char_p, c_int]
+_lib.llama_apply_lora_from_file.restype = c_int
+
+
 # Returns the KV cache that will contain the context for the
 # ongoing prediction with the model.
 def llama_get_kv_cache(ctx: llama_context_p):
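
A minimal usage sketch for the new binding follows. It assumes examples/llama_cpp.py also exposes llama_context_default_params, llama_init_from_file, and llama_free (the upstream llama.cpp C API of the same era), and that the module is importable as llama_cpp; the model and adapter paths are placeholders.

    import llama_cpp

    # Load a base model first; the adapter is applied to this context.
    params = llama_cpp.llama_context_default_params()
    ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model-q4_0.bin", params)

    # path_base_model may be None (NULL) to patch the currently loaded model,
    # or a path to a higher-quality (e.g. f16) model to use as the base.
    err = llama_cpp.llama_apply_lora_from_file(
        ctx,
        path_lora=b"./lora/ggml-adapter-model.bin",  # placeholder adapter path
        path_base_model=None,
        n_threads=4,
    )
    if err != 0:
        raise RuntimeError("llama_apply_lora_from_file failed")

    llama_cpp.llama_free(ctx)

As the comment in the patch notes, applying a second adapter requires reloading the model first; otherwise the new adapter is applied on top of the previous one.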