@@ -114,7 +114,9 @@ class llama_context_params(Structure):
 LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2)  # except 1d tensors
 LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3)  # except 1d tensors
-LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(4)  # tok_embeddings.weight and output.weight are F16
+LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
+    4
+)  # tok_embeddings.weight and output.weight are F16


 # Functions
@@ -175,6 +177,22 @@ def llama_model_quantize(fname_inp: bytes, fname_out: bytes, itype: c_int) -> c_
 _lib.llama_model_quantize.restype = c_int


+# Apply a LoRA adapter to a loaded model
+# path_base_model is the path to a higher quality model to use as a base for
+# the layers modified by the adapter. Can be NULL to use the current loaded model.
+# The model needs to be reloaded before applying a new adapter, otherwise the adapter
+# will be applied on top of the previous one
+# Returns 0 on success
+def llama_apply_lora_from_file(
+    ctx: llama_context_p, path_lora: bytes, path_base_model: bytes, n_threads: c_int
+) -> c_int:
+    return _lib.llama_apply_lora_from_file(ctx, path_lora, path_base_model, n_threads)
+
+
+_lib.llama_apply_lora_from_file.argtypes = [llama_context_p, c_char_p, c_char_p, c_int]
+_lib.llama_apply_lora_from_file.restype = c_int
+
+
 # Returns the KV cache that will contain the context for the
 # ongoing prediction with the model.
 def llama_get_kv_cache(ctx: llama_context_p):
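For context, a minimal usage sketch of the new binding follows; it is not part of this diff. It assumes the module is importable as `llama_cpp` and that `llama_context_default_params` and `llama_init_from_file` are bound elsewhere in this file, as in upstream llama.cpp at this point; the model and adapter paths are placeholders.

```python
# Hypothetical usage sketch for llama_apply_lora_from_file; paths are placeholders.
import llama_cpp

# Load a base model first; per the comment above, the adapter must be applied
# to a freshly loaded model (reload before applying a different adapter).
params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model-q4_0.bin", params)

# path_base_model may be None (maps to NULL via the c_char_p argtype) to patch
# the currently loaded model in place.
ret = llama_cpp.llama_apply_lora_from_file(
    ctx,
    b"./models/lora/ggml-adapter-model.bin",  # placeholder adapter path
    None,
    4,  # n_threads; ctypes converts the Python int to c_int
)
if ret != 0:
    raise RuntimeError("llama_apply_lora_from_file failed")
```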