Commit c72289e

Merge branch 'master' into Nexes_CQ20

2 parents: eaee12e + 190a37d

19 files changed (+1593, -363 lines)

README.md

Lines changed: 2 additions & 0 deletions
@@ -93,6 +93,7 @@ Typically finetunes of the base models below are supported as well.
 - [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a)
 - [x] [Jais](https://huggingface.co/inceptionai/jais-13b-chat)
 - [x] [Bielik-11B-v2.3](https://huggingface.co/collections/speakleash/bielik-11b-v23-66ee813238d9b526a072408a)
+- [x] [RWKV-6](https://github.com/BlinkDL/RWKV-LM)
 
 (instructions for supporting more models: [HOWTO-add-model.md](./docs/development/HOWTO-add-model.md))
@@ -173,6 +174,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [LARS - The LLM & Advanced Referencing Solution](https://github.com/abgulati/LARS) (AGPL)
 - [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT)
 - [Llama Assistant](https://github.com/vietanhdev/llama-assistant) (GPL)
+- [PocketPal AI - An iOS and Android App](https://github.com/a-ghorbani/pocketpal-ai) (MIT)
 
 *(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*

common/arg.cpp

Lines changed: 3 additions & 3 deletions
@@ -1097,7 +1097,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_POOLING"));
     add_opt(common_arg(
-        {"--attention"}, "{causal,non,causal}",
+        {"--attention"}, "{causal,non-causal}",
         "attention type for embeddings, use model default if unspecified",
         [](common_params & params, const std::string & value) {
             /**/ if (value == "causal") { params.attention_type = LLAMA_ATTENTION_TYPE_CAUSAL; }
@@ -1695,7 +1695,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_BENCH}));
     add_opt(common_arg(
         {"--embd-normalize"}, "N",
-        string_format("normalisation for embendings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)", params.embd_normalize),
+        string_format("normalisation for embeddings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)", params.embd_normalize),
         [](common_params & params, int value) {
             params.embd_normalize = value;
         }
@@ -1709,7 +1709,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
     add_opt(common_arg(
         {"--embd-separator"}, "STRING",
-        "separator of embendings (default \\n) for example \"<#sep#>\"",
+        "separator of embeddings (default \\n) for example \"<#sep#>\"",
         [](common_params & params, const std::string & value) {
             params.embd_sep = value;
         }
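
As a reference for the modes enumerated in the fixed help text, here is a minimal sketch of what each `--embd-normalize` value computes. This is an illustration only: the function name is made up, and it is not the actual implementation in common/common.cpp.

```cpp
#include <algorithm>
#include <cmath>

// Illustrative sketch of the --embd-normalize modes from the help text:
// -1 = none, 0 = max absolute int16, 1 = taxicab (L1), 2 = euclidean (L2),
// >2 = generic p-norm. Not the actual llama.cpp implementation.
static void embd_normalize_sketch(const float * inp, float * out, int n, int embd_norm) {
    double sum = 0.0;
    switch (embd_norm) {
        case -1: // no normalisation
            sum = 1.0;
            break;
        case 0: // scale so the largest |component| maps to the int16 range
            for (int i = 0; i < n; i++) {
                sum = std::max(sum, (double) std::fabs(inp[i]));
            }
            sum /= 32760.0; // i.e. out[i] = inp[i] * 32760 / max|inp|
            break;
        case 2: // euclidean (L2) norm
            for (int i = 0; i < n; i++) {
                sum += (double) inp[i] * inp[i];
            }
            sum = std::sqrt(sum);
            break;
        default: // taxicab (p = 1) and general p-norms
            for (int i = 0; i < n; i++) {
                sum += std::pow(std::fabs((double) inp[i]), embd_norm);
            }
            sum = std::pow(sum, 1.0 / embd_norm);
            break;
    }
    const float norm = sum > 0.0 ? (float) (1.0 / sum) : 0.0f;
    for (int i = 0; i < n; i++) {
        out[i] = inp[i] * norm;
    }
}
```

Mode 0 is handy when embeddings are later quantised to int16; the remaining modes are the usual L1/L2/p-norms, which is what the corrected help string now spells out.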

common/common.cpp

Lines changed: 2 additions & 2 deletions
@@ -1035,7 +1035,7 @@ static ggml_type kv_cache_type_from_str(const std::string & s) {
         return GGML_TYPE_Q5_1;
     }
 
-    throw std::runtime_error("Invalid cache type: " + s);
+    throw std::runtime_error("Unsupported cache type: " + s);
 }
 
 struct llama_context_params common_context_params_to_llama(const common_params & params) {
@@ -1047,7 +1047,7 @@ struct llama_context_params common_context_params_to_llama(const common_params &
     cparams.n_ubatch = params.n_ubatch;
     cparams.n_threads = params.cpuparams.n_threads;
     cparams.n_threads_batch = params.cpuparams_batch.n_threads == -1 ?
-    params.cpuparams.n_threads : params.cpuparams_batch.n_threads;
+    params.cpuparams.n_threads : params.cpuparams_batch.n_threads;
 
     cparams.logits_all = params.logits_all;
     cparams.embeddings = params.embedding;
    cparams.rope_scaling_type = params.rope_scaling_type;

(the second hunk is a whitespace-only change to the alignment of the continuation line)
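
For context on the first hunk: `kv_cache_type_from_str` is the lookup behind the KV-cache type flags (`-ctk`/`--cache-type-k`, `-ctv`/`--cache-type-v`), and the changed line is its fallthrough for unrecognised values. An abbreviated sketch of the pattern, assuming llama.cpp's vendored `ggml.h`; the real function covers more types:

```cpp
#include <stdexcept>
#include <string>

#include "ggml.h" // for ggml_type and the GGML_TYPE_* constants

// Abbreviated sketch: each supported KV-cache type string maps to a ggml
// tensor type, and any other value falls through to the (now clearer)
// "Unsupported cache type" error.
static ggml_type kv_cache_type_from_str_sketch(const std::string & s) {
    if (s == "f16")  { return GGML_TYPE_F16;  }
    if (s == "q8_0") { return GGML_TYPE_Q8_0; }
    if (s == "q4_0") { return GGML_TYPE_Q4_0; }
    if (s == "q5_1") { return GGML_TYPE_Q5_1; }

    throw std::runtime_error("Unsupported cache type: " + s);
}
```

So a typo such as `--cache-type-k q9_9` is now reported as unsupported rather than invalid, which better matches the case of a valid ggml type that simply isn't accepted for the KV cache.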

common/common.h

Lines changed: 2 additions & 2 deletions
@@ -274,9 +274,9 @@ struct common_params {
 
     // embedding
     bool embedding = false; // get only sentence embedding
-    int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
+    int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
     std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
-    std::string embd_sep = "\n"; // separator of embendings
+    std::string embd_sep = "\n"; // separator of embeddings
     bool reranking = false; // enable reranking support on server
 
     // server params

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
@@ -2864,6 +2864,9 @@ def set_vocab(self):
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+        special_vocab.chat_template = "rwkv-world"
+        # hack: Add '\n\n' as the EOT token to make it chat normally
+        special_vocab._set_special_token("eot", 261)
         special_vocab.add_to_gguf(self.gguf_writer)
 
     def set_gguf_parameters(self):

convert_lora_to_gguf.py

Lines changed: 3 additions & 0 deletions
@@ -348,6 +348,9 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
             if ".base_layer.weight" in name:
                 continue
             logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
+            if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
+                logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")
+                logger.error("Hint: if you are using TRL, make sure not to call setup_chat_format()")
             sys.exit(1)
 
         if base_name in tensor_map:
