Skip to content

Commit c9fdebc

Browse files
Hotfix prompt caching introduced in ggml-org#1169, fixes ggml-org#1257
1 parent f0d70f1 commit c9fdebc

File tree

2 files changed

+4 -3 lines changed

2 files changed

+4 -3 lines changed

examples/main/main.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -404,7 +404,7 @@ int main(int argc, char ** argv) {
404404
// optionally save the session on first sample (for faster prompt loading next time)
405405
if (!path_session.empty() && need_to_save_session) {
406406
need_to_save_session = false;
407-
llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
407+
llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size() - 1); // FIXME: -1 is a hack
408408
}
409409

410410
llama_token id = 0;

llama.cpp

+3 -2
Original file line number | Diff line number | Diff line change
@@ -2701,7 +2701,7 @@ size_t llama_load_session_file(struct llama_context * ctx, const char * path_ses
27012701
const uint32_t magic = file.read_u32();
27022702
const uint32_t version = file.read_u32();
27032703

2704-
if (!(magic == 'ggsn' && version == 0)) {
2704+
if (!(magic == 'ggsn' && version == 1)) {
27052705
fprintf(stderr, "%s : unknown (magic, version) for session file: %08x, %08x\n", __func__, magic, version);
27062706
return 0;
27072707
}
@@ -2724,6 +2724,7 @@ size_t llama_load_session_file(struct llama_context * ctx, const char * path_ses
27242724
const size_t n_orig_state_size = llama_get_state_size(ctx);
27252725
if (n_state_size != n_orig_state_size) {
27262726
fprintf(stderr, "%s : failed to validate state size\n", __func__);
2727+
return 0;
27272728
}
27282729
std::unique_ptr<uint8_t[]> state_data(new uint8_t[n_state_size]);
27292730
file.read_raw(state_data.get(), n_state_size);
@@ -2739,7 +2740,7 @@ size_t llama_save_session_file(struct llama_context * ctx, const char * path_ses
27392740
llama_copy_state_data(ctx, state_data.get());
27402741

27412742
file.write_u32('ggsn'); // magic
2742-
file.write_u32(0); // version
2743+
file.write_u32(1); // version
27432744
file.write_raw(&ctx->model.hparams, sizeof(llama_hparams));
27442745

27452746
file.write_u32((uint32_t) n_token_count); // REVIEW

0 commit comments

Comments (0)