diff --git a/llama.cpp b/llama.cpp index bed24207db776..ce505295340de 100644 --- a/llama.cpp +++ b/llama.cpp @@ -3,6 +3,7 @@ #include "ggml.h" #include +#include #include #include #include @@ -1757,6 +1758,12 @@ llama_token llama_sample_top_p_top_k( // TODO: avoid this ... const auto last_n_tokens = std::vector(last_n_tokens_data, last_n_tokens_data + last_n_tokens_size); + if (std::abs(temp) < FLT_EPSILON) { + temp = 0.8f; + top_k = 1; + top_p = 0.0f; + } + result = llama_sample_top_p_top_k( *ctx, last_n_tokens,