We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 62f1f99 commit 4bb8d50 (Copy full SHA for 4bb8d50)
src/llama.cpp
@@ -13742,9 +13742,6 @@ struct llm_build_context {
13742
struct ggml_cgraph * build_nemotron() {
13743
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
13744
13745
- // mutable variable, needed during the last layer of the computation to skip unused tokens
13746
- int32_t n_tokens = this->n_tokens;
13747
-
13748
const int64_t n_embd_head = hparams.n_embd_head_v;
13749
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
13750
//GGML_ASSERT(n_embd_head == hparams.n_rot);
0 commit comments