
Commit 9e8a353

danbev authored and NeoZhangJianyu committed
llama : add --completion-bash option (ggml-org#11846)
This commit adds a new option `--completion-bash` to llama.cpp which outputs a
source-able bash completion script. The motivation for this change is to
provide a more user-friendly experience for users of the llama.cpp
command-line interface. The completion is currently basic and the same set of
options is offered for every llama executable, but this can be improved in the
future if needed.

Example usage:
```console
$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
$ source ~/.llama-completion.bash

$ ./build/bin/llama-server --m<TAB>
--main-gpu         --mirostat         --mirostat-lr      --model            --multiline-input
--min-p            --mirostat-ent     --mlock            --model-url
```
1 parent e54c613 commit 9e8a353

3 files changed: +128 -0 lines changed


README.md (+14)
````diff
@@ -555,3 +555,17 @@ If your issue is with model generation quality, then please at least scan the fo
 
 #### References
 
+
+### Completions
+Command-line completion is available for some environments.
+
+#### Bash Completion
+```bash
+$ build/bin/llama-cli --completion-bash > ~/.llama-completion.bash
+$ source ~/.llama-completion.bash
+```
+Optionally this can be added to your `.bashrc` or `.bash_profile` to load it
+automatically. For example:
+```console
+$ echo "source ~/.llama-completion.bash" >> ~/.bashrc
+```
````
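Since the generated script registers each executable with `complete -F` (see
common/arg.cpp below), a quick way to verify that it loaded is bash's
`complete -p` builtin, which prints the spec registered for a command. A
hypothetical session, assuming the script was generated as above:

```console
$ source ~/.llama-completion.bash
$ complete -p llama-cli
complete -F _llama_completions llama-cli
```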

common/arg.cpp (+113)
```diff
@@ -365,6 +365,108 @@ static void common_params_print_usage(common_params_context & ctx_arg) {
     print_options(specific_options);
 }
 
+static void common_params_print_completion(common_params_context & ctx_arg) {
+    std::vector<common_arg *> common_options;
+    std::vector<common_arg *> sparam_options;
+    std::vector<common_arg *> specific_options;
+
+    for (auto & opt : ctx_arg.options) {
+        if (opt.is_sparam) {
+            sparam_options.push_back(&opt);
+        } else if (opt.in_example(ctx_arg.ex)) {
+            specific_options.push_back(&opt);
+        } else {
+            common_options.push_back(&opt);
+        }
+    }
+
+    printf("_llama_completions() {\n");
+    printf("    local cur prev opts\n");
+    printf("    COMPREPLY=()\n");
+    printf("    cur=\"${COMP_WORDS[COMP_CWORD]}\"\n");
+    printf("    prev=\"${COMP_WORDS[COMP_CWORD-1]}\"\n\n");
+
+    printf("    opts=\"");
+    auto print_options = [](const std::vector<common_arg *> & options) {
+        for (const common_arg * opt : options) {
+            for (const char * arg : opt->args) {
+                printf("%s ", arg);
+            }
+        }
+    };
+
+    print_options(common_options);
+    print_options(sparam_options);
+    print_options(specific_options);
+    printf("\"\n\n");
+
+    printf("    case \"$prev\" in\n");
+    printf("        --model)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gguf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        --grammar-file)\n");
+    printf("            COMPREPLY=( $(compgen -f -X '!*.gbnf' -- \"$cur\") $(compgen -d -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("        *)\n");
+    printf("            COMPREPLY=( $(compgen -W \"${opts}\" -- \"$cur\") )\n");
+    printf("            return 0\n");
+    printf("            ;;\n");
+    printf("    esac\n");
+    printf("}\n\n");
+
+    std::set<std::string> executables = {
+        "llama-batched",
+        "llama-batched-bench",
+        "llama-bench",
+        "llama-cli",
+        "llama-convert-llama2c-to-ggml",
+        "llama-cvector-generator",
+        "llama-embedding",
+        "llama-eval-callback",
+        "llama-export-lora",
+        "llama-gbnf-validator",
+        "llama-gen-docs",
+        "llama-gguf",
+        "llama-gguf-hash",
+        "llama-gguf-split",
+        "llama-gritlm",
+        "llama-imatrix",
+        "llama-infill",
+        "llama-llava-cli",
+        "llama-llava-clip-quantize-cli",
+        "llama-lookahead",
+        "llama-lookup",
+        "llama-lookup-create",
+        "llama-lookup-merge",
+        "llama-lookup-stats",
+        "llama-minicpmv-cli",
+        "llama-parallel",
+        "llama-passkey",
+        "llama-perplexity",
+        "llama-q8dot",
+        "llama-quantize",
+        "llama-quantize-stats",
+        "llama-qwen2vl-cli",
+        "llama-retrieval",
+        "llama-run",
+        "llama-save-load-state",
+        "llama-server",
+        "llama-simple",
+        "llama-simple-chat",
+        "llama-speculative",
+        "llama-speculative-simple",
+        "llama-tokenize",
+        "llama-tts",
+        "llama-vdot"
+    };
+
+    for (const auto& exe : executables) {
+        printf("complete -F _llama_completions %s\n", exe.c_str());
+    }
+}
+
 static std::vector<ggml_backend_dev_t> parse_device_list(const std::string & value) {
     std::vector<ggml_backend_dev_t> devices;
     auto dev_names = string_split<std::string>(value, ',');
```
```diff
@@ -426,6 +528,10 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
             }
             exit(0);
         }
+        if (ctx_arg.params.completion) {
+            common_params_print_completion(ctx_arg);
+            exit(0);
+        }
     } catch (const std::invalid_argument & ex) {
         fprintf(stderr, "%s\n", ex.what());
         ctx_arg.params = params_org;
```
```diff
@@ -494,6 +600,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             exit(0);
         }
     ));
+    add_opt(common_arg(
+        {"--completion-bash"},
+        "print source-able bash completion script for llama.cpp",
+        [](common_params & params) {
+            params.completion = true;
+        }
+    ));
     add_opt(common_arg(
         {"--verbose-prompt"},
         string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
```

common/common.h (+1)
```diff
@@ -298,6 +298,7 @@ struct common_params {
     bool kl_divergence = false; // compute KL divergence
 
     bool usage = false; // print usage
+    bool completion = false; // print source-able completion script
     bool use_color = false; // use color to distinguish generations and inputs
     bool special = false; // enable special token output
     bool interactive = false; // interactive mode
```
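Because the new `completion` flag is carried in `common_params`, any
executable that parses its arguments through the common parser exposes
`--completion-bash`. Sketched from the `printf` calls in
`common_params_print_completion`, the first lines of the generated output
would look like this (expected output, not a captured run):

```console
$ build/bin/llama-server --completion-bash | head -n 3
_llama_completions() {
    local cur prev opts
    COMPREPLY=()
```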
