22 changes: 22 additions & 0 deletions CMakeLists.txt
@@ -87,6 +87,28 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)

# Find yaml-cpp for YAML config support
find_package(PkgConfig QUIET)
if(PkgConfig_FOUND)
    pkg_check_modules(YAML_CPP QUIET yaml-cpp)
endif()

if(NOT YAML_CPP_FOUND)
    find_package(yaml-cpp QUIET)
    if(yaml-cpp_FOUND)
        set(YAML_CPP_LIBRARIES yaml-cpp)
        set(YAML_CPP_FOUND TRUE)
    endif()
endif()

if(NOT YAML_CPP_FOUND)
    message(STATUS "yaml-cpp not found, YAML config support will be disabled")
    set(LLAMA_YAML_CONFIG OFF)
else()
    message(STATUS "Found yaml-cpp, enabling YAML config support")
    set(LLAMA_YAML_CONFIG ON)
endif()

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
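
The detection above tries pkg-config first and then falls back to yaml-cpp's own CMake package config. As a rough sketch of how to exercise the enabled path on a typical Linux setup (Debian/Ubuntu package name shown; it differs on other distros, e.g. `yaml-cpp-devel` on Fedora):

```bash
# install the yaml-cpp development files so either detection path can find them
sudo apt install libyaml-cpp-dev

# standard llama.cpp configure/build; the configure output should then include
# "Found yaml-cpp, enabling YAML config support"
cmake -B build
cmake --build build --config Release
```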
46 changes: 46 additions & 0 deletions README.md
@@ -326,6 +326,52 @@ To learn more about model quantization, [read this documentation](tools/quantize

</details>

- <details>
<summary>Run with YAML configuration</summary>

llama.cpp supports loading configuration from YAML files using the `--config` flag. This allows you to organize complex configurations and share them easily.

```bash
# Use YAML config
llama-cli --config configs/minimal.yaml

# Combine YAML with flag overrides
llama-cli --config configs/base.yaml --temp 0.5 --predict 256
```

**Precedence rules:**
- Command line flags override YAML config values
- YAML config values override default values
- Order: `flags > yaml > defaults`

**Example YAML config:**
```yaml
# Basic model and generation settings
model: "models/my-model.gguf"
ctx-size: 2048
predict: 128
seed: 42

# Sampling parameters
temp: 0.7
top-k: 40
top-p: 0.9

# Input/output
prompt: "Hello, how are you?"
color: true
```

**Key features:**
- Relative paths in YAML are resolved relative to the config file location
- Unknown YAML keys are rejected with helpful error messages
- Supported CLI flags have corresponding YAML keys: use the long flag name without the leading `--` (short aliases such as `m` and `p` are also accepted)
- Boolean flags can be set as `true`/`false` in YAML
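
For instance, a minimal sketch of the flag-to-key mapping (the config file name and model path below are placeholders):

```bash
# hypothetical config: long flag names become YAML keys, booleans become true/false
cat > my-config.yaml << 'EOF'
model: "models/my-model.gguf"   # resolved relative to my-config.yaml
ctx-size: 4096                  # same as --ctx-size 4096
no-warmup: true                 # same as passing --no-warmup
color: true                     # same as --color
EOF

llama-cli --config my-config.yaml
```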

See `configs/minimal.yaml` and `configs/override.yaml` for example configurations.

</details>

- <details>
<summary>Run in conversation mode with custom chat template</summary>

8 changes: 8 additions & 0 deletions common/CMakeLists.txt
@@ -133,6 +133,14 @@ if (LLAMA_LLGUIDANCE)
    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
endif ()

if(LLAMA_YAML_CONFIG AND YAML_CPP_FOUND)
    target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CONFIG)
    target_link_libraries(${TARGET} PRIVATE ${YAML_CPP_LIBRARIES})
    if(YAML_CPP_INCLUDE_DIRS)
        target_include_directories(${TARGET} PRIVATE ${YAML_CPP_INCLUDE_DIRS})
    endif()
endif()

target_include_directories(${TARGET} PUBLIC . ../vendor)
target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
215 changes: 215 additions & 0 deletions common/arg.cpp
@@ -19,6 +19,10 @@
#define JSON_ASSERT GGML_ASSERT
#include <nlohmann/json.hpp>

#ifdef LLAMA_YAML_CONFIG
#include <yaml-cpp/yaml.h>
#endif

#include <algorithm>
#include <climits>
#include <cstdarg>
@@ -1251,6 +1255,208 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
    return true;
}

#ifdef LLAMA_YAML_CONFIG
bool common_params_load_yaml_config(const std::string & config_path, common_params & params) {
    try {
        YAML::Node config = YAML::LoadFile(config_path);

        std::filesystem::path config_dir = std::filesystem::path(config_path).parent_path();

        std::vector<std::string> valid_keys = common_params_get_valid_yaml_keys();
        std::set<std::string> valid_keys_set(valid_keys.begin(), valid_keys.end());

        for (const auto& kv : config) {
            std::string key = kv.first.as<std::string>();

            if (valid_keys_set.find(key) == valid_keys_set.end()) {
                std::ostringstream oss;
                oss << "Unknown YAML key: '" << key << "'\n";
                oss << "Valid keys are: ";
                for (size_t i = 0; i < valid_keys.size(); ++i) {
                    oss << valid_keys[i];
                    if (i < valid_keys.size() - 1) oss << ", ";
                }
                throw std::runtime_error(oss.str());
            }

            YAML::Node value = kv.second;

            if (key == "model" || key == "m") {
                std::string model_path = value.as<std::string>();
                if (!model_path.empty() && model_path[0] != '/') {
                    model_path = (config_dir / model_path).string();
                }
                params.model.path = model_path;
            } else if (key == "threads" || key == "t") {
                params.cpuparams.n_threads = value.as<int>();
                if (params.cpuparams.n_threads <= 0) {
                    params.cpuparams.n_threads = std::thread::hardware_concurrency();
                }
            } else if (key == "threads-batch" || key == "tb") {
                params.cpuparams_batch.n_threads = value.as<int>();
                if (params.cpuparams_batch.n_threads <= 0) {
                    params.cpuparams_batch.n_threads = std::thread::hardware_concurrency();
                }
            } else if (key == "ctx-size" || key == "c") {
                params.n_ctx = value.as<int>();
            } else if (key == "batch-size" || key == "b") {
                params.n_batch = value.as<int>();
            } else if (key == "ubatch-size" || key == "ub") {
                params.n_ubatch = value.as<int>();
            } else if (key == "predict" || key == "n") {
                params.n_predict = value.as<int>();
            } else if (key == "keep") {
                params.n_keep = value.as<int>();
            } else if (key == "seed" || key == "s") {
                params.sampling.seed = value.as<uint32_t>();
            } else if (key == "temp") {
                params.sampling.temp = value.as<float>();
                params.sampling.temp = std::max(params.sampling.temp, 0.0f);
            } else if (key == "top-k") {
                params.sampling.top_k = value.as<int>();
            } else if (key == "top-p") {
                params.sampling.top_p = value.as<float>();
            } else if (key == "min-p") {
                params.sampling.min_p = value.as<float>();
            } else if (key == "prompt" || key == "p") {
                params.prompt = value.as<std::string>();
            } else if (key == "file" || key == "f") {
                std::string file_path = value.as<std::string>();
                if (!file_path.empty() && file_path[0] != '/') {
                    file_path = (config_dir / file_path).string();
                }
                params.prompt = read_file(file_path);
                params.prompt_file = file_path;
                if (!params.prompt.empty() && params.prompt.back() == '\n') {
                    params.prompt.pop_back();
                }
            } else if (key == "system-prompt" || key == "sys") {
                params.system_prompt = value.as<std::string>();
            } else if (key == "system-prompt-file" || key == "sysf") {
                std::string file_path = value.as<std::string>();
                if (!file_path.empty() && file_path[0] != '/') {
                    file_path = (config_dir / file_path).string();
                }
                params.system_prompt = read_file(file_path);
                if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') {
                    params.system_prompt.pop_back();
                }
} else if (key == "escape" || key == "e") {
params.escape = value.as<bool>();
} else if (key == "interactive" || key == "i") {
params.interactive = value.as<bool>();
} else if (key == "interactive-first" || key == "if") {
params.interactive_first = value.as<bool>();
} else if (key == "multiline-input" || key == "mli") {
params.multiline_input = value.as<bool>();
} else if (key == "color" || key == "co") {
params.use_color = value.as<bool>();
} else if (key == "verbose-prompt") {
params.verbose_prompt = value.as<bool>();
} else if (key == "no-display-prompt") {
params.display_prompt = !value.as<bool>();
} else if (key == "conversation" || key == "cnv") {
if (value.as<bool>()) {
params.conversation_mode = COMMON_CONVERSATION_MODE_ENABLED;
}
} else if (key == "no-conversation" || key == "no-cnv") {
if (value.as<bool>()) {
params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED;
}
} else if (key == "single-turn" || key == "st") {
params.single_turn = value.as<bool>();
} else if (key == "special" || key == "sp") {
params.special = value.as<bool>();
} else if (key == "flash-attn" || key == "fa") {
std::string fa_value = value.as<std::string>();
if (fa_value == "on" || fa_value == "enabled" || fa_value == "1") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
} else if (fa_value == "off" || fa_value == "disabled" || fa_value == "0") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
} else if (fa_value == "auto" || fa_value == "-1") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
}
} else if (key == "no-perf") {
if (value.as<bool>()) {
params.no_perf = true;
params.sampling.no_perf = true;
}
} else if (key == "ignore-eos") {
params.sampling.ignore_eos = value.as<bool>();
} else if (key == "no-warmup") {
params.warmup = !value.as<bool>();
} else if (key == "spm-infill") {
params.spm_infill = value.as<bool>();
} else if (key == "samplers") {
std::string samplers_str = value.as<std::string>();
const auto sampler_names = string_split<std::string>(samplers_str, ';');
params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
} else if (key == "sampling-seq" || key == "sampler-seq") {
std::string seq = value.as<std::string>();
params.sampling.samplers = common_sampler_types_from_chars(seq);
}
}

        return true;
    } catch (const YAML::Exception& e) {
        fprintf(stderr, "YAML parsing error: %s\n", e.what());
        return false;
    } catch (const std::exception& e) {
        fprintf(stderr, "Error loading YAML config: %s\n", e.what());
        return false;
    }
}

std::vector<std::string> common_params_get_valid_yaml_keys() {
    return {
        "model", "m",
        "threads", "t",
        "threads-batch", "tb",
        "ctx-size", "c",
        "batch-size", "b",
        "ubatch-size", "ub",
        "predict", "n",
        "keep",
        "seed", "s",
        "temp",
        "top-k",
        "top-p",
        "min-p",
        "prompt", "p",
        "file", "f",
        "system-prompt", "sys",
        "system-prompt-file", "sysf",
        "escape", "e",
        "interactive", "i",
        "interactive-first", "if",
        "multiline-input", "mli",
        "color", "co",
        "verbose-prompt",
        "no-display-prompt",
        "conversation", "cnv",
        "no-conversation", "no-cnv",
        "single-turn", "st",
        "special", "sp",
        "flash-attn", "fa",
        "no-perf",
        "ignore-eos",
        "no-warmup",
        "spm-infill",
        "samplers",
        "sampling-seq", "sampler-seq"
    };
}
#else
bool common_params_load_yaml_config(const std::string & config_path, common_params & params) {
    // parameters intentionally unused when built without yaml-cpp
    (void) config_path;
    (void) params;
    fprintf(stderr, "YAML config support not available (yaml-cpp not found during build)\n");
    return false;
}

std::vector<std::string> common_params_get_valid_yaml_keys() {
    return {};
}
#endif

static std::string list_builtin_chat_templates() {
    std::vector<const char *> supported_tmpl;
    int32_t res = llama_chat_builtin_templates(nullptr, 0);
@@ -1294,6 +1500,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
};


add_opt(common_arg(
    {"--config"}, "FNAME",
    "path to YAML config file",
    [](common_params & params, const std::string & value) {
        if (!common_params_load_yaml_config(value, params)) {
            throw std::runtime_error("Failed to load YAML config file: " + value);
        }
    }
));
add_opt(common_arg(
    {"-h", "--help", "--usage"},
    "print usage and exit",
3 changes: 3 additions & 0 deletions common/arg.h
@@ -72,6 +72,9 @@ struct common_params_context {
    common_params_context(common_params & params) : params(params) {}
};

bool common_params_load_yaml_config(const std::string & config_path, common_params & params);
std::vector<std::string> common_params_get_valid_yaml_keys();

// parse input arguments from CLI
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
11 changes: 11 additions & 0 deletions configs/minimal.yaml
@@ -0,0 +1,11 @@

model: "../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

predict: 128
ctx-size: 2048
batch-size: 512

seed: 42
temp: 0.7
top-k: 40
top-p: 0.9
35 changes: 35 additions & 0 deletions configs/override.yaml
@@ -0,0 +1,35 @@

model: "../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
ctx-size: 4096
batch-size: 1024
ubatch-size: 256

threads: 8
threads-batch: 8
no-perf: false
no-warmup: false
flash-attn: "auto"

predict: 256
keep: 0

seed: 1337
temp: 0.8
top-k: 50
top-p: 0.95
min-p: 0.05
samplers: "top_k;top_p;min_p;temp"
ignore-eos: false

prompt: "Explain the concept of machine learning in simple terms."
escape: true
special: false
color: true
verbose-prompt: false
no-display-prompt: false

interactive: false
interactive-first: false
multiline-input: false
conversation: false
single-turn: false