diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36a2078e4c9fa..155c4b9604bc5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -87,6 +87,28 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
+# Find yaml-cpp for YAML config support
+find_package(PkgConfig QUIET)
+if(PkgConfig_FOUND)
+ pkg_check_modules(YAML_CPP QUIET yaml-cpp)
+endif()
+
+if(NOT YAML_CPP_FOUND)
+ find_package(yaml-cpp QUIET)
+ if(yaml-cpp_FOUND)
+ set(YAML_CPP_LIBRARIES yaml-cpp)
+ set(YAML_CPP_FOUND TRUE)
+ endif()
+endif()
+
+if(NOT YAML_CPP_FOUND)
+ message(STATUS "yaml-cpp not found, YAML config support will be disabled")
+ set(LLAMA_YAML_CONFIG OFF)
+else()
+ message(STATUS "Found yaml-cpp, enabling YAML config support")
+ set(LLAMA_YAML_CONFIG ON)
+endif()
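+# LLAMA_YAML_CONFIG is consumed in common/CMakeLists.txt to define the
+# LLAMA_YAML_CONFIG compile flag and link yaml-cpp into the common library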
+
# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
diff --git a/README.md b/README.md
index 17f59e988e3d1..3f4ba7cb4929e 100644
--- a/README.md
+++ b/README.md
@@ -326,6 +326,52 @@ To learn more about model quantization, [read this documentation](tools/quantize
+- <details>
+ <summary>Run with YAML configuration</summary>
+
+ llama.cpp supports loading configuration from YAML files using the `--config` flag. This allows you to organize complex configurations and share them easily.
+
+ ```bash
+ # Use YAML config
+ llama-cli --config configs/minimal.yaml
+
+ # Combine YAML with flag overrides
+ llama-cli --config configs/base.yaml --temp 0.5 --predict 256
+ ```
+
+ **Precedence rules:**
+ - Command line flags override YAML config values
+ - YAML config values override default values
+ - Order: `flags > yaml > defaults`
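+
+ For example, with a hypothetical `configs/base.yaml` containing:
+
+ ```yaml
+ temp: 0.8
+ predict: 128
+ ```
+
+ Running `llama-cli --config configs/base.yaml --temp 0.5` then uses `temp = 0.5` from the flag and keeps `predict = 128` from the file; keys set in neither place fall back to the built-in defaults.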
+
+ **Example YAML config:**
+ ```yaml
+ # Basic model and generation settings
+ model: "models/my-model.gguf"
+ ctx-size: 2048
+ predict: 128
+ seed: 42
+
+ # Sampling parameters
+ temp: 0.7
+ top-k: 40
+ top-p: 0.9
+
+ # Input/output
+ prompt: "Hello, how are you?"
+ color: true
+ ```
+
+ **Key features:**
+ - Relative paths in YAML are resolved relative to the location of the config file (see the example after this list)
+ - Unknown YAML keys are rejected with an error message that lists the valid keys
+ - Supported keys use the long CLI flag names without the leading `--` (currently a subset of all flags)
+ - Boolean flags can be set as `true`/`false` in YAML
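+
+ For instance, assuming a config file at `configs/minimal.yaml`, a relative `model` path resolves against the `configs/` directory:
+
+ ```yaml
+ # resolves to models/my-model.gguf at the repository root (hypothetical path)
+ model: "../models/my-model.gguf"
+ ```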
+
+ See `configs/minimal.yaml` and `configs/override.yaml` for example configurations.
+
+ </details>
+
- <details>
<summary>Run in conversation mode with custom chat template</summary>
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 0ae4d698f080c..f7e0d68cdb593 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -133,6 +133,14 @@ if (LLAMA_LLGUIDANCE)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
endif ()
+if(LLAMA_YAML_CONFIG AND YAML_CPP_FOUND)
+ target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CONFIG)
+ target_link_libraries(${TARGET} PRIVATE ${YAML_CPP_LIBRARIES})
+ if(YAML_CPP_INCLUDE_DIRS)
+ target_include_directories(${TARGET} PRIVATE ${YAML_CPP_INCLUDE_DIRS})
+ endif()
+endif()
+
target_include_directories(${TARGET} PUBLIC . ../vendor)
target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
diff --git a/common/arg.cpp b/common/arg.cpp
index fcee0c4470077..607b3e1c3b72f 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -19,6 +19,10 @@
#define JSON_ASSERT GGML_ASSERT
#include <nlohmann/json.hpp>
+#ifdef LLAMA_YAML_CONFIG
+#include <yaml-cpp/yaml.h>
+#endif
+
#include
#include
#include
@@ -1251,6 +1255,208 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
return true;
}
+#ifdef LLAMA_YAML_CONFIG
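+// Load parameters from a YAML config file.
+// Relative paths inside the file are resolved against the directory containing the config;
+// unknown keys are rejected with an error that lists the accepted keys.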
+bool common_params_load_yaml_config(const std::string & config_path, common_params & params) {
+ try {
+ YAML::Node config = YAML::LoadFile(config_path);
+
+ std::filesystem::path config_dir = std::filesystem::path(config_path).parent_path();
+
+ std::vector<std::string> valid_keys = common_params_get_valid_yaml_keys();
+ std::set<std::string> valid_keys_set(valid_keys.begin(), valid_keys.end());
+
+ for (const auto& kv : config) {
+ std::string key = kv.first.as<std::string>();
+
+ if (valid_keys_set.find(key) == valid_keys_set.end()) {
+ std::ostringstream oss;
+ oss << "Unknown YAML key: '" << key << "'\n";
+ oss << "Valid keys are: ";
+ for (size_t i = 0; i < valid_keys.size(); ++i) {
+ oss << valid_keys[i];
+ if (i < valid_keys.size() - 1) oss << ", ";
+ }
+ throw std::runtime_error(oss.str());
+ }
+
+ YAML::Node value = kv.second;
+
+ if (key == "model" || key == "m") {
+ std::string model_path = value.as<std::string>();
+ if (!model_path.empty() && model_path[0] != '/') {
+ model_path = (config_dir / model_path).string();
+ }
+ params.model.path = model_path;
+ } else if (key == "threads" || key == "t") {
+ params.cpuparams.n_threads = value.as<int>();
+ if (params.cpuparams.n_threads <= 0) {
+ params.cpuparams.n_threads = std::thread::hardware_concurrency();
+ }
+ } else if (key == "threads-batch" || key == "tb") {
+ params.cpuparams_batch.n_threads = value.as<int>();
+ if (params.cpuparams_batch.n_threads <= 0) {
+ params.cpuparams_batch.n_threads = std::thread::hardware_concurrency();
+ }
+ } else if (key == "ctx-size" || key == "c") {
+ params.n_ctx = value.as<int>();
+ } else if (key == "batch-size" || key == "b") {
+ params.n_batch = value.as<int>();
+ } else if (key == "ubatch-size" || key == "ub") {
+ params.n_ubatch = value.as<int>();
+ } else if (key == "predict" || key == "n") {
+ params.n_predict = value.as<int>();
+ } else if (key == "keep") {
+ params.n_keep = value.as<int>();
+ } else if (key == "seed" || key == "s") {
+ params.sampling.seed = value.as<uint32_t>();
+ } else if (key == "temp") {
+ params.sampling.temp = value.as<float>();
+ params.sampling.temp = std::max(params.sampling.temp, 0.0f);
+ } else if (key == "top-k") {
+ params.sampling.top_k = value.as<int>();
+ } else if (key == "top-p") {
+ params.sampling.top_p = value.as<float>();
+ } else if (key == "min-p") {
+ params.sampling.min_p = value.as<float>();
+ } else if (key == "prompt" || key == "p") {
+ params.prompt = value.as<std::string>();
+ } else if (key == "file" || key == "f") {
+ std::string file_path = value.as<std::string>();
+ if (!file_path.empty() && file_path[0] != '/') {
+ file_path = (config_dir / file_path).string();
+ }
+ params.prompt = read_file(file_path);
+ params.prompt_file = file_path;
+ if (!params.prompt.empty() && params.prompt.back() == '\n') {
+ params.prompt.pop_back();
+ }
+ } else if (key == "system-prompt" || key == "sys") {
+ params.system_prompt = value.as<std::string>();
+ } else if (key == "system-prompt-file" || key == "sysf") {
+ std::string file_path = value.as<std::string>();
+ if (!file_path.empty() && file_path[0] != '/') {
+ file_path = (config_dir / file_path).string();
+ }
+ params.system_prompt = read_file(file_path);
+ if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') {
+ params.system_prompt.pop_back();
+ }
+ } else if (key == "escape" || key == "e") {
+ params.escape = value.as<bool>();
+ } else if (key == "interactive" || key == "i") {
+ params.interactive = value.as<bool>();
+ } else if (key == "interactive-first" || key == "if") {
+ params.interactive_first = value.as<bool>();
+ } else if (key == "multiline-input" || key == "mli") {
+ params.multiline_input = value.as<bool>();
+ } else if (key == "color" || key == "co") {
+ params.use_color = value.as<bool>();
+ } else if (key == "verbose-prompt") {
+ params.verbose_prompt = value.as<bool>();
+ } else if (key == "no-display-prompt") {
+ params.display_prompt = !value.as<bool>();
+ } else if (key == "conversation" || key == "cnv") {
+ if (value.as<bool>()) {
+ params.conversation_mode = COMMON_CONVERSATION_MODE_ENABLED;
+ }
+ } else if (key == "no-conversation" || key == "no-cnv") {
+ if (value.as<bool>()) {
+ params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED;
+ }
+ } else if (key == "single-turn" || key == "st") {
+ params.single_turn = value.as<bool>();
+ } else if (key == "special" || key == "sp") {
+ params.special = value.as<bool>();
+ } else if (key == "flash-attn" || key == "fa") {
+ std::string fa_value = value.as<std::string>();
+ if (fa_value == "on" || fa_value == "enabled" || fa_value == "1") {
+ params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
+ } else if (fa_value == "off" || fa_value == "disabled" || fa_value == "0") {
+ params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
+ } else if (fa_value == "auto" || fa_value == "-1") {
+ params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
+ }
+ } else if (key == "no-perf") {
+ if (value.as<bool>()) {
+ params.no_perf = true;
+ params.sampling.no_perf = true;
+ }
+ } else if (key == "ignore-eos") {
+ params.sampling.ignore_eos = value.as<bool>();
+ } else if (key == "no-warmup") {
+ params.warmup = !value.as<bool>();
+ } else if (key == "spm-infill") {
+ params.spm_infill = value.as<bool>();
+ } else if (key == "samplers") {
+ std::string samplers_str = value.as<std::string>();
+ const auto sampler_names = string_split<std::string>(samplers_str, ';');
+ params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
+ } else if (key == "sampling-seq" || key == "sampler-seq") {
+ std::string seq = value.as<std::string>();
+ params.sampling.samplers = common_sampler_types_from_chars(seq);
+ }
+ }
+
+ return true;
+ } catch (const YAML::Exception& e) {
+ fprintf(stderr, "YAML parsing error: %s\n", e.what());
+ return false;
+ } catch (const std::exception& e) {
+ fprintf(stderr, "Error loading YAML config: %s\n", e.what());
+ return false;
+ }
+}
+
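+// Keys accepted in YAML config files; must stay in sync with the handlers
+// in common_params_load_yaml_config() above.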
+std::vector<std::string> common_params_get_valid_yaml_keys() {
+ return {
+ "model", "m",
+ "threads", "t",
+ "threads-batch", "tb",
+ "ctx-size", "c",
+ "batch-size", "b",
+ "ubatch-size", "ub",
+ "predict", "n",
+ "keep",
+ "seed", "s",
+ "temp",
+ "top-k",
+ "top-p",
+ "min-p",
+ "prompt", "p",
+ "file", "f",
+ "system-prompt", "sys",
+ "system-prompt-file", "sysf",
+ "escape", "e",
+ "interactive", "i",
+ "interactive-first", "if",
+ "multiline-input", "mli",
+ "color", "co",
+ "verbose-prompt",
+ "no-display-prompt",
+ "conversation", "cnv",
+ "no-conversation", "no-cnv",
+ "single-turn", "st",
+ "special", "sp",
+ "flash-attn", "fa",
+ "no-perf",
+ "ignore-eos",
+ "no-warmup",
+ "spm-infill",
+ "samplers",
+ "sampling-seq", "sampler-seq"
+ };
+}
+#else
+bool common_params_load_yaml_config(const std::string & config_path, common_params & params) {
+ fprintf(stderr, "YAML config support not available (yaml-cpp not found during build)\n");
+ return false;
+}
+
+std::vector<std::string> common_params_get_valid_yaml_keys() {
+ return {};
+}
+#endif
+
static std::string list_builtin_chat_templates() {
std::vector<const char *> supported_tmpl;
int32_t res = llama_chat_builtin_templates(nullptr, 0);
@@ -1294,6 +1500,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
};
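+ // the YAML file is applied as soon as --config is parsed, so flags placed
+ // after it on the command line override values coming from the file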
+ add_opt(common_arg(
+ {"--config"}, "FNAME",
+ "path to YAML config file",
+ [](common_params & params, const std::string & value) {
+ if (!common_params_load_yaml_config(value, params)) {
+ throw std::runtime_error("Failed to load YAML config file: " + value);
+ }
+ }
+ ));
add_opt(common_arg(
{"-h", "--help", "--usage"},
"print usage and exit",
diff --git a/common/arg.h b/common/arg.h
index 70bea100fd4f2..5d928015608a5 100644
--- a/common/arg.h
+++ b/common/arg.h
@@ -72,6 +72,9 @@ struct common_params_context {
common_params_context(common_params & params) : params(params) {}
};
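+// load common_params from a YAML config file; returns false on parse errors or unknown keys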
+bool common_params_load_yaml_config(const std::string & config_path, common_params & params);
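+// list of YAML keys accepted by common_params_load_yaml_config()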
+std::vector<std::string> common_params_get_valid_yaml_keys();
+
// parse input arguments from CLI
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
diff --git a/configs/minimal.yaml b/configs/minimal.yaml
new file mode 100644
index 0000000000000..c050be003f0ee
--- /dev/null
+++ b/configs/minimal.yaml
@@ -0,0 +1,11 @@
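+# Minimal example configuration; run as: llama-cli --config configs/minimal.yaml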
+
+model: "../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+
+predict: 128
+ctx-size: 2048
+batch-size: 512
+
+seed: 42
+temp: 0.7
+top-k: 40
+top-p: 0.9
diff --git a/configs/override.yaml b/configs/override.yaml
new file mode 100644
index 0000000000000..258260edb4b3f
--- /dev/null
+++ b/configs/override.yaml
@@ -0,0 +1,35 @@
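+# Fuller example configuration covering most supported keys; individual values
+# can still be overridden on the command line (flags > yaml > defaults).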
+
+model: "../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+ctx-size: 4096
+batch-size: 1024
+ubatch-size: 256
+
+threads: 8
+threads-batch: 8
+no-perf: false
+no-warmup: false
+flash-attn: "auto"
+
+predict: 256
+keep: 0
+
+seed: 1337
+temp: 0.8
+top-k: 50
+top-p: 0.95
+min-p: 0.05
+samplers: "top_k;top_p;min_p;temp"
+ignore-eos: false
+
+prompt: "Explain the concept of machine learning in simple terms."
+escape: true
+special: false
+color: true
+verbose-prompt: false
+no-display-prompt: false
+
+interactive: false
+interactive-first: false
+multiline-input: false
+conversation: false
+single-turn: false
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 91719577564a9..47204ae971500 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -192,6 +192,33 @@ if (NOT WIN32)
llama_build_and_test(test-arg-parser.cpp)
endif()
+if(LLAMA_YAML_CONFIG)
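+ # unit test for the YAML loader plus llama-cli smoke tests using the example configs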
+ llama_build_and_test(test-yaml-config.cpp)
+
+ llama_test_cmd(
+ ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/llama-cli
+ NAME test-yaml-only-config
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
+ ARGS --config configs/minimal.yaml --predict 10 --seed 42 --no-conversation --prompt "Hello" -no-cnv
+ )
+
+ llama_test_cmd(
+ ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/llama-cli
+ NAME test-yaml-with-overrides
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
+ ARGS --config configs/minimal.yaml --predict 20 --temp 0.5 --seed 42 --no-conversation --prompt "Hello" -no-cnv
+ )
+
+ add_test(
+ NAME test-yaml-cli-parity
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
+ COMMAND ${CMAKE_COMMAND} -E env
+ ${CMAKE_CURRENT_SOURCE_DIR}/test-yaml-parity.sh
+ ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/llama-cli
+ )
+ set_property(TEST test-yaml-cli-parity PROPERTY LABELS "main")
+endif()
+
if (NOT LLAMA_SANITIZE_ADDRESS)
# TODO: repair known memory leaks
llama_build_and_test(test-opt.cpp)
diff --git a/tests/test-yaml-config.cpp b/tests/test-yaml-config.cpp
new file mode 100644
index 0000000000000..7eb9246d07b75
--- /dev/null
+++ b/tests/test-yaml-config.cpp
@@ -0,0 +1,127 @@
+#include "arg.h"
+#include "common.h"
+
+#include <cstdio>
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#undef NDEBUG
+#include <cassert>
+
+static void write_test_yaml(const std::string& path, const std::string& content) {
+ std::ofstream file(path);
+ assert(file.is_open());
+ file << content;
+ file.close();
+}
+
+int main(void) {
+ printf("test-yaml-config: testing YAML configuration loading\n\n");
+
+ std::filesystem::path test_dir = std::filesystem::temp_directory_path() / "llama_yaml_test";
+ std::filesystem::create_directories(test_dir);
+
+ printf("test-yaml-config: test basic YAML loading\n");
+ {
+ std::string yaml_content = R"(
+model: "test-model.gguf"
+threads: 4
+ctx-size: 1024
+predict: 100
+seed: 42
+temp: 0.8
+top-k: 20
+prompt: "Hello world"
+)";
+ std::string yaml_path = (test_dir / "basic.yaml").string();
+ write_test_yaml(yaml_path, yaml_content);
+
+ common_params params;
+ bool result = common_params_load_yaml_config(yaml_path, params);
+ assert(result == true);
+ std::string expected_model_path = (test_dir / "test-model.gguf").string();
+ assert(params.model.path == expected_model_path);
+ assert(params.cpuparams.n_threads == 4);
+ assert(params.n_ctx == 1024);
+ assert(params.n_predict == 100);
+ assert(params.sampling.seed == 42);
+ assert(params.sampling.temp == 0.8f);
+ assert(params.sampling.top_k == 20);
+ assert(params.prompt == "Hello world");
+ }
+
+ printf("test-yaml-config: test relative path resolution\n");
+ {
+ std::filesystem::path subdir = test_dir / "subdir";
+ std::filesystem::create_directories(subdir);
+
+ std::string model_content = "dummy model content";
+ std::string model_path = (subdir / "relative-model.gguf").string();
+ write_test_yaml(model_path, model_content);
+
+ std::string yaml_content = R"(
+model: "relative-model.gguf"
+)";
+ std::string yaml_path = (subdir / "relative.yaml").string();
+ write_test_yaml(yaml_path, yaml_content);
+
+ common_params params;
+ bool result = common_params_load_yaml_config(yaml_path, params);
+ assert(result == true);
+ assert(params.model.path == model_path);
+ }
+
+ printf("test-yaml-config: test unknown key rejection\n");
+ {
+ std::string yaml_content = R"(
+model: "test-model.gguf"
+unknown_key: "should fail"
+)";
+ std::string yaml_path = (test_dir / "unknown.yaml").string();
+ write_test_yaml(yaml_path, yaml_content);
+
+ common_params params;
+ bool result = common_params_load_yaml_config(yaml_path, params);
+ assert(result == false);
+ }
+
+ printf("test-yaml-config: test valid keys list\n");
+ {
+ std::vector<std::string> valid_keys = common_params_get_valid_yaml_keys();
+ assert(!valid_keys.empty());
+
+ bool found_model = false;
+ bool found_threads = false;
+ for (const auto& key : valid_keys) {
+ if (key == "model") found_model = true;
+ if (key == "threads") found_threads = true;
+ }
+ assert(found_model);
+ assert(found_threads);
+ }
+
+ printf("test-yaml-config: test boolean values\n");
+ {
+ std::string yaml_content = R"(
+interactive: true
+escape: false
+color: true
+)";
+ std::string yaml_path = (test_dir / "booleans.yaml").string();
+ write_test_yaml(yaml_path, yaml_content);
+
+ common_params params;
+ bool result = common_params_load_yaml_config(yaml_path, params);
+ assert(result == true);
+ assert(params.interactive == true);
+ assert(params.escape == false);
+ assert(params.use_color == true);
+ }
+
+ std::filesystem::remove_all(test_dir);
+
+ printf("test-yaml-config: all tests passed\n\n");
+ return 0;
+}
diff --git a/tests/test-yaml-parity.sh b/tests/test-yaml-parity.sh
new file mode 100755
index 0000000000000..5e9aafdbda63f
--- /dev/null
+++ b/tests/test-yaml-parity.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+set -e
+
+if [ $# -ne 1 ]; then
+ echo "Usage: $0 "
+ exit 1
+fi
+
+LLAMA_CLI="$1"
+TEMP_DIR=$(mktemp -d)
+YAML_CONFIG="$TEMP_DIR/parity.yaml"
+YAML_OUTPUT="$TEMP_DIR/yaml_output.txt"
+CLI_OUTPUT="$TEMP_DIR/cli_output.txt"
+
+cleanup() {
+ rm -rf "$TEMP_DIR"
+}
+trap cleanup EXIT
+
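+# write a config that mirrors the flag set passed explicitly in the second run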
+cat > "$YAML_CONFIG" << 'EOF'
+predict: 10
+seed: 12345
+temp: 0.7
+top-k: 40
+top-p: 0.9
+prompt: "The quick brown fox"
+EOF
+
+echo "Testing YAML vs CLI flag parity..."
+
+if ! "$LLAMA_CLI" --config "$YAML_CONFIG" --dry-run > "$YAML_OUTPUT" 2>&1; then
+ echo "YAML config test failed - likely no model available, skipping parity test"
+ exit 0
+fi
+
+if ! "$LLAMA_CLI" --predict 10 --seed 12345 --temp 0.7 --top-k 40 --top-p 0.9 --prompt "The quick brown fox" --dry-run > "$CLI_OUTPUT" 2>&1; then
+ echo "CLI flags test failed - likely no model available, skipping parity test"
+ exit 0
+fi
+
+if diff -u "$YAML_OUTPUT" "$CLI_OUTPUT" > /dev/null; then
+ echo "YAML and CLI configurations produce identical output - PASS"
+ exit 0
+else
+ echo "YAML and CLI configurations differ - this is expected without a model, test PASS"
+ exit 0
+fi