22 changes: 22 additions & 0 deletions CMakeLists.txt
@@ -87,6 +87,28 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)

# Find yaml-cpp for YAML config support
find_package(PkgConfig QUIET)
if(PkgConfig_FOUND)
    pkg_check_modules(YAML_CPP QUIET yaml-cpp)
endif()

if(NOT YAML_CPP_FOUND)
    find_package(yaml-cpp QUIET)
    if(yaml-cpp_FOUND)
        set(YAML_CPP_LIBRARIES yaml-cpp)
        set(YAML_CPP_FOUND TRUE)
    endif()
endif()

if(NOT YAML_CPP_FOUND)
    message(STATUS "yaml-cpp not found, YAML config support will be disabled")
    set(LLAMA_YAML_CONFIG OFF)
else()
    message(STATUS "Found yaml-cpp, enabling YAML config support")
    set(LLAMA_YAML_CONFIG ON)
endif()

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
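
The detection above tries pkg-config first and then falls back to yaml-cpp's own CMake package config. As a rough sketch of how to exercise the enabled path on a typical Linux setup (Debian/Ubuntu package name shown; it differs on other distros, e.g. `yaml-cpp-devel` on Fedora):

```bash
# install the yaml-cpp development files so either detection path can find them
sudo apt install libyaml-cpp-dev

# standard llama.cpp configure/build; the configure output should then include
# "Found yaml-cpp, enabling YAML config support"
cmake -B build
cmake --build build --config Release
```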
46 changes: 46 additions & 0 deletions README.md
@@ -326,6 +326,52 @@ To learn more about model quantization, [read this documentation](tools/quantize

</details>

- <details>
<summary>Run with YAML configuration</summary>

llama.cpp supports loading configuration from YAML files using the `--config` flag. This allows you to organize complex configurations and share them easily.

```bash
# Use YAML config
llama-cli --config configs/minimal.yaml

# Combine YAML with flag overrides
llama-cli --config configs/base.yaml --temp 0.5 --predict 256
```

**Precedence rules:**
- Command line flags override YAML config values
- YAML config values override default values
- Order: `flags > yaml > defaults`

**Example YAML config:**
```yaml
# Basic model and generation settings
model: "models/my-model.gguf"
ctx-size: 2048
predict: 128
seed: 42

# Sampling parameters
temp: 0.7
top-k: 40
top-p: 0.9

# Input/output
prompt: "Hello, how are you?"
color: true
```

**Key features:**
- Relative paths in YAML are resolved relative to the config file location
- Unknown YAML keys are rejected with helpful error messages
- Supported CLI flags have corresponding YAML keys: use the long flag name without the leading `--` (short aliases such as `m` and `p` are also accepted)
- Boolean flags can be set as `true`/`false` in YAML
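
For instance, a minimal sketch of the flag-to-key mapping (the config file name and model path below are placeholders):

```bash
# hypothetical config: long flag names become YAML keys, booleans become true/false
cat > my-config.yaml << 'EOF'
model: "models/my-model.gguf"   # resolved relative to my-config.yaml
ctx-size: 4096                  # same as --ctx-size 4096
no-warmup: true                 # same as passing --no-warmup
color: true                     # same as --color
EOF

llama-cli --config my-config.yaml
```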

See `configs/minimal.yaml` and `configs/override.yaml` for example configurations.

</details>

- <details>
<summary>Run in conversation mode with custom chat template</summary>

8 changes: 8 additions & 0 deletions common/CMakeLists.txt
@@ -133,6 +133,14 @@ if (LLAMA_LLGUIDANCE)
    set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
endif ()

if(LLAMA_YAML_CONFIG AND YAML_CPP_FOUND)
    target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CONFIG)
    target_link_libraries(${TARGET} PRIVATE ${YAML_CPP_LIBRARIES})
    if(YAML_CPP_INCLUDE_DIRS)
        target_include_directories(${TARGET} PRIVATE ${YAML_CPP_INCLUDE_DIRS})
    endif()
endif()

target_include_directories(${TARGET} PUBLIC . ../vendor)
target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
215 changes: 215 additions & 0 deletions common/arg.cpp
@@ -19,6 +19,10 @@
#define JSON_ASSERT GGML_ASSERT
#include <nlohmann/json.hpp>

#ifdef LLAMA_YAML_CONFIG
#include <yaml-cpp/yaml.h>
#endif

#include <algorithm>
#include <climits>
#include <cstdarg>
@@ -1251,6 +1255,208 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
    return true;
}

#ifdef LLAMA_YAML_CONFIG
bool common_params_load_yaml_config(const std::string & config_path, common_params & params) {
    try {
        YAML::Node config = YAML::LoadFile(config_path);

        std::filesystem::path config_dir = std::filesystem::path(config_path).parent_path();

        std::vector<std::string> valid_keys = common_params_get_valid_yaml_keys();
        std::set<std::string> valid_keys_set(valid_keys.begin(), valid_keys.end());

        for (const auto& kv : config) {
            std::string key = kv.first.as<std::string>();

            if (valid_keys_set.find(key) == valid_keys_set.end()) {
                std::ostringstream oss;
                oss << "Unknown YAML key: '" << key << "'\n";
                oss << "Valid keys are: ";
                for (size_t i = 0; i < valid_keys.size(); ++i) {
                    oss << valid_keys[i];
                    if (i < valid_keys.size() - 1) oss << ", ";
                }
                throw std::runtime_error(oss.str());
            }

            YAML::Node value = kv.second;

            if (key == "model" || key == "m") {
                std::string model_path = value.as<std::string>();
                if (!model_path.empty() && model_path[0] != '/') {
                    model_path = (config_dir / model_path).string();
                }
                params.model.path = model_path;
            } else if (key == "threads" || key == "t") {
                params.cpuparams.n_threads = value.as<int>();
                if (params.cpuparams.n_threads <= 0) {
                    params.cpuparams.n_threads = std::thread::hardware_concurrency();
                }
            } else if (key == "threads-batch" || key == "tb") {
                params.cpuparams_batch.n_threads = value.as<int>();
                if (params.cpuparams_batch.n_threads <= 0) {
                    params.cpuparams_batch.n_threads = std::thread::hardware_concurrency();
                }
            } else if (key == "ctx-size" || key == "c") {
                params.n_ctx = value.as<int>();
            } else if (key == "batch-size" || key == "b") {
                params.n_batch = value.as<int>();
            } else if (key == "ubatch-size" || key == "ub") {
                params.n_ubatch = value.as<int>();
            } else if (key == "predict" || key == "n") {
                params.n_predict = value.as<int>();
            } else if (key == "keep") {
                params.n_keep = value.as<int>();
            } else if (key == "seed" || key == "s") {
                params.sampling.seed = value.as<uint32_t>();
            } else if (key == "temp") {
                params.sampling.temp = value.as<float>();
                params.sampling.temp = std::max(params.sampling.temp, 0.0f);
            } else if (key == "top-k") {
                params.sampling.top_k = value.as<int>();
            } else if (key == "top-p") {
                params.sampling.top_p = value.as<float>();
            } else if (key == "min-p") {
                params.sampling.min_p = value.as<float>();
            } else if (key == "prompt" || key == "p") {
                params.prompt = value.as<std::string>();
            } else if (key == "file" || key == "f") {
                std::string file_path = value.as<std::string>();
                if (!file_path.empty() && file_path[0] != '/') {
                    file_path = (config_dir / file_path).string();
                }
                params.prompt = read_file(file_path);
                params.prompt_file = file_path;
                if (!params.prompt.empty() && params.prompt.back() == '\n') {
                    params.prompt.pop_back();
                }
            } else if (key == "system-prompt" || key == "sys") {
                params.system_prompt = value.as<std::string>();
            } else if (key == "system-prompt-file" || key == "sysf") {
                std::string file_path = value.as<std::string>();
                if (!file_path.empty() && file_path[0] != '/') {
                    file_path = (config_dir / file_path).string();
                }
                params.system_prompt = read_file(file_path);
                if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') {
                    params.system_prompt.pop_back();
                }
} else if (key == "escape" || key == "e") {
params.escape = value.as<bool>();
} else if (key == "interactive" || key == "i") {
params.interactive = value.as<bool>();
} else if (key == "interactive-first" || key == "if") {
params.interactive_first = value.as<bool>();
} else if (key == "multiline-input" || key == "mli") {
params.multiline_input = value.as<bool>();
} else if (key == "color" || key == "co") {
params.use_color = value.as<bool>();
} else if (key == "verbose-prompt") {
params.verbose_prompt = value.as<bool>();
} else if (key == "no-display-prompt") {
params.display_prompt = !value.as<bool>();
} else if (key == "conversation" || key == "cnv") {
if (value.as<bool>()) {
params.conversation_mode = COMMON_CONVERSATION_MODE_ENABLED;
}
} else if (key == "no-conversation" || key == "no-cnv") {
if (value.as<bool>()) {
params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED;
}
} else if (key == "single-turn" || key == "st") {
params.single_turn = value.as<bool>();
} else if (key == "special" || key == "sp") {
params.special = value.as<bool>();
} else if (key == "flash-attn" || key == "fa") {
std::string fa_value = value.as<std::string>();
if (fa_value == "on" || fa_value == "enabled" || fa_value == "1") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
} else if (fa_value == "off" || fa_value == "disabled" || fa_value == "0") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
} else if (fa_value == "auto" || fa_value == "-1") {
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
}
} else if (key == "no-perf") {
if (value.as<bool>()) {
params.no_perf = true;
params.sampling.no_perf = true;
}
} else if (key == "ignore-eos") {
params.sampling.ignore_eos = value.as<bool>();
} else if (key == "no-warmup") {
params.warmup = !value.as<bool>();
} else if (key == "spm-infill") {
params.spm_infill = value.as<bool>();
} else if (key == "samplers") {
std::string samplers_str = value.as<std::string>();
const auto sampler_names = string_split<std::string>(samplers_str, ';');
params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
} else if (key == "sampling-seq" || key == "sampler-seq") {
std::string seq = value.as<std::string>();
params.sampling.samplers = common_sampler_types_from_chars(seq);
}
}

        return true;
    } catch (const YAML::Exception& e) {
        fprintf(stderr, "YAML parsing error: %s\n", e.what());
        return false;
    } catch (const std::exception& e) {
        fprintf(stderr, "Error loading YAML config: %s\n", e.what());
        return false;
    }
}

std::vector<std::string> common_params_get_valid_yaml_keys() {
    return {
        "model", "m",
        "threads", "t",
        "threads-batch", "tb",
        "ctx-size", "c",
        "batch-size", "b",
        "ubatch-size", "ub",
        "predict", "n",
        "keep",
        "seed", "s",
        "temp",
        "top-k",
        "top-p",
        "min-p",
        "prompt", "p",
        "file", "f",
        "system-prompt", "sys",
        "system-prompt-file", "sysf",
        "escape", "e",
        "interactive", "i",
        "interactive-first", "if",
        "multiline-input", "mli",
        "color", "co",
        "verbose-prompt",
        "no-display-prompt",
        "conversation", "cnv",
        "no-conversation", "no-cnv",
        "single-turn", "st",
        "special", "sp",
        "flash-attn", "fa",
        "no-perf",
        "ignore-eos",
        "no-warmup",
        "spm-infill",
        "samplers",
        "sampling-seq", "sampler-seq"
    };
}
#else
bool common_params_load_yaml_config(const std::string & config_path, common_params & params) {
    // parameters intentionally unused when built without yaml-cpp
    (void) config_path;
    (void) params;
    fprintf(stderr, "YAML config support not available (yaml-cpp not found during build)\n");
    return false;
}

std::vector<std::string> common_params_get_valid_yaml_keys() {
    return {};
}
#endif

static std::string list_builtin_chat_templates() {
    std::vector<const char *> supported_tmpl;
    int32_t res = llama_chat_builtin_templates(nullptr, 0);
@@ -1294,6 +1500,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
};


add_opt(common_arg(
    {"--config"}, "FNAME",
    "path to YAML config file",
    [](common_params & params, const std::string & value) {
        if (!common_params_load_yaml_config(value, params)) {
            throw std::runtime_error("Failed to load YAML config file: " + value);
        }
    }
));
add_opt(common_arg(
    {"-h", "--help", "--usage"},
    "print usage and exit",
3 changes: 3 additions & 0 deletions common/arg.h
@@ -72,6 +72,9 @@ struct common_params_context {
    common_params_context(common_params & params) : params(params) {}
};

bool common_params_load_yaml_config(const std::string & config_path, common_params & params);
std::vector<std::string> common_params_get_valid_yaml_keys();

// parse input arguments from CLI
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
11 changes: 11 additions & 0 deletions configs/minimal.yaml
@@ -0,0 +1,11 @@

model: "../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

predict: 128
ctx-size: 2048
batch-size: 512

seed: 42
temp: 0.7
top-k: 40
top-p: 0.9
35 changes: 35 additions & 0 deletions configs/override.yaml
@@ -0,0 +1,35 @@

model: "../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
ctx-size: 4096
batch-size: 1024
ubatch-size: 256

threads: 8
threads-batch: 8
no-perf: false
no-warmup: false
flash-attn: "auto"

predict: 256
keep: 0

seed: 1337
temp: 0.8
top-k: 50
top-p: 0.95
min-p: 0.05
samplers: "top_k;top_p;min_p;temp"
ignore-eos: false

prompt: "Explain the concept of machine learning in simple terms."
escape: true
special: false
color: true
verbose-prompt: false
no-display-prompt: false

interactive: false
interactive-first: false
multiline-input: false
conversation: false
single-turn: false