diff --git a/CMakeLists.txt b/CMakeLists.txt
index 36a2078e4c9fa..155c4b9604bc5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -87,6 +87,28 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})
option(LLAMA_CURL "llama: use libcurl to download model from an URL" ON)
option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)
+# Find yaml-cpp for YAML config support
+find_package(PkgConfig QUIET)
+if(PkgConfig_FOUND)
+ pkg_check_modules(YAML_CPP QUIET yaml-cpp)
+endif()
+
+if(NOT YAML_CPP_FOUND)
+ find_package(yaml-cpp QUIET)
+ if(yaml-cpp_FOUND)
+ set(YAML_CPP_LIBRARIES yaml-cpp)
+ set(YAML_CPP_FOUND TRUE)
+ endif()
+endif()
+
+if(NOT YAML_CPP_FOUND)
+ message(STATUS "yaml-cpp not found, YAML config support will be disabled")
+ set(LLAMA_YAML_CONFIG OFF)
+else()
+ message(STATUS "Found yaml-cpp, enabling YAML config support")
+ set(LLAMA_YAML_CONFIG ON)
+endif()
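+# LLAMA_YAML_CONFIG is consumed in common/CMakeLists.txt to define the
+# LLAMA_YAML_CONFIG compile flag and link yaml-cpp into the common library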
+
# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
diff --git a/README.md b/README.md
index 17f59e988e3d1..3f4ba7cb4929e 100644
--- a/README.md
+++ b/README.md
@@ -326,6 +326,52 @@ To learn more about model quantization, [read this documentation](tools/quantize
+- <details>
+ <summary>Run with YAML configuration</summary>
+
+ llama.cpp supports loading configuration from YAML files using the `--config` flag. This allows you to organize complex configurations and share them easily.
+
+ ```bash
+ # Use YAML config
+ llama-cli --config configs/minimal.yaml
+
+ # Combine YAML with flag overrides
+ llama-cli --config configs/base.yaml --temp 0.5 --predict 256
+ ```
+
+ **Precedence rules:**
+ - Command line flags override YAML config values
+ - YAML config values override default values
+ - Order: `flags > yaml > defaults`
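+
+ For example, with a hypothetical `configs/base.yaml` containing:
+
+ ```yaml
+ temp: 0.8
+ predict: 128
+ ```
+
+ Running `llama-cli --config configs/base.yaml --temp 0.5` then uses `temp = 0.5` from the flag and keeps `predict = 128` from the file; keys set in neither place fall back to the built-in defaults.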
+
+ **Example YAML config:**
+ ```yaml
+ # Basic model and generation settings
+ model: "models/my-model.gguf"
+ ctx-size: 2048
+ predict: 128
+ seed: 42
+
+ # Sampling parameters
+ temp: 0.7
+ top-k: 40
+ top-p: 0.9
+
+ # Input/output
+ prompt: "Hello, how are you?"
+ color: true
+ ```
+
+ **Key features:**
+ - Relative paths in YAML are resolved relative to the location of the config file (see the example after this list)
+ - Unknown YAML keys are rejected with an error message that lists the valid keys
+ - Supported keys use the long CLI flag names without the leading `--` (currently a subset of all flags)
+ - Boolean flags can be set as `true`/`false` in YAML
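+
+ For instance, assuming a config file at `configs/minimal.yaml`, a relative `model` path resolves against the `configs/` directory:
+
+ ```yaml
+ # resolves to models/my-model.gguf at the repository root (hypothetical path)
+ model: "../models/my-model.gguf"
+ ```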
+
+ See `configs/minimal.yaml` and `configs/override.yaml` for example configurations.
+
+ </details>
+
- <details>
<summary>Run in conversation mode with custom chat template</summary>
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 0ae4d698f080c..f7e0d68cdb593 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -133,6 +133,14 @@ if (LLAMA_LLGUIDANCE)
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
endif ()
+if(LLAMA_YAML_CONFIG AND YAML_CPP_FOUND)
+ target_compile_definitions(${TARGET} PRIVATE LLAMA_YAML_CONFIG)
+ target_link_libraries(${TARGET} PRIVATE ${YAML_CPP_LIBRARIES})
+ if(YAML_CPP_INCLUDE_DIRS)
+ target_include_directories(${TARGET} PRIVATE ${YAML_CPP_INCLUDE_DIRS})
+ endif()
+endif()
+
target_include_directories(${TARGET} PUBLIC . ../vendor)
target_compile_features (${TARGET} PUBLIC cxx_std_17)
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
diff --git a/common/arg.cpp b/common/arg.cpp
index fcee0c4470077..607b3e1c3b72f 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -19,6 +19,10 @@
#define JSON_ASSERT GGML_ASSERT
#include <nlohmann/json.hpp>
+#ifdef LLAMA_YAML_CONFIG
+#include <yaml-cpp/yaml.h>
+#endif
+
#include
#include
#include
@@ -1251,6 +1255,208 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
return true;
}
+#ifdef LLAMA_YAML_CONFIG
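+// Load parameters from a YAML config file.
+// Relative paths inside the file are resolved against the directory containing the config;
+// unknown keys are rejected with an error that lists the accepted keys.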
+bool common_params_load_yaml_config(const std::string & config_path, common_params & params) {
+ try {
+ YAML::Node config = YAML::LoadFile(config_path);
+
+ std::filesystem::path config_dir = std::filesystem::path(config_path).parent_path();
+
+ std::vector<std::string> valid_keys = common_params_get_valid_yaml_keys();
+ std::set<std::string> valid_keys_set(valid_keys.begin(), valid_keys.end());
+
+ for (const auto& kv : config) {
+ std::string key = kv.first.as<std::string>();
+
+ if (valid_keys_set.find(key) == valid_keys_set.end()) {
+ std::ostringstream oss;
+ oss << "Unknown YAML key: '" << key << "'\n";
+ oss << "Valid keys are: ";
+ for (size_t i = 0; i < valid_keys.size(); ++i) {
+ oss << valid_keys[i];
+ if (i < valid_keys.size() - 1) oss << ", ";
+ }
+ throw std::runtime_error(oss.str());
+ }
+
+ YAML::Node value = kv.second;
+
+ if (key == "model" || key == "m") {
+ std::string model_path = value.as<std::string>();
+ if (!model_path.empty() && model_path[0] != '/') {
+ model_path = (config_dir / model_path).string();
+ }
+ params.model.path = model_path;
+ } else if (key == "threads" || key == "t") {
+ params.cpuparams.n_threads = value.as<int>();
+ if (params.cpuparams.n_threads <= 0) {
+ params.cpuparams.n_threads = std::thread::hardware_concurrency();
+ }
+ } else if (key == "threads-batch" || key == "tb") {
+ params.cpuparams_batch.n_threads = value.as<int>();
+ if (params.cpuparams_batch.n_threads <= 0) {
+ params.cpuparams_batch.n_threads = std::thread::hardware_concurrency();
+ }
+ } else if (key == "ctx-size" || key == "c") {
+ params.n_ctx = value.as<int>();
+ } else if (key == "batch-size" || key == "b") {
+ params.n_batch = value.as<int>();
+ } else if (key == "ubatch-size" || key == "ub") {
+ params.n_ubatch = value.as<int>();
+ } else if (key == "predict" || key == "n") {
+ params.n_predict = value.as<int>();
+ } else if (key == "keep") {
+ params.n_keep = value.as<int>();
+ } else if (key == "seed" || key == "s") {
+ params.sampling.seed = value.as<uint32_t>();
+ } else if (key == "temp") {
+ params.sampling.temp = value.as<float>();
+ params.sampling.temp = std::max(params.sampling.temp, 0.0f);
+ } else if (key == "top-k") {
+ params.sampling.top_k = value.as<int>();
+ } else if (key == "top-p") {
+ params.sampling.top_p = value.as<float>();
+ } else if (key == "min-p") {
+ params.sampling.min_p = value.as<float>();
+ } else if (key == "prompt" || key == "p") {
+ params.prompt = value.as<std::string>();
+ } else if (key == "file" || key == "f") {
+ std::string file_path = value.as<std::string>();
+ if (!file_path.empty() && file_path[0] != '/') {
+ file_path = (config_dir / file_path).string();
+ }
+ params.prompt = read_file(file_path);
+ params.prompt_file = file_path;
+ if (!params.prompt.empty() && params.prompt.back() == '\n') {
+ params.prompt.pop_back();
+ }
+ } else if (key == "system-prompt" || key == "sys") {
+ params.system_prompt = value.as<std::string>();
+ } else if (key == "system-prompt-file" || key == "sysf") {
+ std::string file_path = value.as<std::string>();
+ if (!file_path.empty() && file_path[0] != '/') {
+ file_path = (config_dir / file_path).string();
+ }
+ params.system_prompt = read_file(file_path);
+ if (!params.system_prompt.empty() && params.system_prompt.back() == '\n') {
+ params.system_prompt.pop_back();
+ }
+ } else if (key == "escape" || key == "e") {
+ params.escape = value.as<bool>();
+ } else if (key == "interactive" || key == "i") {
+ params.interactive = value.as<bool>();
+ } else if (key == "interactive-first" || key == "if") {
+ params.interactive_first = value.as<bool>();
+ } else if (key == "multiline-input" || key == "mli") {
+ params.multiline_input = value.as<bool>();
+ } else if (key == "color" || key == "co") {
+ params.use_color = value.as<bool>();
+ } else if (key == "verbose-prompt") {
+ params.verbose_prompt = value.as<bool>();
+ } else if (key == "no-display-prompt") {
+ params.display_prompt = !value.as<bool>();
+ } else if (key == "conversation" || key == "cnv") {
+ if (value.as<bool>()) {
+ params.conversation_mode = COMMON_CONVERSATION_MODE_ENABLED;
+ }
+ } else if (key == "no-conversation" || key == "no-cnv") {
+ if (value.as<bool>()) {
+ params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED;
+ }
+ } else if (key == "single-turn" || key == "st") {
+ params.single_turn = value.as<bool>();
+ } else if (key == "special" || key == "sp") {
+ params.special = value.as<bool>();
+ } else if (key == "flash-attn" || key == "fa") {
+ std::string fa_value = value.as<std::string>();
+ if (fa_value == "on" || fa_value == "enabled" || fa_value == "1") {
+ params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
+ } else if (fa_value == "off" || fa_value == "disabled" || fa_value == "0") {
+ params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_DISABLED;
+ } else if (fa_value == "auto" || fa_value == "-1") {
+ params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
+ }
+ } else if (key == "no-perf") {
+ if (value.as<bool>()) {
+ params.no_perf = true;
+ params.sampling.no_perf = true;
+ }
+ } else if (key == "ignore-eos") {
+ params.sampling.ignore_eos = value.as<bool>();
+ } else if (key == "no-warmup") {
+ params.warmup = !value.as<bool>();
+ } else if (key == "spm-infill") {
+ params.spm_infill = value.as<bool>();
+ } else if (key == "samplers") {
+ std::string samplers_str = value.as<std::string>();
+ const auto sampler_names = string_split<std::string>(samplers_str, ';');
+ params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
+ } else if (key == "sampling-seq" || key == "sampler-seq") {
+ std::string seq = value.as<std::string>();
+ params.sampling.samplers = common_sampler_types_from_chars(seq);
+ }
+ }
+
+ return true;
+ } catch (const YAML::Exception& e) {
+ fprintf(stderr, "YAML parsing error: %s\n", e.what());
+ return false;
+ } catch (const std::exception& e) {
+ fprintf(stderr, "Error loading YAML config: %s\n", e.what());
+ return false;
+ }
+}
+
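+// Keys accepted in YAML config files; must stay in sync with the handlers
+// in common_params_load_yaml_config() above.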
+std::vector<std::string> common_params_get_valid_yaml_keys() {
+ return {
+ "model", "m",
+ "threads", "t",
+ "threads-batch", "tb",
+ "ctx-size", "c",
+ "batch-size", "b",
+ "ubatch-size", "ub",
+ "predict", "n",
+ "keep",
+ "seed", "s",
+ "temp",
+ "top-k",
+ "top-p",
+ "min-p",
+ "prompt", "p",
+ "file", "f",
+ "system-prompt", "sys",
+ "system-prompt-file", "sysf",
+ "escape", "e",
+ "interactive", "i",
+ "interactive-first", "if",
+ "multiline-input", "mli",
+ "color", "co",
+ "verbose-prompt",
+ "no-display-prompt",
+ "conversation", "cnv",
+ "no-conversation", "no-cnv",
+ "single-turn", "st",
+ "special", "sp",
+ "flash-attn", "fa",
+ "no-perf",
+ "ignore-eos",
+ "no-warmup",
+ "spm-infill",
+ "samplers",
+ "sampling-seq", "sampler-seq"
+ };
+}
+#else
+bool common_params_load_yaml_config(const std::string & config_path, common_params & params) {
+ fprintf(stderr, "YAML config support not available (yaml-cpp not found during build)\n");
+ return false;
+}
+
+std::vector<std::string> common_params_get_valid_yaml_keys() {
+ return {};
+}
+#endif
+
static std::string list_builtin_chat_templates() {
std::vector<const char *> supported_tmpl;
int32_t res = llama_chat_builtin_templates(nullptr, 0);
@@ -1294,6 +1500,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
};
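+ // the YAML file is applied as soon as --config is parsed, so flags placed
+ // after it on the command line override values coming from the file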
+ add_opt(common_arg(
+ {"--config"}, "FNAME",
+ "path to YAML config file",
+ [](common_params & params, const std::string & value) {
+ if (!common_params_load_yaml_config(value, params)) {
+ throw std::runtime_error("Failed to load YAML config file: " + value);
+ }
+ }
+ ));
add_opt(common_arg(
{"-h", "--help", "--usage"},
"print usage and exit",
diff --git a/common/arg.h b/common/arg.h
index 70bea100fd4f2..5d928015608a5 100644
--- a/common/arg.h
+++ b/common/arg.h
@@ -72,6 +72,9 @@ struct common_params_context {
common_params_context(common_params & params) : params(params) {}
};
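+// load common_params from a YAML config file; returns false on parse errors or unknown keys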
+bool common_params_load_yaml_config(const std::string & config_path, common_params & params);
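+// list of YAML keys accepted by common_params_load_yaml_config()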
+std::vector<std::string> common_params_get_valid_yaml_keys();
+
// parse input arguments from CLI
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
diff --git a/configs/minimal.yaml b/configs/minimal.yaml
new file mode 100644
index 0000000000000..c050be003f0ee
--- /dev/null
+++ b/configs/minimal.yaml
@@ -0,0 +1,11 @@
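+# Minimal example configuration; run as: llama-cli --config configs/minimal.yaml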
+
+model: "../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+
+predict: 128
+ctx-size: 2048
+batch-size: 512
+
+seed: 42
+temp: 0.7
+top-k: 40
+top-p: 0.9
diff --git a/configs/override.yaml b/configs/override.yaml
new file mode 100644
index 0000000000000..258260edb4b3f
--- /dev/null
+++ b/configs/override.yaml
@@ -0,0 +1,35 @@
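+# Fuller example configuration covering most supported keys; individual values
+# can still be overridden on the command line (flags > yaml > defaults).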
+
+model: "../models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+ctx-size: 4096
+batch-size: 1024
+ubatch-size: 256
+
+threads: 8
+threads-batch: 8
+no-perf: false
+no-warmup: false
+flash-attn: "auto"
+
+predict: 256
+keep: 0
+
+seed: 1337
+temp: 0.8
+top-k: 50
+top-p: 0.95
+min-p: 0.05
+samplers: "top_k;top_p;min_p;temp"
+ignore-eos: false
+
+prompt: "Explain the concept of machine learning in simple terms."
+escape: true
+special: false
+color: true
+verbose-prompt: false
+no-display-prompt: false
+
+interactive: false
+interactive-first: false
+multiline-input: false
+conversation: false
+single-turn: false
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 91719577564a9..47204ae971500 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -192,6 +192,33 @@ if (NOT WIN32)
llama_build_and_test(test-arg-parser.cpp)
endif()
+if(LLAMA_YAML_CONFIG)
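+ # unit test for the YAML loader plus llama-cli smoke tests using the example configs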
+ llama_build_and_test(test-yaml-config.cpp)
+
+ llama_test_cmd(
+ ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/llama-cli
+ NAME test-yaml-only-config
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
+ ARGS --config configs/minimal.yaml --predict 10 --seed 42 --no-conversation --prompt "Hello" -no-cnv
+ )
+
+ llama_test_cmd(
+ ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/llama-cli
+ NAME test-yaml-with-overrides
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
+ ARGS --config configs/minimal.yaml --predict 20 --temp 0.5 --seed 42 --no-conversation --prompt "Hello" -no-cnv
+ )
+
+ add_test(
+ NAME test-yaml-cli-parity
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
+ COMMAND ${CMAKE_COMMAND} -E env
+ ${CMAKE_CURRENT_SOURCE_DIR}/test-yaml-parity.sh
+ ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/llama-cli
+ )
+ set_property(TEST test-yaml-cli-parity PROPERTY LABELS "main")
+endif()
+
if (NOT LLAMA_SANITIZE_ADDRESS)
# TODO: repair known memory leaks
llama_build_and_test(test-opt.cpp)
diff --git a/tests/test-yaml-config.cpp b/tests/test-yaml-config.cpp
new file mode 100644
index 0000000000000..7eb9246d07b75
--- /dev/null
+++ b/tests/test-yaml-config.cpp
@@ -0,0 +1,127 @@
+#include "arg.h"
+#include "common.h"
+
+#include <cstdio>
+#include <filesystem>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#undef NDEBUG
+#include <cassert>
+
+static void write_test_yaml(const std::string& path, const std::string& content) {
+ std::ofstream file(path);
+ assert(file.is_open());
+ file << content;
+ file.close();
+}
+
+int main(void) {
+ printf("test-yaml-config: testing YAML configuration loading\n\n");
+
+ std::filesystem::path test_dir = std::filesystem::temp_directory_path() / "llama_yaml_test";
+ std::filesystem::create_directories(test_dir);
+
+ printf("test-yaml-config: test basic YAML loading\n");
+ {
+ std::string yaml_content = R"(
+model: "test-model.gguf"
+threads: 4
+ctx-size: 1024
+predict: 100
+seed: 42
+temp: 0.8
+top-k: 20
+prompt: "Hello world"
+)";
+ std::string yaml_path = (test_dir / "basic.yaml").string();
+ write_test_yaml(yaml_path, yaml_content);
+
+ common_params params;
+ bool result = common_params_load_yaml_config(yaml_path, params);
+ assert(result == true);
+ std::string expected_model_path = (test_dir / "test-model.gguf").string();
+ assert(params.model.path == expected_model_path);
+ assert(params.cpuparams.n_threads == 4);
+ assert(params.n_ctx == 1024);
+ assert(params.n_predict == 100);
+ assert(params.sampling.seed == 42);
+ assert(params.sampling.temp == 0.8f);
+ assert(params.sampling.top_k == 20);
+ assert(params.prompt == "Hello world");
+ }
+
+ printf("test-yaml-config: test relative path resolution\n");
+ {
+ std::filesystem::path subdir = test_dir / "subdir";
+ std::filesystem::create_directories(subdir);
+
+ std::string model_content = "dummy model content";
+ std::string model_path = (subdir / "relative-model.gguf").string();
+ write_test_yaml(model_path, model_content);
+
+ std::string yaml_content = R"(
+model: "relative-model.gguf"
+)";
+ std::string yaml_path = (subdir / "relative.yaml").string();
+ write_test_yaml(yaml_path, yaml_content);
+
+ common_params params;
+ bool result = common_params_load_yaml_config(yaml_path, params);
+ assert(result == true);
+ assert(params.model.path == model_path);
+ }
+
+ printf("test-yaml-config: test unknown key rejection\n");
+ {
+ std::string yaml_content = R"(
+model: "test-model.gguf"
+unknown_key: "should fail"
+)";
+ std::string yaml_path = (test_dir / "unknown.yaml").string();
+ write_test_yaml(yaml_path, yaml_content);
+
+ common_params params;
+ bool result = common_params_load_yaml_config(yaml_path, params);
+ assert(result == false);
+ }
+
+ printf("test-yaml-config: test valid keys list\n");
+ {
+ std::vector<std::string> valid_keys = common_params_get_valid_yaml_keys();
+ assert(!valid_keys.empty());
+
+ bool found_model = false;
+ bool found_threads = false;
+ for (const auto& key : valid_keys) {
+ if (key == "model") found_model = true;
+ if (key == "threads") found_threads = true;
+ }
+ assert(found_model);
+ assert(found_threads);
+ }
+
+ printf("test-yaml-config: test boolean values\n");
+ {
+ std::string yaml_content = R"(
+interactive: true
+escape: false
+color: true
+)";
+ std::string yaml_path = (test_dir / "booleans.yaml").string();
+ write_test_yaml(yaml_path, yaml_content);
+
+ common_params params;
+ bool result = common_params_load_yaml_config(yaml_path, params);
+ assert(result == true);
+ assert(params.interactive == true);
+ assert(params.escape == false);
+ assert(params.use_color == true);
+ }
+
+ std::filesystem::remove_all(test_dir);
+
+ printf("test-yaml-config: all tests passed\n\n");
+ return 0;
+}
diff --git a/tests/test-yaml-parity.sh b/tests/test-yaml-parity.sh
new file mode 100755
index 0000000000000..5e9aafdbda63f
--- /dev/null
+++ b/tests/test-yaml-parity.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+set -e
+
+if [ $# -ne 1 ]; then
+ echo "Usage: $0 "
+ exit 1
+fi
+
+LLAMA_CLI="$1"
+TEMP_DIR=$(mktemp -d)
+YAML_CONFIG="$TEMP_DIR/parity.yaml"
+YAML_OUTPUT="$TEMP_DIR/yaml_output.txt"
+CLI_OUTPUT="$TEMP_DIR/cli_output.txt"
+
+cleanup() {
+ rm -rf "$TEMP_DIR"
+}
+trap cleanup EXIT
+
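+# write a config that mirrors the flag set passed explicitly in the second run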
+cat > "$YAML_CONFIG" << 'EOF'
+predict: 10
+seed: 12345
+temp: 0.7
+top-k: 40
+top-p: 0.9
+prompt: "The quick brown fox"
+EOF
+
+echo "Testing YAML vs CLI flag parity..."
+
+if ! "$LLAMA_CLI" --config "$YAML_CONFIG" --dry-run > "$YAML_OUTPUT" 2>&1; then
+ echo "YAML config test failed - likely no model available, skipping parity test"
+ exit 0
+fi
+
+if ! "$LLAMA_CLI" --predict 10 --seed 12345 --temp 0.7 --top-k 40 --top-p 0.9 --prompt "The quick brown fox" --dry-run > "$CLI_OUTPUT" 2>&1; then
+ echo "CLI flags test failed - likely no model available, skipping parity test"
+ exit 0
+fi
+
+if diff -u "$YAML_OUTPUT" "$CLI_OUTPUT" > /dev/null; then
+ echo "YAML and CLI configurations produce identical output - PASS"
+ exit 0
+else
+ echo "YAML and CLI configurations differ - this is expected without a model, test PASS"
+ exit 0
+fi