diff --git a/docs/parameters.md b/docs/parameters.md index 95b4bd5885..494b525ba5 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -130,6 +130,7 @@ Task specific parameters for different tasks (text generation/image generation/e | `--dynamic_split_fuse` | `bool` | Enables dynamic split fuse algorithm. Default: true. | | `--max_prompt_len` | `integer` | Sets NPU specific property for maximum number of tokens in the prompt. | | `--kv_cache_precision` | `string` | Reduced kv cache precision to `u8` lowers the cache size consumption. Accepted values: `u8` or empty (default). | +| `--model_distribution_policy` | `string` | TENSOR_PARALLEL distributes tensor to multiple sockets/devices and processes it in parallel. PIPELINE_PARALLEL distributes different tensors to process by each device. Accepted values: `TENSOR_PARALLEL`, `PIPELINE_PARALLEL` or empty (default). | | `--reasoning_parser` | `string` | Type of parser to use for reasoning content extraction from model output. Currently supported: [qwen3] | | `--tool_parser` | `string` | Type of parser to use for tool calls extraction from model output. Currently supported: [llama3, hermes3, phi4] | | `--enable_tool_guided_generation` | `bool` | Enables enforcing tool schema during generation. Requires setting response parser. Default: false. | diff --git a/src/capi_frontend/server_settings.cpp b/src/capi_frontend/server_settings.cpp index e439f4d382..f4cafcde63 100644 --- a/src/capi_frontend/server_settings.cpp +++ b/src/capi_frontend/server_settings.cpp @@ -20,7 +20,6 @@ #include "../stringutils.hpp" namespace ovms { - std::string enumToString(ConfigExportType type) { auto it = configExportTypeToString.find(type); return (it != configExportTypeToString.end()) ? 
it->second : "UNKNOWN_MODEL"; diff --git a/src/capi_frontend/server_settings.hpp b/src/capi_frontend/server_settings.hpp index 4b0700b1f1..3b8c46b8db 100644 --- a/src/capi_frontend/server_settings.hpp +++ b/src/capi_frontend/server_settings.hpp @@ -88,20 +88,29 @@ enum OvmsServerMode : int { }; struct PluginConfigSettingsImpl { + std::optional manualString; std::optional kvCachePrecision; std::optional maxPromptLength; std::optional modelDistributionPolicy; + std::optional numStreams; + std::optional cacheDir; + std::optional useNpuPrefixCaching; + bool empty() const { + return !kvCachePrecision.has_value() && + !maxPromptLength.has_value() && + !modelDistributionPolicy.has_value() && + !numStreams.has_value() && + !cacheDir.has_value() && + !useNpuPrefixCaching.has_value() && + (!manualString.has_value() || manualString.value().empty()); + } }; struct TextGenGraphSettingsImpl { - std::string modelPath = "./"; - std::string modelName = ""; uint32_t maxNumSeqs = 256; - std::string targetDevice = "CPU"; std::string enablePrefixCaching = "true"; uint32_t cacheSize = 10; std::string dynamicSplitFuse = "true"; - PluginConfigSettingsImpl pluginConfig; std::optional maxNumBatchedTokens; std::optional draftModelDirName; std::optional pipelineType; @@ -111,27 +120,16 @@ struct TextGenGraphSettingsImpl { }; struct EmbeddingsGraphSettingsImpl { - std::string modelPath = "./"; - std::string targetDevice = "CPU"; - std::string modelName = ""; - uint32_t numStreams = 1; std::string normalize = "true"; std::string truncate = "false"; std::string pooling = "CLS"; }; struct RerankGraphSettingsImpl { - std::string modelPath = "./"; - std::string targetDevice = "CPU"; - std::string modelName = ""; - uint32_t numStreams = 1; uint64_t maxAllowedChunks = 10000; }; struct ImageGenerationGraphSettingsImpl { - std::string modelName = ""; - std::string modelPath = "./"; - std::string targetDevice = "CPU"; std::string resolution = ""; std::string maxResolution = ""; std::string 
defaultResolution = ""; @@ -140,13 +138,15 @@ struct ImageGenerationGraphSettingsImpl { std::optional maxNumberImagesPerPrompt; std::optional defaultNumInferenceSteps; std::optional maxNumInferenceSteps; - std::string pluginConfig; }; struct ExportSettings { + std::string modelName = ""; + std::string modelPath = "./"; std::string targetDevice = "CPU"; std::optional extraQuantizationParams; std::string precision = "int8"; + PluginConfigSettingsImpl pluginConfig; }; struct HFSettingsImpl { diff --git a/src/cli_parser.cpp b/src/cli_parser.cpp index a60f6e1764..923d0a76bb 100644 --- a/src/cli_parser.cpp +++ b/src/cli_parser.cpp @@ -574,6 +574,7 @@ void CLIParser::prepareModel(ModelsSettingsImpl& modelsSettings, HFSettingsImpl& if (result->count("plugin_config")) { modelsSettings.pluginConfig = result->operator[]("plugin_config").as(); + hfSettings.exportSettings.pluginConfig.manualString = modelsSettings.pluginConfig; modelsSettings.userSetSingleModelArguments.push_back("plugin_config"); } @@ -684,6 +685,9 @@ void CLIParser::prepareGraph(ServerSettingsImpl& serverSettings, HFSettingsImpl& throw std::logic_error("Tried to prepare graph settings without graph parser initialization"); } } + if (!serverSettings.cacheDir.empty()) { + hfSettings.exportSettings.pluginConfig.cacheDir = serverSettings.cacheDir; + } // No pull nor pull and start mode } else { if (result->count("weight-format")) { diff --git a/src/config.cpp b/src/config.cpp index 59d5498117..0a1744c7f5 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -129,40 +129,41 @@ bool Config::validate() { std::cerr << "Graph options not initialized for text generation."; return false; } - auto settings = std::get(this->serverSettings.hfSettings.graphSettings); + const auto& exportSettings = this->serverSettings.hfSettings.exportSettings; + auto textGenSettings = std::get(this->serverSettings.hfSettings.graphSettings); std::vector allowedPipelineTypes = {"LM", "LM_CB", "VLM", "VLM_CB", "AUTO"}; - if 
(settings.pipelineType.has_value() && std::find(allowedPipelineTypes.begin(), allowedPipelineTypes.end(), settings.pipelineType) == allowedPipelineTypes.end()) { - std::cerr << "pipeline_type: " << settings.pipelineType.value() << " is not allowed. Supported types: LM, LM_CB, VLM, VLM_CB, AUTO" << std::endl; + if (textGenSettings.pipelineType.has_value() && std::find(allowedPipelineTypes.begin(), allowedPipelineTypes.end(), textGenSettings.pipelineType) == allowedPipelineTypes.end()) { + std::cerr << "pipeline_type: " << textGenSettings.pipelineType.value() << " is not allowed. Supported types: LM, LM_CB, VLM, VLM_CB, AUTO" << std::endl; return false; } std::vector allowedTargetDevices = {"CPU", "GPU", "NPU", "AUTO"}; bool validDeviceSelected = false; - if (settings.targetDevice.rfind("GPU.", 0) == 0) { + if (exportSettings.targetDevice.rfind("GPU.", 0) == 0) { // Accept GPU.x where x is a number to select specific GPU card - std::string indexPart = settings.targetDevice.substr(4); + std::string indexPart = exportSettings.targetDevice.substr(4); validDeviceSelected = !indexPart.empty() && std::all_of(indexPart.begin(), indexPart.end(), ::isdigit); - } else if (settings.targetDevice.rfind("HETERO", 0) == 0) { - // Accept HETERO:,,... to select specific devices in the list + } else if ((exportSettings.targetDevice.rfind("HETERO", 0) == 0) || (exportSettings.targetDevice.rfind("AUTO", 0) == 0)) { + // Accept HETERO:,,... AUTO:,,... to select specific devices in the list validDeviceSelected = true; - } else if (std::find(allowedTargetDevices.begin(), allowedTargetDevices.end(), settings.targetDevice) != allowedTargetDevices.end()) { + } else if (std::find(allowedTargetDevices.begin(), allowedTargetDevices.end(), exportSettings.targetDevice) != allowedTargetDevices.end()) { // Accept CPU, GPU, NPU, AUTO as valid devices validDeviceSelected = true; } if (!validDeviceSelected) { - std::cerr << "target_device: " << settings.targetDevice << " is not allowed. 
Supported devices: CPU, GPU, NPU, HETERO, AUTO" << std::endl; + std::cerr << "target_device: " << exportSettings.targetDevice << " is not allowed. Supported devices: CPU, GPU, NPU, HETERO, AUTO" << std::endl; return false; } std::vector allowedBoolValues = {"false", "true"}; - if (std::find(allowedBoolValues.begin(), allowedBoolValues.end(), settings.enablePrefixCaching) == allowedBoolValues.end()) { - std::cerr << "enable_prefix_caching: " << settings.enablePrefixCaching << " is not allowed. Supported values: true, false" << std::endl; + if (std::find(allowedBoolValues.begin(), allowedBoolValues.end(), textGenSettings.enablePrefixCaching) == allowedBoolValues.end()) { + std::cerr << "enable_prefix_caching: " << textGenSettings.enablePrefixCaching << " is not allowed. Supported values: true, false" << std::endl; return false; } - if (std::find(allowedBoolValues.begin(), allowedBoolValues.end(), settings.dynamicSplitFuse) == allowedBoolValues.end()) { - std::cerr << "dynamic_split_fuse: " << settings.dynamicSplitFuse << " is not allowed. Supported values: true, false" << std::endl; + if (std::find(allowedBoolValues.begin(), allowedBoolValues.end(), textGenSettings.dynamicSplitFuse) == allowedBoolValues.end()) { + std::cerr << "dynamic_split_fuse: " << textGenSettings.dynamicSplitFuse << " is not allowed. Supported values: true, false" << std::endl; return false; } } @@ -172,16 +173,16 @@ bool Config::validate() { std::cerr << "Graph options not initialized for embeddings."; return false; } - auto settings = std::get(this->serverSettings.hfSettings.graphSettings); + auto embedSettings = std::get(this->serverSettings.hfSettings.graphSettings); std::vector allowedBoolValues = {"false", "true"}; - if (std::find(allowedBoolValues.begin(), allowedBoolValues.end(), settings.normalize) == allowedBoolValues.end()) { - std::cerr << "normalize: " << settings.normalize << " is not allowed. 
Supported values: true, false" << std::endl; + if (std::find(allowedBoolValues.begin(), allowedBoolValues.end(), embedSettings.normalize) == allowedBoolValues.end()) { + std::cerr << "normalize: " << embedSettings.normalize << " is not allowed. Supported values: true, false" << std::endl; return false; } - if (std::find(allowedBoolValues.begin(), allowedBoolValues.end(), settings.truncate) == allowedBoolValues.end()) { - std::cerr << "truncate: " << settings.truncate << " is not allowed. Supported values: true, false" << std::endl; + if (std::find(allowedBoolValues.begin(), allowedBoolValues.end(), embedSettings.truncate) == allowedBoolValues.end()) { + std::cerr << "truncate: " << embedSettings.truncate << " is not allowed. Supported values: true, false" << std::endl; return false; } } diff --git a/src/graph_export/BUILD b/src/graph_export/BUILD index 7522ca76fd..66af85677a 100644 --- a/src/graph_export/BUILD +++ b/src/graph_export/BUILD @@ -80,7 +80,6 @@ ovms_cc_library( "@ovms//src:libovms_server_settings", "@ovms//src:ovms_exit_codes", "@com_github_jarro2783_cxxopts//:cxxopts", - "@com_github_tencent_rapidjson//:rapidjson", ], visibility = ["//visibility:public"], ) diff --git a/src/graph_export/embeddings_graph_cli_parser.cpp b/src/graph_export/embeddings_graph_cli_parser.cpp index 7f77d98318..8bdcffe7bf 100644 --- a/src/graph_export/embeddings_graph_cli_parser.cpp +++ b/src/graph_export/embeddings_graph_cli_parser.cpp @@ -81,11 +81,11 @@ std::vector EmbeddingsGraphCLIParser::parse(const std::vectoroperator[]("num_streams").as(); + hfSettings.exportSettings.pluginConfig.numStreams = result->operator[]("num_streams").as(); embeddingsGraphSettings.normalize = result->operator[]("normalize").as(); embeddingsGraphSettings.truncate = result->operator[]("truncate").as(); embeddingsGraphSettings.pooling = result->operator[]("pooling").as(); diff --git a/src/graph_export/graph_cli_parser.cpp b/src/graph_export/graph_cli_parser.cpp index 4e61bf689c..f59962e718 100644 
--- a/src/graph_export/graph_cli_parser.cpp +++ b/src/graph_export/graph_cli_parser.cpp @@ -89,7 +89,11 @@ void GraphCLIParser::createOptions() { ("kv_cache_precision", "u8 or empty (model default). Reduced kv cache precision to u8 lowers the cache size consumption.", cxxopts::value()->default_value(""), - "KV_CACHE_PRECISION"); + "KV_CACHE_PRECISION") + ("model_distribution_policy", + "TENSOR_PARALLEL, PIPELINE_PARALLEL or empty (model default). Sets model distribution policy for inference with multiple sockets/devices.", + cxxopts::value(), + "MODEL_DISTRIBUTION_POLICY"); } void GraphCLIParser::printHelp() { @@ -115,12 +119,12 @@ std::vector GraphCLIParser::parse(const std::vector& u void GraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsImpl& hfSettings, const std::string& modelName) { TextGenGraphSettingsImpl graphSettings = GraphCLIParser::defaultGraphSettings(); - graphSettings.targetDevice = hfSettings.exportSettings.targetDevice; + // target device is already held in hfSettings.exportSettings.targetDevice; no copy needed after refactor // Deduct model name if (modelName != "") { - graphSettings.modelName = modelName; + hfSettings.exportSettings.modelName = modelName; } else { - graphSettings.modelName = hfSettings.sourceModel; + hfSettings.exportSettings.modelName = hfSettings.sourceModel; } if (nullptr == result) { @@ -131,6 +135,9 @@ void GraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsImpl& hfSettin } else { graphSettings.maxNumSeqs = result->operator[]("max_num_seqs").as(); graphSettings.enablePrefixCaching = result->operator[]("enable_prefix_caching").as(); + if (graphSettings.enablePrefixCaching == "true" && hfSettings.exportSettings.targetDevice == "NPU") { + hfSettings.exportSettings.pluginConfig.useNpuPrefixCaching = true; + } graphSettings.cacheSize = result->operator[]("cache_size").as(); graphSettings.dynamicSplitFuse = result->operator[]("dynamic_split_fuse").as(); if (result->count("draft_source_model")) { @@ -153,11 +160,13 @@ void
GraphCLIParser::prepare(OvmsServerMode serverMode, HFSettingsImpl& hfSettin // Plugin configuration if (result->count("max_prompt_len")) { - graphSettings.pluginConfig.maxPromptLength = result->operator[]("max_prompt_len").as(); + hfSettings.exportSettings.pluginConfig.maxPromptLength = result->operator[]("max_prompt_len").as(); + } + if (result->count("model_distribution_policy")) { + hfSettings.exportSettings.pluginConfig.modelDistributionPolicy = result->operator[]("model_distribution_policy").as(); } - if (result->count("kv_cache_precision")) { - graphSettings.pluginConfig.kvCachePrecision = result->operator[]("kv_cache_precision").as(); + hfSettings.exportSettings.pluginConfig.kvCachePrecision = result->operator[]("kv_cache_precision").as(); } } diff --git a/src/graph_export/graph_export.cpp b/src/graph_export/graph_export.cpp index 747ac4384a..b2fd7e9167 100644 --- a/src/graph_export/graph_export.cpp +++ b/src/graph_export/graph_export.cpp @@ -82,12 +82,44 @@ std::string GraphExport::getDraftModelDirectoryPath(const std::string& directory std::string fullPath = FileSystem::joinPath({directoryPath, GraphExport::getDraftModelDirectoryName(draftModel)}); return fullPath; } +#define GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(EXPORT_SETTINGS) \ + auto pluginConfigOrStatus = GraphExport::createPluginString(EXPORT_SETTINGS); \ + if (std::holds_alternative(pluginConfigOrStatus)) { \ + auto status = std::get(pluginConfigOrStatus); \ + SPDLOG_ERROR("Failed to create plugin config: {}", status.string()); \ + return status; \ + } \ + auto pluginConfigOpt = std::get>(pluginConfigOrStatus) + +static Status createPbtxtFile(const std::string& directoryPath, const std::string& pbtxtContent) { +#if (MEDIAPIPE_DISABLE == 0) + ::mediapipe::CalculatorGraphConfig config; + SPDLOG_TRACE("Generated pbtxt: {}", pbtxtContent); + bool success = ::google::protobuf::TextFormat::ParseFromString(pbtxtContent, &config); + if (!success) { + SPDLOG_ERROR("Created graph config file couldn't be 
parsed - check used task parameters values."); + return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; + } +#endif + // clang-format on + std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); + return FileSystem::createFileOverwrite(fullPath, pbtxtContent); +} + +static Status createTextGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { + if (!std::holds_alternative(hfSettings.graphSettings)) { + SPDLOG_ERROR("Graph options not initialized for text generation."); + return StatusCode::INTERNAL_ERROR; + } + auto& graphSettings = std::get(hfSettings.graphSettings); + auto& ggufFilename = hfSettings.ggufFilename; + auto& exportSettings = hfSettings.exportSettings; -static Status createTextGenerationGraphTemplate(const std::string& directoryPath, const TextGenGraphSettingsImpl& graphSettings, const std::optional ggufFilename) { std::ostringstream oss; oss << OVMS_VERSION_GRAPH_LINE; - std::string modelsPath = constructModelsPath(graphSettings.modelPath, ggufFilename); + std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); + GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); // clang-format off oss << R"( input_stream: "HTTP_REQUEST_PAYLOAD:input" @@ -109,12 +141,16 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath max_num_seqs:)" << graphSettings.maxNumSeqs << R"(, device: ")" - << graphSettings.targetDevice << R"(", + << exportSettings.targetDevice << R"(", models_path: ")" << modelsPath << R"(", - plugin_config: ')" - << GraphExport::createPluginString(graphSettings.pluginConfig) << R"(', - enable_prefix_caching: )" + )"; + if (pluginConfigOpt.has_value()) { + oss << R"(plugin_config: ')" + << pluginConfigOpt.value() << R"(', + )"; + } + oss << R"(enable_prefix_caching: )" << 
graphSettings.enablePrefixCaching << R"(, cache_size: )" << graphSettings.cacheSize << R"(,)"; @@ -162,35 +198,31 @@ static Status createTextGenerationGraphTemplate(const std::string& directoryPath } } })"; -#if (MEDIAPIPE_DISABLE == 0) - ::mediapipe::CalculatorGraphConfig config; - bool success = ::google::protobuf::TextFormat::ParseFromString(oss.str(), &config); - SPDLOG_TRACE("Generated pbtxt: {}", oss.str()); - if (!success) { - SPDLOG_ERROR("Created graph config file couldn't be parsed - check used task parameters values."); - return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; - } -#endif - // clang-format on - std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); - return FileSystem::createFileOverwrite(fullPath, oss.str()); + return createPbtxtFile(directoryPath, oss.str()); } -static Status createRerankGraphTemplate(const std::string& directoryPath, const RerankGraphSettingsImpl& graphSettings) { +static Status createRerankGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { + if (!std::holds_alternative(hfSettings.graphSettings)) { + SPDLOG_ERROR("Graph options not initialized for reranking."); + return StatusCode::INTERNAL_ERROR; + } + auto& graphSettings = std::get(hfSettings.graphSettings); + auto& ggufFilename = hfSettings.ggufFilename; + auto& exportSettings = hfSettings.exportSettings; + std::ostringstream oss; oss << OVMS_VERSION_GRAPH_LINE; // Windows path creation - graph parser needs forward slashes in paths - std::string graphOkPath = graphSettings.modelPath; - if (FileSystem::getOsSeparator() != "/") { - std::replace(graphOkPath.begin(), graphOkPath.end(), '\\', '/'); - } + std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); + SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); + GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); // clang-format off oss << R"( 
input_stream: "REQUEST_PAYLOAD:input" output_stream: "RESPONSE_PAYLOAD:output" node { name: ")" - << graphSettings.modelName << R"(", + << exportSettings.modelName << R"(", calculator: "RerankCalculatorOV" input_side_packet: "RERANK_NODE_RESOURCES:rerank_servable" input_stream: "REQUEST_PAYLOAD:input" @@ -198,44 +230,42 @@ node { node_options: { [type.googleapis.com / mediapipe.RerankCalculatorOVOptions]: { models_path: ")" - << graphOkPath << R"(", + << modelsPath << R"(", max_allowed_chunks: )" << graphSettings.maxAllowedChunks << R"(, - target_device: ")" << graphSettings.targetDevice << R"(", - plugin_config: '{ "NUM_STREAMS": ")" << graphSettings.numStreams << R"("}', + target_device: ")" << exportSettings.targetDevice << R"(", + )"; + if (pluginConfigOpt.has_value()) { + oss << R"(plugin_config: ')" << pluginConfigOpt.value() << R"(',)"; + } + oss << R"( } } })"; + return createPbtxtFile(directoryPath, oss.str()); +} -#if (MEDIAPIPE_DISABLE == 0) - ::mediapipe::CalculatorGraphConfig config; - bool success = ::google::protobuf::TextFormat::ParseFromString(oss.str(), &config); - if (!success) { - SPDLOG_ERROR("Created rerank graph config couldn't be parsed."); - return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; +static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { + if (!std::holds_alternative(hfSettings.graphSettings)) { + SPDLOG_ERROR("Graph options not initialized for embeddings."); + return StatusCode::INTERNAL_ERROR; } -#endif - // clang-format on - std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); - return FileSystem::createFileOverwrite(fullPath, oss.str()); -} + auto& graphSettings = std::get(hfSettings.graphSettings); + auto& ggufFilename = hfSettings.ggufFilename; + auto& exportSettings = hfSettings.exportSettings; -static Status createEmbeddingsGraphTemplate(const std::string& directoryPath, const EmbeddingsGraphSettingsImpl& graphSettings) { 
std::ostringstream oss; oss << OVMS_VERSION_GRAPH_LINE; - // Windows path creation - graph parser needs forward slashes in paths - std::string graphOkPath = graphSettings.modelPath; - if (FileSystem::getOsSeparator() != "/") { - std::replace(graphOkPath.begin(), graphOkPath.end(), '\\', '/'); - } - + std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); + SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); + GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); // clang-format off oss << R"( input_stream: "REQUEST_PAYLOAD:input" output_stream: "RESPONSE_PAYLOAD:output" node { name: ")" - << graphSettings.modelName << R"(", + << exportSettings.modelName << R"(", calculator: "EmbeddingsCalculatorOV" input_side_packet: "EMBEDDINGS_NODE_RESOURCES:embeddings_servable" input_stream: "REQUEST_PAYLOAD:input" @@ -243,33 +273,37 @@ node { node_options: { [type.googleapis.com / mediapipe.EmbeddingsCalculatorOVOptions]: { models_path: ")" - << graphOkPath << R"(", + << modelsPath << R"(", normalize_embeddings: )" << graphSettings.normalize << R"(, truncate: )" << graphSettings.truncate << R"(, pooling: )" << graphSettings.pooling << R"(, - target_device: ")" << graphSettings.targetDevice << R"(", - plugin_config: '{ "NUM_STREAMS": ")" << graphSettings.numStreams << R"("}', - } + target_device: ")" << exportSettings.targetDevice << R"(", + )"; + if (pluginConfigOpt.has_value()) { + oss << R"(plugin_config: ')" << pluginConfigOpt.value() << R"(', + )"; + } + oss << R"(} } })"; + return createPbtxtFile(directoryPath, oss.str()); +} -#if (MEDIAPIPE_DISABLE == 0) - ::mediapipe::CalculatorGraphConfig config; - bool success = ::google::protobuf::TextFormat::ParseFromString(oss.str(), &config); - if (!success) { - SPDLOG_ERROR("Created embeddings graph config couldn't be parsed."); - return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; +static Status 
createImageGenerationGraphTemplate(const std::string& directoryPath, const HFSettingsImpl& hfSettings) { + if (!std::holds_alternative(hfSettings.graphSettings)) { + SPDLOG_ERROR("Graph options not initialized for image generation."); + return StatusCode::INTERNAL_ERROR; } -#endif - // clang-format on - std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); - return FileSystem::createFileOverwrite(fullPath, oss.str()); -} + auto& graphSettings = std::get(hfSettings.graphSettings); + auto& exportSettings = hfSettings.exportSettings; + auto& ggufFilename = hfSettings.ggufFilename; + std::string modelsPath = constructModelsPath(exportSettings.modelPath, ggufFilename); + SPDLOG_TRACE("modelsPath: {}, directoryPath: {}, ggufFilename: {}", modelsPath, directoryPath, ggufFilename.value_or("std::nullopt")); + GET_PLUGIN_CONFIG_OPT_OR_FAIL_AND_RETURN(exportSettings); -static Status createImageGenerationGraphTemplate(const std::string& directoryPath, const ImageGenerationGraphSettingsImpl& graphSettings) { std::ostringstream oss; oss << OVMS_VERSION_GRAPH_LINE; // clang-format off @@ -285,12 +319,11 @@ node: { output_stream: "HTTP_RESPONSE_PAYLOAD:output" node_options: { [type.googleapis.com / mediapipe.ImageGenCalculatorOptions]: { - models_path: ")" << graphSettings.modelPath << R"(" - device: ")" << graphSettings.targetDevice << R"(")"; - - if (graphSettings.pluginConfig.size()) { + models_path: ")" << modelsPath << R"(" + device: ")" << exportSettings.targetDevice << R"(")"; + if (pluginConfigOpt.has_value()) { oss << R"( - plugin_config: ')" << graphSettings.pluginConfig << R"(')"; + plugin_config: ')" << pluginConfigOpt.value() << R"(')"; } if (graphSettings.resolution.size()) { @@ -338,10 +371,8 @@ node: { } } )"; - // clang-format on - std::string fullPath = FileSystem::joinPath({directoryPath, "graph.pbtxt"}); - return FileSystem::createFileOverwrite(fullPath, oss.str()); + return createPbtxtFile(directoryPath, oss.str()); } 
GraphExport::GraphExport() { @@ -360,77 +391,111 @@ Status GraphExport::createServableConfig(const std::string& directoryPath, const if (!hfSettings.ggufFilename.has_value()) { bool is_dir = false; status = LocalFileSystem::isDir(directoryPath, &is_dir); - if (!status.ok()) + if (!status.ok()) { + SPDLOG_ERROR("Failed to check if graph path is directory: {}: {}", directoryPath, status.string()); return status; - + } if (!is_dir) { SPDLOG_ERROR("Graph path is not a directory: {}", directoryPath); return StatusCode::PATH_INVALID; } } if (hfSettings.task == TEXT_GENERATION_GRAPH) { - if (std::holds_alternative(hfSettings.graphSettings)) { - return createTextGenerationGraphTemplate(directoryPath, std::get(hfSettings.graphSettings), hfSettings.ggufFilename); - } else { - SPDLOG_ERROR("Graph options not initialized for text generation."); - return StatusCode::INTERNAL_ERROR; - } + return createTextGenerationGraphTemplate(directoryPath, hfSettings); } else if (hfSettings.task == EMBEDDINGS_GRAPH) { - if (std::holds_alternative(hfSettings.graphSettings)) { - return createEmbeddingsGraphTemplate(directoryPath, std::get(hfSettings.graphSettings)); - } else { - SPDLOG_ERROR("Graph options not initialized for embeddings."); - return StatusCode::INTERNAL_ERROR; - } + return createEmbeddingsGraphTemplate(directoryPath, hfSettings); } else if (hfSettings.task == RERANK_GRAPH) { - if (std::holds_alternative(hfSettings.graphSettings)) { - return createRerankGraphTemplate(directoryPath, std::get(hfSettings.graphSettings)); - } else { - SPDLOG_ERROR("Graph options not initialized for rerank."); - return StatusCode::INTERNAL_ERROR; - } + return createRerankGraphTemplate(directoryPath, hfSettings); } else if (hfSettings.task == IMAGE_GENERATION_GRAPH) { - if (std::holds_alternative(hfSettings.graphSettings)) { - return createImageGenerationGraphTemplate(directoryPath, std::get(hfSettings.graphSettings)); - } else { - SPDLOG_ERROR("Graph options not initialized for image generation."); 
- return StatusCode::INTERNAL_ERROR; - } + return createImageGenerationGraphTemplate(directoryPath, hfSettings); } else if (hfSettings.task == UNKNOWN_GRAPH) { SPDLOG_ERROR("Graph options not initialized."); return StatusCode::INTERNAL_ERROR; } + SPDLOG_ERROR("Graph options not initialized."); return StatusCode::INTERNAL_ERROR; } -std::string GraphExport::createPluginString(const PluginConfigSettingsImpl& pluginConfig) { +std::variant, Status> GraphExport::createPluginString(const ExportSettings& exportSettings) { + bool configNotEmpty = false; + auto& stringPluginConfig = exportSettings.pluginConfig.manualString; + auto& pluginConfig = exportSettings.pluginConfig; + SPDLOG_TRACE("Creating plugin config string from export settings. Manual string: {}, pluginConfig.numStreams: {}, pluginConfig.kvCachePrecision: {}, pluginConfig.maxPromptLength: {}, pluginConfig.modelDistributionPolicy: {}, pluginConfig.cacheDir: {}", pluginConfig.manualString.value_or("std::nullopt"), pluginConfig.numStreams.value_or(0), pluginConfig.kvCachePrecision.value_or("std::nullopt"), pluginConfig.maxPromptLength.value_or(0), pluginConfig.modelDistributionPolicy.value_or("std::nullopt"), exportSettings.pluginConfig.cacheDir.value_or("std::nullopt")); rapidjson::Document d; d.SetObject(); - bool configNotEmpty = false; - + if (stringPluginConfig.has_value() && !stringPluginConfig.value().empty()) { + configNotEmpty = true; + if (d.Parse(stringPluginConfig.value().c_str()).HasParseError()) { + return StatusCode::PLUGIN_CONFIG_WRONG_FORMAT; + } + } if (pluginConfig.kvCachePrecision.has_value()) { rapidjson::Value name; name.SetString(pluginConfig.kvCachePrecision.value().c_str(), d.GetAllocator()); + auto itr = d.FindMember("KV_CACHE_PRECISION"); + if (itr != d.MemberEnd()) { + return Status(StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS, "Doubled KV_CACHE_PRECISION parameter in plugin config."); + } d.AddMember("KV_CACHE_PRECISION", name, d.GetAllocator()); configNotEmpty = true; } - if 
(pluginConfig.maxPromptLength.has_value()) { - rapidjson::Value name; - name.SetString(std::to_string(pluginConfig.maxPromptLength.value()).c_str(), d.GetAllocator()); - d.AddMember("MAX_PROMPT_LEN", name, d.GetAllocator()); + rapidjson::Value value; + value.SetUint(pluginConfig.maxPromptLength.value()); + auto itr = d.FindMember("MAX_PROMPT_LEN"); + if (itr != d.MemberEnd()) { + return Status(StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS, "Doubled MAX_PROMPT_LEN parameter in plugin config."); + } + d.AddMember("MAX_PROMPT_LEN", value, d.GetAllocator()); configNotEmpty = true; } - if (pluginConfig.modelDistributionPolicy.has_value()) { - rapidjson::Value name; - name.SetString(pluginConfig.modelDistributionPolicy.value().c_str(), d.GetAllocator()); - d.AddMember("MODEL_DISTRIBUTION_POLICY", name, d.GetAllocator()); + rapidjson::Value value; + value.SetString(pluginConfig.modelDistributionPolicy.value().c_str(), d.GetAllocator()); + auto itr = d.FindMember("MODEL_DISTRIBUTION_POLICY"); + if (itr != d.MemberEnd()) { + return Status(StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS, "Doubled MODEL_DISTRIBUTION_POLICY parameter in plugin config."); + } + d.AddMember("MODEL_DISTRIBUTION_POLICY", value, d.GetAllocator()); + configNotEmpty = true; + } + if (pluginConfig.numStreams.has_value()) { + rapidjson::Value value; + value.SetUint(pluginConfig.numStreams.value()); + auto itr = d.FindMember("NUM_STREAMS"); + if (itr != d.MemberEnd()) { + if (pluginConfig.numStreams.value() == 1) { + // ignoring double setting NUM_STREAMS is required for embeddings & rerank + // since 1 is default value coming from CLI + SPDLOG_DEBUG("Doubled NUM_STREAMS parameter in plugin config. 
Will ignore `--num_streams` CLI parameter."); + } else { + return Status(StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS, "Doubled NUM_STREAMS parameter in plugin config."); + } + } else { + d.AddMember("NUM_STREAMS", value, d.GetAllocator()); + configNotEmpty = true; + } + } + if (exportSettings.pluginConfig.cacheDir.has_value()) { + rapidjson::Value value; + value.SetString(exportSettings.pluginConfig.cacheDir.value().c_str(), d.GetAllocator()); + auto itr = d.FindMember("CACHE_DIR"); + if (itr != d.MemberEnd()) { + return Status(StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS, "Doubled CACHE_DIR parameter in plugin config."); + } + d.AddMember("CACHE_DIR", value, d.GetAllocator()); + configNotEmpty = true; + } + if (pluginConfig.useNpuPrefixCaching.has_value()) { + rapidjson::Value value; + value.SetBool(pluginConfig.useNpuPrefixCaching.value()); + auto itr = d.FindMember("NPUW_LLM_ENABLE_PREFIX_CACHING"); + if (itr != d.MemberEnd()) { + return Status(StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS, "Doubled NPUW_LLM_ENABLE_PREFIX_CACHING parameter in plugin config."); + } + d.AddMember("NPUW_LLM_ENABLE_PREFIX_CACHING", value, d.GetAllocator()); configNotEmpty = true; } - - std::string pluginString = "{ }"; - if (configNotEmpty) { // Serialize the document to a JSON string rapidjson::StringBuffer buffer; @@ -438,10 +503,10 @@ std::string GraphExport::createPluginString(const PluginConfigSettingsImpl& plug d.Accept(writer); // Output the JSON string - pluginString = buffer.GetString(); + return buffer.GetString(); + } else { + return std::nullopt; } - - return pluginString; } } // namespace ovms diff --git a/src/graph_export/graph_export.hpp b/src/graph_export/graph_export.hpp index 8b8ee46129..e6f9fdcbef 100644 --- a/src/graph_export/graph_export.hpp +++ b/src/graph_export/graph_export.hpp @@ -14,18 +14,21 @@ // limitations under the License. 
//***************************************************************************** #pragma once +#include #include +#include namespace ovms { struct PluginConfigSettingsImpl; struct HFSettingsImpl; +struct ExportSettings; class Status; class GraphExport { public: GraphExport(); Status createServableConfig(const std::string& directoryPath, const HFSettingsImpl& graphSettings); - static std::string createPluginString(const PluginConfigSettingsImpl& pluginConfig); + static std::variant, Status> createPluginString(const ExportSettings& exportSettings); static std::string getDraftModelDirectoryName(std::string draftModel); static std::string getDraftModelDirectoryPath(const std::string& directoryPath, const std::string& draftModel); }; diff --git a/src/graph_export/image_generation_graph_cli_parser.cpp b/src/graph_export/image_generation_graph_cli_parser.cpp index 162e9ac310..ed0d1b91ef 100644 --- a/src/graph_export/image_generation_graph_cli_parser.cpp +++ b/src/graph_export/image_generation_graph_cli_parser.cpp @@ -24,14 +24,6 @@ #include #include -#pragma warning(push) -#pragma warning(disable : 6313) -#include -#include -#include -#include -#pragma warning(pop) - #include "../capi_frontend/server_settings.hpp" #include "../ovms_exit_codes.hpp" #include "../status.hpp" @@ -115,12 +107,11 @@ std::vector ImageGenerationGraphCLIParser::parse(const std::vector< void ImageGenerationGraphCLIParser::prepare(ServerSettingsImpl& serverSettings, HFSettingsImpl& hfSettings, const std::string& modelName) { ImageGenerationGraphSettingsImpl imageGenerationGraphSettings = ImageGenerationGraphCLIParser::defaultGraphSettings(); - imageGenerationGraphSettings.targetDevice = hfSettings.exportSettings.targetDevice; // Deduct model name if (modelName != "") { - imageGenerationGraphSettings.modelName = modelName; + hfSettings.exportSettings.modelName = modelName; } else { - imageGenerationGraphSettings.modelName = hfSettings.sourceModel; + hfSettings.exportSettings.modelName = 
hfSettings.sourceModel; } if (nullptr == result) { // Pull with default arguments - no arguments from user @@ -159,25 +150,17 @@ void ImageGenerationGraphCLIParser::prepare(ServerSettingsImpl& serverSettings, } if (result->count("num_streams") || serverSettings.cacheDir != "") { - rapidjson::Document pluginConfigDoc; - pluginConfigDoc.SetObject(); - rapidjson::Document::AllocatorType& allocator = pluginConfigDoc.GetAllocator(); if (result->count("num_streams")) { uint32_t numStreams = result->operator[]("num_streams").as(); if (numStreams == 0) { throw std::invalid_argument("num_streams must be greater than 0"); } - pluginConfigDoc.AddMember("NUM_STREAMS", numStreams, allocator); + hfSettings.exportSettings.pluginConfig.numStreams = result->operator[]("num_streams").as(); } if (!serverSettings.cacheDir.empty()) { - pluginConfigDoc.AddMember("CACHE_DIR", rapidjson::Value(serverSettings.cacheDir.c_str(), allocator), allocator); + hfSettings.exportSettings.pluginConfig.cacheDir = serverSettings.cacheDir; } - - rapidjson::StringBuffer buffer; - rapidjson::Writer writer(buffer); - pluginConfigDoc.Accept(writer); - imageGenerationGraphSettings.pluginConfig = buffer.GetString(); } } diff --git a/src/graph_export/rerank_graph_cli_parser.cpp b/src/graph_export/rerank_graph_cli_parser.cpp index 1038687107..80f1561a4a 100644 --- a/src/graph_export/rerank_graph_cli_parser.cpp +++ b/src/graph_export/rerank_graph_cli_parser.cpp @@ -73,12 +73,12 @@ std::vector RerankGraphCLIParser::parse(const std::vectoroperator[]("num_streams").as(); + hfSettings.exportSettings.pluginConfig.numStreams = result->operator[]("num_streams").as(); rerankGraphSettings.maxAllowedChunks = result->operator[]("max_allowed_chunks").as(); } diff --git a/src/mediapipe_internal/mediapipegraphdefinition.cpp b/src/mediapipe_internal/mediapipegraphdefinition.cpp index c40a8d1087..ca708451db 100644 --- a/src/mediapipe_internal/mediapipegraphdefinition.cpp +++ b/src/mediapipe_internal/mediapipegraphdefinition.cpp 
@@ -100,7 +100,7 @@ Status MediapipeGraphDefinition::validateForConfigLoadableness() { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Trying to parse empty mediapipe graph definition: {} failed", this->getName(), this->chosenConfig); return StatusCode::MEDIAPIPE_GRAPH_CONFIG_FILE_INVALID; } - + SPDLOG_TRACE("Will try to load pbtxt config: {}", this->chosenConfig); bool success = ::google::protobuf::TextFormat::ParseFromString(chosenConfig, &this->config); if (!success) { SPDLOG_LOGGER_ERROR(modelmanager_logger, "Trying to parse mediapipe graph definition: {} failed", this->getName(), this->chosenConfig); diff --git a/src/status.cpp b/src/status.cpp index 38640f52fc..3e3b9425b9 100644 --- a/src/status.cpp +++ b/src/status.cpp @@ -36,6 +36,7 @@ const std::unordered_map Status::statusMessageMap = { {StatusCode::LAYOUT_WRONG_FORMAT, "The provided layout is in wrong format"}, {StatusCode::DIM_WRONG_FORMAT, "The provided dimension is in wrong format"}, {StatusCode::PLUGIN_CONFIG_WRONG_FORMAT, "Plugin config is in wrong format"}, + {StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS, "Tried to set the same key twice in plugin config"}, {StatusCode::MODEL_VERSION_POLICY_WRONG_FORMAT, "Model version policy is in wrong format"}, {StatusCode::MODEL_VERSION_POLICY_UNSUPPORTED_KEY, "Model version policy contains unsupported key"}, {StatusCode::GRPC_CHANNEL_ARG_WRONG_FORMAT, "Grpc channel arguments passed in wrong format"}, diff --git a/src/status.hpp b/src/status.hpp index d604e792d2..fee6300d99 100644 --- a/src/status.hpp +++ b/src/status.hpp @@ -40,6 +40,7 @@ enum class StatusCode { LAYOUT_WRONG_FORMAT, /*!< The provided layout param is in wrong format */ DIM_WRONG_FORMAT, /*!< The provided dimension param is in wrong format */ PLUGIN_CONFIG_WRONG_FORMAT, /*!< Plugin config is in wrong format */ + PLUGIN_CONFIG_CONFLICTING_PARAMETERS, /*!< Tried to set the same key twice in plugin config */ MODEL_VERSION_POLICY_WRONG_FORMAT, /*!< Model version policy is in wrong format */ 
MODEL_VERSION_POLICY_UNSUPPORTED_KEY, /*!< Model version policy contains invalid key */ GRPC_CHANNEL_ARG_WRONG_FORMAT, diff --git a/src/test/graph_export_test.cpp b/src/test/graph_export_test.cpp index 3a103f135c..8499a76d77 100644 --- a/src/test/graph_export_test.cpp +++ b/src/test/graph_export_test.cpp @@ -85,7 +85,7 @@ const std::string expectedFullPluginGraphContents = R"( max_num_seqs:256, device: "CPU", models_path: "./", - plugin_config: '{"KV_CACHE_PRECISION":"u8","MAX_PROMPT_LEN":"123","MODEL_DISTRIBUTION_POLICY":"PIPELINE_PARALLEL"}', + plugin_config: '{"KV_CACHE_PRECISION":"u8","MAX_PROMPT_LEN":123,"MODEL_DISTRIBUTION_POLICY":"PIPELINE_PARALLEL"}', enable_prefix_caching: true, cache_size: 10, } @@ -123,7 +123,6 @@ const std::string expectedGraphContentsWithResponseParser = R"( max_num_seqs:256, device: "CPU", models_path: "./", - plugin_config: '{ }', enable_prefix_caching: true, cache_size: 10, reasoning_parser: "REASONING_PARSER", @@ -164,7 +163,6 @@ const std::string expectedDefaultGraphContents = R"( max_num_seqs:256, device: "CPU", models_path: "./", - plugin_config: '{ }', enable_prefix_caching: true, cache_size: 10, } @@ -202,7 +200,6 @@ const std::string expectedDraftAndFuseGraphContents = R"( max_num_seqs:256, device: "CPU", models_path: "./", - plugin_config: '{ }', enable_prefix_caching: true, cache_size: 10, dynamic_split_fuse: false, @@ -243,7 +240,6 @@ const std::string expectedGGUFGraphContents = R"( max_num_seqs:256, device: "CPU", models_path: "./PRETTY_GOOD_GGUF_MODEL.gguf", - plugin_config: '{ }', enable_prefix_caching: true, cache_size: 10, } @@ -281,7 +277,6 @@ const std::string expectedGGUFGraphContents2 = R"( max_num_seqs:256, device: "CPU", models_path: "./PRETTY_GOOD_GGUF_MODEL_Q8-00001-of-20000.gguf", - plugin_config: '{ }', enable_prefix_caching: true, cache_size: 10, } @@ -313,7 +308,7 @@ node { models_path: "/some/path", max_allowed_chunks: 18, target_device: "GPU", - plugin_config: '{ "NUM_STREAMS": "2"}', + plugin_config: 
'{"NUM_STREAMS":2}', } } } @@ -333,7 +328,7 @@ node { models_path: "./", max_allowed_chunks: 10000, target_device: "CPU", - plugin_config: '{ "NUM_STREAMS": "1"}', + plugin_config: '{"NUM_STREAMS":1}', } } } @@ -355,7 +350,7 @@ node { truncate: true, pooling: LAST, target_device: "GPU", - plugin_config: '{ "NUM_STREAMS": "2"}', + plugin_config: '{"NUM_STREAMS":2}', } } } @@ -377,7 +372,7 @@ node { truncate: false, pooling: CLS, target_device: "CPU", - plugin_config: '{ "NUM_STREAMS": "1"}', + plugin_config: '{"NUM_STREAMS":1}', } } } @@ -460,8 +455,9 @@ TEST_F(GraphCreationTest, positiveDefaultWithVersionString) { ASSERT_EQ(expected, graphContents) << graphContents; } -TEST_F(GraphCreationTest, positiveReranktWithVersionString) { +TEST_F(GraphCreationTest, positiveRerankWithVersionString) { ovms::HFSettingsImpl hfSettings; + hfSettings.exportSettings.pluginConfig.numStreams = 1; hfSettings.task = ovms::RERANK_GRAPH; ovms::RerankGraphSettingsImpl rerankGraphSettings; hfSettings.graphSettings = std::move(rerankGraphSettings); @@ -477,6 +473,7 @@ TEST_F(GraphCreationTest, positiveReranktWithVersionString) { TEST_F(GraphCreationTest, positiveEmbeddingsWithVersionString) { ovms::HFSettingsImpl hfSettings; + hfSettings.exportSettings.pluginConfig.numStreams = 1; hfSettings.task = ovms::EMBEDDINGS_GRAPH; ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; hfSettings.graphSettings = std::move(embeddingsGraphSettings); @@ -566,12 +563,13 @@ TEST_F(GraphCreationTest, WillOverwriteExistingGraphPbtxtGGUF) { TEST_F(GraphCreationTest, rerankPositiveNonDefault) { ovms::HFSettingsImpl hfSettings; + auto& exportSettings = hfSettings.exportSettings; hfSettings.task = ovms::RERANK_GRAPH; ovms::RerankGraphSettingsImpl rerankGraphSettings; - rerankGraphSettings.targetDevice = "GPU"; - rerankGraphSettings.modelName = "myModel"; - rerankGraphSettings.modelPath = "/some/path"; - rerankGraphSettings.numStreams = 2; + exportSettings.targetDevice = "GPU"; + exportSettings.modelName = 
"myModel"; + exportSettings.modelPath = "/some/path"; + exportSettings.pluginConfig.numStreams = 2; rerankGraphSettings.maxAllowedChunks = 18; hfSettings.graphSettings = std::move(rerankGraphSettings); @@ -586,6 +584,7 @@ TEST_F(GraphCreationTest, rerankPositiveNonDefault) { TEST_F(GraphCreationTest, rerankPositiveDefault) { ovms::HFSettingsImpl hfSettings; + hfSettings.exportSettings.pluginConfig.numStreams = 1; hfSettings.task = ovms::RERANK_GRAPH; ovms::RerankGraphSettingsImpl rerankGraphSettings; hfSettings.graphSettings = std::move(rerankGraphSettings); @@ -601,11 +600,12 @@ TEST_F(GraphCreationTest, rerankPositiveDefault) { TEST_F(GraphCreationTest, rerankCreatedPbtxtInvalid) { ovms::HFSettingsImpl hfSettings; + auto& exportSettings = hfSettings.exportSettings; hfSettings.task = ovms::RERANK_GRAPH; ovms::RerankGraphSettingsImpl rerankGraphSettings; - rerankGraphSettings.targetDevice = "GPU"; - rerankGraphSettings.modelName = "myModel\""; - rerankGraphSettings.numStreams = 2; + exportSettings.targetDevice = "GPU"; + exportSettings.modelName = "myModel\""; + exportSettings.pluginConfig.numStreams = 2; hfSettings.graphSettings = std::move(rerankGraphSettings); std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; std::unique_ptr graphExporter = std::make_unique(); @@ -621,10 +621,10 @@ TEST_F(GraphCreationTest, embeddingsPositiveNonDefault) { ovms::HFSettingsImpl hfSettings; hfSettings.task = ovms::EMBEDDINGS_GRAPH; ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; - embeddingsGraphSettings.targetDevice = "GPU"; - embeddingsGraphSettings.modelName = "myModel"; - embeddingsGraphSettings.modelPath = "/model1/path"; - embeddingsGraphSettings.numStreams = 2; + hfSettings.exportSettings.targetDevice = "GPU"; + hfSettings.exportSettings.modelName = "myModel"; + hfSettings.exportSettings.modelPath = "/model1/path"; + hfSettings.exportSettings.pluginConfig.numStreams = 2; embeddingsGraphSettings.normalize = "false"; 
embeddingsGraphSettings.truncate = "true"; embeddingsGraphSettings.pooling = "LAST"; @@ -643,6 +643,7 @@ TEST_F(GraphCreationTest, embeddingsPositiveDefault) { hfSettings.task = ovms::EMBEDDINGS_GRAPH; ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; hfSettings.graphSettings = std::move(embeddingsGraphSettings); + hfSettings.exportSettings.pluginConfig.numStreams = 1; std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; std::unique_ptr graphExporter = std::make_unique(); auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); @@ -656,9 +657,9 @@ TEST_F(GraphCreationTest, embeddingsCreatedPbtxtInvalid) { ovms::HFSettingsImpl hfSettings; hfSettings.task = ovms::EMBEDDINGS_GRAPH; ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; - embeddingsGraphSettings.targetDevice = "GPU"; - embeddingsGraphSettings.modelName = "myModel\""; - embeddingsGraphSettings.numStreams = 2; + hfSettings.exportSettings.targetDevice = "GPU"; + hfSettings.exportSettings.modelName = "myModel\""; + hfSettings.exportSettings.pluginConfig.numStreams = 2; embeddingsGraphSettings.normalize = "true"; embeddingsGraphSettings.pooling = "CLS"; hfSettings.graphSettings = std::move(embeddingsGraphSettings); @@ -670,13 +671,35 @@ TEST_F(GraphCreationTest, embeddingsCreatedPbtxtInvalid) { ASSERT_EQ(status, ovms::StatusCode::OK); #endif } +TEST_F(GraphCreationTest, embeddingsDoubleSetNumStreams) { + // by default for embeddings we set numStreams=1 in CLI + // we should ignore double setting & check if equals the one from `--plugin_config` + // if both `--num_streams` is used to change from 1 and `--plugin_config` is used with + // num streams we trigger error + ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::EMBEDDINGS_GRAPH; + ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings; + hfSettings.exportSettings.targetDevice = "GPU"; + hfSettings.exportSettings.modelName = "myModel"; + 
hfSettings.exportSettings.pluginConfig.numStreams = 1; // imitates default from CLI + hfSettings.exportSettings.pluginConfig.manualString = "{\"NUM_STREAMS\":1}"; + embeddingsGraphSettings.normalize = "true"; + embeddingsGraphSettings.pooling = "CLS"; + hfSettings.graphSettings = std::move(embeddingsGraphSettings); + std::unique_ptr graphExporter = std::make_unique(); + auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); + ASSERT_EQ(status, ovms::StatusCode::OK); + hfSettings.exportSettings.pluginConfig.numStreams = 2; // non-default value - it should fail + status = graphExporter->createServableConfig(this->directoryPath, hfSettings); + ASSERT_EQ(status, ovms::StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS) << status.string(); +} TEST_F(GraphCreationTest, positivePluginConfigAll) { ovms::HFSettingsImpl hfSettings; ovms::TextGenGraphSettingsImpl graphSettings; - graphSettings.pluginConfig.kvCachePrecision = "u8"; - graphSettings.pluginConfig.maxPromptLength = 123; - graphSettings.pluginConfig.modelDistributionPolicy = "PIPELINE_PARALLEL"; + hfSettings.exportSettings.pluginConfig.kvCachePrecision = "u8"; + hfSettings.exportSettings.pluginConfig.maxPromptLength = 123; + hfSettings.exportSettings.pluginConfig.modelDistributionPolicy = "PIPELINE_PARALLEL"; hfSettings.graphSettings = std::move(graphSettings); @@ -710,7 +733,7 @@ TEST_F(GraphCreationTest, positiveWithParsersAndToolGuidedGeneration) { TEST_F(GraphCreationTest, positivePluginConfigOne) { ovms::HFSettingsImpl hfSettings; ovms::TextGenGraphSettingsImpl graphSettings; - graphSettings.pluginConfig.kvCachePrecision = "u8"; + hfSettings.exportSettings.pluginConfig.kvCachePrecision = "u8"; hfSettings.graphSettings = std::move(graphSettings); std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; @@ -763,7 +786,7 @@ TEST_F(GraphCreationTest, negativeCreatedPbtxtInvalid) { ovms::HFSettingsImpl hfSettings; hfSettings.task = 
ovms::TEXT_GENERATION_GRAPH; ovms::TextGenGraphSettingsImpl graphSettings; - graphSettings.modelPath = "invalid\""; + hfSettings.exportSettings.modelPath = "invalid\""; hfSettings.graphSettings = std::move(graphSettings); std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; std::string subconfigPath = ovms::FileSystem::appendSlash(this->directoryPath) + "subconfig.json"; @@ -775,6 +798,19 @@ TEST_F(GraphCreationTest, negativeCreatedPbtxtInvalid) { ASSERT_EQ(status, ovms::StatusCode::OK); #endif } +TEST_F(GraphCreationTest, positiveTextGeneration) { + ovms::HFSettingsImpl hfSettings; + hfSettings.task = ovms::TEXT_GENERATION_GRAPH; + ovms::TextGenGraphSettingsImpl graphSettings; + hfSettings.graphSettings = std::move(graphSettings); + hfSettings.exportSettings.targetDevice = "NPU"; + hfSettings.exportSettings.pluginConfig.useNpuPrefixCaching = true; + std::string graphPath = ovms::FileSystem::appendSlash(this->directoryPath) + "graph.pbtxt"; + std::string subconfigPath = ovms::FileSystem::appendSlash(this->directoryPath) + "subconfig.json"; + std::unique_ptr graphExporter = std::make_unique(); + auto status = graphExporter->createServableConfig(this->directoryPath, hfSettings); + ASSERT_EQ(status, ovms::StatusCode::OK); +} TEST_F(GraphCreationTest, imageGenerationPositiveDefault) { ovms::HFSettingsImpl hfSettings; @@ -794,8 +830,9 @@ TEST_F(GraphCreationTest, imageGenerationPositiveFull) { ovms::HFSettingsImpl hfSettings; hfSettings.task = ovms::IMAGE_GENERATION_GRAPH; ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings; - imageGenerationGraphSettings.pluginConfig = "{\"NUM_STREAMS\":14,\"CACHE_DIR\":\"/cache\"}"; - imageGenerationGraphSettings.targetDevice = "GPU"; + hfSettings.exportSettings.pluginConfig.numStreams = 14; + hfSettings.exportSettings.pluginConfig.cacheDir = "/cache"; + hfSettings.exportSettings.targetDevice = "GPU"; imageGenerationGraphSettings.defaultResolution = "300x400"; 
imageGenerationGraphSettings.maxResolution = "3000x4000"; imageGenerationGraphSettings.maxNumberImagesPerPrompt = 7; @@ -810,3 +847,47 @@ TEST_F(GraphCreationTest, imageGenerationPositiveFull) { std::string graphContents = GetFileContents(graphPath); ASSERT_EQ(expectedImageGenerationGraphContents, removeVersionString(graphContents)) << graphContents; } +TEST_F(GraphCreationTest, pluginConfigAsString) { + ovms::ExportSettings exportSettings; + + auto& pluginConfig = exportSettings.pluginConfig; + std::optional stringPluginConfig; + pluginConfig.kvCachePrecision = "u8"; + pluginConfig.maxPromptLength = 256; + pluginConfig.modelDistributionPolicy = "TENSOR_PARALLEL"; + pluginConfig.manualString = "{\"NUM_STREAMS\":4}"; + auto res = ovms::GraphExport::createPluginString(exportSettings); + ASSERT_TRUE(std::holds_alternative>(res)); + ASSERT_EQ(std::get>(res).value(), + "{\"NUM_STREAMS\":4,\"KV_CACHE_PRECISION\":\"u8\",\"MAX_PROMPT_LEN\":256,\"MODEL_DISTRIBUTION_POLICY\":\"TENSOR_PARALLEL\"}"); +} +TEST_F(GraphCreationTest, pluginConfigNegative) { + using ovms::Status; + ovms::PluginConfigSettingsImpl pluginConfig; + ovms::ExportSettings exportSettings; + std::optional stringPluginConfig; + pluginConfig.kvCachePrecision = "u8"; + pluginConfig.maxPromptLength = 256; + pluginConfig.modelDistributionPolicy = "TENSOR_PARALLEL"; + pluginConfig.cacheDir = "/cache"; + + exportSettings.pluginConfig = pluginConfig; + exportSettings.pluginConfig.manualString = "{\"KV_CACHE_PRECISION\":\"fp16\"}"; + auto res = ovms::GraphExport::createPluginString(exportSettings); + ASSERT_TRUE(std::holds_alternative(res)); + ASSERT_EQ(std::get(res), ovms::StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS); + + exportSettings.pluginConfig.manualString = "{\"MAX_PROMPT_LEN\":512}"; + res = ovms::GraphExport::createPluginString(exportSettings); + ASSERT_TRUE(std::holds_alternative(res)); + ASSERT_EQ(std::get(res), ovms::StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS); + + 
exportSettings.pluginConfig.manualString = "{\"CACHE_DIR\":\"/cache\"}"; + res = ovms::GraphExport::createPluginString(exportSettings); + ASSERT_TRUE(std::holds_alternative(res)); + ASSERT_EQ(std::get(res), ovms::StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS); + exportSettings.pluginConfig.manualString = "{\"MODEL_DISTRIBUTION_POLICY\":\"PIPELINE_PARALLEL\"}"; + res = ovms::GraphExport::createPluginString(exportSettings); + ASSERT_TRUE(std::holds_alternative(res)); + ASSERT_EQ(std::get(res), ovms::StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS); +} diff --git a/src/test/llm/llmnode_test.cpp b/src/test/llm/llmnode_test.cpp index d3d673d880..e96feff657 100644 --- a/src/test/llm/llmnode_test.cpp +++ b/src/test/llm/llmnode_test.cpp @@ -3573,7 +3573,8 @@ TEST_F(LLMConfigHttpTest, LLMNodeNonExistantModelsPath) { ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""}; DummyMediapipeGraphDefinition mediapipeDummy("mediaDummy", mgc, testPbtxt, nullptr); mediapipeDummy.inputConfig = testPbtxt; - ASSERT_EQ(mediapipeDummy.validate(manager), StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST); + auto status = mediapipeDummy.validate(manager); + ASSERT_EQ(status, StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST) << status.string(); } TEST_F(LLMConfigHttpTest, LLMNodeBadWorkspacePathEmpty) { @@ -3615,7 +3616,8 @@ TEST_F(LLMConfigHttpTest, LLMNodeBadWorkspacePathEmpty) { ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""}; DummyMediapipeGraphDefinition mediapipeDummy("mediaDummy", mgc, testPbtxt, nullptr); mediapipeDummy.inputConfig = testPbtxt; - ASSERT_EQ(mediapipeDummy.validate(manager), StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST); + auto status = mediapipeDummy.validate(manager); + ASSERT_EQ(status, StatusCode::LLM_NODE_DIRECTORY_DOES_NOT_EXIST) << status.string(); } TEST_F(LLMConfigHttpTest, LLMNodeWorkspacePathToFileNotDir) { @@ -3657,7 +3659,8 @@ TEST_F(LLMConfigHttpTest, LLMNodeWorkspacePathToFileNotDir) { ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""}; 
DummyMediapipeGraphDefinition mediapipeDummy("mediaDummy", mgc, testPbtxt, nullptr); mediapipeDummy.inputConfig = testPbtxt; - ASSERT_EQ(mediapipeDummy.validate(manager), StatusCode::LLM_NODE_PATH_DOES_NOT_EXIST_AND_NOT_GGUFFILE); + auto status = mediapipeDummy.validate(manager); + ASSERT_EQ(status, StatusCode::LLM_NODE_PATH_DOES_NOT_EXIST_AND_NOT_GGUFFILE) << status.string(); } class LLMConfigHttpTestParameterized : public ::testing::Test, public ::testing::WithParamInterface> { @@ -3710,7 +3713,8 @@ TEST_P(LLMConfigHttpTestParameterized, LLMNodeResourceInitFailed) { ovms::MediapipeGraphConfig mgc{"mediaDummy", "", ""}; DummyMediapipeGraphDefinition mediapipeDummy("mediaDummy", mgc, testPbtxt, nullptr); mediapipeDummy.inputConfig = testPbtxt; - ASSERT_EQ(mediapipeDummy.validate(manager), expectedStatusCode); + auto status = mediapipeDummy.validate(manager); + ASSERT_EQ(status, expectedStatusCode); ASSERT_EQ(mediapipeDummy.getGenAiServable("llmNode"), nullptr); } diff --git a/src/test/ovmsconfig_test.cpp b/src/test/ovmsconfig_test.cpp index de98c1815c..192f64cf43 100644 --- a/src/test/ovmsconfig_test.cpp +++ b/src/test/ovmsconfig_test.cpp @@ -1005,7 +1005,7 @@ TEST_F(OvmsConfigDeathTest, simultaneousPullAndRemove) { EXPECT_EXIT(ovms::Config::instance().parse(arg_count, n_argv), ::testing::ExitedWithCode(OVMS_EX_USAGE), "--remove_from_config cannot be used with --pull or --task") << createCmd(arg_count, n_argv) << buffer.str(); } -TEST(OvmsGraphConfigTest, positiveAllChanged) { +TEST(OvmsGraphConfigTest, positiveAllChangedTextGeneration) { std::string modelName = "OpenVINO/Phi-3-mini-FastDraft-50M-int8-ov"; std::string downloadPath = "test/repository"; char* n_argv[] = { @@ -1022,9 +1022,9 @@ TEST(OvmsGraphConfigTest, positiveAllChanged) { (char*)"--max_num_seqs", (char*)"128", (char*)"--target_device", - (char*)"GPU", + (char*)"NPU", (char*)"--enable_prefix_caching", - (char*)"false", + (char*)"true", (char*)"--cache_size", (char*)"20", 
(char*)"--max_num_batched_tokens", @@ -1038,23 +1038,31 @@ TEST(OvmsGraphConfigTest, positiveAllChanged) { (char*)"--tool_parser", (char*)"toolParserName", (char*)"--enable_tool_guided_generation", - (char*)"true"}; - - int arg_count = 30; + (char*)"true", + (char*)"--model_distribution_policy", + (char*)"TENSOR_PARALLEL", + (char*)"--max_prompt_len", + (char*)"2048", + (char*)"--kv_cache_precision", + (char*)"u8"}; + + int arg_count = 36; ConstructorEnabledConfig config; config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ovms::TextGenGraphSettingsImpl graphSettings = std::get(hfSettings.graphSettings); ASSERT_EQ(graphSettings.pipelineType.value(), "VLM"); - ASSERT_EQ(graphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.modelPath, "./"); ASSERT_EQ(graphSettings.maxNumSeqs, 128); - ASSERT_EQ(graphSettings.targetDevice, "GPU"); - ASSERT_EQ(graphSettings.pluginConfig.kvCachePrecision.has_value(), false); - ASSERT_EQ(graphSettings.enablePrefixCaching, "false"); + ASSERT_EQ(exportSettings.targetDevice, "NPU"); + ASSERT_EQ(exportSettings.pluginConfig.kvCachePrecision.has_value(), true); + ASSERT_EQ(exportSettings.pluginConfig.kvCachePrecision.value(), "u8"); + ASSERT_EQ(graphSettings.enablePrefixCaching, "true"); ASSERT_EQ(graphSettings.cacheSize, 20); ASSERT_EQ(graphSettings.maxNumBatchedTokens.value(), 16); ASSERT_EQ(graphSettings.dynamicSplitFuse, "true"); @@ -1062,9 +1070,15 @@ TEST(OvmsGraphConfigTest, positiveAllChanged) { ASSERT_EQ(graphSettings.reasoningParser.value(), "reasoningParserName"); ASSERT_EQ(graphSettings.toolParser.value(), "toolParserName"); ASSERT_EQ(graphSettings.enableToolGuidedGeneration, "true"); + ASSERT_EQ(exportSettings.pluginConfig.modelDistributionPolicy.has_value(), true); + 
ASSERT_EQ(exportSettings.pluginConfig.modelDistributionPolicy.value(), "TENSOR_PARALLEL"); + ASSERT_EQ(exportSettings.pluginConfig.maxPromptLength.has_value(), true); + ASSERT_EQ(exportSettings.pluginConfig.maxPromptLength.value(), 2048); + ASSERT_EQ(exportSettings.pluginConfig.kvCachePrecision.value(), "u8"); + ASSERT_EQ(exportSettings.pluginConfig.useNpuPrefixCaching.value(), true); } -TEST(OvmsGraphConfigTest, positiveSomeChanged) { +TEST(OvmsGraphConfigTest, positiveSomeChangedTextGeneration) { std::string modelName = "OpenVINO/Phi-3-mini-FastDraft-50M-int8-ov"; std::string downloadPath = "test/repository"; char* n_argv[] = { @@ -1090,17 +1104,18 @@ TEST(OvmsGraphConfigTest, positiveSomeChanged) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(hfSettings.overwriteModels, true); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ovms::TextGenGraphSettingsImpl graphSettings = std::get(hfSettings.graphSettings); - ASSERT_EQ(graphSettings.modelName, modelName); + ASSERT_EQ(exportSettings.modelName, modelName); ASSERT_EQ(graphSettings.pipelineType.value(), "VLM"); - ASSERT_EQ(graphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.modelPath, "./"); ASSERT_EQ(graphSettings.maxNumSeqs, 128); - ASSERT_EQ(graphSettings.targetDevice, "NPU"); - ASSERT_EQ(graphSettings.pluginConfig.kvCachePrecision.has_value(), false); + ASSERT_EQ(exportSettings.targetDevice, "NPU"); + ASSERT_EQ(exportSettings.pluginConfig.kvCachePrecision.has_value(), false); ASSERT_EQ(graphSettings.enablePrefixCaching, "true"); ASSERT_EQ(graphSettings.cacheSize, 10); ASSERT_EQ(graphSettings.maxNumBatchedTokens.has_value(), false); @@ -1126,16 +1141,17 @@ TEST(OvmsGraphConfigTest, positiveTaskTextGen) { ConstructorEnabledConfig config; config.parse(arg_count, n_argv); auto& hfSettings 
= config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ovms::TextGenGraphSettingsImpl graphSettings = std::get(hfSettings.graphSettings); - ASSERT_EQ(graphSettings.modelName, modelName); + ASSERT_EQ(exportSettings.modelName, modelName); ASSERT_EQ(graphSettings.pipelineType.has_value(), false); - ASSERT_EQ(graphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.modelPath, "./"); ASSERT_EQ(graphSettings.maxNumSeqs, 256); - ASSERT_EQ(graphSettings.targetDevice, "CPU"); - ASSERT_EQ(graphSettings.pluginConfig.kvCachePrecision.has_value(), false); + ASSERT_EQ(exportSettings.targetDevice, "CPU"); + ASSERT_EQ(exportSettings.pluginConfig.kvCachePrecision.has_value(), false); ASSERT_EQ(graphSettings.enablePrefixCaching, "true"); ASSERT_EQ(graphSettings.cacheSize, 10); ASSERT_EQ(graphSettings.maxNumBatchedTokens.has_value(), false); @@ -1218,20 +1234,29 @@ TEST(OvmsExportHfSettingsTest, allChanged) { (char*)"NPU", (char*)"--task", (char*)"text_generation", - }; + (char*)"--plugin_config", + (char*)"{\"NUM_STREAMS\":\"2\"}", + (char*)"--cache_dir", + (char*)"/tmp/cache_dir_with_gold"}; - int arg_count = 15; + int arg_count = 19; ConstructorEnabledConfig config; config.parse(arg_count, n_argv); - auto& hfSettings = config.getServerSettings().hfSettings; + auto& serverSettings = config.getServerSettings(); + auto& hfSettings = serverSettings.hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(hfSettings.overwriteModels, true); - ASSERT_EQ(hfSettings.exportSettings.precision, "fp64"); - ASSERT_EQ(hfSettings.exportSettings.targetDevice, "NPU"); + ASSERT_EQ(exportSettings.precision, "fp64"); + ASSERT_EQ(exportSettings.targetDevice, "NPU"); 
ASSERT_EQ(hfSettings.downloadType, ovms::OPTIMUM_CLI_DOWNLOAD); - ASSERT_EQ(hfSettings.exportSettings.extraQuantizationParams.value(), "--sym --ratio 1.0"); + ASSERT_EQ(exportSettings.extraQuantizationParams.value(), "--sym --ratio 1.0"); + ASSERT_EQ(exportSettings.pluginConfig.cacheDir.value(), "/tmp/cache_dir_with_gold"); + // here we expect only what is passed by user not all plugin parameters passed to genai + ASSERT_EQ(hfSettings.exportSettings.pluginConfig.manualString.value(), "{\"NUM_STREAMS\":\"2\"}"); + ASSERT_EQ(serverSettings.cacheDir, "/tmp/cache_dir_with_gold"); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); } @@ -1255,21 +1280,27 @@ TEST(OvmsExportHfSettingsTest, allChangedPullAndStart) { (char*)"NPU", (char*)"--task", (char*)"text_generation", - }; + (char*)"--plugin_config", + (char*)"{\"NUM_STREAMS\":\"2\"}", + (char*)"--cache_dir", + (char*)"/tmp/cache_dir_with_gold"}; - int arg_count = 16; + int arg_count = 20; ConstructorEnabledConfig config; config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(hfSettings.overwriteModels, true); - ASSERT_EQ(hfSettings.exportSettings.precision, "fp64"); - ASSERT_EQ(hfSettings.exportSettings.targetDevice, "NPU"); + ASSERT_EQ(exportSettings.precision, "fp64"); + ASSERT_EQ(exportSettings.targetDevice, "NPU"); ASSERT_EQ(hfSettings.downloadType, ovms::OPTIMUM_CLI_DOWNLOAD); - ASSERT_EQ(hfSettings.exportSettings.extraQuantizationParams.value(), "--sym --ratio 1.0"); + ASSERT_EQ(exportSettings.extraQuantizationParams.value(), "--sym --ratio 1.0"); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_AND_START_MODE); + ASSERT_EQ(exportSettings.pluginConfig.manualString.value(), "{\"NUM_STREAMS\":\"2\"}"); + ASSERT_EQ(exportSettings.pluginConfig.cacheDir.value(), "/tmp/cache_dir_with_gold"); } 
TEST(OvmsGraphConfigTest, positiveDefault) { @@ -1290,16 +1321,17 @@ TEST(OvmsGraphConfigTest, positiveDefault) { ConstructorEnabledConfig config; config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ASSERT_EQ(hfSettings.task, ovms::TEXT_GENERATION_GRAPH); ovms::TextGenGraphSettingsImpl graphSettings = std::get(hfSettings.graphSettings); ASSERT_EQ(graphSettings.pipelineType.has_value(), false); - ASSERT_EQ(graphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.modelPath, "./"); ASSERT_EQ(graphSettings.maxNumSeqs, 256); - ASSERT_EQ(graphSettings.targetDevice, "CPU"); - ASSERT_EQ(graphSettings.pluginConfig.kvCachePrecision.has_value(), false); + ASSERT_EQ(exportSettings.targetDevice, "CPU"); + ASSERT_EQ(exportSettings.pluginConfig.kvCachePrecision.has_value(), false); ASSERT_EQ(graphSettings.enablePrefixCaching, "true"); ASSERT_EQ(graphSettings.cacheSize, 10); ASSERT_EQ(graphSettings.maxNumBatchedTokens.has_value(), false); @@ -1328,6 +1360,7 @@ TEST(OvmsGraphConfigTest, positiveDefaultStart) { ConstructorEnabledConfig config; config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(hfSettings.overwriteModels, false); @@ -1335,10 +1368,10 @@ TEST(OvmsGraphConfigTest, positiveDefaultStart) { ASSERT_EQ(hfSettings.task, ovms::TEXT_GENERATION_GRAPH); ovms::TextGenGraphSettingsImpl graphSettings = std::get(hfSettings.graphSettings); ASSERT_EQ(graphSettings.pipelineType.has_value(), false); - ASSERT_EQ(graphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.modelPath, "./"); ASSERT_EQ(graphSettings.maxNumSeqs, 256); - 
ASSERT_EQ(graphSettings.targetDevice, "CPU"); - ASSERT_EQ(graphSettings.pluginConfig.kvCachePrecision.has_value(), false); + ASSERT_EQ(exportSettings.targetDevice, "CPU"); + ASSERT_EQ(exportSettings.pluginConfig.kvCachePrecision.has_value(), false); ASSERT_EQ(graphSettings.enablePrefixCaching, "true"); ASSERT_EQ(graphSettings.cacheSize, 10); ASSERT_EQ(graphSettings.maxNumBatchedTokens.has_value(), false); @@ -1369,7 +1402,7 @@ TEST(OvmsGraphConfigTest, positiveTargetDeviceHetero) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; ovms::TextGenGraphSettingsImpl graphSettings = std::get(hfSettings.graphSettings); - ASSERT_EQ(graphSettings.targetDevice, "HETERO"); + ASSERT_EQ(hfSettings.exportSettings.targetDevice, "HETERO"); } TEST(OvmsGraphConfigTest, positiveTargetDeviceSpecificGPU) { @@ -1393,7 +1426,7 @@ TEST(OvmsGraphConfigTest, positiveTargetDeviceSpecificGPU) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; ovms::TextGenGraphSettingsImpl graphSettings = std::get(hfSettings.graphSettings); - ASSERT_EQ(graphSettings.targetDevice, "GPU.1"); + ASSERT_EQ(hfSettings.exportSettings.targetDevice, "GPU.1"); } TEST(OvmsGraphConfigTest, negativePipelineType) { @@ -1497,23 +1530,29 @@ TEST(OvmsGraphConfigTest, positiveAllChangedRerank) { (char*)"2", (char*)"--model_name", (char*)servingName.c_str(), - }; + (char*)"--plugin_config", + (char*)"{\"SOME_KEY\":\"SOME_VALUE\"}", + (char*)"--cache_dir", + (char*)"/tmp/cache_dir_with_emptiness"}; - int arg_count = 16; + int arg_count = 20; ConstructorEnabledConfig config; config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ASSERT_EQ(hfSettings.task, ovms::RERANK_GRAPH); 
ovms::RerankGraphSettingsImpl rerankGraphSettings = std::get(hfSettings.graphSettings); ASSERT_EQ(rerankGraphSettings.maxAllowedChunks, 1002); - ASSERT_EQ(rerankGraphSettings.numStreams, 2); - ASSERT_EQ(rerankGraphSettings.targetDevice, "GPU"); - ASSERT_EQ(rerankGraphSettings.modelName, servingName); - ASSERT_EQ(rerankGraphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 2); + ASSERT_EQ(exportSettings.targetDevice, "GPU"); + ASSERT_EQ(exportSettings.modelName, servingName); + ASSERT_EQ(exportSettings.modelPath, "./"); + ASSERT_EQ(hfSettings.exportSettings.pluginConfig.cacheDir.value(), "/tmp/cache_dir_with_emptiness"); + ASSERT_EQ(hfSettings.exportSettings.pluginConfig.manualString.value(), "{\"SOME_KEY\":\"SOME_VALUE\"}"); } TEST(OvmsGraphConfigTest, positiveAllChangedRerankStart) { @@ -1545,16 +1584,17 @@ TEST(OvmsGraphConfigTest, positiveAllChangedRerankStart) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_AND_START_MODE); ASSERT_EQ(hfSettings.task, ovms::RERANK_GRAPH); ovms::RerankGraphSettingsImpl rerankGraphSettings = std::get(hfSettings.graphSettings); ASSERT_EQ(rerankGraphSettings.maxAllowedChunks, 1002); - ASSERT_EQ(rerankGraphSettings.numStreams, 2); - ASSERT_EQ(rerankGraphSettings.targetDevice, "GPU"); - ASSERT_EQ(rerankGraphSettings.modelName, servingName); - ASSERT_EQ(rerankGraphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 2); + ASSERT_EQ(exportSettings.targetDevice, "GPU"); + ASSERT_EQ(exportSettings.modelName, servingName); + ASSERT_EQ(exportSettings.modelPath, "./"); } TEST(OvmsGraphConfigTest, positiveDefaultRerank) { @@ -1577,16 +1617,17 @@ TEST(OvmsGraphConfigTest, positiveDefaultRerank) { config.parse(arg_count, n_argv); auto& 
hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ASSERT_EQ(hfSettings.task, ovms::RERANK_GRAPH); ovms::RerankGraphSettingsImpl rerankGraphSettings = std::get(hfSettings.graphSettings); ASSERT_EQ(rerankGraphSettings.maxAllowedChunks, 10000); - ASSERT_EQ(rerankGraphSettings.numStreams, 1); - ASSERT_EQ(rerankGraphSettings.targetDevice, "CPU"); - ASSERT_EQ(rerankGraphSettings.modelName, modelName); - ASSERT_EQ(rerankGraphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 1); + ASSERT_EQ(exportSettings.targetDevice, "CPU"); + ASSERT_EQ(exportSettings.modelName, modelName); + ASSERT_EQ(exportSettings.modelPath, "./"); } TEST(OvmsGraphConfigTest, positiveSomeChangedRerank) { @@ -1615,16 +1656,17 @@ TEST(OvmsGraphConfigTest, positiveSomeChangedRerank) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ASSERT_EQ(hfSettings.task, ovms::RERANK_GRAPH); ovms::RerankGraphSettingsImpl rerankGraphSettings = std::get(hfSettings.graphSettings); ASSERT_EQ(rerankGraphSettings.maxAllowedChunks, 2); - ASSERT_EQ(rerankGraphSettings.numStreams, 1); - ASSERT_EQ(rerankGraphSettings.targetDevice, "GPU"); - ASSERT_EQ(rerankGraphSettings.modelName, servingName); - ASSERT_EQ(rerankGraphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 1); + ASSERT_EQ(exportSettings.targetDevice, "GPU"); + ASSERT_EQ(exportSettings.modelName, servingName); + ASSERT_EQ(exportSettings.modelPath, "./"); } TEST(OvmsGraphConfigTest, positiveAllChangedImageGeneration) { @@ -1661,19 +1703,22 
@@ TEST(OvmsGraphConfigTest, positiveAllChangedImageGeneration) { (char*)"2", (char*)"--max_num_inference_steps", (char*)"3", + (char*)"--plugin_config", + (char*)"{\"SOME_KEY\":\"SOME_VALUE\"}", }; - int arg_count = 30; + int arg_count = 32; ConstructorEnabledConfig config; config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ASSERT_EQ(hfSettings.task, ovms::IMAGE_GENERATION_GRAPH); ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings = std::get(hfSettings.graphSettings); - ASSERT_EQ(imageGenerationGraphSettings.targetDevice, "GPU GPU NPU"); + ASSERT_EQ(exportSettings.targetDevice, "GPU GPU NPU"); ASSERT_EQ(imageGenerationGraphSettings.resolution, " 3000x4000 200x700 100x200"); ASSERT_TRUE(imageGenerationGraphSettings.guidanceScale.has_value()); ASSERT_NEAR(imageGenerationGraphSettings.guidanceScale.value(), 8.2, 1e-5); @@ -1686,7 +1731,9 @@ TEST(OvmsGraphConfigTest, positiveAllChangedImageGeneration) { ASSERT_EQ(imageGenerationGraphSettings.defaultNumInferenceSteps.value(), 2); ASSERT_TRUE(imageGenerationGraphSettings.maxNumInferenceSteps.has_value()); ASSERT_EQ(imageGenerationGraphSettings.maxNumInferenceSteps.value(), 3); - ASSERT_EQ(imageGenerationGraphSettings.pluginConfig, "{\"NUM_STREAMS\":14,\"CACHE_DIR\":\"/cache\"}"); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 14); + ASSERT_EQ(exportSettings.pluginConfig.cacheDir.value(), "/cache"); + ASSERT_EQ(exportSettings.pluginConfig.manualString.value(), "{\"SOME_KEY\":\"SOME_VALUE\"}"); } TEST(OvmsGraphConfigTest, positiveDefaultImageGeneration) { @@ -1708,18 +1755,19 @@ TEST(OvmsGraphConfigTest, positiveDefaultImageGeneration) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& 
exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ASSERT_EQ(hfSettings.task, ovms::IMAGE_GENERATION_GRAPH); ovms::ImageGenerationGraphSettingsImpl imageGenerationGraphSettings = std::get(hfSettings.graphSettings); - ASSERT_EQ(imageGenerationGraphSettings.targetDevice, "CPU"); + ASSERT_EQ(exportSettings.targetDevice, "CPU"); ASSERT_TRUE(imageGenerationGraphSettings.maxResolution.empty()); ASSERT_TRUE(imageGenerationGraphSettings.defaultResolution.empty()); ASSERT_FALSE(imageGenerationGraphSettings.maxNumberImagesPerPrompt.has_value()); ASSERT_FALSE(imageGenerationGraphSettings.defaultNumInferenceSteps.has_value()); ASSERT_FALSE(imageGenerationGraphSettings.maxNumInferenceSteps.has_value()); - ASSERT_TRUE(imageGenerationGraphSettings.pluginConfig.empty()); + ASSERT_TRUE(exportSettings.pluginConfig.empty()); } TEST(OvmsGraphConfigTest, positiveAllChangedEmbeddings) { @@ -1747,13 +1795,17 @@ TEST(OvmsGraphConfigTest, positiveAllChangedEmbeddings) { (char*)"2", (char*)"--model_name", (char*)servingName.c_str(), - }; + (char*)"--plugin_config", + (char*)"{\"SOME_KEY\":\"SOME_VALUE\"}", + (char*)"--cache_dir", + (char*)"/tmp/cache_dir_with_emptiness"}; - int arg_count = 20; + int arg_count = 24; ConstructorEnabledConfig config; config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); @@ -1762,10 +1814,12 @@ TEST(OvmsGraphConfigTest, positiveAllChangedEmbeddings) { ASSERT_EQ(embeddingsGraphSettings.normalize, "false"); ASSERT_EQ(embeddingsGraphSettings.truncate, "true"); ASSERT_EQ(embeddingsGraphSettings.pooling, "CLS"); - ASSERT_EQ(embeddingsGraphSettings.numStreams, 
2); - ASSERT_EQ(embeddingsGraphSettings.targetDevice, "GPU"); - ASSERT_EQ(embeddingsGraphSettings.modelName, servingName); - ASSERT_EQ(embeddingsGraphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 2); + ASSERT_EQ(exportSettings.targetDevice, "GPU"); + ASSERT_EQ(exportSettings.modelName, servingName); + ASSERT_EQ(exportSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.pluginConfig.cacheDir.value(), "/tmp/cache_dir_with_emptiness"); + ASSERT_EQ(exportSettings.pluginConfig.manualString.value(), "{\"SOME_KEY\":\"SOME_VALUE\"}"); } TEST(OvmsGraphConfigTest, positiveAllChangedEmbeddingsStart) { @@ -1801,6 +1855,7 @@ TEST(OvmsGraphConfigTest, positiveAllChangedEmbeddingsStart) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_AND_START_MODE); @@ -1809,10 +1864,10 @@ TEST(OvmsGraphConfigTest, positiveAllChangedEmbeddingsStart) { ASSERT_EQ(embeddingsGraphSettings.normalize, "false"); ASSERT_EQ(embeddingsGraphSettings.truncate, "true"); ASSERT_EQ(embeddingsGraphSettings.pooling, "LAST"); - ASSERT_EQ(embeddingsGraphSettings.numStreams, 2); - ASSERT_EQ(embeddingsGraphSettings.targetDevice, "GPU"); - ASSERT_EQ(embeddingsGraphSettings.modelName, servingName); - ASSERT_EQ(embeddingsGraphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 2); + ASSERT_EQ(exportSettings.targetDevice, "GPU"); + ASSERT_EQ(exportSettings.modelName, servingName); + ASSERT_EQ(exportSettings.modelPath, "./"); } TEST(OvmsGraphConfigTest, positiveDefaultEmbeddings) { @@ -1834,6 +1889,7 @@ TEST(OvmsGraphConfigTest, positiveDefaultEmbeddings) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = hfSettings.exportSettings; 
ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); @@ -1842,9 +1898,9 @@ TEST(OvmsGraphConfigTest, positiveDefaultEmbeddings) { ASSERT_EQ(embeddingsGraphSettings.normalize, "true"); ASSERT_EQ(embeddingsGraphSettings.truncate, "false"); ASSERT_EQ(embeddingsGraphSettings.pooling, "CLS"); - ASSERT_EQ(embeddingsGraphSettings.numStreams, 1); - ASSERT_EQ(embeddingsGraphSettings.targetDevice, "CPU"); - ASSERT_EQ(embeddingsGraphSettings.modelName, modelName); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 1); + ASSERT_EQ(exportSettings.targetDevice, "CPU"); + ASSERT_EQ(exportSettings.modelName, modelName); } TEST(OvmsGraphConfigTest, positiveSomeChangedEmbeddings) { @@ -1875,17 +1931,18 @@ TEST(OvmsGraphConfigTest, positiveSomeChangedEmbeddings) { config.parse(arg_count, n_argv); auto& hfSettings = config.getServerSettings().hfSettings; + auto& exportSettings = config.getServerSettings().hfSettings.exportSettings; ASSERT_EQ(hfSettings.sourceModel, modelName); ASSERT_EQ(hfSettings.downloadPath, downloadPath); ASSERT_EQ(config.getServerSettings().serverMode, ovms::HF_PULL_MODE); ASSERT_EQ(hfSettings.task, ovms::EMBEDDINGS_GRAPH); ovms::EmbeddingsGraphSettingsImpl embeddingsGraphSettings = std::get(hfSettings.graphSettings); ASSERT_EQ(embeddingsGraphSettings.pooling, "LAST"); - ASSERT_EQ(embeddingsGraphSettings.numStreams, 1); + ASSERT_EQ(exportSettings.pluginConfig.numStreams, 1); ASSERT_EQ(embeddingsGraphSettings.normalize, "false"); - ASSERT_EQ(embeddingsGraphSettings.targetDevice, "GPU"); - ASSERT_EQ(embeddingsGraphSettings.modelName, servingName); - ASSERT_EQ(embeddingsGraphSettings.modelPath, "./"); + ASSERT_EQ(exportSettings.targetDevice, "GPU"); + ASSERT_EQ(exportSettings.modelName, servingName); + ASSERT_EQ(exportSettings.modelPath, "./"); } TEST(OvmsGraphConfigTest, negativeEmbeddingsInvalidNormalize) { diff --git a/src/test/pull_hf_model_test.cpp 
b/src/test/pull_hf_model_test.cpp index 5993d4cffa..8b43167d9a 100644 --- a/src/test/pull_hf_model_test.cpp +++ b/src/test/pull_hf_model_test.cpp @@ -91,7 +91,6 @@ const std::string expectedGraphContents = R"( max_num_seqs:256, device: "CPU", models_path: "./", - plugin_config: '{ }', enable_prefix_caching: true, cache_size: 10, } @@ -129,7 +128,6 @@ const std::string expectedGraphContentsDraft = R"( max_num_seqs:256, device: "CPU", models_path: "./", - plugin_config: '{ }', enable_prefix_caching: true, cache_size: 10, # Speculative decoding configuration