openvinotoolkit · dtrawins · Oct 28, 2025
diff --git a/docs/parameters.md b/docs/parameters.md
@@ -125,7 +125,7 @@ Task specific parameters for different tasks (text generation/image generation/e
 | `--pipeline_type`                     | `string`     | Type of the pipeline to be used. Choices: `LM`, `LM_CB`, `VLM`, `VLM_CB`, `AUTO`. Default: `AUTO`.                         |
 | `--enable_prefix_caching`             | `bool`       | Enables algorithm to cache the prompt tokens. Default: true.                                                               |
 | `--max_num_batched_tokens`            | `integer`    | The maximum number of tokens that can be batched together.                                                                 |
-| `--cache_size`                        | `integer`    | Cache size in GB. Default: 10.                                                                                             |
+| `--cache_size`                        | `integer`    | Cache size in GB. Default: 1.                                                                                              |
 | `--draft_source_model`                | `string`     | HF model name or path to the local folder with PyTorch or OpenVINO draft model.                                            |
 | `--dynamic_split_fuse`                | `bool`       | Enables dynamic split fuse algorithm. Default: true.                                                                       |
 | `--max_prompt_len`                    | `integer`    | Sets NPU specific property for maximum number of tokens in the prompt.                                                     |

diff --git a/src/graph_export/graph_cli_parser.cpp b/src/graph_export/graph_cli_parser.cpp
@@ -57,8 +57,8 @@ void GraphCLIParser::createOptions() {
             cxxopts::value<uint32_t>(),
             "MAX_NUM_BATCHED_TOKENS")
         ("cache_size",
-            "cache size in GB, default is 10.",
-            cxxopts::value<uint32_t>()->default_value("10"),
+            "cache size in GB, default is 1.",
+            cxxopts::value<uint32_t>()->default_value("1"),
             "CACHE_SIZE")
         ("draft_source_model",
             "HF model name or path to the local folder with PyTorch or OpenVINO draft model.",