From c86e422b78615a1273f65da0870a07b322ef3a1a Mon Sep 17 00:00:00 2001 From: csh <458761603@qq.com> Date: Thu, 25 Jul 2024 14:40:08 +0800 Subject: [PATCH] [Backend] Limit ctx-size to a maximum of 8K. Signed-off-by: csh <458761603@qq.com> --- moxin-backend/src/backend_impls/api_server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moxin-backend/src/backend_impls/api_server.rs b/moxin-backend/src/backend_impls/api_server.rs index 83d3b825..1352c563 100644 --- a/moxin-backend/src/backend_impls/api_server.rs +++ b/moxin-backend/src/backend_impls/api_server.rs @@ -35,7 +35,7 @@ fn create_wasi( load_model: &LoadModelOptions, ) -> wasmedge_sdk::WasmEdgeResult { // use model metadata context size - let ctx_size = Some(format!("{}", file.context_size)); + let ctx_size = Some(format!("{}", file.context_size.min(8 * 1024))); let n_gpu_layers = match load_model.gpu_layers { moxin_protocol::protocol::GPULayers::Specific(n) => Some(n.to_string()),