diff --git a/moxin-backend/src/backend_impls/api_server.rs b/moxin-backend/src/backend_impls/api_server.rs index 83d3b825..1352c563 100644 --- a/moxin-backend/src/backend_impls/api_server.rs +++ b/moxin-backend/src/backend_impls/api_server.rs @@ -35,7 +35,7 @@ fn create_wasi( load_model: &LoadModelOptions, ) -> wasmedge_sdk::WasmEdgeResult { // use model metadata context size - let ctx_size = Some(format!("{}", file.context_size)); + let ctx_size = Some(format!("{}", file.context_size.min(8 * 1024))); let n_gpu_layers = match load_model.gpu_layers { moxin_protocol::protocol::GPULayers::Specific(n) => Some(n.to_string()),