Skip to content

Commit

Permalink
[Backend] let the system decide api-server.wasm port
Browse files Browse the repository at this point in the history
Signed-off-by: csh <458761603@qq.com>
  • Loading branch information
L-jasmine committed Aug 6, 2024
1 parent 4e22cbc commit 9efb754
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 6 deletions.
23 changes: 17 additions & 6 deletions moxin-backend/src/backend_impls/api_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ static WASM: &[u8] = include_bytes!("../../wasm/llama-api-server.wasm");
pub struct LLamaEdgeApiServer {
id: String,
listen_addr: SocketAddr,
load_model_options: LoadModelOptions,
wasm_module: Module,
running_controller: tokio::sync::broadcast::Sender<()>,
#[allow(dead_code)]
Expand Down Expand Up @@ -141,17 +142,23 @@ impl BackendModel for LLamaEdgeApiServer {
options: moxin_protocol::protocol::LoadModelOptions,
tx: std::sync::mpsc::Sender<anyhow::Result<moxin_protocol::protocol::LoadModelResponse>>,
) -> Self {
let load_model_options = options.clone();
let mut need_reload = true;
let (wasm_module, listen_addr) = if let Some(old_model) = &old_model {
if old_model.id == file.id.as_str() {
if old_model.id == file.id.as_str()
&& old_model.load_model_options.n_ctx == options.n_ctx
&& old_model.load_model_options.n_batch == options.n_batch
{
need_reload = false;
}
(old_model.wasm_module.clone(), old_model.listen_addr)
} else {
(
Module::from_bytes(None, WASM).unwrap(),
([0, 0, 0, 0], 8080).into(),
)
let new_addr = std::net::TcpListener::bind("localhost:0")
.unwrap()
.local_addr()
.unwrap();

(Module::from_bytes(None, WASM).unwrap(), new_addr)
};

if !need_reload {
Expand All @@ -160,6 +167,7 @@ impl BackendModel for LLamaEdgeApiServer {
file_id: file.id.to_string(),
model_id: file.model_id,
information: "".to_string(),
listen_port: listen_addr.port(),
},
)));
return old_model.unwrap();
Expand All @@ -173,7 +181,8 @@ impl BackendModel for LLamaEdgeApiServer {

let file_id = file.id.to_string();

let url = format!("http://localhost:{}/echo", listen_addr.port());
let listen_port = listen_addr.port();
let url = format!("http://localhost:{}/echo", listen_port);

let file_ = file.clone();

Expand Down Expand Up @@ -205,6 +214,7 @@ impl BackendModel for LLamaEdgeApiServer {
file_id: file_.id.to_string(),
model_id: file_.model_id,
information: "".to_string(),
listen_port,
},
)));
} else {
Expand All @@ -220,6 +230,7 @@ impl BackendModel for LLamaEdgeApiServer {
listen_addr,
running_controller,
model_thread,
load_model_options,
};

new_model
Expand Down
2 changes: 2 additions & 0 deletions moxin-backend/src/backend_impls/chat_ui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ fn get_input(
file_id,
model_id,
information: String::new(),
listen_port: 0,
})));
}

Expand Down Expand Up @@ -430,6 +431,7 @@ impl super::BackendModel for ChatBotModel {
file_id: file.id.to_string(),
model_id: file.model_id,
information: "".to_string(),
listen_port: 0,
})));
return old_model.unwrap();
}
Expand Down
4 changes: 4 additions & 0 deletions moxin-protocol/src/protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ pub struct LoadedModelInfo {
pub file_id: FileID,
pub model_id: ModelID,

// The port on which the local server for the model is listening.
// If 0, the server is not running.
pub listen_port: u16,

// JSON formatted string with the model information. See "Model Inspector" in LMStudio.
pub information: String,
}
Expand Down

0 comments on commit 9efb754

Please sign in to comment.