Skip to content

Commit

Permalink
Merge pull request #166 from moxin-org/smooth-model-switch
Browse files: browse the repository at this point in the history
Smooth model switch
  • Loading branch information
jmbejar authored Jul 26, 2024
2 parents 8967f3d + dd9f0e3 commit 97b08b0
Show file tree
Hide file tree
Showing 12 changed files with 481 additions and 187 deletions.
13 changes: 3 additions & 10 deletions moxin-backend/src/backend_impls/chat_ui.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,22 +316,15 @@ fn create_wasi(
file: &DownloadedFile,
load_model: &LoadModelOptions,
) -> wasmedge_sdk::WasmEdgeResult<WasiModule> {
let ctx_size = if load_model.n_ctx > 0 {
Some(load_model.n_ctx.to_string())
} else {
None
};
let ctx_size = Some(format!("{}", file.context_size));

let n_gpu_layers = match load_model.gpu_layers {
moxin_protocol::protocol::GPULayers::Specific(n) => Some(n.to_string()),
moxin_protocol::protocol::GPULayers::Max => None,
};

let batch_size = if load_model.n_batch > 0 {
Some(load_model.n_batch.to_string())
} else {
None
};
// Set n_batch to a fixed value of 128.
let batch_size = Some(format!("128"));

let mut prompt_template = load_model.prompt_template.clone();
if prompt_template.is_none() && !file.prompt_template.is_empty() {
Expand Down
4 changes: 0 additions & 4 deletions moxin-backend/src/backend_impls/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ fn test_chat() {
prompt_template: None,
gpu_layers: moxin_protocol::protocol::GPULayers::Max,
use_mlock: false,
n_batch: 512,
n_ctx: 512,
rope_freq_scale: 0.0,
rope_freq_base: 0.0,
context_overflow_policy: moxin_protocol::protocol::ContextOverflowPolicy::StopAtLimit,
Expand Down Expand Up @@ -211,8 +209,6 @@ fn test_chat_stop() {
prompt_template: None,
gpu_layers: moxin_protocol::protocol::GPULayers::Max,
use_mlock: false,
n_batch: 512,
n_ctx: 512,
rope_freq_scale: 0.0,
rope_freq_base: 0.0,
context_overflow_policy: moxin_protocol::protocol::ContextOverflowPolicy::StopAtLimit,
Expand Down
2 changes: 0 additions & 2 deletions moxin-protocol/src/protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ pub struct LoadModelOptions {
pub prompt_template: Option<String>,
pub gpu_layers: GPULayers,
pub use_mlock: bool,
pub n_batch: u32,
pub n_ctx: u32,
pub rope_freq_scale: f32,
pub rope_freq_base: f32,

Expand Down
2 changes: 1 addition & 1 deletion src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ impl MatchEvent for App {
.ui
.chat_history_card_options(id!(chat_history_card_options));
// TODO: Would be cool to listen for this action inside of the widget itself.
chat_history_card_options.selected(cx, chat_id, cords);
let _ = chat_history_card_options.selected(cx, chat_id, cords);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/chat/chat_history_card.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ impl ChatHistoryCard {
&mut self,
cx: &mut Cx,
actions: &Actions,
scope: &mut Scope,
_scope: &mut Scope,
) {
for action in actions {
if let ChatHistoryCardAction::ActivateTitleEdition(chat_id) =
Expand Down
Loading

0 comments on commit 97b08b0

Please sign in to comment.