Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions lib/llm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ hf-hub = { workspace = true }
humantime = { workspace = true } # input/batch
rand = { workspace = true }
oneshot = { workspace = true }
parking_lot = "0.12.4"
prometheus = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
Expand Down
39 changes: 18 additions & 21 deletions lib/llm/src/discovery/model_manager.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use std::{
collections::{HashMap, HashSet},
sync::{Arc, RwLock},
};

use parking_lot::Mutex;

use dynamo_runtime::component::Component;
use dynamo_runtime::prelude::DistributedRuntimeProvider;
use dynamo_runtime::slug::Slug;

use crate::discovery::ModelEntry;

use crate::kv_router::{KvRouterConfig, scheduler::DefaultWorkerSelector};
use crate::{
kv_router::KvRouter,
Expand All @@ -15,12 +21,6 @@ use crate::{
completions::OpenAICompletionsStreamingEngine, embeddings::OpenAIEmbeddingsStreamingEngine,
},
};
use std::collections::HashSet;
use std::sync::RwLock;
use std::{
collections::HashMap,
sync::{Arc, Mutex},
};

#[derive(Debug, thiserror::Error)]
pub enum ModelManagerError {
Expand Down Expand Up @@ -61,7 +61,7 @@ impl ModelManager {
}

pub fn get_model_entries(&self) -> Vec<ModelEntry> {
self.entries.lock().unwrap().values().cloned().collect()
self.entries.lock().values().cloned().collect()
}

pub fn has_model_any(&self, model: &str) -> bool {
Expand Down Expand Up @@ -170,12 +170,12 @@ impl ModelManager {
/// Save a ModelEntry under an instance's etcd `models/` key so we can fetch it later when the key is
/// deleted from etcd.
pub fn save_model_entry(&self, key: &str, entry: ModelEntry) {
self.entries.lock().unwrap().insert(key.to_string(), entry);
self.entries.lock().insert(key.to_string(), entry);
}

/// Remove and return model entry for this instance's etcd key. We do this when the instance stops.
pub fn remove_model_entry(&self, key: &str) -> Option<ModelEntry> {
self.entries.lock().unwrap().remove(key)
self.entries.lock().remove(key)
}

pub async fn kv_chooser_for(
Expand Down Expand Up @@ -203,7 +203,7 @@ impl ModelManager {
}

fn get_kv_chooser(&self, model_name: &str) -> Option<Arc<KvRouter>> {
self.kv_choosers.lock().unwrap().get(model_name).cloned()
self.kv_choosers.lock().get(model_name).cloned()
}

/// Create and return a KV chooser for this component and model
Expand Down Expand Up @@ -242,21 +242,18 @@ impl ModelManager {
let new_kv_chooser = Arc::new(chooser);
self.kv_choosers
.lock()
.unwrap()
.insert(model_name.to_string(), new_kv_chooser.clone());
Ok(new_kv_chooser)
}

pub fn get_model_tool_call_parser(&self, model: &str) -> Option<String> {
match self.entries.lock() {
Ok(entries) => entries
.values()
.find(|entry| entry.name == model)
.and_then(|entry| entry.runtime_config.as_ref())
.and_then(|config| config.tool_call_parser.clone())
.map(|parser| parser.to_string()),
Err(_) => None,
}
self.entries
.lock()
.values()
.find(|entry| entry.name == model)
.and_then(|entry| entry.runtime_config.as_ref())
.and_then(|config| config.tool_call_parser.clone())
.map(|parser| parser.to_string())
}
}

Expand Down
Loading