diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh index 6c056ef56f..249ef1eb20 100755 --- a/.devcontainer/post-create.sh +++ b/.devcontainer/post-create.sh @@ -55,8 +55,6 @@ cargo doc --no-deps # create symlinks for the binaries in the deploy directory mkdir -p $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin ln -sf $HOME/dynamo/.build/target/debug/dynamo-run $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run -ln -sf $HOME/dynamo/.build/target/debug/http $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/http -ln -sf $HOME/dynamo/.build/target/debug/llmctl $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/llmctl # install the python bindings cd $HOME/dynamo/lib/bindings/python && retry maturin develop diff --git a/Cargo.lock b/Cargo.lock index a1227132b3..f44e4bc83e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -714,12 +714,6 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" -[[package]] -name = "bytecount" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" - [[package]] name = "bytemuck" version = "1.23.1" @@ -2973,18 +2967,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http" -version = "0.3.2" -dependencies = [ - "clap 4.5.40", - "dynamo-llm", - "dynamo-runtime", - "serde", - "serde_json", - "tokio", -] - [[package]] name = "http" version = "1.3.1" @@ -3730,21 +3712,6 @@ dependencies = [ "toktrie 0.7.29", ] -[[package]] -name = "llmctl" -version = "0.3.2" -dependencies = [ - "anyhow", - "clap 4.5.40", - "dynamo-llm", - "dynamo-runtime", - "serde", - "serde_json", - "tabled", - "tokio", - "tracing", -] - [[package]] name = "local-ip-address" version = "0.6.4" @@ -4725,17 +4692,6 @@ dependencies = [ "serde", ] -[[package]] -name = "papergrid" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b915f831b85d984193fdc3d3611505871dc139b2534530fa01c1a6a6707b6723" -dependencies = [ - "bytecount", - "fnv", - "unicode-width 0.2.0", -] - [[package]] name = "parking_lot" version = "0.12.4" @@ -6848,29 +6804,6 @@ dependencies = [ "version-compare", ] -[[package]] -name = "tabled" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "121d8171ee5687a4978d1b244f7d99c43e7385a272185a2f1e1fa4dc0979d444" -dependencies = [ - "papergrid", - "tabled_derive", -] - -[[package]] -name = "tabled_derive" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52d9946811baad81710ec921809e2af67ad77719418673b2a3794932d57b7538" -dependencies = [ - "heck 0.5.0", - "proc-macro-error2", - "proc-macro2", - "quote", - "syn 2.0.100", -] - [[package]] name = "target-lexicon" version = "0.12.16" diff --git a/Cargo.toml b/Cargo.toml index 4ee2525d4d..f732080e5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ [workspace] members = [ - "components/http", "components/metrics", "components/router", "launch/*", diff --git a/Earthfile b/Earthfile index 7b9dfd5639..9b06bb7fb5 100644 --- a/Earthfile +++ b/Earthfile @@ -120,9 +120,7 @@ dynamo-build: # Remove existing symlinks rm -f /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/* && \ # Create new symlinks pointing to the correct location - ln -sf /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run && \ - ln -sf /workspace/target/release/http /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/http && \ - ln -sf /workspace/target/release/llmctl /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/llmctl + ln -sf /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run RUN cd /workspace/lib/bindings/python && \ diff --git a/README.md b/README.md index d074af225f..401c3640d4 100644 --- a/README.md +++ b/README.md @@ -172,8 +172,6 @@ Otherwise, to develop locally, we recommend working inside of the container cargo build --release mkdir -p /workspace/deploy/sdk/src/dynamo/sdk/cli/bin -cp /workspace/target/release/http /workspace/deploy/sdk/src/dynamo/sdk/cli/bin -cp /workspace/target/release/llmctl /workspace/deploy/sdk/src/dynamo/sdk/cli/bin cp /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin uv pip install -e . diff --git a/components/http/Cargo.toml b/components/http/Cargo.toml deleted file mode 100644 index 6eef60146b..0000000000 --- a/components/http/Cargo.toml +++ /dev/null @@ -1,34 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -[package] -name = "http" -version.workspace = true -edition.workspace = true -authors.workspace = true -license.workspace = true -homepage.workspace = true -repository.workspace = true - -[dependencies] -dynamo-runtime = { workspace = true} -dynamo-llm = { workspace = true} - -serde = { workspace = true } -serde_json = { workspace = true } -tokio = { workspace = true } - -clap = { version = "4.5", features = ["derive"] } diff --git a/components/http/src/main.rs b/components/http/src/main.rs deleted file mode 100644 index 7762c21f50..0000000000 --- a/components/http/src/main.rs +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -use clap::Parser; - -use dynamo_llm::discovery::{ModelWatcher, MODEL_ROOT_PATH}; -use dynamo_llm::http::service::service_v2::HttpService; -use dynamo_runtime::{ - logging, pipeline::RouterMode, transports::etcd::PrefixWatcher, DistributedRuntime, Result, - Runtime, Worker, -}; - -#[derive(Parser)] -#[command(author, version, about, long_about = None)] -struct Args { - /// Host for the HTTP service - #[arg(long, default_value = "0.0.0.0")] - host: String, - - /// Port number for the HTTP service - #[arg(short, long, default_value = "8080")] - port: u16, - - /// Namespace for the distributed component - #[arg(long, default_value = "public")] - namespace: String, - - /// Component name for the service - #[arg(long, default_value = "http")] - component: String, -} - -#[tokio::main] -async fn main() -> Result<()> { - logging::init(); - let worker = Worker::from_current()?; - worker.execute_async(app).await -} - -async fn app(runtime: Runtime) -> Result<()> { - let distributed = DistributedRuntime::from_settings(runtime.clone()).await?; - let args = Args::parse(); - - let http_service = HttpService::builder() - .port(args.port) - .host(args.host) - .build()?; - let manager = http_service.state().manager_clone(); - - // todo - use the IntoComponent trait to register the component - // todo - start a service - // todo - we want the service to create an entry and register component definition - // todo - the component definition should be the type of component and it's config - // in this example we will have an HttpServiceComponentDefinition object which will be - // written to etcd - // the cli when operating on an `http` component will validate the namespace.component is - // registered with HttpServiceComponentDefinition - - let watch_obj = ModelWatcher::new(distributed.clone(), manager, RouterMode::Random, None); - - if let Some(etcd_client) = distributed.etcd_client() { - let models_watcher: PrefixWatcher = - etcd_client.kv_get_and_watch_prefix(MODEL_ROOT_PATH).await?; - - let (_prefix, _watcher, receiver) = models_watcher.dissolve(); - tokio::spawn(async move { - watch_obj.watch(receiver).await; - }); - } - - // Run the service - http_service.run(runtime.child_token()).await -} diff --git a/container/Dockerfile.none b/container/Dockerfile.none index cb90e1201c..6ae8e9b937 100644 --- a/container/Dockerfile.none +++ b/container/Dockerfile.none @@ -49,8 +49,6 @@ ENV CARGO_TARGET_DIR=/workspace/target RUN cargo build --release --locked && \ cargo doc --no-deps && \ cp target/release/dynamo-run /usr/local/bin && \ - cp target/release/http /usr/local/bin && \ - cp target/release/llmctl /usr/local/bin && \ cp target/release/metrics /usr/local/bin && \ cp target/release/mock_worker /usr/local/bin diff --git a/container/Dockerfile.sglang b/container/Dockerfile.sglang index f045dbd612..9a415a6000 100644 --- a/container/Dockerfile.sglang +++ b/container/Dockerfile.sglang @@ -367,8 +367,6 @@ RUN mkdir -p /opt/dynamo/bindings/wheels && \ cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \ cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \ cp target/release/dynamo-run /usr/local/bin && \ - cp target/release/http /usr/local/bin && \ - cp target/release/llmctl /usr/local/bin && \ cp target/release/metrics /usr/local/bin && \ cp target/release/mock_worker /usr/local/bin diff --git a/container/Dockerfile.sglang-deepep b/container/Dockerfile.sglang-deepep index 53e001f82e..897d36e523 100644 --- a/container/Dockerfile.sglang-deepep +++ b/container/Dockerfile.sglang-deepep @@ -132,8 +132,6 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} RUN cargo build --release RUN mkdir -p deploy/sdk/src/dynamo/sdk/cli/bin -RUN cp target/release/http deploy/sdk/src/dynamo/sdk/cli/bin -RUN cp target/release/llmctl deploy/sdk/src/dynamo/sdk/cli/bin RUN cp target/release/dynamo-run deploy/sdk/src/dynamo/sdk/cli/bin RUN cd lib/bindings/python && pip install --break-system-packages -e . && cd ../../.. diff --git a/container/Dockerfile.tensorrt_llm b/container/Dockerfile.tensorrt_llm index 6f1a2cf757..d69a392f2a 100644 --- a/container/Dockerfile.tensorrt_llm +++ b/container/Dockerfile.tensorrt_llm @@ -317,8 +317,6 @@ RUN mkdir -p /opt/dynamo/bindings/wheels && \ cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \ cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \ cp target/release/dynamo-run /usr/local/bin && \ - cp target/release/http /usr/local/bin && \ - cp target/release/llmctl /usr/local/bin && \ cp target/release/metrics /usr/local/bin && \ cp target/release/mock_worker /usr/local/bin diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm index 46e173b636..e0941877ff 100644 --- a/container/Dockerfile.vllm +++ b/container/Dockerfile.vllm @@ -432,8 +432,6 @@ RUN mkdir -p /opt/dynamo/bindings/wheels && \ cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \ cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \ cp target/release/dynamo-run /usr/local/bin && \ - cp target/release/http /usr/local/bin && \ - cp target/release/llmctl /usr/local/bin && \ cp target/release/metrics /usr/local/bin && \ cp target/release/mock_worker /usr/local/bin diff --git a/container/Dockerfile.vllm_v1 b/container/Dockerfile.vllm_v1 index 8c55ebcd0e..b5e0570edb 100644 --- a/container/Dockerfile.vllm_v1 +++ b/container/Dockerfile.vllm_v1 @@ -412,8 +412,6 @@ RUN mkdir -p /opt/dynamo/bindings/wheels && \ cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \ cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \ cp target/release/dynamo-run /usr/local/bin && \ - cp target/release/http /usr/local/bin && \ - cp target/release/llmctl /usr/local/bin && \ cp target/release/metrics /usr/local/bin && \ cp target/release/mock_worker /usr/local/bin diff --git a/deploy/sdk/src/dynamo/sdk/cli/run_executable.py b/deploy/sdk/src/dynamo/sdk/cli/run_executable.py index 1e23fa8fea..809830cfb2 100644 --- a/deploy/sdk/src/dynamo/sdk/cli/run_executable.py +++ b/deploy/sdk/src/dynamo/sdk/cli/run_executable.py @@ -69,28 +69,6 @@ def dynamo_run(args=None): return result.returncode -def llmctl(args=None): - """ - Run the llmctl executable with the provided arguments. - If no args provided, passes through sys.argv[1:] to the executable. - """ - if args is None: - args = sys.argv[1:] - result = run_executable("llmctl", args=args, capture_output=False) - return result.returncode - - -def http(args=None): - """ - Run the http executable with the provided arguments. - If no args provided, passes through sys.argv[1:] to the executable. - """ - if args is None: - args = sys.argv[1:] - result = run_executable("http", args=args, capture_output=False) - return result.returncode - - def metrics(args=None): """ Run the metrics executable with the provided arguments. diff --git a/docs/get_started.md b/docs/get_started.md index 1ccd039290..608f07b6fa 100644 --- a/docs/get_started.md +++ b/docs/get_started.md @@ -206,8 +206,6 @@ Otherwise, to develop locally, we recommend working inside of the container: cargo build --release mkdir -p /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin -cp /workspace/target/release/http /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin -cp /workspace/target/release/llmctl /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin cp /workspace/target/release/dynamo-run /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin uv pip install -e . diff --git a/docs/guides/backend.md b/docs/guides/backend.md index 28aa5df30a..789b4930cc 100644 --- a/docs/guides/backend.md +++ b/docs/guides/backend.md @@ -92,10 +92,6 @@ class ResponseType(BaseModel): # Add other fields as needed ``` -For example, if you deploy your worker directly behind an OpenAI HTTP (`http`) service -using `llmctl`, you can define the request and response types to correspond to -Chat Completions objects, such as the ones specified in the OpenAI API. For example: - ```python from vllm.entrypoints.openai.protocol import ChatCompletionRequest diff --git a/hatch_build.py b/hatch_build.py index 766ce5c475..5290526ea9 100644 --- a/hatch_build.py +++ b/hatch_build.py @@ -24,8 +24,6 @@ def initialize(self, version, build_data): bin_path = os.getenv("DYNAMO_BIN_PATH", "target/release") build_data["force_include"] = { f"{bin_path}/dynamo-run": "dynamo/sdk/cli/bin/dynamo-run", - f"{bin_path}/llmctl": "dynamo/sdk/cli/bin/llmctl", - f"{bin_path}/http": "dynamo/sdk/cli/bin/http", f"{bin_path}/metrics": "dynamo/sdk/cli/bin/metrics", f"{bin_path}/mock_worker": "dynamo/sdk/cli/bin/mock_worker", f"{bin_path}/libdynamo_llm_capi.so": "dynamo/sdk/cli/bin/libdynamo_llm_capi.so", diff --git a/launch/llmctl/Cargo.toml b/launch/llmctl/Cargo.toml deleted file mode 100644 index 3d6446d6f1..0000000000 --- a/launch/llmctl/Cargo.toml +++ /dev/null @@ -1,36 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -[package] -name = "llmctl" -version.workspace = true -edition.workspace = true -authors.workspace = true -license.workspace = true -homepage.workspace = true -repository.workspace = true - -[dependencies] -anyhow = { workspace = true } -dynamo-runtime = { workspace = true } -dynamo-llm = { workspace = true } - -serde = { workspace = true } -serde_json = { workspace = true } -tracing = { workspace = true } -tokio = { workspace = true } - -clap = { version = "4.5", features = ["derive"] } -tabled = { version = "0.18" } diff --git a/launch/llmctl/src/main.rs b/launch/llmctl/src/main.rs deleted file mode 100644 index f8b96d8427..0000000000 --- a/launch/llmctl/src/main.rs +++ /dev/null @@ -1,362 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -use std::sync::Arc; - -use clap::{Parser, Subcommand}; - -use dynamo_llm::discovery::{ModelManager, ModelWatcher}; -use dynamo_llm::local_model::{LocalModelBuilder, ModelNetworkName}; -use dynamo_llm::model_type::ModelType; -use dynamo_runtime::component::Endpoint; -use dynamo_runtime::pipeline::RouterMode; -use dynamo_runtime::{ - distributed::DistributedConfig, logging, DistributedRuntime, Result, Runtime, Worker, -}; - -// Macro to define model types and associated commands -macro_rules! define_type_subcommands { - ($(($variant:ident, $primary_name:expr, [$($alias:expr),*], $help:expr)),* $(,)?) => { - #[derive(Subcommand)] - enum AddCommands { - $( - #[doc = $help] - #[command(name = $primary_name, aliases = [$($alias),*])] - $variant(AddModelArgs), - )* - } - - #[derive(Subcommand)] - enum ListCommands { - $( - #[doc = concat!("List ", $primary_name, " models")] - #[command(name = $primary_name, aliases = [$($alias),*])] - $variant, - )* - } - - #[derive(Subcommand)] - enum RemoveCommands { - $( - #[doc = concat!("Remove ", $primary_name, " model")] - #[command(name = $primary_name, aliases = [$($alias),*])] - $variant(RemoveModelArgs), - )* - } - - impl AddCommands { - fn into_parts(self) -> (ModelType, String, String) { - match self { - $(Self::$variant(args) => (ModelType::$variant, args.model_name, args.endpoint_name)),* - } - } - } - - impl RemoveCommands { - fn into_parts(self) -> (ModelType, String) { - match self { - $(Self::$variant(args) => (ModelType::$variant, args.model_name)),* - } - } - } - - impl ListCommands { - fn model_type(&self) -> ModelType { - match self { - $(Self::$variant => ModelType::$variant),* - } - } - } - } -} - -define_type_subcommands!( - ( - Chat, - "chat", - ["chat-model", "chat-models"], - "Add a chat model" - ), - ( - Completion, - "completion", - ["completions", "completion-model"], - "Add a completion model" - ), - // Add new model types here: - ( - Embedding, - "embedding", - ["embeddings", "embedding-model"], - "Add an embedding model" - ) -); - -#[derive(Parser)] -#[command( - author="NVIDIA", - version="0.2.1", - about="LLMCTL - Deprecated. Do not use.", - long_about = None, - disable_help_subcommand = true, -)] -struct Cli { - /// Public Namespace to operate in - /// Do not use this. In fact don't use anything about this file. - #[arg(short = 'n', long)] - public_namespace: Option, - - #[command(subcommand)] - command: Commands, -} - -#[derive(Subcommand)] -enum Commands { - /// HTTP service related commands - Http { - #[command(subcommand)] - command: HttpCommands, - }, -} - -#[derive(Subcommand)] -enum HttpCommands { - /// Add models - Add { - #[command(subcommand)] - model_type: AddCommands, - }, - - /// List models (all types if no specific type provided) - List { - #[command(subcommand)] - model_type: Option, - }, - - /// Remove models - Remove { - #[command(subcommand)] - model_type: RemoveCommands, - }, -} - -#[derive(Parser)] -struct AddModelArgs { - /// Model name (e.g. foo/v1) - #[arg(name = "model-name")] - model_name: String, - /// Endpoint name (format: component.endpoint or namespace.component.endpoint) - #[arg(name = "endpoint-name")] - endpoint_name: String, -} - -/// Common fields for removing any model type -#[derive(Parser)] -struct RemoveModelArgs { - /// Name of the model to remove - #[arg(name = "model-name")] - model_name: String, -} - -fn main() -> Result<()> { - logging::init(); - let cli = Cli::parse(); - - // Default namespace to "dynamo" if not specified - let namespace = cli.public_namespace.unwrap_or_else(|| "dynamo".to_string()); - - let worker = Worker::from_settings()?; - worker.execute(|runtime| async move { handle_command(runtime, namespace, cli.command).await }) -} - -async fn handle_command(runtime: Runtime, namespace: String, command: Commands) -> Result<()> { - let settings = DistributedConfig::for_cli(); - let distributed = DistributedRuntime::new(runtime, settings).await?; - - match command { - Commands::Http { command } => { - match command { - HttpCommands::Add { model_type } => { - let (model_type, model_name, endpoint_name) = model_type.into_parts(); - add_model( - &distributed, - namespace.to_string(), - model_type, - model_name, - &endpoint_name, - ) - .await?; - } - HttpCommands::List { model_type } => { - match model_type { - Some(model_type) => { - list_models( - &distributed, - namespace.clone(), - Some(model_type.model_type()), - ) - .await?; - } - None => { - // List all model types - list_models(&distributed, namespace.clone(), None).await?; - } - } - } - HttpCommands::Remove { model_type } => { - let (model_type, name) = model_type.into_parts(); - remove_model(&distributed, model_type, &name).await?; - } - } - } - } - Ok(()) -} - -async fn add_model( - distributed: &DistributedRuntime, - namespace: String, - model_type: ModelType, - model_name: String, - endpoint_name: &str, -) -> Result<()> { - tracing::debug!("Adding model {model_name} with endpoint {endpoint_name}"); - if model_name.starts_with('/') { - anyhow::bail!("Model name '{model_name}' cannot start with a slash"); - } - - let endpoint = endpoint_from_name(distributed, &namespace, endpoint_name)?; - - let mut model = LocalModelBuilder::default() - .model_name(Some(model_name)) - .build() - .await?; - model.attach(&endpoint, model_type).await?; - - Ok(()) -} - -#[derive(tabled::Tabled)] -struct ModelRow { - #[tabled(rename = "MODEL TYPE")] - model_type: String, - #[tabled(rename = "MODEL NAME")] - name: String, - #[tabled(rename = "NAMESPACE")] - namespace: String, - #[tabled(rename = "COMPONENT")] - component: String, - #[tabled(rename = "ENDPOINT")] - endpoint: String, -} - -async fn list_models( - distributed: &DistributedRuntime, - namespace: String, - model_type: Option, -) -> Result<()> { - // We only need a ModelWatcher to call it's all_entries. llmctl is going away so no need to - // refactor for this. - let watcher = ModelWatcher::new( - distributed.clone(), - Arc::new(ModelManager::new()), - RouterMode::Random, - None, - ); - - let mut models = Vec::new(); - for entry in watcher.all_entries().await? { - match (model_type, entry.model_type) { - (None, _) => { - // list all - } - (Some(want), got) if want == got => { - // match - } - _ => { - // no match - continue; - } - } - models.push(ModelRow { - model_type: entry.model_type.as_str().to_string(), - name: entry.name, - namespace: entry.endpoint.namespace, - component: entry.endpoint.component, - endpoint: entry.endpoint.name, - }); - } - - if models.is_empty() { - match &model_type { - Some(mt) => println!( - "No {} models found in namespace: {}", - mt.as_str(), - namespace - ), - None => println!("No models found in namespace: {}", namespace), - } - } else { - let table = tabled::Table::new(models); - match &model_type { - Some(mt) => println!("Listing {} models in namespace: {}", mt.as_str(), namespace), - None => println!("Listing all models in namespace: {}", namespace), - } - println!("{}", table); - } - Ok(()) -} - -async fn remove_model( - distributed: &DistributedRuntime, - model_type: ModelType, - model_name: &str, -) -> Result<()> { - // We have to do this manually because normally the etcd lease system does it for us - let watcher = ModelWatcher::new( - distributed.clone(), - Arc::new(ModelManager::new()), - RouterMode::Random, - None, - ); - let Some(etcd_client) = distributed.etcd_client() else { - anyhow::bail!("llmctl is only useful with dynamic workers"); - }; - let active_instances = watcher.entries_for_model(model_name).await?; - for entry in active_instances - .into_iter() - .filter(|entry| entry.model_type == model_type) - { - let network_name = ModelNetworkName::from_entry(&entry, 0); - tracing::debug!("deleting key: {network_name}"); - etcd_client - .kv_delete(network_name.to_string(), None) - .await?; - } - - Ok(()) -} - -fn endpoint_from_name( - distributed: &DistributedRuntime, - namespace: &str, - endpoint_name: &str, -) -> anyhow::Result { - let parts: Vec<&str> = endpoint_name.split('.').collect(); - - if parts.len() < 2 { - anyhow::bail!("Endpoint name '{}' is too short. Format should be 'component.endpoint' or 'namespace.component.endpoint'", endpoint_name); - } else if parts.len() > 3 { - anyhow::bail!("Endpoint name '{}' is too long. Format should be 'component.endpoint' or 'namespace.component.endpoint'", endpoint_name); - } - - // TODO previous version sometime hardcoded this to "http", so maybe adjust - let component_name = parts[parts.len() - 2].to_string(); - let endpoint_name = parts[parts.len() - 1].to_string(); - - let component = distributed - .namespace(namespace)? - .component(component_name)?; - - Ok(component.endpoint(endpoint_name)) -} diff --git a/pyproject.toml b/pyproject.toml index aa623d8dfb..9c30d6bdb6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,8 +71,6 @@ vllm = [ [project.scripts] dynamo = "dynamo.sdk.cli.cli:cli" dynamo-run = "dynamo.sdk.cli.run_executable:dynamo_run" -llmctl = "dynamo.sdk.cli.run_executable:llmctl" -http = "dynamo.sdk.cli.run_executable:http" metrics = "dynamo.sdk.cli.run_executable:metrics" mock_worker = "dynamo.sdk.cli.run_executable:mock_worker"