ai-dynamo
diff --git a/‎lib/llm/src/engines.rs‎
Lines changed: 41 additions & 1 deletion b/‎lib/llm/src/engines.rs‎
Lines changed: 41 additions & 1 deletion
diff --git a/‎lib/llm/src/protocols/openai.rs‎
Lines changed: 4 additions & 52 deletions b/‎lib/llm/src/protocols/openai.rs‎
Lines changed: 4 additions & 52 deletions
diff --git a/‎lib/llm/src/protocols/openai/chat_completions.rs‎
Lines changed: 45 additions & 4 deletions b/‎lib/llm/src/protocols/openai/chat_completions.rs‎
Lines changed: 45 additions & 4 deletions
diff --git a/‎lib/llm/src/protocols/openai/completions.rs‎
Lines changed: 30 additions & 1 deletion b/‎lib/llm/src/protocols/openai/completions.rs‎
Lines changed: 30 additions & 1 deletion
@@ -124,8 +124,19 @@ fn delta_core(tok: u32) -> Annotated<LLMEngineOutput> {
 /// Useful for testing ingress such as service-http.
 struct EchoEngineFull {}
 
+/// Engine wrapper that validates each request before handing it to the wrapped engine.
+///
+/// The [`AsyncEngine`] implementation for this type calls [`ValidateRequest::validate`]
+/// on the incoming request and only forwards it to `inner` when that check succeeds.
+pub struct ValidateEngine<E> {
+    /// The wrapped engine that receives requests which passed validation.
+    inner: E,
+}
+
+impl<E> ValidateEngine<E> {
+    /// Wraps `inner` in a [`ValidateEngine`].
+    pub fn new(inner: E) -> Self {
+        ValidateEngine { inner }
+    }
+}
+
 /// Engine that dispatches requests to either OpenAICompletions
-//or OpenAIChatCompletions engine
+/// or OpenAIChatCompletions engine
 pub struct EngineDispatcher<E> {
     inner: E,
 }
@@ -136,6 +147,11 @@ impl<E> EngineDispatcher<E> {
     }
 }
 
+/// Validation hook for request types.
+///
+/// Implemented by request types so that their data can be checked before the request
+/// is processed any further.
+pub trait ValidateRequest {
+    /// Checks the data carried by this request.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`anyhow::Error`] when the request data is rejected.
+    fn validate(&self) -> anyhow::Result<()>;
+}
+
 /// Trait that allows handling both completion and chat completions requests
 #[async_trait]
 pub trait StreamingEngine: Send + Sync {
@@ -267,6 +283,30 @@ impl
     }
 }
 
+#[async_trait]
+impl<E, Req, Resp> AsyncEngine<SingleIn<Req>, ManyOut<Annotated<Resp>>, Error> for ValidateEngine<E>
+where
+    E: AsyncEngine<SingleIn<Req>, ManyOut<Annotated<Resp>>, Error> + Send + Sync,
+    Req: ValidateRequest + Send + Sync + 'static,
+    Resp: Send + Sync + 'static,
+{
+    /// Validates the request and, if it is accepted, delegates to the inner engine.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error prefixed with `Validation failed: ` when
+    /// [`ValidateRequest::validate`] rejects the request; the inner engine is not
+    /// called in that case. Otherwise returns whatever the inner engine returns.
+    async fn generate(
+        &self,
+        incoming_request: SingleIn<Req>,
+    ) -> Result<ManyOut<Annotated<Resp>>, Error> {
+        // Split the payload from its context so the payload can be inspected.
+        let (payload, ctx) = incoming_request.into_parts();
+
+        // Reject invalid requests before the inner engine ever sees them.
+        payload
+            .validate()
+            .map_err(|cause| anyhow::anyhow!("Validation failed: {}", cause))?;
+
+        // Reassemble the original request and hand it to the wrapped engine.
+        self.inner.generate(SingleIn::rejoin(payload, ctx)).await
+    }
+}
+
 #[async_trait]
 impl<E> StreamingEngine for EngineDispatcher<E>
 where
 
@@ -13,8 +13,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use std::fmt::Display;
-
 use anyhow::Result;
 use serde::{Deserialize, Serialize};
 
@@ -29,42 +27,11 @@ pub mod embeddings;
 pub mod models;
 pub mod nvext;
 pub mod responses;
+pub mod validate;
 
-/// Minimum allowed value for OpenAI's `temperature` sampling option
-pub const MIN_TEMPERATURE: f32 = 0.0;
-
-/// Maximum allowed value for OpenAI's `temperature` sampling option
-pub const MAX_TEMPERATURE: f32 = 2.0;
-
-/// Allowed range of values for OpenAI's `temperature`` sampling option
-pub const TEMPERATURE_RANGE: (f32, f32) = (MIN_TEMPERATURE, MAX_TEMPERATURE);
-
-/// Minimum allowed value for OpenAI's `top_p` sampling option
-pub const MIN_TOP_P: f32 = 0.0;
-
-/// Maximum allowed value for OpenAI's `top_p` sampling option
-pub const MAX_TOP_P: f32 = 1.0;
-
-/// Allowed range of values for OpenAI's `top_p` sampling option
-pub const TOP_P_RANGE: (f32, f32) = (MIN_TOP_P, MAX_TOP_P);
-
-/// Minimum allowed value for OpenAI's `frequency_penalty` sampling option
-pub const MIN_FREQUENCY_PENALTY: f32 = -2.0;
-
-/// Maximum allowed value for OpenAI's `frequency_penalty` sampling option
-pub const MAX_FREQUENCY_PENALTY: f32 = 2.0;
-
-/// Allowed range of values for OpenAI's `frequency_penalty` sampling option
-pub const FREQUENCY_PENALTY_RANGE: (f32, f32) = (MIN_FREQUENCY_PENALTY, MAX_FREQUENCY_PENALTY);
-
-/// Minimum allowed value for OpenAI's `presence_penalty` sampling option
-pub const MIN_PRESENCE_PENALTY: f32 = -2.0;
-
-/// Maximum allowed value for OpenAI's `presence_penalty` sampling option
-pub const MAX_PRESENCE_PENALTY: f32 = 2.0;
-
-/// Allowed range of values for OpenAI's `presence_penalty` sampling option
-pub const PRESENCE_PENALTY_RANGE: (f32, f32) = (MIN_PRESENCE_PENALTY, MAX_PRESENCE_PENALTY);
+use validate::{
+    validate_range, FREQUENCY_PENALTY_RANGE, PRESENCE_PENALTY_RANGE, TEMPERATURE_RANGE, TOP_P_RANGE,
+};
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct AnnotatedDelta<R> {
@@ -166,21 +133,6 @@ impl<T: OpenAIStopConditionsProvider> StopConditionsProvider for T {
     }
 }
 
-// todo - move to common location
-fn validate_range<T>(value: Option<T>, range: &(T, T)) -> Result<Option<T>>
-where
-    T: PartialOrd + Display,
-{
-    if value.is_none() {
-        return Ok(None);
-    }
-    let value = value.unwrap();
-    if value < range.0 || value > range.1 {
-        anyhow::bail!("Value {} is out of range [{}, {}]", value, range.0, range.1);
-    }
-    Ok(Some(value))
-}
-
 pub trait DeltaGeneratorExt<ResponseType: Send + Sync + 'static + std::fmt::Debug>:
     Send + Sync + 'static
 {
 
@@ -17,10 +17,12 @@ use dynamo_runtime::protocols::annotated::AnnotationsProvider;
 use serde::{Deserialize, Serialize};
 use validator::Validate;
 
-use super::nvext::NvExt;
-use super::nvext::NvExtProvider;
-use super::OpenAISamplingOptionsProvider;
-use super::OpenAIStopConditionsProvider;
+use crate::engines::ValidateRequest;
+
+use super::{
+    nvext::NvExt, nvext::NvExtProvider, validate, OpenAISamplingOptionsProvider,
+    OpenAIStopConditionsProvider,
+};
 
 mod aggregator;
 mod delta;
@@ -174,3 +176,42 @@ impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
         self.nvext.as_ref()
     }
 }
+
+/// Request validation for `NvCreateChatCompletionRequest`.
+///
+/// Runs the field validators from the `validate` module against the wrapped `inner`
+/// request, one field at a time, and stops at the first check that fails.
+impl ValidateRequest for NvCreateChatCompletionRequest {
+    fn validate(&self) -> Result<(), anyhow::Error> {
+        let req = &self.inner;
+
+        validate::validate_messages(&req.messages)?;
+        validate::validate_model(&req.model)?;
+        // `store`: not validated
+        validate::validate_reasoning_effort(&req.reasoning_effort)?;
+        validate::validate_metadata(&req.metadata)?;
+        validate::validate_frequency_penalty(req.frequency_penalty)?;
+        validate::validate_logit_bias(&req.logit_bias)?;
+        // `logprobs`: not validated
+        validate::validate_top_logprobs(req.top_logprobs)?;
+        // `max_tokens`: deprecated field, its validation is intentionally skipped
+        validate::validate_max_completion_tokens(req.max_completion_tokens)?;
+        validate::validate_n(req.n)?;
+        // `modalities`, `prediction`, `audio`: not validated
+        validate::validate_presence_penalty(req.presence_penalty)?;
+        // `response_format`, `seed`: not validated
+        validate::validate_service_tier(&req.service_tier)?;
+        validate::validate_stop(&req.stop)?;
+        // `stream`, `stream_options`: not validated
+        validate::validate_temperature(req.temperature)?;
+        validate::validate_top_p(req.top_p)?;
+        validate::validate_tools(&req.tools.as_deref())?;
+        // `tool_choice`, `parallel_tool_calls`: not validated
+        validate::validate_user(req.user.as_deref())?;
+        // `function_call`, `functions`: not validated
+
+        Ok(())
+    }
+}
@@ -18,10 +18,12 @@ use dynamo_runtime::protocols::annotated::AnnotationsProvider;
 use serde::{Deserialize, Serialize};
 use validator::Validate;
 
+use crate::engines::ValidateRequest;
+
 use super::{
     common::{self, SamplingOptionsProvider, StopConditionsProvider},
     nvext::{NvExt, NvExtProvider},
-    ContentProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider,
+    validate, ContentProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider,
 };
 
 mod aggregator;
@@ -275,3 +277,30 @@ impl TryFrom<common::StreamingCompletionResponse> for async_openai::types::Choic
         Ok(choice)
     }
 }
+
+/// Request validation for `NvCreateCompletionRequest`.
+///
+/// Runs the field validators from the `validate` module against the wrapped `inner`
+/// request, one field at a time, and stops at the first check that fails.
+impl ValidateRequest for NvCreateCompletionRequest {
+    fn validate(&self) -> Result<(), anyhow::Error> {
+        let req = &self.inner;
+
+        validate::validate_model(&req.model)?;
+        validate::validate_prompt(&req.prompt)?;
+        validate::validate_suffix(req.suffix.as_deref())?;
+        validate::validate_max_tokens(req.max_tokens)?;
+        validate::validate_temperature(req.temperature)?;
+        validate::validate_top_p(req.top_p)?;
+        validate::validate_n(req.n)?;
+        // `stream`, `stream_options`: not validated
+        validate::validate_logprobs(req.logprobs)?;
+        // `echo`: not validated
+        validate::validate_stop(&req.stop)?;
+        validate::validate_presence_penalty(req.presence_penalty)?;
+        validate::validate_frequency_penalty(req.frequency_penalty)?;
+        // `best_of` is checked together with `n`, which is passed alongside it.
+        validate::validate_best_of(req.best_of, req.n)?;
+        validate::validate_logit_bias(&req.logit_bias)?;
+        validate::validate_user(req.user.as_deref())?;
+        // `seed`: not validated
+
+        Ok(())
+    }
+}