atoma-network · jorgeantonio21 · Jan 30, 2025 · Jan 29, 2025 · Jan 30, 2025 · Jan 30, 2025
diff --git a/atoma-proxy/src/server/handlers/nodes.rs b/atoma-proxy/src/server/handlers/nodes.rs
@@ -31,7 +31,7 @@ const BODY_HASH_SIZE: usize = 32;
 /// in the request. We set a default value here to be used for node selection, as an upper
 /// bound for the number of tokens for each request.
 /// TODO: In the future, this number can be dynamically adjusted based on the model.
-pub const MAX_NUM_TOKENS_FOR_CONFIDENTIAL_COMPUTE: i64 = 128_000;
+pub const MAX_NUM_TOKENS_FOR_CONFIDENTIAL_COMPUTE: i64 = 8_192;
 
 #[derive(OpenApi)]
 #[openapi(paths(nodes_create, nodes_create_lock))]

diff --git a/atoma-proxy/src/server/middleware.rs b/atoma-proxy/src/server/middleware.rs
@@ -16,13 +16,10 @@ use tracing::instrument;
 use super::{
     error::AtomaProxyError,
     handlers::{
-        chat_completions::{CHAT_COMPLETIONS_PATH, CONFIDENTIAL_CHAT_COMPLETIONS_PATH},
         image_generations::CONFIDENTIAL_IMAGE_GENERATIONS_PATH,
-        nodes::MAX_NUM_TOKENS_FOR_CONFIDENTIAL_COMPUTE,
-        update_state_manager,
+        nodes::MAX_NUM_TOKENS_FOR_CONFIDENTIAL_COMPUTE, update_state_manager,
     },
     http_server::ProxyState,
-    DEFAULT_MAX_TOKENS, MAX_COMPLETION_TOKENS,
 };
 use super::{types::ConfidentialComputeRequest, Result};
 
@@ -239,16 +236,11 @@ pub async fn authenticate_middleware(
             message: format!("Failed to convert body to bytes: {}", e),
             endpoint: req_parts.uri.path().to_string(),
         })?;
-    let mut body_json: Value =
+    let body_json: Value =
         serde_json::from_slice(&body_bytes).map_err(|e| AtomaProxyError::InternalError {
             message: format!("Failed to parse body as JSON: {}", e),
             endpoint: req_parts.uri.path().to_string(),
         })?;
-    if endpoint == CONFIDENTIAL_CHAT_COMPLETIONS_PATH || endpoint == CHAT_COMPLETIONS_PATH {
-        // NOTE: Chat completions endpoints processed by Atoma nodes require a max_tokens field
-        body_json[MAX_COMPLETION_TOKENS] = serde_json::json!(DEFAULT_MAX_TOKENS);
-    }
-    let endpoint = req_parts.uri.path().to_string();
 
     // Authenticate request and lock compute units for a Stack.
     //

diff --git a/atoma-proxy/src/server/mod.rs b/atoma-proxy/src/server/mod.rs
@@ -25,7 +25,7 @@ pub(crate) const MAX_TOKENS: &str = "max_tokens";
 pub(crate) const MAX_COMPLETION_TOKENS: &str = "max_completion_tokens";
 
 /// The default max_tokens value.
-pub(crate) const DEFAULT_MAX_TOKENS: u64 = 4_096;
+pub(crate) const DEFAULT_MAX_TOKENS: u64 = 8_192;
 
 /// The one million constant.
 pub(crate) const ONE_MILLION: u64 = 1_000_000;