feat: 🎸 max-token (#1538)
Raised the max-token limit to 4k for GPT-4.

# Description

Raises the max-token limits across the app: on the frontend, gpt-4 and
gpt-3.5-turbo-16k are now capped at 4000 tokens and gpt-3.5-turbo at 1000; on
the backend, the HeadlessQA default rises from 256 to 2000 and the chat-route
fallback from 256 to 512.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
StanGirard authored Nov 1, 2023
1 parent 7845124 commit b330370
Showing 3 changed files with 5 additions and 5 deletions.
backend/llm/qa_headless.py (2 changes: 1 addition & 1 deletion)
```diff
@@ -31,7 +31,7 @@
 class HeadlessQA(BaseModel):
     model: str
     temperature: float = 0.0
-    max_tokens: int = 256
+    max_tokens: int = 2000
     user_openai_api_key: Optional[str] = None
     openai_api_key: Optional[str] = None
     streaming: bool = False
```
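For context, a minimal usage sketch of the new default, assuming only the fields visible in the hunk above (the real class has more):

```python
from typing import Optional

from pydantic import BaseModel


class HeadlessQA(BaseModel):
    # Reduced to the fields shown in the hunk; illustrative, not the full class.
    model: str
    temperature: float = 0.0
    max_tokens: int = 2000  # was 256 before this commit
    user_openai_api_key: Optional[str] = None
    openai_api_key: Optional[str] = None
    streaming: bool = False


qa = HeadlessQA(model="gpt-4")
assert qa.max_tokens == 2000  # the new default applies when callers pass nothing
assert HeadlessQA(model="gpt-4", max_tokens=4000).max_tokens == 4000  # still overridable
```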
backend/routes/chat_routes.py (2 changes: 1 addition & 1 deletion)
```diff
@@ -167,7 +167,7 @@ async def create_question_handler(
         chat_question.temperature = (
             chat_question.temperature or brain.temperature or 0.1
         )
-        chat_question.max_tokens = chat_question.max_tokens or brain.max_tokens or 256
+        chat_question.max_tokens = chat_question.max_tokens or brain.max_tokens or 512

         try:
             check_user_requests_limit(current_user)
```
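The changed line resolves `max_tokens` as: per-request value, then the brain's setting, then the new 512 fallback. Because Python's `or` picks the first truthy operand, an explicit `0` also falls through to the next option. A minimal sketch of that behavior (the helper name is illustrative, not from the repo):

```python
def resolve_max_tokens(request_value, brain_value, default=512):
    # Mirrors `chat_question.max_tokens or brain.max_tokens or 512`:
    # the first truthy operand wins, so None *and* 0 fall through.
    return request_value or brain_value or default


assert resolve_max_tokens(None, None) == 512    # nothing set: use the fallback
assert resolve_max_tokens(None, 1000) == 1000   # brain setting beats the fallback
assert resolve_max_tokens(256, 1000) == 256     # per-request value wins
assert resolve_max_tokens(0, 1000) == 1000      # 0 is falsy and gets replaced
```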
frontend/lib/helpers/defineMaxTokens.ts (6 changes: 3 additions & 3 deletions)
```diff
@@ -4,11 +4,11 @@ export const defineMaxTokens = (model: Model | PaidModels): number => {
   //At the moment is evaluating only models from OpenAI
   switch (model) {
     case "gpt-3.5-turbo":
-      return 750;
+      return 1000;
     case "gpt-3.5-turbo-16k":
-      return 2000;
+      return 4000;
     case "gpt-4":
-      return 1000;
+      return 4000;
     default:
       return 500;
   }
```
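The same limits expressed as a lookup table, in case the mapping is easier to scan than the switch. This is a hedged Python mirror for reference, not code from the repo:

```python
# Frontend per-model caps after this commit (values taken from the diff above).
MAX_TOKENS_BY_MODEL = {
    "gpt-3.5-turbo": 1000,      # was 750
    "gpt-3.5-turbo-16k": 4000,  # was 2000
    "gpt-4": 4000,              # was 1000
}
DEFAULT_MAX_TOKENS = 500  # unchanged default for unrecognized models


def define_max_tokens(model: str) -> int:
    # Equivalent of the TypeScript switch: look the model up, fall back to 500.
    return MAX_TOKENS_BY_MODEL.get(model, DEFAULT_MAX_TOKENS)


assert define_max_tokens("gpt-4") == 4000
assert define_max_tokens("unknown-model") == 500
```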
