diff --git a/README.md b/README.md index 6dc0b2dd..b3403d65 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ See [API Reference](https://glide.einstack.ai/api-reference/introduction) for mo "role": "user", "content": "Where was it played?" }, - "messageHistory": [ + "message_history": [ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Who won the world series in 2020?"}, {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."} diff --git "a/docs/api/Language API/\360\237\222\254 Chat Stream.bru" "b/docs/api/Language API/\360\237\222\254 Chat Stream.bru" deleted file mode 100644 index e544c061..00000000 --- "a/docs/api/Language API/\360\237\222\254 Chat Stream.bru" +++ /dev/null @@ -1,11 +0,0 @@ -meta { - name: 💬 Chat Stream - type: http - seq: 2 -} - -get { - url: {{base_url}}/language/default/chatStream - body: none - auth: none -} diff --git "a/docs/api/Language API/\360\237\222\254 Chat.bru" "b/docs/api/Language API/\360\237\222\254 Chat.bru" deleted file mode 100644 index 6ea21147..00000000 --- "a/docs/api/Language API/\360\237\222\254 Chat.bru" +++ /dev/null @@ -1,21 +0,0 @@ -meta { - name: 💬 Chat - type: http - seq: 1 -} - -post { - url: {{base_url}}/language/default/chat/ - body: json - auth: none -} - -body:json { - { - "message": { - "role": "user", - "content": "How are you doing?" - }, - "messageHistory": [] - } -} diff --git "a/docs/api/Language API/\360\237\224\247 Router List.bru" "b/docs/api/Language API/\360\237\224\247 Router List.bru" deleted file mode 100644 index 0545245f..00000000 --- "a/docs/api/Language API/\360\237\224\247 Router List.bru" +++ /dev/null @@ -1,21 +0,0 @@ -meta { - name: 🔧 Router List - type: http - seq: 3 -} - -get { - url: {{base_url}}/language/ - body: json - auth: none -} - -body:json { - { - "message": { - "role": "user", - "content": "How are you doing?" - }, - "messageHistory": [] - } -} diff --git a/docs/api/bruno.json b/docs/api/bruno.json deleted file mode 100644 index c543e3e0..00000000 --- a/docs/api/bruno.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "version": "1", - "name": "glide", - "type": "collection" -} \ No newline at end of file diff --git a/docs/api/environments/Development.bru b/docs/api/environments/Development.bru deleted file mode 100644 index a8abb8bf..00000000 --- a/docs/api/environments/Development.bru +++ /dev/null @@ -1,3 +0,0 @@ -vars { - base_url: http://127.0.0.1:9099/v1 -} diff --git "a/docs/api/\360\237\224\247 Health.bru" "b/docs/api/\360\237\224\247 Health.bru" deleted file mode 100644 index df4d4d10..00000000 --- "a/docs/api/\360\237\224\247 Health.bru" +++ /dev/null @@ -1,11 +0,0 @@ -meta { - name: 🔧 Health - type: http - seq: 1 -} - -get { - url: {{base_url}}/health/ - body: none - auth: none -} diff --git a/docs/docs.go b/docs/docs.go index a236eac8..4d095573 100644 --- a/docs/docs.go +++ b/docs/docs.go @@ -40,7 +40,7 @@ const docTemplate = `{ "200": { "description": "OK", "schema": { - "$ref": "#/definitions/http.HealthSchema" + "$ref": "#/definitions/schemas.HealthSchema" } } } @@ -48,7 +48,7 @@ const docTemplate = `{ }, "/v1/language/": { "get": { - "description": "Retrieve list of configured language routers and their configurations", + "description": "Retrieve list of configured active language routers and their configurations", "consumes": [ "application/json" ], @@ -64,7 +64,7 @@ const docTemplate = `{ "200": { "description": "OK", "schema": { - "$ref": "#/definitions/http.RouterListSchema" + "$ref": "#/definitions/schemas.RouterListSchema" } } } @@ -112,13 +112,13 @@ const docTemplate = `{ "400": { "description": "Bad Request", "schema": { - "$ref": "#/definitions/http.ErrorSchema" + "$ref": "#/definitions/schemas.Error" } }, "404": { "description": "Not Found", "schema": { - "$ref": "#/definitions/http.ErrorSchema" + "$ref": "#/definitions/schemas.Error" } } } @@ -179,7 +179,7 @@ const docTemplate = `{ "404": { "description": "Not Found", "schema": { - "$ref": "#/definitions/http.ErrorSchema" + "$ref": "#/definitions/schemas.Error" } }, "426": { @@ -190,637 +190,6 @@ const docTemplate = `{ } }, "definitions": { - "anthropic.Config": { - "type": "object", - "required": [ - "apiVersion", - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "apiVersion": { - "type": "string" - }, - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/anthropic.Params" - }, - "model": { - "type": "string" - } - } - }, - "anthropic.Params": { - "type": "object", - "properties": { - "max_tokens": { - "type": "integer" - }, - "metadata": { - "type": "string" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "system": { - "type": "string" - }, - "temperature": { - "type": "number" - }, - "top_k": { - "type": "integer" - }, - "top_p": { - "type": "number" - } - } - }, - "azureopenai.Config": { - "type": "object", - "required": [ - "apiVersion", - "baseUrl", - "model" - ], - "properties": { - "apiVersion": { - "description": "The API version to use for this operation. This follows the YYYY-MM-DD format (e.g 2023-05-15)", - "type": "string" - }, - "baseUrl": { - "description": "The name of your Azure OpenAI Resource (e.g https://glide-test.openai.azure.com/)", - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/azureopenai.Params" - }, - "model": { - "description": "This is your deployment name. You're required to first deploy a model before you can make calls (e.g. glide-gpt-35)", - "type": "string" - } - } - }, - "azureopenai.Params": { - "type": "object", - "properties": { - "frequency_penalty": { - "type": "integer" - }, - "logit_bias": { - "type": "object", - "additionalProperties": { - "type": "number" - } - }, - "max_tokens": { - "type": "integer" - }, - "n": { - "type": "integer" - }, - "presence_penalty": { - "type": "integer" - }, - "response_format": { - "description": "TODO: should this be a part of the chat request API?" - }, - "seed": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - }, - "tool_choice": {}, - "tools": { - "type": "array", - "items": { - "type": "string" - } - }, - "top_p": { - "type": "number" - }, - "user": { - "type": "string" - } - } - }, - "bedrock.Config": { - "type": "object", - "required": [ - "awsRegion", - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "awsRegion": { - "type": "string" - }, - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/bedrock.Params" - }, - "model": { - "type": "string" - } - } - }, - "bedrock.Params": { - "type": "object", - "properties": { - "max_tokens": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - }, - "top_p": { - "type": "number" - } - } - }, - "clients.ClientConfig": { - "type": "object", - "properties": { - "max_idle_connections": { - "type": "integer" - }, - "max_idle_connections_per_host": { - "type": "integer" - }, - "timeout": { - "type": "string" - } - } - }, - "cohere.Config": { - "type": "object", - "required": [ - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/cohere.Params" - }, - "model": { - "description": "https://docs.cohere.com/docs/models#command", - "type": "string" - } - } - }, - "cohere.Params": { - "type": "object", - "required": [ - "temperature" - ], - "properties": { - "connectors": { - "type": "array", - "items": { - "type": "string" - } - }, - "frequency_penalty": { - "type": "number", - "maximum": 1, - "minimum": 0 - }, - "k": { - "type": "integer", - "maximum": 500, - "minimum": 0 - }, - "max_tokens": { - "type": "integer" - }, - "p": { - "type": "number", - "maximum": 0.99, - "minimum": 0.01 - }, - "preamble": { - "type": "string" - }, - "presence_penalty": { - "type": "number", - "maximum": 1, - "minimum": 0 - }, - "prompt_truncation": { - "type": "string" - }, - "search_queries_only": { - "type": "boolean" - }, - "seed": { - "type": "integer" - }, - "stop_sequences": { - "type": "array", - "maxItems": 5, - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - } - } - }, - "http.ErrorSchema": { - "type": "object", - "properties": { - "message": { - "type": "string" - } - } - }, - "http.HealthSchema": { - "type": "object", - "properties": { - "healthy": { - "type": "boolean" - } - } - }, - "http.RouterListSchema": { - "type": "object", - "properties": { - "routers": { - "type": "array", - "items": { - "$ref": "#/definitions/routers.LangRouterConfig" - } - } - } - }, - "latency.Config": { - "type": "object", - "properties": { - "decay": { - "description": "Weight of new latency measurements", - "type": "number" - }, - "update_interval": { - "description": "How often gateway should probe models with not the lowest response latency", - "type": "string" - }, - "warmup_samples": { - "description": "The number of latency probes required to init moving average", - "type": "integer" - } - } - }, - "octoml.Config": { - "type": "object", - "required": [ - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/octoml.Params" - }, - "model": { - "type": "string" - } - } - }, - "octoml.Params": { - "type": "object", - "properties": { - "frequency_penalty": { - "type": "integer" - }, - "max_tokens": { - "type": "integer" - }, - "presence_penalty": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - }, - "top_p": { - "type": "number" - } - } - }, - "ollama.Config": { - "type": "object", - "required": [ - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/ollama.Params" - }, - "model": { - "type": "string" - } - } - }, - "ollama.Params": { - "type": "object", - "properties": { - "microstat": { - "type": "integer" - }, - "microstat_eta": { - "type": "number" - }, - "microstat_tau": { - "type": "number" - }, - "num_ctx": { - "type": "integer" - }, - "num_gpu": { - "type": "integer" - }, - "num_gqa": { - "type": "integer" - }, - "num_predict": { - "type": "integer" - }, - "num_thread": { - "type": "integer" - }, - "repeat_last_n": { - "type": "integer" - }, - "seed": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "stream": { - "type": "boolean" - }, - "temperature": { - "type": "number" - }, - "tfs_z": { - "type": "number" - }, - "top_k": { - "type": "integer" - }, - "top_p": { - "type": "number" - } - } - }, - "openai.Config": { - "type": "object", - "required": [ - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/openai.Params" - }, - "model": { - "type": "string" - } - } - }, - "openai.Params": { - "type": "object", - "properties": { - "frequency_penalty": { - "type": "integer" - }, - "logit_bias": { - "type": "object", - "additionalProperties": { - "type": "number" - } - }, - "max_tokens": { - "type": "integer" - }, - "n": { - "type": "integer" - }, - "presence_penalty": { - "type": "integer" - }, - "response_format": { - "description": "TODO: should this be a part of the chat request API?" - }, - "seed": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - }, - "tool_choice": {}, - "tools": { - "type": "array", - "items": { - "type": "string" - } - }, - "top_p": { - "type": "number" - }, - "user": { - "type": "string" - } - } - }, - "providers.LangModelConfig": { - "type": "object", - "required": [ - "enabled", - "id" - ], - "properties": { - "anthropic": { - "$ref": "#/definitions/anthropic.Config" - }, - "azureopenai": { - "$ref": "#/definitions/azureopenai.Config" - }, - "bedrock": { - "$ref": "#/definitions/bedrock.Config" - }, - "client": { - "$ref": "#/definitions/clients.ClientConfig" - }, - "cohere": { - "$ref": "#/definitions/cohere.Config" - }, - "enabled": { - "description": "Is the model enabled?", - "type": "boolean" - }, - "error_budget": { - "type": "string" - }, - "id": { - "description": "Model instance ID (unique in scope of the router)", - "type": "string" - }, - "latency": { - "$ref": "#/definitions/latency.Config" - }, - "octoml": { - "$ref": "#/definitions/octoml.Config" - }, - "ollama": { - "$ref": "#/definitions/ollama.Config" - }, - "openai": { - "description": "Add other providers like", - "allOf": [ - { - "$ref": "#/definitions/openai.Config" - } - ] - }, - "weight": { - "type": "integer" - } - } - }, - "retry.ExpRetryConfig": { - "type": "object", - "properties": { - "base_multiplier": { - "type": "integer" - }, - "max_delay": { - "type": "integer" - }, - "max_retries": { - "type": "integer" - }, - "min_delay": { - "type": "integer" - } - } - }, - "routers.LangRouterConfig": { - "type": "object", - "required": [ - "enabled", - "models", - "retry", - "routers", - "strategy" - ], - "properties": { - "enabled": { - "description": "Is router enabled?", - "type": "boolean" - }, - "models": { - "description": "the list of models that could handle requests", - "type": "array", - "minItems": 1, - "items": { - "$ref": "#/definitions/providers.LangModelConfig" - } - }, - "retry": { - "description": "retry when no healthy model is available to router", - "allOf": [ - { - "$ref": "#/definitions/retry.ExpRetryConfig" - } - ] - }, - "routers": { - "description": "Unique router ID", - "type": "string" - }, - "strategy": { - "description": "strategy on picking the next model to serve the request", - "type": "string" - } - } - }, "schemas.ChatMessage": { "type": "object", "required": [ @@ -851,13 +220,13 @@ const docTemplate = `{ "message": { "$ref": "#/definitions/schemas.ChatMessage" }, - "messageHistory": { + "message_history": { "type": "array", "items": { "$ref": "#/definitions/schemas.ChatMessage" } }, - "override": { + "override_params": { "$ref": "#/definitions/schemas.OverrideChatRequest" } } @@ -868,42 +237,61 @@ const docTemplate = `{ "cached": { "type": "boolean" }, - "created": { + "created_at": { "type": "integer" }, "id": { "type": "string" }, - "model": { + "model_id": { + "type": "string" + }, + "model_name": { "type": "string" }, - "modelResponse": { + "model_response": { "$ref": "#/definitions/schemas.ModelResponse" }, - "model_id": { + "provider_id": { "type": "string" }, - "provider": { + "router_id": { + "type": "string" + } + } + }, + "schemas.Error": { + "type": "object", + "properties": { + "message": { "type": "string" }, - "router": { + "name": { "type": "string" } } }, + "schemas.HealthSchema": { + "type": "object", + "properties": { + "healthy": { + "type": "boolean" + } + } + }, "schemas.ModelResponse": { "type": "object", "properties": { "message": { "$ref": "#/definitions/schemas.ChatMessage" }, - "responseId": { + "metadata": { "type": "object", "additionalProperties": { "type": "string" } }, - "tokenCount": { + "token_usage": { "$ref": "#/definitions/schemas.TokenUsage" } } @@ -923,16 +311,25 @@ const docTemplate = `{ } } }, + "schemas.RouterListSchema": { + "type": "object", + "properties": { + "routers": { + "type": "array", + "items": {} + } + } + }, "schemas.TokenUsage": { "type": "object", "properties": { - "promptTokens": { + "prompt_tokens": { "type": "integer" }, - "responseTokens": { + "response_tokens": { "type": "integer" }, - "totalTokens": { + "total_tokens": { "type": "integer" } } diff --git a/docs/swagger.json b/docs/swagger.json index f626c88d..60d3ccc2 100644 --- a/docs/swagger.json +++ b/docs/swagger.json @@ -37,7 +37,7 @@ "200": { "description": "OK", "schema": { - "$ref": "#/definitions/http.HealthSchema" + "$ref": "#/definitions/schemas.HealthSchema" } } } @@ -45,7 +45,7 @@ }, "/v1/language/": { "get": { - "description": "Retrieve list of configured language routers and their configurations", + "description": "Retrieve list of configured active language routers and their configurations", "consumes": [ "application/json" ], @@ -61,7 +61,7 @@ "200": { "description": "OK", "schema": { - "$ref": "#/definitions/http.RouterListSchema" + "$ref": "#/definitions/schemas.RouterListSchema" } } } @@ -109,13 +109,13 @@ "400": { "description": "Bad Request", "schema": { - "$ref": "#/definitions/http.ErrorSchema" + "$ref": "#/definitions/schemas.Error" } }, "404": { "description": "Not Found", "schema": { - "$ref": "#/definitions/http.ErrorSchema" + "$ref": "#/definitions/schemas.Error" } } } @@ -176,7 +176,7 @@ "404": { "description": "Not Found", "schema": { - "$ref": "#/definitions/http.ErrorSchema" + "$ref": "#/definitions/schemas.Error" } }, "426": { @@ -187,637 +187,6 @@ } }, "definitions": { - "anthropic.Config": { - "type": "object", - "required": [ - "apiVersion", - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "apiVersion": { - "type": "string" - }, - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/anthropic.Params" - }, - "model": { - "type": "string" - } - } - }, - "anthropic.Params": { - "type": "object", - "properties": { - "max_tokens": { - "type": "integer" - }, - "metadata": { - "type": "string" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "system": { - "type": "string" - }, - "temperature": { - "type": "number" - }, - "top_k": { - "type": "integer" - }, - "top_p": { - "type": "number" - } - } - }, - "azureopenai.Config": { - "type": "object", - "required": [ - "apiVersion", - "baseUrl", - "model" - ], - "properties": { - "apiVersion": { - "description": "The API version to use for this operation. This follows the YYYY-MM-DD format (e.g 2023-05-15)", - "type": "string" - }, - "baseUrl": { - "description": "The name of your Azure OpenAI Resource (e.g https://glide-test.openai.azure.com/)", - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/azureopenai.Params" - }, - "model": { - "description": "This is your deployment name. You're required to first deploy a model before you can make calls (e.g. glide-gpt-35)", - "type": "string" - } - } - }, - "azureopenai.Params": { - "type": "object", - "properties": { - "frequency_penalty": { - "type": "integer" - }, - "logit_bias": { - "type": "object", - "additionalProperties": { - "type": "number" - } - }, - "max_tokens": { - "type": "integer" - }, - "n": { - "type": "integer" - }, - "presence_penalty": { - "type": "integer" - }, - "response_format": { - "description": "TODO: should this be a part of the chat request API?" - }, - "seed": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - }, - "tool_choice": {}, - "tools": { - "type": "array", - "items": { - "type": "string" - } - }, - "top_p": { - "type": "number" - }, - "user": { - "type": "string" - } - } - }, - "bedrock.Config": { - "type": "object", - "required": [ - "awsRegion", - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "awsRegion": { - "type": "string" - }, - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/bedrock.Params" - }, - "model": { - "type": "string" - } - } - }, - "bedrock.Params": { - "type": "object", - "properties": { - "max_tokens": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - }, - "top_p": { - "type": "number" - } - } - }, - "clients.ClientConfig": { - "type": "object", - "properties": { - "max_idle_connections": { - "type": "integer" - }, - "max_idle_connections_per_host": { - "type": "integer" - }, - "timeout": { - "type": "string" - } - } - }, - "cohere.Config": { - "type": "object", - "required": [ - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/cohere.Params" - }, - "model": { - "description": "https://docs.cohere.com/docs/models#command", - "type": "string" - } - } - }, - "cohere.Params": { - "type": "object", - "required": [ - "temperature" - ], - "properties": { - "connectors": { - "type": "array", - "items": { - "type": "string" - } - }, - "frequency_penalty": { - "type": "number", - "maximum": 1, - "minimum": 0 - }, - "k": { - "type": "integer", - "maximum": 500, - "minimum": 0 - }, - "max_tokens": { - "type": "integer" - }, - "p": { - "type": "number", - "maximum": 0.99, - "minimum": 0.01 - }, - "preamble": { - "type": "string" - }, - "presence_penalty": { - "type": "number", - "maximum": 1, - "minimum": 0 - }, - "prompt_truncation": { - "type": "string" - }, - "search_queries_only": { - "type": "boolean" - }, - "seed": { - "type": "integer" - }, - "stop_sequences": { - "type": "array", - "maxItems": 5, - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - } - } - }, - "http.ErrorSchema": { - "type": "object", - "properties": { - "message": { - "type": "string" - } - } - }, - "http.HealthSchema": { - "type": "object", - "properties": { - "healthy": { - "type": "boolean" - } - } - }, - "http.RouterListSchema": { - "type": "object", - "properties": { - "routers": { - "type": "array", - "items": { - "$ref": "#/definitions/routers.LangRouterConfig" - } - } - } - }, - "latency.Config": { - "type": "object", - "properties": { - "decay": { - "description": "Weight of new latency measurements", - "type": "number" - }, - "update_interval": { - "description": "How often gateway should probe models with not the lowest response latency", - "type": "string" - }, - "warmup_samples": { - "description": "The number of latency probes required to init moving average", - "type": "integer" - } - } - }, - "octoml.Config": { - "type": "object", - "required": [ - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/octoml.Params" - }, - "model": { - "type": "string" - } - } - }, - "octoml.Params": { - "type": "object", - "properties": { - "frequency_penalty": { - "type": "integer" - }, - "max_tokens": { - "type": "integer" - }, - "presence_penalty": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - }, - "top_p": { - "type": "number" - } - } - }, - "ollama.Config": { - "type": "object", - "required": [ - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/ollama.Params" - }, - "model": { - "type": "string" - } - } - }, - "ollama.Params": { - "type": "object", - "properties": { - "microstat": { - "type": "integer" - }, - "microstat_eta": { - "type": "number" - }, - "microstat_tau": { - "type": "number" - }, - "num_ctx": { - "type": "integer" - }, - "num_gpu": { - "type": "integer" - }, - "num_gqa": { - "type": "integer" - }, - "num_predict": { - "type": "integer" - }, - "num_thread": { - "type": "integer" - }, - "repeat_last_n": { - "type": "integer" - }, - "seed": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "stream": { - "type": "boolean" - }, - "temperature": { - "type": "number" - }, - "tfs_z": { - "type": "number" - }, - "top_k": { - "type": "integer" - }, - "top_p": { - "type": "number" - } - } - }, - "openai.Config": { - "type": "object", - "required": [ - "baseUrl", - "chatEndpoint", - "model" - ], - "properties": { - "baseUrl": { - "type": "string" - }, - "chatEndpoint": { - "type": "string" - }, - "defaultParams": { - "$ref": "#/definitions/openai.Params" - }, - "model": { - "type": "string" - } - } - }, - "openai.Params": { - "type": "object", - "properties": { - "frequency_penalty": { - "type": "integer" - }, - "logit_bias": { - "type": "object", - "additionalProperties": { - "type": "number" - } - }, - "max_tokens": { - "type": "integer" - }, - "n": { - "type": "integer" - }, - "presence_penalty": { - "type": "integer" - }, - "response_format": { - "description": "TODO: should this be a part of the chat request API?" - }, - "seed": { - "type": "integer" - }, - "stop": { - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "type": "number" - }, - "tool_choice": {}, - "tools": { - "type": "array", - "items": { - "type": "string" - } - }, - "top_p": { - "type": "number" - }, - "user": { - "type": "string" - } - } - }, - "providers.LangModelConfig": { - "type": "object", - "required": [ - "enabled", - "id" - ], - "properties": { - "anthropic": { - "$ref": "#/definitions/anthropic.Config" - }, - "azureopenai": { - "$ref": "#/definitions/azureopenai.Config" - }, - "bedrock": { - "$ref": "#/definitions/bedrock.Config" - }, - "client": { - "$ref": "#/definitions/clients.ClientConfig" - }, - "cohere": { - "$ref": "#/definitions/cohere.Config" - }, - "enabled": { - "description": "Is the model enabled?", - "type": "boolean" - }, - "error_budget": { - "type": "string" - }, - "id": { - "description": "Model instance ID (unique in scope of the router)", - "type": "string" - }, - "latency": { - "$ref": "#/definitions/latency.Config" - }, - "octoml": { - "$ref": "#/definitions/octoml.Config" - }, - "ollama": { - "$ref": "#/definitions/ollama.Config" - }, - "openai": { - "description": "Add other providers like", - "allOf": [ - { - "$ref": "#/definitions/openai.Config" - } - ] - }, - "weight": { - "type": "integer" - } - } - }, - "retry.ExpRetryConfig": { - "type": "object", - "properties": { - "base_multiplier": { - "type": "integer" - }, - "max_delay": { - "type": "integer" - }, - "max_retries": { - "type": "integer" - }, - "min_delay": { - "type": "integer" - } - } - }, - "routers.LangRouterConfig": { - "type": "object", - "required": [ - "enabled", - "models", - "retry", - "routers", - "strategy" - ], - "properties": { - "enabled": { - "description": "Is router enabled?", - "type": "boolean" - }, - "models": { - "description": "the list of models that could handle requests", - "type": "array", - "minItems": 1, - "items": { - "$ref": "#/definitions/providers.LangModelConfig" - } - }, - "retry": { - "description": "retry when no healthy model is available to router", - "allOf": [ - { - "$ref": "#/definitions/retry.ExpRetryConfig" - } - ] - }, - "routers": { - "description": "Unique router ID", - "type": "string" - }, - "strategy": { - "description": "strategy on picking the next model to serve the request", - "type": "string" - } - } - }, "schemas.ChatMessage": { "type": "object", "required": [ @@ -848,13 +217,13 @@ "message": { "$ref": "#/definitions/schemas.ChatMessage" }, - "messageHistory": { + "message_history": { "type": "array", "items": { "$ref": "#/definitions/schemas.ChatMessage" } }, - "override": { + "override_params": { "$ref": "#/definitions/schemas.OverrideChatRequest" } } @@ -865,42 +234,61 @@ "cached": { "type": "boolean" }, - "created": { + "created_at": { "type": "integer" }, "id": { "type": "string" }, - "model": { + "model_id": { + "type": "string" + }, + "model_name": { "type": "string" }, - "modelResponse": { + "model_response": { "$ref": "#/definitions/schemas.ModelResponse" }, - "model_id": { + "provider_id": { "type": "string" }, - "provider": { + "router_id": { + "type": "string" + } + } + }, + "schemas.Error": { + "type": "object", + "properties": { + "message": { "type": "string" }, - "router": { + "name": { "type": "string" } } }, + "schemas.HealthSchema": { + "type": "object", + "properties": { + "healthy": { + "type": "boolean" + } + } + }, "schemas.ModelResponse": { "type": "object", "properties": { "message": { "$ref": "#/definitions/schemas.ChatMessage" }, - "responseId": { + "metadata": { "type": "object", "additionalProperties": { "type": "string" } }, - "tokenCount": { + "token_usage": { "$ref": "#/definitions/schemas.TokenUsage" } } @@ -920,16 +308,25 @@ } } }, + "schemas.RouterListSchema": { + "type": "object", + "properties": { + "routers": { + "type": "array", + "items": {} + } + } + }, "schemas.TokenUsage": { "type": "object", "properties": { - "promptTokens": { + "prompt_tokens": { "type": "integer" }, - "responseTokens": { + "response_tokens": { "type": "integer" }, - "totalTokens": { + "total_tokens": { "type": "integer" } } diff --git a/docs/swagger.yaml b/docs/swagger.yaml index c4608100..6cc49187 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -1,434 +1,5 @@ basePath: / definitions: - anthropic.Config: - properties: - apiVersion: - type: string - baseUrl: - type: string - chatEndpoint: - type: string - defaultParams: - $ref: '#/definitions/anthropic.Params' - model: - type: string - required: - - apiVersion - - baseUrl - - chatEndpoint - - model - type: object - anthropic.Params: - properties: - max_tokens: - type: integer - metadata: - type: string - stop: - items: - type: string - type: array - system: - type: string - temperature: - type: number - top_k: - type: integer - top_p: - type: number - type: object - azureopenai.Config: - properties: - apiVersion: - description: The API version to use for this operation. This follows the YYYY-MM-DD - format (e.g 2023-05-15) - type: string - baseUrl: - description: The name of your Azure OpenAI Resource (e.g https://glide-test.openai.azure.com/) - type: string - chatEndpoint: - type: string - defaultParams: - $ref: '#/definitions/azureopenai.Params' - model: - description: This is your deployment name. You're required to first deploy - a model before you can make calls (e.g. glide-gpt-35) - type: string - required: - - apiVersion - - baseUrl - - model - type: object - azureopenai.Params: - properties: - frequency_penalty: - type: integer - logit_bias: - additionalProperties: - type: number - type: object - max_tokens: - type: integer - "n": - type: integer - presence_penalty: - type: integer - response_format: - description: 'TODO: should this be a part of the chat request API?' - seed: - type: integer - stop: - items: - type: string - type: array - temperature: - type: number - tool_choice: {} - tools: - items: - type: string - type: array - top_p: - type: number - user: - type: string - type: object - bedrock.Config: - properties: - awsRegion: - type: string - baseUrl: - type: string - chatEndpoint: - type: string - defaultParams: - $ref: '#/definitions/bedrock.Params' - model: - type: string - required: - - awsRegion - - baseUrl - - chatEndpoint - - model - type: object - bedrock.Params: - properties: - max_tokens: - type: integer - stop: - items: - type: string - type: array - temperature: - type: number - top_p: - type: number - type: object - clients.ClientConfig: - properties: - max_idle_connections: - type: integer - max_idle_connections_per_host: - type: integer - timeout: - type: string - type: object - cohere.Config: - properties: - baseUrl: - type: string - chatEndpoint: - type: string - defaultParams: - $ref: '#/definitions/cohere.Params' - model: - description: https://docs.cohere.com/docs/models#command - type: string - required: - - baseUrl - - chatEndpoint - - model - type: object - cohere.Params: - properties: - connectors: - items: - type: string - type: array - frequency_penalty: - maximum: 1 - minimum: 0 - type: number - k: - maximum: 500 - minimum: 0 - type: integer - max_tokens: - type: integer - p: - maximum: 0.99 - minimum: 0.01 - type: number - preamble: - type: string - presence_penalty: - maximum: 1 - minimum: 0 - type: number - prompt_truncation: - type: string - search_queries_only: - type: boolean - seed: - type: integer - stop_sequences: - items: - type: string - maxItems: 5 - type: array - temperature: - type: number - required: - - temperature - type: object - http.ErrorSchema: - properties: - message: - type: string - type: object - http.HealthSchema: - properties: - healthy: - type: boolean - type: object - http.RouterListSchema: - properties: - routers: - items: - $ref: '#/definitions/routers.LangRouterConfig' - type: array - type: object - latency.Config: - properties: - decay: - description: Weight of new latency measurements - type: number - update_interval: - description: How often gateway should probe models with not the lowest response - latency - type: string - warmup_samples: - description: The number of latency probes required to init moving average - type: integer - type: object - octoml.Config: - properties: - baseUrl: - type: string - chatEndpoint: - type: string - defaultParams: - $ref: '#/definitions/octoml.Params' - model: - type: string - required: - - baseUrl - - chatEndpoint - - model - type: object - octoml.Params: - properties: - frequency_penalty: - type: integer - max_tokens: - type: integer - presence_penalty: - type: integer - stop: - items: - type: string - type: array - temperature: - type: number - top_p: - type: number - type: object - ollama.Config: - properties: - baseUrl: - type: string - chatEndpoint: - type: string - defaultParams: - $ref: '#/definitions/ollama.Params' - model: - type: string - required: - - baseUrl - - chatEndpoint - - model - type: object - ollama.Params: - properties: - microstat: - type: integer - microstat_eta: - type: number - microstat_tau: - type: number - num_ctx: - type: integer - num_gpu: - type: integer - num_gqa: - type: integer - num_predict: - type: integer - num_thread: - type: integer - repeat_last_n: - type: integer - seed: - type: integer - stop: - items: - type: string - type: array - stream: - type: boolean - temperature: - type: number - tfs_z: - type: number - top_k: - type: integer - top_p: - type: number - type: object - openai.Config: - properties: - baseUrl: - type: string - chatEndpoint: - type: string - defaultParams: - $ref: '#/definitions/openai.Params' - model: - type: string - required: - - baseUrl - - chatEndpoint - - model - type: object - openai.Params: - properties: - frequency_penalty: - type: integer - logit_bias: - additionalProperties: - type: number - type: object - max_tokens: - type: integer - "n": - type: integer - presence_penalty: - type: integer - response_format: - description: 'TODO: should this be a part of the chat request API?' - seed: - type: integer - stop: - items: - type: string - type: array - temperature: - type: number - tool_choice: {} - tools: - items: - type: string - type: array - top_p: - type: number - user: - type: string - type: object - providers.LangModelConfig: - properties: - anthropic: - $ref: '#/definitions/anthropic.Config' - azureopenai: - $ref: '#/definitions/azureopenai.Config' - bedrock: - $ref: '#/definitions/bedrock.Config' - client: - $ref: '#/definitions/clients.ClientConfig' - cohere: - $ref: '#/definitions/cohere.Config' - enabled: - description: Is the model enabled? - type: boolean - error_budget: - type: string - id: - description: Model instance ID (unique in scope of the router) - type: string - latency: - $ref: '#/definitions/latency.Config' - octoml: - $ref: '#/definitions/octoml.Config' - ollama: - $ref: '#/definitions/ollama.Config' - openai: - allOf: - - $ref: '#/definitions/openai.Config' - description: Add other providers like - weight: - type: integer - required: - - enabled - - id - type: object - retry.ExpRetryConfig: - properties: - base_multiplier: - type: integer - max_delay: - type: integer - max_retries: - type: integer - min_delay: - type: integer - type: object - routers.LangRouterConfig: - properties: - enabled: - description: Is router enabled? - type: boolean - models: - description: the list of models that could handle requests - items: - $ref: '#/definitions/providers.LangModelConfig' - minItems: 1 - type: array - retry: - allOf: - - $ref: '#/definitions/retry.ExpRetryConfig' - description: retry when no healthy model is available to router - routers: - description: Unique router ID - type: string - strategy: - description: strategy on picking the next model to serve the request - type: string - required: - - enabled - - models - - retry - - routers - - strategy - type: object schemas.ChatMessage: properties: content: @@ -451,11 +22,11 @@ definitions: properties: message: $ref: '#/definitions/schemas.ChatMessage' - messageHistory: + message_history: items: $ref: '#/definitions/schemas.ChatMessage' type: array - override: + override_params: $ref: '#/definitions/schemas.OverrideChatRequest' required: - message @@ -464,30 +35,42 @@ definitions: properties: cached: type: boolean - created: + created_at: type: integer id: type: string - model: - type: string model_id: type: string - modelResponse: + model_name: + type: string + model_response: $ref: '#/definitions/schemas.ModelResponse' - provider: + provider_id: + type: string + router_id: + type: string + type: object + schemas.Error: + properties: + message: type: string - router: + name: type: string type: object + schemas.HealthSchema: + properties: + healthy: + type: boolean + type: object schemas.ModelResponse: properties: message: $ref: '#/definitions/schemas.ChatMessage' - responseId: + metadata: additionalProperties: type: string type: object - tokenCount: + token_usage: $ref: '#/definitions/schemas.TokenUsage' type: object schemas.OverrideChatRequest: @@ -500,13 +83,19 @@ definitions: - message - model_id type: object + schemas.RouterListSchema: + properties: + routers: + items: {} + type: array + type: object schemas.TokenUsage: properties: - promptTokens: + prompt_tokens: type: integer - responseTokens: + response_tokens: type: integer - totalTokens: + total_tokens: type: integer type: object externalDocs: @@ -537,7 +126,7 @@ paths: "200": description: OK schema: - $ref: '#/definitions/http.HealthSchema' + $ref: '#/definitions/schemas.HealthSchema' summary: Gateway Health tags: - Operations @@ -545,7 +134,7 @@ paths: get: consumes: - application/json - description: Retrieve list of configured language routers and their configurations + description: Retrieve list of configured active language routers and their configurations operationId: glide-language-routers produces: - application/json @@ -553,7 +142,7 @@ paths: "200": description: OK schema: - $ref: '#/definitions/http.RouterListSchema' + $ref: '#/definitions/schemas.RouterListSchema' summary: Language Router List tags: - Language @@ -585,11 +174,11 @@ paths: "400": description: Bad Request schema: - $ref: '#/definitions/http.ErrorSchema' + $ref: '#/definitions/schemas.Error' "404": description: Not Found schema: - $ref: '#/definitions/http.ErrorSchema' + $ref: '#/definitions/schemas.Error' summary: Language Chat tags: - Language @@ -632,7 +221,7 @@ paths: "404": description: Not Found schema: - $ref: '#/definitions/http.ErrorSchema' + $ref: '#/definitions/schemas.Error' "426": description: Upgrade Required summary: Language Chat diff --git a/pkg/api/http/handlers.go b/pkg/api/http/handlers.go index 581b270e..ae12025e 100644 --- a/pkg/api/http/handlers.go +++ b/pkg/api/http/handlers.go @@ -2,18 +2,14 @@ package http import ( "context" - "errors" "sync" - "github.com/EinStack/glide/pkg/telemetry" - "go.uber.org/zap" - - "github.com/EinStack/glide/pkg/routers" - "github.com/EinStack/glide/pkg/api/schemas" - + "github.com/EinStack/glide/pkg/routers" + "github.com/EinStack/glide/pkg/telemetry" "github.com/gofiber/contrib/websocket" "github.com/gofiber/fiber/v2" + "go.uber.org/zap" ) type Handler = func(c *fiber.Ctx) error @@ -32,15 +28,13 @@ type Handler = func(c *fiber.Ctx) error // @Accept json // @Produce json // @Success 200 {object} schemas.ChatResponse -// @Failure 400 {object} http.ErrorSchema -// @Failure 404 {object} http.ErrorSchema +// @Failure 400 {object} schemas.Error +// @Failure 404 {object} schemas.Error // @Router /v1/language/{router}/chat [POST] func LangChatHandler(routerManager *routers.RouterManager) Handler { return func(c *fiber.Ctx) error { if !c.Is("json") { - return c.Status(fiber.StatusBadRequest).JSON(ErrorSchema{ - Message: "Glide accepts only JSON payloads", - }) + return c.Status(fiber.StatusBadRequest).JSON(schemas.ErrUnsupportedMediaType) } // Unmarshal request body @@ -48,29 +42,25 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler { err := c.BodyParser(&req) if err != nil { - return c.Status(fiber.StatusBadRequest).JSON(ErrorSchema{ - Message: err.Error(), - }) + return c.Status(fiber.StatusBadRequest).JSON(schemas.NewPayloadParseErr(err)) } // Get router ID from path routerID := c.Params("router") + router, err := routerManager.GetLangRouter(routerID) + if err != nil { + httpErr := schemas.FromErr(err) - if errors.Is(err, routers.ErrRouterNotFound) { - // Return not found error - return c.Status(fiber.StatusNotFound).JSON(ErrorSchema{ - Message: err.Error(), - }) + return c.Status(httpErr.Status).JSON(httpErr) } // Chat with router resp, err := router.Chat(c.Context(), req) if err != nil { - // Return internal server error - return c.Status(fiber.StatusInternalServerError).JSON(ErrorSchema{ - Message: err.Error(), - }) + httpErr := schemas.FromErr(err) + + return c.Status(httpErr.Status).JSON(httpErr) } // Return chat response @@ -85,9 +75,9 @@ func LangStreamRouterValidator(routerManager *routers.RouterManager) Handler { _, err := routerManager.GetLangRouter(routerID) if err != nil { - return c.Status(fiber.StatusNotFound).JSON(ErrorSchema{ - Message: err.Error(), - }) + httpErr := schemas.FromErr(err) + + return c.Status(httpErr.Status).JSON(httpErr) } return c.Next() @@ -111,7 +101,7 @@ func LangStreamRouterValidator(routerManager *routers.RouterManager) Handler { // @Accept json // @Success 101 // @Failure 426 -// @Failure 404 {object} http.ErrorSchema +// @Failure 404 {object} schemas.Error // @Router /v1/language/{router}/chatStream [GET] func LangStreamChatHandler(tel *telemetry.Telemetry, routerManager *routers.RouterManager) Handler { // TODO: expose websocket connection configs https://github.com/gofiber/contrib/tree/main/websocket @@ -174,22 +164,22 @@ func LangStreamChatHandler(tel *telemetry.Telemetry, routerManager *routers.Rout // // @id glide-language-routers // @Summary Language Router List -// @Description Retrieve list of configured language routers and their configurations +// @Description Retrieve list of configured active language routers and their configurations // @tags Language // @Accept json // @Produce json -// @Success 200 {object} http.RouterListSchema +// @Success 200 {object} schemas.RouterListSchema // @Router /v1/language/ [GET] func LangRoutersHandler(routerManager *routers.RouterManager) Handler { return func(c *fiber.Ctx) error { configuredRouters := routerManager.GetLangRouters() - cfgs := make([]*routers.LangRouterConfig, 0, len(configuredRouters)) + cfgs := make([]interface{}, 0, len(configuredRouters)) // opaque by design for _, router := range configuredRouters { cfgs = append(cfgs, router.Config) } - return c.Status(fiber.StatusOK).JSON(RouterListSchema{Routers: cfgs}) + return c.Status(fiber.StatusOK).JSON(schemas.RouterListSchema{Routers: cfgs}) } } @@ -201,14 +191,12 @@ func LangRoutersHandler(routerManager *routers.RouterManager) Handler { // @tags Operations // @Accept json // @Produce json -// @Success 200 {object} http.HealthSchema +// @Success 200 {object} schemas.HealthSchema // @Router /v1/health/ [get] func HealthHandler(c *fiber.Ctx) error { - return c.Status(fiber.StatusOK).JSON(HealthSchema{Healthy: true}) + return c.Status(fiber.StatusOK).JSON(schemas.HealthSchema{Healthy: true}) } func NotFoundHandler(c *fiber.Ctx) error { - return c.Status(fiber.StatusNotFound).JSON(ErrorSchema{ - Message: "The route is not found", - }) + return c.Status(fiber.StatusNotFound).JSON(schemas.ErrRouteNotFound) } diff --git a/pkg/api/http/schemas.go b/pkg/api/http/schemas.go deleted file mode 100644 index 3ee515eb..00000000 --- a/pkg/api/http/schemas.go +++ /dev/null @@ -1,15 +0,0 @@ -package http - -import "github.com/EinStack/glide/pkg/routers" - -type ErrorSchema struct { - Message string `json:"message"` -} - -type HealthSchema struct { - Healthy bool `json:"healthy"` -} - -type RouterListSchema struct { - Routers []*routers.LangRouterConfig `json:"routers"` -} diff --git a/pkg/api/schemas/chat.go b/pkg/api/schemas/chat.go index 4be88692..f811a119 100644 --- a/pkg/api/schemas/chat.go +++ b/pkg/api/schemas/chat.go @@ -3,12 +3,12 @@ package schemas // ChatRequest defines Glide's Chat Request Schema unified across all language models type ChatRequest struct { Message ChatMessage `json:"message" validate:"required"` - MessageHistory []ChatMessage `json:"messageHistory"` - Override *OverrideChatRequest `json:"override,omitempty"` + MessageHistory []ChatMessage `json:"message_history"` + OverrideParams *OverrideChatRequest `json:"override_params,omitempty"` } type OverrideChatRequest struct { - Model string `json:"model_id" validate:"required"` + ModelID string `json:"model_id" validate:"required"` Message ChatMessage `json:"message" validate:"required"` } @@ -25,27 +25,27 @@ func NewChatFromStr(message string) *ChatRequest { // ChatResponse defines Glide's Chat Response Schema unified across all language models type ChatResponse struct { ID string `json:"id,omitempty"` - Created int `json:"created,omitempty"` - Provider string `json:"provider,omitempty"` - RouterID string `json:"router,omitempty"` + Created int `json:"created_at,omitempty"` + Provider string `json:"provider_id,omitempty"` + RouterID string `json:"router_id,omitempty"` ModelID string `json:"model_id,omitempty"` - ModelName string `json:"model,omitempty"` + ModelName string `json:"model_name,omitempty"` Cached bool `json:"cached,omitempty"` - ModelResponse ModelResponse `json:"modelResponse,omitempty"` + ModelResponse ModelResponse `json:"model_response,omitempty"` } // ModelResponse is the unified response from the provider. type ModelResponse struct { - SystemID map[string]string `json:"responseId,omitempty"` + Metadata map[string]string `json:"metadata,omitempty"` Message ChatMessage `json:"message"` - TokenUsage TokenUsage `json:"tokenCount"` + TokenUsage TokenUsage `json:"token_usage"` } type TokenUsage struct { - PromptTokens int `json:"promptTokens"` - ResponseTokens int `json:"responseTokens"` - TotalTokens int `json:"totalTokens"` + PromptTokens int `json:"prompt_tokens"` + ResponseTokens int `json:"response_tokens"` + TotalTokens int `json:"total_tokens"` } // ChatMessage is a message in a chat request. diff --git a/pkg/api/schemas/chat_stream.go b/pkg/api/schemas/chat_stream.go index 983d2242..41c3578b 100644 --- a/pkg/api/schemas/chat_stream.go +++ b/pkg/api/schemas/chat_stream.go @@ -6,22 +6,14 @@ type ( Metadata = map[string]any EventType = string FinishReason = string - ErrorCode = string ) var ( - Complete FinishReason = "complete" - MaxTokens FinishReason = "max_tokens" - ContentFiltered FinishReason = "content_filtered" - ErrorReason FinishReason = "error" - OtherReason FinishReason = "other" -) - -var ( - NoModelConfigured ErrorCode = "no_model_configured" - ModelUnavailable ErrorCode = "model_unavailable" - AllModelsUnavailable ErrorCode = "all_models_unavailable" - UnknownError ErrorCode = "unknown_error" + ReasonComplete FinishReason = "complete" + ReasonMaxTokens FinishReason = "max_tokens" + ReasonContentFiltered FinishReason = "content_filtered" + ReasonError FinishReason = "error" + ReasonOther FinishReason = "other" ) type StreamRequestID = string @@ -30,8 +22,8 @@ type StreamRequestID = string type ChatStreamRequest struct { ID StreamRequestID `json:"id" validate:"required"` Message ChatMessage `json:"message" validate:"required"` - MessageHistory []ChatMessage `json:"messageHistory" validate:"required"` - Override *OverrideChatRequest `json:"overrideMessage,omitempty"` + MessageHistory []ChatMessage `json:"message_history" validate:"required"` + OverrideParams *OverrideChatRequest `json:"override_params,omitempty"` Metadata *Metadata `json:"metadata,omitempty"` } @@ -52,8 +44,8 @@ type ModelChunkResponse struct { type ChatStreamMessage struct { ID StreamRequestID `json:"id"` - CreatedAt int `json:"createdAt"` - RouterID string `json:"routerId"` + CreatedAt int `json:"created_at"` + RouterID string `json:"router_id"` Metadata *Metadata `json:"metadata,omitempty"` Chunk *ChatStreamChunk `json:"chunk,omitempty"` Error *ChatStreamError `json:"error,omitempty"` @@ -61,18 +53,18 @@ type ChatStreamMessage struct { // ChatStreamChunk defines a message for a chunk of streaming chat response type ChatStreamChunk struct { - ModelID string `json:"modelId"` - Provider string `json:"providerName"` - ModelName string `json:"modelName"` + ModelID string `json:"model_id"` + Provider string `json:"provider_id"` + ModelName string `json:"model_name"` Cached bool `json:"cached"` - ModelResponse ModelChunkResponse `json:"modelResponse"` - FinishReason *FinishReason `json:"finishReason,omitempty"` + ModelResponse ModelChunkResponse `json:"model_response"` + FinishReason *FinishReason `json:"finish_reason,omitempty"` } type ChatStreamError struct { - ErrCode ErrorCode `json:"errCode"` + Name ErrorName `json:"name"` Message string `json:"message"` - FinishReason *FinishReason `json:"finishReason,omitempty"` + FinishReason *FinishReason `json:"finish_reason,omitempty"` } func NewChatStreamChunk( @@ -93,7 +85,7 @@ func NewChatStreamChunk( func NewChatStreamError( reqID StreamRequestID, routerID string, - errCode ErrorCode, + errName ErrorName, errMsg string, reqMetadata *Metadata, finishReason *FinishReason, @@ -104,7 +96,7 @@ func NewChatStreamError( CreatedAt: int(time.Now().UTC().Unix()), Metadata: reqMetadata, Error: &ChatStreamError{ - ErrCode: errCode, + Name: errName, Message: errMsg, FinishReason: finishReason, }, diff --git a/pkg/api/schemas/errors.go b/pkg/api/schemas/errors.go new file mode 100644 index 00000000..2765f93e --- /dev/null +++ b/pkg/api/schemas/errors.go @@ -0,0 +1,79 @@ +package schemas + +import ( + "fmt" + + "github.com/gofiber/fiber/v2" +) + +type ErrorName = string + +var ( + UnsupportedMediaType ErrorName = "unsupported_media_type" + RouteNotFound ErrorName = "route_not_found" + PayloadParseError ErrorName = "payload_parse_error" + RouterNotFound ErrorName = "router_not_found" + NoModelConfigured ErrorName = "no_model_configured" + ModelUnavailable ErrorName = "model_unavailable" + AllModelsUnavailable ErrorName = "all_models_unavailable" + UnknownError ErrorName = "unknown_error" +) + +// Error / Error contains more context than the built-in error type, +// so we know information like error code and message that are useful to propagate to clients +type Error struct { + Status int `json:"-"` + Name string `json:"name"` + Message string `json:"message"` +} + +var _ error = (*Error)(nil) + +// Error returns the error message. +func (e *Error) Error() string { + return fmt.Sprintf("Error (%s): %s", e.Name, e.Message) +} + +func NewError(status int, name string, message string) Error { + return Error{Status: status, Name: name, Message: message} +} + +var ErrUnsupportedMediaType = NewError( + fiber.StatusBadRequest, + UnsupportedMediaType, + "application/json is the only supported media type", +) + +var ErrRouteNotFound = NewError( + fiber.StatusNotFound, + RouteNotFound, + "requested route is not found or method is not allowed", +) + +var ErrRouterNotFound = NewError(fiber.StatusNotFound, RouterNotFound, "router is not found") + +var ErrNoModelAvailable = NewError( + 503, + AllModelsUnavailable, + "all providers are unavailable", +) + +func NewPayloadParseErr(err error) Error { + return NewError( + fiber.StatusBadRequest, + PayloadParseError, + err.Error(), + ) +} + +func FromErr(err error) Error { + if apiErr, ok := err.(*Error); ok { + return *apiErr + } + + return NewError( + fiber.StatusInternalServerError, + UnknownError, + err.Error(), + ) +} diff --git a/pkg/api/schemas/health_checks.go b/pkg/api/schemas/health_checks.go new file mode 100644 index 00000000..6078e769 --- /dev/null +++ b/pkg/api/schemas/health_checks.go @@ -0,0 +1,5 @@ +package schemas + +type HealthSchema struct { + Healthy bool `json:"healthy"` +} diff --git a/pkg/api/schemas/routers.go b/pkg/api/schemas/routers.go new file mode 100644 index 00000000..9111a319 --- /dev/null +++ b/pkg/api/schemas/routers.go @@ -0,0 +1,9 @@ +package schemas + +// RouterListSchema returns list of active configured routers. +// +// Routers config is exposed as an opaque value to indicate that user services must not use it to base any logic on it. +// The endpoint is used for debugging/informational reasons +type RouterListSchema struct { + Routers []interface{} `json:"routers"` +} diff --git a/pkg/providers/anthropic/chat.go b/pkg/providers/anthropic/chat.go index a9156c2d..d1b6c326 100644 --- a/pkg/providers/anthropic/chat.go +++ b/pkg/providers/anthropic/chat.go @@ -148,7 +148,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche ModelName: anthropicResponse.Model, Cached: false, ModelResponse: schemas.ModelResponse{ - SystemID: map[string]string{}, + Metadata: map[string]string{}, Message: schemas.ChatMessage{ Role: completion.Type, Content: completion.Text, diff --git a/pkg/providers/azureopenai/chat.go b/pkg/providers/azureopenai/chat.go index 216a9de9..dadfcbad 100644 --- a/pkg/providers/azureopenai/chat.go +++ b/pkg/providers/azureopenai/chat.go @@ -128,7 +128,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche ModelName: openAICompletion.ModelName, Cached: false, ModelResponse: schemas.ModelResponse{ - SystemID: map[string]string{ + Metadata: map[string]string{ "system_fingerprint": openAICompletion.SystemFingerprint, }, Message: schemas.ChatMessage{ diff --git a/pkg/providers/bedrock/chat.go b/pkg/providers/bedrock/chat.go index 69fb08fd..81a04f7d 100644 --- a/pkg/providers/bedrock/chat.go +++ b/pkg/providers/bedrock/chat.go @@ -110,7 +110,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche ModelName: c.config.Model, Cached: false, ModelResponse: schemas.ModelResponse{ - SystemID: map[string]string{ + Metadata: map[string]string{ "system_fingerprint": "none", }, Message: schemas.ChatMessage{ diff --git a/pkg/providers/cohere/chat.go b/pkg/providers/cohere/chat.go index 81459672..e19fce8b 100644 --- a/pkg/providers/cohere/chat.go +++ b/pkg/providers/cohere/chat.go @@ -143,7 +143,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche ModelName: c.config.Model, Cached: false, ModelResponse: schemas.ModelResponse{ - SystemID: map[string]string{ + Metadata: map[string]string{ "generationId": cohereCompletion.GenerationID, "responseId": cohereCompletion.ResponseID, }, diff --git a/pkg/providers/cohere/finish_reason.go b/pkg/providers/cohere/finish_reason.go index 7076a2f3..139498e6 100644 --- a/pkg/providers/cohere/finish_reason.go +++ b/pkg/providers/cohere/finish_reason.go @@ -36,18 +36,18 @@ func (m *FinishReasonMapper) Map(finishReason *string) *schemas.FinishReason { switch strings.ToLower(*finishReason) { case CompleteReason: - reason = &schemas.Complete + reason = &schemas.ReasonComplete case MaxTokensReason: - reason = &schemas.MaxTokens + reason = &schemas.ReasonMaxTokens case FilteredReason: - reason = &schemas.ContentFiltered + reason = &schemas.ReasonContentFiltered default: m.tel.Logger.Warn( "Unknown finish reason, other is going to used", zap.String("unknown_reason", *finishReason), ) - reason = &schemas.OtherReason + reason = &schemas.ReasonOther } return reason diff --git a/pkg/providers/octoml/chat.go b/pkg/providers/octoml/chat.go index 946a66d9..7556cd75 100644 --- a/pkg/providers/octoml/chat.go +++ b/pkg/providers/octoml/chat.go @@ -142,7 +142,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche ModelName: openAICompletion.ModelName, Cached: false, ModelResponse: schemas.ModelResponse{ - SystemID: map[string]string{ + Metadata: map[string]string{ "system_fingerprint": openAICompletion.SystemFingerprint, }, Message: schemas.ChatMessage{ diff --git a/pkg/providers/ollama/chat.go b/pkg/providers/ollama/chat.go index 63404e94..2f5c8454 100644 --- a/pkg/providers/ollama/chat.go +++ b/pkg/providers/ollama/chat.go @@ -188,7 +188,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche ModelName: ollamaCompletion.Model, Cached: false, ModelResponse: schemas.ModelResponse{ - SystemID: map[string]string{ + Metadata: map[string]string{ "system_fingerprint": "", }, Message: schemas.ChatMessage{ diff --git a/pkg/providers/openai/chat.go b/pkg/providers/openai/chat.go index fe7e8984..323cd84f 100644 --- a/pkg/providers/openai/chat.go +++ b/pkg/providers/openai/chat.go @@ -135,7 +135,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche ModelName: chatCompletion.ModelName, Cached: false, ModelResponse: schemas.ModelResponse{ - SystemID: map[string]string{ + Metadata: map[string]string{ "system_fingerprint": chatCompletion.SystemFingerprint, }, Message: schemas.ChatMessage{ diff --git a/pkg/providers/openai/finish_reasons.go b/pkg/providers/openai/finish_reasons.go index 5d2a0fb4..28b5f675 100644 --- a/pkg/providers/openai/finish_reasons.go +++ b/pkg/providers/openai/finish_reasons.go @@ -34,18 +34,18 @@ func (m *FinishReasonMapper) Map(finishReason string) *schemas.FinishReason { switch finishReason { case CompleteReason: - reason = &schemas.Complete + reason = &schemas.ReasonComplete case MaxTokensReason: - reason = &schemas.MaxTokens + reason = &schemas.ReasonMaxTokens case FilteredReason: - reason = &schemas.ContentFiltered + reason = &schemas.ReasonContentFiltered default: m.tel.Logger.Warn( "Unknown finish reason, other is going to used", zap.String("unknown_reason", finishReason), ) - reason = &schemas.OtherReason + reason = &schemas.ReasonOther } return reason diff --git a/pkg/providers/testing/lang.go b/pkg/providers/testing/lang.go index d524dada..0973576b 100644 --- a/pkg/providers/testing/lang.go +++ b/pkg/providers/testing/lang.go @@ -12,14 +12,14 @@ import ( // RespMock mocks a chat response or a streaming chat chunk type RespMock struct { Msg string - Err *error + Err error } func (m *RespMock) Resp() *schemas.ChatResponse { return &schemas.ChatResponse{ ID: "rsp0001", ModelResponse: schemas.ModelResponse{ - SystemID: map[string]string{ + Metadata: map[string]string{ "ID": "0001", }, Message: schemas.ChatMessage{ @@ -81,7 +81,7 @@ func (m *RespStreamMock) Recv() (*schemas.ChatStreamChunk, error) { m.idx++ if chunk.Err != nil { - return nil, *chunk.Err + return nil, chunk.Err } return chunk.RespChunk(), nil @@ -130,7 +130,7 @@ func (c *ProviderMock) Chat(_ context.Context, _ *schemas.ChatRequest) (*schemas c.idx++ if response.Err != nil { - return nil, *response.Err + return nil, response.Err } return response.Resp(), nil diff --git a/pkg/routers/manager.go b/pkg/routers/manager.go index 015616f1..123ea09e 100644 --- a/pkg/routers/manager.go +++ b/pkg/routers/manager.go @@ -1,13 +1,10 @@ package routers import ( - "errors" - + "github.com/EinStack/glide/pkg/api/schemas" "github.com/EinStack/glide/pkg/telemetry" ) -var ErrRouterNotFound = errors.New("no router found with given ID") - type RouterManager struct { Config *Config tel *telemetry.Telemetry @@ -48,5 +45,5 @@ func (r *RouterManager) GetLangRouter(routerID string) (*LangRouter, error) { return router, nil } - return nil, ErrRouterNotFound + return nil, &schemas.ErrRouterNotFound } diff --git a/pkg/routers/router.go b/pkg/routers/router.go index a4128f7d..fe1d6a98 100644 --- a/pkg/routers/router.go +++ b/pkg/routers/router.go @@ -16,10 +16,7 @@ import ( "github.com/EinStack/glide/pkg/api/schemas" ) -var ( - ErrNoModels = errors.New("no models configured for router") - ErrNoModelAvailable = errors.New("could not handle request because all providers are not available") -) +var ErrNoModels = errors.New("no models configured for router") type RouterID = string @@ -86,10 +83,10 @@ func (r *LangRouter) Chat(ctx context.Context, req *schemas.ChatRequest) (*schem langModel := model.(providers.LangModel) // Check if there is an override in the request - if req.Override != nil { + if req.OverrideParams != nil { // Override the message if the language model ID matches the override model ID - if langModel.ID() == req.Override.Model { - req.Message = req.Override.Message + if langModel.ID() == req.OverrideParams.ModelID { + req.Message = req.OverrideParams.Message } } @@ -124,7 +121,7 @@ func (r *LangRouter) Chat(ctx context.Context, req *schemas.ChatRequest) (*schem // if we reach this part, then we are in trouble r.logger.Error("No model was available to handle chat request") - return nil, ErrNoModelAvailable + return nil, &schemas.ErrNoModelAvailable } func (r *LangRouter) ChatStream( @@ -139,7 +136,7 @@ func (r *LangRouter) ChatStream( schemas.NoModelConfigured, ErrNoModels.Error(), req.Metadata, - &schemas.ErrorReason, + &schemas.ReasonError, ) return @@ -239,9 +236,9 @@ func (r *LangRouter) ChatStream( respC <- schemas.NewChatStreamError( req.ID, r.routerID, - schemas.AllModelsUnavailable, - ErrNoModelAvailable.Error(), + schemas.ErrNoModelAvailable.Name, + schemas.ErrNoModelAvailable.Message, req.Metadata, - &schemas.ErrorReason, + &schemas.ReasonError, ) } diff --git a/pkg/routers/router_test.go b/pkg/routers/router_test.go index 468aabb0..6c69a968 100644 --- a/pkg/routers/router_test.go +++ b/pkg/routers/router_test.go @@ -5,24 +5,15 @@ import ( "testing" "time" - "github.com/EinStack/glide/pkg/routers/latency" - + "github.com/EinStack/glide/pkg/api/schemas" + "github.com/EinStack/glide/pkg/providers" "github.com/EinStack/glide/pkg/providers/clients" - - "github.com/EinStack/glide/pkg/telemetry" - - "github.com/EinStack/glide/pkg/routers/routing" - - "github.com/EinStack/glide/pkg/routers/retry" - - "github.com/EinStack/glide/pkg/routers/health" - ptesting "github.com/EinStack/glide/pkg/providers/testing" - - "github.com/EinStack/glide/pkg/providers" - - "github.com/EinStack/glide/pkg/api/schemas" - + "github.com/EinStack/glide/pkg/routers/health" + "github.com/EinStack/glide/pkg/routers/latency" + "github.com/EinStack/glide/pkg/routers/retry" + "github.com/EinStack/glide/pkg/routers/routing" + "github.com/EinStack/glide/pkg/telemetry" "github.com/stretchr/testify/require" ) @@ -80,14 +71,14 @@ func TestLangRouter_Chat_PickThirdHealthy(t *testing.T) { langModels := []*providers.LanguageModel{ providers.NewLangModel( "first", - ptesting.NewProviderMock([]ptesting.RespMock{{Err: &ErrNoModelAvailable}, {Msg: "3"}}), + ptesting.NewProviderMock([]ptesting.RespMock{{Err: &schemas.ErrNoModelAvailable}, {Msg: "3"}}), budget, *latConfig, 1, ), providers.NewLangModel( "second", - ptesting.NewProviderMock([]ptesting.RespMock{{Err: &ErrNoModelAvailable}, {Msg: "4"}}), + ptesting.NewProviderMock([]ptesting.RespMock{{Err: &schemas.ErrNoModelAvailable}, {Msg: "4"}}), budget, *latConfig, 1, @@ -138,14 +129,14 @@ func TestLangRouter_Chat_SuccessOnRetry(t *testing.T) { langModels := []*providers.LanguageModel{ providers.NewLangModel( "first", - ptesting.NewProviderMock([]ptesting.RespMock{{Err: &ErrNoModelAvailable}, {Msg: "2"}}), + ptesting.NewProviderMock([]ptesting.RespMock{{Err: &schemas.ErrNoModelAvailable}, {Msg: "2"}}), budget, *latConfig, 1, ), providers.NewLangModel( "second", - ptesting.NewProviderMock([]ptesting.RespMock{{Err: &ErrNoModelAvailable}, {Msg: "1"}}), + ptesting.NewProviderMock([]ptesting.RespMock{{Err: &schemas.ErrNoModelAvailable}, {Msg: "1"}}), budget, *latConfig, 1, @@ -182,7 +173,7 @@ func TestLangRouter_Chat_UnhealthyModelInThePool(t *testing.T) { langModels := []*providers.LanguageModel{ providers.NewLangModel( "first", - ptesting.NewProviderMock([]ptesting.RespMock{{Err: &clients.ErrProviderUnavailable}, {Msg: "3"}}), + ptesting.NewProviderMock([]ptesting.RespMock{{Err: clients.ErrProviderUnavailable}, {Msg: "3"}}), budget, *latConfig, 1, @@ -228,14 +219,14 @@ func TestLangRouter_Chat_AllModelsUnavailable(t *testing.T) { langModels := []*providers.LanguageModel{ providers.NewLangModel( "first", - ptesting.NewProviderMock([]ptesting.RespMock{{Err: &ErrNoModelAvailable}, {Err: &ErrNoModelAvailable}}), + ptesting.NewProviderMock([]ptesting.RespMock{{Err: &schemas.ErrNoModelAvailable}, {Err: &schemas.ErrNoModelAvailable}}), budget, *latConfig, 1, ), providers.NewLangModel( "second", - ptesting.NewProviderMock([]ptesting.RespMock{{Err: &ErrNoModelAvailable}, {Err: &ErrNoModelAvailable}}), + ptesting.NewProviderMock([]ptesting.RespMock{{Err: &schemas.ErrNoModelAvailable}, {Err: &schemas.ErrNoModelAvailable}}), budget, *latConfig, 1, @@ -419,7 +410,7 @@ func TestLangRouter_ChatStream_AllModelsUnavailable(t *testing.T) { "first", ptesting.NewStreamProviderMock([]ptesting.RespStreamMock{ ptesting.NewRespStreamMock(&[]ptesting.RespMock{ - {Err: &clients.ErrProviderUnavailable}, + {Err: clients.ErrProviderUnavailable}, }), }), budget, @@ -430,7 +421,7 @@ func TestLangRouter_ChatStream_AllModelsUnavailable(t *testing.T) { "second", ptesting.NewStreamProviderMock([]ptesting.RespStreamMock{ ptesting.NewRespStreamMock(&[]ptesting.RespMock{ - {Err: &clients.ErrProviderUnavailable}, + {Err: clients.ErrProviderUnavailable}, }), }), budget, @@ -468,7 +459,7 @@ func TestLangRouter_ChatStream_AllModelsUnavailable(t *testing.T) { require.Nil(t, result.Chunk) require.NotNil(t, result.Error) - errs = append(errs, result.Error.ErrCode) + errs = append(errs, result.Error.Name) } require.Equal(t, []string{schemas.ModelUnavailable, schemas.ModelUnavailable, schemas.AllModelsUnavailable}, errs)