fix(plugin/ai-proxy): improve the robustness of anthropic's statistics
There are slight differences in the usage data returned by different AI providers, and given that variability, this PR improves the robustness of transforming the usage data when:

no usage data is provided in the upstream response;
the structure of the usage data changes in the future.

In #12781 , it was assumed that the usage data is always included and that its structure won't change, which could cause an error to be thrown. This PR is a follow-up to prevent that.
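For illustration (a hypothetical sample, not part of the commit), the happy path maps Anthropic's field names onto the OpenAI-style shape the plugin returns:

-- hypothetical decoded upstream usage block
local upstream_usage = { input_tokens = 10, output_tokens = 25 }

local usage = {
  prompt_tokens     = upstream_usage.input_tokens,   -- 10
  completion_tokens = upstream_usage.output_tokens,  -- 25
  total_tokens      = upstream_usage.input_tokens + upstream_usage.output_tokens,  -- 35
}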

(cherry picked from commit a22d696)

Co-authored-by: Robin Xiang <liverpool8056@163.com>
team-gateway-bot and liverpool8056 authored Apr 29, 2024
1 parent 7f9c132 commit 043ee5d
Showing 4 changed files with 182 additions and 9 deletions.
23 changes: 16 additions & 7 deletions kong/llm/drivers/anthropic.lua
@@ -268,6 +268,20 @@ local transformers_from = {
end

if response_table.content then
local usage = response_table.usage

if usage then
usage = {
prompt_tokens = usage.input_tokens,
completion_tokens = usage.output_tokens,
total_tokens = usage.input_tokens and usage.output_tokens and
usage.input_tokens + usage.output_tokens or nil,
}

else
usage = "no usage data returned from upstream"
end

local res = {
choices = {
{
@@ -279,16 +293,11 @@ local transformers_from = {
finish_reason = response_table.stop_reason,
},
},
- usage = {
- prompt_tokens = response_table.usage.input_tokens or 0,
- completion_tokens = response_table.usage.output_tokens or 0,
- total_tokens = response_table.usage.input_tokens and response_table.usage.output_tokens and
- response_table.usage.input_tokens + response_table.usage.output_tokens or 0,
- },
+ usage = usage,
model = response_table.model,
object = "chat.content",
}

return cjson.encode(res)
else
-- it's probably an error block, return generic error
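To make the new guard's behavior concrete, here is a minimal standalone sketch (editorial, not part of the commit) of the three shapes response_table.usage can take:

-- mirrors the guard added above: tolerate absent or unexpected usage data
local function to_usage(upstream_usage)
  if upstream_usage then
    return {
      prompt_tokens = upstream_usage.input_tokens,
      completion_tokens = upstream_usage.output_tokens,
      total_tokens = upstream_usage.input_tokens and upstream_usage.output_tokens
        and upstream_usage.input_tokens + upstream_usage.output_tokens or nil,
    }
  end
  return "no usage data returned from upstream"
end

to_usage({ input_tokens = 10, output_tokens = 25 })  -- well-formed: full token counts
to_usage(nil)                                        -- missing: the sentinel string
to_usage({ foo = 0, bar = 0 })                       -- malformed: all fields nil, an empty table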
142 changes: 140 additions & 2 deletions spec/03-plugins/38-ai-proxy/03-anthropic_integration_spec.lua
@@ -8,6 +8,7 @@
local helpers = require "spec.helpers"
local cjson = require "cjson"
local pl_file = require "pl.file"
local deepcompare = require("pl.tablex").deepcompare

local PLUGIN_NAME = "ai-proxy"
local MOCK_PORT = helpers.get_available_port()
@@ -82,6 +83,56 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
}
}
location = "/llm/v1/chat/no_usage_upstream_response" {
content_by_lua_block {
local pl_file = require "pl.file"
local json = require("cjson.safe")
local token = ngx.req.get_headers()["x-api-key"]
if token == "anthropic-key" then
ngx.req.read_body()
local body, err = ngx.req.get_body_data()
body, err = json.decode(body)
if err or (not body.messages) then
ngx.status = 400
ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/bad_request.json"))
else
ngx.status = 200
ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/no_usage_response.json"))
end
else
ngx.status = 401
ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/unauthorized.json"))
end
}
}
location = "/llm/v1/chat/malformed_usage_upstream_response" {
content_by_lua_block {
local pl_file = require "pl.file"
local json = require("cjson.safe")
local token = ngx.req.get_headers()["x-api-key"]
if token == "anthropic-key" then
ngx.req.read_body()
local body, err = ngx.req.get_body_data()
body, err = json.decode(body)
if err or (not body.messages) then
ngx.status = 400
ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/bad_request.json"))
else
ngx.status = 200
ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/malformed_usage_response.json"))
end
else
ngx.status = 401
ngx.print(pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/unauthorized.json"))
end
}
}
location = "/llm/v1/chat/bad_request" {
content_by_lua_block {
local pl_file = require "pl.file"
@@ -177,15 +228,15 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
--

-- 200 chat bad upstream response with one option
- local chat_good = assert(bp.routes:insert {
+ local chat_bad = assert(bp.routes:insert {
service = empty_service,
protocols = { "http" },
strip_path = true,
paths = { "/anthropic/llm/v1/chat/bad_upstream_response" }
})
bp.plugins:insert {
name = PLUGIN_NAME,
- route = { id = chat_good.id },
+ route = { id = chat_bad.id },
config = {
route_type = "llm/v1/chat",
auth = {
@@ -206,6 +257,65 @@ }
}
--

-- 200 chat no-usage response
local chat_no_usage = assert(bp.routes:insert {
service = empty_service,
protocols = { "http" },
strip_path = true,
paths = { "/anthropic/llm/v1/chat/no_usage_upstream_response" }
})
bp.plugins:insert {
name = PLUGIN_NAME,
route = { id = chat_no_usage.id },
config = {
route_type = "llm/v1/chat",
auth = {
header_name = "x-api-key",
header_value = "anthropic-key",
},
model = {
name = "claude-2.1",
provider = "anthropic",
options = {
max_tokens = 256,
temperature = 1.0,
upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/no_usage_upstream_response",
anthropic_version = "2023-06-01",
},
},
},
}
--

-- 200 chat malformed-usage response
local chat_malformed_usage = assert(bp.routes:insert {
service = empty_service,
protocols = { "http" },
strip_path = true,
paths = { "/anthropic/llm/v1/chat/malformed_usage_upstream_response" }
})
bp.plugins:insert {
name = PLUGIN_NAME,
route = { id = chat_malformed_usage.id },
config = {
route_type = "llm/v1/chat",
auth = {
header_name = "x-api-key",
header_value = "anthropic-key",
},
model = {
name = "claude-2.1",
provider = "anthropic",
options = {
max_tokens = 256,
temperature = 1.0,
upstream_url = "http://"..helpers.mock_upstream_host..":"..MOCK_PORT.."/llm/v1/chat/malformed_usage_upstream_response",
anthropic_version = "2023-06-01",
},
},
},
}

-- 200 completions good with one option
local completions_good = assert(bp.routes:insert {
service = empty_service,
@@ -477,6 +587,34 @@ for _, strategy in helpers.all_strategies() do if strategy ~= "cassandra" then
-- check this is in the 'kong' response format
assert.equals(json.error.message, "request format not recognised")
end)

it("no usage response", function()
local r = client:get("/anthropic/llm/v1/chat/no_usage_upstream_response", {
headers = {
["content-type"] = "application/json",
["accept"] = "application/json",
},
body = pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/requests/good.json"),
})

local body = assert.res_status(200 , r)
local json = cjson.decode(body)
assert.equals(json.usage, "no usage data returned from upstream")
end)

it("malformed usage response", function()
local r = client:get("/anthropic/llm/v1/chat/malformed_usage_upstream_response", {
headers = {
["content-type"] = "application/json",
["accept"] = "application/json",
},
body = pl_file.read("spec/fixtures/ai-proxy/anthropic/llm-v1-chat/requests/good.json"),
})

local body = assert.res_status(200 , r)
local json = cjson.decode(body)
assert.is_truthy(deepcompare(json.usage, {}))
end)
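-- Editorial note (not part of the spec): the {} expectation follows from the
-- transform in anthropic.lua. With unrecognized keys, input_tokens and
-- output_tokens are nil, so every field of the rebuilt usage table is nil and
-- the table stays empty; cjson encodes an empty Lua table as a JSON object:
--
--   local cjson = require "cjson"
--   cjson.encode({ prompt_tokens = nil, total_tokens = nil })  --> "{}"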
end)

describe("anthropic llm/v1/completions", function()
15 changes: 15 additions & 0 deletions spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/malformed_usage_response.json
@@ -0,0 +1,15 @@
{
"content": [
{
"text": "The sum of 1 + 1 is 2.",
"type": "text"
}
],
"model": "claude-2.1",
"stop_reason": "end_turn",
"stop_sequence": "string",
"usage": {
"foo": 0,
"bar": 0
}
}
11 changes: 11 additions & 0 deletions spec/fixtures/ai-proxy/anthropic/llm-v1-chat/responses/no_usage_response.json
@@ -0,0 +1,11 @@
{
"content": [
{
"text": "The sum of 1 + 1 is 2.",
"type": "text"
}
],
"model": "claude-2.1",
"stop_reason": "end_turn",
"stop_sequence": "string"
}
