From 93b3bde3ecf1f3100d48026d0337b8a1d11b097d Mon Sep 17 00:00:00 2001 From: Wangchong Zhou Date: Mon, 2 Dec 2024 15:49:23 +0800 Subject: [PATCH 1/2] fix(ai-proxy): set content-length for non compressed response (cherry picked from commit a850c2680171853f4402c9f80acedde717fca451) --- kong/llm/drivers/shared.lua | 2 +- kong/llm/plugin/base.lua | 2 ++ .../shared-filters/normalize-json-response.lua | 15 ++++++++------- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua index 03c00bbcddb9..55169a29b97d 100644 --- a/kong/llm/drivers/shared.lua +++ b/kong/llm/drivers/shared.lua @@ -175,7 +175,7 @@ _M.operation_map = { } _M.clear_response_headers = { - shared = { + shared = { -- deprecated, no longer used "Content-Length", }, openai = { diff --git a/kong/llm/plugin/base.lua b/kong/llm/plugin/base.lua index 0daca7a29419..4bafcecefb5a 100644 --- a/kong/llm/plugin/base.lua +++ b/kong/llm/plugin/base.lua @@ -110,6 +110,8 @@ function MetaPlugin:header_filter(sub_plugin, conf) -- and seems nginx doesn't support it elseif get_global_ctx("accept_gzip") then + -- for gzip response, don't set content-length at all to align with upstream + kong.response.clear_header("Content-Length") kong.response.set_header("Content-Encoding", "gzip") end diff --git a/kong/llm/plugin/shared-filters/normalize-json-response.lua b/kong/llm/plugin/shared-filters/normalize-json-response.lua index 1e0988f52495..f98b0d07bf12 100644 --- a/kong/llm/plugin/shared-filters/normalize-json-response.lua +++ b/kong/llm/plugin/shared-filters/normalize-json-response.lua @@ -2,7 +2,6 @@ local cjson = require("cjson") local ai_plugin_ctx = require("kong.llm.plugin.ctx") local ai_plugin_o11y = require("kong.llm.plugin.observability") -local ai_shared = require("kong.llm.drivers.shared") local _M = { NAME = "normalize-json-response", @@ -57,6 +56,8 @@ local function transform_body(conf) end set_global_ctx("response_body", response_body) -- to be sent out 
later or consumed by other plugins + + return #response_body end function _M:run(conf) @@ -81,8 +82,9 @@ function _M:run(conf) -- if not streaming, prepare the response body buffer -- this must be called before sending any response headers so that -- we can modify status code if needed + local body_length if not get_global_ctx("stream_mode") then - transform_body(conf) + body_length = transform_body(conf) end -- populate cost @@ -94,12 +96,11 @@ function _M:run(conf) ai_plugin_o11y.metrics_set("llm_usage_cost", 0) end - -- clear shared restricted headers - for _, v in ipairs(ai_shared.clear_response_headers.shared) do - kong.response.clear_header(v) + if not get_global_ctx("accept_gzip") and not get_global_ctx("stream_mode") then + -- otherwise use our transformed body length + kong.response.set_header("Content-Length", body_length) end - if ngx.var.http_kong_debug or conf.model_name_header then local model_t = ai_plugin_ctx.get_request_model_table_inuse() assert(model_t and model_t.name, "model name is missing") @@ -109,4 +110,4 @@ function _M:run(conf) return true end -return _M \ No newline at end of file +return _M From f0c61068979195e6ff8a01071361be9bf19a20ab Mon Sep 17 00:00:00 2001 From: Wangchong Zhou Date: Mon, 2 Dec 2024 15:49:55 +0800 Subject: [PATCH 2/2] fix(ai-proxy): fix content-encoding for non 200 responses (cherry picked from commit 2cdafddf69cd3d3c56ef66d0a3ea3ab154cbd1f2) --- kong/llm/plugin/base.lua | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kong/llm/plugin/base.lua b/kong/llm/plugin/base.lua index 4bafcecefb5a..c678ec43d0a5 100644 --- a/kong/llm/plugin/base.lua +++ b/kong/llm/plugin/base.lua @@ -113,6 +113,9 @@ function MetaPlugin:header_filter(sub_plugin, conf) -- for gzip response, don't set content-length at all to align with upstream kong.response.clear_header("Content-Length") kong.response.set_header("Content-Encoding", "gzip") + + else + kong.response.clear_header("Content-Encoding") end else