support ai-model based moderation

shreemaan-abhishek · Sep 2, 2024 · e16a823 · e16a823
1 parent f713f87
commit e16a823
Show file tree

Hide file tree

Showing 3 changed files with 60 additions and 39 deletions.
diff --git a/apisix/core/request.lua b/apisix/core/request.lua
@@ -21,6 +21,7 @@
 
 local lfs = require("lfs")
 local log = require("apisix.core.log")
+local json = require("apisix.core.json")
 local io = require("apisix.core.io")
 local req_add_header
 if ngx.config.subsystem == "http" then
@@ -334,6 +335,26 @@ function _M.get_body(max_size, ctx)
 end
 
 
+--- Read the request body and decode it as JSON.
+-- The raw body is handed to the decoder untouched: escapes such as `\"`
+-- inside string values are part of valid JSON and must not be stripped.
+-- @treturn table|nil decoded body on success, nil on failure
+-- @treturn table|nil on failure, `{ message = <string> }` describing the error
+function _M.get_body_table()
+    local body, err = _M.get_body()
+    if not body then
+        return nil, { message = "could not get body: " .. (err or "request body is empty") }
+    end
+
+    local body_tab, decode_err = json.decode(body)
+    if not body_tab then
+        return nil, { message = "could not get parse JSON request body: " .. decode_err }
+    end
+
+    return body_tab
+end
+
+
 function _M.get_scheme(ctx)
     if not ctx then
         ctx = ngx.ctx.api_ctx

diff --git a/apisix/plugins/content-moderation.lua b/apisix/plugins/content-moderation.lua
@@ -88,9 +88,14 @@ function _M.check_schema(conf)
 end
 
 function _M.rewrite(conf, ctx)
-    local body = core.request.get_body()
+    local body, err = core.request.get_body_table()
     if not body then
-        return
+        return 400, err
+    end
+
+    local msgs = body.messages
+    if not msgs or type(msgs) ~= "table" or #msgs < 1 then
+        return 400, "messages not found in request body"
     end
 
     local provider = conf.provider[next(conf.provider)]
@@ -115,40 +120,43 @@ function _M.rewrite(conf, ctx)
         port = port,
     })
 
+    local text_segments = {}
+    for _, msg in ipairs(msgs) do
+        core.table.insert_tail(text_segments, {
+            Text = msg.content
+        })
+    end
     local res, err = comprehend:detectToxicContent({
         LanguageCode = "en",
-        TextSegments = {
-            {
-                Text = body
-            }
-        },
+        TextSegments = text_segments,
     })
 
     if not res then
         core.log.error("failed to send request to ", provider, ": ", err)
         return 500, err
     end
 
-    local result = res.body and res.body.ResultList and res.body.ResultList[1]
-    if not result then
-        return 500, "failed to get moderation result from response"
+    local results = res.body and res.body.ResultList
+    if not results or type(results) ~= "table" or #results < 1 then
+        return 500, "failed to get moderation results from response"
     end
 
-
-    if conf.moderation_categories then
-        for _, item in pairs(result.Labels) do
-            if not conf.moderation_categories[item.Name] then
-                goto continue
-            end
-            if item.Score > conf.moderation_categories[item.Name] then
-                return 400, "request body exceeds " .. item.Name .. " threshold"
+    for _, result in ipairs(results) do
+        if conf.moderation_categories then
+            for _, item in pairs(result.Labels) do
+                if not conf.moderation_categories[item.Name] then
+                    goto continue
+                end
+                if item.Score > conf.moderation_categories[item.Name] then
+                    return 400, "request body exceeds " .. item.Name .. " threshold"
+                end
+                ::continue::
             end
-            ::continue::
         end
-    end
 
-    if result.Toxicity > conf.toxicity_level then
-        return 400, "request body exceeds toxicity threshold"
+        if result.Toxicity > conf.toxicity_level then
+            return 400, "request body exceeds toxicity threshold"
+        end
     end
 end
 

diff --git a/t/plugin/content-moderation.t b/t/plugin/content-moderation.t
@@ -135,7 +135,7 @@ passed
 === TEST 2: toxic request should fail
 --- request
 POST /echo
-toxic
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"toxic"}]}
 --- error_code: 400
 --- response_body chomp
 request body exceeds toxicity threshold
@@ -145,10 +145,8 @@ request body exceeds toxicity threshold
 === TEST 3: good request should pass
 --- request
 POST /echo
-good_request
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
 --- error_code: 200
---- response_body chomp
-good_request
 
 
 
@@ -199,7 +197,7 @@ passed
 === TEST 5: profane request should fail
 --- request
 POST /echo
-profane
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"profane"}]}
 --- error_code: 400
 --- response_body chomp
 request body exceeds PROFANITY threshold
@@ -209,7 +207,7 @@ request body exceeds PROFANITY threshold
 === TEST 6: very profane request should also fail
 --- request
 POST /echo
-very_profane
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"very_profane"}]}
 --- error_code: 400
 --- response_body chomp
 request body exceeds PROFANITY threshold
@@ -219,10 +217,8 @@ request body exceeds PROFANITY threshold
 === TEST 7: good_request should pass
 --- request
 POST /echo
-good_request
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
 --- error_code: 200
---- response_body chomp
-good_request
 
 
 
@@ -273,7 +269,7 @@ passed
 === TEST 9: profane request should pass profanity check but fail toxicity check
 --- request
 POST /echo
-profane
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"profane"}]}
 --- error_code: 400
 --- response_body chomp
 request body exceeds toxicity threshold
@@ -283,17 +279,15 @@ request body exceeds toxicity threshold
 === TEST 10: profane_but_not_toxic request should pass
 --- request
 POST /echo
-profane_but_not_toxic
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"profane_but_not_toxic"}]}
 --- error_code: 200
---- response_body chomp
-profane_but_not_toxic
 
 
 
 === TEST 11: but very profane request will fail
 --- request
 POST /echo
-very_profane
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"very_profane"}]}
 --- error_code: 400
 --- response_body chomp
 request body exceeds PROFANITY threshold
@@ -303,7 +297,5 @@ request body exceeds PROFANITY threshold
 === TEST 12: good_request should pass
 --- request
 POST /echo
-good_request
+{"model":"gpt-4o-mini","messages":[{"role":"user","content":"good_request"}]}
 --- error_code: 200
---- response_body chomp
-good_request