diff --git a/cmake/ExperimentalPlugins.cmake b/cmake/ExperimentalPlugins.cmake index e8ea6141347..349b76aec2f 100644 --- a/cmake/ExperimentalPlugins.cmake +++ b/cmake/ExperimentalPlugins.cmake @@ -33,6 +33,7 @@ auto_option(CACHE_FILL FEATURE_VAR BUILD_CACHE_FILL DEFAULT ${_DEFAULT}) auto_option(CERT_REPORTING_TOOL FEATURE_VAR BUILD_CERT_REPORTING_TOOL DEFAULT ${_DEFAULT}) auto_option(COOKIE_REMAP FEATURE_VAR BUILD_COOKIE_REMAP DEFAULT ${_DEFAULT}) auto_option(CUSTOM_REDIRECT FEATURE_VAR BUILD_CUSTOM_REDIRECT DEFAULT ${_DEFAULT}) +auto_option(FILTER_BODY FEATURE_VAR BUILD_FILTER_BODY DEFAULT ${_DEFAULT}) auto_option(FQ_PACING FEATURE_VAR BUILD_FQ_PACING DEFAULT ${_DEFAULT}) auto_option(GEOIP_ACL FEATURE_VAR BUILD_GEOIP_ACL DEFAULT ${_DEFAULT}) auto_option(HEADER_FREQ FEATURE_VAR BUILD_HEADER_FREQ DEFAULT ${_DEFAULT}) diff --git a/doc/admin-guide/plugins/filter_body.en.rst b/doc/admin-guide/plugins/filter_body.en.rst new file mode 100644 index 00000000000..700eb57ae3c --- /dev/null +++ b/doc/admin-guide/plugins/filter_body.en.rst @@ -0,0 +1,449 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../../common.defs + +.. 
_admin-plugins-filter_body: + +Filter Body Plugin +****************** + +Description +=========== + +The ``filter_body`` plugin is an experimental plugin that provides streaming +request and response body content inspection with configurable pattern matching +and actions. It can be used to detect and mitigate security threats such as CVE +exploits, XXE (XML External Entity) attacks, SQL injection patterns, and other +malicious content. + +The plugin uses a streaming transform approach, processing data as it arrives +without buffering the entire request or response body. A small lookback buffer +(sized to the longest pattern minus one byte) is maintained to detect patterns +that span chunk boundaries. + +Features +-------- + +- YAML-based configuration with flexible rule definitions +- Header-based filtering with AND/OR logic +- Case-insensitive header matching, case-sensitive body patterns +- Configurable actions per rule: ``log``, ``block``, ``add_header`` +- Support for both request and response body inspection +- Configurable HTTP methods to match (GET, POST, PUT, etc.) +- Per-rule metrics counters for monitoring match activity +- Streaming transform with lookback buffer for cross-boundary pattern matching +- Optional ``max_content_length`` to skip inspection of large bodies +- Optional ``status`` codes to match for response rules + +Installation +============ + +The ``filter_body`` plugin is an experimental plugin and is not built by default. +To build it, pass ``-DENABLE_FILTER_BODY=ON`` to ``cmake`` when configuring:: + + cmake -DENABLE_FILTER_BODY=ON ... + +Alternatively, build all experimental plugins at once with +``-DBUILD_EXPERIMENTAL_PLUGINS=ON``:: + + cmake -DBUILD_EXPERIMENTAL_PLUGINS=ON ... 
+ +Configuration +============= + +The plugin is configured as a remap plugin with a YAML configuration file:: + + map http://example.com/ http://origin.example.com/ @plugin=filter_body.so @pparam=filter_body.yaml + +The configuration file path can be relative to the |TS| configuration directory +or an absolute path. + +Configuration File Format +------------------------- + +The configuration file uses YAML format with a list of rules. Each rule has a +``name``, a ``filter`` section containing all filtering criteria, and an +``action`` section specifying what to do when a match occurs:: + + rules: + - name: rule_name + filter: + direction: request # optional, defaults to request + methods: # for request rules only + - POST + - PUT + max_content_length: 1048576 + headers: + - name: Content-Type + patterns: + - "application/xml" + - "text/xml" + body_patterns: + - " + X-Another-Header: some-value + +For response rules, use ``status`` instead of ``methods`` within the ``filter`` +section:: + + rules: + - name: response_rule + filter: + direction: response + status: # for response rules only + - 200 + - 201 + body_patterns: + - "sensitive_data" + action: + - log + +Rule Options +------------ + +``name`` (required) + A unique name for the rule. Used in log messages and metrics when the rule + matches. The special placeholder ```` can be used in header values + to substitute the rule's name dynamically. + +``filter`` (required) + A section containing all filtering criteria that determine which requests or + responses the rule applies to. This section separates the "what to match" + from the "what to do" (action). + +Filter Options +-------------- + +The following options are valid within the ``filter`` section: + +``direction`` (optional) + Specifies whether to inspect request or response bodies. + Valid values: ``request``, ``response``. Default: ``request``. + +``methods`` (optional) + List of HTTP methods to match. If not specified, all methods are matched. 
+ Only valid for request rules. Example: ``[GET, POST, PUT]``. + +``status`` (optional) + List of HTTP status codes to match. If not specified, all status codes are + matched. Only valid for response rules. Example: ``[200, 201]``. + +``max_content_length`` (optional) + Maximum content length in bytes for body inspection. Bodies larger than + this value will not be inspected. If set to 0 or not specified, all bodies + are inspected regardless of size. + +``headers`` (optional) + List of header conditions that must all match (AND logic) for body + inspection to occur. Each header can have multiple patterns (OR logic + within a single header). + + - ``name``: Header name (case-insensitive matching). + - ``patterns``: List of patterns to match against the header value. + +``body_patterns`` (required) + List of patterns to search for in the body content. Pattern matching is + case-sensitive. If any pattern matches, the configured actions are executed. + +Action Options +-------------- + +``action`` (optional) + List of actions to take when a pattern matches. Default is ``[log]``. + Valid values: + + - ``log``: Log the match to the Traffic Server log. + - ``block``: Block the request/response (see Block Action below for details). + - ``add_header``: Add custom headers to the request/response. This action + takes a map of header names to values. Use ```` in header + values to substitute the rule's name dynamically. Example:: + + action: + - log + - add_header: + X-Security-Match: + X-Custom-Flag: detected + +Matching Logic +============== + +Header Matching +--------------- + +Headers are matched using the following logic: + +1. All configured headers must match (AND logic between headers). +2. Within each header, any pattern can match (OR logic between patterns). +3. Header name matching is case-insensitive. +4. Header value matching is case-insensitive. 
+ +For example, with this configuration:: + + filter: + headers: + - name: Content-Type + patterns: + - "application/xml" + - "text/xml" + - name: X-Custom-Header + patterns: + - "value1" + +A request must have: + +- A ``Content-Type`` header containing either "application/xml" OR "text/xml", AND +- An ``X-Custom-Header`` header containing "value1". + +Body Pattern Matching +--------------------- + +Body patterns are matched using simple substring search: + +- Matching is case-sensitive. +- Any pattern match triggers the configured actions. +- The plugin uses a streaming approach with a lookback buffer to handle patterns + that may span buffer boundaries. + +Actions +======= + +Log Action +---------- + +When the ``log`` action is configured, pattern matches are logged to the +Traffic Server error log (``diags.log``). No special debug configuration is +required - log messages are always written when a pattern matches. + +Log messages include the rule name and matched pattern in the format:: + + NOTE: [filter_body] Matched rule: , pattern: + +To also log the headers for debugging, you can configure access logging to +include request and response headers. See :ref:`admin-logging` for details +on configuring access logs. + +Block Action +------------ + +When the ``block`` action is configured, the connections are closed and no +further data is forwarded. + +.. warning:: + + Because the plugin uses streaming body inspection, a malicious pattern may + not be detected until after some (or all) of the body has already been sent. + The ``block`` action stops further transmission but cannot recall data + already sent. For maximum protection, consider using ``max_content_length`` + to limit inspection to smaller bodies, or use header-based filtering to + reduce the attack surface. + +**Request body blocking**: Both the client and origin connections are closed. +The client does not receive any HTTP response - the connection simply closes. 
+This is because body inspection occurs after request headers have been sent to +the origin. + +**Response body blocking**: The HTTP status code has already been sent to the +client before body inspection begins. The connection is closed, leaving the +client with a partial response body. + +Add Header Action +----------------- + +When the ``add_header`` action is configured, custom headers are added: + +- For request rules: Headers are added to the server request (proxy request + going to the origin). This header modification occurs during body inspection, + after the initial request headers have been read but before they are sent + to the origin. + +- For response rules: Headers are added to the client response. Since body + inspection occurs during response streaming, headers are added before the + response body is sent to the client. + +The ``add_header`` action takes a map of header names and values:: + + action: + - add_header: + X-Security-Match: + X-Custom-Flag: detected + +Use the special placeholder ```` in header values to substitute the +rule's name dynamically. Multiple headers can be specified in a single +``add_header`` action. + +.. note:: + + To verify that headers are being added correctly, you can configure access + logging to include the server request headers (for request rules) or client + response headers (for response rules). Use log fields like ``{Server-Request}`` + or ``{Client-Response}`` in your log format. See :ref:`admin-logging` for + details. + +Example Configurations +====================== + +XXE Attack Detection +-------------------- + +Block XML requests containing XXE patterns:: + + rules: + - name: xxe_detection + filter: + direction: request + methods: + - POST + - PUT + headers: + - name: Content-Type + patterns: + - "application/xml" + - "text/xml" + - "application/xhtml+xml" + body_patterns: + - " + +Metrics +======= + +The plugin creates a metrics counter for each configured rule. 
 The counter is +incremented each time the rule matches a pattern in a request or response body. + +Metric names follow this format:: + + plugin.filter_body.rule.<rule_name>.matches + +For example, a rule named ``xxe_detection`` would have a metric named:: + + plugin.filter_body.rule.xxe_detection.matches + +You can query these metrics using ``traffic_ctl``, replacing ``<rule_name>`` with +the name from your configuration:: + + traffic_ctl metric get plugin.filter_body.rule.<rule_name>.matches + +Or list all filter_body metrics:: + + traffic_ctl metric match plugin.filter_body + +Debugging +========= + +To enable debug output for the plugin, configure debug tags in records.yaml:: + + records: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: filter_body + +Debug output includes: + +- Configuration loading and rule parsing. +- Header matching results. +- Pattern match notifications. +- Action execution. + +Limitations +=========== + +1. **Request blocking**: When blocking request bodies, both the client and + origin connections are closed. The client does not receive any HTTP response + code - the connection simply closes. This is because body inspection occurs + after the request headers have already been sent to the origin. + +2. **Response blocking**: When blocking response bodies, the HTTP status code + has already been sent to the client before body inspection begins. The + plugin closes the connection, leaving the client with a partial response + body. + +3. **Pattern matching**: The plugin uses simple substring matching. Regular + expressions are not currently supported. + +4. **Memory usage**: The lookback buffer size is determined by the longest + body pattern configured. Very long patterns may increase memory usage. + +5. **Cross-boundary pattern search**: When searching for patterns that may span + buffer block boundaries, the plugin uses a two-phase search.
The boundary + search copies only a small region (at most 2 * max pattern length bytes) to + detect patterns spanning boundaries. The main block search is zero-copy. + +6. **Performance**: Body inspection adds processing overhead. Use + ``max_content_length`` to limit inspection to smaller bodies when appropriate. + +See Also +======== + +- :doc:`header_rewrite.en` for header-based request/response modification. +- :doc:`access_control.en` for access control based on various criteria. diff --git a/doc/admin-guide/plugins/index.en.rst b/doc/admin-guide/plugins/index.en.rst index 3334bdc2d79..34ae3687dee 100644 --- a/doc/admin-guide/plugins/index.en.rst +++ b/doc/admin-guide/plugins/index.en.rst @@ -171,6 +171,7 @@ directory of the |TS| source tree. Experimental plugins can be compiled by passi Certifier Cert Reporting Tool Cookie Remap + Filter Body GeoIP ACL FQ Pacing Header Frequency @@ -211,6 +212,9 @@ directory of the |TS| source tree. Experimental plugins can be compiled by passi :doc:`Cookie Remap ` Makes decisions on destinations based on cookies. +:doc:`Filter Body ` + Streaming body content inspection with configurable pattern matching for detecting security threats. 
+ :doc:`FQ Pacing ` FQ Pacing: Rate Limit TCP connections using Linux's Fair Queuing queue discipline diff --git a/plugins/experimental/CMakeLists.txt b/plugins/experimental/CMakeLists.txt index 20a54f4705c..db76b82e829 100644 --- a/plugins/experimental/CMakeLists.txt +++ b/plugins/experimental/CMakeLists.txt @@ -35,6 +35,9 @@ endif() if(BUILD_CUSTOM_REDIRECT) add_subdirectory(custom_redirect) endif() +if(BUILD_FILTER_BODY) + add_subdirectory(filter_body) +endif() if(BUILD_FQ_PACING) add_subdirectory(fq_pacing) endif() diff --git a/plugins/experimental/filter_body/CMakeLists.txt b/plugins/experimental/filter_body/CMakeLists.txt new file mode 100644 index 00000000000..dd8083c3b42 --- /dev/null +++ b/plugins/experimental/filter_body/CMakeLists.txt @@ -0,0 +1,24 @@ +####################### +# +# Licensed to the Apache Software Foundation (ASF) under one or more contributor license +# agreements. See the NOTICE file distributed with this work for additional information regarding +# copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# +####################### + +project(filter_body) + +add_atsplugin(filter_body filter_body.cc) + +target_link_libraries(filter_body PRIVATE yaml-cpp::yaml-cpp) + +verify_remap_plugin(filter_body) diff --git a/plugins/experimental/filter_body/README.md b/plugins/experimental/filter_body/README.md new file mode 100644 index 00000000000..0d270da697e --- /dev/null +++ b/plugins/experimental/filter_body/README.md @@ -0,0 +1,199 @@ +# filter_body - Request/Response Body Content Filter Plugin + +## Overview + +The `filter_body` plugin is a remap plugin that performs zero-copy streaming +inspection of request or response bodies to detect CVE exploitation attempts +and other malicious patterns. When configured patterns are matched, the plugin +can log, block (return 403), and/or add headers. + +## Features + +- Zero-copy streaming body inspection (no full buffering). +- Case-insensitive header pattern matching. +- Case-sensitive body pattern matching. +- Handles patterns that span buffer boundaries. +- Per-rule direction: inspect request or response. +- Configurable actions: log, block, add_header. +- Optional Content-Length limit to skip large payloads. +- Per-rule metrics counters. + +## Configuration + +The plugin uses a YAML configuration file. Usage in `remap.config`: + +``` +map http://example.com/ http://origin.com/ @plugin=filter_body.so @pparam=filter_body.yaml +``` + +### Example Configuration + +The configuration uses a `filter` node to group all filtering criteria, +keeping them separate from the `action`: + +```yaml +rules: + # Block XXE attacks in XML requests. 
+ - name: "xxe_detection" + filter: + direction: request # "request" (default) or "response" + methods: [POST] # HTTP methods to inspect + max_content_length: 1048576 # Skip bodies larger than 1MB + headers: + - name: "Content-Type" + patterns: # Case-insensitive, ANY matches (OR) + - "application/xml" + - "text/xml" + body_patterns: # Case-sensitive, ANY matches + - "" + X-Threat-Type: "proto-pollution" + + # Filter sensitive data from responses. + - name: "ssn_leak" + filter: + direction: response + status: [200] # Only inspect 200 responses + headers: + - name: "Content-Type" + patterns: ["application/json", "text/html"] + body_patterns: + - "SSN:" + - "social security" + action: + - log + - block +``` + +## Configuration Fields + +### Top Level + +| Field | Description | +|-------|-------------| +| `rules` | Array of filter rules. | + +### Per-Rule Fields + +| Field | Description | +|-------|-------------| +| `name` | Rule name (required, used in logging and metrics). | +| `filter` | Container for all filtering criteria (required). | +| `action` | Array of actions (default: `[log]`). | + +### Filter Section Fields + +| Field | Description | +|-------|-------------| +| `direction` | `"request"` or `"response"` (default: `request`). | +| `methods` | Array of HTTP methods to inspect (empty = all, request rules only). | +| `status` | Array of HTTP status codes to match (response rules only). | +| `max_content_length` | Skip inspection if Content-Length exceeds this value. | +| `headers` | Array of header conditions (ALL must match). | +| `body_patterns` | Array of body patterns to search for (ANY matches). | + +### Actions + +- `log` - Log match to `diags.log`. +- `block` - Return 403 Forbidden. +- `add_header` - Add configured headers (supports multiple headers and `` substitution). + +```yaml +action: + - log + - add_header: + X-Security-Match: "" + X-Another-Header: "some-value" +``` + +The `` placeholder is replaced with the rule's `name` value at +runtime. 
 + +### Header Conditions + +```yaml +filter: + headers: + - name: "Content-Type" # Header name (case-insensitive) + patterns: # Patterns to match (OR logic, case-insensitive) + - "application/xml" + - "text/xml" +``` + +## Matching Logic + +1. Rules are evaluated based on direction (request/response). +2. For body inspection to trigger: + - Method must match (if configured, request rules only). + - Status code must match (if configured, response rules only). + - Content-Length must be ≤ `max_content_length` (if configured). + - ALL header conditions must match. + - Within each header, ANY pattern matches (OR, case-insensitive). +3. Body is streamed through and searched for patterns (case-sensitive). +4. If ANY body pattern matches, configured actions are executed. + +## Performance Notes + +- Uses zero-copy streaming; data is not buffered entirely. +- Only a small lookback buffer (`max_pattern_length - 1` bytes) is maintained + to detect patterns that span buffer boundaries. +- Use `max_content_length` to skip inspection of large payloads. +- Header matching is done before any body processing begins. + +## Metrics + +The plugin creates a metrics counter for each rule: + +``` +plugin.filter_body.rule.<rule_name>.matches +``` + +Query with `traffic_ctl`, replacing `<rule_name>` with the name from your config: + +```bash +traffic_ctl metric get plugin.filter_body.rule.<rule_name>.matches +traffic_ctl metric match plugin.filter_body +``` + +## Building + +Enable with cmake: + +```bash +cmake -DENABLE_FILTER_BODY=ON ... +``` + +Or build all experimental plugins: + +```bash +cmake -DBUILD_EXPERIMENTAL_PLUGINS=ON ... +``` + +## Documentation + +For comprehensive documentation, see the [Admin Guide](../../../doc/admin-guide/plugins/filter_body.en.rst). + +## License + +Licensed to the Apache Software Foundation (ASF) under the Apache License, Version 2.0.
diff --git a/plugins/experimental/filter_body/filter_body.cc b/plugins/experimental/filter_body/filter_body.cc new file mode 100644 index 00000000000..11f68a42cc1 --- /dev/null +++ b/plugins/experimental/filter_body/filter_body.cc @@ -0,0 +1,1040 @@ +/** @file + + @brief A remap plugin that filters request/response bodies for CVE exploitation patterns. + + This plugin performs zero-copy streaming inspection of request or response bodies, + looking for configured patterns. When a pattern matches, it can log, block (403), + and/or add a header. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
// Action flags. Each action occupies its own bit so that a rule can combine
// several of them in Rule::actions.
constexpr unsigned ACTION_LOG        = 1 << 0;
constexpr unsigned ACTION_BLOCK      = 1 << 1;
constexpr unsigned ACTION_ADD_HEADER = 1 << 2;

// Direction: which message body a rule (and the transform built for it) inspects.
enum class Direction { REQUEST, RESPONSE };

// Header match condition: a header name plus the substrings accepted in its value.
struct HeaderCondition {
  std::string              name;
  std::vector<std::string> patterns; // case-insensitive match; any one pattern is enough
};

// Header to add when an add_header action triggers.
struct AddHeader {
  std::string name;
  std::string value; // may contain the <rule_name> placeholder, substituted at match time
};

// A single filtering rule.
struct Rule {
  std::string                  name;
  Direction                    direction = Direction::REQUEST;
  unsigned                     actions   = ACTION_LOG; // default: log only
  std::vector<AddHeader>       add_headers;            // headers to add on match
  std::vector<std::string>     methods;                // for request rules; empty = every method
  std::vector<int>             status_codes;           // for response rules; empty = every status
  int64_t                      max_content_length = -1; // -1 means no limit
  std::vector<HeaderCondition> headers;                 // all conditions must match (AND)
  std::vector<std::string>     body_patterns;           // case-sensitive match; any one is enough
  size_t                       max_pattern_len = 0;
  int                          stat_id         = -1; // metrics counter for matches (-1 = not created)
};

// Plugin configuration (per remap instance).
// NOTE(review): the element type of the two rule lists was reconstructed as
// Rule (stored by value) -- confirm against the configuration loader.
struct FilterConfig {
  std::vector<Rule> request_rules;
  std::vector<Rule> response_rules;
  size_t            max_lookback = 0; // max pattern length - 1 across all rules
};
false; +}; + +/** + * @brief Case-insensitive substring search. + * + * Searches for @a needle within @a haystack using case-insensitive comparison. + * + * @param[in] haystack The string to search within. + * @param[in] needle The pattern to search for. + * @return Pointer to the first occurrence of needle in haystack, or nullptr if not found. + */ +const char * +strcasestr_local(swoc::TextView haystack, swoc::TextView needle) +{ + if (needle.empty() || haystack.size() < needle.size()) { + return nullptr; + } + + for (size_t i = 0; i <= haystack.size() - needle.size(); ++i) { + if (haystack.substr(i, needle.size()).starts_with_nocase(needle)) { + return haystack.data() + i; + } + } + return nullptr; +} + +/** + * @brief Case-sensitive substring search. + * + * Searches for @a needle within @a haystack using exact (case-sensitive) comparison. + * + * @param[in] haystack The string to search within. + * @param[in] needle The pattern to search for. + * @return Pointer to the first occurrence of needle in haystack, or nullptr if not found. + */ +const char * +strstr_local(swoc::TextView haystack, swoc::TextView needle) +{ + if (needle.empty() || haystack.size() < needle.size()) { + return nullptr; + } + + auto pos = haystack.find(needle); + if (pos != std::string::npos) { + return haystack.data() + pos; + } + return nullptr; +} + +/** + * @brief Check if the HTTP method matches the rule's method filter. + * + * If the rule has no method restrictions, all methods match. + * + * @param[in] rule The rule containing method restrictions. + * @param[in] bufp The message buffer containing the HTTP headers. + * @param[in] hdr_loc The location of the HTTP header. + * @return true if the method matches or no method restriction exists, false otherwise. 
+ */ +bool +method_matches(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc) +{ + if (rule.methods.empty()) { + return true; + } + + int method_len = 0; + const char *method = TSHttpHdrMethodGet(bufp, hdr_loc, &method_len); + if (method == nullptr) { + return false; + } + + swoc::TextView method_view(method, method_len); + method_view.trim_if(::isspace); + + for (auto const &m : rule.methods) { + if (0 == strcasecmp(method_view, swoc::TextView(m))) { + return true; + } + } + return false; +} + +/** + * @brief Check if the HTTP status code matches the rule's status filter. + * + * For response rules, this checks if the response status code is in the rule's + * allowed status codes list. + * + * @param[in] rule The rule containing the status code filter. + * @param[in] bufp The message buffer containing the HTTP response. + * @param[in] hdr_loc The location of the HTTP response header. + * @return true if the status matches or no status restriction exists, false otherwise. + */ +bool +status_matches(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc) +{ + if (rule.status_codes.empty()) { + return true; // no status restriction + } + + TSHttpStatus status = TSHttpHdrStatusGet(bufp, hdr_loc); + for (int const code : rule.status_codes) { + if (static_cast(status) == code) { + return true; + } + } + return false; +} + +/** + * @brief Check if Content-Length is within the rule's max_content_length limit. + * + * If the rule has no content length limit (max_content_length < 0), all sizes are allowed. + * If the Content-Length header is missing, the check passes. + * + * @param[in] rule The rule containing the content length limit. + * @param[in] bufp The message buffer containing the HTTP headers. + * @param[in] hdr_loc The location of the HTTP header. + * @return true if content length is within limit or no limit exists, false otherwise. 
+ */ +bool +content_length_ok(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc) +{ + if (rule.max_content_length < 0) { + return true; // no limit + } + + TSMLoc field_loc = TSMimeHdrFieldFind(bufp, hdr_loc, TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH); + if (field_loc == TS_NULL_MLOC) { + return true; // no Content-Length header, allow + } + + int64_t content_length = TSMimeHdrFieldValueInt64Get(bufp, hdr_loc, field_loc, 0); + TSHandleMLocRelease(bufp, hdr_loc, field_loc); + + return content_length <= rule.max_content_length; +} + +/** + * @brief Check if a single header condition matches. + * + * Uses case-insensitive pattern search. Returns true if any pattern in the + * condition matches any value of the specified header (OR logic within header). + * + * @param[in] cond The header condition to check. + * @param[in] bufp The message buffer containing the HTTP headers. + * @param[in] hdr_loc The location of the HTTP header. + * @return true if the header exists and any pattern matches, false otherwise. + */ +bool +header_condition_matches(HeaderCondition const &cond, TSMBuffer bufp, TSMLoc hdr_loc) +{ + TSMLoc field_loc = TSMimeHdrFieldFind(bufp, hdr_loc, cond.name.c_str(), static_cast(cond.name.length())); + if (field_loc == TS_NULL_MLOC) { + return false; + } + + bool matched = false; + int num_values = TSMimeHdrFieldValuesCount(bufp, hdr_loc, field_loc); + for (int i = 0; i < num_values && !matched; ++i) { + int value_len = 0; + const char *value = TSMimeHdrFieldValueStringGet(bufp, hdr_loc, field_loc, i, &value_len); + if (value == nullptr) { + continue; + } + + swoc::TextView value_view(value, value_len); + for (auto const &pattern : cond.patterns) { + if (strcasestr_local(value_view, swoc::TextView(pattern)) != nullptr) { + matched = true; + break; + } + } + } + + TSHandleMLocRelease(bufp, hdr_loc, field_loc); + return matched; +} + +/** + * @brief Check if ALL header conditions in a rule match. 
+ * + * Uses AND logic between headers - all header conditions must match for the + * rule to apply. + * + * @param[in] rule The rule containing header conditions. + * @param[in] bufp The message buffer containing the HTTP headers. + * @param[in] hdr_loc The location of the HTTP header. + * @return true if all header conditions match, false otherwise. + */ +bool +headers_match(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc) +{ + for (auto const &cond : rule.headers) { + if (!header_condition_matches(cond, bufp, hdr_loc)) { + return false; + } + } + return true; +} + +/** + * @brief Search for body patterns in the given data. + * + * Searches for any of the rule's body patterns in the data using case-sensitive + * matching. Returns the first matched pattern. + * + * @param[in] rule The rule containing body patterns to search for. + * @param[in] data The data buffer to search within. + * @return Pointer to the matched pattern string, or nullptr if no match. + */ +std::string const * +search_body_patterns(Rule const &rule, swoc::TextView data) +{ + for (auto const &pattern : rule.body_patterns) { + if (strstr_local(data, swoc::TextView(pattern)) != nullptr) { + return &pattern; + } + } + return nullptr; +} + +/** + * @brief Add a header field to an HTTP message. + * + * Creates and appends a new header field with the given name and value. + * + * @param[in] bufp The message buffer to add the header to. + * @param[in] hdr_loc The location of the HTTP header. + * @param[in] name The header field name. + * @param[in] value The header field value. 
+ */ +void +add_header_to_message(TSMBuffer bufp, TSMLoc hdr_loc, std::string const &name, std::string const &value) +{ + TSMLoc field_loc; + if (TSMimeHdrFieldCreateNamed(bufp, hdr_loc, name.c_str(), static_cast(name.length()), &field_loc) != TS_SUCCESS) { + TSError("[%s] Failed to create header field: %s", PLUGIN_NAME, name.c_str()); + return; + } + + if (TSMimeHdrFieldValueStringSet(bufp, hdr_loc, field_loc, -1, value.c_str(), static_cast(value.length())) != TS_SUCCESS) { + TSError("[%s] Failed to set header value: %s", PLUGIN_NAME, name.c_str()); + TSHandleMLocRelease(bufp, hdr_loc, field_loc); + return; + } + + if (TSMimeHdrFieldAppend(bufp, hdr_loc, field_loc) != TS_SUCCESS) { + TSError("[%s] Failed to append header field: %s", PLUGIN_NAME, name.c_str()); + } + + TSHandleMLocRelease(bufp, hdr_loc, field_loc); +} + +/** + * @brief Substitute placeholder in header value. + * + * @param[in] value The header value that may contain . + * @param[in] rule_name The rule name to substitute. + * @return The value with replaced by the actual rule name. + */ +std::string +substitute_rule_name(std::string const &value, std::string const &rule_name) +{ + std::string result = value; + std::string const placeholder = ""; + size_t pos = 0; + while ((pos = result.find(placeholder, pos)) != std::string::npos) { + result.replace(pos, placeholder.length(), rule_name); + pos += rule_name.length(); + } + return result; +} + +/** + * @brief Execute the configured actions for a matched rule. + * + * Performs the actions specified in the rule: log, add_header, and/or block. + * For request rules, headers are added to the server request (proxy request to origin). + * For response rules, headers are added to the client response. + * + * @note Headers are added during body inspection, which occurs after headers may have + * already been sent. For request transforms, the server request headers should + * still be modifiable. 
For response transforms, headers are added before the + * response is sent to the client. + * + * @param[in,out] data The transform data containing transaction state. + * @param[in] rule The matched rule containing actions to execute. + * @param[in] matched_pattern The pattern that triggered the match (for logging). + */ +void +execute_actions(TransformData *data, Rule const *rule, std::string const *matched_pattern) +{ + // Increment the metrics counter for this rule (stat_id is guaranteed valid at load time) + TSStatIntIncrement(rule->stat_id, 1); + + // Log action always writes to diags.log so it doesn't require debug tags + if (rule->actions & ACTION_LOG) { + TSError("[%s] Matched rule: %s, pattern: %s", PLUGIN_NAME, rule->name.c_str(), + matched_pattern ? matched_pattern->c_str() : "unknown"); + } + + if ((rule->actions & ACTION_ADD_HEADER) && !data->headers_added && !rule->add_headers.empty()) { + TSMBuffer bufp; + TSMLoc hdr_loc; + bool success = false; + + if (data->direction == Direction::REQUEST) { + // For request rules: add headers to server request (proxy request going to origin) + if (TSHttpTxnServerReqGet(data->txnp, &bufp, &hdr_loc) == TS_SUCCESS) { + for (auto const &hdr : rule->add_headers) { + std::string value = substitute_rule_name(hdr.value, rule->name); + add_header_to_message(bufp, hdr_loc, hdr.name, value); + Dbg(dbg_ctl, "Added header %s: %s to server request", hdr.name.c_str(), value.c_str()); + } + TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc); + success = true; + } + } else { + // For response rules: add headers to client response + if (TSHttpTxnClientRespGet(data->txnp, &bufp, &hdr_loc) == TS_SUCCESS) { + for (auto const &hdr : rule->add_headers) { + std::string value = substitute_rule_name(hdr.value, rule->name); + add_header_to_message(bufp, hdr_loc, hdr.name, value); + Dbg(dbg_ctl, "Added header %s: %s to client response", hdr.name.c_str(), value.c_str()); + } + TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc); + success = true; 
+ } + } + + if (success) { + data->headers_added = true; + } + } + + if (rule->actions & ACTION_BLOCK) { + data->blocked = true; + TSHttpTxnStatusSet(data->txnp, TS_HTTP_STATUS_FORBIDDEN); + // Set error body so client gets a proper response + char const *error_body = "Blocked by content filter"; + TSHttpTxnErrorBodySet(data->txnp, TSstrdup(error_body), strlen(error_body), TSstrdup("text/plain")); + Dbg(dbg_ctl, "Blocking request due to rule: %s", rule->name.c_str()); + } +} + +/** + * @brief Transform continuation handler for streaming body inspection. + * + * Processes body data in a streaming fashion, searching for patterns across + * buffer blocks. Uses a lookback buffer to detect patterns that span block + * boundaries. + * + * @note The pattern search creates a temporary string when the lookback buffer + * is non-empty, which involves a memory copy. This is necessary to handle + * patterns spanning buffer boundaries. + * + * @param[in] contp The transform continuation. + * @param[in] event The event type (WRITE_READY, WRITE_COMPLETE, ERROR). + * @param[in] edata Event data (unused). + * @return Always returns 0. 
+ */ +int +transform_handler(TSCont contp, TSEvent event, void *edata ATS_UNUSED) +{ + if (TSVConnClosedGet(contp)) { + auto *data = static_cast(TSContDataGet(contp)); + if (data) { + if (data->output_reader) { + TSIOBufferReaderFree(data->output_reader); + } + if (data->output_buffer) { + TSIOBufferDestroy(data->output_buffer); + } + delete data; + } + TSContDestroy(contp); + return 0; + } + + auto *data = static_cast(TSContDataGet(contp)); + if (data == nullptr) { + return 0; + } + + switch (event) { + case TS_EVENT_ERROR: { + TSVIO write_vio = TSVConnWriteVIOGet(contp); + TSContCall(TSVIOContGet(write_vio), TS_EVENT_ERROR, write_vio); + break; + } + + case TS_EVENT_VCONN_WRITE_COMPLETE: + TSVConnShutdown(TSTransformOutputVConnGet(contp), 0, 1); + break; + + case TS_EVENT_VCONN_WRITE_READY: + default: { + // Get the write VIO + TSVIO write_vio = TSVConnWriteVIOGet(contp); + if (!TSVIOBufferGet(write_vio)) { + // No more data + if (data->output_vio) { + TSVIONBytesSet(data->output_vio, TSVIONDoneGet(write_vio)); + TSVIOReenable(data->output_vio); + } + return 0; + } + + // Initialize output buffer if needed + if (!data->output_buffer) { + TSVConn output_conn = TSTransformOutputVConnGet(contp); + data->output_buffer = TSIOBufferCreate(); + data->output_reader = TSIOBufferReaderAlloc(data->output_buffer); + + int64_t nbytes = TSVIONBytesGet(write_vio); + data->output_vio = TSVConnWrite(output_conn, contp, data->output_reader, nbytes); + } + + // Process available data + int64_t towrite = TSVIONTodoGet(write_vio); + if (towrite > 0 && !data->blocked) { + TSIOBufferReader reader = TSVIOReaderGet(write_vio); + int64_t avail = TSIOBufferReaderAvail(reader); + if (avail > towrite) { + avail = towrite; + } + + if (avail > 0) { + // Zero-copy: iterate through buffer blocks + // Stop iterating if we've already found a match (matched_rule != nullptr) + TSIOBufferBlock block = TSIOBufferReaderStart(reader); + while (block != nullptr && !data->matched_rule) { + int64_t 
block_avail = 0; + const char *block_data = TSIOBufferBlockReadStart(block, reader, &block_avail); + + if (block_data && block_avail > 0) { + // Two-phase search to minimize memory copying: + // + // Phase 1 (boundary search): When we have lookback data, create a small + // buffer containing the lookback + first few bytes of the current block. + // This catches patterns that span block boundaries. The copy is limited + // to at most (2 * max_lookback) bytes. + // + // Phase 2 (block search): Search the remainder of the current block + // in-place (zero-copy). This catches patterns entirely within the block + // that weren't already covered by Phase 1. + + size_t search_offset = 0; // Where to start Phase 2 search + + // Phase 1: Boundary search (only when we have lookback data) + // Skip if we've already found a match (matched_rule != nullptr) + if (!data->lookback.empty() && !data->matched_rule) { + // Create boundary buffer: lookback + enough of block to fully contain any + // pattern that starts within the first max_lookback bytes of the block. + // We need 2*max_lookback bytes from the block to ensure a max-length pattern + // starting at position (max_lookback-1) is fully contained. 
+ size_t boundary_extent = std::min(static_cast(block_avail), 2 * data->config->max_lookback); + std::string boundary_buffer; + boundary_buffer.reserve(data->lookback.length() + boundary_extent); + boundary_buffer = data->lookback; + boundary_buffer.append(block_data, boundary_extent); + + // Search boundary for patterns spanning block boundaries or starting near boundary + for (Rule const *rule : data->active_rules) { + std::string const *matched = search_body_patterns(*rule, swoc::TextView(boundary_buffer)); + if (matched) { + data->matched_rule = rule; + execute_actions(data, rule, matched); + break; // Stop searching after first match + } + } + + // Phase 2 starts after max_lookback bytes - these are guaranteed to be fully + // searchable in Phase 1's boundary_buffer, avoiding duplicate detection + search_offset = std::min(static_cast(block_avail), data->config->max_lookback); + } + + // Phase 2: Search remainder of block in-place (zero-copy) + // Skip if we've already found a match or bytes already covered by Phase 1 + if (!data->matched_rule && search_offset < static_cast(block_avail)) { + for (Rule const *rule : data->active_rules) { + std::string const *matched = search_body_patterns( + *rule, swoc::TextView(block_data + search_offset, static_cast(block_avail) - search_offset)); + if (matched) { + data->matched_rule = rule; + execute_actions(data, rule, matched); + break; // Stop searching after first match + } + } + } + + // Update lookback buffer (only keep last max_lookback bytes) + // Skip if we've found a match - no need to search further blocks + if (data->config->max_lookback > 0 && !data->matched_rule) { + size_t lookback_size = data->config->max_lookback; + if (static_cast(block_avail) >= lookback_size) { + data->lookback.assign(block_data + block_avail - lookback_size, lookback_size); + } else { + data->lookback.append(block_data, block_avail); + if (data->lookback.length() > lookback_size) { + data->lookback = 
data->lookback.substr(data->lookback.length() - lookback_size); + } + } + } + } + + block = TSIOBufferBlockNext(block); + } + + if (data->blocked) { + // Blocking action - complete the transform with zero output + // The 403 status we set will cause ATS to generate the error response + TSVIONBytesSet(data->output_vio, 0); + TSVIOReenable(data->output_vio); + + // Consume all remaining input + int64_t const remaining = TSIOBufferReaderAvail(reader); + if (remaining > 0) { + TSIOBufferReaderConsume(reader, remaining); + } + TSVIONDoneSet(write_vio, TSVIONBytesGet(write_vio)); + + // Signal write complete + TSContCall(TSVIOContGet(write_vio), TS_EVENT_VCONN_WRITE_COMPLETE, write_vio); + return 0; + } + + // Zero-copy: copy data through to output + TSIOBufferCopy(data->output_buffer, reader, avail, 0); + TSIOBufferReaderConsume(reader, avail); + TSVIONDoneSet(write_vio, TSVIONDoneGet(write_vio) + avail); + } + } + + // Check if we're done + if (TSVIONTodoGet(write_vio) > 0) { + if (towrite > 0) { + TSVIOReenable(data->output_vio); + TSContCall(TSVIOContGet(write_vio), TS_EVENT_VCONN_WRITE_READY, write_vio); + } + } else { + TSVIONBytesSet(data->output_vio, TSVIONDoneGet(write_vio)); + TSVIOReenable(data->output_vio); + TSContCall(TSVIOContGet(write_vio), TS_EVENT_VCONN_WRITE_COMPLETE, write_vio); + } + break; + } + } + + return 0; +} + +/** + * @brief Create a transform continuation for body inspection. + * + * Allocates and initializes a TransformData structure and creates a transform + * continuation that will process the body data. + * + * @param[in] txnp The HTTP transaction. + * @param[in] config The plugin configuration. + * @param[in] active_rules The rules that passed header matching and should be checked. + * @param[in] dir The direction (request or response) for this transform. + * @return The transform virtual connection. 
+ */ +TSVConn +create_transform(TSHttpTxn txnp, FilterConfig const *config, std::vector const &active_rules, Direction dir) +{ + TSVConn connp = TSTransformCreate(transform_handler, txnp); + + auto *data = new TransformData(); + data->txnp = txnp; + data->config = config; + data->active_rules = active_rules; + data->direction = dir; + + // Pre-allocate lookback buffer + if (config->max_lookback > 0) { + data->lookback.reserve(config->max_lookback); + } + + TSContDataSet(connp, data); + return connp; +} + +/** + * @brief Response handler for response rules. + * + * Called on TS_HTTP_READ_RESPONSE_HDR_HOOK to check response rules and add + * a response transform if any rules match. Also handles TS_HTTP_TXN_CLOSE_HOOK + * to clean up the continuation. Request rules are handled directly in TSRemapDoRemap. + * + * @param[in] contp The continuation (contains FilterConfig pointer). + * @param[in] event The event type (READ_RESPONSE_HDR or TXN_CLOSE). + * @param[in] edata The HTTP transaction. + * @return Always returns 0. 
+ */ +int +response_handler(TSCont contp, TSEvent event, void *edata) +{ + TSHttpTxn txnp = static_cast(edata); + FilterConfig const *config = static_cast(TSContDataGet(contp)); + + // Handle transaction close - clean up continuation + if (event == TS_EVENT_HTTP_TXN_CLOSE) { + TSContDestroy(contp); + TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); + return 0; + } + + if (config == nullptr) { + TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); + return 0; + } + + TSMBuffer bufp; + TSMLoc hdr_loc; + + std::vector active_rules; + + if (event == TS_EVENT_HTTP_READ_RESPONSE_HDR) { + // Check response rules + if (TSHttpTxnServerRespGet(txnp, &bufp, &hdr_loc) != TS_SUCCESS) { + TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); + return 0; + } + + for (auto const &rule : config->response_rules) { + // For response rules: check status codes and headers on response + if (status_matches(rule, bufp, hdr_loc) && content_length_ok(rule, bufp, hdr_loc) && headers_match(rule, bufp, hdr_loc)) { + Dbg(dbg_ctl, "Response rule '%s' header conditions matched, will inspect body", rule.name.c_str()); + active_rules.push_back(&rule); + } + } + + TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc); + + if (!active_rules.empty()) { + TSVConn transform = create_transform(txnp, config, active_rules, Direction::RESPONSE); + TSHttpTxnHookAdd(txnp, TS_HTTP_RESPONSE_TRANSFORM_HOOK, transform); + } + } + + TSHttpTxnReenable(txnp, TS_EVENT_HTTP_CONTINUE); + return 0; +} + +/** + * @brief Parse the YAML configuration file. + * + * Loads and parses the YAML configuration file, creating Rule objects for each + * rule definition. Rules are separated into request_rules and response_rules + * based on their direction setting. Filtering criteria are contained within a + * 'filter' node to separate them from actions. + * + * @param[in] filename The configuration file path (absolute or relative to config dir). + * @return Pointer to the parsed FilterConfig, or nullptr on error. 
+ */ +FilterConfig * +parse_config(const char *filename) +{ + std::string path; + if (filename[0] == '/') { + path = filename; + } else { + path = std::string(TSConfigDirGet()) + "/" + filename; + } + + Dbg(dbg_ctl, "Loading configuration from %s", path.c_str()); + + YAML::Node root; + try { + root = YAML::LoadFile(path); + } catch (const std::exception &ex) { + TSError("[%s] Failed to load config file '%s': %s", PLUGIN_NAME, path.c_str(), ex.what()); + return nullptr; + } + + auto *config = new FilterConfig(); + + try { + if (!root["rules"]) { + TSError("[%s] No 'rules' section in config", PLUGIN_NAME); + delete config; + return nullptr; + } + + for (auto const &rule_node : root["rules"]) { + Rule rule; + + // Name (required) + if (rule_node["name"]) { + rule.name = rule_node["name"].as(); + } else { + TSError("[%s] Rule missing 'name' field", PLUGIN_NAME); + delete config; + return nullptr; + } + + // Filter node is required (contains all filtering criteria) + YAML::Node filter_node = rule_node["filter"]; + if (!filter_node) { + TSError("[%s] Rule '%s' missing 'filter' node", PLUGIN_NAME, rule.name.c_str()); + delete config; + return nullptr; + } + + // Direction (default: request) - from filter node + if (filter_node["direction"]) { + std::string dir = filter_node["direction"].as(); + if (dir == "response") { + rule.direction = Direction::RESPONSE; + } else { + rule.direction = Direction::REQUEST; + } + } + + // Actions (default: [log]) + // Supports string actions: "log", "block" + // Supports map actions with add_header: + // - add_header: + // X-Header-Name: header-value + // X-Another: + rule.actions = 0; + if (rule_node["action"]) { + for (auto const &action_node : rule_node["action"]) { + if (action_node.IsScalar()) { + std::string action = action_node.as(); + if (action == "log") { + rule.actions |= ACTION_LOG; + } else if (action == "block") { + rule.actions |= ACTION_BLOCK; + } + } else if (action_node.IsMap() && action_node["add_header"]) { + 
rule.actions |= ACTION_ADD_HEADER; + auto const &headers_node = action_node["add_header"]; + for (auto const &hdr : headers_node) { + AddHeader add_hdr; + add_hdr.name = hdr.first.as(); + add_hdr.value = hdr.second.as(); + rule.add_headers.push_back(add_hdr); + } + } + } + } + if (rule.actions == 0) { + rule.actions = ACTION_LOG; // default + } + + // Methods (for request rules) - from filter node + if (filter_node["methods"]) { + for (auto const &method_node : filter_node["methods"]) { + rule.methods.push_back(method_node.as()); + } + } + + // Status codes (for response rules) - from filter node + if (filter_node["status"]) { + for (auto const &status_node : filter_node["status"]) { + rule.status_codes.push_back(status_node.as()); + } + } + + // Validate method/status usage + if (rule.direction == Direction::REQUEST && !rule.status_codes.empty()) { + TSError("[%s] Rule '%s': 'status' is only valid for response rules", PLUGIN_NAME, rule.name.c_str()); + delete config; + return nullptr; + } + if (rule.direction == Direction::RESPONSE && !rule.methods.empty()) { + TSError("[%s] Rule '%s': 'methods' is only valid for request rules", PLUGIN_NAME, rule.name.c_str()); + delete config; + return nullptr; + } + + // Max content length - from filter node + if (filter_node["max_content_length"]) { + rule.max_content_length = filter_node["max_content_length"].as(); + } + + // Header conditions - from filter node + if (filter_node["headers"]) { + for (auto const &header_node : filter_node["headers"]) { + HeaderCondition cond; + if (header_node["name"]) { + cond.name = header_node["name"].as(); + } + if (header_node["patterns"]) { + for (auto const &pattern_node : header_node["patterns"]) { + cond.patterns.push_back(pattern_node.as()); + } + } + rule.headers.push_back(cond); + } + } + + // Body patterns - from filter node + if (filter_node["body_patterns"]) { + for (auto const &pattern_node : filter_node["body_patterns"]) { + std::string pattern = pattern_node.as(); + 
rule.body_patterns.push_back(pattern); + if (pattern.length() > rule.max_pattern_len) { + rule.max_pattern_len = pattern.length(); + } + } + } + + // Update max lookback + if (rule.max_pattern_len > 1) { + size_t lookback = rule.max_pattern_len - 1; + if (lookback > config->max_lookback) { + config->max_lookback = lookback; + } + } + + // Create a metrics counter for this rule + std::string stat_name = std::string("plugin.") + PLUGIN_NAME + ".rule." + rule.name + ".matches"; + rule.stat_id = TSStatCreate(stat_name.c_str(), TS_RECORDDATATYPE_INT, TS_STAT_NON_PERSISTENT, TS_STAT_SYNC_COUNT); + if (rule.stat_id == TS_ERROR) { + TSError("[%s] Failed to create stat '%s'", PLUGIN_NAME, stat_name.c_str()); + delete config; + return nullptr; + } + Dbg(dbg_ctl, "Created stat '%s' with id %d", stat_name.c_str(), rule.stat_id); + + Dbg(dbg_ctl, "Loaded rule: %s (direction=%s, actions=%u)", rule.name.c_str(), + rule.direction == Direction::REQUEST ? "request" : "response", rule.actions); + + // Add to appropriate list + if (rule.direction == Direction::REQUEST) { + config->request_rules.push_back(std::move(rule)); + } else { + config->response_rules.push_back(std::move(rule)); + } + } + } catch (const std::exception &ex) { + TSError("[%s] Error parsing config: %s", PLUGIN_NAME, ex.what()); + delete config; + return nullptr; + } + + Dbg(dbg_ctl, "Loaded %zu request rules and %zu response rules (max_lookback=%zu)", config->request_rules.size(), + config->response_rules.size(), config->max_lookback); + + return config; +} + +} // anonymous namespace + +/////////////////////////////////////////////////////////////////////////////// +// Remap plugin interface +/////////////////////////////////////////////////////////////////////////////// + +TSReturnCode +TSRemapInit(TSRemapInterface *api_info, char *errbuf, int errbuf_size) +{ + if (!api_info) { + TSstrlcpy(errbuf, "[TSRemapInit] Invalid TSRemapInterface argument", errbuf_size); + return TS_ERROR; + } + + if (api_info->size < 
sizeof(TSRemapInterface)) { + TSstrlcpy(errbuf, "[TSRemapInit] Incorrect size of TSRemapInterface structure", errbuf_size); + return TS_ERROR; + } + + Dbg(dbg_ctl, "filter_body remap plugin initialized"); + return TS_SUCCESS; +} + +TSReturnCode +TSRemapNewInstance(int argc, char *argv[], void **instance, char *errbuf, int errbuf_size) +{ + if (argc < 3) { + TSstrlcpy(errbuf, "[TSRemapNewInstance] Missing configuration file argument", errbuf_size); + return TS_ERROR; + } + + FilterConfig *config = parse_config(argv[2]); + if (config == nullptr) { + TSstrlcpy(errbuf, "[TSRemapNewInstance] Failed to parse configuration file", errbuf_size); + return TS_ERROR; + } + + *instance = config; + return TS_SUCCESS; +} + +void +TSRemapDeleteInstance(void *instance) +{ + auto *config = static_cast(instance); + delete config; +} + +TSRemapStatus +TSRemapDoRemap(void *instance, TSHttpTxn txnp, TSRemapRequestInfo *rri ATS_UNUSED) +{ + auto *config = static_cast(instance); + if (config == nullptr) { + return TSREMAP_NO_REMAP; + } + + // For request rules, check headers now (in TSRemapDoRemap, headers are already available) + if (!config->request_rules.empty()) { + TSMBuffer bufp; + TSMLoc hdr_loc; + + if (TSHttpTxnClientReqGet(txnp, &bufp, &hdr_loc) == TS_SUCCESS) { + std::vector active_rules; + + for (auto const &rule : config->request_rules) { + if (method_matches(rule, bufp, hdr_loc) && content_length_ok(rule, bufp, hdr_loc) && headers_match(rule, bufp, hdr_loc)) { + Dbg(dbg_ctl, "Request rule '%s' header conditions matched, will inspect body", rule.name.c_str()); + active_rules.push_back(&rule); + } + } + + TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc); + + if (!active_rules.empty()) { + TSVConn transform = create_transform(txnp, config, active_rules, Direction::REQUEST); + TSHttpTxnHookAdd(txnp, TS_HTTP_REQUEST_TRANSFORM_HOOK, transform); + } + } + } + + // For response rules, add a hook to check when response headers arrive + if (!config->response_rules.empty()) { + TSCont 
contp = TSContCreate(response_handler, nullptr); + TSContDataSet(contp, config); + TSHttpTxnHookAdd(txnp, TS_HTTP_READ_RESPONSE_HDR_HOOK, contp); + // Add TXN_CLOSE_HOOK to clean up the continuation + TSHttpTxnHookAdd(txnp, TS_HTTP_TXN_CLOSE_HOOK, contp); + } + + return TSREMAP_NO_REMAP; +} diff --git a/tests/gold_tests/autest-site/ats_replay.test.ext b/tests/gold_tests/autest-site/ats_replay.test.ext index 3028ac9edff..63ee6c217f5 100644 --- a/tests/gold_tests/autest-site/ats_replay.test.ext +++ b/tests/gold_tests/autest-site/ats_replay.test.ext @@ -162,6 +162,10 @@ def ATSReplayTest(obj, replay_file: str): process_config = server_config.get('process_config', {}) server = tr.AddVerifierServerProcess(name, replay_file, **process_config) + # Set expected return code for server if specified. + if 'return_code' in server_config: + server.ReturnCode = server_config['return_code'] + # ATS configuration. if not 'ats' in autest_config: raise ValueError(f"Replay file {replay_file} does not contain 'autest.ats' section") @@ -179,6 +183,10 @@ def ATSReplayTest(obj, replay_file: str): client = tr.AddVerifierClientProcess( name, replay_file, http_ports=[ts.Variables.port], https_ports=https_ports, **process_config) + # Set expected return code if specified. + if 'return_code' in client_config: + client.ReturnCode = client_config['return_code'] + if dns: ts.StartBefore(dns) ts.StartBefore(server) diff --git a/tests/gold_tests/autest-site/trafficserver.test.ext b/tests/gold_tests/autest-site/trafficserver.test.ext index a76762d029e..57f09b8329d 100755 --- a/tests/gold_tests/autest-site/trafficserver.test.ext +++ b/tests/gold_tests/autest-site/trafficserver.test.ext @@ -58,7 +58,8 @@ def MakeATSProcess( log_data=default_log_data, use_traffic_out=True, dump_runroot=True, - enable_proxy_protocol=False): + enable_proxy_protocol=False, + disable_log_checks=False): """Create a traffic server process. 
:param block_for_debug: if True, causes traffic_server to run with the @@ -240,11 +241,12 @@ def MakeATSProcess( tmpname = os.path.join(log_dir, fname) p.Disk.File(tmpname, id='diags_log') # add this test back once we have network namespaces working again - p.Disk.diags_log.Content = Testers.ExcludesExpression("ERROR:", f"Diags log file {fname} should not contain errors") - p.Disk.diags_log.Content += Testers.ExcludesExpression("FATAL:", f"Diags log file {fname} should not contain errors") - p.Disk.diags_log.Content += Testers.ExcludesExpression( - "Unrecognized configuration value", - f"Diags log file {fname} should not contain a warning about an unrecognized configuration") + if not disable_log_checks: + p.Disk.diags_log.Content = Testers.ExcludesExpression("ERROR:", f"Diags log file {fname} should not contain errors") + p.Disk.diags_log.Content += Testers.ExcludesExpression("FATAL:", f"Diags log file {fname} should not contain errors") + p.Disk.diags_log.Content += Testers.ExcludesExpression( + "Unrecognized configuration value", + f"Diags log file {fname} should not contain a warning about an unrecognized configuration") # traffic.out fname = "traffic.out" diff --git a/tests/gold_tests/pluginTest/filter_body/config/filter_body_request_block.yaml b/tests/gold_tests/pluginTest/filter_body/config/filter_body_request_block.yaml new file mode 100644 index 00000000000..baf83838ec2 --- /dev/null +++ b/tests/gold_tests/pluginTest/filter_body/config/filter_body_request_block.yaml @@ -0,0 +1,16 @@ +# Configuration for blocking requests with XXE patterns. 
+rules: + - name: "xxe_request_block" + filter: + direction: request + methods: [POST] + headers: + - name: "Content-Type" + patterns: + - "application/xml" + - "text/xml" + body_patterns: + - "" + diff --git a/tests/gold_tests/pluginTest/filter_body/config/filter_body_request_log.yaml b/tests/gold_tests/pluginTest/filter_body/config/filter_body_request_log.yaml new file mode 100644 index 00000000000..47f42f6fd57 --- /dev/null +++ b/tests/gold_tests/pluginTest/filter_body/config/filter_body_request_log.yaml @@ -0,0 +1,16 @@ +# Configuration for logging requests with XXE patterns (no blocking). +rules: + - name: "xxe_request_log" + filter: + direction: request + methods: [POST] + headers: + - name: "Content-Type" + patterns: + - "application/xml" + - "text/xml" + body_patterns: + - "" diff --git a/tests/gold_tests/pluginTest/filter_body/config/filter_body_response_log.yaml b/tests/gold_tests/pluginTest/filter_body/config/filter_body_response_log.yaml new file mode 100644 index 00000000000..ad588a2f0d8 --- /dev/null +++ b/tests/gold_tests/pluginTest/filter_body/config/filter_body_response_log.yaml @@ -0,0 +1,16 @@ +# Configuration for logging sensitive data in responses (no blocking). +rules: + - name: "sensitive_response_log" + filter: + direction: response + status: [200] + headers: + - name: "Content-Type" + patterns: + - "application/json" + - "text/html" + body_patterns: + - "SSN:" + - "password:" + action: [log] + diff --git a/tests/gold_tests/pluginTest/filter_body/filter_body.replay.yaml b/tests/gold_tests/pluginTest/filter_body/filter_body.replay.yaml new file mode 100644 index 00000000000..1b70d6048ad --- /dev/null +++ b/tests/gold_tests/pluginTest/filter_body/filter_body.replay.yaml @@ -0,0 +1,365 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Consolidated filter_body plugin tests. +# Each remap rule tests a different configuration, differentiated by host header. +# +meta: + version: "1.0" + +autest: + description: 'Verify filter_body plugin for request/response body content filtering' + + server: + name: 'server' + + client: + name: 'client' + return_code: 1 + + ats: + name: 'ts' + process_config: + enable_cache: false + # The filtered requests produce an ERROR message, so we have to disable + # the default log checks of the trafficserver extension. 
+ disable_log_checks: true + + copy_to_config_dir: + - config + + records_config: + proxy.config.diags.debug.enabled: 1 + proxy.config.diags.debug.tags: 'filter_body' + + remap_config: + # Request log only - pattern logged but request passes through + - from: http://request-log.example.com/ + to: http://127.0.0.1:{SERVER_HTTP_PORT}/ + plugins: + - name: "filter_body.so" + args: + - "config/filter_body_request_log.yaml" + + # Request block - request with XXE pattern is blocked + - from: http://request-block.example.com/ + to: http://127.0.0.1:{SERVER_HTTP_PORT}/ + plugins: + - name: "filter_body.so" + args: + - "config/filter_body_request_block.yaml" + + # Request header - request passes, header added to server request + - from: http://request-header.example.com/ + to: http://127.0.0.1:{SERVER_HTTP_PORT}/ + plugins: + - name: "filter_body.so" + args: + - "config/filter_body_request_header.yaml" + + # Request no match - header mismatch, no body inspection + - from: http://request-nomatch.example.com/ + to: http://127.0.0.1:{SERVER_HTTP_PORT}/ + plugins: + - name: "filter_body.so" + args: + - "config/filter_body_request_block.yaml" + + # Response log - detect sensitive data in responses + - from: http://response-log.example.com/ + to: http://127.0.0.1:{SERVER_HTTP_PORT}/ + plugins: + - name: "filter_body.so" + args: + - "config/filter_body_response_log.yaml" + + # Response header - add header when sensitive data detected + - from: http://response-header.example.com/ + to: http://127.0.0.1:{SERVER_HTTP_PORT}/ + plugins: + - name: "filter_body.so" + args: + - "config/filter_body_response_header.yaml" + + # Response block - block responses with sensitive data + - from: http://response-block.example.com/ + to: http://127.0.0.1:{SERVER_HTTP_PORT}/ + plugins: + - name: "filter_body.so" + args: + - "config/filter_body_response_block.yaml" + + log_validation: + diags_log: + contains: + - expression: "Matched rule: xxe_request_log" + description: "Verify request log rule 
matched" + - expression: "Matched rule: xxe_request_block" + description: "Verify request block rule matched" + - expression: "Matched rule: xxe_request_header" + description: "Verify request header rule matched" + - expression: "Matched rule: sensitive_response_log" + description: "Verify response log rule matched" + - expression: "Matched rule: sensitive_response_header" + description: "Verify response header rule matched" + - expression: "Matched rule: sensitive_response_block" + description: "Verify response block rule matched" + traffic_out: + contains: + - expression: "Blocking request due to rule" + description: "Verify request blocking action was taken" + - expression: "Added header X-Security-Match" + description: "Verify header was added for request" + # Adding an internal response is not supported while streaming the body. + #- expression: "Added header X-Data-Classification" + # description: "Verify header was added for response" + +sessions: + ############################################################################# + # Test 1: Request log only - pattern logged but request passes through + ############################################################################# + - transactions: + - client-request: + method: "POST" + version: "1.1" + url: /api/data + headers: + fields: + - [Host, request-log.example.com] + - [Content-Type, "application/xml"] + - [Content-Length, 49] + - [uuid, request-log-test] + content: + data: '' + + proxy-request: + method: "POST" + url: /api/data + + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 2] + content: + data: "OK" + + proxy-response: + status: 200 + + ############################################################################# + # Test 2: Request block - request with XXE pattern is blocked + # + # When blocking request bodies, ATS closes the connection to the origin and + # the client either experiences simply a closed connection or, depending upon + # timing, a 502 Bad
Gateway response. The plugin cannot send a custom error + # response (like 403) because the request headers have already been sent to + # the origin by the time the body is inspected. + ############################################################################# + - transactions: + - client-request: + method: "POST" + version: "1.1" + url: /api/data + headers: + fields: + - [Host, request-block.example.com] + - [Content-Type, "application/xml+plus_other_stuff"] + - [Content-Length, 49] + - [uuid, request-block-test] + content: + data: '' + + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 2] + content: + data: "OK" + + proxy-response: + status: 502 + + ############################################################################# + # Test 3: Request header - request passes, header added to server request + ############################################################################# + - transactions: + - client-request: + method: "POST" + version: "1.1" + url: /api/data + headers: + fields: + - [Host, request-header.example.com] + - [Content-Type, "application/xml"] + - [Content-Length, 24] + - [uuid, request-header-test] + content: + data: '' + + proxy-request: + method: "POST" + url: /api/data + # Note that only internal headers are added since the body is + # inspected after the headers are sent to the origin. So + # don't expect to see any external headers added. 
+ + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 2] + content: + data: "OK" + + proxy-response: + status: 200 + + ############################################################################# + # Test 4: Request no match - header mismatch, no body inspection + # Uses block config but with wrong Content-Type, so no inspection occurs + ############################################################################# + - transactions: + - client-request: + method: "POST" + version: "1.1" + url: /api/data + headers: + fields: + - [Host, request-nomatch.example.com] + - [Content-Type, "application/json"] + - [Content-Length, 49] + - [uuid, request-nomatch-test] + content: + data: '' + + proxy-request: + method: "POST" + url: /api/data + + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Length, 2] + content: + data: "OK" + + proxy-response: + status: 200 + + ############################################################################# + # Test 5: Response log - detect sensitive data in responses + ############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /api/user + headers: + fields: + - [Host, response-log.example.com] + - [uuid, response-log-test] + + proxy-request: + method: "GET" + url: /api/user + + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Type, "application/json"] + - [Content-Length, 36] + content: + data: '{"name": "John", "SSN: 123-45-6789"}' + + proxy-response: + status: 200 + + ############################################################################# + # Test 6: Response header - detect pattern and attempt header addition + # Note: Response header addition during transforms has timing limitations + ############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: 
/api/secret + headers: + fields: + - [Host, response-header.example.com] + - [uuid, response-header-test] + + proxy-request: + method: "GET" + url: /api/secret + + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Type, "application/json"] + - [Content-Length, 28] + content: + data: '{"data": "secret_data here"}' + + proxy-response: + status: 200 + # Note that only internal headers are added since the body is + # inspected after the response headers are sent to the client. So + # don't expect to see any external headers added. + + ############################################################################# + # Test 7: Response block - detect pattern and attempt blocking + # Note: Response blocking after streaming starts has limitations - the + # response will still return 200 but the body will be blocked. + ############################################################################# + - transactions: + - client-request: + method: "GET" + version: "1.1" + url: /api/blocked + headers: + fields: + - [Host, response-block.example.com] + - [uuid, response-block-test] + + proxy-request: + method: "GET" + url: /api/blocked + + server-response: + status: 200 + reason: OK + headers: + fields: + - [Content-Type, "application/json"] + - [Content-Length, 36] + content: + data: '{"name": "John", "SSN: 123-45-6789"}' + + # Note that blocking happens after the 200 response headers are sent. + proxy-response: + status: 200 diff --git a/tests/gold_tests/pluginTest/filter_body/filter_body.test.py b/tests/gold_tests/pluginTest/filter_body/filter_body.test.py new file mode 100644 index 00000000000..12931b23c4d --- /dev/null +++ b/tests/gold_tests/pluginTest/filter_body/filter_body.test.py @@ -0,0 +1,24 @@ +''' +Verify filter_body plugin for request/response body content filtering. +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +Test.Summary = 'Verify filter_body plugin for request/response body content filtering.' + +Test.SkipUnless(Condition.PluginExists('filter_body.so')) + +Test.ATSReplayTest(replay_file="filter_body.replay.yaml")