Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion doc/admin-guide/plugins/traffic_dump.en.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ Plugin Configuration

(`required`) - specifies the max disk usage N bytes (approximate). Traffic Dump will stop capturing new sessions once disk usage exceeds this limit.

.. option:: --sensitive-fields <field1,field2,...,fieldn>

(`optional`) - a comma separated list of case-insensitive HTTP field names whose values are considered sensitive information. Traffic Dump will not dump the incoming field values for any of these fields but will instead dump a generic value for them of the same length as the original. If this option is not used, a default list of "Cookie,Set-Cookie" is used. Providing this option overwrites that default list with whatever values the user provides. Pass a quoted empty string as the argument to specify that no fields are sensitive.

``traffic_ctl`` <command>
* ``traffic_ctl plugin msg traffic_dump.sample N`` - changes the sampling ratio N as mentioned above.
* ``traffic_ctl plugin msg traffic_dump.reset`` - resets the disk usage counter.
Expand All @@ -57,7 +61,7 @@ This format contains traffic data including:

* Each session and transactions in the session.
* Timestamps.
* The four headers (ua request, proxy request, origin server response, proxy response).
* The four sets of headers (user agent request, proxy request, origin server response, proxy response).
* The protocol stack for the user agent.
* The transaction count for the outbound session.
* The content block sizes.
Expand Down
140 changes: 134 additions & 6 deletions plugins/experimental/traffic_dump/traffic_dump.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,59 @@
#include <arpa/inet.h>
#include <netinet/in.h>

#include <algorithm>
#include <sstream>
#include <iomanip>
#include <chrono>
#include <atomic>
#include <string>
#include <string_view>
#include <unordered_set>

#include "tscore/ts_file.h"
#include "tscpp/util/TextView.h"
#include "ts/ts.h"

namespace
{
const char *PLUGIN_NAME = "traffic_dump";
const std::string closing = "]}]}";
std::string defaut_sensitive_field_value;

/// Equality predicate for HTTP field names that ignores letter case.
///
/// Two names are treated as equal when strcasecmp() reports 0 for them.
/// NOTE(review): this relies on a strcasecmp() overload that accepts
/// std::string_view arguments, presumably the one declared in
/// tscpp/util/TextView.h - confirm.
struct InsensitiveCompare {
  bool
  operator()(std::string_view a, std::string_view b) const
  {
    const int ordering = strcasecmp(a, b);
    return 0 == ordering;
  }
};

/// Hash functor for HTTP field names that ignores letter case.
///
/// The input is lower-cased with std::tolower and the result is hashed with
/// std::hash<std::string>, so names that differ only by case hash to the same
/// value.
struct StringHashByLower {
public:
  /// @param[in] str The field name to hash.
  /// @return The std::hash value of the lower-cased copy of @a str.
  size_t
  operator()(const std::string &str) const
  {
    // The destination must already hold str.size() characters: std::transform
    // only writes through the output iterator and never grows the string.
    // Writing into an empty string would run past its storage and leave every
    // name hashing as the empty string.
    std::string lower(str.size(), '\0');
    std::transform(str.begin(), str.end(), lower.begin(), [](unsigned char c) -> unsigned char { return std::tolower(c); });
    return std::hash<std::string>()(lower);
  }
};

/// Fields considered sensitive because they may contain user-private
/// information. The values of these fields are replaced with auto-generated
/// generic content by default. To use a different list, the user should pass
/// the --sensitive-fields option as a commandline argument; passing it a
/// quoted empty string marks no fields as sensitive.
///
/// While these are specified with case, they are matched case-insensitively.
std::unordered_set<std::string, StringHashByLower, InsensitiveCompare> default_sensitive_fields = {
"Set-Cookie",
"Cookie",
};

/// The set of fields, default and user-specified, that are sensitive and whose
/// values will be replaced with auto-generated generic content.
std::unordered_set<std::string, StringHashByLower, InsensitiveCompare> sensitive_fields;

ts::file::path log_path{"dump"}; // default log directory
int s_arg_idx = 0; // Session Arg Index to pass on session data
Expand Down Expand Up @@ -183,10 +222,10 @@ esc_json_out(const char *buf, int64_t len, std::ostream &jsonfile)

/// escape_json(): escape chars in a string and returns json string
std::string
escape_json(std::string const &s)
escape_json(std::string_view s)
{
std::ostringstream o;
esc_json_out(s.c_str(), s.length(), o);
esc_json_out(s.data(), s.length(), o);
return o.str();
}
std::string
Expand All @@ -205,9 +244,9 @@ json_entry(std::string const &name, const char *value, int64_t size)

/// json_entry_array(): Formats to array-style entry i.e. ["field","value"]
inline std::string
json_entry_array(const char *name, int name_len, const char *value, int value_len)
json_entry_array(std::string_view name, std::string_view value)
{
return "[\"" + escape_json(name, name_len) + "\", \"" + escape_json(value, value_len) + "\"]";
return "[\"" + escape_json(name) + "\", \"" + escape_json(value) + "\"]";
}

/** Remove the scheme prefix from the url.
Expand Down Expand Up @@ -236,6 +275,49 @@ write_content_node(int64_t num_body_bytes)
return std::string(R"(,"content":{"encoding":"plain","size":)" + std::to_string(num_body_bytes) + '}');
}

/** Populate the generic value that is dumped in place of sensitive field
 * values seen on the wire.
 *
 * The buffer is filled with consecutive 8-character chunks, each holding a
 * zero-padded 7-digit hexadecimal chunk index followed by a space.
 */
void
initialize_default_sensitive_field()
{
  // 128 KB is the maximum size supported for all headers, so a buffer of this
  // size should cover any single field value.
  constexpr size_t default_field_size = 128 * 1024;
  constexpr size_t chunk_width        = 8;
  constexpr unsigned chunk_count      = static_cast<unsigned>(default_field_size / chunk_width);

  defaut_sensitive_field_value.resize(default_field_size);

  char *cursor = defaut_sensitive_field_value.data();
  for (unsigned chunk = 0; chunk < chunk_count; ++chunk, cursor += chunk_width) {
    // Each call emits exactly chunk_width characters plus a terminating NUL,
    // which the next chunk (or the string's own terminator slot) overwrites.
    sprintf(cursor, "%07x ", chunk);
  }
}

/** Choose the value to dump for a field, masking it when the field is
 * sensitive.
 *
 * A field whose name is not in sensitive_fields is returned untouched.
 * Otherwise a prefix of the pre-generated generic value is returned with the
 * same length as the original. If the original is longer than the generic
 * value, an error is logged and the whole generic value is returned instead.
 *
 * @param[in] name The field name to inspect.
 * @param[in] original_value The field value to inspect.
 *
 * @return The value traffic_dump should dump for the given field. For a
 * sensitive field this is a view into defaut_sensitive_field_value.
 */
std::string_view
replace_sensitive_fields(std::string_view name, std::string_view original_value)
{
  const bool is_sensitive = sensitive_fields.count(std::string(name)) != 0;
  if (!is_sensitive) {
    return original_value;
  }

  const auto generic_capacity = defaut_sensitive_field_value.size();
  const auto incoming_size    = original_value.size();
  if (incoming_size <= generic_capacity) {
    return std::string_view{defaut_sensitive_field_value.data(), incoming_size};
  }

  // The wire value does not fit; report it and fall back to the full buffer.
  TSError("[%s] Encountered a sensitive field value larger than our default "
          "field size. Default size: %zu, incoming field size: %zu",
          PLUGIN_NAME, generic_capacity, incoming_size);
  return std::string_view{defaut_sensitive_field_value.data(), generic_capacity};
}

/// Read the txn information from TSMBuffer and write the header information.
/// This function does not write the content node.
std::string
Expand Down Expand Up @@ -302,8 +384,11 @@ write_message_node_no_content(TSMBuffer &buffer, TSMLoc &hdr_loc)
int name_len = 0, value_len = 0;
// Append to "fields" list if valid value exists
if ((name = TSMimeHdrFieldNameGet(buffer, hdr_loc, field_loc, &name_len)) && name_len) {
std::string_view name_view{name, static_cast<size_t>(name_len)};
value = TSMimeHdrFieldValueStringGet(buffer, hdr_loc, field_loc, -1, &value_len);
result += json_entry_array(name, name_len, value, value_len);
std::string_view value_view{value, static_cast<size_t>(value_len)};
std::string_view new_value = replace_sensitive_fields(name_view, value_view);
result += json_entry_array(name_view, new_value);
}

next_field_loc = TSMimeHdrFieldNext(buffer, hdr_loc, field_loc);
Expand Down Expand Up @@ -391,6 +476,7 @@ session_txn_handler(TSCont contp, TSEvent event, void *edata)
// Get UUID
char uuid[TS_CRUUID_STRING_LEN + 1];
TSAssert(TS_SUCCESS == TSClientRequestUuidGet(txnp, uuid));
std::string_view uuid_view{uuid, strnlen(uuid, TS_CRUUID_STRING_LEN)};

// Generate per transaction json records
if (!ssnData->first) {
Expand All @@ -408,7 +494,7 @@ session_txn_handler(TSCont contp, TSEvent event, void *edata)
// The uuid is a header field for each message in the transaction. Use the
// "all" node to apply to each message.
std::string_view name = "uuid";
txn_info += ",\"all\":{\"headers\":{\"fields\":[" + json_entry_array(name.data(), name.size(), uuid, strlen(uuid));
txn_info += ",\"all\":{\"headers\":{\"fields\":[" + json_entry_array(name, uuid_view);
txn_info += "]}}";
ssnData->write_to_disk(txn_info);
break;
Expand Down Expand Up @@ -633,15 +719,38 @@ TSPluginInit(int argc, const char *argv[])
info.vendor_name = "Apache Software Foundation";
info.support_email = "dev@trafficserver.apache.org";

bool sensitive_fields_were_specified = false;
/// Commandline options
static const struct option longopts[] = {{"logdir", required_argument, nullptr, 'l'},
{"sample", required_argument, nullptr, 's'},
{"limit", required_argument, nullptr, 'm'},
{"sensitive-fields", required_argument, nullptr, 'f'},
{nullptr, no_argument, nullptr, 0}};
int opt = 0;
while (opt >= 0) {
opt = getopt_long(argc, const_cast<char *const *>(argv), "l:", longopts, nullptr);
switch (opt) {
case 'f': {
// --sensitive-fields takes a comma-separated list of HTTP fields that
// are sensitive. The field values for these fields will be replaced
// with generic traffic_dump generated data.
//
// If this option is not used, then the default values in
// default_sensitive_fields are used. If this option is used, then it
// replaces the default sensitive fields with the user-supplied list of
// sensitive fields.
sensitive_fields_were_specified = true;
ts::TextView input_filter_fields{std::string_view{optarg}};
ts::TextView filter_field;
while (!(filter_field = input_filter_fields.take_prefix_at(',')).empty()) {
filter_field.trim_if(&isspace);
if (filter_field.empty()) {
continue;
}
sensitive_fields.emplace(filter_field);
}
break;
}
case 'l': {
log_path = ts::file::path{optarg};
break;
Expand All @@ -664,6 +773,23 @@ TSPluginInit(int argc, const char *argv[])
}
}

if (!sensitive_fields_were_specified) {
// The user did not provide their own list of sensitive fields. Use the
// default.
sensitive_fields.merge(default_sensitive_fields);
}

std::string sensitive_fields_string;
bool is_first = true;
for (const auto &field : sensitive_fields) {
if (!is_first) {
sensitive_fields_string += ", ";
}
is_first = false;
sensitive_fields_string += field;
}
TSDebug(PLUGIN_NAME, "Sensitive fields for which generic values will be dumped: %s", sensitive_fields_string.c_str());

// Make absolute path if not
if (!log_path.is_absolute()) {
log_path = ts::file::path(TSInstallDirGet()) / log_path;
Expand All @@ -675,6 +801,8 @@ TSPluginInit(int argc, const char *argv[])
} else if (TS_SUCCESS != TSUserArgIndexReserve(TS_USER_ARGS_SSN, PLUGIN_NAME, "Track log related data", &s_arg_idx)) {
TSError("[%s] Unable to initialize plugin (disabled). Failed to reserve ssn arg.", PLUGIN_NAME);
} else {
initialize_default_sensitive_field();

/// Add global hooks
TSCont ssncont = TSContCreate(global_ssn_handler, nullptr);
TSHttpHookAdd(TS_HTTP_SSN_START_HOOK, ssncont);
Expand Down
1 change: 1 addition & 0 deletions tests/gold_tests/pluginTest/traffic_dump/gold/200.gold
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
``
< HTTP/1.1 200 OK
< Content-Length: 0
< Set-Cookie: classified_not_for_logging
< Date: ``
< Age: ``
< Server: ATS/``
Expand Down
46 changes: 31 additions & 15 deletions tests/gold_tests/pluginTest/traffic_dump/traffic_dump.test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,16 @@
"Host: www.example.com\r\nContent-Length: 0\r\n\r\n",
"timestamp": "1469733493.993", "body": ""}
response_header = {"headers": "HTTP/1.1 200 OK"
"\r\nConnection: close\r\nContent-Length: 0\r\n\r\n",
"\r\nConnection: close\r\nContent-Length: 0"
"\r\nSet-Cookie: classified_not_for_logging\r\n\r\n",
"timestamp": "1469733493.993", "body": ""}
server.addResponse("sessionfile.log", request_header, response_header)
request_header = {"headers": "GET /one HTTP/1.1\r\n"
"Host: www.example.com\r\nContent-Length: 0\r\n\r\n",
"timestamp": "1469733493.993", "body": ""}
response_header = {"headers": "HTTP/1.1 200 OK"
"\r\nConnection: close\r\nContent-Length: 0\r\n\r\n",
"\r\nConnection: close\r\nContent-Length: 0"
"\r\nSet-Cookie: classified_not_for_logging\r\n\r\n",
"timestamp": "1469733493.993", "body": ""}
server.addResponse("sessionfile.log", request_header, response_header)
request_header = {"headers": "GET /post_with_body HTTP/1.1\r\n"
Expand All @@ -63,7 +65,8 @@
)
# Configure traffic_dump.
ts.Disk.plugin_config.AddLine(
'traffic_dump.so --logdir {0} --sample 1 --limit 1000000000'.format(replay_dir)
'traffic_dump.so --logdir {0} --sample 1 --limit 1000000000 '
'--sensitive-fields "cookie,set-cookie,x-request-1,x-request-2"'.format(replay_dir)
)

# Set up trafficserver expectations.
Expand Down Expand Up @@ -99,17 +102,21 @@

tr.Processes.Default.StartBefore(server, ready=When.PortOpen(server.Variables.Port))
tr.Processes.Default.StartBefore(Test.Processes.ts)
tr.Processes.Default.Command = 'curl http://127.0.0.1:{0} -H\'Host: www.example.com\' --verbose'.format(
ts.Variables.port)
tr.Processes.Default.Command = \
('curl http://127.0.0.1:{0} -H"Cookie: donotlogthis" '
'-H"Host: www.example.com" -H"X-Request-1: ultra_sensitive" --verbose'.format(
ts.Variables.port))
tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.stderr = "gold/200.gold"
tr.StillRunningAfter = server
tr.StillRunningAfter = ts

# Execute the second transaction.
tr = Test.AddTestRun("Second transaction")
tr.Processes.Default.Command = 'curl http://127.0.0.1:{0}/one -H\'Host: www.example.com\' --verbose'.format(
ts.Variables.port)
tr.Processes.Default.Command = \
('curl http://127.0.0.1:{0}/one -H"Host: www.example.com" '
'-H"X-Request-2: also_very_sensitive" --verbose'.format(
ts.Variables.port))
tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.stderr = "gold/200.gold"
tr.StillRunningAfter = server
Expand All @@ -118,22 +125,29 @@
# Verify the properties of the replay file for the first transaction.
tr = Test.AddTestRun("Verify the json content of the first session")
verify_replay = "verify_replay.py"
sensitive_fields_arg = (
"--sensitive-fields cookie "
"--sensitive-fields set-cookie "
"--sensitive-fields x-request-1 "
"--sensitive-fields x-request-2 ")
tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
tr.Processes.Default.Command = "python3 {0} {1} {2}".format(
tr.Processes.Default.Command = "python3 {0} {1} {2} {3}".format(
verify_replay,
os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'),
replay_file_session_1)
replay_file_session_1,
sensitive_fields_arg)
tr.Processes.Default.ReturnCode = 0
tr.StillRunningAfter = server
tr.StillRunningAfter = ts

# Verify the properties of the replay file for the second transaction.
tr = Test.AddTestRun("Verify the json content of the second session")
tr.Setup.CopyAs(verify_replay, Test.RunDirectory)
tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target '/one'".format(
tr.Processes.Default.Command = "python3 {0} {1} {2} {3} --request-target '/one'".format(
verify_replay,
os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'),
replay_file_session_2)
replay_file_session_2,
sensitive_fields_arg)
tr.Processes.Default.ReturnCode = 0
tr.StillRunningAfter = server
tr.StillRunningAfter = ts
Expand All @@ -147,7 +161,7 @@
request_target = "http://localhost:{0}/candy".format(ts.Variables.port)
tr.Processes.Default.Command = (
'curl --request-target "{0}" '
'http://127.0.0.1:{1} -H\'Host: www.example.com\' --verbose'.format(
'http://127.0.0.1:{1} -H"Host: www.example.com" --verbose'.format(
request_target, ts.Variables.port))
tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.stderr = "gold/explicit_target.gold"
Expand All @@ -157,10 +171,11 @@
tr = Test.AddTestRun("Verify the replay file has the explicit target.")
tr.Setup.CopyAs(verify_replay, Test.RunDirectory)

tr.Processes.Default.Command = "python3 {0} {1} {2} --request-target '{3}'".format(
tr.Processes.Default.Command = "python3 {0} {1} {2} {3} --request-target '{4}'".format(
verify_replay,
os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'),
replay_file_session_3,
sensitive_fields_arg,
request_target)
tr.Processes.Default.ReturnCode = 0
tr.StillRunningAfter = server
Expand All @@ -178,7 +193,7 @@
# in the test run directory.
tr.Processes.Default.Command = (
'curl --data-binary @{0} --request-target "{1}" '
'http://127.0.0.1:{2} -H\'Host: www.example.com\' --verbose'.format(
'http://127.0.0.1:{2} -H"Host: www.example.com" --verbose'.format(
verify_replay, request_target, ts.Variables.port))
tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.stderr = "gold/post_with_body.gold"
Expand All @@ -190,10 +205,11 @@

size_of_verify_replay_file = os.path.getsize(os.path.join(Test.TestDirectory, verify_replay))
tr.Processes.Default.Command = \
"python3 {0} {1} {2} --client-request-size {3}".format(
"python3 {0} {1} {2} {3} --client-request-size {4}".format(
verify_replay,
os.path.join(Test.Variables.AtsTestToolsDir, 'lib', 'replay_schema.json'),
replay_file_session_4,
sensitive_fields_arg,
size_of_verify_replay_file)
tr.Processes.Default.ReturnCode = 0
tr.StillRunningAfter = server
Expand Down
Loading