From 15c994666043ac541cd6108f8f741191d82c2161 Mon Sep 17 00:00:00 2001 From: Benjamin Kilimnik Date: Wed, 29 Nov 2023 21:21:14 +0000 Subject: [PATCH] Add lazy parsing tests for socket trace bpf and data stream test Signed-off-by: Benjamin Kilimnik --- .../socket_tracer/data_stream_test.cc | 99 +++++++++++++++++++ .../socket_tracer/socket_trace_bpf_test.cc | 23 ++++- 2 files changed, 118 insertions(+), 4 deletions(-) diff --git a/src/stirling/source_connectors/socket_tracer/data_stream_test.cc b/src/stirling/source_connectors/socket_tracer/data_stream_test.cc index 693496df826..4277ca848f1 100644 --- a/src/stirling/source_connectors/socket_tracer/data_stream_test.cc +++ b/src/stirling/source_connectors/socket_tracer/data_stream_test.cc @@ -494,6 +494,105 @@ TEST_F(DataStreamTest, SpikeCapacityWithLargeDataChunkAndSSLEnabled) { 0, SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, kSSLNone).data_loss_bytes.Value()); } +// Note that our data loss metric gets skewed under certain circumstances for HTTP when +// lazy implementation is used. When lazy parsing is turned on, but we did not detect an incomplete +// chunk in the contiguous head, the event parser doesn't push a partial frame to avoid masking +// other errors. But the HTTP parser still removes data from the buffer for partial frames, so even +// though the event parser doesn't push partial frames, the offset into the datastream buffer is +// still updated. This causes the data loss metric calculation for num_bytes_advanced +// (data_buffer_.position() - last_processed_pos_) to change. In practice, however, we only +// enable lazy parsing when we detect an incomplete chunk in a contiguous head, so this should +// be a non-issue. +TEST_F(DataStreamTest, SpikeCapacityWithLargeDataChunkAndLazyParsingEnabled) { + // This test simulates a case where the buffer itself should normally only contain 16 bytes + // but for a big response we make an exception and temporarily increase its size + // to 1024 bytes. After the response is parsed, the buffer size should be reduced back to 16 + // bytes. And any existing data will be trimmed down to 16 bytes. + int spike_capacity_bytes = 1024; + int retention_capacity_bytes = 16; + auto buffer_expiry_timestamp = now() - std::chrono::seconds(10000); + DataStream stream(spike_capacity_bytes); + stream.set_protocol(kProtocolHTTP, true); // Turn lazy parsing on + + std::unique_ptr resp0 = event_gen_.InitRecvEvent(kHTTPResp0); + std::unique_ptr resp1 = event_gen_.InitRecvEvent(kHTTPResp0); + // For the partial response resp2, we would normally not be able to parse any of the + // 92 bytes because it is incomplete. Hence these bytes should be sitting in the databuffer until + // cleanup With lazy parsing however, we are able to parse the metadata of the response, or 87 + // bytes, with just 5 bytes leftover in the databuffer (representing the incomplete Content + // "pixie"). So when we reduce spike capacity back to 16 bytes, we have just 5 bytes of data left + // instead of 16 (of the full 92 bytes) for non-lazy parsing. + std::unique_ptr resp2 = + event_gen_.InitRecvEvent(kHTTPIncompleteResp); + // Even with lazy parsing turned on, partial frame won't get pushed unless there's a gap, so we + // add an incomplete chunk info for kHTTPIncompleteResp so that event parsing will push + // partial frame with lazy parsing turned on for HTTP + resp2->attr.bytes_missed = + 95; // gap size is 95 bytes (content length 100, but content contains just 5 bytes "pixie") + resp2->attr.incomplete_chunk = chunk_t::kUnknownGapReason; + + stream.AddData(std::move(resp0)); + stream.AddData(std::move(resp1)); + stream.AddData(std::move(resp2)); + + protocols::http::StateWrapper state{}; + stream.ProcessBytesToFrames(message_type_t::kResponse, &state); + stream.CleanupEvents(retention_capacity_bytes, buffer_expiry_timestamp); + auto frames = stream.Frames(); + EXPECT_THAT(frames[0], SizeIs(3)); // Captured 2 complete frames and 1 partial frame + EXPECT_EQ(stream.data_buffer().size(), + 0); // Flushed out bytes after partial frame, which are cut off by the gap + + // Run ProcessBytesToFrames again to propagate data loss stats. + stream.ProcessBytesToFrames(message_type_t::kResponse, &state); + // We lost just the 5 bytes of the incomplete payload of the HTTP Resp kHTTPIncompleteResp + // ("pixie") + EXPECT_EQ( + 5, SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, kSSLNone).data_loss_bytes.Value()); + EXPECT_EQ(0, SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, kSSLUnspecified) + .data_loss_bytes.Value()); +} + +TEST_F(DataStreamTest, SpikeCapacityWithLargeDataChunkAndSSLEnabledAndLazyParsingEnabled) { + int spike_capacity_bytes = 1024; + int retention_capacity_bytes = 16; + auto buffer_expiry_timestamp = now() - std::chrono::seconds(10000); + DataStream stream(spike_capacity_bytes); + stream.set_ssl_source(kSSLUnspecified); + stream.set_protocol(kProtocolHTTP, true); // Turn lazy parsing on + + std::unique_ptr resp0 = event_gen_.InitRecvEvent(kHTTPResp0); + std::unique_ptr resp1 = event_gen_.InitRecvEvent(kHTTPResp0); + std::unique_ptr resp2 = + event_gen_.InitRecvEvent(kHTTPIncompleteResp); + // Add an incomplete chunk info for kHTTPIncompleteResp so that event parsing will push + // partial frame with lazy parsing turned on for HTTP + resp2->attr.bytes_missed = + 95; // gap size is 95 bytes (content length 100, but content contains just 5 bytes "pixie") + resp2->attr.incomplete_chunk = chunk_t::kUnknownGapReason; + + stream.AddData(std::move(resp0)); + stream.AddData(std::move(resp1)); + stream.AddData(std::move(resp2)); + + protocols::http::StateWrapper state{}; + stream.ProcessBytesToFrames(message_type_t::kResponse, &state); + stream.CleanupEvents(retention_capacity_bytes, buffer_expiry_timestamp); + auto frames = stream.Frames(); + EXPECT_THAT(frames[0], SizeIs(3)); // Captured 2 complete frames and 1 partial frame + EXPECT_EQ(stream.data_buffer().size(), + 0); // Flushed out bytes after partial frame, which are cut off by the gap + + // Run ProcessBytesToFrames again to propagate data loss stats. + stream.ProcessBytesToFrames(message_type_t::kResponse, &state); + // We lost just the 5 bytes of the incomplete payload of the HTTP Resp kHTTPIncompleteResp + // ("pixie") + EXPECT_EQ(5, SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, kSSLUnspecified) + .data_loss_bytes.Value()); + EXPECT_EQ( + 0, SocketTracerMetrics::GetProtocolMetrics(kProtocolHTTP, kSSLNone).data_loss_bytes.Value()); +} + TEST_F(DataStreamTest, ResyncCausesDuplicateEventBug) { // Test to catch regression on a bug. The bug occured when a resync occurs in ParseFrames, leading // to an invalid state where the data stream buffer still had data that had already been diff --git a/src/stirling/source_connectors/socket_tracer/socket_trace_bpf_test.cc b/src/stirling/source_connectors/socket_tracer/socket_trace_bpf_test.cc index 1397d2e0804..14638c764fa 100644 --- a/src/stirling/source_connectors/socket_tracer/socket_trace_bpf_test.cc +++ b/src/stirling/source_connectors/socket_tracer/socket_trace_bpf_test.cc @@ -36,6 +36,7 @@ #include "src/shared/types/types.h" #include "src/stirling/core/data_table.h" #include "src/stirling/source_connectors/socket_tracer/bcc_bpf_intf/socket_trace.hpp" +#include "src/stirling/source_connectors/socket_tracer/common.h" #include "src/stirling/source_connectors/socket_tracer/socket_trace_connector.h" #include "src/stirling/source_connectors/socket_tracer/testing/client_server_system.h" #include "src/stirling/source_connectors/socket_tracer/testing/socket_trace_bpf_test_fixture.h" @@ -519,10 +520,20 @@ TEST_F(SocketTraceBPFTest, LargeMessages) { GetMutableConnTracker(system.ServerPID(), system.ServerFD())); EXPECT_EQ(server_tracker->recv_data().data_buffer().Head(), kHTTPReqMsg1); std::string server_send_data(server_tracker->send_data().data_buffer().Head()); - EXPECT_THAT(server_send_data.size(), 131153); + if (LazyParsingEnabled(traffic_protocol_t::kProtocolHTTP)) { + // TODO(@benkilimnik): This will need updating if we raise our chunk limit. + // with lazy parsing, we do not pad with filler and thus save ourselves from allocating 8273 + // null bytes. Gap reason is kExceededChunkLimitAndMaxMsgSize. + EXPECT_THAT(server_send_data.size(), 122880); + } else { + EXPECT_THAT(server_send_data.size(), 131153); + } EXPECT_THAT(server_send_data, HasSubstr("+++++")); - // We expect filling with \0 bytes. - EXPECT_EQ(server_send_data.substr(server_send_data.size() - 5, 5), ConstStringView("\0\0\0\0\0")); + if (!LazyParsingEnabled(traffic_protocol_t::kProtocolHTTP)) { + // We expect filling with \0 bytes if lazy parsing is disabled. + EXPECT_EQ(server_send_data.substr(server_send_data.size() - 5, 5), + ConstStringView("\0\0\0\0\0")); + } } constexpr std::string_view kHTTPRespMsgHeader = @@ -594,7 +605,11 @@ TEST_F(SocketTraceBPFTest, SendFile) { records[kHTTPRespBodyIdx]->Get(1)}; const std::string kHTTPRespMsgContentAsFiller(kHTTPRespMsgContent.size(), 0); - EXPECT_THAT(responses, Contains(kHTTPRespMsgContentAsFiller)); + // With lazy parsing, we do not pad with filler and thus save ourselves from allocating null + // bytes. Gap reason is kSendFile. + if (!LazyParsingEnabled(traffic_protocol_t::kProtocolHTTP)) { + EXPECT_THAT(responses, Contains(kHTTPRespMsgContentAsFiller)); + } EXPECT_THAT(responses, Contains(kHTTPRespMsgContent)); }