From 1be328613d80e32338b36da6d5472fad320947da Mon Sep 17 00:00:00 2001
From: Melissa Kilby
Date: Wed, 26 Apr 2023 14:09:57 +0000
Subject: [PATCH] new(userspace/falco): introduce native support for resource_utilization metrics / stats v2

Signed-off-by: Melissa Kilby
---
Review notes (below the "---" marker, so not part of the commit message):
- falco_evts_drop_rate_kernel_sec is now computed against the previous
  n_drops snapshot (m_last_n_drops); it was subtracting m_last_n_evts.
- The "n_evts" / "n_drops" name checks now compare through the terminating
  NUL, so a counter that merely shares the prefix can no longer overwrite
  the saved snapshots or the rate fields.
- Fixed the "convertions" typo in the unit-conversion todo.
- NOTE(review): this copy of the patch appears to have lost every <...>
  token (template arguments, angle-bracket includes, e-mail addresses) and
  its original whitespace; confirm against the original commit before
  applying.

 .../falco/app/actions/process_events.cpp      |  17 +-
 userspace/falco/stats_writer.cpp              | 182 +++++++++++++++---
 userspace/falco/stats_writer.h                |  23 ++-
 3 files changed, 189 insertions(+), 33 deletions(-)

diff --git a/userspace/falco/app/actions/process_events.cpp b/userspace/falco/app/actions/process_events.cpp
index 1ae017ad15d..568d0ebc0ae 100644
--- a/userspace/falco/app/actions/process_events.cpp
+++ b/userspace/falco/app/actions/process_events.cpp
@@ -275,7 +275,7 @@ static falco::app::run_result do_inspect(
 			}
 
 			// for capture mode, the source name can change at every event
-			stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx]);
+			stats_collector.collect(inspector, inspector->event_sources()[source_engine_idx], num_evts);
 		}
 		else
 		{
@@ -292,7 +292,7 @@ static falco::app::run_result do_inspect(
 			}
 
 			// for live mode, the source name is constant
-			stats_collector.collect(inspector, source);
+			stats_collector.collect(inspector, source, num_evts);
 		}
 
 		// Reset the timeouts counter, Falco successfully got an event to process
@@ -398,16 +398,19 @@ static void process_inspector_events(
 	}
 }
 
-static std::shared_ptr init_stats_writer(const options& opts)
+static std::shared_ptr init_stats_writer(const options& opts, std::shared_ptr outputs)
 {
-	auto statsw = std::make_shared();
-	if (!opts.stats_filename.empty())
+	auto statsw = std::make_shared(outputs);
+	std::string err;
+	if (opts.stats_interval > 0)
 	{
-		std::string err;
 		if (!stats_writer::init_ticker(opts.stats_interval, err))
 		{
 			throw falco_exception(err);
 		}
+	}
+	if (!opts.stats_filename.empty())
+	{
 		statsw.reset(new stats_writer(opts.stats_filename));
 	}
 	return statsw;
@@ -422,7 +425,7 @@ falco::app::run_result falco::app::actions::process_events(falco::app::state& s)
 	s.engine->complete_rule_loading();
 
 	// Initialize stats writer
-	auto statsw = init_stats_writer(s.options);
+	auto statsw = init_stats_writer(s.options, s.outputs);
 
 	if (s.options.dry_run)
 	{
diff --git a/userspace/falco/stats_writer.cpp b/userspace/falco/stats_writer.cpp
index 578853c69c7..a28ef361f2f 100644
--- a/userspace/falco/stats_writer.cpp
+++ b/userspace/falco/stats_writer.cpp
@@ -25,7 +25,7 @@ limitations under the License.
 #include "stats_writer.h"
 #include "logger.h"
 #include "banned.h" // This raises a compilation error when certain functions are used
-#include "logger.h"
+#include "config_falco.h"
 
 // note: ticker_t is an uint16_t, which is enough because we don't care about
 // overflows here. Threads calling stats_writer::handle() will just
@@ -67,18 +67,19 @@ stats_writer::ticker_t stats_writer::get_ticker()
 	return s_timer.load(std::memory_order_relaxed);
 }
 
-stats_writer::stats_writer()
-	: m_initialized(false), m_total_samples(0)
+stats_writer::stats_writer(std::shared_ptr outputs)
+	: m_initialized(false), m_total_samples(0), m_internal_rule_initialized(true)
 {
-
+	m_outputs = outputs;
 }
 
 stats_writer::stats_writer(const std::string &filename)
-	: m_initialized(true), m_total_samples(0)
+	: m_initialized(true), m_total_samples(0), m_internal_rule_initialized(false)
 {
 	m_output.exceptions(std::ofstream::failbit | std::ofstream::badbit);
 	m_output.open(filename, std::ios_base::app);
 	m_worker = std::thread(&stats_writer::worker, this);
+	m_outputs = nullptr;
 }
 
 stats_writer::~stats_writer()
@@ -95,6 +96,11 @@ bool stats_writer::has_output() const
 	return m_initialized;
 }
 
+bool stats_writer::has_internal_rule_output() const
+{
+	return m_internal_rule_initialized;
+}
+
 void stats_writer::stop_worker()
 {
 	stats_writer::msg msg;
@@ -162,39 +168,169 @@ void stats_writer::worker() noexcept
 }
 
 stats_writer::collector::collector(std::shared_ptr writer)
-	: m_writer(writer), m_last_tick(0), m_samples(0)
+	: m_writer(writer), m_last_tick(0), m_samples(0), m_last_now(0), m_last_n_evts(0), m_last_n_drops(0), m_last_num_evts(0)
+{
+}
+
+
+std::map stats_writer::collector::get_stats_v2_internal_rule(std::shared_ptr inspector, uint64_t num_evts, uint64_t stats_snapshot_time_delta_sec)
 {
+	std::map output_fields;
+	const scap_agent_info* agent_info = inspector->get_agent_info();
+	const scap_machine_info* machine_info = inspector->get_machine_info();
+
+	/* Wrapper fields needed for statistical analyses and attributions. */
+	output_fields["falco_version"] = FALCO_VERSION;
+	output_fields["falco_start_ts"] = std::to_string(agent_info->start_ts_epoch);
+	output_fields["kernel_release"] = agent_info->uname_r;
+	output_fields["host_boot_ts"] = std::to_string(machine_info->boot_ts_epoch);
+	output_fields["hostname"] = machine_info->hostname; /* Explicitly add hostname to log msg in case hostname rule output field is disabled. */
+	output_fields["host_num_cpus"] = std::to_string(machine_info->num_cpus);
+	if(inspector->check_current_engine(BPF_ENGINE))
+	{
+		output_fields["driver"] = "bpf"; /* Falco kernel driver type. */
+	} else if(inspector->check_current_engine(MODERN_BPF_ENGINE))
+	{
+		output_fields["driver"] = "modern_bpf";
+	} else
+	{
+		output_fields["driver"] = "kmod";
+	}
+
+	/* Resource utilization, CPU and memory usage etc. */
+
+	uint32_t nstats;
+	int32_t rc;
+	const scap_stats_v2* utilization;
+	auto buffer = inspector->get_sinsp_stats_v2_buffer();
+	utilization = libsinsp::resource_utilization::get_resource_utilization(agent_info, buffer, &nstats, &rc);
+	if (utilization && rc == 0 && nstats > 0)
+	{
+		// todo: support unit conversions for memory metrics
+		for(uint32_t stat = 0; stat < nstats; stat++)
+		{
+			switch(utilization[stat].type)
+			{
+			case STATS_VALUE_TYPE_U64:
+				output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.u64);
+				break;
+			case STATS_VALUE_TYPE_U32:
+				output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.u32);
+				break;
+			case STATS_VALUE_TYPE_D:
+				output_fields[utilization[stat].name] = std::to_string(utilization[stat].value.d);
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	/* Kernel side stats counters and libbpf stats if applicable. */
+
+	if (m_last_num_evts != 0 && stats_snapshot_time_delta_sec > 0)
+	{
+		/* Successfully processed userspace events. */
+		output_fields["falco_evts_rate_sec"] = std::to_string((num_evts - m_last_num_evts) / stats_snapshot_time_delta_sec);
+	}
+	output_fields["falco_num_evts"] = std::to_string(num_evts);
+	output_fields["falco_num_evts_prev"] = std::to_string(m_last_num_evts);
+	m_last_num_evts = num_evts;
+	nstats = 0;
+	uint32_t flags = 0;
+	flags |= PPM_SCAP_STATS_KERNEL_COUNTERS;
+	if (!inspector->check_current_engine(KMOD_ENGINE) && (machine_info->flags & PPM_BPF_STATS_ENABLED))
+	{
+		flags |= PPM_SCAP_STATS_LIBBPF_STATS;
+	}
+	const scap_stats_v2* stats_v2 = inspector->get_capture_stats_v2(flags, &nstats, &rc);
+	if (stats_v2 && nstats > 0 && rc == 0)
+	{
+		for(uint32_t stat = 0; stat < nstats; stat++)
+		{
+			switch(stats_v2[stat].type)
+			{
+			case STATS_VALUE_TYPE_U64:
+				if (strncmp(stats_v2[stat].name, "n_evts", 7) == 0) /* 7 = strlen + NUL: exact name match, not a prefix match. */
+				{
+					output_fields["falco_evts_rate_kernel_sec"] = std::to_string(0);
+					if (m_last_n_evts != 0 && stats_snapshot_time_delta_sec > 0)
+					{
+						/* n_evts is total number of kernel side events. */
+						output_fields["falco_evts_rate_kernel_sec"] = std::to_string((stats_v2[stat].value.u64 - m_last_n_evts) / stats_snapshot_time_delta_sec);
+					}
+					output_fields["n_evts_prev"] = std::to_string(m_last_n_evts);
+					m_last_n_evts = stats_v2[stat].value.u64;
+				}
+				else if (strncmp(stats_v2[stat].name, "n_drops", 8) == 0) /* 8 = strlen + NUL: exact name match, not a prefix match. */
+				{
+					output_fields["falco_evts_drop_rate_kernel_sec"] = std::to_string(0);
+					if (m_last_n_drops != 0 && stats_snapshot_time_delta_sec > 0)
+					{
+						/* n_drops is total number of kernel side event drops. */
+						output_fields["falco_evts_drop_rate_kernel_sec"] = std::to_string((stats_v2[stat].value.u64 - m_last_n_drops) / stats_snapshot_time_delta_sec);
+					}
+					output_fields["n_drops_prev"] = std::to_string(m_last_n_drops);
+					m_last_n_drops = stats_v2[stat].value.u64;
+				}
+				output_fields[stats_v2[stat].name] = std::to_string(stats_v2[stat].value.u64);
+				break;
+			default:
+				break;
+			}
+		}
+	}
 
+	return output_fields;
 }
 
-void stats_writer::collector::collect(std::shared_ptr inspector, const std::string& src)
+void stats_writer::collector::collect(std::shared_ptr inspector, const std::string &src, uint64_t num_evts)
 {
 	// just skip if no output is configured
-	if (m_writer->has_output())
+	if (m_writer->has_output() || m_writer->has_internal_rule_output())
 	{
 		// collect stats once per each ticker period
 		auto tick = stats_writer::get_ticker();
 		if (tick != m_last_tick)
 		{
-			stats_writer::msg msg;
-			msg.stop = false;
-			msg.source = src;
-			inspector->get_capture_stats(&msg.stats);
-			m_samples++;
-			if(m_samples == 1)
+			if (m_writer->has_internal_rule_output() && m_writer->m_outputs)
 			{
-				msg.delta = msg.stats;
+				auto now = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count();
+				std::string rule = "Falco internal: resource utilization stats metrics";
+				std::string msg = "";
+				uint64_t stats_snapshot_time_delta = 0;
+				if (m_last_now != 0)
+				{
+					stats_snapshot_time_delta = now - m_last_now;
+				}
+				std::map output_fields = stats_writer::collector::get_stats_v2_internal_rule(inspector, num_evts, (stats_snapshot_time_delta / ONE_SECOND_IN_NS));
+				output_fields["evt.time"] = std::to_string(now); /* Some ETLs may prefer a consistent timestamp within output. */
+				m_writer->m_outputs->handle_msg(now, falco_common::PRIORITY_DEBUG, msg, rule, output_fields);
+				m_last_tick = tick;
+				m_last_now = now;
 			}
-			else
+			else if (m_writer->has_output())
 			{
-				msg.delta.n_evts = msg.stats.n_evts - m_last_stats.n_evts;
-				msg.delta.n_drops = msg.stats.n_drops - m_last_stats.n_drops;
-				msg.delta.n_preemptions = msg.stats.n_preemptions - m_last_stats.n_preemptions;
-			}
+				stats_writer::msg msg;
+				msg.stop = false;
+				msg.source = src;
+				inspector->get_capture_stats(&msg.stats);
+				m_samples++;
+				if(m_samples == 1)
+				{
+					msg.delta = msg.stats;
+				}
+				else
+				{
+					msg.delta.n_evts = msg.stats.n_evts - m_last_stats.n_evts;
+					msg.delta.n_drops = msg.stats.n_drops - m_last_stats.n_drops;
+					msg.delta.n_preemptions = msg.stats.n_preemptions - m_last_stats.n_preemptions;
+				}
 
-			m_last_tick = tick;
-			m_last_stats = msg.stats;
-			m_writer->push(msg);
+				m_last_tick = tick;
+				m_last_stats = msg.stats;
+				m_writer->push(msg);
+			}
 		}
 	}
 }
diff --git a/userspace/falco/stats_writer.h b/userspace/falco/stats_writer.h
index 2144f0c5e7f..35034a9703d 100644
--- a/userspace/falco/stats_writer.h
+++ b/userspace/falco/stats_writer.h
@@ -23,6 +23,7 @@ limitations under the License.
 #include
 
 #include "tbb/concurrent_queue.h"
+#include "falco_outputs.h"
 
 /*!
 	\brief Writes stats samples collected from inspectors into a given output.
@@ -56,13 +57,22 @@ class stats_writer
 			\brief Collects one stats sample from an inspector
 			and for the given event source name
 		*/
-		void collect(std::shared_ptr inspector, const std::string& src);
+		void collect(std::shared_ptr inspector, const std::string& src, uint64_t num_evts);
+
+		/*!
+			\brief Collect snapshot stats v2 formatted as internal rule.
+		*/
+		std::map get_stats_v2_internal_rule(std::shared_ptr inspector, uint64_t num_evts, uint64_t stats_snapshot_time_delta_sec);
 
 	private:
 		std::shared_ptr m_writer;
 		stats_writer::ticker_t m_last_tick;
 		uint64_t m_samples;
 		scap_stats m_last_stats;
+		uint64_t m_last_now;
+		uint64_t m_last_n_evts;
+		uint64_t m_last_n_drops;
+		uint64_t m_last_num_evts;
 	};
 
 	stats_writer(const stats_writer&) = delete;
@@ -79,7 +89,7 @@ class stats_writer
 		\brief Initializes a writer without any output.
 		With this constructor, has_output() always returns false
 	*/
-	stats_writer();
+	stats_writer(std::shared_ptr outputs);
 
 	/*!
 		\brief Initializes a writer that prints to a file at the given filename.
@@ -92,6 +102,11 @@ class stats_writer
 	*/
 	inline bool has_output() const;
 
+	/*!
+		\brief Returns true if the writer is configured to emit internal Falco rules
+	*/
+	inline bool has_internal_rule_output() const;
+
 	/*!
 		\brief Initializes the ticker with a given interval period defined
 		in milliseconds. Subsequent calls to init_ticker will dismiss the
@@ -129,7 +144,9 @@ class stats_writer
 	uint64_t m_total_samples;
 	std::thread m_worker;
 	std::ofstream m_output;
-	tbb::concurrent_bounded_queue m_queue;
+	tbb::concurrent_bounded_queue m_queue;
+	bool m_internal_rule_initialized;
+	std::shared_ptr m_outputs;
 
 	// note: in this way, only collectors can push into the queue
 	friend class stats_writer::collector;