From a6d8cdbca877e652cc88ed78c8815e850607183d Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Thu, 24 Jun 2021 15:15:29 +0200 Subject: [PATCH 1/7] Add Anomali ThreatStream support to threatintel module (#26350) * Add Anomali ThreatStream support to threatintel module This adds a new dataset, `anomalithreatstream` to the threatintel module. It allows to ingest indicators from Anomali ThreatStream Integrator via a custom SDK output. --- CHANGELOG.next.asciidoc | 1 + filebeat/docs/fields.asciidoc | 185 + filebeat/docs/modules/threatintel.asciidoc | 104 +- filebeat/tests/system/test_modules.py | 1 + x-pack/filebeat/filebeat.reference.yml | 25 + .../module/threatintel/_meta/config.yml | 25 + .../module/threatintel/_meta/docs.asciidoc | 104 +- .../anomalithreatstream/_meta/fields.yml | 103 + .../anomalithreatstream/config/config.yml | 58 + .../anomalithreatstream/ingest/pipeline.yml | 396 ++ .../anomalithreatstream/manifest.yml | 20 + .../anomalithreatstream/test/generated.log | 100 + .../test/generated.log-expected.json | 4125 +++++++++++++++++ x-pack/filebeat/module/threatintel/fields.go | 2 +- .../modules.d/threatintel.yml.disabled | 25 + 15 files changed, 5261 insertions(+), 13 deletions(-) create mode 100644 x-pack/filebeat/module/threatintel/anomalithreatstream/_meta/fields.yml create mode 100644 x-pack/filebeat/module/threatintel/anomalithreatstream/config/config.yml create mode 100644 x-pack/filebeat/module/threatintel/anomalithreatstream/ingest/pipeline.yml create mode 100644 x-pack/filebeat/module/threatintel/anomalithreatstream/manifest.yml create mode 100644 x-pack/filebeat/module/threatintel/anomalithreatstream/test/generated.log create mode 100644 x-pack/filebeat/module/threatintel/anomalithreatstream/test/generated.log-expected.json diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 8fd35de6166f..1f158e375314 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -826,6 +826,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Add `include_s3_metadata` config option to the `aws-s3` input for including object metadata in events. {pull}26267[26267] - RFC 5424 and UNIX socket support in the Syslog input are now GA {pull}26293[26293] - Update grok patterns for HA Proxy module {issue}25827[25827] {pull}25835[25835] +- Added dataset `anomalithreatstream` to the `threatintel` module to ingest indicators from Anomali ThreatStream {pull}26350[26350] *Heartbeat* diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index eacdb5df8c05..7bcfbb6d16ed 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -152754,6 +152754,191 @@ type: keyword The STIX reference object. +type: keyword + +-- + +[float] +=== anomalithreatstream + +Fields for Anomali ThreatStream + + + +*`threatintel.anomalithreatstream.classification`*:: ++ +-- +Indicates whether an indicator is private or from a public feed and available publicly. Possible values: private, public. + + +type: keyword + +example: private + +-- + +*`threatintel.anomalithreatstream.confidence`*:: ++ +-- +The measure of the accuracy (from 0 to 100) assigned by ThreatStream's predictive analytics technology to indicators. + + +type: short + +-- + +*`threatintel.anomalithreatstream.detail2`*:: ++ +-- +Detail text for indicator. + + +type: text + +example: Imported by user 42. + +-- + +*`threatintel.anomalithreatstream.id`*:: ++ +-- +The ID of the indicator. 
+ + +type: keyword + +-- + +*`threatintel.anomalithreatstream.import_session_id`*:: ++ +-- +ID of the import session that created the indicator on ThreatStream. + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.itype`*:: ++ +-- +Indicator type. Possible values: "apt_domain", "apt_email", "apt_ip", "apt_url", "bot_ip", "c2_domain", "c2_ip", "c2_url", "i2p_ip", "mal_domain", "mal_email", "mal_ip", "mal_md5", "mal_url", "parked_ip", "phish_email", "phish_ip", "phish_url", "scan_ip", "spam_domain", "ssh_ip", "suspicious_domain", "tor_ip" and "torrent_tracker_url". + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.maltype`*:: ++ +-- +Information regarding a malware family, a CVE ID, or another attack or threat, associated with the indicator. + + +type: wildcard + +-- + +*`threatintel.anomalithreatstream.md5`*:: ++ +-- +Hash for the indicator. + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.resource_uri`*:: ++ +-- +Relative URI for the indicator details. + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.severity`*:: ++ +-- +Criticality associated with the threat feed that supplied the indicator. Possible values: low, medium, high, very-high. + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.source`*:: ++ +-- +Source for the indicator. + + +type: keyword + +example: Analyst + +-- + +*`threatintel.anomalithreatstream.source_feed_id`*:: ++ +-- +ID for the integrator source. + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.state`*:: ++ +-- +State for this indicator. + + +type: keyword + +example: active + +-- + +*`threatintel.anomalithreatstream.trusted_circle_ids`*:: ++ +-- +ID of the trusted circle that imported the indicator. + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.update_id`*:: ++ +-- +Update ID. + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.url`*:: ++ +-- +URL for the indicator. + + +type: keyword + +-- + +*`threatintel.anomalithreatstream.value_type`*:: ++ +-- +Data type of the indicator. Possible values: ip, domain, url, email, md5. + + type: keyword -- diff --git a/filebeat/docs/modules/threatintel.asciidoc b/filebeat/docs/modules/threatintel.asciidoc index dad52d14c929..f39a9377fb1d 100644 --- a/filebeat/docs/modules/threatintel.asciidoc +++ b/filebeat/docs/modules/threatintel.asciidoc @@ -22,15 +22,17 @@ fields. The available filesets are: -* `abuseurl`: Supports gathering URL entities from Abuse.ch. -* `abusemalware`: Supports gathering Malware/Payload entities from Abuse.ch. -* `misp`: Supports gathering threat intel attributes from MISP (replaces MISP module). -* `malwarebazaar`: Supports gathering Malware/Payload entities from Malware Bazaar. -* `otx`: Supports gathering threat intel attributes from AlientVault OTX. -* `anomali`: Supports gathering threat intel attributes from Anomali. +* <>: Supports gathering URL entities from Abuse.ch. +* <>: Supports gathering Malware/Payload entities from Abuse.ch. +* <>: Supports gathering threat intel attributes from MISP (replaces MISP module). +* <>: Supports gathering Malware/Payload entities from Malware Bazaar. +* <>: Supports gathering threat intel attributes from AlientVault OTX. +* <>: Supports gathering threat intel attributes from Anomali Limo. +* <>: Supports gathering threat intel attributes from Anomali ThreatStream. include::../include/gs-link.asciidoc[] +[[abuseurl]] [float] ==== `abuseurl` fileset settings @@ -70,6 +72,7 @@ Abuse.ch URL Threat Intel is mapped to the following ECS fields. 
| host | threatintel.indicator.ip/domain |============================================================== +[[abusemalware]] [float] ==== `abusemalware` fileset settings @@ -109,6 +112,7 @@ Abuse.ch Malware Threat Intel is mapped to the following ECS fields. | file_size | threatintel.indicator.file.size |================================================================ +[[malwarebazaar]] [float] ==== `malwarebazaar` fileset settings @@ -163,6 +167,7 @@ Malware Bazaar Threat Intel is mapped to the following ECS fields. | code_sign.serial_number | threatintel.indicator.file.x509.serial_number |================================================================ +[[misp]] [float] ==== `misp` fileset settings @@ -240,6 +245,7 @@ MISP Threat Intel is mapped to the following ECS fields. `misp.value` is mapped to the appropriate field dependent on attribute type. +[[otx]] [float] ==== `otx` fileset settings @@ -315,6 +321,7 @@ OTX Threat Intel is mapped to the following ECS fields. `otx.indicator` is mapped to the appropriate field dependent on attribute type. +[[anomali]] [float] ==== `anomali` fileset settings @@ -396,6 +403,91 @@ Anomali Threat Intel is mapped to the following ECS fields. `anomali.pattern` is mapped to the appropriate field dependent on attribute type. +[[anomalithreatstream]] +[float] +==== `anomalithreatstream` fileset settings + +To configure the ThreatStream integration you first need to define an output +in the Anomali ThreatStream Integrator using the Elastic SDK provided by Anomali. +It will deliver indicators via HTTP or HTTPS to a Filebeat instance running as +a server. + +Configure an Integrator output with the following settings: + +* Indicator Filter: `*` (or use any desired filter). +* SDK Executable Command: `/path/to/python /path/to/anomali-sdk/main.py`. + Adjust the paths to the python executable and the directory where the Elastic SDK + has been unpacked. +* Metadata in JSON Format: `{"url": "https://filebeat:8080/", "server_certificate": "/path/to/cert.pem", "secret": "my secret"}`. + - `url`: Use the host and port where Filebeat will be running, and `http` or `https` accordingly. + - `server_certificate`: If using HTTPS, absolute path to the server certificate. Otherwise don't set + this field. + - `secret`: A shared secret string to authenticate messages between the SDK and Filebeat. + +Then configure the `anomalithreatstream` fileset in Filebeat accordingly: +[source,yaml] +---- +- module: threatintel + anomalithreatstream: + enabled: true + var.input: http_endpoint + var.listen_address: 0.0.0.0 # Listen on all interfaces. + var.listen_port: 8080 + var.secret: 'my secret' + var.ssl_certificate: path/to/server_ssl_cert.pem + var.ssl_key: path/to/ssl_key.pem +---- + +*`var.listen_address`*:: + +Local address to bind the HTTP server to. Use `0.0.0.0` to accept connections +from all interfaces. + +*`var.listen_port`*:: + +Port number to use for the HTTP server. + +*`var.secret`*:: + +Shared secret between the SDK and Filebeat, used to authenticate messages. + +*`var.ssl_certificate`*:: + +Path to the public SSL certificate for the HTTPS server. If unset, Filebeat +will use unsecure HTTP connections. + +*`var.ssl_key`*:: + +Path to the certificate's private key. 
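+
+The shared secret set in `var.secret` is checked by the `http_endpoint` input as
+an HMAC-SHA256 signature of each request, sent in the `X-Filebeat-Signature`
+header with a `sha256=` prefix (see the `hmac` settings in this fileset's
+`config.yml`). The sketch below shows how a sender could compute that value;
+the exact bytes being signed (the raw request body here), the hex encoding of
+the digest, and the sample payload are assumptions for illustration only.
+
+[source,go]
+----
+package main
+
+import (
+	"crypto/hmac"
+	"crypto/sha256"
+	"encoding/hex"
+	"fmt"
+)
+
+// sign returns the header value expected by the input: HMAC-SHA256 of the
+// request body, keyed with the shared secret and prefixed with "sha256=".
+func sign(secret, body []byte) string {
+	mac := hmac.New(sha256.New, secret)
+	mac.Write(body)
+	return "sha256=" + hex.EncodeToString(mac.Sum(nil))
+}
+
+func main() {
+	body := []byte(`{"id":1,"itype":"mal_ip","srcip":"192.0.2.1"}` + "\n")
+	fmt.Println("X-Filebeat-Signature:", sign([]byte("my secret"), body))
+}
+----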
+ +Anomali ThreatStream fields are mapped to the following ECS fields: + +[options="header"] +|============================================================= +| ThreatStream fields | ECS Fields +| asn | threatintel.indicator.as.number +| classification<> | threatintel.indicator.marking.tlp +| confidence<> | threatintel.indicator.confidence +| country | threatintel.indicator.geo.country_iso_code +| date_first | threatintel.indicator.first_seen +| date_last | threatintel.indicator.last_seen +| detail | tags +| domain | threatintel.indicator.domain +| email | threatintel.indicator.email.address +| itype<> | threatintel.indicator.type +| lat | threatintel.indicator.geo.location.lat +| lon | threatintel.indicator.geo.location.lon +| md5 | threatintel.indicator.file.hash +| org | threatintel.indicator.as.organization.name +| severity<> | event.severity +| source | threatintel.indicator.provider +| srcip | threatintel.indicator.ip +| url | threatintel.indicator.url.original +|============================================================= + +[[a]] +[small]#[1]: Field is used to derive a value for the ECS field but its original value is kept under `threatintel.anomalithreatstream`.# + :has-dashboards!: [float] diff --git a/filebeat/tests/system/test_modules.py b/filebeat/tests/system/test_modules.py index 3b2449bc1c1a..fa8507a5952b 100644 --- a/filebeat/tests/system/test_modules.py +++ b/filebeat/tests/system/test_modules.py @@ -277,6 +277,7 @@ def clean_keys(obj): "threatintel.abuseurl", "threatintel.abusemalware", "threatintel.anomali", + "threatintel.anomalithreatstream", "threatintel.malwarebazaar", "snyk.vulnerabilities", "snyk.audit", diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index eb54d2f62b90..e90809b1503c 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -2249,6 +2249,31 @@ filebeat.modules: # The interval to poll the API for updates var.interval: 5m + anomalithreatstream: + enabled: true + + # Input used for ingesting threat intel data + var.input: http_endpoint + + # Address to bind to in order to receive HTTP requests + # from the Integrator SDK. Use 0.0.0.0 to bind to all + # existing interfaces. + var.listen_address: localhost + + # Port to use to receive HTTP requests from the + # Integrator SDK. + var.listen_port: 8080 + + # Secret key to authenticate requests from the SDK. + var.secret: '' + + # Uncomment the following and set the absolute paths + # to the server SSL certificate and private key to + # enable HTTPS secure connections. + # + # var.ssl_certificate: path/to/server_ssl_cert.pem + # var.ssl_key: path/to/ssl_key.pem + #---------------------------- Apache Tomcat Module ---------------------------- - module: tomcat log: diff --git a/x-pack/filebeat/module/threatintel/_meta/config.yml b/x-pack/filebeat/module/threatintel/_meta/config.yml index 9a78adca0aea..ce5b52714723 100644 --- a/x-pack/filebeat/module/threatintel/_meta/config.yml +++ b/x-pack/filebeat/module/threatintel/_meta/config.yml @@ -112,3 +112,28 @@ # The interval to poll the API for updates var.interval: 5m + + anomalithreatstream: + enabled: true + + # Input used for ingesting threat intel data + var.input: http_endpoint + + # Address to bind to in order to receive HTTP requests + # from the Integrator SDK. Use 0.0.0.0 to bind to all + # existing interfaces. + var.listen_address: localhost + + # Port to use to receive HTTP requests from the + # Integrator SDK. 
+ var.listen_port: 8080 + + # Secret key to authenticate requests from the SDK. + var.secret: '' + + # Uncomment the following and set the absolute paths + # to the server SSL certificate and private key to + # enable HTTPS secure connections. + # + # var.ssl_certificate: path/to/server_ssl_cert.pem + # var.ssl_key: path/to/ssl_key.pem diff --git a/x-pack/filebeat/module/threatintel/_meta/docs.asciidoc b/x-pack/filebeat/module/threatintel/_meta/docs.asciidoc index 43619f5394b5..177f52646011 100644 --- a/x-pack/filebeat/module/threatintel/_meta/docs.asciidoc +++ b/x-pack/filebeat/module/threatintel/_meta/docs.asciidoc @@ -17,15 +17,17 @@ fields. The available filesets are: -* `abuseurl`: Supports gathering URL entities from Abuse.ch. -* `abusemalware`: Supports gathering Malware/Payload entities from Abuse.ch. -* `misp`: Supports gathering threat intel attributes from MISP (replaces MISP module). -* `malwarebazaar`: Supports gathering Malware/Payload entities from Malware Bazaar. -* `otx`: Supports gathering threat intel attributes from AlientVault OTX. -* `anomali`: Supports gathering threat intel attributes from Anomali. +* <>: Supports gathering URL entities from Abuse.ch. +* <>: Supports gathering Malware/Payload entities from Abuse.ch. +* <>: Supports gathering threat intel attributes from MISP (replaces MISP module). +* <>: Supports gathering Malware/Payload entities from Malware Bazaar. +* <>: Supports gathering threat intel attributes from AlientVault OTX. +* <>: Supports gathering threat intel attributes from Anomali Limo. +* <>: Supports gathering threat intel attributes from Anomali ThreatStream. include::../include/gs-link.asciidoc[] +[[abuseurl]] [float] ==== `abuseurl` fileset settings @@ -65,6 +67,7 @@ Abuse.ch URL Threat Intel is mapped to the following ECS fields. | host | threatintel.indicator.ip/domain |============================================================== +[[abusemalware]] [float] ==== `abusemalware` fileset settings @@ -104,6 +107,7 @@ Abuse.ch Malware Threat Intel is mapped to the following ECS fields. | file_size | threatintel.indicator.file.size |================================================================ +[[malwarebazaar]] [float] ==== `malwarebazaar` fileset settings @@ -158,6 +162,7 @@ Malware Bazaar Threat Intel is mapped to the following ECS fields. | code_sign.serial_number | threatintel.indicator.file.x509.serial_number |================================================================ +[[misp]] [float] ==== `misp` fileset settings @@ -235,6 +240,7 @@ MISP Threat Intel is mapped to the following ECS fields. `misp.value` is mapped to the appropriate field dependent on attribute type. +[[otx]] [float] ==== `otx` fileset settings @@ -310,6 +316,7 @@ OTX Threat Intel is mapped to the following ECS fields. `otx.indicator` is mapped to the appropriate field dependent on attribute type. +[[anomali]] [float] ==== `anomali` fileset settings @@ -391,6 +398,91 @@ Anomali Threat Intel is mapped to the following ECS fields. `anomali.pattern` is mapped to the appropriate field dependent on attribute type. +[[anomalithreatstream]] +[float] +==== `anomalithreatstream` fileset settings + +To configure the ThreatStream integration you first need to define an output +in the Anomali ThreatStream Integrator using the Elastic SDK provided by Anomali. +It will deliver indicators via HTTP or HTTPS to a Filebeat instance running as +a server. + +Configure an Integrator output with the following settings: + +* Indicator Filter: `*` (or use any desired filter). 
+* SDK Executable Command: `/path/to/python /path/to/anomali-sdk/main.py`. + Adjust the paths to the python executable and the directory where the Elastic SDK + has been unpacked. +* Metadata in JSON Format: `{"url": "https://filebeat:8080/", "server_certificate": "/path/to/cert.pem", "secret": "my secret"}`. + - `url`: Use the host and port where Filebeat will be running, and `http` or `https` accordingly. + - `server_certificate`: If using HTTPS, absolute path to the server certificate. Otherwise don't set + this field. + - `secret`: A shared secret string to authenticate messages between the SDK and Filebeat. + +Then configure the `anomalithreatstream` fileset in Filebeat accordingly: +[source,yaml] +---- +- module: threatintel + anomalithreatstream: + enabled: true + var.input: http_endpoint + var.listen_address: 0.0.0.0 # Listen on all interfaces. + var.listen_port: 8080 + var.secret: 'my secret' + var.ssl_certificate: path/to/server_ssl_cert.pem + var.ssl_key: path/to/ssl_key.pem +---- + +*`var.listen_address`*:: + +Local address to bind the HTTP server to. Use `0.0.0.0` to accept connections +from all interfaces. + +*`var.listen_port`*:: + +Port number to use for the HTTP server. + +*`var.secret`*:: + +Shared secret between the SDK and Filebeat, used to authenticate messages. + +*`var.ssl_certificate`*:: + +Path to the public SSL certificate for the HTTPS server. If unset, Filebeat +will use unsecure HTTP connections. + +*`var.ssl_key`*:: + +Path to the certificate's private key. + +Anomali ThreatStream fields are mapped to the following ECS fields: + +[options="header"] +|============================================================= +| ThreatStream fields | ECS Fields +| asn | threatintel.indicator.as.number +| classification<> | threatintel.indicator.marking.tlp +| confidence<> | threatintel.indicator.confidence +| country | threatintel.indicator.geo.country_iso_code +| date_first | threatintel.indicator.first_seen +| date_last | threatintel.indicator.last_seen +| detail | tags +| domain | threatintel.indicator.domain +| email | threatintel.indicator.email.address +| itype<> | threatintel.indicator.type +| lat | threatintel.indicator.geo.location.lat +| lon | threatintel.indicator.geo.location.lon +| md5 | threatintel.indicator.file.hash +| org | threatintel.indicator.as.organization.name +| severity<> | event.severity +| source | threatintel.indicator.provider +| srcip | threatintel.indicator.ip +| url | threatintel.indicator.url.original +|============================================================= + +[[a]] +[small]#[1]: Field is used to derive a value for the ECS field but its original value is kept under `threatintel.anomalithreatstream`.# + :has-dashboards!: [float] diff --git a/x-pack/filebeat/module/threatintel/anomalithreatstream/_meta/fields.yml b/x-pack/filebeat/module/threatintel/anomalithreatstream/_meta/fields.yml new file mode 100644 index 000000000000..2cb193b4bb1c --- /dev/null +++ b/x-pack/filebeat/module/threatintel/anomalithreatstream/_meta/fields.yml @@ -0,0 +1,103 @@ +- name: anomalithreatstream + type: group + description: > + Fields for Anomali ThreatStream + default_field: false + fields: + + - name: classification + type: keyword + description: > + Indicates whether an indicator is private or from a public feed and available publicly. + Possible values: private, public. 
+ example: private + + - name: confidence + type: short + description: > + The measure of the accuracy (from 0 to 100) assigned by ThreatStream's predictive analytics technology to indicators. + + - name: detail2 + type: text + description: > + Detail text for indicator. + example: Imported by user 42. + + - name: id + type: keyword + description: > + The ID of the indicator. + + - name: import_session_id + type: keyword + description: > + ID of the import session that created the indicator on ThreatStream. + + - name: itype + type: keyword + description: > + Indicator type. + Possible values: "apt_domain", "apt_email", "apt_ip", "apt_url", + "bot_ip", "c2_domain", "c2_ip", "c2_url", "i2p_ip", "mal_domain", + "mal_email", "mal_ip", "mal_md5", "mal_url", "parked_ip", "phish_email", + "phish_ip", "phish_url", "scan_ip", "spam_domain", "ssh_ip", + "suspicious_domain", "tor_ip" and "torrent_tracker_url". + + - name: maltype + type: wildcard + description: > + Information regarding a malware family, a CVE ID, or another attack or threat, associated with the indicator. + + - name: md5 + type: keyword + description: > + Hash for the indicator. + + - name: resource_uri + type: keyword + description: > + Relative URI for the indicator details. + + - name: severity + type: keyword + description: > + Criticality associated with the threat feed that supplied the indicator. + Possible values: low, medium, high, very-high. + + - name: source + type: keyword + description: > + Source for the indicator. + example: Analyst + + - name: source_feed_id + type: keyword + description: > + ID for the integrator source. + + - name: state + type: keyword + description: > + State for this indicator. + example: active + + - name: trusted_circle_ids + type: keyword + description: > + ID of the trusted circle that imported the indicator. + + - name: update_id + type: keyword + description: > + Update ID. + + - name: url + type: keyword + description: > + URL for the indicator. + + - name: value_type + type: keyword + description: > + Data type of the indicator. + Possible values: ip, domain, url, email, md5. 
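
Each indicator arrives from the Integrator SDK as one JSON object per line
(`application/x-ndjson`), and the `http_endpoint` input configured below keeps
the object under the `json.*` prefix for the ingest pipeline. A hypothetical
record is sketched here; the field names follow the definitions above, but
every value is purely illustrative.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// indicator models a handful of the ThreatStream fields defined above.
type indicator struct {
	ID             int    `json:"id"`
	IType          string `json:"itype"`
	SrcIP          string `json:"srcip,omitempty"`
	Confidence     int    `json:"confidence"`
	Severity       string `json:"severity"`
	Classification string `json:"classification"`
	Source         string `json:"source"`
	DateFirst      string `json:"date_first"`
	DateLast       string `json:"date_last"`
}

func main() {
	doc, _ := json.Marshal(indicator{
		ID:             1,
		IType:          "mal_ip",
		SrcIP:          "192.0.2.1",
		Confidence:     80,
		Severity:       "high",
		Classification: "private",
		Source:         "Analyst",
		DateFirst:      "2021-06-01T00:00:00Z",
		DateLast:       "2021-06-20T00:00:00Z",
	})
	fmt.Println(string(doc)) // one NDJSON line, as POSTed to the input
}
```
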
diff --git a/x-pack/filebeat/module/threatintel/anomalithreatstream/config/config.yml b/x-pack/filebeat/module/threatintel/anomalithreatstream/config/config.yml new file mode 100644 index 000000000000..0d3e590d7912 --- /dev/null +++ b/x-pack/filebeat/module/threatintel/anomalithreatstream/config/config.yml @@ -0,0 +1,58 @@ +{{ if eq .input "http_endpoint" }} + +type: http_endpoint +enabled: true + +listen_address: {{ .listen_address }} +listen_port: {{ .listen_port }} +prefix: json +content_type: application/x-ndjson + +{{ if .secret }} +hmac: + header: X-Filebeat-Signature + key: {{ .secret }} + type: sha256 + prefix: sha256= +{{ end }} + +{{ if .ssl_certificate }} +ssl: + enabled: true + certificate: {{ .ssl_certificate }} + key: {{ .ssl_key }} + verification_mode: none +{{ end }} + +{{ else if eq .input "file" }} + +type: log +paths: +{{ range $i, $path := .paths }} + - {{$path}} +{{ end }} +exclude_files: [".gz$"] +json.add_error_key: true +{{ end }} + +tags: {{.tags | tojson}} + +processors: + - add_fields: + target: '' + fields: + ecs.version: 1.10.0 + - fingerprint: + fields: + - event.dataset + - json.id + target_field: '@metadata._id' + encoding: base64 + - script: + lang: javascript + id: my_filter + source: > + function process(event) { + event.Put("@metadata.op_type", "index"); + } + diff --git a/x-pack/filebeat/module/threatintel/anomalithreatstream/ingest/pipeline.yml b/x-pack/filebeat/module/threatintel/anomalithreatstream/ingest/pipeline.yml new file mode 100644 index 000000000000..6d4658c05048 --- /dev/null +++ b/x-pack/filebeat/module/threatintel/anomalithreatstream/ingest/pipeline.yml @@ -0,0 +1,396 @@ +description: Pipeline for parsing Anomali ThreatStream +processors: +# +# Safeguard against feeding the pipeline with documents other +# that the ones generated by Filebeat's http_endpoint input. +# +- fail: + if: 'ctx.json == null || !(ctx.json instanceof Map)' + message: 'missing json object in input document' + +# +# Set basic ECS fields. +# +- set: + field: event.ingested + value: '{{{ _ingest.timestamp }}}' +- set: + field: event.kind + value: enrichment +- set: + field: event.category + value: threat +- set: + field: event.type + value: indicator + +# +# Map itype field to STIX 2.0 Cyber Observable values (threatintel.indicator.type). +# +- script: + lang: painless + if: 'ctx.json.itype != null' + description: > + Map itype field to STIX 2.0 Cyber Observable values (threatintel.indicator.type). 
+ params: + actor_ip: ipv4-addr + adware_domain: domain-name + anon_proxy: ipv4-addr + anon_vpn: ipv4-addr + apt_domain: domain-name + apt_email: email-addr + apt_ip: ipv4-addr + apt_md5: file + apt_subject: email + apt_ua: url + apt_url: url + bot_ip: ipv4-addr + brute_ip: ipv4-addr + c2_domain: domain-name + c2_ip: ipv4-addr + c2_url: url + comm_proxy_domain: domain-name + comm_proxy_ip: ipv4-addr + compromised_domain: domain-name + compromised_ip: ipv4-addr + compromised_url: url + crypto_hash: file + crypto_ip: ipv4-addr + crypto_pool: domain + crypto_url: url + crypto_wallet: file + ddos_ip: ipv4-addr + disposable_email_domain: domain-name + dyn_dns: domain-name + exfil_domain: domain-name + exfil_ip: ipv4-addr + exfil_url: url + exploit_domain: domain-name + exploit_ip: ipv4-addr + exploit_url: url + free_email_domain: domain-name + geolocation_url: url + hack_tool: file + i2p_ip: ipv4-addr + ipcheck_url: url + mal_domain: domain-name + mal_email: email-addr + mal_ip: ipv4-addr + mal_md5: file + mal_sslcert_sh1: x509-certificate + mal_sslcert_sha1: x509-certificate + mal_ua: url + mal_url: url + p2pcnc: ipv4-addr + parked_domain: domain-name + parked_ip: ipv4-addr + parked_url: url + pastesite_url: url + phish_domain: domain-name + phish_email: email-addr + phish_ip: ipv4-addr + phish_url: url + proxy_ip: ipv4-addr + scan_ip: ipv4-addr + sinkhole_domain: domain-name + sinkhole_ip: ipv4-addr + spam_domain: domain-name + spam_email: email-addr + spam_ip: ipv4-addr + spam_url: url + speedtest_url: url + ssh_ip: ipv4-addr + suppress: suppress + suspicious_domain: domain-name + suspicious_email: email-addr + suspicious_ip: ipv4-addr + suspicious_reg_email: email-addr + suspicious_url: url + tor_ip: ipv4-addr + torrent_tracker_url: url + vpn_domain: domain-name + vps_ip: ipv4-addr + whois_bulk_reg_email: email-addr + whois_privacy_domain: domain-name + whois_privacy_email: email-addr + source: > + String mapping = params[ctx.json.itype]; + if (mapping != null) { + ctx["threatintel_indicator_type"] = mapping; + } + on_failure: + - append: + field: error.message + value: 'Unable to determine indicator type from "{{{ json.itype }}}": {{{ _ingest.on_failure_message }}}' + +- rename: + field: threatintel_indicator_type + target_field: threatintel.indicator.type + ignore_missing: true + +# +# Detect ipv6 for ipv4-addr types. +# +- set: + field: threatintel.indicator.type + value: ipv6-addr + if: 'ctx.threatintel?.indicator?.type == "ipv4-addr" && ctx.json.srcip != null && ctx.json.srcip.contains(":")' + +# +# Map first and last seen dates. +# +- date: + field: json.date_first + target_field: threatintel.indicator.first_seen + formats: + - ISO8601 + if: 'ctx.json.date_first != null' + on_failure: + - append: + field: error.message + value: 'Error parsing date_first field value "{{{ json.date_first }}}": {{{ _ingest.on_failure_message }}}' + +- date: + field: json.date_last + target_field: threatintel.indicator.last_seen + formats: + - ISO8601 + if: 'ctx.json.date_last != null' + on_failure: + - append: + field: error.message + value: 'Error parsing date_last field value "{{{ json.date_last }}}": {{{ _ingest.on_failure_message }}}' + +# +# Map IP geolocation fields. 
+# +- convert: + field: json.lat + target_field: threatintel.indicator.geo.location.lat + type: double + if: 'ctx.json.lat != null && ctx.json.lon != null' + on_failure: + - append: + field: error.message + value: 'Cannot convert lat field "{{{ json.lat }}}" to double: {{{ _ingest.on_failure_message }}}' +- convert: + field: json.lon + target_field: threatintel.indicator.geo.location.lon + type: double + if: 'ctx.json.lat != null && ctx.json.lon != null' + on_failure: + - append: + field: error.message + value: 'Cannot convert lon field "{{{ json.lon }}}" to double: {{{ _ingest.on_failure_message }}}' + +# +# Map classification field to Traffic Light Protocol (TLP). +# Currently: +# public => White ("Disclosure is not limited.") +# private => Amber ("Limited disclosure, restricted to participants’ organizations."). +# +- set: + field: threatintel.indicator.marking.tlp + value: Amber + if: 'ctx.json.classification == "private"' +- set: + field: threatintel.indicator.marking.tlp + value: White + if: 'ctx.json.classification == "public"' + +# +# Convert confidence field (-1..100) to ECS confidence (0..10). +# +- script: + lang: painless + description: > + Normalize confidence level. + source: > + def value = ctx.json.confidence; + if (value == null || value < 0.0 || value > 100.0) return; + ctx["threatintel_indicator_confidence"] = (long)Math.round((double)value / 10.0); + on_failure: + - append: + field: error.message + value: 'failed to normalize confidence value `{{{ json.confidence }}}`: {{{ _ingest.on_failure_message }}}' + +- rename: + field: threatintel_indicator_confidence + target_field: threatintel.indicator.confidence + ignore_missing: true + +# +# Convert asn field. +# +- convert: + field: json.asn + target_field: threatintel.indicator.as.number + type: long + ignore_missing: true + on_failure: + - append: + field: error.message + value: 'Cannot convert asn field `{{{ json.asn }}}` to long: {{{ _ingest.on_failure_message }}}' + +- rename: + field: json.org + target_field: threatintel.indicator.as.organization.name + ignore_missing: true + +- rename: + field: json.domain + target_field: threatintel.indicator.domain + ignore_missing: true + +- rename: + field: json.email + target_field: threatintel.indicator.email.address + ignore_missing: true + +- rename: + field: json.srcip + target_field: threatintel.indicator.ip + ignore_missing: true + +- uri_parts: + field: json.url + target_field: threatintel.indicator.url + keep_original: true + remove_if_successful: true + if: 'ctx.json.url != null' + on_failure: + - append: + field: error.message + value: 'Cannot parse url field `{{{ json.url }}}`: {{{ _ingest.on_failure_message }}}' + +- rename: + field: json.country + target_field: threatintel.indicator.geo.country_iso_code + ignore_missing: true + +# +# md5 field can actually contain different kinds of hash. +# Map to file.hash.* depending on hash length. 
+# +- rename: + field: json.md5 + target_field: threatintel.indicator.file.hash.md5 + if: 'ctx.json.md5 != null && ctx.json.md5.length() == 32' + +- rename: + field: json.md5 + target_field: threatintel.indicator.file.hash.sha1 + if: 'ctx.json.md5 != null && ctx.json.md5.length() == 40' + +- rename: + field: json.md5 + target_field: threatintel.indicator.file.hash.sha256 + if: 'ctx.json.md5 != null && ctx.json.md5.length() == 64' + +- rename: + field: json.md5 + target_field: threatintel.indicator.file.hash.sha512 + if: 'ctx.json.md5 != null && ctx.json.md5.length() == 128' + +- rename: + field: json.source + target_field: threatintel.indicator.provider + ignore_missing: true + +# +# Map field severity to event severity as follows: +# low => 3 +# medium => 5 +# high => 7 +# very-high => 9 +# +- set: + field: event.severity + value: 3 + if: 'ctx.json.severity == "low"' + +- set: + field: event.severity + value: 5 + if: 'ctx.json.severity == "medium"' + +- set: + field: event.severity + value: 7 + if: 'ctx.json.severity == "high"' + +- set: + field: event.severity + value: 9 + if: 'ctx.json.severity == "very-high"' + +# +# Field trusted_circles_ids is a comma-separated string +# that can contain leading and trailing separators (i.e. ",123,"). +# Need a script processor as split processor doesn't support +# removing non-trailing separators. +# +- script: + lang: painless + if: 'ctx.json.trusted_circle_ids != null && ctx.json.trusted_circle_ids instanceof String' + description: > + Convert trusted_circles_ids from CSV to an array. + source: > + def lst = + Stream.of(ctx.json.trusted_circle_ids.splitOnToken(",")) + .filter(s -> !s.isEmpty()) + .toArray(String[]::new); + if (lst.length > 0) { + ctx.json.trusted_circle_ids = lst; + } else { + ctx.json.remove('trusted_circle_ids'); + } + on_failure: + - append: + field: error.message + value: 'unable to split trusted_circle_ids "{{{ json.trusted_circle_ids }}}": {{{ _ingest.on_failure_message }}}' + +# +# Split detail field and append each component to ECS tags field. +# +- split: + field: json.detail + separator: '(?' + + # Uncomment the following and set the absolute paths + # to the server SSL certificate and private key to + # enable HTTPS secure connections. + # + # var.ssl_certificate: path/to/server_ssl_cert.pem + # var.ssl_key: path/to/ssl_key.pem From ec52595d8f07862c987cdd9661c24328bb1d353c Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Thu, 24 Jun 2021 15:15:47 +0200 Subject: [PATCH 2/7] Allow fields with ip_range datatype (#26444) This updates libbeat to support fields with ip_range type. 
See https://www.elastic.co/guide/en/elasticsearch/reference/master/range.html --- libbeat/kibana/fields_transformer.go | 1 + libbeat/kibana/fields_transformer_test.go | 2 ++ libbeat/mapping/field.go | 2 +- libbeat/mapping/field_test.go | 5 +++++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libbeat/kibana/fields_transformer.go b/libbeat/kibana/fields_transformer.go index 7009484e573f..7b3db3103d9e 100644 --- a/libbeat/kibana/fields_transformer.go +++ b/libbeat/kibana/fields_transformer.go @@ -293,6 +293,7 @@ var ( "geo_point": "geo_point", "date": "date", "ip": "ip", + "ip_range": "ip_range", "boolean": "boolean", } ) diff --git a/libbeat/kibana/fields_transformer_test.go b/libbeat/kibana/fields_transformer_test.go index fc7e9485536f..a7c343854459 100644 --- a/libbeat/kibana/fields_transformer_test.go +++ b/libbeat/kibana/fields_transformer_test.go @@ -205,6 +205,8 @@ func TestTransformTypes(t *testing.T) { {commonField: mapping.Field{Type: "string"}, expected: nil}, {commonField: mapping.Field{Type: "date"}, expected: "date"}, {commonField: mapping.Field{Type: "geo_point"}, expected: "geo_point"}, + {commonField: mapping.Field{Type: "ip"}, expected: "ip"}, + {commonField: mapping.Field{Type: "ip_range"}, expected: "ip_range"}, {commonField: mapping.Field{Type: "invalid"}, expected: nil}, } for idx, test := range tests { diff --git a/libbeat/mapping/field.go b/libbeat/mapping/field.go index b204c9549eba..9b26284981f8 100644 --- a/libbeat/mapping/field.go +++ b/libbeat/mapping/field.go @@ -153,7 +153,7 @@ func (f *Field) validateType() error { allowedFormatters = []string{"geo_point"} case "date_range": allowedFormatters = []string{"date_range"} - case "boolean", "binary", "ip", "alias", "array": + case "boolean", "binary", "ip", "alias", "array", "ip_range": // No formatters, metric types, or units allowed. case "object": if f.DynamicTemplate && (len(f.ObjectTypeParams) > 0 || f.ObjectType != "") { diff --git a/libbeat/mapping/field_test.go b/libbeat/mapping/field_test.go index 94ec43943349..52c03c6a018c 100644 --- a/libbeat/mapping/field_test.go +++ b/libbeat/mapping/field_test.go @@ -364,6 +364,11 @@ func TestFieldValidate(t *testing.T) { }, err: true, }, + "allow ip_range": { + cfg: common.MapStr{"type": "ip_range"}, + err: false, + field: Field{Type: "ip_range"}, + }, } for name, test := range tests { From 1f5198e0c3a21fa88082bd3dd8cc068f1367f058 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Thu, 24 Jun 2021 15:56:45 +0200 Subject: [PATCH 3/7] Add support for copytruncate method when rotating input logs with an external tool in `filestream` input (#23457) ## What does this PR do? The PR makes the `filestream` log rotation aware to make sure Filebeat can cooperate better with external log rotation tools. The first supported strategy is `copytruncate`. When `logrotate` rotates e.g. `boot.log` with `copytruncate` the following things happen: 1. all archived files are renamed e.g. `boot.log.2` is renamed `boot.log.3` until `boot.log.1` no longer exists 2. `boot.log` is copied to `boot.log.1` 3. 
`boot.log` is truncated You can see my tests on my machine: Before rotation: ``` root@sleipnir:/home/n# ls -lisaht /var/log/boot.log* 130476 30K -rw------- 1 root root 28K Jan 29 08:59 /var/log/boot.log 130577 36K -rw------- 1 root root 34K Jan 29 08:59 /var/log/boot.log.1 130657 60K -rw------- 1 root root 57K Jan 7 09:51 /var/log/boot.log.2 ``` After rotation: ``` root@sleipnir:/home/n# ls -lisaht /var/log/boot.log* 130476 0 -rw------- 1 root root 0 May 25 12:41 /var/log/boot.log 130430 30K -rw------- 1 root root 28K May 25 12:41 /var/log/boot.log.1 130577 36K -rw------- 1 root root 34K Jan 29 08:59 /var/log/boot.log.2 130657 60K -rw------- 1 root root 57K Jan 7 09:51 /var/log/boot.log.3 ``` On rotation, the active file is continued and archived files are kept open until EOF is reached. ### Configuration ```yaml rotation.external.strategy.copytruncate: suffix_regex: \.\d$ count: 10 ``` Note: when Filebeat will be able to rotate input logs, its configuration will be under `rotation.internal.*`. ## Why is it important? Previously, Filebeat was not able to cooperate with external log rotation tools that used `copytruncate` method. --- CHANGELOG.next.asciidoc | 2 + .../config/filebeat.inputs.reference.yml.tmpl | 10 + .../input-filestream-file-options.asciidoc | 53 +++ filebeat/filebeat.reference.yml | 10 + filebeat/input/filestream/config.go | 12 + .../filestream/copytruncate_prospector.go | 361 ++++++++++++++++++ .../copytruncate_prospector_test.go | 276 +++++++++++++ filebeat/input/filestream/fswatch_test.go | 47 +-- filebeat/input/filestream/identifier.go | 3 + .../filestream/identifier_inode_deviceid.go | 1 + filebeat/input/filestream/input.go | 41 +- .../internal/input-logfile/fswatch.go | 21 + .../internal/input-logfile/harvester.go | 32 ++ .../internal/input-logfile/publish.go | 2 +- .../internal/input-logfile/store.go | 52 ++- filebeat/input/filestream/logger.go | 41 ++ filebeat/input/filestream/prospector.go | 131 ++++--- .../input/filestream/prospector_creator.go | 106 +++++ filebeat/input/filestream/prospector_test.go | 33 +- x-pack/filebeat/filebeat.reference.yml | 10 + 20 files changed, 1128 insertions(+), 116 deletions(-) create mode 100644 filebeat/input/filestream/copytruncate_prospector.go create mode 100644 filebeat/input/filestream/copytruncate_prospector_test.go create mode 100644 filebeat/input/filestream/logger.go create mode 100644 filebeat/input/filestream/prospector_creator.go diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 1f158e375314..4653100fedd2 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -828,6 +828,8 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Update grok patterns for HA Proxy module {issue}25827[25827] {pull}25835[25835] - Added dataset `anomalithreatstream` to the `threatintel` module to ingest indicators from Anomali ThreatStream {pull}26350[26350] +- Add support for `copytruncate` method when rotating input logs with an external tool in `filestream` input. {pull}23457[23457] + *Heartbeat* - Add mime type detection for http responses. {pull}22976[22976] diff --git a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl index 19176bfc39e5..211292b9432b 100644 --- a/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl +++ b/filebeat/_meta/config/filebeat.inputs.reference.yml.tmpl @@ -292,6 +292,16 @@ filebeat.inputs: # original for harvesting but will report the symlink name as source. 
#prospector.scanner.symlinks: false + ### Log rotation + + # When an external tool rotates the input files with copytruncate strategy + # use this section to help the input find the rotated files. + #rotation.external.strategy.copytruncate: + # Regex that matches the rotated files. + # suffix_regex: \.\d$ + # If the rotated filename suffix is a datetime, set it here. + # dateformat: -20060102 + ### State options # Files for the modification data is older then clean_inactive the state from the registry is removed diff --git a/filebeat/docs/inputs/input-filestream-file-options.asciidoc b/filebeat/docs/inputs/input-filestream-file-options.asciidoc index 3beb1f7fa98b..4de04cc9d289 100644 --- a/filebeat/docs/inputs/input-filestream-file-options.asciidoc +++ b/filebeat/docs/inputs/input-filestream-file-options.asciidoc @@ -482,3 +482,56 @@ Set the location of the marker file the following way: ---- file_identity.inode_marker.path: /logs/.filebeat-marker ---- + +=== Log rotation + +As log files are constantly written, they must be rotated and purged to prevent +the logger application from filling up the disk. Rotation is done by an external +application, thus, {beatname_uc} needs information how to cooperate with it. + +When reading from rotating files make sure the paths configuration includes +both the active file and all rotated files. + +By default, {beatname_uc} is able to track files correctly in the following strategies: +* create: new active file with a unique name is created on rotation +* rename: rotated files are renamed + +However, in case of copytruncate strategy, you should provide additional configuration +to {beatname_uc}. + +[float] +==== rotation.external.strategy.copytruncate + +experimental[] + +If the log rotating application copies the contents of the active file and then +truncates the original file, use these options to help {beatname_uc} to read files +correctly. + +Set the option `suffix_regex` so {beatname_uc} can tell active and rotated files apart. There are +two supported suffix types in the input: numberic and date. + +==== Numeric suffix + +If your rotated files have an incrementing index appended to the end of the filename, e.g. +active file `apache.log` and the rotated files are named `apache.log.1`, `apache.log.2`, etc, +use the following configuration. + +[source,yaml] +--- +rotation.external.strategy.copytruncate: + suffix_regex: \.\d$ +--- + +==== Date suffix + +If the rotation date is appended to the end of the filename, e.g. active file `apache.log` and the +rotated files are named `apache.log-20210526`, `apache.log-20210527`, etc. use the following configuration: + +[source,yaml] +--- +rotation.external.strategy.copytruncate: + suffix_regex: \-\d{6}$ + dateformat: -20060102 +--- + diff --git a/filebeat/filebeat.reference.yml b/filebeat/filebeat.reference.yml index 51d42a93f2a2..de1917041bd9 100644 --- a/filebeat/filebeat.reference.yml +++ b/filebeat/filebeat.reference.yml @@ -699,6 +699,16 @@ filebeat.inputs: # original for harvesting but will report the symlink name as source. #prospector.scanner.symlinks: false + ### Log rotation + + # When an external tool rotates the input files with copytruncate strategy + # use this section to help the input find the rotated files. + #rotation.external.strategy.copytruncate: + # Regex that matches the rotated files. + # suffix_regex: \.\d$ + # If the rotated filename suffix is a datetime, set it here. 
+ # dateformat: -20060102 + ### State options # Files for the modification data is older then clean_inactive the state from the registry is removed diff --git a/filebeat/input/filestream/config.go b/filebeat/input/filestream/config.go index cf41c97d080f..007da10a0455 100644 --- a/filebeat/input/filestream/config.go +++ b/filebeat/input/filestream/config.go @@ -41,6 +41,7 @@ type config struct { HarvesterLimit uint32 `config:"harvester_limit" validate:"min=0"` IgnoreOlder time.Duration `config:"ignore_older"` IgnoreInactive ignoreInactiveType `config:"ignore_inactive"` + Rotation *common.ConfigNamespace `config:"rotation"` } type closerConfig struct { @@ -78,6 +79,17 @@ type backoffConfig struct { Max time.Duration `config:"max" validate:"nonzero"` } +type rotationConfig struct { + Strategy *common.ConfigNamespace `config:"strategy" validate:"required"` +} + +type commonRotationConfig struct { + SuffixRegex string `config:"suffix_regex" validate:"required"` + DateFormat string `config:"dateformat"` +} + +type copyTruncateConfig commonRotationConfig + func defaultConfig() config { return config{ Reader: defaultReaderConfig(), diff --git a/filebeat/input/filestream/copytruncate_prospector.go b/filebeat/input/filestream/copytruncate_prospector.go new file mode 100644 index 000000000000..ba64778998cb --- /dev/null +++ b/filebeat/input/filestream/copytruncate_prospector.go @@ -0,0 +1,361 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package filestream + +import ( + "os" + "regexp" + "sort" + "strconv" + "time" + + "github.com/urso/sderr" + + loginp "github.com/elastic/beats/v7/filebeat/input/filestream/internal/input-logfile" + input "github.com/elastic/beats/v7/filebeat/input/v2" + "github.com/elastic/beats/v7/libbeat/logp" + "github.com/elastic/go-concert/unison" +) + +const ( + copyTruncateProspectorDebugKey = "copy_truncate_file_prospector" + copiedFileIdx = 0 +) + +var ( + numericSuffixRegexp = regexp.MustCompile("\\d*$") +) + +// sorter is required for ordering rotated log files +// The slice is ordered so the newest rotated file comes first. +type sorter interface { + sort([]rotatedFileInfo) +} + +// rotatedFileInfo stores the file information of a rotated file. +type rotatedFileInfo struct { + path string + src loginp.Source + + ts time.Time + idx int +} + +func (f rotatedFileInfo) String() string { + return f.path +} + +// rotatedFilestream includes the information of the original file +// and its identifier, and the rotated file. 
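+// For a log rotated with the copytruncate strategy (e.g. boot.log from the
+// description above), originalSrc identifies boot.log and rotated holds
+// boot.log.1, boot.log.2, ... ordered so the most recently rotated copy
+// comes first.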
+type rotatedFilestream struct { + originalSrc loginp.Source + rotated []rotatedFileInfo +} + +func newRotatedFilestreams(cfg *copyTruncateConfig) *rotatedFilestreams { + var sorter sorter + sorter = newNumericSorter() + if cfg.DateFormat != "" { + sorter = &dateSorter{cfg.DateFormat} + } + return &rotatedFilestreams{ + table: make(map[string]*rotatedFilestream, 0), + sorter: sorter, + } +} + +// numericSorter sorts rotated log files that have a numeric suffix. +// Example: apache.log.1, apache.log.2 +type numericSorter struct { + suffix *regexp.Regexp +} + +func newNumericSorter() sorter { + return &numericSorter{ + suffix: numericSuffixRegexp, + } +} + +func (s *numericSorter) sort(files []rotatedFileInfo) { + sort.Slice( + files, + func(i, j int) bool { + return s.GetIdx(&files[i]) < s.GetIdx(&files[j]) + }, + ) +} + +func (s *numericSorter) GetIdx(fi *rotatedFileInfo) int { + if fi.idx > 0 { + return fi.idx + } + + idxStr := s.suffix.FindString(fi.path) + if idxStr == "" { + return -1 + } + idx, err := strconv.Atoi(idxStr) + if err != nil { + return -1 + } + fi.idx = idx + + return idx +} + +// dateSorter sorts rotated log files that have a date suffix +// based on the configured format. +// Example: apache.log-21210526, apache.log-20210527 +type dateSorter struct { + format string +} + +func (s *dateSorter) sort(files []rotatedFileInfo) { + sort.Slice( + files, + func(i, j int) bool { + return s.GetTs(&files[j]).Before(s.GetTs(&files[i])) + }, + ) +} + +func (s *dateSorter) GetTs(fi *rotatedFileInfo) time.Time { + if !fi.ts.IsZero() { + return fi.ts + } + fileTs := fi.path[len(fi.path)-len(s.format):] + + ts, err := time.Parse(s.format, fileTs) + if err != nil { + return time.Time{} + } + fi.ts = ts + return ts +} + +// rotatedFilestreams is a map of original files and their rotated instances. +type rotatedFilestreams struct { + table map[string]*rotatedFilestream + sorter sorter +} + +// addOriginalFile adds a new original file and its identifying information +// to the bookkeeper. +func (r rotatedFilestreams) addOriginalFile(path string, src loginp.Source) { + if _, ok := r.table[path]; ok { + return + } + r.table[path] = &rotatedFilestream{originalSrc: src, rotated: make([]rotatedFileInfo, 0)} +} + +// isOriginalAdded checks if an original file has been found. +func (r rotatedFilestreams) isOriginalAdded(path string) bool { + _, ok := r.table[path] + return ok +} + +// originalSrc returns the original Source information of a given +// original file path. +func (r rotatedFilestreams) originalSrc(path string) loginp.Source { + return r.table[path].originalSrc +} + +// addRotatedFile adds a new rotated file to the list and returns its index. +// if a file is already added, the source is updated and the index is returned. +func (r rotatedFilestreams) addRotatedFile(original, rotated string, src loginp.Source) int { + for idx, fi := range r.table[original].rotated { + if fi.path == rotated { + r.table[original].rotated[idx].src = src + return idx + } + } + + r.table[original].rotated = append(r.table[original].rotated, rotatedFileInfo{rotated, src, time.Time{}, 0}) + r.sorter.sort(r.table[original].rotated) + + for idx, fi := range r.table[original].rotated { + if fi.path == rotated { + return idx + } + } + + return -1 +} + +// addRotatedFile adds a new rotated file to the list and returns its index. +// if a file is already added, the source is updated and the index is returned. 
+func (r rotatedFilestreams) removeRotatedFile(original, rotated string) { + for idx, fi := range r.table[original].rotated { + if fi.path == rotated { + r.table[original].rotated = append(r.table[original].rotated[:idx], r.table[original].rotated[idx+1:]...) + return + } + } +} + +type copyTruncateFileProspector struct { + fileProspector + rotatedSuffix *regexp.Regexp + rotatedFiles *rotatedFilestreams +} + +// Run starts the fileProspector which accepts FS events from a file watcher. +func (p *copyTruncateFileProspector) Run(ctx input.Context, s loginp.StateMetadataUpdater, hg loginp.HarvesterGroup) { + log := ctx.Logger.With("prospector", copyTruncateProspectorDebugKey) + log.Debug("Starting prospector") + defer log.Debug("Prospector has stopped") + + defer p.stopHarvesterGroup(log, hg) + + var tg unison.MultiErrGroup + + tg.Go(func() error { + p.filewatcher.Run(ctx.Cancelation) + return nil + }) + + tg.Go(func() error { + ignoreInactiveSince := getIgnoreSince(p.ignoreInactiveSince, ctx.Agent) + + for ctx.Cancelation.Err() == nil { + fe := p.filewatcher.Event() + + if fe.Op == loginp.OpDone { + return nil + } + + src := p.identifier.GetSource(fe) + log = loggerWithEvent(log, fe, src) + + switch fe.Op { + case loginp.OpCreate, loginp.OpWrite: + if fe.Op == loginp.OpCreate { + log.Debugf("A new file %s has been found", fe.NewPath) + + } else if fe.Op == loginp.OpWrite { + log.Debugf("File %s has been updated", fe.NewPath) + } + + if p.fileProspector.isFileIgnored(log, fe, ignoreInactiveSince) { + continue + } + + if fe.Op == loginp.OpCreate { + err := s.UpdateMetadata(src, fileMeta{Source: fe.NewPath, IdentifierName: p.identifier.Name()}) + if err != nil { + log.Errorf("Failed to set cursor meta data of entry %s: %v", src.Name(), err) + } + } + + // check if the event belongs to a rotated file + if p.isRotated(fe) { + log.Debugf("File %s is rotated", fe.NewPath) + + p.onRotatedFile(log, ctx, fe, src, hg) + + } else { + log.Debugf("File %s is original", fe.NewPath) + // if file is original, add it to the bookeeper + p.rotatedFiles.addOriginalFile(fe.NewPath, src) + + hg.Start(ctx, src) + } + + case loginp.OpTruncate: + log.Debugf("File %s has been truncated", fe.NewPath) + + s.ResetCursor(src, state{Offset: 0}) + hg.Restart(ctx, src) + + case loginp.OpDelete: + log.Debugf("File %s has been removed", fe.OldPath) + + p.fileProspector.onRemove(log, fe, src, s, hg) + + case loginp.OpRename: + log.Debugf("File %s has been renamed to %s", fe.OldPath, fe.NewPath) + + // check if the event belongs to a rotated file + if p.isRotated(fe) { + log.Debugf("File %s is rotated", fe.NewPath) + + p.onRotatedFile(log, ctx, fe, src, hg) + } + + p.fileProspector.onRename(log, ctx, fe, src, s, hg) + + default: + log.Error("Unkown return value %v", fe.Op) + } + } + return nil + }) + + errs := tg.Wait() + if len(errs) > 0 { + log.Error("%s", sderr.WrapAll(errs, "running prospector failed")) + } +} + +func (p *copyTruncateFileProspector) isRotated(event loginp.FSEvent) bool { + if p.rotatedSuffix.MatchString(event.NewPath) { + return true + } + return false +} + +func (p *copyTruncateFileProspector) onRotatedFile( + log *logp.Logger, + ctx input.Context, + fe loginp.FSEvent, + src loginp.Source, + hg loginp.HarvesterGroup, +) { + // Continue reading the rotated file from where we have left off with the original. + // The original will be picked up again when updated and read from the beginning. 
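+	// Example with suffix_regex `\.\d$`: when /var/log/boot.log.1 appears,
+	// originalPath resolves to /var/log/boot.log and the harvester started for
+	// boot.log.1 continues from the offset recorded for boot.log, while boot.log
+	// itself is reset to offset 0 by the OpTruncate case in Run once truncated.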
+ originalPath := p.rotatedSuffix.ReplaceAllLiteralString(fe.NewPath, "") + // if we haven't encountered the original file which was rotated, get its information + if !p.rotatedFiles.isOriginalAdded(originalPath) { + fi, err := os.Stat(originalPath) + if err != nil { + log.Errorf("Cannot continue file, error while getting the information of the original file: %+v", err) + log.Debugf("Starting possibly rotated file from the beginning: %s", fe.NewPath) + hg.Start(ctx, src) + return + } + originalSrc := p.identifier.GetSource(loginp.FSEvent{NewPath: originalPath, Info: fi}) + p.rotatedFiles.addOriginalFile(originalPath, originalSrc) + p.rotatedFiles.addRotatedFile(originalPath, fe.NewPath, src) + hg.Start(ctx, src) + return + } + + idx := p.rotatedFiles.addRotatedFile(originalPath, fe.NewPath, src) + if idx == copiedFileIdx { + // if a file is the most fresh rotated file, continue reading from + // where we have left off with the active file. + previousSrc := p.rotatedFiles.table[originalPath].originalSrc + hg.Continue(ctx, previousSrc, src) + } else { + // if a file is rotated but not the most fresh rotated file, + // read it from where have left off. + if fe.Op != loginp.OpRename { + hg.Start(ctx, src) + } + } +} diff --git a/filebeat/input/filestream/copytruncate_prospector_test.go b/filebeat/input/filestream/copytruncate_prospector_test.go new file mode 100644 index 000000000000..52d6b1e33915 --- /dev/null +++ b/filebeat/input/filestream/copytruncate_prospector_test.go @@ -0,0 +1,276 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package filestream + +import ( + "context" + "fmt" + "regexp" + "testing" + + "github.com/stretchr/testify/require" + + loginp "github.com/elastic/beats/v7/filebeat/input/filestream/internal/input-logfile" + input "github.com/elastic/beats/v7/filebeat/input/v2" + "github.com/elastic/beats/v7/libbeat/logp" +) + +func TestCopyTruncateProspector_Create(t *testing.T) { + testCases := map[string]struct { + events []loginp.FSEvent + expectedEvents []harvesterEvent + expectedRotatedFiles map[string][]string + }{ + "one new file, then rotated": { + events: []loginp.FSEvent{ + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file"}, + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file.1"}, + }, + expectedEvents: []harvesterEvent{ + harvesterStart("path::/path/to/file"), + harvesterContinue("path::/path/to/file -> path::/path/to/file.1"), + harvesterGroupStop{}, + }, + expectedRotatedFiles: map[string][]string{ + "/path/to/file": []string{ + "/path/to/file.1", + }, + }, + }, + "one new file, then rotated twice in order": { + events: []loginp.FSEvent{ + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file"}, + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file.1"}, + loginp.FSEvent{Op: loginp.OpTruncate, NewPath: "/path/to/file"}, + loginp.FSEvent{Op: loginp.OpRename, NewPath: "/path/to/file.2", OldPath: "/path/to/file.1"}, + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file.1"}, + loginp.FSEvent{Op: loginp.OpTruncate, NewPath: "/path/to/file"}, + }, + expectedEvents: []harvesterEvent{ + harvesterStart("path::/path/to/file"), + harvesterContinue("path::/path/to/file -> path::/path/to/file.1"), + harvesterRestart("path::/path/to/file"), + harvesterStop("path::/path/to/file.1"), + harvesterStart("path::/path/to/file.2"), + harvesterContinue("path::/path/to/file -> path::/path/to/file.1"), + harvesterRestart("path::/path/to/file"), + harvesterGroupStop{}, + }, + expectedRotatedFiles: map[string][]string{ + "/path/to/file": []string{ + "/path/to/file.1", + "/path/to/file.2", + }, + }, + }, + "one new file, then rotated twice with renaming": { + events: []loginp.FSEvent{ + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file.2"}, + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file"}, + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file.1"}, + loginp.FSEvent{Op: loginp.OpRename, NewPath: "/path/to/file.3", OldPath: "/path/to/file.2"}, + loginp.FSEvent{Op: loginp.OpRename, NewPath: "/path/to/file.2", OldPath: "/path/to/file.1"}, + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file.1"}, + loginp.FSEvent{Op: loginp.OpTruncate, NewPath: "/path/to/file"}, + }, + expectedEvents: []harvesterEvent{ + harvesterStart("path::/path/to/file.2"), + harvesterStart("path::/path/to/file"), + harvesterContinue("path::/path/to/file -> path::/path/to/file.1"), + harvesterStop("path::/path/to/file.2"), + harvesterStart("path::/path/to/file.3"), + harvesterStop("path::/path/to/file.1"), + harvesterStart("path::/path/to/file.2"), + harvesterContinue("path::/path/to/file -> path::/path/to/file.1"), + harvesterRestart("path::/path/to/file"), + harvesterGroupStop{}, + }, + expectedRotatedFiles: map[string][]string{ + "/path/to/file": []string{ + "/path/to/file.1", + "/path/to/file.2", + "/path/to/file.3", + }, + }, + }, + "first rotated file, when rotated file not exist": { + events: []loginp.FSEvent{ + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file.1"}, + }, + expectedEvents: []harvesterEvent{ + harvesterStart("path::/path/to/file.1"), + 
harvesterGroupStop{}, + }, + expectedRotatedFiles: map[string][]string{}, + }, + } + + for name, test := range testCases { + test := test + + t.Run(name, func(t *testing.T) { + p := copyTruncateFileProspector{ + fileProspector{ + filewatcher: &mockFileWatcher{events: test.events}, + identifier: mustPathIdentifier(false), + }, + regexp.MustCompile("\\.\\d$"), + &rotatedFilestreams{make(map[string]*rotatedFilestream), newNumericSorter()}, + } + ctx := input.Context{Logger: logp.L(), Cancelation: context.Background()} + hg := newTestHarvesterGroup() + + p.Run(ctx, newMockMetadataUpdater(), hg) + + require.Equal(t, len(test.expectedEvents), len(hg.events)) + for i := 0; i < len(test.expectedEvents); i++ { + require.Equal(t, test.expectedEvents[i], hg.events[i]) + } + + for originalFile, rotatedFiles := range test.expectedRotatedFiles { + rFile, ok := p.rotatedFiles.table[originalFile] + if !ok { + fmt.Printf("cannot find %s in original files\n", originalFile) + t.FailNow() + } + require.Equal(t, len(rotatedFiles), len(rFile.rotated)) + for i, rotatedFile := range rotatedFiles { + if rFile.rotated[i].path != rotatedFile { + fmt.Printf("%s is not a rotated file, instead %s is\n", rFile.rotated[i].path, rotatedFile) + t.FailNow() + } + } + } + }) + } +} + +func TestNumericSorter(t *testing.T) { + testCases := map[string]struct { + fileinfos []rotatedFileInfo + expectedOrder []string + }{ + "one fileinfo": { + fileinfos: []rotatedFileInfo{ + rotatedFileInfo{path: "/path/to/apache.log.1"}, + }, + expectedOrder: []string{ + "/path/to/apache.log.1", + }, + }, + "ordered fileinfos": { + fileinfos: []rotatedFileInfo{ + rotatedFileInfo{path: "/path/to/apache.log.1"}, + rotatedFileInfo{path: "/path/to/apache.log.2"}, + rotatedFileInfo{path: "/path/to/apache.log.3"}, + }, + expectedOrder: []string{ + "/path/to/apache.log.1", + "/path/to/apache.log.2", + "/path/to/apache.log.3", + }, + }, + "unordered fileinfos": { + fileinfos: []rotatedFileInfo{ + rotatedFileInfo{path: "/path/to/apache.log.3"}, + rotatedFileInfo{path: "/path/to/apache.log.1"}, + rotatedFileInfo{path: "/path/to/apache.log.2"}, + }, + expectedOrder: []string{ + "/path/to/apache.log.1", + "/path/to/apache.log.2", + "/path/to/apache.log.3", + }, + }, + "unordered fileinfos with numbers in filename": { + fileinfos: []rotatedFileInfo{ + rotatedFileInfo{path: "/path/to/apache42.log.3"}, + rotatedFileInfo{path: "/path/to/apache43.log.1"}, + rotatedFileInfo{path: "/path/to/apache44.log.2"}, + }, + expectedOrder: []string{ + "/path/to/apache43.log.1", + "/path/to/apache44.log.2", + "/path/to/apache42.log.3", + }, + }, + } + sorter := newNumericSorter() + + for name, test := range testCases { + test := test + t.Run(name, func(t *testing.T) { + sorter.sort(test.fileinfos) + for i, fi := range test.fileinfos { + require.Equal(t, test.expectedOrder[i], fi.path) + } + + }) + } +} +func TestDateSorter(t *testing.T) { + testCases := map[string]struct { + fileinfos []rotatedFileInfo + expectedOrder []string + }{ + "one fileinfo": { + fileinfos: []rotatedFileInfo{ + rotatedFileInfo{path: "/path/to/apache.log-20140506"}, + }, + expectedOrder: []string{ + "/path/to/apache.log-20140506", + }, + }, + "ordered fileinfos": { + fileinfos: []rotatedFileInfo{ + rotatedFileInfo{path: "/path/to/apache.log-20140506"}, + rotatedFileInfo{path: "/path/to/apache.log-20140507"}, + rotatedFileInfo{path: "/path/to/apache.log-20140508"}, + }, + expectedOrder: []string{ + "/path/to/apache.log-20140508", + "/path/to/apache.log-20140507", + "/path/to/apache.log-20140506", + }, + 
}, + "unordered fileinfos": { + fileinfos: []rotatedFileInfo{ + rotatedFileInfo{path: "/path/to/apache.log-20140507"}, + rotatedFileInfo{path: "/path/to/apache.log-20140508"}, + rotatedFileInfo{path: "/path/to/apache.log-20140506"}, + }, + expectedOrder: []string{ + "/path/to/apache.log-20140508", + "/path/to/apache.log-20140507", + "/path/to/apache.log-20140506", + }, + }, + } + sorter := dateSorter{"-20060102"} + + for name, test := range testCases { + test := test + t.Run(name, func(t *testing.T) { + sorter.sort(test.fileinfos) + for i, fi := range test.fileinfos { + require.Equal(t, test.expectedOrder[i], fi.path) + } + + }) + } +} diff --git a/filebeat/input/filestream/fswatch_test.go b/filebeat/input/filestream/fswatch_test.go index c2c01a53da2c..54fe3804f02d 100644 --- a/filebeat/input/filestream/fswatch_test.go +++ b/filebeat/input/filestream/fswatch_test.go @@ -129,69 +129,69 @@ func TestFileWatchNewDeleteModified(t *testing.T) { "one new file": { prevFiles: map[string]os.FileInfo{}, nextFiles: map[string]os.FileInfo{ - "new_path": testFileInfo{"new_path", 5, oldTs}, + "new_path": testFileInfo{"new_path", 5, oldTs, nil}, }, expectedEvents: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpCreate, OldPath: "", NewPath: "new_path", Info: testFileInfo{"new_path", 5, oldTs}}, + loginp.FSEvent{Op: loginp.OpCreate, OldPath: "", NewPath: "new_path", Info: testFileInfo{"new_path", 5, oldTs, nil}}, }, }, "one deleted file": { prevFiles: map[string]os.FileInfo{ - "old_path": testFileInfo{"old_path", 5, oldTs}, + "old_path": testFileInfo{"old_path", 5, oldTs, nil}, }, nextFiles: map[string]os.FileInfo{}, expectedEvents: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpDelete, OldPath: "old_path", NewPath: "", Info: testFileInfo{"old_path", 5, oldTs}}, + loginp.FSEvent{Op: loginp.OpDelete, OldPath: "old_path", NewPath: "", Info: testFileInfo{"old_path", 5, oldTs, nil}}, }, }, "one modified file": { prevFiles: map[string]os.FileInfo{ - "path": testFileInfo{"path", 5, oldTs}, + "path": testFileInfo{"path", 5, oldTs, nil}, }, nextFiles: map[string]os.FileInfo{ - "path": testFileInfo{"path", 10, newTs}, + "path": testFileInfo{"path", 10, newTs, nil}, }, expectedEvents: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpWrite, OldPath: "path", NewPath: "path", Info: testFileInfo{"path", 10, newTs}}, + loginp.FSEvent{Op: loginp.OpWrite, OldPath: "path", NewPath: "path", Info: testFileInfo{"path", 10, newTs, nil}}, }, }, "two modified files": { prevFiles: map[string]os.FileInfo{ - "path1": testFileInfo{"path1", 5, oldTs}, - "path2": testFileInfo{"path2", 5, oldTs}, + "path1": testFileInfo{"path1", 5, oldTs, nil}, + "path2": testFileInfo{"path2", 5, oldTs, nil}, }, nextFiles: map[string]os.FileInfo{ - "path1": testFileInfo{"path1", 10, newTs}, - "path2": testFileInfo{"path2", 10, newTs}, + "path1": testFileInfo{"path1", 10, newTs, nil}, + "path2": testFileInfo{"path2", 10, newTs, nil}, }, expectedEvents: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpWrite, OldPath: "path1", NewPath: "path1", Info: testFileInfo{"path1", 10, newTs}}, - loginp.FSEvent{Op: loginp.OpWrite, OldPath: "path2", NewPath: "path2", Info: testFileInfo{"path2", 10, newTs}}, + loginp.FSEvent{Op: loginp.OpWrite, OldPath: "path1", NewPath: "path1", Info: testFileInfo{"path1", 10, newTs, nil}}, + loginp.FSEvent{Op: loginp.OpWrite, OldPath: "path2", NewPath: "path2", Info: testFileInfo{"path2", 10, newTs, nil}}, }, }, "one modified file, one new file": { prevFiles: map[string]os.FileInfo{ - "path1": testFileInfo{"path1", 5, oldTs}, + 
"path1": testFileInfo{"path1", 5, oldTs, nil}, }, nextFiles: map[string]os.FileInfo{ - "path1": testFileInfo{"path1", 10, newTs}, - "path2": testFileInfo{"path2", 10, newTs}, + "path1": testFileInfo{"path1", 10, newTs, nil}, + "path2": testFileInfo{"path2", 10, newTs, nil}, }, expectedEvents: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpWrite, OldPath: "path1", NewPath: "path1", Info: testFileInfo{"path1", 10, newTs}}, - loginp.FSEvent{Op: loginp.OpCreate, OldPath: "", NewPath: "path2", Info: testFileInfo{"path2", 10, newTs}}, + loginp.FSEvent{Op: loginp.OpWrite, OldPath: "path1", NewPath: "path1", Info: testFileInfo{"path1", 10, newTs, nil}}, + loginp.FSEvent{Op: loginp.OpCreate, OldPath: "", NewPath: "path2", Info: testFileInfo{"path2", 10, newTs, nil}}, }, }, "one new file, one deleted file": { prevFiles: map[string]os.FileInfo{ - "path_deleted": testFileInfo{"path_deleted", 5, oldTs}, + "path_deleted": testFileInfo{"path_deleted", 5, oldTs, nil}, }, nextFiles: map[string]os.FileInfo{ - "path_new": testFileInfo{"path_new", 10, newTs}, + "path_new": testFileInfo{"path_new", 10, newTs, nil}, }, expectedEvents: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpDelete, OldPath: "path_deleted", NewPath: "", Info: testFileInfo{"path_deleted", 5, oldTs}}, - loginp.FSEvent{Op: loginp.OpCreate, OldPath: "", NewPath: "path_new", Info: testFileInfo{"path_new", 10, newTs}}, + loginp.FSEvent{Op: loginp.OpDelete, OldPath: "path_deleted", NewPath: "", Info: testFileInfo{"path_deleted", 5, oldTs, nil}}, + loginp.FSEvent{Op: loginp.OpCreate, OldPath: "", NewPath: "path_new", Info: testFileInfo{"path_new", 10, newTs, nil}}, }, }, } @@ -232,6 +232,7 @@ type testFileInfo struct { path string size int64 time time.Time + sys interface{} } func (t testFileInfo) Name() string { return t.path } @@ -239,7 +240,7 @@ func (t testFileInfo) Size() int64 { return t.size } func (t testFileInfo) Mode() os.FileMode { return 0 } func (t testFileInfo) ModTime() time.Time { return t.time } func (t testFileInfo) IsDir() bool { return false } -func (t testFileInfo) Sys() interface{} { return nil } +func (t testFileInfo) Sys() interface{} { return t.sys } func mustDuration(durStr string) time.Duration { dur, err := time.ParseDuration(durStr) diff --git a/filebeat/input/filestream/identifier.go b/filebeat/input/filestream/identifier.go index bde88aa03fe4..7b28a1d3cbac 100644 --- a/filebeat/input/filestream/identifier.go +++ b/filebeat/input/filestream/identifier.go @@ -64,6 +64,7 @@ type fileSource struct { newPath string oldPath string truncated bool + archived bool name string identifierGenerator string @@ -105,6 +106,7 @@ func (i *inodeDeviceIdentifier) GetSource(e loginp.FSEvent) fileSource { newPath: e.NewPath, oldPath: e.OldPath, truncated: e.Op == loginp.OpTruncate, + archived: e.Op == loginp.OpArchived, name: i.name + identitySep + file.GetOSState(e.Info).String(), identifierGenerator: i.name, } @@ -143,6 +145,7 @@ func (p *pathIdentifier) GetSource(e loginp.FSEvent) fileSource { newPath: e.NewPath, oldPath: e.OldPath, truncated: e.Op == loginp.OpTruncate, + archived: e.Op == loginp.OpArchived, name: p.name + identitySep + path, identifierGenerator: p.name, } diff --git a/filebeat/input/filestream/identifier_inode_deviceid.go b/filebeat/input/filestream/identifier_inode_deviceid.go index fb87708dd181..291bc0ad3578 100644 --- a/filebeat/input/filestream/identifier_inode_deviceid.go +++ b/filebeat/input/filestream/identifier_inode_deviceid.go @@ -99,6 +99,7 @@ func (i *inodeMarkerIdentifier) GetSource(e loginp.FSEvent) 
fileSource { newPath: e.NewPath, oldPath: e.OldPath, truncated: e.Op == loginp.OpTruncate, + archived: e.Op == loginp.OpArchived, name: i.name + identitySep + osstate.InodeString() + "-" + i.markerContents(), identifierGenerator: i.name, } diff --git a/filebeat/input/filestream/input.go b/filebeat/input/filestream/input.go index 8294baa85d2d..e143280e5b90 100644 --- a/filebeat/input/filestream/input.go +++ b/filebeat/input/filestream/input.go @@ -83,14 +83,9 @@ func configure(cfg *common.Config) (loginp.Prospector, loginp.Harvester, error) return nil, nil, err } - filewatcher, err := newFileWatcher(config.Paths, config.FileWatcher) + prospector, err := newProspector(config) if err != nil { - return nil, nil, fmt.Errorf("error while creating filewatcher %v", err) - } - - identifier, err := newFileIdentifier(config.FileIdentity) - if err != nil { - return nil, nil, fmt.Errorf("error while creating file identifier: %v", err) + return nil, nil, fmt.Errorf("cannot create prospector: %w", err) } encodingFactory, ok := encoding.FindEncoding(config.Reader.Encoding) @@ -98,14 +93,6 @@ func configure(cfg *common.Config) (loginp.Prospector, loginp.Harvester, error) return nil, nil, fmt.Errorf("unknown encoding('%v')", config.Reader.Encoding) } - prospector := &fileProspector{ - filewatcher: filewatcher, - identifier: identifier, - ignoreOlder: config.IgnoreOlder, - cleanRemoved: config.CleanRemoved, - stateChangeCloser: config.Close.OnStateChange, - } - filestream := &filestream{ readerConfig: config.Reader, encodingFactory: encodingFactory, @@ -123,7 +110,7 @@ func (inp *filestream) Test(src loginp.Source, ctx input.TestContext) error { return fmt.Errorf("not file source") } - reader, err := inp.open(ctx.Logger, ctx.Cancelation, fs.newPath, 0) + reader, err := inp.open(ctx.Logger, ctx.Cancelation, fs, 0) if err != nil { return err } @@ -144,7 +131,7 @@ func (inp *filestream) Run( log := ctx.Logger.With("path", fs.newPath).With("state-id", src.Name()) state := initState(log, cursor, fs) - r, err := inp.open(log, ctx.Cancelation, fs.newPath, state.Offset) + r, err := inp.open(log, ctx.Cancelation, fs, state.Offset) if err != nil { log.Errorf("File could not be opened for reading: %v", err) return err @@ -176,18 +163,30 @@ func initState(log *logp.Logger, c loginp.Cursor, s fileSource) state { return state } -func (inp *filestream) open(log *logp.Logger, canceler input.Canceler, path string, offset int64) (reader.Reader, error) { - f, err := inp.openFile(log, path, offset) +func (inp *filestream) open(log *logp.Logger, canceler input.Canceler, fs fileSource, offset int64) (reader.Reader, error) { + f, err := inp.openFile(log, fs.newPath, offset) if err != nil { return nil, err } log.Debug("newLogFileReader with config.MaxBytes:", inp.readerConfig.MaxBytes) + // if the file is archived, it means that it is not going to be updated in the future + // thus, when EOF is reached, it can be closed + closerCfg := inp.closerConfig + if fs.archived && !inp.closerConfig.Reader.OnEOF { + closerCfg = closerConfig{ + Reader: readerCloserConfig{ + OnEOF: true, + AfterInterval: inp.closerConfig.Reader.AfterInterval, + }, + OnStateChange: inp.closerConfig.OnStateChange, + } + } // TODO: NewLineReader uses additional buffering to deal with encoding and testing // for new lines in input stream. Simple 8-bit based encodings, or plain // don't require 'complicated' logic. 
- logReader, err := newFileReader(log, canceler, f, inp.readerConfig, inp.closerConfig) + logReader, err := newFileReader(log, canceler, f, inp.readerConfig, closerCfg) if err != nil { return nil, err } @@ -218,7 +217,7 @@ func (inp *filestream) open(log *logp.Logger, canceler input.Canceler, path stri r = readfile.NewStripNewline(r, inp.readerConfig.LineTerminator) - r = readfile.NewFilemeta(r, path) + r = readfile.NewFilemeta(r, fs.newPath) r, err = newParsers(r, parserConfig{maxBytes: inp.readerConfig.MaxBytes, lineTerminator: inp.readerConfig.LineTerminator}, inp.readerConfig.Parsers) if err != nil { diff --git a/filebeat/input/filestream/internal/input-logfile/fswatch.go b/filebeat/input/filestream/internal/input-logfile/fswatch.go index 56235e6c4bc5..9982f370e4f9 100644 --- a/filebeat/input/filestream/internal/input-logfile/fswatch.go +++ b/filebeat/input/filestream/internal/input-logfile/fswatch.go @@ -30,11 +30,32 @@ const ( OpDelete OpRename OpTruncate + OpArchived +) + +var ( + operationNames = map[Operation]string{ + OpDone: "done", + OpCreate: "create", + OpWrite: "write", + OpDelete: "delete", + OpRename: "rename", + OpTruncate: "truncate", + OpArchived: "archive", + } ) // Operation describes what happened to a file. type Operation uint8 +func (o *Operation) String() string { + name, ok := operationNames[*o] + if !ok { + return "" + } + return name +} + // FSEvent returns inforamation about file system changes. type FSEvent struct { // NewPath is the new path of the file. diff --git a/filebeat/input/filestream/internal/input-logfile/harvester.go b/filebeat/input/filestream/internal/input-logfile/harvester.go index 00b14bc498a8..926485ab1819 100644 --- a/filebeat/input/filestream/internal/input-logfile/harvester.go +++ b/filebeat/input/filestream/internal/input-logfile/harvester.go @@ -119,6 +119,8 @@ type HarvesterGroup interface { Start(input.Context, Source) // Restart starts a Harvester if it might be already running. Restart(input.Context, Source) + // Continue starts a new Harvester with the state information of the previous. + Continue(ctx input.Context, previous, next Source) // Stop cancels the reader of a given Source. Stop(Source) // StopGroup cancels all running Harvesters. @@ -217,6 +219,36 @@ func startHarvester(ctx input.Context, hg *defaultHarvesterGroup, s Source, rest } } +// Continue start a new Harvester with the state information from a different Source. +func (hg *defaultHarvesterGroup) Continue(ctx input.Context, previous, next Source) { + ctx.Logger.Debugf("Continue harvester for file prev=%s, next=%s", previous.Name(), next.Name()) + prevID := hg.identifier.ID(previous) + nextID := hg.identifier.ID(next) + + hg.tg.Go(func(canceler unison.Canceler) error { + previousResource, err := lock(ctx, hg.store, prevID) + if err != nil { + return fmt.Errorf("error while locking previous resource: %v", err) + } + // mark previous state out of date + // so when reading starts again the offset is set to zero + hg.store.remove(prevID) + + nextResource, err := lock(ctx, hg.store, nextID) + if err != nil { + return fmt.Errorf("error while locking next resource: %v", err) + } + hg.store.UpdateTTL(nextResource, hg.cleanTimeout) + + previousResource.copyInto(nextResource) + releaseResource(previousResource) + releaseResource(nextResource) + + hg.Start(ctx, next) + return nil + }) +} + // Stop stops the running Harvester for a given Source. 
func (hg *defaultHarvesterGroup) Stop(s Source) { hg.tg.Go(func(_ unison.Canceler) error { diff --git a/filebeat/input/filestream/internal/input-logfile/publish.go b/filebeat/input/filestream/internal/input-logfile/publish.go index ddc389321b1f..fa4950619196 100644 --- a/filebeat/input/filestream/internal/input-logfile/publish.go +++ b/filebeat/input/filestream/internal/input-logfile/publish.go @@ -128,7 +128,7 @@ func (op *updateOp) Execute(n uint) { resource.stateMutex.Lock() defer resource.stateMutex.Unlock() - if resource.lockedVersion != op.resource.version { + if resource.lockedVersion != op.resource.version || resource.isDeleted() { return } diff --git a/filebeat/input/filestream/internal/input-logfile/store.go b/filebeat/input/filestream/internal/input-logfile/store.go index fe149f59d779..398c7db00ff4 100644 --- a/filebeat/input/filestream/internal/input-logfile/store.go +++ b/filebeat/input/filestream/internal/input-logfile/store.go @@ -93,6 +93,9 @@ type resource struct { // stored indicates that the state is available in the registry file. It is false for new entries. stored bool + // invalid indicates if the resource has been marked for deletion, if yes, it cannot be overwritten + // in the persistent state. + invalid bool // internalInSync is true if all 'Internal' metadata like TTL or update timestamp are in sync. // Normally resources are added when being created. But if operations failed we will retry inserting @@ -291,8 +294,12 @@ func (s *store) updateMetadata(key string, meta interface{}) error { } // writeState writes the state to the persistent store. -// WARNING! it does not lock the store +// WARNING! it does not lock the store or the resource. func (s *store) writeState(r *resource) { + if r.invalid { + return + } + err := s.persistentStore.Set(r.key, r.inSyncStateSnapshot()) if err != nil { s.log.Errorf("Failed to update resource fields for '%v'", r.key) @@ -301,6 +308,7 @@ func (s *store) writeState(r *resource) { r.stored = true r.internalInSync = true } + } // resetCursor sets the cursor to the value in cur in the persistent store and @@ -332,7 +340,6 @@ func (s *store) remove(key string) error { if resource == nil { return fmt.Errorf("resource '%s' not found", key) } - s.UpdateTTL(resource, 0) return nil } @@ -341,6 +348,10 @@ func (s *store) remove(key string) error { // The TTL value is part of the internal state, and will be written immediately to the persistent store. // On update the resource its `cursor` state is used, to keep the cursor state in sync with the current known // on disk store state. +// +// If the TTL of the resource is set to 0, once it is persisted, it is going to be removed from the +// store in the next cleaner run. The resource also gets invalidated to make sure new updates are not +// saved to the registry. func (s *store) UpdateTTL(resource *resource, ttl time.Duration) { resource.stateMutex.Lock() defer resource.stateMutex.Unlock() @@ -354,6 +365,15 @@ func (s *store) UpdateTTL(resource *resource, ttl time.Duration) { } s.writeState(resource) + + if resource.isDeleted() { + // version must be incremented to make sure existing resource + // instances do not overwrite the removal of the entry + resource.version++ + // invalidate it after it has been persisted to make sure it cannot + //be overwritten in the persistent store + resource.invalid = true + } } // Find returns the resource for a given key. If the key is unknown and create is set to false nil will be returned. 
@@ -362,7 +382,7 @@ func (s *states) Find(key string, create bool) *resource { s.mu.Lock() defer s.mu.Unlock() - if resource := s.table[key]; resource != nil { + if resource := s.table[key]; resource != nil && !resource.isDeleted() { resource.Retain() return resource } @@ -389,6 +409,10 @@ func (r *resource) IsNew() bool { return r.pendingCursor == nil && r.cursor == nil } +func (r *resource) isDeleted() bool { + return !r.internalState.Updated.IsZero() && r.internalState.TTL == 0 +} + // Retain is used to indicate that 'resource' gets an additional 'owner'. // Owners of an resource can be active inputs or pending update operations // not yet written to disk. @@ -430,6 +454,27 @@ func (r *resource) inSyncStateSnapshot() state { } } +func (r *resource) copyInto(dst *resource) { + r.stateMutex.Lock() + defer r.stateMutex.Unlock() + + internalState := r.internalState + + // This is required to prevent the cleaner from removing the + // entry from the registry immediately. + // It still might be removed if the output is blocked for a long + // time. If removed the whole file is resent to the output when found/updated. + internalState.Updated = time.Now() + dst.stored = r.stored + dst.internalInSync = true + dst.internalState = internalState + dst.activeCursorOperations = r.activeCursorOperations + dst.cursor = r.cursor + dst.pendingCursor = nil + dst.cursorMeta = r.cursorMeta + dst.lock = unison.MakeMutex() +} + func (r *resource) copyWithNewKey(key string) *resource { internalState := r.internalState @@ -447,6 +492,7 @@ func (r *resource) copyWithNewKey(key string) *resource { cursor: r.cursor, pendingCursor: nil, cursorMeta: r.cursorMeta, + lock: unison.MakeMutex(), } } diff --git a/filebeat/input/filestream/logger.go b/filebeat/input/filestream/logger.go new file mode 100644 index 000000000000..ebb1a2bc71a1 --- /dev/null +++ b/filebeat/input/filestream/logger.go @@ -0,0 +1,41 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package filestream + +import ( + loginp "github.com/elastic/beats/v7/filebeat/input/filestream/internal/input-logfile" + "github.com/elastic/beats/v7/libbeat/common/file" + "github.com/elastic/beats/v7/libbeat/logp" +) + +func loggerWithEvent(logger *logp.Logger, event loginp.FSEvent, src loginp.Source) *logp.Logger { + log := logger.With( + "operation", event.Op, + "source_name", src.Name(), + ) + if event.Info != nil && event.Info.Sys() != nil { + log = log.With("os_id", file.GetOSState(event.Info)) + } + if event.NewPath != "" { + log = log.With("new_path", event.NewPath) + } + if event.OldPath != "" { + log = log.With("old_path", event.OldPath) + } + return log +} diff --git a/filebeat/input/filestream/prospector.go b/filebeat/input/filestream/prospector.go index 97bf14efa7c0..a820758f6249 100644 --- a/filebeat/input/filestream/prospector.go +++ b/filebeat/input/filestream/prospector.go @@ -129,6 +129,8 @@ func (p *fileProspector) Run(ctx input.Context, s loginp.StateMetadataUpdater, h } src := p.identifier.GetSource(fe) + log = loggerWithEvent(log, fe, src) + switch fe.Op { case loginp.OpCreate, loginp.OpWrite: if fe.Op == loginp.OpCreate { @@ -143,15 +145,7 @@ func (p *fileProspector) Run(ctx input.Context, s loginp.StateMetadataUpdater, h log.Debugf("File %s has been updated", fe.NewPath) } - if p.ignoreOlder > 0 { - now := time.Now() - if now.Sub(fe.Info.ModTime()) > p.ignoreOlder { - log.Debugf("Ignore file because ignore_older reached. File %s", fe.NewPath) - break - } - } - if !ignoreInactiveSince.IsZero() && fe.Info.ModTime().Sub(ignoreInactiveSince) <= 0 { - log.Debugf("Ignore file because ignore_since.* reached time %v. File %s", p.ignoreInactiveSince, fe.NewPath) + if p.isFileIgnored(log, fe, ignoreInactiveSince) { break } @@ -166,58 +160,12 @@ func (p *fileProspector) Run(ctx input.Context, s loginp.StateMetadataUpdater, h case loginp.OpDelete: log.Debugf("File %s has been removed", fe.OldPath) - if p.stateChangeCloser.Removed { - log.Debugf("Stopping harvester as file %s has been removed and close.on_state_change.removed is enabled.", src.Name()) - hg.Stop(src) - } - - if p.cleanRemoved { - log.Debugf("Remove state for file as file removed: %s", fe.OldPath) - - err := s.Remove(src) - if err != nil { - log.Errorf("Error while removing state from statestore: %v", err) - } - } + p.onRemove(log, fe, src, s, hg) case loginp.OpRename: log.Debugf("File %s has been renamed to %s", fe.OldPath, fe.NewPath) - // if file_identity is based on path, the current reader has to be cancelled - // and a new one has to start. 
- if !p.identifier.Supports(trackRename) { - prevSrc := p.identifier.GetSource(loginp.FSEvent{NewPath: fe.OldPath}) - hg.Stop(prevSrc) - - log.Debugf("Remove state for file as file renamed and path file_identity is configured: %s", fe.OldPath) - err := s.Remove(prevSrc) - if err != nil { - log.Errorf("Error while removing old state of renamed file (%s): %v", fe.OldPath, err) - } - - hg.Start(ctx, src) - } else { - // update file metadata as the path has changed - var meta fileMeta - err := s.FindCursorMeta(src, meta) - if err != nil { - log.Errorf("Error while getting cursor meta data of entry %s: %v", src.Name(), err) - - meta.IdentifierName = p.identifier.Name() - } - err = s.UpdateMetadata(src, fileMeta{Source: src.newPath, IdentifierName: meta.IdentifierName}) - if err != nil { - log.Errorf("Failed to update cursor meta data of entry %s: %v", src.Name(), err) - } - - if p.stateChangeCloser.Renamed { - log.Debugf("Stopping harvester as file %s has been renamed and close.on_state_change.renamed is enabled.", src.Name()) - - fe.Op = loginp.OpDelete - srcToClose := p.identifier.GetSource(fe) - hg.Stop(srcToClose) - } - } + p.onRename(log, ctx, fe, src, s, hg) default: log.Error("Unkown return value %v", fe.Op) @@ -232,6 +180,75 @@ func (p *fileProspector) Run(ctx input.Context, s loginp.StateMetadataUpdater, h } } +func (p *fileProspector) isFileIgnored(log *logp.Logger, fe loginp.FSEvent, ignoreInactiveSince time.Time) bool { + if p.ignoreOlder > 0 { + now := time.Now() + if now.Sub(fe.Info.ModTime()) > p.ignoreOlder { + log.Debugf("Ignore file because ignore_older reached. File %s", fe.NewPath) + return true + } + } + if !ignoreInactiveSince.IsZero() && fe.Info.ModTime().Sub(ignoreInactiveSince) <= 0 { + log.Debugf("Ignore file because ignore_since.* reached time %v. File %s", p.ignoreInactiveSince, fe.NewPath) + return true + } + return false +} + +func (p *fileProspector) onRemove(log *logp.Logger, fe loginp.FSEvent, src loginp.Source, s loginp.StateMetadataUpdater, hg loginp.HarvesterGroup) { + if p.stateChangeCloser.Removed { + log.Debugf("Stopping harvester as file %s has been removed and close.on_state_change.removed is enabled.", src.Name()) + hg.Stop(src) + } + + if p.cleanRemoved { + log.Debugf("Remove state for file as file removed: %s", fe.OldPath) + + err := s.Remove(src) + if err != nil { + log.Errorf("Error while removing state from statestore: %v", err) + } + } +} + +func (p *fileProspector) onRename(log *logp.Logger, ctx input.Context, fe loginp.FSEvent, src loginp.Source, s loginp.StateMetadataUpdater, hg loginp.HarvesterGroup) { + // if file_identity is based on path, the current reader has to be cancelled + // and a new one has to start. 
+ if !p.identifier.Supports(trackRename) { + prevSrc := p.identifier.GetSource(loginp.FSEvent{NewPath: fe.OldPath}) + hg.Stop(prevSrc) + + log.Debugf("Remove state for file as file renamed and path file_identity is configured: %s", fe.OldPath) + err := s.Remove(prevSrc) + if err != nil { + log.Errorf("Error while removing old state of renamed file (%s): %v", fe.OldPath, err) + } + + hg.Start(ctx, src) + } else { + // update file metadata as the path has changed + var meta fileMeta + err := s.FindCursorMeta(src, meta) + if err != nil { + log.Errorf("Error while getting cursor meta data of entry %s: %v", src.Name(), err) + + meta.IdentifierName = p.identifier.Name() + } + err = s.UpdateMetadata(src, fileMeta{Source: fe.NewPath, IdentifierName: meta.IdentifierName}) + if err != nil { + log.Errorf("Failed to update cursor meta data of entry %s: %v", src.Name(), err) + } + + if p.stateChangeCloser.Renamed { + log.Debugf("Stopping harvester as file %s has been renamed and close.on_state_change.renamed is enabled.", src.Name()) + + fe.Op = loginp.OpDelete + srcToClose := p.identifier.GetSource(fe) + hg.Stop(srcToClose) + } + } +} + func (p *fileProspector) stopHarvesterGroup(log *logp.Logger, hg loginp.HarvesterGroup) { err := hg.StopGroup() if err != nil { diff --git a/filebeat/input/filestream/prospector_creator.go b/filebeat/input/filestream/prospector_creator.go new file mode 100644 index 000000000000..59f86d1426a8 --- /dev/null +++ b/filebeat/input/filestream/prospector_creator.go @@ -0,0 +1,106 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package filestream + +import ( + "fmt" + "regexp" + "sync" + + loginp "github.com/elastic/beats/v7/filebeat/input/filestream/internal/input-logfile" + "github.com/elastic/beats/v7/libbeat/common/cfgwarn" +) + +const ( + externalMode = "external" + internalMode = "internal" + + copytruncateStrategy = "copytruncate" +) + +var ( + experimentalWarning sync.Once +) + +func newProspector(config config) (loginp.Prospector, error) { + filewatcher, err := newFileWatcher(config.Paths, config.FileWatcher) + if err != nil { + return nil, fmt.Errorf("error while creating filewatcher %v", err) + } + + identifier, err := newFileIdentifier(config.FileIdentity) + if err != nil { + return nil, fmt.Errorf("error while creating file identifier: %v", err) + } + + fileprospector := fileProspector{ + filewatcher: filewatcher, + identifier: identifier, + ignoreOlder: config.IgnoreOlder, + cleanRemoved: config.CleanRemoved, + stateChangeCloser: config.Close.OnStateChange, + } + if config.Rotation == nil { + return &fileprospector, nil + } + + rotationMethod := config.Rotation.Name() + switch rotationMethod { + case "": + return &fileprospector, nil + + case internalMode: + return nil, fmt.Errorf("not implemented: internal log rotation") + + case externalMode: + externalConfig := config.Rotation.Config() + cfg := rotationConfig{} + err := externalConfig.Unpack(&cfg) + if err != nil { + return nil, fmt.Errorf("failed to unpack configuration of external rotation: %+v", err) + } + strategy := cfg.Strategy.Name() + switch strategy { + case copytruncateStrategy: + experimentalWarning.Do(func() { + cfgwarn.Experimental("rotation.external.copytruncate is used.") + }) + + cpCfg := ©TruncateConfig{} + err = cfg.Strategy.Config().Unpack(&cpCfg) + if err != nil { + return nil, fmt.Errorf("failed to unpack configuration of external copytruncate rotation: %+v", err) + } + suffix, err := regexp.Compile(cpCfg.SuffixRegex) + if err != nil { + return nil, fmt.Errorf("invalid suffix regex for copytruncate rotation") + } + fileprospector.stateChangeCloser.Renamed = false + return ©TruncateFileProspector{ + fileprospector, + suffix, + newRotatedFilestreams(cpCfg), + }, nil + default: + } + return nil, fmt.Errorf("no such external rotation strategy: %s", strategy) + + default: + } + return nil, fmt.Errorf("no such rotation method: %s", rotationMethod) +} diff --git a/filebeat/input/filestream/prospector_test.go b/filebeat/input/filestream/prospector_test.go index ffcdbcf31f9f..83dc2055df0c 100644 --- a/filebeat/input/filestream/prospector_test.go +++ b/filebeat/input/filestream/prospector_test.go @@ -170,8 +170,8 @@ func TestProspectorNewAndUpdatedFiles(t *testing.T) { }{ "two new files": { events: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file"}, - loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/other/file"}, + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/file", Info: testFileInfo{}}, + loginp.FSEvent{Op: loginp.OpCreate, NewPath: "/path/to/other/file", Info: testFileInfo{}}, }, expectedEvents: []harvesterEvent{ harvesterStart("path::/path/to/file"), @@ -181,7 +181,7 @@ func TestProspectorNewAndUpdatedFiles(t *testing.T) { }, "one updated file": { events: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpWrite, NewPath: "/path/to/file"}, + loginp.FSEvent{Op: loginp.OpWrite, NewPath: "/path/to/file", Info: testFileInfo{}}, }, expectedEvents: []harvesterEvent{ harvesterStart("path::/path/to/file"), @@ -190,8 +190,8 @@ func TestProspectorNewAndUpdatedFiles(t *testing.T) { }, "one updated 
then truncated file": { events: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpWrite, NewPath: "/path/to/file"}, - loginp.FSEvent{Op: loginp.OpTruncate, NewPath: "/path/to/file"}, + loginp.FSEvent{Op: loginp.OpWrite, NewPath: "/path/to/file", Info: testFileInfo{}}, + loginp.FSEvent{Op: loginp.OpTruncate, NewPath: "/path/to/file", Info: testFileInfo{}}, }, expectedEvents: []harvesterEvent{ harvesterStart("path::/path/to/file"), @@ -204,12 +204,12 @@ func TestProspectorNewAndUpdatedFiles(t *testing.T) { loginp.FSEvent{ Op: loginp.OpCreate, NewPath: "/path/to/file", - Info: testFileInfo{"/path/to/file", 5, minuteAgo}, + Info: testFileInfo{"/path/to/file", 5, minuteAgo, nil}, }, loginp.FSEvent{ Op: loginp.OpWrite, NewPath: "/path/to/other/file", - Info: testFileInfo{"/path/to/other/file", 5, minuteAgo}, + Info: testFileInfo{"/path/to/other/file", 5, minuteAgo, nil}, }, }, ignoreOlder: 10 * time.Second, @@ -222,12 +222,12 @@ func TestProspectorNewAndUpdatedFiles(t *testing.T) { loginp.FSEvent{ Op: loginp.OpCreate, NewPath: "/path/to/file", - Info: testFileInfo{"/path/to/file", 5, minuteAgo}, + Info: testFileInfo{"/path/to/file", 5, minuteAgo, nil}, }, loginp.FSEvent{ Op: loginp.OpWrite, NewPath: "/path/to/other/file", - Info: testFileInfo{"/path/to/other/file", 5, minuteAgo}, + Info: testFileInfo{"/path/to/other/file", 5, minuteAgo, nil}, }, }, ignoreOlder: 5 * time.Minute, @@ -265,13 +265,13 @@ func TestProspectorDeletedFile(t *testing.T) { }{ "one deleted file without clean removed": { events: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpDelete, OldPath: "/path/to/file"}, + loginp.FSEvent{Op: loginp.OpDelete, OldPath: "/path/to/file", Info: testFileInfo{}}, }, cleanRemoved: false, }, "one deleted file with clean removed": { events: []loginp.FSEvent{ - loginp.FSEvent{Op: loginp.OpDelete, OldPath: "/path/to/file"}, + loginp.FSEvent{Op: loginp.OpDelete, OldPath: "/path/to/file", Info: testFileInfo{}}, }, cleanRemoved: true, }, @@ -318,6 +318,7 @@ func TestProspectorRenamedFile(t *testing.T) { Op: loginp.OpRename, OldPath: "/old/path/to/file", NewPath: "/new/path/to/file", + Info: testFileInfo{}, }, }, expectedEvents: []harvesterEvent{ @@ -332,6 +333,7 @@ func TestProspectorRenamedFile(t *testing.T) { Op: loginp.OpRename, OldPath: "/old/path/to/file", NewPath: "/new/path/to/file", + Info: testFileInfo{}, }, }, trackRename: true, @@ -345,6 +347,7 @@ func TestProspectorRenamedFile(t *testing.T) { Op: loginp.OpRename, OldPath: "/old/path/to/file", NewPath: "/new/path/to/file", + Info: testFileInfo{}, }, }, trackRename: true, @@ -396,6 +399,10 @@ type harvesterRestart string func (h harvesterRestart) String() string { return string(h) } +type harvesterContinue string + +func (h harvesterContinue) String() string { return string(h) } + type harvesterStop string func (h harvesterStop) String() string { return string(h) } @@ -420,6 +427,10 @@ func (t *testHarvesterGroup) Restart(_ input.Context, s loginp.Source) { t.events = append(t.events, harvesterRestart(s.Name())) } +func (t *testHarvesterGroup) Continue(_ input.Context, p, s loginp.Source) { + t.events = append(t.events, harvesterContinue(p.Name()+" -> "+s.Name())) +} + func (t *testHarvesterGroup) Stop(s loginp.Source) { t.events = append(t.events, harvesterStop(s.Name())) } diff --git a/x-pack/filebeat/filebeat.reference.yml b/x-pack/filebeat/filebeat.reference.yml index e90809b1503c..f55994e9c83c 100644 --- a/x-pack/filebeat/filebeat.reference.yml +++ b/x-pack/filebeat/filebeat.reference.yml @@ -2750,6 +2750,16 @@ filebeat.inputs: # 
original for harvesting but will report the symlink name as source.
   #prospector.scanner.symlinks: false
 
+  ### Log rotation
+
+  # When an external tool rotates the input files with copytruncate strategy
+  # use this section to help the input find the rotated files.
+  #rotation.external.strategy.copytruncate:
+  #  Regex that matches the rotated files.
+  #  suffix_regex: \.\d$
+  #  If the rotated filename suffix is a datetime, set it here.
+  #  dateformat: -20060102
+
   ### State options
 
   # Files for the modification data is older then clean_inactive the state from the registry is removed

From 11064491efcae552cf67f0606ff4efb60e3d41a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?=
Date: Thu, 24 Jun 2021 15:57:15 +0200
Subject: [PATCH 4/7] Do not close filestream harvester if an unexpected error is returned when close.on_state_change.* is enabled (#26411)

## What does this PR do?

This PR returns early if `close.on_state_change.removed` is enabled and the opened file no longer exists. Otherwise, it logs an error message and keeps the reader running.

## Why is it important?

Previously, whenever the `Stat` call returned an error, a message was logged at error level and the reader was stopped. This was incorrect: with `close.on_state_change.renamed` enabled, the reader would also be closed when the file had been removed. Now the reader is not stopped on unexpected errors.
---
 CHANGELOG.next.asciidoc                 |  1 +
 filebeat/input/filestream/filestream.go | 13 ++++++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index 4653100fedd2..c172c1b03587 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -277,6 +277,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
 - Fix bug in aws-s3 input where the end of gzipped log files might have been discarded. {pull}26260[26260]
 - Fix bug in `httpjson` that prevented `first_event` getting updated. {pull}26407[26407]
 - Fix bug in the Syslog input that misparsed rfc5424 days starting with 0. {pull}26419[26419]
+- Do not close filestream harvester if an unexpected error is returned when close.on_state_change.* is enabled. {pull}26411[26411]
 
 *Filebeat*
 
diff --git a/filebeat/input/filestream/filestream.go b/filebeat/input/filestream/filestream.go
index 908d85581459..5e72dc927b95 100644
--- a/filebeat/input/filestream/filestream.go
+++ b/filebeat/input/filestream/filestream.go
@@ -179,14 +179,21 @@ func (f *logFile) shouldBeClosed() bool {
 
 	info, statErr := f.file.Stat()
 	if statErr != nil {
+		// return early if the file does not exist anymore and the reader should be closed
+		if f.closeRemoved && errors.Is(statErr, os.ErrNotExist) {
+			f.log.Debugf("close.on_state_change.removed is enabled and file %s has been removed", f.file.Name())
+			return true
+		}
+
+		// If an unexpected error happens, we keep the reader open, hoping that everything will go back to normal eventually.
 		f.log.Errorf("Unexpected error reading from %s; error: %s", f.file.Name(), statErr)
-		return true
+		return false
 	}
 
 	if f.closeRenamed {
 		// Check if the file can still be found under the same path
 		if !isSameFile(f.file.Name(), info) {
-			f.log.Debugf("close_renamed is enabled and file %s has been renamed", f.file.Name())
+			f.log.Debugf("close.on_state_change.renamed is enabled and file %s has been renamed", f.file.Name())
 			return true
 		}
 	}
@@ -194,7 +201,7 @@ func (f *logFile) shouldBeClosed() bool {
 
 	if f.closeRemoved {
 		// Check if the file name exists. 
See https://github.com/elastic/filebeat/issues/93 if file.IsRemoved(f.file) { - f.log.Debugf("close_removed is enabled and file %s has been removed", f.file.Name()) + f.log.Debugf("close.on_state_change.removed is enabled and file %s has been removed", f.file.Name()) return true } } From 1591c15ab83c42bee4f5d4a1a103cf4891572e9c Mon Sep 17 00:00:00 2001 From: HeGaoYuan Date: Thu, 24 Jun 2021 22:23:49 +0800 Subject: [PATCH 5/7] Update indexing.go godocs (#26408) --- libbeat/processors/add_kubernetes_metadata/indexing.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libbeat/processors/add_kubernetes_metadata/indexing.go b/libbeat/processors/add_kubernetes_metadata/indexing.go index 19f66ea12125..1b1e6a36fb59 100644 --- a/libbeat/processors/add_kubernetes_metadata/indexing.go +++ b/libbeat/processors/add_kubernetes_metadata/indexing.go @@ -62,21 +62,21 @@ func (r *Register) AddMatcher(name string, matcher MatcherConstructor) { r.matchers[name] = matcher } -// AddIndexer to the register +// AddDefaultIndexerConfig to the register func (r *Register) AddDefaultIndexerConfig(name string, config common.Config) { r.Lock() defer r.Unlock() r.defaultIndexerConfigs[name] = config } -// AddMatcher to the register +// AddDefaultMatcherConfig to the register func (r *Register) AddDefaultMatcherConfig(name string, config common.Config) { r.Lock() defer r.Unlock() r.defaultMatcherConfigs[name] = config } -// AddIndexer to the register +// GetIndexer from the register func (r *Register) GetIndexer(name string) IndexerConstructor { r.RLock() defer r.RUnlock() @@ -88,7 +88,7 @@ func (r *Register) GetIndexer(name string) IndexerConstructor { } } -// AddMatcher to the register +// GetMatcher from the register func (r *Register) GetMatcher(name string) MatcherConstructor { r.RLock() defer r.RUnlock() From 1ce38f43b16ba9f8a50a46456864ad0487bfb36e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20de=20la=20Pe=C3=B1a?= Date: Thu, 24 Jun 2021 18:47:00 +0200 Subject: [PATCH 6/7] fix: update MSSQL Server linux image's Docker registry (#26440) * chore: add supported versions for mssql * fix: update repository for MS SQL Server linux image --- x-pack/metricbeat/module/mssql/_meta/Dockerfile | 2 +- x-pack/metricbeat/module/mssql/_meta/supported-versions.yml | 2 ++ x-pack/metricbeat/module/mssql/docker-compose.yml | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 x-pack/metricbeat/module/mssql/_meta/supported-versions.yml diff --git a/x-pack/metricbeat/module/mssql/_meta/Dockerfile b/x-pack/metricbeat/module/mssql/_meta/Dockerfile index dded9f0f2a5a..29c116faf712 100644 --- a/x-pack/metricbeat/module/mssql/_meta/Dockerfile +++ b/x-pack/metricbeat/module/mssql/_meta/Dockerfile @@ -1,5 +1,5 @@ ARG MSSQL_VERSION -FROM microsoft/mssql-server-linux:${MSSQL_VERSION} +FROM mcr.microsoft.com/mssql/server:${MSSQL_VERSION} ENV ACCEPT_EULA='Y' ENV SA_PASSWORD='1234_asdf' diff --git a/x-pack/metricbeat/module/mssql/_meta/supported-versions.yml b/x-pack/metricbeat/module/mssql/_meta/supported-versions.yml new file mode 100644 index 000000000000..e9f409a28750 --- /dev/null +++ b/x-pack/metricbeat/module/mssql/_meta/supported-versions.yml @@ -0,0 +1,2 @@ +variants: + - MSSQL_VERSION: 2017-GA-ubuntu diff --git a/x-pack/metricbeat/module/mssql/docker-compose.yml b/x-pack/metricbeat/module/mssql/docker-compose.yml index e9362795a6a0..9fce8257135e 100644 --- a/x-pack/metricbeat/module/mssql/docker-compose.yml +++ b/x-pack/metricbeat/module/mssql/docker-compose.yml @@ -2,10 +2,10 @@ 
version: '2.3' services: mssql: - image: docker.elastic.co/integrations-ci/beats-mssql:${MSSQL_VERSION:-2017-GA}-1 + image: docker.elastic.co/integrations-ci/beats-mssql:${MSSQL_VERSION:-2017-GA-ubuntu}-1 build: context: ./_meta args: - MSSQL_VERSION: ${MSSQL_VERSION:-2017-GA} + MSSQL_VERSION: ${MSSQL_VERSION:-2017-GA-ubuntu} ports: - 1433 From 4aff295660ea6cd02858ee2f01af4d9dbc6b4df1 Mon Sep 17 00:00:00 2001 From: Alex Resnick Date: Thu, 24 Jun 2021 11:57:31 -0500 Subject: [PATCH 7/7] [Filebeat] Parse additonal debug data fields for Okta module (#25818) * #25689: Parse additonal debug data fields for Okta module * update generated data * update changelog * added additional test data & `uri_parts` processor * update fields * fix changelog * update fields Co-authored-by: Marius Iversen --- CHANGELOG.next.asciidoc | 2 +- filebeat/docs/fields.asciidoc | 127 ++++++++++ x-pack/filebeat/module/okta/fields.go | 2 +- .../module/okta/system/_meta/fields.yml | 66 ++++++ .../module/okta/system/ingest/pipeline.yml | 104 +++++++-- .../system/test/okta-system-test.json.log | 2 + .../okta-system-test.json.log-expected.json | 217 ++++++++++++++++++ 7 files changed, 498 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index c172c1b03587..1e083caa4223 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -827,8 +827,8 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Add `include_s3_metadata` config option to the `aws-s3` input for including object metadata in events. {pull}26267[26267] - RFC 5424 and UNIX socket support in the Syslog input are now GA {pull}26293[26293] - Update grok patterns for HA Proxy module {issue}25827[25827] {pull}25835[25835] +- Update Okta module to parse additional fields to `okta.debug_context.debug_data`. {issue}25689[25689] {pull}25818[25818] - Added dataset `anomalithreatstream` to the `threatintel` module to ingest indicators from Anomali ThreatStream {pull}26350[26350] - - Add support for `copytruncate` method when rotating input logs with an external tool in `filestream` input. {pull}23457[23457] *Heartbeat* diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 7bcfbb6d16ed..c983f91aca68 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -113109,6 +113109,133 @@ type: keyword -- +[float] +=== suspicious_activity + +The suspicious activity fields from the debug data. + + + +*`okta.debug_context.debug_data.suspicious_activity.browser`*:: ++ +-- +The browser used. + + +type: keyword + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_city`*:: ++ +-- +The city where the suspicious activity took place. + + +type: keyword + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_country`*:: ++ +-- +The country where the suspicious activity took place. + + +type: keyword + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_id`*:: ++ +-- +The event ID. + + +type: keyword + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_ip`*:: ++ +-- +The IP of the suspicious event. + + +type: ip + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_latitude`*:: ++ +-- +The latitude where the suspicious activity took place. + + +type: float + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_longitude`*:: ++ +-- +The longitude where the suspicious activity took place. 
+ + +type: float + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_state`*:: ++ +-- +The state where the suspicious activity took place. + + +type: keyword + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_transaction_id`*:: ++ +-- +The event transaction ID. + + +type: keyword + +-- + +*`okta.debug_context.debug_data.suspicious_activity.event_type`*:: ++ +-- +The event type. + + +type: keyword + +-- + +*`okta.debug_context.debug_data.suspicious_activity.os`*:: ++ +-- +The OS of the system from where the suspicious activity occured. + + +type: keyword + +-- + +*`okta.debug_context.debug_data.suspicious_activity.timestamp`*:: ++ +-- +The timestamp of when the activity occurred. + + +type: date + +-- + [float] === authentication_context diff --git a/x-pack/filebeat/module/okta/fields.go b/x-pack/filebeat/module/okta/fields.go index 749c3ee54dbd..2a925de84d24 100644 --- a/x-pack/filebeat/module/okta/fields.go +++ b/x-pack/filebeat/module/okta/fields.go @@ -19,5 +19,5 @@ func init() { // AssetOkta returns asset data. // This is the base64 encoded gzipped contents of module/okta. func AssetOkta() string { - return "eJzsWk1zozwSvudXdOXMm8N+zMGHrSKGZJjYxgU4rpwoBdq2Nhh5JJGM59dviQ8bAwZik633MNyMpOd5utVqtYT/gjfcj4C9SXIDIKmMcAR29itEEXC6k5TFI/jPDQDAlIVJhLBiHDYkDiMar0HshcQtRGwtYMXZNh1+dwOwohiFYpQO/AtissUDkXrkfocjWHOW7PI3Ia5IEkk/HTiCFYkEHppqWtTzkFKc0qqnTF2mTxIaHl4e7F0sLKP0VmwYlyPwNghJTH8mCDTEWNIVRQ5sBXKDKRlM2Np8x1jelQafEaqeywAzN73h/oPxo/aaZajG+apz3b4UE7zTtpKValSh4wKbuob3tOAduaAsrst/rjWUtOejrpDfA6GnBQLfkVO5r5vg1ltKNhTjrjDiLARMEyHhFYHF6SQZ5v3iUQNr9mBrsNSdmQaMg+k4tnOBxSEVu4js/S0KQdYNoWdkHWBa61CyP0eBHOUKN3wCqaeFJJCM1+3SK69za/J8lNOmY9NkeYE5OZTcEAkRStizBIRkHIHGK8a3RJaCtpupnm2hIU+WmgoHnCTMc27rsAXAqiW8VPJdG3MlmV3M7ZXyUzcriSTymEj0h7FcL/Aakn63mmKBqV9DqCnWo8LrUlFoCCKKsayvgXH1/ekiIK8skSlBBnCGboBF0Jvp8kWwa3B+5WV7DG4QrDmQMOQoDgkik9saAIlA7pP1qaM7yGoToEAgBWlKHlUV5z3V6qujZk4+/DO62wK3w6rMiZx8HGw4mnA0sNGZpfpTDCvHdktqOrhfOfsQyIcVkIM2+KRHcP1m8TAZdoMpVp/oakpy+E6DwYSc1ZDztEoZJuenMmrZ/qwnDsGZyIBtG6ooO2toK2dOc2/vhJlTflE6bkC/OAVzJOLkEHDV7GRohUNrOhv5RRLJ4fgVWpW/UqyPwF2Mx6bravCgW5OFY2rgPlnzuWlooE8m9lIDw5y9aDD+rk8m5uzR1GAxe5rZy1lLmEnC19iwn3vV96USPaIiU5v2EZ8oyVtGZv5bRURKjDH8U5f+qUs/U5dKTmJBAtl4aeA1Nl6QJTlGRGJYZvuCbNnB8rc7vTXq/Mq1Ur5nKnFXrzdul+b9rQa3P+z725bQCfE1WfsBiyX+asiEhmqGca35wk02ZYOc7Yu22rMcF4dO5qOQHC5se05TpkSNG/BIkZVu/orGa+Q7Toc+VpSAixBvqRbLVcHPBIWs5uSr9dRzcs7UU0/C6cDnrgwYFo7VIUFuOBLpi0TsMJA4qGMUMhyQO4QkPBrWBwtn0pJTSCI3atKCdGmeTy76Sb9hsswp9xemmm6ii/NNxX87zt5pWDkuD3QsPKbNij0FaW1fsZ883dcX3ndz5llj3bPsmT937GfLMB0N9LFnPZu+YTnm2LOdFw0mhj7X4ME0TCftrIFrjy19oip51eUwuL3mO3WJkNh4DxVLXFcc1emOiuEKulVKwDFNSST6P8zMkaxjRsaOaagZ0Sel2XBcXQP3ZarPPHOswaNtP05MDYyFrcHL4t56Ml/6mjpk+dJqpsKtm+jNNXCnrgZz3XWXtqOOfa5rOllEWUtdA3OqWxMNbBWb/9Dgx9LTYKx6PKgwNTWYO6bvftcd0/Ddl+nU9Bxr7D+ZL1ruwYllzjzfNV03BTXMZ2ts+gvDMtovSoRIPhtzzUswQ2qqFQjnZP/5WuHr9+Kq5OZ9sBo7Vwspf2htkXD4JPwrPeFFvkAhVP4Y7nIrRyxcc/RMwXnssQIS79tDKZbIV2TIG8AcEBKB4R3g3fpOAzuREWNvGtirFQ3wn9/+rcGH8Hgi5PlNXWCQcCr357dzN+8xzEZe8H3hFt5GcfnmLXrV+n22JRazLUtE/r+Su8+v/zjZvp65Zm/aKnstPd3NYbu+L/A1ielvUrl3aPNJL/4ybjbN7CMW6ZJrkNbDWSV3Va9woCNTdQpukKxIOvaTptLmouVvqeUfowQXuTrCwbyoIVqPu2xL6GD3yxlaH7NVOfVr38D7yliEJO7Pu9yg3CAHKoEKIJACA+MQs7avDvnJrp7ZnFrDhfcftaProDcfOboGNE5/qz5qOyoununODzaEDnSfloP1n5VJRQaw1/9icHoRfuXlCK0mlMbv053L9vh9uiPJ1f+pBdcVN9a8wKzVwM//0uD5W4cgwRIeDFhsuSleOd46BKyRrTnZbWhAooZSoQflYwmhmbhsVdMu0jPdB6d/Q+v2VUW6ym4K44xHSpMiibxqY0kLTQXSSbVjQqaOD68mzKBAQXXSBiyJJb/emxlMJ90aWcSCptKix478eBzcFl6HAEPm7xiN5c3/AgAA//++EKJK" + return 
"eJzsWktzq7gS3p9f0XXWTBb3MYssbhXHJhkmjnGBnVRWlAJtWzcYeSSRHM+vv8XTGImHbTx1F5NdDPq+r1utVqvFL/CBh3tgH5J8A5BURngPTv5fiCLgdC8pi+/hP98AAJ5ZmEQIa8ZhS+IwovEGxEFI3EHENgLWnO2y4XffANYUo1DcZwN/gZjssCJK/+Rhj/ew4SzZF7+EuCZJJP1s4D2sSSSweqRoSf8eMopT2vSvTl2nTxIaVj9W9q5W9rT2q9gyLu9huUVIYvpHgkBDjCVdU+TA1iC3mJHBjG2sT4zlXW1wi9D07zLA3E0fePhi/KhdsQzTcX76smpfhgnL02c1K9NRpY4LbOobPtCCT+SCsliV/6I8qGkvRl0hfwDCQAsEfiKn8qCa4KlPajaU464wohUCnhMh4R2BxdkkTa0fq0cD7PmDY8Cr6c4NYBws13XcCywOqdhH5ODvUAiy0YTeNH8BnpUXavYXKFCgXOGGM5AGWkgCybhql9n4ubCmyEcFbTY2S5YXmFNAyS2REKGEA0tASMYRaLxmfEdkLWj7mdRsC5o8WXtUOuAkYba5rccWAFtJeJnkuy7mRjK7mHtZy0/9rCSSyGMi0R/HcrPE0yT9fjXlAkv/G0NNuR5TvD4VpYYgohhLdQ1Mmr+fLgLyzhKZEeQALXQjLILBTJcvgr3G+Y0fu2Nwi2AvgIQhR1EliFxuZwAkArlPNqeO7iFTJiAFgQxElzyaKto91emro2ZOvvwW3V2B22NV7kROviobjiYcDdQ6s1Z/inHlOF5NTQ/3O2dfAvm4AgpQjU8GBNefLB4nw24xwxoSXbokh580GE1Iq4aCp1PKODk/k6Fk+1ZPVMGZyIDtNFWUkz/oKmdOc+/ghFlQ3igda9AvTsEciTg5BFw1Ozla6VBFp5ZfJJEcjz9Fa/I3ivV78FaTieV5BjyY9mzlWgZ4T/ZiYU0NMGcz59WAqTV/M2DymzmbWfNHy4DV/GnuvM47wkwSvkHNfr5s/l4r0SMqcrXZO+KMkrxjZO6/dUSkxBjDv+vSv+vSc+pSyUksSCC1TYOl9uEFWZJjRCSGdbYbZMselv+705tW5y3XSr3PVONutje+v1o/vhvw/Xfnx/eO0AnxPdn4AYsl/tRkwmn6GCbK4ws32YwNCrYbbbWtHBeHTu6jkFQN24HTlCtJx414pMhLN39N4w3yPadjHytqwGWId1SL9argjwSFbObkq/WoOblgGqgn4XTkc1cODCvX7pEgtxyJ9EUi9hhIHNUxKTJUyD1CEh6N64OVO+uhTKXRgLJE+GmC+jxt+Q6kOYJACVIskvyiQ3YssvZl1rPQoPeg2ufAAdZB49CaiNZJBOVuI1CdOaKkFB2+tsgx869uDiRjH7CPSGtS0GhmSSz5LWXnBOMrVxLaiKIzBrCnw8U0QxnamnBn6bAXZXKtOQ0/27tIqrKISCqTEFv1rSNGmnvVGRJL/PEnOGLx5rbSS4LxtQtJZLvuq+Mzgx9fda1u/QuWV43tnKWmFOy3kHbYD/Ga0qsdUYjjVUs//3wh21m7p5wFQcIHbViS7lBIsmtPW6E+gAeqr/BTI762GJfn5ZpQvdKqm5DIbVphBtk5ov0kZJ68N86R6JT7hueifqKLD0cN/+05+6Rho2QaqYd9POM17ClJlUOw87Q0fXO1/M2aL+2JubSdub9wnRd7arkGmJOl/WL5U9u1JkvHfTNgNjUXBjxYU8vNXjbAcya2OTPgwUxfqQZ3N6hOXSIkai/NYombhqN63dEwPIXulBJwzM5PJPoLZuZI1jMjE9eapjNizmqz4XqmAd7bszlfWhMDHh3ncWYZMF05BrytfthP1ttQU8fstXSameVvxcTlwgDv2TNgYXreq+NODTA9z3LziLJfTQOsZ9OeGeCksfkPA35/XRowSd94SMPUMmDhWr73m+laU997e362lq498Z+sN6Pw4My25kvfszwvA51aL/bE8lfTlg2u6oIJkZwbc/olmCPpGhuEc3I4v7Fx+8ZBU7L+0K5u+1cKqX8V1iGhqj5+Zu3oyBcohKZCuiKWC8TSNUfPlJzHN9ZA4kN3KMUS+ZqMeV1ZAOanYcC7zZ0BTiIjxj4McNZrGuA/f/23AV9iyRMh2zd1gUHCqTy0b+de8cY4G3nJd8MtvIvi8s1baCZP7ZgM2ZZYzHZpkZhXkXfnr/842b23fBOg2yoHLT3TK2D7PobgGxLTP0njkqTLJ4P467j5NLOvWGRLTiNtgLNq7mreN0HvmaC3plYkpyQ9+4mutLlo+dvp8o9Rgof8kwYIi7KG6BIQsh2ho12G52hDzE7LqZ8HDe87YxGSeDjv6xblFjlQCVQAgQwYGIeYdX0iUbSh1czmKg8uvKxR+uyjXtMU6AbQ/MyWvpNuR+UtOd37wZbQkS7/CrDhszJryAD2/l8MTm/tr7zJURp2LX28nmV7/JiuJ8mpn5XDdcWNvSgxlRr45V8GvPzad0fAEh6MWGx5GV493noEbJBtONlvaUAiTakwgPKxhqAnrlt1wV1EdbrR9fzPSPdlZ7+1Z1NNirajeCZTBtJLtWdCZo7XtF7PJMyhIIXqpW27ijjXmzlML90GWcQCXWkxYEd+PA7uCq8qwJD5e0Zj+e1/AQAA//+JacfI" } diff --git a/x-pack/filebeat/module/okta/system/_meta/fields.yml b/x-pack/filebeat/module/okta/system/_meta/fields.yml index 794d1cfa7702..82a75685bf26 100644 --- a/x-pack/filebeat/module/okta/system/_meta/fields.yml +++ b/x-pack/filebeat/module/okta/system/_meta/fields.yml @@ -213,6 +213,72 @@ description: > The URL. + - name: suspicious_activity + description: > + The suspicious activity fields from the debug data. + type: group + fields: + + - name: browser + type: keyword + description: > + The browser used. + + - name: event_city + type: keyword + description: > + The city where the suspicious activity took place. + + - name: event_country + type: keyword + description: > + The country where the suspicious activity took place. + + - name: event_id + type: keyword + description: > + The event ID. + + - name: event_ip + type: ip + description: > + The IP of the suspicious event. 
+ + - name: event_latitude + type: float + description: > + The latitude where the suspicious activity took place. + + - name: event_longitude + type: float + description: > + The longitude where the suspicious activity took place. + + - name: event_state + type: keyword + description: > + The state where the suspicious activity took place. + + - name: event_transaction_id + type: keyword + description: > + The event transaction ID. + + - name: event_type + type: keyword + description: > + The event type. + + - name: os + type: keyword + description: > + The OS of the system from where the suspicious activity occured. + + - name: timestamp + type: date + description: > + The timestamp of when the activity occurred. + - name: authentication_context title: Authentication Context short: Fields that let you store information about authentication context. diff --git a/x-pack/filebeat/module/okta/system/ingest/pipeline.yml b/x-pack/filebeat/module/okta/system/ingest/pipeline.yml index dc576e9c70c7..b5e4a688f338 100644 --- a/x-pack/filebeat/module/okta/system/ingest/pipeline.yml +++ b/x-pack/filebeat/module/okta/system/ingest/pipeline.yml @@ -4,23 +4,6 @@ processors: - set: field: event.ingested value: "{{_ingest.timestamp}}" - - script: - description: Drops null/empty values recursively - lang: painless - source: | - boolean drop(Object o) { - if (o == null || o == "") { - return true; - } else if (o instanceof Map) { - ((Map) o).values().removeIf(v -> drop(v)); - return (((Map) o).size() == 0); - } else if (o instanceof List) { - ((List) o).removeIf(v -> drop(v)); - return (((List) o).length == 0); - } - return false; - } - drop(ctx); - remove: field: message ignore_missing: true @@ -265,6 +248,72 @@ processors: target_field: okta.debug_context.debug_data.url ignore_missing: true ignore_failure: true + - uri_parts: + field: okta.debug_context.debug_data.url + ignore_failure: true + if: ctx?.okta?.debug_context?.debug_data?.url != null + - rename: + field: json.debugContext.debugData.suspiciousActivityBrowser + target_field: okta.debug_context.debug_data.suspicious_activity.browser + ignore_missing: true + ignore_failure: true + - rename: + ignore_failure: true + field: json.debugContext.debugData.suspiciousActivityEventCity + target_field: okta.debug_context.debug_data.suspicious_activity.event_city + ignore_missing: true + - rename: + field: json.debugContext.debugData.suspiciousActivityEventCountry + target_field: okta.debug_context.debug_data.suspicious_activity.event_country + ignore_missing: true + ignore_failure: true + - rename: + field: json.debugContext.debugData.suspiciousActivityEventId + target_field: okta.debug_context.debug_data.suspicious_activity.event_id + ignore_missing: true + ignore_failure: true + - rename: + field: json.debugContext.debugData.suspiciousActivityEventIp + target_field: okta.debug_context.debug_data.suspicious_activity.event_ip + ignore_missing: true + ignore_failure: true + - rename: + field: json.debugContext.debugData.suspiciousActivityEventLatitude + target_field: okta.debug_context.debug_data.suspicious_activity.event_latitude + ignore_missing: true + ignore_failure: true + - rename: + field: json.debugContext.debugData.suspiciousActivityEventLongitude + target_field: okta.debug_context.debug_data.suspicious_activity.event_longitude + ignore_missing: true + ignore_failure: true + - rename: + field: json.debugContext.debugData.suspiciousActivityEventState + target_field: okta.debug_context.debug_data.suspicious_activity.event_state + ignore_missing: true 
+ ignore_failure: true + - rename: + field: json.debugContext.debugData.suspiciousActivityEventTransactionId + target_field: okta.debug_context.debug_data.suspicious_activity.event_transaction_id + ignore_missing: true + ignore_failure: true + - rename: + field: json.debugContext.debugData.suspiciousActivityEventType + target_field: okta.debug_context.debug_data.suspicious_activity.event_type + ignore_missing: true + ignore_failure: true + - rename: + field: json.debugContext.debugData.suspiciousActivityOs + target_field: okta.debug_context.debug_data.suspicious_activity.os + ignore_missing: true + ignore_failure: true + - date: + field: json.debugContext.debugData.suspiciousActivityTimestamp + target_field: okta.debug_context.debug_data.suspicious_activity.timestamp + ignore_failure: true + formats: + - ISO8601 + if: ctx?.json?.debugContext?.debugData?.suspiciousActivityTimestamp != null - rename: field: json.authenticationContext.authenticationProvider target_field: okta.authentication_context.authentication_provider @@ -452,6 +501,7 @@ processors: field: - okta_target_user - okta_target_group + - json ignore_missing: true - set: field: client.user.id @@ -498,9 +548,6 @@ processors: value: "{{destination.ip}}" allow_duplicates: false if: ctx?.destination?.ip != null - - remove: - field: json - ignore_missing: true - user_agent: field: user_agent.original ignore_missing: true @@ -544,6 +591,23 @@ processors: field: destination.as.organization_name target_field: destination.as.organization.name ignore_missing: true + - script: + description: Drops null/empty values recursively + lang: painless + source: | + boolean drop(Object o) { + if (o == null || o == "") { + return true; + } else if (o instanceof Map) { + ((Map) o).values().removeIf(v -> drop(v)); + return (((Map) o).size() == 0); + } else if (o instanceof List) { + ((List) o).removeIf(v -> drop(v)); + return (((List) o).length == 0); + } + return false; + } + drop(ctx); on_failure: - set: diff --git a/x-pack/filebeat/module/okta/system/test/okta-system-test.json.log b/x-pack/filebeat/module/okta/system/test/okta-system-test.json.log index a2644a7d3bec..b21d1eca1e16 100644 --- a/x-pack/filebeat/module/okta/system/test/okta-system-test.json.log +++ b/x-pack/filebeat/module/okta/system/test/okta-system-test.json.log @@ -1,3 +1,5 @@ {"actor":{"alternateId":"xxxxxx@elastic.co","detailEntry":null,"displayName":"xxxxxx","id":"00u1abvz4pYqdM8ms4x6","type":"User"},"authenticationContext":{"authenticationProvider":null,"authenticationStep":0,"credentialProvider":null,"credentialType":null,"externalSessionId":"102nZHzd6OHSfGG51vsoc22gw","interface":null,"issuer":null},"client":{"device":"Computer","geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"id":null,"ipAddress":"108.255.197.247","userAgent":{"browser":"FIREFOX","os":"Mac OS X","rawUserAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0"},"zone":"null"},"debugContext":{"debugData":{"authnRequestId":"XkcAsWb8WjwDP76xh@1v8wAABp0","requestId":"XkccyyMli2Uay2I93ZgRzQAAB0c","requestUri":"/login/signout","threatSuspected":"false","url":"/login/signout?message=login_page_messages.session_has_expired"}},"displayMessage":"User logout from 
Okta","eventType":"user.session.end","legacyEventType":"core.user_auth.logout_success","outcome":{"reason":null,"result":"SUCCESS"},"published":"2020-02-14T22:18:51.843Z","request":{"ipChain":[{"geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"ip":"108.255.197.247","source":null,"version":"V4"}]},"securityContext":{"asNumber":null,"asOrg":null,"domain":null,"isProxy":null,"isp":null},"severity":"INFO","target":null,"transaction":{"detail":{},"id":"XkccyyMli2Uay2I93ZgRzQAAB0c","type":"WEB"},"uuid":"faf7398a-4f77-11ea-97fb-5925e98228bd","version":"0"} {"actor":{"alternateId":"xxxxxx@elastic.co","detailEntry":null,"displayName":"xxxxxx","id":"00u1abvz4pYqdM8ms4x6","type":"User"},"authenticationContext":{"authenticationProvider":null,"authenticationStep":0,"credentialProvider":null,"credentialType":null,"externalSessionId":"102bZDNFfWaQSyEZQuDgWt-uQ","interface":null,"issuer":null},"client":{"device":"Computer","geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"id":null,"ipAddress":"108.255.197.247","userAgent":{"browser":"FIREFOX","os":"Mac OS X","rawUserAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0"},"zone":"null"},"debugContext":{"debugData":{"deviceFingerprint":"541daf91d15bef64a7e08c946fd9a9d0","requestId":"XkcAsWb8WjwDP76xh@1v8wAABp0","requestUri":"/api/v1/authn","threatSuspected":"false","url":"/api/v1/authn?"}},"displayMessage":"User login to Okta","eventType":"user.session.start","legacyEventType":"core.user_auth.login_success","outcome":{"reason":null,"result":"SUCCESS"},"published":"2020-02-14T20:18:57.718Z","request":{"ipChain":[{"geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"ip":"108.255.197.247","source":null,"version":"V4"}]},"securityContext":{"asNumber":null,"asOrg":null,"domain":null,"isProxy":null,"isp":null},"severity":"INFO","target":null,"transaction":{"detail":{},"id":"XkcAsWb8WjwDP76xh@1v8wAABp0","type":"WEB"},"uuid":"3aeede38-4f67-11ea-abd3-1f5d113f2546","version":"0"} {"actor":{"alternateId":"xxxxxx@elastic.co","detailEntry":null,"displayName":"xxxxxx","id":"00u1abvz4pYqdM8ms4x6","type":"User"},"authenticationContext":{"authenticationProvider":null,"authenticationStep":0,"credentialProvider":null,"credentialType":null,"externalSessionId":"102bZDNFfWaQSyEZQuDgWt-uQ","interface":null,"issuer":null},"client":{"device":"Computer","geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"id":null,"ipAddress":"108.255.197.247","userAgent":{"browser":"FIREFOX","os":"Mac OS X","rawUserAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0"},"zone":"null"},"debugContext":{"debugData":{"deviceFingerprint":"541daf91d15bef64a7e08c946fd9a9d0","requestId":"XkcAsWb8WjwDP76xh@1v8wAABp0","requestUri":"/api/v1/authn","threatSuspected":"false","url":"/api/v1/authn?"}},"displayMessage":"Evaluation of sign-on policy","eventType":"policy.evaluate_sign_on","legacyEventType":null,"outcome":{"reason":"Sign-on policy evaluation resulted in ALLOW","result":"ALLOW"},"published":"2020-02-14T20:18:57.762Z","request":{"ipChain":[{"geographicalContext":{"city":"Dublin","country":"United 
States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"ip":"108.255.197.247","source":null,"version":"V4"}]},"securityContext":{"asNumber":null,"asOrg":null,"domain":null,"isProxy":null,"isp":null},"severity":"INFO","target":[{"alternateId":"unknown","detailEntry":{"policyType":"OktaSignOn"},"displayName":"Default Policy","id":"00p1abvweGGDW10Ur4x6","type":"PolicyEntity"},{"alternateId":"00p1abvweGGDW10Ur4x6","detailEntry":null,"displayName":"Default Rule","id":"0pr1abvwfqGFI4n064x6","type":"PolicyRule"}],"transaction":{"detail":{},"id":"XkcAsWb8WjwDP76xh@1v8wAABp0","type":"WEB"},"uuid":"3af594f9-4f67-11ea-abd3-1f5d113f2546","version":"0"} +{"actor":{"alternateId":"xxxxxx@elastic.co","detailEntry":null,"displayName":"xxxxxx","id":"00u1abvz4pYqdM8ms4x6","type":"User"},"authenticationContext":{"authenticationProvider":null,"authenticationStep":0,"credentialProvider":null,"credentialType":null,"externalSessionId":"102bZDNFfWaQSyEZQuDgWt-uQ","interface":null,"issuer":null},"client":{"device":"Computer","geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"id":null,"ipAddress":"108.255.197.247","userAgent":{"browser":"FIREFOX","os":"Mac OS X","rawUserAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0"},"zone":"null"},"debugContext":{"debugData":{"deviceFingerprint":"541daf91d15bef64a7e08c946fd9a9d0","requestId":"","requestUri":"","threatSuspected":"false","url":"","suspiciousActivityBrowser":"browser","suspiciousActivityEventCity":"New York City","suspiciousActivityEventCountry":"United Sates","suspiciousActivityEventId":"1234567","suspiciousActivityEventIp":"10.50.14.5","suspiciousActivityEventLatitude":"40.744960","suspiciousActivityEventLongitude":"-73.988590","suspiciousActivityEventState":"New York","suspiciousActivityEventTransactionId":"12345678900","suspiciousActivityEventType":"system.email.new_device_notification.sent_message","suspiciousActivityOs":"Windows 10","suspiciousActivityTimestamp":"2021-05-08T21:50:16.594Z"}},"displayMessage":"Evaluation of sign-on policy","eventType":"policy.evaluate_sign_on","legacyEventType":null,"outcome":{"reason":"Sign-on policy evaluation resulted in ALLOW","result":"ALLOW"},"published":"2020-02-14T20:18:57.762Z","request":{"ipChain":[{"geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"ip":"108.255.197.247","source":null,"version":"V4"}]},"securityContext":{"asNumber":null,"asOrg":null,"domain":null,"isProxy":null,"isp":null},"severity":"INFO","target":[{"alternateId":"unknown","detailEntry":{"policyType":"OktaSignOn"},"displayName":"Default Policy","id":"00p1abvweGGDW10Ur4x6","type":"PolicyEntity"},{"alternateId":"00p1abvweGGDW10Ur4x6","detailEntry":null,"displayName":"Default Rule","id":"0pr1abvwfqGFI4n064x6","type":"PolicyRule"}],"transaction":{"detail":{},"id":"XkcAsWb8WjwDP76xh@1v8wAABp0","type":"WEB"},"uuid":"36a3b6b3-fcc0-47a0-96bd-95330cfdb658","version":"0"} 
+{"actor":{"alternateId":"xxxxxx@elastic.co","detailEntry":null,"displayName":"xxxxxx","id":"00u1abvz4pYqdM8ms4x6","type":"User"},"authenticationContext":{"authenticationProvider":null,"authenticationStep":0,"credentialProvider":null,"credentialType":null,"externalSessionId":"102bZDNFfWaQSyEZQuDgWt-uQ","interface":null,"issuer":null},"client":{"device":"Computer","geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"id":null,"ipAddress":"108.255.197.247","userAgent":{"browser":"FIREFOX","os":"Mac OS X","rawUserAgent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0"},"zone":"null"},"debugContext":{"debugData":{"requestId":"","requestUri":"","suspiciousActivityBrowser":"browser","suspiciousActivityEventCity":"New York City","suspiciousActivityEventCountry":"United States","suspiciousActivityEventId":"1234567","suspiciousActivityEventIp":"10.50.14.5","suspiciousActivityEventLatitude":"40.744960","suspiciousActivityEventLongitude":"-73.988590","suspiciousActivityEventState":"New York","suspiciousActivityEventTransactionId":"12345678900","suspiciousActivityEventType":"system.email.new_device_notification.sent_message","suspiciousActivityOs":"Windows 10","suspiciousActivityTimestamp":"2021-05-08T21:50:16.594Z","url":""}},"device":null,"displayMessage":"User report suspicious activity","eventType":"user.account.report_suspicious_activity_by_enduser","legacyEventType":"core.user.account.report_suspicious_activity_by_enduser","outcome":{"reason":null,"result":"SUCCESS"},"published":"2020-02-14T20:18:57.762Z","request":{"ipChain":[{"geographicalContext":{"city":"Dublin","country":"United States","geolocation":{"lat":37.7201,"lon":-121.919},"postalCode":"94568","state":"California"},"ip":"108.255.197.247","source":null,"version":"V4"}]},"securityContext":{"asNumber":7018,"asOrg":"AT&T Services, Inc.","domain":"att.com","isProxy":false,"isp":"AT&T Corp."},"severity":"WARN","target":[{"alternateId":"xxxxxx@elastic.co","detailEntry":null,"displayName":"xxxxxx","id":"00u1abvz4pYqdM8ms4x6","type":"User"}],"transaction":{"detail":{},"id":"XkcAsWb8WjwDP76xh@1v8wAABp0","type":"WEB"},"uuid":"c2adb364-88d1-45a9-a620-2b64e44c5fcf","version":"0"} diff --git a/x-pack/filebeat/module/okta/system/test/okta-system-test.json.log-expected.json b/x-pack/filebeat/module/okta/system/test/okta-system-test.json.log-expected.json index 226b52efa7d6..e882c2b68cf5 100644 --- a/x-pack/filebeat/module/okta/system/test/okta-system-test.json.log-expected.json +++ b/x-pack/filebeat/module/okta/system/test/okta-system-test.json.log-expected.json @@ -72,6 +72,9 @@ "tags": [ "forwarded" ], + "url.original": "/login/signout?message=login_page_messages.session_has_expired", + "url.path": "/login/signout", + "url.query": "message=login_page_messages.session_has_expired", "user.full_name": "xxxxxx", "user_agent.device.name": "Mac", "user_agent.name": "Firefox", @@ -155,6 +158,8 @@ "tags": [ "forwarded" ], + "url.original": "/api/v1/authn?", + "url.path": "/api/v1/authn", "user.full_name": "xxxxxx", "user_agent.device.name": "Mac", "user_agent.name": "Firefox", @@ -251,6 +256,8 @@ "tags": [ "forwarded" ], + "url.original": "/api/v1/authn?", + "url.path": "/api/v1/authn", "user.full_name": "xxxxxx", "user_agent.device.name": "Mac", "user_agent.name": "Firefox", @@ -259,5 +266,215 @@ "user_agent.os.name": "Mac OS X", "user_agent.os.version": "10.15", "user_agent.version": "72.0." 
+ }, + { + "@timestamp": "2020-02-14T20:18:57.762Z", + "client.geo.city_name": "Dublin", + "client.geo.country_name": "United States", + "client.geo.location.lat": 37.7201, + "client.geo.location.lon": -121.919, + "client.geo.region_name": "California", + "client.ip": "108.255.197.247", + "client.user.full_name": "xxxxxx", + "client.user.id": "00u1abvz4pYqdM8ms4x6", + "event.action": "policy.evaluate_sign_on", + "event.category": [ + "authentication" + ], + "event.dataset": "okta.system", + "event.id": "36a3b6b3-fcc0-47a0-96bd-95330cfdb658", + "event.kind": "event", + "event.module": "okta", + "event.original": "{\"actor\":{\"alternateId\":\"xxxxxx@elastic.co\",\"detailEntry\":null,\"displayName\":\"xxxxxx\",\"id\":\"00u1abvz4pYqdM8ms4x6\",\"type\":\"User\"},\"authenticationContext\":{\"authenticationProvider\":null,\"authenticationStep\":0,\"credentialProvider\":null,\"credentialType\":null,\"externalSessionId\":\"102bZDNFfWaQSyEZQuDgWt-uQ\",\"interface\":null,\"issuer\":null},\"client\":{\"device\":\"Computer\",\"geographicalContext\":{\"city\":\"Dublin\",\"country\":\"United States\",\"geolocation\":{\"lat\":37.7201,\"lon\":-121.919},\"postalCode\":\"94568\",\"state\":\"California\"},\"id\":null,\"ipAddress\":\"108.255.197.247\",\"userAgent\":{\"browser\":\"FIREFOX\",\"os\":\"Mac OS X\",\"rawUserAgent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0\"},\"zone\":\"null\"},\"debugContext\":{\"debugData\":{\"deviceFingerprint\":\"541daf91d15bef64a7e08c946fd9a9d0\",\"requestId\":\"\",\"requestUri\":\"\",\"threatSuspected\":\"false\",\"url\":\"\",\"suspiciousActivityBrowser\":\"browser\",\"suspiciousActivityEventCity\":\"New York City\",\"suspiciousActivityEventCountry\":\"United Sates\",\"suspiciousActivityEventId\":\"1234567\",\"suspiciousActivityEventIp\":\"10.50.14.5\",\"suspiciousActivityEventLatitude\":\"40.744960\",\"suspiciousActivityEventLongitude\":\"-73.988590\",\"suspiciousActivityEventState\":\"New York\",\"suspiciousActivityEventTransactionId\":\"12345678900\",\"suspiciousActivityEventType\":\"system.email.new_device_notification.sent_message\",\"suspiciousActivityOs\":\"Windows 10\",\"suspiciousActivityTimestamp\":\"2021-05-08T21:50:16.594Z\"}},\"displayMessage\":\"Evaluation of sign-on policy\",\"eventType\":\"policy.evaluate_sign_on\",\"legacyEventType\":null,\"outcome\":{\"reason\":\"Sign-on policy evaluation resulted in ALLOW\",\"result\":\"ALLOW\"},\"published\":\"2020-02-14T20:18:57.762Z\",\"request\":{\"ipChain\":[{\"geographicalContext\":{\"city\":\"Dublin\",\"country\":\"United States\",\"geolocation\":{\"lat\":37.7201,\"lon\":-121.919},\"postalCode\":\"94568\",\"state\":\"California\"},\"ip\":\"108.255.197.247\",\"source\":null,\"version\":\"V4\"}]},\"securityContext\":{\"asNumber\":null,\"asOrg\":null,\"domain\":null,\"isProxy\":null,\"isp\":null},\"severity\":\"INFO\",\"target\":[{\"alternateId\":\"unknown\",\"detailEntry\":{\"policyType\":\"OktaSignOn\"},\"displayName\":\"Default Policy\",\"id\":\"00p1abvweGGDW10Ur4x6\",\"type\":\"PolicyEntity\"},{\"alternateId\":\"00p1abvweGGDW10Ur4x6\",\"detailEntry\":null,\"displayName\":\"Default Rule\",\"id\":\"0pr1abvwfqGFI4n064x6\",\"type\":\"PolicyRule\"}],\"transaction\":{\"detail\":{},\"id\":\"XkcAsWb8WjwDP76xh@1v8wAABp0\",\"type\":\"WEB\"},\"uuid\":\"36a3b6b3-fcc0-47a0-96bd-95330cfdb658\",\"version\":\"0\"}", + "event.outcome": "success", + "event.type": [ + "info" + ], + "fileset.name": "system", + "input.type": "log", + "log.offset": 5218, + "okta.actor.alternate_id": 
"xxxxxx@elastic.co", + "okta.actor.display_name": "xxxxxx", + "okta.actor.id": "00u1abvz4pYqdM8ms4x6", + "okta.actor.type": "User", + "okta.authentication_context.authentication_step": 0, + "okta.authentication_context.external_session_id": "102bZDNFfWaQSyEZQuDgWt-uQ", + "okta.client.device": "Computer", + "okta.client.ip": "108.255.197.247", + "okta.client.user_agent.browser": "FIREFOX", + "okta.client.user_agent.os": "Mac OS X", + "okta.client.user_agent.raw_user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0", + "okta.client.zone": "null", + "okta.debug_context.debug_data.device_fingerprint": "541daf91d15bef64a7e08c946fd9a9d0", + "okta.debug_context.debug_data.request_id": "", + "okta.debug_context.debug_data.request_uri": "", + "okta.debug_context.debug_data.suspicious_activity.browser": "browser", + "okta.debug_context.debug_data.suspicious_activity.event_city": "New York City", + "okta.debug_context.debug_data.suspicious_activity.event_country": "United Sates", + "okta.debug_context.debug_data.suspicious_activity.event_id": "1234567", + "okta.debug_context.debug_data.suspicious_activity.event_ip": "10.50.14.5", + "okta.debug_context.debug_data.suspicious_activity.event_latitude": "40.744960", + "okta.debug_context.debug_data.suspicious_activity.event_longitude": "-73.988590", + "okta.debug_context.debug_data.suspicious_activity.event_state": "New York", + "okta.debug_context.debug_data.suspicious_activity.event_transaction_id": "12345678900", + "okta.debug_context.debug_data.suspicious_activity.event_type": "system.email.new_device_notification.sent_message", + "okta.debug_context.debug_data.suspicious_activity.os": "Windows 10", + "okta.debug_context.debug_data.suspicious_activity.timestamp": "2021-05-08T21:50:16.594Z", + "okta.debug_context.debug_data.threat_suspected": "false", + "okta.debug_context.debug_data.url": "", + "okta.display_message": "Evaluation of sign-on policy", + "okta.event_type": "policy.evaluate_sign_on", + "okta.outcome.reason": "Sign-on policy evaluation resulted in ALLOW", + "okta.outcome.result": "ALLOW", + "okta.target": [ + { + "alternate_id": "unknown", + "display_name": "Default Policy", + "id": "00p1abvweGGDW10Ur4x6", + "type": "PolicyEntity" + }, + { + "alternate_id": "00p1abvweGGDW10Ur4x6", + "display_name": "Default Rule", + "id": "0pr1abvwfqGFI4n064x6", + "type": "PolicyRule" + } + ], + "okta.transaction.id": "XkcAsWb8WjwDP76xh@1v8wAABp0", + "okta.transaction.type": "WEB", + "okta.uuid": "36a3b6b3-fcc0-47a0-96bd-95330cfdb658", + "related.ip": [ + "108.255.197.247" + ], + "related.user": [ + "xxxxxx" + ], + "service.type": "okta", + "source.as.number": 7018, + "source.as.organization.name": "AT&T Services, Inc.", + "source.geo.city_name": "Dublin", + "source.geo.continent_name": "North America", + "source.geo.country_iso_code": "US", + "source.geo.country_name": "United States", + "source.geo.location.lat": 37.7201, + "source.geo.location.lon": -121.919, + "source.geo.region_iso_code": "US-CA", + "source.geo.region_name": "California", + "source.ip": "108.255.197.247", + "source.user.full_name": "xxxxxx", + "source.user.id": "00u1abvz4pYqdM8ms4x6", + "tags": [ + "forwarded" + ], + "user.full_name": "xxxxxx", + "user_agent.device.name": "Mac", + "user_agent.name": "Firefox", + "user_agent.original": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0", + "user_agent.os.full": "Mac OS X 10.15", + "user_agent.os.name": "Mac OS X", + "user_agent.os.version": "10.15", + 
"user_agent.version": "72.0." + }, + { + "@timestamp": "2020-02-14T20:18:57.762Z", + "client.as.number": "7018", + "client.as.organization.name": "AT&T Services, Inc.", + "client.domain": "att.com", + "client.geo.city_name": "Dublin", + "client.geo.country_name": "United States", + "client.geo.location.lat": 37.7201, + "client.geo.location.lon": -121.919, + "client.geo.region_name": "California", + "client.ip": "108.255.197.247", + "client.user.full_name": "xxxxxx", + "client.user.id": "00u1abvz4pYqdM8ms4x6", + "event.action": "user.account.report_suspicious_activity_by_enduser", + "event.dataset": "okta.system", + "event.id": "c2adb364-88d1-45a9-a620-2b64e44c5fcf", + "event.kind": "event", + "event.module": "okta", + "event.original": "{\"actor\":{\"alternateId\":\"xxxxxx@elastic.co\",\"detailEntry\":null,\"displayName\":\"xxxxxx\",\"id\":\"00u1abvz4pYqdM8ms4x6\",\"type\":\"User\"},\"authenticationContext\":{\"authenticationProvider\":null,\"authenticationStep\":0,\"credentialProvider\":null,\"credentialType\":null,\"externalSessionId\":\"102bZDNFfWaQSyEZQuDgWt-uQ\",\"interface\":null,\"issuer\":null},\"client\":{\"device\":\"Computer\",\"geographicalContext\":{\"city\":\"Dublin\",\"country\":\"United States\",\"geolocation\":{\"lat\":37.7201,\"lon\":-121.919},\"postalCode\":\"94568\",\"state\":\"California\"},\"id\":null,\"ipAddress\":\"108.255.197.247\",\"userAgent\":{\"browser\":\"FIREFOX\",\"os\":\"Mac OS X\",\"rawUserAgent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0\"},\"zone\":\"null\"},\"debugContext\":{\"debugData\":{\"requestId\":\"\",\"requestUri\":\"\",\"suspiciousActivityBrowser\":\"browser\",\"suspiciousActivityEventCity\":\"New York City\",\"suspiciousActivityEventCountry\":\"United States\",\"suspiciousActivityEventId\":\"1234567\",\"suspiciousActivityEventIp\":\"10.50.14.5\",\"suspiciousActivityEventLatitude\":\"40.744960\",\"suspiciousActivityEventLongitude\":\"-73.988590\",\"suspiciousActivityEventState\":\"New York\",\"suspiciousActivityEventTransactionId\":\"12345678900\",\"suspiciousActivityEventType\":\"system.email.new_device_notification.sent_message\",\"suspiciousActivityOs\":\"Windows 10\",\"suspiciousActivityTimestamp\":\"2021-05-08T21:50:16.594Z\",\"url\":\"\"}},\"device\":null,\"displayMessage\":\"User report suspicious activity\",\"eventType\":\"user.account.report_suspicious_activity_by_enduser\",\"legacyEventType\":\"core.user.account.report_suspicious_activity_by_enduser\",\"outcome\":{\"reason\":null,\"result\":\"SUCCESS\"},\"published\":\"2020-02-14T20:18:57.762Z\",\"request\":{\"ipChain\":[{\"geographicalContext\":{\"city\":\"Dublin\",\"country\":\"United States\",\"geolocation\":{\"lat\":37.7201,\"lon\":-121.919},\"postalCode\":\"94568\",\"state\":\"California\"},\"ip\":\"108.255.197.247\",\"source\":null,\"version\":\"V4\"}]},\"securityContext\":{\"asNumber\":7018,\"asOrg\":\"AT&T Services, Inc.\",\"domain\":\"att.com\",\"isProxy\":false,\"isp\":\"AT&T Corp.\"},\"severity\":\"WARN\",\"target\":[{\"alternateId\":\"xxxxxx@elastic.co\",\"detailEntry\":null,\"displayName\":\"xxxxxx\",\"id\":\"00u1abvz4pYqdM8ms4x6\",\"type\":\"User\"}],\"transaction\":{\"detail\":{},\"id\":\"XkcAsWb8WjwDP76xh@1v8wAABp0\",\"type\":\"WEB\"},\"uuid\":\"c2adb364-88d1-45a9-a620-2b64e44c5fcf\",\"version\":\"0\"}", + "event.outcome": "success", + "fileset.name": "system", + "input.type": "log", + "log.offset": 7707, + "okta.actor.alternate_id": "xxxxxx@elastic.co", + "okta.actor.display_name": "xxxxxx", + "okta.actor.id": 
"00u1abvz4pYqdM8ms4x6", + "okta.actor.type": "User", + "okta.authentication_context.authentication_step": 0, + "okta.authentication_context.external_session_id": "102bZDNFfWaQSyEZQuDgWt-uQ", + "okta.client.device": "Computer", + "okta.client.ip": "108.255.197.247", + "okta.client.user_agent.browser": "FIREFOX", + "okta.client.user_agent.os": "Mac OS X", + "okta.client.user_agent.raw_user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0", + "okta.client.zone": "null", + "okta.debug_context.debug_data.request_id": "", + "okta.debug_context.debug_data.request_uri": "", + "okta.debug_context.debug_data.suspicious_activity.browser": "browser", + "okta.debug_context.debug_data.suspicious_activity.event_city": "New York City", + "okta.debug_context.debug_data.suspicious_activity.event_country": "United States", + "okta.debug_context.debug_data.suspicious_activity.event_id": "1234567", + "okta.debug_context.debug_data.suspicious_activity.event_ip": "10.50.14.5", + "okta.debug_context.debug_data.suspicious_activity.event_latitude": "40.744960", + "okta.debug_context.debug_data.suspicious_activity.event_longitude": "-73.988590", + "okta.debug_context.debug_data.suspicious_activity.event_state": "New York", + "okta.debug_context.debug_data.suspicious_activity.event_transaction_id": "12345678900", + "okta.debug_context.debug_data.suspicious_activity.event_type": "system.email.new_device_notification.sent_message", + "okta.debug_context.debug_data.suspicious_activity.os": "Windows 10", + "okta.debug_context.debug_data.suspicious_activity.timestamp": "2021-05-08T21:50:16.594Z", + "okta.debug_context.debug_data.url": "", + "okta.display_message": "User report suspicious activity", + "okta.event_type": "user.account.report_suspicious_activity_by_enduser", + "okta.outcome.result": "SUCCESS", + "okta.security_context.as.number": 7018, + "okta.security_context.as.organization.name": "AT&T Services, Inc.", + "okta.security_context.domain": "att.com", + "okta.security_context.is_proxy": false, + "okta.security_context.isp": "AT&T Corp.", + "okta.target": [ + { + "alternate_id": "xxxxxx@elastic.co", + "type": "User" + } + ], + "okta.transaction.id": "XkcAsWb8WjwDP76xh@1v8wAABp0", + "okta.transaction.type": "WEB", + "okta.uuid": "c2adb364-88d1-45a9-a620-2b64e44c5fcf", + "related.ip": [ + "108.255.197.247" + ], + "related.user": [ + "xxxxxx" + ], + "service.type": "okta", + "source.as.number": 7018, + "source.as.organization.name": "AT&T Services, Inc.", + "source.domain": "att.com", + "source.geo.city_name": "Dublin", + "source.geo.continent_name": "North America", + "source.geo.country_iso_code": "US", + "source.geo.country_name": "United States", + "source.geo.location.lat": 37.7201, + "source.geo.location.lon": -121.919, + "source.geo.region_iso_code": "US-CA", + "source.geo.region_name": "California", + "source.ip": "108.255.197.247", + "source.user.full_name": "xxxxxx", + "source.user.id": "00u1abvz4pYqdM8ms4x6", + "tags": [ + "forwarded" + ], + "user.full_name": "xxxxxx", + "user.target.full_name": "xxxxxx", + "user.target.id": "00u1abvz4pYqdM8ms4x6", + "user_agent.device.name": "Mac", + "user_agent.name": "Firefox", + "user_agent.original": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0", + "user_agent.os.full": "Mac OS X 10.15", + "user_agent.os.name": "Mac OS X", + "user_agent.os.version": "10.15", + "user_agent.version": "72.0." } ] \ No newline at end of file