Skip to content

Commit

Permalink
Add support for http hostname in nginx filebeat module (elastic#14505)
Browse files Browse the repository at this point in the history
It is common to modify the default log format in nginx to include
the http host used by the client to make the logged request.

This is similar to what we already did for Apache in elastic#12778.

It also fixes an issue with source address parsing when
source addresses are not IPs. For that, the pipeline works
with addresses as strings instead of IPs, and it only tries to
convert the address to an IP, to finally store it in `source.ip`, if it parses
as an IP.

Co-authored-by: poma <Semenov.Roman@mail.ru>
  • Loading branch information
jsoriano and poma committed Nov 20, 2019
1 parent df17e26 commit 1967bc9
Show file tree
Hide file tree
Showing 6 changed files with 357 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Close chan of Closer first before calling callback {pull}14231[14231]
- Fix race condition in S3 input plugin. {pull}14359[14359]
- Decode hex values in auditd module. {pull}14471[14471]
- Fix parsing of remote addresses that are not IPs in nginx logs. {pull}14505[14505]
- Fix handling multiline log entries in nginx module. {issue}14349[14349] {pull}14499[14499]
- Fix parsing of Elasticsearch node name by `elasticsearch/slowlog` fileset. {pull}14547[14547]
- cisco/asa fileset: Fix parsing of 302021 message code. {pull}14519[14519]
Expand Down Expand Up @@ -407,6 +408,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Add more filesets to Zeek module. {pull}14150[14150]
- Add `index` option to all inputs to directly set a per-input index value. {pull}14010[14010]
- Remove beta flag for some filebeat modules. {pull}14374[14374]
- Add support for http hostname in nginx filebeat module. {pull}14505[14505]
- Add attack_pattern_kql field to MISP threat indicators. {pull}14470[14470]
- Add fileset to the Zeek module for the intel.log. {pull}14404[14404]
- New fileset googlecloud/firewall for ingesting Google Cloud Firewall logs. {pull}14553[14553]
Expand Down
25 changes: 14 additions & 11 deletions filebeat/module/nginx/access/ingest/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
"grok": {
"field": "message",
"patterns": [
"\"?(?:%{IP_LIST:nginx.access.remote_ip_list}|%{DATA:source.address}) - %{DATA:user.name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{DATA:nginx.access.info}\" %{NUMBER:http.response.status_code:long} %{NUMBER:http.response.body.bytes:long} \"%{DATA:http.request.referrer}\" \"%{DATA:user_agent.original}\""
"(%{NGINX_HOST} )?\"?(?:%{NGINX_ADDRESS_LIST:nginx.access.remote_ip_list}|%{NOTSPACE:source.address}) - %{DATA:user.name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{DATA:nginx.access.info}\" %{NUMBER:http.response.status_code:long} %{NUMBER:http.response.body.bytes:long} \"%{DATA:http.request.referrer}\" \"%{DATA:user_agent.original}\""
],
"pattern_definitions": {
"IP_LIST": "%{IP}(\"?,?\\s*%{IP})*"
"NGINX_HOST": "(?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port})?",
"NGINX_NOTSEPARATOR": "[^\t ,:]+",
"NGINX_ADDRESS_LIST": "(?:%{IP}|%{WORD})(\"?,?\\s*(?:%{IP}|%{WORD}))*"
},
"ignore_missing": true
}
Expand Down Expand Up @@ -44,31 +46,32 @@
},
{
"set": {
"field": "source.ip",
"field": "source.address",
"if": "ctx.source?.address == null",
"value": ""
}
},
{
"script": {
"if": "ctx.nginx?.access?.remote_ip_list != null && ctx.nginx.access.remote_ip_list.length > 0",
"lang": "painless",
"source": "boolean isPrivate(def dot, def ip) { try { StringTokenizer tok = new StringTokenizer(ip, dot); int firstByte = Integer.parseInt(tok.nextToken()); int secondByte = Integer.parseInt(tok.nextToken()); if (firstByte == 10) { return true; } if (firstByte == 192 && secondByte == 168) { return true; } if (firstByte == 172 && secondByte >= 16 && secondByte <= 31) { return true; } if (firstByte == 127) { return true; } return false; } catch (Exception e) { return false; } } try { ctx.source.ip = null; if (ctx.nginx.access.remote_ip_list == null) { return; } def found = false; for (def item : ctx.nginx.access.remote_ip_list) { if (!isPrivate(params.dot, item)) { ctx.source.ip = item; found = true; break; } } if (!found) { ctx.source.ip = ctx.nginx.access.remote_ip_list[0]; }} catch (Exception e) { ctx.source.ip = null; }",
"source": "boolean isPrivate(def dot, def ip) { try { StringTokenizer tok = new StringTokenizer(ip, dot); int firstByte = Integer.parseInt(tok.nextToken()); int secondByte = Integer.parseInt(tok.nextToken()); if (firstByte == 10) { return true; } if (firstByte == 192 && secondByte == 168) { return true; } if (firstByte == 172 && secondByte >= 16 && secondByte <= 31) { return true; } if (firstByte == 127) { return true; } return false; } catch (Exception e) { return false; } } try { ctx.source.address = null; if (ctx.nginx.access.remote_ip_list == null) { return; } def found = false; for (def item : ctx.nginx.access.remote_ip_list) { if (!isPrivate(params.dot, item)) { ctx.source.address = item; found = true; break; } } if (!found) { ctx.source.address = ctx.nginx.access.remote_ip_list[0]; }} catch (Exception e) { ctx.source.address = null; }",
"params": {
"dot": "."
}
}
},
{
"remove": {
"field": "source.ip",
"if": "ctx.source.ip == null"
"field": "source.address",
"if": "ctx.source.address == null"
}
},
{
"convert": {
"field": "source.ip",
"target_field": "source.address",
"type": "string",
"ignore_missing": true
"grok": {
"field": "source.address",
"patterns": ["^%{IP:source.ip}$"],
"ignore_failure": true
}
},
{
Expand Down
10 changes: 10 additions & 0 deletions filebeat/module/nginx/access/test/test-with-host.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
example.com 10.0.0.2, 10.0.0.1, 127.0.0.1 - - [07/Dec/2016:11:05:07 +0100] "GET /ocelot HTTP/1.1" 200 571 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0"
example.com 172.17.0.1 - - [29/May/2017:19:02:48 +0000] "GET /stringpatch HTTP/1.1" 404 612 "-" "Mozilla/5.0 (Windows NT 6.1; rv:15.0) Gecko/20120716 Firefox/15.0a2" "-"
example.com 10.0.0.2, 10.0.0.1, 85.181.35.98 - - [07/Dec/2016:11:05:07 +0100] "GET /ocelot HTTP/1.1" 200 571 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0"
example.com:80 85.181.35.98 - - [07/Dec/2016:11:05:07 +0100] "GET /ocelot HTTP/1.1" 200 571 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
example.com:80 "10.5.102.222, 199.96.1.1, 204.246.1.1" 10.2.1.185 - - [22/Jan/2016:13:18:29 +0000] "GET /assets/xxxx?q=100 HTTP/1.1" 200 25507 "-" "Amazon CloudFront"
1.2.3.4 2a03:0000:10ff:f00f:0000:0000:0:8000, 10.225.192.17 10.2.2.121 - - [30/Dec/2016:06:47:09 +0000] "GET /test.html HTTP/1.1" 404 8571 "-" "Mozilla/5.0 (compatible; Facebot 1.0; https://developers.facebook.com/docs/sharing/webmasters/crawler)"
1.2.3.4:80 127.0.0.1 - - [12/Apr/2018:09:48:40 +0200] "" 400 0 "-" "-"
example.com:80 unix: - - [26/Feb/2019:15:39:42 +0100] "hello" 400 173 "-" "-"
1.2.3.4 localhost - - [29/May/2017:19:02:48 +0000] "GET /test2 HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 6.1; rv:15.0) Gecko/20120716 Firefox/15.0a2" "-"
example.com localhost, localhost - - [29/May/2017:19:02:48 +0000] "GET /test2 HTTP/1.1" 200 612 "-" "Mozilla/5.0 (Windows NT 6.1; rv:15.0) Gecko/20120716 Firefox/15.0a2" "-"
Loading

0 comments on commit 1967bc9

Please sign in to comment.