From 91dd1740a6c8081c0f32e1771b35ddda9696fe99 Mon Sep 17 00:00:00 2001 From: Tudor Golubenco Date: Fri, 2 Jun 2017 14:57:19 +0200 Subject: [PATCH] Nginx module: use first not private IP address as remote_ip (#4417) A common customization to the nginx logs is to add the contents of the X-Forwarded-For header in front of the remote IPs. This typically results in a list of remote IPs. This adds a new field `remote_ip_list` which is an array, and uses a Painless script to automatically select the first non-private IP for the `remote_ip` field, which is the field on which GeoIP is applied. Fixes #4322. (cherry picked from commit a2c162f35846c5005c71d15359df9f8a9879a2f7) --- filebeat/docs/fields.asciidoc | 42 ++++++++++++++++++- .../module/apache2/access/_meta/fields.yml | 8 ++++ filebeat/module/nginx/access/_meta/fields.yml | 17 +++++++- .../module/nginx/access/ingest/default.json | 17 +++++++- 4 files changed, 80 insertions(+), 4 deletions(-) diff --git a/filebeat/docs/fields.asciidoc b/filebeat/docs/fields.asciidoc index 461655b7e42..1836f06d39e 100644 --- a/filebeat/docs/fields.asciidoc +++ b/filebeat/docs/fields.asciidoc @@ -231,6 +231,22 @@ type: geo_point The longitude and latitude. +[float] +=== apache2.access.geoip.region_name + +type: keyword + +The region name. + + +[float] +=== apache2.access.geoip.city_name + +type: keyword + +The city name. + + [float] == error Fields @@ -765,12 +781,20 @@ Contains fields for the Nginx access logs. +[float] +=== nginx.access.remote_ip_list + +type: array + +An array of remote IP addresses. It is a list because it is common to include, besides the client IP address, IP addresses from headers like `X-Forwarded-For`. See also the `remote_ip` field. + + [float] === nginx.access.remote_ip type: keyword -Client IP address. +Client IP address. The first public IP address from the `remote_ip_list` array. If no public IP addresses are present, this field contains the first private IP address from the `remote_ip_list` array. 
[float] @@ -953,6 +977,22 @@ type: geo_point The longitude and latitude. +[float] +=== nginx.access.geoip.region_name + +type: keyword + +The region name. + + +[float] +=== nginx.access.geoip.city_name + +type: keyword + +The city name. + + [float] == error Fields diff --git a/filebeat/module/apache2/access/_meta/fields.yml b/filebeat/module/apache2/access/_meta/fields.yml index 97fabdc5cab..be09717198c 100644 --- a/filebeat/module/apache2/access/_meta/fields.yml +++ b/filebeat/module/apache2/access/_meta/fields.yml @@ -104,4 +104,12 @@ type: geo_point description: > The longitude and latitude. + - name: region_name + type: keyword + description: > + The region name. + - name: city_name + type: keyword + description: > + The city name. diff --git a/filebeat/module/nginx/access/_meta/fields.yml b/filebeat/module/nginx/access/_meta/fields.yml index 0b5f1eb275e..38e89be9ddb 100644 --- a/filebeat/module/nginx/access/_meta/fields.yml +++ b/filebeat/module/nginx/access/_meta/fields.yml @@ -3,10 +3,17 @@ description: > Contains fields for the Nginx access logs. fields: + - name: remote_ip_list + type: array + description: > + An array of remote IP addresses. It is a list because it is common to include, besides the client + IP address, IP addresses from headers like `X-Forwarded-For`. See also the `remote_ip` field. - name: remote_ip type: keyword description: > - Client IP address. + Client IP address. The first public IP address from the `remote_ip_list` array. If no public IP + addresses are present, this field contains the first private IP address from the `remote_ip_list` + array. - name: user_name type: keyword description: > @@ -104,4 +111,12 @@ type: geo_point description: > The longitude and latitude. + - name: region_name + type: keyword + description: > + The region name. + - name: city_name + type: keyword + description: > + The city name. 
diff --git a/filebeat/module/nginx/access/ingest/default.json b/filebeat/module/nginx/access/ingest/default.json index cf0441d5126..c1ddbda2cdb 100644 --- a/filebeat/module/nginx/access/ingest/default.json +++ b/filebeat/module/nginx/access/ingest/default.json @@ -4,11 +4,24 @@ "grok": { "field": "message", "patterns":[ - "%{IPORHOST:nginx.access.remote_ip} - %{DATA:nginx.access.user_name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{WORD:nginx.access.method} %{DATA:nginx.access.url} HTTP/%{NUMBER:nginx.access.http_version}\" %{NUMBER:nginx.access.response_code} %{NUMBER:nginx.access.body_sent.bytes} \"%{DATA:nginx.access.referrer}\" \"%{DATA:nginx.access.agent}\"" + "\"?%{IP_LIST:nginx.access.remote_ip_list} - %{DATA:nginx.access.user_name} \\[%{HTTPDATE:nginx.access.time}\\] \"%{WORD:nginx.access.method} %{DATA:nginx.access.url} HTTP/%{NUMBER:nginx.access.http_version}\" %{NUMBER:nginx.access.response_code} %{NUMBER:nginx.access.body_sent.bytes} \"%{DATA:nginx.access.referrer}\" \"%{DATA:nginx.access.agent}\"" ], + "pattern_definitions": { + "IP_LIST": "%{IP}(\"?,?\\s*%{IP})*" + }, "ignore_missing": true } - },{ + }, { + "split": { + "field": "nginx.access.remote_ip_list", + "separator": "\"?(?:,\\s*|\\s+)" + } + }, { + "script": { + "lang": "painless", + "inline": "boolean isPrivate(def ip) { try { StringTokenizer tok = new StringTokenizer(ip, '.'); int firstByte = Integer.parseInt(tok.nextToken()); int secondByte = Integer.parseInt(tok.nextToken()); if (firstByte == 10) { return true; } if (firstByte == 192 && secondByte == 168) { return true; } if (firstByte == 172 && secondByte >= 16 && secondByte <= 31) { return true; } if (firstByte == 127) { return true; } return false; } catch (Exception e) { return false; } } def found = false; for (def item : ctx.nginx.access.remote_ip_list) { if (!isPrivate(item)) { ctx.nginx.access.remote_ip = item; found = true; break; } } if (!found) { ctx.nginx.access.remote_ip = ctx.nginx.access.remote_ip_list[0]; }" + } + }, { "remove":{ 
"field": "message" }