Skip to content

Commit

Permalink
Add timezone support to logparser timestamps (influxdata#2882)
Browse files Browse the repository at this point in the history
  • Loading branch information
sebito91 authored and jeichorn committed Jul 24, 2017
1 parent cc7c85d commit 84bc2a9
Show file tree
Hide file tree
Showing 6 changed files with 259 additions and 16 deletions.
26 changes: 25 additions & 1 deletion plugins/inputs/logparser/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ regex patterns.
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
## /var/log/apache.log -> only tail the apache log file
files = ["/var/log/apache/access.log"]
## Read file from beginning.

## Read files that currently exist from the beginning. Files that are created
## while telegraf is running (and that match the "files" globs) will always
## be read from the beginning.
from_beginning = false

## Parse logstash-style "grok" patterns:
Expand All @@ -28,13 +31,27 @@ regex patterns.
## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs)
## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent)
patterns = ["%{COMBINED_LOG_FORMAT}"]

## Name of the outputted measurement name.
measurement = "apache_access_log"

## Full path(s) to custom pattern files.
custom_pattern_files = []

## Custom patterns can also be defined here. Put one pattern per line.
custom_patterns = '''
'''

## Timezone allows you to provide an override for timestamps that
## don't already include an offset
## e.g. 04/06/2016 12:41:45 data one two 5.43µs
##
## Default: "" which renders UTC
## Options are as follows:
## 1. Local -- interpret based on machine localtime
## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
## 3. UTC -- or blank/unspecified, will return timestamp in UTC
timezone = "Canada/Eastern"
```

### Grok Parser
Expand Down Expand Up @@ -125,6 +142,13 @@ Wed Apr 12 13:10:34 PST 2017 value=42
'''
```

For cases where the timestamp itself is without offset, the `timezone` config var is available
to denote an offset. By default (with `timezone` either omit, blank or set to `"UTC"`), the times
are processed as if in the UTC timezone. If specified as `timezone = "Local"`, the timestamp
will be processed based on the current machine timezone configuration. Lastly, if using a
timezone from the list of Unix [timezones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones), the logparser grok will attempt to offset
the timestamp accordingly. See test cases for more detailed examples.

#### TOML Escaping

When saving patterns to the configuration file, keep in mind the different TOML
Expand Down
31 changes: 25 additions & 6 deletions plugins/inputs/logparser/grok/grok.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ var (
patternOnlyRe = regexp.MustCompile(`%{(\w+)}`)
)

// Parser is the primary struct to handle and grok-patterns defined in the config toml
type Parser struct {
Patterns []string
// namedPatterns is a list of internally-assigned names to the patterns
Expand All @@ -70,6 +71,16 @@ type Parser struct {
CustomPatternFiles []string
Measurement string

// Timezone is an optional component to help render log dates to
// your chosen zone.
// Default: "" which renders UTC
// Options are as follows:
// 1. Local -- interpret based on machine localtime
// 2. "America/Chicago" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
// 3. UTC -- or blank/unspecified, will return timestamp in UTC
Timezone string
loc *time.Location

// typeMap is a map of patterns -> capture name -> modifier,
// ie, {
// "%{TESTLOG}":
Expand Down Expand Up @@ -105,6 +116,7 @@ type Parser struct {
tsModder *tsModder
}

// Compile is a bound method to Parser which will process the options for our parser
func (p *Parser) Compile() error {
p.typeMap = make(map[string]map[string]string)
p.tsMap = make(map[string]map[string]string)
Expand Down Expand Up @@ -142,9 +154,9 @@ func (p *Parser) Compile() error {

// Parse any custom pattern files supplied.
for _, filename := range p.CustomPatternFiles {
file, err := os.Open(filename)
if err != nil {
return err
file, fileErr := os.Open(filename)
if fileErr != nil {
return fileErr
}

scanner := bufio.NewScanner(bufio.NewReader(file))
Expand All @@ -155,9 +167,16 @@ func (p *Parser) Compile() error {
p.Measurement = "logparser_grok"
}

p.loc, err = time.LoadLocation(p.Timezone)
if err != nil {
log.Printf("W! improper timezone supplied (%s), setting loc to UTC", p.Timezone)
p.loc, _ = time.LoadLocation("UTC")
}

return p.compileCustomPatterns()
}

// ParseLine is the primary function to process individual lines, returning the metrics
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
var err error
// values are the parsed fields from the log line
Expand Down Expand Up @@ -251,7 +270,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
var foundTs bool
// first try timestamp layouts that we've already found
for _, layout := range p.foundTsLayouts {
ts, err := time.Parse(layout, v)
ts, err := time.ParseInLocation(layout, v, p.loc)
if err == nil {
timestamp = ts
foundTs = true
Expand All @@ -262,7 +281,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
// layouts.
if !foundTs {
for _, layout := range timeLayouts {
ts, err := time.Parse(layout, v)
ts, err := time.ParseInLocation(layout, v, p.loc)
if err == nil {
timestamp = ts
foundTs = true
Expand All @@ -280,7 +299,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
case DROP:
// goodbye!
default:
ts, err := time.Parse(t, v)
ts, err := time.ParseInLocation(t, v, p.loc)
if err == nil {
timestamp = ts
} else {
Expand Down
186 changes: 183 additions & 3 deletions plugins/inputs/logparser/grok/grok_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func Benchmark_ParseLine_CommonLogFormat(b *testing.B) {
p := &Parser{
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
}
p.Compile()
_ = p.Compile()

var m telegraf.Metric
for n := 0; n < b.N; n++ {
Expand All @@ -29,7 +29,7 @@ func Benchmark_ParseLine_CombinedLogFormat(b *testing.B) {
p := &Parser{
Patterns: []string{"%{COMBINED_LOG_FORMAT}"},
}
p.Compile()
_ = p.Compile()

var m telegraf.Metric
for n := 0; n < b.N; n++ {
Expand All @@ -48,7 +48,7 @@ func Benchmark_ParseLine_CustomPattern(b *testing.B) {
TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
`,
}
p.Compile()
_ = p.Compile()

var m telegraf.Metric
for n := 0; n < b.N; n++ {
Expand Down Expand Up @@ -707,3 +707,183 @@ func TestShortPatternRegression(t *testing.T) {
},
metric.Fields())
}

func TestTimezoneEmptyCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, int64(1465044105000000000), metricB.UnixNano())
}

func TestTimezoneMalformedCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "Something/Weird",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, int64(1465044105000000000), metricB.UnixNano())
}

func TestTimezoneEuropeCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "Europe/Berlin",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, int64(1465036905000000000), metricB.UnixNano())
}

func TestTimezoneAmericasCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "Canada/Eastern",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, int64(1465058505000000000), metricB.UnixNano())
}

func TestTimezoneLocalCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "Local",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, time.Date(2016, time.June, 4, 12, 41, 45, 0, time.Local).UnixNano(), metricB.UnixNano())
}
2 changes: 1 addition & 1 deletion plugins/inputs/logparser/grok/influx_patterns.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package grok

// THIS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns
// DEFAULT_PATTERNS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns
const DEFAULT_PATTERNS = `
# Captures are a slightly modified version of logstash "grok" patterns, with
# the format %{<capture syntax>[:<semantic name>][:<modifier>]}
Expand Down
Loading

0 comments on commit 84bc2a9

Please sign in to comment.