Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding timezone support to logparser timestamps #2882

Merged
merged 1 commit into from
Jun 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion plugins/inputs/logparser/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ regex patterns.
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
## /var/log/apache.log -> only tail the apache log file
files = ["/var/log/apache/access.log"]
## Read file from beginning.

## Read files that currently exist from the beginning. Files that are created
## while telegraf is running (and that match the "files" globs) will always
## be read from the beginning.
from_beginning = false

## Parse logstash-style "grok" patterns:
Expand All @@ -28,13 +31,27 @@ regex patterns.
## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs)
## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent)
patterns = ["%{COMBINED_LOG_FORMAT}"]

## Name of the outputted measurement name.
measurement = "apache_access_log"

## Full path(s) to custom pattern files.
custom_pattern_files = []

## Custom patterns can also be defined here. Put one pattern per line.
custom_patterns = '''
'''

## Timezone allows you to provide an override for timestamps that
## don't already include an offset
## e.g. 04/06/2016 12:41:45 data one two 5.43µs
##
## Default: "" which renders UTC
## Options are as follows:
## 1. Local -- interpret based on machine localtime
## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
## 3. UTC -- or blank/unspecified, will return timestamp in UTC
timezone = "Canada/Eastern"
```

### Grok Parser
Expand Down Expand Up @@ -125,6 +142,13 @@ Wed Apr 12 13:10:34 PST 2017 value=42
'''
```

For cases where the timestamp itself is without offset, the `timezone` config var is available
to denote an offset. By default (with `timezone` either omit, blank or set to `"UTC"`), the times
are processed as if in the UTC timezone. If specified as `timezone = "Local"`, the timestamp
will be processed based on the current machine timezone configuration. Lastly, if using a
timezone from the list of Unix [timezones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones), the logparser grok will attempt to offset
the timestamp accordingly. See test cases for more detailed examples.

#### TOML Escaping

When saving patterns to the configuration file, keep in mind the different TOML
Expand Down
31 changes: 25 additions & 6 deletions plugins/inputs/logparser/grok/grok.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ var (
patternOnlyRe = regexp.MustCompile(`%{(\w+)}`)
)

// Parser is the primary struct to handle and grok-patterns defined in the config toml
type Parser struct {
Patterns []string
// namedPatterns is a list of internally-assigned names to the patterns
Expand All @@ -70,6 +71,16 @@ type Parser struct {
CustomPatternFiles []string
Measurement string

// Timezone is an optional component to help render log dates to
// your chosen zone.
// Default: "" which renders UTC
// Options are as follows:
// 1. Local -- interpret based on machine localtime
// 2. "America/Chicago" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
// 3. UTC -- or blank/unspecified, will return timestamp in UTC
Timezone string
loc *time.Location

// typeMap is a map of patterns -> capture name -> modifier,
// ie, {
// "%{TESTLOG}":
Expand Down Expand Up @@ -105,6 +116,7 @@ type Parser struct {
tsModder *tsModder
}

// Compile is a bound method to Parser which will process the options for our parser
func (p *Parser) Compile() error {
p.typeMap = make(map[string]map[string]string)
p.tsMap = make(map[string]map[string]string)
Expand Down Expand Up @@ -142,9 +154,9 @@ func (p *Parser) Compile() error {

// Parse any custom pattern files supplied.
for _, filename := range p.CustomPatternFiles {
file, err := os.Open(filename)
if err != nil {
return err
file, fileErr := os.Open(filename)
if fileErr != nil {
return fileErr
}

scanner := bufio.NewScanner(bufio.NewReader(file))
Expand All @@ -155,9 +167,16 @@ func (p *Parser) Compile() error {
p.Measurement = "logparser_grok"
}

p.loc, err = time.LoadLocation(p.Timezone)
if err != nil {
log.Printf("W! improper timezone supplied (%s), setting loc to UTC", p.Timezone)
p.loc, _ = time.LoadLocation("UTC")
}

return p.compileCustomPatterns()
}

// ParseLine is the primary function to process individual lines, returning the metrics
func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
var err error
// values are the parsed fields from the log line
Expand Down Expand Up @@ -251,7 +270,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
var foundTs bool
// first try timestamp layouts that we've already found
for _, layout := range p.foundTsLayouts {
ts, err := time.Parse(layout, v)
ts, err := time.ParseInLocation(layout, v, p.loc)
if err == nil {
timestamp = ts
foundTs = true
Expand All @@ -262,7 +281,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
// layouts.
if !foundTs {
for _, layout := range timeLayouts {
ts, err := time.Parse(layout, v)
ts, err := time.ParseInLocation(layout, v, p.loc)
if err == nil {
timestamp = ts
foundTs = true
Expand All @@ -280,7 +299,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
case DROP:
// goodbye!
default:
ts, err := time.Parse(t, v)
ts, err := time.ParseInLocation(t, v, p.loc)
if err == nil {
timestamp = ts
} else {
Expand Down
186 changes: 183 additions & 3 deletions plugins/inputs/logparser/grok/grok_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ func Benchmark_ParseLine_CommonLogFormat(b *testing.B) {
p := &Parser{
Patterns: []string{"%{COMMON_LOG_FORMAT}"},
}
p.Compile()
_ = p.Compile()

var m telegraf.Metric
for n := 0; n < b.N; n++ {
Expand All @@ -29,7 +29,7 @@ func Benchmark_ParseLine_CombinedLogFormat(b *testing.B) {
p := &Parser{
Patterns: []string{"%{COMBINED_LOG_FORMAT}"},
}
p.Compile()
_ = p.Compile()

var m telegraf.Metric
for n := 0; n < b.N; n++ {
Expand All @@ -48,7 +48,7 @@ func Benchmark_ParseLine_CustomPattern(b *testing.B) {
TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
`,
}
p.Compile()
_ = p.Compile()

var m telegraf.Metric
for n := 0; n < b.N; n++ {
Expand Down Expand Up @@ -707,3 +707,183 @@ func TestShortPatternRegression(t *testing.T) {
},
metric.Fields())
}

func TestTimezoneEmptyCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, int64(1465044105000000000), metricB.UnixNano())
}

func TestTimezoneMalformedCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "Something/Weird",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, int64(1465044105000000000), metricB.UnixNano())
}

func TestTimezoneEuropeCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "Europe/Berlin",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, int64(1465036905000000000), metricB.UnixNano())
}

func TestTimezoneAmericasCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "Canada/Eastern",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, int64(1465058505000000000), metricB.UnixNano())
}

func TestTimezoneLocalCompileFileAndParse(t *testing.T) {
p := &Parser{
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
CustomPatternFiles: []string{"./testdata/test-patterns"},
Timezone: "Local",
}
assert.NoError(t, p.Compile())

metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`)
require.NotNil(t, metricA)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"clientip": "192.168.1.1",
"myfloat": float64(1.25),
"response_time": int64(5432),
"myint": int64(101),
},
metricA.Fields())
assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags())
assert.Equal(t, int64(1465040505000000000), metricA.UnixNano())

metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`)
require.NotNil(t, metricB)
assert.NoError(t, err)
assert.Equal(t,
map[string]interface{}{
"myfloat": 1.25,
"mystring": "mystring",
"nomodifier": "nomodifier",
},
metricB.Fields())
assert.Equal(t, map[string]string{}, metricB.Tags())
assert.Equal(t, time.Date(2016, time.June, 4, 12, 41, 45, 0, time.Local).UnixNano(), metricB.UnixNano())
}
2 changes: 1 addition & 1 deletion plugins/inputs/logparser/grok/influx_patterns.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package grok

// THIS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns
// DEFAULT_PATTERNS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns
const DEFAULT_PATTERNS = `
# Captures are a slightly modified version of logstash "grok" patterns, with
# the format %{<capture syntax>[:<semantic name>][:<modifier>]}
Expand Down
Loading