Skip to content

Commit

Permalink
Added support for field names containing '-' and '.'
Browse files Browse the repository at this point in the history
  • Loading branch information
aantono committed Feb 12, 2018
1 parent ce01e59 commit a24cc63
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 7 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ _testmain.go
.coveralls.yml
# added by GitSavvy
.DS_Store
.idea
*.i*
36 changes: 29 additions & 7 deletions grok.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import (
)

// Package-level regular expressions, compiled once when the package is loaded.
var (
// NOTE(review): normal is declared twice in this view; this word-characters-only
// form looks like the pre-change line of the diff — confirm which one is live.
normal = regexp.MustCompile(`%{(\w+(?::\w+(?::\w+)?)?)}`)
// canonical matches %{A}, %{A:B} and %{A:B:C} references in which every
// colon-separated part consists of word characters only.
canonical = regexp.MustCompile(`%{(\w+(?::\w+(?::\w+)?)?)}`)
// normal matches the same reference shape as canonical but additionally
// accepts '-' and '.' inside each colon-separated part.
normal = regexp.MustCompile(`%{([\w-.]+(?::[\w-.]+(?::[\w-.]+)?)?)}`)
// symbolic matches a single non-word character; aliasizePatternName replaces
// every match with "_".
symbolic = regexp.MustCompile(`\W`)
)

// A Config structure is used to configure a Grok parser.
Expand All @@ -31,6 +33,7 @@ type Config struct {
type Grok struct {
rawPattern map[string]string
config *Config
aliases map[string]string
compiledPatterns map[string]*gRegexp
patterns map[string]*gPattern
patternsGuard *sync.RWMutex
Expand Down Expand Up @@ -59,6 +62,7 @@ func New() (*Grok, error) {
func NewWithConfig(config *Config) (*Grok, error) {
g := &Grok{
config: config,
aliases: map[string]string{},
compiledPatterns: map[string]*gRegexp{},
patterns: map[string]*gPattern{},
rawPattern: map[string]string{},
Expand Down Expand Up @@ -125,7 +129,7 @@ func (g *Grok) addPatternsFromMap(m map[string]string) error {
patternDeps := graph{}
for k, v := range m {
keys := []string{}
for _, key := range normal.FindAllStringSubmatch(v, -1) {
for _, key := range canonical.FindAllStringSubmatch(v, -1) {
names := strings.Split(key[1], ":")
syntax := names[0]
if g.patterns[syntax] == nil {
Expand Down Expand Up @@ -206,6 +210,7 @@ func (g *Grok) compiledParse(gr *gRegexp, text string) (map[string]string, error
if g.config.RemoveEmptyValues && match[i] == "" {
continue
}
name = g.nameToAlias(name)
captures[name] = match[i]
}
}
Expand Down Expand Up @@ -238,17 +243,18 @@ func (g *Grok) ParseTyped(pattern string, text string) (map[string]interface{},
if g.config.RemoveEmptyValues == true && match[i] == "" {
continue
}
name := g.nameToAlias(segmentName)
if segmentType, ok := gr.typeInfo[segmentName]; ok {
switch segmentType {
case "int":
captures[segmentName], _ = strconv.Atoi(match[i])
captures[name], _ = strconv.Atoi(match[i])
case "float":
captures[segmentName], _ = strconv.ParseFloat(match[i], 64)
captures[name], _ = strconv.ParseFloat(match[i], 64)
default:
return nil, fmt.Errorf("ERROR the value %s cannot be converted to %s", match[i], segmentType)
}
} else {
captures[segmentName] = match[i]
captures[name] = match[i]
}
}

Expand All @@ -274,6 +280,7 @@ func (g *Grok) ParseToMultiMap(pattern, text string) (map[string][]string, error
if g.config.RemoveEmptyValues == true && match[i] == "" {
continue
}
name = g.nameToAlias(name)
captures[name] = append(captures[name], match[i])
}
}
Expand Down Expand Up @@ -321,9 +328,10 @@ func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPa
for _, values := range normal.FindAllStringSubmatch(pattern, -1) {
names := strings.Split(values[1], ":")

syntax, semantic := names[0], names[0]
syntax, semantic, alias := names[0], names[0], names[0]
if len(names) > 1 {
semantic = names[1]
alias = g.aliasizePatternName(semantic)
}

// Add type cast information only if type set, and not string
Expand All @@ -341,7 +349,7 @@ func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPa
var buffer bytes.Buffer
if !g.config.NamedCapturesOnly || (g.config.NamedCapturesOnly && len(names) > 1) {
buffer.WriteString("(?P<")
buffer.WriteString(semantic)
buffer.WriteString(alias)
buffer.WriteString(">")
buffer.WriteString(storedPattern.expression)
buffer.WriteString(")")
Expand All @@ -366,6 +374,20 @@ func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPa

}

// aliasizePatternName derives an alias for name by replacing every character
// matched by the symbolic expression with "_". It records the alias -> name
// association in g.aliases (overwriting any earlier entry for the same alias)
// and returns the alias.
func (g *Grok) aliasizePatternName(name string) string {
	sanitized := symbolic.ReplaceAllString(name, "_")
	// Keyed by the alias so that nameToAlias can recover the original name.
	g.aliases[sanitized] = name
	return sanitized
}

// nameToAlias looks name up in g.aliases and returns the stored value when an
// entry exists; otherwise it returns name unchanged. Because
// aliasizePatternName stores entries as alias -> original name, passing an
// alias here yields the original name it was derived from.
func (g *Grok) nameToAlias(name string) string {
	if original, found := g.aliases[name]; found {
		return original
	}
	return name
}

// ParseStream will match the given pattern on a line by line basis from the reader
// and apply the results to the process function
func (g *Grok) ParseStream(reader *bufio.Reader, pattern string, process func(map[string]string) error) error {
Expand Down
5 changes: 5 additions & 0 deletions grok_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ func TestNamedCaptures(t *testing.T) {
"%{DAY:jour}",
"Tue May 15 11:21:42 [conn1047685] moveChunk deleted: 7157",
)

check("day-of.week", "Tue",
"%{DAY:day-of.week}",
"Tue May 15 11:21:42 [conn1047685] moveChunk deleted: 7157",
)
}

func TestErrorCaptureUnknowPattern(t *testing.T) {
Expand Down

0 comments on commit a24cc63

Please sign in to comment.