Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix zerologwriter json parser bug #961

Merged
merged 2 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 135 additions & 41 deletions v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package zerologWriter

import (
"bytes"
"context"
"io"
"strings"
Expand Down Expand Up @@ -55,15 +56,23 @@ func parseJSONLogData(log []byte) newrelic.LogData {
data.Message = string(log)
data.Timestamp = time.Now().UnixMilli()

for i := 0; i < len(log)-1; {
i := skipPastSpaces(log, 0)
if i < 0 || i >= len(log) || log[i] != '{' {
return data
}
i++
for i < len(log)-1 {
// get key; always a string field
key, valStart := getKey(log, i)
if valStart < 0 {
return data
}
var next int

// NOTE: depending on the key, the type of field the value is can differ
switch key {
case zerolog.LevelFieldName:
data.Severity, next = getStringValue(log, valStart+1)
data.Severity, next = getStringValue(log, valStart)
case zerolog.ErrorStackFieldName:
_, next = getStackTrace(log, valStart)
default:
Expand All @@ -72,7 +81,7 @@ func parseJSONLogData(log []byte) newrelic.LogData {
}
// TODO: once we update the logging spec to support custom attributes, capture these
if isStringValue(log, valStart) {
_, next = getStringValue(log, valStart+1)
_, next = getStringValue(log, valStart)
} else if isNumberValue(log, valStart) {
_, next = getNumberValue(log, valStart)
} else {
Expand All @@ -90,54 +99,131 @@ func parseJSONLogData(log []byte) newrelic.LogData {
}

func isStringValue(p []byte, indx int) bool {
if indx = skipPastSpaces(p, indx); indx < 0 {
return false
}
return p[indx] == '"'
}

func isNumberValue(p []byte, indx int) bool {
return unicode.IsDigit(rune(p[indx]))
if indx = skipPastSpaces(p, indx); indx < 0 {
return false
}
// unicode.IsDigit isn't sufficient here because JSON numbers can start with a sign too
return unicode.IsDigit(rune(p[indx])) || p[indx] == '-'
}

// zerolog keys are always JSON strings
func getKey(p []byte, indx int) (string, int) {
value := strings.Builder{}
i := indx

// find start of string field
for ; i < len(p); i++ {
if p[i] == '"' {
i += 1
break
}
i := skipPastSpaces(p, indx)
if i < 0 || i >= len(p) || p[i] != '"' {
return "", -1
}

// parse value of string field
for ; i < len(p); i++ {
if p[i] == '"' && i+1 < len(p) && p[i+1] == ':' {
return value.String(), i + 2
literalNext := false
for i++; i < len(p); i++ {
if literalNext {
value.WriteByte(p[i])
literalNext = false
continue
}

if p[i] == '\\' {
value.WriteByte(p[i])
literalNext = true
continue
}

if p[i] == '"' {
// found end of key. Now look for the colon separator
for i++; i < len(p); i++ {
if p[i] == ':' && i+1 < len(p) {
return value.String(), i + 1
}
if p[i] != ' ' && p[i] != '\t' {
break
}
}
// Oh oh. if we got here, there wasn't a colon, or there wasn't a value after it, or
// something showed up between the end of the key and the colon that wasn't a space.
// In any of those cases, we didn't find the key of a key/value pair.
return "", -1
} else {
value.WriteByte(p[i])
}
}

return "", -1
}

/*
func isEOL(p []byte, i int) bool {
return p[i] == '}' && i+2 == len(p)
for ; i < len(p); i++ {
if p[i] == ' ' || p[i] == '\t' {
continue
}
if p[i] == '}' {
// nothing but space to the end from here?
for i++; i < len(p); i++ {
if p[i] != ' ' && p[i] != '\t' && p[i] != '\r' && p[i] != '\n' {
return false // nope, that wasn't the end of the string
}
}
return true
}
}
return false
}
*/

func skipPastSpaces(p []byte, i int) int {
for ; i < len(p); i++ {
if p[i] != ' ' && p[i] != '\t' && p[i] != '\r' && p[i] != '\n' {
return i
}
}
return -1
}

func getStringValue(p []byte, indx int) (string, int) {
value := strings.Builder{}

// skip to start of string
i := skipPastSpaces(p, indx)
if i < 0 || i >= len(p) || p[i] != '"' {
return "", -1
}

// parse value of string field
for i := indx; i < len(p); i++ {
if p[i] == '"' && i+1 < len(p) {
if p[i+1] == ',' && i+2 < len(p) && p[i+2] == '"' {
return value.String(), i + 2
} else if isEOL(p, i+1) {
literalNext := false
for i++; i < len(p); i++ {
if literalNext {
value.WriteByte(p[i])
literalNext = false
continue
}

if p[i] == '\\' {
value.WriteByte(p[i])
literalNext = true
continue
}

if p[i] == '"' {
// end of string. search past the comma so we can find the following key (if any) later.
if i = skipPastSpaces(p, i+1); i < 0 || i >= len(p) {
return value.String(), -1
}
if p[i] == ',' {
if i+1 < len(p) {
return value.String(), i + 1
}
return value.String(), -1
}
return value.String(), -1
}

value.WriteByte(p[i])
}

Expand All @@ -148,35 +234,43 @@ func getNumberValue(p []byte, indx int) (string, int) {
value := strings.Builder{}

// parse value of string field
for i := indx; i < len(p); i++ {
if p[i] == ',' && i+1 < len(p) && p[i+1] == '"' {
return value.String(), i + 1
} else if isEOL(p, i) {
return value.String(), -1
} else {
value.WriteByte(p[i])
}
i := skipPastSpaces(p, indx)
if i < 0 {
return "", -1
}
// JSON numeric values contain digits, '.', '-', 'e'
for ; i < len(p) && bytes.IndexByte([]byte("0123456789-+eE."), p[i]) >= 0; i++ {
value.WriteByte(p[i])
}

return "", -1
i = skipPastSpaces(p, i)
if i > 0 && i+1 < len(p) && p[i] == ',' {
return value.String(), i + 1
}
return value.String(), -1
}

func getStackTrace(p []byte, indx int) (string, int) {
value := strings.Builder{}

// parse value of string field
for i := indx; i < len(p); i++ {
i := skipPastSpaces(p, indx)
if i < 0 || i >= len(p) || p[i] != '[' {
return "", -1
}
// the stack trace is everything from '[' to the next ']'.
// TODO: this is a little naïve and we may need to consider parsing
// the data inbetween more carefully. To date, we haven't seen a case
// where that is necessary, and prefer not to impact performance of the
// system by doing the extra processing, but we can revisit that later
// if necessary.
for ; i < len(p); i++ {
if p[i] == ']' {
value.WriteByte(p[i])

if i+1 < len(p) {
if isEOL(p, i+1) {
return value.String(), -1
}
if p[i+1] == ',' && i+2 < len(p) && p[i+2] == '"' {
return value.String(), i + 2
}
i = skipPastSpaces(p, i)
if i > 0 && i+1 < len(p) && p[i] == ',' {
return value.String(), i + 1
}
return value.String(), -1
} else {
value.WriteByte(p[i])
}
Expand Down
22 changes: 20 additions & 2 deletions v3/integrations/logcontext-v2/zerologWriter/zerolog-writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,31 @@ func TestParseLogData(t *testing.T) {
},
},
{
`{"Scale":"833 cents","Interval":833.09,"time":1562212768,"message":"Fibonacci is everywhere","level":"debug"}` + "\n",
`{"Scale":"833 cents" , "Interval":833.09,"time":1562212768,"message":"Fibonacci is everywhere","level":"debug"}` + "\n",
"level",
newrelic.LogData{
Message: `{"Scale":"833 cents","Interval":833.09,"time":1562212768,"message":"Fibonacci is everywhere","level":"debug"}` + "\n",
Message: `{"Scale":"833 cents" , "Interval":833.09,"time":1562212768,"message":"Fibonacci is everywhere","level":"debug"}` + "\n",
Severity: "debug",
},
},
/* regression test case from issue 955 by MarioCarrion */
{
`{"level":"info","message":"\"value\","}` + "\n",
"level",
newrelic.LogData{
Message: `{"level":"info","message":"\"value\","}` + "\n",
Severity: "info",
},
},
{
`{"level":"info","message":","}` + "\n",
"level",
newrelic.LogData{
Message: `{"level":"info","message":","}` + "\n",
Severity: "info",
},
},
/* end of issue 955 test case */
}
for _, test := range tests {
if test.levelKey != "" {
Expand Down
Loading