Skip to content

Commit

Permalink
feat(pkg/csv2lp): add possibility to parse long and unsignedLong valu…
Browse files Browse the repository at this point in the history
…es strictly #18744
  • Loading branch information
sranka committed Aug 6, 2020
1 parent 32ef7b9 commit e0a7d17
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 48 deletions.
13 changes: 11 additions & 2 deletions pkg/csv2lp/csv_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ type CsvTableColumn struct {
// TimeZone of dateTime column, applied when parsing dateTime DataType
TimeZone *time.Location
// ParseF is an optional function used to convert column's string value to interface{}
ParseF func(value string, lineNumber int) (interface{}, error)
ParseF func(value string) (interface{}, error)

// escapedLabel contains escaped label that can be directly used in line protocol
escapedLabel string
Expand Down Expand Up @@ -126,9 +126,18 @@ func (c *CsvTableColumn) setupDataType(columnValue string) {
// setup column data type
c.DataType = columnValue

// setup custom parsing of bool data type
// setup custom parsing
if c.DataType == boolDatatype && c.DataFormat != "" {
c.ParseF = createBoolParseFn(c.DataFormat)
return
}
if c.DataType == longDatatype && strings.HasPrefix(c.DataFormat, "strict") {
c.ParseF = createStrictLongParseFn(c.DataFormat[6:])
return
}
if c.DataType == uLongDatatype && strings.HasPrefix(c.DataFormat, "strict") {
c.ParseF = createStrictUnsignedLongParseFn(c.DataFormat[6:])
return
}
}

Expand Down
54 changes: 37 additions & 17 deletions pkg/csv2lp/csv_table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,40 +329,56 @@ func Test_DataTypeInColumnName(t *testing.T) {
csv string
line string
ignoreDataTypeInColumnName bool
error string
}{
{
"m|measurement,b|boolean:x:,c|boolean:x:|x\n" +
csv: "m|measurement,b|boolean:x:,c|boolean:x:|x\n" +
"cpu,,",
`cpu c=true`,
false,
line: `cpu c=true`,
},
{
"m|measurement,a|boolean,b|boolean:0:1,c|boolean:x:,d|boolean:x:\n" +
csv: "m|measurement,a|boolean,b|boolean:0:1,c|boolean:x:,d|boolean:x:\n" +
"cpu,1,1,x,y",
`cpu a=true,b=false,c=true,d=false`,
false,
line: `cpu a=true,b=false,c=true,d=false`,
},
{
"#constant measurement,cpu\n" +
csv: "#constant measurement,cpu\n" +
"a|long,b|string\n" +
"1,1",
`cpu a=1i,b="1"`,
false,
line: `cpu a=1i,b="1"`,
},
{
"#constant measurement,cpu\n" +
csv: "#constant measurement,cpu\n" +
"a|long,b|string\n" +
"1,1",
`cpu a|long=1,b|string=1`,
true,
line: `cpu a|long=1,b|string=1`,
ignoreDataTypeInColumnName: true,
},
{
"#constant measurement,cpu\n" +
csv: "#constant measurement,cpu\n" +
"#datatype long,string\n" +
"a|long,b|string\n" +
"1,1",
`cpu a|long=1i,b|string="1"`,
true,
line: `cpu a|long=1i,b|string="1"`,
ignoreDataTypeInColumnName: true,
},
{
csv: "#constant measurement,cpu\n" +
"a|long:strict: ,b|unsignedLong:strict: \n" +
"1 2,1 2",
line: `cpu a=12i,b=12u`,
},
{
csv: "#constant measurement,cpu\n" +
"a|long:strict\n" +
"1.1,1",
error: "column 'a': '1.1' cannot fit into long data type",
},
{
csv: "#constant measurement,cpu\n" +
"a|unsignedLong:strict\n" +
"1.1,1",
error: "column 'a': '1.1' cannot fit into unsignedLong data type",
},
}

Expand All @@ -376,8 +392,12 @@ func Test_DataTypeInColumnName(t *testing.T) {
rowProcessed := table.AddRow(row)
if rowProcessed {
line, err := table.CreateLine(row)
if err != nil && test.line != "" {
require.Nil(t, err.Error())
if err != nil {
if test.error == "" {
require.Nil(t, err.Error())
} else {
require.Equal(t, test.error, err.Error())
}
}
lines = append(lines, line)
}
Expand Down
63 changes: 47 additions & 16 deletions pkg/csv2lp/data_conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,16 @@ func escapeString(val string) string {
return val
}

// normalizeNumberString normalizes the supplied value according to DataForm of the supplied column.
// normalizeNumberString normalizes the supplied value according to the supplied format.
// This normalization is intended to convert number strings of different locales to a strconv-parseable value.
//
// The format's first character is a fraction delimiter character. Next characters in the format
// are simply removed, they are typically used to visually separate groups in large numbers.
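// The removeFraction parameter controls whether the fraction part is dropped from the returned value;
// when it is set and a fraction delimiter is encountered, the value is cut at the delimiter and truncated is true.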
// An empty format means ". \n\t\r_"
//
// For example, to get a strconv-parseable float from a Spanish value '3.494.826.157,123', use format ",." .
func normalizeNumberString(value string, column *CsvTableColumn, removeFraction bool, lineNumber int) string {
format := column.DataFormat
func normalizeNumberString(value string, format string, removeFraction bool) (normalized string, truncated bool) {
if format == "" {
format = ". \n\t\r_"
}
Expand All @@ -112,28 +112,24 @@ func normalizeNumberString(value string, column *CsvTableColumn, removeFraction
}
if c == fractionRune {
if removeFraction {
// warn about lost precision
truncatedValue := retVal.String()
warning := fmt.Errorf("'%s' truncated to '%s' to fit into '%s' data type", value, truncatedValue, column.DataType)
log.Printf("WARNING: %v\n", CreateRowColumnError(lineNumber, column.Label, warning))
return truncatedValue
return retVal.String(), true
}
retVal.WriteByte('.')
continue
}
retVal.WriteRune(c)
}

return retVal.String()
return retVal.String(), false
}
return value
return value, false
}

func toTypedValue(val string, column *CsvTableColumn, lineNumber int) (interface{}, error) {
dataType := column.DataType
dataFormat := column.DataFormat
if column.ParseF != nil {
return column.ParseF(val, lineNumber)
return column.ParseF(val)
}
switch dataType {
case stringDatatype:
Expand Down Expand Up @@ -165,7 +161,8 @@ func toTypedValue(val string, column *CsvTableColumn, lineNumber int) (interface
case durationDatatype:
return time.ParseDuration(val)
case doubleDatatype:
return strconv.ParseFloat(normalizeNumberString(val, column, false, lineNumber), 64)
normalized, _ := normalizeNumberString(val, dataFormat, false)
return strconv.ParseFloat(normalized, 64)
case boolDatatype:
switch {
case len(val) == 0:
Expand All @@ -178,9 +175,21 @@ func toTypedValue(val string, column *CsvTableColumn, lineNumber int) (interface
return nil, errors.New("Unsupported boolean value '" + val + "' , first character is expected to be 't','f','0','1','y','n'")
}
case longDatatype:
return strconv.ParseInt(normalizeNumberString(val, column, true, lineNumber), 10, 64)
normalized, truncated := normalizeNumberString(val, dataFormat, true)
if truncated {
error := CreateRowColumnError(lineNumber, column.Label,
fmt.Errorf("'%s' truncated to '%s' to fit into long data type", val, normalized))
log.Printf("WARNING: %v\n", error)
}
return strconv.ParseInt(normalized, 10, 64)
case uLongDatatype:
return strconv.ParseUint(normalizeNumberString(val, column, true, lineNumber), 10, 64)
normalized, truncated := normalizeNumberString(val, dataFormat, true)
if truncated {
error := CreateRowColumnError(lineNumber, column.Label,
fmt.Errorf("'%s' truncated to '%s' to fit into unsignedLong data type", val, normalized))
log.Printf("WARNING: %v\n", error)
}
return strconv.ParseUint(normalized, 10, 64)
case base64BinaryDataType:
return base64.StdEncoding.DecodeString(val)
default:
Expand Down Expand Up @@ -267,7 +276,7 @@ func CreateDecoder(encoding string) (func(io.Reader) io.Reader, error) {
}

// createBoolParseFn returns a function that converts a string value to boolean according to format "true,yes,1:false,no,0"
func createBoolParseFn(format string) func(string, int) (interface{}, error) {
func createBoolParseFn(format string) func(string) (interface{}, error) {
var err error = nil
truthy := []string{}
falsy := []string{}
Expand All @@ -284,7 +293,7 @@ func createBoolParseFn(format string) func(string, int) (interface{}, error) {
falsy = strings.Split(f, ",")
}
}
return func(val string, _lineNumber int) (interface{}, error) {
return func(val string) (interface{}, error) {
if err != nil {
return nil, err
}
Expand All @@ -308,3 +317,25 @@ func createBoolParseFn(format string) func(string, int) (interface{}, error) {
return nil, fmt.Errorf("unsupported boolean value: %s must one of %v or one of %v", val, truthy, falsy)
}
}

// createStrictLongParseFn builds a parser for long (signed 64-bit) column values
// that rejects values containing a fraction part instead of silently truncating them.
//
// The supplied dataFormat is passed to normalizeNumberString with fraction removal
// enabled. When the normalization reports a truncation, the parser returns an error;
// otherwise the normalized text is parsed as a base-10 int64.
func createStrictLongParseFn(dataFormat string) func(string) (interface{}, error) {
	parse := func(raw string) (interface{}, error) {
		digits, hadFraction := normalizeNumberString(raw, dataFormat, true)
		if !hadFraction {
			return strconv.ParseInt(digits, 10, 64)
		}
		// strict mode: a dropped fraction is an error, not a warning
		return 0, fmt.Errorf("'%s' cannot fit into long data type", raw)
	}
	return parse
}

// createStrictUnsignedLongParseFn builds a parser for unsignedLong (unsigned 64-bit)
// column values that rejects values containing a fraction part instead of silently
// truncating them.
//
// The supplied dataFormat is passed to normalizeNumberString with fraction removal
// enabled. When the normalization reports a truncation, the parser returns an error;
// otherwise the normalized text is parsed as a base-10 uint64.
func createStrictUnsignedLongParseFn(dataFormat string) func(string) (interface{}, error) {
	parse := func(raw string) (interface{}, error) {
		digits, hadFraction := normalizeNumberString(raw, dataFormat, true)
		if !hadFraction {
			return strconv.ParseUint(digits, 10, 64)
		}
		// strict mode: a dropped fraction is an error, not a warning
		return 0, fmt.Errorf("'%s' cannot fit into unsignedLong data type", raw)
	}
	return parse
}
24 changes: 11 additions & 13 deletions pkg/csv2lp/data_conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,14 +247,14 @@ func Test_NormalizeNumberString(t *testing.T) {
format string
removeFraction bool
expect string
warning string
truncated bool
}{
{"123", "", true, "123", ""},
{"123", ".", true, "123", ""},
{"123.456", ".", true, "123", "::PREFIX::WARNING: line 1: column 'test': '123.456' truncated to '123' to fit into 'tst' data type\n"},
{"123.456", ".", false, "123.456", ""},
{"1 2.3,456", ",. ", false, "123.456", ""},
{" 1 2\t3.456 \r\n", "", false, "123.456", ""},
{"123", "", true, "123", false},
{"123", ".", true, "123", false},
{"123.456", ".", true, "123", true},
{"123.456", ".", false, "123.456", false},
{"1 2.3,456", ",. ", false, "123.456", false},
{" 1 2\t3.456 \r\n", "", false, "123.456", false},
}

for i, test := range tests {
Expand All @@ -272,11 +272,9 @@ func Test_NormalizeNumberString(t *testing.T) {
log.SetFlags(oldFlags)
log.SetPrefix(oldPrefix)
}()

require.Equal(t, test.expect,
normalizeNumberString(test.value,
&CsvTableColumn{Label: "test", DataType: "tst", DataFormat: test.format}, test.removeFraction, 1))
require.Equal(t, test.warning, buf.String())
normalized, truncated := normalizeNumberString(test.value, test.format, test.removeFraction)
require.Equal(t, test.expect, normalized)
require.Equal(t, test.truncated, truncated)
})
}
}
Expand Down Expand Up @@ -336,7 +334,7 @@ func Test_CreateBoolParseFn(t *testing.T) {
fn := createBoolParseFn(test.format)
for j, pair := range test.pair {
t.Run(fmt.Sprint(i)+"_"+fmt.Sprint(j), func(t *testing.T) {
result, err := fn(pair.value, 1)
result, err := fn(pair.value)
switch pair.expect {
case "true":
require.Equal(t, true, result)
Expand Down

0 comments on commit e0a7d17

Please sign in to comment.