Skip to content

Commit

Permalink
feat(pkg/csv2lp): log a warning when loosing precision #18744
Browse files Browse the repository at this point in the history
  • Loading branch information
sranka committed Sep 12, 2020
1 parent cbc640b commit 001343d
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 32 deletions.
18 changes: 16 additions & 2 deletions pkg/csv2lp/csv2lp.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,21 @@ type CsvLineError struct {
}

func (e CsvLineError) Error() string {
return fmt.Sprintf("line %d: %v", e.Line, e.Err)
if e.Line > 0 {
return fmt.Sprintf("line %d: %v", e.Line, e.Err)
}
return fmt.Sprintf("%v", e.Err)
}

// CreateRowColumnError creates adds row number and column name to the error supplied
func CreateRowColumnError(line int, columnLabel string, err error) CsvLineError {
return CsvLineError{
Line: line,
Err: CsvColumnError{
Column: columnLabel,
Err: err,
},
}
}

// CsvToLineReader represents state of transformation from csv data to lien protocol reader
Expand Down Expand Up @@ -98,7 +112,7 @@ func (state *CsvToLineReader) Read(p []byte) (n int, err error) {
if state.Table.AddRow(row) {
var err error
state.lineBuffer = state.lineBuffer[:0] // reuse line buffer
state.lineBuffer, err = state.Table.AppendLine(state.lineBuffer, row)
state.lineBuffer, err = state.Table.AppendLine(state.lineBuffer, row, state.LineNumber)
if !state.dataRowAdded && state.logTableDataColumns {
log.Println(state.Table.DataColumnsInfo())
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/csv2lp/csv2lp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,10 @@ func Test_CsvLineError(t *testing.T) {
CsvLineError{Line: 2, Err: CsvColumnError{"a", errors.New("cause")}},
"line 2: column 'a': cause",
},
{
CsvLineError{Line: -1, Err: CsvColumnError{"a", errors.New("cause")}},
"column 'a': cause",
},
}
for _, test := range tests {
require.Equal(t, test.value, test.err.Error())
Expand Down
10 changes: 5 additions & 5 deletions pkg/csv2lp/csv_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,15 +391,15 @@ func (t *CsvTable) recomputeLineProtocolColumns() {
// CreateLine produces a protocol line out of the supplied row or returns error
func (t *CsvTable) CreateLine(row []string) (line string, err error) {
buffer := make([]byte, 100)[:0]
buffer, err = t.AppendLine(buffer, row)
buffer, err = t.AppendLine(buffer, row, -1)
if err != nil {
return "", err
}
return *(*string)(unsafe.Pointer(&buffer)), nil
}

// AppendLine appends a protocol line to the supplied buffer using a CSV row and returns appended buffer or an error if any
func (t *CsvTable) AppendLine(buffer []byte, row []string) ([]byte, error) {
func (t *CsvTable) AppendLine(buffer []byte, row []string, lineNumber int) ([]byte, error) {
if t.computeLineProtocolColumns() {
// validate column data types
if t.cachedFieldValue != nil && !IsTypeSupported(t.cachedFieldValue.DataType) {
Expand Down Expand Up @@ -447,7 +447,7 @@ func (t *CsvTable) AppendLine(buffer []byte, row []string) ([]byte, error) {
buffer = append(buffer, escapeTag(field)...)
buffer = append(buffer, '=')
var err error
buffer, err = appendConverted(buffer, value, t.cachedFieldValue)
buffer, err = appendConverted(buffer, value, t.cachedFieldValue, lineNumber)
if err != nil {
return buffer, CsvColumnError{
t.cachedFieldName.Label,
Expand All @@ -468,7 +468,7 @@ func (t *CsvTable) AppendLine(buffer []byte, row []string) ([]byte, error) {
buffer = append(buffer, field.LineLabel()...)
buffer = append(buffer, '=')
var err error
buffer, err = appendConverted(buffer, value, field)
buffer, err = appendConverted(buffer, value, field, lineNumber)
if err != nil {
return buffer, CsvColumnError{
field.Label,
Expand All @@ -491,7 +491,7 @@ func (t *CsvTable) AppendLine(buffer []byte, row []string) ([]byte, error) {
}
buffer = append(buffer, ' ')
var err error
buffer, err = appendConverted(buffer, timeVal, t.cachedTime)
buffer, err = appendConverted(buffer, timeVal, t.cachedTime, lineNumber)
if err != nil {
return buffer, CsvColumnError{
t.cachedTime.Label,
Expand Down
32 changes: 19 additions & 13 deletions pkg/csv2lp/data_conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"log"
"math"
"strconv"
"strings"
Expand Down Expand Up @@ -82,16 +83,17 @@ func escapeString(val string) string {
return val
}

// normalizeNumberString normalizes the supplied value with the help of the format supplied.
// normalizeNumberString normalizes the supplied value according to DataForm of the supplied column.
// This normalization is intended to convert number strings of different locales to a strconv-parseable value.
//
// The format's first character is a fraction delimiter character. Next characters in the format
// are simply removed, they are typically used to visually separate groups in large numbers.
// The removeFaction parameter controls whether the returned value can contain also the fraction part.
// The removeFraction parameter controls whether the returned value can contain also the fraction part.
//
// For example, to get a strconv-parseable float from a Spanish value '3.494.826.157,123', use format ",." .
func normalizeNumberString(value string, format string, removeFraction bool) string {
if len(format) == 0 {
func normalizeNumberString(value string, column *CsvTableColumn, removeFraction bool, lineNumber int) string {
format := column.DataFormat
if format == "" {
format = ". \n\t\r_"
}
if strings.ContainsAny(value, format) {
Expand All @@ -110,20 +112,24 @@ func normalizeNumberString(value string, format string, removeFraction bool) str
}
if c == fractionRune {
if removeFraction {
break ForAllCharacters
// warn about lost precision
truncatedValue := retVal.String()
warning := fmt.Errorf("'%s' truncated to '%s' to fit into '%s' data type", value, truncatedValue, column.DataType)
log.Printf("WARNING: %v\n", CreateRowColumnError(lineNumber, column.Label, warning))
return truncatedValue
}
retVal.WriteByte('.')
} else {
retVal.WriteRune(c)
continue
}
retVal.WriteRune(c)
}

return retVal.String()
}
return value
}

func toTypedValue(val string, column *CsvTableColumn) (interface{}, error) {
func toTypedValue(val string, column *CsvTableColumn, lineNumber int) (interface{}, error) {
dataType := column.DataType
dataFormat := column.DataFormat
if column.ParseF != nil {
Expand Down Expand Up @@ -159,7 +165,7 @@ func toTypedValue(val string, column *CsvTableColumn) (interface{}, error) {
case durationDatatype:
return time.ParseDuration(val)
case doubleDatatype:
return strconv.ParseFloat(normalizeNumberString(val, dataFormat, false), 64)
return strconv.ParseFloat(normalizeNumberString(val, column, false, lineNumber), 64)
case boolDatatype:
switch {
case len(val) == 0:
Expand All @@ -172,9 +178,9 @@ func toTypedValue(val string, column *CsvTableColumn) (interface{}, error) {
return nil, errors.New("Unsupported boolean value '" + val + "' , first character is expected to be 't','f','0','1','y','n'")
}
case longDatatype:
return strconv.ParseInt(normalizeNumberString(val, dataFormat, true), 10, 64)
return strconv.ParseInt(normalizeNumberString(val, column, true, lineNumber), 10, 64)
case uLongDatatype:
return strconv.ParseUint(normalizeNumberString(val, dataFormat, true), 10, 64)
return strconv.ParseUint(normalizeNumberString(val, column, true, lineNumber), 10, 64)
case base64BinaryDataType:
return base64.StdEncoding.DecodeString(val)
default:
Expand Down Expand Up @@ -230,11 +236,11 @@ func appendProtocolValue(buffer []byte, value interface{}) ([]byte, error) {
}
}

func appendConverted(buffer []byte, val string, column *CsvTableColumn) ([]byte, error) {
func appendConverted(buffer []byte, val string, column *CsvTableColumn, lineNumber int) ([]byte, error) {
if len(column.DataType) == 0 { // keep the value as it is
return append(buffer, val...), nil
}
typedVal, err := toTypedValue(val, column)
typedVal, err := toTypedValue(val, column, lineNumber)
if err != nil {
return buffer, err
}
Expand Down
43 changes: 31 additions & 12 deletions pkg/csv2lp/data_conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io/ioutil"
"log"
"math"
"os"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -112,9 +113,9 @@ func Test_ToTypedValue(t *testing.T) {

for i, test := range tests {
t.Run(fmt.Sprint(i)+" "+test.value, func(t *testing.T) {
column := &CsvTableColumn{}
column := &CsvTableColumn{Label: "test"}
column.setupDataType(test.dataType)
val, err := toTypedValue(test.value, column)
val, err := toTypedValue(test.value, column, 1)
if err != nil && test.expect != nil {
require.Nil(t, err.Error())
}
Expand Down Expand Up @@ -143,7 +144,7 @@ func Test_ToTypedValue_dateTimeCustomTimeZone(t *testing.T) {
column := &CsvTableColumn{}
column.TimeZone = tz
column.setupDataType(test.dataType)
val, err := toTypedValue(test.value, column)
val, err := toTypedValue(test.value, column, 1)
if err != nil && test.expect != nil {
require.Nil(t, err.Error())
}
Expand Down Expand Up @@ -210,9 +211,9 @@ func Test_AppendConverted(t *testing.T) {

for i, test := range tests {
t.Run(fmt.Sprint(i), func(t *testing.T) {
column := &CsvTableColumn{}
column := &CsvTableColumn{Label: "test"}
column.setupDataType(test.dataType)
val, err := appendConverted(nil, test.value, column)
val, err := appendConverted(nil, test.value, column, 1)
if err != nil && test.expect != "" {
require.Nil(t, err.Error())
}
Expand Down Expand Up @@ -246,18 +247,36 @@ func Test_NormalizeNumberString(t *testing.T) {
format string
removeFraction bool
expect string
warning string
}{
{"123", "", true, "123"},
{"123", ".", true, "123"},
{"123.456", ".", true, "123"},
{"123.456", ".", false, "123.456"},
{"1 2.3,456", ",. ", false, "123.456"},
{" 1 2\t3.456 \r\n", "", false, "123.456"},
{"123", "", true, "123", ""},
{"123", ".", true, "123", ""},
{"123.456", ".", true, "123", "::PREFIX::WARNING: line 1: column 'test': '123.456' truncated to '123' to fit into 'tst' data type\n"},
{"123.456", ".", false, "123.456", ""},
{"1 2.3,456", ",. ", false, "123.456", ""},
{" 1 2\t3.456 \r\n", "", false, "123.456", ""},
}

for i, test := range tests {
t.Run(fmt.Sprint(i), func(t *testing.T) {
require.Equal(t, test.expect, normalizeNumberString(test.value, test.format, test.removeFraction))
// customize logging to check warnings
var buf bytes.Buffer
log.SetOutput(&buf)
oldFlags := log.Flags()
log.SetFlags(0)
oldPrefix := log.Prefix()
prefix := "::PREFIX::"
log.SetPrefix(prefix)
defer func() {
log.SetOutput(os.Stderr)
log.SetFlags(oldFlags)
log.SetPrefix(oldPrefix)
}()

require.Equal(t, test.expect,
normalizeNumberString(test.value,
&CsvTableColumn{Label: "test", DataType: "tst", DataFormat: test.format}, test.removeFraction, 1))
require.Equal(t, test.warning, buf.String())
})
}
}
Expand Down

0 comments on commit 001343d

Please sign in to comment.