Skip to content

Commit

Permalink
Merge pull request #21 from PDOK/improve-error-handling
Browse files Browse the repository at this point in the history
fix: improve error handling, make clear error comes from CSV file
  • Loading branch information
rkettelerij authored Jan 7, 2025
2 parents e6d1bb8 + 50ca131 commit 7a81875
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 59 deletions.
52 changes: 16 additions & 36 deletions internal/etl/transform/extend_values.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ package transform

import (
"encoding/csv"
"fmt"
"os"
"strings"

"github.com/PDOK/gomagpie/internal/engine/util"
)

// Return slice of fieldValuesByName
func extendFieldValues(fieldValuesByName map[string]string, substitutionsFile, synonymsFile string) ([]map[string]string, error) {
substitutions, err := readCsvFile(substitutionsFile)
if err != nil {
Expand All @@ -22,24 +22,13 @@ func extendFieldValues(fieldValuesByName map[string]string, substitutionsFile, s
var fieldValuesByNameWithAllValues = make(map[string][]string)
for key, value := range fieldValuesByName {
valueLower := strings.ToLower(value)

// Get all substitutions
substitutedValues, err := extendValues([]string{valueLower}, substitutions)
if err != nil {
return nil, err
}
substitutedValues := extendValues([]string{valueLower}, substitutions)
// Get all synonyms for these substituted values
// one way
synonymsValuesOneWay, err := extendValues(substitutedValues, synonyms)
if err != nil {
return nil, err
}
// reverse way
allValues, err := extendValues(synonymsValuesOneWay, util.Inverse(synonyms))
if err != nil {
return nil, err
}

// -> one way
synonymsValuesOneWay := extendValues(substitutedValues, synonyms)
// <- reverse way
allValues := extendValues(synonymsValuesOneWay, util.Inverse(synonyms))
// Create map with for each key a slice of []values
fieldValuesByNameWithAllValues[key] = allValues
}
Expand Down Expand Up @@ -83,32 +72,26 @@ func generateCombinations(keys []string, values [][]string) []map[string]string
return result
}

func extendValues(input []string, mapping map[string]string) ([]string, error) {
func extendValues(input []string, mapping map[string]string) []string {
var results []string
results = append(results, input...)

for j := range input {
for oldChar, newChar := range mapping {
if strings.Contains(input[j], oldChar) {
for i := 0; i < strings.Count(input[j], oldChar); i++ {
extendedInput, err := replaceNth(input[j], oldChar, newChar, i+1)
if err != nil {
return nil, err
}
subCombinations, err := extendValues([]string{extendedInput}, mapping)
if err != nil {
return nil, err
}
extendedInput := replaceNth(input[j], oldChar, newChar, i+1)
subCombinations := extendValues([]string{extendedInput}, mapping)
results = append(results, subCombinations...)
}
}
}
}
// Possible performance improvement here by avoiding duplicates in the first place
return uniqueSlice(results), nil
return uniqueSlice(results)
}

func replaceNth(input, oldChar, newChar string, nthIndex int) (string, error) {
func replaceNth(input, oldChar, newChar string, nthIndex int) string {
count := 0
result := strings.Builder{}

Expand All @@ -121,12 +104,9 @@ func replaceNth(input, oldChar, newChar string, nthIndex int) (string, error) {
continue
}
}
err := result.WriteByte(input[i])
if err != nil {
return "", err
}
result.WriteByte(input[i]) // no need to catch error since "the returned error is always nil"
}
return result.String(), nil
return result.String()
}

func uniqueSlice(s []string) []string {
Expand All @@ -146,18 +126,18 @@ func readCsvFile(filepath string) (map[string]string, error) {

file, err := os.Open(filepath)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to open CSV file: %w", err)
}
defer file.Close()

reader := csv.NewReader(file)
records, err := reader.ReadAll()
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to parse CSV file: %w", err)
}

for _, row := range records {
substitutions[row[0]] = row[1]
}
return substitutions, err
return substitutions, nil
}
38 changes: 15 additions & 23 deletions internal/etl/transform/extend_values_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,19 @@ func Test_extendValues(t *testing.T) {
mapping map[string]string
}
tests := []struct {
name string
args args
want []string
wantErr assert.ErrorAssertionFunc
name string
args args
want []string
}{
{"No mapping", args{input: []string{"foobar"}, mapping: map[string]string{"eerste": "1ste", "tweede": "2de", "fryslân": "friesland"}}, []string{"foobar"}, assert.NoError},
{"Single mapping", args{input: []string{"foobar eerste"}, mapping: map[string]string{"eerste": "1ste", "tweede": "2de", "fryslân": "friesland"}}, []string{"foobar eerste", "foobar 1ste"}, assert.NoError},
{"Two different mappings", args{input: []string{"foobar eerste tweede"}, mapping: map[string]string{"eerste": "1ste", "tweede": "2de", "fryslân": "friesland"}}, []string{"foobar eerste tweede", "foobar 1ste tweede", "foobar eerste 2de", "foobar 1ste 2de"}, assert.NoError},
{"Two similar mappings", args{input: []string{"foobar eerste eerste"}, mapping: map[string]string{"eerste": "1ste", "tweede": "2de", "fryslân": "friesland"}}, []string{"foobar eerste eerste", "foobar 1ste eerste", "foobar eerste 1ste", "foobar 1ste 1ste"}, assert.NoError},
{"Three from same mapping", args{input: []string{"naer achtaertune kaerel"}, mapping: map[string]string{"ae": "a", "à": "a"}}, []string{"naer achtaertune kaerel", "naer achtartune kaerel", "nar achtartune kaerel", "nar achtaertune kaerel", "naer achtaertune karel", "naer achtartune karel", "nar achtartune karel", "nar achtaertune karel"}, assert.NoError},
{"No mapping", args{input: []string{"foobar"}, mapping: map[string]string{"eerste": "1ste", "tweede": "2de", "fryslân": "friesland"}}, []string{"foobar"}},
{"Single mapping", args{input: []string{"foobar eerste"}, mapping: map[string]string{"eerste": "1ste", "tweede": "2de", "fryslân": "friesland"}}, []string{"foobar eerste", "foobar 1ste"}},
{"Two different mappings", args{input: []string{"foobar eerste tweede"}, mapping: map[string]string{"eerste": "1ste", "tweede": "2de", "fryslân": "friesland"}}, []string{"foobar eerste tweede", "foobar 1ste tweede", "foobar eerste 2de", "foobar 1ste 2de"}},
{"Two similar mappings", args{input: []string{"foobar eerste eerste"}, mapping: map[string]string{"eerste": "1ste", "tweede": "2de", "fryslân": "friesland"}}, []string{"foobar eerste eerste", "foobar 1ste eerste", "foobar eerste 1ste", "foobar 1ste 1ste"}},
{"Three from same mapping", args{input: []string{"naer achtaertune kaerel"}, mapping: map[string]string{"ae": "a", "à": "a"}}, []string{"naer achtaertune kaerel", "naer achtartune kaerel", "nar achtartune kaerel", "nar achtaertune kaerel", "naer achtaertune karel", "naer achtartune karel", "nar achtartune karel", "nar achtaertune karel"}},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := extendValues(tt.args.input, tt.args.mapping)
if !tt.wantErr(t, err, fmt.Sprintf("extendValues(%v, %v)", tt.args.input, tt.args.mapping)) {
return
}
got := extendValues(tt.args.input, tt.args.mapping)
assert.ElementsMatch(t, tt.want, got, "extendValues(%v, %v)", tt.args.input, tt.args.mapping)
})
}
Expand All @@ -94,20 +90,16 @@ func Test_replaceNth(t *testing.T) {
nthIndex int
}
tests := []struct {
name string
args args
want string
wantErr assert.ErrorAssertionFunc
name string
args args
want string
}{
{"Replace 1st only", args{"naer achtaertune kaerel", "ae", "a", 1}, "nar achtaertune kaerel", assert.NoError},
{"Replace 2nd only", args{"naer achtaertune kaerel", "ae", "a", 2}, "naer achtartune kaerel", assert.NoError},
{"Replace 1st only", args{"naer achtaertune kaerel", "ae", "a", 1}, "nar achtaertune kaerel"},
{"Replace 2nd only", args{"naer achtaertune kaerel", "ae", "a", 2}, "naer achtartune kaerel"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := replaceNth(tt.args.input, tt.args.oldChar, tt.args.newChar, tt.args.nthIndex)
if !tt.wantErr(t, err, fmt.Sprintf("replaceNth(%v, %v, %v, %v)", tt.args.input, tt.args.oldChar, tt.args.newChar, tt.args.nthIndex)) {
return
}
got := replaceNth(tt.args.input, tt.args.oldChar, tt.args.newChar, tt.args.nthIndex)
assert.Equalf(t, tt.want, got, "replaceNth(%v, %v, %v, %v)", tt.args.input, tt.args.oldChar, tt.args.newChar, tt.args.nthIndex)
})
}
Expand Down

0 comments on commit 7a81875

Please sign in to comment.