Skip to content

Commit

Permalink
perf: improve streaming (#41)
Browse files Browse the repository at this point in the history
* perf: use fast stream for jsonline reading

* perf: faster string conversions

* perf: remove one useless malloc

* perf: precompile templates

* refactor: templates
  • Loading branch information
adrienaury authored Oct 1, 2023
1 parent e2ac847 commit d1f72fc
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 65 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ Types of changes
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.8.0]

- `Added` improvements for global performance (2 times faster).

## [0.7.0]

- `Added` improvements for global performance (2 to 3 times faster).
Expand Down
31 changes: 25 additions & 6 deletions internal/infra/config_loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,30 @@ func LoadConfig(filename string) (mimo.Config, error) {
return CreateConfig(config)
}

//nolint:cyclop
//nolint:cyclop,funlen
func CreateConfig(yamlconfig *YAMLStructure) (mimo.Config, error) {
config := mimo.NewConfig()

CreatePreprocesses(yamlconfig, &config)
if err := CreatePreprocesses(yamlconfig, &config); err != nil {
return config, err
}

for _, yamlcolumn := range yamlconfig.Columns {
excludeTmpl, err := mimo.NewTemplate(yamlcolumn.ExcludeTemplate)
if err != nil {
return config, fmt.Errorf("%w", err)
}

coherentTmpl, err := mimo.NewTemplate(yamlcolumn.CoherentSource)
if err != nil {
return config, fmt.Errorf("%w", err)
}

column := mimo.ColumnConfig{
Exclude: yamlcolumn.Exclude,
ExcludeTemplate: yamlcolumn.ExcludeTemplate,
ExcludeTemplate: excludeTmpl,
CoherentWith: yamlcolumn.CoherentWith,
CoherentSource: yamlcolumn.CoherentSource,
CoherentSource: coherentTmpl,
Constraints: []mimo.Constraint{},
Alias: yamlcolumn.Alias,
}
Expand Down Expand Up @@ -145,12 +157,19 @@ func CreateConfig(yamlconfig *YAMLStructure) (mimo.Config, error) {
return config, nil
}

func CreatePreprocesses(yamlconfig *YAMLStructure, config *mimo.Config) {
func CreatePreprocesses(yamlconfig *YAMLStructure, config *mimo.Config) error {
for _, yamlpreprocess := range yamlconfig.Preprocesses {
valueTmpl, err := mimo.NewTemplate(yamlpreprocess.Value)
if err != nil {
return fmt.Errorf("%w", err)
}

preprocess := mimo.PreprocessConfig{
Path: yamlpreprocess.Path,
Value: yamlpreprocess.Value,
Value: valueTmpl,
}
config.PreprocessConfigs = append(config.PreprocessConfigs, preprocess)
}

return nil
}
35 changes: 11 additions & 24 deletions internal/infra/datarowreader_jsonline.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package infra

import (
"bufio"
"errors"
"fmt"
"io"
"os"
Expand All @@ -32,7 +31,7 @@ import (
const linebreak byte = 10

// DataRowReaderJSONLine reads data rows from a stream of JSON values through
// a streaming decoder.
type DataRowReaderJSONLine struct {
	decoder *json.Decoder // decoder positioned on the next value to read
}

type DataRowReaderWriterJSONLine struct {
Expand All @@ -46,63 +45,51 @@ func NewDataRowReaderJSONLineFromFile(filename string) (*DataRowReaderJSONLine,
return nil, fmt.Errorf("%w", err)
}

return &DataRowReaderJSONLine{input: bufio.NewScanner(source)}, nil
return &DataRowReaderJSONLine{decoder: json.NewDecoder(source)}, nil
}

// NewDataRowReaderJSONLine returns a reader/writer whose input is a line
// scanner over input and whose output is a buffered writer over output.
func NewDataRowReaderJSONLine(input io.Reader, output io.Writer) *DataRowReaderWriterJSONLine {
	scanner := bufio.NewScanner(input)
	writer := bufio.NewWriter(output)

	return &DataRowReaderWriterJSONLine{
		input:  scanner,
		output: writer,
	}
}

func (drr *DataRowReaderJSONLine) ReadDataRow() (mimo.DataRow, error) {
var data mimo.DataRow

if drr.input.Scan() {
data = mimo.DataRow{}
if err := json.UnmarshalNoEscape(drr.input.Bytes(), &data); err != nil {
if drr.decoder.More() {
data := mimo.DataRow{}
if err := drr.decoder.Decode(&data); err != nil {
return nil, fmt.Errorf("%w", err)
}
}

if err := drr.input.Err(); err != nil {
if errors.Is(err, io.EOF) {
return nil, nil
}

return nil, fmt.Errorf("%w", err)
return data, nil
}

return data, nil
return nil, nil
}

// Close is a no-op: it releases nothing and always returns nil.
// NOTE(review): the struct only keeps the reader built over `source` in
// NewDataRowReaderJSONLineFromFile, not `source` itself, so the opened source
// cannot be closed from here; confirm whether it should be retained and closed.
func (drr *DataRowReaderJSONLine) Close() error {
return nil
}

// ReadDataRow scans the next line of the input, calls writeLine when an output
// is set, and decodes the scanned bytes into a mimo.DataRow.
//
// It returns (nil, nil) when the input is exhausted. A scanner failure (for
// example a line longer than the scanner buffer) is returned as an error
// instead of being mistaken for the end of the input.
func (drr *DataRowReaderWriterJSONLine) ReadDataRow() (mimo.DataRow, error) {
	if !drr.input.Scan() {
		// Scan returns false both at EOF and on failure; Err is nil only at EOF.
		if err := drr.input.Err(); err != nil {
			return nil, fmt.Errorf("scanning input: %w", err)
		}

		return nil, nil
	}

	if drr.output != nil {
		if err := drr.writeLine(); err != nil {
			return nil, err
		}
	}

	data := mimo.DataRow{}
	if err := json.UnmarshalNoEscape(drr.input.Bytes(), &data); err != nil {
		return nil, fmt.Errorf("decoding JSON row: %w", err)
	}

	return data, nil
}

func (drr *DataRowReaderWriterJSONLine) writeLine() error {
Expand Down
21 changes: 18 additions & 3 deletions pkg/mimo/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,29 @@ func BenchmarkAllOptions(b *testing.B) {

defer driver.Close()

excludeTmpl, err := mimo.NewTemplate(`{{uuidv4 | contains "a"}}`)
if err != nil {
b.FailNow()
}

coherentSourceTmpl, err := mimo.NewTemplate("{{.name | NoAccent | title}} {{.surname | NoAccent | upper}}")
if err != nil {
b.FailNow()
}

pprocTmpl, err := mimo.NewTemplate("{{.name | NoAccent | lower}}.{{.surname | NoAccent | lower}}@{{uuidv4}}.com")
if err != nil {
b.FailNow()
}

driver.Configure(mimo.Config{
ColumnNames: []string{"value"},
ColumnConfigs: map[string]mimo.ColumnConfig{
"value": {
Exclude: []any{"Odile", "Tiffany"},
ExcludeTemplate: `{{uuidv4 | contains "a"}}`,
ExcludeTemplate: excludeTmpl,
CoherentWith: []string{"name", "surname"},
CoherentSource: "{{.name | NoAccent | title}} {{.surname | NoAccent | upper}}",
CoherentSource: coherentSourceTmpl,
Constraints: []mimo.Constraint{
{
Target: mimo.MaskingRate,
Expand All @@ -162,7 +177,7 @@ func BenchmarkAllOptions(b *testing.B) {
PreprocessConfigs: []mimo.PreprocessConfig{
{
Path: "email",
Value: "{{.name | NoAccent | lower}}.{{.surname | NoAccent | lower}}@{{uuidv4}}.com",
Value: pprocTmpl,
},
},
})
Expand Down
12 changes: 6 additions & 6 deletions pkg/mimo/driver_preprocess.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,18 @@ func (d *Driver) preprocess(row DataRow) {
}
}

func preprocessValue(value any, paths []string, stack []any, templstr string, root DataRow) {
func preprocessValue(value any, paths []string, stack []any, tmpl *Template, root DataRow) {
path := paths[0]

var err error

if len(paths) == 1 {
if obj, ok := value.(map[string]any); ok {
obj[path], err = applyTemplate(templstr, root, append(stack, obj))
obj[path], err = tmpl.Execute(root, append(stack, obj))
}

if obj, ok := value.(DataRow); ok {
obj[path], err = applyTemplate(templstr, root, append(stack, obj))
obj[path], err = tmpl.Execute(root, append(stack, obj))
}

if err != nil {
Expand All @@ -53,18 +53,18 @@ func preprocessValue(value any, paths []string, stack []any, templstr string, ro
if path == "[]" {
if array, ok := value.([]any); ok {
for _, item := range array {
preprocessValue(item, paths[1:], append(stack, array), templstr, root) //nolint:asasalint
preprocessValue(item, paths[1:], append(stack, array), tmpl, root) //nolint:asasalint
}
}

return
}

if obj, ok := value.(map[string]any); ok {
preprocessValue(obj[path], paths[1:], append(stack, obj), templstr, root)
preprocessValue(obj[path], paths[1:], append(stack, obj), tmpl, root)
}

if obj, ok := value.(DataRow); ok {
preprocessValue(obj[path], paths[1:], append(stack, obj), templstr, root)
preprocessValue(obj[path], paths[1:], append(stack, obj), tmpl, root)
}
}
14 changes: 9 additions & 5 deletions pkg/mimo/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,8 @@ func (r Report) UpdateValue(root DataRow, realValue any, maskedValue any, stack
coherenceValues = []any{realValue}
}

if len(config.ExcludeTemplate) > 0 {
result, err := applyTemplate(config.ExcludeTemplate, root, stack)
if config.ExcludeTemplate != nil {
result, err := config.ExcludeTemplate.Execute(root, stack)

log.Err(err).Str("result", result).Msg("compute exclusion from template")

Expand Down Expand Up @@ -454,8 +454,8 @@ func computeCoherenceValues(config ColumnConfig, root DataRow, stack []any) []an
}
}

if len(config.CoherentSource) > 0 {
source, err := applyTemplate(config.CoherentSource, root, stack)
if config.CoherentSource != nil {
source, err := config.CoherentSource.Execute(root, stack)

log.Err(err).Str("result", source).Msg("generating coherence source from template")

Expand Down Expand Up @@ -489,7 +489,11 @@ func toString(value any) (string, bool) {
switch tvalue := value.(type) {
case string:
str = strconv.Quote(tvalue)
case int, int64, int32, int16, int8, uint, uint64, uint32, uint16, uint8, float32, float64, bool:
case float64:
str = strconv.FormatFloat(tvalue, 'g', -1, 64)
case bool:
str = strconv.FormatBool(tvalue)
case int, int64, int32, int16, int8, uint, uint64, uint32, uint16, uint8:
str = fmt.Sprint(tvalue)
case json.Number:
str = string(tvalue)
Expand Down
14 changes: 7 additions & 7 deletions pkg/mimo/model_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ type Config struct {
}

type ColumnConfig struct {
Exclude []any // exclude values from the masking rate computation (default: exclude only nil values)
ExcludeTemplate string // exclude values if template expression evaluates to True (default: False)
CoherentWith []string // list of fields from which the coherent rate is computed (default: the current field)
CoherentSource string // template to execute to create coherence source
Exclude []any // exclude values from the masking rate (default: exclude only nil values)
ExcludeTemplate *Template // exclude values if template expression evaluates to True (default: False)
CoherentWith []string // list of fields used for coherent rate computation (default: the current field)
CoherentSource *Template // template to execute to create coherence source
Constraints []Constraint // list of constraints to validate
Alias string // alias to use in persisted data

Expand All @@ -36,7 +36,7 @@ type ColumnConfig struct {

// PreprocessConfig describes one preprocessing step applied to a data row.
type PreprocessConfig struct {
	Path  string    // presumably the location of the field to write in the row; confirm against the driver
	Value *Template // compiled template producing the value; nil when the configured template string is empty
}

type Constraint struct {
Expand Down Expand Up @@ -74,9 +74,9 @@ func NewConfig() Config {
func NewDefaultColumnConfig() ColumnConfig {
return ColumnConfig{
Exclude: []any{},
ExcludeTemplate: "",
ExcludeTemplate: nil,
CoherentWith: []string{},
CoherentSource: "",
CoherentSource: nil,
Constraints: []Constraint{},
Alias: "",
excluded: false,
Expand Down
41 changes: 27 additions & 14 deletions pkg/mimo/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,43 @@ import (
"golang.org/x/text/unicode/norm"
)

func applyTemplate(tmplstring string, root DataRow, stack []any) (string, error) {
funcmap := generateFuncMap()
type Template struct {
tpml *template.Template
}

func NewTemplate(tmplstr string) (*Template, error) {
if len(tmplstr) > 0 {
tmpl, err := template.New("").Funcs(generateFuncMap()).Funcs(sprig.TxtFuncMap()).Parse(tmplstr)
if err != nil {
return nil, fmt.Errorf("%w", err)
}

return &Template{tmpl}, nil
}

funcmap["Stack"] = generateStackFunc(stack)
return nil, nil //nolint:nilnil
}

tmpl, err := template.New("template").Funcs(sprig.TxtFuncMap()).Funcs(funcmap).Parse(tmplstring)
if err != nil {
return "", fmt.Errorf("%w", err)
func (t *Template) Execute(root DataRow, stack []any) (string, error) {
funcstack := template.FuncMap{
"Stack": generateStackFunc(stack),
}

tmpl := t.tpml.Funcs(funcstack)

result := &strings.Builder{}
err = tmpl.Execute(result, root)
err := tmpl.Execute(result, root)

return result.String(), err
}

// generateFuncMap returns the custom functions registered on a template before
// it is parsed.
//
// "Stack" is registered here as a placeholder that returns nil, so that
// templates referencing it can be parsed; Template.Execute binds the real
// implementation for each call.
func generateFuncMap() template.FuncMap {
	return template.FuncMap{
		"Stack":    func(_ int) any { return nil },
		"ToUpper":  strings.ToUpper,
		"ToLower":  strings.ToLower,
		"NoAccent": rmAcc,
	}
}

func generateStackFunc(theStack []any) func(index int) any {
Expand Down

0 comments on commit d1f72fc

Please sign in to comment.