From 8657822f49c1df46a68343ed21bbf1e4ee0f00a2 Mon Sep 17 00:00:00 2001 From: Adrien Aury <44274230+adrienaury@users.noreply.github.com> Date: Fri, 29 Sep 2023 22:56:01 +0000 Subject: [PATCH] perf: use buffered output --- internal/infra/datarowreader_jsonline.go | 24 ++++++++++++++++++++++-- pkg/mimo/benchmark_test.go | 14 +++----------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/internal/infra/datarowreader_jsonline.go b/internal/infra/datarowreader_jsonline.go index 05d0b5a..d1800ad 100644 --- a/internal/infra/datarowreader_jsonline.go +++ b/internal/infra/datarowreader_jsonline.go @@ -29,6 +29,8 @@ import ( "github.com/cgi-fr/mimo/pkg/mimo" ) +const linebreak byte = 10 + type DataRowReaderJSONLine struct { input *bufio.Scanner output *bufio.Writer @@ -72,8 +74,10 @@ func (drr *DataRowReaderJSONLine) ReadDataRowAndWrite() (mimo.DataRow, error) { var data mimo.DataRow if drr.input.Scan() { - if _, err := drr.output.Write(append(drr.input.Bytes(), '\n')); err != nil { - return nil, fmt.Errorf("%w", err) + if drr.output != nil { + if err := drr.writeLine(); err != nil { + return nil, err + } } data = mimo.DataRow{} @@ -93,6 +97,22 @@ func (drr *DataRowReaderJSONLine) ReadDataRowAndWrite() (mimo.DataRow, error) { return data, nil } +func (drr *DataRowReaderJSONLine) writeLine() error { + if _, err := drr.output.Write(drr.input.Bytes()); err != nil { + return fmt.Errorf("%w", err) + } + + if err := drr.output.WriteByte(linebreak); err != nil { + return fmt.Errorf("%w", err) + } + + return nil +} + func (drr *DataRowReaderJSONLine) Flush() error { + if drr.output == nil { + return nil + } + return fmt.Errorf("%w", drr.output.Flush()) } diff --git a/pkg/mimo/benchmark_test.go b/pkg/mimo/benchmark_test.go index 6b08fa9..9facbc1 100644 --- a/pkg/mimo/benchmark_test.go +++ b/pkg/mimo/benchmark_test.go @@ -18,8 +18,6 @@ package mimo_test import ( - "io" - "os" "testing" "github.com/cgi-fr/mimo/internal/infra" @@ -35,13 +33,11 @@ func BenchmarkInMemory(b *testing.B) { b.FailNow() } - file, err := os.Open("testdata/masked.jsonl") + maskedReader, err := infra.NewDataRowReaderJSONLineFromFile("testdata/masked.jsonl") if err != nil { b.FailNow() } - maskedReader := infra.NewDataRowReaderJSONLine(file, io.Discard) - driver := mimo.NewDriver( realReader, maskedReader, @@ -76,13 +72,11 @@ func BenchmarkOnDisk(b *testing.B) { b.FailNow() } - file, err := os.Open("testdata/masked.jsonl") + maskedReader, err := infra.NewDataRowReaderJSONLineFromFile("testdata/masked.jsonl") if err != nil { b.FailNow() } - maskedReader := infra.NewDataRowReaderJSONLine(file, io.Discard) - driver := mimo.NewDriver( realReader, maskedReader, @@ -123,13 +117,11 @@ func BenchmarkAllOptions(b *testing.B) { b.FailNow() } - file, err := os.Open("testdata/single-100-2.jsonl") + maskedReader, err := infra.NewDataRowReaderJSONLineFromFile("testdata/single-100-2.jsonl") if err != nil { b.FailNow() } - maskedReader := infra.NewDataRowReaderJSONLine(file, io.Discard) - driver := mimo.NewDriver( realReader, maskedReader,