From f31908d5b07777d0a401b4ee271cf3081ec81342 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 17 Feb 2024 12:15:37 -0500 Subject: [PATCH 1/4] mlr sparsify --- docs/src/reference-verbs.md | 57 ++++--- docs/src/reference-verbs.md.in | 6 + pkg/transformers/aaa_transformer_table.go | 1 + pkg/transformers/sparsify.go | 194 ++++++++++++++++++++++ test/cases/cli-help/0001/expout | 14 ++ test/input/sparsify-input.csv | 5 + 6 files changed, 258 insertions(+), 19 deletions(-) create mode 100644 pkg/transformers/sparsify.go create mode 100644 test/input/sparsify-input.csv diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index cfa66dd826..142b90ce04 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3126,6 +3126,25 @@ a b c 9 8 7 +## sparsify + +
+mlr sparsify --help
+
+
+Usage: mlr sparsify [options]
+Unsets fields for which the value is the empty string (or, optionally, another
+specified value). Only makes sense with output format not being CSV or TSV.
+Options:
+-s {filler string} What values to remove. Defaults to the empty string.
+-f {a,b,c} Specify field names to be operated on; any other fields won't be
+           modified. The default is to modify all fields.
+-h|--help  Show this message.
+Example: if the input is two records, one being 'a=1,b=2' and the other
+being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and
+'a=,b=3,c=4'.
+
+ ## split
@@ -3409,14 +3428,14 @@ fields, optionally categorized by one or more fields.
   data/medium
 
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3425,12 +3444,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3516,11 +3535,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943 ## step @@ -3797,9 +3816,9 @@ distinct_count 5 5 10000 10000 10000 mode pan wye 1 0.3467901443380824 0.7268028627434533 sum 0 0 50005000 4986.019681679581 5062.057444929905 mean - - 5000.5 0.49860196816795804 0.5062057444929905 -stddev - - 2886.8956799071675 0.2902925151144007 0.290880086426933 -var - - 8334166.666666667 0.08426974433144456 0.08461122467974003 -skewness - - 0 -0.0006899591185521965 -0.017849760120133784 +stddev - - 2886.8956799071675 0.29029251511440074 0.2908800864269331 +var - - 8334166.666666667 0.08426974433144457 0.08461122467974005 +skewness - - 0 -0.0006899591185517494 -0.01784976012013298 minlen 3 3 1 15 13 maxlen 3 3 5 22 22 min eks eks 1 0.00004509679127584487 0.00008818962627266114 diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in index 44feda3deb..8959ebf6bb 100644 --- a/docs/src/reference-verbs.md.in +++ b/docs/src/reference-verbs.md.in @@ -995,6 +995,12 @@ GENMD-RUN-COMMAND mlr --ijson --opprint sort-within-records data/sort-within-records.json GENMD-EOF +## sparsify + +GENMD-RUN-COMMAND +mlr sparsify --help +GENMD-EOF + ## split GENMD-RUN-COMMAND diff --git a/pkg/transformers/aaa_transformer_table.go b/pkg/transformers/aaa_transformer_table.go index ed98af07f2..34a5b6ea85 100644 --- a/pkg/transformers/aaa_transformer_table.go +++ b/pkg/transformers/aaa_transformer_table.go @@ -62,6 +62,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ SkipTrivialRecordsSetup, SortSetup, SortWithinRecordsSetup, + SparsifySetup, SplitSetup, SsubSetup, Stats1Setup, diff --git 
a/pkg/transformers/sparsify.go b/pkg/transformers/sparsify.go new file mode 100644 index 0000000000..be1a6de73a --- /dev/null +++ b/pkg/transformers/sparsify.go @@ -0,0 +1,194 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/pkg/cli" + "github.com/johnkerl/miller/pkg/lib" + "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameSparsify = "sparsify" + +var SparsifySetup = TransformerSetup{ + Verb: verbNameSparsify, + UsageFunc: transformerSparsifyUsage, + ParseCLIFunc: transformerSparsifyParseCLI, + IgnoresInput: false, +} + +func transformerSparsifyUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSparsify) + fmt.Fprint(o, + `Unsets fields for which the key is the empty string (or, optionally, another +specified value). Only makes sense with output format not being CSV or TSV. +`) + + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-s {filler string} What values to remove. Defaults to the empty string.\n") + fmt.Fprintf(o, "-f {a,b,c} Specify field names to be operated on; any other fields won't be\n") + fmt.Fprintf(o, " modified. The default is to modify all fields.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") + + fmt.Fprint(o, + `Example: if the input is two records, one being 'a=1,b=2' and the other +being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and +'a=,b=3,c=4'. 
+`) +} + +func transformerSparsifyParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + fillerString := "" + var specifiedFieldNames []string = nil + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerSparsifyUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-s" { + fillerString = cli.VerbGetStringArgOrDie(verb, opt, args, &argi, argc) + + } else if opt == "-f" { + specifiedFieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + + } else { + transformerSparsifyUsage(os.Stderr) + os.Exit(1) + } + } + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerSparsify( + fillerString, + specifiedFieldNames, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerSparsify struct { + fillerString string + fieldNamesSet map[string]bool + recordTransformerFunc RecordTransformerFunc +} + +func NewTransformerSparsify( + fillerString string, + specifiedFieldNames []string, +) (*TransformerSparsify, error) { + + tr := &TransformerSparsify{ + fillerString: fillerString, + fieldNamesSet: lib.StringListToSet(specifiedFieldNames), + } + if specifiedFieldNames == nil { + tr.recordTransformerFunc = tr.transformAll + } else { + tr.recordTransformerFunc = tr.transformSome + } + + return tr, nil +} + +func (tr 
*TransformerSparsify) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + tr.recordTransformerFunc( + inrecAndContext, + outputRecordsAndContexts, + inputDownstreamDoneChannel, + outputDownstreamDoneChannel, + ) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // end-of-stream marker + } +} + +func (tr *TransformerSparsify) transformAll( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + inrec := inrecAndContext.Record + outrec := mlrval.NewMlrmapAsRecord() + + for pe := inrec.Head; pe != nil; pe = pe.Next { + if pe.Value.String() != tr.fillerString { + // Reference OK because ownership transfer + outrec.PutReference(pe.Key, pe.Value) + } + } + + outrecAndContext := types.NewRecordAndContext(outrec, &inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) +} + +// ---------------------------------------------------------------- +func (tr *TransformerSparsify) transformSome( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + inrec := inrecAndContext.Record + outrec := mlrval.NewMlrmapAsRecord() + + for pe := inrec.Head; pe != nil; pe = pe.Next { + if tr.fieldNamesSet[pe.Key] { + if pe.Value.String() != tr.fillerString { + // Reference OK because ownership transfer + outrec.PutReference(pe.Key, pe.Value) + } + } else { + outrec.PutReference(pe.Key, pe.Value) + } + } + + outrecAndContext := types.NewRecordAndContext(outrec, 
&inrecAndContext.Context) + outputRecordsAndContexts.PushBack(outrecAndContext) +} diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index b25e4a56d1..07ca9d0961 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -988,6 +988,20 @@ Options: -r Recursively sort subobjects/submaps, e.g. for JSON input. -h|--help Show this message. +================================================================ +sparsify +Usage: mlr sparsify [options] +Unsets fields for which the key is the empty string (or, optionally, another +specified value). Only makes sense with output format not being CSV or TSV. +Options: +-s {filler string} What values to remove. Defaults to the empty string. +-f {a,b,c} Specify field names to be operated on; any other fields won't be + modified. The default is to modify all fields. +-h|--help Show this message. +Example: if the input is two records, one being 'a=1,b=2' and the other +being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and +'a=,b=3,c=4'. 
+ ================================================================ split Usage: mlr split [options] {filename} diff --git a/test/input/sparsify-input.csv b/test/input/sparsify-input.csv new file mode 100644 index 0000000000..16916596e0 --- /dev/null +++ b/test/input/sparsify-input.csv @@ -0,0 +1,5 @@ +a,b,c +1,2,3 +4,5, +,, +7,8,9 From b509cce0d5a9ca9b66f4076e4a7362caa2769fb8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 17 Feb 2024 12:17:48 -0500 Subject: [PATCH 2/4] regression-test cases --- test/cases/verb-sparsify/0001/cmd | 1 + test/cases/verb-sparsify/0001/experr | 0 test/cases/verb-sparsify/0001/expout | 17 +++++++++++++++++ test/cases/verb-sparsify/0002/cmd | 1 + test/cases/verb-sparsify/0002/experr | 0 test/cases/verb-sparsify/0002/expout | 21 +++++++++++++++++++++ test/cases/verb-sparsify/0003/cmd | 1 + test/cases/verb-sparsify/0003/experr | 0 test/cases/verb-sparsify/0003/expout | 21 +++++++++++++++++++++ test/cases/verb-sparsify/0004/cmd | 1 + test/cases/verb-sparsify/0004/experr | 0 test/cases/verb-sparsify/0004/expout | 19 +++++++++++++++++++ test/cases/verb-sparsify/0005/cmd | 1 + test/cases/verb-sparsify/0005/experr | 0 test/cases/verb-sparsify/0005/expout | 21 +++++++++++++++++++++ test/cases/verb-sparsify/0006/cmd | 1 + test/cases/verb-sparsify/0006/experr | 0 test/cases/verb-sparsify/0006/expout | 21 +++++++++++++++++++++ test/cases/verb-sparsify/0007/cmd | 1 + test/cases/verb-sparsify/0007/experr | 0 test/cases/verb-sparsify/0007/expout | 22 ++++++++++++++++++++++ test/cases/verb-sparsify/0008/cmd | 1 + test/cases/verb-sparsify/0008/experr | 0 test/cases/verb-sparsify/0008/expout | 22 ++++++++++++++++++++++ 24 files changed, 172 insertions(+) create mode 100644 test/cases/verb-sparsify/0001/cmd create mode 100644 test/cases/verb-sparsify/0001/experr create mode 100644 test/cases/verb-sparsify/0001/expout create mode 100644 test/cases/verb-sparsify/0002/cmd create mode 100644 test/cases/verb-sparsify/0002/experr create mode 100644 
test/cases/verb-sparsify/0002/expout create mode 100644 test/cases/verb-sparsify/0003/cmd create mode 100644 test/cases/verb-sparsify/0003/experr create mode 100644 test/cases/verb-sparsify/0003/expout create mode 100644 test/cases/verb-sparsify/0004/cmd create mode 100644 test/cases/verb-sparsify/0004/experr create mode 100644 test/cases/verb-sparsify/0004/expout create mode 100644 test/cases/verb-sparsify/0005/cmd create mode 100644 test/cases/verb-sparsify/0005/experr create mode 100644 test/cases/verb-sparsify/0005/expout create mode 100644 test/cases/verb-sparsify/0006/cmd create mode 100644 test/cases/verb-sparsify/0006/experr create mode 100644 test/cases/verb-sparsify/0006/expout create mode 100644 test/cases/verb-sparsify/0007/cmd create mode 100644 test/cases/verb-sparsify/0007/experr create mode 100644 test/cases/verb-sparsify/0007/expout create mode 100644 test/cases/verb-sparsify/0008/cmd create mode 100644 test/cases/verb-sparsify/0008/experr create mode 100644 test/cases/verb-sparsify/0008/expout diff --git a/test/cases/verb-sparsify/0001/cmd b/test/cases/verb-sparsify/0001/cmd new file mode 100644 index 0000000000..38ec29b150 --- /dev/null +++ b/test/cases/verb-sparsify/0001/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify diff --git a/test/cases/verb-sparsify/0001/experr b/test/cases/verb-sparsify/0001/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sparsify/0001/expout b/test/cases/verb-sparsify/0001/expout new file mode 100644 index 0000000000..e9c9893a93 --- /dev/null +++ b/test/cases/verb-sparsify/0001/expout @@ -0,0 +1,17 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5 +}, +{}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0002/cmd b/test/cases/verb-sparsify/0002/cmd new file mode 100644 index 0000000000..3ac1c96309 --- /dev/null +++ b/test/cases/verb-sparsify/0002/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv 
sparsify -f a diff --git a/test/cases/verb-sparsify/0002/experr b/test/cases/verb-sparsify/0002/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sparsify/0002/expout b/test/cases/verb-sparsify/0002/expout new file mode 100644 index 0000000000..8bc89d0aad --- /dev/null +++ b/test/cases/verb-sparsify/0002/expout @@ -0,0 +1,21 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0003/cmd b/test/cases/verb-sparsify/0003/cmd new file mode 100644 index 0000000000..fc08ebef93 --- /dev/null +++ b/test/cases/verb-sparsify/0003/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f b diff --git a/test/cases/verb-sparsify/0003/experr b/test/cases/verb-sparsify/0003/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sparsify/0003/expout b/test/cases/verb-sparsify/0003/expout new file mode 100644 index 0000000000..b607e38938 --- /dev/null +++ b/test/cases/verb-sparsify/0003/expout @@ -0,0 +1,21 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0004/cmd b/test/cases/verb-sparsify/0004/cmd new file mode 100644 index 0000000000..5ea1aa7bdc --- /dev/null +++ b/test/cases/verb-sparsify/0004/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f b,c diff --git a/test/cases/verb-sparsify/0004/experr b/test/cases/verb-sparsify/0004/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sparsify/0004/expout b/test/cases/verb-sparsify/0004/expout new file mode 100644 index 0000000000..ebf9878cd4 --- /dev/null +++ b/test/cases/verb-sparsify/0004/expout @@ -0,0 +1,19 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5 +}, +{ + "a": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} 
+] diff --git a/test/cases/verb-sparsify/0005/cmd b/test/cases/verb-sparsify/0005/cmd new file mode 100644 index 0000000000..012aee2b62 --- /dev/null +++ b/test/cases/verb-sparsify/0005/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -s 1 diff --git a/test/cases/verb-sparsify/0005/experr b/test/cases/verb-sparsify/0005/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sparsify/0005/expout b/test/cases/verb-sparsify/0005/expout new file mode 100644 index 0000000000..839476d587 --- /dev/null +++ b/test/cases/verb-sparsify/0005/expout @@ -0,0 +1,21 @@ +[ +{ + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0006/cmd b/test/cases/verb-sparsify/0006/cmd new file mode 100644 index 0000000000..42567786a0 --- /dev/null +++ b/test/cases/verb-sparsify/0006/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f a -s 1 diff --git a/test/cases/verb-sparsify/0006/experr b/test/cases/verb-sparsify/0006/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sparsify/0006/expout b/test/cases/verb-sparsify/0006/expout new file mode 100644 index 0000000000..839476d587 --- /dev/null +++ b/test/cases/verb-sparsify/0006/expout @@ -0,0 +1,21 @@ +[ +{ + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0007/cmd b/test/cases/verb-sparsify/0007/cmd new file mode 100644 index 0000000000..99b590da4e --- /dev/null +++ b/test/cases/verb-sparsify/0007/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f b -s 1 diff --git a/test/cases/verb-sparsify/0007/experr b/test/cases/verb-sparsify/0007/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sparsify/0007/expout 
b/test/cases/verb-sparsify/0007/expout new file mode 100644 index 0000000000..d7f95feba6 --- /dev/null +++ b/test/cases/verb-sparsify/0007/expout @@ -0,0 +1,22 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] diff --git a/test/cases/verb-sparsify/0008/cmd b/test/cases/verb-sparsify/0008/cmd new file mode 100644 index 0000000000..b943d2c795 --- /dev/null +++ b/test/cases/verb-sparsify/0008/cmd @@ -0,0 +1 @@ +mlr --c2j --from test/input/sparsify-input.csv sparsify -f b,c -s 1 diff --git a/test/cases/verb-sparsify/0008/experr b/test/cases/verb-sparsify/0008/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/verb-sparsify/0008/expout b/test/cases/verb-sparsify/0008/expout new file mode 100644 index 0000000000..d7f95feba6 --- /dev/null +++ b/test/cases/verb-sparsify/0008/expout @@ -0,0 +1,22 @@ +[ +{ + "a": 1, + "b": 2, + "c": 3 +}, +{ + "a": 4, + "b": 5, + "c": "" +}, +{ + "a": "", + "b": "", + "c": "" +}, +{ + "a": 7, + "b": 8, + "c": 9 +} +] From 6029680d51e3c4ea0e6f74ade53637c809c70224 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 17 Feb 2024 12:23:05 -0500 Subject: [PATCH 3/4] typofix --- docs/src/data-diving-examples.md | 46 ++++++++++++++++---------------- docs/src/reference-verbs.md | 4 +-- docs/src/two-pass-algorithms.md | 4 +-- pkg/transformers/sparsify.go | 4 +-- test/cases/cli-help/0001/expout | 4 +-- 5 files changed, 28 insertions(+), 34 deletions(-) diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 39738f193d..100716ec26 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index 142b90ce04..417aa4d9b4 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -3140,9 +3140,7 @@ Options: -f {a,b,c} Specify field names to be operated on; any other fields won't be modified. The default is to modify all fields. -h|--help Show this message. -Example: if the input is two records, one being 'a=1,b=2' and the other -being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and -'a=,b=3,c=4'. +Example: if input is a=1,b=,c=3 then output is a=1,c=3. ## split diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e1..e475aebf3b 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/pkg/transformers/sparsify.go b/pkg/transformers/sparsify.go
index be1a6de73a..b6ae40c516 100644
--- a/pkg/transformers/sparsify.go
+++ b/pkg/transformers/sparsify.go
@@ -38,9 +38,7 @@ specified value). Only makes sense with output format not being CSV or TSV.
 	fmt.Fprintf(o, "-h|--help  Show this message.\n")
 
 	fmt.Fprint(o,
-		`Example: if the input is two records, one being 'a=1,b=2' and the other
-being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and
-'a=,b=3,c=4'.
+		`Example: if input is a=1,b=,c=3 then output is a=1,c=3.
 `)
 }
 
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index 07ca9d0961..95b4d3f141 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -998,9 +998,7 @@ Options:
 -f {a,b,c} Specify field names to be operated on; any other fields won't be
            modified. The default is to modify all fields.
 -h|--help  Show this message.
-Example: if the input is two records, one being 'a=1,b=2' and the other
-being 'b=3,c=4', then the output is the two records 'a=1,b=2,c=' and
-'a=,b=3,c=4'.
+Example: if input is a=1,b=,c=3 then output is a=1,c=3.
 
 ================================================================
 split

From 0dbad911031526dcaf366e27ac8d88b5d23eaf31 Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 17 Feb 2024 12:27:31 -0500
Subject: [PATCH 4/4] Remove mods due to processor-architecture change

---
 docs/src/data-diving-examples.md | 46 ++++++++++++++++----------------
 docs/src/two-pass-algorithms.md  |  4 +--
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md
index 100716ec26..39738f193d 100644
--- a/docs/src/data-diving-examples.md
+++ b/docs/src/data-diving-examples.md
@@ -160,11 +160,11 @@ CITRUS COUNTY       1332.9                 79974.9                483785.1
   stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
 
-tiv_2011_tiv_2012_corr  0.9730497632351701
-tiv_2011_tiv_2012_ols_m 0.9835583980337732
-tiv_2011_tiv_2012_ols_b 433854.6428968301
+tiv_2011_tiv_2012_corr  0.9730497632351692
+tiv_2011_tiv_2012_ols_m 0.9835583980337723
+tiv_2011_tiv_2012_ols_b 433854.6428968317
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320204
+tiv_2011_tiv_2012_r2    0.9468258417320189
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199852926
+0.1334180491027861 -0.011319841199866178
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887242  -0.018565536587084836
-orange   square   0.17685855992752933   -0.07104431573805543
- green   circle   0.05764419437577257   0.011795729888018455
-   red   square    0.0557447712489348 -0.0006801456507506415
-yellow triangle    0.0445727377196281   0.024604310103079844
-yellow   square    0.0437917292729612  -0.044621972016306265
-purple   circle   0.03587354936895115    0.13411339541407613
-  blue   square   0.03241153095761152   -0.05350764811965621
-  blue triangle  0.015356427073158612 -0.0006089997461408209
-orange   circle  0.010518953877704181    -0.1627939732927932
-   red triangle   0.00809782571528054    0.01248662135795501
-purple triangle  0.005155190909099739   -0.04505790925621933
-purple   square  -0.02568027696337717   0.057694296479293694
- green   square -0.025776073450284875 -0.0032651732520739014
-orange triangle -0.030456661186085584   -0.13186999819263814
-yellow   circle  -0.06477331572781515     0.0736944981970553
-  blue   circle   -0.1023476190192966  -0.030528539069839333
- green triangle  -0.10901825107358747   -0.04848782060162855
+   red   circle    0.9807984401887236   -0.01856553658708754
+orange   square   0.17685855992752927   -0.07104431573806054
+ green   circle   0.05764419437577255    0.01179572988801509
+   red   square   0.05574477124893523 -0.0006801456507510942
+yellow triangle   0.04457273771962798   0.024604310103081825
+yellow   square   0.04379172927296089   -0.04462197201631237
+purple   circle   0.03587354936895086     0.1341133954140899
+  blue   square   0.03241153095761164  -0.053507648119643196
+  blue triangle  0.015356427073158766 -0.0006089997461435399
+orange   circle  0.010518953877704048   -0.16279397329279383
+   red triangle   0.00809782571528034   0.012486621357942596
+purple triangle  0.005155190909099334  -0.045057909256220656
+purple   square -0.025680276963377404    0.05769429647930396
+ green   square   -0.0257760734502851  -0.003265173252087127
+orange triangle -0.030456661186085785    -0.1318699981926352
+yellow   circle  -0.06477331572781474    0.07369449819706045
+  blue   circle  -0.10234761901929677  -0.030528539069837757
+ green triangle  -0.10901825107358765   -0.04848782060162929
 
diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index e475aebf3b..146f3a81e1 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144457 -x_stddev 0.29029251511440074 +x_var 0.08426974433144456 +x_stddev 0.2902925151144007