From 1711929c3364f0049b738a16aabc78521535a2c4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Aug 2023 16:15:36 -0400 Subject: [PATCH 1/4] sub, gsub, and ssub verbs --- .../pkg/transformers/aaa_transformer_table.go | 3 + internal/pkg/transformers/gsub.go | 157 ++++++++++++++++++ internal/pkg/transformers/ssub.go | 156 +++++++++++++++++ internal/pkg/transformers/sub.go | 157 ++++++++++++++++++ test/cases/cli-help/0001/expout | 29 ++++ test/cases/verb-case/x | 13 -- 6 files changed, 502 insertions(+), 13 deletions(-) create mode 100644 internal/pkg/transformers/gsub.go create mode 100644 internal/pkg/transformers/ssub.go create mode 100644 internal/pkg/transformers/sub.go delete mode 100644 test/cases/verb-case/x diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/internal/pkg/transformers/aaa_transformer_table.go index 60f490e0d8..ece90a8584 100644 --- a/internal/pkg/transformers/aaa_transformer_table.go +++ b/internal/pkg/transformers/aaa_transformer_table.go @@ -33,6 +33,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ GrepSetup, GroupBySetup, GroupLikeSetup, + GsubSetup, HavingFieldsSetup, HeadSetup, HistogramSetup, @@ -62,9 +63,11 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{ SortSetup, SortWithinRecordsSetup, SplitSetup, + SsubSetup, Stats1Setup, Stats2Setup, StepSetup, + SubSetup, SummarySetup, TacSetup, TailSetup, diff --git a/internal/pkg/transformers/gsub.go b/internal/pkg/transformers/gsub.go new file mode 100644 index 0000000000..550aeda5af --- /dev/null +++ b/internal/pkg/transformers/gsub.go @@ -0,0 +1,157 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/internal/pkg/bifs" + "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/internal/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameGsub = "gsub" + +var GsubSetup = 
TransformerSetup{ + Verb: verbNameGsub, + UsageFunc: transformerGsubUsage, + ParseCLIFunc: transformerGsubParseCLI, + IgnoresInput: false, +} + +func transformerGsubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameGsub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n") + fmt.Fprintf(o, "for the old string and handling multiple matches, like the `gsub` DSL function.\n") + fmt.Fprintf(o, "See also the `sub` and `ssub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerGsubParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + // Parse local flags + var fieldNames []string = nil + var oldText string + var newText string + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerGsubUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-f" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + } else { + transformerGsubUsage(os.Stderr) + os.Exit(1) + } + } + + if fieldNames == nil { + transformerGsubUsage(os.Stderr) + os.Exit(1) + } + + // Get the old and new text from the command line + if (argc - argi) < 2 { + transformerGsubUsage(os.Stderr) + os.Exit(1) + } + oldText = args[argi] + newText = args[argi+1] + + argi += 2 + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + 
return nil + } + + transformer, err := NewTransformerGsub( + fieldNames, + oldText, + newText, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerGsub struct { + fieldNames []string + oldText *mlrval.Mlrval + newText *mlrval.Mlrval +} + +// ---------------------------------------------------------------- +func NewTransformerGsub( + fieldNames []string, + oldText string, + newText string, +) (*TransformerGsub, error) { + tr := &TransformerGsub{ + fieldNames: fieldNames, + oldText: mlrval.FromString(oldText), + newText: mlrval.FromString(newText), + } + return tr, nil +} + +func (tr *TransformerGsub) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + inrec := inrecAndContext.Record + + for _, fieldName := range tr.fieldNames { + oldValue := inrec.Get(fieldName) + if oldValue == nil { + continue + } + + newValue := bifs.BIF_gsub(oldValue, tr.oldText, tr.newText) + + inrec.PutReference(fieldName, newValue) + } + + outputRecordsAndContexts.PushBack(inrecAndContext) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker + } +} diff --git a/internal/pkg/transformers/ssub.go b/internal/pkg/transformers/ssub.go new file mode 100644 index 0000000000..bd8e542473 --- /dev/null +++ b/internal/pkg/transformers/ssub.go @@ -0,0 +1,156 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/internal/pkg/bifs" + "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/internal/pkg/mlrval" + "github.com/johnkerl/miller/internal/pkg/types" +) + +// 
---------------------------------------------------------------- +const verbNameSsub = "ssub" + +var SsubSetup = TransformerSetup{ + Verb: verbNameSsub, + UsageFunc: transformerSsubUsage, + ParseCLIFunc: transformerSsubParseCLI, + IgnoresInput: false, +} + +func transformerSsubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSsub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), without regex support for\n") + fmt.Fprintf(o, "the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerSsubParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + // Parse local flags + var fieldNames []string = nil + var oldText string + var newText string + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerSsubUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-f" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + } else { + transformerSsubUsage(os.Stderr) + os.Exit(1) + } + } + + if fieldNames == nil { + transformerSsubUsage(os.Stderr) + os.Exit(1) + } + + // Get the old and new text from the command line + if (argc - argi) < 2 { + transformerSsubUsage(os.Stderr) + os.Exit(1) + } + oldText = args[argi] + newText = args[argi+1] + + argi += 2 + + *pargi = argi + if !doConstruct { // 
All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerSsub( + fieldNames, + oldText, + newText, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerSsub struct { + fieldNames []string + oldText *mlrval.Mlrval + newText *mlrval.Mlrval +} + +// ---------------------------------------------------------------- +func NewTransformerSsub( + fieldNames []string, + oldText string, + newText string, +) (*TransformerSsub, error) { + tr := &TransformerSsub{ + fieldNames: fieldNames, + oldText: mlrval.FromString(oldText), + newText: mlrval.FromString(newText), + } + return tr, nil +} + +func (tr *TransformerSsub) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + inrec := inrecAndContext.Record + + for _, fieldName := range tr.fieldNames { + oldValue := inrec.Get(fieldName) + if oldValue == nil { + continue + } + + newValue := bifs.BIF_ssub(oldValue, tr.oldText, tr.newText) + + inrec.PutReference(fieldName, newValue) + } + + outputRecordsAndContexts.PushBack(inrecAndContext) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker + } +} diff --git a/internal/pkg/transformers/sub.go b/internal/pkg/transformers/sub.go new file mode 100644 index 0000000000..eee7783624 --- /dev/null +++ b/internal/pkg/transformers/sub.go @@ -0,0 +1,157 @@ +package transformers + +import ( + "container/list" + "fmt" + "os" + "strings" + + "github.com/johnkerl/miller/internal/pkg/bifs" + "github.com/johnkerl/miller/internal/pkg/cli" + "github.com/johnkerl/miller/internal/pkg/mlrval" + 
"github.com/johnkerl/miller/internal/pkg/types" +) + +// ---------------------------------------------------------------- +const verbNameSub = "sub" + +var SubSetup = TransformerSetup{ + Verb: verbNameSub, + UsageFunc: transformerSubUsage, + ParseCLIFunc: transformerSubParseCLI, + IgnoresInput: false, +} + +func transformerSubUsage( + o *os.File, +) { + fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSub) + fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n") + fmt.Fprintf(o, "for the old string and not handling multiple matches, like the `sub` DSL function.\n") + fmt.Fprintf(o, "See also the `gsub` and `ssub` verbs.\n") + fmt.Fprintf(o, "Options:\n") + fmt.Fprintf(o, "-f {a,b,c} Field names to convert.\n") + fmt.Fprintf(o, "-h|--help Show this message.\n") +} + +func transformerSubParseCLI( + pargi *int, + argc int, + args []string, + _ *cli.TOptions, + doConstruct bool, // false for first pass of CLI-parse, true for second pass +) IRecordTransformer { + + // Skip the verb name from the current spot in the mlr command line + argi := *pargi + verb := args[argi] + argi++ + + // Parse local flags + var fieldNames []string = nil + var oldText string + var newText string + + for argi < argc /* variable increment: 1 or 2 depending on flag */ { + opt := args[argi] + if !strings.HasPrefix(opt, "-") { + break // No more flag options to process + } + if args[argi] == "--" { + break // All transformers must do this so main-flags can follow verb-flags + } + argi++ + + if opt == "-h" || opt == "--help" { + transformerSubUsage(os.Stdout) + os.Exit(0) + + } else if opt == "-f" { + fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc) + } else { + transformerSubUsage(os.Stderr) + os.Exit(1) + } + } + + if fieldNames == nil { + transformerSubUsage(os.Stderr) + os.Exit(1) + } + + // Get the old and new text from the command line + if (argc - argi) < 2 { + transformerSubUsage(os.Stderr) + os.Exit(1) + } + 
oldText = args[argi] + newText = args[argi+1] + + argi += 2 + + *pargi = argi + if !doConstruct { // All transformers must do this for main command-line parsing + return nil + } + + transformer, err := NewTransformerSub( + fieldNames, + oldText, + newText, + ) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + + return transformer +} + +// ---------------------------------------------------------------- +type TransformerSub struct { + fieldNames []string + oldText *mlrval.Mlrval + newText *mlrval.Mlrval +} + +// ---------------------------------------------------------------- +func NewTransformerSub( + fieldNames []string, + oldText string, + newText string, +) (*TransformerSub, error) { + tr := &TransformerSub{ + fieldNames: fieldNames, + oldText: mlrval.FromString(oldText), + newText: mlrval.FromString(newText), + } + return tr, nil +} + +func (tr *TransformerSub) Transform( + inrecAndContext *types.RecordAndContext, + outputRecordsAndContexts *list.List, // list of *types.RecordAndContext + inputDownstreamDoneChannel <-chan bool, + outputDownstreamDoneChannel chan<- bool, +) { + HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel) + + if !inrecAndContext.EndOfStream { + inrec := inrecAndContext.Record + + for _, fieldName := range tr.fieldNames { + oldValue := inrec.Get(fieldName) + if oldValue == nil { + continue + } + + newValue := bifs.BIF_sub(oldValue, tr.oldText, tr.newText) + + inrec.PutReference(fieldName, newValue) + } + + outputRecordsAndContexts.PushBack(inrecAndContext) + } else { + outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker + } +} diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout index d6f70fe41a..55efea8ac7 100644 --- a/test/cases/cli-help/0001/expout +++ b/test/cases/cli-help/0001/expout @@ -379,6 +379,16 @@ Outputs records in batches having identical field names. Options: -h|--help Show this message. 
+================================================================ +gsub +Usage: mlr gsub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and handling multiple matches, like the `gsub` DSL function. +See also the `sub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. + ================================================================ having-fields Usage: mlr having-fields [options] @@ -1016,6 +1026,15 @@ then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc. See also the "tee" DSL function which lets you do more ad-hoc customization. +================================================================ +ssub +Usage: mlr ssub [options] +Replaces old string with new string in specified field(s), without regex support for +the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. + ================================================================ stats1 Usage: mlr stats1 [options] @@ -1156,6 +1175,16 @@ Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter o https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. +================================================================ +sub +Usage: mlr sub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and not handling multiple matches, like the `sub` DSL function. +See also the `gsub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. 
+ ================================================================ summary Usage: mlr summary [options] diff --git a/test/cases/verb-case/x b/test/cases/verb-case/x deleted file mode 100644 index a24cc18bdf..0000000000 --- a/test/cases/verb-case/x +++ /dev/null @@ -1,13 +0,0 @@ -mkdir 0020; echo mlr --from test/input.cases-csv --c2j case -u > 0020/cmd -mkdir 0021; echo mlr --from test/input.cases-csv --c2j case -l > 0021/cmd -mkdir 0022; echo mlr --from test/input.cases-csv --c2j case -s > 0022/cmd -mkdir 0023; echo mlr --from test/input.cases-csv --c2j case -t > 0023/cmd -mkdir 0024; echo mlr --from test/input.cases-csv --c2j case -k -u > 0024/cmd -mkdir 0025; echo mlr --from test/input.cases-csv --c2j case -k -l > 0025/cmd -mkdir 0026; echo mlr --from test/input.cases-csv --c2j case -k -s > 0026/cmd -mkdir 0027; echo mlr --from test/input.cases-csv --c2j case -k -t > 0027/cmd -mkdir 0028; echo mlr --from test/input.cases-csv --c2j case -v -u > 0028/cmd -mkdir 0029; echo mlr --from test/input.cases-csv --c2j case -v -l > 0029/cmd -mkdir 0030; echo mlr --from test/input.cases-csv --c2j case -v -s > 0030/cmd -mkdir 0031; echo mlr --from test/input.cases-csv --c2j case -v -t > 0031/cmd -mkdir 0032; echo mlr --from test/input.cases-csv --c2j case -u apple,ball then case -l cat,dog > 0032/cmd From 2b9505af49cd9539a34bc10e67954bd2d5191974 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Aug 2023 16:19:40 -0400 Subject: [PATCH 2/4] doc mods --- docs/src/manpage.md | 39 +++++++++++++++++++++++++----- docs/src/manpage.txt | 39 +++++++++++++++++++++++++----- man/manpage.txt | 39 +++++++++++++++++++++++++----- man/mlr.1 | 57 +++++++++++++++++++++++++++++++++++++++----- 4 files changed, 150 insertions(+), 24 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index aad8a4f50f..1a9ebea12c 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -194,12 +194,13 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap case cat check clean-whitespace 
count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + fraction gap grep group-by group-like gsub having-fields head histogram + json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split ssub stats1 stats2 step + sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace + unsparsify 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -1245,6 +1246,15 @@ MILLER(1) MILLER(1) Options: -h|--help Show this message. + 1mgsub0m + Usage: mlr gsub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and handling multiple matches, like the `gsub` DSL function. + See also the `sub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. @@ -1853,6 +1863,14 @@ MILLER(1) MILLER(1) See also the "tee" DSL function which lets you do more ad-hoc customization. + 1mssub0m + Usage: mlr ssub [options] + Replaces old string with new string in specified field(s), without regex support for + the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. 
+ Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across @@ -1990,6 +2008,15 @@ MILLER(1) MILLER(1) https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. + 1msub0m + Usage: mlr sub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and not handling multiple matches, like the `sub` DSL function. + See also the `gsub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 1d59128536..7372e3768b 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -173,12 +173,13 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + fraction gap grep group-by group-like gsub having-fields head histogram + json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split ssub stats1 stats2 step + sub summary tac tail tee template top utf8-to-latin1 unflatten uniq 
unspace + unsparsify 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -1224,6 +1225,15 @@ MILLER(1) MILLER(1) Options: -h|--help Show this message. + 1mgsub0m + Usage: mlr gsub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and handling multiple matches, like the `gsub` DSL function. + See also the `sub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. @@ -1832,6 +1842,14 @@ MILLER(1) MILLER(1) See also the "tee" DSL function which lets you do more ad-hoc customization. + 1mssub0m + Usage: mlr ssub [options] + Replaces old string with new string in specified field(s), without regex support for + the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across @@ -1969,6 +1987,15 @@ MILLER(1) MILLER(1) https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. + 1msub0m + Usage: mlr sub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and not handling multiple matches, like the `sub` DSL function. + See also the `gsub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. 
diff --git a/man/manpage.txt b/man/manpage.txt index 1d59128536..7372e3768b 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -173,12 +173,13 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + fraction gap grep group-by group-like gsub having-fields head histogram + json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split ssub stats1 stats2 step + sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace + unsparsify 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -1224,6 +1225,15 @@ MILLER(1) MILLER(1) Options: -h|--help Show this message. + 1mgsub0m + Usage: mlr gsub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and handling multiple matches, like the `gsub` DSL function. + See also the `sub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. @@ -1832,6 +1842,14 @@ MILLER(1) MILLER(1) See also the "tee" DSL function which lets you do more ad-hoc customization. 
+ 1mssub0m + Usage: mlr ssub [options] + Replaces old string with new string in specified field(s), without regex support for + the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across @@ -1969,6 +1987,15 @@ MILLER(1) MILLER(1) https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. + 1msub0m + Usage: mlr sub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and not handling multiple matches, like the `sub` DSL function. + See also the `gsub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. diff --git a/man/mlr.1 b/man/mlr.1 index 583b5dc11f..4e1dc9ca3b 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -214,12 +214,13 @@ for all things with "map" in their names. 
.nf altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values -fraction gap grep group-by group-like having-fields head histogram json-parse -json-stringify join label latin1-to-utf8 least-frequent merge-fields -most-frequent nest nothing put regularize remove-empty-columns rename reorder -repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records -sort sort-within-records split stats1 stats2 step summary tac tail tee -template top utf8-to-latin1 unflatten uniq unspace unsparsify +fraction gap grep group-by group-like gsub having-fields head histogram +json-parse json-stringify join label latin1-to-utf8 least-frequent +merge-fields most-frequent nest nothing put regularize remove-empty-columns +rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle +skip-trivial-records sort sort-within-records split ssub stats1 stats2 step +sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace +unsparsify .fi .if n \{\ .RE @@ -1529,6 +1530,21 @@ Options: .fi .if n \{\ .RE +.SS "gsub" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr gsub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and handling multiple matches, like the `gsub` DSL function. +See also the `sub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. +.fi +.if n \{\ +.RE .SS "having-fields" .if n \{\ .RS 0 @@ -2311,6 +2327,20 @@ See also the "tee" DSL function which lets you do more ad-hoc customization. .fi .if n \{\ .RE +.SS "ssub" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr ssub [options] +Replaces old string with new string in specified field(s), without regex support for +the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. 
+.fi +.if n \{\ +.RE .SS "stats1" .if n \{\ .RS 0 @@ -2466,6 +2496,21 @@ for more information on EWMA. .fi .if n \{\ .RE +.SS "sub" +.if n \{\ +.RS 0 +.\} +.nf +Usage: mlr sub [options] +Replaces old string with new string in specified field(s), with regex support +for the old string and not handling multiple matches, like the `sub` DSL function. +See also the `gsub` and `ssub` verbs. +Options: +-f {a,b,c} Field names to convert. +-h|--help Show this message. +.fi +.if n \{\ +.RE .SS "summary" .if n \{\ .RS 0 From a84732720613b82da8bb2780fbf261c3e47dbfa6 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sat, 19 Aug 2023 16:26:42 -0400 Subject: [PATCH 3/4] content for verbs reference page --- docs/src/reference-verbs.md | 146 +++++++++++++++++++++++++++++++++ docs/src/reference-verbs.md.in | 42 ++++++++++ 2 files changed, 188 insertions(+) diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md index a9abbcfe56..6e9fbb4780 100644 --- a/docs/src/reference-verbs.md +++ b/docs/src/reference-verbs.md @@ -1447,6 +1447,55 @@ record_count resource 150 /path/to/second/file +## gsub + +
+mlr gsub -h
+
+
+Usage: mlr gsub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and handling multiple matches, like the `gsub` DSL function.
+See also the `sub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXlow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXlow circXe   true  8  73    63.9785  4.2370
+example.csv yeXlow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXXow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXXow circXe   true  8  73    63.9785  4.2370
+example.csv yeXXow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ ## having-fields
@@ -3120,6 +3169,54 @@ then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
 See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+## ssub + +
+mlr ssub -h
+
+
+Usage: mlr ssub [options]
+Replaces old string with new string in specified field(s), without regex support for
+the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f filename . o
+
+
+filename    color  shape    flag  k  index quantity rate
+oxample.csv yellow triangle true  1  11    43.6498  9.8870
+oxample.csv red    square   true  2  15    79.2778  0.0130
+oxample.csv red    circle   true  3  16    13.8103  2.9010
+oxample.csv red    square   false 4  48    77.5542  7.4670
+oxample.csv purple triangle false 5  51    81.2290  8.5910
+oxample.csv red    square   false 6  64    77.1991  9.5310
+oxample.csv purple triangle false 7  65    80.1405  5.8240
+oxample.csv yellow circle   true  8  73    63.9785  4.2370
+oxample.csv yellow circle   true  9  87    63.5058  8.3350
+oxample.csv purple square   false 10 91    72.3735  8.2430
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then ssub -f filename . o
+
+
+filename    color  shape    flag  k  index quantity rate
+exampleocsv yellow triangle true  1  11    43.6498  9.8870
+exampleocsv red    square   true  2  15    79.2778  0.0130
+exampleocsv red    circle   true  3  16    13.8103  2.9010
+exampleocsv red    square   false 4  48    77.5542  7.4670
+exampleocsv purple triangle false 5  51    81.2290  8.5910
+exampleocsv red    square   false 6  64    77.1991  9.5310
+exampleocsv purple triangle false 7  65    80.1405  5.8240
+exampleocsv yellow circle   true  8  73    63.9785  4.2370
+exampleocsv yellow circle   true  9  87    63.5058  8.3350
+exampleocsv purple square   false 10 91    72.3735  8.2430
+
+ ## stats1
@@ -3574,6 +3671,55 @@ $ each 10 uptime | mlr -p step -a delta -f 11
 
 
+## sub + +
+mlr sub -h
+
+
+Usage: mlr sub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and not handling multiple matches, like the `sub` DSL function.
+See also the `gsub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXlow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXlow circXe   true  8  73    63.9785  4.2370
+example.csv yeXlow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXXow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXXow circXe   true  8  73    63.9785  4.2370
+example.csv yeXXow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ ## summary
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in
index 0ff0bd15dd..44feda3deb 100644
--- a/docs/src/reference-verbs.md.in
+++ b/docs/src/reference-verbs.md.in
@@ -487,6 +487,20 @@ GENMD-RUN-COMMAND
 mlr --opprint group-like data/het.dkvp
 GENMD-EOF
 
+## gsub
+
+GENMD-RUN-COMMAND
+mlr gsub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+GENMD-EOF
+
 ## having-fields
 
 GENMD-RUN-COMMAND
@@ -987,6 +1001,20 @@ GENMD-RUN-COMMAND
 mlr split --help
 GENMD-EOF
 
+## ssub
+
+GENMD-RUN-COMMAND
+mlr ssub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f filename . o
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then ssub -f filename . o
+GENMD-EOF
+
 ## stats1
 
 GENMD-RUN-COMMAND
@@ -1095,6 +1123,20 @@ Example deriving uptime-delta from system uptime:
 
 GENMD-INCLUDE-ESCAPED(data/ping-delta-example.txt)
 
+## sub
+
+GENMD-RUN-COMMAND
+mlr sub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+GENMD-EOF
+
 ## summary
 
 GENMD-RUN-COMMAND

From 182b0eb9da1f6d1913b9d2469ae288bc737a99cc Mon Sep 17 00:00:00 2001
From: John Kerl 
Date: Sat, 19 Aug 2023 16:39:10 -0400
Subject: [PATCH 4/4] Add test/cases/verb-sub-gsub-ssub/ regression cases for the sub, gsub, and ssub verbs

---
 test/cases/verb-sub-gsub-ssub/0001/cmd    |  1 +
 test/cases/verb-sub-gsub-ssub/0001/experr |  0
 test/cases/verb-sub-gsub-ssub/0001/expout | 11 +++++++++++
 test/cases/verb-sub-gsub-ssub/0002/cmd    |  1 +
 test/cases/verb-sub-gsub-ssub/0002/experr |  0
 test/cases/verb-sub-gsub-ssub/0002/expout | 11 +++++++++++
 test/cases/verb-sub-gsub-ssub/0003/cmd    |  1 +
 test/cases/verb-sub-gsub-ssub/0003/experr |  0
 test/cases/verb-sub-gsub-ssub/0003/expout | 11 +++++++++++
 test/cases/verb-sub-gsub-ssub/0004/cmd    |  1 +
 test/cases/verb-sub-gsub-ssub/0004/experr |  0
 test/cases/verb-sub-gsub-ssub/0004/expout | 11 +++++++++++
 12 files changed, 48 insertions(+)
 create mode 100644 test/cases/verb-sub-gsub-ssub/0001/cmd
 create mode 100644 test/cases/verb-sub-gsub-ssub/0001/experr
 create mode 100644 test/cases/verb-sub-gsub-ssub/0001/expout
 create mode 100644 test/cases/verb-sub-gsub-ssub/0002/cmd
 create mode 100644 test/cases/verb-sub-gsub-ssub/0002/experr
 create mode 100644 test/cases/verb-sub-gsub-ssub/0002/expout
 create mode 100644 test/cases/verb-sub-gsub-ssub/0003/cmd
 create mode 100644 test/cases/verb-sub-gsub-ssub/0003/experr
 create mode 100644 test/cases/verb-sub-gsub-ssub/0003/expout
 create mode 100644 test/cases/verb-sub-gsub-ssub/0004/cmd
 create mode 100644 test/cases/verb-sub-gsub-ssub/0004/experr
 create mode 100644 test/cases/verb-sub-gsub-ssub/0004/expout

diff --git a/test/cases/verb-sub-gsub-ssub/0001/cmd b/test/cases/verb-sub-gsub-ssub/0001/cmd
new file mode 100644
index 0000000000..7d4cec775c
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0001/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy sub  -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0001/experr b/test/cases/verb-sub-gsub-ssub/0001/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-sub-gsub-ssub/0001/expout b/test/cases/verb-sub-gsub-ssub/0001/expout
new file mode 100644
index 0000000000..917c3f5ed6
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0001/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXe pan 6  0.52712616 0.49322129
+Xks zXe 7  0.61178406 0.18788492
+zXe wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0002/cmd b/test/cases/verb-sub-gsub-ssub/0002/cmd
new file mode 100644
index 0000000000..f33200891d
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0002/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy gsub -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0002/experr b/test/cases/verb-sub-gsub-ssub/0002/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-sub-gsub-ssub/0002/expout b/test/cases/verb-sub-gsub-ssub/0002/expout
new file mode 100644
index 0000000000..49d53727b3
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0002/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXX pan 6  0.52712616 0.49322129
+Xks zXX 7  0.61178406 0.18788492
+zXX wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0003/cmd b/test/cases/verb-sub-gsub-ssub/0003/cmd
new file mode 100644
index 0000000000..ff6b15c4ac
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0003/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy sub  -f a,b . X
diff --git a/test/cases/verb-sub-gsub-ssub/0003/experr b/test/cases/verb-sub-gsub-ssub/0003/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-sub-gsub-ssub/0003/expout b/test/cases/verb-sub-gsub-ssub/0003/expout
new file mode 100644
index 0000000000..a8b8e86432
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0003/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+Xan Xan 1  0.34679014 0.72680286
+Xks Xan 2  0.75867996 0.52215111
+Xye Xye 3  0.20460331 0.33831853
+Xks Xye 4  0.38139939 0.13418874
+Xye Xan 5  0.57328892 0.86362447
+Xee Xan 6  0.52712616 0.49322129
+Xks Xee 7  0.61178406 0.18788492
+Xee Xye 8  0.59855401 0.97618139
+Xat Xye 9  0.03144188 0.74955076
+Xan Xye 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0004/cmd b/test/cases/verb-sub-gsub-ssub/0004/cmd
new file mode 100644
index 0000000000..8770d578d5
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0004/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy ssub -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0004/experr b/test/cases/verb-sub-gsub-ssub/0004/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-sub-gsub-ssub/0004/expout b/test/cases/verb-sub-gsub-ssub/0004/expout
new file mode 100644
index 0000000000..917c3f5ed6
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0004/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXe pan 6  0.52712616 0.49322129
+Xks zXe 7  0.61178406 0.18788492
+zXe wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836