Skip to content

Commit

Permalink
Use int8 for mvtype (memory reduction)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Nov 26, 2022
1 parent a299ce2 commit 9b8d65c
Show file tree
Hide file tree
Showing 17 changed files with 368 additions and 347 deletions.
23 changes: 12 additions & 11 deletions internal/pkg/bifs/cmp.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import (
"github.com/johnkerl/miller/internal/pkg/mlrval"
)

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
//
// string_cmp implements the spaceship operator for strings.
func string_cmp(a, b string) int64 {
if a < b {
Expand Down Expand Up @@ -43,7 +44,7 @@ func float_cmp(a, b float64) int64 {
return 0
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_ss returns a boolean Mlrval that is true when the string values
// acquired from input1 and input2 are equal.
func eq_b_ss(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireStringValue()
	rhs := input2.AcquireStringValue()
	return mlrval.FromBool(lhs == rhs)
}
Expand All @@ -66,7 +67,7 @@ func cmp_b_ss(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(int64(string_cmp(input1.AcquireStringValue(), input2.AcquireStringValue())))
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_xs returns a boolean Mlrval that is true when input1's String()
// rendering equals the string value acquired from input2.
func eq_b_xs(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.String()
	rhs := input2.AcquireStringValue()
	return mlrval.FromBool(lhs == rhs)
}
Expand All @@ -89,7 +90,7 @@ func cmp_b_xs(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(int64(string_cmp(input1.String(), input2.AcquireStringValue())))
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_sx returns a boolean Mlrval that is true when the string value
// acquired from input1 equals input2's String() rendering.
func eq_b_sx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireStringValue()
	rhs := input2.String()
	return mlrval.FromBool(lhs == rhs)
}
Expand All @@ -112,7 +113,7 @@ func cmp_b_sx(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(string_cmp(input1.AcquireStringValue(), input2.String()))
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_ii returns a boolean Mlrval that is true when the int values
// acquired from input1 and input2 are equal.
func eq_b_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireIntValue()
	rhs := input2.AcquireIntValue()
	return mlrval.FromBool(lhs == rhs)
}
Expand All @@ -135,7 +136,7 @@ func cmp_b_ii(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(int_cmp(input1.AcquireIntValue(), input2.AcquireIntValue()))
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_if returns a boolean Mlrval that is true when input1's int value,
// converted to float64, equals the float value acquired from input2.
func eq_b_if(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := float64(input1.AcquireIntValue())
	rhs := input2.AcquireFloatValue()
	return mlrval.FromBool(lhs == rhs)
}
Expand All @@ -158,7 +159,7 @@ func cmp_b_if(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(float_cmp(float64(input1.AcquireIntValue()), input2.AcquireFloatValue()))
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_fi returns a boolean Mlrval that is true when the float value
// acquired from input1 equals input2's int value converted to float64.
func eq_b_fi(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireFloatValue()
	rhs := float64(input2.AcquireIntValue())
	return mlrval.FromBool(lhs == rhs)
}
Expand All @@ -181,7 +182,7 @@ func cmp_b_fi(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(float_cmp(input1.AcquireFloatValue(), float64(input2.AcquireIntValue())))
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_ff returns a boolean Mlrval that is true when the float values
// acquired from input1 and input2 compare equal under Go's == operator.
func eq_b_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireFloatValue()
	rhs := input2.AcquireFloatValue()
	return mlrval.FromBool(lhs == rhs)
}
Expand All @@ -204,7 +205,7 @@ func cmp_b_ff(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(float_cmp(input1.AcquireFloatValue(), input2.AcquireFloatValue()))
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_bb returns a boolean Mlrval that is true when the bool values
// acquired from input1 and input2 are the same.
func eq_b_bb(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireBoolValue()
	rhs := input2.AcquireBoolValue()
	return mlrval.FromBool(lhs == rhs)
}
Expand All @@ -231,7 +232,7 @@ func cmp_b_bb(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromInt(int_cmp(lib.BoolToInt(input1.AcquireBoolValue()), lib.BoolToInt(input2.AcquireBoolValue())))
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
func eq_b_aa(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
a := input1.AcquireArrayValue()
b := input2.AcquireArrayValue()
Expand All @@ -257,7 +258,7 @@ func ne_b_aa(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
return mlrval.FromBool(!output.AcquireBoolValue())
}

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// eq_b_mm returns a boolean Mlrval holding the result of calling Equals on
// input1's map value with input2's map value as the argument.
func eq_b_mm(input1, input2 *mlrval.Mlrval) *mlrval.Mlrval {
	lhs := input1.AcquireMapValue()
	rhs := input2.AcquireMapValue()
	return mlrval.FromBool(lhs.Equals(rhs))
}
Expand Down
22 changes: 11 additions & 11 deletions internal/pkg/bifs/collections.go
Original file line number Diff line number Diff line change
Expand Up @@ -867,17 +867,17 @@ func unaliasArrayLengthIndex(n int, mindex int) (int, bool) {
}

// MillerSliceAccess is code shared by the string-slicer and the array-slicer.
// * Miller indices are 1-up, 1..n where n is the length of the array/string.
// They are also aliased -n..-1. These are called "mindex" (if int) or "index mlrval"
// (if mlrval).
// * Go indices are 0-up, with no aliasing. These are called "zindex".
// * The job of this routine is to map a pair of index-mlrval to a pair of zindex,
// with possible outcomes that the slice access should result in an empty array/string,
// or Mlrval of type absent, or Mlrval of type error.
// * Callsites include the DSL array-slicer (e.g. [1,2,3,4,5][2:3]), the DSL string-slicer
// (e.g. "abcde"[2:3]), the substr1 function (e.g. substr1("abcde", 2, 3), and the substr0
// function (e.g. substr0("abcde", 1, 2)).
// * The isZeroUp argument is in support of substr0.
// - Miller indices are 1-up, 1..n where n is the length of the array/string.
// They are also aliased -n..-1. These are called "mindex" (if int) or "index mlrval"
// (if mlrval).
// - Go indices are 0-up, with no aliasing. These are called "zindex".
// - The job of this routine is to map a pair of index-mlrval to a pair of zindex,
// with possible outcomes that the slice access should result in an empty array/string,
// or Mlrval of type absent, or Mlrval of type error.
// - Callsites include the DSL array-slicer (e.g. [1,2,3,4,5][2:3]), the DSL string-slicer
// (e.g. "abcde"[2:3]), the substr1 function (e.g. substr1("abcde", 2, 3)), and the substr0
// function (e.g. substr0("abcde", 1, 2)).
// - The isZeroUp argument is in support of substr0.
func MillerSliceAccess(
lowerIndexMlrval *mlrval.Mlrval,
upperIndexMlrval *mlrval.Mlrval,
Expand Down
14 changes: 7 additions & 7 deletions internal/pkg/cli/flag_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ import (
// Data types used within the flags table.

// FlagParser is a function which takes a flag such as `--foo`.
// * It should assume that a flag.Owns method has already been invoked to be
// sure that this function is indeed the right one to call for `--foo`.
// * The FlagParser function is responsible for advancing *pargi by 1 (if
// `--foo`) or 2 (if `--foo bar`), checking to see if argc is long enough in
// the latter case, and mutating the options struct.
// * Successful handling of the flag is indicated by this function making a
// non-zero increment of *pargi.
// - It should assume that a flag.Owns method has already been invoked to be
// sure that this function is indeed the right one to call for `--foo`.
// - The FlagParser function is responsible for advancing *pargi by 1 (if
// `--foo`) or 2 (if `--foo bar`), checking to see if argc is long enough in
// the latter case, and mutating the options struct.
// - Successful handling of the flag is indicated by this function making a
// non-zero increment of *pargi.
type FlagParser func(
args []string,
argc int,
Expand Down
16 changes: 8 additions & 8 deletions internal/pkg/cli/option_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ import (
)

// FinalizeReaderOptions does a few things.
// * If a file format was specified but one or more separators were not, a
// default specific to that file format is applied.
// * Computing regexes for IPS and IFS, and unbackslashing IRS. This is
// because the '\n' at the command line which is Go "\\n" (a backslash and an
// n) needs to become the single newline character, and likewise for "\t", etc.
// * IFS/IPS can have escapes like "\x1f" which aren't valid regex literals
// so we unhex them. For example, from "\x1f" -- the four bytes '\', 'x', '1', 'f'
// -- to the single byte with hex code 0x1f.
// - If a file format was specified but one or more separators were not, a
// default specific to that file format is applied.
// - Computing regexes for IPS and IFS, and unbackslashing IRS. This is
// because the '\n' at the command line which is Go "\\n" (a backslash and an
// n) needs to become the single newline character, and likewise for "\t", etc.
// - IFS/IPS can have escapes like "\x1f" which aren't valid regex literals
// so we unhex them. For example, from "\x1f" -- the four bytes '\', 'x', '1', 'f'
// -- to the single byte with hex code 0x1f.
func FinalizeReaderOptions(readerOptions *TReaderOptions) {

readerOptions.IFS = lib.UnhexStringLiteral(readerOptions.IFS)
Expand Down
14 changes: 9 additions & 5 deletions internal/pkg/climain/mlrcli_shebang.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@ import (
)

// maybeInterpolateDashS supports Miller scripts with shebang lines like
// #!/usr/bin/env mlr -s
// --csv tac then filter '
// NR % 2 == 1
// '
//
// #!/usr/bin/env mlr -s
// --csv tac then filter '
// NR % 2 == 1
// '
//
// invoked as
// scriptfile input1.csv input2.csv
//
// scriptfile input1.csv input2.csv
//
// The "-s" flag must be the very first command-line argument after "mlr" for
// two reasons:
// * This is how shebang lines work
Expand Down
18 changes: 9 additions & 9 deletions internal/pkg/dsl/cst/builtin_functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,21 +385,21 @@ func (node *TernaryFunctionWithStateCallsiteNode) Evaluate(
//
// Note the use of "capture" is ambiguous:
//
// * There is the regex-match part which captures submatches out
// of a full match expression, and saves them.
// - There is the regex-match part which captures submatches out
// of a full match expression, and saves them.
//
// * Then there is the part which inserts these captures into another string.
//
// * For sub/gsub, the former and latter are both within the sub/gsub routine.
// E.g. with
// - For sub/gsub, the former and latter are both within the sub/gsub routine.
// E.g. with
// $y = sub($x, "(..)_(...)", "\2:\1")
// and $x being "ab_cde", $y will be "cde:ab".
// and $x being "ab_cde", $y will be "cde:ab".
//
// * For =~ and !=~, the former are right there, but the latter can be several
// lines later. E.g.
// - For =~ and !=~, the former are right there, but the latter can be several
// lines later. E.g.
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1";
// ... other lines of code ...
// $y = "\2:\1";
// }
//
// So: this RegexCaptureBinaryFunctionCallsiteNode only refers to the =~ and
Expand Down
16 changes: 8 additions & 8 deletions internal/pkg/dsl/cst/leaves.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,10 @@ type StringLiteralNode struct {
// "\9" in it. As of the original design of Miller, submatches are captured
// in one place and interpolated in another. For example:
//
// if ($x =~ "(..)_(...)" {
// ... other lines of code ...
// $y = "\2:\1";
// }
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1";
// }
//
// This node type is for things like "\2:\1". They can occur quite far from the
// =~ callsite so we need to check all string literals to see if they have "\0"
Expand Down Expand Up @@ -287,10 +287,10 @@ func (node *StringLiteralNode) Evaluate(

// As noted above, in things like
//
// if ($x =~ "(..)_(...)" {
// ... other lines of code ...
// $y = "\2:\1";
// }
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1";
// }
//
// the captures can be set (by =~ or !=~) quite far from where they are used.
// This is why we consult the state.RegexCaptures here, to see if they've been
Expand Down
12 changes: 6 additions & 6 deletions internal/pkg/dsl/cst/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ import (
// Namely, for "bare booleans" which are non-assignment statements like 'NR >
// 10' or 'true' or '$x =~ "(..)_(...)"' or even '1+2'.
//
// * For mlr put, bare booleans are no-ops; except side-effects (like
// regex-captures)
// * For mlr filter, they set the filter condition only if they're the last
// statement in the main block.
// * For mlr repl, similar to mlr filter: they are used to track the output to
// be printed for an expression entered at the REPL prompt.
// - For mlr put, bare booleans are no-ops, except for side-effects (like
// regex-captures).
// - For mlr filter, they set the filter condition only if they're the last
// statement in the main block.
// - For mlr repl, similar to mlr filter: they are used to track the output to
// be printed for an expression entered at the REPL prompt.
type DSLInstanceType int

const (
Expand Down
14 changes: 7 additions & 7 deletions internal/pkg/dsl/cst/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,13 @@ func validateForLoopTwoVariableUniqueNames(astNode *dsl.ASTNode) error {
// Check against 'for ((a,a), b in $*)' or 'for ((a,b), a in $*)' -- repeated 'a'.
// AST:
// * statement block
// * multi-variable for-loop "for"
// * parameter list
// * local variable "a"
// * local variable "b"
// * local variable "a"
// * full record "$*"
// * statement block
// - multi-variable for-loop "for"
// - parameter list
// - local variable "a"
// - local variable "b"
// - local variable "a"
// - full record "$*"
// - statement block
func validateForLoopMultivariableUniqueNames(astNode *dsl.ASTNode) error {
lib.InternalCodingErrorIf(astNode.Type != dsl.NodeTypeForLoopMultivariable)
keyVarsNode := astNode.Children[0]
Expand Down
12 changes: 6 additions & 6 deletions internal/pkg/input/record_reader_xtab.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,13 @@ func (reader *RecordReaderXTAB) processHandle(

// Given input like
//
// a 1
// b 2
// c 3
// a 1
// b 2
// c 3
//
// a 4
// b 5
// c 6
// a 4
// b 5
// c 6
//
// this function reads the input stream a line at a time, then produces
// string-lists one per stanza where a stanza is delimited by blank line, or
Expand Down
30 changes: 15 additions & 15 deletions internal/pkg/lib/regex.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,16 +145,16 @@ func RegexReplacementHasCaptures(
// RegexMatches implements the =~ DSL operator. The captures are stored in DSL
// state and may be used by a DSL statement after the =~. For example, in
//
// sub($a, "(..)_(...)", "\1:\2")
// sub($a, "(..)_(...)", "\1:\2")
//
// the replacement string is an argument to sub and therefore the captures are
// confined to the implementation of the sub function. Similarly for gsub. But
// for the match operator, people can do
//
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1"
// }
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1"
// }
//
// and the =~ callsite doesn't know if captures will be used or not. So,
// RegexMatches always returns the captures array. It is stored within the CST
Expand Down Expand Up @@ -229,18 +229,18 @@ func RegexMatchesCompiled(
}

// InterpolateCaptures example:
// * Input $x is "ab_cde"
// * DSL expression
// - Input $x is "ab_cde"
// - DSL expression
// if ($x =~ "(..)_(...)") {
// ... other lines of code ...
// $y = "\2:\1";
// ... other lines of code ...
// $y = "\2:\1";
// }
// * InterpolateCaptures is used on the evaluation of "\2:\1"
// * replacementString is "\2:\1"
// * replacementMatrix contains precomputed/cached offsets for the "\2" and
// "\1" substrings within "\2:\1"
// * captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
// slot 2 being "cde" (for "\2"), and slots 3-9 being "".
// - InterpolateCaptures is used on the evaluation of "\2:\1"
// - replacementString is "\2:\1"
// - replacementMatrix contains precomputed/cached offsets for the "\2" and
// "\1" substrings within "\2:\1"
// - captures has slot 0 being "ab_cde" (for "\0"), slot 1 being "ab" (for "\1"),
// slot 2 being "cde" (for "\2"), and slots 3-9 being "".
func InterpolateCaptures(
replacementString string,
replacementMatrix [][]int,
Expand Down
12 changes: 6 additions & 6 deletions internal/pkg/mlrval/mlrmap_accessors.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,12 @@ func (mlrmap *Mlrmap) findEntry(key string) *MlrmapEntry {
// findEntryByPositionalIndex is for '$[1]' etc. in the DSL.
//
// Notes:
// * This is a linear search.
// * Indices are 1-up not 0-up
// * Indices -n..-1 are aliases for 1..n. In particular, it will be faster to
// get the -1st field than the nth.
// * Returns 0 on invalid index: 0, or < -n, or > n where n is the number of
// fields.
// - This is a linear search.
// - Indices are 1-up not 0-up
// - Indices -n..-1 are aliases for 1..n. In particular, it will be faster to
// get the -1st field than the nth.
// - Returns nil on invalid index: 0, or < -n, or > n where n is the number of
// fields.
func (mlrmap *Mlrmap) findEntryByPositionalIndex(position int64) *MlrmapEntry {
if position > mlrmap.FieldCount || position < -mlrmap.FieldCount || position == 0 {
return nil
Expand Down
Loading

0 comments on commit 9b8d65c

Please sign in to comment.