Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

addresses issue #310 #311

Merged
merged 4 commits into from
Jun 6, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion core_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ func (m *coreMatcher) fields() *coreFields {
// addPattern - the patternBytes is a JSON text which must be an object. The X is what the matcher returns to indicate
// that the provided pattern has been matched. In many applications it might be a string which is the pattern's name.
func (m *coreMatcher) addPattern(x X, patternJSON string) error {
return m.addPatternWithPrinter(x, patternJSON, &nullPrinter{})
}

// addPatternWithPrinter can be called from debugging and under-development code to allow viewing pretty-printed
// NFAs
func (m *coreMatcher) addPatternWithPrinter(x X, patternJSON string, printer printer) error {
timbray marked this conversation as resolved.
Show resolved Hide resolved
patternFields, err := patternFromJSON([]byte(patternJSON))
if err != nil {
return err
Expand Down Expand Up @@ -97,7 +103,7 @@ func (m *coreMatcher) addPattern(x X, patternJSON string) error {
case existsFalseType:
ns = state.addExists(false, field)
default:
ns = state.addTransition(field)
ns = state.addTransition(field, printer)
}

nextStates = append(nextStates, ns...)
Expand Down
4 changes: 2 additions & 2 deletions field_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func (m *fieldMatcher) addExists(exists bool, field *patternField) []*fieldMatch
return []*fieldMatcher{trans}
}

func (m *fieldMatcher) addTransition(field *patternField) []*fieldMatcher {
func (m *fieldMatcher) addTransition(field *patternField, printer printer) []*fieldMatcher {
timbray marked this conversation as resolved.
Show resolved Hide resolved
// we build the new updateable state in freshStart so that we can blast it in atomically once computed
current := m.fields()
freshStart := &fmFields{
Expand All @@ -119,7 +119,7 @@ func (m *fieldMatcher) addTransition(field *patternField) []*fieldMatcher {
// cases where this doesn't happen and reduce the number of fieldMatchStates
var nextFieldMatchers []*fieldMatcher
for _, val := range field.vals {
nextFieldMatchers = append(nextFieldMatchers, vm.addTransition(val))
nextFieldMatchers = append(nextFieldMatchers, vm.addTransition(val, printer))

// if the val is a number, let's add a transition on the canonicalized number
// TODO: Only do this if asked
Expand Down
67 changes: 4 additions & 63 deletions nfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ func traverseOneFAStep(table *smallTable, index int, val []byte, transitions []*
return transitions
}
index++
// 1. Note no effort to traverse multiple next-steps in parallel. The traversal compute is tiny and the
// necessary concurrency apparatus would almost certainly outweigh it
// 2. TODO: It would probably be better to implement this iteratively rather than recursively.
// The recursion will potentially go as deep as the val argument is long.
for _, nextStep := range nextSteps.steps {
transitions = append(transitions, nextStep.fieldTransitions...)
transitions = traverseOneFAStep(nextStep.table, index, val, transitions)
Expand Down Expand Up @@ -101,66 +105,3 @@ func mergeFAStates(state1, state2 *faState, keyMemo map[faStepKey]*faState) *faS

return combined
}

/**************************************/
/* debugging apparatus from here down */
/**************************************/
/*
func (t *smallTable) dump() string {
return dump1(&faState{table: t}, 0, make(map[*smallTable]bool))
}
func dump1(fas *faState, indent int, already map[*smallTable]bool) string {
t := fas.table
s := " " + st2(t) + "\n"
for _, step := range t.steps {
if step != nil {
for _, state := range step.steps {
_, ok := already[state.table]
if !ok {
already[state.table] = true
s += dump1(state, indent+1, already)
}
}
}
}
return s
}
func (t *smallTable) shortDump() string {
return fmt.Sprintf("%d-%s", t.serial, t.label)
}

func (n *faNext) String() string {
var snames []string
for _, step := range n.steps {
snames = append(snames, fmt.Sprintf("%d %s", step.table.serial, step.table.label))
}
return "[" + strings.Join(snames, " · ") + "]"
}

func stString(t *smallTable) string {
var rows []string

for i := range t.ceilings {
c := t.ceilings[i]
if i == 0 {
c = 0
} else {
if c != valueTerminator && c != byte(byteCeiling) {
c = t.ceilings[i-1]
}
}
var trailer string
if i == len(t.ceilings)-1 && c != valueTerminator && c != byte(byteCeiling) {
trailer = "…"
} else {
trailer = ""
}
if t.steps[i] != nil {
rows = append(rows, fmt.Sprintf("%s%s:%s ", branchChar(c), trailer, t.steps[i].String()))
} else {
rows = append(rows, fmt.Sprintf("%s%s:nil ", branchChar(c), trailer))
}
}
return fmt.Sprintf("s%d [%s] ", t.serial, t.label) + strings.Join(rows, "/ ")
}
*/
4 changes: 2 additions & 2 deletions nfa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func TestFocusedMerge(t *testing.T) {

for _, shellStyle := range shellStyles {
str := `"` + shellStyle + `"`
automaton, matcher := makeShellStyleAutomaton([]byte(str))
automaton, matcher := makeShellStyleAutomaton([]byte(str), &nullPrinter{})
automata = append(automata, automaton)
matchers = append(matchers, matcher)
}
Expand All @@ -76,7 +76,7 @@ func TestFocusedMerge(t *testing.T) {
s := statsAccum{
fmVisited: make(map[*fieldMatcher]bool),
vmVisited: make(map[*valueMatcher]bool),
stVisited: make(map[any]bool),
stVisited: make(map[*smallTable]bool),
}
faStats(merged, &s)
fmt.Println(s.stStats())
Expand Down
158 changes: 158 additions & 0 deletions prettyprinter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package quamina

import (
"fmt"
"math/rand"
"strings"
)

// printer is an interface used to generate representations of Quamina data structures to facilitate
// debugging and optimization. It's an interface rather than a type so that a null implementation can
// be provided for production that should incur very little performance cost.
type printer interface {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest following what Google did when they developed the slog logging package. Instead of going with an interface type, they used a concrete struct with a specific interface handler type to avoid allocations: https://youtu.be/8rnI2xLrdeM?feature=shared&t=1336 (starts at that conversation)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tl;dr

// inspired by https://pkg.go.dev/golang.org/x/exp/slog#Logger
type Handler interface {
  Enabled() bool
  Handle(...)
}

type Printer struct {
  handler handler
}

func New() *Printer {...}

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, looking at this. Unless you're actively testing/debugging and using newPrettyPrinter() the only allocation currently in production code is in value_matcher:

		case shellStyleType:
			newFA, nextField = makeShellStyleAutomaton(valBytes, &nullPrinter{})

It dawns on me that if I put a global variable in pretty_printer.go like this:

var sharedNullPrinter = &nullPrinter{}

Then that line in value_matcher becomes:

		case shellStyleType:
			newFA, nextField = makeShellStyleAutomaton(valBytes, sharedNullPrinter)

Then there would be no allocations ever outside of when you're testing/debugging. Would that address your concern?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I agree about documenting this stuff. Probably best to create a DEVELOPING.md or some such; is there a standard name for this kind of thing?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this implementation is purely for developing, CONTRIBUTING.md might be common

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Left a comment in the place where I think alloc might happen? Ideally, we'd have a benchmark which shows the before/after here?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, CONTRIBUTING is the place.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a description to CONTRIBUTING in the latest commit. If that's OK I think we can resolve this conversation?

labelTable(table *smallTable, label string)
printNFA(table *smallTable) string
shortPrintNFA(table *smallTable) string
}

// nullPrinter is what the name says, a do-nothing implementation of the printer interface which ideally
// should consume close to zero CPU cycles.
type nullPrinter struct{}

const noPP = "prettyprinting not enabled"

func (*nullPrinter) labelTable(_ *smallTable, _ string) {
}
func (*nullPrinter) printNFA(_ *smallTable) string {
return noPP
}
func (*nullPrinter) shortPrintNFA(_ *smallTable) string {
return noPP
}

// prettyPrinter makes a human-readable representation of a NFA; each smallTable may be
// given a label and as a side effect will get a random 3-digit serial number. For an example
// of the output, see the functions TestPP and TestNullPP in prettyprinter_test.go
type prettyPrinter struct {
randInts rand.Source
tableLabels map[*smallTable]string
tableSerials map[*smallTable]uint
}

func newPrettyPrinter(seed int) *prettyPrinter {
return &prettyPrinter{
randInts: rand.NewSource(int64(seed)),
tableLabels: make(map[*smallTable]string),
tableSerials: make(map[*smallTable]uint),
}
}

func (pp *prettyPrinter) tableSerial(t *smallTable) uint {
return pp.tableSerials[t]
}
func (pp *prettyPrinter) tableLabel(t *smallTable) string {
return pp.tableLabels[t]
}

func (pp *prettyPrinter) labelTable(table *smallTable, label string) {
pp.tableLabels[table] = label
pp.tableSerials[table] = uint(pp.randInts.Int63()%500 + 500)
}

func (pp *prettyPrinter) printNFA(t *smallTable) string {
return pp.printNFAStep(&faState{table: t}, 0, make(map[*smallTable]bool))
}

func (pp *prettyPrinter) printNFAStep(fas *faState, indent int, already map[*smallTable]bool) string {
t := fas.table
trailer := "\n"
if len(fas.fieldTransitions) != 0 {
trailer = fmt.Sprintf(" [%d transition(s)]\n", len(fas.fieldTransitions))
}
s := " " + pp.printTable(t) + trailer
for _, step := range t.steps {
if step != nil {
for _, state := range step.steps {
_, ok := already[state.table]
if !ok {
already[state.table] = true
s += pp.printNFAStep(state, indent+1, already)
}
}
}
}
return s
}

func (pp *prettyPrinter) printTable(t *smallTable) string {
// going to build a string rep of a smallTable based on the unpacked form
// each line is going to be a range like
// 'c' .. 'e' => %X
// lines where the *faNext is nil are omitted
var rows []string
unpacked := unpackTable(t)

var rangeStart int
var b int

defTrans := unpacked[0]

for {
for b < len(unpacked) && unpacked[b] == nil {
b++
}
if b == len(unpacked) {
break
}
rangeStart = b
lastN := unpacked[b]
for b < len(unpacked) && unpacked[b] == lastN {
b++
}
if lastN != defTrans {
row := ""
if b == rangeStart+1 {
row += fmt.Sprintf("'%s'", branchChar(byte(rangeStart)))
} else {
row += fmt.Sprintf("'%s'…'%s'", branchChar(byte(rangeStart)), branchChar(byte(b-1)))
}
row += " → " + pp.nextString(lastN)
rows = append(rows, row)
}
}
serial := pp.tableSerial(t)
label := pp.tableLabel(t)
if defTrans != nil {
dtString := "★ → " + pp.nextString(defTrans)
return fmt.Sprintf("%d [%s] ", serial, label) + strings.Join(rows, " / ") + " / " + dtString
} else {
return fmt.Sprintf("%d [%s] ", serial, label) + strings.Join(rows, " / ")
}
}

func (pp *prettyPrinter) nextString(n *faNext) string {
var snames []string
for _, step := range n.steps {
snames = append(snames, fmt.Sprintf("%d %s",
pp.tableSerial(step.table), pp.tableLabel(step.table)))
}
return "[" + strings.Join(snames, " · ") + "]"
}

func branchChar(b byte) string {
switch b {
// TODO: Figure out how to test commented-out cases
// case 0:
// return "∅"
case valueTerminator:
return "ℵ"
// case byte(byteCeiling):
// return "♾️"
default:
return fmt.Sprintf("%c", b)
}
}

func (pp *prettyPrinter) shortPrintNFA(table *smallTable) string {
return fmt.Sprintf("%d-%s", pp.tableSerials[table], pp.tableLabels[table])
}
35 changes: 35 additions & 0 deletions prettyprinter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package quamina

import (
"testing"
)

func TestPP(t *testing.T) {
pp := newPrettyPrinter(1)
table, _ := makeShellStyleAutomaton([]byte(`"x*9"`), pp)
pp.labelTable(table, "START HERE")
wanted := ` 758 [START HERE] '"' → [910 on " at 0]
910 [on " at 0] 'x' → [821 gS at 2]
821 [gS at 2] '9' → [551 gX on 9 at 3] / ★ → [821 gS at 2]
551 [gX on 9 at 3] '"' → [937 on " at 4] / '9' → [551 gX on 9 at 3] / ★ → [821 gS at 2]
937 [on " at 4] '9' → [551 gX on 9 at 3] / 'ℵ' → [820 last step at 5] / ★ → [821 gS at 2]
820 [last step at 5] [1 transition(s)]
`
s := pp.printNFA(table)
if s != wanted {
t.Errorf("LONG: wanted\n<%s>\ngot\n<%s>\n", wanted, s)
}
if pp.shortPrintNFA(table) != "758-START HERE" {
t.Errorf("SHORT: wanted <%s> got <%s>\n", "758-START HERE", pp.shortPrintNFA(table))
}
}

func TestNullPP(t *testing.T) {
np := &nullPrinter{}
table := newSmallTable()
table.addByteStep(3, &faNext{})
np.labelTable(table, "foo")
if np.printNFA(table) != noPP || np.shortPrintNFA(table) != noPP {
t.Error("didn't get noPP")
}
}
14 changes: 7 additions & 7 deletions shell_style.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func readShellStyleSpecial(pb *patternBuild, valsIn []typedVal) (pathVals []type
}

// makeShellStyleAutomaton - recognize a "-delimited string containing one '*' glob.
func makeShellStyleAutomaton(val []byte) (start *smallTable, nextField *fieldMatcher) {
func makeShellStyleAutomaton(val []byte, printer printer) (start *smallTable, nextField *fieldMatcher) {
table := newSmallTable()
start = table
nextField = newFieldMatcher()
Expand All @@ -69,26 +69,26 @@ func makeShellStyleAutomaton(val []byte) (start *smallTable, nextField *fieldMat
if i == len(val)-2 {
step := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}}
table.setDefault(&faNext{steps: []*faState{step}})
//DEBUG step.table.label = fmt.Sprintf("prefix escape at %d", i)
printer.labelTable(table, fmt.Sprintf("prefix escape at %d", i))
return
}

// loop back on everything
globStep = &faState{table: table}
//DEBUG table.label = fmt.Sprintf("gS at %d", i)
printer.labelTable(table, fmt.Sprintf("gS at %d", i))
table.setDefault(&faNext{steps: []*faState{globStep}})

// escape the glob on the next char from the pattern - remember the byte and the state escaped to
i++
globExitByte = val[i]
globExitStep = &faState{table: newSmallTable()}
//DEBUG globExitStep.table.label = fmt.Sprintf("gX on %c at %d", val[i], i)
printer.labelTable(globExitStep.table, fmt.Sprintf("gX on %c at %d", val[i], i))
// escape the glob
table.addByteStep(globExitByte, &faNext{steps: []*faState{globExitStep}})
table = globExitStep.table
} else {
nextStep := &faState{table: newSmallTable()}
//DEBUG nextStep.table.label = fmt.Sprintf("on %c at %d", val[i], i)
printer.labelTable(nextStep.table, fmt.Sprintf("on %c at %d", val[i], i))

// we're going to move forward on 'ch'. On anything else, we leave it at nil or - if we've passed
// a glob, loop back to the glob stae. if 'ch' is also the glob exit byte, also put in a transfer
Expand All @@ -110,14 +110,14 @@ func makeShellStyleAutomaton(val []byte) (start *smallTable, nextField *fieldMat
}

lastStep := &faState{table: newSmallTable(), fieldTransitions: []*fieldMatcher{nextField}}
//DEBUG lastStep.table.label = fmt.Sprintf("last step at %d", i)
printer.labelTable(lastStep.table, fmt.Sprintf("last step at %d", i))
if globExitStep != nil {
table.setDefault(&faNext{steps: []*faState{globStep}})
table.addByteStep(globExitByte, &faNext{steps: []*faState{globExitStep}})
table.addByteStep(valueTerminator, &faNext{steps: []*faState{lastStep}})
} else {
table.addByteStep(valueTerminator, &faNext{steps: []*faState{lastStep}})
}
// fmt.Printf("new for [%s]: %s\n", string(val), start.dump())
// fmt.Printf("new for [%s]: %s\n", string(val), printer.printNFA(start))
return
}
2 changes: 1 addition & 1 deletion shell_style_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func TestMakeShellStyleAutomaton(t *testing.T) {
}

for i, pattern := range patterns {
a, wanted := makeShellStyleAutomaton([]byte(pattern))
a, wanted := makeShellStyleAutomaton([]byte(pattern), &nullPrinter{})
vm := newValueMatcher()
vmf := vmFields{startTable: a}
vm.update(&vmf)
Expand Down
Loading