Change APIs

Change fields of tokens, results of lexical analysis, as follows:
- Rename: mode -> mode_id
- Rename: kind_id -> mode_kind_id
- Add: kind_id

The kind ID is unique across all modes, but the mode kind ID is unique only within a mode.

Change fields of a transition table as follows:
- Rename: initial_mode -> initial_mode_id
- Rename: modes -> mode_names
- Rename: kinds -> kind_names
- Rename: specs[].kinds -> specs[].kind_names
- Rename: specs[].dfa.initial_state -> specs[].dfa.initial_state_id

Change public types defined in the spec package as follows:
- Rename: LexModeNum -> LexModeID
- Rename: LexKind -> LexKindName
- Add: LexKindID
- Add: StateID
nihei9 · Aug 1, 2021 · 2433c27 · 2433c27
1 parent 03e3688
commit 2433c27
Show file tree

Hide file tree

Showing 11 changed files with 289 additions and 231 deletions.
diff --git a/README.md b/README.md
@@ -47,33 +47,33 @@ If you want to make sure that the lexical specification behaves as expected, you
 ⚠️ An encoding that `maleeni lex` and the driver can handle is only UTF-8.
 
 ```sh
-$ echo -n 'The truth is out there.' | maleeni lex clexspec.json | jq -r '[.kind_name, .text, .eof] | @csv'
-"word","The",false
-"whitespace"," ",false
-"word","truth",false
-"whitespace"," ",false
-"word","is",false
-"whitespace"," ",false
-"word","out",false
-"whitespace"," ",false
-"word","there",false
-"punctuation",".",false
-"","",true
+$ echo -n 'The truth is out there.' | maleeni lex clexspec.json | jq -r '[.kind_id, .kind_name, .text, .eof] | @csv'
+2,"word","The",false
+1,"whitespace"," ",false
+2,"word","truth",false
+1,"whitespace"," ",false
+2,"word","is",false
+1,"whitespace"," ",false
+2,"word","out",false
+1,"whitespace"," ",false
+2,"word","there",false
+3,"punctuation",".",false
+0,"","",true
 ```
 
 The JSON format of tokens that `maleeni lex` command prints is as follows:
 
-| Field     | Type              | Description                                                                            |
-|-----------|-------------------|----------------------------------------------------------------------------------------|
-| mode      | integer           | `mode` represents a number that corresponds to a `mode_name`.                          |
-| mode_name | string            | `mode_name` is a mode name that represents in which mode the lexer detected the token. |
-| kind_id   | integer           | `kind_id` represents an ID of a kind and is unique among modes.                        |
-| kind      | integer           | `kind` represents a number that corresponds to a `KindName`.                           |
-| kind_name | string            | `kind_name` is a kind name that represents what kind the token has.                    |
-| match     | array of integers | `match` is a byte sequence matched a pattern of a lexical specification.               |
-| text      | string            | `text` is a string representation of `match`.                                          |
-| eof       | bool              | If `eof` is true, it means the token is the EOF token.                                 |
-| invalid   | bool              | If `invalid` is true, it means the token is an error token.                            |
+| Field        | Type              | Description                                                                                                                                           |
+|--------------|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------|
+| mode_id      | integer           | An ID of a lex mode.                                                                                                                                  |
+| mode_name    | string            | A name of a lex mode.                                                                                                                                 |
+| kind_id      | integer           | An ID of a kind. This is unique among all modes.                                                                                                      |
+| mode_kind_id | integer           | An ID of a lexical kind. This is unique only within a mode. Note that you need to use the `kind_id` field to identify a kind across all modes.        |
+| kind_name    | string            | A name of a lexical kind.                                                                                                                             |
+| match        | array of integers | A byte sequence of a lexeme.                                                                                                                          |
+| text         | string            | A string representation of a lexeme.                                                                                                                  |
+| eof          | bool              | When this field is `true`, it means the token is the EOF token.                                                                                       |
+| invalid      | bool              | When this field is `true`, it means the token is an error token.                                                                                      |
 
 When using the driver, please import `github.com/nihei9/maleeni/driver` and `github.com/nihei9/maleeni/spec` package.
 You can use the driver easily in the following way:

diff --git a/compiler/ast.go b/compiler/ast.go
@@ -3,6 +3,8 @@ package compiler
 import (
 	"fmt"
 	"io"
+
+	"github.com/nihei9/maleeni/spec"
 )
 
 type astNode interface {
@@ -78,13 +80,13 @@ func (n *symbolNode) last() *symbolPositionSet {
 }
 
 type endMarkerNode struct {
-	id        int
+	id        spec.LexModeKindID
 	pos       symbolPosition
 	firstMemo *symbolPositionSet
 	lastMemo  *symbolPositionSet
 }
 
-func newEndMarkerNode(id int) *endMarkerNode {
+func newEndMarkerNode(id spec.LexModeKindID) *endMarkerNode {
 	return &endMarkerNode{
 		id:  id,
 		pos: symbolPositionNil,

diff --git a/compiler/compiler.go b/compiler/compiler.go
@@ -54,28 +54,28 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
 		}
 	}
 
-	modeEntries, modes, modeNums, fragmetns := groupEntriesByLexMode(lexspec.Entries)
+	modeEntries, modeNames, modeName2ID, fragmetns := groupEntriesByLexMode(lexspec.Entries)
 
 	modeSpecs := []*spec.CompiledLexModeSpec{
 		nil,
 	}
 	for i, es := range modeEntries[1:] {
-		modeName := modes[i+1]
+		modeName := modeNames[i+1]
 		config.logger.Log("Compile %v mode:", modeName)
-		modeSpec, err := compile(es, modeNums, fragmetns, config)
+		modeSpec, err := compile(es, modeName2ID, fragmetns, config)
 		if err != nil {
 			return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err)
 		}
 		modeSpecs = append(modeSpecs, modeSpec)
 	}
 
-	var kindNames []spec.LexKind
-	var name2ID map[spec.LexKind]spec.LexKindID
+	var kindNames []spec.LexKindName
+	var name2ID map[spec.LexKindName]spec.LexKindID
 	{
-		name2ID = map[spec.LexKind]spec.LexKindID{}
+		name2ID = map[spec.LexKindName]spec.LexKindID{}
 		id := spec.LexKindIDMin
 		for _, modeSpec := range modeSpecs[1:] {
-			for _, name := range modeSpec.Kinds[1:] {
+			for _, name := range modeSpec.KindNames[1:] {
 				if _, ok := name2ID[name]; ok {
 					continue
 				}
@@ -84,7 +84,7 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
 			}
 		}
 
-		kindNames = make([]spec.LexKind, len(name2ID)+1)
+		kindNames = make([]spec.LexKindName, len(name2ID)+1)
 		for name, id := range name2ID {
 			kindNames[id] = name
 		}
@@ -94,8 +94,8 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
 	{
 		kindIDs = make([][]spec.LexKindID, len(modeSpecs))
 		for i, modeSpec := range modeSpecs[1:] {
-			ids := make([]spec.LexKindID, len(modeSpec.Kinds))
-			for modeID, name := range modeSpec.Kinds {
+			ids := make([]spec.LexKindID, len(modeSpec.KindNames))
+			for modeID, name := range modeSpec.KindNames {
 				if modeID == 0 {
 					continue
 				}
@@ -106,25 +106,25 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
 	}
 
 	return &spec.CompiledLexSpec{
-		InitialMode:      spec.LexModeNumDefault,
-		Modes:            modes,
-		Kinds:            kindNames,
+		InitialModeID:    spec.LexModeIDDefault,
+		ModeNames:        modeNames,
+		KindNames:        kindNames,
 		KindIDs:          kindIDs,
 		CompressionLevel: config.compLv,
 		Specs:            modeSpecs,
 	}, nil
 }
 
-func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeNum, map[string]*spec.LexEntry) {
-	modes := []spec.LexModeName{
+func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[string]*spec.LexEntry) {
+	modeNames := []spec.LexModeName{
 		spec.LexModeNameNil,
 		spec.LexModeNameDefault,
 	}
-	modeNums := map[spec.LexModeName]spec.LexModeNum{
-		spec.LexModeNameNil:     spec.LexModeNumNil,
-		spec.LexModeNameDefault: spec.LexModeNumDefault,
+	modeName2ID := map[spec.LexModeName]spec.LexModeID{
+		spec.LexModeNameNil:     spec.LexModeIDNil,
+		spec.LexModeNameDefault: spec.LexModeIDDefault,
 	}
-	lastModeNum := spec.LexModeNumDefault
+	lastModeID := spec.LexModeIDDefault
 	modeEntries := [][]*spec.LexEntry{
 		nil,
 		{},
@@ -141,30 +141,30 @@ func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec
 				spec.LexModeNameDefault,
 			}
 		}
-		for _, mode := range ms {
-			num, ok := modeNums[mode]
+		for _, modeName := range ms {
+			modeID, ok := modeName2ID[modeName]
 			if !ok {
-				num = lastModeNum.Succ()
-				lastModeNum = num
-				modeNums[mode] = num
-				modes = append(modes, mode)
+				modeID = lastModeID + 1
+				lastModeID = modeID
+				modeName2ID[modeName] = modeID
+				modeNames = append(modeNames, modeName)
 				modeEntries = append(modeEntries, []*spec.LexEntry{})
 			}
-			modeEntries[num] = append(modeEntries[num], e)
+			modeEntries[modeID] = append(modeEntries[modeID], e)
 		}
 	}
-	return modeEntries, modes, modeNums, fragments
+	return modeEntries, modeNames, modeName2ID, fragments
 }
 
-func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexModeNum, fragments map[string]*spec.LexEntry, config *compilerConfig) (*spec.CompiledLexModeSpec, error) {
-	var kinds []spec.LexKind
-	var patterns map[int][]byte
+func compile(entries []*spec.LexEntry, modeName2ID map[spec.LexModeName]spec.LexModeID, fragments map[string]*spec.LexEntry, config *compilerConfig) (*spec.CompiledLexModeSpec, error) {
+	var kindNames []spec.LexKindName
+	var patterns map[spec.LexModeKindID][]byte
 	{
-		kinds = append(kinds, spec.LexKindNil)
-		patterns = map[int][]byte{}
+		kindNames = append(kindNames, spec.LexKindNameNil)
+		patterns = map[spec.LexModeKindID][]byte{}
 		for i, e := range entries {
-			kinds = append(kinds, e.Kind)
-			patterns[i+1] = []byte(e.Pattern)
+			kindNames = append(kindNames, e.Kind)
+			patterns[spec.LexModeKindID(i+1)] = []byte(e.Pattern)
 		}
 
 		config.logger.Log("Patterns:")
@@ -173,16 +173,16 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod
 		}
 	}
 
-	push := []spec.LexModeNum{
-		spec.LexModeNumNil,
+	push := []spec.LexModeID{
+		spec.LexModeIDNil,
 	}
 	pop := []int{
 		0,
 	}
 	for _, e := range entries {
-		pushV := spec.LexModeNumNil
+		pushV := spec.LexModeIDNil
 		if e.Push != "" {
-			pushV = modeNums[e.Push]
+			pushV = modeName2ID[e.Push]
 		}
 		push = append(push, pushV)
 		popV := 0
@@ -222,7 +222,7 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod
 
 		config.logger.Log(`DFA:
   States: %v states (%v entries)
-  Initial State: %v`, tranTab.RowCount, tranTab.RowCount*tranTab.ColCount, tranTab.InitialState)
+  Initial State ID: %v`, tranTab.RowCount, tranTab.RowCount*tranTab.ColCount, tranTab.InitialStateID)
 		config.logger.Log("  Accepting States:")
 		for state, symbol := range tranTab.AcceptingStates {
 			config.logger.Log("    %v: %v", state, symbol)
@@ -244,10 +244,10 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod
 	}
 
 	return &spec.CompiledLexModeSpec{
-		Kinds: kinds,
-		Push:  push,
-		Pop:   pop,
-		DFA:   tranTab,
+		KindNames: kindNames,
+		Push:      push,
+		Pop:       pop,
+		DFA:       tranTab,
 	}, nil
 }
 
@@ -259,7 +259,7 @@ const (
 func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
 	ueTab := compressor.NewUniqueEntriesTable()
 	{
-		orig, err := compressor.NewOriginalTable(tranTab.UncompressedTransition, tranTab.ColCount)
+		orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
 		if err != nil {
 			return nil, err
 		}
@@ -285,8 +285,8 @@ func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.Transition
 		UniqueEntries: &spec.RowDisplacementTable{
 			OriginalRowCount: rdTab.OriginalRowCount,
 			OriginalColCount: rdTab.OriginalColCount,
-			EmptyValue:       rdTab.EmptyValue,
-			Entries:          rdTab.Entries,
+			EmptyValue:       spec.StateIDNil,
+			Entries:          convertIntSliceToStateIDSlice(rdTab.Entries),
 			Bounds:           rdTab.Bounds,
 			RowDisplacement:  rdTab.RowDisplacement,
 		},
@@ -302,7 +302,7 @@ func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.Transition
 func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
 	ueTab := compressor.NewUniqueEntriesTable()
 	{
-		orig, err := compressor.NewOriginalTable(tranTab.UncompressedTransition, tranTab.ColCount)
+		orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
 		if err != nil {
 			return nil, err
 		}
@@ -313,7 +313,7 @@ func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.Transition
 	}
 
 	tranTab.Transition = &spec.UniqueEntriesTable{
-		UncompressedUniqueEntries: ueTab.UniqueEntries,
+		UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries),
 		RowNums:                   ueTab.RowNums,
 		OriginalRowCount:          ueTab.OriginalRowCount,
 		OriginalColCount:          ueTab.OriginalColCount,
@@ -322,3 +322,19 @@ func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.Transition
 
 	return tranTab, nil
 }
+
+func convertStateIDSliceToIntSlice(s []spec.StateID) []int {
+	is := make([]int, len(s))
+	for i, v := range s {
+		is[i] = v.Int()
+	}
+	return is
+}
+
+func convertIntSliceToStateIDSlice(s []int) []spec.StateID {
+	ss := make([]spec.StateID, len(s))
+	for i, v := range s {
+		ss[i] = spec.StateID(v)
+	}
+	return ss
+}
diff --git a/compiler/dfa.go b/compiler/dfa.go
@@ -9,7 +9,7 @@ import (
 type DFA struct {
 	States               []string
 	InitialState         string
-	AcceptingStatesTable map[string]int
+	AcceptingStatesTable map[string]spec.LexModeKindID
 	TransitionTable      map[string][256]string
 }
 
@@ -65,7 +65,7 @@ func genDFA(root astNode, symTab *symbolTable) *DFA {
 		}
 	}
 
-	accTab := map[string]int{}
+	accTab := map[string]spec.LexModeKindID{}
 	{
 		for h, s := range stateMap {
 			for _, pos := range s.set() {
@@ -104,33 +104,33 @@ func genDFA(root astNode, symTab *symbolTable) *DFA {
 }
 
 func genTransitionTable(dfa *DFA) (*spec.TransitionTable, error) {
-	state2Num := map[string]int{}
+	stateHash2ID := map[string]spec.StateID{}
 	for i, s := range dfa.States {
 		// Since 0 represents an invalid value in a transition table,
 		// assign a number greater than or equal to 1 to states.
-		state2Num[s] = i + 1
+		stateHash2ID[s] = spec.StateID(i + spec.StateIDMin.Int())
 	}
 
-	acc := make([]int, len(dfa.States)+1)
+	acc := make([]spec.LexModeKindID, len(dfa.States)+1)
 	for _, s := range dfa.States {
 		id, ok := dfa.AcceptingStatesTable[s]
 		if !ok {
 			continue
 		}
-		acc[state2Num[s]] = id
+		acc[stateHash2ID[s]] = id
 	}
 
 	rowCount := len(dfa.States) + 1
 	colCount := 256
-	tran := make([]int, rowCount*colCount)
+	tran := make([]spec.StateID, rowCount*colCount)
 	for s, tab := range dfa.TransitionTable {
 		for v, to := range tab {
-			tran[state2Num[s]*256+v] = state2Num[to]
+			tran[stateHash2ID[s].Int()*256+v] = stateHash2ID[to]
 		}
 	}
 
 	return &spec.TransitionTable{
-		InitialState:           state2Num[dfa.InitialState],
+		InitialStateID:         stateHash2ID[dfa.InitialState],
 		AcceptingStates:        acc,
 		UncompressedTransition: tran,
 		RowCount:               rowCount,