Skip to content

Commit

Permalink
Change APIs
Browse files Browse the repository at this point in the history
Change fields of tokens, results of lexical analysis, as follows:
- Rename: mode -> mode_id
- Rename: kind_id -> mode_kind_id
- Add: kind_id

The kind ID is unique across all modes, but the mode kind ID is unique only within a mode.

Change fields of a transition table as follows:
- Rename: initial_mode -> initial_mode_id
- Rename: modes -> mode_names
- Rename: kinds -> kind_names
- Rename: specs[].kinds -> specs[].kind_names
- Rename: specs[].dfa.initial_state -> specs[].dfa.initial_state_id

Change public types defined in the spec package as follows:
- Rename: LexModeNum -> LexModeID
- Rename: LexKind -> LexKindName
- Add: LexKindID
- Add: StateID
  • Loading branch information
nihei9 committed Aug 1, 2021
1 parent 03e3688 commit 2433c27
Show file tree
Hide file tree
Showing 11 changed files with 289 additions and 231 deletions.
46 changes: 23 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,33 +47,33 @@ If you want to make sure that the lexical specification behaves as expected, you
⚠️ `maleeni lex` and the driver can handle only UTF-8-encoded input.

```sh
$ echo -n 'The truth is out there.' | maleeni lex clexspec.json | jq -r '[.kind_name, .text, .eof] | @csv'
"word","The",false
"whitespace"," ",false
"word","truth",false
"whitespace"," ",false
"word","is",false
"whitespace"," ",false
"word","out",false
"whitespace"," ",false
"word","there",false
"punctuation",".",false
"","",true
$ echo -n 'The truth is out there.' | maleeni lex clexspec.json | jq -r '[.kind_id, .kind_name, .text, .eof] | @csv'
2,"word","The",false
1,"whitespace"," ",false
2,"word","truth",false
1,"whitespace"," ",false
2,"word","is",false
1,"whitespace"," ",false
2,"word","out",false
1,"whitespace"," ",false
2,"word","there",false
3,"punctuation",".",false
0,"","",true
```

The JSON format of tokens that `maleeni lex` command prints is as follows:

| Field | Type | Description |
|-----------|-------------------|----------------------------------------------------------------------------------------|
| mode | integer | `mode` represents a number that corresponds to a `mode_name`. |
| mode_name | string | `mode_name` is a mode name that represents in which mode the lexer detected the token. |
| kind_id | integer | `kind_id` represents an ID of a kind and is unique among modes. |
| kind | integer | `kind` represents a number that corresponds to a `KindName`. |
| kind_name | string | `kind_name` is a kind name that represents what kind the token has. |
| match | array of integers | `match` is a byte sequence matched a pattern of a lexical specification. |
| text | string | `text` is a string representation of `match`. |
| eof | bool | If `eof` is true, it means the token is the EOF token. |
| invalid | bool | If `invalid` is true, it means the token is an error token. |
| Field | Type | Description |
|--------------|-------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------|
| mode_id | integer | An ID of a lex mode. |
| mode_name | string | A name of a lex mode. |
| kind_id | integer | An ID of a kind. This is unique among all modes. |
| mode_kind_id | integer | An ID of a lexical kind. This is unique only within a mode. Note that you need to use the `kind_id` field if you want to identify a kind across all modes. |
| kind_name | string | A name of a lexical kind. |
| match | array of integers | A byte sequence of a lexeme. |
| text | string | A string representation of a lexeme. |
| eof | bool | When this field is `true`, it means the token is the EOF token. |
| invalid | bool | When this field is `true`, it means the token is an error token. |

When using the driver, please import `github.com/nihei9/maleeni/driver` and `github.com/nihei9/maleeni/spec` package.
You can use the driver easily in the following way:
Expand Down
6 changes: 4 additions & 2 deletions compiler/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package compiler
import (
"fmt"
"io"

"github.com/nihei9/maleeni/spec"
)

type astNode interface {
Expand Down Expand Up @@ -78,13 +80,13 @@ func (n *symbolNode) last() *symbolPositionSet {
}

type endMarkerNode struct {
id int
id spec.LexModeKindID
pos symbolPosition
firstMemo *symbolPositionSet
lastMemo *symbolPositionSet
}

func newEndMarkerNode(id int) *endMarkerNode {
func newEndMarkerNode(id spec.LexModeKindID) *endMarkerNode {
return &endMarkerNode{
id: id,
pos: symbolPositionNil,
Expand Down
112 changes: 64 additions & 48 deletions compiler/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,28 +54,28 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
}
}

modeEntries, modes, modeNums, fragmetns := groupEntriesByLexMode(lexspec.Entries)
modeEntries, modeNames, modeName2ID, fragmetns := groupEntriesByLexMode(lexspec.Entries)

modeSpecs := []*spec.CompiledLexModeSpec{
nil,
}
for i, es := range modeEntries[1:] {
modeName := modes[i+1]
modeName := modeNames[i+1]
config.logger.Log("Compile %v mode:", modeName)
modeSpec, err := compile(es, modeNums, fragmetns, config)
modeSpec, err := compile(es, modeName2ID, fragmetns, config)
if err != nil {
return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err)
}
modeSpecs = append(modeSpecs, modeSpec)
}

var kindNames []spec.LexKind
var name2ID map[spec.LexKind]spec.LexKindID
var kindNames []spec.LexKindName
var name2ID map[spec.LexKindName]spec.LexKindID
{
name2ID = map[spec.LexKind]spec.LexKindID{}
name2ID = map[spec.LexKindName]spec.LexKindID{}
id := spec.LexKindIDMin
for _, modeSpec := range modeSpecs[1:] {
for _, name := range modeSpec.Kinds[1:] {
for _, name := range modeSpec.KindNames[1:] {
if _, ok := name2ID[name]; ok {
continue
}
Expand All @@ -84,7 +84,7 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
}
}

kindNames = make([]spec.LexKind, len(name2ID)+1)
kindNames = make([]spec.LexKindName, len(name2ID)+1)
for name, id := range name2ID {
kindNames[id] = name
}
Expand All @@ -94,8 +94,8 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
{
kindIDs = make([][]spec.LexKindID, len(modeSpecs))
for i, modeSpec := range modeSpecs[1:] {
ids := make([]spec.LexKindID, len(modeSpec.Kinds))
for modeID, name := range modeSpec.Kinds {
ids := make([]spec.LexKindID, len(modeSpec.KindNames))
for modeID, name := range modeSpec.KindNames {
if modeID == 0 {
continue
}
Expand All @@ -106,25 +106,25 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
}

return &spec.CompiledLexSpec{
InitialMode: spec.LexModeNumDefault,
Modes: modes,
Kinds: kindNames,
InitialModeID: spec.LexModeIDDefault,
ModeNames: modeNames,
KindNames: kindNames,
KindIDs: kindIDs,
CompressionLevel: config.compLv,
Specs: modeSpecs,
}, nil
}

func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeNum, map[string]*spec.LexEntry) {
modes := []spec.LexModeName{
func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[string]*spec.LexEntry) {
modeNames := []spec.LexModeName{
spec.LexModeNameNil,
spec.LexModeNameDefault,
}
modeNums := map[spec.LexModeName]spec.LexModeNum{
spec.LexModeNameNil: spec.LexModeNumNil,
spec.LexModeNameDefault: spec.LexModeNumDefault,
modeName2ID := map[spec.LexModeName]spec.LexModeID{
spec.LexModeNameNil: spec.LexModeIDNil,
spec.LexModeNameDefault: spec.LexModeIDDefault,
}
lastModeNum := spec.LexModeNumDefault
lastModeID := spec.LexModeIDDefault
modeEntries := [][]*spec.LexEntry{
nil,
{},
Expand All @@ -141,30 +141,30 @@ func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec
spec.LexModeNameDefault,
}
}
for _, mode := range ms {
num, ok := modeNums[mode]
for _, modeName := range ms {
modeID, ok := modeName2ID[modeName]
if !ok {
num = lastModeNum.Succ()
lastModeNum = num
modeNums[mode] = num
modes = append(modes, mode)
modeID = lastModeID + 1
lastModeID = modeID
modeName2ID[modeName] = modeID
modeNames = append(modeNames, modeName)
modeEntries = append(modeEntries, []*spec.LexEntry{})
}
modeEntries[num] = append(modeEntries[num], e)
modeEntries[modeID] = append(modeEntries[modeID], e)
}
}
return modeEntries, modes, modeNums, fragments
return modeEntries, modeNames, modeName2ID, fragments
}

func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexModeNum, fragments map[string]*spec.LexEntry, config *compilerConfig) (*spec.CompiledLexModeSpec, error) {
var kinds []spec.LexKind
var patterns map[int][]byte
func compile(entries []*spec.LexEntry, modeName2ID map[spec.LexModeName]spec.LexModeID, fragments map[string]*spec.LexEntry, config *compilerConfig) (*spec.CompiledLexModeSpec, error) {
var kindNames []spec.LexKindName
var patterns map[spec.LexModeKindID][]byte
{
kinds = append(kinds, spec.LexKindNil)
patterns = map[int][]byte{}
kindNames = append(kindNames, spec.LexKindNameNil)
patterns = map[spec.LexModeKindID][]byte{}
for i, e := range entries {
kinds = append(kinds, e.Kind)
patterns[i+1] = []byte(e.Pattern)
kindNames = append(kindNames, e.Kind)
patterns[spec.LexModeKindID(i+1)] = []byte(e.Pattern)
}

config.logger.Log("Patterns:")
Expand All @@ -173,16 +173,16 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod
}
}

push := []spec.LexModeNum{
spec.LexModeNumNil,
push := []spec.LexModeID{
spec.LexModeIDNil,
}
pop := []int{
0,
}
for _, e := range entries {
pushV := spec.LexModeNumNil
pushV := spec.LexModeIDNil
if e.Push != "" {
pushV = modeNums[e.Push]
pushV = modeName2ID[e.Push]
}
push = append(push, pushV)
popV := 0
Expand Down Expand Up @@ -222,7 +222,7 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod

config.logger.Log(`DFA:
States: %v states (%v entries)
Initial State: %v`, tranTab.RowCount, tranTab.RowCount*tranTab.ColCount, tranTab.InitialState)
Initial State ID: %v`, tranTab.RowCount, tranTab.RowCount*tranTab.ColCount, tranTab.InitialStateID)
config.logger.Log(" Accepting States:")
for state, symbol := range tranTab.AcceptingStates {
config.logger.Log(" %v: %v", state, symbol)
Expand All @@ -244,10 +244,10 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod
}

return &spec.CompiledLexModeSpec{
Kinds: kinds,
Push: push,
Pop: pop,
DFA: tranTab,
KindNames: kindNames,
Push: push,
Pop: pop,
DFA: tranTab,
}, nil
}

Expand All @@ -259,7 +259,7 @@ const (
func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
ueTab := compressor.NewUniqueEntriesTable()
{
orig, err := compressor.NewOriginalTable(tranTab.UncompressedTransition, tranTab.ColCount)
orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
if err != nil {
return nil, err
}
Expand All @@ -285,8 +285,8 @@ func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.Transition
UniqueEntries: &spec.RowDisplacementTable{
OriginalRowCount: rdTab.OriginalRowCount,
OriginalColCount: rdTab.OriginalColCount,
EmptyValue: rdTab.EmptyValue,
Entries: rdTab.Entries,
EmptyValue: spec.StateIDNil,
Entries: convertIntSliceToStateIDSlice(rdTab.Entries),
Bounds: rdTab.Bounds,
RowDisplacement: rdTab.RowDisplacement,
},
Expand All @@ -302,7 +302,7 @@ func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.Transition
func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
ueTab := compressor.NewUniqueEntriesTable()
{
orig, err := compressor.NewOriginalTable(tranTab.UncompressedTransition, tranTab.ColCount)
orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
if err != nil {
return nil, err
}
Expand All @@ -313,7 +313,7 @@ func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.Transition
}

tranTab.Transition = &spec.UniqueEntriesTable{
UncompressedUniqueEntries: ueTab.UniqueEntries,
UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries),
RowNums: ueTab.RowNums,
OriginalRowCount: ueTab.OriginalRowCount,
OriginalColCount: ueTab.OriginalColCount,
Expand All @@ -322,3 +322,19 @@ func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.Transition

return tranTab, nil
}

// convertStateIDSliceToIntSlice returns a new []int holding the result of
// calling Int on each element of s, in the same order. The result is always
// non-nil and has the same length as s.
func convertStateIDSliceToIntSlice(s []spec.StateID) []int {
	ints := make([]int, 0, len(s))
	for _, id := range s {
		ints = append(ints, id.Int())
	}
	return ints
}

// convertIntSliceToStateIDSlice returns a new []spec.StateID in which each
// element is the corresponding element of s converted to spec.StateID. The
// result is always non-nil and has the same length as s.
func convertIntSliceToStateIDSlice(s []int) []spec.StateID {
	ids := make([]spec.StateID, 0, len(s))
	for _, n := range s {
		ids = append(ids, spec.StateID(n))
	}
	return ids
}
18 changes: 9 additions & 9 deletions compiler/dfa.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
type DFA struct {
States []string
InitialState string
AcceptingStatesTable map[string]int
AcceptingStatesTable map[string]spec.LexModeKindID
TransitionTable map[string][256]string
}

Expand Down Expand Up @@ -65,7 +65,7 @@ func genDFA(root astNode, symTab *symbolTable) *DFA {
}
}

accTab := map[string]int{}
accTab := map[string]spec.LexModeKindID{}
{
for h, s := range stateMap {
for _, pos := range s.set() {
Expand Down Expand Up @@ -104,33 +104,33 @@ func genDFA(root astNode, symTab *symbolTable) *DFA {
}

func genTransitionTable(dfa *DFA) (*spec.TransitionTable, error) {
state2Num := map[string]int{}
stateHash2ID := map[string]spec.StateID{}
for i, s := range dfa.States {
// Since 0 represents an invalid value in a transition table,
// assign a number greater than or equal to 1 to states.
state2Num[s] = i + 1
stateHash2ID[s] = spec.StateID(i + spec.StateIDMin.Int())
}

acc := make([]int, len(dfa.States)+1)
acc := make([]spec.LexModeKindID, len(dfa.States)+1)
for _, s := range dfa.States {
id, ok := dfa.AcceptingStatesTable[s]
if !ok {
continue
}
acc[state2Num[s]] = id
acc[stateHash2ID[s]] = id
}

rowCount := len(dfa.States) + 1
colCount := 256
tran := make([]int, rowCount*colCount)
tran := make([]spec.StateID, rowCount*colCount)
for s, tab := range dfa.TransitionTable {
for v, to := range tab {
tran[state2Num[s]*256+v] = state2Num[to]
tran[stateHash2ID[s].Int()*256+v] = stateHash2ID[to]
}
}

return &spec.TransitionTable{
InitialState: state2Num[dfa.InitialState],
InitialStateID: stateHash2ID[dfa.InitialState],
AcceptingStates: acc,
UncompressedTransition: tran,
RowCount: rowCount,
Expand Down
Loading

0 comments on commit 2433c27

Please sign in to comment.