-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.go
104 lines (95 loc) · 2.08 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
package func_purrser
import (
"bufio"
"bytes"
"fmt"
"io"
"regexp"
"unicode"
)
// tokentypetype identifies the kind of a lexed Token.
type tokentypetype int
// Token kinds, numbered from zero by iota in declaration order.
const (
// TokenIdent is a word that does not match rangePattern.
TokenIdent tokentypetype = iota
// TokenRange is a word that matches rangePattern.
TokenRange
// TokenOpenParen is the '(' character.
TokenOpenParen
// TokenCloseParen is the ')' character.
TokenCloseParen
// TokenSeparator is either the ',' or the ';' character.
TokenSeparator
)
// Token is one lexical unit produced by Tokenize.
type Token struct {
// ttype is the kind of the token.
ttype tokentypetype
// content is the source text of the token. Tokenize fills it in only for
// TokenIdent and TokenRange tokens; the other kinds leave it empty.
content string
}
// rangePattern is the regular expression a whole word must match to be
// classified as a TokenRange rather than a TokenIdent. It accepts one or two
// ASCII letters followed by at least one digit, optionally followed by ':'
// and either a run of digits or one or two ASCII letters with optional
// trailing digits, e.g. "A1", "AB12", "A1:7", "A1:B" or "A1:B2".
// NOTE(review): this looks like spreadsheet-style cell/range notation --
// confirm against the parser that consumes these tokens.
const rangePattern = `\A[a-zA-Z]{1,2}\d+(?:\:(?:\d+|[a-zA-Z]{1,2}\d*))?\z`
// runeSelector reports whether a rune belongs to the run being collected.
type runeSelector func(rune) bool

// readall appends init to buf and then keeps appending runes read from input
// for as long as s accepts them.
//
// The first rune rejected by s is pushed back onto input with UnreadRune so
// the caller can read it again. Reaching io.EOF ends the run successfully and
// returns nil; any other read error, a write error on buf, or an UnreadRune
// error is returned unchanged.
func readall(input *bufio.Reader, s runeSelector, init rune, buf *bytes.Buffer) error {
	if _, werr := buf.WriteRune(init); werr != nil {
		return werr
	}
	for {
		r, _, rerr := input.ReadRune()
		switch {
		case rerr == io.EOF:
			// Running out of input simply terminates the run.
			return nil
		case rerr != nil:
			return rerr
		}
		if !s(r) {
			// r is not part of the run: hand it back to the reader.
			return input.UnreadRune()
		}
		if _, werr := buf.WriteRune(r); werr != nil {
			return werr
		}
	}
}
// identBodySelector reports whether c may appear after the first rune of a
// word: any Unicode letter, any Unicode digit, or a colon.
func identBodySelector(c rune) bool {
	switch {
	case c == ':':
		return true
	case unicode.IsLetter(c):
		return true
	default:
		return unicode.IsDigit(c)
	}
}
func Tokenize(f io.Reader) ([]Token, error) {
reader := bufio.NewReader(f)
identBuffer := bytes.NewBufferString("")
var tokens []Token
for {
c, _, err := reader.ReadRune()
if err == io.EOF {
return tokens, nil
} else if err != nil {
return nil, err
}
switch {
case unicode.IsLetter(c):
identBuffer.Reset()
err = readall(reader, identBodySelector, c, identBuffer)
if err != nil {
return nil, err
}
content := identBuffer.String()
identIsRange, err := regexp.MatchString(rangePattern, content)
if err != nil {
return nil, err
}
var tokenType tokentypetype
if identIsRange {
tokenType = TokenRange
} else {
tokenType = TokenIdent
}
tokens = append(tokens, Token{ttype: tokenType, content: identBuffer.String()})
case c == '(':
tokens = append(tokens, Token{ttype: TokenOpenParen})
case c == ')':
tokens = append(tokens, Token{ttype: TokenCloseParen})
case c == ',' || c == ';':
tokens = append(tokens, Token{ttype: TokenSeparator})
case unicode.IsSpace(c):
default:
return nil, fmt.Errorf("Character not recognized: %c", c)
}
}
return tokens, nil
}