#LexicalAnalysis.py
#Author : Craig Clephane
#Last edited : 22/03/2019
#File contains functions which support the lexical analysis of a compiler (Phase one).
#Imported files.
import sys
import tokentable #reads token file.
import errorhandling #reads errorhandling file.
import symtable #reads symbol table.
#Default input stream; not referenced again in the visible part of this module.
input_file = sys.stdin
#Module-level lexer state shared (as globals) by the functions below.
#Most recently read character. Starts as a space so that getToken's whitespace loop performs the first read.
Character = " "
#Column counter: incremented on every read, reset to 0 when a newline is read.
Column = 0
#Line counter: incremented whenever a newline is read.
Line = 1
#Input handle read one character at a time via file.read(1).
#NOTE(review): left as None here, so it presumably has to be assigned by the caller before lexing - confirm.
file = None
#Set to True by grabNextCharacter when a newline ends a non-empty line; set to False on the first newline after a block comment.
endOfLine = False
#Text of the most recent identifier token (assigned in identifiersOrIntegers).
Idname = ""
#Array containing every token emitted so far, as well as the number of tokens.
TokenArray = []
tokenCount = 0
#Set to True by commentsAndDiv once a block comment has been closed; reset by grabNextCharacter at the next newline.
commentGone = False
#Grabs the next character from the file and advances the column counter by one every time this function is called.
#Once a newline is read, the column counter resets to 0 and the line counter advances by one.
def grabNextCharacter():
    """Read one character from ``file`` and update the position counters.

    The character is stored in the global ``Character`` and ``Column`` is
    advanced by one. When the character is a newline, ``Line`` is advanced
    and ``Column`` is reset to 0, and then:

    * if ``commentGone`` is set (a block comment was closed earlier), both
      ``endOfLine`` and ``commentGone`` are cleared;
    * otherwise, if the newline was the only character on its line,
      ``endOfLine`` is left untouched;
    * otherwise ``endOfLine`` is set to True.

    Returns:
        The character read (``''`` at end of input), except for the first
        two newline cases above, which return ``None``. Callers in this
        module only compare the result against specific non-newline
        characters, and should read ``Character`` for the actual value.
    """
    global Character, Column, Line, endOfLine, commentGone
    Character = file.read(1)
    Column += 1
    if Character != '\n':
        return Character

    # Column already counts the newline itself, so 1 means the line held
    # nothing else.
    blank_line = Column == 1
    Line += 1
    Column = 0

    if commentGone:
        # First newline after a closed block comment: it does not count as
        # the end of a code line. (The stray debug print that used to live
        # here has been removed so the lexer no longer writes to stdout.)
        endOfLine = False
        commentGone = False
        return None
    if blank_line:
        return None

    endOfLine = True
    # Original author's note: a token should arguably be handed back to
    # main.py at this point, but this function is also called from several
    # places inside this file.
    return Character
#Function which identifies the following character after a previous character and returns the correct token, based on the parameters.
def follow(expect, ifyes, ifno, errLine, errCol):
    """Resolve an operator that may be one or two characters long.

    Reads the character after the current one. If it equals ``expect`` it is
    consumed and ``ifyes`` is the resulting token; otherwise nothing further
    is consumed and ``ifno`` is the resulting token. Either way the token is
    recorded through ``bufferTokens``.

    Args:
        expect: Character that completes the two-character form.
        ifyes: Token for the two-character form.
        ifno: Token for the single-character form. When this equals
            ``tokentable.TokenEOF`` the unrecognised-character handler is
            invoked before the token is recorded.
        errLine: Line number passed through to the result and the handler.
        errCol: Column number passed through to the result and the handler.

    Returns:
        A ``(token, errLine, errCol)`` tuple.
    """
    if grabNextCharacter() == expect:
        # Two-character operator: step past its second character.
        grabNextCharacter()
        token = ifyes
    else:
        if ifno == tokentable.TokenEOF:
            # NOTE(review): followUnrecognized is not defined in the visible
            # part of this module - presumably it belongs to errorhandling;
            # confirm.
            followUnrecognized(errLine, errCol)
        token = ifno

    #Store Buffer Token, and return the token
    bufferTokens(token)
    return token, errLine, errCol
#Function which reads the string, and returns a string token as well as the text.
def stringLit(start, errLine, errCol):
    """Read a string literal and return a string token together with its text.

    Characters are read until the delimiter ``start`` is seen again; the
    closing delimiter is consumed. If the token three positions back in
    ``TokenArray`` is a declaration keyword, the literal is also recorded in
    the symbol table under the most recent identifier (``Idname``).

    Args:
        start: The delimiter character that opened the literal.
        errLine: Line number passed through to the result and error handlers.
        errCol: Column number passed through to the result and error handlers.

    Returns:
        A ``(tokentable.TokenString, errLine, errCol, text)`` tuple, where
        ``text`` is the literal's content without delimiters.
    """
    pieces = []
    #Loop collects characters until the closing delimiter is read.
    while grabNextCharacter() != start:
        if len(Character) == 0:
            # NOTE(review): endOfFile2 is not defined in the visible part of
            # this module - presumably it belongs to errorhandling; confirm.
            endOfFile2(errLine, errCol)
        if Character == '\n':
            # NOTE(review): in this module endOfLine is a boolean flag, so
            # this call cannot succeed as written; an errorhandling routine
            # was presumably intended - confirm the name before changing it.
            endOfLine(errLine, errCol)
        pieces.append(Character)
    text = "".join(pieces)
    grabNextCharacter()

    #Add to symbol table whether a String has been declared.
    # At least three earlier tokens are required: with fewer, the index
    # tokenCount - 3 would be negative and wrap around to the END of
    # TokenArray, inspecting an unrelated token.
    if tokenCount >= 3:
        declared = TokenArray[tokenCount - 3]
        # 32 / 31 are presumably the codes of the String / Int keyword
        # tokens - confirm against tokentable.
        if declared == 32:
            pushToSymbolTable(Idname, 'String', Line, text)
        if declared == 31:
            pushToSymbolTable(Idname, 'Int', Line, text)

    bufferTokens(tokentable.TokenString)
    return tokentable.TokenString, errLine, errCol, text
#Function which handles identifiers and integers by running a series of if statements and while loops.
def identifiersOrIntegers(errLine, errCol):
    """Scan an integer literal, keyword or identifier starting at ``Character``.

    Consumes a run of alphanumeric/underscore characters and classifies it:

    * starts with a digit -> integer token (the invalid-number handler is
      called first if the run contains any non-digit);
    * found in ``tokentable.keyWords`` -> that keyword's token;
    * anything else -> identifier token, and ``Idname`` is updated.

    For integers and identifiers, if the token three positions back in
    ``TokenArray`` is a declaration keyword, the value is recorded in the
    symbol table under the previous identifier (``Idname``).

    Args:
        errLine: Line number passed through to the result and error handlers.
        errCol: Column number passed through to the result and error handlers.

    Returns:
        ``(TokenInteger, errLine, errCol, value)`` for integers,
        ``(keyword_token, errLine, errCol)`` for keywords, or
        ``(TokenIdent, errLine, errCol, name)`` for identifiers.
    """
    global Idname

    #Collect the characters and track whether every one of them is a digit.
    pieces = []
    is_number = True
    while Character.isalnum() or Character == '_':
        pieces.append(Character)
        if not Character.isdigit():
            is_number = False
        grabNextCharacter()
    Text = "".join(pieces)

    # Work out whether this value completes a declaration. At least three
    # earlier tokens are required: with fewer, the index tokenCount - 3 would
    # be negative and wrap around to the END of TokenArray.
    declared_type = None
    if tokenCount >= 3:
        declared = TokenArray[tokenCount - 3]
        # 32 / 31 are presumably the codes of the String / Int keyword
        # tokens - confirm against tokentable.
        if declared == 32:
            declared_type = 'String'
        elif declared == 31:
            declared_type = 'Int'

    # NOTE(review): if Character was not alphanumeric or '_' on entry, Text
    # is empty and the index below raises IndexError; the appropriate error
    # handler for an unrecognised character is not visible from this module.
    #If the text starts with a digit, convert to a number, and return integer token.
    if Text[0].isdigit():
        if not is_number:
            # Was `text` (undefined name); the local variable is `Text`.
            # NOTE(review): invalidNumber is not defined in the visible part
            # of this module - presumably it belongs to errorhandling; confirm.
            invalidNumber(errLine, errCol, Text)
        n = int(Text)
        if declared_type is not None:
            pushToSymbolTable(Idname, declared_type, Line, n)
        bufferTokens(tokentable.TokenInteger)
        return tokentable.TokenInteger, errLine, errCol, n

    #If text matches a keyword, find keyword and return keyword token.
    if Text in tokentable.keyWords:
        bufferTokens(tokentable.keyWords[Text])
        return tokentable.keyWords[Text], errLine, errCol

    if declared_type is not None:
        # Argument order is (name, type, line, value), matching every other
        # call site; this path previously passed the text as the line and
        # the line (via the undefined name `line`) as the value.
        pushToSymbolTable(Idname, declared_type, Line, Text)

    #If text is not an integer or a keyword, return identifier token, with what the identifier is.
    # Idname is updated only after the symbol-table insert above, which must
    # still see the previously scanned identifier.
    Idname = Text
    bufferTokens(tokentable.TokenIdent)
    return tokentable.TokenIdent, errLine, errCol, Text
#Function which identifies whether the string is a comment as well as return the divide token.
def commentsAndDiv(errLine, errCol):
    """Handle a '/' character: either a divide operator or a block comment.

    If the character after '/' is not '*', a divide token is recorded and
    returned. Otherwise characters are skipped up to and including the
    closing '*/', ``commentGone`` is set, and the token that follows the
    comment is returned by calling ``getToken``.

    Args:
        errLine: Line number passed through to the result and error handler.
        errCol: Column number passed through to the result and error handler.

    Returns:
        ``(tokentable.TokenDivide, errLine, errCol)`` for a divide operator,
        otherwise whatever ``getToken`` returns for the next token.
    """
    global commentGone

    #Anything other than '*' after the '/' means this is a plain divide.
    if grabNextCharacter() != '*':
        bufferTokens(tokentable.TokenDivide)
        return tokentable.TokenDivide, errLine, errCol

    #Step past the opening '*' and scan for the closing "*/".
    grabNextCharacter()
    while True:
        if not Character:
            # Input ended inside the comment.
            # NOTE(review): endOfFile is not defined in the visible part of
            # this module - presumably it belongs to errorhandling; confirm.
            endOfFile(errLine, errCol)
            continue
        if Character != '*':
            grabNextCharacter()
            continue
        # Character is '*': the comment ends only if '/' follows directly.
        # If it does not, the character just read is re-examined on the
        # next pass, so a run such as "**/" still terminates the comment.
        if grabNextCharacter() == '/':
            break

    grabNextCharacter()
    commentGone = True
    return getToken()
#Function which pushes the necessary components to the symbol table.
def pushToSymbolTable(name, type, line, value):
    """Forward one entry to ``symtable.insert``.

    Args:
        name: First argument passed to ``symtable.insert``.
        type: Second argument (callers in this module pass 'String' or 'Int').
        line: Third argument (callers in this module pass a line number).
        value: Fourth argument (the literal or identifier text being stored).
    """
    entry = (name, type, line, value)
    symtable.insert(*entry)
def bufferTokens(token):
    """Record ``token`` in ``TokenArray`` and increase ``tokenCount`` by one."""
    # Only tokenCount is rebound; TokenArray is mutated in place, so it needs
    # no global declaration.
    global tokenCount
    TokenArray.append(token)
    tokenCount = tokenCount + 1
#Get Token function calls the grab next character function and identifies what token the character is by a series of if statements.
def getToken():
    """Scan and return the next token from the input.

    Leading whitespace is skipped, the current line and column are captured
    as the token's position, and the current character selects the scanning
    routine.

    Returns:
        A tuple starting with ``(token, line, column)``. String, integer and
        identifier tokens carry a fourth element holding their value.
    """
    #Skip white space; the state is left on the first non-space character.
    while Character.isspace():
        grabNextCharacter()

    #Position of the token about to be scanned.
    ch = Character
    tok_line, tok_col = Line, Column

    #An empty read means there are no more tokens within the file.
    if not ch:
        bufferTokens(tokentable.TokenEOF)
        return tokentable.TokenEOF, tok_line, tok_col

    #Characters that need a dedicated scanning routine.
    if ch == '/':
        return commentsAndDiv(tok_line, tok_col)
    if ch == '=':
        return follow('=', tokentable.TokenEQ, tokentable.TokenAssign, tok_line, tok_col)
    if ch == '<':
        return follow('=', tokentable.TokenLeq, tokentable.TokenLess, tok_line, tok_col)
    if ch == '>':
        return follow('=', tokentable.TokenGEQ, tokentable.TokenGTR, tok_line, tok_col)
    if ch == '!':
        # NOTE(review): this reuses the tokens of the '=' case, so "!=" is
        # reported as TokenEQ and a lone "!" as TokenAssign. It looks like a
        # copy-paste slip, but the intended token names are not visible
        # here - confirm against tokentable.
        return follow('=', tokentable.TokenEQ, tokentable.TokenAssign, tok_line, tok_col)
    if ch == '&':
        return follow('&', tokentable.TokenAnd, tokentable.TokenEOF, tok_line, tok_col)
    if ch == '|':
        return follow('|', tokentable.TokenOR, tokentable.TokenEOF, tok_line, tok_col)
    if ch == '"':
        return stringLit(ch, tok_line, tok_col)

    #Single-character symbols listed in tokentable.Symbols.
    if ch in tokentable.Symbols:
        sym = tokentable.Symbols[ch]
        grabNextCharacter()
        bufferTokens(sym)
        return sym, tok_line, tok_col

    #Anything else is handed to the identifier / integer scanner.
    return identifiersOrIntegers(tok_line, tok_col)