-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.l
175 lines (168 loc) · 9.1 KB
/
lexer.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
/* C declarations: the text between the percent-brace markers is copied into the generated scanner. */
%{
/* Standard library headers, then the project headers (header.h and the parser-generated y.tab.h). */
#include <stdlib.h>
#include <string.h>
#include "header.h"
#include "y.tab.h"
/* Scanner-wide state and forward declarations. */
int lno=0; /* Line counter; starts at 0 and is incremented by the newline rule. */
void yyerror(char *); /* Error reporter; its definition is not in this file (presumably supplied by the parser - confirm). */
long long hash(char* str); /* Maps an identifier string to an integer; defined in the subroutines section of this file. */
%}
/*NAMED PATTERN DEFINITIONS REUSED BY THE REGULAR EXPRESSIONS BELOW*/
alpha [a-zA-Z]
digit [0-9]
/*RULES FOR TOKENS - REGULAR EXPRESSIONS*/
%%
"<*" { /* MULTI-LINE COMMENT: discard everything up to and including the closing star and greater-than pair. */
    int prev = 0;   /* character consumed on the previous iteration */
    int ch;         /* int rather than char so the end-of-input value is representable */
    for (;;) {
        ch = input();
        if (ch == EOF || ch == 0) {
            /* End of input before the terminator. Depending on the flex
               version input() yields 0 or EOF here, so both are checked. */
            yyerror("Unterminated comment");
            break;
        }
        if (ch == '\n') {
            lno++;  /* newlines swallowed here never reach the newline rule */
        }
        if (prev == '*' && ch == '>') {
            break;  /* terminator found */
        }
        /* Remember this character instead of reading ahead, so that a run
           of several stars directly before the greater-than sign still ends the comment. */
        prev = ch;
    }
}
"<<" { /* SINGLE-LINE COMMENT: discard the remainder of the current line. */
    int ch;     /* int rather than char so the end-of-input value is representable */
    do {
        ch = input();
        /* Stop at end of input as well (0 or EOF depending on the flex
           version); otherwise a comment on an unterminated last line never ends. */
    } while (ch != '\n' && ch != EOF && ch != 0);
    if (ch == '\n') {
        lno++;  /* the newline was consumed here, so the newline rule will not count it */
    }
}
"___"    { /* Three underscores: end-of-class marker in the input program. */ return CLSEND; }
"..."    { /* Three dots: end of the separate function declarations. */ return FUNEND; }
"main"   { /* Keyword main, introducing the main function. */ return MAIN; }
"return" { /* Keyword return, used to hand a value back from a function. */ return RETURN; }
"new"    { /* Keyword new, used in object creation statements. */ return CLSCONSTR; }
"@"      { /* At sign, the function marker; reported as FUNCALL. */ return FUNCALL; }
">=" { /* Comparison: greater than or equal to. */ return GE; }
"<=" { /* Comparison: less than or equal to. */ return LE; }
"==" { /* Comparison: is equal to. */ return EQ; }
"!=" { /* Comparison: is not equal to. */ return NE; }
"++" { /* Unary increment. */ return UP; }
"--" { /* Unary decrement. */ return UM; }
"+=" { /* Add and assign. */ return EP; }
"-=" { /* Subtract and assign. */ return EM; }
"*=" { /* Multiply and assign. */ return EMUL; }
"/=" { /* Divide and assign. */ return EDIV; }
"&"  { /* AND operator. */ return AND; }
"|"  { /* OR operator. */ return OR; }
"int"       { /* Type keyword int. */ return INT; }
"double"    { /* Type keyword double; reported to the parser as FLOAT. */ return FLOAT; }
"until"     { /* Keyword until: the while-style loop of this language. */ return UNTIL; }
"loop"      { /* Keyword loop: the for-style loop of this language. */ return LOOP; }
"check"     { /* Keyword check: the if-style conditional of this language. */ return CHECK; }
"otherwise" { /* Keyword otherwise: the else branch of a check. */ return OTHERWISE; }
"printf"    { /* Keyword printf: prints a string and an expression. */ return PRINTF; }
"write"     { /* Keyword write: prints individual variables. */ return WRITE; }
"cout"      { /* Keyword cout: prints any number of expressions; reported as PRINT. */ return PRINT; }
"cin"       { /* Keyword cin: the input statement; reported as INPUT. */ return INPUT; }
[-()<>=+*/;{}.$,] { /* SINGLE-CHARACTER TOKENS: the character code itself is the token value. */
    return yytext[0];
}
["][^"\n]*["] { /* STRING LITERAL: return STRING with a heap copy of the text between the delimiters. */
    /* The pattern stops at the first closing delimiter, so two literals on
       one line are two tokens rather than one merged token. */
    size_t len = (size_t)yyleng - 2;    /* lexeme length without the two delimiters */
    char *copy = malloc(len + 1);
    if (copy == NULL) {
        yyerror("Out of memory");
        exit(EXIT_FAILURE);
    }
    memcpy(copy, yytext + 1, len);
    copy[len] = '\0';
    /* The copy is heap storage sized to the lexeme: it stays valid after this
       action returns and cannot overflow on long literals. Ownership passes
       to the parser, which should free it when the value is no longer needed. */
    yylval.strVal = copy;
    return STRING;
}
{alpha}({alpha}|{digit})* { /* IDENTIFIER: a letter followed by letters or digits; the token value is the hashed name. */
    long long index = hash(yytext);
    yylval.symInd = index;
    return ID;
}
[-]?[0-9]+[.][0-9]+ { /* DOUBLE LITERAL: optional minus, digits, a dot, digits; the value goes in dVal. */
    yylval.dVal = strtod(yytext, NULL);
    return DOUBLE;
}
0 { /* INTEGER LITERAL ZERO: the lexeme is exactly the digit 0, so the value is 0. */
    yylval.iVal = 0;
    return INTEGER;
}
[1-9][0-9]* { /* INTEGER LITERAL: a non-zero digit followed by any digits; the value goes in iVal. */
    yylval.iVal = (int)strtol(yytext, NULL, 10);
    return INTEGER;
}
[\n] { /* NEWLINE: advance the line counter; no token is returned. */
    lno += 1;
}
[\t ] { /* Tabs and spaces are skipped; no token is returned. */ }
. { /* FALLBACK: any character not matched by an earlier rule is reported as an error. */
    yyerror("Unknown character");
}
%%
/*SUBROUTINES FOR LEX FILE*/
/*
 * Hash an identifier to an integer. According to the original note the result
 * is used as the index of that variable in the symbol table.
 *
 * str: NUL-terminated identifier text. A null pointer yields 0.
 * Returns the sum over all characters of ((c - 'a' + 1) * w) % 100, where the
 * weight w is 28 for the first character and grows by one per character.
 *
 * The length is taken from the string itself rather than from the scanner
 * global yyleng, so the result depends only on the argument. For calls with
 * yytext the value is the same as before.
 *
 * NOTE(review): the mapping is not unique and not always non-negative:
 * "a" and "z" both give 28, and characters below 'a' (upper-case letters,
 * digits) contribute negative terms, e.g. "A" gives -68. Callers that index
 * an array with the result should confirm this is handled.
 */
long long hash(char* str)
{
    long long weight = 28;
    long long total = 0;

    if (!str) {
        return 0;
    }
    for (const char *p = str; *p != '\0'; p++) {
        total += ((*p - 'a' + 1) * weight) % 100;
        weight++;
    }
    return total;
}
/*
 * End-of-input hook called by the generated scanner.
 * Always returns 1, i.e. there is no further input to switch to.
 */
int yywrap(void)
{
    return 1;
}