-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.h
109 lines (97 loc) · 2.88 KB
/
lexer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
/* ************************************************************************** */
/* */
/* ::: :::::::: */
/* lexer.h :+: :+: :+: */
/* +:+ +:+ +:+ */
/* By: gbudau <gbudau@student.42.fr> +#+ +:+ +#+ */
/* +#+#+#+#+#+ +#+ */
/* Created: 2020/11/09 00:01:02 by gbudau #+# #+# */
/* Updated: 2021/01/17 21:12:47 by gbudau ### ########.fr */
/* */
/* ************************************************************************** */
#ifndef LEXER_H
# define LEXER_H
# include "libft.h"
/*
** OPERATORS = string holding the characters '|', ';', '>' and '<'
** QUOTES    = string holding the single quote, the double quote and
**             the backslash
** NOTE(review): presumably the character sets the lexer tests against when
** splitting a line into operator and word tokens - confirm in the sources.
*/
# define OPERATORS "|;><"
# define QUOTES "\'\"\\"
/*
** Symbolic names for individual characters.
** Every enumerator has the value of the character constant it names.
*/
enum	e_special_chars
{
	CHAR_QUOTE = '\'',			/* single quote */
	CHAR_DQUOTE = '"',			/* double quote */
	CHAR_BACKSLASH = '\\',		/* backslash */
	CHAR_DOLLAR_SIGN = '$'		/* dollar sign */
};
/*
** Kind of a token (the type stored in t_token.type).
** A token is either one of the operator kinds or a TOKEN_WORD.
** TOKEN_WORD covers commands, command arguments, and file or directory
** names.
** The values are written out explicitly; they are the same 0..7 sequence
** the declaration order would produce implicitly.
*/
typedef enum e_tokentype
{
	TOKEN_PIPE = 0,
	TOKEN_SEMICOLON = 1,
	TOKEN_SINGLE_QUOTE = 2,
	TOKEN_DOUBLE_QUOTE = 3,
	TOKEN_GREAT = 4,
	TOKEN_LESS = 5,
	TOKEN_DGREAT = 6,
	TOKEN_WORD = 7
}	t_tokentype;
/*
** States of the state machine that tracks quotes and backslash escapes
** inside WORD tokens (the type stored in t_scanner.state).
** The values are written out explicitly; they match the implicit 0..3
** numbering of the declaration order.
*/
typedef enum e_scan_state
{
	STATE_GENERAL = 0,
	STATE_IN_QUOTE = 1,
	STATE_IN_DQUOTE = 2,
	STATE_BACKSLASH = 3
}	t_scan_state;
/*
** Syntax error codes recorded while lexing (see t_scanner.error).
** NO_LEXER_ERR is 0; the error codes follow as 1..4, the same values
** the declaration order would produce implicitly.
*/
enum	e_lexer_errors
{
	NO_LEXER_ERR = 0,
	ERR_INCOMPLETE_QUOTE = 1,
	ERR_INCOMPLETE_DQUOTE = 2,
	ERR_INCOMPLETE_BACKSLASH = 3,
	ERR_UNEXPECTED_TOKEN_DOUBLE_SEMICOLON = 4
};
/*
** One token.
** type = kind of the token (one of the operator kinds or TOKEN_WORD)
** str  = the token itself
** NOTE(review): who allocates and frees str is not visible in this
** header - confirm against the lexer sources.
*/
struct	s_token
{
	t_tokentype	type;
	char		*str;
};

typedef struct s_token	t_token;
/*
** Working state used while a line is split into tokens.
** tokens  = list of the tokens for the current line
** start   = where the token being scanned starts
** current = the character currently being analyzed
** state   = current state of the WORD token state machine
** error   = syntax error met while lexing (an e_lexer_errors value)
*/
struct	s_scanner
{
	t_list			*tokens;
	char			*start;
	char			*current;
	t_scan_state	state;
	int				error;
};

typedef struct s_scanner	t_scanner;
/*
** Lexer functions, defined in the lexer source files.
** They are grouped below by the kind of argument they take.
*/

/*
** Takes a line and returns a list.
** NOTE(review): presumably the list of tokens for that line, with
** last_status being the status of the previous command - confirm.
*/
t_list	*tokenize(char *line, int *last_status);

/*
** Functions working on a scanner.
*/
int		is_at_end(t_scanner *scanner);
char	peek(t_scanner *scanner);
char	peek_next(t_scanner *scanner);
char	advance(t_scanner *scanner);
void	regress(t_scanner *scanner);
int		match(char expected, t_scanner *scanner);
void	skip_space(t_scanner *scanner);
void	get_state(t_scanner *scanner);
int		advance_word(t_scanner *scanner);

/*
** Functions taking a single character.
** NOTE(review): presumably predicates returning non-zero on a match.
*/
int		is_backslash(char c);
int		is_general_delimiter(char c);
int		is_dquote_backslash_special(char c);

/*
** Functions working on tokens / the token list.
** NOTE(review): clear_token takes a void pointer, presumably so it can be
** passed as the delete callback of a libft list function - confirm.
*/
void	clear_token(void *node);
void	print_tokens(t_list *tokens);
#endif