From 4ec28e62ffe7596942dfa7aabc7892a3edca199d Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 11 Nov 2018 15:27:03 +0100 Subject: [PATCH 01/12] Bugfix: use delete on memory obtained with new hashtab.cpp: do not use yaep_free() on memory obtained with operator new(). --- src/hashtab.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hashtab.cpp b/src/hashtab.cpp index 33c041f..03c407a 100644 --- a/src/hashtab.cpp +++ b/src/hashtab.cpp @@ -151,7 +151,8 @@ hash_table::expand_hash_table (void) } yaep_free (alloc, entries); *this = (*new_htab); - yaep_free (new_htab->alloc, new_htab); + new_htab->entries = nullptr; + delete new_htab; } /* The following variable is used for debugging. Its value is number From 2bdd468fb9b3433a47260676921057bb62a3ffcb Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 11 Nov 2018 15:28:33 +0100 Subject: [PATCH 02/12] sgramm.y: switch bison internals to reentrant mode Required for #12. --- src/sgramm.y | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/sgramm.y b/src/sgramm.y index cb27f43..984d22e 100644 --- a/src/sgramm.y +++ b/src/sgramm.y @@ -112,11 +112,14 @@ static char *slhs; /* Forward declarations. */ extern int yyerror (const char *str); -extern int yylex (void); +union YYSTYPE; +extern int yylex (union YYSTYPE *lvalp); extern int yyparse (void); %} +%define api.pure full + %union { void *ref; @@ -269,7 +272,7 @@ static int nsterm, nsrule; /* The following implements lexical analyzer for yacc code. */ int -yylex (void) +yylex (YYSTYPE *lvalp) { int c; int n_errs = 0; @@ -320,13 +323,13 @@ yylex (void) return c; case '\'': OS_TOP_ADD_BYTE (stoks, '\''); - yylval.num = *curr_ch++; - OS_TOP_ADD_BYTE (stoks, yylval.num); + lvalp->num = *curr_ch++; + OS_TOP_ADD_BYTE (stoks, lvalp->num); if (*curr_ch++ != '\'') yyerror ("invalid character"); OS_TOP_ADD_BYTE (stoks, '\''); OS_TOP_ADD_BYTE (stoks, '\0'); - yylval.ref = OS_TOP_BEGIN (stoks); + lvalp->ref = OS_TOP_BEGIN (stoks); OS_TOP_FINISH (stoks); return CHAR; default: @@ -337,8 +340,8 @@ yylex (void) OS_TOP_ADD_BYTE (stoks, c); curr_ch--; OS_TOP_ADD_BYTE (stoks, '\0'); - yylval.ref = OS_TOP_BEGIN (stoks); - if (strcmp ((char *) yylval.ref, "TERM") == 0) + lvalp->ref = OS_TOP_BEGIN (stoks); + if (strcmp ((char *) lvalp->ref, "TERM") == 0) { OS_TOP_NULLIFY (stoks); return TERM; @@ -355,9 +358,9 @@ yylex (void) } else if (isdigit (c)) { - yylval.num = c - '0'; + lvalp->num = c - '0'; while ((c = *curr_ch++) != '\0' && isdigit (c)) - yylval.num = yylval.num * 10 + (c - '0'); + lvalp->num = lvalp->num * 10 + (c - '0'); curr_ch--; return NUMBER; } From 4ae33af5aecb72151b2f80c8d753938e99a9d75a Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 11 Nov 2018 17:22:09 +0100 Subject: [PATCH 03/12] Allow setting of user pointer in grammar Required for #12. --- src/yaep.c | 33 +++++++++++++++++++++++++++++++++ src/yaep.cpp | 12 ++++++++++++ src/yaep.h | 12 ++++++++++++ 3 files changed, 57 insertions(+) diff --git a/src/yaep.c b/src/yaep.c index 3719017..a77a382 100644 --- a/src/yaep.c +++ b/src/yaep.c @@ -185,6 +185,8 @@ struct grammar struct term_sets *term_sets_ptr; /* Allocator. */ YaepAllocator *alloc; + /* User pointer */ + void *userptr; }; /* The following variable value is the reference for the current @@ -3049,9 +3051,40 @@ yaep_create_grammar (void) grammar->symbs_ptr = symbs_ptr = symb_init (); grammar->term_sets_ptr = term_sets_ptr = term_set_init (); grammar->rules_ptr = rules_ptr = rule_init (); + grammar->userptr = NULL; return grammar; } +/* The following function sets the user pointer for a grammar. */ +#ifdef __cplusplus +static +#endif +void +yaep_grammar_setuserptr (struct grammar *g, void *userptr) +{ + if (g != NULL) + { + g->userptr = userptr; + } +} + +/* The following function retrieves the user pointer of a grammar. */ +#ifdef __cplusplus +static +#endif +void * +yaep_grammar_getuserptr (struct grammar *g) +{ + if (g == NULL) + { + return NULL; + } + else + { + return g->userptr; + } +} + /* The following function makes grammar empty. */ static void yaep_empty_grammar (void) diff --git a/src/yaep.cpp b/src/yaep.cpp index 7182059..e7c0091 100644 --- a/src/yaep.cpp +++ b/src/yaep.cpp @@ -135,6 +135,18 @@ yaep::set_recovery_match (int n_toks) return yaep_set_recovery_match (this->grammar, n_toks); } +void +yaep::setuserptr (void *userptr) noexcept +{ + yaep_grammar_setuserptr (this->grammar, userptr); +} + +void * +yaep::getuserptr () const noexcept +{ + return yaep_grammar_getuserptr (this->grammar); +} + int yaep::parse (int (*read_token) (void **attr), void (*syntax_error) (int err_tok_num, diff --git a/src/yaep.h b/src/yaep.h index da92d39..6c89c9a 100644 --- a/src/yaep.h +++ b/src/yaep.h @@ -155,6 +155,16 @@ struct yaep_tree_node the first. */ extern struct grammar *yaep_create_grammar (void); +/* The following function stores a user-defined pointer + in the given grammar. */ +extern void yaep_grammar_setuserptr (struct grammar *g, void *userptr); + +/* The following function retrieves a user-defined pointer + previously set with yaep_grammar_setuserptr() from + the given grammar. If no user pointer has been set, + a null pointer is returned. */ +extern void *yaep_grammar_getuserptr (struct grammar *g); + /* The function returns the last occurred error code for given grammar. */ extern int yaep_error_code (struct grammar *g); @@ -353,6 +363,8 @@ class yaep int set_cost_flag (int flag); int set_error_recovery_flag (int flag); int set_recovery_match (int n_toks); + void setuserptr (void *userptr) noexcept; + void *getuserptr () const noexcept; /* See comments for function yaep_parse. */ int parse (int (*read_token) (void **attr), From 737acae240f8de8b21c9ed3a9db71709c64c1013 Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 11 Nov 2018 18:05:42 +0100 Subject: [PATCH 04/12] Provide grammar to read_terminal() and read_rule() Hack for #12 while using the current API. --- src/yaep.c | 26 ++++++++++++++------------ src/yaep.h | 25 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/src/yaep.c b/src/yaep.c index a77a382..cd1f2b0 100644 --- a/src/yaep.c +++ b/src/yaep.c @@ -3359,34 +3359,36 @@ yaep_read_grammar (struct grammar *g, int strict_p, const char *name, *lhs, **rhs, *anode; struct symb *symb, *start; struct rule *rule; - int anode_cost; int *transl; - int i, el, code; + int i, el; + struct _yaep_reentrant_hack anode_cost, code; assert (g != NULL); + anode_cost.grammar = g; + code.grammar = g; grammar = g; symbs_ptr = g->symbs_ptr; term_sets_ptr = g->term_sets_ptr; rules_ptr = g->rules_ptr; - if ((code = setjmp (error_longjump_buff)) != 0) + if ((code.value = setjmp (error_longjump_buff)) != 0) { - return code; + return code.value; } if (!grammar->undefined_p) yaep_empty_grammar (); - while ((name = (*read_terminal) (&code)) != NULL) + while ((name = (*read_terminal) (&code.value)) != NULL) { - if (code < 0) + if (code.value < 0) yaep_error (YAEP_NEGATIVE_TERM_CODE, "term `%s' has negative code", name); symb = symb_find_by_repr (name); if (symb != NULL) yaep_error (YAEP_REPEATED_TERM_DECL, "repeated declaration of term `%s'", name); - if (symb_find_by_code (code) != NULL) + if (symb_find_by_code (code.value) != NULL) yaep_error (YAEP_REPEATED_TERM_CODE, - "repeated code %d in term `%s'", code, name); - symb_add_term (name, code); + "repeated code %d in term `%s'", code.value, name); + symb_add_term (name, code.value); } /* Adding error symbol. */ @@ -3398,7 +3400,7 @@ yaep_read_grammar (struct grammar *g, int strict_p, grammar->term_error = symb_add_term (TERM_ERROR_NAME, TERM_ERROR_CODE); grammar->term_error_num = grammar->term_error->u.term.term_num; grammar->axiom = grammar->end_marker = NULL; - while ((lhs = (*read_rule) (&rhs, &anode, &anode_cost, &transl)) != NULL) + while ((lhs = (*read_rule) (&rhs, &anode, &anode_cost.value, &transl)) != NULL) { symb = symb_find_by_repr (lhs); if (symb == NULL) @@ -3409,7 +3411,7 @@ yaep_read_grammar (struct grammar *g, int strict_p, if (anode == NULL && transl != NULL && *transl >= 0 && transl[1] >= 0) yaep_error (YAEP_INCORRECT_TRANSLATION, "rule for `%s' has incorrect translation", lhs); - if (anode != NULL && anode_cost < 0) + if (anode != NULL && anode_cost.value < 0) yaep_error (YAEP_NEGATIVE_COST, "translation for `%s' has negative cost", lhs); if (grammar->axiom == NULL) @@ -3439,7 +3441,7 @@ yaep_read_grammar (struct grammar *g, int strict_p, rule->order[0] = 0; rule->trans_len = 1; } - rule = rule_new_start (symb, anode, (anode != NULL ? anode_cost : 0)); + rule = rule_new_start (symb, anode, (anode != NULL ? anode_cost.value : 0)); while (*rhs != NULL) { symb = symb_find_by_repr (*rhs); diff --git a/src/yaep.h b/src/yaep.h index 6c89c9a..3bb9a64 100644 --- a/src/yaep.h +++ b/src/yaep.h @@ -33,6 +33,7 @@ #define __YAEP__ #include +#include /* The following is a forward declaration of grammar formed by function yaep_read_grammar. */ @@ -148,6 +149,30 @@ struct yaep_tree_node } val; }; +/* The following structure is used to work around a limitation of + yaep_read_grammar(). The read_terminal() and read_rule() functions + passed to yaep_read_grammar() cannot take a user-defined argument, + but this is required for reentrant operation. + The long-term solution would be to expand the API of yaep (FIXME). + When sticking to the old API, however, we hijack the pointer-to-int + parameters to slip through the grammar as additional information. + This structure is for internal use only. + Use the yaep_reentrant_hack_grammar() macro to retrieve the grammar. */ +struct _yaep_reentrant_hack +{ + int value; + struct grammar *grammar; +}; + +/* The following macro retrieves the grammar from a pointer-to-int argument. + It can only be applied to arguments to the code parameter of the + read_terminal() parameter, and to the anode_cost parameter of the + read_rule() parameter of yaep_read_grammar(). */ +#define yaep_reentrant_hack_grammar(x) \ + (((struct _yaep_reentrant_hack *) \ + (((char *) (x)) - offsetof (struct _yaep_reentrant_hack, value))) \ + ->grammar) + #ifndef __cplusplus /* The following function creates undefined grammar. The function From 4271228c46284d1add6fded57c2f203638c44d14 Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 11 Nov 2018 18:56:24 +0100 Subject: [PATCH 05/12] sgramm.y: introduce per-instance parser_data As a test, eliminate the static global variable anode_cost. Required for #12. --- src/sgramm.y | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/sgramm.y b/src/sgramm.y index 984d22e..3334d1e 100644 --- a/src/sgramm.y +++ b/src/sgramm.y @@ -87,6 +87,14 @@ struct srule int *trans; }; +/* The following structure contains the parser data. */ +struct parser_data +{ + /* The following is cost of the last translation which contains an + abstract node. */ + int anode_cost; +}; + /* The following vlos contain all syntax terminal and syntax rule structures. */ #ifndef __cplusplus @@ -103,22 +111,19 @@ static os_t srhs, strans; static os_t *srhs, *strans; #endif -/* The following is cost of the last translation which contains an - abstract node. */ -static int anode_cost; - /* This variable is used in yacc action to process alternatives. */ static char *slhs; /* Forward declarations. */ -extern int yyerror (const char *str); +extern int yyerror (struct parser_data *data, const char *str); union YYSTYPE; -extern int yylex (union YYSTYPE *lvalp); -extern int yyparse (void); +extern int yylex (union YYSTYPE *lvalp, struct parser_data *data); +extern int yyparse (struct parser_data *data); %} %define api.pure full +%param {struct parser_data *data} %union { @@ -176,7 +181,7 @@ alt : seq trans OS_TOP_ADD_MEMORY (strans, &end_marker, sizeof (int)); rule.lhs = slhs; rule.anode = (char *) $2; - rule.anode_cost = (rule.anode == NULL ? 0 : anode_cost); + rule.anode_cost = (rule.anode == NULL ? 0 : data->anode_cost); rule.rhs_len = OS_TOP_LENGTH (srhs) / sizeof (char *); OS_TOP_EXPAND (srhs, sizeof (char *)); rule.rhs = (char **) OS_TOP_BEGIN (srhs); @@ -248,8 +253,8 @@ numbers : } ; -cost : { anode_cost = 1;} - | NUMBER { anode_cost = $1; } +cost : { data->anode_cost = 1;} + | NUMBER { data->anode_cost = $1; } ; %% /* The following is current input character of the grammar @@ -272,7 +277,7 @@ static int nsterm, nsrule; /* The following implements lexical analyzer for yacc code. */ int -yylex (YYSTYPE *lvalp) +yylex (YYSTYPE *lvalp, struct parser_data *data) { int c; int n_errs = 0; @@ -295,13 +300,13 @@ yylex (YYSTYPE *lvalp) { n_errs++; curr_ch--; - yyerror ("invalid input character /"); + yyerror (data, "invalid input character /"); } for (;;) { c = *curr_ch++; if (c == '\0') - yyerror ("unfinished comment"); + yyerror (data, "unfinished comment"); if (c == '\n') ln++; if (c == '*') @@ -326,7 +331,7 @@ yylex (YYSTYPE *lvalp) lvalp->num = *curr_ch++; OS_TOP_ADD_BYTE (stoks, lvalp->num); if (*curr_ch++ != '\'') - yyerror ("invalid character"); + yyerror (data, "invalid character"); OS_TOP_ADD_BYTE (stoks, '\''); OS_TOP_ADD_BYTE (stoks, '\0'); lvalp->ref = OS_TOP_BEGIN (stoks); @@ -374,10 +379,10 @@ yylex (YYSTYPE *lvalp) if (isprint (c)) { sprintf (str, "invalid input character '%c'", c); - yyerror (str); + yyerror (data, str); } else - yyerror ("invalid input character"); + yyerror (data, "invalid input character"); } } } @@ -388,7 +393,7 @@ yylex (YYSTYPE *lvalp) /* The following implements syntactic error diagnostic function yacc code. */ int -yyerror (const char *str) +yyerror (struct parser_data *data, const char *str) { yaep_error (YAEP_DESCRIPTION_SYNTAX_ERROR_CODE, "description syntax error on ln %d", ln); @@ -416,7 +421,7 @@ static void free_sgrammar (void); /* The following is major function which parses the description and transforms it into IR. */ static int -set_sgrammar (struct grammar *g, const char *grammar) +set_sgrammar (struct grammar *g, const char *grammar, struct parser_data *data) { int i, j, num; struct sterm *term, *prev, *arr; @@ -434,7 +439,7 @@ set_sgrammar (struct grammar *g, const char *grammar) OS_CREATE (srhs, g->alloc, 0); OS_CREATE (strans, g->alloc, 0); curr_ch = grammar; - yyparse (); + yyparse (data); /* sort array of syntax terminals by names. */ num = VLO_LENGTH (sterms) / sizeof (struct sterm); arr = (struct sterm *) VLO_BEGIN (sterms); @@ -532,9 +537,10 @@ static yaep_parse_grammar (struct grammar *g, int strict_p, const char *description) { int code; + struct parser_data data; assert (g != NULL); - if ((code = set_sgrammar (g, description)) != 0) + if ((code = set_sgrammar (g, description, &data)) != 0) return code; code = yaep_read_grammar (g, strict_p, sread_terminal, sread_rule); free_sgrammar (); From 775826eec5bf7a2b57c37fedfd14815656c7fd5e Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 11 Nov 2018 19:29:11 +0100 Subject: [PATCH 06/12] sgramm.y: eliminate easy global variables Eliminate all global variables not needed in sread_terminal() or sread_rule(). Required for #12. --- src/sgramm.y | 160 +++++++++++++++++++++++++-------------------------- 1 file changed, 78 insertions(+), 82 deletions(-) diff --git a/src/sgramm.y b/src/sgramm.y index 3334d1e..2ae7522 100644 --- a/src/sgramm.y +++ b/src/sgramm.y @@ -90,9 +90,29 @@ struct srule /* The following structure contains the parser data. */ struct parser_data { + /* The following contain all right hand sides and translations arrays. + See members rhs, trans in structure `rule'. */ +#ifndef __cplusplus + os_t srhs, strans; +#else + os_t *srhs, *strans; +#endif /* The following is cost of the last translation which contains an abstract node. */ int anode_cost; + /* This variable is used in yacc action to process alternatives. */ + char *slhs; + /* The following is current input character of the grammar + description. */ + const char *curr_ch; + /* The following is current line number of the grammar description. */ + int ln; + /* The following contains all representation of the syntax tokens. */ +#ifndef __cplusplus + os_t stoks; +#else + os_t *stoks; +#endif }; /* The following vlos contain all syntax terminal and syntax rule @@ -103,17 +123,6 @@ static vlo_t sterms, srules; static vlo_t *sterms, *srules; #endif -/* The following contain all right hand sides and translations arrays. - See members rhs, trans in structure `rule'. */ -#ifndef __cplusplus -static os_t srhs, strans; -#else -static os_t *srhs, *strans; -#endif - -/* This variable is used in yacc action to process alternatives. */ -static char *slhs; - /* Forward declarations. */ extern int yyerror (struct parser_data *data, const char *str); union YYSTYPE; @@ -166,7 +175,7 @@ number : {$$ = -1;} | '=' NUMBER {$$ = $2;} ; -rule : SEM_IDENT {slhs = (char *) $1;} rhs opt_sem +rule : SEM_IDENT {data->slhs = (char *) $1;} rhs opt_sem ; rhs : rhs '|' alt @@ -178,17 +187,17 @@ alt : seq trans struct srule rule; int end_marker = -1; - OS_TOP_ADD_MEMORY (strans, &end_marker, sizeof (int)); - rule.lhs = slhs; + OS_TOP_ADD_MEMORY (data->strans, &end_marker, sizeof (int)); + rule.lhs = data->slhs; rule.anode = (char *) $2; rule.anode_cost = (rule.anode == NULL ? 0 : data->anode_cost); - rule.rhs_len = OS_TOP_LENGTH (srhs) / sizeof (char *); - OS_TOP_EXPAND (srhs, sizeof (char *)); - rule.rhs = (char **) OS_TOP_BEGIN (srhs); + rule.rhs_len = OS_TOP_LENGTH (data->srhs) / sizeof (char *); + OS_TOP_EXPAND (data->srhs, sizeof (char *)); + rule.rhs = (char **) OS_TOP_BEGIN (data->srhs); rule.rhs [rule.rhs_len] = NULL; - OS_TOP_FINISH (srhs); - rule.trans = (int *) OS_TOP_BEGIN (strans); - OS_TOP_FINISH (strans); + OS_TOP_FINISH (data->srhs); + rule.trans = (int *) OS_TOP_BEGIN (data->strans); + OS_TOP_FINISH (data->strans); VLO_ADD_MEMORY (srules, &rule, sizeof (rule)); } ; @@ -197,7 +206,7 @@ seq : seq IDENT { char *repr = (char *) $2; - OS_TOP_ADD_MEMORY (srhs, &repr, sizeof (repr)); + OS_TOP_ADD_MEMORY (data->srhs, &repr, sizeof (repr)); } | seq CHAR { @@ -207,7 +216,7 @@ seq : seq IDENT term.code = term.repr [1]; term.num = VLO_LENGTH (sterms) / sizeof (term); VLO_ADD_MEMORY (sterms, &term, sizeof (term)); - OS_TOP_ADD_MEMORY (srhs, &term.repr, sizeof (term.repr)); + OS_TOP_ADD_MEMORY (data->srhs, &term.repr, sizeof (term.repr)); } | ; @@ -219,14 +228,14 @@ trans : {$$ = NULL;} int symb_num = $2; $$ = NULL; - OS_TOP_ADD_MEMORY (strans, &symb_num, sizeof (int)); + OS_TOP_ADD_MEMORY (data->strans, &symb_num, sizeof (int)); } | '#' '-' { int symb_num = YAEP_NIL_TRANSLATION_NUMBER; $$ = NULL; - OS_TOP_ADD_MEMORY (strans, &symb_num, sizeof (int)); + OS_TOP_ADD_MEMORY (data->strans, &symb_num, sizeof (int)); } | '#' IDENT cost '(' numbers ')' { @@ -243,13 +252,13 @@ numbers : { int symb_num = $2; - OS_TOP_ADD_MEMORY (strans, &symb_num, sizeof (int)); + OS_TOP_ADD_MEMORY (data->strans, &symb_num, sizeof (int)); } | numbers '-' { int symb_num = YAEP_NIL_TRANSLATION_NUMBER; - OS_TOP_ADD_MEMORY (strans, &symb_num, sizeof (int)); + OS_TOP_ADD_MEMORY (data->strans, &symb_num, sizeof (int)); } ; @@ -257,20 +266,6 @@ cost : { data->anode_cost = 1;} | NUMBER { data->anode_cost = $1; } ; %% -/* The following is current input character of the grammar - description. */ -static const char *curr_ch; - -/* The following is current line number of the grammar description. */ -static int ln; - -/* The following contains all representation of the syntax tokens. */ -#ifndef __cplusplus -static os_t stoks; -#else -static os_t *stoks; -#endif - /* The following is number of syntax terminal and syntax rules being read. */ static int nsterm, nsrule; @@ -284,37 +279,37 @@ yylex (YYSTYPE *lvalp, struct parser_data *data) for (;;) { - c = *curr_ch++; + c = *data->curr_ch++; switch (c) { case '\0': return 0; case '\n': - ln++; + data->ln++; case '\t': case ' ': break; case '/': - c = *curr_ch++; + c = *data->curr_ch++; if (c != '*' && n_errs == 0) { n_errs++; - curr_ch--; + data->curr_ch--; yyerror (data, "invalid input character /"); } for (;;) { - c = *curr_ch++; + c = *data->curr_ch++; if (c == '\0') yyerror (data, "unfinished comment"); if (c == '\n') - ln++; + data->ln++; if (c == '*') { - c = *curr_ch++; + c = *data->curr_ch++; if (c == '/') break; - curr_ch--; + data->curr_ch--; } } break; @@ -327,46 +322,47 @@ yylex (YYSTYPE *lvalp, struct parser_data *data) case ')': return c; case '\'': - OS_TOP_ADD_BYTE (stoks, '\''); - lvalp->num = *curr_ch++; - OS_TOP_ADD_BYTE (stoks, lvalp->num); - if (*curr_ch++ != '\'') + OS_TOP_ADD_BYTE (data->stoks, '\''); + lvalp->num = *data->curr_ch++; + OS_TOP_ADD_BYTE (data->stoks, lvalp->num); + if (*data->curr_ch++ != '\'') yyerror (data, "invalid character"); - OS_TOP_ADD_BYTE (stoks, '\''); - OS_TOP_ADD_BYTE (stoks, '\0'); - lvalp->ref = OS_TOP_BEGIN (stoks); - OS_TOP_FINISH (stoks); + OS_TOP_ADD_BYTE (data->stoks, '\''); + OS_TOP_ADD_BYTE (data->stoks, '\0'); + lvalp->ref = OS_TOP_BEGIN (data->stoks); + OS_TOP_FINISH (data->stoks); return CHAR; default: if (isalpha (c) || c == '_') { - OS_TOP_ADD_BYTE (stoks, c); - while ((c = *curr_ch++) != '\0' && (isalnum (c) || c == '_')) - OS_TOP_ADD_BYTE (stoks, c); - curr_ch--; - OS_TOP_ADD_BYTE (stoks, '\0'); - lvalp->ref = OS_TOP_BEGIN (stoks); + OS_TOP_ADD_BYTE (data->stoks, c); + while ((c = *data->curr_ch++) != '\0' && (isalnum (c) + || c == '_')) + OS_TOP_ADD_BYTE (data->stoks, c); + data->curr_ch--; + OS_TOP_ADD_BYTE (data->stoks, '\0'); + lvalp->ref = OS_TOP_BEGIN (data->stoks); if (strcmp ((char *) lvalp->ref, "TERM") == 0) { - OS_TOP_NULLIFY (stoks); + OS_TOP_NULLIFY (data->stoks); return TERM; } - OS_TOP_FINISH (stoks); - while ((c = *curr_ch++) != '\0') + OS_TOP_FINISH (data->stoks); + while ((c = *data->curr_ch++) != '\0') if (c == '\n') - ln++; + data->ln++; else if (c != '\t' && c != ' ') break; if (c != ':') - curr_ch--; + data->curr_ch--; return (c == ':' ? SEM_IDENT : IDENT); } else if (isdigit (c)) { lvalp->num = c - '0'; - while ((c = *curr_ch++) != '\0' && isdigit (c)) + while ((c = *data->curr_ch++) != '\0' && isdigit (c)) lvalp->num = lvalp->num * 10 + (c - '0'); - curr_ch--; + data->curr_ch--; return NUMBER; } else @@ -396,7 +392,7 @@ int yyerror (struct parser_data *data, const char *str) { yaep_error (YAEP_DESCRIPTION_SYNTAX_ERROR_CODE, - "description syntax error on ln %d", ln); + "description syntax error on ln %d", data->ln); return 0; } @@ -416,7 +412,7 @@ sterm_num_cmp (const void *t1, const void *t2) return ((struct sterm *) t1)->num - ((struct sterm *) t2)->num; } -static void free_sgrammar (void); +static void free_sgrammar (struct parser_data *data); /* The following is major function which parses the description and transforms it into IR. */ @@ -427,18 +423,18 @@ set_sgrammar (struct grammar *g, const char *grammar, struct parser_data *data) struct sterm *term, *prev, *arr; int code = 256; - ln = 1; + data->ln = 1; if ((code = setjmp (error_longjump_buff)) != 0) { - free_sgrammar (); + free_sgrammar (data); return code; } - OS_CREATE (stoks, g->alloc, 0); + OS_CREATE (data->stoks, g->alloc, 0); VLO_CREATE (sterms, g->alloc, 0); VLO_CREATE (srules, g->alloc, 0); - OS_CREATE (srhs, g->alloc, 0); - OS_CREATE (strans, g->alloc, 0); - curr_ch = grammar; + OS_CREATE (data->srhs, g->alloc, 0); + OS_CREATE (data->strans, g->alloc, 0); + data->curr_ch = grammar; yyparse (data); /* sort array of syntax terminals by names. */ num = VLO_LENGTH (sterms) / sizeof (struct sterm); @@ -485,13 +481,13 @@ set_sgrammar (struct grammar *g, const char *grammar, struct parser_data *data) /* The following frees IR. */ static void -free_sgrammar (void) +free_sgrammar (struct parser_data *data) { - OS_DELETE (strans); - OS_DELETE (srhs); + OS_DELETE (data->strans); + OS_DELETE (data->srhs); VLO_DELETE (srules); VLO_DELETE (sterms); - OS_DELETE (stoks); + OS_DELETE (data->stoks); } /* The following two functions implements functions used by YAEP. */ @@ -543,6 +539,6 @@ yaep_parse_grammar (struct grammar *g, int strict_p, const char *description) if ((code = set_sgrammar (g, description, &data)) != 0) return code; code = yaep_read_grammar (g, strict_p, sread_terminal, sread_rule); - free_sgrammar (); + free_sgrammar (&data); return code; } From e327bd84fd44dd1213deb940e1aa14d83423992e Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 11 Nov 2018 19:53:07 +0100 Subject: [PATCH 07/12] sgramm.y: eliminate all global variables Eliminate remaining global variables used by sread_terminal() and sread_rule() by passing parser data via reentrancy hack. Required for #12. --- src/sgramm.y | 71 +++++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/src/sgramm.y b/src/sgramm.y index 2ae7522..100c4de 100644 --- a/src/sgramm.y +++ b/src/sgramm.y @@ -90,6 +90,13 @@ struct srule /* The following structure contains the parser data. */ struct parser_data { + /* The following vlos contain all syntax terminal and syntax rule + structures. */ +#ifndef __cplusplus + vlo_t sterms, srules; +#else + vlo_t *sterms, *srules; +#endif /* The following contain all right hand sides and translations arrays. See members rhs, trans in structure `rule'. */ #ifndef __cplusplus @@ -113,16 +120,11 @@ struct parser_data #else os_t *stoks; #endif + /* The following is number of syntax terminal and syntax rules being + read. */ + int nsterm, nsrule; }; -/* The following vlos contain all syntax terminal and syntax rule - structures. */ -#ifndef __cplusplus -static vlo_t sterms, srules; -#else -static vlo_t *sterms, *srules; -#endif - /* Forward declarations. */ extern int yyerror (struct parser_data *data, const char *str); union YYSTYPE; @@ -165,8 +167,8 @@ terms : terms IDENT number term.repr = (char *) $2; term.code = $3; - term.num = VLO_LENGTH (sterms) / sizeof (term); - VLO_ADD_MEMORY (sterms, &term, sizeof (term)); + term.num = VLO_LENGTH (data->sterms) / sizeof (term); + VLO_ADD_MEMORY (data->sterms, &term, sizeof (term)); } | TERM ; @@ -198,7 +200,7 @@ alt : seq trans OS_TOP_FINISH (data->srhs); rule.trans = (int *) OS_TOP_BEGIN (data->strans); OS_TOP_FINISH (data->strans); - VLO_ADD_MEMORY (srules, &rule, sizeof (rule)); + VLO_ADD_MEMORY (data->srules, &rule, sizeof (rule)); } ; @@ -214,8 +216,8 @@ seq : seq IDENT term.repr = (char *) $2; term.code = term.repr [1]; - term.num = VLO_LENGTH (sterms) / sizeof (term); - VLO_ADD_MEMORY (sterms, &term, sizeof (term)); + term.num = VLO_LENGTH (data->sterms) / sizeof (term); + VLO_ADD_MEMORY (data->sterms, &term, sizeof (term)); OS_TOP_ADD_MEMORY (data->srhs, &term.repr, sizeof (term.repr)); } | @@ -266,9 +268,6 @@ cost : { data->anode_cost = 1;} | NUMBER { data->anode_cost = $1; } ; %% -/* The following is number of syntax terminal and syntax rules being - read. */ -static int nsterm, nsrule; /* The following implements lexical analyzer for yacc code. */ int @@ -430,15 +429,15 @@ set_sgrammar (struct grammar *g, const char *grammar, struct parser_data *data) return code; } OS_CREATE (data->stoks, g->alloc, 0); - VLO_CREATE (sterms, g->alloc, 0); - VLO_CREATE (srules, g->alloc, 0); + VLO_CREATE (data->sterms, g->alloc, 0); + VLO_CREATE (data->srules, g->alloc, 0); OS_CREATE (data->srhs, g->alloc, 0); OS_CREATE (data->strans, g->alloc, 0); data->curr_ch = grammar; yyparse (data); /* sort array of syntax terminals by names. */ - num = VLO_LENGTH (sterms) / sizeof (struct sterm); - arr = (struct sterm *) VLO_BEGIN (sterms); + num = VLO_LENGTH (data->sterms) / sizeof (struct sterm); + arr = (struct sterm *) VLO_BEGIN (data->sterms); qsort (arr, num, sizeof (struct sterm), sterm_name_cmp); /* Check different codes for the same syntax terminal and remove duplicates. */ @@ -464,18 +463,18 @@ set_sgrammar (struct grammar *g, const char *grammar, struct parser_data *data) else if (prev->code != -1) prev->code = term->code; } - VLO_SHORTEN (sterms, (num - j) * sizeof (struct sterm)); + VLO_SHORTEN (data->sterms, (num - j) * sizeof (struct sterm)); num = j; /* sort array of syntax terminals by order number. */ qsort (arr, num, sizeof (struct sterm), sterm_num_cmp); /* Assign codes. */ for (i = 0; i < num; i++) { - term = (struct sterm *) VLO_BEGIN (sterms) + i; + term = (struct sterm *) VLO_BEGIN (data->sterms) + i; if (term->code < 0) term->code = code++; } - nsterm = nsrule = 0; + data->nsterm = data->nsrule = 0; return 0; } @@ -485,8 +484,8 @@ free_sgrammar (struct parser_data *data) { OS_DELETE (data->strans); OS_DELETE (data->srhs); - VLO_DELETE (srules); - VLO_DELETE (sterms); + VLO_DELETE (data->srules); + VLO_DELETE (data->sterms); OS_DELETE (data->stoks); } @@ -496,13 +495,16 @@ sread_terminal (int *code) { struct sterm *term; const char *name; + struct parser_data *data; - term = &((struct sterm *) VLO_BEGIN (sterms))[nsterm]; - if ((char *) term >= (char *) VLO_BOUND (sterms)) + data = (struct parser_data *) + yaep_grammar_getuserptr (yaep_reentrant_hack_grammar (code)); + term = &((struct sterm *) VLO_BEGIN (data->sterms))[data->nsterm]; + if ((char *) term >= (char *) VLO_BOUND (data->sterms)) return NULL; *code = term->code; name = term->repr; - nsterm++; + data->nsterm++; return name; } @@ -512,16 +514,19 @@ sread_rule (const char ***rhs, const char **abs_node, int *anode_cost, { struct srule *rule; const char *lhs; + struct parser_data *data; - rule = &((struct srule *) VLO_BEGIN (srules))[nsrule]; - if ((char *) rule >= (char *) VLO_BOUND (srules)) + data = (struct parser_data *) + yaep_grammar_getuserptr (yaep_reentrant_hack_grammar (anode_cost)); + rule = &((struct srule *) VLO_BEGIN (data->srules))[data->nsrule]; + if ((char *) rule >= (char *) VLO_BOUND (data->srules)) return NULL; lhs = rule->lhs; *rhs = (const char **) rule->rhs; *abs_node = rule->anode; *anode_cost = rule->anode_cost; *transl = rule->trans; - nsrule++; + data->nsrule++; return lhs; } @@ -534,11 +539,15 @@ yaep_parse_grammar (struct grammar *g, int strict_p, const char *description) { int code; struct parser_data data; + void *oldptr; assert (g != NULL); if ((code = set_sgrammar (g, description, &data)) != 0) return code; + oldptr = yaep_grammar_getuserptr (g); + yaep_grammar_setuserptr (g, &data); code = yaep_read_grammar (g, strict_p, sread_terminal, sread_rule); + yaep_grammar_setuserptr (g, oldptr); free_sgrammar (&data); return code; } From 384554c686a2fa1eaec168e6d6625a712c9047cd Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 11 Nov 2018 21:03:39 +0100 Subject: [PATCH 08/12] sgramm.y: use old-style directives Make sgramm.y compatible with older bison versions. --- src/sgramm.y | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/sgramm.y b/src/sgramm.y index 100c4de..20d2ab1 100644 --- a/src/sgramm.y +++ b/src/sgramm.y @@ -133,8 +133,9 @@ extern int yyparse (struct parser_data *data); %} -%define api.pure full -%param {struct parser_data *data} +%pure-parser +%lex-param {struct parser_data *data} +%parse-param {struct parser_data *data} %union { From b068e485453162876de93bbc46040030e0cb01d9 Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 18 Nov 2018 16:42:32 +0100 Subject: [PATCH 09/12] ansic.l: switch to reentrant mode --- test/C++/test41.cpp | 9 +- test/C/test41.c | 9 +- test/ansic.h | 11 +- test/ansic.l | 237 ++++++++++++++-------------- test/compare_parsers/test_yaep.c | 11 +- test/compare_parsers/test_yyparse.c | 9 +- 6 files changed, 156 insertions(+), 130 deletions(-) diff --git a/test/C++/test41.cpp b/test/C++/test41.cpp index 16e07b6..0aeeb35 100644 --- a/test/C++/test41.cpp +++ b/test/C++/test41.cpp @@ -125,17 +125,21 @@ static void store_lexs( YaepAllocator * alloc ) { #ifdef DEBUG int nt = 0; #endif + yyscan_t scanner; lexs = new os( alloc, 0 ); list = NULL; prev = NULL; - while ((code = yylex ()) > 0) { + code = yylex_init (&scanner); + assert (code == 0); + while ((code = yylex (scanner)) > 0) { #ifdef DEBUG nt++; #endif if (code == IDENTIFIER) { - lexs->top_add_memory (get_yytext (), strlen (get_yytext ()) + 1); + lexs->top_add_memory + (yyget_text (scanner), strlen (yyget_text (scanner)) + 1); lex.id = (char *) lexs->top_begin (); lexs->top_finish (); } @@ -153,6 +157,7 @@ static void store_lexs( YaepAllocator * alloc ) { } lexs->top_finish (); } + yylex_destroy (scanner); #ifdef DEBUG fprintf (stderr, "%d tokens\n", nt); #endif diff --git a/test/C/test41.c b/test/C/test41.c index 75cc594..03f112f 100644 --- a/test/C/test41.c +++ b/test/C/test41.c @@ -131,17 +131,21 @@ static void store_lexs( YaepAllocator * alloc ) { #ifdef DEBUG int nt = 0; #endif + yyscan_t scanner; OS_CREATE( lexs, alloc, 0 ); list = NULL; prev = NULL; - while ((code = yylex ()) > 0) { + code = yylex_init (&scanner); + assert (code == 0); + while ((code = yylex (scanner)) > 0) { #ifdef DEBUG nt++; #endif if (code == IDENTIFIER) { - OS_TOP_ADD_MEMORY (lexs, yytext, strlen (yytext) + 1); + OS_TOP_ADD_MEMORY + (lexs, yyget_text (scanner), strlen (yyget_text (scanner)) + 1); lex.id = OS_TOP_BEGIN (lexs); OS_TOP_FINISH (lexs); } @@ -159,6 +163,7 @@ static void store_lexs( YaepAllocator * alloc ) { } OS_TOP_FINISH (lexs); } + yylex_destroy (scanner); #ifdef DEBUG fprintf (stderr, "%d tokens\n", nt); #endif diff --git a/test/ansic.h b/test/ansic.h index 5d566ca..1626591 100644 --- a/test/ansic.h +++ b/test/ansic.h @@ -101,7 +101,14 @@ struct lex { extern int column; extern int line; -extern int yylex (void); -extern char *get_yytext (void); +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; /* FIXME: needed for reentrancy. + Would be better if flex created its own header. */ +#endif +extern int yylex_init (yyscan_t *yyscanner); +extern int yylex (yyscan_t yyscanner); +extern int yylex_destroy (yyscan_t yyscanner); +extern char *yyget_text (yyscan_t yyscanner); #endif diff --git a/test/ansic.l b/test/ansic.l index 00e562c..9e7b6b5 100644 --- a/test/ansic.l +++ b/test/ansic.l @@ -24,6 +24,8 @@ */ +%option reentrant + D [0-9] L [a-zA-Z_] H [a-fA-F0-9] @@ -42,146 +44,143 @@ IS (u|U|l|L)* #define ECHO #ifdef __cplusplus -static int yyinput(); +static int yyinput( yyscan_t yyscanner ); #define input yyinput #endif -int yywrap (); -void comment (void); -void count (void); +int yywrap ( yyscan_t yyscanner ); +struct yyguts_t; /* FIXME: internal flex type, needed for the comment() + and count() functions, because these functions call + unput() or use yytext, which is allowed only inside a rule. */ +int comment (yyscan_t yyscanner, struct yyguts_t *yyg); +void count (struct yyguts_t *yyg); int check_type (void); %} %% -"/*" { comment(); } - -"__signed__" { count(); return(SIGNED); } -"__signed" { count(); return(SIGNED); } -"__const" { count(); return(CONST); } -"__const__" { count(); return(CONST); } -"__inline" { count(); return(INLINE); } -"__inline__" { count(); return(INLINE); } - -"auto" { count(); return(AUTO); } -"break" { count(); return(BREAK); } -"case" { count(); return(CASE); } -"char" { count(); return(CHAR); } -"const" { count(); return(CONST); } -"continue" { count(); return(CONTINUE); } -"default" { count(); return(DEFAULT); } -"do" { count(); return(DO); } -"double" { count(); return(DOUBLE); } -"else" { count(); return(ELSE); } -"enum" { count(); return(ENUM); } -"extern" { count(); return(EXTERN); } -"float" { count(); return(FLOAT); } -"for" { count(); return(FOR); } -"goto" { count(); return(GOTO); } -"if" { count(); return(IF); } -"int" { count(); return(INT); } -"long" { count(); return(LONG); } -"register" { count(); return(REGISTER); } -"return" { count(); return(RETURN); } -"short" { count(); return(SHORT); } -"signed" { count(); return(SIGNED); } -"sizeof" { count(); return(SIZEOF); } -"static" { count(); return(STATIC); } -"struct" { count(); return(STRUCT); } -"switch" { count(); return(SWITCH); } -"typedef" { count(); return(TYPEDEF); } -"union" { count(); return(UNION); } -"unsigned" { count(); return(UNSIGNED); } -"void" { count(); return(VOID); } -"volatile" { count(); return(VOLATILE); } -"while" { count(); return(WHILE); } - -{L}({L}|{D})* { count(); return(check_type()); } - -0[xX]{H}+{IS}? { count(); return(CONSTANT); } -0{D}+{IS}? { count(); return(CONSTANT); } -{D}+{IS}? { count(); return(CONSTANT); } -'(\\.|[^\\'])+' { count(); return(CONSTANT); } - -{D}+{E}{FS}? { count(); return(CONSTANT); } -{D}*"."{D}+({E})?{FS}? { count(); return(CONSTANT); } -{D}+"."{D}*({E})?{FS}? { count(); return(CONSTANT); } - -\"(\\.|[^\\"])*\" { count(); return(STRING_LITERAL); } - -">>=" { count(); return(RIGHT_ASSIGN); } -"<<=" { count(); return(LEFT_ASSIGN); } -"+=" { count(); return(ADD_ASSIGN); } -"-=" { count(); return(SUB_ASSIGN); } -"*=" { count(); return(MUL_ASSIGN); } -"/=" { count(); return(DIV_ASSIGN); } -"%=" { count(); return(MOD_ASSIGN); } -"&=" { count(); return(AND_ASSIGN); } -"^=" { count(); return(XOR_ASSIGN); } -"|=" { count(); return(OR_ASSIGN); } -">>" { count(); return(RIGHT_OP); } -"<<" { count(); return(LEFT_OP); } -"++" { count(); return(INC_OP); } -"--" { count(); return(DEC_OP); } -"->" { count(); return(PTR_OP); } -"&&" { count(); return(AND_OP); } -"||" { count(); return(OR_OP); } -"<=" { count(); return(LE_OP); } -">=" { count(); return(GE_OP); } -"==" { count(); return(EQ_OP); } -"!=" { count(); return(NE_OP); } -";" { count(); return(';'); } -"{" { count(); return('{'); } -"}" { count(); return('}'); } -"," { count(); return(','); } -":" { count(); return(':'); } -"=" { count(); return('='); } -"(" { count(); return('('); } -")" { count(); return(')'); } -"[" { count(); return('['); } -"]" { count(); return(']'); } -"..." { count(); return(ELIPSIS); } -"." { count(); return('.'); } -"&" { count(); return('&'); } -"!" { count(); return('!'); } -"~" { count(); return('~'); } -"-" { count(); return('-'); } -"+" { count(); return('+'); } -"*" { count(); return('*'); } -"/" { count(); return('/'); } -"%" { count(); return('%'); } -"<" { count(); return('<'); } -">" { count(); return('>'); } -"^" { count(); return('^'); } -"|" { count(); return('|'); } -"?" { count(); return('?'); } - -[ \t\v\n\f] { count(); } +"/*" { comment (yyscanner, yyg); } + +"__signed__" { count (yyg); return(SIGNED); } +"__signed" { count (yyg); return(SIGNED); } +"__const" { count (yyg); return(CONST); } +"__const__" { count (yyg); return(CONST); } +"__inline" { count (yyg); return(INLINE); } +"__inline__" { count (yyg); return(INLINE); } + +"auto" { count (yyg); return(AUTO); } +"break" { count (yyg); return(BREAK); } +"case" { count (yyg); return(CASE); } +"char" { count (yyg); return(CHAR); } +"const" { count (yyg); return(CONST); } +"continue" { count (yyg); return(CONTINUE); } +"default" { count (yyg); return(DEFAULT); } +"do" { count (yyg); return(DO); } +"double" { count (yyg); return(DOUBLE); } +"else" { count (yyg); return(ELSE); } +"enum" { count (yyg); return(ENUM); } +"extern" { count (yyg); return(EXTERN); } +"float" { count (yyg); return(FLOAT); } +"for" { count (yyg); return(FOR); } +"goto" { count (yyg); return(GOTO); } +"if" { count (yyg); return(IF); } +"int" { count (yyg); return(INT); } +"long" { count (yyg); return(LONG); } +"register" { count (yyg); return(REGISTER); } +"return" { count (yyg); return(RETURN); } +"short" { count (yyg); return(SHORT); } +"signed" { count (yyg); return(SIGNED); } +"sizeof" { count (yyg); return(SIZEOF); } +"static" { count (yyg); return(STATIC); } +"struct" { count (yyg); return(STRUCT); } +"switch" { count (yyg); return(SWITCH); } +"typedef" { count (yyg); return(TYPEDEF); } +"union" { count (yyg); return(UNION); } +"unsigned" { count (yyg); return(UNSIGNED); } +"void" { count (yyg); return(VOID); } +"volatile" { count (yyg); return(VOLATILE); } +"while" { count (yyg); return(WHILE); } + +{L}({L}|{D})* { count (yyg); return(check_type()); } + +0[xX]{H}+{IS}? { count (yyg); return(CONSTANT); } +0{D}+{IS}? { count (yyg); return(CONSTANT); } +{D}+{IS}? { count (yyg); return(CONSTANT); } +'(\\.|[^\\'])+' { count (yyg); return(CONSTANT); } + +{D}+{E}{FS}? { count (yyg); return(CONSTANT); } +{D}*"."{D}+({E})?{FS}? { count (yyg); return(CONSTANT); } +{D}+"."{D}*({E})?{FS}? { count (yyg); return(CONSTANT); } + +\"(\\.|[^\\"])*\" { count (yyg); return(STRING_LITERAL); } + +">>=" { count (yyg); return(RIGHT_ASSIGN); } +"<<=" { count (yyg); return(LEFT_ASSIGN); } +"+=" { count (yyg); return(ADD_ASSIGN); } +"-=" { count (yyg); return(SUB_ASSIGN); } +"*=" { count (yyg); return(MUL_ASSIGN); } +"/=" { count (yyg); return(DIV_ASSIGN); } +"%=" { count (yyg); return(MOD_ASSIGN); } +"&=" { count (yyg); return(AND_ASSIGN); } +"^=" { count (yyg); return(XOR_ASSIGN); } +"|=" { count (yyg); return(OR_ASSIGN); } +">>" { count (yyg); return(RIGHT_OP); } +"<<" { count (yyg); return(LEFT_OP); } +"++" { count (yyg); return(INC_OP); } +"--" { count (yyg); return(DEC_OP); } +"->" { count (yyg); return(PTR_OP); } +"&&" { count (yyg); return(AND_OP); } +"||" { count (yyg); return(OR_OP); } +"<=" { count (yyg); return(LE_OP); } +">=" { count (yyg); return(GE_OP); } +"==" { count (yyg); return(EQ_OP); } +"!=" { count (yyg); return(NE_OP); } +";" { count (yyg); return(';'); } +"{" { count (yyg); return('{'); } +"}" { count (yyg); return('}'); } +"," { count (yyg); return(','); } +":" { count (yyg); return(':'); } +"=" { count (yyg); return('='); } +"(" { count (yyg); return('('); } +")" { count (yyg); return(')'); } +"[" { count (yyg); return('['); } +"]" { count (yyg); return(']'); } +"..." { count (yyg); return(ELIPSIS); } +"." { count (yyg); return('.'); } +"&" { count (yyg); return('&'); } +"!" { count (yyg); return('!'); } +"~" { count (yyg); return('~'); } +"-" { count (yyg); return('-'); } +"+" { count (yyg); return('+'); } +"*" { count (yyg); return('*'); } +"/" { count (yyg); return('/'); } +"%" { count (yyg); return('%'); } +"<" { count (yyg); return('<'); } +">" { count (yyg); return('>'); } +"^" { count (yyg); return('^'); } +"|" { count (yyg); return('|'); } +"?" { count (yyg); return('?'); } + +[ \t\v\n\f] { count (yyg); } . { /* ignore bad characters */ } %% -char * -get_yytext (void) -{ - return yytext; -} - int -yywrap() +yywrap (yyscan_t unused) { return(1); } -void -comment(void) +int +comment (yyscan_t yyscanner, struct yyguts_t *yyg) { char c, c1; loop: - while ((c = input()) != '*' && c != 0) + while ((c = input (yyscanner)) != '*' && c != 0) /*putchar(c)*/; - if ((c1 = input()) != '/' && c != 0) + if ((c1 = input (yyscanner)) != '/' && c != 0) { unput(c1); goto loop; @@ -192,7 +191,7 @@ loop: } void -count(void) +count (struct yyguts_t *yyg) { int i; diff --git a/test/compare_parsers/test_yaep.c b/test/compare_parsers/test_yaep.c index 2ce2171..12bf2a8 100644 --- a/test/compare_parsers/test_yaep.c +++ b/test/compare_parsers/test_yaep.c @@ -97,7 +97,7 @@ #define _BOOL 6006 #define _COMPLEX 7006 #define _IMAGINARY 8006 - + #include "test_common.c" int @@ -127,17 +127,21 @@ static void store_lexs( YaepAllocator * alloc ) { #ifdef DEBUG int nt = 0; #endif + yyscan_t scanner; OS_CREATE( lexs, alloc, 0 ); list = NULL; prev = NULL; - while ((code = yylex ()) > 0) { + code = yylex_init (&scanner); + assert (code == 0); + while ((code = yylex (scanner)) > 0) { #ifdef DEBUG nt++; #endif if (code == IDENTIFIER) { - OS_TOP_ADD_MEMORY (lexs, yytext, strlen (yytext) + 1); + OS_TOP_ADD_MEMORY + (lexs, yyget_text (scanner), strlen (yyget_text (scanner)) + 1); lex.id = OS_TOP_BEGIN (lexs); OS_TOP_FINISH (lexs); } @@ -155,6 +159,7 @@ static void store_lexs( YaepAllocator * alloc ) { } OS_TOP_FINISH (lexs); } + yylex_destroy (scanner); #ifdef DEBUG fprintf (stderr, "%d tokens\n", nt); #endif diff --git a/test/compare_parsers/test_yyparse.c b/test/compare_parsers/test_yyparse.c index b50b06e..d9d3bd1 100644 --- a/test/compare_parsers/test_yyparse.c +++ b/test/compare_parsers/test_yyparse.c @@ -73,14 +73,18 @@ get_lex (void) static void store_lexs( YaepAllocator * alloc ) { struct lex lex, *prev; int code; + yyscan_t scanner; OS_CREATE( lexs, alloc, 0 ); list = NULL; prev = NULL; - while ((code = yylex ()) > 0) { + code = yylex_init (&scanner); + assert (code == 0); + while ((code = yylex (scanner)) > 0) { if (code == IDENTIFIER) { - OS_TOP_ADD_MEMORY (lexs, yytext, strlen (yytext) + 1); + OS_TOP_ADD_MEMORY + (lexs, yyget_text (scanner), strlen (yyget_text (scanner)) + 1); lex.id = OS_TOP_BEGIN (lexs); OS_TOP_FINISH (lexs); } @@ -98,6 +102,7 @@ static void store_lexs( YaepAllocator * alloc ) { } OS_TOP_FINISH (lexs); } + yylex_destroy (scanner); } main() From 7f24849ff6e1e52a84b51290db689f1e2941bfef Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 18 Nov 2018 17:19:23 +0100 Subject: [PATCH 10/12] ansic.l: eliminate global column and line vars --- test/C++/test41.cpp | 27 +++++++++++---------------- test/C/test41.c | 27 +++++++++++---------------- test/ansic.h | 5 +---- test/ansic.l | 10 ++++++---- test/compare_parsers/test_yaep.c | 8 +++----- test/compare_parsers/test_yyparse.c | 8 +++----- 6 files changed, 35 insertions(+), 50 deletions(-) diff --git a/test/C++/test41.cpp b/test/C++/test41.cpp index 0aeeb35..53940cf 100644 --- a/test/C++/test41.cpp +++ b/test/C++/test41.cpp @@ -43,9 +43,6 @@ static os_t *lexs; static struct lex *list; static struct lex *curr = NULL; -int column = 0; -int line = 1; - static hash_table_t table; static unsigned @@ -103,20 +100,19 @@ int find_typedef (const char *id, int level) } int -get_lex (void) +get_lex (void **attr) { if (curr == NULL) curr = list; else curr = curr->next; if (curr == NULL) - return 0; - line = curr->line; - column = curr->column; - if (curr->code == IDENTIFIER) - return IDENTIFIER; - else - return curr->code; + { + *attr = NULL; + return 0; + } + *attr = (void *) (ptrdiff_t) curr->line; + return curr->code; } static void store_lexs( YaepAllocator * alloc ) { @@ -130,7 +126,9 @@ static void store_lexs( YaepAllocator * alloc ) { lexs = new os( alloc, 0 ); list = NULL; prev = NULL; - code = yylex_init (&scanner); + lex.column = 0; + lex.line = 1; + code = yylex_init_extra (&lex, &scanner); assert (code == 0); while ((code = yylex (scanner)) > 0) { #ifdef DEBUG @@ -146,8 +144,6 @@ static void store_lexs( YaepAllocator * alloc ) { else lex.id = NULL; lex.code = code; - lex.line = line; - lex.column = column; lex.next = NULL; lexs->top_add_memory (&lex, sizeof (lex)); if (prev == NULL) @@ -199,8 +195,7 @@ test_read_token (void **attr) { int code; - *attr = (void *) (ptrdiff_t) line; - code = get_lex (); + code = get_lex (attr); if (code <= 0) return -1; return code; diff --git a/test/C/test41.c b/test/C/test41.c index 03f112f..9a33662 100644 --- a/test/C/test41.c +++ b/test/C/test41.c @@ -45,9 +45,6 @@ static os_t lexs; static struct lex *list; static struct lex *curr = NULL; -int column = 0; -int line = 1; - static hash_table_t table; static unsigned @@ -105,20 +102,19 @@ int find_typedef (const char *id, int level) } int -get_lex (void) +get_lex (void **attr) { if (curr == NULL) curr = list; else curr = curr->next; if (curr == NULL) - return 0; - line = curr->line; - column = curr->column; - if (curr->code == IDENTIFIER) - return IDENTIFIER; - else - return curr->code; + { + *attr = NULL; + return 0; + } + *attr = (void *) (ptrdiff_t) curr->line; + return curr->code; } #define yylex yylex1 @@ -136,7 +132,9 @@ static void store_lexs( YaepAllocator * alloc ) { OS_CREATE( lexs, alloc, 0 ); list = NULL; prev = NULL; - code = yylex_init (&scanner); + lex.column = 0; + lex.line = 1; + code = yylex_init_extra (&lex, &scanner); assert (code == 0); while ((code = yylex (scanner)) > 0) { #ifdef DEBUG @@ -152,8 +150,6 @@ static void store_lexs( YaepAllocator * alloc ) { else lex.id = NULL; lex.code = code; - lex.line = line; - lex.column = column; lex.next = NULL; OS_TOP_ADD_MEMORY (lexs, &lex, sizeof (lex)); if (prev == NULL) @@ -175,8 +171,7 @@ test_read_token_from_lex (void **attr) { int code; - *attr = (void *) (ptrdiff_t) line; - code = get_lex (); + code = get_lex (attr); if (code <= 0) return -1; return code; diff --git a/test/ansic.h b/test/ansic.h index 1626591..ee54f6f 100644 --- a/test/ansic.h +++ b/test/ansic.h @@ -98,15 +98,12 @@ struct lex { struct lex *next; }; -extern int column; -extern int line; - #ifndef YY_TYPEDEF_YY_SCANNER_T #define YY_TYPEDEF_YY_SCANNER_T typedef void* yyscan_t; /* FIXME: needed for reentrancy. Would be better if flex created its own header. */ #endif -extern int yylex_init (yyscan_t *yyscanner); +extern int yylex_init_extra (struct lex *lex, yyscan_t *yyscanner); extern int yylex (yyscan_t yyscanner); extern int yylex_destroy (yyscan_t yyscanner); extern char *yyget_text (yyscan_t yyscanner); diff --git a/test/ansic.l b/test/ansic.l index 9e7b6b5..5c42e60 100644 --- a/test/ansic.l +++ b/test/ansic.l @@ -25,6 +25,7 @@ */ %option reentrant +%option extra-type="struct lex *" D [0-9] L [a-zA-Z_] @@ -194,17 +195,18 @@ void count (struct yyguts_t *yyg) { int i; + struct lex *lex = yyget_extra (yyg); for (i = 0; yytext[i] != '\0'; i++) if (yytext[i] == '\n') { - column = 0; - line++; + lex->column = 0; + lex->line++; } else if (yytext[i] == '\t') - column += 8 - (column % 8); + lex->column += 8 - (lex->column % 8); else - column++; + lex->column++; ECHO; } diff --git a/test/compare_parsers/test_yaep.c b/test/compare_parsers/test_yaep.c index 12bf2a8..468eef2 100644 --- a/test/compare_parsers/test_yaep.c +++ b/test/compare_parsers/test_yaep.c @@ -109,8 +109,6 @@ get_lex (void) curr = curr->next; if (curr == NULL) return 0; - line = curr->line; - column = curr->column; if (curr->code == IDENTIFIER) return IDENTIFIER; else @@ -132,7 +130,9 @@ static void store_lexs( YaepAllocator * alloc ) { OS_CREATE( lexs, alloc, 0 ); list = NULL; prev = NULL; - code = yylex_init (&scanner); + lex.column = 0; + lex.line = 1; + code = yylex_init_extra (&lex, &scanner); assert (code == 0); while ((code = yylex (scanner)) > 0) { #ifdef DEBUG @@ -148,8 +148,6 @@ static void store_lexs( YaepAllocator * alloc ) { else lex.id = NULL; lex.code = code; - lex.line = line; - lex.column = column; lex.next = NULL; OS_TOP_ADD_MEMORY (lexs, &lex, sizeof (lex)); if (prev == NULL) diff --git a/test/compare_parsers/test_yyparse.c b/test/compare_parsers/test_yyparse.c index d9d3bd1..5787c80 100644 --- a/test/compare_parsers/test_yyparse.c +++ b/test/compare_parsers/test_yyparse.c @@ -53,8 +53,6 @@ get_lex (void) curr = curr->next; if (curr == NULL) return 0; - line = curr->line; - column = curr->column; if (curr->code == IDENTIFIER) { yylval = curr->id; @@ -78,7 +76,9 @@ static void store_lexs( YaepAllocator * alloc ) { OS_CREATE( lexs, alloc, 0 ); list = NULL; prev = NULL; - code = yylex_init (&scanner); + lex.column = 0; + lex.line = 1; + code = yylex_init_extra (&lex, &scanner); assert (code == 0); while ((code = yylex (scanner)) > 0) { if (code == IDENTIFIER) @@ -91,8 +91,6 @@ static void store_lexs( YaepAllocator * alloc ) { else lex.id = NULL; lex.code = code; - lex.line = line; - lex.column = column; lex.next = NULL; OS_TOP_ADD_MEMORY (lexs, &lex, sizeof (lex)); if (prev == NULL) From 0cc9e2e74caf4885cfa68c09521d1dcea2089d10 Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 18 Nov 2018 18:00:15 +0100 Subject: [PATCH 11/12] ansic.y: switch to pure parsing --- test/compare_parsers/ansic.y | 2 ++ test/compare_parsers/test_yyparse.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/compare_parsers/ansic.y b/test/compare_parsers/ansic.y index 0e18a69..07aac6e 100644 --- a/test/compare_parsers/ansic.y +++ b/test/compare_parsers/ansic.y @@ -28,6 +28,8 @@ #define YYSTYPE string_t %} +%pure-parser + %token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF %token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP %token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN diff --git a/test/compare_parsers/test_yyparse.c b/test/compare_parsers/test_yyparse.c index 5787c80..e0d302e 100644 --- a/test/compare_parsers/test_yyparse.c +++ b/test/compare_parsers/test_yyparse.c @@ -45,7 +45,7 @@ static int level = 0; #undef yylex int -get_lex (void) +get_lex (YYSTYPE *lvalp) { if (curr == NULL) curr = list; @@ -55,7 +55,7 @@ get_lex (void) return 0; if (curr->code == IDENTIFIER) { - yylval = curr->id; + *lvalp = curr->id; if (!after_struct_flag && find_typedef (curr->id, level)) return TYPE_NAME; else From 951188c2fe6429acd29cf763d5b8ae60466c40f6 Mon Sep 17 00:00:00 2001 From: Alexander Klauer Date: Sun, 18 Nov 2018 20:04:41 +0100 Subject: [PATCH 12/12] ansic.y: eliminate line and column global vars --- test/compare_parsers/ansic.y | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/test/compare_parsers/ansic.y b/test/compare_parsers/ansic.y index 07aac6e..1ebccd9 100644 --- a/test/compare_parsers/ansic.y +++ b/test/compare_parsers/ansic.y @@ -26,6 +26,8 @@ %{ #define YYSTYPE string_t + +void yyerror (char *s); %} %pure-parser @@ -470,11 +472,28 @@ identifier #include -extern int column; -extern int line; - -yyerror(s) -char *s; +void +yyerror (char *s) { - fprintf (stderr, "syntax error line - %d, column - %d\n", line, column + 1); + int line, column; + + if (curr == NULL) + { + curr = list; + } + else + { + curr = curr->next; + } + if (curr == NULL) + { + line = -1; + column = -1; + } + else + { + line = curr->line; + column = curr->column; + } + fprintf (stderr, "syntax error line - %d, column - %d\n", line, column + 1); }