Skip to content

Commit

Permalink
fix: only disallow xml, xml-stylesheet, xml-model in PITarget (#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
amaanq authored Aug 9, 2023
1 parent 9fcc73c commit 148ad3d
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 33 deletions.
64 changes: 47 additions & 17 deletions common/scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,20 @@ enum TokenType {
PIContent,
Comment,
CharData,
XmlModel,
XmlStylesheet,
};

/// Move the lexer forward by one character.
///
/// Calls the lexer's own `advance` callback, passing `false` as the second
/// argument (tree-sitter's `skip` flag).
static inline void advance(TSLexer *lexer) {
    const bool skip = false;
    lexer->advance(lexer, skip);
}

/// Report whether `chr` may appear after the first character of a PI target.
///
/// Accepted characters are alphanumerics (as classified by `isalnum`), `_`,
/// `:`, `.`, `-` and the middle dot `·`.
///
/// `chr` is a lexer lookahead value and may be any code point, but the
/// <ctype.h> classifiers are only defined for `EOF` and values representable
/// as `unsigned char`. Anything outside 0..0xFF is therefore rejected before
/// `isalnum` is consulted, instead of invoking undefined behaviour.
static inline bool is_valid_pi_char(int32_t chr) {
    const bool in_ctype_range = chr >= 0 && chr <= 0xFF;

    if (in_ctype_range && isalnum(chr)) {
        return true;
    }

    return chr == '_' || chr == ':' || chr == '.' || chr == '-' ||
           chr == L'·';
}

/// Scan for the target of a PI node
static bool scan_pi_target(TSLexer *lexer) {
static bool scan_pi_target(TSLexer *lexer, const bool *valid_symbols) {
bool advanced_once = false, found_x_first = false;

if (isalpha(lexer->lookahead) || lexer->lookahead == '_') {
Expand All @@ -27,27 +34,50 @@ static bool scan_pi_target(TSLexer *lexer) {
}

if (advanced_once) {
while (isalnum(lexer->lookahead) || lexer->lookahead == '_' ||
lexer->lookahead == ':' || lexer->lookahead == '.' ||
lexer->lookahead == L'·' || lexer->lookahead == '-') {
if (lexer->lookahead == 'x' || lexer->lookahead == 'X') {
lexer->mark_end(lexer);
advance(lexer);
if (lexer->lookahead == 'm' || lexer->lookahead == 'M') {
advance(lexer);
if (lexer->lookahead == 'l' || lexer->lookahead == 'L') {
advance(lexer);
return false;
}
}
}

while (is_valid_pi_char(lexer->lookahead)) {
if (found_x_first &&
(lexer->lookahead == 'm' || lexer->lookahead == 'M')) {
advance(lexer);
if (lexer->lookahead == 'l' || lexer->lookahead == 'L') {
advance(lexer);
return false;
if (is_valid_pi_char(lexer->lookahead)) {
found_x_first = false;
bool last_char_hyphen = lexer->lookahead == '-';
advance(lexer);
if (last_char_hyphen) {
// scan for stylesheet/model and disallow that
if (valid_symbols[XmlModel]) {
const char *const word = "model";
int j = 0;
while (word[j] != '\0') {
if (word[j] != lexer->lookahead) {
break;
}
j++;
advance(lexer);
}
if (word[j] == '\0') {
return false;
}
}
if (valid_symbols[XmlStylesheet]) {
const char *const word = "stylesheet";
int j = 0;
while (word[j] != '\0') {
if (word[j] != lexer->lookahead) {
break;
}
j++;
advance(lexer);
}
if (word[j] == '\0') {
return false;
}
}
}
} else {
return false;
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion tree-sitter-dtd/src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ static inline bool in_error_recovery(const bool *valid_symbols) {
bool tree_sitter_dtd_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
if (in_error_recovery(valid_symbols)) return false;

if (valid_symbols[PITarget]) return scan_pi_target(lexer);
if (valid_symbols[PITarget]) return scan_pi_target(lexer, valid_symbols);

if (valid_symbols[PIContent]) return scan_pi_content(lexer);

Expand Down
16 changes: 11 additions & 5 deletions tree-sitter-dtd/src/tree_sitter/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ extern "C" {
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

typedef uint16_t TSStateId;

#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
Expand Down Expand Up @@ -130,9 +129,16 @@ struct TSLanguage {
* Lexer Macros
*/

/*
 * UNUSED: prefix for a declaration that may legitimately go unreferenced,
 * so that the compiler does not warn about it.
 */
#ifdef _MSC_VER
/* MSVC: suppress warning C4101 (unreferenced local variable) for the next line. */
#define UNUSED __pragma(warning(suppress : 4101))
#else
/* Other compilers: GCC/Clang-style `unused` attribute. */
#define UNUSED __attribute__((unused))
#endif

#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
Expand Down Expand Up @@ -166,7 +172,7 @@ struct TSLanguage {
* Parse Table Macros
*/

#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

#define STATE(id) id

Expand All @@ -176,15 +182,15 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
.state = (state_value) \
} \
}}

#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.state = (state_value), \
.repetition = true \
} \
}}
Expand Down
2 changes: 2 additions & 0 deletions tree-sitter-xml/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ module.exports = grammar(DTD, {
$._pi_content,
$.Comment,
$.CharData,
'xml-model',
'xml-stylesheet',
],

inline: $ => [
Expand Down
19 changes: 15 additions & 4 deletions tree-sitter-xml/src/parser.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tree-sitter-xml/src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ bool tree_sitter_xml_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
if (in_error_recovery(valid_symbols)) return false;

if (valid_symbols[PITarget]) return scan_pi_target(lexer);
if (valid_symbols[PITarget]) return scan_pi_target(lexer, valid_symbols);

if (valid_symbols[PIContent]) return scan_pi_content(lexer);

Expand Down
16 changes: 11 additions & 5 deletions tree-sitter-xml/src/tree_sitter/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ extern "C" {
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024

typedef uint16_t TSStateId;

#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
Expand Down Expand Up @@ -130,9 +129,16 @@ struct TSLanguage {
* Lexer Macros
*/

/*
 * UNUSED: prefix for a declaration that may legitimately go unreferenced,
 * so that the compiler does not warn about it.
 */
#ifdef _MSC_VER
/* MSVC: suppress warning C4101 (unreferenced local variable) for the next line. */
#define UNUSED __pragma(warning(suppress : 4101))
#else
/* Other compilers: GCC/Clang-style `unused` attribute. */
#define UNUSED __attribute__((unused))
#endif

#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
Expand Down Expand Up @@ -166,7 +172,7 @@ struct TSLanguage {
* Parse Table Macros
*/

#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)

#define STATE(id) id

Expand All @@ -176,15 +182,15 @@ struct TSLanguage {
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
.state = (state_value) \
} \
}}

#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.state = (state_value), \
.repetition = true \
} \
}}
Expand Down

0 comments on commit 148ad3d

Please sign in to comment.