Skip to content

Commit

Permalink
Fix issue #1690
Browse files Browse the repository at this point in the history
Regular expressions in which a character class is immediately followed by a dash (-) failed to compile with a "bad character range" error. For example, /[0-9]-2/ was rejected even though it is a valid regexp.
  • Loading branch information
plusvic committed Apr 25, 2022
1 parent 7e00915 commit 3209132
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 102 deletions.
135 changes: 62 additions & 73 deletions libyara/re_lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,9 @@ static const flex_int16_t yy_accept[45] =
{ 0,
0, 0, 0, 0, 30, 7, 7, 28, 6, 17,
7, 27, 29, 26, 18, 5, 3, 16, 15, 13,
11, 9, 14, 12, 10, 8, 0, 0, 0, 0,
25, 23, 21, 24, 22, 20, 0, 4, 0, 1,
2, 19, 0, 0
11, 9, 14, 12, 10, 8, 0, 0, 0, 25,
23, 21, 24, 22, 20, 0, 4, 0, 1, 2,
19, 0, 0, 0
} ;

static const YY_CHAR yy_ec[256] =
Expand Down Expand Up @@ -632,61 +632,57 @@ static const YY_CHAR yy_ec[256] =

static const YY_CHAR yy_meta[23] =
{ 0,
1, 2, 1, 1, 3, 4, 4, 4, 4, 1,
1, 1, 1, 5, 1, 4, 4, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 1,
1, 1, 1, 3, 1, 2, 2, 1, 1, 1,
1, 1
} ;

static const flex_int16_t yy_base[51] =
static const flex_int16_t yy_base[50] =
{ 0,
0, 20, 3, 5, 50, 89, 89, 89, 10, 36,
0, 44, 43, 47, 38, 89, 26, 33, 89, 89,
89, 89, 89, 89, 89, 89, 4, 5, 0, 33,
32, 31, 29, 26, 24, 23, 15, 89, 8, 89,
89, 89, 0, 89, 67, 72, 77, 82, 84, 4
0, 20, 3, 5, 37, 76, 76, 76, 10, 36,
0, 31, 29, 47, 76, 76, 17, 23, 76, 76,
76, 76, 76, 76, 76, 76, 4, 5, 0, 76,
76, 76, 76, 76, 76, 0, 76, 8, 76, 76,
76, 0, 23, 76, 67, 70, 73, 18, 6
} ;

static const flex_int16_t yy_def[51] =
static const flex_int16_t yy_def[50] =
{ 0,
45, 45, 46, 46, 44, 44, 44, 44, 44, 44,
44, 44, 44, 47, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 48, 44,
44, 44, 44, 44, 44, 44, 49, 44, 44, 44,
44, 44, 50, 0, 44, 44, 44, 44, 44, 44
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 47, 44,
44, 44, 44, 44, 44, 48, 44, 44, 44, 44,
44, 49, 44, 0, 44, 44, 44, 44, 44
} ;

static const flex_int16_t yy_nxt[112] =
static const flex_int16_t yy_nxt[99] =
{ 0,
44, 7, 8, 27, 13, 28, 13, 30, 27, 39,
28, 9, 10, 39, 8, 14, 15, 14, 15, 29,
11, 7, 8, 16, 17, 40, 41, 29, 29, 40,
29, 9, 10, 29, 8, 29, 29, 29, 18, 38,
11, 18, 29, 19, 20, 21, 22, 29, 29, 44,
44, 23, 24, 25, 26, 31, 32, 33, 44, 44,
44, 44, 44, 34, 35, 36, 37, 6, 6, 6,
6, 6, 12, 12, 12, 12, 12, 30, 44, 30,
30, 30, 42, 42, 42, 42, 43, 43, 5, 44,
44, 7, 8, 27, 13, 28, 13, 43, 27, 38,
28, 9, 10, 38, 8, 14, 15, 14, 15, 42,
11, 7, 8, 16, 17, 39, 40, 29, 18, 39,
37, 9, 10, 29, 8, 29, 44, 44, 44, 44,
11, 18, 44, 19, 20, 21, 22, 44, 44, 44,
44, 23, 24, 25, 26, 30, 31, 32, 44, 44,
44, 44, 44, 33, 34, 35, 36, 6, 6, 6,
12, 12, 12, 41, 41, 5, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44

44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44
} ;

static const flex_int16_t yy_chk[112] =
static const flex_int16_t yy_chk[99] =
{ 0,
0, 1, 1, 11, 3, 11, 4, 50, 28, 27,
28, 1, 1, 39, 1, 3, 3, 4, 4, 37,
1, 2, 2, 9, 9, 27, 28, 36, 35, 39,
34, 2, 2, 33, 2, 32, 31, 30, 18, 17,
2, 10, 15, 10, 10, 10, 10, 13, 12, 5,
0, 1, 1, 11, 3, 11, 4, 49, 28, 27,
28, 1, 1, 38, 1, 3, 3, 4, 4, 48,
1, 2, 2, 9, 9, 27, 28, 43, 18, 38,
17, 2, 2, 13, 2, 12, 5, 0, 0, 0,
2, 10, 0, 10, 10, 10, 10, 0, 0, 0,
0, 10, 10, 10, 10, 14, 14, 14, 0, 0,
0, 0, 0, 14, 14, 14, 14, 45, 45, 45,
45, 45, 46, 46, 46, 46, 46, 47, 0, 47,
47, 47, 48, 48, 48, 48, 49, 49, 44, 44,
46, 46, 46, 47, 47, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44, 44, 44, 44, 44, 44, 44, 44

44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
44
} ;

/* Table of booleans, true if rule could match eol. */
Expand Down Expand Up @@ -791,10 +787,10 @@ int read_escaped_char(
yyscan_t yyscanner,
uint8_t* escaped_char);

#line 794 "re_lexer.c"
#line 790 "re_lexer.c"
#define YY_NO_UNISTD_H 1

#line 797 "re_lexer.c"
#line 793 "re_lexer.c"

#define INITIAL 0
#define char_class 1
Expand Down Expand Up @@ -1070,7 +1066,7 @@ YY_DECL
#line 111 "re_lexer.l"


#line 1073 "re_lexer.c"
#line 1069 "re_lexer.c"

while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
{
Expand Down Expand Up @@ -1378,6 +1374,11 @@ YY_RULE_SETUP
{

// A range inside a character class.
//
// Matches a hex escape sequence (e.g. \x01), or any single character
// that is not ] or \, followed by -, followed by any character that
// is not ]
//
// [abc0-9]
// ^- matching here

Expand Down Expand Up @@ -1422,67 +1423,55 @@ YY_RULE_SETUP
YY_BREAK
case 20:
YY_RULE_SETUP
#line 357 "re_lexer.l"
#line 362 "re_lexer.l"
{

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
LEX_ENV->re_class.bitmap[i] |= word_chars[i];
}
YY_BREAK
case 21:
YY_RULE_SETUP
#line 366 "re_lexer.l"
#line 369 "re_lexer.l"
{

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
LEX_ENV->re_class.bitmap[i] |= ~word_chars[i];
}
YY_BREAK
case 22:
YY_RULE_SETUP
#line 375 "re_lexer.l"
#line 376 "re_lexer.l"
{

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
LEX_ENV->re_class.bitmap[i] |= space_chars[i];
}
YY_BREAK
case 23:
YY_RULE_SETUP
#line 384 "re_lexer.l"
#line 383 "re_lexer.l"
{

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
LEX_ENV->re_class.bitmap[i] |= ~space_chars[i];
}
YY_BREAK
case 24:
YY_RULE_SETUP
#line 393 "re_lexer.l"
#line 390 "re_lexer.l"
{

char c;

for (c = '0'; c <= '9'; c++)
for (char c = '0'; c <= '9'; c++)
LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8;
}
YY_BREAK
case 25:
YY_RULE_SETUP
#line 402 "re_lexer.l"
#line 397 "re_lexer.l"
{

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
{
// digits 0-7 occupy byte 6 (zero-based) of the bitmap; leave that byte alone
if (i == 6)
Expand All @@ -1499,7 +1488,7 @@ YY_RULE_SETUP
YY_BREAK
case 26:
YY_RULE_SETUP
#line 422 "re_lexer.l"
#line 415 "re_lexer.l"
{

uint8_t c;
Expand All @@ -1517,7 +1506,7 @@ YY_RULE_SETUP
YY_BREAK
case 27:
YY_RULE_SETUP
#line 438 "re_lexer.l"
#line 431 "re_lexer.l"
{

if (yytext[0] >= 32 && yytext[0] < 127)
Expand All @@ -1535,7 +1524,7 @@ YY_RULE_SETUP
}
YY_BREAK
case YY_STATE_EOF(char_class):
#line 455 "re_lexer.l"
#line 448 "re_lexer.l"
{

// End of regexp reached while scanning a character class.
Expand All @@ -1546,7 +1535,7 @@ case YY_STATE_EOF(char_class):
YY_BREAK
case 28:
YY_RULE_SETUP
#line 464 "re_lexer.l"
#line 457 "re_lexer.l"
{

if (yytext[0] >= 32 && yytext[0] < 127)
Expand All @@ -1561,18 +1550,18 @@ YY_RULE_SETUP
}
YY_BREAK
case YY_STATE_EOF(INITIAL):
#line 478 "re_lexer.l"
#line 471 "re_lexer.l"
{

yyterminate();
}
YY_BREAK
case 29:
YY_RULE_SETUP
#line 483 "re_lexer.l"
#line 476 "re_lexer.l"
ECHO;
YY_BREAK
#line 1575 "re_lexer.c"
#line 1564 "re_lexer.c"

case YY_END_OF_BUFFER:
{
Expand Down Expand Up @@ -2721,7 +2710,7 @@ void yyfree (void * ptr , yyscan_t yyscanner)

#define YYTABLES_NAME "yytables"

#line 483 "re_lexer.l"
#line 476 "re_lexer.l"


int escaped_char_value(
Expand Down
31 changes: 12 additions & 19 deletions libyara/re_lexer.l
Original file line number Diff line number Diff line change
Expand Up @@ -308,9 +308,14 @@ hex_digit [0-9a-fA-F]



<char_class>(\\x{hex_digit}{2}|\\.|[^\\])\-[^]] {
<char_class>(\\x{hex_digit}{2}|[^]\\])-[^]] {

// A range inside a character class.
//
// Matches a hex escape sequence (e.g. \x01), or any single character
// that is not ] or \, followed by -, followed by any character that
// is not ]
//
// [abc0-9]
// ^- matching here

Expand Down Expand Up @@ -356,54 +361,42 @@ hex_digit [0-9a-fA-F]

<char_class>\\w {

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
LEX_ENV->re_class.bitmap[i] |= word_chars[i];
}


<char_class>\\W {

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
LEX_ENV->re_class.bitmap[i] |= ~word_chars[i];
}


<char_class>\\s {

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
LEX_ENV->re_class.bitmap[i] |= space_chars[i];
}


<char_class>\\S {

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
LEX_ENV->re_class.bitmap[i] |= ~space_chars[i];
}


<char_class>\\d {

char c;

for (c = '0'; c <= '9'; c++)
for (char c = '0'; c <= '9'; c++)
LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8;
}


<char_class>\\D {

int i;

for (i = 0; i < 32; i++)
for (int i = 0; i < 32; i++)
{
// digits 0-7 occupy byte 6 (zero-based) of the bitmap; leave that byte alone
if (i == 6)
Expand Down
Loading

0 comments on commit 3209132

Please sign in to comment.