From d7e67ff54b60d99c90f0ccd2cfe7ae1131481216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Euri=C5=A1?= Date: Wed, 23 Aug 2023 15:26:43 +0200 Subject: [PATCH] Added warning on unknown escape sequences (#1880) * Added warning on unknown escape sequences * fixed memory leaks and different output when using regex as operand with strict-escape parameter * Remove some unwanted characters from strict escaping, add testcases --- cli/yara.c | 12 + cli/yarac.c | 12 + libyara/base64.c | 2 +- libyara/compiler.c | 1 + libyara/grammar.c | 423 ++++++++++++++++---------------- libyara/grammar.y | 17 +- libyara/include/yara/compiler.h | 1 + libyara/include/yara/error.h | 1 + libyara/include/yara/re.h | 8 +- libyara/include/yara/re_lexer.h | 13 +- libyara/parser.c | 38 ++- libyara/re.c | 12 +- libyara/re_lexer.c | 194 ++++++++++----- libyara/re_lexer.l | 112 +++++++-- tests/test-rules.c | 38 ++- tests/util.c | 10 +- tests/util.h | 28 +++ 17 files changed, 625 insertions(+), 297 deletions(-) diff --git a/cli/yara.c b/cli/yara.c index ca4324736a..ea27646fd4 100644 --- a/cli/yara.c +++ b/cli/yara.c @@ -157,6 +157,7 @@ static bool ignore_warnings = false; static bool fast_scan = false; static bool negate = false; static bool print_count_only = false; +static bool strict_escape = false; static bool fail_on_warnings = false; static bool rules_are_compiled = false; static bool disable_console_logs = false; @@ -192,6 +193,12 @@ args_option_t options[] = { &print_count_only, _T("print only number of matches")), + OPT_BOOLEAN( + 'E', + _T("strict-escape"), + &strict_escape, + _T("warn on unknown escape sequences")), + OPT_STRING_MULTI( 'd', _T("define"), @@ -1565,6 +1572,11 @@ int _tmain(int argc, const char_t** argv) yr_compiler_set_callback(compiler, print_compiler_error, &cr); + if (strict_escape) + compiler->strict_escape = true; + else + compiler->strict_escape = false; + if (!compile_files(compiler, argc, argv)) exit_with_code(EXIT_FAILURE); diff --git a/cli/yarac.c b/cli/yarac.c index 8144077400..c8ecaad323 100644 --- a/cli/yarac.c +++ b/cli/yarac.c @@ -73,6 +73,7 @@ static char* ext_vars[MAX_ARGS_EXT_VAR + 1]; static bool ignore_warnings = false; static bool show_version = false; static bool show_help = false; +static bool strict_escape = false; static bool fail_on_warnings = false; static long max_strings_per_rule = DEFAULT_MAX_STRINGS_PER_RULE; @@ -103,6 +104,12 @@ args_option_t options[] = { OPT_BOOLEAN('h', _T("help"), &show_help, _T("show this help and exit")), + OPT_BOOLEAN( + 'E', + _T("strict-escape"), + &strict_escape, + _T("warn on unknown escape sequences")), + OPT_LONG( 0, _T("max-strings-per-rule"), @@ -233,6 +240,11 @@ int _tmain(int argc, const char_t** argv) yr_compiler_set_callback(compiler, report_error, &cr); + if (strict_escape) + compiler->strict_escape = true; + else + compiler->strict_escape = false; + if (!compile_files(compiler, argc, argv)) exit_with_code(EXIT_FAILURE); diff --git a/libyara/base64.c b/libyara/base64.c index 7bfb4d3119..d1a294ee04 100644 --- a/libyara/base64.c +++ b/libyara/base64.c @@ -361,7 +361,7 @@ int _yr_base64_create_regexp( // printf("%s\n", re_str); FAIL_ON_ERROR_WITH_CLEANUP( - yr_re_parse(re_str, re_ast, re_error), yr_free(re_str)); + yr_re_parse(re_str, re_ast, re_error, RE_PARSER_FLAG_NONE), yr_free(re_str)); yr_free(re_str); diff --git a/libyara/compiler.c b/libyara/compiler.c index 92fee82a79..94c4f3417d 100644 --- a/libyara/compiler.c +++ b/libyara/compiler.c @@ -240,6 +240,7 @@ YR_API int yr_compiler_create(YR_COMPILER** compiler) new_compiler->re_ast_clbk_user_data = NULL; new_compiler->last_error = ERROR_SUCCESS; new_compiler->last_error_line = 0; + new_compiler->strict_escape = false; new_compiler->current_line = 0; new_compiler->file_name_stack_ptr = 0; new_compiler->fixup_stack_head = NULL; diff --git a/libyara/grammar.c b/libyara/grammar.c index bfa4a0addf..cb58f7e6ee 100644 --- a/libyara/grammar.c +++ b/libyara/grammar.c @@ -132,7 +132,7 @@ // fail_if_error() is used in parser actions for aborting the parsing if an // error has occurred. See fail_with_error for details. #define fail_if_error(e) \ - if (e != ERROR_SUCCESS) \ + if (e != ERROR_SUCCESS && e != ERROR_UNKNOWN_ESCAPE_SEQUENCE) \ { \ fail_with_error(e); \ } @@ -949,16 +949,16 @@ static const yytype_int16 yyrline[] = 670, 695, 701, 761, 762, 763, 764, 765, 766, 772, 793, 824, 829, 846, 851, 871, 872, 886, 887, 888, 889, 890, 894, 895, 909, 913, 1008, 1056, 1117, 1162, - 1163, 1167, 1202, 1255, 1297, 1320, 1326, 1332, 1344, 1354, - 1364, 1374, 1384, 1394, 1404, 1414, 1428, 1443, 1454, 1529, - 1567, 1471, 1695, 1706, 1717, 1736, 1755, 1767, 1804, 1810, - 1816, 1815, 1861, 1860, 1904, 1911, 1918, 1925, 1932, 1939, - 1946, 1950, 1958, 1959, 1984, 2004, 2032, 2106, 2134, 2153, - 2164, 2207, 2223, 2243, 2253, 2252, 2261, 2275, 2276, 2281, - 2291, 2306, 2305, 2318, 2319, 2324, 2357, 2382, 2438, 2445, - 2451, 2457, 2467, 2471, 2479, 2491, 2505, 2512, 2519, 2544, - 2556, 2568, 2580, 2595, 2607, 2622, 2665, 2686, 2721, 2756, - 2790, 2815, 2832, 2842, 2852, 2862, 2872, 2892, 2912 + 1163, 1167, 1202, 1255, 1310, 1333, 1339, 1345, 1357, 1367, + 1377, 1387, 1397, 1407, 1417, 1427, 1441, 1456, 1467, 1542, + 1580, 1484, 1708, 1719, 1730, 1749, 1768, 1780, 1817, 1823, + 1829, 1828, 1874, 1873, 1917, 1924, 1931, 1938, 1945, 1952, + 1959, 1963, 1971, 1972, 1997, 2017, 2045, 2119, 2147, 2166, + 2177, 2220, 2236, 2256, 2266, 2265, 2274, 2288, 2289, 2294, + 2304, 2319, 2318, 2331, 2332, 2337, 2370, 2395, 2451, 2458, + 2464, 2470, 2480, 2484, 2492, 2504, 2518, 2525, 2532, 2557, + 2569, 2581, 2593, 2608, 2620, 2635, 2678, 2699, 2734, 2769, + 2803, 2828, 2845, 2855, 2865, 2875, 2885, 2905, 2925 }; #endif @@ -3215,6 +3215,7 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); int result = ERROR_SUCCESS; int re_flags = 0; + int parser_flags = RE_PARSER_FLAG_NONE; if ((yyvsp[0].sized_string)->flags & SIZED_STRING_FLAGS_NO_CASE) re_flags |= RE_FLAGS_NO_CASE; @@ -3222,9 +3223,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); if ((yyvsp[0].sized_string)->flags & SIZED_STRING_FLAGS_DOT_ALL) re_flags |= RE_FLAGS_DOT_ALL; + if (compiler->strict_escape) + parser_flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; + result = yr_re_compile( (yyvsp[0].sized_string)->c_string, re_flags, + parser_flags, compiler->arena, &re_ref, &error); @@ -3234,23 +3239,31 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); if (result == ERROR_INVALID_REGULAR_EXPRESSION) yr_compiler_set_error_extra_info(compiler, error.message); - if (result == ERROR_SUCCESS) + if (result == ERROR_SUCCESS || result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning( + yyscanner, + "unknown escape sequence"); + } result = yr_parser_emit_with_arg_reloc( yyscanner, OP_PUSH, yr_arena_ref_to_ptr(compiler->arena, &re_ref), NULL, NULL); + } fail_if_error(result); (yyval.expression).type = EXPRESSION_TYPE_REGEXP; } -#line 3250 "libyara/grammar.c" +#line 3263 "libyara/grammar.c" break; case 74: /* boolean_expression: expression */ -#line 1298 "libyara/grammar.y" +#line 1311 "libyara/grammar.y" { if ((yyvsp[0].expression).type == EXPRESSION_TYPE_STRING) { @@ -3270,31 +3283,31 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3274 "libyara/grammar.c" +#line 3287 "libyara/grammar.c" break; case 75: /* expression: "" */ -#line 1321 "libyara/grammar.y" +#line 1334 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, 1)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3284 "libyara/grammar.c" +#line 3297 "libyara/grammar.c" break; case 76: /* expression: "" */ -#line 1327 "libyara/grammar.y" +#line 1340 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, 0)); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3294 "libyara/grammar.c" +#line 3307 "libyara/grammar.c" break; case 77: /* expression: primary_expression "" regexp */ -#line 1333 "libyara/grammar.y" +#line 1346 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "matches"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_REGEXP, "matches"); @@ -3306,11 +3319,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3310 "libyara/grammar.c" +#line 3323 "libyara/grammar.c" break; case 78: /* expression: primary_expression "" primary_expression */ -#line 1345 "libyara/grammar.y" +#line 1358 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "contains"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "contains"); @@ -3320,11 +3333,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3324 "libyara/grammar.c" +#line 3337 "libyara/grammar.c" break; case 79: /* expression: primary_expression "" primary_expression */ -#line 1355 "libyara/grammar.y" +#line 1368 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "icontains"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "icontains"); @@ -3334,11 +3347,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3338 "libyara/grammar.c" +#line 3351 "libyara/grammar.c" break; case 80: /* expression: primary_expression "" primary_expression */ -#line 1365 "libyara/grammar.y" +#line 1378 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "startswith"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "startswith"); @@ -3348,11 +3361,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3352 "libyara/grammar.c" +#line 3365 "libyara/grammar.c" break; case 81: /* expression: primary_expression "" primary_expression */ -#line 1375 "libyara/grammar.y" +#line 1388 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "istartswith"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "istartswith"); @@ -3362,11 +3375,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3366 "libyara/grammar.c" +#line 3379 "libyara/grammar.c" break; case 82: /* expression: primary_expression "" primary_expression */ -#line 1385 "libyara/grammar.y" +#line 1398 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "endswith"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "endswith"); @@ -3376,11 +3389,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3380 "libyara/grammar.c" +#line 3393 "libyara/grammar.c" break; case 83: /* expression: primary_expression "" primary_expression */ -#line 1395 "libyara/grammar.y" +#line 1408 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "iendswith"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "iendswith"); @@ -3390,11 +3403,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3394 "libyara/grammar.c" +#line 3407 "libyara/grammar.c" break; case 84: /* expression: primary_expression "" primary_expression */ -#line 1405 "libyara/grammar.y" +#line 1418 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_STRING, "iequals"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_STRING, "iequals"); @@ -3404,11 +3417,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3408 "libyara/grammar.c" +#line 3421 "libyara/grammar.c" break; case 85: /* expression: "string identifier" */ -#line 1415 "libyara/grammar.y" +#line 1428 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, @@ -3422,11 +3435,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3426 "libyara/grammar.c" +#line 3439 "libyara/grammar.c" break; case 86: /* expression: "string identifier" "" primary_expression */ -#line 1429 "libyara/grammar.y" +#line 1442 "libyara/grammar.y" { int result; @@ -3441,11 +3454,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3445 "libyara/grammar.c" +#line 3458 "libyara/grammar.c" break; case 87: /* expression: "string identifier" "" range */ -#line 1444 "libyara/grammar.y" +#line 1457 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-2].c_string), OP_FOUND_IN, YR_UNDEFINED); @@ -3456,11 +3469,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3460 "libyara/grammar.c" +#line 3473 "libyara/grammar.c" break; case 88: /* expression: "" for_expression error */ -#line 1455 "libyara/grammar.y" +#line 1468 "libyara/grammar.y" { // Free all the loop variable identifiers, including the variables for // the current loop (represented by loop_index), and set loop_index to @@ -3477,11 +3490,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); compiler->loop_index = -1; YYERROR; } -#line 3481 "libyara/grammar.c" +#line 3494 "libyara/grammar.c" break; case 89: /* $@6: %empty */ -#line 1529 "libyara/grammar.y" +#line 1542 "libyara/grammar.y" { // var_frame is used for accessing local variables used in this loop. // All local variables are accessed using var_frame as a reference, @@ -3519,11 +3532,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(yr_parser_emit_with_arg( yyscanner, OP_POP_M, var_frame + 2, NULL, NULL)); } -#line 3523 "libyara/grammar.c" +#line 3536 "libyara/grammar.c" break; case 90: /* $@7: %empty */ -#line 1567 "libyara/grammar.y" +#line 1580 "libyara/grammar.y" { YR_LOOP_CONTEXT* loop_ctx = &compiler->loop[compiler->loop_index]; YR_FIXUP* fixup; @@ -3572,11 +3585,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); loop_ctx->start_ref = loop_start_ref; } -#line 3576 "libyara/grammar.c" +#line 3589 "libyara/grammar.c" break; case 91: /* expression: "" for_expression $@6 for_iteration ':' $@7 '(' boolean_expression ')' */ -#line 1616 "libyara/grammar.y" +#line 1629 "libyara/grammar.y" { int32_t jmp_offset; YR_FIXUP* fixup; @@ -3656,11 +3669,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3660 "libyara/grammar.c" +#line 3673 "libyara/grammar.c" break; case 92: /* expression: for_expression "" string_set */ -#line 1696 "libyara/grammar.y" +#line 1709 "libyara/grammar.y" { if ((yyvsp[-2].expression).type == EXPRESSION_TYPE_INTEGER && (yyvsp[-2].expression).value.integer > (yyvsp[0].integer)) { @@ -3671,11 +3684,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3675 "libyara/grammar.c" +#line 3688 "libyara/grammar.c" break; case 93: /* expression: for_expression "" rule_set */ -#line 1707 "libyara/grammar.y" +#line 1720 "libyara/grammar.y" { if ((yyvsp[-2].expression).type == EXPRESSION_TYPE_INTEGER && (yyvsp[-2].expression).value.integer > (yyvsp[0].integer)) { @@ -3686,11 +3699,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3690 "libyara/grammar.c" +#line 3703 "libyara/grammar.c" break; case 94: /* expression: primary_expression '%' "" string_set */ -#line 1718 "libyara/grammar.y" +#line 1731 "libyara/grammar.y" { check_type((yyvsp[-3].expression), EXPRESSION_TYPE_INTEGER, "%"); @@ -3709,11 +3722,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yr_parser_emit_with_arg(yyscanner, OP_OF_PERCENT, OF_STRING_SET, NULL, NULL); } -#line 3713 "libyara/grammar.c" +#line 3726 "libyara/grammar.c" break; case 95: /* expression: primary_expression '%' "" rule_set */ -#line 1737 "libyara/grammar.y" +#line 1750 "libyara/grammar.y" { check_type((yyvsp[-3].expression), EXPRESSION_TYPE_INTEGER, "%"); @@ -3732,11 +3745,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); yr_parser_emit_with_arg(yyscanner, OP_OF_PERCENT, OF_RULE_SET, NULL, NULL); } -#line 3736 "libyara/grammar.c" +#line 3749 "libyara/grammar.c" break; case 96: /* expression: for_expression "" string_set "" range */ -#line 1756 "libyara/grammar.y" +#line 1769 "libyara/grammar.y" { if ((yyvsp[-4].expression).type == EXPRESSION_TYPE_INTEGER && (yyvsp[-4].expression).value.integer > (yyvsp[-2].integer)) { @@ -3748,11 +3761,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3752 "libyara/grammar.c" +#line 3765 "libyara/grammar.c" break; case 97: /* expression: for_expression "" string_set "" primary_expression */ -#line 1768 "libyara/grammar.y" +#line 1781 "libyara/grammar.y" { if ((yyvsp[0].expression).type != EXPRESSION_TYPE_INTEGER) { @@ -3789,30 +3802,30 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3793 "libyara/grammar.c" +#line 3806 "libyara/grammar.c" break; case 98: /* expression: "" boolean_expression */ -#line 1805 "libyara/grammar.y" +#line 1818 "libyara/grammar.y" { yr_parser_emit(yyscanner, OP_NOT, NULL); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3803 "libyara/grammar.c" +#line 3816 "libyara/grammar.c" break; case 99: /* expression: "" boolean_expression */ -#line 1811 "libyara/grammar.y" +#line 1824 "libyara/grammar.y" { yr_parser_emit(yyscanner, OP_DEFINED, NULL); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3812 "libyara/grammar.c" +#line 3825 "libyara/grammar.c" break; case 100: /* $@8: %empty */ -#line 1816 "libyara/grammar.y" +#line 1829 "libyara/grammar.y" { YR_FIXUP* fixup; YR_ARENA_REF jmp_offset_ref; @@ -3834,11 +3847,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fixup->next = compiler->fixup_stack_head; compiler->fixup_stack_head = fixup; } -#line 3838 "libyara/grammar.c" +#line 3851 "libyara/grammar.c" break; case 101: /* expression: boolean_expression "" $@8 boolean_expression */ -#line 1838 "libyara/grammar.y" +#line 1851 "libyara/grammar.y" { YR_FIXUP* fixup; @@ -3861,11 +3874,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3865 "libyara/grammar.c" +#line 3878 "libyara/grammar.c" break; case 102: /* $@9: %empty */ -#line 1861 "libyara/grammar.y" +#line 1874 "libyara/grammar.y" { YR_FIXUP* fixup; YR_ARENA_REF jmp_offset_ref; @@ -3886,11 +3899,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fixup->next = compiler->fixup_stack_head; compiler->fixup_stack_head = fixup; } -#line 3890 "libyara/grammar.c" +#line 3903 "libyara/grammar.c" break; case 103: /* expression: boolean_expression "" $@9 boolean_expression */ -#line 1882 "libyara/grammar.y" +#line 1895 "libyara/grammar.y" { YR_FIXUP* fixup; @@ -3913,99 +3926,99 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3917 "libyara/grammar.c" +#line 3930 "libyara/grammar.c" break; case 104: /* expression: primary_expression "<" primary_expression */ -#line 1905 "libyara/grammar.y" +#line 1918 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, "<", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3928 "libyara/grammar.c" +#line 3941 "libyara/grammar.c" break; case 105: /* expression: primary_expression ">" primary_expression */ -#line 1912 "libyara/grammar.y" +#line 1925 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, ">", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3939 "libyara/grammar.c" +#line 3952 "libyara/grammar.c" break; case 106: /* expression: primary_expression "<=" primary_expression */ -#line 1919 "libyara/grammar.y" +#line 1932 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, "<=", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3950 "libyara/grammar.c" +#line 3963 "libyara/grammar.c" break; case 107: /* expression: primary_expression ">=" primary_expression */ -#line 1926 "libyara/grammar.y" +#line 1939 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, ">=", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3961 "libyara/grammar.c" +#line 3974 "libyara/grammar.c" break; case 108: /* expression: primary_expression "==" primary_expression */ -#line 1933 "libyara/grammar.y" +#line 1946 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, "==", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3972 "libyara/grammar.c" +#line 3985 "libyara/grammar.c" break; case 109: /* expression: primary_expression "!=" primary_expression */ -#line 1940 "libyara/grammar.y" +#line 1953 "libyara/grammar.y" { fail_if_error(yr_parser_reduce_operation( yyscanner, "!=", (yyvsp[-2].expression), (yyvsp[0].expression))); (yyval.expression).type = EXPRESSION_TYPE_BOOLEAN; } -#line 3983 "libyara/grammar.c" +#line 3996 "libyara/grammar.c" break; case 110: /* expression: primary_expression */ -#line 1947 "libyara/grammar.y" +#line 1960 "libyara/grammar.y" { (yyval.expression) = (yyvsp[0].expression); } -#line 3991 "libyara/grammar.c" +#line 4004 "libyara/grammar.c" break; case 111: /* expression: '(' expression ')' */ -#line 1951 "libyara/grammar.y" +#line 1964 "libyara/grammar.y" { (yyval.expression) = (yyvsp[-1].expression); } -#line 3999 "libyara/grammar.c" +#line 4012 "libyara/grammar.c" break; case 112: /* for_iteration: for_variables "" iterator */ -#line 1958 "libyara/grammar.y" +#line 1971 "libyara/grammar.y" { (yyval.integer) = FOR_ITERATION_ITERATOR; } -#line 4005 "libyara/grammar.c" +#line 4018 "libyara/grammar.c" break; case 113: /* for_iteration: "" string_iterator */ -#line 1960 "libyara/grammar.y" +#line 1973 "libyara/grammar.y" { int var_frame; int result = ERROR_SUCCESS; @@ -4026,11 +4039,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = FOR_ITERATION_STRING_SET; } -#line 4030 "libyara/grammar.c" +#line 4043 "libyara/grammar.c" break; case 114: /* for_variables: "identifier" */ -#line 1985 "libyara/grammar.y" +#line 1998 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4050,11 +4063,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); assert(loop_ctx->vars_count <= YR_MAX_LOOP_VARS); } -#line 4054 "libyara/grammar.c" +#line 4067 "libyara/grammar.c" break; case 115: /* for_variables: for_variables ',' "identifier" */ -#line 2005 "libyara/grammar.y" +#line 2018 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4079,11 +4092,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); loop_ctx->vars[loop_ctx->vars_count++].identifier.ptr = (yyvsp[0].c_string); } -#line 4083 "libyara/grammar.c" +#line 4096 "libyara/grammar.c" break; case 116: /* iterator: identifier */ -#line 2033 "libyara/grammar.y" +#line 2046 "libyara/grammar.y" { YR_LOOP_CONTEXT* loop_ctx = &compiler->loop[compiler->loop_index]; @@ -4157,11 +4170,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4161 "libyara/grammar.c" +#line 4174 "libyara/grammar.c" break; case 117: /* iterator: set */ -#line 2107 "libyara/grammar.y" +#line 2120 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4185,11 +4198,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4189 "libyara/grammar.c" +#line 4202 "libyara/grammar.c" break; case 118: /* set: '(' enumeration ')' */ -#line 2135 "libyara/grammar.y" +#line 2148 "libyara/grammar.y" { // $2.count contains the number of items in the enumeration fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[-1].enumeration).count)); @@ -4208,22 +4221,22 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.enumeration).type = (yyvsp[-1].enumeration).type; } -#line 4212 "libyara/grammar.c" +#line 4225 "libyara/grammar.c" break; case 119: /* set: range */ -#line 2154 "libyara/grammar.y" +#line 2167 "libyara/grammar.y" { fail_if_error(yr_parser_emit( yyscanner, OP_ITER_START_INT_RANGE, NULL)); (yyval.enumeration).type = EXPRESSION_TYPE_INTEGER; } -#line 4223 "libyara/grammar.c" +#line 4236 "libyara/grammar.c" break; case 120: /* range: '(' primary_expression ".." primary_expression ')' */ -#line 2165 "libyara/grammar.y" +#line 2178 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4262,11 +4275,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4266 "libyara/grammar.c" +#line 4279 "libyara/grammar.c" break; case 121: /* enumeration: primary_expression */ -#line 2208 "libyara/grammar.y" +#line 2221 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4282,11 +4295,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.enumeration).type = (yyvsp[0].expression).type; (yyval.enumeration).count = 1; } -#line 4286 "libyara/grammar.c" +#line 4299 "libyara/grammar.c" break; case 122: /* enumeration: enumeration ',' primary_expression */ -#line 2224 "libyara/grammar.y" +#line 2237 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4302,38 +4315,38 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.enumeration).type = (yyvsp[-2].enumeration).type; (yyval.enumeration).count = (yyvsp[-2].enumeration).count + 1; } -#line 4306 "libyara/grammar.c" +#line 4319 "libyara/grammar.c" break; case 123: /* string_iterator: string_set */ -#line 2244 "libyara/grammar.y" +#line 2257 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[0].integer))); fail_if_error(yr_parser_emit(yyscanner, OP_ITER_START_STRING_SET, NULL)); } -#line 4316 "libyara/grammar.c" +#line 4329 "libyara/grammar.c" break; case 124: /* $@10: %empty */ -#line 2253 "libyara/grammar.y" +#line 2266 "libyara/grammar.y" { // Push end-of-list marker yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); } -#line 4325 "libyara/grammar.c" +#line 4338 "libyara/grammar.c" break; case 125: /* string_set: '(' $@10 string_enumeration ')' */ -#line 2258 "libyara/grammar.y" +#line 2271 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-1].integer); } -#line 4333 "libyara/grammar.c" +#line 4346 "libyara/grammar.c" break; case 126: /* string_set: "" */ -#line 2262 "libyara/grammar.y" +#line 2275 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, YR_UNDEFINED)); @@ -4343,23 +4356,23 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = count; } -#line 4347 "libyara/grammar.c" +#line 4360 "libyara/grammar.c" break; case 127: /* string_enumeration: string_enumeration_item */ -#line 2275 "libyara/grammar.y" +#line 2288 "libyara/grammar.y" { (yyval.integer) = (yyvsp[0].integer); } -#line 4353 "libyara/grammar.c" +#line 4366 "libyara/grammar.c" break; case 128: /* string_enumeration: string_enumeration ',' string_enumeration_item */ -#line 2276 "libyara/grammar.y" +#line 2289 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); } -#line 4359 "libyara/grammar.c" +#line 4372 "libyara/grammar.c" break; case 129: /* string_enumeration_item: "string identifier" */ -#line 2282 "libyara/grammar.y" +#line 2295 "libyara/grammar.y" { int count = 0; int result = yr_parser_emit_pushes_for_strings(yyscanner, (yyvsp[0].c_string), &count); @@ -4369,11 +4382,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = count; } -#line 4373 "libyara/grammar.c" +#line 4386 "libyara/grammar.c" break; case 130: /* string_enumeration_item: "string identifier with wildcard" */ -#line 2292 "libyara/grammar.y" +#line 2305 "libyara/grammar.y" { int count = 0; int result = yr_parser_emit_pushes_for_strings(yyscanner, (yyvsp[0].c_string), &count); @@ -4383,40 +4396,40 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = count; } -#line 4387 "libyara/grammar.c" +#line 4400 "libyara/grammar.c" break; case 131: /* $@11: %empty */ -#line 2306 "libyara/grammar.y" +#line 2319 "libyara/grammar.y" { // Push end-of-list marker yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); } -#line 4396 "libyara/grammar.c" +#line 4409 "libyara/grammar.c" break; case 132: /* rule_set: '(' $@11 rule_enumeration ')' */ -#line 2311 "libyara/grammar.y" +#line 2324 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-1].integer); } -#line 4404 "libyara/grammar.c" +#line 4417 "libyara/grammar.c" break; case 133: /* rule_enumeration: rule_enumeration_item */ -#line 2318 "libyara/grammar.y" +#line 2331 "libyara/grammar.y" { (yyval.integer) = (yyvsp[0].integer); } -#line 4410 "libyara/grammar.c" +#line 4423 "libyara/grammar.c" break; case 134: /* rule_enumeration: rule_enumeration ',' rule_enumeration_item */ -#line 2319 "libyara/grammar.y" +#line 2332 "libyara/grammar.y" { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); } -#line 4416 "libyara/grammar.c" +#line 4429 "libyara/grammar.c" break; case 135: /* rule_enumeration_item: "identifier" */ -#line 2325 "libyara/grammar.y" +#line 2338 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4449,11 +4462,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = 1; } -#line 4453 "libyara/grammar.c" +#line 4466 "libyara/grammar.c" break; case 136: /* rule_enumeration_item: "identifier" '*' */ -#line 2358 "libyara/grammar.y" +#line 2371 "libyara/grammar.y" { int count = 0; YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( @@ -4474,11 +4487,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = count; } -#line 4478 "libyara/grammar.c" +#line 4491 "libyara/grammar.c" break; case 137: /* for_expression: primary_expression */ -#line 2383 "libyara/grammar.y" +#line 2396 "libyara/grammar.y" { if ((yyvsp[0].expression).type == EXPRESSION_TYPE_INTEGER && !IS_UNDEFINED((yyvsp[0].expression).value.integer)) { @@ -4534,57 +4547,57 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).value.integer = (yyvsp[0].expression).value.integer; } -#line 4538 "libyara/grammar.c" +#line 4551 "libyara/grammar.c" break; case 138: /* for_expression: for_quantifier */ -#line 2439 "libyara/grammar.y" +#line 2452 "libyara/grammar.y" { (yyval.expression).value.integer = (yyvsp[0].expression).value.integer; } -#line 4546 "libyara/grammar.c" +#line 4559 "libyara/grammar.c" break; case 139: /* for_quantifier: "" */ -#line 2446 "libyara/grammar.y" +#line 2459 "libyara/grammar.y" { yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); (yyval.expression).type = EXPRESSION_TYPE_QUANTIFIER; (yyval.expression).value.integer = FOR_EXPRESSION_ALL; } -#line 4556 "libyara/grammar.c" +#line 4569 "libyara/grammar.c" break; case 140: /* for_quantifier: "" */ -#line 2452 "libyara/grammar.y" +#line 2465 "libyara/grammar.y" { yr_parser_emit_push_const(yyscanner, 1); (yyval.expression).type = EXPRESSION_TYPE_QUANTIFIER; (yyval.expression).value.integer = FOR_EXPRESSION_ANY; } -#line 4566 "libyara/grammar.c" +#line 4579 "libyara/grammar.c" break; case 141: /* for_quantifier: "" */ -#line 2458 "libyara/grammar.y" +#line 2471 "libyara/grammar.y" { yr_parser_emit_push_const(yyscanner, 0); (yyval.expression).type = EXPRESSION_TYPE_QUANTIFIER; (yyval.expression).value.integer = FOR_EXPRESSION_NONE; } -#line 4576 "libyara/grammar.c" +#line 4589 "libyara/grammar.c" break; case 142: /* primary_expression: '(' primary_expression ')' */ -#line 2468 "libyara/grammar.y" +#line 2481 "libyara/grammar.y" { (yyval.expression) = (yyvsp[-1].expression); } -#line 4584 "libyara/grammar.c" +#line 4597 "libyara/grammar.c" break; case 143: /* primary_expression: "" */ -#line 2472 "libyara/grammar.y" +#line 2485 "libyara/grammar.y" { fail_if_error(yr_parser_emit( yyscanner, OP_FILESIZE, NULL)); @@ -4592,11 +4605,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4596 "libyara/grammar.c" +#line 4609 "libyara/grammar.c" break; case 144: /* primary_expression: "" */ -#line 2480 "libyara/grammar.y" +#line 2493 "libyara/grammar.y" { yywarning(yyscanner, "using deprecated \"entrypoint\" keyword. Use the \"entry_point\" " @@ -4608,11 +4621,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4612 "libyara/grammar.c" +#line 4625 "libyara/grammar.c" break; case 145: /* primary_expression: "integer function" '(' primary_expression ')' */ -#line 2492 "libyara/grammar.y" +#line 2505 "libyara/grammar.y" { check_type((yyvsp[-1].expression), EXPRESSION_TYPE_INTEGER, "intXXXX or uintXXXX"); @@ -4626,33 +4639,33 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4630 "libyara/grammar.c" +#line 4643 "libyara/grammar.c" break; case 146: /* primary_expression: "integer number" */ -#line 2506 "libyara/grammar.y" +#line 2519 "libyara/grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[0].integer))); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = (yyvsp[0].integer); } -#line 4641 "libyara/grammar.c" +#line 4654 "libyara/grammar.c" break; case 147: /* primary_expression: "floating point number" */ -#line 2513 "libyara/grammar.y" +#line 2526 "libyara/grammar.y" { fail_if_error(yr_parser_emit_with_arg_double( yyscanner, OP_PUSH, (yyvsp[0].double_), NULL, NULL)); (yyval.expression).type = EXPRESSION_TYPE_FLOAT; } -#line 4652 "libyara/grammar.c" +#line 4665 "libyara/grammar.c" break; case 148: /* primary_expression: "text string" */ -#line 2520 "libyara/grammar.y" +#line 2533 "libyara/grammar.y" { YR_ARENA_REF ref; @@ -4677,11 +4690,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_STRING; (yyval.expression).value.sized_string_ref = ref; } -#line 4681 "libyara/grammar.c" +#line 4694 "libyara/grammar.c" break; case 149: /* primary_expression: "string count" "" range */ -#line 2545 "libyara/grammar.y" +#line 2558 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-2].c_string), OP_COUNT_IN, YR_UNDEFINED); @@ -4693,11 +4706,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4697 "libyara/grammar.c" +#line 4710 "libyara/grammar.c" break; case 150: /* primary_expression: "string count" */ -#line 2557 "libyara/grammar.y" +#line 2570 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[0].c_string), OP_COUNT, YR_UNDEFINED); @@ -4709,11 +4722,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4713 "libyara/grammar.c" +#line 4726 "libyara/grammar.c" break; case 151: /* primary_expression: "string offset" '[' primary_expression ']' */ -#line 2569 "libyara/grammar.y" +#line 2582 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-3].c_string), OP_OFFSET, YR_UNDEFINED); @@ -4725,11 +4738,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4729 "libyara/grammar.c" +#line 4742 "libyara/grammar.c" break; case 152: /* primary_expression: "string offset" */ -#line 2581 "libyara/grammar.y" +#line 2594 "libyara/grammar.y" { int result = yr_parser_emit_push_const(yyscanner, 1); @@ -4744,11 +4757,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4748 "libyara/grammar.c" +#line 4761 "libyara/grammar.c" break; case 153: /* primary_expression: "string length" '[' primary_expression ']' */ -#line 2596 "libyara/grammar.y" +#line 2609 "libyara/grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-3].c_string), OP_LENGTH, YR_UNDEFINED); @@ -4760,11 +4773,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4764 "libyara/grammar.c" +#line 4777 "libyara/grammar.c" break; case 154: /* primary_expression: "string length" */ -#line 2608 "libyara/grammar.y" +#line 2621 "libyara/grammar.y" { int result = yr_parser_emit_push_const(yyscanner, 1); @@ -4779,11 +4792,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4783 "libyara/grammar.c" +#line 4796 "libyara/grammar.c" break; case 155: /* primary_expression: identifier */ -#line 2623 "libyara/grammar.y" +#line 2636 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4826,11 +4839,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4830 "libyara/grammar.c" +#line 4843 "libyara/grammar.c" break; case 156: /* primary_expression: '-' primary_expression */ -#line 2666 "libyara/grammar.y" +#line 2679 "libyara/grammar.y" { int result = ERROR_SUCCESS; @@ -4851,11 +4864,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4855 "libyara/grammar.c" +#line 4868 "libyara/grammar.c" break; case 157: /* primary_expression: primary_expression '+' primary_expression */ -#line 2687 "libyara/grammar.y" +#line 2700 "libyara/grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "+", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4890,11 +4903,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4894 "libyara/grammar.c" +#line 4907 "libyara/grammar.c" break; case 158: /* primary_expression: primary_expression '-' primary_expression */ -#line 2722 "libyara/grammar.y" +#line 2735 "libyara/grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "-", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4929,11 +4942,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4933 "libyara/grammar.c" +#line 4946 "libyara/grammar.c" break; case 159: /* primary_expression: primary_expression '*' primary_expression */ -#line 2757 "libyara/grammar.y" +#line 2770 "libyara/grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "*", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4967,11 +4980,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4971 "libyara/grammar.c" +#line 4984 "libyara/grammar.c" break; case 160: /* primary_expression: primary_expression '\\' primary_expression */ -#line 2791 "libyara/grammar.y" +#line 2804 "libyara/grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "\\", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4996,11 +5009,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 5000 "libyara/grammar.c" +#line 5013 "libyara/grammar.c" break; case 161: /* primary_expression: primary_expression '%' primary_expression */ -#line 2816 "libyara/grammar.y" +#line 2829 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "%"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "%"); @@ -5017,11 +5030,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(ERROR_DIVISION_BY_ZERO); } } -#line 5021 "libyara/grammar.c" +#line 5034 "libyara/grammar.c" break; case 162: /* primary_expression: primary_expression '^' primary_expression */ -#line 2833 "libyara/grammar.y" +#line 2846 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "^"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "^"); @@ -5031,11 +5044,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(^, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 5035 "libyara/grammar.c" +#line 5048 "libyara/grammar.c" break; case 163: /* primary_expression: primary_expression '&' primary_expression */ -#line 2843 "libyara/grammar.y" +#line 2856 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "^"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "^"); @@ -5045,11 +5058,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(&, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 5049 "libyara/grammar.c" +#line 5062 "libyara/grammar.c" break; case 164: /* primary_expression: primary_expression '|' primary_expression */ -#line 2853 "libyara/grammar.y" +#line 2866 "libyara/grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "|"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "|"); @@ -5059,11 +5072,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(|, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 5063 "libyara/grammar.c" +#line 5076 "libyara/grammar.c" break; case 165: /* primary_expression: '~' primary_expression */ -#line 2863 "libyara/grammar.y" +#line 2876 "libyara/grammar.y" { check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "~"); @@ -5073,11 +5086,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).value.integer = ((yyvsp[0].expression).value.integer == YR_UNDEFINED) ? YR_UNDEFINED : ~((yyvsp[0].expression).value.integer); } -#line 5077 "libyara/grammar.c" +#line 5090 "libyara/grammar.c" break; case 166: /* primary_expression: primary_expression "<<" primary_expression */ -#line 2873 "libyara/grammar.y" +#line 2886 "libyara/grammar.y" { int result; @@ -5097,11 +5110,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 5101 "libyara/grammar.c" +#line 5114 "libyara/grammar.c" break; case 167: /* primary_expression: primary_expression ">>" primary_expression */ -#line 2893 "libyara/grammar.y" +#line 2906 "libyara/grammar.y" { int result; @@ -5121,19 +5134,19 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 5125 "libyara/grammar.c" +#line 5138 "libyara/grammar.c" break; case 168: /* primary_expression: regexp */ -#line 2913 "libyara/grammar.y" +#line 2926 "libyara/grammar.y" { (yyval.expression) = (yyvsp[0].expression); } -#line 5133 "libyara/grammar.c" +#line 5146 "libyara/grammar.c" break; -#line 5137 "libyara/grammar.c" +#line 5150 "libyara/grammar.c" default: break; } @@ -5357,5 +5370,5 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); return yyresult; } -#line 2918 "libyara/grammar.y" +#line 2931 "libyara/grammar.y" diff --git a/libyara/grammar.y b/libyara/grammar.y index 547e7f640d..005187d21c 100644 --- a/libyara/grammar.y +++ b/libyara/grammar.y @@ -88,7 +88,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // fail_if_error() is used in parser actions for aborting the parsing if an // error has occurred. See fail_with_error for details. #define fail_if_error(e) \ - if (e != ERROR_SUCCESS) \ + if (e != ERROR_SUCCESS && e != ERROR_UNKNOWN_ESCAPE_SEQUENCE) \ { \ fail_with_error(e); \ } @@ -1268,6 +1268,7 @@ regexp int result = ERROR_SUCCESS; int re_flags = 0; + int parser_flags = RE_PARSER_FLAG_NONE; if ($1->flags & SIZED_STRING_FLAGS_NO_CASE) re_flags |= RE_FLAGS_NO_CASE; @@ -1275,9 +1276,13 @@ regexp if ($1->flags & SIZED_STRING_FLAGS_DOT_ALL) re_flags |= RE_FLAGS_DOT_ALL; + if (compiler->strict_escape) + parser_flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; + result = yr_re_compile( $1->c_string, re_flags, + parser_flags, compiler->arena, &re_ref, &error); @@ -1287,13 +1292,21 @@ regexp if (result == ERROR_INVALID_REGULAR_EXPRESSION) yr_compiler_set_error_extra_info(compiler, error.message); - if (result == ERROR_SUCCESS) + if (result == ERROR_SUCCESS || result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning( + yyscanner, + "unknown escape sequence"); + } result = yr_parser_emit_with_arg_reloc( yyscanner, OP_PUSH, yr_arena_ref_to_ptr(compiler->arena, &re_ref), NULL, NULL); + } fail_if_error(result); diff --git a/libyara/include/yara/compiler.h b/libyara/include/yara/compiler.h index a5bd5608f7..9fe80c7af5 100644 --- a/libyara/include/yara/compiler.h +++ b/libyara/include/yara/compiler.h @@ -237,6 +237,7 @@ typedef struct _YR_COMPILER int current_line; int last_error; int last_error_line; + bool strict_escape; jmp_buf error_recovery; diff --git a/libyara/include/yara/error.h b/libyara/include/yara/error.h index 0cd1ea6f39..3621b08b05 100644 --- a/libyara/include/yara/error.h +++ b/libyara/include/yara/error.h @@ -108,6 +108,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ERROR_IDENTIFIER_MATCHES_WILDCARD 63 #define ERROR_INVALID_VALUE 64 #define ERROR_TOO_SLOW_SCANNING 65 +#define ERROR_UNKNOWN_ESCAPE_SEQUENCE 66 #define GOTO_EXIT_ON_ERROR(x) \ { \ diff --git a/libyara/include/yara/re.h b/libyara/include/yara/re.h index 1098c27ca6..160cb0fcda 100644 --- a/libyara/include/yara/re.h +++ b/libyara/include/yara/re.h @@ -101,6 +101,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RE_FLAGS_GREEDY 0x400 #define RE_FLAGS_UNGREEDY 0x800 +enum YR_RE_PARSER_FLAGS { + RE_PARSER_FLAG_NONE = 0 << 0, + RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES = 1 << 0, +}; + typedef int RE_MATCH_CALLBACK_FUNC( const uint8_t* match, int match_length, @@ -155,13 +160,14 @@ int yr_re_fast_exec( void* callback_args, int* matches); -int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error); +int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error, int flags); int yr_re_parse_hex(const char* hex_string, RE_AST** re_ast, RE_ERROR* error); int yr_re_compile( const char* re_string, int flags, + int parser_flags, YR_ARENA* arena, YR_ARENA_REF* ref, RE_ERROR* error); diff --git a/libyara/include/yara/re_lexer.h b/libyara/include/yara/re_lexer.h index 297aa031c1..930e31ab93 100644 --- a/libyara/include/yara/re_lexer.h +++ b/libyara/include/yara/re_lexer.h @@ -30,6 +30,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #undef yyparse #undef yylex #undef yyerror +#undef yywarning #undef yyfatal #undef yychar #undef yydebug @@ -44,6 +45,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define yyparse re_yyparse #define yylex re_yylex #define yyerror re_yyerror +#define yywarning re_yywarning #define yyfatal re_yyfatal #define yychar re_yychar #define yydebug re_yydebug @@ -59,11 +61,15 @@ typedef void* yyscan_t; #define YY_EXTRA_TYPE RE_AST* #define YY_USE_CONST +#define VALID_ESCAPE_SEQUENCE 1 +#define UNKNOWN_ESCAPE_SEQUENCE 2 + typedef struct _RE_LEX_ENVIRONMENT { RE_CLASS re_class; int last_error; char last_error_message[256]; + bool strict_escape; } RE_LEX_ENVIRONMENT; @@ -94,8 +100,13 @@ void yyerror( RE_LEX_ENVIRONMENT* lex_env, const char* error_message); +void yywarning( + yyscan_t yyscanner, + RE_LEX_ENVIRONMENT* lex_env, + const char* error_message); + void yyfatal(yyscan_t yyscanner, const char* error_message); int yyparse(void* yyscanner, RE_LEX_ENVIRONMENT* lex_env); -int yr_parse_re_string(const char* re_string, RE_AST** re_ast, RE_ERROR* error); +int yr_parse_re_string(const char* re_string, RE_AST** re_ast, RE_ERROR* error, int flags); diff --git a/libyara/parser.c b/libyara/parser.c index e924671090..2433e52743 100644 --- a/libyara/parser.c +++ b/libyara/parser.c @@ -748,23 +748,37 @@ int yr_parser_reduce_string_declaration( if (modifier.flags & STRING_FLAGS_HEXADECIMAL) result = yr_re_parse_hex(str->c_string, &re_ast, &re_error); else if (modifier.flags & STRING_FLAGS_REGEXP) - result = yr_re_parse(str->c_string, &re_ast, &re_error); + { + int flags = RE_PARSER_FLAG_NONE; + if (compiler->strict_escape) + flags |= RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES; + result = yr_re_parse(str->c_string, &re_ast, &re_error, flags); + } else result = yr_base64_ast_from_string(str, modifier, &re_ast, &re_error); if (result != ERROR_SUCCESS) { - snprintf( - message, - sizeof(message), - "invalid %s \"%s\": %s", - (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string" - : "regular expression", - identifier, - re_error.message); - - yr_compiler_set_error_extra_info(compiler, message); - goto _exit; + if (result == ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning( + yyscanner, + "unknown escape sequence"); + } + else + { + snprintf( + message, + sizeof(message), + "invalid %s \"%s\": %s", + (modifier.flags & STRING_FLAGS_HEXADECIMAL) ? "hex string" + : "regular expression", + identifier, + re_error.message); + + yr_compiler_set_error_extra_info(compiler, message); + goto _exit; + } } if (re_ast->flags & RE_FLAGS_FAST_REGEXP) diff --git a/libyara/re.c b/libyara/re.c index c78ceb247b..e128e3065a 100644 --- a/libyara/re.c +++ b/libyara/re.c @@ -214,9 +214,9 @@ void yr_re_ast_destroy(RE_AST* re_ast) // Parses a regexp but don't emit its code. A further call to // yr_re_ast_emit_code is required to get the code. // -int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error) +int yr_re_parse(const char* re_string, RE_AST** re_ast, RE_ERROR* error, int flags) { - return yr_parse_re_string(re_string, re_ast, error); + return yr_parse_re_string(re_string, re_ast, error, flags); } //////////////////////////////////////////////////////////////////////////////// @@ -235,14 +235,18 @@ int yr_re_parse_hex(const char* hex_string, RE_AST** re_ast, RE_ERROR* error) int yr_re_compile( const char* re_string, int flags, + int parser_flags, YR_ARENA* arena, YR_ARENA_REF* ref, RE_ERROR* error) { RE_AST* re_ast; RE _re; + int result; - FAIL_ON_ERROR(yr_re_parse(re_string, &re_ast, error)); + result = yr_re_parse(re_string, &re_ast, error, parser_flags); + if (result != ERROR_UNKNOWN_ESCAPE_SEQUENCE) + FAIL_ON_ERROR(result); _re.flags = flags; @@ -255,7 +259,7 @@ int yr_re_compile( yr_re_ast_destroy(re_ast); - return ERROR_SUCCESS; + return result; } //////////////////////////////////////////////////////////////////////////////// diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c index d465ab19d8..1be8297fc9 100644 --- a/libyara/re_lexer.c +++ b/libyara/re_lexer.c @@ -1,6 +1,6 @@ -#line 2 "re_lexer.c" +#line 1 "libyara/re_lexer.c" -#line 4 "re_lexer.c" +#line 3 "libyara/re_lexer.c" #define YY_INT_ALIGNED short int @@ -702,7 +702,7 @@ static const flex_int32_t yy_rule_can_match_eol[30] = #define yymore() yymore_used_but_not_detected #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET -#line 1 "re_lexer.l" +#line 1 "libyara/re_lexer.l" /* Copyright (c) 2013. The YARA Authors. All Rights Reserved. @@ -732,7 +732,7 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Lexical analyzer for regular expressions */ -#line 33 "re_lexer.l" +#line 33 "libyara/re_lexer.l" /* Disable warnings for unused functions in this file. @@ -759,6 +759,7 @@ with noyywrap then we can remove this pragma. #include #include #include +#include #ifdef _WIN32 @@ -785,16 +786,18 @@ static uint8_t space_chars[] = { int escaped_char_value( char* text, - uint8_t* value); + uint8_t* value, + bool strict_escape); int read_escaped_char( yyscan_t yyscanner, - uint8_t* escaped_char); + uint8_t* escaped_char, + bool strict_escape); -#line 795 "re_lexer.c" +#line 797 "libyara/re_lexer.c" #define YY_NO_UNISTD_H 1 -#line 798 "re_lexer.c" +#line 800 "libyara/re_lexer.c" #define INITIAL 0 #define char_class 1 @@ -1067,10 +1070,10 @@ YY_DECL } { -#line 111 "re_lexer.l" +#line 114 "libyara/re_lexer.l" -#line 1074 "re_lexer.c" +#line 1076 "libyara/re_lexer.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -1137,7 +1140,7 @@ YY_DECL case 1: YY_RULE_SETUP -#line 113 "re_lexer.l" +#line 116 "libyara/re_lexer.l" { // Examples: {3,8} {0,5} {,5} {7,} @@ -1179,7 +1182,7 @@ YY_RULE_SETUP YY_BREAK case 2: YY_RULE_SETUP -#line 153 "re_lexer.l" +#line 156 "libyara/re_lexer.l" { // Example: {10} @@ -1208,7 +1211,7 @@ YY_RULE_SETUP YY_BREAK case 3: YY_RULE_SETUP -#line 180 "re_lexer.l" +#line 183 "libyara/re_lexer.l" { // Start of a negated character class. Example: [^abcd] @@ -1220,7 +1223,7 @@ YY_RULE_SETUP YY_BREAK case 4: YY_RULE_SETUP -#line 189 "re_lexer.l" +#line 192 "libyara/re_lexer.l" { // Start of character negated class containing a ]. @@ -1235,7 +1238,7 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP -#line 202 "re_lexer.l" +#line 205 "libyara/re_lexer.l" { // Start of character class containing a ]. @@ -1250,7 +1253,7 @@ YY_RULE_SETUP YY_BREAK case 6: YY_RULE_SETUP -#line 215 "re_lexer.l" +#line 218 "libyara/re_lexer.l" { // Start of character class. Example: [abcd] @@ -1263,7 +1266,7 @@ YY_RULE_SETUP case 7: /* rule 7 can match eol */ YY_RULE_SETUP -#line 225 "re_lexer.l" +#line 228 "libyara/re_lexer.l" { // Any non-special character is passed as a CHAR token to the scanner. @@ -1274,63 +1277,63 @@ YY_RULE_SETUP YY_BREAK case 8: YY_RULE_SETUP -#line 234 "re_lexer.l" +#line 237 "libyara/re_lexer.l" { return _WORD_CHAR_; } YY_BREAK case 9: YY_RULE_SETUP -#line 239 "re_lexer.l" +#line 242 "libyara/re_lexer.l" { return _NON_WORD_CHAR_; } YY_BREAK case 10: YY_RULE_SETUP -#line 244 "re_lexer.l" +#line 247 "libyara/re_lexer.l" { return _SPACE_; } YY_BREAK case 11: YY_RULE_SETUP -#line 249 "re_lexer.l" +#line 252 "libyara/re_lexer.l" { return _NON_SPACE_; } YY_BREAK case 12: YY_RULE_SETUP -#line 254 "re_lexer.l" +#line 257 "libyara/re_lexer.l" { return _DIGIT_; } YY_BREAK case 13: YY_RULE_SETUP -#line 259 "re_lexer.l" +#line 262 "libyara/re_lexer.l" { return _NON_DIGIT_; } YY_BREAK case 14: YY_RULE_SETUP -#line 264 "re_lexer.l" +#line 267 "libyara/re_lexer.l" { return _WORD_BOUNDARY_; } YY_BREAK case 15: YY_RULE_SETUP -#line 268 "re_lexer.l" +#line 271 "libyara/re_lexer.l" { return _NON_WORD_BOUNDARY_; } YY_BREAK case 16: YY_RULE_SETUP -#line 273 "re_lexer.l" +#line 276 "libyara/re_lexer.l" { yyerror(yyscanner, lex_env, "backreferences are not allowed"); @@ -1339,16 +1342,24 @@ YY_RULE_SETUP YY_BREAK case 17: YY_RULE_SETUP -#line 280 "re_lexer.l" +#line 283 "libyara/re_lexer.l" { uint8_t c; + int return_code; - if (read_escaped_char(yyscanner, &c)) + return_code = read_escaped_char(yyscanner, &c, LEX_ENV->strict_escape); + if (return_code == VALID_ESCAPE_SEQUENCE) { yylval->integer = c; return _CHAR_; } + else if (return_code == UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning(yyscanner, lex_env, "unknown escape sequence"); + yylval->integer = c; + return _CHAR_; + } else { yyerror(yyscanner, lex_env, "illegal escape sequence"); @@ -1358,7 +1369,7 @@ YY_RULE_SETUP YY_BREAK case 18: YY_RULE_SETUP -#line 297 "re_lexer.l" +#line 308 "libyara/re_lexer.l" { // End of character class. @@ -1374,7 +1385,7 @@ YY_RULE_SETUP case 19: /* rule 19 can match eol */ YY_RULE_SETUP -#line 311 "re_lexer.l" +#line 322 "libyara/re_lexer.l" { // A range inside a character class. The regexp is... @@ -1404,7 +1415,7 @@ YY_RULE_SETUP if (start == '\\') { - if (!escaped_char_value(yytext, &start)) + if (!escaped_char_value(yytext, &start, LEX_ENV->strict_escape)) { yyerror(yyscanner, lex_env, "illegal escape sequence"); yyterminate(); @@ -1418,7 +1429,7 @@ YY_RULE_SETUP if (end == '\\') { - if (!read_escaped_char(yyscanner, &end)) + if (!read_escaped_char(yyscanner, &end, LEX_ENV->strict_escape)) { yyerror(yyscanner, lex_env, "illegal escape sequence"); yyterminate(); @@ -1439,7 +1450,7 @@ YY_RULE_SETUP YY_BREAK case 20: YY_RULE_SETUP -#line 374 "re_lexer.l" +#line 385 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1448,7 +1459,7 @@ YY_RULE_SETUP YY_BREAK case 21: YY_RULE_SETUP -#line 381 "re_lexer.l" +#line 392 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1457,7 +1468,7 @@ YY_RULE_SETUP YY_BREAK case 22: YY_RULE_SETUP -#line 388 "re_lexer.l" +#line 399 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1466,7 +1477,7 @@ YY_RULE_SETUP YY_BREAK case 23: YY_RULE_SETUP -#line 395 "re_lexer.l" +#line 406 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1475,7 +1486,7 @@ YY_RULE_SETUP YY_BREAK case 24: YY_RULE_SETUP -#line 402 "re_lexer.l" +#line 413 "libyara/re_lexer.l" { for (char c = '0'; c <= '9'; c++) @@ -1484,7 +1495,7 @@ YY_RULE_SETUP YY_BREAK case 25: YY_RULE_SETUP -#line 409 "re_lexer.l" +#line 420 "libyara/re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1504,13 +1515,20 @@ YY_RULE_SETUP YY_BREAK case 26: YY_RULE_SETUP -#line 427 "re_lexer.l" +#line 438 "libyara/re_lexer.l" { uint8_t c; + int return_code; - if (read_escaped_char(yyscanner, &c)) + return_code = read_escaped_char(yyscanner, &c, LEX_ENV->strict_escape); + if (return_code == VALID_ESCAPE_SEQUENCE) + { + LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; + } + else if (return_code == UNKNOWN_ESCAPE_SEQUENCE) { + yywarning(yyscanner, lex_env, "unknown escape sequence"); LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; } else @@ -1522,7 +1540,7 @@ YY_RULE_SETUP YY_BREAK case 27: YY_RULE_SETUP -#line 443 "re_lexer.l" +#line 461 "libyara/re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1540,7 +1558,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(char_class): -#line 460 "re_lexer.l" +#line 478 "libyara/re_lexer.l" { // End of regexp reached while scanning a character class. @@ -1551,7 +1569,7 @@ case YY_STATE_EOF(char_class): YY_BREAK case 28: YY_RULE_SETUP -#line 469 "re_lexer.l" +#line 487 "libyara/re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1566,7 +1584,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(INITIAL): -#line 483 "re_lexer.l" +#line 501 "libyara/re_lexer.l" { yyterminate(); @@ -1574,10 +1592,10 @@ case YY_STATE_EOF(INITIAL): YY_BREAK case 29: YY_RULE_SETUP -#line 488 "re_lexer.l" +#line 506 "libyara/re_lexer.l" ECHO; YY_BREAK -#line 1581 "re_lexer.c" +#line 1598 "libyara/re_lexer.c" case YY_END_OF_BUFFER: { @@ -2726,12 +2744,13 @@ void yyfree (void * ptr , yyscan_t yyscanner) #define YYTABLES_NAME "yytables" -#line 488 "re_lexer.l" +#line 506 "libyara/re_lexer.l" int escaped_char_value( char* text, - uint8_t* value) + uint8_t* value, + bool strict_escape) { unsigned int hex_value; char hex[3]; @@ -2770,11 +2789,49 @@ int escaped_char_value( *value = '\a'; break; + // Support metacharacters in escape sequences + case '\\': + case '^': + case '$': + case '.': + case '|': + case '(': + case ')': + case '[': + case ']': + + // Support other special characters that are used in rules and need to be escaped + case '*': + case '+': + case '?': + case '"': + case '\'': + case '-': + case '{': + case '}': + case '#': + case ':': + case '_': + case '=': + case '/': + case '!': + case ',': + case '@': + case '<': + case '>': + case '~': + case '&': + case '%': + *value = text[1]; + break; default: *value = text[1]; + if (strict_escape) + return UNKNOWN_ESCAPE_SEQUENCE; + return VALID_ESCAPE_SEQUENCE; } - return 1; + return VALID_ESCAPE_SEQUENCE; } @@ -2787,7 +2844,8 @@ int escaped_char_value( int read_escaped_char( yyscan_t yyscanner, - uint8_t* escaped_char) + uint8_t* escaped_char, + bool strict_escape) { char text[4] = {0, 0, 0, 0}; @@ -2810,7 +2868,7 @@ int read_escaped_char( return 0; } - return escaped_char_value(text, escaped_char); + return escaped_char_value(text, escaped_char, strict_escape); } @@ -2841,7 +2899,7 @@ void yyerror( // subsequent errors like "syntax error, unexpected $end" caused by // early parser termination. - if (lex_env->last_error == ERROR_SUCCESS) + if (lex_env->last_error == ERROR_SUCCESS || lex_env->last_error == ERROR_UNKNOWN_ESCAPE_SEQUENCE) { lex_env->last_error = ERROR_INVALID_REGULAR_EXPRESSION; @@ -2852,17 +2910,40 @@ void yyerror( } } +void yywarning( + yyscan_t yyscanner, + RE_LEX_ENVIRONMENT* lex_env, + const char *error_message) +{ + // Do not overwrite Errors + // print out warning only if there is not any other error beforehand + + if (lex_env->last_error == ERROR_SUCCESS) + { + lex_env->last_error = ERROR_UNKNOWN_ESCAPE_SEQUENCE; + + strlcpy( + lex_env->last_error_message, + error_message, + sizeof(lex_env->last_error_message)); + } +} int yr_parse_re_string( const char* re_string, RE_AST** re_ast, - RE_ERROR* error) + RE_ERROR* error, + int flags) { yyscan_t yyscanner; jmp_buf recovery_trampoline; RE_LEX_ENVIRONMENT lex_env; lex_env.last_error = ERROR_SUCCESS; + if (flags & RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES) + lex_env.strict_escape = true; + else + lex_env.strict_escape = false; lex_env.last_error_message[0] = '\0'; yr_thread_storage_set_value( @@ -2890,8 +2971,11 @@ int yr_parse_re_string( if (lex_env.last_error != ERROR_SUCCESS) { - yr_re_ast_destroy(*re_ast); - *re_ast = NULL; + if (lex_env.last_error != ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yr_re_ast_destroy(*re_ast); + *re_ast = NULL; + } strlcpy( error->message, diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l index 6cdc30b645..38863ad52f 100644 --- a/libyara/re_lexer.l +++ b/libyara/re_lexer.l @@ -56,6 +56,7 @@ with noyywrap then we can remove this pragma. #include #include #include +#include #ifdef _WIN32 @@ -82,11 +83,13 @@ static uint8_t space_chars[] = { int escaped_char_value( char* text, - uint8_t* value); + uint8_t* value, + bool strict_escape); int read_escaped_char( yyscan_t yyscanner, - uint8_t* escaped_char); + uint8_t* escaped_char, + bool strict_escape); %} @@ -280,12 +283,20 @@ hex_digit [0-9a-fA-F] \\ { uint8_t c; + int return_code; - if (read_escaped_char(yyscanner, &c)) + return_code = read_escaped_char(yyscanner, &c, LEX_ENV->strict_escape); + if (return_code == VALID_ESCAPE_SEQUENCE) { yylval->integer = c; return _CHAR_; } + else if (return_code == UNKNOWN_ESCAPE_SEQUENCE) + { + yywarning(yyscanner, lex_env, "unknown escape sequence"); + yylval->integer = c; + return _CHAR_; + } else { yyerror(yyscanner, lex_env, "illegal escape sequence"); @@ -337,7 +348,7 @@ hex_digit [0-9a-fA-F] if (start == '\\') { - if (!escaped_char_value(yytext, &start)) + if (!escaped_char_value(yytext, &start, LEX_ENV->strict_escape)) { yyerror(yyscanner, lex_env, "illegal escape sequence"); yyterminate(); @@ -351,7 +362,7 @@ hex_digit [0-9a-fA-F] if (end == '\\') { - if (!read_escaped_char(yyscanner, &end)) + if (!read_escaped_char(yyscanner, &end, LEX_ENV->strict_escape)) { yyerror(yyscanner, lex_env, "illegal escape sequence"); yyterminate(); @@ -427,9 +438,16 @@ hex_digit [0-9a-fA-F] \\ { uint8_t c; + int return_code; - if (read_escaped_char(yyscanner, &c)) + return_code = read_escaped_char(yyscanner, &c, LEX_ENV->strict_escape); + if (return_code == VALID_ESCAPE_SEQUENCE) + { + LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; + } + else if (return_code == UNKNOWN_ESCAPE_SEQUENCE) { + yywarning(yyscanner, lex_env, "unknown escape sequence"); LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; } else @@ -489,7 +507,8 @@ hex_digit [0-9a-fA-F] int escaped_char_value( char* text, - uint8_t* value) + uint8_t* value, + bool strict_escape) { unsigned int hex_value; char hex[3]; @@ -528,11 +547,49 @@ int escaped_char_value( *value = '\a'; break; + // Support metacharacters in escape sequences + case '\\': + case '^': + case '$': + case '.': + case '|': + case '(': + case ')': + case '[': + case ']': + + // Support other special characters that are used in rules and need to be escaped + case '*': + case '+': + case '?': + case '"': + case '\'': + case '-': + case '{': + case '}': + case '#': + case ':': + case '_': + case '=': + case '/': + case '!': + case ',': + case '@': + case '<': + case '>': + case '~': + case '&': + case '%': + *value = text[1]; + break; default: *value = text[1]; + if (strict_escape) + return UNKNOWN_ESCAPE_SEQUENCE; + return VALID_ESCAPE_SEQUENCE; } - return 1; + return VALID_ESCAPE_SEQUENCE; } @@ -545,7 +602,8 @@ int escaped_char_value( int read_escaped_char( yyscan_t yyscanner, - uint8_t* escaped_char) + uint8_t* escaped_char, + bool strict_escape) { char text[4] = {0, 0, 0, 0}; @@ -568,7 +626,7 @@ int read_escaped_char( return 0; } - return escaped_char_value(text, escaped_char); + return escaped_char_value(text, escaped_char, strict_escape); } @@ -599,7 +657,7 @@ void yyerror( // subsequent errors like "syntax error, unexpected $end" caused by // early parser termination. - if (lex_env->last_error == ERROR_SUCCESS) + if (lex_env->last_error == ERROR_SUCCESS || lex_env->last_error == ERROR_UNKNOWN_ESCAPE_SEQUENCE) { lex_env->last_error = ERROR_INVALID_REGULAR_EXPRESSION; @@ -610,17 +668,40 @@ void yyerror( } } +void yywarning( + yyscan_t yyscanner, + RE_LEX_ENVIRONMENT* lex_env, + const char *error_message) +{ + // Do not overwrite Errors + // print out warning only if there is not any other error beforehand + + if (lex_env->last_error == ERROR_SUCCESS) + { + lex_env->last_error = ERROR_UNKNOWN_ESCAPE_SEQUENCE; + + strlcpy( + lex_env->last_error_message, + error_message, + sizeof(lex_env->last_error_message)); + } +} int yr_parse_re_string( const char* re_string, RE_AST** re_ast, - RE_ERROR* error) + RE_ERROR* error, + int flags) { yyscan_t yyscanner; jmp_buf recovery_trampoline; RE_LEX_ENVIRONMENT lex_env; lex_env.last_error = ERROR_SUCCESS; + if (flags & RE_PARSER_FLAG_ENABLE_STRICT_ESCAPE_SEQUENCES) + lex_env.strict_escape = true; + else + lex_env.strict_escape = false; lex_env.last_error_message[0] = '\0'; yr_thread_storage_set_value( @@ -648,8 +729,11 @@ int yr_parse_re_string( if (lex_env.last_error != ERROR_SUCCESS) { - yr_re_ast_destroy(*re_ast); - *re_ast = NULL; + if (lex_env.last_error != ERROR_UNKNOWN_ESCAPE_SEQUENCE) + { + yr_re_ast_destroy(*re_ast); + *re_ast = NULL; + } strlcpy( error->message, diff --git a/tests/test-rules.c b/tests/test-rules.c index e3c1be1865..d5baaa30d5 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -2438,6 +2438,16 @@ void test_re() "rule test { strings: $a = /\\b/ wide condition: $a }", TEXT_1024_BYTES "abc"); + assert_true_rule_blob( + "rule test { condition: \"avb\" matches /a\\vb/ }", + TEXT_1024_BYTES "rule test { condition: \"avb\" matches /a\\vb/ }" + ) + + assert_false_rule_blob( + "rule test { condition: \"ab\" matches /a\\vb/ }", + TEXT_1024_BYTES "rule test { condition: \"ab\" matches /a\\vb/ }" + ) + assert_regexp_syntax_error(")"); assert_true_regexp("abc", "abc", "abc"); assert_false_regexp("abc", "xbc"); @@ -2611,7 +2621,8 @@ void test_re() assert_true_regexp("[\\x01-\\x03]+", "\x01\x02\x03", "\x01\x02\x03"); assert_false_regexp("[\\x00-\\x02]+", "\x03\x04\x05"); assert_true_regexp("[\\x5D]", "]", "]"); - assert_true_regexp("[\\0x5A-\\x5D]", "\x5B", "\x5B"); + assert_true_regexp("[\\x5A-\\x5D]", "\x5B", "\x5B"); + assert_false_regexp("[\\x5A-\\x5D]", "\x4F") assert_true_regexp("[\\x5D-\\x5F]", "\x5E", "\x5E"); assert_true_regexp("[\\x5C-\\x5F]", "\x5E", "\x5E"); assert_true_regexp("[\\x5D-\\x5F]", "\x5E", "\x5E"); @@ -3541,6 +3552,30 @@ void test_process_scan() } #endif +void test_invalid_escape_sequences_warnings() + { + YR_DEBUG_FPRINTF(1, stderr, "+ %s() {\n", __FUNCTION__); + + assert_warning_strict_escape("rule test { strings: $a = /ab\\cdef/ condition: $a }"); + assert_warning_strict_escape("rule test { strings: $a = /ab\\ def/ condition: $a }"); + assert_warning_strict_escape("rule test { strings: $a = /ab\\;def/ condition: $a }"); + assert_no_warnings("rule test { strings: $a = /ab\\*def/ condition: $a }"); + assert_no_warnings("rule test { strings: $a = /abcdef/ condition: $a }"); + assert_warning_strict_escape("rule test { strings: $a = /ab\\cdef/ condition: $a }"); + assert_no_warnings("rule test { strings: $a = /abcdef/ condition: $a }"); + assert_warning_strict_escape("rule test { strings: $a = /\\\\WINDOWS\\\\system32\\\\\\victim\\.exe\\.exe/ condition: $a }"); + assert_no_warnings("rule test { strings: $a = /\\\\WINDOWS\\\\system32\\\\victim\\.exe\\.exe/ condition: $a }"); + assert_warning_strict_escape("rule test { strings: $a = /AppData\\\\Roaming\\\\[0-9]{9,12}\\VMwareCplLauncher\\.exe/ condition: $a }"); + assert_no_warnings("rule test { strings: $a = /AppData\\\\Roaming\\\\[0-9]{9,12}\\\\VMwareCplLauncher\\.exe/ condition: $a }"); + assert_warning_strict_escape("rule test { strings: $a = /ab[\\000-\\343]/ condition: $a }"); + assert_no_warnings("rule test { strings: $a = /ab[\\x00-\\x43]/ condition: $a }"); + assert_warning_strict_escape("rule test { strings: $a = /C:\\Users\\\\[^\\\\]+\\\\AppData\\\\Local\\\\AzireVPN\\\\token\\.txt/ condition: $a }"); + assert_no_warnings("rule test { strings: $a = /C:\\\\Users\\\\[^\\\\]+\\\\AppData\\\\Local\\\\AzireVPN\\\\token\\.txt/ condition: $a }"); + assert_warning_strict_escape("rule test { condition: \"avb\" matches /a\\vb/ }"); + + YR_DEBUG_FPRINTF(1, stderr, "} // %s()\n", __FUNCTION__); +} + void test_performance_warnings() { YR_DEBUG_FPRINTF(1, stderr, "+ %s() {\n", __FUNCTION__); @@ -3892,6 +3927,7 @@ static void test_pass(int pass) #endif test_time_module(); + test_invalid_escape_sequences_warnings(); test_performance_warnings(); test_defined(); diff --git a/tests/util.c b/tests/util.c index e9b3eea5cc..fa99eca685 100644 --- a/tests/util.c +++ b/tests/util.c @@ -388,6 +388,12 @@ static void _compiler_callback( } int compile_rule(char* string, YR_RULES** rules) +{ + bool strict_escape_flag = false; + return compile_rule_ex(string, rules, strict_escape_flag); +} + +int compile_rule_ex(char* string, YR_RULES** rules, bool strict_escape_flag) { YR_COMPILER* compiler = NULL; int result = ERROR_SUCCESS; @@ -401,6 +407,8 @@ int compile_rule(char* string, YR_RULES** rules) goto _exit; } + compiler->strict_escape = strict_escape_flag; + yr_compiler_set_callback(compiler, _compiler_callback, &warnings); // Define some variables that will be used in test cases. @@ -711,7 +719,7 @@ void assert_re_atoms(char* re, int expected_atom_count, atom* expected_atoms) int exit_code; - yr_re_parse(re, &re_ast, &re_error); + yr_re_parse(re, &re_ast, &re_error, RE_PARSER_FLAG_NONE); exit_code = _assert_atoms(re_ast, expected_atom_count, expected_atoms); if (re_ast != NULL) diff --git a/tests/util.h b/tests/util.h index 52dd610a74..0a18ccf5e2 100644 --- a/tests/util.h +++ b/tests/util.h @@ -96,6 +96,10 @@ int compile_rule( char* string, YR_RULES** rules); +int compile_rule_ex( + char* string, + YR_RULES** rules, + bool strict_escape_flag); typedef struct SCAN_CALLBACK_CTX SCAN_CALLBACK_CTX; @@ -341,6 +345,26 @@ void assert_hex_atoms( } while (0); +#define assert_warnings_strict_escape(rule, w) do { \ + YR_RULES* rules; \ + bool strict_escape = true; \ + int result = compile_rule_ex(rule, &rules, strict_escape); \ + if (result == ERROR_SUCCESS) { \ + yr_rules_destroy(rules); \ + if (warnings < w) { \ + fprintf(stderr, "%s:%d: expecting warning\n", \ + __FILE__, __LINE__); \ + exit(EXIT_FAILURE); \ + } \ + } \ + else { \ + fprintf(stderr, "%s:%d: failed to compile << %s >>: %s\n", \ + __FILE__, __LINE__, rule, compile_error); \ + exit(EXIT_FAILURE); \ + } \ + } while (0); + + #define assert_no_warnings(rule) do { \ YR_RULES* rules; \ int result = compile_rule(rule, &rules); \ @@ -363,6 +387,10 @@ void assert_hex_atoms( #define assert_warning(rule) assert_warnings(rule, 1) +#define assert_warning_strict_escape(rule) \ + assert_warnings_strict_escape(rule, 1) + + #define assert_true_regexp(regexp,string,expected) do { \ if (!capture_string("rule test { strings: $a = /" regexp \ "/ condition: $a }", string, expected)) { \