Skip to content

Commit

Permalink
Fix issue with warnings for non-ascii characters.
Browse files Browse the repository at this point in the history
The warning was being issued even for escaped characters like \t and \x01; we want the warning only for non-escaped characters.
  • Loading branch information
plusvic committed Mar 17, 2021
1 parent 428317b commit 2893100
Show file tree
Hide file tree
Showing 10 changed files with 2,771 additions and 2,749 deletions.
687 changes: 342 additions & 345 deletions libyara/grammar.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion libyara/grammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ extern int yara_yydebug;

union YYSTYPE
{
#line 309 "grammar.y" /* yacc.c:1916 */
#line 302 "grammar.y" /* yacc.c:1916 */

YR_EXPRESSION expression;
SIZED_STRING* sized_string;
Expand Down
31 changes: 14 additions & 17 deletions libyara/grammar.y
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
} \


#define check_valid_ascii(s) \
if (!ss_is_valid_ascii(s)) \
{ \
yywarning(yyscanner, \
"non-ascii character in string \"%s\".", (s)->c_string); \
}

#define check_type_with_cleanup(expression, expected_type, op, cleanup) \
if (((expression.type) & (expected_type)) == 0) \
{ \
Expand Down Expand Up @@ -340,7 +333,8 @@ rules
import
: _IMPORT_ _TEXT_STRING_
{
check_valid_ascii($2);
if ($2->flags & SIZED_STRING_FLAGS_UNESCAPED_NON_ASCII)
yywarning(yyscanner, "non-ascii character in import statement");

int result = yr_parser_reduce_import(yyscanner, $2);

Expand Down Expand Up @@ -605,7 +599,8 @@ string_declaration
}
_TEXT_STRING_ string_modifiers
{
check_valid_ascii($4);
if ($4->flags & SIZED_STRING_FLAGS_UNESCAPED_NON_ASCII)
yywarning(yyscanner, "non-ascii character in string \"%s\"", $1);

int result = yr_parser_reduce_string_declaration(
yyscanner, $5, $1, $4, &$<string>$);
Expand Down Expand Up @@ -800,7 +795,8 @@ string_modifier
{
int result = ERROR_SUCCESS;

check_valid_ascii($3);
if ($3->flags & SIZED_STRING_FLAGS_UNESCAPED_NON_ASCII)
yywarning(yyscanner, "non-ascii character in base64 alphabet");

if ($3->length != 64)
{
Expand All @@ -824,7 +820,8 @@ string_modifier
{
int result = ERROR_SUCCESS;

check_valid_ascii($3);
if ($3->flags & SIZED_STRING_FLAGS_UNESCAPED_NON_ASCII)
yywarning(yyscanner, "non-ascii character in base64 alphabet");

if ($3->length != 64)
{
Expand Down Expand Up @@ -1230,21 +1227,20 @@ arguments_list
regexp
: _REGEXP_
{
SIZED_STRING* sized_string = $1;
YR_ARENA_REF re_ref;
RE_ERROR error;

int result = ERROR_SUCCESS;
int re_flags = 0;

if (sized_string->flags & SIZED_STRING_FLAGS_NO_CASE)
if ($1->flags & SIZED_STRING_FLAGS_NO_CASE)
re_flags |= RE_FLAGS_NO_CASE;

if (sized_string->flags & SIZED_STRING_FLAGS_DOT_ALL)
if ($1->flags & SIZED_STRING_FLAGS_DOT_ALL)
re_flags |= RE_FLAGS_DOT_ALL;

result = yr_re_compile(
sized_string->c_string,
$1->c_string,
re_flags,
compiler->arena,
&re_ref,
Expand Down Expand Up @@ -1281,7 +1277,7 @@ boolean_expression
compiler->arena, &$1.value.sized_string_ref);

yywarning(yyscanner,
"Using literal string \"%s\" in a boolean operation.",
"using literal string \"%s\" in a boolean operation.",
sized_string->c_string);
}

Expand Down Expand Up @@ -2276,7 +2272,8 @@ primary_expression
{
YR_ARENA_REF ref;

check_valid_ascii($1);
if ($1->flags & SIZED_STRING_FLAGS_UNESCAPED_NON_ASCII)
yywarning(yyscanner, "non-ascii character in literal string");

int result = _yr_compiler_store_data(
compiler,
Expand Down
10 changes: 10 additions & 0 deletions libyara/include/yara/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,20 @@ typedef struct _YR_COMPILER

char last_error_extra_info[YR_MAX_COMPILER_ERROR_EXTRA_INFO];

// This buffer is used by the lexer for accumulating text strings. Those
// strings are copied from flex's internal variables. lex_buf_ptr points to
// the end of the string and lex_buf_len contains the number of bytes that
// have been copied into lex_buf.
char lex_buf[YR_LEX_BUF_SIZE];
char* lex_buf_ptr;
unsigned short lex_buf_len;

// lex_buf_unescaped_non_ascii is true if lex_buf contains a non-ASCII
// character that appeared in unescaped form in the source file. For example,
// the string "\x01foo" will contain a non-ASCII character, but it was
// escaped in the source file, so lex_buf_unescaped_non_ascii is false.
bool lex_buf_unescaped_non_ascii;

char include_base_dir[MAX_PATH];
void* user_data;
void* incl_clbk_user_data;
Expand Down
13 changes: 11 additions & 2 deletions libyara/include/yara/sizedstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,20 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <yara/integers.h>
#include <yara/utils.h>

// The SIZED_STRING_FLAGS_XXX constants are bit flags stored together in the
// "flags" field of a SIZED_STRING and tested with the bitwise AND operator.
// Each one must therefore be a distinct power of two, so that checking or
// setting one flag never affects the others.

// SIZED_STRING_FLAGS_NO_CASE indicates that the string has been decorated
// with the "nocase" modifier or with the /i modifier in the case of regular
// expressions.
#define SIZED_STRING_FLAGS_NO_CASE 1

// SIZED_STRING_FLAGS_DOT_ALL is used for strings that contain a regular
// expression that had the /s modifier.
#define SIZED_STRING_FLAGS_DOT_ALL 2

// SIZED_STRING_FLAGS_UNESCAPED_NON_ASCII indicates that the string contains
// characters outside the printable range [32,126] which did not appear in
// escaped form in the source file. The value must not be 3, because that is
// SIZED_STRING_FLAGS_NO_CASE | SIZED_STRING_FLAGS_DOT_ALL and a test like
// (flags & SIZED_STRING_FLAGS_UNESCAPED_NON_ASCII) would then also succeed
// for strings that only have one of those two flags set.
#define SIZED_STRING_FLAGS_UNESCAPED_NON_ASCII 4

#pragma pack(push)
#pragma pack(1)

Expand Down Expand Up @@ -71,8 +82,6 @@ bool ss_endswith(SIZED_STRING* s1, SIZED_STRING* s2);

bool ss_iendswith(SIZED_STRING* s1, SIZED_STRING* s2);

bool ss_is_valid_ascii(SIZED_STRING* s);

SIZED_STRING* ss_dup(SIZED_STRING* s);

SIZED_STRING* ss_new(const char* s);
Expand Down
Loading

0 comments on commit 2893100

Please sign in to comment.