Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resolve case insensitive regex #1569 #1573

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def _format_match(value) -> str:
value = re.sub(r"([^\\])\\S", r"\1[^ \t\n\r\f\v]", value)
value = re.sub(r"([^\\])\\w", r"\1[a-zA-Z0-9_]", value)
value = re.sub(r"([^\\])\\W", r"\1[^a-zA-Z0-9_]", value)
value = _unfold_case_insensitive_regex(value)
return '/{}/'.format(value)

@staticmethod
Expand Down Expand Up @@ -304,6 +305,77 @@ def _format_translated_queries(query_array):
return formatted_queries


def _unfold_case_insensitive_regex(regex_pattern):
# this function should be executed after "\s" unfolding

if "(?i)" in regex_pattern:

escaped_left_bracket_symbol = "辶"
delliott90 marked this conversation as resolved.
Show resolved Hide resolved
escaped_right_bracket_symbol = "廴"
escaped_backslash = "彳"

p = regex_pattern
p = p.replace(r"\\", escaped_backslash)
p = p.replace(r"\[", escaped_left_bracket_symbol)
p = p.replace(r"\]", escaped_right_bracket_symbol)

if p.count("[") != p.count("]"):
raise RuntimeError(f"regex /{regex_pattern}/ has odd number of brackets.")
else:
xs = re.split(r"[\[\]]", p)

# ci_index: case insensitive index (first appearance)
ci_index = -1
for i, x in enumerate(xs):
if i % 2 == 0 and "(?i)" in x:
ci_index = i
break

if ci_index > -1:

# xsb: xs inside bracket
xsb = xs[1::2]
xsb[ci_index//2:] = ["[" + _unfold_ci_chars(x, True) + "]" for x in xsb[ci_index//2:]]
xs[1::2] = xsb

# xsob: xs outside bracket
xsob = xs[0::2]
xsob_s_h, *xsob_s_t = xsob[ci_index//2].split("(?i)")
xsob_s_t = [_unfold_ci_chars(x, False) for x in xsob_s_t]
xsob[ci_index//2] = "".join([xsob_s_h] + xsob_s_t)
xsob[ci_index//2+1:] = [_unfold_ci_chars(x.replace("(?i)", ""), False) for x in xsob[ci_index//2+1:]]
xs[0::2] = xsob

p_unfolded = "".join(xs)
p_unfolded = p_unfolded.replace(escaped_right_bracket_symbol, r"\]")
p_unfolded = p_unfolded.replace(escaped_left_bracket_symbol, r"\[")
p_unfolded = p_unfolded.replace(escaped_backslash, r"\\")
return p_unfolded

else:
# fake case insensitive flag in square bracket
return regex_pattern

else:
# no case insensitive flag
return regex_pattern


def _unfold_ci_chars(regex_pattern_segment, if_set):
# if_set: if all chars are in the square bracket of regex
def char_mapper(c):
if if_set:
return c.lower() + c.upper()
else:
return f"[{c.lower()}{c.upper()}]"
xs = list(regex_pattern_segment)
s = "".join([char_mapper(x) if x.isascii() and x.isalpha() else x for x in xs])
if if_set:
# dedup for items inside square bracket
s = "".join(sorted(set(s)))
return s


def translate_pattern(pattern: Pattern, data_model_mapping, options):
# Added size parameter in transmission module
#result_limit = options['result_limit']
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from stix_shifter.stix_translation import stix_translation
from stix_shifter_utils.utils.error_response import ErrorCode
from stix_shifter_modules.elastic_ecs.stix_translation.query_constructor import (
_unfold_ci_chars,
_unfold_case_insensitive_regex,
)
import unittest
import datetime
import re
Expand Down Expand Up @@ -36,6 +40,31 @@ def _remove_timestamp_from_query(queries):

class TestStixtoQuery(unittest.TestCase, object):

def test_case_insensitive_unfold_chars(self):
    # Each tuple is (segment, if_set, expected result of _unfold_ci_chars).
    input_output_pairs = [
        ("a", False, "[aA]"),
        ("ab", False, "[aA][bB]"),
        ("ab7#*c((D))", False, "[aA][bB]7#*[cC](([dD]))"),
        ("aba", True, "ABab"),
        ("ab7#BBeE", True, "#7ABEabe"),
    ]
    for segment, if_set, expected in input_output_pairs:
        # assertEqual (unlike a bare assert) still runs under `python -O`,
        # and subTest reports which pair failed while checking the others.
        with self.subTest(segment=segment, if_set=if_set):
            self.assertEqual(expected, _unfold_ci_chars(segment, if_set))


def test_case_insensitive_unfold_regex(self):
    # Each tuple is (input regex, expected result of
    # _unfold_case_insensitive_regex).
    iopairs = [
        ("http://z[abc]83m li", "http://z[abc]83m li"),
        ("(?i)virus", "[vV][iI][rR][uU][sS]"),
        ("(?i)virus[ s]", "[vV][iI][rR][uU][sS][ Ss]"),
        ("(?i)virus[ s] bin [c3b]", "[vV][iI][rR][uU][sS][ Ss] [bB][iI][nN] [3BCbc]"),
        (r"(?i)virus\[ s\]", r"[vV][iI][rR][uU][sS]\[ [sS]\]"),
        (r"(?i)virus\\[ s\\]", r"[vV][iI][rR][uU][sS]\\[ Ss\\]"),
        ("(?i)http://z83m li", "[hH][tT][tT][pP]://[zZ]83[mM] [lL][iI]"),
        ("(?i)http://z[abc]83m li", "[hH][tT][tT][pP]://[zZ][ABCabc]83[mM] [lL][iI]"),
        ("http://(?i)z[abc]83m li", "http://[zZ][ABCabc]83[mM] [lL][iI]"),
    ]
    for pattern, expected in iopairs:
        # assertEqual (unlike a bare assert) still runs under `python -O`,
        # and subTest reports which pattern failed while checking the others.
        with self.subTest(pattern=pattern):
            self.assertEqual(expected, _unfold_case_insensitive_regex(pattern))

def test_ipv4_query(self):
stix_pattern = "[ipv4-addr:value = '192.168.122.83' OR ipv4-addr:value = '192.168.122.84']"
translated_query = translation.translate('elastic_ecs', 'query', '{}', stix_pattern)
Expand Down Expand Up @@ -278,6 +307,13 @@ def test_match_operator_escaped(self):
test_query = ['(process.name : /cmd\\.exe/ OR process.parent.name : /cmd\\.exe/)']
_test_query_assertions(translated_query, test_query)

def test_match_operator_case_sensitive(self):
    # Despite its name, this exercises a MATCHES pattern carrying the inline
    # (?i) flag: every letter of the translated regex must appear as an
    # explicit [xX] pair, while the escaped dot stays untouched.
    pattern = r"[process:name MATCHES '(?i)virus\\.exe']"
    expected = ['(process.name : /[vV][iI][rR][uU][sS]\\.[eE][xX][eE]/ OR process.parent.name : /[vV][iI][rR][uU][sS]\\.[eE][xX][eE]/)']
    result = translation.translate('elastic_ecs', 'query', '{}', pattern)
    # Drop the timestamp clause before comparing against the expected query.
    result['queries'] = _remove_timestamp_from_query(result['queries'])
    _test_query_assertions(result, expected)

def test_match_operator_with_backslash(self):
# STIX uses backslash as escape, so to match a literal . in RE you need double-backslash
stix_pattern = r"[process:name MATCHES '^cmd\\.exe .*']"
Expand Down