Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Utils: create an encoder #34

Merged
merged 11 commits into from
Jul 17, 2024
27 changes: 27 additions & 0 deletions tests/test_coding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
class TestDictionaryEncoder(unittest.TestCase):
    """
    Unit tests for the DictionaryEncoder class.

    Methods
    -------
    test_encode() -> None:
        Checks encode() against a table of known inputs and outputs.
    """

    def test_encode(self) -> None:
        """
        Encode each sample condition and compare it with the expected string.

        Every case gets a fresh DictionaryEncoder so that names are handed
        out from "A" again, and runs inside its own subTest so that a failure
        reports the offending input without hiding the remaining cases.
        """
        test_cases: dict[str, str] = {
            "if (([ebp_1 + 0x14].b == 0 || [ebp_1 + 0x14].b != 0 && [ebp_1 + 0x14].c != 0) || ![ebp_1 + 0x14] == 0) then 387 @ 0x8040da8 else 388 @ 0x8040d8b": "( ( A | ! A & B ) | ! C )",
            "if ([ebp_1 + 0x14].b == 0 || [ebp_1 + 0x14].b != 0) then 387 @ 0x8040da8 else 388 @ 0x8040d8b": "( A | ! A )",
            "while (x == 3 && y >= 2):": "( A & B )",
            "if ([ebp_1 + 0x14].b == 0 || [ebp_1 + 0x14].b != 0 && [ebp_1 + 0x14].c != 0) || (![ebp_1 + 0x14] == 0) then 387 @ 0x8040da8 else 388 @ 0x8040d8b": "( A | ! A & B ) | ( ! C )",
        }
        for mlil_string, expected in test_cases.items():
            with self.subTest(mlil_string=mlil_string):
                encoder: DictionaryEncoder = DictionaryEncoder()
                encoded_str: str = encoder.encode(mlil_string)
                self.assertEqual(
                    encoded_str,
                    expected,
                    "Two values are not equal to each other...",
                )
155 changes: 155 additions & 0 deletions utils/coding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import string
from dataclasses import dataclass
import re
from typing import Generator
import unittest
import itertools


@dataclass
class Boolean:
    """
    Pairs a boolean expression with its encoded counterpart.

    Attributes
    ----------
    raw : str
        The boolean expression in its raw form.
    encoded : str
        The same expression in its encoded form.
    """

    # Field order is part of the generated constructor: Boolean(raw, encoded).
    raw: str
    encoded: str


class NameGenerator:
    """
    A class to generate unique names for boolean conditions.

    A condition and its "!=" / "==" counterpart share one name. The name
    stands for whichever of the two forms was seen first; the other form is
    reported as that name prefixed with "! ".

    Attributes
    ----------
    generated_dictionary_keys : dict
        Maps a condition, with every "!=" rewritten to "==", to its name.
    prev_state : list
        Every name handed out so far, in the order it was generated.

    Methods
    -------
    generate_name(conditional: str) -> str:
        Returns the name (possibly negated) for a given condition.
    generate_unique_uppercase_string() -> Generator[str, None, None]:
        Yields uppercase strings that have not been handed out yet.
    """

    def __init__(self) -> None:
        """
        Constructs all the necessary attributes for the NameGenerator object.
        """
        self.generated_dictionary_keys: dict[str, str] = {}
        self.prev_state: list[str] = []
        # Normalized conditions whose name was first issued for the "!=" form.
        self._named_negated: set[str] = set()

    def generate_name(self, conditional: str) -> str:
        """
        Returns the name for a condition, creating one on first sight.

        The same condition always maps to the same result, and a condition
        that differs from an already named one only by "!=" versus "==" maps
        to the negation of that name.

        Parameters
        ----------
        conditional : str
            The condition to generate a name for.

        Returns
        -------
        str
            The bare name when ``conditional`` has the polarity the name was
            created for, otherwise the name prefixed with "! ".
        """
        normalized = conditional.replace("!=", "==")
        is_negated = normalized != conditional

        key = self.generated_dictionary_keys.get(normalized)
        if key is None:
            key = next(self.generate_unique_uppercase_string())
            self.generated_dictionary_keys[normalized] = key
            # Remember the polarity the bare name stands for, so that later
            # look-ups of either form stay consistent with this first answer.
            if is_negated:
                self._named_negated.add(normalized)
            return key

        name_is_for_negated = normalized in self._named_negated
        if is_negated == name_is_for_negated:
            return key
        return "! " + key

    def generate_unique_uppercase_string(self) -> Generator[str, None, None]:
        """
        Generates unique uppercase strings.

        Candidates are tried in the order "A".."Z", "AA", "AB", ...; any
        candidate already present in ``prev_state`` is skipped, and each
        yielded string is appended to ``prev_state``.

        Yields
        ------
        str
            A unique uppercase string.
        """
        for length in itertools.count(1):
            for letters in itertools.product(string.ascii_uppercase, repeat=length):
                candidate = "".join(letters)
                if candidate not in self.prev_state:
                    self.prev_state.append(candidate)
                    yield candidate


class DictionaryEncoder:
    """
    A class to encode boolean expressions using generated names.

    Attributes
    ----------
    name_generator : NameGenerator
        An instance of the NameGenerator class.

    Methods
    -------
    encode(mlil_if_string: str) -> str:
        Encodes a given MLIL if-string.
    """

    # Matches "||", "&&", "(", ")" and a "!" that is not the start of "!=".
    # The capture group makes split() keep the matched operators in its result.
    _LOGICAL_OPERATORS: re.Pattern = re.compile(r"(\|\||&&|!(?!\=)|\(|\))")

    # How each operator token is written in the encoded output.
    _OPERATOR_SYMBOLS: dict[str, str] = {
        "||": "|",
        "&&": "&",
        "!": "!",
        "(": "(",
        ")": ")",
    }

    def __init__(self) -> None:
        """
        Constructs all the necessary attributes for the DictionaryEncoder object.
        """
        self.name_generator = NameGenerator()

    def encode(self, mlil_if_string: str) -> str:
        """
        Encodes a given MLIL if-string.

        Only the text from the first "(" through the last ")" is encoded.
        Logical operators and parentheses are kept ("||" becomes "|" and "&&"
        becomes "&"); every other fragment is replaced by the name that
        ``name_generator`` assigns to it. Tokens are joined by single spaces.

        Parameters
        ----------
        mlil_if_string : str
            The MLIL if-string to encode.

        Returns
        -------
        str
            The encoded string.

        Raises
        ------
        ValueError
            If ``mlil_if_string`` contains no "(" or no ")".
        """
        first_index: int = mlil_if_string.find("(")
        last_index: int = mlil_if_string.rfind(")")
        if first_index == -1 or last_index == -1:
            raise ValueError(
                f"no parenthesised condition found in {mlil_if_string!r}"
            )

        condition: str = mlil_if_string[first_index : last_index + 1]

        encoded_parts: list[str] = []
        for fragment in self._LOGICAL_OPERATORS.split(condition):
            token = fragment.strip()
            if not token:
                # split() emits empty strings between adjacent operators.
                continue
            symbol = self._OPERATOR_SYMBOLS.get(token)
            if symbol is None:
                symbol = self.name_generator.generate_name(token)
            encoded_parts.append(symbol)

        return " ".join(encoded_parts)