Fix grokker rules that have fields with common prefixes (#571)

* Add test and bugfix for fields with common prefix

---------

Co-authored-by: Jörg Zimmermann <101292599+ekneg54@users.noreply.github.com>
fkie-cad · Apr 24, 2024 · 3376751 · 3376751
1 parent 509397f
commit 3376751
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 9 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,15 +10,16 @@
 
 ### Bugfix
 
-  * fixes bug where missing key in credentials file leads to AttributeError
+* fixes a bug in grokker rules, where common field prefixes weren't possible
+* fixes bug where missing key in credentials file leads to AttributeError
 
 ## 11.1.0
 
 ### Features
 
-  * new documentation part with security best practices which compiles to `user_manual/security/best_practices.html`
-    * also comes with excel export functionality of given best practices  
-  * add basic auth to http_input
+* new documentation part with security best practices which compiles to `user_manual/security/best_practices.html`
+  * also comes with excel export functionality of given best practices
+* add basic auth to http_input
 
 ### Bugfix
 

diff --git a/logprep/processor/grokker/rule.py b/logprep/processor/grokker/rule.py
@@ -68,10 +68,13 @@ def _dotted_field_to_logstash_converter(mapping: dict) -> dict:
     def _transform(pattern):  # nosemgrep
         fields = re.findall(FIELD_PATTERN, pattern)
         for dotted_field, _ in fields:
-            splitted_field = dotted_field.split(".")
-            if len(splitted_field) > 1:
-                replacement = "".join(f"[{element}]" for element in splitted_field)
-                pattern = re.sub(re.escape(dotted_field), replacement, pattern)
+            if "." in dotted_field:
+                replacement = "".join(f"[{element}]" for element in dotted_field.split("."))
+                # ensure full field is replaced by scanning for ':' at the front and '}' or ':'
+                # at the end in the pattern. Also add them again in the replacement string.
+                pattern = re.sub(
+                    f":{re.escape(dotted_field)}([}}:])", f":{replacement}\\1", pattern
+                )
         return pattern
 
     def _replace_pattern(pattern):
@@ -116,7 +119,7 @@ class Config(DissectorRule.Config):
         pattern.
         It is possible to use `oniguruma` regex pattern with or without grok patterns in the
         patterns part. When defining an `oniguruma` there is a limitation of three nested
-        parentheses inside the pattern. Applying more nested parentheses is not possible.  
+        parentheses inside the pattern. Applying more nested parentheses is not possible.
         Logstashs ecs conform grok patterns are used to resolve the here used grok patterns.
         When writing patterns it is advised to be careful as the underlying regex can become complex
         fast. If the execution and the resolving of the pattern takes more than one second a

diff --git a/tests/unit/processor/grokker/test_grokker.py b/tests/unit/processor/grokker/test_grokker.py
@@ -310,6 +310,22 @@
             "port": 1234,
         },
     ),
+    (
+        "Subfield with common prefix",
+        {
+            "filter": "message",
+            "grokker": {
+                "mapping": {
+                    "message": "Facility %{USER:facility.location} %{USER:facility.location_level}"
+                }
+            },
+        },
+        {"message": "Facility spain primary"},
+        {
+            "message": "Facility spain primary",
+            "facility": {"location": "spain", "location_level": "primary"},
+        },
+    ),
 ]
 
 failure_test_cases = [