- Bugfix: bad stack trace when a test search fails

- Bugfix: add a setup step for `contentctl test`: `wait_for_app_installation`. This step ensures that the required apps are correctly installed AND configured (for splunk ES) - Bugfix: add a 20 secs sleep after starting the container in `contentctl test` to ensure Docker has the time to map the ports correctly before contentctl attempts to connect. This was problematic on Windows with WSL. - Bugfix: Fix path to download apps from SplunkBase - Bugfix: Fix a bug where `contentctl validate` and `contentctl build` needed the splunkbase creds to work, which made no sense. - Fix typo - Feature: Introduction of a logger object in the hope of gradually replacing usage of `print()` statements - Feature: `contentctl test` now creates missing indexes specified in test data as `custom_index` - Change: Send tests data to the HEC as text and not binary - Change: Add a message that makes debugging failed tests easier.
splunk · Nov 14, 2024 · 967a1f1 · 967a1f1
2 parents e330115 + 8207247
commit 967a1f1
Show file tree

Hide file tree

Showing 6 changed files with 90 additions and 27 deletions.
diff --git a/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py b/contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py
@@ -1,3 +1,4 @@
+import logging
 import time
 import uuid
 import abc
@@ -17,12 +18,13 @@
 import requests                                                                                     # type: ignore
 import splunklib.client as client                                                                   # type: ignore
 from splunklib.binding import HTTPError                                                             # type: ignore
+from splunklib.client import Service
 from splunklib.results import JSONResultsReader, Message                                            # type: ignore
 import splunklib.results
 from urllib3 import disable_warnings
 import urllib.parse
 
-from contentctl.objects.config import test_common, Infrastructure
+from contentctl.objects.config import test_common, Infrastructure, ENTERPRISE_SECURITY_UID
 from contentctl.objects.enums import PostTestBehavior, AnalyticsType
 from contentctl.objects.detection import Detection
 from contentctl.objects.base_test import BaseTest
@@ -42,6 +44,8 @@
     TestingStates
 )
 
+LOG = Utils.get_logger()
+
 
 class SetupTestGroupResults(BaseModel):
     exception: Union[Exception, None] = None
@@ -107,6 +111,7 @@ class DetectionTestingInfrastructure(BaseModel, abc.ABC):
 
     def __init__(self, **data):
         super().__init__(**data)
+        self._conn: None | Service = None
 
     # TODO: why not use @abstractmethod
     def start(self):
@@ -138,7 +143,8 @@ def setup(self):
         try:
             for func, msg in [
                 (self.start, "Starting"),
-                (self.get_conn, "Waiting for App Installation"),
+                (self.get_conn, "Getting initial connection"),
+                (self.wait_for_app_installation, "Waiting for App Installation"),
                 (self.configure_conf_file_datamodels, "Configuring Datamodels"),
                 (self.create_replay_index, f"Create index '{self.sync_obj.replay_index}'"),
                 (self.get_all_indexes, "Getting all indexes from server"),
@@ -170,14 +176,15 @@ def wait_for_ui_ready(self):
     def configure_hec(self):
         self.hec_channel = str(uuid.uuid4())
         try:
-            res = self.get_conn().input(
-                path="/servicesNS/nobody/splunk_httpinput/data/inputs/http/http:%2F%2FDETECTION_TESTING_HEC"
-            )
-            self.hec_token = str(res.token)
-            return
-        except Exception:
-            # HEC input does not exist.  That's okay, we will create it
+            # Delete old HEC
+            self.get_conn().inputs.delete("DETECTION_TESTING_HEC", kind='http')
+        except (HTTPError, KeyError) as e:
+            # HEC input didn't exist in the first place, everything is good.
             pass
+        except Exception as e:
+            LOG.error("Error when deleting input DETECTION_TESTING_HEC.")
+            LOG.exception(e)
+            raise e
 
         try:
             res = self.get_conn().inputs.create(
@@ -210,6 +217,31 @@ def get_all_indexes(self) -> None:
         except Exception as e:
             raise (Exception(f"Failure getting indexes: {str(e)}"))
 
+    def wait_for_app_installation(self):
+        config_apps = self.global_config.apps
+        installed_config_apps = []
+        while len(installed_config_apps) < len(config_apps):
+            try:
+                # Get apps installed in the Splunk instance
+                splunk_instance_apps = self.get_conn().apps.list()
+
+                # Try to find all the apps we want to be installed (config_apps)
+                installed_config_apps = []
+                for config_app in config_apps:
+                    for splunk_instance_app in splunk_instance_apps:
+                        if config_app.appid == splunk_instance_app.name:
+                            # For Enterprise Security, we need to make sure the app is also configured.
+                            if config_app.uid == ENTERPRISE_SECURITY_UID and splunk_instance_app.content.get('configured') != '1':
+                                continue
+                            installed_config_apps.append(config_app.appid)
+                LOG.debug("Apps in the Splunk instance: " + str(list(map(lambda x: x.name, splunk_instance_apps))))
+                LOG.debug(f"apps in contentctl package found in Splunk instance: {installed_config_apps}")
+                if len(installed_config_apps) >= len(config_apps):
+                    break
+            except Exception as e:
+                LOG.exception(e)
+            time.sleep(5)
+
     def get_conn(self) -> client.Service:
         try:
             if not self._conn:
@@ -218,8 +250,9 @@ def get_conn(self) -> client.Service:
                 # continue trying to re-establish a connection until after
                 # the server has restarted
                 self.connect_to_api()
-        except Exception:
+        except Exception as e:
             # there was some issue getting the connection. Try again just once
+            LOG.exception(e)
             self.connect_to_api()
         return self._conn
 
@@ -295,7 +328,7 @@ def configure_imported_roles(
     ):  
         try:
             # Set which roles should be configured. For Enterprise Security/Integration Testing,
-            # we must add some extra foles.
+            # we must add some extra roles.
             if self.global_config.enable_integration_testing:
                 roles = imported_roles + enterprise_security_roles
             else:
@@ -1100,7 +1133,7 @@ def retry_search_until_timeout(
             threat_object_fields_set = set([o.name for o in detection.tags.observable if "Attacker" in o.role]) # just the "threat objects"
 
             # Ensure the search had at least one result
-            if int(job.content.get("resultCount", "0")) > 0:
+            if int(job.content["resultCount"]) > 0:
                 # Initialize the test result
                 test.result = UnitTestResult()
 
@@ -1203,6 +1236,7 @@ def retry_search_until_timeout(
                     self.infrastructure,
                     TestResultStatus.FAIL,
                     duration=time.time() - search_start_time,
+                    message=f"Search had 0 result. {job.content}"
                 )
                 tick += 1
 
@@ -1247,16 +1281,16 @@ def replay_attack_data_file(
         test_group_start_time: float,
     ):
         # Before attempting to replay the file, ensure that the index we want
-        # to replay into actuall exists. If not, we should throw a detailed
-        # exception that can easily be interpreted by the user.
+        # to replay into actually exists. If not, we create the index.
         if attack_data_file.custom_index is not None and \
             attack_data_file.custom_index not in self.all_indexes_on_server:
-            raise ReplayIndexDoesNotExistOnServer(
-                f"Unable to replay data file {attack_data_file.data} "
-                f"into index '{attack_data_file.custom_index}'. "
-                "The index does not exist on the Splunk Server. "
-                f"The only valid indexes on the server are {self.all_indexes_on_server}"
-            )
+            index = self.get_conn().indexes.create(name=attack_data_file.custom_index)
+            LOG.info(f"Created Index {attack_data_file.custom_index}: {index}")
+            LOG.info("Re-retup of the HEC and roles and indexes...")
+            self.get_all_indexes()
+            self.configure_imported_roles()
+            self.configure_delete_indexes()
+            self.configure_hec()
 
         tempfile = mktemp(dir=tmp_dir)
         if not (str(attack_data_file.data).startswith("http://") or 
@@ -1358,7 +1392,7 @@ def hec_raw_replay(
         url_with_hec_path = urllib.parse.urljoin(
             url_with_port, "services/collector/raw"
         )
-        with open(tempfile, "rb") as datafile:
+        with open(tempfile, "r") as datafile:
             try:
                 res = requests.post(
                     url_with_hec_path,

diff --git a/...tctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructureContainer.py b/...tctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructureContainer.py
@@ -1,3 +1,5 @@
+import time
+
 from contentctl.actions.detection_testing.infrastructures.DetectionTestingInfrastructure import (
     DetectionTestingInfrastructure,
 )
@@ -25,6 +27,10 @@ def start(self):
 
         self.container = self.make_container()
         self.container.start()
+        # There might be a small delay between the starting of the container and the binding of the ports for splunk.
+        # To avoid a "connection refused" error, wait a little bit before finishing the method call.
+        # This won't change setup time, because the container is already started.
+        time.sleep(20)
 
     def finish(self):
         if self.container is not None:

diff --git a/contentctl/contentctl.py b/contentctl/contentctl.py
@@ -5,6 +5,7 @@
 import tyro
 
 from contentctl.actions.initialize import Initialize
+from contentctl.helper.utils import Utils
 from contentctl.objects.config import init, validate, build,  new, deploy_acs, test, test_servers, inspect, report, test_common, release_notes
 from contentctl.actions.validate import Validate
 from contentctl.actions.new_content import NewContent
@@ -51,8 +52,7 @@
 #     """
 #     )
 
-
-
+LOG = Utils.get_logger()
 
 def init_func(config:test):    
     Initialize().execute(config)
@@ -153,7 +153,8 @@ def main():
             config_obj = YmlReader().load_file(configFile)
             t = test.model_validate(config_obj)
     except Exception as e:
-        print(f"Error validating 'contentctl.yml':\n{str(e)}")
+        LOG.error(f"Error validating 'contentctl.yml':\n{str(e)}")
+        LOG.exception(e)
         sys.exit(1)
 
 

diff --git a/contentctl/helper/utils.py b/contentctl/helper/utils.py
@@ -1,3 +1,4 @@
+import logging
 import os
 import git
 import shutil
@@ -19,6 +20,13 @@
 
 TOTAL_BYTES = 0
 ALWAYS_PULL = True
+LOG = logging.getLogger("main")
+LOG.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - [%(levelname)-8s] %(message)s')
+handler.setFormatter(formatter)
+LOG.addHandler(handler)
 
 
 class Utils:
@@ -485,3 +493,7 @@ def getPercent(numerator: float, denominator: float, decimal_places: int) -> str
         ratio = numerator / denominator
         percent = ratio * 100
         return Utils.getFixedWidth(percent, decimal_places) + "%"
+
+    @staticmethod
+    def get_logger() -> logging.Logger:
+        return LOG
diff --git a/contentctl/objects/config.py b/contentctl/objects/config.py
@@ -30,7 +30,8 @@
 ENTERPRISE_SECURITY_UID = 263
 COMMON_INFORMATION_MODEL_UID = 1621
 
-SPLUNKBASE_URL = "https://splunkbase.splunk.com/app/{uid}/release/{version}/download"
+SPLUNKBASE_BASE_URL = "https://splunkbase.splunk.com"
+SPLUNKBASE_URL = SPLUNKBASE_BASE_URL + "/app/{uid}/release/{version}/download"
 
 
 # TODO (#266): disable the use_enum_values configuration
@@ -93,6 +94,10 @@ def getApp(self, config:test,stage_file:bool=False)->str:
             destination = config.getLocalAppDir() / server_path.name
             if stage_file:
                 Utils.download_file_from_http(file_url_string, str(destination))
+        # Needed for `contentctl validate` and `contentctl build` else it fails without the splunkbase creds,
+        # which shouldn't be mandatory for validation or building the app.
+        elif self.version is not None and self.uid is not None:
+            destination = self.getSplunkbasePath()
         else:
             raise Exception(f"Unknown path for app '{self.title}'")
 
@@ -886,7 +891,7 @@ def getContainerEnvironmentString(self,stage_file:bool=False, include_custom_app
 
         container_paths = []
         for path in paths:
-            if path.startswith(SPLUNKBASE_URL):
+            if path.startswith(SPLUNKBASE_BASE_URL):
                 container_paths.append(path)
             else:
                 container_paths.append((self.getContainerAppDir()/pathlib.Path(path).name).as_posix())

diff --git a/contentctl/objects/unit_test_result.py b/contentctl/objects/unit_test_result.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import Union,TYPE_CHECKING
+from typing import Union, TYPE_CHECKING, Optional
 from splunklib.data import Record
 from contentctl.objects.base_test_result import BaseTestResult, TestResultStatus
 
@@ -23,6 +23,7 @@ def set_job_content(
         status: TestResultStatus,
         exception: Union[Exception, None] = None,
         duration: float = 0,
+        message: Optional[str] = None
     ) -> bool:
         """
         Sets various fields in the result, pulling some fields from the provided search job's
@@ -75,4 +76,8 @@ def set_job_content(
                 self.message = f"ERROR with no more specific message available."
             self.sid_link = NO_SID
 
+        if message:
+            # Override the message
+            self.message = message
+
         return self.success