diff --git a/api_app/analyzers_manager/file_analyzers/phishing/phishing_form_compiler.py b/api_app/analyzers_manager/file_analyzers/phishing/phishing_form_compiler.py
index ee21f845f6..ad2fdf2095 100644
--- a/api_app/analyzers_manager/file_analyzers/phishing/phishing_form_compiler.py
+++ b/api_app/analyzers_manager/file_analyzers/phishing/phishing_form_compiler.py
@@ -32,6 +32,7 @@ class PhishingFormCompiler(FileAnalyzer):
     pin_matching: list = []
     cvv_matching: list = []
     expiration_date_matching: list = []
+    user_agent: str = ""
 
     def __init__(
         self,
@@ -196,9 +197,13 @@ def perform_request_to_form(self, form) -> Response:
         params = self.compile_form_field(form)
         dest_url = self.extract_action_attribute(form)
         logger.info(f"Job #{self.job_id}: Sending {params=} to submit url {dest_url}")
+        # user_agent defaults to "": only send the header when a value is set,
+        # otherwise requests would transmit a blank User-Agent
+        headers = {"User-Agent": self.user_agent} if self.user_agent else {}
         response = requests.post(
             url=dest_url,
             data=params,
+            headers=headers,
             proxies=(
                 {"http": self.proxy_address, "https": self.proxy_address}
                 if self.proxy_address
diff --git a/api_app/analyzers_manager/migrations/0143_alter_analyzer_config_phishing_extractor_and_form_compiler.py b/api_app/analyzers_manager/migrations/0143_alter_analyzer_config_phishing_extractor_and_form_compiler.py
new file mode 100644
index 0000000000..90dd892415
--- /dev/null
+++ b/api_app/analyzers_manager/migrations/0143_alter_analyzer_config_phishing_extractor_and_form_compiler.py
@@ -0,0 +1,108 @@
+from django.db import migrations
+
+
+def migrate(apps, schema_editor):
+    """Create the ``user_agent`` parameter for both phishing analyzers.
+
+    Every analyzer config attached to either python module also receives a
+    ``PluginConfig`` holding a user agent value for the new parameter.
+    """
+    Parameter = apps.get_model("api_app", "Parameter")
+    PluginConfig = apps.get_model("api_app", "PluginConfig")
+    PythonModule = apps.get_model("api_app", "PythonModule")
+    pm_extractor = PythonModule.objects.get(
+        module="phishing.phishing_extractor.PhishingExtractor",
+        base_path="api_app.analyzers_manager.observable_analyzers",
+    )
+    pm_form_compiler = PythonModule.objects.get(
+        module="phishing.phishing_form_compiler.PhishingFormCompiler",
+        base_path="api_app.analyzers_manager.file_analyzers",
+    )
+    p_extractor = Parameter.objects.create(
+        name="user_agent",
+        type="str",
+        description="Custom user agent for the Phishing Extractor Selenium browser.",
+        is_secret=False,
+        required=False,
+        python_module=pm_extractor,
+    )
+    p_form_compiler = Parameter.objects.create(
+        name="user_agent",
+        type="str",
+        description="Custom user agent for the compilation of form.",
+        is_secret=False,
+        required=False,
+        python_module=pm_form_compiler,
+    )
+    for config in pm_extractor.analyzerconfigs.all():
+        PluginConfig.objects.create(
+            parameter=p_extractor,
+            analyzer_config=config,
+            value="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3",
+            owner=None,
+            for_organization=False,
+        )
+
+    for config in pm_form_compiler.analyzerconfigs.all():
+        PluginConfig.objects.create(
+            parameter=p_form_compiler,
+            analyzer_config=config,
+            value="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3",
+            owner=None,
+            for_organization=False,
+        )
+
+
+def reverse_migrate(apps, schema_editor):
+    """Roll back ``migrate``.
+
+    The plugin configs holding the user agent are deleted first, then the two
+    ``user_agent`` parameters themselves.
+    """
+    Parameter = apps.get_model("api_app", "Parameter")
+    PluginConfig = apps.get_model("api_app", "PluginConfig")
+    PythonModule = apps.get_model("api_app", "PythonModule")
+    pm_extractor = PythonModule.objects.get(
+        module="phishing.phishing_extractor.PhishingExtractor",
+        base_path="api_app.analyzers_manager.observable_analyzers",
+    )
+    pm_form_compiler = PythonModule.objects.get(
+        module="phishing.phishing_form_compiler.PhishingFormCompiler",
+        base_path="api_app.analyzers_manager.file_analyzers",
+    )
+
+    p_extractor = Parameter.objects.get(
+        name="user_agent",
+        type="str",
+        description="Custom user agent for the Phishing Extractor Selenium browser.",
+        is_secret=False,
+        required=False,
+        python_module=pm_extractor,
+    )
+    p_form_compiler = Parameter.objects.get(
+        name="user_agent",
+        type="str",
+        # must match the description stored by migrate(), trailing period included
+        description="Custom user agent for the compilation of form.",
+        is_secret=False,
+        required=False,
+        python_module=pm_form_compiler,
+    )
+
+    PluginConfig.objects.filter(parameter__in=[p_extractor, p_form_compiler]).delete()
+
+    p_extractor.delete()
+    p_form_compiler.delete()
+
+
+class Migration(migrations.Migration):
+    atomic = False
+    dependencies = [
+        ("api_app", "0062_alter_parameter_python_module"),
+        (
+            "analyzers_manager",
+            "0142_alter_analyzerreport_data_model_content_type_and_more",
+        ),
+    ]
+
+    operations = [migrations.RunPython(migrate, reverse_migrate)]
diff --git a/api_app/analyzers_manager/observable_analyzers/phishing/phishing_extractor.py b/api_app/analyzers_manager/observable_analyzers/phishing/phishing_extractor.py
index ae6a67d310..c3174dc179 100644
--- a/api_app/analyzers_manager/observable_analyzers/phishing/phishing_extractor.py
+++ b/api_app/analyzers_manager/observable_analyzers/phishing/phishing_extractor.py
@@ -17,6 +17,7 @@ class PhishingExtractor(ObservableAnalyzer, DockerBasedAnalyzer):
     proxy_address: str = ""
     window_width: int
     window_height: int
+    user_agent: str = ""
 
     def __init__(
         self,
@@ -40,6 +41,8 @@ def config(self, runtime_configuration: Dict):
             self.args.append(f"--window_width={self.window_width}")
         if self.window_height:
             self.args.append(f"--window_height={self.window_height}")
+        if self.user_agent:
+            self.args.append(f"--user_agent={self.user_agent}")
 
     def run(self):
         req_data: {} = {
diff --git a/api_app/playbooks_manager/migrations/0057_alter_phishing_extractor_add_domain.py b/api_app/playbooks_manager/migrations/0057_alter_phishing_extractor_add_domain.py
new file mode 100644
index 0000000000..0f70e6f039
--- /dev/null
+++ b/api_app/playbooks_manager/migrations/0057_alter_phishing_extractor_add_domain.py
@@ -0,0 +1,32 @@
+from django.db import migrations
+
+from 
api_app.analyzers_manager.constants import ObservableTypes
+
+
+def migrate(apps, schema_editor):
+    PlaybookConfig = apps.get_model("playbooks_manager", "PlaybookConfig")
+    config = PlaybookConfig.objects.get(name="PhishingExtractor")
+    config.type = [
+        ObservableTypes.URL,
+        ObservableTypes.DOMAIN,
+    ]
+    config.full_clean()
+    config.save()
+
+
+def reverse_migrate(apps, schema_editor):
+    PlaybookConfig = apps.get_model("playbooks_manager", "PlaybookConfig")
+    config = PlaybookConfig.objects.get(name="PhishingExtractor")
+    config.type = [
+        ObservableTypes.URL,
+    ]
+    config.full_clean()
+    config.save()
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("playbooks_manager", "0056_download_sample_vt"),
+    ]
+
+    operations = [migrations.RunPython(migrate, reverse_migrate)]
diff --git a/integrations/phishing_analyzers/analyzers/driver_wrapper.py b/integrations/phishing_analyzers/analyzers/driver_wrapper.py
index f33b7b217b..fc198232ce 100644
--- a/integrations/phishing_analyzers/analyzers/driver_wrapper.py
+++ b/integrations/phishing_analyzers/analyzers/driver_wrapper.py
@@ -1,6 +1,7 @@
 import functools
 import logging
 import os
+from random import randint
 from typing import Iterator
 
 from selenium.common import WebDriverException
@@ -8,6 +9,7 @@
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
 from seleniumwire.request import Request
+from seleniumwire.thirdparty.mitmproxy.exceptions import ServerException
 from seleniumwire.webdriver import ChromeOptions, Remote
 
 LOG_NAME = "driver_wrapper"
@@ -39,7 +41,7 @@ def handle_exception(self, *args, **kwargs):
         try:
             return func(self, *args, **kwargs)
         except WebDriverException as e:
-            logger.error(
+            logger.exception(
                 f"Error while performing {func.__name__}"
                 f"{' for url=' + url if func.__name__ == 'navigate' else ''}: {e}"
             )
@@ -56,14 +58,66 @@ def __init__(
         proxy_address: str = "",
         window_width: int = 1920,
         window_height: int = 1080,
+        user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3",
     ):
         self.proxy: str = proxy_address
         self.window_width: int = window_width
         self.window_height: int = window_height
+        self.user_agent: str = user_agent
         self.last_url: str = ""
-        self._driver: Remote = self._init_driver(self.window_width, self.window_height)
+        self.base_port = 17000
+        self.port_pool_size = 100
+        self._driver: Remote = self._init_driver(
+            self.window_width, self.window_height, self.user_agent
+        )
 
-    def _init_driver(self, window_width: int, window_height: int) -> Remote:
+    def _pick_free_port_from_pool(
+        self, sw_options: {}, options: ChromeOptions
+    ) -> Remote:
+        """Create the remote driver on a free selenium-wire proxy port.
+
+        A random port of the pool is tried at each attempt, for at most
+        ``self.port_pool_size`` failed attempts.
+
+        :param sw_options: selenium-wire options, its "port" key is overwritten
+        :param options: Chrome options, extended with the matching proxy server
+        :return: the driver bound to the first port that could be used
+        :raises RuntimeError: if every attempt failed with a ServerException
+        """
+        tries: int = 0
+        while tries < self.port_pool_size:
+            # randint is inclusive on both ends
+            picked_port = randint(self.base_port, self.base_port + self.port_pool_size)
+            sw_options.update({"port": picked_port})
+
+            # traffic must go back to host running selenium-wire
+            proxy_argument = f"--proxy-server=http://phishing_analyzers:{picked_port}"
+            options.add_argument(proxy_argument)
+            try:
+                driver = Remote(
+                    command_executor="http://selenium-hub:4444/wd/hub",
+                    options=options,
+                    seleniumwire_options=sw_options,
+                )
+            except ServerException:
+                logger.info(
+                    f"Failed to create driver with {picked_port=}. Trying with another one..."
+                )
+                # add_argument only appends: drop the stale proxy so that the
+                # next attempt does not carry several --proxy-server flags
+                options.arguments.remove(proxy_argument)
+                tries += 1
+            else:
+                logger.info(f"Found free port {picked_port}. Creating driver...")
+                return driver
+        raise RuntimeError(
+            "Failed to retrieve a free port for MitM proxy! Try restarting the job"
+        )
+
+    def _init_driver(
+        self, window_width: int, window_height: int, user_agent: str
+    ) -> Remote:
+        """Build Chrome and selenium-wire options, then create the remote driver."""
         logger.info(f"Adding proxy with option: {self.proxy}")
         logger.info("Creating Chrome driver...")
         sw_options: {} = {
@@ -71,8 +125,8 @@ def _init_driver(self, window_width: int, window_height: int) -> Remote:
             "enable_har": True,
             # https://github.com/wkeeling/selenium-wire/issues/220#issuecomment-794308386
             # config to have local seleniumwire proxy compatible with another proxy
-            "addr": "0.0.0.0",  # nosec B104 # where selenium-wire proxy will run
-            "port": 7007,
+            "addr": "0.0.0.0",  # nosec B104 # where selenium-wire proxy will run
+            "port": 0,  # placeholder, replaced by _pick_free_port_from_pool
         }
         if self.proxy:
             sw_options["proxy"] = {"http": self.proxy, "https": self.proxy}
@@ -85,23 +139,22 @@ def _init_driver(self, window_width: int, window_height: int) -> Remote:
         options.add_argument("--headless=new")
         options.add_argument("--ignore-certificate-errors")
         options.add_argument(f"--window-size={window_width},{window_height}")
-        # traffic must go back to host running selenium-wire
-        options.add_argument("--proxy-server=http://phishing_analyzers:7007")
-        driver = Remote(
-            command_executor="http://selenium-hub:4444/wd/hub",
-            options=options,
-            seleniumwire_options=sw_options,
-        )
-        return driver
+        options.add_argument(f"--user-agent={user_agent}")
+
+        return self._pick_free_port_from_pool(sw_options, options)
 
     def restart(self, motivation: str = "", timeout_wait_page: int = 0):
-        logger.info(f"Restarting driver: {motivation=}")
+        logger.info(f"{self._driver.session_id}: Restarting driver: {motivation=}")
         self._driver.quit()
         self._driver = self._init_driver(
-            window_width=self.window_width, window_height=self.window_height
+            window_width=self.window_width,
+            window_height=self.window_height,
+            user_agent=self.user_agent,
         )
         if self.last_url:
-            logger.info(f"Navigating to {self.last_url} after driver has restarted")
+            logger.info(
+                f"{self._driver.session_id}: Navigating to {self.last_url} after driver has restarted"
+            )
             self.navigate(self.last_url, timeout_wait_page=timeout_wait_page)
 
     @driver_exception_handler
@@ -111,7 +164,7 @@ def navigate(self, url: str = "", timeout_wait_page: int = 0):
             return
 
         self.last_url = url
-        logger.info(f"Navigating to {url=}")
+        logger.info(f"{self._driver.session_id}: Navigating to {url=}")
         self._driver.get(url)
         # dinamically wait for page to load its content with a fallback
         # of `timeout_wait_page` seconds.
@@ -123,17 +176,21 @@ def navigate(self, url: str = "", timeout_wait_page: int = 0):
 
     @driver_exception_handler
     def get_page_source(self) -> str:
-        logger.info(f"Extracting page source for url {self.last_url}")
+        logger.info(
+            f"{self._driver.session_id}: Extracting page source for url {self.last_url}"
+        )
         return self._driver.page_source
 
     @driver_exception_handler
     def get_current_url(self) -> str:
-        logger.info("Extracting current URL of page")
+        logger.info(f"{self._driver.session_id}: Extracting current URL of page")
         return self._driver.current_url
 
     @driver_exception_handler
     def get_base64_screenshot(self) -> str:
-        logger.info(f"Extracting screenshot of page as base64 for url {self.last_url}")
+        logger.info(
+            f"{self._driver.session_id}: Extracting screenshot of page as base64 for url {self.last_url}"
+        )
         return self._driver.get_screenshot_as_base64()
 
     def iter_requests(self) -> Iterator[Request]:
@@ -142,5 +199,10 @@ def iter_requests(self) -> Iterator[Request]:
     def get_har(self) -> str:
         return self._driver.har
 
+    def close(self):
+        logger.info(f"{self._driver.session_id}: Closing")
+        self._driver.close()
+
     def quit(self):
+        logger.info(f"{self._driver.session_id}: Quitting")
         self._driver.quit()
diff --git a/integrations/phishing_analyzers/analyzers/extract_phishing_site.py b/integrations/phishing_analyzers/analyzers/extract_phishing_site.py
index 060323def6..609f777950 100644
--- a/integrations/phishing_analyzers/analyzers/extract_phishing_site.py
+++ b/integrations/phishing_analyzers/analyzers/extract_phishing_site.py
@@ -51,19 
+51,39 @@ def analyze_target(
     proxy_address: str,
     window_width: int = 1920,
     window_height: int = 1080,
+    user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.3",
 ):
-    driver_wrapper = DriverWrapper(
-        proxy_address=proxy_address,
-        window_width=window_width,
-        window_height=window_height,
-    )
-    driver_wrapper.navigate(url=target_url, timeout_wait_page=5)
-
-    result: str = json.dumps(extract_driver_result(driver_wrapper), default=str)
-    logger.debug(f"JSON dump of driver {result=}")
-    print(result)
+    """Navigate to ``target_url`` with a DriverWrapper and print the result as JSON.
+
+    Any exception is logged instead of being propagated, and the driver is
+    always quit so that its slot is freed.
+    """
+    driver_wrapper = None
+    try:
+        # options omitted on the command line reach this point as None: drop
+        # them so that the DriverWrapper defaults apply
+        optional_settings = {
+            "window_width": window_width,
+            "window_height": window_height,
+            "user_agent": user_agent,
+        }
+        driver_wrapper = DriverWrapper(
+            proxy_address=proxy_address,
+            **{k: v for k, v in optional_settings.items() if v is not None},
+        )
+        driver_wrapper.navigate(url=target_url, timeout_wait_page=5)
 
-    driver_wrapper.quit()
+        result: str = json.dumps(extract_driver_result(driver_wrapper), default=str)
+        logger.debug(f"JSON dump of driver {result=}")
+        print(result)
+    except Exception as e:
+        logger.exception(
+            f"Exception during analysis of target website {target_url}: {e}"
+        )
+    finally:
+        # if anything goes wrong make sure to free the slot
+        if driver_wrapper:
+            driver_wrapper.quit()
 
 
 if __name__ == "__main__":
@@ -72,6 +92,7 @@ def analyze_target(
     parser.add_argument("--proxy_address", type=str, required=False)
     parser.add_argument("--window_width", type=int, required=False)
     parser.add_argument("--window_height", type=int, required=False)
+    parser.add_argument("--user_agent", type=str, required=False)
     arguments = parser.parse_args()
     logger.info(f"Extracted arguments for {LOG_NAME}: {vars(arguments)}")
 
@@ -80,4 +101,5 @@ def analyze_target(
         proxy_address=arguments.proxy_address,
         window_width=arguments.window_width,
         window_height=arguments.window_height,
+        user_agent=arguments.user_agent,
     )
diff --git a/integrations/phishing_analyzers/compose.yml b/integrations/phishing_analyzers/compose.yml
index 4efcfdb9e8..d6245ff4b9 100644
--- 
a/integrations/phishing_analyzers/compose.yml +++ b/integrations/phishing_analyzers/compose.yml @@ -7,7 +7,7 @@ services: restart: unless-stopped expose: - "4005" - - "7007" # selenium-wire proxy + - "17000-17100" # selenium-wire proxies pool env_file: - env_file_integrations volumes: @@ -27,13 +27,21 @@ services: - SE_EVENT_BUS_HOST=selenium-hub - SE_EVENT_BUS_PUBLISH_PORT=4442 - SE_EVENT_BUS_SUBSCRIBE_PORT=4443 + # to allow multiple session to run + - SE_NODE_OVERRIDE_MAX_SESSIONS=true + - SE_NODE_MAX_SESSIONS=4 + # clean session for long running containers + # https://github.com/SeleniumHQ/docker-selenium/blob/trunk/README.md#automatic-browser-leftovers-cleanup + - SE_ENABLE_BROWSER_LEFTOVERS_CLEANUP=true + - SE_BROWSER_LEFTOVERS_INTERVAL_SECS=86400 + - SE_BROWSER_LEFTOVERS_PROCESSES_SECS=86400 selenium-hub: image: selenium/hub:4.26.0 container_name: selenium-hub environment: - SE_ENABLE_TRACING=false - ports: - - "4442:4442" - - "4443:4443" - - "4444:4444" \ No newline at end of file + expose: + - "4442" + - "4443" + - "4444" \ No newline at end of file