diff --git a/cloudgrep/__main__.py b/cloudgrep/__main__.py index bd219a1..8db34df 100644 --- a/cloudgrep/__main__.py +++ b/cloudgrep/__main__.py @@ -25,7 +25,7 @@ def main() -> None: "--query", type=list_of_strings, help="Text to search for. Will be parsed as a Regex. E.g. example.com", - required=False + required=False, ) parser.add_argument( "-v", diff --git a/cloudgrep/cloud.py b/cloudgrep/cloud.py index 1a807df..7c74780 100644 --- a/cloudgrep/cloud.py +++ b/cloudgrep/cloud.py @@ -85,7 +85,15 @@ def download_file(key: str) -> None: blob_data = blob_client.download_blob() blob_data.readinto(my_blob) matched = Search().search_file( - tmp.name, key, query, hide_filenames, yara_rules, log_format, log_properties, json_output + tmp.name, + key, + query, + hide_filenames, + yara_rules, + log_format, + log_properties, + json_output, + account_name, ) if matched: nonlocal matched_count @@ -226,7 +234,9 @@ def get_azure_objects( ) container_client = blob_service_client.get_container_client(container_name) blobs = container_client.list_blobs(name_starts_with=prefix) + for blob in blobs: + if self.filter_object_azure( blob, key_contains, diff --git a/cloudgrep/cloudgrep.py b/cloudgrep/cloudgrep.py index 93a332c..4dc71b2 100644 --- a/cloudgrep/cloudgrep.py +++ b/cloudgrep/cloudgrep.py @@ -13,7 +13,7 @@ class CloudGrep: def load_queries(self, file: str) -> List[str]: """Load in a list of queries from a file""" with open(file, "r") as f: - return ([line.strip() for line in f.readlines() if len(line.strip())]) + return [line.strip() for line in f.readlines() if len(line.strip())] def search( self, @@ -47,6 +47,9 @@ def search( case "cloudtrail": log_format = "json" log_properties = ["Records"] + case "azure": + log_format = "json" + log_properties = ["data"] case _: logging.error( f"Invalid log_type value ('{log_type}') unhandled in switch statement in 'search' function." @@ -98,6 +101,7 @@ def search( ) ) print(f"Searching {len(matching_keys)} files in {account_name}/{container_name} for {query}...") + Cloud().download_from_azure( account_name, container_name, diff --git a/cloudgrep/search.py b/cloudgrep/search.py index 8d7149a..014df17 100644 --- a/cloudgrep/search.py +++ b/cloudgrep/search.py @@ -27,8 +27,10 @@ def print_match(self, matched_line_dict: dict, hide_filenames: bool, json_output if hide_filenames: matched_line_dict.pop("key_name") try: + print(json.dumps(matched_line_dict)) except TypeError: + print(str(matched_line_dict)) else: line = "" @@ -40,6 +42,7 @@ def print_match(self, matched_line_dict: dict, hide_filenames: bool, json_output if not hide_filenames: print(f"{matched_line_dict['key_name']}: {line}") else: + print(line) def search_logs( @@ -73,6 +76,7 @@ def search_logs( # Step into property/properties to get to final list of lines for per-line searching. if log_properties != None: + for log_property in log_properties: if line_parsed: line_parsed = line_parsed.get(log_property, None) @@ -84,6 +88,7 @@ def search_logs( # Perform per-line searching. for record in line_parsed: if re.search(search, json.dumps(record)): + matched_line_dict = {"key_name": key_name, "query": search, "line": record} self.print_match(matched_line_dict, hide_filenames, json_output) @@ -101,9 +106,13 @@ def search_line( matched = False for cur_search in search: if re.search(cur_search, line): + if log_format != None: - self.search_logs(line, key_name, cur_search, hide_filenames, log_format, log_properties, json_output) + self.search_logs( + line, key_name, cur_search, hide_filenames, log_format, log_properties, json_output + ) else: + matched_line_dict = {"key_name": key_name, "query": cur_search, "line": line} self.print_match(matched_line_dict, hide_filenames, json_output) matched = True @@ -129,21 +138,37 @@ def search_file( log_format: Optional[str] = None, log_properties: List[str] = [], json_output: Optional[bool] = False, + account_name: Optional[str] = None, ) -> bool: """Regex search of the file line by line""" matched = False - logging.info(f"Searching {file_name} for {search}") + logging.info(f"Searching {file_name} for {search}") if yara_rules: matched = self.yara_scan_file(file_name, key_name, hide_filenames, yara_rules, json_output) else: if key_name.endswith(".gz"): with gzip.open(file_name, "rt") as f: - for line in f: - if self.search_line( - key_name, search, hide_filenames, line, log_format, log_properties, json_output - ): - matched = True + if account_name: + try: + # Try to load the file as JSON + json_data = json.load(f) + for i in range(len(json_data)): + data = json_data[i] + line = json.dumps(data) + if self.search_line( + key_name, search, hide_filenames, line, log_format, log_properties, json_output + ): + matched = True + except json.JSONDecodeError: + logging.info(f"File {file_name} is not JSON") + else: + for line in f: + + if self.search_line( + key_name, search, hide_filenames, line, log_format, log_properties, json_output + ): + matched = True elif key_name.endswith(".zip"): with tempfile.TemporaryDirectory() as tempdir: with zipfile.ZipFile(file_name, "r") as zf: @@ -153,19 +178,42 @@ def search_file( logging.info(f"Searching in zip {filename}") if os.path.isfile(os.path.join(tempdir, filename)): with open(os.path.join(tempdir, filename)) as f: - for line in f: - if self.search_line( - f"{key_name}/{filename}", - search, - hide_filenames, - line, - log_format, - log_properties, - json_output, - ): - matched = True + if account_name: + if account_name: + try: + json_data = json.load(f) + for i in range(len(json_data)): + data = json_data[i] + line = json.dumps(data) + + if self.search_line( + key_name, + search, + hide_filenames, + line, + log_format, + log_properties, + json_output, + ): + matched = True + except json.JSONDecodeError: + logging.info(f"File {file_name} is not JSON") + else: + for line in f: + if self.search_line( + f"{key_name}/{filename}", + search, + hide_filenames, + line, + log_format, + log_properties, + json_output, + ): + matched = True else: + for line in self.get_all_strings_line(file_name): + if self.search_line( key_name, search, hide_filenames, line, log_format, log_properties, json_output ): diff --git a/requirements.txt b/requirements.txt index 07ea38e..817ce1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -botocore==1.24.46 -boto3==1.21.24 -boto3-stubs==1.20.49 +botocore>=1.24.46 +boto3>=1.28.0 +boto3-stubs>=1.20.49 python-dateutil==2.8.1 types-python-dateutil==2.8.13 pytest==7.2.0 @@ -13,4 +13,4 @@ azure-core==1.29.4 azure-identity==1.14.1 google-cloud-storage==2.12.0 setuptools==68.2.2 -yara-python==4.3.1 \ No newline at end of file +yara-python==4.3.1 diff --git a/tests/data/azure.json b/tests/data/azure.json new file mode 100644 index 0000000..5bce29d --- /dev/null +++ b/tests/data/azure.json @@ -0,0 +1,56 @@ +[ + { + "data": { + "authorization": { + "action": "Microsoft.Storage/storageAccounts/listKeys/action", + "scope": "/subscriptions/ji12gbh3jh12b3h12vb3hv123h/resourceGroups/test/providers/Microsoft.Storage/storageAccounts/storagetest" + }, + "caller": "test@email", + "channels": "Operation", + "claims": { + "aud": "https://management.core.windows.net/", + "ver": "1.0", + "xms_cae": "1", + "xms_tcdt": "1231293743" + }, + "correlationId": "b12321j3bhdgscj214j3b12rhv", + "description": "", + "eventDataId": "21371283ghjgfsdb9876123", + "eventName": { + "value": "EndRequest", + "localizedValue": "End request" + }, + "httpRequest": { + "clientRequestId": "9dsfghj1290-381293ghu123gvh123", + "clientIpAddress": "11.11.11.10", + "method": "POST", + "uri": "https://management.azure.com/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest/listKeys?api-version=2022-05-01" + }, + "id": "/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest/events/21371283ghjgfsdb9876123/ticks/2193871283612873612", + "level": "Informational", + "resourceGroupName": "Test", + "resourceProviderName": { + "value": "Microsoft.Storage", + "localizedValue": "Microsoft.Storage" + }, + "resourceId": "/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest", + "operationId": "ac8a903f-315d-421a-8533-84ed10b356cd", + "operationName": { + "value": "Microsoft.Storage/storageAccounts/listKeys/action", + "localizedValue": "List Storage Account Keys" + }, + "status": { + "value": "Succeeded", + "localizedValue": "Succeeded" + }, + "subStatus": { + "value": "OK", + "localizedValue": "OK (HTTP Status Code: 200)" + }, + "tenantId": "12321MN3BNDASVBfD09SFDGSD" + }, + "version": 1, + "eventId": "21371283ghjgfsdb9876123", + "eventType": "AZURE_CLOUD" + } + ] \ No newline at end of file diff --git a/tests/data/azure_singleline.json b/tests/data/azure_singleline.json new file mode 100644 index 0000000..5bce29d --- /dev/null +++ b/tests/data/azure_singleline.json @@ -0,0 +1,56 @@ +[ + { + "data": { + "authorization": { + "action": "Microsoft.Storage/storageAccounts/listKeys/action", + "scope": "/subscriptions/ji12gbh3jh12b3h12vb3hv123h/resourceGroups/test/providers/Microsoft.Storage/storageAccounts/storagetest" + }, + "caller": "test@email", + "channels": "Operation", + "claims": { + "aud": "https://management.core.windows.net/", + "ver": "1.0", + "xms_cae": "1", + "xms_tcdt": "1231293743" + }, + "correlationId": "b12321j3bhdgscj214j3b12rhv", + "description": "", + "eventDataId": "21371283ghjgfsdb9876123", + "eventName": { + "value": "EndRequest", + "localizedValue": "End request" + }, + "httpRequest": { + "clientRequestId": "9dsfghj1290-381293ghu123gvh123", + "clientIpAddress": "11.11.11.10", + "method": "POST", + "uri": "https://management.azure.com/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest/listKeys?api-version=2022-05-01" + }, + "id": "/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest/events/21371283ghjgfsdb9876123/ticks/2193871283612873612", + "level": "Informational", + "resourceGroupName": "Test", + "resourceProviderName": { + "value": "Microsoft.Storage", + "localizedValue": "Microsoft.Storage" + }, + "resourceId": "/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest", + "operationId": "ac8a903f-315d-421a-8533-84ed10b356cd", + "operationName": { + "value": "Microsoft.Storage/storageAccounts/listKeys/action", + "localizedValue": "List Storage Account Keys" + }, + "status": { + "value": "Succeeded", + "localizedValue": "Succeeded" + }, + "subStatus": { + "value": "OK", + "localizedValue": "OK (HTTP Status Code: 200)" + }, + "tenantId": "12321MN3BNDASVBfD09SFDGSD" + }, + "version": 1, + "eventId": "21371283ghjgfsdb9876123", + "eventType": "AZURE_CLOUD" + } + ] \ No newline at end of file diff --git a/tests/data/bad_azure.json b/tests/data/bad_azure.json new file mode 100644 index 0000000..70beb07 --- /dev/null +++ b/tests/data/bad_azure.json @@ -0,0 +1,56 @@ +[ + { + "data": { + "authorization": { + "action": "Microsoft.Storage/storageAccounts/listKeys/action", + "scope": "/subscriptions/ji12gbh3jh12b3h12vb3hv123h/resourceGroups/test/providers/Microsoft.Storage/storageAccounts/storagetest" + }, + "caller": "test@email", + "channels": "Operation", + "claims": { + "aud": "https://management.core.windows.net/", + "ver": "1.0", + "xms_cae": "1", + "xms_tcdt": "1231293743" + }, + "correlationId": "b12321j3bhdgscj214j3b12rhv", + "description": "", + "eventDataId": "21371283ghjgfsdb9876123", + "eventName": { + "value": "EndRequest", + "localizedValue": "End request" + }, + "httpRequest": { + "clientRequestId": "9dsfghj1290-381293ghu123gvh123", + "clientIpAddress": "11.11.11.10", + "method": "POST", + "uri": "https://management.azure.com/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest/listKeys?api-version=2022-05-01" + }, + "id": "/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest/events/21371283ghjgfsdb9876123/ticks/2193871283612873612", + "level": "Informational", + "resourceGroupName": "Test", + "resourceProviderName": { + "value": "Microsoft.Storage", + "localizedValue": "Microsoft.Storage" + }, + "resourceId": "/subscriptions/12937812uhg3uhj2qwgrfbhsdfgb138294e12ugv/resourceGroups/Test/providers/Microsoft.Storage/storageAccounts/storagetest", + "operationId": "ac8a903f-315d-421a-8533-84ed10b356cd", + "operationName": { + "value": "Microsoft.Storage/storageAccounts/listKeys/action", + "localizedValue": "List Storage Account Keys" + }, + "status": { + "value": "Succeeded", + "localizedValue": "Succeeded" + }, + "subStatus": { + "value": "OK", + "localizedValue": "OK (HTTP Status Code: 200)" + }, + "tenantId": "12321MN3BNDASVBfD09SFDGSD" + }, + "version": 1, + "eventId": "21371283ghjgfsdb9876123", + "eventType": "AZURE_CLOUD" + +] \ No newline at end of file diff --git a/tests/test_unit.py b/tests/test_unit.py index ac274ea..32c3e8f 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -75,7 +75,7 @@ def test_e2e(self) -> None: assert len(matching_keys) == 3 print(f"Checking we only get one search hit in: {matching_keys}") - hits = Cloud().download_from_s3_multithread(_BUCKET, matching_keys, _QUERY, False, None) # type: ignore + hits = Cloud().download_from_s3_multithread(_BUCKET, matching_keys, _QUERY, False, None) # type: ignore assert hits == 3 print("Testing with multiple queries from a file") @@ -125,7 +125,7 @@ def test_returns_string_with_file_contents(self) -> None: f.write("query1\nquery2\nquery3") queries = CloudGrep().load_queries(file) self.assertIsInstance(queries, List) - self.assertEqual(queries, ["query1", "query2", "query3"] ) + self.assertEqual(queries, ["query1", "query2", "query3"]) # Given a valid file name, key name, and yara rules, the method should successfully match the file against the rules and print only the rule name and matched strings if hide_filenames is True. def test_yara(self) -> None: @@ -201,3 +201,45 @@ def test_search_cloudtrail(self) -> None: # Assert we can parse the output self.assertIn("SignatureVersion", output) self.assertTrue(json.loads(output)) + + +def test_search_azure(self) -> None: # type: ignore + # Arrange + log_format = "json" + log_properties = ["data"] + + # Test it doesnt crash on bad json + Search().search_file( + f"{BASE_PATH}/data/bad_azure.json", + "bad_azure.json", + ["azure.gz"], + False, + None, + log_format, + log_properties, + ) + Search().search_file( + f"{BASE_PATH}/data/azure.json", + "azure.json", + ["azure.gz"], + False, + None, + log_format, + log_properties, + ) + with patch("sys.stdout", new=StringIO()) as fake_out: + Search().search_file( + f"{BASE_PATH}/data/azure_singleline.json", + "azure_singleline.json", + ["azure.gz"], + False, + None, + log_format, + log_properties, + True, + ) + output = fake_out.getvalue().strip() + + # Assert we can parse the output + self.assertIn("SignatureVersion", output) + self.assertTrue(json.loads(output))