diff --git a/analyzers/MISPWarningLists/MISPWarningLists.json b/analyzers/MISPWarningLists/MISPWarningLists.json
index 211d59534..89a7f936a 100644
--- a/analyzers/MISPWarningLists/MISPWarningLists.json
+++ b/analyzers/MISPWarningLists/MISPWarningLists.json
@@ -3,7 +3,7 @@
   "author": "Nils Kuhnert, CERT-Bund",
   "license": "AGPL-V3",
   "url": "https://github.com/BSI-CERT-Bund/misp-warninglists-analyzer",
-  "version": "1.0",
+  "version": "2.0",
   "description": "Check IoCs/Observables against MISP Warninglists to filter false positives.",
   "dataTypeList": ["ip", "hash", "domain", "fqdn", "url"],
   "baseConfig": "MISPWarningLists",
@@ -14,7 +14,14 @@
       "description": "path to Warninglists folder",
       "type": "string",
       "multi": false,
-      "required": true
+      "required": false
+    },
+    {
+      "name": "conn",
+      "description": "sqlalchemy connection string",
+      "multi": false,
+      "required": false,
+      "type": "string"
     }
   ]
 }
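
Note on the new optional `conn` setting: it takes any SQLAlchemy connection URL, read by the analyzer as `config.conn`. A minimal sketch of a plausible value, assuming the PostgreSQL database built by the `warninglists_create_db.py` script added below (host, credentials, and database name are placeholders, not part of the patch):

```python
# Hypothetical value for the new "conn" analyzer setting. Any
# SQLAlchemy-compatible URL works in principle, but the importer
# script below targets PostgreSQL via the psycopg2 driver.
conn = "postgresql+psycopg2://cortex:s3cret@localhost:5432/warninglists"
```
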
diff --git a/analyzers/MISPWarningLists/mispwarninglists.py b/analyzers/MISPWarningLists/mispwarninglists.py
index 911b9a424..1ac25178d 100755
--- a/analyzers/MISPWarningLists/mispwarninglists.py
+++ b/analyzers/MISPWarningLists/mispwarninglists.py
@@ -9,6 +9,14 @@
 from glob import glob
 from os.path import exists
 
+try:
+    import sqlalchemy as db
+    from tld import get_tld
+
+    USE_DB = True
+except ImportError:
+    USE_DB = False
+
 
 class MISPWarninglistsAnalyzer(Analyzer):
     """
@@ -22,92 +30,171 @@ class MISPWarninglistsAnalyzer(Analyzer):
     }
     ```
     """
+
     def __init__(self):
         Analyzer.__init__(self)
         self.data = self.get_data()
-        self.path = self.get_param('config.path', 'misp-warninglists')
-        if not exists(self.path):
-            self.error('Path to misp-warninglists does not exist.')
-        self.warninglists = self.readwarninglists()
+        self.path = self.get_param("config.path", "misp-warninglists")
+        conn = self.get_param("config.conn", None)
+        self.warninglists = self.readwarninglists() if not USE_DB else None
+        self.engine = db.create_engine(conn) if conn and USE_DB else None
+        if not exists(self.path) and not self.engine:
+            self.error("wrong configuration settings.")
 
     def readwarninglists(self):
-        files = glob('{}/lists/*/*.json'.format(self.path))
+        files = glob("{}/lists/*/*.json".format(self.path))
         listcontent = []
         for file in files:
-            with io.open(file, 'r') as fh:
+            with io.open(file, "r") as fh:
                 content = json.loads(fh.read())
-                values = Extractor().check_iterable(content.get('list', []))
+                values = Extractor().check_iterable(content.get("list", []))
                 obj = {
-                    "name": content.get('name', 'Unknown'),
-                    "values": [value['data'] for value in values],
-                    "dataTypes": [value['dataType'] for value in values]
+                    "name": content.get("name", "Unknown"),
+                    "values": [value["data"] for value in values],
+                    "dataTypes": [value["dataType"] for value in values],
                 }
                 listcontent.append(obj)
         return listcontent
 
     def lastlocalcommit(self):
         try:
-            with io.open('{}/.git/refs/heads/master'.format(self.path), 'r') as fh:
-                return fh.read().strip('\n')
+            with io.open("{}/.git/refs/heads/master".format(self.path), "r") as fh:
+                return fh.read().strip("\n")
         except Exception as e:
-            return 'Error: could not get local commit hash ({}).'.format(e)
+            return "Error: could not get local commit hash ({}).".format(e)
 
     @staticmethod
     def lastremotecommit():
-        url = 'https://api.github.com/repos/misp/misp-warninglists/branches/master'
+        url = "https://api.github.com/repos/misp/misp-warninglists/branches/master"
         try:
             result_dict = requests.get(url).json()
-            return result_dict['commit']['sha']
+            return result_dict["commit"]["sha"]
         except Exception as e:
-            return 'Error: could not get remote commit hash ({}).'.format(e)
+            return "Error: could not get remote commit hash ({}).".format(e)
 
     def run(self):
         results = []
         data = self.data
-        if self.data_type == 'ip':
+
+        if self.data_type == "ip":
             try:
                 data = ipaddress.ip_address(self.data)
             except ValueError:
-                return self.error("{} is said to be an IP address but it isn't".format(self.data))
-        for list in self.warninglists:
-            if self.data_type not in list.get('dataTypes'):
-                continue
-
-            if self.data_type == 'ip':
-                for net in list.get('values', []):
-                    try:
-                        if data in ipaddress.ip_network(net):
-                            results.append({"name": list.get('name')})
-                            break
-                    except ValueError:
-                        # Ignoring if net is not a valid IP network since we want to compare ip addresses
-                        pass
+                return self.error(
+                    "{} is said to be an IP address but it isn't".format(self.data)
+                )
+
+        if not self.engine:
+            for list in self.warninglists:
+                if self.data_type not in list.get("dataTypes"):
+                    continue
+
+                if self.data_type == "ip":
+                    for net in list.get("values", []):
+                        try:
+                            if data in ipaddress.ip_network(net):
+                                results.append({"name": list.get("name")})
+                                break
+                        except ValueError:
+                            # Ignoring if net is not a valid IP network since we want to compare ip addresses
+                            pass
+                else:
+                    if data.lower() in list.get("values", []):
+                        results.append({"name": list.get("name")})
+
+            self.report(
+                {
+                    "results": results,
+                    "mode": "json",
+                    "is_uptodate": self.lastlocalcommit()
+                    == self.lastremotecommit(),
+                }
+            )
+        else:
+            field = None
+            if self.data_type == "ip":
+                sql = (
+                    "SELECT list_name, list_version, address as value FROM warninglists WHERE address >>= inet '%s'"
+                    % data
+                )
+            elif self.data_type == "hash":
+                sql = (
+                    "SELECT list_name, list_version, hash as value FROM warninglists WHERE hash='%s'"
+                    % data
+                )
             else:
-                if data.lower() in list.get('values', []):
-                    results.append({
-                        "name": list.get('name')
-                    })
-
-        self.report({
-            "results": results,
-            "is_uptodate": self.lastlocalcommit() == self.lastremotecommit()
-        })
+                ext = get_tld(data, fix_protocol=True, as_object=True)
+                subdomain = ext.subdomain if ext.subdomain != "" else None
+                domain = ext.domain
+                tld = ext.tld
+                query = ext.parsed_url[2] if ext.parsed_url[2] != "" else None
+
+                if not domain or not tld:
+                    return self.error(
+                        "{} is not a valid url/domain/fqdn".format(self.data)
+                    )
+
+                if query:
+                    if subdomain and subdomain != "*":
+                        sql = (
+                            "SELECT list_name, list_version, concat(subdomain, '.', domain, '.', tld, query) as value FROM warninglists WHERE subdomain = '%s' and domain = '%s' and tld = '%s' and query = '%s'"
+                            % (subdomain, domain, tld, query)
+                        )
+                    else:
+                        sql = (
+                            "SELECT list_name, list_version, concat(domain, '.', tld, query) as value FROM warninglists WHERE domain = '%s' and tld = '%s' and query = '%s'"
+                            % (domain, tld, query)
+                        )
+                elif not subdomain:
+                    sql = (
+                        "SELECT list_name, list_version, concat(domain, '.', tld) as value FROM warninglists WHERE subdomain is null and domain = '%s' and tld = '%s'"
+                        % (domain, tld)
+                    )
+                elif subdomain == "*":
+                    sql = (
+                        "SELECT list_name, list_version, concat(subdomain, '.', domain, '.', tld) as value FROM warninglists WHERE subdomain is not null and domain = '%s' and tld = '%s'"
+                        % (domain, tld)
+                    )
+                else:
+                    sql = (
+                        "SELECT list_name, list_version, concat(subdomain, '.', domain, '.', tld) as value FROM warninglists WHERE (subdomain = '%s' or subdomain = '*') and domain = '%s' and tld = '%s'"
+                        % (subdomain, domain, tld)
+                    )
+            values = self.engine.execute(sql)
+            self.engine.dispose()
+            if values.rowcount > 0:
+                for row in values:
+                    results.append(
+                        {
+                            key: value
+                            for (key, value) in zip(
+                                ["list_name", "list_version", "value"], row
+                            )
+                        }
+                    )
+            self.report({"results": results, "mode": "db", "is_uptodate": "N/A"})
 
     def summary(self, raw):
         taxonomies = []
-        if len(raw['results']) > 0:
-            taxonomies.append(self.build_taxonomy('suspicious', 'MISP', 'Warninglists', 'Potential fp'))
+        if len(raw["results"]) > 0:
+            taxonomies.append(
+                self.build_taxonomy(
+                    "suspicious", "MISP", "Warninglists", "Potential fp"
+                )
+            )
         else:
-            taxonomies.append(self.build_taxonomy('info', 'MISP', 'Warninglists', 'No hits'))
+            taxonomies.append(
+                self.build_taxonomy("info", "MISP", "Warninglists", "No hits")
+            )
 
-        if not raw.get('is_uptodate', False):
-            taxonomies.append(self.build_taxonomy('info', 'MISP', 'Warninglists', 'Outdated'))
+        if raw.get("mode", None) == "json" and not raw.get("is_uptodate", False):
+            taxonomies.append(
+                self.build_taxonomy("info", "MISP", "Warninglists", "Outdated")
+            )
 
-        return {
-            "taxonomies": taxonomies
-        }
+        return {"taxonomies": taxonomies}
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     MISPWarninglistsAnalyzer().run()
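
One review note on the DB branch above: the SQL statements are assembled with `%` string interpolation, so an observable containing a quote character would break the query, and escaping is left entirely to the caller. A minimal sketch of the same hash lookup with bound parameters instead, where the driver handles quoting (the DSN and hash value are placeholders; this is an alternative, not part of the patch):

```python
from sqlalchemy import create_engine
from sqlalchemy.sql import text

# Placeholder DSN; in practice reuse the analyzer's config.conn value.
engine = create_engine("postgresql+psycopg2://cortex:s3cret@localhost:5432/warninglists")

# The :hash placeholder is bound by the driver, never interpolated
# into the SQL string itself.
sql = text(
    "SELECT list_name, list_version, hash AS value "
    "FROM warninglists WHERE hash = :hash"
)

with engine.connect() as connection:
    rows = connection.execute(sql, {"hash": "44d88612fea8a8f36de82e1278abb02f"})
    for row in rows:
        print(dict(zip(["list_name", "list_version", "value"], row)))
```
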
diff --git a/analyzers/MISPWarningLists/requirements.txt b/analyzers/MISPWarningLists/requirements.txt
index e8d34eb7f..10975ab77 100644
--- a/analyzers/MISPWarningLists/requirements.txt
+++ b/analyzers/MISPWarningLists/requirements.txt
@@ -1,3 +1,6 @@
 cortexutils
 requests
 ipaddress
+tld
+sqlalchemy
+psycopg2-binary
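
The new `tld` dependency drives the domain splitting used both in the analyzer's DB branch above and in the importer below. A quick illustration of how `get_tld` decomposes a value into the four columns the database stores (the sample observable is made up; note that `parsed_url[2]` is the URL path, which the schema keeps in a column named "query"):

```python
from tld import get_tld

# Illustrative value only. fix_protocol=True lets get_tld accept
# observables that lack an http:// or https:// scheme.
ext = get_tld("mirror.centos.org/centos/7/", fix_protocol=True, as_object=True)

print(ext.subdomain)      # 'mirror'     -> "subdomain" column
print(ext.domain)         # 'centos'     -> "domain" column
print(ext.tld)            # 'org'        -> "tld" column
print(ext.parsed_url[2])  # '/centos/7/' -> "query" column (URL path)
```
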
diff --git a/analyzers/MISPWarningLists/warninglists_create_db.py b/analyzers/MISPWarningLists/warninglists_create_db.py
new file mode 100644
index 000000000..663b19f0b
--- /dev/null
+++ b/analyzers/MISPWarningLists/warninglists_create_db.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+import re
+import json
+import logging
+import ipaddress
+from glob import glob
+from tqdm import tqdm
+from tld import get_tld
+
+logging.basicConfig(filename='import.log', level=logging.DEBUG)
+
+
+import psycopg2.extras
+from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, Index, create_engine
+from sqlalchemy.sql import select
+from sqlalchemy.dialects.postgresql import CIDR
+
+conn_string = ""
+warninglists_path = "misp-warninglists/**/list.json"
+
+engine = create_engine(conn_string, use_batch_mode=True)
+conn = engine.connect()
+
+# UPDATE TLD FROM MOZILLA
+from tld.utils import update_tld_names
+update_tld_names()
+
+
+# HASH REGEX
+md5_re = re.compile(r"^[a-f0-9]{32}(:.+)?$", re.IGNORECASE)
+sha1_re = re.compile(r"^[a-f0-9]{40}(:.+)?$", re.IGNORECASE)
+sha224_re = re.compile(r"^[a-f0-9]{56}(:.+)?$", re.IGNORECASE)
+sha256_re = re.compile(r"^[a-f0-9]{64}(:.+)?$", re.IGNORECASE)
+sha512_re = re.compile(r"^[a-f0-9]{128}(:.+)?$", re.IGNORECASE)
+
+
+items = {}
+avoid_list = []
+
+file_list = [file for file in glob(warninglists_path, recursive=True) if file.split("/")[-2] not in avoid_list]
+for file_item in file_list:
+    with open(file_item, 'r') as f:
+        json_data = json.load(f)
+    file_name = file_item.split("/")[-2]
+    items[file_name] = {}
+    items[file_name]['version'] = str(json_data['version'])
+    items[file_name]['list'] = {x: {} for x in json_data['list']}
+
+for k, v in items.items():
+    logging.debug(f"NAME: {k} - VERSION: {v['version']} - ITEMS: {len(v['list'])}")
+
+
+# In[7]:
+
+for k, v in tqdm(items.items()):
+    for item in v['list'].keys():
+        new_item = item
+        if new_item.startswith('.'):
+            new_item = "*" + new_item
+        if new_item.endswith('.'):
+            new_item = new_item[:-1]
+        try:
+            ipaddress.ip_address(new_item)
+            items[k]['list'][item]['type'] = 'cidr'
+            items[k]['list'][item]['address'] = new_item
+        except:
+            try:
+                ipaddress.ip_network(new_item)
+                items[k]['list'][item]['type'] = 'cidr'
+                items[k]['list'][item]['address'] = new_item
+            except:
+                if md5_re.match(new_item):
+                    items[k]['list'][item]['type'] = 'md5'
+                    items[k]['list'][item]['hash'] = new_item
+                elif sha1_re.match(new_item):
+                    items[k]['list'][item]['type'] = 'sha1'
+                    items[k]['list'][item]['hash'] = new_item
+                elif sha224_re.match(new_item):
+                    items[k]['list'][item]['type'] = 'sha224'
+                    items[k]['list'][item]['hash'] = new_item
+                elif sha256_re.match(new_item):
+                    items[k]['list'][item]['type'] = 'sha256'
+                    items[k]['list'][item]['hash'] = new_item
+                elif sha512_re.match(new_item):
+                    items[k]['list'][item]['type'] = 'sha512'
+                    items[k]['list'][item]['hash'] = new_item
+                else:
+                    if new_item.find(".") == -1:
+                        logging.error(f"NOT VALID: {new_item} [{k}]")
+                        continue
+                    try:
+                        ext = get_tld(new_item, fix_protocol=True, as_object=True)
+                        items[k]['list'][item]['type'] = 'url-domain'
+                        items[k]['list'][item]['subdomain'] = ext.subdomain if ext.subdomain != '' else None
+                        items[k]['list'][item]['domain'] = ext.domain
+                        items[k]['list'][item]['tld'] = ext.tld
+                        items[k]['list'][item]['query'] = ext.parsed_url[2] if ext.parsed_url[2] != '' else None
+                    except:
+                        logging.error(f"NOT VALID: {new_item} [{k}]")
+
+
+# CREATE OR USE DB
+metadata = MetaData()
+
+warninglists = Table(
+    "warninglists",
+    metadata,
+    Column("id", Integer, primary_key=True, autoincrement=True),
+    Column("list_name", String),
+    Column("list_version", String),
+    Column("address", CIDR),
+    Column("hash", String),
+    Column("subdomain", String),
+    Column("domain", String),
+    Column("tld", String),
+    Column("query", String),
+)
+
+warninglists_address_idx = Index("warninglists_address_idx", warninglists.c.address)
+warninglists_hash_idx = Index("warninglists_hash_idx", warninglists.c.hash)
+warninglists_domain_idx = Index("warninglists_domain_idx", warninglists.c.domain)
+
+try:
+    warninglists.create(engine)
+except:
+    logging.error("DB already exists")
+
+
+try:
+    warninglists_address_idx.drop(engine)
+except:
+    logging.error("warninglists_address_idx does not exists")
+
+
+try:
+    warninglists_hash_idx.drop(engine)
+except:
+    logging.error("warninglists_hash_idx does not exists")
+
+
+try:
+    warninglists_domain_idx.drop(engine)
+except:
+    logging.error("warninglists_domain_idx does not exists")
+
+
+# CHECK IF OLD RELEASE ARE IN DB
+s = select([warninglists.c.list_name, warninglists.c.list_version]).distinct()
+last_versions = [x for x in conn.execute(s)]
+print(f"{len(last_versions)} list already available in db")
+
+
+# INSERT, UPDATE OR SKIP
+raw_conn = engine.raw_connection()
+cursor = raw_conn.cursor()
+
+for k, v in tqdm(items.items()):
+    name = k
+    version = items[k]['version']
+    if (name, version) not in last_versions:
+        if name in [x[0] for x in last_versions]:
+            logging.debug(f"{(name, version)} is an update - DELETE OLD RELEASE")
+            d = warninglists.delete().where(warninglists.c.list_name == name)
+            conn.execute(d)
+
+        logging.debug(f"{(name, version)} not in db - BULK IMPORTING")
+        tbi = [{
+            'list_name': name,
+            'list_version': version,
+            'address': item.get('address', None),
+            'hash': item.get('hash', None),
+            'subdomain': item.get('subdomain', None),
+            'domain': item.get('domain', None),
+            'tld': item.get('tld', None),
+            'query': item.get('query', None),
+        } for item_old_name, item in v['list'].items()]
+        psycopg2.extras.execute_batch(cursor, """INSERT INTO warninglists(list_name, list_version, address, hash, subdomain, domain, tld, query) VALUES (%(list_name)s, %(list_version)s, %(address)s, %(hash)s, %(subdomain)s, %(domain)s, %(tld)s, %(query)s)""", tbi)
+        raw_conn.commit()
+    else:
+        logging.debug(f"{name}, {version} already in db - SKIPPING")
+
+cursor.close()
+conn.close()
+raw_conn.close()
+
+try:
+    warninglists_address_idx.create(engine)
+except:
+    logging.error(f"warninglists_address_idx already exists")
+try:
+    warninglists_hash_idx.create(engine)
+except:
+    logging.error(f"warninglists_hash_idx already exists")
+try:
+    warninglists_domain_idx.create(engine)
+except:
+    logging.error(f"warninglists_domain_idx already exists")
+engine.dispose()
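
Once the importer above has populated PostgreSQL (clone the misp-warninglists repository next to the script and fill in `conn_string` first), a quick spot check against the `warninglists` table can confirm both the schema and the `>>=` inet operator the analyzer relies on for IP lookups. A sketch, assuming the placeholder DSN from earlier; the IP is just an example value:

```python
from sqlalchemy import create_engine
from sqlalchemy.sql import text

# Placeholder DSN; point this at the database the importer filled.
engine = create_engine("postgresql+psycopg2://cortex:s3cret@localhost:5432/warninglists")

with engine.connect() as connection:
    # '>>=' is PostgreSQL's "contains or equals" operator for inet/cidr,
    # so a /24 warninglist entry matches any single address inside it.
    rows = connection.execute(
        text(
            "SELECT list_name, list_version FROM warninglists "
            "WHERE address >>= CAST(:ip AS inet)"
        ),
        {"ip": "8.8.8.8"},
    )
    for list_name, list_version in rows:
        print(list_name, list_version)
```
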
diff --git a/thehive-templates/MISPWarningLists_1_0/long.html b/thehive-templates/MISPWarningLists_2_0/long.html
similarity index 71%
rename from thehive-templates/MISPWarningLists_1_0/long.html
rename to thehive-templates/MISPWarningLists_2_0/long.html
index a1af782d8..9dbb97d86 100644
--- a/thehive-templates/MISPWarningLists_1_0/long.html
+++ b/thehive-templates/MISPWarningLists_2_0/long.html
@@ -9,9 +9,12 @@

         Observable was found in following MISP warning lists:

-