From 9e86dceec9cf3a8870c7056aa56c7eaa0f947d51 Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 29 Apr 2021 14:46:06 +0200 Subject: [PATCH 1/2] ENH: harmonization: create and use other/malware the classification malicious code / malware is replaced by malicious code / infected system malicious code / malware-distribution other / malware for all other cases, i.e. malware itself fixes certtools/intelmq#1752 related to certtools/intelmq#1409 --- NEWS.md | 15 ++++++++++++++- docs/dev/data-format.rst | 2 +- intelmq/bots/experts/idea/expert.py | 1 + intelmq/bots/experts/taxonomy/expert.py | 1 + intelmq/bots/parsers/github_feed/parser.py | 4 ++-- intelmq/lib/harmonization.py | 6 ++++-- .../tests/bots/parsers/github_feed/test_parser.py | 10 +++------- 7 files changed, 26 insertions(+), 13 deletions(-) diff --git a/NEWS.md b/NEWS.md index d4184840a..164694abe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -30,16 +30,24 @@ The classification scheme has been updated to better match the [Reference Securi | malicious code | | malicious-code | | | malicious code | c2server | malicious-code | c2-server | | malicious code | malware | malicious-code | infected-system / malware-distribution | +| malicious code | malware | other | malware | | malicious code | ransomware | malicious-code | infected-system | | vulnerable | vulnerable client | vulnerable | vulnerable-system | | vulnerable | vulnerable service | vulnerable | vulnerable-system | | other | unknown | other | undetermined | - For the taxonomy 'availability', the type `misconfiguration` is new. -- For the taxonomy 'other', the type `undetermined` is new. +- For the taxonomy 'other', the types `malware` and `undetermined` are new. The old names can still be used in code, and they are automatically converted to the new names. +#### "Malware" + +The previously existing classification type "malware" under the taxonomy "malicious code" was removed, as this type does not exist in the RSIT. 
+Most of the usages were wrong anyway, and should have been infected-system, malware-distribution or something else. +There is only one usage in IntelMQ, which cannot be changed. +And that one is really about malware itself (or: the hashes of samples). For this purpose, the new type "malware" under the taxonomy "other" was created, *slightly* deviating from the RSIT in this respect, but "other" can be freely extended. + ### Configuration The `defaults.conf` file was removed. Settings that should effect all the bots are not part of the runtime.conf file and are configured in the `global` section in that file. @@ -93,6 +101,11 @@ UPDATE events SET "classification.type" = 'malware-distribution' WHERE "classification.taxonomy" = 'malicious-code' AND ("classification.type" = 'malware' OR "classification.type" = 'ransomware'); ``` +or this: +```sql +UPDATE events + SET "classification.taxonomy" = 'other' + WHERE "classification.type" = 'malware'; 2.3.3 Bugfix release (unreleased) diff --git a/docs/dev/data-format.rst b/docs/dev/data-format.rst index fea5c6c71..088c9de7b 100644 --- a/docs/dev/data-format.rst +++ b/docs/dev/data-format.rst @@ -135,13 +135,13 @@ The taxonomy can be automatically added by the taxonomy expert bot based on the malicious-code c2-server This is a command and control server in charge of a given number of botnet drones. malicious-code dga domain DGA Domains are seen various families of malware that are used to periodically generate a large number of domain names that can be used as rendezvous points with their command and control servers. Not in ENISA eCSIRT-II taxonomy. malicious-code infected-system This is a compromised machine, which has been observed to make a connection to a command and control server. - malicious-code malware A URL is the most common resource with reference to malware binary distribution. Not in ENISA eCSIRT-II taxonomy. 
malicious-code malware-configuration This is a resource which updates botnet drones with a new configuration. malicious-code malware-distribution URI used for malware distribution, e.g. a download URL included in fake invoice malware spam. other blacklist Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. The reason may be that the justification is anecdotal or missing entirely. This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another. other other All incidents which don't fit in one of the given categories should be put into this class. other proxy This refers to the use of proxies from inside your network. Not in ENISA eCSIRT-II taxonomy. other tor This IOC refers to incidents related to TOR network infrastructure. Not in ENISA eCSIRT-II taxonomy. + other malware An IoC referring to a malware (sample) itself. Not in RSIT. other undetermined The categorisation of the incident is unknown/undetermined. test test Meant for testing. vulnerable ddos-amplifier Publicly accessible services that can be abused for conducting DDoS reflection/amplification attacks, e.g. DNS open-resolvers or NTP servers with monlist enabled. 
diff --git a/intelmq/bots/experts/idea/expert.py b/intelmq/bots/experts/idea/expert.py index 4e7b4912c..5ccfd4e66 100644 --- a/intelmq/bots/experts/idea/expert.py +++ b/intelmq/bots/experts/idea/expert.py @@ -76,6 +76,7 @@ class IdeaExpertBot(Bot): "copyright": "Fraud.Copyright", "misconfiguration": "Availability.Outage", # outage includes human error "undetermined": "Other", + "malware": "Malware", } TYPE_TO_SOURCE_TYPE = { diff --git a/intelmq/bots/experts/taxonomy/expert.py b/intelmq/bots/experts/taxonomy/expert.py index b3daa3649..d6f2ded83 100644 --- a/intelmq/bots/experts/taxonomy/expert.py +++ b/intelmq/bots/experts/taxonomy/expert.py @@ -55,6 +55,7 @@ "undetermined": "other", "proxy": "other", # not in ENISA eCSIRT-II taxonomy "tor": "other", # not in ENISA eCSIRT-II taxonomy + "malware": "other", # intentionally not in RSIT "test": "test", "ddos-amplifier": "vulnerable", "information-disclosure": "vulnerable", diff --git a/intelmq/bots/parsers/github_feed/parser.py b/intelmq/bots/parsers/github_feed/parser.py index c29781f2a..5843ae2c5 100644 --- a/intelmq/bots/parsers/github_feed/parser.py +++ b/intelmq/bots/parsers/github_feed/parser.py @@ -79,7 +79,7 @@ class Next(Exception): for ioc in json_content: event = clean_event.copy() event.add('raw', str(ioc)) - event.add('classification.type', 'unknown') + event.add('classification.type', 'malware') event.add('classification.taxonomy', 'other') event.add('event_description.text', ioc['Description']) @@ -129,7 +129,7 @@ def parse_domain_indicator(event, ioc_indicator: str): def parse_hash_indicator(event, ioc_indicator: str, hash_type: str): event.add('malware.hash.{}'.format(hash_type), ioc_indicator) - event.change('classification.taxonomy', 'malicious code') + event.change('classification.taxonomy', 'other') event.change('classification.type', 'malware') return event diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index ef24cd5e7..ea173b035 100644 --- a/intelmq/lib/harmonization.py 
+++ b/intelmq/lib/harmonization.py @@ -259,8 +259,9 @@ class ClassificationType(String): 'ransomware' -> 'infected-system' 'unknown' -> 'undetermined' - These old values can not be automatically mapped as they are ambiguous: - 'malware': Either 'infected-system' or 'malware-distribution' + These values changed their taxonomy: + 'malware': In terms of the taxonomy 'malicious-code' they can be either 'infected-system' or 'malware-distribution' + but in terms of malware actually, it is now taxonomy 'other' Allowed values are: * """ @@ -286,6 +287,7 @@ class ClassificationType(String): 'infected-system', 'information-disclosure', 'data-leak', + 'malware', 'malware-configuration', 'malware-distribution', 'masquerade', diff --git a/intelmq/tests/bots/parsers/github_feed/test_parser.py b/intelmq/tests/bots/parsers/github_feed/test_parser.py index 68b5cb0b6..f866d8cf2 100644 --- a/intelmq/tests/bots/parsers/github_feed/test_parser.py +++ b/intelmq/tests/bots/parsers/github_feed/test_parser.py @@ -23,7 +23,7 @@ "feed.name": "Strangereal Intel DailyIOC", "time.observation": "2019-03-01T01:01:01+00:00", "classification.taxonomy": "other", - "classification.type": "unknown", + "classification.type": "malware", "__type": "Event" } @@ -49,8 +49,6 @@ def test_no_processing_is_executed_for_the_feed_is_unknown(self): self.assertRegexpMatchesLog("Unknown feed '{}'.".format(wrong_report['feed.url'])) - # https://github.com/certtools/intelmq/issues/1752 - @unittest.expectedFailure def test_extra_fields_are_present_in_generated_event(self): custom_report = EXAMPLE_STRANGEREALINTEL_REPORT.copy() custom_report['extra.file_metadata'] = { @@ -64,8 +62,6 @@ def test_extra_fields_are_present_in_generated_event(self): for event in self.get_output_queue(): assert 'extra.file_metadata.sha' in event and 'extra.file_metadata.size' in event - # https://github.com/certtools/intelmq/issues/1752 - @unittest.expectedFailure def test_strangerealintel_feed_processing_is_successful(self): self.run_bot() 
@@ -74,7 +70,7 @@ def test_strangerealintel_feed_processing_is_successful(self): sha256_event = EXAMPLE_STRANGEREALINTEL_EVENT.copy() sha256_event['malware.hash.sha256'] = EXAMPLE_STRANGERINTEL_FILE_JSON[0]['Indicator'] sha256_event['event_description.text'] = EXAMPLE_STRANGERINTEL_FILE_JSON[0]['Description'] - sha256_event['classification.taxonomy'] = 'malicious code' + sha256_event['classification.taxonomy'] = 'other' sha256_event['classification.type'] = 'malware' sha256_event['raw'] = utils.base64_encode(str(EXAMPLE_STRANGERINTEL_FILE_JSON[0])) self.assertMessageEqual(0, sha256_event) @@ -82,7 +78,7 @@ def test_strangerealintel_feed_processing_is_successful(self): md5_event = EXAMPLE_STRANGEREALINTEL_EVENT.copy() md5_event['malware.hash.md5'] = EXAMPLE_STRANGERINTEL_FILE_JSON[1]['Indicator'] md5_event['event_description.text'] = EXAMPLE_STRANGERINTEL_FILE_JSON[1]['Description'] - md5_event['classification.taxonomy'] = 'malicious code' + md5_event['classification.taxonomy'] = 'other' md5_event['classification.type'] = 'malware' md5_event['raw'] = utils.base64_encode(str(EXAMPLE_STRANGERINTEL_FILE_JSON[1])) self.assertMessageEqual(1, md5_event) From 177dce8524355ea4cbf9b45e9acc5df783ab02ea Mon Sep 17 00:00:00 2001 From: Sebastian Wagner Date: Thu, 29 Apr 2021 14:47:56 +0200 Subject: [PATCH 2/2] DOC: fixes for "other" classification --- NEWS.md | 2 +- docs/dev/data-format.rst | 12 +++++++----- intelmq/bots/experts/taxonomy/expert.py | 6 +++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/NEWS.md b/NEWS.md index 164694abe..296ce6e21 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,7 +18,7 @@ Both the XMPP collector bot and the XMPP output bot were removed. This [was eval and the XMPP bots were deprecated in 391d625. ### Harmonization -The classification scheme has been updated to better match the [Reference Security Incident Taxonomy](https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force/). 
The following labels were renamed: +The classification scheme has been updated to better match the [Reference Security Incident Taxonomy (RSIT)](https://github.com/enisaeu/Reference-Security-Incident-Taxonomy-Task-Force/). The following labels were renamed: | old taxonomy name | old type name | new taxonomy name | new type name | |-|-|-|-| diff --git a/docs/dev/data-format.rst b/docs/dev/data-format.rst index 088c9de7b..1e9554bfe 100644 --- a/docs/dev/data-format.rst +++ b/docs/dev/data-format.rst @@ -94,7 +94,7 @@ Classification IntelMQ classifies events using three labels: taxonomy, type and identifier. This tuple of three values can be used for deduplication of events and describes what happened. -The taxonomy can be automatically added by the taxonomy expert bot based on the given type. The following classification scheme follow the `Reference Security Incident Taxonomy `_: +The taxonomy can be automatically added by the taxonomy expert bot based on the given type. The following classification scheme follows the `Reference Security Incident Taxonomy (RSIT) `_: =============================== ========================================= ============================================= @@ -137,13 +137,13 @@ The taxonomy can be automatically added by the taxonomy expert bot based on the malicious-code infected-system This is a compromised machine, which has been observed to make a connection to a command and control server. malicious-code malware-configuration This is a resource which updates botnet drones with a new configuration. malicious-code malware-distribution URI used for malware distribution, e.g. a download URL included in fake invoice malware spam. - other blacklist Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. The reason may be that the justification is anecdotal or missing entirely. 
This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another. + other blacklist Some sources provide blacklists, which clearly refer to abusive behavior, such as spamming, but fail to denote the exact reason why a given identity has been blacklisted. The reason may be that the justification is anecdotal or missing entirely. This type should only be used if the typing fits the definition of a blacklist, but an event specific denomination is not possible for one reason or another. Not in RSIT. other other All incidents which don't fit in one of the given categories should be put into this class. - other proxy This refers to the use of proxies from inside your network. Not in ENISA eCSIRT-II taxonomy. - other tor This IOC refers to incidents related to TOR network infrastructure. Not in ENISA eCSIRT-II taxonomy. other malware An IoC referring to a malware (sample) itself. Not in RSIT. + other proxy This refers to the use of proxies from inside your network. Not in RSIT. + test test Meant for testing. Not in RSIT. + other tor This IOC refers to incidents related to TOR network infrastructure. Not in RSIT. other undetermined The categorisation of the incident is unknown/undetermined. - test test Meant for testing. vulnerable ddos-amplifier Publicly accessible services that can be abused for conducting DDoS reflection/amplification attacks, e.g. DNS open-resolvers or NTP servers with monlist enabled. vulnerable information-disclosure Publicly accessible services potentially disclosing sensitive information, e.g. SNMP or Redis. vulnerable potentially-unwanted-accessible Potentially unwanted publicly accessible services, e.g. Telnet, RDP or VNC. @@ -151,6 +151,8 @@ The taxonomy can be automatically added by the taxonomy expert bot based on the vulnerable weak-crypto Publicly accessible services offering weak crypto, e.g. web servers susceptible to POODLE/FREAK attacks. 
=============================== ========================================= ============================================= +In the "other" taxonomy, several types are not in the RSIT, but this taxonomy is intentionally extensible. + Meaning of source, destination and local values for each classification type and possible identifiers. The identifier is often a normalized malware name, grouping many variants. +Examples of the meaning of the *source* and *destination* fields for each classification type and possible identifiers are shown here. Usually the main information is in the *source* fields. The identifier is often a normalized malware name, grouping many variants. diff --git a/intelmq/bots/experts/taxonomy/expert.py b/intelmq/bots/experts/taxonomy/expert.py index d6f2ded83..3ac6af6bc 100644 --- a/intelmq/bots/experts/taxonomy/expert.py +++ b/intelmq/bots/experts/taxonomy/expert.py @@ -50,12 +50,12 @@ "infected-system": "malicious-code", "malware-configuration": "malicious-code", "malware-distribution": "malicious-code", - "blacklist": "other", # not in ENISA eCSIRT-II taxonomy + "blacklist": "other", # intentionally not in RSIT "other": "other", "undetermined": "other", - "proxy": "other", # not in ENISA eCSIRT-II taxonomy - "tor": "other", # not in ENISA eCSIRT-II taxonomy "malware": "other", # intentionally not in RSIT + "proxy": "other", # intentionally not in RSIT + "tor": "other", # intentionally not in RSIT "test": "test", "ddos-amplifier": "vulnerable", "information-disclosure": "vulnerable",