diff --git a/.travis.yml b/.travis.yml index 068db6930..2496ca3b7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,28 +5,34 @@ env: - requirements=true - requirements=false python: - - 3.4 - 3.5 - 3.6 - 3.7 + - 3.8 matrix: include: - - python: 3.4 + - python: 3.5 env: mode=debian - - python: 3.6 + - python: 3.8 env: mode=codestyle +before_install: + - if [[ -v requirements ]]; then curl -s -O https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.1-amd64.deb; fi + - if [[ -v requirements ]]; then sudo dpkg -i --force-confnew elasticsearch-7.6.1-amd64.deb; fi + - if [[ -v requirements ]]; then sudo sed -i.old 's/-Xms1g/-Xms128m/' /etc/elasticsearch/jvm.options; fi + - if [[ -v requirements ]]; then sudo sed -i.old 's/-Xmx1g/-Xmx128m/' /etc/elasticsearch/jvm.options; fi + - if [[ -v requirements ]]; then echo -e '-XX:+DisableExplicitGC\n-Djdk.io.permissionsUseCanonicalPath=true\n-Dlog4j.skipJansi=true\n-server\n' | sudo tee -a /etc/elasticsearch/jvm.options; fi + - if [[ -v requirements ]]; then sudo chown -R elasticsearch:elasticsearch /etc/default/elasticsearch; fi + - if [[ -v requirements ]]; then sudo systemctl start elasticsearch; fi install: - set -e - if [[ -v requirements ]]; then sudo apt-get install polipo lighttpd; fi - - if [[ $mode == debian ]]; then sudo apt-get install dpkg-dev dh-python python-setuptools python3-setuptools python3-all debhelper quilt fakeroot dh-systemd safe-rm; fi + - if [[ $mode == debian ]]; then sudo apt-get install dpkg-dev dh-python python-setuptools python3-setuptools python3-all debhelper quilt fakeroot dh-systemd safe-rm; pip3 install requests; pip3 install redis; pip3 install dnspython; pip3 install psutil; pip3 install python-dateutil; pip3 install termstyle; pip3 install pytz; pip3 install typing; fi - if [[ $requirements == true ]]; then for file in intelmq/bots/*/*/REQUIREMENTS.txt; do pip install -r $file; done; fi - - if [[ $TRAVIS_PYTHON_VERSION < '3.5' ]]; then sudo pip install typing; fi - - if [[ -v requirements ]]; then pip install Cerberus!=1.3 codecov pyyaml; fi + - if [[ -v requirements ]]; then pip install Cerberus!=1.3 codecov pyyaml requests_mock; fi - if [[ $mode == codestyle ]]; then pip install pycodestyle; fi - if [[ -v requirements ]]; then sudo sed -i '/^Defaults\tsecure_path.*$/ d' /etc/sudoers; fi - if [[ -v requirements ]]; then sudo pip install .; fi - - if [[ -v requirements ]]; then sudo cp /opt/intelmq/etc/examples/* /opt/intelmq/etc/; fi - - if [[ -v requirements ]]; then sudo chown -R $USER /opt/intelmq/; fi + - if [[ -v requirements ]]; then sudo intelmqsetup --skip-ownership; fi before_script: - if [[ $requirements == true ]] ; then psql -c "CREATE USER intelmq WITH SUPERUSER" -U postgres; fi - if [[ $requirements == true ]] ; then psql -c "CREATE DATABASE intelmq" -U intelmq template1; fi @@ -44,6 +50,7 @@ before_script: - if [[ $mode == debian ]]; then tar -xzf ../intelmq_$debversion.debian.tar.gz; fi - if [[ $mode == debian ]]; then popd; fi - if [[ -v requirements ]]; then sudo cp intelmq/tests/assets/* /var/www/html/ && sudo touch /var/www/html/$(date +%Y).txt; fi + - if [[ $requirements == true ]]; then sudo bash -c 'echo "[rabbitmq_management]." 
> /etc/rabbitmq/enabled_plugins' && sudo systemctl restart rabbitmq-server; fi
 script:
   - if [[ $requirements == true ]]; then TZ=utc INTELMQ_TEST_DATABASES=1 INTELMQ_TEST_LOCAL_WEB=1 INTELMQ_TEST_EXOTIC=1 nosetests --with-coverage --cover-package=intelmq --cover-branches; find contrib/ -name "test*.py" -exec nosetests {} \+; elif [[ $requirements == false ]]; then INTELMQ_TEST_LOCAL_WEB=1 nosetests --with-coverage --cover-package=intelmq --cover-branches; fi
   - if [[ $mode == codestyle ]]; then pycodestyle intelmq/{bots,lib,bin}; fi
@@ -53,7 +60,6 @@ script:
 services:
   - redis-server
   - postgresql
-  - elasticsearch
   - mongodb
   - rabbitmq
 after_success:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 96efd566f..afb502e72 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,314 @@ CHANGELOG
 ==========
 
+2.2.0 (2020-06-18)
+------------------
+Dropped support for Python 3.4.
+
+### Core
+- `__init__`: Changes to the path-handling, see [User Guide, section _/opt and LSB paths_](docs/User-Guide.md#opt-and-lsb-paths) for more information
+  - The environment variable `INTELMQ_ROOT_DIR` can be used to set custom root directories instead of `/opt/intelmq/` (#805) in case of non LSB-path installations.
+  - The environment variable `ROOT_DIR` can be used to set custom root directories instead of `/` (#805) in case of LSB-path installations.
+- `intelmq.lib.exceptions`: Added `MissingDependencyError` to show error messages about a missing library and how to install it (#1471).
+  - Added optional parameter `installed` to show the installed version.
+  - Added optional parameter `additional_text` to show arbitrary text.
+- Added more type annotations for core libraries.
+- `intelmq.lib.pipeline.Pythonlist.sleep`: Drop deprecated method.
+- `intelmq.lib.utils`: `write_configuration`: Append a newline at end of configuration/file to allow proper comparisons & diffs.
+- `intelmq.lib.test`: `BotTestCase` drops privileges upon initialization (#1489).
+- `intelmq.lib.bot`:
+  - New class `OutputBot`:
+    - Method `export_event` to format/export events according to the parameters given by the user.
+  - `ParserBot`: New methods `parse_json_stream` and `recover_line_json_stream`.
+  - `ParserBot.recover_line_json`: Fix format by adding a list around the line data.
+  - `Bot.send_message`: In debugging log level, the path to which the message is sent is now logged too.
+
+### Bots
+- Bots with dependencies: Use of `intelmq.lib.exceptions.MissingDependencyError`.
+
+#### Collectors
+- `intelmq.bots.collectors.misp.collector`: Deprecate parameter `misp_verify` in favor of generic parameter `http_verify_cert`.
+- `intelmq.bots.collectors.tcp.collector`: Drop compatibility with Python 3.4.
+- `intelmq.bots.collectors.stomp.collector`:
+  - Check the stomp.py version and show an error message if it does not match.
+  - For stomp.py versions `>= 5.0.0` redirect the `stomp.PrintingListener` output to debug logging.
+- `intelmq.bots.collectors.microsoft.collector_azure`: Support current Python library `azure-storage-blob>= 12.0.0`, configuration is incompatible and needs manual change. See NEWS file and bot's documentation for more details.
+- `intelmq.bots.collectors.amqp.collector_amqp`: Require `pika` minimum version 1.0.
+- `intelmq.bots.collectors.github_api.collector_github_contents_api`: Added (PR#1481).
+
+#### Parsers
+- `intelmq.bots.parsers.autoshun.parser`: Drop compatibility with Python 3.4.
+- `intelmq.bots.parsers.html_table.parser`: Drop compatibility with Python 3.4.
+- `intelmq.bots.parsers.shadowserver.parser`: Add support for MQTT and Open-IPP feeds (PR#1512, PR#1544).
+- `intelmq.bots.parsers.taichung.parser`:
+  - Migrate to `ParserBot`.
+  - Also parse geolocation information if available.
+- `intelmq.bots.parsers.cymru.parser_full_bogons`:
+  - Migrate to `ParserBot`.
+  - Add last updated information in raw.
+- `intelmq.bots.parsers.anubisnetworks.parser`: Add new parameter `use_malware_familiy_as_classification_identifier`.
+- `intelmq.bots.parsers.microsoft.parser_ctip`: Compatibility for the new CTIP data format provided by the Azure interface.
+- `intelmq.bots.parsers.cymru.parser_cap_program`: Support for `openresolver` type.
+- `intelmq.bots.parsers.github_feed.parser`: Added (PR#1481).
+- `intelmq.bots.parsers.urlvir.parser`: Removed, as the feed is discontinued (#1537).
+
+#### Experts
+- `intelmq.bots.experts.csv_converter`: Added as converter to CSV.
+- `intelmq.bots.experts.misp`: Added (PR#1475).
+- `intelmq.bots.experts.modify`: New parameter `maximum_matches`.
+
+#### Outputs
+- `intelmq.bots.outputs.amqptopic`:
+  - Use `OutputBot` and `export_event`.
+  - Allow formatting the routing key with event data by the new parameter `format_routing_key` (boolean).
+- `intelmq.bots.outputs.file`: Use `OutputBot` and `export_event`.
+- `intelmq.bots.outputs.files`: Use `OutputBot` and `export_event`.
+- `intelmq.bots.outputs.misp.output_feed`: Added, creates a MISP Feed (PR#1473).
+- `intelmq.bots.outputs.misp.output_api`: Added, pushes to MISP via the API (PR#1506, PR#1536).
+- `intelmq.bots.outputs.elasticsearch.output`: Dropped ElasticSearch version 5 compatibility, added version 7 compatibility (#1513).
+
+### Documentation
+- Document usage of the `INTELMQ_ROOT_DIR` environment variable.
+- Added document on MISP integration possibilities.
+- Feeds:
+  - Added "Full Bogons IPv6" feed.
+  - Remove discontinued URLVir Feeds (#1537).
+
+### Packaging
+- `setup.py`: Do not try to install any data to `/opt/intelmq/`, as the behavior is inconsistent on various systems and with `intelmqsetup` we have a tool to create the structure and files anyway.
+- `debian/rules`:
+  - Provide a blank state file in the package.
+- Patches:
+  - Updated `fix-intelmq-paths.patch`.
+
+### Tests
+- Travis: Use `intelmqsetup` here too.
+  - Install required build dependencies for the Debian package build test.
+  - This version is no longer automatically tested on Python `<` 3.5.
+  - Also run the tests on Python 3.8.
+  - Run the Debian packaging tests on Python 3.5 and the code-style test on 3.8.
+- Added tests for the new bot `intelmq.bots.outputs.misp.output_feed` (#1473).
+- Added tests for the new bot `intelmq.bots.experts.misp.expert` (#1473).
+- Added tests for `intelmq.lib.exceptions`.
+- Added tests for `intelmq.lib.bot.OutputBot` and `intelmq.lib.bot.OutputBot.export_event`.
+- Added IPv6 tests for `intelmq.bots.parsers.cymru.parser_full_bogons`.
+- Added tests for `intelmq.lib.bot.ParserBot`'s new methods `parse_json_stream` and `recover_line_json_stream`.
+- `intelmq.tests.test_conf`: Set encoding to UTF-8 for reading the `feeds.yaml` file.
+
+### Tools
+- `intelmqctl`:
+  - `upgrade-config`:
+    - Allow setting the state file location with the `--state-file` parameter.
+    - Do not require a second run anymore, if the state file is newly created (#1491).
+    - New parameter `no_backup`/`--no-backup` to skip creation of `.bak` files for state and configuration files.
+  - Only require `psutil` for the `IntelMQProcessManager`, not for process manager independent calls like `upgrade-config` or `check`.
+  - Add new command `debug` to output some information for debugging. Currently implemented:
+    - paths
+    - environment variables
+  - `IntelMQController`: New argument `--no-file-logging` to disable logging to file.
+  - If dropping privileges does not work, `intelmqctl` will now abort (#1489).
+- `intelmqsetup`:
+  - Add argument parsing and an option to skip setting file ownership, possibly not requiring root permissions.
+  - Call `intelmqctl upgrade-config` and add argument for the state file path (#1491).
+- `intelmq_generate_misp_objects_templates.py`: Tool to create a MISP object template (#1470).
+- `intelmqdump`: New parameter `-t` or `--truncate` to optionally give the maximum length of `raw` data to show, 0 for no truncating.
+
+### Contrib
+- Added `development-tools`.
+- ElasticSearch: Dropped version 5 compatibility, added version 7 compatibility (#1513).
+- Malware Name Mapping Downloader:
+  - New parameter `--mwnmp-ignore-adware`.
+  - The parameter `--add-default` supports an optional parameter to define the default value.
+
+### Known issues
+- Bots started with IntelMQ-Manager stop when the webserver is restarted. (#952).
+- Corrupt dump files when interrupted during writing (#870).
+
+
+2.1.3 (2020-05-26)
+------------------
+
+### Requirements
+- The python library `requests` is (again) listed as dependency of the core (#1519).
+
+### Core
+- `intelmq.lib.upgrades`:
+  - Harmonization upgrade: Also check and update regular expressions.
+  - Add function to migrate the deprecated parameter `attach_unzip` to `extract_files` for the mail attachment collector.
+  - Add function to migrate changed Taichung URL feed.
+  - Check for discontinued Abuse.CH Zeus Tracker feed.
+- `intelmq.lib.bot`:
+  - `ParserBot.recover_line`: Parameter `line` needs to be optional, fix usage of fallback value `self.current_line`.
+  - `start`: Handle decoding errors in the pipeline differently so that the bot is not stuck in an endless loop (#1494).
+  - `start`: Only acknowledge a message in case of errors if we actually had a message to dump, which is not the case for collectors.
+  - `_dump_message`: Dump messages with encoding errors base64 encoded, not in JSON format as it's not possible to decode them (#1494).
+- `intelmq.lib.test`:
+  - `BotTestCase.run_bot`: Add parameters `allowed_error_count` and `allowed_warning_count` to allow setting the number per run, not per test class.
+  - Set `source_pipeline_broker` and `destination_pipeline_broker` to `pythonlist` instead of the old `broker`, fixes `intelmq.tests.lib.test_bot.TestBot.test_pipeline_raising`.
+  - Fix test for (allowed) errors and warnings.
+- `intelmq.lib.exceptions`:
+  - `InvalidKey`: Add `KeyError` as parent class.
+  - `DecodingError`: Added, string representation has all relevant information on the decoding error, including encoding, reason and the affected string (#1494).
+- `intelmq.lib.pipeline`:
+  - Decode messages in `Pipeline.receive` not in the implementation's `_receive` so that the internal counter is correct in case of decoding errors (#1494).
+- `intelmq.lib.utils`:
+  - `decode`: Raise new `DecodingError` if decoding fails.
+
+### Harmonization
+- `protocol.transport`: Adapt regular expression to allow the value `nvp-ii` (protocol 11).
+
+### Bots
+#### Collectors
+- `intelmq.bots.collectors.mail.collector_mail_attach`:
+  - Fix handling of deprecated parameter name `attach_unzip`.
+  - Fix handling of attachments without filenames (#1538).
+- `intelmq.bots.collectors.stomp.collector`: Fix compatibility with stomp.py versions `> 4.1.20` and catch errors on shutdown.
+- `intelmq.bots.collectors.microsoft`:
+  - Update `REQUIREMENTS.txt` temporarily fixing deprecated Azure library (#1530, PR#1532).
+  - `intelmq.bots.collectors.microsoft.collector_interflow`: Add method for printing the file list.
+
+#### Parsers
+- `intelmq.bots.parsers.cymru.parser_cap_program`: Support for protocol 11 (`nvp-ii`) and `conficker` type.
+- `intelmq.bots.parsers.taichung.parser`: Support more types/classifications:
+  - Application Compromise: Apache vulnerability & SQL injections
+  - Brute-force: MSSQL & SSH password guess attacks; Office 365, SSH & SIP attacks
+  - C2 Server: Attack controller
+  - DDoS
+  - DoS: DNS, DoS, Excess connection
+  - IDS Alert / known vulnerability exploitation: backdoor
+  - Malware: Malware Proxy
+  - Warn on new unknown types.
+- `intelmq.bots.parsers.bitcash.parser`: Removed as feed is discontinued.
+- `intelmq.bots.parsers.fraunhofer.parser_ddosattack_cnc` and `intelmq.bots.parsers.fraunhofer.parser_ddosattack_target`: Removed as feed is discontinued.
+- `intelmq.bots.parsers.malwaredomains.parser`: Correctly classify `C&C` and `phishing` events.
+- `intelmq.bots.parsers.shadowserver.parser`: More verbose error message for missing report specification (#1507).
+- `intelmq.bots.parsers.n6.parser_n6stomp`: Always add n6 field `name` as `malware.name` independent of `category`.
+- `intelmq.bots.parsers.anubisnetworks`: Update parser with new data format.
+- `intelmq.bots.parsers.bambenek`: Add new feed URLs with Host `faf.bambenekconsulting.com` (#1525, PR#1526).
+- `intelmq.bots.parsers.abusech.parser_ransomware`: Removed, as the feed is discontinued (#1537).
+- `intelmq.bots.parsers.nothink.parser`: Removed, as the feed is discontinued (#1537).
+- `intelmq.bots.parsers.n6.parser`: Remove not allowed characters in the name field for `malware.name` and write the original value to `event_description.text` instead.
+
+#### Experts
+- `intelmq.bots.experts.cymru_whois.lib`: Fix parsing of AS names with Unicode characters.
+
+#### Outputs
+- `intelmq.bots.outputs.mongodb`:
+  - Set default port 27017.
+  - Use different authentication mechanisms per MongoDB server version to fix compatibility with server version >= 3.4 (#1439).
+
+### Documentation
+- Feeds:
+  - Remove unavailable feed Abuse.CH Zeus Tracker.
+  - Remove the field `status`, offline feeds should be removed.
+  - Add a new field `public` to differentiate between private and public feeds.
+  - Add documentation URLs to nearly all feeds.
+  - Remove unavailable Bitcash.cz feed.
+  - Remove unavailable Fraunhofer DDos Attack feeds.
+  - Remove unavailable feed Abuse.CH Ransomware Tracker (#1537).
+  - Update information on Bambenek Feeds, many require a license now (#1525).
+  - Remove discontinued Nothink Honeypot Feeds (#1537).
+- Developers Guide: Fix the instructions for `/opt/intelmq` file permissions.
+
+### Packaging
+- Patches: `fix-logrotate-path.patch`: also include path to rotated file in patch.
+- Fix paths from `/opt` to LSB for `setup.py` and `contrib/logrotate/intelmq` in build process (#1500).
+- Add runtime dependency `debianutils` for the program `which`, which is required for `intelmqctl`.
+
+### Tests
+- Drop Travis tests for 3.4 as required libraries dropped 3.4 support.
+- `intelmq.tests.bots.experts.cymru_whois`:
+  - Drop missing ASN test, does not work anymore.
+  - IPv6 to IPv4 test: Test for two possible results.
+- `intelmq.lib.test`: Fix compatibility of logging capture with Python >= 3.7 by reworking the whole process (#1342).
+- `intelmq.bots.collectors.tcp.test_collector`: Remove custom mocking and bot starting, not necessary anymore.
+- Added tests for `intelmq.bin.intelmqctl.IntelMQProcessManager._interpret_commandline`.
+- Fix and split `tests.bots.experts.ripe.test_expert.test_ripe_stat_error_json`.
+- Added tests for invalid encodings in input messages in `intelmq.tests.lib.test_bot` and `intelmq.tests.lib.test_pipeline` (#1494).
+- Travis: Explicitly enable RabbitMQ management plugin.
+- `intelmq.tests.lib.test_message`: Fix usage of the parameter `blacklist` for Message hash tests (#1539).
+
+### Tools
+- `intelmqsetup`: Copy missing BOTS file to IntelMQ's root directory (#1498).
+- `intelmq_gen_docs`: Feed documentation generation: Handle missing/empty parameters.
+- `intelmqctl`:
+  - `IntelMQProcessManager`: For the status of running bots also check the bot ID of the commandline and ignore the path of the executable (#1492).
+  - `IntelMQController`: Fix exit codes of `check` command for JSON output (now 0 on success and 1 on error, was swapped, #1520).
+- `intelmqdump`:
+  - Handle base64-type messages for show, editor and recovery actions.
+
+### Contrib
+- `intelmq/bots/experts/asn_lookup/update-asn-data`: Use `pyasn_util_download.py` to download the data instead of the RIPE data, which currently cannot be parsed (#1517, PR#1518, https://github.com/hadiasghari/pyasn/issues/62).
+
+### Known issues
+- HTTP stream collector: retry on regular connection problems? (#1435).
+- Bots started with IntelMQ-Manager stop when the webserver is restarted. (#952).
+- Reverse DNS: Only first record is used (#877).
+- Corrupt dump files when interrupted during writing (#870).
+
+
+2.1.2 (2020-01-28)
+------------------
+
+### Core
+- `__init__`: Resolve absolute path for `STATE_FILE_PATH` variable (resolves `..`).
+- `intelmq.lib.utils`:
+  - log: Do not raise an exception if logging to neither file nor syslog is requested.
+  - logging StreamHandler: Colorize all warning and error messages red.
+  - logging FileHandler: Strip all shell colorizations from the messages (#1436).
+- `intelmq.lib.message`:
+  - `Message.to_json`: Set `sort_keys=True` to get reproducible results.
+  - `drop_privileges`: Handle situations where the user or group `intelmq` does not exist.
+- `intelmq.lib.pipeline`:
+  - `Amqp._send` and `Amqp._acknowledge`: Log traceback in debug mode in case of errors and necessary re-connections.
+  - `Amqp._acknowledge`: Reset delivery tag if acknowledge was successful.
+
+### Bots
+#### Collectors
+- `intelmq.bots.collectors.misp.collector`:
+  - Add compatibility with current pymisp versions and versions released after January 2020 (PR#1468).
+
+#### Parsers
+- `intelmq.bots.parsers.shadowserver.config`: Add some missing fields for the feed `accessible-rdp` (#1463).
+- `intelmq.bots.parsers.shadowserver.parser`:
+  - Feed-detection based on file names: The prefixed date is optional now.
+  - Feed-detection based on file names: Re-detect feed for every report received (#1493).
+
+#### Experts
+- `intelmq.bots.experts.national_cert_contact_certat`: Handle empty responses by server (#1467), as sketched below.
+- `intelmq.bots.experts.maxmind_geoip`: The script `update-geoip-data` now requires a license key as second parameter because of upstream changes (#1484).
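The empty-response guard mentioned for `national_cert_contact_certat` above follows a common pattern. A minimal sketch of that pattern (a generic illustration only, not the bot's actual code; the function name and query parameter are hypothetical):

```python
import requests

def query_contact_api(url: str, ip: str):
    """Query a contact-lookup HTTP API, tolerating empty server replies."""
    response = requests.get(url, params={"ip": ip})
    response.raise_for_status()
    # An empty body would make response.json() raise a decoding error,
    # so treat it as "no result" instead of crashing the bot.
    if not response.text.strip():
        return None
    return response.json()
```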
+
+#### Outputs
+- `intelmq.bots.outputs.restapi.output`: Fix logging of response body if response status code was not ok.
+
+### Documentation
+- Remove some hardcoded `/opt/intelmq/` paths from code comments and program outputs.
+
+### Packaging
+- debian/rules: Only replace `/opt/intelmq/` with LSB-paths in certain files, not the whole tree, avoiding wrong replacements.
+- debian/rules and debian/intelmq.install: Install the examples configuration directly instead of working around the abandoned examples directory.
+
+### Tests
+- `lib/test_utils`: Skip some tests on Python 3.4 because `contextlib.redirect_stdout` and `contextlib.redirect_stderr` are not supported on this version.
+- Travis: Stop running tests with all optional dependencies on Python 3.4, as more and more libraries are dropping support for it. Tests on the core and on code without optional requirements are not affected.
+- `tests.bots.parsers.html_table`: Make tests independent of current year.
+
+### Tools
+- `intelmqctl upgrade-config`: Fix missing substitution in error message "State file %r is not writable.".
+
+### Known issues
+- bots trapped in endless loop if decoding of raw message fails (#1494)
+- intelmqctl status of processes: need to check bot id too (#1492)
+- MongoDB authentication: compatibility on different MongoDB and pymongo versions (#1439)
+- ctl: shell colorizations are logged (#1436)
+- http stream collector: retry on regular connection problems? (#1435)
+- tests: capture logging with context manager (#1342)
+- Bots started with IntelMQ-Manager stop when the webserver is restarted. (#952)
+- n6 parser: mapping is modified within each run (#905)
+- reverse DNS: Only first record is used (#877)
+- Corrupt dump files when interrupted during writing (#870)
+
+
 2.1.1 (2019-11-11)
 ------------------
 
@@ -78,14 +386,14 @@ CHANGELOG
   - Require the bot instance as parameter for all pipeline classes.
   - New internal variable `_has_message` to keep the state of the pipeline.
   - Split receive and acknowledge into public-facing and private methods.
-  - Add `reject_message` method to the Pipeline class for explicit requeue of messages.
+  - Add `reject_message` method to the Pipeline class for explicit re-queue of messages.
   - AMQP:
     - Make exchange configurable.
     - If exchange is set, the queues are not declared, the queue name is for routing used by exchanges.
 - `intelmq.lib.bot`:
   - Log message after successful bot initialization, no log message anymore for ready pipeline.
   - Use existing current message if receive is called and the current message still exists.
-  - Fix handling of received messaged after a sighup that happend during a blocking receving connection using explicit rejection (#1438).
+  - Fix handling of received messages after a SIGHUP that happened during a blocking receiving connection using explicit rejection (#1438).
   - New method `_parse_common_parameters` called before `init` to parse commonly used argument. Currently supported: `extract_files`.
 - `intelmq.lib.test`:
   - Fix the tests broker by providing the testing pipeline.
@@ -115,13 +423,13 @@ CHANGELOG
     - Use the lib's `unzip` function for uncompressing attachments and use the .
 - `intelmq.bots.collectors.mail.collector_mail_url`: Save the file name of the downloaded file as `extra.file_name`.
 - `intelmq.bots.collectors.amqp.collector_amqp`: New collector to collect data from (remote) AMQP servers, for bot IntelMQ as well as external data.
- - use default SSL context for client purposes, fixes compatibility with python < 3.6 if TLS is used. + - use default SSL context for client purposes, fixes compatibility with python `<` 3.6 if TLS is used. #### Parsers - `intelmq.bot.parsers.html_table.parser`: * New parameter "html_parser". * Use time conversion functions directly from `intelmq.lib.harmonization.DateTime.convert`. - - Limit lxml dependency on 3.4 to < 4.4.0 (incompatibility). + - Limit lxml dependency on 3.4 to `<` 4.4.0 (incompatibility). - `intelmq.bots.parsers.netlab_360.parser`: Add support for hajime scanners. - `intelmq.bots.parsers.hibp.parser_callback`: A new parser to parse data retrieved from a HIBP Enterprise Subscription. - `intelmq.bots.parsers.shadowserver.parser`: @@ -139,7 +447,7 @@ CHANGELOG - Compatibility shim will be available in the 2.x series. - `intelmq.bot.outputs.sql.output` added generic SQL output bot. Comparted to - new optional parameter `engine` with `postgresql` (default) and `sqlite` (new) as possible values. -- `intelmq.bots.outputs.stomp.output`: New parameters `message_hierarchical_output`, `message_jsondict_as_string`, `message_with_type`, `single_key`. +- `intelmq.bots.outputs.stomp.output`: New parameters `message_hierarchical`, `message_jsondict_as_string`, `message_with_type`, `single_key`. ### Documentation - Feeds: @@ -152,7 +460,7 @@ CHANGELOG - Travis: - Use UTC timezone. - Tests for `utils.unzip`. -- Add a new asset: Zip archive with two files, same as with tar.gz archive. +- Add a new asset: Zip archive with two files, same as with `.tar.gz` archive. - Added tests for the Mail Attachment & Mail URL collectors. - Ignore logging-tests on Python 3.7 temporarily (#1342). @@ -193,9 +501,9 @@ CHANGELOG - `intelmq.lib.upgrades.v110_deprecations`: Fix upgrade of ripe expert configuration. - `intelmq.lib.bot_debugger`: - Fix handling of empty messages generated by parser when user wanted to show the result by "--show-sent" flag. - - Fix handling of sent messages for bots using the `path_permissive` paramter (#1453). + - Fix handling of sent messages for bots using the `path_permissive` parameter (#1453). - `intelmq.lib.pipeline.Amqp`: - - use default SSL context for client purposes, fixes compatibility with python < 3.6 if TLS is used. + - use default SSL context for client purposes, fixes compatibility with python `<` 3.6 if TLS is used. - Reconnect once on sending messages if disconnect detected. ### Bots @@ -225,7 +533,7 @@ CHANGELOG - `intelmq.bots.experts.reverse_dns.expert`: Add optional parameter `overwrite`, current behavior was `True`, default if not given is `True` now, will change to `False` in 3.0.0 (#1452, #1455). #### Outputs -- `intelmq.bots.outputs.amqptopic.output`: use default SSL context for client purposes, fixes compatibility with python < 3.6 if TLS is used. +- `intelmq.bots.outputs.amqptopic.output`: use default SSL context for client purposes, fixes compatibility with python `<` 3.6 if TLS is used. ### Packaging - Rules: @@ -357,7 +665,7 @@ CHANGELOG - Clarify on Uninstallation ### Packaging -- Do not execute the tcp collector tests during debian and ubuntu builds as they fail there. +- Do not execute the tcp collector tests during Debian and Ubuntu builds as they fail there. ### Tests - `intelmq.lib.test`: Disable statistics for test runs of bots. @@ -366,8 +674,8 @@ CHANGELOG ### Tools - `intelmqsetup`: Only change directory ownerships if necessary. -- `intelmqctl`: - - Provide new command `upgrade-conf` to uprade configuration to a newer version. 
+- `intelmqctl`:
+  - Provide new command `upgrade-conf` to upgrade configuration to a newer version.
   - Makes backups of configurations files on its own.
   - Also checks for previously skipped or new functions of older versions and catches up.
   - Provides logging level on class layer.
@@ -428,7 +736,7 @@ See also the changelog for 2.0.0.beta1 below.
 - See the Core section for the changes in the allowed values for `classification.type`.
 
 ### Bots
-- Use the new RSIT types in several types, see above
+- Use the new RSIT types in several bots, see above
 
 #### Parsers
 - `intelmq.bots.parsers.spamhaus.parser_cert`: Added support for `extortion` events.
@@ -481,13 +789,13 @@ There are some features considered as beta and marked as such in the documentati
   - Allow setting the broker of source and destination independently.
   - Support for a new AMQP broker. See User Guide for configuration. (#1179)
 - `lib/bot`:
-  - Dump messages locks the dump file using unix file locks (#574).
+  - Dump messages locks the dump file using Unix file locks (#574).
   - Print idle/rate limit time also in human readable format (#1332).
   - `set_request_parameters`: Use `{}` as default proxy value instead of `None`. Allows updating of existing proxy dictionaries.
   - Bots drop privileges if they run as root.
   - Save statistics on successfully and failed processed messages in the redis database 3.
 - `lib/utils`
-  - Function `unzip` to extract files from gzipped and/or tar-archives.
+  - Function `unzip` to extract files from gz-zipped and/or tar-archives.
   - New class `ListHandler`: new handler for logging purpose which saves the messages in a list.
   - Add function `seconds_to_human`.
   - Add function `drop_privileges`.
@@ -510,7 +818,7 @@ There are some features considered as beta and marked as such in the documentati
 - added `intelmq.bots.collectors.api`: collecting data using an HTTP API (#123, #1187).
 - added `intelmq.bots.collectors.rsync` (#1286).
 - `intelmq.bots.collectors.http.collector_http`:
-  - Add support for uncompressing of gzipped-files (#1270).
+  - Add support for uncompressing of gz-zipped-files (#1270).
   - Add time-delta support for time formatted URLs (#1366).
 - `intelmq.collectors.blueliv.collector_crimeserver`: Allow setting the API URL by parameter (#1336).
 - `intelmq.collectors.mail`:
@@ -574,7 +882,7 @@ There are some features considered as beta and marked as such in the documentati
 
 ### Tools
 - `intelmqctl check`: Now uses the new `ListHandler` from utils to handle the logging in JSON output mode.
-- `intelmqctl run`: The message that a running bot has been stopped, is not longer a warning, but an informational message. No need to inform sysadmins about this intended behaviour.
+- `intelmqctl run`: The message that a running bot has been stopped is no longer a warning, but an informational message. No need to inform sysadmins about this intended behavior.
 - `intelmqdump`: Inspecting dumps locks the dump file using unix file locks (#574).
 - `intelmqctl`:
   - After the check if the program runs as root, it tries to drop privileges. Only if this does not work, a warning is shown.
@@ -582,7 +890,7 @@ There are some features considered as beta and marked as such in the documentati
 ### Contrib
 - `malware_name_mapping`:
-  - Added the script `apply_mapping_eventdb.py` to apply the mapping to an eventdb.
+  - Added the script `apply_mapping_eventdb.py` to apply the mapping to an EventDB.
   - Possibility to add local rules using the download tool.
 - `check_mk`:
   - Added scripts for monitoring queues and statistics.
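Several entries above mention the new `ListHandler`, used e.g. by `intelmqctl check` for its JSON output mode. A minimal sketch of such a list-collecting logging handler (illustrating the concept only, not IntelMQ's implementation):

```python
import logging

class ListHandler(logging.Handler):
    """Collect formatted log records in a list instead of emitting them."""
    def __init__(self):
        super().__init__()
        self.buffer = []

    def emit(self, record):
        self.buffer.append(self.format(record))

logger = logging.getLogger("example")
handler = ListHandler()
logger.addHandler(handler)
logger.warning("collected for later use, e.g. JSON status output")
print(handler.buffer)
```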
@@ -835,7 +1143,7 @@ Update allowed classification fields to 2018-09-26 version (#802, #1350, #1380).
 A tool to convert from yaml to md has been added.
 
 ### Tools
-- `intelmq_gen_feeds_docs` addded to bin directory, allows generating the Feeds.md documentation file from feeds.yaml
+- `intelmq_gen_feeds_docs` added to bin directory, allows generating the Feeds.md documentation file from feeds.yaml
 - `intelmq_gen_docs` merges both `intelmq_gen_feeds_docs` and `intelmq_gen_harm_docs` in one file and automatically updates the documentation files.
 
 #### intelmqctl
@@ -850,7 +1158,7 @@ Update allowed classification fields to 2018-09-26 version (#802, #1350, #1380).
 - `intelmqctl run` if message is sent to a non-default path, it is printed out.
 - `intelmqctl restart` bug fix; returned some half-nonsense, now returns return state of start and stop operation in a list (#1226).
 - `intelmqctl check`: New parameter `--no-connections` to prevent the command from making connections e.g. to the redis pipeline.
-- `intelmqctl list queues`: don't display named paths amongst standard queues.
+- `intelmqctl list queues`: don't display named paths among standard queues.
 - The process status test failed if the PATH did not include the bot executables and the `which` command failed. Then the process's command line could not be compared correctly. The fix warns of this and adds a new status 'unknown' (#1297).
@@ -900,7 +1208,7 @@ Update allowed classification fields to 2018-09-26 version (#802, #1350, #1380).
 
 ### Bots
 - Removed print statements from various bots.
-- Replaced various occurences of `self.logger.error()` + `self.stop()` with `raise ValueError`.
+- Replaced various occurrences of `self.logger.error()` + `self.stop()` with `raise ValueError`.
 
 #### Collectors
 - `bots.collectors.mail`:
@@ -991,7 +1299,7 @@ Update allowed classification fields to 2018-09-26 version (#802, #1350, #1380).
 - New allowed value for `classification.type`: `infected system` for taxonomy `malicious code` (#1197).
 
 ### Requirements
-- Requests is no longer listed as dependency of the core. For depending bots the requirement is noted in their REQUIREMENTS.txt file.
+- Requests is no longer listed as dependency of the core. For depending bots the requirement is noted in their `REQUIREMENTS.txt` file.
 
 ### Documentation
 - Use Markdown for README again, as pypi now supports it.
@@ -1265,7 +1573,7 @@ no known issues
 1.0.0 Stable release (2017-08-04)
 ---------------------------------
 ### Core
-- Fixes a thrown FileNotFound exception when stopping bots started with `intelmqctl run ...`
+- Fixes a thrown `FileNotFound` exception when stopping bots started with `intelmqctl run ...`
 
 ### Harmonization
 - leading dots in FQDNs are rejected and removed in sanitation (#1022, #1030)
@@ -1295,7 +1603,7 @@ no known issues
 
 ### Bots
 #### Collectors
-- HTTP collectors: If http_username and http_password are both given and empty or null, 'None:None' has been used to authenticate. It is now checked that the username evaluates to non-false/null before adding the authentication. (fixes #1017)
+- HTTP collectors: If `http_username` and `http_password` are both given and empty or null, 'None:None' has been used to authenticate. It is now checked that the username evaluates to non-false/null before adding the authentication. (fixes #1017)
 - Dropped unmaintained and undocumented FTP(S) collectors `bots.collectors.ftp`. Also, the FTPS collector had a license conflict (#842).
 - `bots.collectors.http.collector_http_stream`: drop deprecated parameter `url` in favor of `http_url`
 
@@ -1322,7 +1630,7 @@ v1.0.0.dev8 Beta release (2017-06-14)
 ### Configuration
 - Added `log_processed_messages_count` (500) and `log_processed_messages_seconds` (900) to defaults.conf.
 - `http_timeout` has been renamed to `http_timeout_sec` and `http_timeout_max_tries` has been added.
-    This setting is honored by bots.collectors.http.* and bots.collectors.mail.collector_mail_url, bots.collectors.rt (only `http_timeout_sec`), bots.outputs.restapi.output and bots.experts.ripencc_abuse_contact
+    This setting is honored by `bots.collectors.http.*` and `bots.collectors.mail.collector_mail_url`, `bots.collectors.rt` (only `http_timeout_sec`), `bots.outputs.restapi.output` and `bots.experts.ripencc_abuse_contact`.
 
 ### Documentation
 - Minor fixes
@@ -1331,25 +1639,25 @@
 - Better documentation of packages
 
 ### Tools
-- added a bot debugger (https://github.com/certtools/intelmq/pull/975)
-- missing bot executable is detected and handled by intelmqctl (https://github.com/certtools/intelmq/pull/979)
+- added a bot debugger (#975)
+- missing bot executable is detected and handled by intelmqctl (#979)
 
 ### Core
-- fix bug which prevented dumps to be written if the file did not exist (https://github.com/certtools/intelmq/pull/986)
+- fix bug which prevented dumps from being written if the file did not exist (#986)
 - Fix reload of bots regarding logging
 - type annotations for all core libraries
 
 ### Bots
-- added bots.experts.idea, bots.outputs.files
+- added `bots.experts.idea`, `bots.outputs.files`
 - possibility to split large csv Reports into Chunks, currently possible for mail url and file collector
 - elasticsearch output supports HTTP Basic Auth
-- bots.collectors.mail.collector_mail_url and bots collectors.file.collector can split large reports (https://github.com/certtools/intelmq/pull/680)
-- bots.parsers.shadowserver support the VNC feed
-- handling of HTTP timeouts, see above https://github.com/certtools/intelmq/pull/859
-- bots.parsers.bambenek saves the malware name
-- bots.parsers.fraunhofer.parser_dga saves the malware name
-- bots.parsers.shadowserver handles NULL bytes
-- bots.parsers.abusech.parser_ransomware handles the IP 0.0.0.0 specially
+- `bots.collectors.mail.collector_mail_url` and `bots.collectors.file.collector` can split large reports (#680)
+- `bots.parsers.shadowserver` supports the VNC feed
+- handling of HTTP timeouts, see above #859
+- `bots.parsers.bambenek` saves the malware name
+- `bots.parsers.fraunhofer.parser_dga` saves the malware name
+- `bots.parsers.shadowserver` handles NULL bytes
+- `bots.parsers.abusech.parser_ransomware` handles the IP 0.0.0.0 specially
 
 ### Harmonization
 - New field named `output` to support export to foreign formats
@@ -1362,7 +1670,7 @@ v1.0.0.dev7 Beta release (2017-05-09)
 
 ### Bots
 #### Collectors
-- bots.collectors.alienvault_otx: OTX library has been removed, install it as package instead
+- `bots.collectors.alienvault_otx`: OTX library has been removed, install it as package instead
 
 #### Parsers
 - API keys will be removed from feed.url if possible
 - Added support for Compromised-Website, Open-Netis, NTP-Version, Sandbox-URL, Spam-URL, Vulnerable-ISAKMP, Botnet-CCIP, Accessible-RDP, Open-LDAP, Blacklisted-IP, Accessible-Telnet, Accessible-CWMP (#748).
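The parser behavior noted above, removing API keys from `feed.url` where possible, boils down to stripping credentials from a URL's query string. A minimal sketch of the idea (the helper name is hypothetical and the real logic may be more selective about which parameters it removes):

```python
from urllib.parse import urlsplit, urlunsplit

def strip_query(url: str) -> str:
    """Drop the query string, where API keys typically live."""
    parts = urlsplit(url)
    return urlunsplit((parts.scheme, parts.netloc, parts.path, "", ""))

print(strip_query("https://example.com/feed.csv?apikey=SECRET"))
# https://example.com/feed.csv
```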
#### Experts
-- added bots.experts.field_reducer, bots.outputs.smtp
-- bots.experts.deduplicator: `ignore_keys` has been renamed to `filter_keys` and `filter_type` has been removed.
-- bots.experts.modify: The configration is now list-based for a consistent ordering
-- bots.experts.tor_node as an optional parameter `overwrite`
+- added `bots.experts.field_reducer`, `bots.outputs.smtp`.
+- `bots.experts.deduplicator`: `ignore_keys` has been renamed to `filter_keys` and `filter_type` has been removed.
+- `bots.experts.modify`: The configuration is now list-based for a consistent ordering.
+- `bots.experts.tor_node` has an optional parameter `overwrite`.
 
 ### Harmonization
 - New parameter and field named feed.documentation to link to documentation of the feed
-- classification.taxonomy is lower case only
+- `classification.taxonomy` is lower case only
 
 v1.0.0.dev6 Beta release (2017-01-11)
 -------------------------------------
@@ -1398,11 +1706,11 @@ Changes between 0.9 and 1.0.0.dev6
 - bots/BOTS now contains only generic and specific collectors. For a list of feeds, see docs/Feeds.md
 
 ### Tools
-- DEV: intelmq_gen_harm_docs: added to generate Harmonization documentation
-- intelmq_psql_initdb: creates a table for a postgresql database using the harmonization fields
+- DEV: `intelmq_gen_harm_docs`: added to generate Harmonization documentation
+- `intelmq_psql_initdb`: creates a table for a postgresql database using the harmonization fields
 - intelmqctl: reworked argument parsing, many bugfixes
 - intelmqdump: added to inspect dumped messages and reinsert them into the queues
-- DEV: rewrite_config_files: added to rewrite configuration files with consistent style
+- DEV: `rewrite_config_files`: added to rewrite configuration files with consistent style
 
 ### Bots
 
@@ -1417,24 +1725,24 @@ Changes between 0.9 and 1.0.0.dev6
 - source specific parsers added: abusech, alienvault, alienvault otx, anubisnetworks, autoshun, bambenek, bitcash, bitsight, blocklistde, blueliv, ci army, cleanmx, cymru_full_bogons, danger_rulez, dataplane, dshield (asn, block and domain), dyn, fraunhofer_dga, hphosts, malc0de, malwaredomains, misp, n6, netlab_360, nothink, openphish, proxyspy, spamhaus cert, taichung, turris, urlvir
 - generic parsers added: csv, json
 - specific parsers dropped: abusehelper (broken), arbor (source unavailable), bruteforceblocker, certeu, dragonresearchgroup parser (discontinued), hpfeeds, microsoft_dcu (broken), taichungcitynetflow, torexitnode parser
-- renamed intelmq.bots.parsers.spamhaus.parser to intelmq.bots.parsers.spamhaus.parser_drop
-  renamed intelmq.bots.parsers.malwarepatrol.parser-dansguardian to intelmq.bots.parsers.malwarepatrol.parser_dansguardian
-- renamed intelmq.bots.parsers.taichungcitynetflow.parser to intelmq.bots.parsers.taichung.parser
+- renamed `intelmq.bots.parsers.spamhaus.parser` to `intelmq.bots.parsers.spamhaus.parser_drop`.
+  renamed `intelmq.bots.parsers.malwarepatrol.parser-dansguardian` to `intelmq.bots.parsers.malwarepatrol.parser_dansguardian`
+- renamed `intelmq.bots.parsers.taichungcitynetflow.parser` to `intelmq.bots.parsers.taichung.parser`
 - major rework of shadowserver parsers
 - enhanced all parsers
 
 #### Experts
 - Added experts: asnlookup, cert.at contact lookup, filter, generic db lookup, gethostbyname, modify, reverse dns, rfc1918, tor_nodes, url2fqdn
 - removed experts: contactdb, countrycodefilter (obsolete), sanitizer (obsolete)
-- renamed intelmq.bots.expers.abusix.abusix to bots.expers.abusix.expert
-  intelmq.bots.experts.asnlookup.asnlookup to intelmq.bots.experts.asn_lookup.expert
-  intelmq.bots.experts.cymru.expert to intelmq.bots.experts.cymru_whois.expert
-  intelmq.bots.experts.deduplicator.deduplicator to intelmq.bots.experts.deduplicator.expert
-  intelmq.bots.experts.geoip.geopip to intelmq.bots.experts.maxmind_geoip.expert
-  intelmq.bots.experts.ripencc.ripencc to intelmq.bots.experts.ripencc_abuse_contact.expert
-  intelmq.bots.experts.taxonomy.taxonomy to intelmq.bots.experts.taxonomy.expert
+- renamed `intelmq.bots.experts.abusix.abusix` to `intelmq.bots.experts.abusix.expert`
+  `intelmq.bots.experts.asnlookup.asnlookup` to `intelmq.bots.experts.asn_lookup.expert`
+  `intelmq.bots.experts.cymru.expert` to `intelmq.bots.experts.cymru_whois.expert`
+  `intelmq.bots.experts.deduplicator.deduplicator` to `intelmq.bots.experts.deduplicator.expert`
+  `intelmq.bots.experts.geoip.geopip` to `intelmq.bots.experts.maxmind_geoip.expert`
+  `intelmq.bots.experts.ripencc.ripencc` to `intelmq.bots.experts.ripencc_abuse_contact.expert`
+  `intelmq.bots.experts.taxonomy.taxonomy` to `intelmq.bots.experts.taxonomy.expert`
 - enhanced all experts
-- changed configuration syntax for bots.experts.modify to a more simple variant
+- changed configuration syntax for `intelmq.bots.experts.modify` to a simpler variant
 
 #### Outputs
 - added: amqp, elasticsearch, redis, restapi, smtp, stomp, tcp, udp, xmpp
diff --git a/NEWS.md b/NEWS.md
index 4856230d3..6baa356c3 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,84 @@ NEWS
 
 See the changelog for a full list of changes.
 
+2.2.0 Feature release (2020-06-17)
+----------------------------------
+
+### Requirements
+- IntelMQ no longer supports Python 3.4, Python `>=` 3.5 is required.
+  CentOS 7 (with EPEL) provides both Python 3.4 and Python 3.6. If IntelMQ was installed with Python 3.4, the code needs to be re-installed with Python 3.6 and removed for Python 3.4. Application data is compatible. To install needed packages: `yum install python36 python36-devel python36-requests`.
+- The *AMQP collector* requires the `pika` library minimum version 1.0.
+
+### Configuration
+
+#### ElasticSearch Output Bot
+The ElasticSearch Output bot no longer supports ElasticSearch version 5, only version 7 (#1513).
+
+#### Microsoft Azure Collector Bot
+The bot has been substantially changed to support the current Azure Python library `azure-storage-blob>=12.0.0`.
+This also changes the required configuration parameters. The new required parameters are:
+
+* `redis_cache_db`: 5
+* `redis_cache_host`: `"127.0.0.1"`
+* `redis_cache_password`: null, depending on your Redis server configuration
+* `redis_cache_port`: 6379, depending on your Redis server configuration
+* `redis_cache_ttl`: 864000 (10 days), depending on how fast the data you are fetching is removed from the storage
+* `connection_string`: connection string as given by Microsoft, includes endpoint and authentication information
+* `container_name`: name of the container to connect to
+
+The previous parameters `account_name`, `account_key` and `delete` are not supported anymore.
+
+#### URLVir Feeds and Parser
+All URLVir feeds have been discontinued. The URLVir Parser has been removed.
+The `intelmqctl upgrade-config` command warns if you have these feeds and the bot in use.
+
+
+2.1.3 Bugfix release (2020-05-26)
+---------------------------------
+
+### Requirements
+The python library `requests` is required by the core.
+
+### Harmonization
+The regular expression of the field `protocol.transport` has been updated to accommodate the value `nvp-ii`.
+`intelmqctl upgrade-config` handles the change to automatically upgrade your configuration.
+
+#### Taichung feed
+The Taichung feed "Netflow (Recent 30)" with URL `https://www.tc.edu.tw/net/netflow/lkout/recent/30` is no longer available and gives an error 500.
+As a drop-in replacement the Parser as well as the Feed documentation are now adapted to the full feed available at `https://www.tc.edu.tw/net/netflow/lkout/recent/`.
+The `intelmqctl upgrade-config` command takes care of this change.
+
+#### Abuse.ch Zeus Tracker Feed
+The Abuse.ch Zeus Tracker was discontinued on 2019-07-08. The `intelmqctl upgrade-config` command warns if you have this feed in use.
+
+#### Abuse.ch Ransomware Tracker Feed
+The Abuse.ch Ransomware Tracker was discontinued on 2019-12-08. The `intelmqctl upgrade-config` command warns if you have this feed in use.
+
+#### Bitcash.cz Feed
+The Bitcash.cz Banned IPs / Blocklist feed previously available under `https://bitcash.cz/misc/log/blacklist` is no longer available. The `intelmqctl upgrade-config` command warns if you have this feed in use.
+
+#### Fraunhofer DDoS Attack Feed
+The Fraunhofer DDoS Attack feed previously available under `https://feed.caad.fkie.fraunhofer.de/ddosattackfeed/` is no longer available. The `intelmqctl upgrade-config` command warns if you have this feed in use.
+
+#### Bambenek Feeds
+Many Bambenek feeds require a license now and URLs have changed. See https://osint.bambenekconsulting.com/feeds/ for more information. The `intelmqctl upgrade-config` command also warns if you have previously documented feeds in use and migrates the URL for the DGA domain feed.
+
+#### Nothink Feeds and Parser
+All Nothink Honeypot feeds have been discontinued and the currently available data covers the time until 2019. The Nothink Parser has been removed.
+The `intelmqctl upgrade-config` command warns if you have these feeds and the bot in use.
+
+
+2.1.2 Bugfix release (2020-01-28)
+---------------------------------
+
+#### MaxMind GeoIP
+MaxMind requires a registration before being able to download the GeoLite2 database starting with 2019-12-30: https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/
+If the provided `update-geoip-data` script is used, the license key can now be set as the second parameter.
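Since registration is now mandatory, every GeoLite2 download must carry the license key. A minimal Python sketch of such a download, assuming MaxMind's documented `geoip_download` endpoint (verify the URL and edition ID against MaxMind's current documentation; the key value is a placeholder):

```python
import requests

LICENSE_KEY = "YOUR_LICENSE_KEY"  # placeholder, issued after registering with MaxMind

# Assumed endpoint format for GeoLite2 downloads after the 2019-12-30 change.
url = ("https://download.maxmind.com/app/geoip_download"
       f"?edition_id=GeoLite2-City&license_key={LICENSE_KEY}&suffix=tar.gz")

response = requests.get(url)
response.raise_for_status()
with open("GeoLite2-City.tar.gz", "wb") as fh:
    fh.write(response.content)
```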
+
+### Libraries
+When using MISP integrations, make sure your currently installed PyMISP version supports the installed Python version. Any PyMISP version newer than 2.4.119.1 requires Python 3.6 or newer.
+
+
 2.1.1 Bugfix release (2019-11-11)
 ---------------------------------
diff --git a/README.rst b/README.rst
index 3a1b7e1f5..9d3858f9d 100644
--- a/README.rst
+++ b/README.rst
@@ -54,26 +54,19 @@ Table of Contents
 -----------------
 
 1. `How to Install <#how-to-install>`__
-2. `Developers Guide <#developers-guide>`__
-3. `User Guide <#user-guide>`__
-4. `IntelMQ Manager <#intelmq-manager>`__
+2. `User Guide <#user-guide>`__
+3. `IntelMQ Manager and more tools <#intelmq-manager-and-more-tools>`__
+4. `How to Participate <#how-to-participate>`__
 5. `Incident Handling Automation Project <#incident-handling-automation-project>`__
-6. `Data Harmonization <#data-harmonization>`__
-7. `How to Participate <#how-to-participate>`__
-8. `Licence <#licence>`__
-9. `Funded by <#funded-by>`__
+6. `Licence <#licence>`__
+7. `Funded by <#funded-by>`__
 
 How to Install
 --------------
 
 See `INSTALL `__.
 
-Developers Guide
-----------------
-
-See `Developers Guide `__.
-
 User Guide
 ----------
@@ -84,13 +77,31 @@ If you know additional feeds and how to parse them, please contribute your code
 
 For support questions please use the intelmq-users mailing list:
 https://lists.cert.at/cgi-bin/mailman/listinfo/intelmq-users
 
-IntelMQ Manager
----------------
+IntelMQ uses the Data Harmonization. Please read `this
+document `__ for more details.
+
+IntelMQ Manager and more tools
+------------------------------
 
 Check out this graphical
 `tool `__
 and easily manage an IntelMQ system.
 
+More tools can be found in the `ecosystem documentation `__.
+
+How to participate
+------------------
+
+IntelMQ is a community project depending on your contributions. Please consider sharing your work.
+
+- Have a look at our `Developers Guide `__ for documentation.
+- Subscribe to the Intelmq-dev Mailing list to get answers to your development questions:
+  https://lists.cert.at/cgi-bin/mailman/listinfo/intelmq-dev
+- The `Github issues `__ list all the open feature requests, bug reports and ideas.
+- Looking for ideas for additional feeds to support? The `Feeds wishlist `__ is the list you are looking for.
+- Contribute code with pull requests (the `Github help `__ can be useful if you are not familiar with the system yet).
+- Some developers are also on IRC: `channel #intelmq on irc.freenode.net `__.
+
 Incident Handling Automation Project
 ------------------------------------
 
@@ -98,23 +109,6 @@ Incident Handling Automation Project
   http://www.enisa.europa.eu/activities/cert/support/incident-handling-automation
 - **Mailing-list:** ihap@lists.trusted-introducer.org
 
-Data Harmonization
-------------------
-
-IntelMQ use the Data Harmonization. Please read `this
-document `__ for more details.
-
-How to participate
-------------------
-
-- Subscribe to the Intelmq-dev Mailing list:
-  https://lists.cert.at/cgi-bin/mailman/listinfo/intelmq-dev (for
-  developers)
-- Watch out for our regular developers conf call
-- IRC: server: irc.freenode.net, channel: #intelmq
-- Via github issues
-- Via Pull requests (please do read help.github.com first)
-
 Licence
 -------
diff --git a/contrib/README.md b/contrib/README.md
index b4b468b43..c7fd230e1 100644
--- a/contrib/README.md
+++ b/contrib/README.md
@@ -4,11 +4,12 @@
 This directory contains contributed scripts which are helpful for maintaining an intelmq instance.
 * **cron-jobs**: cron job files for pulling in newer versions of supporting databases such as pyasn
-* **logcheck**: logcheck ruleset
+* **logcheck**: logcheck ruleset to filter logs for error messages
 * **prettyprint**: pretty-prints the JSON output of the file-output bot
 * **config-backup**: simple Makefile for doing a `make backup` inside of /opt/intelmq in order to preserve the latest configs
 * **logrotate**: an example script for Debian's /etc/logrotate.d/ directory.
 * **check_mk**: Scripts for monitoring an IntelMQ instance with Check_MK.
+* **development-tools**: Tools useful for development
 
 ## Outdated
 The following scripts are out of date but are left here for reference. TODO: adapt to current version
diff --git a/contrib/bash-completion/intelmqctl b/contrib/bash-completion/intelmqctl
index e00b1c1c4..c45fab3e9 100644
--- a/contrib/bash-completion/intelmqctl
+++ b/contrib/bash-completion/intelmqctl
@@ -18,7 +18,7 @@ _intelmqctl ()
     #echo "posice: $COMP_CWORD $COMP_WORDS";
     case $COMP_CWORD in
         1)
-            opts="start stop restart reload run status log clear list check enable disable upgrade-config";
+            opts="start stop restart reload run status log clear list check enable disable upgrade-config debug";
             COMPREPLY=($(compgen -W "${opts} ${generic_pre} ${generic_post}" -- ${cur}));
             return 0
         ;;
diff --git a/contrib/development-tools/bots-feeds.sh b/contrib/development-tools/bots-feeds.sh
new file mode 100755
index 000000000..0554e7b20
--- /dev/null
+++ b/contrib/development-tools/bots-feeds.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+echo "Bots:"
+jq '.Collector | keys | length' intelmq/bots/BOTS
+jq '.Parser | keys | length' intelmq/bots/BOTS
+jq '.Expert | keys | length' intelmq/bots/BOTS
+jq '.Output | keys | length' intelmq/bots/BOTS
+
+echo "Feeds:"
+egrep -c '^ [^ ]' intelmq/etc/feeds.yaml
+echo "Shadowserver:"
+python3 -c "import intelmq.bots.parsers.shadowserver.config; print(len(intelmq.bots.parsers.shadowserver.config.mapping))"
+
diff --git a/contrib/elasticsearch/README.md b/contrib/elasticsearch/README.md
index bd9ce8cb0..97813ad22 100644
--- a/contrib/elasticsearch/README.md
+++ b/contrib/elasticsearch/README.md
@@ -18,7 +18,7 @@ pip3 install elasticsearch
 ```
 usage: elasticmapper [-h] --harmonization-file
                      [--harmonization-fallback] [--host ] [--index INDEX]
-                     [--index-type INDEX_TYPE] [--output ]
+                     [--output ]
 
 Elastic Mapper tool
 
@@ -30,8 +30,6 @@ optional arguments:
                         harmonization fallback to `text` type
   --host                elasticsearch server IP
   --index INDEX         elasticsearch index
-  --index-type INDEX_TYPE
-                        elasticsearch index type
   --index-template      save the mapping as a template for newly-created indices
   --output              write mapping to file
 ```
 
 #### Send only to Elasticsearch
 ```
-elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --index-type=events --host=127.0.0.1
+elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --host=127.0.0.1
 ```
 
 #### Write only to output file
 ```
-elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --index-type=events --output=/tmp/mapping.txt
+elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --output=/tmp/mapping.txt
 ```
 
 #### Send to Elasticsearch and write to output file
 ```
-elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --index-type=events --output=/tmp/mapping.txt --host=127.0.0.1
+elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --output=/tmp/mapping.txt --host=127.0.0.1
 ```
 
 #### Send to Elasticsearch as a template (see https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html)
 ```
-elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --index-type=events --host=127.0.0.1 --index-template
+elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --host=127.0.0.1 --index-template
 ```
 
 #### Harmonization fallback
 
 Revert to the default 'text' type in the generated mapping for any fields which have unrecognizable field types.
 
 ```
-elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --index-type=events --output=/tmp/mapping.txt --host=127.0.0.1 --harmonization-fallback
+elasticmapper --harmonization-file=intelmq/intelmq/etc/harmonization.conf --index=intelmq --output=/tmp/mapping.txt --host=127.0.0.1 --harmonization-fallback
 ```
diff --git a/contrib/elasticsearch/elasticmapper b/contrib/elasticsearch/elasticmapper
index fa58b8f98..e136b75c1 100755
--- a/contrib/elasticsearch/elasticmapper
+++ b/contrib/elasticsearch/elasticmapper
@@ -62,7 +62,7 @@ def mapping_properties_from_harmonization(harmonization, replacement_char):
     return __mapping_properties_from_harmonization(properties), err
 
 
-def create_mapping(harmonization, index_type, replacement_char):
+def create_mapping(harmonization, replacement_char):
 
     config = {"enabled": False}
 
@@ -70,10 +70,7 @@ def create_mapping(harmonization, replacement_char):
 
     data = {
         "mappings": {
-            index_type: {
-                "_all": config,
-                "properties": properties
-            }
+            "properties": properties
         }
     }
 
@@ -176,13 +173,6 @@ if __name__ == "__main__":
                         required=False,
                         help='elasticsearch index name, or template name if using a template')
 
-    parser.add_argument('--index-type',
-                        action="store",
-                        dest="index_type",
-                        default="events",
-                        required=False,
-                        help='elasticsearch index type')
-
     parser.add_argument('--index-template',
                         action="store_true",
                         dest="index_template",
@@ -209,7 +199,7 @@ if __name__ == "__main__":
     with open(arguments.harmonization_file) as fp:
         harmonization = json.load(fp)
 
-    data, err = create_mapping(harmonization, arguments.index_type, arguments.replacement_char)
+    data, err = create_mapping(harmonization, arguments.replacement_char)
 
     if err:
         if arguments.harmonization_fallback:
diff --git a/contrib/malware_name_mapping/download_mapping.py b/contrib/malware_name_mapping/download_mapping.py
index d327adac7..f65cead09 100755
--- a/contrib/malware_name_mapping/download_mapping.py
+++ b/contrib/malware_name_mapping/download_mapping.py
@@ -76,12 +76,13 @@ def generate_regex_from_human(*values):
 
 def download(url: str=URL, add_default=False, params=None, include_malpedia=False,
-             include_misp=False):
+             include_misp=False, mwnmp_ignore_adware=False):
     download = requests.get(url)
     download.raise_for_status()
     rules = [generate_rule(*line[:2])
              for line in csv.reader(io.StringIO(download.text))
-             if not line[0].startswith('#')]
+             if not line[0].startswith('#')
+             if (not mwnmp_ignore_adware or not any('adware' == field for field in line))]
     if include_malpedia:
         download = requests.get(URL_MALPEDIA)
         download.raise_for_status()
@@ -103,7 +104,7 @@ def download(url: str=URL, add_default=False, params=None, include_malpedia=Fals
                                        name=rule_name))
 
     if add_default:
-        rules.append(generate_rule(".*", "{msg[malware.name]}", name="default"))
rules.append(generate_rule(".*", "{msg[malware.name]}", name="default")) + rules.append(generate_rule(".*", add_default, name="default")) if params: rules.extend((generate_rule(rule[0][0], rule[1][0]) for rule in params)) @@ -131,7 +132,9 @@ def create_parser(): help='The URL to download the mapping from.') parser.add_argument('--add-default', '-d', help='Add a default rule to use the malware name as identifier.', - const=True, action='store_const') + # action='store_true') + const='{msg[malware.name]}', + nargs='?') parser.add_argument('--expression', '-e', nargs=1, action='append', help='Expression for an additional rule.', @@ -150,6 +153,11 @@ def create_parser(): help='Include MISP galaxy cluster data (CC0 1.0 Universal), ' 'only threat_actor.json is used' 'from %s.' % URL_MISP) + parser.add_argument('--mwnmp-ignore-adware', + action='store_true', + help='Ignore adware in the malware name mapping data. ' + 'for rules. Ignores all lines with an explicit ' + "'adware' field.") return parser @@ -163,6 +171,7 @@ def main(args): params=zip(args.expression, args.identifier), include_malpedia=args.include_malpedia, include_misp=args.include_misp, + mwnmp_ignore_adware=args.mwnmp_ignore_adware, ) if args.filename: try: diff --git a/contrib/malware_name_mapping/test_download_mapping.py b/contrib/malware_name_mapping/test_download_mapping.py index 4761be6c6..125d34866 100644 --- a/contrib/malware_name_mapping/test_download_mapping.py +++ b/contrib/malware_name_mapping/test_download_mapping.py @@ -1,12 +1,13 @@ # -*- coding: utf-8 -*- import unittest import json -from download_mapping import create_parser, main, generate_regex_from_human, IDENTIFIER_FROM_HUMAN +import requests_mock +from download_mapping import create_parser, main, generate_regex_from_human, IDENTIFIER_FROM_HUMAN, URL from intelmq.lib.test import skip_internet -@skip_internet() class TestDownloadMapping(unittest.TestCase): + @skip_internet() def test_download(self): parser = create_parser() args = parser.parse_args("") @@ -14,6 +15,7 @@ def test_download(self): self.assertGreater(len(result), 100) self.assertIsInstance(result, list) + @skip_internet() def test_download_identifier_expression(self): parser = create_parser() args = parser.parse_args('-e "^foobar$" -i foobar'.split(' ')) @@ -21,6 +23,71 @@ def test_download_identifier_expression(self): self.assertGreater(len(result), 100) self.assertIsInstance(result, list) + def test_download_ignore_adware(self): + parser = create_parser() + args = parser.parse_args(["--mwnmp-ignore-adware"]) + with requests_mock.Mocker() as mocker: + mocker.get(URL, text='"^foobar$",foobar,adware,barfoo') + result = json.loads(main(args)) + self.assertEqual(len(result), 0) + self.assertIsInstance(result, list) + + def test_download_not_ignore_not_adware(self): + parser = create_parser() + args = parser.parse_args(["--mwnmp-ignore-adware"]) + with requests_mock.Mocker() as mocker: + mocker.get(URL, text='"^foobar$",foobar,not adware,barfoo') + result = json.loads(main(args)) + self.assertEqual(len(result), 1) + self.assertIsInstance(result, list) + + def test_download_add_default(self): + " Test --add-default parameter. 
" + parser = create_parser() + args = parser.parse_args(["--add-default"]) + with requests_mock.Mocker() as mocker: + mocker.get(URL, text='') + result = json.loads(main(args)) + self.assertEqual(result, + [{'if': {'classification.taxonomy': 'malicious code', + 'malware.name': '.*'}, + 'rulename': 'default', + 'then': {'classification.identifier': '{msg[malware.name]}'}}] + ) + + def test_download_add_default_constant(self): + " Test --add-default parameter with an argument" + parser = create_parser() + args = parser.parse_args(["--add-default", "constant"]) + with requests_mock.Mocker() as mocker: + mocker.get(URL, text='') + result = json.loads(main(args)) + self.assertEqual(result, + [{'if': {'classification.taxonomy': 'malicious code', + 'malware.name': '.*'}, + 'rulename': 'default', + 'then': {'classification.identifier': 'constant'}}] + ) + + maxDiff=None + + +class TestParser(unittest.TestCase): + def test_parser_default(self): + parser = create_parser() + args = parser.parse_args(["--add-default"]) + self.assertEqual(args.add_default, '{msg[malware.name]}') + + def test_parser_default_argument(self): + parser = create_parser() + args = parser.parse_args(["--add-default", "constant"]) + self.assertEqual(args.add_default, "constant") + + def test_parser_no_default(self): + parser = create_parser() + args = parser.parse_args() + self.assertEqual(args.add_default, None) + class TestDownloadMappingHelpers(unittest.TestCase): def test_generate_regex_from_human(self): diff --git a/debian/changelog b/debian/changelog index 97c9789dd..0b2eb9cca 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,45 @@ +intelmq (2.2.0-1) stable; urgency=medium + + * Update to 2.2.0. + + -- Sebastian Wagner Thu, 18 Jun 2020 10:02:33 +0200 + +intelmq (2.2.0~rc1-1) unstable; urgency=medium + + * Update to 2.2.0 Release Candidate 1. + + -- Sebastian Wagner Sat, 30 May 2020 12:22:30 +0200 + +intelmq (2.2.0~alpha2-1) unstable; urgency=medium + + * Update to 2.2.0~alpha1. + + -- Sebastian Wagner Mon, 16 Mar 2020 18:28:33 +0100 + +intelmq (2.1.3-1) stable; urgency=medium + + * Update to 2.1.3. + + -- Sebastian Wagner Tue, 26 May 2020 12:21:26 +0200 + +intelmq (2.1.2-2) stable; urgency=medium + + * Add "debianutils" as new dependency, because IntelMQ needs `which`. + + -- Sebastian Wagner Wed, 01 Apr 2020 09:28:23 +0200 + +intelmq (2.1.2-1) stable; urgency=medium + + * Update to 2.1.2. + + -- Wagner Sebastian Tue, 28 Jan 2020 16:43:16 +0100 + +intelmq (2.1.2~alpha1-1) stable; urgency=medium + + * Update to 2.1.2~alpha1 + + -- Sebastian Wagner Tue, 26 Nov 2019 12:06:55 +0100 + intelmq (2.1.1-1) stable; urgency=medium * Update to 2.1.1. 
diff --git a/debian/control b/debian/control index 305b2bc35..ef7be9835 100644 --- a/debian/control +++ b/debian/control @@ -3,7 +3,7 @@ Maintainer: Sebastian Wagner Section: python Priority: optional Build-Depends: debhelper (>= 4.1.16), python3-all, python3-setuptools, quilt, dh-python, python-setuptools, dh-systemd, safe-rm, python3-requests, python3-redis, python3-dnspython, python3-psutil, python3-dateutil, python3-termstyle, python3-tz, lsb-release -X-Python3-Version: >= 3.4 +X-Python3-Version: >= 3.5 Standards-Version: 3.9.6 Homepage: https://github.com/certtools/intelmq/ @@ -13,7 +13,7 @@ Depends: ${misc:Depends}, ${python3:Depends}, python3-dateutil (>= 2.5), python3-dnspython (>= 1.11.1), python3-openssl, python3-psutil (>= 1.2.1), python3-redis (>= 2.10), python3-requests (>= 2.2.1), python3-termstyle (>= 0.1.10), python3-tz, - redis-server, cron, bash-completion, jq, systemd + redis-server, cron, bash-completion, jq, systemd, debianutils Suggests: python3-imbox (>= 0.8), python3-pyasn (>= 1.5.0), python3-stomp.py (>= 4.1.9), python3-sleekxmpp (>= 1.3.1), python3-geoip2 (>= 2.2.0), python3-pymongo, python3-psycopg2 diff --git a/debian/intelmq.install b/debian/intelmq.install index 7292e50a3..1bfa94f57 100644 --- a/debian/intelmq.install +++ b/debian/intelmq.install @@ -3,3 +3,5 @@ contrib/logrotate/intelmq etc/logrotate.d/ contrib/bash-completion/intelmqctl usr/share/bash-completion/completions/ contrib/bash-completion/intelmqdump usr/share/bash-completion/completions/ intelmq/bots/experts/modify/examples/* usr/share/doc/intelmq/bots/experts/modify/examples/ +intelmq/etc/* etc/intelmq/ +intelmq/bots/BOTS etc/intelmq/ diff --git a/debian/patches/fix-intelmq-paths.patch b/debian/patches/fix-intelmq-paths.patch index 128e4afbb..7635342b8 100644 --- a/debian/patches/fix-intelmq-paths.patch +++ b/debian/patches/fix-intelmq-paths.patch @@ -1,16 +1,22 @@ Description: Set all paths to LSB Author: Sebastian Wagner -Last-Update: 2019-05-17 +Last-Update: 2019-11-19 --- This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/intelmq/__init__.py -+++ b/intelmq/__init__.py -@@ -1,7 +1,7 @@ - from .version import __version__, __version_info__ +--- a/intelmq/__init__.py 2019-11-19 15:48:35.654629757 +0100 ++++ b/intelmq/__init__.py 2019-11-19 15:49:44.970512504 +0100 +@@ -2,11 +2,11 @@ import os --if os.getenv('INTELMQ_PATHS_NO_OPT', False): -+if not os.getenv('INTELMQ_PATHS_OPT', False): - ROOT_DIR = "" - CONFIG_DIR = "/etc/intelmq/" - DEFAULT_LOGGING_LEVEL = "INFO" + +-path = "opt" ++path = "lsb" + if os.getenv("INTELMQ_ROOT_DIR", False): + path = "opt" +-elif os.getenv('INTELMQ_PATHS_NO_OPT', False): +- path = "lsb" ++elif os.getenv('INTELMQ_PATHS_OPT', False): ++ path = "opt" + + + if path == "lsb": diff --git a/debian/patches/fix-logrotate-path.patch b/debian/patches/fix-logrotate-path.patch index f95744b6a..9c4a36bb5 100644 --- a/debian/patches/fix-logrotate-path.patch +++ b/debian/patches/fix-logrotate-path.patch @@ -1,11 +1,20 @@ Description: Fix paths in logrotate config to obey the FHS -Author: Gernot Schulz -Last-Update: 2016-07-20 +Author: Gernot Schulz , Sebastian Wagner +Last-Update: 2020-03-17 --- This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ ---- a/contrib/logrotate/intelmq -+++ b/contrib/logrotate/intelmq -@@ -11,7 +11,7 @@ +--- a/contrib/logrotate/intelmq 2019-05-14 12:28:51.834183000 +0200 ++++ b/contrib/logrotate/intelmq 2020-03-17 11:40:48.391615177 +0100 +@@ -3,7 +3,7 @@ + copytruncate + create 640 intelmq intelmq +
+-/opt/intelmq/var/log/*.log { ++/var/log/intelmq/*.log { + su intelmq intelmq + daily + maxsize 10M +@@ -11,11 +11,11 @@ notifempty sharedscripts postrotate @@ -14,6 +23,11 @@ This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ endscript } +-/opt/intelmq/var/lib/bots/file-output/*.txt { ++/var/lib/intelmq/bots/file-output/*.txt { + su intelmq intelmq + daily + maxsize 10M @@ -23,6 +23,6 @@ notifempty sharedscripts diff --git a/debian/rules b/debian/rules index e06d92fa0..73418ccbc 100755 --- a/debian/rules +++ b/debian/rules @@ -34,10 +34,8 @@ override_dh_auto_install: $(BOTDOCS) rm intelmq/bin/rewrite_config_files.py rm intelmq/bin/intelmq_gen_docs.py intelmq/tests/bin/test_gen_docs.py sed -i -e '/#!\/usr\/bin\//d' intelmq/bin/*.py - find . -type f -exec sed -i -f debian/sedfile {} \; + sed -i -f debian/sedfile intelmq/bots/BOTS intelmq/etc/* docs/intelmqctl.md docs/Bots.md setup.py contrib/logrotate/intelmq python3 setup.py install --root=debian/intelmq --prefix=/usr - mv debian/intelmq/etc/intelmq/examples/* debian/intelmq/etc/intelmq/ - rmdir debian/intelmq/etc/intelmq/examples # these are already in /usr/bin/ #rm %{buildroot}/%{python3_sitelib}/intelmq/bots/experts/maxmind_geoip/update-geoip-data #rm %{buildroot}/%{python3_sitelib}/intelmq/bots/experts/asn_lookup/update-asn-data @@ -61,6 +59,8 @@ override_dh_auto_install: $(BOTDOCS) done # remove program not needed for packages rm debian/intelmq/usr/bin/intelmqsetup + # create directory layout and empty state file + ROOT_DIR=debian/intelmq/ PYTHONPATH=. python3 intelmq/bin/intelmqsetup.py --skip-ownership --state-file debian/intelmq/var/lib/intelmq/state.json override_dh_install: dh_install diff --git a/docs/Bots.md b/docs/Bots.md index 71d2d0b51..0f707711d 100644 --- a/docs/Bots.md +++ b/docs/Bots.md @@ -30,6 +30,7 @@ - [Parsers](#parsers) - [Not complete](#not-complete) - [Generic CSV Parser](#generic-csv-parser) + - [Calidog Certstream](#calidog-certstream) - [Cymru CAP Program](#cymru-cap-program) - [Cymru Full Bogons](#cymru-full-bogons) - [HTML Table Parser](#html-table-parser) @@ -39,6 +40,7 @@ - [Experts](#experts) - [Abusix](#abusix) - [ASN Lookup](#asn-lookup) + - [CSV Converter](#csv-converter) - [Copy Extra](#copy-extra) - [Cymru Whois](#cymru-whois) - [Deduplicator](#deduplicator) @@ -54,6 +56,7 @@ - [Gethostbyname](#gethostbyname) - [IDEA](#idea) - [MaxMind GeoIP](#maxmind-geoip) + - [MISP](#misp) - [Modify](#modify) - [Configuration File](#configuration-file) - [Actions](#actions) @@ -80,6 +83,8 @@ - [Filename formatting](#filename-formatting) - [Files](#files) - [McAfee Enterprise Security Manager](#mcafee-enterprise-security-manager) + - [MISP Feed](#misp-feed) + - [MISP API](#misp-api) - [MongoDB](#mongodb) - [Installation Requirements](#installation-requirements) - [Redis](#redis) @@ -185,6 +190,8 @@ Multihreading is disabled for all Collectors, as this would lead to duplicated d ### AMQP +Requires the [`pika` python library](https://pypi.org/project/pika/), minimum version 1.0.0. 
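If the library is not yet installed, it can be fetched from PyPI, for example (package name as published there):
```
pip3 install 'pika>=1.0'
```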
+ #### Information: * `name`: intelmq.bots.collectors.amqp.collector_amqp * `lookup`: yes @@ -199,7 +206,7 @@ Multihreading is disabled for all Collectors, as this would lead to duplicated d * `connection_heartbeat`: Heartbeat to server, in seconds, defaults to 3600 * `connection_host`: Name/IP for the AMQP server, defaults to 127.0.0.1 * `connection_port`: Port for the AMQP server, defaults to 5672 -* `connection_vhost`: Virtual host to connect, on a http(s) connection would be http:/IP/ +* `connection_vhost`: Virtual host to connect, on an http(s) connection would be http:/IP/ * `expect_intelmq_message`: Boolean, if the data is from IntelMQ or not. Default: `false`. If true, then the data can be any Report or Event and will be passed to the next bot as is. Otherwise a new report is created with the raw data. * `password`: Password for authentication on your AMQP server * `queue_name`: The name of the queue to fetch data from @@ -217,7 +224,7 @@ Currently only fetching from a queue is supported can be extended in the future. * `lookup:` yes * `public:` yes * `cache (redis db):` none -* `description:` collect report messages from a HTTP REST API +* `description:` collect report messages from an HTTP REST API #### Configuration Parameters: @@ -251,7 +258,7 @@ You may use a `JSON` specifying [time-delta](https://docs.python.org/3/library/d Zipped files are automatically extracted if detected. -For extracted files, every extracted file is sent in it's own report. Every report has a field named `extra.file_name` with the file name in the archive the content was extracted from. +For extracted files, every extracted file is sent in its own report. Every report has a field named `extra.file_name` with the file name in the archive the content was extracted from. * * * @@ -294,13 +301,14 @@ The parameter `http_timeout_max_tries` is of no use in this collector. * `mail_host`: FQDN or IP of mail server * `mail_user`: user account of the email account * `mail_password`: password associated with the user account +* `mail_port`: IMAP server port, optional (default: 143 without SSL, 993 for SSL) * `mail_ssl`: whether the mail account uses SSL (default: `true`) * `folder`: folder in which to look for mails (default: `INBOX`) * `subject_regex`: regular expression to look for a subject * `url_regex`: regular expression of the feed URL to search for in the mail body * `sent_from`: filter messages by sender * `sent_to`: filter messages by recipient -* `ssl_ca_certificate`: Optional string of path to trusted CA certicate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used. +* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used. The resulting reports contains the following special fields: * `feed.url`: The URL the data was downloaded from @@ -342,6 +350,7 @@ limitation set `chunk_size` to something like `384000000`, i.e., ~384 MB. 
* `mail_host`: FQDN or IP of mail server * `mail_user`: user account of the email account * `mail_password`: password associated with the user account +* `mail_port`: IMAP server port, optional (default: 143 without SSL, 993 for SSL) * `mail_ssl`: whether the mail account uses SSL (default: `true`) * `folder`: folder in which to look for mails (default: `INBOX`) * `subject_regex`: regular expression to look for a subject * `sent_from`: filter messages by sender * `sent_to`: filter messages by recipient @@ -374,12 +383,13 @@ The resulting reports contains the following special fields: * `mail_host`: FQDN or IP of mail server * `mail_user`: user account of the email account * `mail_password`: password associated with the user account +* `mail_port`: IMAP server port, optional (default: 143 without SSL, 993 for SSL) * `mail_ssl`: whether the mail account uses SSL (default: `true`) * `folder`: folder in which to look for mails (default: `INBOX`) * `subject_regex`: regular expression to look for a subject * `sent_from`: filter messages by sender * `sent_to`: filter messages by recipient -* `ssl_ca_certificate`: Optional string of path to trusted CA certicate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used. +* `ssl_ca_certificate`: Optional string of path to trusted CA certificate. Applies only to IMAP connections, not HTTP. If the provided certificate is not found, the IMAP connection will fail on handshake. By default, no certificate is used. * `content_types`: Which bodies to use based on the content_type. Default: `true`/`['html', 'plain']` for all: - string with comma separated values, e.g. `['html', 'plain']` - `true`, `false`, `null`: Same as default value @@ -392,6 +402,38 @@ The resulting reports contains the following special fields: * * * +### GithubAPI + + +#### Information: +* `name:` intelmq.bots.collectors.github_api.collector_github_contents_api +* `lookup:` yes +* `public:` yes +* `cache (redis db):` none +* `description:` Collects files matched by regex from a GitHub repository via the GitHub API. + Optionally with GitHub credentials, which are used for Basic HTTP authentication. + +#### Configuration Parameters: + +* **Feed parameters** (see above) +* `basic_auth_username:` Github account username (optional) +* `basic_auth_password:` Github account password (optional) +* `repository:` Github target repository (`/`) +* `regex:` Valid regex of target files within the repository (defaults to `.*.json`) +* `extra_fields:` Comma-separated list of extra fields from [github contents API](https://developer.github.com/v3/repos/contents/) + +#### Workflow + +The optional authentication parameters provide a higher rate limit for the GitHub API requests. +With GitHub user authentication, the requests are rate-limited to 5000 per hour, otherwise to 60 requests per hour. + +The collector recursively searches for `regex`-defined files in the provided `repository`. +Additionally, it adds the extra file metadata defined by `extra_fields`. + +The bot always sets the URL from which the file was downloaded as `feed.url`. + +* * * + ### Fileinput #### Information: @@ -439,7 +481,7 @@ If `delete_file` is set, the file will be deleted after processing. If deletion is not possible, the bot will stop. To prevent data loss, the bot also stops when no `postfix` is set and -`delete_file` was set. This can not be overridden. +`delete_file` was set. This cannot be overridden.
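To illustrate the interplay of `postfix` and `delete_file` described above, a minimal `runtime.conf` sketch follows. The bot id and all values are invented for illustration, and the module path and the `path` parameter are assumptions based on this bot's usual documentation, which is not part of the excerpt here:
```json
{
    "fileinput-collector": {
        "group": "Collector",
        "name": "Fileinput",
        "module": "intelmq.bots.collectors.file.collector_file",
        "parameters": {
            "path": "/tmp/",
            "postfix": ".csv",
            "delete_file": true
        }
    }
}
```
Because a `postfix` is set in this sketch, enabling `delete_file` is permitted; without a `postfix`, the bot would stop as described above.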
The bot always sets the file name as feed.url @@ -473,16 +515,18 @@ Requires the rsync executable * `lookup:` yes * `public:` yes * `cache (redis db):` none -* `description:` collect messages from [MISP](https://github.com/MISP), a malware information sharing platform. server. +* `description:` collect messages from [MISP](https://github.com/MISP), a malware information sharing platform server. #### Configuration Parameters: * **Feed parameters** (see above) -* `misp_url`: url of MISP server (with trailing '/') +* `misp_url`: URL of MISP server (with trailing '/') * `misp_key`: MISP Authkey -* `misp_verify`: (default: `true`) * `misp_tag_to_process`: MISP tag for events to be processed -* `misp_tag_processed`: MISP tag for processed events +* `misp_tag_processed`: MISP tag for processed events, optional + +Generic parameters used in this bot: +* `http_verify_cert`: Verify the TLS certificate of the server, boolean (default: `true`) #### Workflow This collector will search for events on a MISP server that have a @@ -516,7 +560,8 @@ If none of the filename matches apply, the contents of the first (RT-) "history" * **Feed parameters** (see above) * **HTTP parameters** (see above) -* `extract_files`: Optional, boolean or list of strings. See documentation of the Generic URL Fetcher for more details. +* `extract_attachment`: Optional, boolean or list of strings. See documentation of the Generic URL Fetcher parameter `extract_files` for more details. +* `extract_download`: Optional, boolean or list of strings. See documentation of the Generic URL Fetcher parameter `extract_files` for more details. * `uri`: url of the REST interface of the RT * `user`: RT username * `password`: RT password @@ -529,7 +574,7 @@ If none of the filename matches apply, the contents of the first (RT-) "history" * `take_ticket`: whether to take the ticket (default: `true`) * `url_regex`: regular expression of an URL to search for in the ticket * `attachment_regex`: regular expression of an attachment in the ticket -* `unzip_attachment`: whether to unzip a found attachment. Only the first file in the archive is used. +* `unzip_attachment`: whether to unzip a found attachment. Only the first file in the archive is used. Deprecated in favor of `extract_attachment`. The parameter `http_timeout_max_tries` is of no use in this collector. @@ -649,7 +694,7 @@ pip3 install -r intelmq/bots/collectors/xmpp/REQUIREMENTS.txt * `xmpp_server`: The domain name of the server of the XMPP-Account (part after the @ sign) * `xmpp_user`: The username of the XMPP-Account the collector shall use (part before the @ sign) * `xmpp_password`: The password of the XMPP-Account -* `xmpp_room`: The room which which has to be joined by the XMPP-Collector (full address room@conference.server.tld) +* `xmpp_room`: The room which has to be joined by the XMPP-Collector (full address room@conference.server.tld) * `xmpp_room_nick`: The username / nickname the collector shall use within the room * `xmpp_room_password`: The password which might be required to join a room - `use_muc` : If this parameter is `true`, the bot will join the room `xmpp_room`.
@@ -719,7 +764,7 @@ pip3 install -r intelmq/bots/collectors/blueliv/REQUIREMENTS.txt ### Calidog Certstream A Bot to collect data from the Certificate Transparency Log (CTL) -This bot works based on certstream libary (https://github.com/CaliDog/certstream-python) +This bot works based on certstream library (https://github.com/CaliDog/certstream-python) #### Information: * `name:` intelmq.bots.collectors.calidog.collector_certstream @@ -747,27 +792,30 @@ This bot works based on certstream libary (https://github.com/CaliDog/certstream * **Feed parameters** (see above) * `dxl_config_file`: location of the config file containing required information to connect $ -* `dxl_topic`: the name of the DXL topix to subscribe +* `dxl_topic`: the name of the DXL topic to subscribe to * * * ### Microsoft Azure Iterates over all blobs in all containers in an Azure storage. +The Cache is required to memorize which files have already been processed (TTL needs to be high enough to cover the oldest files available!). + +This bot significantly changed in a backwards-incompatible way in IntelMQ Version 2.2.0 to support current versions of the Microsoft Azure Python libraries. #### Information: -* `name:` intelmq.bots.collectors.microsoft.collector_azure -* `lookup:` yes -* `public:` no -* `cache (redis db):` none -* `description:` collect blobs from microsoft azure using their library +* `name`: intelmq.bots.collectors.microsoft.collector_azure +* `lookup`: yes +* `public`: no +* `cache (redis db)`: 5 +* `description`: collect blobs from Microsoft Azure using their library #### Configuration Parameters: +* **Cache parameters** (see above) * **Feed parameters** (see above) -* `account_name`: account name as give by Microsoft -* `account_key`: account key as give by Microsoft -* `delete`: boolean, delete containers and blobs after fetching +* `connection_string`: connection string as given by Microsoft +* `container_name`: name of the container to connect to * * * @@ -846,7 +894,7 @@ Collects tweets from target_timelines. Up to tweet_count tweets from each user a * `include_rts`: whether to include retweets by given screen_name * `consumer_key`: Twitter api login data * `consumer_secret`: Twitter api login data -* `acces_token_key`: Twitter api login data +* `access_token_key`: Twitter api login data * `access_token_secret`: Twitter api login data ### API collector bot @@ -876,6 +924,23 @@ This list is not complete. Look at `intelmq/bots/BOTS` or the list of parsers sh TODO +### AnubisNetworks Cyberfeed Stream + +#### Information +* `name`: `intelmq.bots.parsers.anubisnetworks.parser` +* `lookup`: no +* `public`: yes +* `cache (redis db)`: none +* `description`: parses data from the AnubisNetworks Cyberfeed Stream + +#### Description + +The feed format changes over time. The parser supports at least data from 2016 and 2020. + +#### Configuration parameters + +* `use_malware_familiy_as_classification_identifier`: default: `true`. Use the `malw.family` field as `classification.type`. If `false`, check if the same as `malw.variant`. If it is the same, it is ignored. Otherwise saved as `extra.malware.family`. + ### Generic CSV Parser Lines starting with `'#'` will be ignored. Headers won't be interpreted. @@ -907,8 +972,8 @@ Lines starting with `'#'` will be ignored. Headers won't be interpreted. - parse a value and ignore if it fails `"columns": "source.url|__IGNORE__"` - * `"column_regex_search"`: Optional. A dictionary mapping field names (as given per the columns parameter) to regular expression.
The field is evaulated using `re.search`. Eg. to get the ASN out of `AS1234` use: `{"source.asn": "[0-9]*"}`. - * `"default_url_protocol"`: For URLs you can give a defaut protocol which will be pretended to the data. + * `"column_regex_search"`: Optional. A dictionary mapping field names (as given per the columns parameter) to regular expression. The field is evaluated using `re.search`. Eg. to get the ASN out of `AS1234` use: `{"source.asn": "[0-9]*"}`. + * `"default_url_protocol"`: For URLs you can give a default protocol which will be prepended to the data. * `"delimiter"`: separation character of the CSV, e.g. `","` * `"skip_header"`: Boolean, skip the first line of the file, optional. Lines starting with `#` will be skipped additionally, make sure you do not skip more lines than needed! * `time_format`: Optional. If `"timestamp"`, `"windows_nt"` or `"epoch_millis"` the time will be converted first. With the default `null` fuzzy time parsing will be used. @@ -938,6 +1003,25 @@ you can map them to the correct ones. The `type_translation` field can hold a JS * * * +### Calidog Certstream + + +#### Information: +* `name:` intelmq.bots.parsers.calidog.parser_certstream +* `lookup:` no +* `public:` yes +* `cache (redis db):` none +* `description:` parses data from the Certificate Transparency Log + +#### Description + +For each domain in the `leaf_cert.all_domains` object one event with the domain in `source.fqdn` (and `source.ip` as fallback) is produced. +The seen-date is saved in `time.source` and the classification type is `other`. + +* **Feed parameters** (see above) + +* * * + ### Fraunhofer DDos Attack Parser #### Information: @@ -1013,6 +1097,15 @@ http://www.team-cymru.com/bogon-reference.html * * * +### Github Feed + +#### Information + +* `name:` intelmq.bots.parsers.github_feed.parser +* `description:` Parses feeds available publicly on GitHub (should receive data from the github_api collector) + +* * * + ### Have I Been Pwned Callback Parser #### Information: @@ -1093,7 +1186,7 @@ Parses breaches and pastes and creates one event per e-mail address. The e-mail "attribute_value": "details" ``` * `"table_index"`: Index of the table if multiple tables present. If `attribute_name` and `attribute_value` given, index according to tables remaining after filtering with table attribute. Default: `0`. - * `"split_column"`: Padded column to be splitted to get values, to be used in conjection with `split_separator` and `split_index`, optional. + * `"split_column"`: Padded column to be split to get values, to be used in conjunction with `split_separator` and `split_index`, optional. * `"split_separator"`: Delimiter string for padded column. * `"split_index"`: Index of unpadded string in returned list from splitting `split_column` with `split_separator` as delimiter string. Default: `0`. E.g. @@ -1159,6 +1252,20 @@ Parses breaches and pastes and creates one event per e-mail address. The e-mail * * * +### Microsoft CTIP Parser + +* `name`: `intelmq.bots.parsers.microsoft.parser_ctip` +* `public`: no +* `cache (redis db)`: none +* `description`: Parses data from the Microsoft CTIP Feed + +#### Description + +Can parse the JSON format provided by the Interflow interface (lists of dictionaries) as well as the format provided by the Azure interface (one dictionary per line). +The provided data differs between the two formats/providers. + +* * * + ### MISP * `name:` intelmq.bots.parsers.misp.parser @@ -1185,7 +1292,7 @@ constants.
#### Configuration Parameters: -* `domain_whitelist`: domains to be filetered out +* `domain_whitelist`: domains to be filtered out * `substitutions`: semicolon delimited list of even length of pairs of substitutions (for example: '[.];.;,;.' substitutes '[.]' for '.' and ',' for '.') * `classification_type`: string with a valid classification type as defined in data harmonization * `default_scheme`: Default scheme for URLs if not given. See also the next section. @@ -1212,7 +1319,7 @@ This does not affect URLs which already include the scheme. #### How this bot works? -There are two possibilities TODO. +There are two possibilities for the bot to determine which feed the data belongs to and thus the correct mapping of the columns: #### Automatic feed detection Since IntelMQ version 2.1 the parser can detect the feed based on metadata provided by the collector. @@ -1224,6 +1331,7 @@ If this lookup is not possible, and the feed name is not given as parameter, the The field `extra.file_name` has the following structure: `%Y-%m-%d-${report_name}[-suffix].csv` where suffix can be something like `country-geo`. For example, some possible filenames are `2019-01-01-scan_http-country-geo.csv` or `2019-01-01-scan_tftp.csv`. The important part is `${report_name}`, between the date and the suffix. +Since version 2.1.2 the date in the filename is optional, so filenames like `scan_tftp.csv` are also detected. #### Fixed feed name If the method above is not possible and for upgraded instances, the feed can be set with the `feedname` parameter. @@ -1266,11 +1374,13 @@ These are the supported feed name and their corresponding file name for automati | Open-DB2-Discovery-Service | `scan_db2` | | Open-Elasticsearch | `scan_elasticsearch` | | Open-IPMI | `scan_ipmi` | +| Open-IPP | `scan_ipp` | | Open-LDAP | `scan_ldap ` | | Open-LDAP-TCP | `scan_ldap_tcp` | | Open-mDNS | `scan_mdns` | | Open-Memcached | `scan_memcached` | | Open-MongoDB | `scan_mongodb` | +| Open-MQTT | `scan_mqtt` | | Open-MSSQL | `scan_mssql` | | Open-NATPMP | `scan_nat_pmp` | | Open-NetBIOS-Nameservice | `scan_netbios` | @@ -1305,7 +1415,7 @@ Add a new feedformat and conversions if required to the file `config.py`. Don't forget to update the `feed_idx` dict. It is required to look up the correct configuration. -Look a the documentation in the bots's `config.py` file for more information. +Look at the documentation in the bot's `config.py` file for more information. * * * @@ -1406,6 +1516,28 @@ Note: the '<' '>' characters only are syntactic markings, no shell redirection i * * * +### CSV Converter + + +#### Information: +* `name`: `intelmq.bots.experts.csv_converter.expert` +* `lookup`: no +* `public`: yes +* `cache (redis db)`: none +* `description`: Converts an event to CSV format, saved in the `output` field. + +#### Configuration Parameters: + + * `delimiter`: String, default `","` + * `fieldnames`: Comma-separated list of field names, e.g. `"time.source,classification.type,source.ip"` + +#### Usage + +To use the CSV-converted data in an output bot - for example in a file output, +use the configuration parameter `single_key` of the output bot and set it to `output`.
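A minimal `runtime.conf` sketch of this pairing follows; the bot ids, the field selection and the output file path are made up, and the file output module path is an assumption, while the expert's module name and parameters are taken from the section above:
```json
{
    "csv-converter-expert": {
        "group": "Expert",
        "name": "CSV Converter",
        "module": "intelmq.bots.experts.csv_converter.expert",
        "parameters": {
            "delimiter": ",",
            "fieldnames": "time.source,classification.type,source.ip"
        }
    },
    "file-output": {
        "group": "Output",
        "name": "File",
        "module": "intelmq.bots.outputs.file.output",
        "parameters": {
            "file": "/opt/intelmq/var/lib/bots/file-output/events.csv",
            "single_key": "output"
        }
    }
}
```
The expert stores the CSV line in the `output` field and the output bot then writes only that field, yielding a plain CSV file.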
+ +* * * + ### Copy Extra #### Information: @@ -1431,6 +1563,8 @@ FIXME * `cache (redis db):` 5 * `description:` IP to geolocation, ASN, BGP prefix +Public documentation: https://www.team-cymru.com/IP-ASN-mapping.html#dns + #### Configuration Parameters: * **Cache parameters** (see in section [common parameters](#common-parameters)) @@ -1501,11 +1635,11 @@ And additionally the exceptions, together with the above wildcard rule: #### Configuration Parameters: * **Cache parameters** (see in section [common parameters](#common-parameters)) -* `bypass`- true or false value to bypass the eduplicator. When set to true, messages will not be deduplicated. Default: false +* `bypass`- true or false value to bypass the deduplicator. When set to true, messages will not be deduplicated. Default: false ##### Parameters for "fine-grained" deduplication -* `filter_type`: type of the filtering which can be "blacklist" or "whitelist". The filter type will be used to define how Deduplicator bot will interpret the the parameter `filter_keys` in order to decide whether an event has already been seen or not, i.e., duplicated event or a completely new event. +* `filter_type`: type of the filtering which can be "blacklist" or "whitelist". The filter type will be used to define how Deduplicator bot will interpret the parameter `filter_keys` in order to decide whether an event has already been seen or not, i.e., duplicated event or a completely new event. * "whitelist" configuration: only the keys listed in `filter_keys` will be considered to verify if an event is duplicated or not. * "blacklist" configuration: all keys except those in `filter_keys` will be considered to verify if an event is duplicated or not. * `filter_keys`: string with multiple keys separated by comma. Please note that `time.observation` key will not be considered even if defined, because the system always ignore that key. @@ -1544,6 +1678,14 @@ The bot with this configuration will detect duplication based on all keys, excep } ``` +#### Flushing the cache + +To flush the deduplicator's cache, you can use the `redis-cli` tool. Enter the database used by the bot and submit the `flushdb` command: +``` +redis-cli -n 6 +flushdb +``` + * * * ### DO Portal Expert Bot @@ -1635,6 +1777,8 @@ Examples of time filter definition: | drop | ✓ | ✗ | ✓ | ✓ | ✗ | | drop | ✗ | ✓ | ✗ | ✗ | ✓ | +In `DEBUG` logging level, one can see that the message is sent to both matching paths, even if one of the paths is not configured. Of course the message is only delivered to the configured paths. + * * * ### Format Field @@ -1742,7 +1886,7 @@ Converts the event to IDEA format and saves it as JSON in the field `output`. Al Documentation about IDEA: https://idea.cesnet.cz/en/index #### Information: -* `name:` idea +* `name:` intelmq.bots.experts.idea.expert * `lookup:` local config * `public:` yes * `cache (redis db):` none @@ -1757,7 +1901,7 @@ Documentation about IDEA: https://idea.cesnet.cz/en/index ### MaxMind GeoIP #### Information: -* `name:` maxmind-geoip +* `name:` intelmq.bots.experts.maxmind_geoip.expert * `lookup:` local database * `public:` yes * `cache (redis db):` none @@ -1767,8 +1911,7 @@ Documentation about IDEA: https://idea.cesnet.cz/en/index The bot requires the maxmind's `geoip2` Python library, version 2.2.0 has been tested. -The database is available at https://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz -You need to unzip it. +To download the database a free license key is required.
More information can be found at https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/ You may want to use a shell script provided in the contrib directory to keep the database up to date: `contrib/cron-jobs/update-geoip-data` @@ -1778,6 +1921,25 @@ You may want to use a shell script provided in the contrib directory to keep the * `overwrite`: boolean * `use_registered`: boolean. MaxMind has two country ISO codes: One for the physical location of the address and one for the registered location. Default is `false` (backwards-compatibility). See also https://github.com/certtools/intelmq/pull/1344 for a short explanation. +### MISP + +Queries a MISP instance for the `source.ip` and adds the MISP Attribute UUID and MISP Event ID of the newest attribute found. + +#### Information: +* `name:` intelmq.bots.experts.misp.expert +* `lookup:` yes +* `public:` no +* `cache (redis db):` none +* `description:` IP address to MISP attribute and event + +#### Configuration Parameters: + +* `misp_key`: MISP Authkey +* `misp_url`: URL of MISP server (with trailing '/') + +Generic parameters used in this bot: +* `http_verify_cert`: Verify the TLS certificate of the server, boolean (default: `true`) + * * * ### McAfee Active Response Hash lookup @@ -1846,6 +2008,8 @@ You may want to use a shell script provided in the contrib directory to keep the * `configuration_path`: filename * `case_sensitive`: boolean, default: true +* `maximum_matches`: Maximum number of matches. Processing stops after the limit is reached. Default: no limit (`null`, `0`). +* `overwrite`: Overwrite any existing fields by matching rules. Default if the parameter is given: `true`, for backwards compatibility. Default will change to `false` in version 3.0.0. #### Configuration File @@ -1954,7 +2118,7 @@ For boolean values, the comparison value needs to be `true` or `false` as in JSO #### Configuration Parameters: -* `filter`: (true/false) act as a a filter for AT. +* `filter`: (true/false) act as a filter for AT. * `overwrite_cc`: set to true if you want to overwrite any potentially existing cc fields in the event. * * * @@ -1968,7 +2132,7 @@ This Bot tags events with score found in recorded futures large IP risklist. * `lookup:` local database * `public:` no * `cache (redis db):` none -* `description:` Record risk score associated to source and destination IP if they are present. Assigns 0 to to IPs not in the RF list. +* `description:` Record risk score associated to source and destination IP if they are present. Assigns 0 to IP addresses not in the RF list. #### Configuration Parameters: @@ -2229,7 +2393,7 @@ Example: or with the command `keep`. The named queue needs to configured in the pipeline, see the User Guide for more information. - ```path 'named-queue``` + ```path 'named-queue'``` * `drop` marks the event to be dropped. The event will not be forwarded to the next bot in the pipeline. The sieve file processing is interrupted upon @@ -2315,7 +2479,7 @@ writes it to `source.fqdn` or `destination.fqdn` if it is a hostname, or * `lookup:` none * `public:` yes * `cache (redis db):` none -* `description:` Waits for a some time or until a queue size is lower than a given numer. +* `description:` Waits for some time or until a queue size is lower than a given number. #### Configuration Parameters: @@ -2343,6 +2507,8 @@ Note that SIGHUPs and reloads interrupt the sleeping.
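As a hedged sketch of how the bot described above could wait until a watched queue shrinks: the bot id, the watched queue and the values are invented, and the parameter names (`queue_name`, `queue_size`) and the module path are assumptions based on this bot's usual documentation, which is not part of the excerpt here:
```json
{
    "wait-expert": {
        "group": "Expert",
        "name": "Wait",
        "module": "intelmq.bots.experts.wait.expert",
        "parameters": {
            "queue_name": "file-output-queue",
            "queue_size": 10000
        }
    }
}
```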
Sends data to an AMQP Server See https://www.rabbitmq.com/tutorials/amqp-concepts.html for more details on amqp topic exchange. +Requires the [`pika` python library](https://pypi.org/project/pika/). + #### Information * `name`: `intelmq.bots.outputs.amqptopic.output` * `lookup`: to the amqp server @@ -2356,7 +2522,7 @@ See https://www.rabbitmq.com/tutorials/amqp-concepts.html for more details on am * connection_heartbeat : Heartbeat to server, in seconds, defaults to 3600 * connection_host : Name/IP for the AMQP server, defaults to 127.0.0.1 * connection_port : Port for the AMQP server, defaults to 5672 -* connection_vhost : Virtual host to connect, on a http(s) connection would be http:/IP/ +* connection_vhost : Virtual host to connect, on an http(s) connection would be http:/IP/ * content_type : Content type to deliver to AMQP server, currently only supports "application/json" * delivery_mode : 1 - Non-persistent, 2 - Persistent. On persistent mode, messages are delivered to 'durable' queues and will be saved to disk. * exchange_durable : If set to True, the exchange will survive broker restart, otherwise will be a transient exchange. @@ -2366,10 +2532,10 @@ See https://www.rabbitmq.com/tutorials/amqp-concepts.html for more details on am * password : Password for authentication on your AMQP server * require_confirmation : If set to True, an exception will be raised if a confirmation error is received * routing_key : The routing key for your amqptopic -* `single_key` : Only send a the field instead of the full event (expecting a field name as string) +* `single_key` : Only send the field instead of the full event (expecting a field name as string) * username : Username for authentication on your AMQP server * `use_ssl` : Use ssl for the connection, make sure to also set the correct port, usually 5671 (`true`/`false`) -* message_hierarchical_output: Convert the message to hierachical JSON, default: false +* message_hierarchical_output: Convert the message to hierarchical JSON, default: false * message_with_type : Include the type in the sent message, default: false * message_jsondict_as_string: Convert fields of type JSONDict (extra) as string, default: false @@ -2418,6 +2584,8 @@ This output bot discards all incoming messages. * `cache`: no * `description`: Output Bot that sends events to Elasticsearch +Only Elasticsearch version 7 is supported. + #### Configuration parameters: * `elastic_host`: Name/IP for the Elasticsearch server, defaults to 127.0.0.1 @@ -2431,15 +2599,14 @@ This output bot discards all incoming messages. 'weekly' --> intelmq-2018-42 'monthly' --> intelmq-2018-02 'yearly' --> intelmq-2018 -* `elastic_doctype`: Elasticsearch document type for the event. Default: events -* `http_username`: http_auth basic username -* `http_password`: http_auth basic password +* `http_username`: HTTP basic authentication username +* `http_password`: HTTP basic authentication password * `use_ssl`: Whether to use SSL/TLS when connecting to Elasticsearch. Default: False * `http_verify_cert`: Whether to require verification of the server's certificate. Default: False * `ssl_ca_certificate`: An optional path to a certificate bundle to use for verifying the server -* `ssl_show_warnings`: Whether to show warnings if the server's certificate can not be verified. Default: True +* `ssl_show_warnings`: Whether to show warnings if the server's certificate cannot be verified. Default: True * `replacement_char`: If set, dots ('.') in field names will be replaced with this character prior to indexing.
This is for backward compatibility with ES 2.X. Default: null. Recommended for ES2.X: '_' -* `flatten_fields1: In ES, some query and aggregations work better if the fields are flat and not JSON. Here you can provide a list of fields to convert. +* `flatten_fields`: In ES, some query and aggregations work better if the fields are flat and not JSON. Here you can provide a list of fields to convert. Can be a list of strings (fieldnames) or a string with field names separated by a comma (,). eg `extra,field2` or `['extra', 'field2']` Default: ['extra'] @@ -2472,7 +2639,7 @@ Multihreading is disabled for this bot, as this would lead to corrupted files. * `encoding_errors_mode`: By default `'strict'`, see for more details and options: https://docs.python.org/3/library/functions.html#open For example with `'backslashreplace'` all characters which cannot be properly encoded will be written escaped with backslashes. * `file`: file path of output file. Missing directories will be created if possible with the mode 755. * `format_filename`: Boolean if the filename should be formatted (default: `false`). -* `hierarchial_output`: If true, the resulting dictionary will be hierarchical (field names split by dot). +* `hierarchical_output`: If true, the resulting dictionary will be hierarchical (field names split by dot). * `single_key`: if `none`, the whole event is saved (default); otherwise the bot saves only contents of the specified key. In case of `raw` the data is base64 decoded. ##### Filename formatting @@ -2523,7 +2690,86 @@ If the field used in the format string is not defined, `None` will be used as fa * `esm_user`: username of user entitled to write to watchlist * `esm_pw`: password of user * `esm_watchlist`: name of the watchlist to write to -* `field`: name of the intelMQ field to be written to ESM +* `field`: name of the IntelMQ field to be written to ESM + +* * * + +### MISP Feed + +#### Information: +* `name:` `intelmq.bots.outputs.misp.output_feed` +* `lookup:` no +* `public:` no +* `cache (redis db):` none +* `description:` Create a directory layout in the MISP Feed format + +The PyMISP library >= 2.4.119.1 is required, see +[REQUIREMENTS.txt](../intelmq/bots/outputs/misp/REQUIREMENTS.txt). + +#### Configuration Parameters: + +* **Feed parameters** (see above) +* `misp_org_name`: Org name which creates the event, string +* `misp_org_uuid`: Org UUID which creates the event, string +* `output_dir`: Output directory path, e.g. `/opt/intelmq/var/lib/bots/mispfeed-output`. Will be created if it does not exist, if possible. +* `interval_event`: The output bot creates one event per each interval, all data in this time frame is part of this event. Default "1 hour", string. + +#### Usage in MISP + +Configure the destination directory of this feed as a feed in MISP, either as a local location, or served via a web server. See [the MISP documentation on Feeds](https://www.circl.lu/doc/misp/managing-feeds/) for more information. + +* * * + +### MISP API + +#### Information: +* `name:` `intelmq.bots.outputs.misp.output_api` +* `lookup:` no +* `public:` no +* `cache (redis db):` none +* `description:` Connect to a MISP instance and add event as MISPObject if not there already. + +The PyMISP library >= 2.4.120 is required, see +[REQUIREMENTS.txt](../intelmq/bots/outputs/misp/REQUIREMENTS.txt).
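If not yet present, the library can be installed from PyPI, for example (pinning the minimum version stated above):
```
pip3 install 'pymisp>=2.4.120'
```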
+ +#### Configuration Parameters: + +* **Feed parameters** (see above) +* `add_feed_provider_as_tag`: bool (use `true` when in doubt) +* `add_feed_name_as_tag`: bool (use `true` when in doubt) +* `misp_additional_correlation_fields`: list of fields for which + the correlation flags will be enabled (in addition to those which are + in significant_fields) +* `misp_additional_tags`: list of tags to set, which will not be searched for + when looking for duplicates +* `misp_key`: str, API key for accessing MISP +* `misp_publish`: bool, if a new MISP event should be set to "publish". + Expert setting as MISP may really make it "public"! + (Use `false` when in doubt.) +* `misp_tag_for_bot`: str, used to mark MISP events +* `misp_to_ids_fields`: list of fields for which the `to_ids` flags will be set +* `misp_url`: str, URL of the MISP server +* `significant_fields`: list of IntelMQ field names + +The `significant_fields` values +will be searched for in all MISP attribute values +and if all values are found in the same MISP event, no new MISP event +will be created. +Instead, if the existing MISP events have the same feed.provider +and match closely, their timestamp will be updated. + +If a new MISP event is inserted the `significant_fields` and the +`misp_additional_correlation_fields` will be the attributes +where correlation is enabled. + +Make sure to build the IntelMQ Botnet in a way that the rate of incoming +events is what MISP can handle, as IntelMQ can process many more events faster +than MISP (which is by design as MISP is for manual handling). +Also remove the fields of the IntelMQ events with an expert bot +that you do not want to be inserted into MISP. + +(More details can be found in the docstring of +[`output_api.py`](../intelmq/bots/outputs/misp/output_api.py)). * * * @@ -2545,7 +2791,7 @@ Saves events in a MongoDB either as hierarchical structure or flat with full key * `db_user` : Database user that should be used if you enabled authentication * `db_pass` : Password associated to `db_user` * `host`: MongoDB host (FQDN or IP) -* `port`: MongoDB port +* `port`: MongoDB port, default: 27017 * `hierarchical_output`: Boolean (default true) as mongodb does not allow saving keys with dots, we split the dictionary in sub-dictionaries. * `replacement_char`: String (default `'_'`) used as replacement character for the dots in key names if hierarchical output is not used. @@ -2555,7 +2801,7 @@ Saves events in a MongoDB either as hierarchical structure or flat with full key pip3 install pymongo>=2.7.1 ``` -The bot has been tested with pymongo versions 2.7.1 and 3.4. +The bot has been tested with pymongo versions 2.7.1, 3.4 and 3.10.1 (server versions 2.6.10 and 3.6.8). * * * @@ -2574,7 +2820,7 @@ * `redis_password`: remote server password * `redis_queue`: remote server list (queue), e.g.: "remote-server-queue" * `redis_server_ip`: remote server IP address, e.g.: 127.0.0.1
* `redis_password`: remote server password * `redis_queue`: remote server list (queue), e.g.: "remote-server-queue" * `redis_server_ip`: remote server IP address, e.g.: 127.0.0.1 -* `redis_server_port`: remote server Port, e.g: 6379 +* `redis_server_port`: remote server Port, e.g.: 6379 * `redis_timeout`: Connection timeout, in msecs, e.g.: 50000 * `hierarchical_output`: whether output should be sent in hierarchical json format (default: false) * `with_type`: Send the `__type` field (default: true) @@ -2748,7 +2994,7 @@ psql -h localhost intelmq-events intelmq "10.10.10.10" + port => 6379 + db => 4 + data_type => "list" + key => "logstash-queue" + } +} +``` +* `host` - same as redis_server_ip from the Redis Output Bot +* `port` - the redis_server_port from the Redis Output Bot +* `db` - the redis_db parameter from the Redis Output Bot +* `data_type` - set to `list` +* `key` - same as redis_queue from the Redis Output Bot + +#### Notes + +* You can also use syntax like this: `host => "${REDIS_HOST:10.10.10.10}"`\ + The value will be taken from environment variable `$REDIS_HOST`. If the environment variable is not defined then the default value of `10.10.10.10` will be used instead. + +### Filter (optional) + +Before passing the data to the database you can apply certain changes. This is done with filters. See an example: +``` +filter { + mutate { + lowercase => ["source.geolocation.city", "classification.identifier"] + remove_field => ["__type", "@version"] + } + date { + match => ["time.observation", "ISO8601"] + } +} +``` + +#### Notes + +* It is not recommended to apply any modifications to the data (within the `mutate` key) outside of the IntelMQ. All necessary modifications should be done only by appropriate IntelMQ bots. This example only demonstrates the possibility. + +* It is recommended to use the `date` filter: generally we have two timestamp fields - `time.source` (provided by the feed source this can be understood as when the event happend; however it is not always present) and `time.observation` (when IntelMQ collected this event). Logstash also adds another field `@timestamp` with time of processing by Logstash. While it can be useful for debugging, I recommend to set the `@timestamp` to the same value as `time.observation`. + +### Output + +The pipeline also needs output, where we define our database (Elasticsearch). The simplest way of doing so is defining an output like this: +``` +output { + elasticsearch { + hosts => ["http://10.10.10.11:9200", "http://10.10.10.12:9200"] + index => "intelmq-%{+YYYY.MM}" + } +} +``` +* `hosts` - Elasticsearch host (or more) with the correct port (9200 by default) +* `index` - name of the index where to insert data + +#### Notes +* Authors experience, hardware equipment and the amount of events collected led to having a separate index for each month. This might not necessarily suit your needs, but is a suggested option. + +* By default the ELK stack uses unsecure HTTP. It is possible to setup Security for secure connections and basic user management. This is possible with the Basic (free) licence since versions 6.8.0 and 7.1.0. + +## Configuring Elasticsearch + +Configuring Elasticsearch is entirely up to you and should be consulted with the [official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/index.html). What you will most likely need is something called [index template](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-templates.html) mappings. 
IntelMQ provides a tool for generating such mappings. See [ElasticMapper Tool](../contrib/elasticsearch/README.md). + +#### Notes + +* A default installation of the Elasticsearch database allows anyone with cURL and connection capability administrative access to the database. Make sure you secure your toys! diff --git a/docs/Ecosystem.md b/docs/Ecosystem.md index 6671e3e79..f3247a162 100644 --- a/docs/Ecosystem.md +++ b/docs/Ecosystem.md @@ -22,6 +22,8 @@ This is not a software itself but listed here because the term it is often menti The EventDB is a (usually PostgreSQL) database with data from intelmq. +For some related scripts see the [contrib/eventdb](../contrib/eventdb) directory and the [eventdb-stats](https://github.com/wagner-certat/eventdb-stats) repository for simple statistics generation. + ## intelmq-webinput-csv A web-based interface to inject CSV data into IntelMQ with on-line validation and live feedback. @@ -55,3 +57,9 @@ https://github.com/certat/do-portal/ A Grafana-based statistics portal for the EventDB. Integrated in do-portal. https://github.com/certtools/stats-portal/ + +## Malware Name Mapping + +A mapping for malware names of different feeds with different naming schemes to a common family name. + +https://github.com/certtools/malware_name_mapping diff --git a/docs/FAQ.md b/docs/FAQ.md index ea4301116..7bac7241e 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -3,7 +3,6 @@ **Table of Contents:** - [Send IntelMQ events to Splunk](#send-intelmq-events-to-splunk) -- [Git information](#git-information) - [Permission denied when using redis unix socket](#permission-denied-when-using-redis-unix-socket) - [Why is the time invalid?](#why-is-the-time-invalid) - [How can I improve the speed?](#how-can-i-improve-the-speed) diff --git a/docs/Feeds-whishlist.md b/docs/Feeds-whishlist.md new file mode 100644 index 000000000..4f6550525 --- /dev/null +++ b/docs/Feeds-whishlist.md @@ -0,0 +1,75 @@ +# Feeds whishlist + +This is a list of various feeds currently not supported in IntelMQ. + +If you want to **contribute**, either by documenting how to configure existing bots to collect new feeds or by creating new parsers, here is a list of potentially interesting feeds. +See [Feeds documentation](Developers-Guide.md#feeds-documentation) for more information on this. + +This list evolved from the issue [Contribute: Feeds List (#384)](https://github.com/certtools/intelmq/issues/384).
+ +- A list of feeds: https://threatfeeds.io/ +- Some third party intelmq bots: https://github.com/NRDCS/intelmq/tree/certlt/intelmq/bots + +- [Abuse.ch SSL Blacklists](https://sslbl.abuse.ch/blacklist/) +- [Adblock Plus Malwaredomains](https://easylist-msie.adblockplus.org/malwaredomains_full.tpl) +- [APWG's ecrimex](https://www.ecrimex.net) +- [apivoid IP Reputation API](https://www.apivoid.com/api/ip-reputation/) +- [Bad IPs](https://www.badips.com) +- [Berkeley](https://security.berkeley.edu/aggressive_ips/ips) +- [Binary Defense](https://www.binarydefense.com/) +- [Bot Invaders Realtime tracker](http://www.marc-blanchard.com/BotInvaders/index.php) +- [Botscout Last Caught](http://botscout.com/last_caught_cache.htm) +- [Carbon Black Feeds](https://github.com/carbonblack/cbfeeds) +- [Chaos Reigns](http://www.chaosreigns.com/spam/) +- [Critical Stack](https://intel.criticalstack.com) +- [Cruzit](http://www.cruzit.com/xwbl2txt.php) +- [Cyber Crime Tracker](http://cybercrime-tracker.net/all.php) +- [DNS DB API](https://api.dnsdb.info) +- [Dyn DNS](http://security-research.dyndns.org/pub/) +- [Facebook Threat Exchange](https://developers.facebook.com/docs/threat-exchange) +- [FilterLists](https://filterlists.com) +- [Firehol IPLists](https://iplists.firehol.org/) +- [Google Webmaster Alerts](https://www.google.com/webmasters/) +- [GPF Comics DNS Blacklist](https://www.gpf-comics.com/dnsbl/export.php) +- [Greensnow](https://blocklist.greensnow.co/greensnow.txt) +- [HP Feeds](https://github.com/rep/hpfeeds) +- [IBM X-Force Exchange](https://exchange.xforce.ibmcloud.com/) +- [ISC SANS](https://isc.sans.edu/ipsascii.html) +- [ISightPartners](http://www.isightpartners.com/) +- [Joewein](http://www.joewein.net) +- [Malshare](https://malshare.com/) +- [Malware Config](http://malwareconfig.com) +- [Malware DB (cert.pl)](https://mwdb.cert.pl/) +- [MalwareDomainList](http://www.malwaredomainlist.com/zeuscsv.php) +- [MalwareDomains](http://www.malwaredomainlist.com/hostslist/yesterday_urls.php) +- [MalwareInt](http://malwareint.com) +- [Manity Spam IP addresses](http://www.dnsbl.manitu.net/download/nixspam-ip.dump.gz) +- [Marc Blanchard DGA Domains](http://www.marc-blanchard.com/BotInvaders/index.php) +- [MaxMind Proxies](https://www.maxmind.com/en/anonymous_proxies) +- [mIRC Servers](http://www.mirc.com/servers.ini) +- [Monzymerza](https://github.com/monzymerza/parthenon) +- [Multiproxy](http://multiproxy.org/txt_all/proxy.txt) +- [MVPS](http://mvps.org) +- [Null Secure](http://nullsecure.org) +- [OpenBugBounty](https://www.openbugbounty.org/) +- [Payload Security](http://payload-security.com) +- [Project Honeypot (#284)](http://www.projecthoneypot.org/list_of_ips.php?rss=1) +- [ShadowServer Sandbox API](http://www.shadowserver.org/wiki/pmwiki.php/Services/Sandboxapi) +- [Shodan search API](https://shodan.readthedocs.io/en/latest/tutorial.html#searching-shodan) +- [Snort](http://labs.snort.org/feeds/ip-filter.blf) +- [Spamhaus BGP feed (BGPf)](https://www.spamhaus.org/bgpf/) +- [SteveBlack Hosts File](https://github.com/StevenBlack/hosts) +- [TheCyberThreat](http://thecyberthreat.com/cyber-threat-intelligence-feeds/) +- [The Haleys](http://charles.the-haleys.org/ssh_dico_attack_hdeny_format.php/hostsdeny.txt) +- [Threat Crowd](https://www.threatcrowd.org/feeds/hashes.txt) +- [Threat Grid](http://www.threatgrid.com/) +- [Threatstream](https://ui.threatstream.com/) +- [TOR Project Exit addresses](https://check.torproject.org/exit-addresses) +- [TotalHash](http://totalhash.com) +- [UCE 
Protect](http://wget-mirrors.uceprotect.net/) +- [URI BL](http://rss.uribl.com/index.shtml) +- [Virustotal](https://www.virustotal.com/gui/home/search) +- [virustream](https://github.com/ntddk/virustream) +- [VoIP Blacklist](http://www.voipbl.org/update/) +- [Wordpress Callback Domains](http://callbackdomains.wordpress.com) +- [YourCMC](http://vmx.yourcmc.ru/BAD_HOSTS.IP4) diff --git a/docs/Feeds.md b/docs/Feeds.md index 4974649ad..89d1a528e 100644 --- a/docs/Feeds.md +++ b/docs/Feeds.md @@ -12,7 +12,6 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run - [AnubisNetworks](#anubisnetworks) - [Autoshun](#autoshun) - [Bambenek](#bambenek) -- [Bitcash](#bitcash) - [Blocklist.de](#blocklistde) - [Blueliv](#blueliv) - [CERT.PL](#certpl) @@ -35,20 +34,18 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run - [McAfee Advanced Threat Defense](#mcafee-advanced-threat-defense) - [Microsoft](#microsoft) - [Netlab 360](#netlab-360) -- [Nothink](#nothink) - [OpenPhish](#openphish) -- [OpenPhish Commercial](#openphish-commercial) - [PhishTank](#phishtank) - [PrecisionSec](#precisionsec) - [ShadowServer](#shadowserver) - [Spamhaus](#spamhaus) +- [Strangereal Intel](#strangereal-intel) - [Sucuri](#sucuri) - [Surbl](#surbl) - [Taichung](#taichung) - [Team Cymru](#team-cymru) - [Threatminer](#threatminer) - [Turris](#turris) -- [URLVir](#urlvir) - [University of Toulouse](#university-of-toulouse) - [VXVault](#vxvault) - [ViriBack](#viriback) @@ -63,8 +60,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Feodo Tracker Browse -* **Status:** on -* **Revision:** 19-03-2019 +* **Public:** yes +* **Revision:** 2019-03-19 +* **Documentation:** https://feodotracker.abuse.ch/browse * **Description:** ### Collector @@ -88,8 +86,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Feodo Tracker IPs -* **Status:** on -* **Revision:** 25-03-2019 +* **Public:** yes +* **Revision:** 2019-03-25 +* **Documentation:** https://feodotracker.abuse.ch/ * **Description:** List of botnet Command&Control servers (C&Cs) tracked by Feodo Tracker, associated with Dridex and Emotet (aka Heodo). * **Additional Information:** https://feodotracker.abuse.ch/ The data in the column Last Online is used for `time.source` if available, with 00:00 as time. Otherwise first seen is used as `time.source`. @@ -108,31 +107,11 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run * **Configuration Parameters:** -## Ransomware Tracker - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** Ransomware Tracker feed includes FQDN's, URL's, and known IP addresses that were used for said FQDN's and URL's for various ransomware families. - -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http -* **Configuration Parameters:** -* * `http_url`: `https://ransomwaretracker.abuse.ch/feeds/csv/` -* * `name`: `Ransomware Tracker` -* * `provider`: `Abuse.ch` -* * `rate_limit`: `129600` - -### Parser - -* **Module:** intelmq.bots.parsers.abusech.parser_ransomware -* **Configuration Parameters:** - - ## URLhaus -* **Status:** on -* **Revision:** 14-02-2019 +* **Public:** yes +* **Revision:** 2019-02-14 +* **Documentation:** https://urlhaus.abuse.ch/ * **Description:** URLhaus is a project from abuse.ch with the goal of sharing malicious URLs that are being used for malware distribution. 
URLhaus offers a country, ASN (AS number) and Top Level Domain (TLD) feed for network operators / Internet Service Providers (ISPs), Computer Emergency Response Teams (CERTs) and domain registries.
 
 ### Collector
@@ -154,54 +133,13 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run
 * * `type_translation`: `{"malware_download": "malware-distribution"}`
 
 
-## Zeus Tracker Domains
-
-* **Status:** off
-* **Revision:** 20-01-2018
-* **Description:** The ZeuS domain blocklist (BadDomains) is the recommended blocklist if you want to block only ZeuS domain names. It has domain names that ZeuS Tracker believes to be hijacked (level 2). Hence the false positive rate should be much lower compared to the standard ZeuS domain blocklist.
-
-### Collector
-
-* **Module:** intelmq.bots.collectors.http.collector_http
-* **Configuration Parameters:**
-* * `http_url`: `https://zeustracker.abuse.ch/blocklist.php?download=baddomains`
-* * `name`: `Zeus Tracker Domains`
-* * `provider`: `Abuse.ch`
-* * `rate_limit`: `129600`
-
-### Parser
-
-* **Module:** intelmq.bots.parsers.abusech.parser_domain
-* **Configuration Parameters:**
-
-
-## Zeus Tracker IPs
-
-* **Status:** off
-* **Revision:** 20-01-2018
-* **Description:** This list only includes IPv4 addresses that are used by the ZeuS Trojan. It is the recommended list if you want to block only ZeuS IPs. It excludes IP addresses that ZeuS Tracker believes to be hijacked (level 2) or belong to a free web hosting provider (level 3). Hence the false positive rate should be much lower compared to the standard ZeuS IP blocklist.
-
-### Collector
-
-* **Module:** intelmq.bots.collectors.http.collector_http
-* **Configuration Parameters:**
-* * `http_url`: `https://zeustracker.abuse.ch/blocklist.php?download=badips`
-* * `name`: `Zeus Tracker IPs`
-* * `provider`: `Abuse.ch`
-* * `rate_limit`: `129600`
-
-### Parser
-
-* **Module:** intelmq.bots.parsers.abusech.parser_ip
-* **Configuration Parameters:**
-
-
 # AlienVault
 
 ## OTX
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** unknown
+* **Revision:** 2018-01-20
+* **Documentation:** https://otx.alienvault.com/
 * **Description:** AlienVault OTX Collector is the bot responsible to get the report through the API. Report could vary according to subscriptions.
 
 ### Collector
@@ -220,8 +158,8 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run
 
 ## Reputation List
 
-* **Status:** off
-* **Revision:** 20-01-2018
+* **Public:** yes
+* **Revision:** 2018-01-20
 * **Description:** List of malicious IPs.
 
 ### Collector
@@ -243,9 +181,10 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run
 
 ## Cyberfeed Stream
 
-* **Status:** on
-* **Revision:** 20-01-2018
-* **Description:** AnubisNetworks Collector is the bot responsible to get AnubisNetworks Cyberfeed Stream.
+* **Public:** unknown
+* **Revision:** 2020-06-15
+* **Documentation:** https://www.anubisnetworks.com/ https://www.bitsight.com/
+* **Description:** Fetches and parses the Cyberfeed data stream.
 
 ### Collector
@@ -260,14 +199,16 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run
 * **Module:** intelmq.bots.parsers.anubisnetworks.parser
 * **Configuration Parameters:**
+* * `use_malware_familiy_as_classification_identifier`: `True`
 
 
 # Autoshun
 
 ## Shunlist
 
-* **Status:** off
-* **Revision:** 20-01-2018
+* **Public:** unknown
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.autoshun.org/
 * **Description:** You need to register in order to use the list.
### Collector @@ -289,15 +230,19 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## C2 Domains -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** Master Feed of known, active and non-sinkholed C&Cs domain names. License: https://osint.bambenekconsulting.com/license.txt +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** https://osint.bambenekconsulting.com/feeds/ +* **Description:** Master Feed of known, active and non-sinkholed C&Cs domain names. Requires access credentials. +* **Additional Information:** License: https://osint.bambenekconsulting.com/license.txt ### Collector * **Module:** intelmq.bots.collectors.http.collector_http * **Configuration Parameters:** -* * `http_url`: `https://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt` +* * `http_password`: `__PASSWORD__` +* * `http_url`: `https://faf.bambenekconsulting.com/feeds/c2-dommasterlist.txt` +* * `http_username`: `__USERNAME__` * * `name`: `C2 Domains` * * `provider`: `Bambenek` * * `rate_limit`: `3600` @@ -310,15 +255,19 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## C2 IPs -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** Master Feed of known, active and non-sinkholed C&Cs IP addresses License: https://osint.bambenekconsulting.com/license.txt +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** https://osint.bambenekconsulting.com/feeds/ +* **Description:** Master Feed of known, active and non-sinkholed C&Cs IP addresses. Requires access credentials. +* **Additional Information:** License: https://osint.bambenekconsulting.com/license.txt ### Collector * **Module:** intelmq.bots.collectors.http.collector_http * **Configuration Parameters:** -* * `http_url`: `https://osint.bambenekconsulting.com/feeds/c2-ipmasterlist.txt` +* * `http_password`: `__PASSWORD__` +* * `http_url`: `https://faf.bambenekconsulting.com/feeds/c2-ipmasterlist.txt` +* * `http_username`: `__USERNAME__` * * `name`: `C2 IPs` * * `provider`: `Bambenek` * * `rate_limit`: `3600` @@ -331,15 +280,17 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## DGA Domains -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** Domain feed of known DGA domains from -2 to +3 days License: https://osint.bambenekconsulting.com/license.txt +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://osint.bambenekconsulting.com/feeds/ +* **Description:** Domain feed of known DGA domains from -2 to +3 days +* **Additional Information:** License: https://osint.bambenekconsulting.com/license.txt ### Collector * **Module:** intelmq.bots.collectors.http.collector_http * **Configuration Parameters:** -* * `http_url`: `https://osint.bambenekconsulting.com/feeds/dga-feed.txt` +* * `http_url`: `https://faf.bambenekconsulting.com/feeds/dga-feed.txt` * * `name`: `DGA Domains` * * `provider`: `Bambenek` * * `rate_limit`: `3600` @@ -350,35 +301,13 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run * **Configuration Parameters:** -# Bitcash - -## Banned IPs - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** IPs banned for serious abusing of our services (scanning, sniffing, harvesting, dos attacks). 
- -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http -* **Configuration Parameters:** -* * `http_url`: `https://bitcash.cz/misc/log/blacklist` -* * `name`: `Banned IPs` -* * `provider`: `Bitcash` -* * `rate_limit`: `3600` - -### Parser - -* **Module:** intelmq.bots.parsers.bitcash.parser -* **Configuration Parameters:** - - # Blocklist.de ## Apache -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE Apache Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours as having run attacks on the service Apache, Apache-DDOS, RFI-Attacks. ### Collector @@ -398,8 +327,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Bots -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE Bots Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours as having run attacks attacks on the RFI-Attacks, REG-Bots, IRC-Bots or BadBots (BadBots = he has posted a Spam-Comment on a open Forum or Wiki). ### Collector @@ -419,8 +349,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Brute-force Logins -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE Brute-force Login Collector is the bot responsible to get the report from source of information. All IPs which attacks Joomlas, Wordpress and other Web-Logins with Brute-Force Logins. ### Collector @@ -440,8 +371,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## FTP -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE FTP Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours for attacks on the Service FTP. ### Collector @@ -461,8 +393,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## IMAP -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE IMAP Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours for attacks on the service like IMAP, SASL, POP3, etc. ### Collector @@ -482,8 +415,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## IRC Bots -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** No description provided by feed provider. 
### Collector @@ -503,8 +437,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Mail -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE Mail Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours as having run attacks on the service Mail, Postfix. ### Collector @@ -524,8 +459,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## SIP -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE SIP Collector is the bot responsible to get the report from source of information. All IP addresses that tried to login in a SIP-, VOIP- or Asterisk-Server and are included in the IPs-List from http://www.infiltrated.net/ (Twitter). ### Collector @@ -545,8 +481,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## SSH -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE SSH Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within the last 48 hours as having run attacks on the service SSH. ### Collector @@ -566,8 +503,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Strong IPs -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.blocklist.de/en/export.html * **Description:** Blocklist.DE Strong IPs Collector is the bot responsible to get the report from source of information. All IPs which are older then 2 month and have more then 5.000 attacks. ### Collector @@ -589,8 +527,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## CrimeServer -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** https://www.blueliv.com/ * **Description:** Blueliv Crimeserver Collector is the bot responsible to get the report through the API. * **Additional Information:** The service uses a different API for free users and paying subscribers. In 'CrimeServer' feed the difference lies in the data points present in the feed. 
The non-free API available from Blueliv contains, for this specific feed, following extra fields not present in the free API; "_id" - Internal unique ID "subType" - Subtype of the Crime Server "countryName" - Country name where the Crime Server is located, in English "city" - City where the Crime Server is located "domain" - Domain of the Crime Server "host" - Host of the Crime Server "createdAt" - Date when the Crime Server was added to Blueliv CrimeServer database "asnCidr" - Range of IPs that belong to an ISP (registered via Autonomous System Number (ASN)) "asnId" - Identifier of an ISP registered via ASN "asnDesc" Description of the ISP registered via ASN @@ -613,9 +552,11 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## N6 Stomp Stream -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** https://n6.cert.pl/en/ * **Description:** N6 Collector - CERT.pl's N6 Collector - N6 feed via STOMP interface. Note that rate_limit does not apply for this bot as it is waiting for messages on a stream. +* **Additional Information:** Contact cert.pl to get access to the feed. ### Collector @@ -640,8 +581,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Army List -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://cinsscore.com/#list * **Description:** The CINS Army list is a subset of the CINS Active Threat Intelligence ruleset, and consists of IP addresses that meet one of two basic criteria: 1) The IP's recent Rogue Packet score factor is very poor, or 2) The IP has tripped a designated number of 'trusted' alerts across a given number of our Sentinels deployed around the world. ### Collector @@ -663,9 +605,11 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## CertStream -* **Status:** on -* **Revision:** 15-06-2018 +* **Public:** yes +* **Revision:** 2018-06-15 +* **Documentation:** https://medium.com/cali-dog-security/introducing-certstream-3fc13bb98067 * **Description:** HTTP Websocket Stream from certstream.calidog.io providing data from Certificate Transparency Logs. +* **Additional Information:** Be aware that this feed provides a lot of data and may overload your system quickly. ### Collector @@ -684,8 +628,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Phishing -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** http://clean-mx.de/ * **Description:** In order to download the CleanMX feed you need to use a custom user agent and register that user agent. ### Collector @@ -707,8 +652,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Virus -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** http://clean-mx.de/ * **Description:** In order to download the CleanMX feed you need to use a custom user agent and register that user agent. 
### Collector @@ -732,8 +678,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Latest -* **Status:** on -* **Revision:** 19-03-2019 +* **Public:** yes +* **Revision:** 2019-03-19 +* **Documentation:** https://cybercrime-tracker.net/index.php * **Description:** C2 servers ### Collector @@ -759,8 +706,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## AS Details -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://www.dshield.org/reports.html * **Description:** No description provided by feed provider. ### Collector @@ -780,8 +728,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Block -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://www.dshield.org/reports.html * **Description:** This list summarizes the top 20 attacking class C (/24) subnets over the last three days. The number of 'attacks' indicates the number of targets reporting scans from this subnet. ### Collector @@ -801,8 +750,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Suspicious Domains -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://www.dshield.org/reports.html * **Description:** There are many suspicious domains on the internet. In an effort to identify them, as well as false positives, we have assembled weighted lists based on tracking and malware lists from different sources. ISC is collecting and categorizing various lists associated with a certain level of sensitivity. ### Collector @@ -824,8 +774,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Bruteforce Blocker -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://danger.rulez.sk/index.php/bruteforceblocker/ * **Description:** Its main purpose is to block SSH bruteforce attacks via firewall. ### Collector @@ -843,33 +794,13 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run * **Configuration Parameters:** -## SIP Invitation - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** Entries consist of fields with identifying characteristics of a source IP address that has been seen initiating a SIP INVITE operation to a remote host. The report lists hosts that are suspicious of more than just port scanning. These hosts may be SIP client cataloging or conducting various forms of telephony abuse. Report is updated hourly. - -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http -* **Configuration Parameters:** -* * `http_url`: `http://dataplane.org/sipinvitation.txt` -* * `name`: `SIP Invitation` -* * `provider`: `Danger Rulez` -* * `rate_limit`: `3600` - -### Parser - -* **Module:** intelmq.bots.parsers.dataplane.parser -* **Configuration Parameters:** - - # Dataplane ## SIP Query -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://dataplane.org/ * **Description:** Entries consist of fields with identifying characteristics of a source IP address that has been seen initiating a SIP OPTIONS query to a remote host. This report lists hosts that are suspicious of more than just port scanning. The hosts may be SIP server cataloging or conducting various forms of telephony abuse. 
Report is updated hourly. ### Collector @@ -889,8 +820,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## SIP Registration -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://dataplane.org/ * **Description:** Entries consist of fields with identifying characteristics of a source IP address that has been seen initiating a SIP REGISTER operation to a remote host. This report lists hosts that are suspicious of more than just port scanning. The hosts may be SIP client cataloging or conducting various forms of telephony abuse. Report is updated hourly. ### Collector @@ -910,8 +842,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## SSH Client Connection -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://dataplane.org/ * **Description:** Entries below consist of fields with identifying characteristics of a source IP address that has been seen initiating an SSH connection to a remote host. This report lists hosts that are suspicious of more than just port scanning. The hosts may be SSH server cataloging or conducting authentication attack attempts. Report is updated hourly. ### Collector @@ -931,8 +864,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## SSH Password Authentication -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://dataplane.org/ * **Description:** Entries below consist of fields with identifying characteristics of a source IP address that has been seen attempting to remotely login to a host using SSH password authentication. The report lists hosts that are highly suspicious and are likely conducting malicious SSH password authentication attacks. Report is updated hourly. ### Collector @@ -954,8 +888,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Infected Domains -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://security-research.dyndns.org/pub/malware-feeds/ * **Description:** DynDNS ponmocup. List of ponmocup malware redirection domains and infected web-servers. See also http://security-research.dyndns.org/pub/botnet-links.html ### Collector @@ -975,61 +910,11 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run # Fraunhofer -## DDoS Attack Feed (C&C) - -* **Status:** on -* **Revision:** 01-07-2018 -* **Description:** The Fraunhofer DDoS attack feed provides information about tracked C&C servers and detected attack commands from these C&Cs. You may request access to the feed via email to infection-reporter@fkie.fraunhofer.de -* **Additional Information:** The source feed provides a stream of newline separated JSON objects. Each line represents a single event observed by DDoS C&C trackers, e.g. attack commands. The feed can be retrieved with either the generic HTTP Stream Collector Bot for a streaming live feed or with the generic HTTP Collector Bot for a polled feed. 
- -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http_stream -* **Configuration Parameters:** -* * `http_password`: `{{ your password }}` -* * `http_url`: `https://feed.caad.fkie.fraunhofer.de/ddosattackfeed` -* * `http_username`: `{{ your username }}` -* * `name`: `DDoS Attack Feed (C&C)` -* * `provider`: `Fraunhofer` -* * `rate_limit`: `10` -* * `strip_lines`: `True` - -### Parser - -* **Module:** intelmq.bots.parsers.fraunhofer.parser_ddosattack_cnc -* **Configuration Parameters:** -* * `unknown_messagetype_accuracy`: `80` - - -## DDoS Attack Feed (Targets) - -* **Status:** on -* **Revision:** 01-07-2018 -* **Description:** The Fraunhofer DDoS attack feed provides information about tracked C&C servers and detected attack commands from these C&Cs. You may request access to the feed via email to infection-reporter@fkie.fraunhofer.de -* **Additional Information:** The source feed provides a stream of newline separated JSON objects. Each line represents a single event observed by DDoS C&C trackers, e.g. attack commands. The feed can be retrieved with either the generic HTTP Stream Collector Bot for a streaming live feed or with the generic HTTP Collector Bot for a polled feed. - -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http_stream -* **Configuration Parameters:** -* * `http_password`: `{{ your password }}` -* * `http_url`: `https://feed.caad.fkie.fraunhofer.de/ddosattackfeed` -* * `http_username`: `{{ your username }}` -* * `name`: `DDoS Attack Feed (Targets)` -* * `provider`: `Fraunhofer` -* * `rate_limit`: `10` -* * `strip_lines`: `True` - -### Parser - -* **Module:** intelmq.bots.parsers.fraunhofer.parser_ddosattack_target -* **Configuration Parameters:** - - ## DGA Archive -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** https://dgarchive.caad.fkie.fraunhofer.de/welcome/ * **Description:** Fraunhofer DGA collector fetches data from Fraunhofer's domain generation archive. ### Collector @@ -1053,8 +938,9 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Hosts -* **Status:** off -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://hosts-file.net/ * **Description:** hpHosts is a community managed and maintained hosts file that allows an additional layer of protection against access to ad, tracking and malicious websites. ### Collector @@ -1077,8 +963,8 @@ To add feeds to this file add them to `intelmq/etc/feeds.yaml` and then run ## Enterprise Callback -* **Status:** on -* **Revision:** 11-09-2019 +* **Public:** unknown +* **Revision:** 2019-09-11 * **Documentation:** https://haveibeenpwned.com/EnterpriseSubscriber/ * **Description:** With the Enterprise Subscription of 'Have I Been Pwned' you are able to provide a callback URL and any new leak data is submitted to it. It is recommended to put a webserver with Authorization check, TLS etc. in front of the API collector. * **Additional Information:** "A minimal nginx configuration could look like: @@ -1121,8 +1007,9 @@ server { ## Bind Format -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://malc0de.com/dashboard/ * **Description:** This feed includes FQDN's of malicious hosts, the file format is in Bind file format. 
### Collector @@ -1142,8 +1029,9 @@ server { ## IP Blacklist -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://malc0de.com/dashboard/ * **Description:** This feed includes IP Addresses of malicious hosts. ### Collector @@ -1163,8 +1051,9 @@ server { ## Windows Format -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://malc0de.com/dashboard/ * **Description:** This feed includes FQDN's of malicious hosts, the file format is in Windows Hosts file format. ### Collector @@ -1186,8 +1075,9 @@ server { ## Blacklist -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.malwaredomainlist.com/ * **Description:** No description provided by feed provider. ### Collector @@ -1209,8 +1099,9 @@ server { ## Malicious -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://www.malwaredomains.com/ * **Description:** Malware Prevention through Domain Blocking (Black Hole DNS Sinkhole) ### Collector @@ -1232,8 +1123,9 @@ server { ## DansGuardian -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** https://www.malwarepatrol.net/ * **Description:** Malware block list with URLs ### Collector @@ -1255,8 +1147,9 @@ server { ## Latest malicious activity -* **Status:** on -* **Revision:** 05-02-2018 +* **Public:** yes +* **Revision:** 2018-02-05 +* **Documentation:** https://www.malwareurl.com/ * **Description:** Latest malicious domains/IPs. ### Collector @@ -1278,8 +1171,9 @@ server { ## Sandbox Reports -* **Status:** on -* **Revision:** 05-07-2018 +* **Public:** unknown +* **Revision:** 2018-07-05 +* **Documentation:** https://www.mcafee.com/enterprise/en-us/products/advanced-threat-defense.html * **Description:** Processes reports from McAfee's sandboxing solution via the openDXL API. ### Collector @@ -1298,11 +1192,12 @@ server { # Microsoft -## BingMURLs +## BingMURLs via Interflow -* **Status:** on -* **Revision:** 29-05-2018 -* **Description:** Collects Malicious URLs detected by Bing from the Interflow API. +* **Public:** unknown +* **Revision:** 2018-05-29 +* **Documentation:** https://docs.microsoft.com/en-us/security/gsp/informationsharingandexchange +* **Description:** Collects Malicious URLs detected by Bing from the Interflow API. The feed is available via Microsoft’s Government Security Program (GSP). * **Additional Information:** Depending on the file sizes you may need to increase the parameter 'http_timeout_sec' of the collector. ### Collector @@ -1312,7 +1207,7 @@ server { * * `api_key`: `{{your API key}}` * * `file_match`: `^bingmurls_` * * `http_timeout_sec`: `300` -* * `name`: `BingMURLs` +* * `name`: `BingMURLs via Interflow` * * `not_older_than`: `2 days` * * `provider`: `Microsoft` * * `rate_limit`: `3600` @@ -1323,11 +1218,40 @@ server { * **Configuration Parameters:** -## CTIP +## CTIP via Azure + +* **Public:** unknown +* **Revision:** 2020-05-29 +* **Documentation:** https://docs.microsoft.com/en-us/security/gsp/informationsharingandexchange +* **Description:** Collects CTIP (Sinkhole data) files from a shared Azure Storage. The feed is available via Microsoft’s Government Security Program (GSP). 
+* **Additional Information:** The cache is needed for memorizing which files have already been processed; the TTL should be higher than the oldest file available in the storage (currently the last three days are available). The connection string contains the endpoint as well as authentication information.
+
+### Collector
+
+* **Module:** intelmq.bots.collectors.microsoft.collector_azure
+* **Configuration Parameters:**
+* * `connection_string`: `{{your connection string}}`
+* * `container_name`: `ctip-infected-summary`
+* * `name`: `CTIP via Azure`
+* * `provider`: `Microsoft`
+* * `rate_limit`: `3600`
+* * `redis_cache_db`: `5`
+* * `redis_cache_host`: `127.0.0.1`
+* * `redis_cache_port`: `6379`
+* * `redis_cache_ttl`: `864000`
+
+### Parser
+
+* **Module:** intelmq.bots.parsers.microsoft.parser_ctip
+* **Configuration Parameters:**
+
+
 ## CTIP via Interflow
 
-* **Status:** on
-* **Revision:** 06-03-2018
-* **Description:** Collects CTIP files from the Interflow API.
+* **Public:** unknown
+* **Revision:** 2018-03-06
+* **Documentation:** https://docs.microsoft.com/en-us/security/gsp/informationsharingandexchange
+* **Description:** Collects CTIP (Sinkhole data) files from the Interflow API. The feed is available via Microsoft’s Government Security Program (GSP).
 * **Additional Information:** Depending on the file sizes you may need to increase the parameter 'http_timeout_sec' of the collector. As many IPs occur very often in the data, you may want to use a deduplicator specifically for the feed.
 
 ### Collector
@@ -1337,7 +1261,7 @@ server {
 * * `api_key`: `{{your API key}}`
 * * `file_match`: `^ctip_`
 * * `http_timeout_sec`: `300`
-* * `name`: `CTIP`
+* * `name`: `CTIP via Interflow`
 * * `not_older_than`: `2 days`
 * * `provider`: `Microsoft`
 * * `rate_limit`: `3600`
@@ -1352,9 +1276,10 @@ server {
 
 ## DGA
 
-* **Status:** on
-* **Revision:** 20-01-2018
-* **Description:** This feed lists DGA family, Domain, Start and end of valid time(UTC) of a number of DGA families. reference: http://data.netlab.360.com/dga
+* **Public:** yes
+* **Revision:** 2018-01-20
+* **Documentation:** http://data.netlab.360.com/dga
+* **Description:** This feed lists DGA family, Domain, Start and end of valid time (UTC) of a number of DGA families.
 
 ### Collector
@@ -1373,8 +1298,9 @@ server {
 
 ## Hajime Scanner
 
-* **Status:** on
-* **Revision:** 01-08-2019
+* **Public:** yes
+* **Revision:** 2019-08-01
+* **Documentation:** https://data.netlab.360.com/hajime/
 * **Description:** This feed lists IP addresses of the known Hajime bot network. These IP data are obtained by joining the DHT network and interacting with the Hajime node.
 
 ### Collector
@@ -1394,9 +1320,10 @@ server {
 
 ## Magnitude EK
 
-* **Status:** on
-* **Revision:** 20-01-2018
-* **Description:** This feed lists FQDN and possibly the URL used by Magnitude Exploit Kit. Information also includes the IP address used for the domain and last time seen. reference: http://data.netlab.360.com/ek
+* **Public:** yes
+* **Revision:** 2018-01-20
+* **Documentation:** http://data.netlab.360.com/ek
+* **Description:** This feed lists FQDN and possibly the URL used by Magnitude Exploit Kit. Information also includes the IP address used for the domain and last time seen.
 
 ### Collector
@@ -1415,9 +1342,10 @@ server {
 
 ## Mirai Scanner
 
-* **Status:** on
-* **Revision:** 20-01-2018
-* **Description:** This feed provides IP addresses which actively scan for vulnerable IoT devices and install Mirai Botnet.
reference: http://data.netlab.360.com/mirai-scanner/ +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://data.netlab.360.com/mirai-scanner/ +* **Description:** This feed provides IP addresses which actively scan for vulnerable IoT devices and install Mirai Botnet. ### Collector @@ -1434,137 +1362,52 @@ server { * **Configuration Parameters:** -# Nothink - -## DNS Attack - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** This feed provides attack information for attack information against DNS honeypots. reference: http://www.nothink.org/honeypot_dns.php . - -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http -* **Configuration Parameters:** -* * `http_url`: `http://www.nothink.org/honeypot_dns_attacks.txt` -* * `name`: `DNS Attack` -* * `provider`: `Nothink` -* * `rate_limit`: `3600` - -### Parser - -* **Module:** intelmq.bots.parsers.nothink.parser -* **Configuration Parameters:** - - -## SNMP - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** There are a number of feeds you can use to depend on how far back you would like to go. The time.source will still be the date and time the feed was generated at nothink. This feed provides IP addresses of systems that have connected to a honeypot via SNMP in the last 24 hours. reference: http://www.nothink.org/honeypot_snmp.php - -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http -* **Configuration Parameters:** -* * `http_url`: `http://www.nothink.org/blacklist/blacklist_snmp_day.txt` -* * `name`: `SNMP` -* * `provider`: `Nothink` -* * `rate_limit`: `86400` - -### Parser - -* **Module:** intelmq.bots.parsers.nothink.parser -* **Configuration Parameters:** - - -## SSH - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** There are a number of feeds you can use to depend on how far back you would like to go. The time.source will still be the date and time the feed was generated at nothink. This feed provides IP addresses of systems that have connected to a honeypot via SSH in the last 24 hours. Reference: http://www.nothink.org/honeypots.php - -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http -* **Configuration Parameters:** -* * `http_url`: `http://www.nothink.org/blacklist/blacklist_ssh_day.txt` -* * `name`: `SSH` -* * `provider`: `Nothink` -* * `rate_limit`: `86400` - -### Parser - -* **Module:** intelmq.bots.parsers.nothink.parser -* **Configuration Parameters:** - - -## Telnet - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** There are a number of feeds you can use to depend on how far back you would like to go. The time.source will still be the date and time the feed was generated at nothink. This feed provides IP addresses of systems that have connected to a honeypot via Telnet in the last 24 hours. 
reference: http://www.nothink.org/honeypots.php
-
-### Collector
-
-* **Module:** intelmq.bots.collectors.http.collector_http
-* **Configuration Parameters:**
-* * `http_url`: `http://www.nothink.org/blacklist/blacklist_telnet_day.txt`
-* * `name`: `Telnet`
-* * `provider`: `Nothink`
-* * `rate_limit`: `86400`
-
-### Parser
-
-* **Module:** intelmq.bots.parsers.nothink.parser
-* **Configuration Parameters:**
-
-
 # OpenPhish
 
-## Phishing
+## Premium Feed
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** unknown
+* **Revision:** 2018-02-06
+* **Documentation:** https://www.openphish.com/phishing_feeds.html
 * **Description:** OpenPhish is a fully automated self-contained platform for phishing intelligence. It identifies phishing sites and performs intelligence analysis in real time without human intervention and without using any external resources, such as blacklists.
+* **Additional Information:** Discounts available for Government and National CERTs as well as for Nonprofit and Not-for-Profit organizations.
 
 ### Collector
 
 * **Module:** intelmq.bots.collectors.http.collector_http
 * **Configuration Parameters:**
-* * `http_url`: `https://www.openphish.com/feed.txt`
-* * `name`: `Phishing`
+* * `http_password`: `{{ your password}}`
+* * `http_url`: `https://openphish.com/prvt-intell/`
+* * `http_username`: `{{ your username}}`
+* * `name`: `Premium Feed`
 * * `provider`: `OpenPhish`
 * * `rate_limit`: `86400`
 
 ### Parser
 
-* **Module:** intelmq.bots.parsers.openphish.parser
+* **Module:** intelmq.bots.parsers.openphish.parser_commercial
 * **Configuration Parameters:**
 
 
-# OpenPhish Commercial
+## Public feed
 
-## Phishing
-
-* **Status:** on
-* **Revision:** 06-02-2018
+* **Public:** yes
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.openphish.com/
 * **Description:** OpenPhish is a fully automated self-contained platform for phishing intelligence. It identifies phishing sites and performs intelligence analysis in real time without human intervention and without using any external resources, such as blacklists.
 
 ### Collector
 
 * **Module:** intelmq.bots.collectors.http.collector_http
 * **Configuration Parameters:**
-* * `http_password`: `{{ your password}}`
-* * `http_url`: `https://openphish.com/prvt-intell/`
-* * `http_username`: `{{ your username}}`
-* * `name`: `Phishing`
-* * `provider`: `OpenPhish Commercial`
+* * `http_url`: `https://www.openphish.com/feed.txt`
+* * `name`: `Public feed`
+* * `provider`: `OpenPhish`
 * * `rate_limit`: `86400`
 
 ### Parser
 
-* **Module:** intelmq.bots.parsers.openphish.parser_commercial
+* **Module:** intelmq.bots.parsers.openphish.parser
 * **Configuration Parameters:**
 
@@ -1572,8 +1415,9 @@ server {
 
 ## Online
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** unknown
+* **Revision:** 2018-01-20
+* **Documentation:** https://data.phishtank.com/
 * **Description:** PhishTank is a collaborative clearing house for data and information about phishing on the Internet.
 
 ### Collector
@@ -1595,8 +1439,8 @@ server {
 
 ## Agent Tesla
 
-* **Status:** on
-* **Revision:** 02-04-2019
+* **Public:** yes
+* **Revision:** 2019-04-02
 * **Documentation:** https://precisionsec.com/threat-intelligence-feeds/agent-tesla/
 * **Description:** Agent Tesla IoCs, URLs where the malware is hosted.
@@ -1623,8 +1467,9 @@ server {
 
 ## Via IMAP
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** unknown
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.shadowserver.org/what-we-do/network-reporting/
 * **Description:** Shadowserver sends out a variety of reports (see https://www.shadowserver.org/wiki/pmwiki.php/Services/Reports).
 * **Additional Information:** The configuration retrieves the data from e-mails via IMAP from the attachments.
 
@@ -1633,7 +1478,7 @@ server {
 * **Module:** intelmq.bots.collectors.mail.collector_mail_attach
 * **Configuration Parameters:**
 * * `attach_regex`: `csv.zip`
-* * `attach_unzip`: `True`
+* * `extract_files`: `True`
 * * `folder`: `INBOX`
 * * `mail_host`: `__HOST__`
 * * `mail_password`: `__PASSWORD__`
@@ -1652,8 +1497,9 @@ server {
 
 ## Via Request Tracker
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** unknown
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.shadowserver.org/what-we-do/network-reporting/
 * **Description:** Shadowserver sends out a variety of reports (see https://www.shadowserver.org/wiki/pmwiki.php/Services/Reports).
 * **Additional Information:** The configuration retrieves the data from a RT/RTIR ticketing instance via the attachment or a download.
 
@@ -1691,8 +1537,9 @@ server {
 
 ## ASN Drop
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** yes
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.spamhaus.org/drop/
 * **Description:** ASN-DROP contains a list of Autonomous System Numbers controlled by spammers or cyber criminals, as well as "hijacked" ASNs. ASN-DROP can be used to filter BGP routes which are being used for malicious purposes.
 
 ### Collector
@@ -1712,9 +1559,10 @@ server {
 
 ## CERT
 
-* **Status:** on
-* **Revision:** 20-01-2018
-* **Description:** Spamhaus CERT Insight Portal. Access limited to CERTs and CSIRTs with national or regional responsibility. https://www.spamhaus.org/news/article/705/spamhaus-launches-cert-insight-portal .
+* **Public:** unknown
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.spamhaus.org/news/article/705/spamhaus-launches-cert-insight-portal
+* **Description:** Spamhaus CERT Insight Portal. Access limited to CERTs and CSIRTs with national or regional responsibility.
 
 ### Collector
@@ -1733,8 +1581,9 @@ server {
 
 ## Drop
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** yes
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.spamhaus.org/drop/
 * **Description:** The DROP list will not include any IP address space under the control of any legitimate network - even if being used by "the spammers from hell". DROP will only include netblocks allocated directly by an established Regional Internet Registry (RIR) or National Internet Registry (NIR) such as ARIN, RIPE, AFRINIC, APNIC, LACNIC or KRNIC or direct RIR allocations.
 
 ### Collector
@@ -1754,8 +1603,9 @@ server {
 
 ## Dropv6
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** yes
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.spamhaus.org/drop/
 * **Description:** The DROPv6 list includes IPv6 ranges allocated to spammers or cyber criminals. DROPv6 will only include IPv6 netblocks allocated directly by an established Regional Internet Registry (RIR) or National Internet Registry (NIR) such as ARIN, RIPE, AFRINIC, APNIC, LACNIC or KRNIC or direct RIR allocations.
 ### Collector
@@ -1775,8 +1625,9 @@ server {
 
 ## EDrop
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** yes
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.spamhaus.org/drop/
 * **Description:** EDROP is an extension of the DROP list that includes sub-allocated netblocks controlled by spammers or cyber criminals. EDROP is meant to be used in addition to the direct allocations on the DROP list.
 
 ### Collector
@@ -1794,12 +1645,38 @@ server {
 * **Configuration Parameters:**
 
+
+# Strangereal Intel
+
+## DailyIOC
+
+* **Public:** yes
+* **Revision:** 2019-12-05
+* **Documentation:** https://github.com/StrangerealIntel/DailyIOC
+* **Description:** Daily IOCs from tweets and articles.
+* **Additional Information:** The collector's `extra_fields` parameter may contain any of the fields from the GitHub [content API response](https://developer.github.com/v3/repos/contents/).
+
+### Collector
+
+* **Module:** intelmq.bots.collectors.github_api.collector_github_contents_api
+* **Configuration Parameters:**
+* * `basic_auth_password`: `PASSWORD`
+* * `basic_auth_username`: `USERNAME`
+* * `regex`: `.*.json`
+* * `repository`: `StrangerealIntel/DailyIOC`
+
+### Parser
+
+* **Module:** intelmq.bots.parsers.github_feed
+* **Configuration Parameters:**
+
+
 # Sucuri
 
 ## Hidden IFrames
 
-* **Status:** on
-* **Revision:** 28-01-2018
+* **Public:** yes
+* **Revision:** 2018-01-28
+* **Documentation:** http://labs.sucuri.net/?malware
 * **Description:** Latest hidden iframes identified on compromised web sites.
 * **Additional Information:** Please note that the parser only extracts the hidden iframes and the conditional redirects, not the encoded javascript.
 
@@ -1822,8 +1699,8 @@ server {
 
 ## Malicious Domains
 
-* **Status:** on
-* **Revision:** 04-09-2018
+* **Public:** unknown
+* **Revision:** 2018-09-04
 * **Description:** Detected malicious domains. Note that you have to open up Sponsored Datafeed Service (SDS) access to the SURBL data via rsync for your IP address.
 
 ### Collector
@@ -1841,18 +1718,19 @@ server {
 
 # Taichung
 
-## Netflow
+## Netflow Recent
 
-* **Status:** on
-* **Revision:** 20-01-2018
-* **Description:** Abnormal flows detected.
+* **Public:** yes
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.tc.edu.tw/net/netflow/lkout/recent/
+* **Description:** Abnormal flows detected: attacking hosts (DoS, brute-force, scanners) and malicious hosts (C&C servers, hosting malware).
 
 ### Collector
 
 * **Module:** intelmq.bots.collectors.http.collector_http
 * **Configuration Parameters:**
-* * `http_url`: `https://www.tc.edu.tw/net/netflow/lkout/recent/30`
-* * `name`: `Netflow`
+* * `http_url`: `https://www.tc.edu.tw/net/netflow/lkout/recent/`
+* * `name`: `Netflow Recent`
 * * `provider`: `Taichung`
 * * `rate_limit`: `3600`
 
@@ -1860,15 +1738,15 @@ server {
 * **Module:** intelmq.bots.parsers.taichung.parser
 * **Configuration Parameters:**
-* * `error_log_message`: `false`
 
 
 # Team Cymru
 
 ## CAP
 
-* **Status:** on
-* **Revision:** 20-01-2018
+* **Public:** unknown
+* **Revision:** 2018-01-20
+* **Documentation:** https://www.team-cymru.com/CSIRT-AP.html https://www.cymru.com/$certname/report_info.txt
 * **Description:** Team Cymru provides daily lists of compromised or abused devices for the ASNs and/or netblocks with a CSIRT's jurisdiction.
This includes such information as bot infected hosts, command and control systems, open resolvers, malware urls, phishing urls, and brute force attacks * **Additional Information:** "Two feeds types are offered: * The new https://www.cymru.com/$certname/$certname_{time[%Y%m%d]}.txt @@ -1894,10 +1772,11 @@ server { * **Configuration Parameters:** -## Full Bogons +## Full Bogons IPv4 -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://www.team-cymru.com/bogon-reference-http.html * **Description:** Fullbogons are a larger set which also includes IP space that has been allocated to an RIR, but not assigned by that RIR to an actual ISP or other end-user. IANA maintains a convenient IPv4 summary page listing allocated and reserved netblocks, and each RIR maintains a list of all prefixes that they have assigned to end-users. Our bogon reference pages include additional links and resources to assist those who wish to properly filter bogon prefixes within their networks. ### Collector @@ -1905,7 +1784,29 @@ server { * **Module:** intelmq.bots.collectors.http.collector_http * **Configuration Parameters:** * * `http_url`: `https://www.team-cymru.org/Services/Bogons/fullbogons-ipv4.txt` -* * `name`: `Full Bogons` +* * `name`: `Full Bogons IPv4` +* * `provider`: `Team Cymru` +* * `rate_limit`: `129600` + +### Parser + +* **Module:** intelmq.bots.parsers.cymru.parser_full_bogons +* **Configuration Parameters:** + + +## Full Bogons IPv6 + +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://www.team-cymru.com/bogon-reference-http.html +* **Description:** Fullbogons are a larger set which also includes IP space that has been allocated to an RIR, but not assigned by that RIR to an actual ISP or other end-user. IANA maintains a convenient IPv4 summary page listing allocated and reserved netblocks, and each RIR maintains a list of all prefixes that they have assigned to end-users. Our bogon reference pages include additional links and resources to assist those who wish to properly filter bogon prefixes within their networks. + +### Collector + +* **Module:** intelmq.bots.collectors.http.collector_http +* **Configuration Parameters:** +* * `http_url`: `https://www.team-cymru.org/Services/Bogons/fullbogons-ipv6.txt` +* * `name`: `Full Bogons IPv6` * * `provider`: `Team Cymru` * * `rate_limit`: `129600` @@ -1919,8 +1820,8 @@ server { ## Recent domains -* **Status:** on -* **Revision:** 06-02-2018 +* **Public:** yes +* **Revision:** 2018-02-06 * **Documentation:** https://www.threatminer.org/ * **Description:** Latest malicious domains. @@ -1943,8 +1844,9 @@ server { ## Greylist -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://project.turris.cz/greylist-data/legend.txt * **Description:** The data are processed and clasified every week and behaviour of IP addresses that accessed a larger number of Turris routers is evaluated. The result is a list of addresses that have tried to obtain information about services on the router or tried to gain access to them. We publish this so called "greylist" that also contains a list of tags for each address which indicate what behaviour of the address was observed. ### Collector @@ -1962,57 +1864,14 @@ server { * **Configuration Parameters:** -# URLVir - -## Hosts - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** This feed provides FQDN's or IP addresses for Active Malicious Hosts. 
- -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http -* **Configuration Parameters:** -* * `http_url`: `http://www.urlvir.com/export-hosts/` -* * `name`: `Hosts` -* * `provider`: `URLVir` -* * `rate_limit`: `129600` - -### Parser - -* **Module:** intelmq.bots.parsers.urlvir.parser -* **Configuration Parameters:** - - -## IPs - -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** This feed provides IP addresses hosting Malware. - -### Collector - -* **Module:** intelmq.bots.collectors.http.collector_http -* **Configuration Parameters:** -* * `http_url`: `http://www.urlvir.com/export-ip-addresses/` -* * `name`: `IPs` -* * `provider`: `URLVir` -* * `rate_limit`: `129600` - -### Parser - -* **Module:** intelmq.bots.parsers.urlvir.parser -* **Configuration Parameters:** - - # University of Toulouse ## Blacklist -* **Status:** on -* **Revision:** 20-01-2018 -* **Description:** The collections and feed description can be found on: https://dsi.ut-capitole.fr/blacklists/. +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** https://dsi.ut-capitole.fr/blacklists/ +* **Description:** Various blacklist feeds ### Collector @@ -2037,8 +1896,9 @@ server { ## URLs -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** yes +* **Revision:** 2018-01-20 +* **Documentation:** http://vxvault.net/ViriList.php * **Description:** This feed provides IP addresses hosting Malware. ### Collector @@ -2060,8 +1920,9 @@ server { ## Unsafe sites -* **Status:** on -* **Revision:** 27-06-2018 +* **Public:** yes +* **Revision:** 2018-06-27 +* **Documentation:** https://viriback.com/ * **Description:** Latest detected unsafe sites. * **Additional Information:** You need to install the lxml library in order to parse this feed. @@ -2088,8 +1949,8 @@ server { ## Unsafe sites -* **Status:** on -* **Revision:** 09-03-2018 +* **Public:** yes +* **Revision:** 2018-03-09 * **Description:** Latest detected unsafe sites. ### Collector @@ -2111,8 +1972,9 @@ server { ## Defacements -* **Status:** on -* **Revision:** 20-01-2018 +* **Public:** unknown +* **Revision:** 2018-01-20 +* **Documentation:** https://zone-h.org/ * **Description:** all the information contained in Zone-H's cybercrime archive were either collected online from public sources or directly notified anonymously to us. ### Collector @@ -2120,7 +1982,7 @@ server { * **Module:** intelmq.bots.collectors.mail.collector_mail_attach * **Configuration Parameters:** * * `attach_regex`: `csv` -* * `attach_unzip`: `False` +* * `extract_files`: `False` * * `folder`: `INBOX` * * `mail_host`: `__HOST__` * * `mail_password`: `__PASSWORD__` diff --git a/docs/INSTALL.md b/docs/INSTALL.md index d3a41029b..e997b50fc 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -1,10 +1,9 @@ **Table of Contents:** - [Requirements](#requirements) - [Install Dependencies](#install-dependencies) - - [Debian 8](#debian-8) - - [Ubuntu 16.04 / Ubuntu 18.04 / Debian 9](#ubuntu-1604--ubuntu-1804--debian-9) + - [Ubuntu / Debian](#ubuntu--debian) - [CentOS 7 / RHEL 7](#centos-7--rhel-7) - - [openSUSE Leap 15.0 / 15.1](#opensuse-leap-150--151) + - [openSUSE Leap 15.1](#opensuse-leap-151) - [Installation](#installation) - [Native Packages](#native-packages) - [PyPi](#pypi) @@ -20,38 +19,21 @@ For testing pre-releases see also the [Developer's Guide](Developers-Guide.md#te # Requirements -The following instructions assume the following requirements: +The following instructions assume the following requirements. Python versions >= 3.5 are supported. 
Supported and recommended operating systems are:
 
 * CentOS 7
-* Debian 8, 9 and 10
-* OpenSUSE Leap 15.0 and 15.1
-* Ubuntu: 16.04, 18.04 and 19.04
+* Debian 9 and 10
+* OpenSUSE Leap 15.1
+* Ubuntu: 16.04, 18.04, 19.10, 20.04
 
-Other distributions which are (most probably) supported include RHEL, Fedora and openSUSE Tumbleweed.
+Other distributions which are (most probably) supported include CentOS 8, RHEL, Fedora and openSUSE Tumbleweed.
 
 # Install Dependencies
 
 If you are using native packages, you can simply skip this section as all dependencies are installed automatically.
 
-## Debian 8
-
-```bash
-apt-get install python3 python3-pip
-apt-get install build-essential libffi-dev
-apt-get install python3-dev
-apt-get install redis-server
-```
-
-**Special note for Debian 8**:
-if you are using Debian 8, you need to install this package extra: ``apt-get install libgnutls28-dev``.
-In addition, Debian 8 has an old version of pip3. Please get a current one via:
-```bash
-curl "https://bootstrap.pypa.io/get-pip.py" -o "/tmp/get-pip.py"
-python3.4 /tmp/get-pip.py
-```
-
-## Ubuntu 16.04 / Ubuntu 18.04 / Debian 9
+## Ubuntu / Debian
 
 ```bash
 apt install python3-pip python3-dnspython python3-psutil python3-redis python3-requests python3-termstyle python3-tz python3-dateutil
@@ -68,18 +50,12 @@ apt install python3-sleekxmpp python3-pymongo python3-psycopg2
 
 ```bash
 yum install epel-release
-yum install python34 python34-devel
+yum install python36 python36-devel python36-requests
 yum install gcc gcc-c++
 yum install redis
 ```
 
-Install the last pip version:
-```bash
-curl "https://bootstrap.pypa.io/get-pip.py" -o "/tmp/get-pip.py"
-python3.4 /tmp/get-pip.py
-```
-
-## openSUSE 15.0 / 15.1
+## openSUSE 15.1
 
 ```bash
 zypper install python3-dateutil python3-dnspython python3-psutil python3-pytz python3-redis python3-requests python3-python-termstyle
@@ -117,7 +93,8 @@ Supported Operating Systems:
 * **openSUSE Tumbleweed**
 * **Ubuntu 16.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ xenial main`)
 * **Ubuntu 18.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ bionic main`)
-* **Ubuntu 19.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ disco main`)
+* **Ubuntu 19.10** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ eoan main`)
+* **Ubuntu 20.04** (enable the universe repositories by appending ` universe` in `/etc/apt/sources.list` to `deb http://[...].archive.ubuntu.com/ubuntu/ focal main`)
 
 Get the installation instructions for your operating system here: [Installation Native Packages](https://software.opensuse.org/download.html?project=home%3Asebix%3Aintelmq&package=intelmq).
@@ -133,11 +110,11 @@ pip3 install intelmq
 useradd -d /opt/intelmq -U -s /bin/bash intelmq
 sudo intelmqsetup
 ```
-
+`intelmqsetup` creates all necessary directories and provides a default configuration for new setups. See [the user-guide section on paths](User-Guide.md#opt-and-lsb-paths) for more information on them and how to influence them.
 
 ## Additional Information
 
-Following any one of the installation methods mentioned before setup the IntelMQ base.
However, some bots have additional dependencies which are mentioned in their own documentation available on this [directory](https://github.com/certtools/intelmq/tree/develop/intelmq/bots).
+Following any one of the installation methods mentioned before will set up the IntelMQ base. However, some bots may have additional dependencies, which are mentioned in their own documentation, available in the [Bots documentation](https://github.com/certtools/intelmq/tree/develop/docs/Bots.md).

 # Afterwards

diff --git a/docs/IntelMQ-3.0-Architecture.md b/docs/IntelMQ-3.0-Architecture.md
new file mode 100644
index 000000000..f58329c82
--- /dev/null
+++ b/docs/IntelMQ-3.0-Architecture.md
@@ -0,0 +1,153 @@
+# Idea list and architecture of IntelMQ 3.0
+
+Authors: Aaron Kaplan, Sebastian Wagner
+
+## Use-cases
+
+XXX fill in a complete list of use cases XXX
+
+### CERTs
+
+No direct access to networks in constituency.
+
+#### Data collection
+
+#### Distribution of information
+
+#### National CERT
+
+Work is based heavily on geolocation.
+
+#### Sector CERT
+
+Work is based on known constituents, sector information, lists of IP address ranges and domains, company & organisation names.
+
+### SOCs and NOCs
+
+Goal is the protection of internal known networks only. Direct access to the networks.
+
+Involves collecting information from internal infrastructure, matching IoCs to internal infrastructure, using IoCs for active protection.
+
+### Data science and research
+
+## Users
+
+XXX fill in a complete list of use cases XXX
+
+## RESTful API
+
+For automation purposes, we will need a typical RESTful API to manage, control and monitor the IntelMQ "botnet", and to read and set configs.
+See [#1424](https://github.com/certtools/intelmq/issues/1424)
+
+## UX
+
+
+
+
+### Devops/Sysadmin perspective
+
+#### Docker
+
+_Task_: create a setup where each bot MAY run in a docker container
+
+_Background_: It might make sense to be able to run each bot in a docker container since it fits with a lot of new paradigms in orchestration.
+With a proper template, each bot running in a docker container could send its logs to some central logger (for example splunk or similar) and
+the sysadmin/devops teams which already use these systems for monitoring and alerting can fit the IntelMQ logs and alerts into their regular daily routine.
+Docker also allows the sysadmin/devops folks to centrally manage the system.
+
+_Think about_: how do we integrate the pipeline graph?
+
+_Category_: this feature should be OPTIONAL.
+
+#### Tutorials and VMs / dockers
+
+_Task_: create tutorials with VMs/docker images.
+
+_Background_:
+We are missing good tutorials ("playbooks") on how to run certain workflows via IntelMQ. Ideally, we would offer ready-made VMs/docker images for people who want to
+try out IntelMQ (and consequently adapt the setup to their own needs). This also helps teachers/presenters who want to demo IntelMQ.
+
+Specifically we would like to have:
+ * how to process shadowserver feeds
+ * how to process shodan data
+ * how to process n6 data
+
+_Think about_: shadowserver already created some training material. Build on this.
+
+_Category_: OPTIONAL component, but highly needed.
+
+
+## Architecture
+
+
+
+### Message queue
+
+_Task_: Create a Kafka MQ backend: add Kafka as a replaceable MQ for IntelMQ 3.0
+
+_Background_: IntelMQ 2.0 supports AMQP (RabbitMQ) next to redis as a message queue. Many organisations use Kafka internally; supporting Kafka would allow connecting IntelMQ to these existing workflows.
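+
+As a rough illustration, such a backend could map one Kafka topic to one IntelMQ queue. The sketch below uses the third-party `kafka-python` library; the class and its methods are hypothetical and not part of any existing IntelMQ API:
+
+```python
+# Hypothetical sketch only, NOT an existing IntelMQ class.
+# Assumes `pip install kafka-python` and a broker on localhost:9092.
+from kafka import KafkaConsumer, KafkaProducer
+
+
+class KafkaPipeline:
+    """Maps IntelMQ queues 1:1 to Kafka topics."""
+
+    def __init__(self, bootstrap_servers='localhost:9092'):
+        self.bootstrap_servers = bootstrap_servers
+        self.producer = KafkaProducer(bootstrap_servers=bootstrap_servers)
+
+    def send(self, queue: str, message: bytes):
+        # one Kafka topic per IntelMQ queue
+        self.producer.send(queue, value=message)
+        self.producer.flush()
+
+    def receive(self, queue: str):
+        # yields raw message bytes, oldest first
+        consumer = KafkaConsumer(queue,
+                                 bootstrap_servers=self.bootstrap_servers,
+                                 auto_offset_reset='earliest')
+        for record in consumer:
+            yield record.value
+```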
+
+_Think about_: Using [Apache Pulsar](https://pulsar.apache.org/)
+
+
+_Category_: SHOULD
+
+
+## Notification settings
+
+_Task_: Keep notification settings per event: where to (destination mail/host address), how (protocol, authentication (SSL client certificate), etc.), how often/time information (intervals etc.)
+
+_Background_: CERTs (and potentially other groups of users) need to specify where the events should be sent to, how often etc. Currently only destination email addresses can be saved (`source.abuse_contact`), which is not enough for most use-cases. There exist some custom solutions (e.g. `notify` boolean at cert.at (to be changed), `extra.processing` dictionary at BSI), but no least common denominator.
+
+See also https://github.com/certtools/intelmq/issues/758
+
+_Category_: this feature should be OPTIONAL but is NEEDED by several users.
+
+
+## Configuration parameter handling in Bots and a bot's unified documentation
+
+_Task_: Handle bots' configuration parameters by the core, providing type sanitation, checks, default values and documentation.
+
+_Background_: Currently every bot needs to handle these issues itself, but many of these checks could be done centrally in a generic way. At upgrades, new configuration might get introduced and the bots need to provide default values although they are available in BOTS. Error handling on parameters must be done by every bot itself. Documentation is available neither to the bots themselves nor in BOTS and the Manager. There are 3 places for parameters where the available information is spread: BOTS, `Bots.md` and the bots' code.
+
+
+## Automatic Monitoring & Management: Handling full load situations
+
+_Task_: Create a solution to prevent system over-loading (only for Redis).
+
+_Background_: If too much data is ingested, collected or enriched, the system can easily run out of memory. This quickly causes major operation troubles and data loss, needing manual intervention.
+
+See also: https://github.com/certtools/intelmq/issues/709
+
+
+## Making intelmq plug-able and getting rid of BOTS
+
+_Task_: Allow installation of IntelMQ bots, meaning the deprecation of the centralized BOTS file and a generated documentation.
+
+_Background_: Adapting IntelMQ to specific needs also means the development of specific bots which might not be part of the public repository. Adding them to an existing IntelMQ installation is currently only possible by cloning the repository and adding the code there, not by just providing/installing the required code (because of BOTS and central documentation).
+
+See also https://github.com/certtools/intelmq/issues/972
+
+
+## Exposing a plug-in or hooking API
+
+_Task_: Provide a hooking API for the core classes.
+
+_Background_: Adapting IntelMQ to specific needs can require adaptations in the core classes' code. Instead of making the changes/extensions in the core itself, we can provide a hook system allowing users to call (or replace?) functions at specific steps. For example custom monitoring.
+
+
+## Grouping of events
+
+_Task_: Provide possibilities to assign an event to a group of events.
+
+_Background_: Several IoCs can be part of one MISP event. Grouping of similar events into one group for outputs (e.g. one CSV file per network).
+
+See also: https://github.com/certtools/intelmq/issues/751
+
+## Data Format: Multiple values
+
+_Task_: Allow multiple values for (some) fields in the data format.
+
+_Background_: In some cases one value per field is not enough, for example for Domain -> IP address lookups.
Other formats like IDEA and n6 support this. + +See also: https://github.com/certtools/intelmq/issues/543 https://github.com/certtools/intelmq/issues/373 diff --git a/docs/MISP-Integrations.md b/docs/MISP-Integrations.md new file mode 100644 index 000000000..e265f4507 --- /dev/null +++ b/docs/MISP-Integrations.md @@ -0,0 +1,27 @@ +# MISP integrations in IntelMQ + +## MISP API Collector + +The MISP API Collector fetches data from MISP via the MISP API. + +Look at the Bots' documentation for more information. + +## Coming soon: MISP Expert + +The MISP Expert will search MISP by API +for attributes/events matching data of the event. + +Look at the Bots' documentation for more information. + +## MISP Feed Output + +This bot creates a complete "MISP feed" ready to be configured in MISP as incoming data source. + +Look at the Bots' documentation for more information. + + +## MISP API Output + +Can be used to directly create MISP events in a MISP instance. + +Look at the Bots' documentation for more information. diff --git a/docs/Release.md b/docs/Release.md index 3d7dd2c9c..5b61d3e8e 100644 --- a/docs/Release.md +++ b/docs/Release.md @@ -10,13 +10,14 @@ - [Prepare new version](#prepare-new-version) -General assumption: You are working on branch maintenance, the next version is a bug fix release. For feature releaeses it is slightly different. +General assumption: You are working on branch maintenance, the next version is a bug fix release. For feature releases it is slightly different. ## Check before * Make sure the current state is really final ;) You can test most of the steps described here locally before doing it real. * Check the upgrade functions in `intelmq/lib/upgrades.py`. + * Close the milestone on GitHub and move any open issues to the next one. ## Documentation @@ -36,7 +37,7 @@ Why a separate branch? Because if problems show up, you can still force-push to Tag the commit with `git tag -s version HEAD`, merge it into master, push the branches *and* the tag. The tag is just `a.b.c`, not prefixed with `v` (that was necessary only with SVN a long time ago...). -Go to https://github.com/certtools/intelmq/tags and enter the release notes (changelog) for the new tag, then it's considered a release by github. +Go to https://github.com/certtools/intelmq/tags and enter the release notes (from the CHANGELOG) for the new tag, then it's considered a *release* by GitHub. ## Tarballs and PyPI @@ -63,8 +64,9 @@ For bigger releases, probably also at IHAP, Twitter, etc. Ask your favorite soci Increase the version in `intelmq/version.py` and declare it as alpha version. Add the new version in `intelmq/lib/upgrades.py`. +Add a new entry in `debian/changelog` with `dch -v [version] -c debian/changelog`. -Add a new empty changelog and news section. For the changelog: +Add new entries to `CHANGELOG.md` and `NEWS.md`. For `CHANGELOG.md`: ``` ### Configuration @@ -96,7 +98,7 @@ Add a new empty changelog and news section. For the changelog: ### Known issues ``` -And for the news: +And for `NEWS.md`: ``` ### Requirements diff --git a/docs/User-Guide.md b/docs/User-Guide.md index bf3b0aae5..cf278a67e 100644 --- a/docs/User-Guide.md +++ b/docs/User-Guide.md @@ -12,6 +12,7 @@ For upgrade instructions, see [UPGRADING.md](UPGRADING.md). 
- [Miscellaneous](#miscellaneous)
 - [Pipeline Configuration](#pipeline-configuration)
 - [Runtime Configuration](#runtime-configuration)
+  - [Multithreading (Beta)](#multithreading-beta)
 - [Harmonization Configuration](#harmonization-configuration)
 - [Utilities](#utilities)
 - [Management](#management)
@@ -49,24 +50,20 @@ systemctl start redis.service

 ## /opt and LSB paths

-If you installed the packages, LSB paths are used instead of `/opt/intelmq`.
-Otherwise, the configuration directory is `/opt/intelmq/etc/`.
+If you installed the packages, standard Linux paths (LSB paths) are used: `/var/log/intelmq/`, `/etc/intelmq/`, `/var/lib/intelmq/`, `/var/run/intelmq/`.
+Otherwise, the configuration directory is `/opt/intelmq/etc/`. Using the environment variable `INTELMQ_ROOT_DIR` allows setting any arbitrary root directory.

 You can switch this by setting the environment variables `INTELMQ_PATHS_NO_OPT` and `INTELMQ_PATHS_OPT`, respectively.
 * When installing the Python packages, you can set `INTELMQ_PATHS_NO_OPT` to something non-empty to use LSB-paths.
-* When installing the deb/rpm packages, you can set `INTELMQ_PATHS_OPT` to something non-empty to use `/opt/` paths.
+* When installing the deb/rpm packages, you can set `INTELMQ_PATHS_OPT` to something non-empty to use `/opt/intelmq/` paths, or a path set with `INTELMQ_ROOT_DIR`.
+
+The environment variable `ROOT_DIR` is meant to set an alternative root directory instead of `/`. This is primarily meant for package build environments and is analogous to setuptools' `--root` parameter. Thus it is only used in LSB-mode.

 ## Overview

-All files are JSON. By
-default, the installation method puts its distributed configuration files into
-`etc/examples`, so it does not overwrite your local configuration. Prior to the
-first run, copy them to `etc`:
+All configuration files are in the JSON format.
+For new installations, a default setup with some examples is provided by the `intelmqsetup` tool. If this is not the case, make sure the program was run (see installation instructions).

-```bash
-cd /opt/intelmq/etc
-cp -a examples/* .
-```

 * `defaults.conf`: default values for all bots and their behavior, e.g. error handling, log options and pipeline configuration. Will be removed in the [future](https://github.com/certtools/intelmq/issues/267).
@@ -79,7 +76,7 @@ To configure a new bot, you need to define and configure it in `runtime.conf` us
 Configure source and destination queues in `pipeline.conf`.
 Use the IntelMQ Manager mentioned above to generate the configuration files if unsure.

-In the shipped examples 4 collectors and parsers, 6 common experts and one output are configured. The default collector and the parser handle data from malware domain list, the file output bot writes all data to `/opt/intelmq/var/lib/bots/file-output/events.txt`.
+In the shipped examples 4 collectors and parsers, 6 common experts and one output are configured. The default collector and the parser handle data from malware domain list; the file output bot writes all data to `/opt/intelmq/var/lib/bots/file-output/events.txt`/`/var/lib/intelmq/bots/file-output/events.txt`.

 ## System Configuration (defaults)

@@ -89,7 +86,7 @@ Example:

 * `logging_handler`: Can be one of `"file"` or `"syslog"`.
 * `logging_level`: Defines the system-wide log level that will be used by all bots and the intelmqctl tool. Possible values are: `"CRITICAL"`, `"ERROR"`, `"WARNING"`, `"INFO"` and `"DEBUG"`.
-* `logging_path`: If `logging_handler` is `file`.
Defines the system-wide log-folder that will be use by all bots and the intelmqctl tool. Default value: `/opt/intelmq/var/log/`
+* `logging_path`: If `logging_handler` is `file`. Defines the system-wide log-folder that will be used by all bots and the intelmqctl tool. Default value: `/opt/intelmq/var/log/`/`/var/log/intelmq/`.
 * `logging_syslog`: If `logging_handler` is `syslog`. Either a list with hostname and UDP port of syslog service, e.g. `["localhost", 514]` or a device name/path, e.g. the default `"/var/log"`.

 We recommend `logging_level` `WARNING` for production environments and `INFO` if you want more details. In any case, watch your free disk space.
@@ -124,6 +121,7 @@ If the path `_on_error` exists for a bot, the message is also sent to this queue
 * **`load_balance`** - this option allows you to choose the behavior of the queue. Use the following values:
   * **`true`** - splits the messages into several queues without duplication
   * **`false`** - duplicates the messages into each queue
+  * When using AMQP as message broker, take a look at the [Multithreading](#multithreading-beta) section and the `instances_threads` parameter.
 * **`broker`** - select which broker intelmq can use. Use the following values:
   * **`redis`** - Redis allows some persistence but is not so fast as ZeroMQ (in development). But note that persistence has to be manually activated. See http://redis.io/topics/persistence
@@ -182,7 +180,7 @@ supervisor.rpcinterface_factory=supervisor_twiddler.rpcinterface:make_twiddler_r
 [group:intelmq]
 ```

-Change IntelMQ process manager in `/opt/intelmq/etc/defaults.conf`:
+Change IntelMQ process manager in the *defaults* configuration:

 ```
 "process_manager": "supervisor",
@@ -438,106 +436,37 @@ See the [IntelMQ Manager repository](https://github.com/certtools/intelmq-manage

 ### Command-line interface: intelmqctl

-**Syntax:**
+**Syntax:** see `intelmqctl -h`

-```bash
-# su - intelmq
-$ intelmqctl -h
-usage: intelmqctl [-h] [-v] [--type {text,json}] [--quiet]
-                  {list,check,clear,log,run,help,start,stop,restart,reload,status,enable,disable}
-                  ...
+* Starting a bot: `intelmqctl start bot-id`
+* Stopping a bot: `intelmqctl stop bot-id`
+* Reloading a bot: `intelmqctl reload bot-id`
+* Restarting a bot: `intelmqctl restart bot-id`
+* Get status of a bot: `intelmqctl status bot-id`

-        description: intelmqctl is the tool to control intelmq system.
+* Run a bot directly for debugging purposes and temporarily raise the logging level to DEBUG: `intelmqctl run bot-id`
+* Get a pdb (or ipdb if installed) live console: `intelmqctl run bot-id console`
+* See the message that waits in the input queue: `intelmqctl run bot-id message get`
+* See additional help for further explanation: `intelmqctl run bot-id --help`

-        Outputs are logged to /opt/intelmq/var/log/intelmqctl
+* Starting the botnet (all bots): `intelmqctl start`
+* Starting a group of bots: `intelmqctl start --group experts`
+
+* Get a list of all configured bots: `intelmqctl list bots`
+* Get a list of all queues: `intelmqctl list queues`
+  If -q is given, only queues with more than one item are listed.
+* Get a list of all queues and status of the bots: `intelmqctl list queues-and-status`
+
+* Clear a queue: `intelmqctl clear queue-id`
+* Get logs of a bot: `intelmqctl log bot-id number-of-lines log-level`
+  Reads the last lines from bot log.
+  Log level should be one of DEBUG, INFO, ERROR or CRITICAL.
+  Default is INFO. Number of lines defaults to 10, -1 gives all. Result
+  can be longer due to our logging format!
+ +* Upgrade from a previous version: `intelmqctl upgrade-config` + Make a backup of your configuration first, also including bot's configuration files. -optional arguments: - -h, --help show this help message and exit - -v, --version show program's version number and exit - --type {text,json}, -t {text,json} - choose if it should return regular text or other - machine-readable - --quiet, -q Quiet mode, useful for reloads initiated scripts like - logrotate - -subcommands: - {list,check,clear,log,run,help,start,stop,restart,reload,status,enable,disable} - list Listing bots or queues - check Check installation and configuration - clear Clear a queue - log Get last log lines of a bot - run Run a bot interactively - check Check installation and configuration - help Show the help - start Start a bot or botnet - stop Stop a bot or botnet - restart Restart a bot or botnet - reload Reload a bot or botnet - status Status of a bot or botnet - enable Enable a bot - disable Disable a bot - - intelmqctl [start|stop|restart|status|reload] --group [collectors|parsers|experts|outputs] - intelmqctl [start|stop|restart|status|reload] bot-id - intelmqctl [start|stop|restart|status|reload] - intelmqctl list [bots|queues|queues-and-status] - intelmqctl log bot-id [number-of-lines [log-level]] - intelmqctl run bot-id message [get|pop|send] - intelmqctl run bot-id process [--msg|--dryrun] - intelmqctl run bot-id console - intelmqctl clear queue-id - intelmqctl check - -Starting a bot: - intelmqctl start bot-id -Stopping a bot: - intelmqctl stop bot-id -Reloading a bot: - intelmqctl reload bot-id -Restarting a bot: - intelmqctl restart bot-id -Get status of a bot: - intelmqctl status bot-id - -Run a bot directly for debugging purpose and temporarily leverage the logging level to DEBUG: - intelmqctl run bot-id -Get a pdb (or ipdb if installed) live console. - intelmqctl run bot-id console -See the message that waits in the input queue. - intelmqctl run bot-id message get -See additional help for further explanation. - intelmqctl run bot-id --help - -Starting the botnet (all bots): - intelmqctl start - etc. - -Starting a group of bots: - intelmqctl start --group experts - etc. - -Get a list of all configured bots: - intelmqctl list bots - -Get a list of all queues: - intelmqctl list queues -If -q is given, only queues with more than one item are listed. - -Get a list of all queues and status of the bots: - intelmqctl list queues-and-status - -Clear a queue: - intelmqctl clear queue-id - -Get logs of a bot: - intelmqctl log bot-id number-of-lines log-level -Reads the last lines from bot log. -Log level should be one of DEBUG, INFO, ERROR or CRITICAL. -Default is INFO. Number of lines defaults to 10, -1 gives all. Result -can be longer due to our logging format! - -Outputs are additionally logged to /opt/intelmq/var/log/intelmqctl -``` #### Botnet Concept @@ -620,7 +549,7 @@ redis-cli FLUSHALL ### Tool: intelmqdump -When bots are failing due to bad input data or programming errors, they can dump the problematic message to a file along with a traceback, if configured accordingly. These dumps are saved at `/opt/intelmq/var/log/[botid].dump` as JSON files. IntelMQ comes with an inspection and reinjection tool, called `intelmqdump`. It is an interactive tool to show all dumped files and the number of dumps per file. Choose a file by bot-id or listed numeric id. 
You can then choose to delete single entries from the file with `e 1,3,4`, show a message in more readable format with `s 1` (prints the raw-message, can be long!), recover some messages and put them back in the pipeline for the bot by `a` or `r 0,4,5`. Or delete the file with all dumped messages using `d`.
+When bots are failing due to bad input data or programming errors, they can dump the problematic message to a file along with a traceback, if configured accordingly. These dumps are saved in the logging directory as `[botid].dump`, as JSON files. IntelMQ comes with an inspection and reinjection tool, called `intelmqdump`. It is an interactive tool to show all dumped files and the number of dumps per file. Choose a file by bot-id or listed numeric id. You can then choose to delete single entries from the file with `e 1,3,4`, show a message in more readable format with `s 1` (prints the raw-message, can be long!), recover some messages and put them back in the pipeline for the bot by `a` or `r 0,4,5`. Or delete the file with all dumped messages using `d`.

 ```bash
 $ intelmqdump -h
@@ -637,6 +566,8 @@ positional arguments:

 optional arguments:
   -h, --help            show this help message and exit
+  --truncate TRUNCATE, -t TRUNCATE
+                        Truncate raw-data with more characters than given. 0 for no truncating. Default: 1000.

 Interactive actions after a file has been selected:
 - r, Recover by IDs
@@ -691,12 +622,15 @@ Deleted file /opt/intelmq/var/log/dragon-research-group-ssh-parser.dump

 Bots and the intelmqdump tool use file locks to prevent writing to already opened files. Bots try to lock the file for up to 60 seconds if the dump file is already locked by another process (intelmqdump) and then give up. Intelmqdump does not wait and instead only shows an error message.

+By default, the `show` command truncates the `raw` field of messages at 1000 characters. To change this limit or disable truncating entirely (value 0), use the `--truncate` parameter.
+
 ## Monitoring Logs

-All bots and `intelmqctl` log to `/opt/intelmq/var/log/`. In case of failures, messages are dumped to the same directory with the file ending `.dump`.
+All bots and `intelmqctl` log to `/opt/intelmq/var/log/`/`/var/log/intelmq/` (depending on your installation). In case of failures, messages are dumped to the same directory with the file ending `.dump`.

 ```bash
 tail -f /opt/intelmq/var/log/*.log
+tail -f /var/log/intelmq/*.log
 ```

 # Uninstall
diff --git a/docs/intelmqctl.md b/docs/intelmqctl.md
index 78918237c..b8559074f 100644
--- a/docs/intelmqctl.md
+++ b/docs/intelmqctl.md
@@ -239,8 +239,10 @@ If you wish to display the processed message as well, use the **--show-sent|-s**

 ### disable

-Sets the `enabled` flag in runtime.conf to `false`.
-Assume the bot is now enabled (default for all bots).
+Sets the `enabled` flag in the runtime configuration of the bot to `false`.
+By default, all bots are enabled.
+
+Example output:

 ```bash
 > intelmqctl status file-output
@@ -252,8 +254,9 @@ intelmqctl: file-output is disabled.

 ### enable

-Ensures that the `enabled` flag in runtime.conf is not set to `false`.
-Assume that the bot is now dibbled.
+Sets the `enabled` flag in the runtime configuration of the bot to `true`.
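+
+For reference, the flag toggled by `enable`/`disable` lives in the bot's entry in the runtime configuration. A minimal sketch of such an entry (keys abbreviated; the bot id matches the example below):
+
+```json
+"file-output": {
+    "group": "Output",
+    "module": "intelmq.bots.outputs.file.output",
+    "enabled": true
+}
+```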
+ +Example output: ```bash > intelmqctl status file-output diff --git a/intelmq/__init__.py b/intelmq/__init__.py index b3e36735d..f2bfcdcc0 100644 --- a/intelmq/__init__.py +++ b/intelmq/__init__.py @@ -1,29 +1,33 @@ from .version import __version__, __version_info__ import os -if os.getenv('INTELMQ_PATHS_NO_OPT', False): - ROOT_DIR = "" - CONFIG_DIR = "/etc/intelmq/" - DEFAULT_LOGGING_LEVEL = "INFO" - BOTS_FILE = os.path.join(CONFIG_DIR, "BOTS") - DEFAULT_LOGGING_PATH = "/var/log/intelmq/" - DEFAULTS_CONF_FILE = os.path.join(CONFIG_DIR, "defaults.conf") - HARMONIZATION_CONF_FILE = os.path.join(CONFIG_DIR, "harmonization.conf") - PIPELINE_CONF_FILE = os.path.join(CONFIG_DIR, "pipeline.conf") - RUNTIME_CONF_FILE = os.path.join(CONFIG_DIR, "runtime.conf") - VAR_RUN_PATH = "/var/run/intelmq/" - VAR_STATE_PATH = "/var/lib/intelmq/bots/" -else: - ROOT_DIR = "/opt/intelmq/" + +path = "opt" +if os.getenv("INTELMQ_ROOT_DIR", False): + path = "opt" +elif os.getenv('INTELMQ_PATHS_NO_OPT', False): + path = "lsb" + + +if path == "lsb": + ROOT_DIR = os.getenv("ROOT_DIR", "/") + CONFIG_DIR = os.path.join(ROOT_DIR, "etc/intelmq/") + DEFAULT_LOGGING_PATH = os.path.join(ROOT_DIR, "var/log/intelmq/") + VAR_RUN_PATH = os.path.join(ROOT_DIR, "var/run/intelmq/") + VAR_STATE_PATH = os.path.join(ROOT_DIR, "var/lib/intelmq/bots/") +elif path == "opt": + ROOT_DIR = os.getenv("INTELMQ_ROOT_DIR", "/opt/intelmq/") CONFIG_DIR = os.path.join(ROOT_DIR, "etc/") - DEFAULT_LOGGING_LEVEL = "INFO" - BOTS_FILE = os.path.join(CONFIG_DIR, "BOTS") DEFAULT_LOGGING_PATH = os.path.join(ROOT_DIR, "var/log/") - DEFAULTS_CONF_FILE = os.path.join(CONFIG_DIR, "defaults.conf") - HARMONIZATION_CONF_FILE = os.path.join(CONFIG_DIR, "harmonization.conf") - PIPELINE_CONF_FILE = os.path.join(CONFIG_DIR, "pipeline.conf") - RUNTIME_CONF_FILE = os.path.join(CONFIG_DIR, "runtime.conf") VAR_RUN_PATH = os.path.join(ROOT_DIR, "var/run/") VAR_STATE_PATH = os.path.join(ROOT_DIR, "var/lib/bots/") -STATE_FILE_PATH = os.path.join(VAR_STATE_PATH, '../state.json') + +DEFAULT_LOGGING_LEVEL = "INFO" +DEFAULTS_CONF_FILE = os.path.join(CONFIG_DIR, "defaults.conf") +HARMONIZATION_CONF_FILE = os.path.join(CONFIG_DIR, "harmonization.conf") +PIPELINE_CONF_FILE = os.path.join(CONFIG_DIR, "pipeline.conf") +RUNTIME_CONF_FILE = os.path.join(CONFIG_DIR, "runtime.conf") +BOTS_FILE = os.path.join(CONFIG_DIR, "BOTS") +STATE_FILE_PATH = path = os.path.abspath(os.path.join(VAR_STATE_PATH, + '../state.json')) diff --git a/intelmq/bin/intelmq_gen_docs.py b/intelmq/bin/intelmq_gen_docs.py index a84d31ab0..a28e057ac 100755 --- a/intelmq/bin/intelmq_gen_docs.py +++ b/intelmq/bin/intelmq_gen_docs.py @@ -2,9 +2,9 @@ # -*- coding: utf-8 -*- import json +import os.path import textwrap -import pkg_resources import yaml import intelmq.lib.harmonization @@ -27,12 +27,13 @@ {} """ +BASEDIR = os.path.join(os.path.dirname(__file__), '../../') def harm_docs(): output = HEADER - with open(pkg_resources.resource_filename('intelmq', 'etc/harmonization.conf')) as fhandle: + with open(os.path.join(BASEDIR, 'intelmq/etc/harmonization.conf')) as fhandle: HARM = json.load(fhandle)['event'] for key, value in sorted(HARM.items()): @@ -67,7 +68,7 @@ def info(key, value=""): def feeds_docs(): - with open(pkg_resources.resource_filename('intelmq', 'etc/feeds.yaml')) as fhandle: + with open(os.path.join(BASEDIR, 'intelmq/etc/feeds.yaml')) as fhandle: config = yaml.safe_load(fhandle.read()) toc = "" @@ -99,10 +100,10 @@ def feeds_docs(): output += "## %s\n\n" % feed - if feed_info['status']: - output += 
info("status", "on") + if feed_info['public']: + output += info("public", "yes" if feed_info['public'] else "no") else: - output += info("status", "off") + output += info("public", "unknown") output += info("revision", feed_info['revision']) @@ -123,7 +124,7 @@ def feeds_docs(): output += info("Module", bot_info['module']) output += info("Configuration Parameters") - if bot_info['parameters']: + if bot_info.get('parameters'): for key, value in sorted(bot_info['parameters'].items(), key=lambda x: x[0]): if value == "__FEED__": @@ -142,7 +143,7 @@ def feeds_docs(): if __name__ == '__main__': # pragma: no cover - with open(pkg_resources.resource_filename('intelmq', '../docs/Harmonization-fields.md'), 'w') as handle: + with open(os.path.join(BASEDIR, 'docs/Harmonization-fields.md'), 'w') as handle: handle.write(harm_docs()) - with open(pkg_resources.resource_filename('intelmq', '../docs/Feeds.md'), 'w') as handle: + with open(os.path.join(BASEDIR, 'docs/Feeds.md'), 'w') as handle: handle.write(feeds_docs()) diff --git a/intelmq/bin/intelmq_generate_misp_objects_templates.py b/intelmq/bin/intelmq_generate_misp_objects_templates.py new file mode 100755 index 000000000..4b51e13cc --- /dev/null +++ b/intelmq/bin/intelmq_generate_misp_objects_templates.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Generates a MISP object template +see https://github.com/MISP/misp-objects/ +""" +import json +from pathlib import Path +from uuid import uuid4 +import argparse + + +class MISPObjectTemplateGenerator: + + def __init__(self, object_templates_path: Path, harmonization_file_path: Path): + intelmq_event_template_name = 'intelmq_event' + intelmq_report_template_name = 'intelmq_report' + event_template_dir = object_templates_path / 'objects' / intelmq_event_template_name + report_template_dir = object_templates_path / 'objects' / intelmq_report_template_name + event_template_dir.mkdir(exist_ok=True) + report_template_dir.mkdir(exist_ok=True) + + self.event_template_path = event_template_dir / 'definition.json' + if self.event_template_path.exists(): + with self.event_template_path.open() as f: + self.misp_object_intelmq_event = json.load(f) + self.misp_object_intelmq_event['version'] += 1 + else: + self.misp_object_intelmq_event = { + 'name': intelmq_event_template_name, + 'uuid': str(uuid4()), + 'meta-category': 'network', + 'description': 'IntelMQ Event', + 'version': 1, + 'attributes': {} + } + + self.report_template_path = report_template_dir / 'definition.json' + if self.report_template_path.exists(): + with self.report_template_path.open() as f: + self.misp_object_intelmq_report = json.load(f) + self.misp_object_intelmq_report['version'] += 1 + else: + self.misp_object_intelmq_report = { + 'name': intelmq_report_template_name, + 'uuid': str(uuid4()), + 'meta-category': 'network', + 'description': 'IntelMQ Report', + 'version': 1, + 'attributes': {} + } + + with harmonization_file_path.open() as f: + self.intelmq_fields = json.load(f) + + def _intelmq_misp_mapping(self, content, object_relation): + attribute = {'description': content['description'], 'ui-priority': 1} + if content['type'] in ['String', 'LowercaseString', 'ClassificationType', + 'UppercaseString', 'Registry', 'JSONDict', 'JSON', + 'TLP', 'Base64']: + attribute['misp-attribute'] = 'text' + elif content['type'] == 'DateTime': + attribute['misp-attribute'] = 'datetime' + elif content['type'] == 'ASN': + attribute['misp-attribute'] = 'AS' + elif content['type'] == 'FQDN': + attribute['misp-attribute'] = 'text' + 
elif content['type'] == 'Float':
+            attribute['misp-attribute'] = 'float'
+        elif (content['type'] in ['IPAddress', 'IPNetwork'] and
+                object_relation.startswith('destination')):
+            attribute['misp-attribute'] = 'ip-dst'
+        elif (content['type'] in ['IPAddress', 'IPNetwork'] and
+                object_relation.startswith('source')):
+            attribute['misp-attribute'] = 'ip-src'
+        elif content['type'] == 'Integer':
+            attribute['misp-attribute'] = 'counter'
+        elif content['type'] == 'Boolean':
+            attribute['misp-attribute'] = 'boolean'
+        elif content['type'] == 'URL':
+            attribute['misp-attribute'] = 'url'
+        elif content['type'] == 'Accuracy':
+            attribute['misp-attribute'] = 'float'
+        else:
+            raise Exception('Unknown type {}: {} - {}'.format(content['type'], object_relation, content))
+        return attribute
+
+    def generate_templates(self):
+        for object_relation, content in self.intelmq_fields['event'].items():
+            self.misp_object_intelmq_event['attributes'].update(
+                {object_relation: self._intelmq_misp_mapping(content, object_relation)}
+            )
+
+        for object_relation, content in self.intelmq_fields['report'].items():
+            self.misp_object_intelmq_report['attributes'].update(
+                {object_relation: self._intelmq_misp_mapping(content, object_relation)}
+            )
+
+    def dump_templates(self):
+        with self.event_template_path.open('w') as f:
+            json.dump(self.misp_object_intelmq_event, f, indent=2, sort_keys=True)
+        with self.report_template_path.open('w') as f:
+            json.dump(self.misp_object_intelmq_report, f, indent=2, sort_keys=True)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Generate or update MISP object templates.')
+    parser.add_argument("--objects", required=True,
+                        help="Path to misp-objects repository. "
+                             "The generated template will be written to or updated in this path.")
+    parser.add_argument("--harmonization", required=True,
+                        help="Path to harmonization.conf to be used.")
+    args = parser.parse_args()
+
+    objects = Path(args.objects)
+    if not objects.exists():
+        raise Exception('Path to misp-objects repository does not exist: {args.objects}'.format(args=args))
+
+    harmonization_file = Path(args.harmonization)
+    if not harmonization_file.exists():
+        raise Exception('Path to harmonization configuration does not exist: {args.harmonization}'.format(args=args))
+
+    g = MISPObjectTemplateGenerator(objects, harmonization_file)
+    g.generate_templates()
+    g.dump_templates()
diff --git a/intelmq/bin/intelmq_psql_initdb.py b/intelmq/bin/intelmq_psql_initdb.py
index 716dce724..970b04d19 100644
--- a/intelmq/bin/intelmq_psql_initdb.py
+++ b/intelmq/bin/intelmq_psql_initdb.py
@@ -3,8 +3,7 @@
 """
 Generates a SQL command file with commands to create the events table.

-Reads the harmonization configuration from
-`/opt/intelmq/etc/harmonization.conf` and generates an SQL command from it.
+Reads the harmonization configuration and generates an SQL command from it.

 The SQL file is saved in `/tmp/initdb.sql` or a temporary name if the other one exists.
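+
+A typical invocation might look like this (the user and database names are
+examples only; adapt them to your setup):
+
+    intelmq_psql_initdb
+    psql -U intelmq -d intelmq-events -f /tmp/initdb.sql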
""" diff --git a/intelmq/bin/intelmqctl.py b/intelmq/bin/intelmqctl.py index eb0979384..11d47addd 100644 --- a/intelmq/bin/intelmqctl.py +++ b/intelmq/bin/intelmqctl.py @@ -10,7 +10,6 @@ import logging import os import re -import shutil import signal import socket import subprocess @@ -22,17 +21,24 @@ from collections import OrderedDict import pkg_resources -import psutil -from termstyle import red, green +from termstyle import green from intelmq import (BOTS_FILE, DEFAULT_LOGGING_LEVEL, DEFAULTS_CONF_FILE, HARMONIZATION_CONF_FILE, PIPELINE_CONF_FILE, RUNTIME_CONF_FILE, VAR_RUN_PATH, STATE_FILE_PATH, - __version_info__) + DEFAULT_LOGGING_PATH, __version_info__, + CONFIG_DIR, ROOT_DIR) from intelmq.lib import utils from intelmq.lib.bot_debugger import BotDebugger +from intelmq.lib.exceptions import MissingDependencyError from intelmq.lib.pipeline import PipelineFactory import intelmq.lib.upgrades as upgrades +from typing import Union, Iterable + +try: + import psutil +except ImportError: + psutil = None class Parameters(object): @@ -61,13 +67,14 @@ class Parameters(object): } ERROR_MESSAGES = { - 'starting': red('Bot %s failed to START.'), - 'running': red('Bot %s is still running.'), + 'starting': 'Bot %s failed to START.', + 'running': 'Bot %s is still running.', 'stopped': 'Bot %s was NOT RUNNING.', - 'stopping': red('Bot %s failed to STOP.'), - 'not found': red('Bot %s failed to START because the file cannot be found.'), - 'access denied': red('Bot %s failed to %s because of missing permissions.'), - 'unknown': red('Status of Bot %s is unknown: %r.'), + 'stopping': 'Bot %s failed to STOP.', + 'not found': ('Bot %s FAILED to start because the executable cannot be found. ' + 'Check your PATH variable and your the installation.'), + 'access denied': 'Bot %s failed to %s because of missing permissions.', + 'unknown': 'Status of Bot %s is unknown: %r.', } LOG_LEVEL = OrderedDict([ @@ -132,6 +139,9 @@ def __init__(self, runtime_configuration, logger, controller): self.logger = logger self.controller = controller + if psutil is None: + raise MissingDependencyError('psutil') + if not os.path.exists(self.PIDDIR): try: os.makedirs(self.PIDDIR) @@ -343,19 +353,49 @@ def __remove_pidfile(self, bot_id): filename = self.PIDFILE.format(bot_id) os.remove(filename) + @staticmethod + def _interpret_commandline(pid: int, cmdline: Iterable[str], + module: str, bot_id: str) -> Union[bool, str]: + """ + Separate function to allow easy testing + + Parameters + ---------- + pid : int + Process ID, used for return values (error messages) only. + cmdline : Iterable[str] + The command line of the process. + module : str + The module of the bot. + bot_id : str + The ID of the bot. + + Returns + ------- + Union[bool, str] + DESCRIPTION. + """ + if len(cmdline) > 2 and cmdline[1].endswith('/%s' % module): + if cmdline[2] == bot_id: + return True + else: + return False + elif (len(cmdline) > 3 and cmdline[1].endswith('/intelmqctl') and + cmdline[2] == 'run'): + if cmdline[3] == bot_id: + return True + else: + return False + elif len(cmdline) > 1: + return 'Commandline of the process %d with commandline %r could not be interpreted.' % (pid, cmdline) + else: + return 'Unhandled error checking the process %d with commandline %r.' % (pid, cmdline) + def __status_process(self, pid, module, bot_id): - which = shutil.which(module) - if not which: - return 'Could not get path to the excutable (%r). Check your PATH variable (%r).' 
% (module, os.environ.get('PATH')) try: proc = psutil.Process(int(pid)) - if len(proc.cmdline()) > 1 and proc.cmdline()[1] == shutil.which(module): - return True - elif (len(proc.cmdline()) > 3 and proc.cmdline()[1] == shutil.which('intelmqctl') and - proc.cmdline()[2] == 'run' and proc.cmdline()[3] == bot_id): - return True - elif len(proc.cmdline()) > 1: - return 'Commandline of the program %r does not match expected value %r.' % (proc.cmdline()[1], shutil.which(module)) + cmdline = proc.cmdline() + return IntelMQProcessManager._interpret_commandline(pid, cmdline, module, bot_id) except psutil.NoSuchProcess: return False except psutil.AccessDenied: @@ -628,7 +668,8 @@ def _abort(self, message: str): class IntelMQController(): - def __init__(self, interactive: bool = False, return_type: str = "python", quiet: bool = False) -> None: + def __init__(self, interactive: bool = False, return_type: str = "python", quiet: bool = False, + no_file_logging: bool = False, drop_privileges: bool = True) -> None: """ Initializes intelmqctl. @@ -639,6 +680,8 @@ def __init__(self, interactive: bool = False, return_type: str = "python", quiet 'text': user-friendly output for cli, default for interactive use 'json': machine-readable output for managers quiet: False by default, can be activated for cron jobs etc. + no_file_logging: do not log to the log file + drop_privileges: Drop privileges and fail if it did not work. """ self.interactive = interactive global RETURN_TYPE @@ -663,6 +706,8 @@ def __init__(self, interactive: bool = False, return_type: str = "python", quiet logging_level_stream = log_level if log_level == 'DEBUG' else 'INFO' try: + if no_file_logging: + raise FileNotFoundError logger = utils.log('intelmqctl', log_level=log_level, log_format_stream=utils.LOG_FORMAT_SIMPLE, logging_level_stream=logging_level_stream) @@ -676,8 +721,8 @@ def __init__(self, interactive: bool = False, return_type: str = "python", quiet self.logger.exception('Loading the defaults configuration failed!', exc_info=defaults_loading_exc) - if not utils.drop_privileges(): - logger.warning('Running intelmqctl as root is highly discouraged!') + if drop_privileges and not utils.drop_privileges(): + self.abort('IntelMQ must not run as root. Dropping privileges did not work.') APPNAME = "intelmqctl" try: @@ -689,7 +734,7 @@ def __init__(self, interactive: bool = False, return_type: str = "python", quiet DESCRIPTION = """ description: intelmqctl is the tool to control intelmq system. - Outputs are logged to /opt/intelmq/var/log/intelmqctl""" + Outputs are logged to %s/intelmqctl.log""" % DEFAULT_LOGGING_PATH EPILOG = ''' intelmqctl [start|stop|restart|status|reload] --group [collectors|parsers|experts|outputs] intelmqctl [start|stop|restart|status|reload] bot-id @@ -702,6 +747,7 @@ def __init__(self, interactive: bool = False, return_type: str = "python", quiet intelmqctl clear queue-id intelmqctl check intelmqctl upgrade-config + intelmqctl debug Starting a bot: intelmqctl start bot-id @@ -756,7 +802,10 @@ def __init__(self, interactive: bool = False, return_type: str = "python", quiet intelmqctl upgrade-config Make a backup of your configuration first, also including bot's configuration files. 
-Outputs are additionally logged to /opt/intelmq/var/log/intelmqctl'''
+Get some debugging output on the settings and the environment (to be extended):
+    intelmqctl debug --get-paths
+    intelmqctl debug --get-environment-variables
+'''

     # stolen functions from the bot file
     # this will not work with various instances of REDIS
@@ -928,8 +977,24 @@ def __init__(self, interactive: bool = False, return_type: str = "python", quiet
         parser_upgrade_conf.add_argument('-f', '--force',
                                          action='store_true',
                                          help='Force running the upgrade procedure.')
+        parser_upgrade_conf.add_argument('--state-file',
+                                         help='The state file location to use.',
+                                         default=STATE_FILE_PATH)
+        parser_upgrade_conf.add_argument('--no-backup',
+                                         help='Do not create backups of state and configuration files.',
+                                         action='store_true')
         parser_upgrade_conf.set_defaults(func=self.upgrade_conf)

+        parser_debug = subparsers.add_parser('debug', help='Get debugging output.')
+        parser_debug.add_argument('--get-paths', help='Give all paths',
+                                  action='append_const', dest='sections',
+                                  const='paths')
+        parser_debug.add_argument('--get-environment-variables',
+                                  help='Give environment variables',
+                                  action='append_const', dest='sections',
+                                  const='environment_variables')
+        parser_debug.set_defaults(func=self.debug)
+
         self.parser = parser

     def load_defaults_configuration(self, silent=False):
@@ -1335,7 +1400,7 @@ def check(self, no_connections=False):
                 with open(filename) as file_handle:
                     files[filename] = json.load(file_handle)
             except (IOError, ValueError) as exc:  # pragma: no cover
-                check_logger.error('Coud not load %r: %s.', filename, exc)
+                check_logger.error('Could not load %r: %s.', filename, exc)
                 retval = 1
         if retval:
             if RETURN_TYPE == 'json':
@@ -1476,9 +1541,9 @@ def check(self, no_connections=False):

         if RETURN_TYPE == 'json':
             if retval:
-                return 0, {'status': 'error', 'lines': list_handler.buffer}
+                return 1, {'status': 'error', 'lines': list_handler.buffer}
             else:
-                return 1, {'status': 'success', 'lines': list_handler.buffer}
+                return 0, {'status': 'success', 'lines': list_handler.buffer}
         else:
             if retval:
                 self.logger.error('Some issues have been found, please check the above output.')
@@ -1487,7 +1552,9 @@ def check(self, no_connections=False):
                 self.logger.info('No issues found.')
             return retval, 'success'

-    def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None):
+    def upgrade_conf(self, previous=None, dry_run=None, function=None,
+                     force=None, state_file: str = STATE_FILE_PATH,
+                     no_backup=False):
         """
         Upgrade the IntelMQ configuration after a version upgrade.

@@ -1495,6 +1562,8 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None):
             previous: Assume the given version as the previous version
             function: Only execute this upgrade function
             force: Also upgrade if not necessary
+            state_file: location of the state file
+            no_backup: Do not create backups of state and configuration files

         state file:

@@ -1520,11 +1589,11 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None):
                  "time": "..."}
             ]
         """
-        if os.path.isfile(STATE_FILE_PATH):
-            if not os.access(STATE_FILE_PATH, os.W_OK) and not dry_run:
-                self.logger.error("State file %r is not writable.")
-                return 1, "State file %r is not writable."
-            state = utils.load_configuration(STATE_FILE_PATH)
+        if os.path.isfile(state_file):
+            if not os.access(state_file, os.W_OK) and not dry_run:
+                self.logger.error("State file %r is not writable.", state_file)
+                return 1, "State file %r is not writable." 
% state_file + state = utils.load_configuration(state_file) else: """ We create the state file directly before any upgrade function. @@ -1536,15 +1605,14 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None): "results": []} if dry_run: self.logger.info('Would create state file now at %r.', - STATE_FILE_PATH) + state_file) return 0, 'success' try: - utils.write_configuration(STATE_FILE_PATH, state, new=True) + utils.write_configuration(state_file, state, new=True) except Exception as exc: - self.logger.error('Error writing state file %r: %s.', STATE_FILE_PATH, exc) - return 1, 'Error writing state file %r: %s.' % (STATE_FILE_PATH, exc) - self.logger.error('Successfully wrote initial state file. Please re-run this program.') - return 0, 'success' + self.logger.error('Error writing state file %r: %s.', state_file, exc) + return 1, 'Error writing state file %r: %s.' % (state_file, exc) + self.logger.info('Successfully wrote initial state file.') defaults = utils.load_configuration(DEFAULTS_CONF_FILE) runtime = utils.load_configuration(RUNTIME_CONF_FILE) @@ -1571,9 +1639,12 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None): upgrades, function)(defaults, runtime, harmonization, dry_run) # Handle changed configurations if retval is True and not dry_run: - utils.write_configuration(DEFAULTS_CONF_FILE, defaults_new) - utils.write_configuration(RUNTIME_CONF_FILE, runtime_new) - utils.write_configuration(HARMONIZATION_CONF_FILE, harmonization_new) + utils.write_configuration(DEFAULTS_CONF_FILE, defaults_new, + backup=not no_backup) + utils.write_configuration(RUNTIME_CONF_FILE, runtime_new, + backup=not no_backup) + utils.write_configuration(HARMONIZATION_CONF_FILE, harmonization_new, + backup=not no_backup) except Exception: self.logger.exception('Upgrade %r failed, please report this bug ' 'with the shown traceback.', @@ -1603,7 +1674,8 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None): state['results'].append(result) state['upgrades'][function] = result['success'] if not dry_run: - utils.write_configuration(STATE_FILE_PATH, state) + utils.write_configuration(state_file, state, + backup=not no_backup) if result['success']: return 0, 'success' @@ -1638,7 +1710,7 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None): if funcs: todo.append((version, funcs, False)) else: - self.logger.info("Found no previous version, doing all upgrades.") + self.logger.info("Found no previous version or forced, doing all upgrades.") todo = [(version, bunch, True) for version, bunch in upgrades.UPGRADES.items()] todo.extend([(None, (function, ), False) @@ -1666,7 +1738,8 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None): # already performed continue - docstring = textwrap.dedent(function.__doc__).strip() + # shown text should have only one line + docstring = textwrap.dedent(function.__doc__).strip().replace('\n', ' ') result = {"function": function.__name__, "time": datetime.datetime.now().isoformat() } @@ -1717,7 +1790,8 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None): if error: # some upgrade function had a problem if not dry_run: - utils.write_configuration(STATE_FILE_PATH, state) + utils.write_configuration(state_file, state, + backup=not no_backup) self.logger.error('Some migration did not succeed or ' 'manual intervention is needed. Look at ' 'the output above. 
Afterwards, re-run ' @@ -1725,9 +1799,12 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None): try: if not dry_run: - utils.write_configuration(DEFAULTS_CONF_FILE, defaults) - utils.write_configuration(RUNTIME_CONF_FILE, runtime) - utils.write_configuration(HARMONIZATION_CONF_FILE, harmonization) + utils.write_configuration(DEFAULTS_CONF_FILE, defaults, + backup=not no_backup) + utils.write_configuration(RUNTIME_CONF_FILE, runtime, + backup=not no_backup) + utils.write_configuration(HARMONIZATION_CONF_FILE, harmonization, + backup=not no_backup) except Exception as exc: self.logger.error('Writing defaults or runtime configuration ' 'did not succeed: %s\nFix the problem and ' @@ -1742,13 +1819,46 @@ def upgrade_conf(self, previous=None, dry_run=None, function=None, force=None): self.logger.info('Nothing to do!') if not dry_run: - utils.write_configuration(STATE_FILE_PATH, state) + utils.write_configuration(state_file, state, + backup=not no_backup) if error: return 1, 'error' else: return 0, 'success' + def debug(self, sections=None): + """ + Give debugging output + get_paths: + print path information + """ + + output = {} + if sections is None or 'paths' in sections: + output['paths'] = [] + variables = globals() + if RETURN_TYPE == 'text': + print('Paths:') + for path in ('BOTS_FILE', 'DEFAULTS_CONF_FILE', + 'HARMONIZATION_CONF_FILE', 'PIPELINE_CONF_FILE', + 'RUNTIME_CONF_FILE', 'VAR_RUN_PATH', 'STATE_FILE_PATH', + 'DEFAULT_LOGGING_PATH', '__file__', + 'CONFIG_DIR', 'ROOT_DIR'): + output['paths'].append((path, variables[path])) + if RETURN_TYPE == 'text': + print('%s: %r' % output['paths'][-1]) + if sections is None or 'environment_variables' in sections: + output['environment_variables'] = [] + if RETURN_TYPE == 'text': + print('Environment variables:') + for variable in ('INTELMQ_ROOT_DIR', 'INTELMQ_PATHS_NO_OPT', + 'INTELMQ_PATHS_OPT', 'INTELMQ_MANAGER_CONTROLLER_CMD'): + output['environment_variables'].append((variable, os.getenv(variable))) + if RETURN_TYPE == 'text': + print('%s: %r' % output['environment_variables'][-1]) + return 0, output + def main(): # pragma: no cover x = IntelMQController(interactive=True) diff --git a/intelmq/bin/intelmqdump.py b/intelmq/bin/intelmqdump.py index 27856bd9c..3188bec58 100644 --- a/intelmq/bin/intelmqdump.py +++ b/intelmq/bin/intelmqdump.py @@ -3,6 +3,7 @@ """ """ import argparse +import base64 import copy import fcntl import glob @@ -59,6 +60,11 @@ > s 0,4,5 Show the selected IP in a readable format. It's still a raw format from repr, but with newlines for message and traceback. +- v, Edit by ID + > v id + > v 0 + > v 1,2 + Opens an editor (by calling `sensible-editor`) on the message. The modified message is then saved in the dump. - q, Quit > q """ @@ -72,7 +78,7 @@ 'd': ('(d)elete file', False, True), 's': ('(s)how by ids', True, False), 'q': ('(q)uit', False, True), - 'v': ('edit id', True, False), + 'v': ('edit id (v)', True, False), } AVAILABLE_IDS = [key for key, value in ACTIONS.items() if value[1]] @@ -173,7 +179,13 @@ def main(): parser.add_argument('botid', metavar='botid', nargs='?', default=None, help='botid to inspect dumps of') + parser.add_argument('--truncate', '-t', type=int, + default=1000, + help='Truncate raw-data with more characters than given. ' + '0 for no truncating. 
Default: 1000.') args = parser.parse_args() + if args.truncate < 1: + args.truncate = None # Try to get log_level from defaults_configuration, else use default try: @@ -368,12 +380,16 @@ def main(): continue print('=' * 100, '\nShowing id {} {}\n'.format(count, key), '-' * 50) - if isinstance(value['message'], (bytes, str)): - value['message'] = json.loads(value['message']) - if ('raw' in value['message'] and - len(value['message']['raw']) > 1000): - value['message']['raw'] = value['message'][ - 'raw'][:1000] + '...[truncated]' + if value.get('message_type') == 'base64': + if args.truncate and len(value['message']) > args.truncate: + value['message'] = value['message'][:args.truncate] + '...[truncated]' + else: + if isinstance(value['message'], (bytes, str)): + value['message'] = json.loads(value['message']) + if (args.truncate and 'raw' in value['message'] and + len(value['message']['raw']) > args.truncate): + value['message']['raw'] = value['message'][ + 'raw'][:args.truncate] + '...[truncated]' if type(value['traceback']) is not list: value['traceback'] = value['traceback'].splitlines() pprint.pprint(value) @@ -383,19 +399,40 @@ def main(): print(red('Edit mode needs an id')) continue for entry in ids: - with tempfile.NamedTemporaryFile(mode='w+t', suffix='.json') as tmphandle: - filename = tmphandle.name - utils.write_configuration(configuration_filepath=filename, - content=json.loads(content[meta[entry][0]]['message']), - new=True, - backup=False) - proc = subprocess.call(['sensible-editor', filename]) - if proc != 0: - print(red('Calling editor failed.')) - else: - tmphandle.seek(0) - content[meta[entry][0]]['message'] = tmphandle.read() - save_file(handle, content) + if content[meta[entry][0]].get('message_type') == 'base64': + with tempfile.NamedTemporaryFile(mode='w+b', suffix='.txt') as tmphandle: + filename = tmphandle.name + tmphandle.write(base64.b64decode(content[meta[entry][0]]['message'])) + tmphandle.flush() + proc = subprocess.run(['sensible-editor', filename]) + if proc.returncode != 0: + print(red('Calling editor failed with exitcode %r.' % proc.returncode)) + else: + tmphandle.seek(0) + new_content = tmphandle.read() + try: + new_content = new_content.decode() + except UnicodeDecodeError as exc: + print(red("Could not write the new message because of the following error:")) + print(red(exceptions.DecodingError(exception=exc))) + else: + del content[meta[entry][0]]['message_type'] + content[meta[entry][0]]['message'] = new_content + save_file(handle, content) + else: + with tempfile.NamedTemporaryFile(mode='w+t', suffix='.json') as tmphandle: + filename = tmphandle.name + utils.write_configuration(configuration_filepath=filename, + content=json.loads(content[meta[entry][0]]['message']), + new=True, + backup=False) + proc = subprocess.run(['sensible-editor', filename]) + if proc.returncode != 0: + print(red('Calling editor failed with exitcode %r.' 
% proc.returncode)) + else: + tmphandle.seek(0) + content[meta[entry][0]]['message'] = tmphandle.read() + save_file(handle, content) if delete_file: os.remove(fname) diff --git a/intelmq/bin/intelmqsetup.py b/intelmq/bin/intelmqsetup.py old mode 100644 new mode 100755 index 8a6d76b54..a5945fa44 --- a/intelmq/bin/intelmqsetup.py +++ b/intelmq/bin/intelmqsetup.py @@ -11,50 +11,41 @@ * providing example configuration files if not already existing Reasoning: -Pip does not (and cannot) create `/opt/intelmq`, as described in +Pip does not (and cannot) create `/opt/intelmq`/user-given ROOT_DIR, as described in https://github.com/certtools/intelmq/issues/819 """ +import argparse import glob import os import shutil -import site import sys +import pkg_resources from pwd import getpwuid from intelmq import (CONFIG_DIR, DEFAULT_LOGGING_PATH, ROOT_DIR, VAR_RUN_PATH, - VAR_STATE_PATH) + VAR_STATE_PATH, BOTS_FILE, STATE_FILE_PATH) +from intelmq.bin.intelmqctl import IntelMQController -def main(): - if os.geteuid() != 0: - sys.exit('You need to run this program as root.') +def intelmqsetup(ownership=True, state_file=STATE_FILE_PATH): + if os.geteuid() != 0 and ownership: + sys.exit('You need to run this program as root (for setting file ownership)') - if not ROOT_DIR.startswith('/opt/'): + if not ROOT_DIR: sys.exit('Not a pip-installation of IntelMQ, nothing to initialize.') - intelmq_path = os.path.join(site.getsitepackages()[0], 'opt/intelmq/') - opt_path = os.path.join(site.getsitepackages()[0], 'opt/') - if os.path.isdir(intelmq_path) and os.path.isdir(ROOT_DIR): - print('%r already exists, not moving %r there.' % (ROOT_DIR, - intelmq_path)) - elif os.path.isdir(intelmq_path): - shutil.move(intelmq_path, '/opt/') - print('Moved %r to %r.' % (intelmq_path, '/opt/')) - try: - os.rmdir(opt_path) - except OSError: - print('Directory %r is not empty, did not remove it.' % opt_path) - create_dirs = ('/opt/intelmq/var/lib/bots/file-output', - '/opt/intelmq/var/run', - '/opt/intelmq/var/log') + create_dirs = ('%s/file-output' % VAR_STATE_PATH, + VAR_RUN_PATH, + DEFAULT_LOGGING_PATH, + CONFIG_DIR) for create_dir in create_dirs: if not os.path.isdir(create_dir): os.makedirs(create_dir, mode=0o755, exist_ok=True) print('Created directory %r.' % create_dir) - example_confs = glob.glob(os.path.join(CONFIG_DIR, 'examples/*.conf')) + example_confs = glob.glob(pkg_resources.resource_filename('intelmq', 'etc/*.conf')) for example_conf in example_confs: fname = os.path.split(example_conf)[-1] if os.path.exists(os.path.join(CONFIG_DIR, fname)): @@ -63,11 +54,34 @@ def main(): shutil.copy(example_conf, CONFIG_DIR) print('Use example %r.' 
% fname)

-    print('Setting intelmq as owner for it\'s directories.')
-    for obj in (CONFIG_DIR, DEFAULT_LOGGING_PATH, ROOT_DIR, VAR_RUN_PATH,
-                VAR_STATE_PATH, VAR_STATE_PATH + 'file-output'):
-        if getpwuid(os.stat(obj).st_uid).pw_name != 'intelmq':
-            shutil.chown(obj, user='intelmq')
+    print('Writing BOTS file.')
+    shutil.copy(pkg_resources.resource_filename('intelmq', 'bots/BOTS'),
+                BOTS_FILE)
+
+    if ownership:
+        print('Setting intelmq as owner for its directories.')
+        for obj in (CONFIG_DIR, DEFAULT_LOGGING_PATH, ROOT_DIR, VAR_RUN_PATH,
+                    VAR_STATE_PATH, VAR_STATE_PATH + 'file-output'):
+            if getpwuid(os.stat(obj).st_uid).pw_name != 'intelmq':
+                shutil.chown(obj, user='intelmq')
+
+    print('Calling `intelmqctl upgrade-config` to update/create the state file.')
+    controller = IntelMQController(interactive=False, no_file_logging=True,
+                                   drop_privileges=False)
+    controller.upgrade_conf(state_file=state_file, no_backup=True)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Sets up directories and example "
+                                                 "configurations for IntelMQ.")
+    parser.add_argument('--skip-ownership', action='store_true',
+                        help='Skip setting file ownership')
+    parser.add_argument('--state-file',
+                        help='The state file location to use.',
+                        default=STATE_FILE_PATH)
+    args = parser.parse_args()
+    intelmqsetup(ownership=not args.skip_ownership,
+                 state_file=args.state_file)


 if __name__ == '__main__':
diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS
index 1ad7ecf9b..1d5d32e78 100755
--- a/intelmq/bots/BOTS
+++ b/intelmq/bots/BOTS
@@ -1,7 +1,7 @@
 {
     "Collector": {
         "AMQP": {
-            "description": "Collecting from a (remote) AMQP Server and fetching either intelmq or any other messages.",
+            "description": "Collecting from a (remote) AMQP Server and fetching either intelmq or any other messages. Requires the pika python library.",
             "module": "intelmq.bots.collectors.amqp.collector_amqp",
             "parameters": {
                 "connection_attempts": 3,
@@ -54,7 +54,7 @@
             "module": "intelmq.bots.collectors.mail.collector_mail_attach",
             "parameters": {
                 "attach_regex": "csv.zip",
-                "attach_unzip": true,
+                "extract_files": true,
                 "folder": "INBOX",
                 "mail_host": "",
                 "mail_password": "",
@@ -137,10 +137,9 @@
             "module": "intelmq.bots.collectors.misp.collector",
             "parameters": {
                 "misp_key": "",
-                "misp_tag_processed": "",
+                "misp_tag_processed": "",
                 "misp_tag_to_process": "",
                 "misp_url": "",
-                "misp_verify": true,
                 "name": "__FEED__",
                 "provider": "__PROVIDER__",
                 "rate_limit": 3600
@@ -184,7 +183,7 @@
             }
         },
         "TCP": {
-            "description": "TCP is the bot responsible to receive events on a TCP port (ex: from TCP Output of another IntelMQ instance). Might not be working on Python3.4.6.",
+            "description": "TCP is the bot responsible to receive events on a TCP port (ex: from TCP Output of another IntelMQ instance).",
             "module": "intelmq.bots.collectors.tcp.collector",
             "parameters": {
                 "ip": "",
@@ -250,12 +249,16 @@
             "description": "",
             "module": "intelmq.bots.collectors.microsoft.collector_azure",
             "parameters": {
-                "account_key": "",
-                "account_name": "",
-                "delete": true,
+                "connection_string": "",
+                "container_name": "",
                 "name": "Azure",
                 "provider": "Microsoft",
-                "rate_limit": 3600
+                "rate_limit": 3600,
+                "redis_cache_db": "5",
+                "redis_cache_host": "127.0.0.1",
+                "redis_cache_password": null,
+                "redis_cache_port": 6379,
+                "redis_cache_ttl": 864000
             }
         },
         "Microsoft Interflow": {
@@ -318,6 +321,18 @@
                 "timelimit": "",
                 "tweet_count": ""
             }
+        },
+        "Github API": {
+            "description": "Collects files from github repository via the Github API. 
Optionally with github credentials.", + "module": "intelmq.bots.collectors.github_api.collector_github_contents_api", + "parameters": { + "name": "__FEED__", + "provider": "__PROVIDER__", + "basic_auth_username": "__USERNAME__", + "basic_auth_password": "__PASSWORD__", + "repository": "StrangerealIntel/DailyIOC", + "regex": ".*.json" + } } }, "Parser": { @@ -331,11 +346,6 @@ "module": "intelmq.bots.parsers.abusech.parser_ip", "parameters": {} }, - "Abuse.ch Ransomware": { - "description": "Abuse.ch Ransomware Parser is the bot responsible to parse the report and sanitize the information.", - "module": "intelmq.bots.parsers.abusech.parser_ransomware", - "parameters": {} - }, "AlienVault": { "description": "AlienVault Parser is the bot responsible to parse the report and sanitize the information.", "module": "intelmq.bots.parsers.alienvault.parser", @@ -349,7 +359,9 @@ "AnubisNetworks Cyberfeed Stream": { "description": "Parses single JSON-events from AnubisNetworks Cyberfeed stream.", "module": "intelmq.bots.parsers.anubisnetworks.parser", - "parameters": {} + "parameters": { + "use_malware_familiy_as_classification_identifier": true + } }, "Autoshun": { "description": "Autoshun Parser is the bot responsible to parse the report and sanitize the information.", @@ -361,11 +373,6 @@ "module": "intelmq.bots.parsers.bambenek.parser", "parameters": {} }, - "Bitcash Blocklist Feed": { - "description": "Bitcash Blocklist parser is the bot responsible to parse and sanitize the information.", - "module": "intelmq.bots.parsers.bitcash.parser", - "parameters": {} - }, "Blocklist.de": { "description": "Blocklist.DE Parser is the bot responsible to parse the report and sanitize the information.", "module": "intelmq.bots.parsers.blocklistde.parser", @@ -436,18 +443,6 @@ "module": "intelmq.bots.parsers.dyn.parser", "parameters": {} }, - "Fraunhofer DDoS Attack C&C": { - "description": "Fraunhofer DDoS Attack C&C Feed Parser is the bot responsible to parse the report and sanitize the information.", - "module": "intelmq.bots.parsers.fraunhofer.parser_ddosattack_cnc", - "parameters": { - "unknown_messagetype_accuracy": 10.0 - } - }, - "Fraunhofer DDoS Attack Target": { - "description": "Fraunhofer DDoS Attack Target Feed Parser is the bot responsible to parse the report and sanitize the information.", - "module": "intelmq.bots.parsers.fraunhofer.parser_ddosattack_target", - "parameters": {} - }, "Fraunhofer DGA": { "description": "Fraunhofer DGA Parser is the bot responsible to parse the report and sanitize the information.", "module": "intelmq.bots.parsers.fraunhofer.parser_dga", @@ -472,6 +467,11 @@ "type_translation": null } }, + "Github feed": { + "description": "Github feed parser dependent of the feed URL. 
Parses only known github feeds.", + "module": "intelmq.bots.parsers.github_feed.parser", + "parameters": {} + }, "HTML Table": { "description": "HTML Table Parser is a bot configurable to parse different html table data.", "module": "intelmq.bots.parsers.html_table.parser", @@ -582,11 +582,6 @@ "module": "intelmq.bots.parsers.netlab_360.parser", "parameters": {} }, - "Nothink": { - "description": "Nothink Feed Parser is the bot responsible to parse the SNMP, SSH, Telnet, and DNS Attack reports and sanitize the information.", - "module": "intelmq.bots.parsers.nothink.parser", - "parameters": {} - }, "OpenPhish": { "description": "OpenPhish Parser is the bot responsible to parse the report and sanitize the information.", "module": "intelmq.bots.parsers.openphish.parser", @@ -641,9 +636,7 @@ "Taichung": { "description": "Taichung Parser is the bot responsible to parse the report and sanitize the information.", "module": "intelmq.bots.parsers.taichung.parser", - "parameters": { - "error_log_message": false - } + "parameters": {} }, "Threatminer": { "description": "Threatminer Parser is the bot responsible to parse the report and sanitize the information.", @@ -664,11 +657,6 @@ "substitutions": " .net;[.]net" } }, - "URLVir": { - "description": "URLVir Parser is the bot responsible to parse the Export Hosts and Export IP Addresses reports and sanitize the information.", - "module": "intelmq.bots.parsers.urlvir.parser", - "parameters": {} - }, "VXVault": { "description": "VXVault Parser is the bot responsible to parse the report and sanitize the information.", "module": "intelmq.bots.parsers.vxvault.parser", @@ -693,6 +681,14 @@ "database": "/opt/intelmq/var/lib/bots/asn_lookup/ipasn.dat" } }, + "CSV Converter": { + "description": "Converts data to CSV.", + "module": "intelmq.bots.experts.csv_converter.expert", + "parameters": { + "delimiter": ",", + "fieldnames": "time.source,classification.type,source.ip" + } + }, "Abusix": { "description": "Abusix is the bot responsible to get the correspondent abuse contact from source IP and destination IP of the events.", "module": "intelmq.bots.experts.abusix.expert", @@ -850,6 +846,7 @@ "parameters": { "case_sensitive": true, "configuration_path": "/opt/intelmq/var/lib/bots/modify/modify.conf", + "maximum_matches": null, "overwrite": false } }, @@ -947,11 +944,19 @@ "parameters": { "overwrite": false } + }, + "MISP": { + "description": "An expert to for looking up values in MISP.", + "module": "intelmq.bots.experts.misp.expert", + "parameters": { + "misp_key": "", + "misp_url": "" + } } }, "Output": { "AMQP Topic": { - "description": "AMQP Topic is the bot responsible to send events to a AMQP topic exchange.", + "description": "AMQP Topic is the bot responsible to send events to a AMQP topic exchange. 
Requires the pika python library.", "module": "intelmq.bots.outputs.amqptopic.output", "parameters": { "connection_attempts": 3, @@ -977,7 +982,7 @@ } }, "Blackhole": { - "description": "AMQP Topic is the bot responsible to send events to a AMQP topic exchange.", + "description": "Discards messages.", "module": "intelmq.bots.outputs.blackhole.output", "parameters": {} }, @@ -985,7 +990,6 @@ "description": "Elasticsearch is the bot responsible to send events to a elasticsearch.", "module": "intelmq.bots.outputs.elasticsearch.output", "parameters": { - "elastic_doctype": "events", "elastic_host": "127.0.0.1", "elastic_index": "intelmq", "elastic_port": 9200, @@ -1008,7 +1012,10 @@ "file": "/opt/intelmq/var/lib/bots/file-output/events.txt", "format_filename": false, "hierarchical_output": false, - "single_key": null + "keep_raw_field": false, + "message_jsondict_as_string": false, + "message_with_type": false, + "single_key": false } }, "Files": { @@ -1017,7 +1024,10 @@ "parameters": { "dir": "/opt/intelmq/var/lib/bots/files-output/incoming", "hierarchical_output": false, - "single_key": null, + "keep_raw_field": false, + "message_jsondict_as_string": false, + "message_with_type": false, + "single_key": false, "suffix": ".json", "tmp": "/opt/intelmq/var/lib/bots/files-output/tmp" } @@ -1049,6 +1059,32 @@ "field": "source.ip" } }, + "MISP Feed": { + "description": "Generate an output in MISP Feed format.", + "module": "intelmq.bots.outputs.misp.output_feed", + "parameters": { + "misp_org_name": "", + "misp_org_uuid": "", + "output_dir": "/opt/intelmq/var/lib/bots/mispfeed-output", + "interval_event": "1 hour" + } + }, + "MISP API": { + "description": "Insert events into a MISP instance.", + "module": "intelmq.bots.outputs.misp.output_api", + "parameters": { + "add_feed_provider_as_tag": true, + "misp_additional_correlation_fields": [], + "misp_additional_tags": [], + "add_feed_name_as_tag": true, + "misp_key": "", + "misp_publish": false, + "misp_tag_for_bot": "", + "misp_to_ids_fields": [], + "misp_url": "", + "significant_fields": "" + } + }, "MongoDB": { "description": "MongoDB is the bot responsible to send events to a MongoDB database.", "module": "intelmq.bots.outputs.mongodb.output", @@ -1129,6 +1165,7 @@ "parameters": { "exchange": "/exchange/_push", "heartbeat": 60000, + "keep_raw_field": false, "message_hierarchical_output": false, "message_jsondict_as_string": false, "message_with_type": false, diff --git a/intelmq/bots/collectors/alienvault_otx/collector.py b/intelmq/bots/collectors/alienvault_otx/collector.py index 44e6e29eb..07793d697 100644 --- a/intelmq/bots/collectors/alienvault_otx/collector.py +++ b/intelmq/bots/collectors/alienvault_otx/collector.py @@ -3,6 +3,7 @@ import json from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: from OTXv2 import OTXv2 @@ -14,7 +15,7 @@ class AlienVaultOTXCollectorBot(CollectorBot): def init(self): if OTXv2 is None: - raise ValueError('Could not import OTXv2. 
Please install it.') + raise MissingDependencyError("OTXv2") self.modified_pulses_only = False if hasattr(self.parameters, 'modified_pulses_only'): diff --git a/intelmq/bots/collectors/amqp/REQUIREMENTS.txt b/intelmq/bots/collectors/amqp/REQUIREMENTS.txt new file mode 100644 index 000000000..df7f4230a --- /dev/null +++ b/intelmq/bots/collectors/amqp/REQUIREMENTS.txt @@ -0,0 +1 @@ +pika diff --git a/intelmq/bots/collectors/amqp/collector_amqp.py b/intelmq/bots/collectors/amqp/collector_amqp.py index b752c11a1..825edb49f 100644 --- a/intelmq/bots/collectors/amqp/collector_amqp.py +++ b/intelmq/bots/collectors/amqp/collector_amqp.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Collecting from a (remote) AMQP Server and fetching either intelmq or any other messages. @@ -8,6 +7,7 @@ from intelmq.bots.outputs.amqptopic.output import AMQPTopicOutputBot from intelmq.lib.bot import CollectorBot from intelmq.lib.message import MessageFactory +from intelmq.lib.exceptions import MissingDependencyError try: import pika @@ -23,17 +23,18 @@ class AMQPCollectorBot(AMQPTopicOutputBot, CollectorBot): def init(self): if pika is None: - raise ValueError("Could not import library 'pika'. Please install it.") + raise MissingDependencyError("pika", version=">=1.0") self.connection = None self.channel = None pika_version = tuple(int(x) for x in pika.__version__.split('.')) + if pika_version < (1, ): + raise MissingDependencyError("pika", version=">=1.0", + installed=pika.__version__) + self.kwargs = {} - if pika_version < (0, 11): - self.kwargs['heartbeat_interval'] = self.parameters.connection_heartbeat - else: - self.kwargs['heartbeat'] = self.parameters.connection_heartbeat + self.kwargs['heartbeat'] = self.parameters.connection_heartbeat self.connection_host = self.parameters.connection_host self.connection_port = self.parameters.connection_port @@ -58,8 +59,6 @@ def init(self): self.connect_server() - # TODO: message or report - def process(self): ''' Stop the Bot if cannot connect to AMQP Server after the defined connection attempts ''' diff --git a/intelmq/bots/collectors/api/collector_api.py b/intelmq/bots/collectors/api/collector_api.py index 9e6219bc7..3f6b68d13 100644 --- a/intelmq/bots/collectors/api/collector_api.py +++ b/intelmq/bots/collectors/api/collector_api.py @@ -5,6 +5,7 @@ from threading import Thread from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: import tornado.web @@ -29,7 +30,7 @@ class APICollectorBot(CollectorBot): def init(self): if IOLoop is None: - raise ValueError("Could not import 'tornado'. Please install it.") + raise MissingDependencyError("tornado") app = Application(self.request_handler, [ ("/intelmq/push", MainHandler), diff --git a/intelmq/bots/collectors/blueliv/collector_crimeserver.py b/intelmq/bots/collectors/blueliv/collector_crimeserver.py index c396f96b9..568a03d9f 100644 --- a/intelmq/bots/collectors/blueliv/collector_crimeserver.py +++ b/intelmq/bots/collectors/blueliv/collector_crimeserver.py @@ -3,6 +3,7 @@ import logging from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: from sdk.blueliv_api import BluelivAPI @@ -13,7 +14,7 @@ class BluelivCrimeserverCollectorBot(CollectorBot): def init(self): if BluelivAPI is None: - raise ValueError('Could not import sdk.blueliv_api.BluelivAPI. 
Please install it.') + raise MissingDependencyError("sdk.blueliv_api.BluelivAPI") if not hasattr(self.parameters, 'api_url'): setattr(self.parameters, 'api_url', 'https://freeapi.blueliv.com') diff --git a/intelmq/bots/collectors/calidog/collector_certstream.py b/intelmq/bots/collectors/calidog/collector_certstream.py index 294cbe589..caceef8d3 100644 --- a/intelmq/bots/collectors/calidog/collector_certstream.py +++ b/intelmq/bots/collectors/calidog/collector_certstream.py @@ -10,6 +10,7 @@ import json from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: from certstream.core import CertStreamClient @@ -20,7 +21,7 @@ class CertstreamCollectorBot(CollectorBot): def init(self): if CertStreamClient is None: - raise ValueError("Could not import library 'certstream'. Please install it.") + raise MissingDependencyError("certstream") def callback(self, message, context=None): # callback handler for certstream events. CertstreamCollectorBot.send_update(message=message, self=self) diff --git a/intelmq/bots/collectors/github_api/REQUIREMENTS.txt b/intelmq/bots/collectors/github_api/REQUIREMENTS.txt new file mode 100644 index 000000000..663bd1f6a --- /dev/null +++ b/intelmq/bots/collectors/github_api/REQUIREMENTS.txt @@ -0,0 +1 @@ +requests \ No newline at end of file diff --git a/intelmq/bots/parsers/bitcash/__init__.py b/intelmq/bots/collectors/github_api/__init__.py similarity index 100% rename from intelmq/bots/parsers/bitcash/__init__.py rename to intelmq/bots/collectors/github_api/__init__.py diff --git a/intelmq/bots/collectors/github_api/collector_github_api.py b/intelmq/bots/collectors/github_api/collector_github_api.py new file mode 100644 index 000000000..64d46f6c8 --- /dev/null +++ b/intelmq/bots/collectors/github_api/collector_github_api.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" +GITHUB API Collector bot +""" +import base64 + +from intelmq.lib.bot import CollectorBot + +try: + import requests +except ImportError: + requests = None + +static_params = { + 'headers': { + 'Accept': 'application/vnd.github.v3.text-match+json' + } +} + + +class GithubAPICollectorBot(CollectorBot): + + def init(self): + if requests is None: + raise ValueError('Could not import requests. 
Please install it.') + + self.__user_headers = static_params['headers'] + if hasattr(self.parameters, 'basic_auth_username') and hasattr(self.parameters, 'basic_auth_password'): + self.__user_headers.update(self.__produce_auth_header(getattr(self.parameters, 'basic_auth_username'), + getattr(self.parameters, 'basic_auth_password'))) + else: + self.logger.warning('Using unauthenticated API access means the request limit is 60 requests per hour.') + + def process(self): + self.process_request() + + def process_request(self): + """ + Requests github API with specific path and functionality + """ + raise NotImplementedError + + def github_api(self, api_path: str, **kwargs) -> dict: + try: + response = requests.get("{}".format(api_path), params=kwargs, headers=self.__user_headers) + if response.status_code == 401: + # bad credentials + raise ValueError(response.json()['message']) + else: + return response.json() + except requests.RequestException as e: + raise ValueError("Unknown repository {!r}.".format(api_path)) from e + + @staticmethod + def __produce_auth_header(username: str, password: str) -> dict: + encoded_auth_bytes = base64.b64encode(bytes('{}:{}'.format(username, password), encoding='utf-8')) + return { + 'Authorization': 'Basic {}'.format(encoded_auth_bytes.decode('utf-8')) + } diff --git a/intelmq/bots/collectors/github_api/collector_github_contents_api.py b/intelmq/bots/collectors/github_api/collector_github_contents_api.py new file mode 100644 index 000000000..ddf99e765 --- /dev/null +++ b/intelmq/bots/collectors/github_api/collector_github_contents_api.py @@ -0,0 +1,83 @@ +# -*- coding: utf-8 -*- +""" +GITHUB contents API Collector bot + +PARAMETERS: + 'basic_auth_username': github Basic authentication username (REQUIRED) + 'basic_auth_password': github Basic authentication password (REQUIRED) + 'repository': only one format ('<user>/<repository>') is acceptable (REQUIRED) + 'extra_fields': comma-separated list of fields to extract from file (DEFAULT = []) + 'regex': file regex (DEFAULT = '.*.json') +""" +import re + +from intelmq.lib.exceptions import InvalidArgument +from intelmq.bots.collectors.github_api.collector_github_api import GithubAPICollectorBot + +try: + import requests +except ImportError: + requests = None + + +class GithubContentsAPICollectorBot(GithubAPICollectorBot): + + def init(self): + super().init() + if hasattr(self.parameters, 'repository'): + self.__base_api_url = 'https://api.github.com/repos/{}/contents'.format( + getattr(self.parameters, 'repository')) + if hasattr(self.parameters, 'regex'): + try: + re.compile(getattr(self.parameters, 'regex')) + except Exception: + raise InvalidArgument('regex', expected='string', got=getattr(self.parameters, 'regex')) + else: + raise InvalidArgument('regex', expected='string', got=None) + if not hasattr(self.parameters, 'repository'): + raise InvalidArgument('repository', expected='string') + if hasattr(self.parameters, 'extra_fields'): + try: + self.__extra_fields = [x.strip() for x in getattr(self.parameters, 'extra_fields').split(',')] + except Exception: + raise InvalidArgument('extra_fields', expected='comma-separated list') + else: + self.__extra_fields = [] + + def process_request(self): + try: + for item in self.__recurse_repository_files(self.__base_api_url): + report = self.new_report() + report['raw'] = item['content'] + report['feed.url'] = item['download_url'] + if item['extra'] != {}: + report.add('extra.file_metadata', item['extra']) + self.send_message(report) + except requests.RequestException as e: + raise
ConnectionError(e) + + def __recurse_repository_files(self, base_api_url: str, extracted_github_files: list = None) -> list: + if extracted_github_files is None: + extracted_github_files = [] + data = self.github_api(base_api_url) + for github_file in data: + if github_file['type'] == 'dir': + extracted_github_files = self.__recurse_repository_files(github_file['url'], extracted_github_files) + elif github_file['type'] == 'file' and bool(re.search(getattr(self.parameters, 'regex', '.*.json'), + github_file['name'])): + extracted_github_file_data = { + 'download_url': github_file['download_url'], + 'content': requests.get(github_file['download_url']).content, + 'extra': {} + } + for field_name in self.__extra_fields: + if field_name in github_file: + extracted_github_file_data['extra'][field_name] = github_file[field_name] + else: + self.logger.warning("Field '{}' does not exist in the Github file data.".format(field_name)) + extracted_github_files.append(extracted_github_file_data) + + return extracted_github_files + + +BOT = GithubContentsAPICollectorBot diff --git a/intelmq/bots/collectors/http/REQUIREMENTS.txt b/intelmq/bots/collectors/http/REQUIREMENTS.txt deleted file mode 100644 index 81b39e402..000000000 --- a/intelmq/bots/collectors/http/REQUIREMENTS.txt +++ /dev/null @@ -1 +0,0 @@ -requests>=2.2.0 diff --git a/intelmq/bots/collectors/http/collector_http.py b/intelmq/bots/collectors/http/collector_http.py index f62faa18c..fd427b73e 100644 --- a/intelmq/bots/collectors/http/collector_http.py +++ b/intelmq/bots/collectors/http/collector_http.py @@ -21,6 +21,7 @@ from intelmq.lib.bot import CollectorBot from intelmq.lib.utils import unzip, create_request_session_from_bot +from intelmq.lib.exceptions import MissingDependencyError try: import requests @@ -43,7 +44,7 @@ class HTTPCollectorBot(CollectorBot): def init(self): if requests is None: - raise ValueError('Could not import requests. Please install it.') + raise MissingDependencyError("requests") self.set_request_parameters() diff --git a/intelmq/bots/collectors/http/collector_http_stream.py b/intelmq/bots/collectors/http/collector_http_stream.py index 880c792e2..aac9d305e 100644 --- a/intelmq/bots/collectors/http/collector_http_stream.py +++ b/intelmq/bots/collectors/http/collector_http_stream.py @@ -21,6 +21,7 @@ from intelmq.lib.bot import CollectorBot from intelmq.lib.utils import decode, create_request_session_from_bot +from intelmq.lib.exceptions import MissingDependencyError class HTTPStreamCollectorBot(CollectorBot): @@ -29,7 +30,7 @@ class HTTPStreamCollectorBot(CollectorBot): def init(self): if requests is None: - raise ValueError('Could not import requests. 
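Outside the bot framework, the traversal performed by __recurse_repository_files above can be sketched as a plain function (unauthenticated, assuming the GitHub contents-API fields used above: type, url, name and download_url):

    import re

    import requests

    def list_matching_files(api_url: str, regex: str = r'.*\.json') -> list:
        """Walk a repository tree via the GitHub contents API, depth-first."""
        matches = []
        for entry in requests.get(api_url).json():
            if entry['type'] == 'dir':
                matches.extend(list_matching_files(entry['url'], regex))
            elif entry['type'] == 'file' and re.search(regex, entry['name']):
                matches.append(entry['download_url'])
        return matches

    # e.g. list_matching_files('https://api.github.com/repos/StrangerealIntel/DailyIOC/contents')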
Please install it.') + raise MissingDependencyError("requests") self.set_request_parameters() self.session = create_request_session_from_bot(self) diff --git a/intelmq/bots/collectors/mail/REQUIREMENTS.txt b/intelmq/bots/collectors/mail/REQUIREMENTS.txt index b1ce83736..578eb32f3 100644 --- a/intelmq/bots/collectors/mail/REQUIREMENTS.txt +++ b/intelmq/bots/collectors/mail/REQUIREMENTS.txt @@ -1,2 +1 @@ imbox>=0.8.5 -requests>=2.2.0 diff --git a/intelmq/bots/collectors/mail/collector_mail_attach.py b/intelmq/bots/collectors/mail/collector_mail_attach.py index e711859f7..2472faacc 100644 --- a/intelmq/bots/collectors/mail/collector_mail_attach.py +++ b/intelmq/bots/collectors/mail/collector_mail_attach.py @@ -27,7 +27,12 @@ def process_message(self, uid, message): if not attach: continue - attach_filename = attach['filename'] + try: + attach_filename = attach['filename'] + except KeyError: + # https://github.com/certtools/intelmq/issues/1538 + self.logger.debug('Skipping attachment because of missing filename.') + continue if attach_filename.startswith('"'): # for imbox versions older than 0.9.5, see also above attach_filename = attach_filename[1:-1] diff --git a/intelmq/bots/collectors/mail/collector_mail_url.py b/intelmq/bots/collectors/mail/collector_mail_url.py index 836a58e3e..cd770cf88 100644 --- a/intelmq/bots/collectors/mail/collector_mail_url.py +++ b/intelmq/bots/collectors/mail/collector_mail_url.py @@ -9,6 +9,7 @@ from intelmq.lib.utils import create_request_session_from_bot, file_name_from_response from .lib import MailCollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: import requests @@ -21,7 +22,7 @@ class MailURLCollectorBot(MailCollectorBot): def init(self): super().init() if requests is None: - raise ValueError('Could not import requests. Please install it.') + raise MissingDependencyError("requests") # Build request self.set_request_parameters() diff --git a/intelmq/bots/collectors/mail/lib.py b/intelmq/bots/collectors/mail/lib.py index 6fc87bdc5..568652854 100644 --- a/intelmq/bots/collectors/mail/lib.py +++ b/intelmq/bots/collectors/mail/lib.py @@ -4,6 +4,7 @@ import ssl from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: import imbox @@ -15,10 +16,10 @@ class MailCollectorBot(CollectorBot): def init(self): if imbox is None: - raise ValueError('Could not import imbox. Please install it.') + raise MissingDependencyError("imbox") if getattr(self.parameters, 'attach_unzip', None) and not self.extract_files: - self.parameters.extract_files = True + self.extract_files = True self.logger.warning("The parameter 'attach_unzip' is deprecated and will " "be removed in version 4.0. 
Use 'extract_files' instead.") @@ -30,6 +31,8 @@ def connect_mailbox(self): self.parameters.mail_user, self.parameters.mail_password, self.parameters.mail_ssl, + # imbox itself uses ports 143/993 as default depending on SSL setting + port=getattr(self.parameters, 'mail_port', None), ssl_context=ssl_custom_context) return mailbox diff --git a/intelmq/bots/collectors/microsoft/REQUIREMENTS.txt b/intelmq/bots/collectors/microsoft/REQUIREMENTS.txt index 7e1376352..ad4a20cfe 100644 --- a/intelmq/bots/collectors/microsoft/REQUIREMENTS.txt +++ b/intelmq/bots/collectors/microsoft/REQUIREMENTS.txt @@ -1 +1 @@ -azure-storage>=0.33 +azure-storage-blob>=12.0.0 diff --git a/intelmq/bots/collectors/microsoft/collector_azure.py b/intelmq/bots/collectors/microsoft/collector_azure.py index 94df9277b..dc85fc706 100644 --- a/intelmq/bots/collectors/microsoft/collector_azure.py +++ b/intelmq/bots/collectors/microsoft/collector_azure.py @@ -1,64 +1,61 @@ # -*- coding: utf-8 -*- """ -Uses the azure.storage module from https://pypi.org/project/azure-storage/0.33.0/ -Tested with 0.33, probably works with >0.30 too. +Uses the azure.storage.blob module. Tested with version 12.13.1 """ -import datetime import gzip import io -import urllib.parse - -import pytz from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError +from intelmq.lib.cache import Cache try: - import azure.storage + from azure.storage.blob import ContainerClient +except ImportError: + ContainerClient = None # noqa +try: + from azure.storage.blob._shared.base_client import create_configuration except ImportError: - azure = None # noqa + create_configuration = None # noqa class MicrosoftAzureCollectorBot(CollectorBot): def init(self): - if azure is None: - raise ValueError('Could not import azure.storage. 
Please install it.') + if ContainerClient is None or create_configuration is None: + raise MissingDependencyError("azure.storage") + self.config = create_configuration(storage_sdk='blob') if hasattr(self.parameters, 'https_proxy'): - parsed = urllib.parse.urlparse(self.parameters.https_proxy) - self.proxy = {'host': parsed.hostname, 'port': parsed.port, - 'user': parsed.username, 'password': parsed.password} - else: - self.proxy = None + # Create a storage configuration object and update the proxy policy + self.config.proxy_policy.proxies = { + 'http': self.parameters.http_proxy, + 'https': self.parameters.https_proxy, + } + + self.cache = Cache(self.parameters.redis_cache_host, + self.parameters.redis_cache_port, + self.parameters.redis_cache_db, + getattr(self.parameters, 'redis_cache_ttl', 864000), # 10 days + getattr(self.parameters, "redis_cache_password", + None) + ) def process(self): - storage_client = azure.storage.CloudStorageAccount(self.parameters.account_name, - self.parameters.account_key) - blob_service = storage_client.create_block_blob_service() - if self.proxy: - blob_service.set_proxy(**self.proxy) - containers = blob_service.list_containers() - for container in containers: - self.logger.info('Processing Container %r.', container.name) - if container.name == 'heartbeat': - if self.parameters.delete: - blob_service.delete_container(container.name) + container_client = ContainerClient.from_connection_string(conn_str=self.parameters.connection_string, + container_name=self.parameters.container_name, + _configuration=self.config) + for blob in container_client.list_blobs(): + if self.cache.get(blob.name): + self.logger.debug('Processed file %r already.', blob.name) continue - time_container_fetch = datetime.datetime.now(pytz.timezone('UTC')) - for blob in blob_service.list_blobs(container.name): - self.logger.debug('Processing blob %r.', blob.name) - time_blob_fetch = datetime.datetime.now(pytz.timezone('UTC')) - blob_obj = io.BytesIO(blob_service.get_blob_to_bytes(container.name, - blob.name).content) - unzipped = gzip.GzipFile(fileobj=blob_obj).read().decode() - report = self.new_report() - report.add('raw', unzipped) - self.send_message(report) - if self.parameters.delete: - blob_service.delete_blob(container.name, blob.name, - if_unmodified_since=time_blob_fetch) - if self.parameters.delete: - blob_service.delete_container(container.name, - if_unmodified_since=time_container_fetch) + self.logger.debug('Processing blob %r.', blob.name) + blob_obj = io.BytesIO() + container_client.download_blob(blob).readinto(blob_obj) + blob_obj.seek(0) + report = self.new_report() + report.add('raw', gzip.GzipFile(fileobj=blob_obj).read().decode()) + self.send_message(report) + self.cache.set(blob.name, 1) # Redis-py >= 3.0.0 does not allow True BOT = MicrosoftAzureCollectorBot diff --git a/intelmq/bots/collectors/microsoft/collector_interflow.py b/intelmq/bots/collectors/microsoft/collector_interflow.py index b98d68bed..505794c72 100644 --- a/intelmq/bots/collectors/microsoft/collector_interflow.py +++ b/intelmq/bots/collectors/microsoft/collector_interflow.py @@ -38,6 +38,7 @@ from intelmq.lib.bot import CollectorBot from intelmq.lib.cache import Cache from intelmq.lib.utils import parse_relative, create_request_session_from_bot +from intelmq.lib.exceptions import MissingDependencyError try: import requests @@ -65,7 +66,7 @@ def check_ttl_time(self): def init(self): if requests is None: - raise ValueError('Could not import requests. 
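The blob handling above reduces to a get-before/set-after cycle against the Redis cache; a stripped-down sketch using the Cache signature from this changeset (connection values are the BOTS defaults shown earlier, blob names are hypothetical):

    from intelmq.lib.cache import Cache

    # host, port, db, ttl (10 days), password
    cache = Cache('127.0.0.1', 6379, 5, 864000, None)

    for blob_name in ('blob-2020-06-17.gz', 'blob-2020-06-18.gz'):
        if cache.get(blob_name):
            continue  # already processed in an earlier run
        # ... download, decompress and send the report here ...
        cache.set(blob_name, 1)  # redis-py >= 3.0.0 does not accept True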
Please install it.') + raise MissingDependencyError("requests") self.set_request_parameters() @@ -139,7 +140,16 @@ def process(self): report.add('feed.url', download_url) report.add('raw', raw) self.send_message(report) - self.cache.set(file['Name'], True) + # redis-py >= 3.0.0 no longer supports boolean values, cast to string explicitly, also for backwards compatibility + self.cache.set(file['Name'], "True") + + def print_filelist(self): + """ Can be called from the debugger for example. """ + self.logger.debug('Downloading file list.') + files = self.session.get(URL_LIST) + files.raise_for_status() + self.logger.debug('Downloaded file list, %s entries.', len(files.json())) + print(files.text) BOT = MicrosoftInterflowCollectorBot diff --git a/intelmq/bots/collectors/misp/REQUIREMENTS.txt b/intelmq/bots/collectors/misp/REQUIREMENTS.txt index 24be57ef3..8cd2e4fb5 100644 --- a/intelmq/bots/collectors/misp/REQUIREMENTS.txt +++ b/intelmq/bots/collectors/misp/REQUIREMENTS.txt @@ -1 +1,2 @@ -pymisp>=2.4.36 +pymisp>=2.4.36,<=2.4.119.1; python_version < '3.6' +pymisp>=2.4.36; python_version >= '3.6' diff --git a/intelmq/bots/collectors/misp/collector.py b/intelmq/bots/collectors/misp/collector.py index 70aeab042..ab6043a1a 100644 --- a/intelmq/bots/collectors/misp/collector.py +++ b/intelmq/bots/collectors/misp/collector.py @@ -1,42 +1,63 @@ # -*- coding: utf-8 -*- -""" -A collector for grabbing appropriately tagged events from MISP. +"""A collector for grabbing appropriately tagged events from MISP. Parameters: - misp_url: URL of the MISP server - misp_key: API key for accessing MISP - - misp_verify: true or false, check the validity of the certificate - misp_tag_to_process: MISP tag identifying events to be processed - misp_tag_processed: MISP tag identifying events that have been processed + +PyMISP versions released after January 2020 will no longer support the +"old" PyMISP class. +For compatibility: + * older versions of pymisp still work with this bot + * the deprecated parameter `misp_verify` will create a DeprecationWarning """ import json -from urllib.parse import urljoin +import warnings +import sys from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: - from pymisp import PyMISP + if sys.version_info >= (3, 6): + try: + from pymisp import ExpandedPyMISP as PyMISP + except ImportError: + from pymisp import PyMISP + else: + from pymisp import PyMISP + except ImportError: PyMISP = None + import_fail_reason = 'import' +except SyntaxError: + PyMISP = None + import_fail_reason = 'syntax' class MISPCollectorBot(CollectorBot): def init(self): - if PyMISP is None: - raise ValueError('Could not import pymisp.
Please install it.') + if PyMISP is None and import_fail_reason == 'syntax': + raise MissingDependencyError("pymisp", + version='>=2.4.36,<=2.4.119.1', + additional_text="Python versions below 3.6 are " + "only supported by pymisp <= 2.4.119.1.") + elif PyMISP is None: + raise MissingDependencyError("pymisp") + + if hasattr(self.parameters, 'misp_verify'): + self.parameters.http_verify_cert = self.parameters.misp_verify + warnings.warn("The parameter 'misp_verify' is deprecated in favor of " + "'http_verify_cert'.", DeprecationWarning) # Initialize MISP connection self.misp = PyMISP(self.parameters.misp_url, self.parameters.misp_key, - self.parameters.misp_verify) - - # URLs used for deleting and adding MISP event tags - self.misp_add_tag_url = urljoin(self.parameters.misp_url, - 'events/addTag') - self.misp_del_tag_url = urljoin(self.parameters.misp_url, - 'events/removeTag') + self.parameters.http_verify_cert) def process(self): # Grab the events from MISP @@ -45,30 +66,32 @@ def process(self): ) # Process the response and events - if 'response' in misp_result: - # Extract the MISP event details - for e in misp_result['response']: - misp_event = e['Event'] + # Compatibility with old pymisp versions before 2019: + if 'response' in misp_result: + misp_result = misp_result['response'] - # Send the results to the parser - report = self.new_report() - report.add('raw', json.dumps(misp_event, sort_keys=True)) - report.add('feed.url', self.parameters.misp_url) - self.send_message(report) + # Extract the MISP event details + for e in misp_result: + misp_event = e['Event'] - # Finally, update the tags on the MISP events. - # Note PyMISP does not currently support this so we use - # the API URLs directly with the requests module. + # Send the results to the parser + report = self.new_report() + report.add('raw', json.dumps(misp_event, sort_keys=True)) + report.add('feed.url', self.parameters.misp_url) + self.send_message(report) - for misp_event in misp_result['response']: - # Remove the 'to be processed' tag - self.misp.remove_tag(misp_event, - self.parameters.misp_tag_to_process) + # Finally, update the tags on the MISP events. + for misp_event in misp_result: + if hasattr(self.parameters, 'misp_tag_processed'): # Add a 'processed' tag to the event - self.misp.add_tag(misp_event, - self.parameters.misp_tag_processed) + self.misp.tag(misp_event['uuid'], + self.parameters.misp_tag_processed) + + # Remove the 'to be processed' tag + self.misp.untag(misp_event['uuid'], + self.parameters.misp_tag_to_process) BOT = MISPCollectorBot diff --git a/intelmq/bots/collectors/opendxl/collector.py b/intelmq/bots/collectors/opendxl/collector.py index a09492a9b..6a1248142 100644 --- a/intelmq/bots/collectors/opendxl/collector.py +++ b/intelmq/bots/collectors/opendxl/collector.py @@ -11,6 +11,7 @@ import time from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: from dxlclient.callbacks import EventCallback @@ -24,7 +25,7 @@ class openDXLCollectorBot(CollectorBot): def init(self): if DxlClient is None: - raise ValueError('Could not import dxlclient.
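The misp_verify handling above is a small, reusable recipe for renaming a bot parameter without breaking old configurations; a generic sketch (the helper name is hypothetical, the old/new names are just this bot's example):

    import warnings

    def alias_deprecated_parameter(parameters, old: str, new: str):
        """Copy a legacy parameter to its new name and warn once."""
        if hasattr(parameters, old):
            setattr(parameters, new, getattr(parameters, old))
            warnings.warn("The parameter %r is deprecated in favor of %r." % (old, new),
                          DeprecationWarning)

    # e.g. alias_deprecated_parameter(self.parameters, 'misp_verify', 'http_verify_cert')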
Please install it.') + raise MissingDependencyError("dxlclient") self.dxlclient = None def process(self): diff --git a/intelmq/bots/collectors/rsync/collector_rsync.py b/intelmq/bots/collectors/rsync/collector_rsync.py index 789c26777..06ffa829e 100644 --- a/intelmq/bots/collectors/rsync/collector_rsync.py +++ b/intelmq/bots/collectors/rsync/collector_rsync.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from os import mkdir, path -from subprocess import call +from subprocess import run, PIPE from intelmq import VAR_STATE_PATH from intelmq.lib.bot import CollectorBot @@ -17,8 +17,14 @@ def init(self): def process(self): self.logger.info("Updating file {}.".format(self.parameters.file)) - if call(["rsync", path.join(self.parameters.rsync_path, self.parameters.file), self.rsync_data_directory]) != 0: - raise ValueError("Rsync file {} failed.".format(self.parameters.file)) + process = run(["rsync", path.join(self.parameters.rsync_path, self.parameters.file), + self.rsync_data_directory], + stderr=PIPE) + if process.returncode != 0: + raise ValueError("Rsync on file {!r} failed with exitcode {} and stderr {!r}." + "".format(self.parameters.file, + process.returncode, + process.stderr)) report = self.new_report() with open(path.join(self.rsync_data_directory, self.parameters.file), "r") as rsync_file: report.add("raw", rsync_file.read()) diff --git a/intelmq/bots/collectors/rt/REQUIREMENTS.txt b/intelmq/bots/collectors/rt/REQUIREMENTS.txt index 0011a9577..d3c3c39cd 100644 --- a/intelmq/bots/collectors/rt/REQUIREMENTS.txt +++ b/intelmq/bots/collectors/rt/REQUIREMENTS.txt @@ -1,2 +1 @@ -requests>=2.2.0 rt>=1.0.9 diff --git a/intelmq/bots/collectors/rt/collector_rt.py b/intelmq/bots/collectors/rt/collector_rt.py index 396b7b37e..51193e9d8 100644 --- a/intelmq/bots/collectors/rt/collector_rt.py +++ b/intelmq/bots/collectors/rt/collector_rt.py @@ -7,6 +7,7 @@ from intelmq.lib.bot import CollectorBot from intelmq.lib.utils import (parse_relative, create_request_session_from_bot, file_name_from_response, unzip) +from intelmq.lib.exceptions import MissingDependencyError try: import rt @@ -29,9 +30,9 @@ class RTCollectorBot(CollectorBot): def init(self): if requests is None: - raise ValueError('Could not import requests. Please install it.') + raise MissingDependencyError("requests") if rt is None: - raise ValueError('Could not import rt. Please install it.') + raise MissingDependencyError("rt") if getattr(self.parameters, 'search_not_older_than', None): try: diff --git a/intelmq/bots/collectors/stomp/collector.py b/intelmq/bots/collectors/stomp/collector.py index 64e6812f2..a462da2c6 100644 --- a/intelmq/bots/collectors/stomp/collector.py +++ b/intelmq/bots/collectors/stomp/collector.py @@ -2,13 +2,14 @@ import os.path from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: import stomp except ImportError: stomp = None else: - class StompListener(stomp.listener.PrintingListener): + class StompListener(stomp.PrintingListener): """ the stomp listener gets called asynchronously for every STOMP message @@ -17,6 +18,10 @@ def __init__(self, n6stompcollector, conn, destination): self.stompbot = n6stompcollector self.conn = conn self.destination = destination + super().__init__() + if stomp.__version__ >= (5, 0, 0): + # set the function directly, as the argument print_to_log logs to the generic logger + self._PrintingListener__print = n6stompcollector.logger.debug def on_heartbeat_timeout(self): self.stompbot.logger.info("Heartbeat timeout. 
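The rsync collector now uses subprocess.run instead of subprocess.call so that stderr can be included in the error message; the essence, with a hypothetical rsync source and target:

    from subprocess import PIPE, run

    process = run(['rsync', 'rsync://example.org/feeds/feed.txt', '/tmp/feed/'],
                  stderr=PIPE)
    if process.returncode != 0:
        # unlike subprocess.call, the captured stderr can go into the error message
        raise ValueError('Rsync failed with exitcode %d and stderr %r.'
                         % (process.returncode, process.stderr))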
Attempting to re-connect.") @@ -40,24 +45,27 @@ def on_disconnected(self): connect_and_subscribe(self.conn, self.stompbot.logger, self.destination) -def connect_and_subscribe(conn, logger, destination): - conn.start() - connect_status = conn.connect(wait=True) - subscribe_status = conn.subscribe(destination=destination, - id=1, ack='auto') - logger.info('Successfully connected and subscribed. ' - 'Connect status: %r, subscribe status: %r.', - connect_status, subscribe_status) +def connect_and_subscribe(conn, logger, destination, start=False): + if start: + conn.start() + conn.connect(wait=True) + conn.subscribe(destination=destination, + id=1, ack='auto') + logger.info('Successfully connected and subscribed.') class StompCollectorBot(CollectorBot): """ main class for the STOMP protocol collector """ collector_empty_process = True + conn = False # define here so shutdown method can check for it def init(self): if stomp is None: - raise ValueError('Could not import stomp. Please install it.') + raise MissingDependencyError("stomp") + elif stomp.__version__ < (4, 1, 8): + raise MissingDependencyError("stomp", version="4.1.8", + installed=stomp.__version__) self.server = getattr(self.parameters, 'server', 'n6stream.cert.pl') self.port = getattr(self.parameters, 'port', 61614) @@ -83,15 +91,16 @@ def init(self): ssl_key_file=self.ssl_cl_cert_key, ssl_cert_file=self.ssl_cl_cert, ssl_ca_certs=self.ssl_ca_cert, - wait_on_receipt=True, heartbeats=(self.heartbeat, self.heartbeat)) self.conn.set_listener('', StompListener(self, self.conn, self.exchange)) - self.conn.start() - connect_and_subscribe(self.conn, self.logger, self.exchange) + connect_and_subscribe(self.conn, self.logger, self.exchange, + start=stomp.__version__ < (4, 1, 20)) def shutdown(self): + if not stomp or not self.conn: + return try: self.conn.disconnect() except stomp.exception.NotConnectedException: diff --git a/intelmq/bots/collectors/tcp/collector.py b/intelmq/bots/collectors/tcp/collector.py index 95df5ed7a..76c6353a2 100644 --- a/intelmq/bots/collectors/tcp/collector.py +++ b/intelmq/bots/collectors/tcp/collector.py @@ -48,7 +48,7 @@ def process(self): if msg: # if the partner connection ended, our message are already sent conn.sendall(b"Ok") pass - except socket.error as e: + except socket.error: self.logger.exception("Socket error.") finally: if conn: @@ -60,10 +60,7 @@ def connect(self): struct.pack('ii', 1, 0)) # immediately unbind port after closing so that we can restart self.con.bind(self.address) self.con.settimeout(15) - if sys.version_info[1] > 4: # remove when we're having Python 3.5+, let here `self.con.listen()` - self.con.listen() - else: - self.con.listen(1) + self.con.listen() self.logger.info("Connected successfully to %s:%s.", self.address[0], self.address[1]) def shutdown(self): diff --git a/intelmq/bots/collectors/twitter/REQUIREMENTS.txt b/intelmq/bots/collectors/twitter/REQUIREMENTS.txt index ab4acc7ee..0c412e3e3 100644 --- a/intelmq/bots/collectors/twitter/REQUIREMENTS.txt +++ b/intelmq/bots/collectors/twitter/REQUIREMENTS.txt @@ -1,2 +1 @@ python-twitter -requests diff --git a/intelmq/bots/collectors/twitter/collector_twitter.py b/intelmq/bots/collectors/twitter/collector_twitter.py index 591460843..301ea2d96 100644 --- a/intelmq/bots/collectors/twitter/collector_twitter.py +++ b/intelmq/bots/collectors/twitter/collector_twitter.py @@ -36,6 +36,7 @@ from intelmq.lib.bot import CollectorBot from intelmq.lib.utils import create_request_session_from_bot +from intelmq.lib.exceptions import 
MissingDependencyError try: import requests @@ -52,17 +53,17 @@ class TwitterCollectorBot(CollectorBot): def init(self): if requests is None: - raise ValueError("Could not import 'requests'. Please install it.") + raise MissingDependencyError("requests") if twitter is None: - raise ValueError("Could not import 'twitter'. Please install it.") + raise MissingDependencyError("twitter") self.current_time_in_seconds = int(time.time()) self.target_timelines = [] - if getattr(self.parameters, "target_timelines", '') is not '': + if getattr(self.parameters, "target_timelines", '') != '': self.target_timelines.extend( self.parameters.target_timelines.split(',')) self.tweet_count = int(getattr(self.parameters, "tweet_count", 20)) self.follow_urls = [] - if getattr(self.parameters, "follow_urls", '') is not '': + if getattr(self.parameters, "follow_urls", '') != '': self.follow_urls.extend( self.parameters.follow_urls.split(',')) self.include_rts = getattr(self.parameters, "include_rts", False) diff --git a/intelmq/bots/collectors/xmpp/collector.py b/intelmq/bots/collectors/xmpp/collector.py index c899b6424..20e788e5d 100644 --- a/intelmq/bots/collectors/xmpp/collector.py +++ b/intelmq/bots/collectors/xmpp/collector.py @@ -29,6 +29,7 @@ from intelmq.lib.bot import CollectorBot +from intelmq.lib.exceptions import MissingDependencyError try: import sleekxmpp @@ -83,7 +84,7 @@ class XMPPCollectorBot(CollectorBot): def init(self): if sleekxmpp is None: - raise ValueError('Could not import sleekxmpp. Please install it.') + raise MissingDependencyError("sleekxmpp") # Retrieve Parameters from configuration xmpp_user = getattr(self.parameters, "xmpp_user", None) diff --git a/intelmq/bots/experts/asn_lookup/expert.py b/intelmq/bots/experts/asn_lookup/expert.py index 70a4635cb..ff8f6af93 100644 --- a/intelmq/bots/experts/asn_lookup/expert.py +++ b/intelmq/bots/experts/asn_lookup/expert.py @@ -2,6 +2,7 @@ import os from intelmq.lib.bot import Bot +from intelmq.lib.exceptions import MissingDependencyError try: import pyasn @@ -13,7 +14,7 @@ class ASNLookupExpertBot(Bot): def init(self): if pyasn is None: - raise ValueError('Could not import pyasn. 
Please install it.') + raise MissingDependencyError("pyasn") try: self.database = pyasn.pyasn(self.parameters.database) diff --git a/intelmq/bots/parsers/urlvir/__init__.py b/intelmq/bots/experts/csv_converter/__init__.py similarity index 100% rename from intelmq/bots/parsers/urlvir/__init__.py rename to intelmq/bots/experts/csv_converter/__init__.py diff --git a/intelmq/bots/experts/csv_converter/expert.py b/intelmq/bots/experts/csv_converter/expert.py new file mode 100644 index 000000000..c3f71c750 --- /dev/null +++ b/intelmq/bots/experts/csv_converter/expert.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- +import csv +import io +from intelmq.lib.bot import Bot + + +class CSVConverterExpertBot(Bot): + + def init(self): + self.fieldnames = self.parameters.fieldnames.split(',') + self.delimiter = getattr(self.parameters, 'delimiter', ',') + + def process(self): + event = self.receive_message() + event.set_default_value('') + out = io.StringIO() + writer = csv.writer(out, delimiter=self.delimiter) + row = [] + for field in self.fieldnames: + row.append(event[field]) + writer.writerow(row) + event['output'] = out.getvalue().rstrip() + + self.send_message(event) + self.acknowledge_message() + + +BOT = CSVConverterExpertBot diff --git a/intelmq/bots/experts/cymru_whois/lib.py b/intelmq/bots/experts/cymru_whois/lib.py index f89cc4db7..325813b74 100644 --- a/intelmq/bots/experts/cymru_whois/lib.py +++ b/intelmq/bots/experts/cymru_whois/lib.py @@ -152,6 +152,7 @@ def __asn_query_parse(text): Inc.,US" Exception: "1930 | EU | ripencc | | RCCN Rede Ciencia Tecnologia e Sociedade (RCTS),PT" + Unicode: "10417 | BR | lacnic | 2000-02-15 | Funda\195\131\194\167\195\131\194\163o de Desenvolvimento da Pesquisa, BR" """ result = {} @@ -162,6 +163,8 @@ items = Cymru.__query_parse(text) if items[4]: - result['as_name'] = items[4] + # unicode characters need to be decoded explicitly + # with the help of https://stackoverflow.com/questions/60890590/ + result['as_name'] = items[4].encode('latin1').decode('utf8') return result diff --git a/intelmq/bots/experts/filter/expert.py b/intelmq/bots/experts/filter/expert.py index db3172245..d03c823b3 100644 --- a/intelmq/bots/experts/filter/expert.py +++ b/intelmq/bots/experts/filter/expert.py @@ -7,21 +7,26 @@ from dateutil import parser from intelmq.lib.bot import Bot -from intelmq.lib.utils import parse_relative +from intelmq.lib.utils import parse_relative, TIMESPANS class FilterExpertBot(Bot): _message_processed_verb = 'Forwarded' - # decide format of timefilter value and parse it def parse_timeattr(self, time_attr): - try: - absolute = parser.parse(time_attr) - except ValueError: + """ + Parses relative or absolute time specification, decides how to parse by + checking if the string contains any timespan identifier. + + See also https://github.com/certtools/intelmq/issues/1523 + dateutil.parser.parse detects strings like `10 hours` as absolute time.
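Stripped of logging, the reworked parse_timeattr shown here dispatches on whether the value contains a timespan word; a sketch using the same helpers:

    from datetime import timedelta

    from dateutil import parser
    from intelmq.lib.utils import TIMESPANS, parse_relative

    def parse_timefilter(value: str):
        if any(timespan in value for timespan in TIMESPANS):
            return timedelta(minutes=parse_relative(value))  # e.g. '10 hours'
        return parser.parse(value)  # e.g. '2020-06-18'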
+ """ + if any([timespan in time_attr for timespan in TIMESPANS.keys()]): relative = timedelta(minutes=parse_relative(time_attr)) self.logger.info("Filtering out events to (relative time) %r.", relative) return relative else: + absolute = parser.parse(time_attr) self.logger.info("Filtering out events to (absolute time) %r.", absolute) return absolute diff --git a/intelmq/bots/experts/generic_db_lookup/REQUIREMENTS.txt b/intelmq/bots/experts/generic_db_lookup/REQUIREMENTS.txt index 525434acf..0c0e7536f 100644 --- a/intelmq/bots/experts/generic_db_lookup/REQUIREMENTS.txt +++ b/intelmq/bots/experts/generic_db_lookup/REQUIREMENTS.txt @@ -1 +1 @@ -psycopg2-binary>=2.5.5 \ No newline at end of file +psycopg2-binary>=2.5.5 diff --git a/intelmq/bots/experts/idea/expert.py b/intelmq/bots/experts/idea/expert.py index 839fc5a80..19564ab2c 100644 --- a/intelmq/bots/experts/idea/expert.py +++ b/intelmq/bots/experts/idea/expert.py @@ -3,7 +3,7 @@ IDEA classification: https://idea.cesnet.cz/en/classifications """ from base64 import b64decode -from collections import Mapping, Sequence +from collections.abc import Mapping, Sequence from urllib.parse import quote_plus from uuid import uuid4 diff --git a/intelmq/bots/experts/maxmind_geoip/expert.py b/intelmq/bots/experts/maxmind_geoip/expert.py index d25e3c8bb..976bb0eef 100644 --- a/intelmq/bots/experts/maxmind_geoip/expert.py +++ b/intelmq/bots/experts/maxmind_geoip/expert.py @@ -5,6 +5,7 @@ """ from intelmq.lib.bot import Bot +from intelmq.lib.exceptions import MissingDependencyError try: import geoip2.database @@ -16,7 +17,7 @@ class GeoIPExpertBot(Bot): def init(self): if geoip2 is None: - raise ValueError('Could not import geoip2. Please install it.') + raise MissingDependencyError("geoip2") try: self.database = geoip2.database.Reader(self.parameters.database) diff --git a/intelmq/bots/experts/maxmind_geoip/update-geoip-data b/intelmq/bots/experts/maxmind_geoip/update-geoip-data index 79cc65a25..35658ec2a 100755 --- a/intelmq/bots/experts/maxmind_geoip/update-geoip-data +++ b/intelmq/bots/experts/maxmind_geoip/update-geoip-data @@ -12,20 +12,23 @@ PATH=/bin:/usr/bin # This product includes GeoLite2 data created by MaxMind, available from # http://www.maxmind.com. +# requires a license key +# https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/ set -e trap cleanup EXIT fail() { - echo >&2 "$@" + echo >&2 -e "$@" exit 23 } setup() { - [ "$#" -eq 1 ] || fail "Exactly one argument DESTINATION-FILE must be given." 
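With the MaxMind license requirement, the updated script now takes two arguments instead of one; a hypothetical invocation would be `update-geoip-data /opt/intelmq/var/lib/bots/maxmind_geoip/GeoLite2-City.mmdb YOUR_LICENSE_KEY` (both the destination path and the key are placeholders).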
+ [ "$#" -eq 2 ] || fail "Exactly two arguments must be given:\n * DESTINATION-FILE (with absolute path, mmdb format)\n * GEOIP-LICENSE (See https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-geolite2-databases/ for details.)" DEST_FILE="$1" + GEOIP_LICENSE="$2" TMP_DIR=`mktemp -d` dest_dir=`dirname "$DEST_FILE"` [ -d "$dest_dir" ] || mkdir -p "$dest_dir" @@ -39,9 +42,9 @@ cleanup() fetch_and_install() { cd "$TMP_DIR" - curl -s -S -O https://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz - gunzip -q GeoLite2-City.mmdb - mv -f GeoLite2-City.mmdb "$DEST_FILE" + curl -s -S -o GeoLite2-City.mmdb.tar.gz "https://download.maxmind.com/app/geoip_download?edition_id=GeoLite2-City&suffix=tar.gz&license_key=$GEOIP_LICENSE" + tar -mzxf GeoLite2-City.mmdb.tar.gz + mv -f GeoLite2-City_*/GeoLite2-City.mmdb "$DEST_FILE" } setup "$@" diff --git a/intelmq/bots/experts/mcafee/expert_mar.py b/intelmq/bots/experts/mcafee/expert_mar.py index 8c07377a5..31761d4fb 100644 --- a/intelmq/bots/experts/mcafee/expert_mar.py +++ b/intelmq/bots/experts/mcafee/expert_mar.py @@ -12,12 +12,16 @@ try: from dxlclient.client_config import DxlClientConfig from dxlclient.client import DxlClient - from dxlmarclient import MarClient, ResultConstants except ImportError: DxlClient = None +try: + from dxlmarclient import MarClient, ResultConstants +except ImportError: + MarClient = None # imports for additional libraries and intelmq from intelmq.lib.bot import Bot +from intelmq.lib.exceptions import MissingDependencyError class MARExpertBot(Bot): @@ -81,8 +85,10 @@ class MARExpertBot(Bot): def init(self): if DxlClient is None: - raise ValueError('Could not import dxlclient or dxlmarclient. ' - 'Please install them.') + raise MissingDependencyError('dxlclient') + if MarClient is None: + raise MissingDependencyError('dxlmarclient') + self.config = DxlClientConfig.create_dxl_config_from_file(self.parameters.dxl_config_file) def process(self): diff --git a/intelmq/bots/experts/misp/REQUIREMENTS.txt b/intelmq/bots/experts/misp/REQUIREMENTS.txt new file mode 100644 index 000000000..36e2d50d8 --- /dev/null +++ b/intelmq/bots/experts/misp/REQUIREMENTS.txt @@ -0,0 +1 @@ +pymisp>=2.4.117.3 diff --git a/intelmq/tests/bots/parsers/bitcash/__init__.py b/intelmq/bots/experts/misp/__init__.py similarity index 100% rename from intelmq/tests/bots/parsers/bitcash/__init__.py rename to intelmq/bots/experts/misp/__init__.py diff --git a/intelmq/bots/experts/misp/expert.py b/intelmq/bots/experts/misp/expert.py new file mode 100644 index 000000000..b6522eba7 --- /dev/null +++ b/intelmq/bots/experts/misp/expert.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +""" +An expert to for looking up values in MISP. 
+ +Parameters: + - misp_url: URL of the MISP server + - misp_key: API key for accessing MISP + - http_verify_cert: true or false, check the validity of the certificate +""" +import sys + +from intelmq.lib.bot import Bot +from intelmq.lib.exceptions import MissingDependencyError + +try: + if sys.version_info >= (3, 6): + from pymisp import ExpandedPyMISP +except ImportError: + ExpandedPyMISP = None + + +class MISPExpertBot(Bot): + + def init(self): + if sys.version_info < (3, 6): + raise ValueError('This bot requires Python >= 3.6.') + if ExpandedPyMISP is None: + raise MissingDependencyError('pymisp', '>=2.4.117.3') + + # Initialize MISP connection + self.misp = ExpandedPyMISP(self.parameters.misp_url, + self.parameters.misp_key, + self.parameters.http_verify_cert) + + def process(self): + event = self.receive_message() + + if 'source.ip' in event: + + # Grab the attributes from MISP + # TODO: Run the query in reverse order (new->old) + misp_result = self.misp.search('attributes', value=event['source.ip'], + page=1, limit=1, pythonify=True) + if misp_result: + attribute = misp_result[0] + # Process the response + event.add('misp.attribute_uuid', attribute.uuid) + event.add('misp.event_uuid', attribute.Event.uuid) + + self.send_message(event) + self.acknowledge_message() + + +BOT = MISPExpertBot diff --git a/intelmq/bots/experts/modify/examples/default.conf b/intelmq/bots/experts/modify/examples/default.conf index f4ed6bd88..19a2982ff 100755 --- a/intelmq/bots/experts/modify/examples/default.conf +++ b/intelmq/bots/experts/modify/examples/default.conf @@ -1,15 +1,4 @@ [ - { - "rulename": "Abuse.ch zeus", - "if": { - "feed.url": ".*zeustracker.*download=.*$", - "classification.identifier": "", - "feed.name": "^Abuse.ch$" - }, - "then": { - "classification.identifier": "zeus" - } - }, { "rulename": "Abuse.ch feodo", "if": { diff --git a/intelmq/bots/experts/modify/expert.py b/intelmq/bots/experts/modify/expert.py index cf43aff32..ece93abb9 100644 --- a/intelmq/bots/experts/modify/expert.py +++ b/intelmq/bots/experts/modify/expert.py @@ -63,6 +63,8 @@ def init(self): if isinstance(expression, str) and expression != '': self.config[-1]["if"][field] = re.compile(expression, **self.re_kwargs) + self.maximum_matches = getattr(self.parameters, 'maximum_matches', None) + def matches(self, identifier, event, condition): matches = {} @@ -107,13 +109,18 @@ def apply_action(self, event, action, matches): def process(self): event = self.receive_message() + num_matches = 0 for rule in self.config: rule_id, rule_selection, rule_action = rule['rulename'], rule['if'], rule['then'] matches = self.matches(rule_id, event, rule_selection) if matches is not None: + num_matches += 1 self.logger.debug('Apply rule %s.', rule_id) self.apply_action(event, rule_action, matches) + if self.maximum_matches and num_matches >= self.maximum_matches: + self.logger.debug('Reached maximum number of matches, breaking.') + break self.send_message(event) self.acknowledge_message() diff --git a/intelmq/bots/experts/national_cert_contact_certat/REQUIREMENTS.txt b/intelmq/bots/experts/national_cert_contact_certat/REQUIREMENTS.txt deleted file mode 100644 index 81b39e402..000000000 --- a/intelmq/bots/experts/national_cert_contact_certat/REQUIREMENTS.txt +++ /dev/null @@ -1 +0,0 @@ -requests>=2.2.0 diff --git a/intelmq/bots/experts/national_cert_contact_certat/expert.py b/intelmq/bots/experts/national_cert_contact_certat/expert.py index f41be038a..a27e92517 100644 --- a/intelmq/bots/experts/national_cert_contact_certat/expert.py +++ 
b/intelmq/bots/experts/national_cert_contact_certat/expert.py @@ -17,6 +17,7 @@ from intelmq.lib.bot import Bot from intelmq.lib.utils import create_request_session_from_bot +from intelmq.lib.exceptions import MissingDependencyError try: import requests @@ -30,7 +31,7 @@ class NationalCERTContactCertATExpertBot(Bot): def init(self): if requests is None: - raise ValueError('Could not import requests. Please install it.') + raise MissingDependencyError("requests") self.set_request_parameters() self.session = create_request_session_from_bot(self) @@ -50,6 +51,9 @@ def process(self): 'sep': 'semicolon', } req = self.session.get(URL, params=parameters) + if not req.text: + # empty response + continue response = req.text.strip().split(';') ccfield = '{}.geolocation.cc'.format(section) diff --git a/intelmq/bots/experts/reverse_dns/expert.py b/intelmq/bots/experts/reverse_dns/expert.py index 906565160..3f99c000e 100644 --- a/intelmq/bots/experts/reverse_dns/expert.py +++ b/intelmq/bots/experts/reverse_dns/expert.py @@ -2,7 +2,9 @@ from datetime import datetime -import dns +import dns.exception +import dns.resolver +import dns.reversename from intelmq.lib.bot import Bot from intelmq.lib.cache import Cache diff --git a/intelmq/bots/experts/ripe/REQUIREMENTS.txt b/intelmq/bots/experts/ripe/REQUIREMENTS.txt deleted file mode 100644 index 81b39e402..000000000 --- a/intelmq/bots/experts/ripe/REQUIREMENTS.txt +++ /dev/null @@ -1 +0,0 @@ -requests>=2.2.0 diff --git a/intelmq/bots/experts/ripe/expert.py b/intelmq/bots/experts/ripe/expert.py index 782ffb782..aa66a6df7 100644 --- a/intelmq/bots/experts/ripe/expert.py +++ b/intelmq/bots/experts/ripe/expert.py @@ -7,10 +7,12 @@ import json from contextlib import contextmanager +import warnings import intelmq.lib.utils as utils from intelmq.lib.bot import Bot from intelmq.lib.cache import Cache +from intelmq.lib.exceptions import MissingDependencyError try: import requests @@ -60,7 +62,7 @@ class RIPEExpertBot(Bot): def init(self): if requests is None: - raise ValueError("Could not import 'requests'. Please install the package.") + raise MissingDependencyError("requests") self.__mode = getattr(self.parameters, 'mode', 'append') self.__query = { @@ -88,43 +90,36 @@ def __initialize_cache(self): getattr(self.parameters, "redis_cache_password", None)) def process(self): - with self.event_context() as event: - for target in {'source.', 'destination.'}: - abuse_key = target + "abuse_contact" - abuse = set(event.get(abuse_key).split(',')) if self.__mode == 'append' and abuse_key in event else set() - - asn = event.get(target + "asn", None) - if asn: - if self.__query['stat_asn']: - abuse.update(self.__perform_cached_query('stat', asn)) - if self.__query['db_asn']: - abuse.update(self.__perform_cached_query('db_asn', asn)) - - ip = event.get(target + "ip", None) - if ip: - if self.__query['stat_ip']: - abuse.update(self.__perform_cached_query('stat', ip)) - if self.__query['db_ip']: - abuse.update(self.__perform_cached_query('db_ip', ip)) - if self.__query['stat_geo']: - info = self.__perform_cached_query('stat_geolocation', ip) - - should_overwrite = self.__mode == 'replace' - - for local_key, ripe_key in self.GEOLOCATION_REPLY_TO_INTERNAL: - if ripe_key in info: - event.add(target + "geolocation." 
+ local_key, info[ripe_key], overwrite=should_overwrite) - - event.add(abuse_key, ','.join(abuse), overwrite=True) - - @contextmanager - def event_context(self): event = self.receive_message() - try: - yield event - finally: - self.send_message(event) - self.acknowledge_message() + for target in {'source.', 'destination.'}: + abuse_key = target + "abuse_contact" + abuse = set(event.get(abuse_key).split(',')) if self.__mode == 'append' and abuse_key in event else set() + + asn = event.get(target + "asn", None) + if asn: + if self.__query['stat_asn']: + abuse.update(self.__perform_cached_query('stat', asn)) + if self.__query['db_asn']: + abuse.update(self.__perform_cached_query('db_asn', asn)) + + ip = event.get(target + "ip", None) + if ip: + if self.__query['stat_ip']: + abuse.update(self.__perform_cached_query('stat', ip)) + if self.__query['db_ip']: + abuse.update(self.__perform_cached_query('db_ip', ip)) + if self.__query['stat_geo']: + info = self.__perform_cached_query('stat_geolocation', ip) + + should_overwrite = self.__mode == 'replace' + + for local_key, ripe_key in self.GEOLOCATION_REPLY_TO_INTERNAL: + if ripe_key in info: + event.add(target + "geolocation." + local_key, info[ripe_key], overwrite=should_overwrite) + + event.add(abuse_key, ','.join(abuse), overwrite=True) + self.send_message(event) + self.acknowledge_message() def __perform_cached_query(self, type, resource): cached_value = self.__cache.get('{}:{}'.format(type, resource)) @@ -148,7 +143,17 @@ def __perform_cached_query(self, type, resource): pass raise ValueError(STATUS_CODE_ERROR.format(response.status_code)) try: - data = self.REPLY_TO_DATA[type](response.json()) + response_data = response.json() + + # geolocation was marked as under maintenance by this, see + # https://lists.cert.at/pipermail/intelmq-users/2020-March/000140.html + status = response_data.get('data_call_status', '') + if status.startswith('maintenance'): + warnings.warn('The API call %s is currently under maintenance. ' + 'Response: %r. This warning is only given once per bot run.' + '' % (type, status)) + + data = self.REPLY_TO_DATA[type](response_data) self.__cache.set('{}:{}'.format(type, resource), (json.dumps(list(data) if isinstance(data, set) else data) if data else CACHE_NO_VALUE)) return data diff --git a/intelmq/bots/experts/ripencc_abuse_contact/REQUIREMENTS.txt b/intelmq/bots/experts/ripencc_abuse_contact/REQUIREMENTS.txt deleted file mode 100644 index 81b39e402..000000000 --- a/intelmq/bots/experts/ripencc_abuse_contact/REQUIREMENTS.txt +++ /dev/null @@ -1 +0,0 @@ -requests>=2.2.0 diff --git a/intelmq/bots/experts/sieve/expert.py b/intelmq/bots/experts/sieve/expert.py index 81f91e27a..72257e5d6 100644 --- a/intelmq/bots/experts/sieve/expert.py +++ b/intelmq/bots/experts/sieve/expert.py @@ -14,6 +14,7 @@ from intelmq import HARMONIZATION_CONF_FILE from intelmq.lib import utils from intelmq.lib.bot import Bot +from intelmq.lib.exceptions import MissingDependencyError try: import textx.model @@ -45,7 +46,7 @@ def init(self): @staticmethod def init_metamodel(): if metamodel_from_file is None: - raise ValueError('Could not import textx. 
Please install it')
+            raise MissingDependencyError("textx")
 
         try:
             grammarfile = os.path.join(os.path.dirname(__file__), 'sieve.tx')
@@ -81,7 +82,7 @@ def check(parameters):
             metamodel = SieveExpertBot.init_metamodel()
             SieveExpertBot.read_sieve_file(parameters['file'], metamodel)
-        except Exception as e:
+        except Exception:
             return [['error', 'Validation of Sieve file failed with the following traceback: %r' % traceback.format_exc()]]
 
     def process(self):
diff --git a/intelmq/bots/outputs/amqptopic/REQUIREMENTS.txt b/intelmq/bots/outputs/amqptopic/REQUIREMENTS.txt
index 4747f167e..7386a39be 100644
--- a/intelmq/bots/outputs/amqptopic/REQUIREMENTS.txt
+++ b/intelmq/bots/outputs/amqptopic/REQUIREMENTS.txt
@@ -1,2 +1 @@
 pika>=0.10.0
-
diff --git a/intelmq/bots/outputs/amqptopic/output.py b/intelmq/bots/outputs/amqptopic/output.py
index e29c0bbcf..05b7bbc32 100644
--- a/intelmq/bots/outputs/amqptopic/output.py
+++ b/intelmq/bots/outputs/amqptopic/output.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 import ssl
 
-from intelmq.lib.bot import Bot
-from intelmq.lib.utils import base64_decode
+from intelmq.lib.bot import OutputBot
+from intelmq.lib.exceptions import MissingDependencyError
 
 try:
     import pika
@@ -10,12 +10,12 @@
     pika = None
 
 
-class AMQPTopicOutputBot(Bot):
+class AMQPTopicOutputBot(OutputBot):
     connection = None
 
     def init(self):
         if pika is None:
-            raise ValueError("Could not import library 'pika'. Please install it.")
+            raise MissingDependencyError("pika")
 
         self.connection = None
         self.channel = None
@@ -32,7 +32,6 @@ def init(self):
         else:
             self.publish_raises_nack = True
 
-        self.keep_raw_field = self.parameters.keep_raw_field
         self.delivery_mode = self.parameters.delivery_mode
         self.content_type = self.parameters.content_type
         self.exchange = self.parameters.exchange_name
@@ -56,17 +55,12 @@ def init(self):
             connection_attempts=self.parameters.connection_attempts,
             **self.kwargs)
         self.routing_key = self.parameters.routing_key
+        self.format_routing_key = getattr(self.parameters, 'format_routing_key', False)
         self.properties = pika.BasicProperties(
             content_type=self.content_type, delivery_mode=self.delivery_mode)
         self.connect_server()
 
-        self.hierarchical = getattr(self.parameters, "message_hierarchical", False)
-        self.with_type = getattr(self.parameters, "message_with_type", False)
-        self.jsondict_as_string = getattr(self.parameters, "message_jsondict_as_string", False)
-
-        self.single_key = getattr(self.parameters, 'single_key', None)
-
     def connect_server(self):
         self.logger.info('AMQP Connecting to %s:%s/%s.', self.connection_host, self.connection_port, self.connection_vhost)
@@ -102,25 +96,18 @@ def process(self):
             self.connect_server()
 
         event = self.receive_message()
-
-        if self.single_key:
-            if self.single_key == 'raw':
-                body = base64_decode(event.get('raw', ''))
-            else:
-                body = str(event.get(self.single_key))
-        else:
-            if not self.keep_raw_field:
-                del event['raw']
-            body = event.to_json(hierarchical=self.hierarchical,
-                                 with_type=self.with_type,
-                                 jsondict_as_string=self.jsondict_as_string)
+        body = self.export_event(event, return_type=str)
         # replace unicode characters when encoding (#1296)
         body = body.encode(errors='backslashreplace')
+        if self.format_routing_key:
+            routing_key = self.routing_key.format(ev=event)
+        else:
+            routing_key = self.routing_key
 
         try:
             if not self.channel.basic_publish(exchange=self.exchange,
-                                              routing_key=self.routing_key,
+                                              routing_key=routing_key,
                                               body=body,
                                               properties=self.properties,
                                               mandatory=True):
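The new `format_routing_key` parameter above treats `routing_key` as a Python format string, with the event reachable as `ev`, so events can be fanned out by one of their fields. A minimal sketch of a runtime configuration using it (the bot id and key pattern are hypothetical, not taken from the patch)::

    "amqp-topic-output": {
        "parameters": {
            "exchange_name": "intelmq",
            "routing_key": "events.{ev[classification.taxonomy]}",
            "format_routing_key": true
        }
    }

With `format_routing_key` unset or false, `routing_key` is used verbatim, matching the previous behaviour.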
diff --git a/intelmq/bots/outputs/elasticsearch/REQUIREMENTS.txt b/intelmq/bots/outputs/elasticsearch/REQUIREMENTS.txt
index e5f9cc5ff..60b8a1c92 100644
--- a/intelmq/bots/outputs/elasticsearch/REQUIREMENTS.txt
+++ b/intelmq/bots/outputs/elasticsearch/REQUIREMENTS.txt
@@ -1,2 +1 @@
-elasticsearch>=5.0.0,<6.0.0
-
+elasticsearch>=7.0.0,<8.0.0
diff --git a/intelmq/bots/outputs/elasticsearch/output.py b/intelmq/bots/outputs/elasticsearch/output.py
index 10c25b2c6..c743cac2f 100644
--- a/intelmq/bots/outputs/elasticsearch/output.py
+++ b/intelmq/bots/outputs/elasticsearch/output.py
@@ -11,6 +11,7 @@
 from json import loads
 
 from intelmq.lib.bot import Bot
+from intelmq.lib.exceptions import MissingDependencyError
 
 try:
     from elasticsearch import Elasticsearch
@@ -54,7 +55,7 @@ class ElasticsearchOutputBot(Bot):
 
     def init(self):
         if Elasticsearch is None:
-            raise ValueError('Missing elasticsearch module.')
+            raise MissingDependencyError('elasticsearch', version='>=7.0.0,<8.0.0')
 
         self.elastic_host = getattr(self.parameters,
                                     'elastic_host', '127.0.0.1')
@@ -70,8 +71,6 @@ def init(self):
                                               'ssl_ca_certificate', None)
         self.ssl_show_warnings = getattr(self.parameters,
                                          'ssl_show_warnings', True)
-        self.elastic_doctype = getattr(self.parameters,
-                                       'elastic_doctype', 'events')
         self.replacement_char = getattr(self.parameters,
                                         'replacement_char', None)
         self.flatten_fields = getattr(self.parameters,
@@ -125,7 +124,6 @@ def process(self):
                                       replacement=self.replacement_char)
 
         self.es.index(index=self.get_index(event_dict, default_date=datetime.today().date()),
-                      doc_type=self.elastic_doctype,
                       body=event_dict)
         self.acknowledge_message()
 
diff --git a/intelmq/bots/outputs/file/output.py b/intelmq/bots/outputs/file/output.py
index 86f4351cc..a754d063b 100644
--- a/intelmq/bots/outputs/file/output.py
+++ b/intelmq/bots/outputs/file/output.py
@@ -4,11 +4,10 @@
 from collections import defaultdict
 from pathlib import Path
 
-from intelmq.lib.bot import Bot
-from intelmq.lib.utils import base64_decode
+from intelmq.lib.bot import OutputBot
 
 
-class FileOutputBot(Bot):
+class FileOutputBot(OutputBot):
     file = None
     is_multithreadable = False
 
@@ -23,7 +22,6 @@ def init(self):
         if not self.format_filename:
             self.open_file(self.parameters.file)
             self.logger.info("File %r is open.", self.parameters.file)
-        self.single_key = getattr(self.parameters, 'single_key', None)
 
     def open_file(self, filename: str = None):
         if self.file is not None:
@@ -64,12 +62,7 @@ def process(self):
             if not self.file or filename != self.file.name:
                 self.open_file(filename)
 
-        if self.single_key:
-            event_data = str(event.get(self.single_key))
-            if self.single_key == 'raw':
-                event_data = base64_decode(event_data)
-        else:
-            event_data = event.to_json(hierarchical=self.parameters.hierarchical_output)
+        event_data = self.export_event(event, return_type=str)
 
         try:
             self.file.write(event_data)
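The file-writing bots above and below now delegate the `single_key` and related formatting parameters to `OutputBot.export_event` instead of duplicating that logic per bot. A minimal sketch of a custom output bot built on the same helper (the stderr sink is illustrative only, not part of this patch)::

    import sys

    from intelmq.lib.bot import OutputBot


    class ExampleOutputBot(OutputBot):
        def process(self):
            event = self.receive_message()
            # export_event applies the user's formatting parameters
            # (single_key, keep_raw_field, hierarchical output, ...)
            body = self.export_event(event, return_type=str)
            sys.stderr.write(body + '\n')  # replace with a real sink
            self.acknowledge_message()


    BOT = ExampleOutputBot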
diff --git a/intelmq/bots/outputs/files/output.py b/intelmq/bots/outputs/files/output.py
index cb9db6a26..2d09bcae6 100644
--- a/intelmq/bots/outputs/files/output.py
+++ b/intelmq/bots/outputs/files/output.py
@@ -5,12 +5,11 @@
 import socket
 import time
 from os import path
-
-from intelmq.lib.bot import Bot
+from intelmq.lib.bot import OutputBot
 from intelmq.lib.exceptions import ConfigurationError
 
 
-class FilesOutputBot(Bot):
+class FilesOutputBot(OutputBot):
 
     def init(self):
         self.tmp = self._ensure_path(self.parameters.tmp)
@@ -60,10 +59,8 @@ def create_unique_file(self):
 
     def process(self):
         event = self.receive_message()
-        if self.parameters.single_key:
-            event_data = str(event.get(self.parameters.single_key))
-        else:
-            event_data = event.to_json(hierarchical=self.parameters.hierarchical_output)
+        event_data = self.export_event(event, return_type=str)
+
         # Create file in tmp dir
         f, name = self.create_unique_file()
         f.write(event_data)
diff --git a/intelmq/bots/outputs/mcafee/output_esm_ip.py b/intelmq/bots/outputs/mcafee/output_esm_ip.py
index f07276962..5ec95566f 100644
--- a/intelmq/bots/outputs/mcafee/output_esm_ip.py
+++ b/intelmq/bots/outputs/mcafee/output_esm_ip.py
@@ -7,11 +7,12 @@
 esm_user: username to connect to ESM
 esm_password: Password of esm_user
 esm_watchlist: Destination watchlist to update
-field: field from intelMQ message to extract (e.g. destination.ip)
+field: field from IntelMQ message to extract (e.g. destination.ip)
 """
 
 from intelmq.lib.bot import Bot
+from intelmq.lib.exceptions import MissingDependencyError
 
 try:
     from mfe_saw.esm import ESM
@@ -23,7 +24,7 @@ class ESMIPOutputBot(Bot):
 
     def init(self):
         if ESM is None:
-            raise ValueError("Could not import 'mfe_saw'. Please install it.")
+            raise MissingDependencyError("mfe_saw")
 
         self.esm = ESM()
         try:
diff --git a/intelmq/bots/outputs/misp/REQUIREMENTS.txt b/intelmq/bots/outputs/misp/REQUIREMENTS.txt
new file mode 100644
index 000000000..9f2860348
--- /dev/null
+++ b/intelmq/bots/outputs/misp/REQUIREMENTS.txt
@@ -0,0 +1 @@
+pymisp>=2.4.120; python_version >= '3.6'
diff --git a/intelmq/tests/bots/parsers/nothink/__init__.py b/intelmq/bots/outputs/misp/__init__.py
similarity index 100%
rename from intelmq/tests/bots/parsers/nothink/__init__.py
rename to intelmq/bots/outputs/misp/__init__.py
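The bot below (and `output_feed.py` further down) guards its optional `pymisp` dependency with the pattern that recurs throughout this changeset: import at module level, remember why the import failed, and raise `MissingDependencyError` from `init()` so the user gets an installation hint instead of a bare traceback. Reduced to a minimal sketch with a made-up package name::

    from intelmq.lib.bot import Bot
    from intelmq.lib.exceptions import MissingDependencyError

    try:
        import somepackage  # hypothetical optional dependency
    except ImportError:
        somepackage = None


    class ExampleBot(Bot):
        def init(self):
            if somepackage is None:
                # names the package and minimum version to install
                raise MissingDependencyError('somepackage', version='>=1.0')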
diff --git a/intelmq/bots/outputs/misp/output_api.py b/intelmq/bots/outputs/misp/output_api.py
new file mode 100644
index 000000000..7c62366cf
--- /dev/null
+++ b/intelmq/bots/outputs/misp/output_api.py
@@ -0,0 +1,236 @@
+"""Connect to a MISP instance and add event as MISPObject if not there already.
+
+SPDX-FileCopyrightText: 2020 Intevation GmbH
+SPDX-License-Identifier: AGPL-3.0-or-later
+
+Funding: of initial version by SUNET
+Author(s):
+  * Bernhard Reiter
+
+TODO: A shortened copy of this documentation is kept at `docs/Bots.md`, please
+      keep it current, when changing something.
+
+Parameters:
+  - add_feed_provider_as_tag: bool (use true when in doubt)
+  - add_feed_name_as_tag: bool (use true when in doubt)
+  - misp_additional_correlation_fields: list of fields for which
+        the correlation flags will be enabled (in addition to those which are
+        in significant_fields)
+  - misp_additional_tags: list of additional tags to set; these are not
+        searched for when looking for duplicates
+  - misp_key: str, API key for accessing MISP
+  - misp_publish: bool, if a new MISP event should be set to "publish".
+        Expert setting as MISP may really make it "public"!
+        (Use false when in doubt.)
+  - misp_tag_for_bot: str, used to mark MISP events
+  - misp_to_ids_fields: list of fields for which the to_ids flags will be set
+  - misp_url: str, URL of the MISP server
+  - significant_fields: list of intelmq field names
+
+The `significant_fields` values
+will be searched for in all MISP attribute values
+and if all values are found in the one MISP event, no new MISP event
+will be created.
+(The reason that all values are matched without considering the
+attribute type is a technical limitation of the
+search functionality exposed by the MISP/pymisp 2.4.120 API.)
+Instead, if the existing MISP events have the same feed.provider
+and match closely, their timestamp will be updated.
+
+If a new MISP event is inserted the `significant_fields` and the
+`misp_additional_correlation_fields` will be the attributes
+where correlation is enabled.
+
+Make sure to build the IntelMQ botnet in a way that the rate of incoming
+events stays within what MISP can handle, as IntelMQ can process events much
+faster than MISP (which is by design, as MISP is meant for manual handling).
+Also remove the fields of the IntelMQ events with an expert bot
+that you do not want to be inserted into MISP.
+
+Example (of some parameters in JSON)::
+
+    "add_feed_provider_as_tag": true,
+    "add_feed_name_as_tag": true,
+    "misp_additional_correlation_fields": ["source.asn"],
+    "misp_additional_tags": ["OSINT", "osint:certainty==\"90\""],
+    "misp_publish": false,
+    "misp_to_ids_fields": ["source.fqdn", "source.reverse_dns"],
+    "significant_fields": ["source.fqdn", "source.reverse_dns"],
+
+
+Originally developed with pymisp v2.4.120 (which needs python v>=3.6).
+"""
+import datetime
+
+from intelmq.lib.bot import OutputBot
+from intelmq.lib.exceptions import MissingDependencyError
+
+try:
+    import pymisp
+except ImportError:
+    pymisp = None
+    import_fail_reason = 'import'
+except SyntaxError:
+    pymisp = None
+    import_fail_reason = 'syntax'
+
+MISPOBJECT_NAME = 'intelmq_event'
+
+
+class MISPAPIOutputBot(OutputBot):
+    is_multithreadable = False
+
+    def init(self):
+        if pymisp is None and import_fail_reason == 'syntax':
+            raise MissingDependencyError(
+                "pymisp",
+                version='>=2.4.120',
+                additional_text="Python versions >= 3.6 are "
+                                "required for this 'pymisp' version."
+            )
+        elif pymisp is None:
+            raise MissingDependencyError('pymisp', version='>=2.4.120')
+
+        self.logger.info('Significant fields are {}.'.format(
+            self.parameters.significant_fields))
+
+        self.logger.info('Connecting to MISP instance at {}.'.format(
+            self.parameters.misp_url))
+        self.misp = pymisp.api.PyMISP(self.parameters.misp_url,
+                                      self.parameters.misp_key,
+                                      self.parameters.http_verify_cert)
+
+        self.misp.toggle_global_pythonify()
+
+    def process(self):
+        intelmq_event = self.receive_message().to_dict(jsondict_as_string=True)
+
+        # search for existing events that have all values that are significant
+        values_to_search_for = []
+        for sig_field in self.parameters.significant_fields:
+            if sig_field in intelmq_event and intelmq_event[sig_field]:
+                values_to_search_for.append(intelmq_event[sig_field])
+
+        if values_to_search_for == []:
+            msg = 'All significant_fields empty -> skipping event (raw={}).'
+            self.logger.warning(msg.format(intelmq_event.get('raw')))
+        else:
+            vquery = self.misp.build_complex_query(
+                and_parameters=values_to_search_for
+            )
+            # limit=20 is a safeguard against searches that'll find too much,
+            # as the returning python objects can take up much time and memory
+            # and because there should only be one matching MISPEvent
+            r = self.misp.search(tags=self.parameters.misp_tag_for_bot,
+                                 value=vquery, limit=20)
+            if len(r) > 0:
+                msg = 'Found MISP events matching {}: {} -> not inserting.'
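+                # all significant values already exist in MISP: refresh the
+                # matching events' timestamps instead of inserting a duplicate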
+ self.logger.info(msg.format(vquery, [event.id for event in r])) + + for misp_event in r: + self._update_misp_event(misp_event, intelmq_event) + else: + self._insert_misp_event(intelmq_event) + + self.acknowledge_message() + + def _update_misp_event(self, misp_event, intelmq_event): + """Update timestamp on a found MISPEvent if it matches closely.""" + # As we insert only one MISPObject, we only examine the first one + misp_o = misp_event.get_objects_by_name(MISPOBJECT_NAME)[0] + + all_found = True + for field in ['feed.provider'] + self.parameters.significant_fields: + attributes = misp_o.get_attributes_by_relation(field) + value = attributes[0].value if len(attributes) > 0 else None + if not (value == intelmq_event.get(field)): + all_found = False + break + + if all_found: + misp_event.timestamp = datetime.datetime.now() + self.misp.update_event(misp_event) + msg = 'Updated timestamp of MISP event with id: {}' + self.logger.info(msg.format(misp_event.id)) + + def _insert_misp_event(self, intelmq_event): + """Insert a new MISPEvent.""" + new_misp_event = pymisp.MISPEvent() + + if 'feed.provider' in intelmq_event: + new_misp_event.info = 'from {} via IntelMQ'.format( + intelmq_event['feed.provider']) + else: + new_misp_event.info = 'via IntelMQ' + + # set the tags + new_misp_event.add_tag(self.parameters.misp_tag_for_bot) + + if (self.parameters.add_feed_provider_as_tag and + 'feed.provider' in intelmq_event): + new_tag = 'IntelMQ:feed.provider="{}"'.format( + intelmq_event['feed.provider']) + new_misp_event.add_tag(new_tag) + + if (self.parameters.add_feed_name_as_tag and + 'feed.name' in intelmq_event): + new_tag = 'IntelMQ:feed.name="{}"'.format( + intelmq_event['feed.name']) + new_misp_event.add_tag(new_tag) + + for new_tag in self.parameters.misp_additional_tags: + new_misp_event.add_tag(new_tag) + + # build the MISPObject and its attributes + obj = new_misp_event.add_object(name=MISPOBJECT_NAME) + + fields_to_correlate = ( + self.parameters.significant_fields + + self.parameters.misp_additional_correlation_fields + ) + + for object_relation, value in intelmq_event.items(): + try: + obj.add_attribute( + object_relation, + value=value, + disable_correlation=( + object_relation not in fields_to_correlate), + to_ids=( + object_relation in self.parameters.misp_to_ids_fields) + ) + except pymisp.NewAttributeError: + msg = 'Ignoring "{}":"{}" as not in object template.' 
+ self.logger.debug(msg.format(object_relation, value)) + + misp_event = self.misp.add_event(new_misp_event) + if self.parameters.misp_publish: + self.misp.publish(misp_event) + self.logger.info( + 'Inserted new MISP event with id: {}'.format(misp_event.id)) + + @staticmethod + def check(parameters): + required_parameters = [ + 'add_feed_provider_as_tag', + 'add_feed_name_as_tag', + 'misp_additional_correlation_fields', + 'misp_additional_tags', + 'misp_key', + 'misp_publish', + 'misp_tag_for_bot', + 'misp_to_ids_fields', + 'misp_url', + 'significant_fields' + ] + missing_parameters = [] + for para in required_parameters: + if para not in parameters: + missing_parameters.append(para) + + if len(missing_parameters) > 0: + return [["error", + "Parameters missing: " + str(missing_parameters)]] + + +BOT = MISPAPIOutputBot diff --git a/intelmq/bots/outputs/misp/output_feed.py b/intelmq/bots/outputs/misp/output_feed.py new file mode 100644 index 000000000..929b2d3fe --- /dev/null +++ b/intelmq/bots/outputs/misp/output_feed.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +import datetime +import json +from pathlib import Path +from uuid import uuid4 +import re + +from intelmq.lib.bot import OutputBot +from intelmq.lib.exceptions import MissingDependencyError +from intelmq.lib.utils import parse_relative + +try: + from pymisp import MISPEvent, MISPOrganisation, NewAttributeError + from pymisp.tools import feed_meta_generator +except ImportError: + # catching SyntaxError because of https://github.com/MISP/PyMISP/issues/501 + MISPEvent = None + import_fail_reason = 'import' +except SyntaxError: + # catching SyntaxError because of https://github.com/MISP/PyMISP/issues/501 + MISPEvent = None + import_fail_reason = 'syntax' + + +# NOTE: This module is compatible with Python 3.6+ + + +class MISPFeedOutputBot(OutputBot): + is_multithreadable = False + + @staticmethod + def check_output_dir(dirname): + output_dir = Path(dirname) + if not output_dir.exists(): + output_dir.mkdir(mode=0o755, parents=True, exist_ok=True) + return True + + def init(self): + if MISPEvent is None and import_fail_reason == 'syntax': + raise MissingDependencyError("pymisp", + version='>=2.4.117.3', + additional_text="Python versions below 3.6 are " + "only supported by pymisp <= 2.4.119.1.") + elif MISPEvent is None: + raise MissingDependencyError('pymisp', version='>=2.4.117.3') + + self.current_event = None + + self.misp_org = MISPOrganisation() + self.misp_org.name = self.parameters.misp_org_name + self.misp_org.uuid = self.parameters.misp_org_uuid + + self.output_dir = Path(self.parameters.output_dir) + MISPFeedOutputBot.check_output_dir(self.output_dir) + + if not hasattr(self.parameters, 'interval_event'): + self.timedelta = datetime.timedelta(hours=1) + else: + self.timedelta = datetime.timedelta(minutes=parse_relative(self.parameters.interval_event)) + + if (self.output_dir / '.current').exists(): + with (self.output_dir / '.current').open() as f: + self.current_file = Path(f.read()) + self.current_event = MISPEvent() + self.current_event.load_file(self.current_file) + + last_min_time, last_max_time = re.findall('IntelMQ event (.*) - (.*)', self.current_event.info)[0] + last_min_time = datetime.datetime.strptime(last_min_time, '%Y-%m-%dT%H:%M:%S.%f') + last_max_time = datetime.datetime.strptime(last_max_time, '%Y-%m-%dT%H:%M:%S.%f') + if last_max_time < datetime.datetime.now(): + self.min_time_current = datetime.datetime.now() + self.max_time_current = self.min_time_current + self.timedelta + self.current_event = None + 
else: + self.min_time_current = last_min_time + self.max_time_current = last_max_time + else: + self.min_time_current = datetime.datetime.now() + self.max_time_current = self.min_time_current + self.timedelta + + def process(self): + + if not self.current_event or datetime.datetime.now() > self.max_time_current: + self.min_time_current = datetime.datetime.now() + self.max_time_current = self.min_time_current + self.timedelta + self.current_event = MISPEvent() + self.current_event.info = ('IntelMQ event {begin} - {end}' + ''.format(begin=self.min_time_current.isoformat(), + end=self.max_time_current.isoformat())) + self.current_event.set_date(datetime.date.today()) + self.current_event.Orgc = self.misp_org + self.current_event.uuid = str(uuid4()) + self.current_file = self.output_dir / '{self.current_event.uuid}.json'.format(self=self) + with (self.output_dir / '.current').open('w') as f: + f.write(str(self.current_file)) + + event = self.receive_message().to_dict(jsondict_as_string=True) + + obj = self.current_event.add_object(name='intelmq_event') + for object_relation, value in event.items(): + try: + obj.add_attribute(object_relation, value=value) + except NewAttributeError: + # This entry isn't listed in the harmonization file, ignoring. + pass + + feed_output = self.current_event.to_feed(with_meta=False) + + with self.current_file.open('w') as f: + json.dump(feed_output, f) + + feed_meta_generator(self.output_dir) + self.acknowledge_message() + + @staticmethod + def check(parameters): + if 'output_dir' not in parameters: + return [["error", "Parameter 'output_dir' not given."]] + try: + created = MISPFeedOutputBot.check_output_dir(parameters['output_dir']) + except IOError: + return [["error", + "Directory %r of parameter 'output_dir' does not exist and could not be created." % parameters['output_dir']]] + else: + if created: + return [["info", + "Directory %r of parameter 'output_dir' did not exist, but has now been created." + "" % parameters['output_dir']]] + + +BOT = MISPFeedOutputBot diff --git a/intelmq/bots/outputs/mongodb/output.py b/intelmq/bots/outputs/mongodb/output.py index 8c2cf86d4..c6390b4b4 100644 --- a/intelmq/bots/outputs/mongodb/output.py +++ b/intelmq/bots/outputs/mongodb/output.py @@ -6,6 +6,7 @@ import dateutil.parser from intelmq.lib.bot import Bot +from intelmq.lib.exceptions import MissingDependencyError try: import pymongo @@ -18,7 +19,7 @@ class MongoDBOutputBot(Bot): def init(self): if pymongo is None: - raise ValueError('Could not import pymongo. 
Please install it.') + raise MissingDependencyError("pymongo") self.pymongo_3 = pymongo.version_tuple >= (3, ) self.pymongo_35 = pymongo.version_tuple >= (3, 5) @@ -29,27 +30,33 @@ def init(self): self.username = getattr(self.parameters, "db_user", None) self.password = getattr(self.parameters, "db_pass", None) + self.port = int(getattr(self.parameters, "port", 27017)) if not self.password: # checking for username is sufficient then self.username = None self.connect() def connect(self): - self.logger.debug('Connecting to MongoDB server.') + self.logger.debug('Getting server info.') + server_info = pymongo.MongoClient(self.parameters.host, self.port).server_info() + server_version = server_info['version'] + server_version_split = tuple(server_version.split('.')) + self.logger.debug('Connecting to MongoDB server version %s.', + server_version) try: - if self.pymongo_35 and self.username: + if self.pymongo_35 and self.username and server_version_split >= ('3', '4'): self.client = pymongo.MongoClient(self.parameters.host, - int(self.parameters.port), + self.port, username=self.username, password=self.password) else: self.client = pymongo.MongoClient(self.parameters.host, - int(self.parameters.port)) + self.port) except pymongo.errors.ConnectionFailure: raise ValueError('Connection to MongoDB server failed.') else: db = self.client[self.parameters.database] - if self.username and not self.pymongo_35: + if self.username and not self.pymongo_35 or server_version_split < ('3', '4'): self.logger.debug('Trying to authenticate to database %s.', self.parameters.database) try: @@ -65,7 +72,7 @@ def process(self): if self.parameters.hierarchical_output: tmp_dict = event.to_dict(hierarchical=True) - if "time"in tmp_dict: + if "time" in tmp_dict: if "observation" in tmp_dict["time"]: tmp_dict["time"]["observation"] = dateutil.parser.parse(tmp_dict["time"]["observation"]) if "source" in tmp_dict["time"]: diff --git a/intelmq/bots/outputs/postgresql/output.py b/intelmq/bots/outputs/postgresql/output.py index f5efc31a4..62a060a85 100644 --- a/intelmq/bots/outputs/postgresql/output.py +++ b/intelmq/bots/outputs/postgresql/output.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Compatibility shim diff --git a/intelmq/bots/outputs/restapi/REQUIREMENTS.txt b/intelmq/bots/outputs/restapi/REQUIREMENTS.txt deleted file mode 100644 index 81b39e402..000000000 --- a/intelmq/bots/outputs/restapi/REQUIREMENTS.txt +++ /dev/null @@ -1 +0,0 @@ -requests>=2.2.0 diff --git a/intelmq/bots/outputs/restapi/output.py b/intelmq/bots/outputs/restapi/output.py index 1b4053a4a..e93ddaa53 100644 --- a/intelmq/bots/outputs/restapi/output.py +++ b/intelmq/bots/outputs/restapi/output.py @@ -7,13 +7,14 @@ import intelmq.lib.utils as utils from intelmq.lib.bot import Bot +from intelmq.lib.exceptions import MissingDependencyError class RestAPIOutputBot(Bot): def init(self): if requests is None: - raise ValueError('Could not import requests. 
Please install it.') + raise MissingDependencyError("requests") self.set_request_parameters() @@ -51,7 +52,8 @@ def process(self): "" % timeoutretries) if not req.ok: - self.logger.debug("Error during message sending with response body: %r.", r.text) + self.logger.debug("Error during message sending, response body: %r.", + req.text) req.raise_for_status() self.logger.debug('Sent message.') self.acknowledge_message() diff --git a/intelmq/bots/outputs/sql/REQUIREMENTS.txt b/intelmq/bots/outputs/sql/REQUIREMENTS.txt index 525434acf..0c0e7536f 100644 --- a/intelmq/bots/outputs/sql/REQUIREMENTS.txt +++ b/intelmq/bots/outputs/sql/REQUIREMENTS.txt @@ -1 +1 @@ -psycopg2-binary>=2.5.5 \ No newline at end of file +psycopg2-binary>=2.5.5 diff --git a/intelmq/bots/outputs/stomp/output.py b/intelmq/bots/outputs/stomp/output.py index 7e107078f..fc4443efc 100644 --- a/intelmq/bots/outputs/stomp/output.py +++ b/intelmq/bots/outputs/stomp/output.py @@ -1,8 +1,9 @@ # -*- coding: utf-8 -*- import os.path -from intelmq.lib.bot import Bot +from intelmq.lib.bot import OutputBot from intelmq.lib.utils import base64_decode +from intelmq.lib.exceptions import MissingDependencyError try: @@ -11,14 +12,14 @@ stomp = None -class StompOutputBot(Bot): +class StompOutputBot(OutputBot): """ main class for the STOMP protocol output bot """ conn = None def init(self): if stomp is None: - raise ValueError('Could not import stomp. Please install it.') + raise MissingDependencyError("stomp") self.server = getattr(self.parameters, 'server', '127.0.0.1') self.port = getattr(self.parameters, 'port', 61614) @@ -34,12 +35,6 @@ def init(self): self.http_verify_cert = getattr(self.parameters, 'http_verify_cert', True) - self.hierarchical = getattr(self.parameters, "message_hierarchical", False) - self.with_type = getattr(self.parameters, "message_with_type", False) - self.jsondict_as_string = getattr(self.parameters, "message_jsondict_as_string", False) - - self.single_key = getattr(self.parameters, 'single_key', None) - # check if certificates exist for f in [self.ssl_ca_cert, self.ssl_cl_cert, self.ssl_cl_cert_key]: if not os.path.isfile(f): @@ -69,17 +64,7 @@ def shutdown(self): def process(self): event = self.receive_message() - if self.single_key: - if self.single_key == 'raw': - body = base64_decode(event.get('raw', '')) - else: - body = str(event.get(self.single_key)) - else: - if not self.keep_raw_field: - del event['raw'] - body = event.to_json(hierarchical=self.hierarchical, - with_type=self.with_type, - jsondict_as_string=self.jsondict_as_string) + body = self.export_event(event) self.conn.send(body=body, destination=self.exchange) diff --git a/intelmq/bots/outputs/xmpp/output.py b/intelmq/bots/outputs/xmpp/output.py index 60ef2589a..7b4cb77fa 100644 --- a/intelmq/bots/outputs/xmpp/output.py +++ b/intelmq/bots/outputs/xmpp/output.py @@ -26,6 +26,7 @@ from intelmq.lib.bot import Bot +from intelmq.lib.exceptions import MissingDependencyError try: import sleekxmpp @@ -81,7 +82,7 @@ class XMPPOutputBot(Bot): def init(self): if sleekxmpp is None: - raise ValueError('Could not import sleekxmpp. 
Please install it.') + raise MissingDependencyError("sleekxmpp") # Retrieve Parameters from configuration xmpp_user = getattr(self.parameters, "xmpp_user", None) diff --git a/intelmq/bots/parsers/abusech/parser_ip.py b/intelmq/bots/parsers/abusech/parser_ip.py index 1437d2bde..03bc3b6be 100644 --- a/intelmq/bots/parsers/abusech/parser_ip.py +++ b/intelmq/bots/parsers/abusech/parser_ip.py @@ -69,7 +69,7 @@ def parse(self, report: dict): if 'Last updated' in line: self.__last_generated_date = dateutil.parser.parse(self.__date_regex.search(line).group(0)).isoformat() - lines = (l for l in raw_lines if not self.__is_comment_line_regex.search(l)) + lines = (line for line in raw_lines if not self.__is_comment_line_regex.search(line)) for line in lines: yield line.strip() diff --git a/intelmq/bots/parsers/abusech/parser_ransomware.py b/intelmq/bots/parsers/abusech/parser_ransomware.py deleted file mode 100644 index 34eb94a26..000000000 --- a/intelmq/bots/parsers/abusech/parser_ransomware.py +++ /dev/null @@ -1,66 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Parser for Intelmq and Abuse.ch Ransomware feed. -""" - -import csv -import io - -from intelmq.lib import utils -from intelmq.lib.bot import Bot -from intelmq.lib.message import Event - - -class AbuseCHRansomwaretrackerParserBot(Bot): - """ Abuse.ch Ransomware Tracker Bot """ - - def process(self): - """ - The Ransomware Tracker has comments in it. - The IP address field can also have more than one address. - The ASN and Country code are being ignored, an expert parser can get those added. - """ - - report = self.receive_message() - raw_report = utils.base64_decode(report.get("raw")) - raw_report = raw_report.translate({0: None}) - - for row in csv.reader(io.StringIO(raw_report)): - if row[0].startswith('#'): - continue - - if '|' in row[7]: - for ipaddr in row[7].split('|'): - new_row = '"' + row[0] + '","' + row[1] + '","' + row[2] + '","' + row[3] \ - + '","' + row[4] + '","' + row[5] + '","' + row[6] + '","' + ipaddr \ - + '","' + row[8] + '","' + row[9] + '"' - - for nrow in csv.reader(io.StringIO(new_row)): - ev = Event(report) - ev.add('classification.taxonomy', 'malicious code') - ev.add('classification.type', 'c2server') - ev.add('classification.identifier', nrow[2].lower()) - ev.add('time.source', nrow[0] + ' UTC', overwrite=True) - ev.add('status', nrow[5]) - if nrow[7] != '0.0.0.0': - ev.add('source.ip', nrow[7]) - ev.add('raw', ','.join(nrow)) - ev.add('source.fqdn', nrow[3], raise_failure=False) - ev.add('source.url', nrow[4], raise_failure=False) - self.send_message(ev) - else: - event = Event(report) - event.add('classification.taxonomy', 'malicious code') - event.add('classification.type', 'c2server') - event.add('classification.identifier', row[2].lower()) - event.add('time.source', row[0] + ' UTC') - event.add('status', row[5]) - event.add('raw', ','.join(row)) - event.add('source.ip', row[7], raise_failure=False) - event.add('source.fqdn', row[3], raise_failure=False) - event.add('source.url', row[4], raise_failure=False) - self.send_message(event) - self.acknowledge_message() - - -BOT = AbuseCHRansomwaretrackerParserBot diff --git a/intelmq/bots/parsers/anubisnetworks/parser.py b/intelmq/bots/parsers/anubisnetworks/parser.py index 8a3ed1884..7b782f7a7 100644 --- a/intelmq/bots/parsers/anubisnetworks/parser.py +++ b/intelmq/bots/parsers/anubisnetworks/parser.py @@ -1,44 +1,12 @@ # -*- coding: utf-8 -*- """ -AnubisNetworks Cyberfeed Stream parser :: +AnubisNetworks Cyberfeed Stream parser - _ts => time.source - trojanfamily => 
malware.name +TODO: Refactor with JSON mapping - env.remote_addr => source.ip - env.remote_port => source.port - env.server_addr => destination.ip - env.server_port => destination.port - env.server_name => destination.fqdn - env.request_method => extra.method - env.cookies => extra.cookies - env.path_info => extra.path_info - env.http_referer => extra.http_referer - - _origin => extra._origin - _provider => extra._provider - pattern_verified => extra.pattern_verified - - _geo_env_remote_addr.country_code => source.geolocation.cc - _geo_env_remote_addr.country_name => source.geolocation.country - _geo_env_remote_addr.region => source.geolocation.region - _geo_env_remote_addr.city => source.geolocation.city - _geo_env_remote_addr.asn => source.geolocation.asn - _geo_env_remote_addr.asn_name => source.geolocation.as_name - _geo_env_remote_addr.longitude => source.geolocation.longitude - _geo_env_remote_addr.longitude => source.geolocation.longitude - _geo_env_remote_addr.ip + netmask => source.network - -Currently ignored and probably useful:: - - btrack{id(hex),checkins(int),first(timestamp),since(int),days(int),changes(int),seen(ts),last_ip(ip),sameip(int)} - Tracking data for devices and relations to sinkholed domains - _geo_btrack_last_ip, _geo_env_server_addr (same fields as _geo_env_remote_addr) - _anbtr (hex) - env.http_xff (list of ips), X-Forwarded header as injected by proxies - dcu_ts (timestamp) - _geo_env_remote_addr.postal_code +There is an old format and a new one - distinguishable by the test cases +Migration to ParserBot does not make sense, as there's only one event per report anyway """ import json @@ -59,48 +27,215 @@ class AnubisNetworksParserBot(Bot): + def init(self): + self.malware_as_identifier = getattr(self.parameters, + 'use_malware_familiy_as_classification_identifier', + True) + def process(self): report = self.receive_message() - raw_report = json.loads(utils.base64_decode(report.get('raw'))) - extra = {} + raw = utils.base64_decode(report.get('raw')).strip() + if not raw: + self.acknowledge_message() + return + raw_report = json.loads(raw) + del raw event = self.new_event(report) event.change("feed.url", event["feed.url"].split("?key=")[0]) event.add("raw", report.get('raw'), sanitize=False) event.add('classification.type', 'malware') + event.add('classification.taxonomy', 'malicious code') event.add('event_description.text', 'Sinkhole attempted connection') for key, value in raw_report.items(): if key == "_ts": event.add('time.source', DateTime.from_timestamp(int(value))) # Source is UTC - if key == "trojanfamily": + elif key == "trojanfamily": event.add('malware.name', value) - if key == "env": - if "remote_addr" in value: - event.add('source.ip', value["remote_addr"]) - if "remote_port" in value: - event.add('source.port', value["remote_port"]) - if "server_addr" in value: - event.add('destination.ip', value["server_addr"]) - if "server_port" in value: - event.add('destination.port', value["server_port"]) - if "server_name" in value: - event.add('destination.fqdn', value["server_name"], - raise_failure=False) - for k in ["request_method", "cookies", "path_info", "http_referer"]: - if k in value: - extra[k] = value[k] - if key == "_geo_env_remote_addr": + elif key == "env": + for subkey, subvalue in value.items(): + if subkey == "remote_addr": + event.add('source.ip', subvalue) + elif subkey == "remote_port": + event.add('source.port', subvalue) + elif subkey == "server_addr": + event.add('destination.ip', subvalue) + elif subkey == "server_port": + 
event.add('destination.port', subvalue) + elif subkey == "server_name": + event.add('destination.fqdn', subvalue, + raise_failure=False) + elif subkey in ["request_method", "cookies", "path_info", "http_referer"]: + event['extra.%s' % subkey] = subvalue + else: + raise ValueError("Unable to parse data field env.%r. Please report this as bug." % subkey) + elif key == "src" or key == 'dst': + identity = 'source' if key == 'src' else 'destination' + for subkey, subvalue in value.items(): + if subkey == "ip": + event.add('%s.ip' % identity, subvalue) + elif subkey == "port": + event.add('%s.port' % identity, subvalue) + else: + raise ValueError("Unable to parse data field env.%r. Please report this as bug." % subkey) + elif key == "_geo_env_remote_addr": for k, v in MAP_geo_env_remote_addr.items(): if k in value: event[v] = value[k] if "ip" in value and "netmask" in value: event.add('source.network', '%s/%s' % (value["ip"], value["netmask"])) - if key in ["_origin", "_provider", "pattern_verified"]: - extra[key] = value - if extra: - event.add('extra', extra) + elif key == 'qtype': + event['extra.dns_query_type'] = value + elif key == 'app_proto': + event.add('protocol.application', value, overwrite=True) + elif key == 'malw': + for subkey, subvalue in value.items(): + if subkey == "severity": + event.add('extra.malware.severity', subvalue) + elif subkey == "family": + if self.malware_as_identifier: + event.add('classification.identifier', subvalue) + else: + if subvalue == value['variant']: + pass + else: + event.add('extra.malware.family', subvalue) + elif subkey == "variant": + event.add('malware.name', subvalue) + elif subkey == "categories": + event.add('extra.malware.categories', subvalue) + elif subkey in ["request_method", "cookies", "path_info", "http_referer"]: + event['extra.%s' % subkey] = subvalue + else: + raise ValueError("Unable to parse data field malw.%r. Please report this as bug." % subkey) + elif key == 'comm': + for subkey, subvalue in value.items(): + if subkey == "proto": + event.add('protocol.application', subvalue, overwrite=True) + elif subkey == "method": + event.add('extra.communication.type', subvalue) + elif subkey == "http": + for subsubkey, subsubvalue in subvalue.items(): + if subsubkey == 'method': + event.add('extra.request_method', subsubvalue) + elif subsubkey == 'host': + if not event.add('destination.fqdn', subsubvalue, raise_failure=False): + # event.add('destination.ip', subsubvalue) + assert raw_report['dst']['ip'] == subsubvalue + elif subsubkey == 'path': + event.add('destination.urlpath', subsubvalue) + elif subsubkey == 'user_agent': + event.add('extra.user_agent', subsubvalue) + elif subsubkey == 'more_headers': + event.add('extra.communication.headers', subsubvalue) + elif subsubkey in ('cookies', 'unverified_domain', 'x_forwarded_for'): + event.add('extra.communication.%s' % subsubkey, subsubvalue) + else: + raise ValueError("Unable to parse data field comm.http.%r. Please report this as bug." % subsubkey) + try: + event.add('destination.url', + '%s://%s%s' % (value['proto'], + subvalue['host'], + subvalue['path'])) + except KeyError: + pass + elif subkey == 'dns': + for subsubkey, subsubvalue in subvalue.items(): + if subsubkey == 'name': + event.add('destination.fqdn', subsubvalue) + elif subsubkey == 'qtype': + event['extra.dns_query_type'] = subsubvalue + else: + raise ValueError("Unable to parse data field comm.dns.%r. Please report this as bug." 
% subsubkey)
+                    elif subkey == "categories":
+                        event.add('extra.malware.categories', subvalue)
+                    elif subkey in ["request_method", "cookies", "path_info", "http_referer"]:
+                        event['extra.%s' % subkey] = subvalue
+                    else:
+                        raise ValueError("Unable to parse data field comm.%r. Please report this as bug." % subkey)
+            elif key == 'tracking':
+                for subkey, subvalue in value.items():
+                    if subkey == "id":
+                        event.add('extra.tracking.id', subvalue)
+                    elif subkey == 'last_ip':
+                        event.add('extra.tracking.last.ip', subvalue)
+                    elif subkey == 'first':
+                        event.add('extra.first_seen', subvalue)
+                    elif subkey == 'seen':
+                        event.add('extra.last_seen', subvalue)
+                    elif subkey == 'changes':
+                        event.add('extra.tracking.changes', subvalue)
+                    elif subkey == 'checkins':
+                        event.add('extra.tracking.checkins', subvalue)
+                    elif subkey == 'days':
+                        event.add('extra.days_seen', subvalue)
+                    elif subkey == 'same_ip':
+                        event.add('extra.tracking.same_ip', subvalue)
+                    elif subkey == 'tr':
+                        event.add('extra.tracking.tr', subvalue)
+                    else:
+                        raise ValueError("Unable to parse data field tracking.%r. Please report this as bug." % subkey)
+            elif key == '_geo_src_ip':
+                event = self.parse_geo(event, value, 'source', raw_report, key)
+            elif key == '_geo_tracking_last_ip':
+                event = self.parse_geo(event, value, 'tracking.last', raw_report, key)
+                if value["path"] != 'tracking.last_ip':
+                    raise ValueError('_geo_tracking_last_ip.path is not \'tracking.last_ip\' (%r).'
+                                     '' % value["path"])
+            elif key == '_geo_comm_http_host':
+                event = self.parse_geo(event, value, 'communication.http.host', raw_report, key)
+                if value["path"] != 'comm.http.host':
+                    raise ValueError('_geo_comm_http_host.path is not \'comm.http.host\' (%r).'
+                                     '' % value["path"])
+            elif key.startswith('_geo_comm_http_x_forwarded_for_'):
+                event = self.parse_geo(event, value,
+                                       'extra.communication.http.%s' % key[15:],
+                                       raw_report, '_geo_comm_http_x_forwarded_for_')
+            elif key in ["_origin", "_provider", "pattern_verified"]:
+                event['extra.%s' % key] = value
+            elif key == "metadata":
+                for subkey, subvalue in value.items():
+                    event['extra.metadata.%s' % subkey] = subvalue
+            else:
+                raise ValueError("Unable to parse data field %r. Please report this as bug." % key)
 
         self.send_message(event)
         self.acknowledge_message()
 
+    def parse_geo(self, event, value, namespace, raw_report, orig_name):
+        for subkey, subvalue in value.items():
+            if subkey in ("ip", 'path'):
+                pass
+            elif subkey == "netmask":
+                event = self.event_add_fallback(event,
+                                                '%s.network' % namespace,
+                                                '%s/%s' % (value['ip'], subvalue))
+            elif subkey == 'country_code':
+                event = self.event_add_fallback(event,
+                                                '%s.geolocation.cc' % namespace,
+                                                subvalue)
+            elif subkey == 'country_name':
+                event = self.event_add_fallback(event,
+                                                '%s.geolocation.country' % namespace,
+                                                subvalue)
+            elif subkey in ('region_code', 'postal_code', "region", "city",
+                            "latitude", "longitude", "dma_code", "area_code",
+                            "metro_code"):
+                event = self.event_add_fallback(event, '%s.geolocation.%s' % (namespace, subkey), subvalue)
+            elif subkey == 'asn':
+                event = self.event_add_fallback(event, '%s.asn' % namespace, subvalue)
+            elif subkey == 'asn_name':
+                event = self.event_add_fallback(event, '%s.as_name' % namespace, subvalue)
+            else:
+                raise ValueError("Unable to parse data field '%s.%s'. "
+                                 "Please report this as bug."
% (orig_name, subkey)) + return event + + def event_add_fallback(self, event, key, value): + try: + event[key] = value + except KeyError: + event['extra.%s' % key] = value + return event + BOT = AnubisNetworksParserBot diff --git a/intelmq/bots/parsers/autoshun/parser.py b/intelmq/bots/parsers/autoshun/parser.py index 47c6f926a..c7c4cf240 100644 --- a/intelmq/bots/parsers/autoshun/parser.py +++ b/intelmq/bots/parsers/autoshun/parser.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- import html -import html.parser -import sys from intelmq.lib import utils from intelmq.lib.bot import ParserBot @@ -19,14 +17,6 @@ class AutoshunParserBot(ParserBot): def parse(self, report): - if sys.version_info[:2] == (3, 4): - # See https://docs.python.org/3/whatsnew/3.4.html#html - # https://docs.python.org/3/whatsnew/3.5.html#changes-in-the-python-api - # raises DeprecationWarning otherwise on 3.4, True by default in 3.5 - self.parser = html.parser.HTMLParser(convert_charrefs=True) - else: - self.parser = html.parser.HTMLParser() - raw_report = utils.base64_decode(report.get("raw")) splitted = raw_report.split("") self.tempdata = [''.join(splitted[:2])] @@ -43,10 +33,7 @@ def parse_line(self, line, report): ip = info[1].split('')[0].strip() last_seen = info[2].split('')[0].strip() + '-05:00' - if sys.version_info < (3, 4): - description = self.parser.unescape(info[3].split('')[0].strip()) - else: - description = html.unescape(info[3].split('')[0].strip()) + description = html.unescape(info[3].split('')[0].strip()) for key in ClassificationType.allowed_values: if description.lower().find(key.lower()) > -1: diff --git a/intelmq/bots/parsers/bambenek/parser.py b/intelmq/bots/parsers/bambenek/parser.py index 3c5370f46..f39f0754f 100644 --- a/intelmq/bots/parsers/bambenek/parser.py +++ b/intelmq/bots/parsers/bambenek/parser.py @@ -10,14 +10,17 @@ class BambenekParserBot(ParserBot): IPMASTERLIST = { 'http://osint.bambenekconsulting.com/feeds/c2-ipmasterlist.txt', 'https://osint.bambenekconsulting.com/feeds/c2-ipmasterlist.txt', + 'https://faf.bambenekconsulting.com/feeds/dga/c2-ipmasterlist.txt', } DOMMASTERLIST = { 'http://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt', 'https://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt', + 'https://faf.bambenekconsulting.com/feeds/dga/c2-dommasterlist.txt', } DGA_FEED = { 'http://osint.bambenekconsulting.com/feeds/dga-feed.txt', 'https://osint.bambenekconsulting.com/feeds/dga-feed.txt', + 'https://faf.bambenekconsulting.com/feeds/dga-feed.txt', } MALWARE_NAME_MAP = { diff --git a/intelmq/bots/parsers/bitcash/parser.py b/intelmq/bots/parsers/bitcash/parser.py deleted file mode 100644 index 2d0b15cb3..000000000 --- a/intelmq/bots/parsers/bitcash/parser.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -""" Parser for Bitcash blocklist feed. """ - -from intelmq.lib.bot import ParserBot -from intelmq.lib.message import Event - - -class BitcashBlocklistParserBot(ParserBot): - """ Parser for Bitcash blocklist feed. 
""" - - def parse_line(self, line, report): - if line.startswith('#'): - self.tempdata.append(line) - - else: - line = line.split() - event = Event(report) - event.add('time.source', line[5] + 'T' + line[6] + '+00:00') - event.add('source.ip', line[0]) - event.add('source.reverse_dns', line[2], raise_failure=False) - event.add('classification.type', 'scanner') - event.add('event_description.text', 'IPs banned for serious abusing of Bitcash services ' - '(scanning, sniffing, harvesting, dos attacks)') - event.add('raw', ','.join(line)) - - yield event - - -BOT = BitcashBlocklistParserBot diff --git a/intelmq/bots/parsers/cymru/parser_cap_program.py b/intelmq/bots/parsers/cymru/parser_cap_program.py index 67d417664..7b1b79e69 100644 --- a/intelmq/bots/parsers/cymru/parser_cap_program.py +++ b/intelmq/bots/parsers/cymru/parser_cap_program.py @@ -21,16 +21,26 @@ 'classification.identifier': 'dns-open-resolver', 'protocol.application': 'dns', }, + 'openresolver': {'classification.type': 'vulnerable service', + 'classification.identifier': 'dns-open-resolver', + 'protocol.application': 'dns', + }, 'scanner': {'classification.type': 'scanner', 'classification.identifier': 'scanner'}, 'spam': {'classification.type': 'spam', 'classification.identifier': 'spam'}, + 'conficker': {'classification.type': 'infected-system', + 'classification.identifier': 'conficker', + 'malware.name': 'conficker'}, } MAPPING_COMMENT = {'bruteforce': ('classification.identifier', 'protocol.application'), 'phishing': ('source.url', )} -PROTOCOL_MAPPING = {'6': 'tcp', # TODO: use getent in harmonization - '17': 'udp', - '1': 'icmp'} +PROTOCOL_MAPPING = { # TODO: use getent in harmonization + '1': 'icmp', + '6': 'tcp', + '11': 'nvp-ii', + '17': 'udp', +} BOGUS_HOSTNAME_PORT = re.compile('hostname: ([^:]+)port: ([0-9]+)') DESTINATION_PORT_NUMBERS_TOTAL = re.compile(r' \(total_count:\d+\)$') @@ -278,7 +288,7 @@ def parse_line_new(self, line, report): value = value.strip() if key == 'family': event['classification.identifier'] = event['malware.name'] = value.lower() - elif key == 'dest_addr': + elif key in ('dest_addr', 'destaddr'): event['destination.ip'] = value elif key in ('dest_port', 'ports_scanned', 'honeypot_port', 'darknet_port', 'destination_port_numbers'): @@ -304,7 +314,7 @@ def parse_line_new(self, line, report): event['source.port'] = port else: event['protocol.application'] = value - elif key == 'port': + elif key in ('port', 'srcport'): event['source.port'] = value else: raise ValueError('Unknown key %r in comment of category %r. Please report this.' 
% (key, category)) diff --git a/intelmq/bots/parsers/cymru/parser_full_bogons.py b/intelmq/bots/parsers/cymru/parser_full_bogons.py index 5bdf89e46..81947d98a 100644 --- a/intelmq/bots/parsers/cymru/parser_full_bogons.py +++ b/intelmq/bots/parsers/cymru/parser_full_bogons.py @@ -2,40 +2,39 @@ import dateutil from intelmq.lib import utils -from intelmq.lib.bot import Bot +from intelmq.lib.bot import ParserBot -class CymruFullBogonsParserBot(Bot): - - def process(self): - report = self.receive_message() +class CymruFullBogonsParserBot(ParserBot): + def parse(self, report): raw_report = utils.base64_decode(report.get("raw")).strip() if not len(raw_report): # We depend on first line = date - self.acknowledge_message() return - row = raw_report.splitlines()[0] - time_str = row[row.find('(') + 1:row.find(')')] - time = dateutil.parser.parse(time_str).isoformat() + first_row = raw_report[:raw_report.find('\n')] + time_str = first_row[first_row.find('(') + 1:first_row.find(')')] + self.last_updated = dateutil.parser.parse(time_str).isoformat() + self.tempdata.append(first_row) for row in raw_report.splitlines(): - val = row.strip() - if not len(val) or val.startswith('#') or val.startswith('//'): - continue + yield row.strip() + + def parse_line(self, val, report): + if not len(val) or val.startswith('#') or val.startswith('//'): + return - event = self.new_event(report) + event = self.new_event(report) - if not event.add('source.ip', val, raise_failure=False): - event.add('source.network', val) + if not event.add('source.ip', val, raise_failure=False): + event.add('source.network', val) - event.add('time.source', time) - event.add('classification.type', 'blacklist') - event.add('raw', row) + event.add('time.source', self.last_updated) + event.add('classification.type', 'blacklist') + event.add('raw', self.recover_line(val)) - self.send_message(event) - self.acknowledge_message() + yield event BOT = CymruFullBogonsParserBot diff --git a/intelmq/bots/parsers/fraunhofer/parser_ddosattack_cnc.py b/intelmq/bots/parsers/fraunhofer/parser_ddosattack_cnc.py deleted file mode 100644 index 8200cf699..000000000 --- a/intelmq/bots/parsers/fraunhofer/parser_ddosattack_cnc.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- -""" -The source provides a stream/list of newline separated JSON objects. Each line -represents a single event observed by a DDoS C&C tracker, like an attack -command. This parser emits a c2server event for the C&C tracked server the -observed event originated from. If the bot receives a report with a known -C&C type but with an unknown message type, it generates a C&C event with a -feed.accuracy given by the parameter unknown_messagetype_accuracy, if set. -""" -import json -from builtins import ValueError, hasattr - -from intelmq.lib.bot import ParserBot - -__all__ = ['FraunhoferDdosAttackCncParserBot'] - - -class FraunhoferDdosAttackCncParserBot(ParserBot): - def parse_line(self, line, report): - feed_message = json.loads(line) - - return self.__parse_cnc_server(feed_message, line, report) - - def __parse_cnc_server(self, message, line, report): - if message['cnctype'] != 'classic_cnc': - raise ValueError('Unable to create cnc event due to ' - 'unsupported cnctype %s.' 
% message['cnctype']) - - event = self.__new_event(message, line, report) - event.add('classification.type', 'c2server') - event.add('classification.taxonomy', 'malicious code') - event.add('source.fqdn', message['cnc']['domain']) - event.add('source.ip', message['cnc']['ip']) - event.add('source.port', message['cnc']['port']) - - if message['messagetype'] != 'cnc_message' and hasattr(self.parameters, 'unknown_messagetype_accuracy'): - event.add('feed.accuracy', - self.parameters.unknown_messagetype_accuracy, - overwrite=True) - - return event - - def __new_event(self, message, line, report): - event = self.new_event(report) - event.add('raw', line) - event.add('malware.name', message['name']) - event.add('time.source', message['ts']) - return event - - -BOT = FraunhoferDdosAttackCncParserBot diff --git a/intelmq/bots/parsers/fraunhofer/parser_ddosattack_target.py b/intelmq/bots/parsers/fraunhofer/parser_ddosattack_target.py deleted file mode 100644 index 18a1fea57..000000000 --- a/intelmq/bots/parsers/fraunhofer/parser_ddosattack_target.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- -""" -The source provides a stream/list of newline separated JSON objects. Each line -represents a single event observed by a DDoS C&C tracker, like an attack -command. This parser emits a ddos event for every target detected in the -observed event. -""" -import json -from builtins import ValueError - -from intelmq.lib.bot import ParserBot - -__all__ = ['FraunhoferDdosAttackTargetParserBot'] - - -class FraunhoferDdosAttackTargetParserBot(ParserBot): - def parse_line(self, line, report): - feed_message = json.loads(line) - - yield from self.__parse_ddos_targets(feed_message, line, report) - - def __parse_ddos_targets(self, message, line, report): - if message['messagetype'] != 'cnc_message': - raise ValueError('Unable to create ddos events due to ' - 'unsupported messagetype %s.' 
% message['messagetype']) - - for target_address in message['message']['targets']: - event = self.__new_event(message, line, report) - event.add('classification.type', 'ddos') - event.add('classification.taxonomy', 'availability') - if not event.add('destination.ip', target_address, raise_failure=False): - if not event.add('destination.network', target_address, raise_failure=False): - event.add('destination.fqdn', target_address) - yield event - - def __new_event(self, message, line, report): - event = self.new_event(report) - event.add('raw', line) - event.add('malware.name', message['name']) - event.add('time.source', message['ts']) - return event - - -BOT = FraunhoferDdosAttackTargetParserBot diff --git a/intelmq/bots/parsers/github_feed/REQUIREMENTS.txt b/intelmq/bots/parsers/github_feed/REQUIREMENTS.txt new file mode 100644 index 000000000..3feabebde --- /dev/null +++ b/intelmq/bots/parsers/github_feed/REQUIREMENTS.txt @@ -0,0 +1 @@ +validators \ No newline at end of file diff --git a/intelmq/tests/bots/parsers/urlvir/__init__.py b/intelmq/bots/parsers/github_feed/__init__.py similarity index 100% rename from intelmq/tests/bots/parsers/urlvir/__init__.py rename to intelmq/bots/parsers/github_feed/__init__.py diff --git a/intelmq/bots/parsers/github_feed/parser.py b/intelmq/bots/parsers/github_feed/parser.py new file mode 100644 index 000000000..f067218ff --- /dev/null +++ b/intelmq/bots/parsers/github_feed/parser.py @@ -0,0 +1,136 @@ +""" +Github IOC feeds' parser +""" +import ipaddress +import json + +try: + import validators + from validators.hashes import md5 as valid_md5, sha1 as valid_sha1, sha256 as valid_sha256 + from validators.domain import domain as valid_domain + from validators.url import url as valid_url +except ImportError: + validators = None + +from intelmq.lib.bot import Bot +from intelmq.lib.utils import base64_decode +from intelmq.lib.exceptions import MissingDependencyError + +HASH_VALIDATORS = { + 'sha1': lambda x: valid_sha1(x), + 'sha256': lambda x: valid_sha256(x), + 'md5': lambda x: valid_md5(x) +} + + +class GithubFeedParserBot(Bot): + + def init(self): + if validators is None: + raise MissingDependencyError('validators') + self.__supported_feeds = { + 'StrangerealIntel/DailyIOC': lambda logger: self.StrangerealIntelDailyIOC(logger) + } + + def process(self): + report = self.receive_message() + try: + decoded_content = json.loads(base64_decode(report['raw']).replace("'", '"')) + except json.JSONDecodeError as e: + self.logger.error("Invalid report['raw']: {}".format(e)) + self.acknowledge_message() + return + + for event in self.parse(report, decoded_content): + self.send_message(event) + self.acknowledge_message() + + def parse(self, report, json_content: dict): + event = self.new_event(report) + + # add extra metadata from report (when coming from Github API collector) + if 'extra.file_metadata' in report.keys(): + for k, v in report.get('extra.file_metadata').items(): + event.add('extra.file_metadata.' 
+ k, v)
+
+        for known_feed, feed_parser in self.__supported_feeds.items():
+            if known_feed in report.get('feed.url'):
+                return feed_parser(self.logger).parse(event, json_content)
+        raise ValueError("Unknown feed '{}'.".format(report.get('feed.url')))
+
+    class StrangerealIntelDailyIOC:
+        def __init__(self, logger):
+            self.logger = logger
+
+        def parse(self, event, json_content: dict):
+            """
+            Parse the feed-specific JSON into the matching IntelMQ event fields
+
+            :param event: output event object
+            :param json_content: IOC(s) in JSON format
+            """
+
+            class Next(Exception):
+                pass
+
+            clean_event = event
+
+            for ioc in json_content:
+                event = clean_event.copy()
+                event.add('raw', str(ioc))
+                event.add('classification.type', 'unknown')
+                event.add('classification.taxonomy', 'other')
+                event.add('event_description.text', ioc['Description'])
+
+                ioc_indicator = ioc['Indicator']
+
+                try:
+                    for hash_type, validate_hash_func in HASH_VALIDATORS.items():
+                        if validate_hash_func(ioc_indicator):
+                            yield parse_hash_indicator(event, ioc_indicator, hash_type)
+                            raise Next
+                except Next:
+                    continue
+
+                if valid_domain(ioc_indicator):
+                    yield parse_domain_indicator(event, ioc_indicator)
+                    continue
+
+                try:
+                    ipaddress.ip_address(ioc_indicator)
+                    yield parse_ip_indicator(event, ioc_indicator)
+                    continue
+                except ValueError:
+                    pass
+
+                if valid_url(ioc_indicator):
+                    yield parse_url_indicator(event, ioc_indicator)
+                    continue
+
+                # otherwise drop the event and log a warning
+                self.logger.warning("IOC '{}' not in expected format.".format(ioc_indicator.replace('.', '[.]')))
+
+
+def parse_url_indicator(event, ioc_indicator: str):
+    event.add('source.url', ioc_indicator)
+    return event
+
+
+def parse_ip_indicator(event, ioc_indicator: str):
+    event.add('source.ip', ioc_indicator)
+    return event
+
+
+def parse_domain_indicator(event, ioc_indicator: str):
+    event.add('source.fqdn', ioc_indicator)
+    return event
+
+
+def parse_hash_indicator(event, ioc_indicator: str, hash_type: str):
+    event.add('malware.hash.{}'.format(hash_type), ioc_indicator)
+    event.change('classification.taxonomy', 'malicious code')
+    event.change('classification.type', 'malware')
+    return event
+
+
+BOT = GithubFeedParserBot
diff --git a/intelmq/bots/parsers/hibp/parser_callback.py b/intelmq/bots/parsers/hibp/parser_callback.py
index e74b6ebd3..53ff28f67 100644
--- a/intelmq/bots/parsers/hibp/parser_callback.py
+++ b/intelmq/bots/parsers/hibp/parser_callback.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
 There are two different Formats: Breaches and Pastes
diff --git a/intelmq/bots/parsers/html_table/REQUIREMENTS.txt b/intelmq/bots/parsers/html_table/REQUIREMENTS.txt
index 0aad3dc4f..6f83e94d2 100644
--- a/intelmq/bots/parsers/html_table/REQUIREMENTS.txt
+++ b/intelmq/bots/parsers/html_table/REQUIREMENTS.txt
@@ -1,3 +1,2 @@
 beautifulsoup4
-lxml<4.4.0; python_version < '3.5'
-lxml; python_version >= '3.5'
+lxml
diff --git a/intelmq/bots/parsers/html_table/parser.py b/intelmq/bots/parsers/html_table/parser.py
index 5c05fcbc2..089f6bbd1 100644
--- a/intelmq/bots/parsers/html_table/parser.py
+++ b/intelmq/bots/parsers/html_table/parser.py
@@ -21,6 +21,7 @@
 from intelmq.lib.bot import Bot
 from intelmq.lib.exceptions import InvalidArgument
 from intelmq.lib.harmonization import DateTime
+from intelmq.lib.exceptions import MissingDependencyError
 
 try:
@@ -33,7 +34,7 @@ class HTMLTableParserBot(Bot):
 
     def init(self):
         if bs is None:
-            raise ValueError("Could not import 'beautifulsoup4'. Please install it.")
Please install it.") + raise MissingDependencyError("beautifulsoup4") self.columns = self.parameters.columns # convert columns to an array diff --git a/intelmq/bots/parsers/malwaredomains/parser.py b/intelmq/bots/parsers/malwaredomains/parser.py index dca3b7d1b..2251b8c19 100644 --- a/intelmq/bots/parsers/malwaredomains/parser.py +++ b/intelmq/bots/parsers/malwaredomains/parser.py @@ -1,4 +1,9 @@ # -*- coding: utf-8 -*- +""" +The descriptions give a hint about what the entry is about and is very mixed. +Most prominent description is "phishing", most of them are malware names. +More types could be mapped better, only the most obious ones are done currently. +""" import datetime from intelmq.lib import utils @@ -30,6 +35,15 @@ def process(self): event = self.new_event(report) event.add('source.fqdn', values[1]) + if values[2] == 'phishing': + event.add('classification.identifier', values[2]) + event.add('classification.type', 'phishing') + elif values[2] == 'C&C': + event.add('classification.identifier', values[2]) + event.add('classification.type', 'c2server') + else: + event.add('classification.identifier', values[2]) + event.add('classification.type', 'malware') event.add('event_description.text', values[2]) for i in range(4, len(values)): @@ -38,7 +52,6 @@ def process(self): values[i] + "T00:00:00+00:00", overwrite=True) break - event.add('classification.type', 'malware') event.add('raw', row) self.send_message(event) diff --git a/intelmq/bots/parsers/microsoft/parser_ctip.py b/intelmq/bots/parsers/microsoft/parser_ctip.py index a0cd5cde7..eb2d2f97c 100644 --- a/intelmq/bots/parsers/microsoft/parser_ctip.py +++ b/intelmq/bots/parsers/microsoft/parser_ctip.py @@ -3,41 +3,178 @@ Parses CTIP data in JSON format. Key indicatorexpirationdatetime is ignored, meaning is unknown. + +There are two different variants of data +1. Interflow format: JSON format, MAPPING +2. 
Azure format: JSON stream format, TODO + + "DataFeed": "CTIP-Infected", + "SourcedFrom": "SinkHoleMessage|SensorMessage"", + "DateTimeReceivedUtc": nt time + "DateTimeReceivedUtcTxt": human readable + "Malware": + "ThreatCode": "B67-SS-TINBA", + "ThreatConfidence": "High|Medium|Low|Informational", -> 100/50/20/10 + "TotalEncounters": 3, + "TLP": "Amber", + "SourceIp": + "SourcePort": + "DestinationIp": + "DestinationPort": + "TargetIp": Deprecated, so we gonne ignore it + "TargetPort": Deprecated, so we gonne ignore it + "SourceIpInfo": { + "SourceIpAsnNumber": + "SourceIpAsnOrgName": + "SourceIpCountryCode": + "SourceIpRegion": + "SourceIpCity" + "SourceIpPostalCode" + "SourceIpLatitude" + "SourceIpLongitude" + "SourceIpMetroCode" + "SourceIpAreaCode" + "SourceIpConnectionType" + }, + "HttpInfo": { + "HttpHost": "", + "HttpRequest": "", + "HttpMethod": "", + "HttpReferrer": "", + "HttpUserAgent": "", + "HttpVersion": "" + }, + "CustomInfo": { + "CustomField1": "", + "CustomField2": "", + "CustomField3": "", + "CustomField4": "", + "CustomField5": "" + }, + "Payload": base64 encoded json +} + """ import json +import intelmq.lib.utils as utils from intelmq.lib.bot import ParserBot +from intelmq.lib.harmonization import DateTime -MAPPING = {"additionalmetadata": "extra.additionalmetadata", - "description": "event_description.text", - "externalid": "malware.name", - "tlplevel": "tlp", - "firstreporteddatetime": "time.source", - "networksourceipv4": "source.ip", - "networksourceport": "source.port", - "networkdestinationipv4": "destination.ip", - "networkdestinationport": "destination.port", - "isproductlicensed": "extra.isproductlicensed", - "ispartnershareable": "extra.ispartnershareable", - "networksourceasn": "source.asn", - "hostname": "destination.fqdn", - "useragent": "extra.user_agent", - "severity": "extra.severity", - "tags": "extra.tags", - } +INTERFLOW = {"additionalmetadata": "extra.additionalmetadata", + "description": "event_description.text", + "externalid": "malware.name", + "tlplevel": "tlp", + "firstreporteddatetime": "time.source", + "networksourceipv4": "source.ip", + "networksourceport": "source.port", + "networkdestinationipv4": "destination.ip", + "networkdestinationport": "destination.port", + "isproductlicensed": "extra.isproductlicensed", + "ispartnershareable": "extra.ispartnershareable", + "networksourceasn": "source.asn", + "hostname": "destination.fqdn", + "useragent": "extra.user_agent", + "severity": "extra.severity", + "tags": "extra.tags", + } +AZURE = { + "DataFeed": "feed.name", + "SourcedFrom": "event_description.text", + "DateTimeReceivedUtc": "time.source", + "DateTimeReceivedUtcTxt": "__IGNORE__", + "Malware": "extra.malware", + "ThreatCode": "malware.name", + "ThreatConfidence": "feed.accuracy", + "TotalEncounters": "extra.total_encounters", + "TLP": "tlp", + "SourceIp": "source.ip", + "SourcePort": "source.port", + "DestinationIp": "destination.ip", + "DestinationPort": "destination.port", + "TargetIp": "__IGNORE__", + "TargetPort": "__IGNORE__", + "SourceIpInfo.SourceIpAsnNumber": "source.asn", + "SourceIpInfo.SourceIpAsnOrgName": "source.as_name", + "SourceIpInfo.SourceIpCountryCode": "source.geolocation.cc", + "SourceIpInfo.SourceIpRegion": "source.geolocation.region", + "SourceIpInfo.SourceIpCity": "source.geolocation.city", + "SourceIpInfo.SourceIpPostalCode": "extra.source.geolocation.postal_code", + "SourceIpInfo.SourceIpLatitude": "source.geolocation.latitude", + "SourceIpInfo.SourceIpLongitude": "source.geolocation.longitude", + 
"SourceIpInfo.SourceIpMetroCode": "extra.source.geolocation.metro_code", + "SourceIpInfo.SourceIpAreaCode": "extra.source.geolocation.area_code", + "SourceIpInfo.SourceIpConnectionType": "protocol.application", + "HttpInfo.HttpHost": "extra.http.host", + "HttpInfo.HttpRequest": "extra.http.request", + "HttpInfo.HttpMethod": "extra.http.method", + "HttpInfo.HttpReferrer": "extra.http.referrer", + "HttpInfo.HttpUserAgent": "extra.user_agent", + "HttpInfo.HttpVersion": "extra.http.version", + "CustomInfo.CustomField1": "extra.custom_field1", + "CustomInfo.CustomField2": "extra.custom_field2", + "CustomInfo.CustomField3": "extra.custom_field3", + "CustomInfo.CustomField4": "extra.custom_field4", + "CustomInfo.CustomField5": "extra.custom_field5", + "Payload.ts": "extra.payload.timestamp", + "Payload.ip": "extra.payload.ip", + "Payload.port": "extra.payload.port", + "Payload.serverIp": "extra.payload.server.ip", + "Payload.serverPort": "extra.payload.server.port", + "Payload.domain": "extra.payload.domain", + "Payload.family": "extra.payload.family", + "Payload.malware": "extra.payload.malware", + "Payload.response": "extra.payload.response", + "Payload.handler": "extra.payload.handler", + "Payload.type": "protocol.application", + "Payload": "extra.payload", + "Payload.Time": "extra.payload.time", + "Payload.SourceIP": "extra.payload.source.ip", + "Payload.DestIP": "extra.payload.destination.ip", + "Payload.RemotePort": "extra.payload.remote.port", + "Payload.RemoteHost": "extra.payload.remote.host", + "Payload.ServerPort": "extra.payload.server.port", + "Payload.BCode": "extra.payload.b_code", + "Payload.Protocol": "extra.payload.protocol", + "Payload.Length": "extra.payload.length", + "Payload.URI": "destination.urlpath", + "Payload.Referer": "extra.http_referer", + "Payload.UserAgent": "extra.user_agent", + "Payload.RequestMethod": "extra.http.method", + "Payload.HTTPHost": "extra.http.host", + "Payload.Custom1": "extra.payload.custom_field1", + "Payload.Custom2": "extra.payload.custom_field2", + "Payload.Custom3": "extra.payload.custom_field3", + "Payload.Custom4": "extra.payload.custom_field4", + "Payload.Custom5": "extra.payload.custom_field5", +} +CONFIDENCE = { + "High": 100, + "Medium": 50, + "Low": 20, + "Informational": 10, +} class MicrosoftCTIPParserBot(ParserBot): - parse = ParserBot.parse_json - - def recover_line(self, line: dict): - return json.dumps([line], sort_keys=True) # not applying formatting here + def parse(self, report): + raw_report = utils.base64_decode(report.get("raw")) + if raw_report.startswith('['): + self.recover_line = self.recover_line_json + yield from self.parse_json(report) + elif raw_report.startswith('{'): + self.recover_line = self.recover_line_json_stream + yield from self.parse_json_stream(report) def parse_line(self, line, report): + if line.get('version', None) == 1.5: + yield from self.parse_interflow(line, report) + else: + yield from self.parse_azure(line, report) + + def parse_interflow(self, line, report): raw = self.recover_line(line) - if line['version'] != 1.5: - raise ValueError('Data is in unknown format %r, only version 1.5 is supported.' % line['version']) if line['indicatorthreattype'] != 'Botnet': raise ValueError('Unknown indicatorthreattype %r, only Botnet is supported.' 
% line['indicatorthreattype']) if 'additionalmetadata' in line and line['additionalmetadata'] in [[], [''], ['null'], [None]]: @@ -62,7 +199,7 @@ def parse_line(self, line, report): since 2019-03-14, reported upstream, IP addresses are always the same """ value = value[:value.find(',')] - event[MAPPING[key]] = value + event[INTERFLOW[key]] = value event.add('feed.accuracy', event.get('feed.accuracy', 100) * line['confidence'] / 100, overwrite=True) @@ -70,5 +207,43 @@ def parse_line(self, line, report): event.add('raw', raw) yield event + def parse_azure(self, line, report): + raw = self.recover_line(line) + + event = self.new_event(report) + + for key, value in line.copy().items(): + if key == 'Payload': + if value == 'AA==': # NULL + del line[key] + continue + try: + value = json.loads(utils.base64_decode(value)) + # continue unpacking in next loop + except json.decoder.JSONDecodeError: + line[key] = utils.base64_decode(value) + if isinstance(value, dict): + for subkey, subvalue in value.items(): + line['%s.%s' % (key, subkey)] = subvalue + del line[key] + for key, value in line.items(): + if key == 'ThreatConfidence': + if value == 'None': + continue + value = event.get('feed.accuracy', 100) * CONFIDENCE[value] / 100 + elif key == 'DateTimeReceivedUtc': + value = DateTime.from_windows_nt(value) + elif key == 'Payload.ts': + value = DateTime.from_timestamp(value) + elif key == 'Payload.Protocol': + event.add('protocol.application', value[:value.find('/')]) # "HTTP/1.1", save additionally + elif not value: + continue + if AZURE[key] != '__IGNORE__': + event.add(AZURE[key], value, overwrite=True) + event.add('classification.type', 'infected-system') + event.add('raw', raw) + yield event + BOT = MicrosoftCTIPParserBot diff --git a/intelmq/bots/parsers/n6/parser_n6stomp.py b/intelmq/bots/parsers/n6/parser_n6stomp.py index a070dacfe..b58d961bc 100644 --- a/intelmq/bots/parsers/n6/parser_n6stomp.py +++ b/intelmq/bots/parsers/n6/parser_n6stomp.py @@ -3,12 +3,13 @@ The source provides a JSON file with a dictionary. The keys of this dict are identifiers and the values are lists of domains. 
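+
+A "name" value may contain characters outside the printable-ASCII range, which
+are not valid in the malware.name field. In that case the parser adds only the
+printable part of the name, stripped via re.sub("[^ -~]", '', name), as
+malware.name and keeps the full, unmodified name in event_description.text
+(see the InvalidValue fallback below).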
""" - import json +import re from intelmq.lib import utils from intelmq.lib.bot import Bot from intelmq.lib.harmonization import DateTime +from intelmq.lib.exceptions import InvalidValue __all__ = ['N6StompParserBot'] mapping = {} @@ -112,12 +113,13 @@ def process(self): event.add("extra.expires", DateTime.sanitize(dict_report["expires"])) if "source" in dict_report: event.add("extra.feed_source", dict_report["source"]) - if ("category" in dict_report and "name" in dict_report and - dict_report["category"] == 'bots'): - event.add("malware.name", dict_report["name"]) - - if ("name" in dict_report): + if "name" in dict_report: mapping['bots']['identifier'] = dict_report["name"] + try: + event.add("malware.name", dict_report["name"]) + except InvalidValue: + event.add("malware.name", re.sub("[^ -~]", '', dict_report["name"])) + event.add("event_description.text", dict_report["name"]) else: mapping['bots']['identifier'] = "malware-generic" diff --git a/intelmq/bots/parsers/nothink/parser.py b/intelmq/bots/parsers/nothink/parser.py deleted file mode 100644 index a112f8e9a..000000000 --- a/intelmq/bots/parsers/nothink/parser.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*- -""" IntelMQ parser for Nothink feeds """ - -import dateutil.parser - -from intelmq.lib.bot import ParserBot - - -class NothinkParserBot(ParserBot): - lastgenerated = None - - SOURCE_FEEDS = {'http://www.nothink.org/blacklist/blacklist_snmp_day.txt': 'snmp', - 'http://www.nothink.org/blacklist/blacklist_snmp_week.txt': 'snmp', - 'http://www.nothink.org/blacklist/blacklist_snmp_year.txt': 'snmp', - 'http://www.nothink.org/blacklist/blacklist_ssh_day.txt': 'ssh', - 'http://www.nothink.org/blacklist/blacklist_ssh_week.txt': 'ssh', - 'http://www.nothink.org/blacklist/blacklist_ssh_year.txt': 'ssh', - 'http://www.nothink.org/blacklist/blacklist_telnet_day.txt': 'telnet', - 'http://www.nothink.org/blacklist/blacklist_telnet_week.txt': 'telnet', - 'http://www.nothink.org/blacklist/blacklist_telnet_year.txt': 'telnet', - 'http://www.nothink.org/honeypot_dns_attacks.txt': 'dns' - } - - BLACKLIST_FEED = {'http://www.nothink.org/blacklist/blacklist_snmp_day.txt', - 'http://www.nothink.org/blacklist/blacklist_snmp_week.txt', - 'http://www.nothink.org/blacklist/blacklist_snmp_year.txt', - 'http://www.nothink.org/blacklist/blacklist_ssh_day.txt', - 'http://www.nothink.org/blacklist/blacklist_ssh_week.txt', - 'http://www.nothink.org/blacklist/blacklist_ssh_year.txt', - 'http://www.nothink.org/blacklist/blacklist_telnet_day.txt', - 'http://www.nothink.org/blacklist/blacklist_telnet_week.txt', - 'http://www.nothink.org/blacklist/blacklist_telnet_year.txt' - } - - DNS_FEED = {'http://www.nothink.org/honeypot_dns_attacks.txt'} - - def parse_line(self, line, report): - if line.startswith('#') or len(line) == 0: - self.tempdata.append(line) - if 'Generated' in line: - self.lastgenerated = line.strip('# Generated ')[:19] - self.lastgenerated = dateutil.parser.parse(self.lastgenerated + '+00:00').isoformat() - - else: - event = self.new_event(report) - event.add('raw', line) - if report['feed.url'] in NothinkParserBot.BLACKLIST_FEED: - event.add('time.source', self.lastgenerated) - event.add('source.ip', line) - event.add('classification.type', 'scanner') - event.add('protocol.application', NothinkParserBot.SOURCE_FEEDS[report['feed.url']]) - - elif report['feed.url'] in NothinkParserBot.DNS_FEED: - value = line.strip('"').split('","') - event.add('time.source', dateutil.parser.parse(value[0] + '+00:00').isoformat()) - 
event.add('source.ip', value[1]) - event.add('source.asn', value[2]) - event.add('source.as_name', value[3]) - if value[4] not in ['.', 'n/a', 'bka']: - event.add('source.reverse_dns', value[4]) - if value[5] != 'UNK': - event.add('source.geolocation.cc', value[5]) - event.add('protocol.application', NothinkParserBot.SOURCE_FEEDS[report['feed.url']]) - event.add('classification.type', 'ddos') - event.add('event_description.text', 'On time.source the source.ip was seen' - ' performing DNS amplification attacks against honeypots') - - else: - raise ValueError('Unknown data feed %s.' % report['feed.url']) - - yield event - - -BOT = NothinkParserBot diff --git a/intelmq/bots/parsers/shadowserver/config.py b/intelmq/bots/parsers/shadowserver/config.py index 6a6e4a6f0..da969bb53 100644 --- a/intelmq/bots/parsers/shadowserver/config.py +++ b/intelmq/bots/parsers/shadowserver/config.py @@ -1486,6 +1486,10 @@ def convert_date(value): ('extra.', 'naics', invalidate_zero), ('extra.', 'sic', invalidate_zero), ('extra.', 'sector', validate_to_none), + ('extra.', 'tlsv13_support', validate_to_none), # always empty so far + ('extra.', 'tlsv13_cipher', validate_to_none), # always empty so far + ('extra.', 'cve20190708_vulnerable', convert_bool), + ('extra.', 'bluekeep_vulnerable', convert_bool), ], 'constant_fields': { 'classification.taxonomy': 'vulnerable', @@ -2170,6 +2174,76 @@ def convert_date(value): } } +# https://www.shadowserver.org/what-we-do/network-reporting/open-mqtt-report/ +open_mqtt = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'ip'), + ('source.port', 'port'), + ], + 'optional_fields': [ + ('protocol.transport', 'protocol'), + ('source.reverse_dns', 'hostname'), + # ('classification.identifier', 'tag'), # always set to 'open-mqtt' in constant_fields + ('source.asn', 'asn'), + ('source.geolocation.cc', 'geo'), + ('source.geolocation.region', 'region'), + ('source.geolocation.city', 'city'), + ('extra.', 'naics', invalidate_zero), + ('extra.', 'sic', invalidate_zero), + ('extra.', 'anonymous_access', convert_bool), + ('extra.', 'raw_response', validate_to_none), + ('extra.', 'hex_code', validate_to_none), + ('extra.', 'code', validate_to_none) + ], + 'constant_fields': { + 'classification.taxonomy': 'vulnerable', + 'classification.type': 'vulnerable service', + 'classification.identifier': 'open-mqtt', + 'protocol.application': 'mqtt', + } +} + +# https://www.shadowserver.org/what-we-do/network-reporting/open-ipp-report/ +open_ipp = { + 'required_fields': [ + ('time.source', 'timestamp', add_UTC_to_timestamp), + ('source.ip', 'ip'), + ('source.port', 'port'), + ], + 'optional_fields': [ + ('protocol.transport', 'protocol'), + ('source.reverse_dns', 'hostname'), + # ('classification.identifier', 'tag'), # always set to 'open-ipp' in constant_fields + ('source.asn', 'asn'), + ('source.geolocation.cc', 'geo'), + ('source.geolocation.region', 'region'), + ('source.geolocation.city', 'city'), + ('extra.', 'naics', invalidate_zero), + ('extra.', 'sic', invalidate_zero), + ('extra.', 'ipp_version', validate_to_none), + ('extra.', 'cups_version', validate_to_none), + ('extra.', 'printer_uris', validate_to_none), + ('extra.', 'printer_name', validate_to_none), + ('extra.', 'printer_info', validate_to_none), + ('extra.', 'printer_more_info', validate_to_none), + ('extra.', 'printer_make_and_model', validate_to_none), + ('extra.', 'printer_firmware_name', validate_to_none), + ('extra.', 'printer_firmware_string_version', validate_to_none), + 
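+        # The printer_* columns of the Open-IPP report have no corresponding
+        # harmonization fields and are therefore kept under the extra.* namespace;
+        # validate_to_none normalizes empty values to None so they are omitted.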
('extra.', 'printer_firmware_version', validate_to_none), + ('extra.', 'printer_organization', validate_to_none), + ('extra.', 'printer_organization_unit', validate_to_none), + ('extra.', 'printer_uuid', validate_to_none), + ('extra.', 'printer_wifi_ssid', validate_to_none) + ], + 'constant_fields': { + 'classification.taxonomy': 'vulnerable', + 'classification.type': 'vulnerable service', + 'classification.identifier': 'open-ipp', + 'protocol.application': 'ipp', + } +} + mapping = ( # feed name, file name, function ('Accessible-ADB', 'scan_adb', accessible_adb), @@ -2202,8 +2276,10 @@ def convert_date(value): ('Open-DB2-Discovery-Service', 'scan_db2', open_db2_discovery_service), ('Open-Elasticsearch', 'scan_elasticsearch', open_elasticsearch), ('Open-IPMI', 'scan_ipmi', open_ipmi), + ('Open-IPP', 'scan_ipp', open_ipp), ('Open-LDAP', 'scan_ldap', open_ldap), ('Open-LDAP-TCP', 'scan_ldap_tcp', open_ldap), + ('Open-MQTT', 'scan_mqtt', open_mqtt), ('Open-MSSQL', 'scan_mssql', open_mssql), ('Open-Memcached', 'scan_memcached', open_memcached), ('Open-MongoDB', 'scan_mongodb', open_mongodb), diff --git a/intelmq/bots/parsers/shadowserver/parser.py b/intelmq/bots/parsers/shadowserver/parser.py index 5418d5256..22ddd3043 100644 --- a/intelmq/bots/parsers/shadowserver/parser.py +++ b/intelmq/bots/parsers/shadowserver/parser.py @@ -29,20 +29,25 @@ class ShadowserverParserBot(ParserBot): recover_line = ParserBot.recover_line_csv_dict csv_params = {'dialect': 'unix'} - __is_filename_regex = re.compile(r'\d{4}-\d{2}-\d{2}-(\w+)(-\w+)*\.csv') + __is_filename_regex = re.compile(r'^(?:\d{4}-\d{2}-\d{2}-)?(\w+)(-\w+)*\.csv$') sparser_config = None feedname = None + mode = None def init(self): - if hasattr(self.parameters, 'feedname'): + if getattr(self.parameters, 'feedname', None): self.feedname = self.parameters.feedname self.sparser_config = config.get_feed_by_feedname(self.feedname) if self.sparser_config: self.logger.info('Using fixed feed name %r for parsing reports.' % self.feedname) + self.mode = 'fixed' else: self.logger.info('Could not determine the feed by the feed name %r given by parameter. ' 'Will determine the feed from the file names.', self.feedname) + self.mode = 'detect' + else: + self.mode = 'detect' # Set a switch if the parser shall reset the feed.name, # for this event @@ -52,11 +57,16 @@ def init(self): self.overwrite = True def parse(self, report): - if self.sparser_config and hasattr(self.parameters, 'feedname'): + if self.mode == 'fixed': return self.parse_csv_dict(report) # Set config to parse report self.report_name = report.get('extra.file_name') + if not self.report_name: + raise ValueError("No feedname given as parameter and the " + "processed report has no 'extra.file_name'. " + "Ensure that at least one is given. 
" + "Also have a look at the documentation of the bot.") filename_search = self.__is_filename_regex.search(self.report_name) if not filename_search: diff --git a/intelmq/bots/parsers/taichung/parser.py b/intelmq/bots/parsers/taichung/parser.py index f3cbab388..dc88d2416 100644 --- a/intelmq/bots/parsers/taichung/parser.py +++ b/intelmq/bots/parsers/taichung/parser.py @@ -1,21 +1,35 @@ # -*- coding: utf-8 -*- - +""" +unmapped: + : bing says "Over-the-line", +""" import re +import warnings from intelmq.lib import utils -from intelmq.lib.bot import Bot +from intelmq.lib.bot import ParserBot CLASSIFICATION = { - "brute-force": ["brute-force", "brute force", "mysql"], - "c2server": ["c&c server"], + "brute-force": ["brute-force", "brute force", "mysql", + "mssql 密碼猜測攻擊", # Password Guess Attack + "office 365 attack", "sip attack", "ssh attack", + "ssh密碼猜測攻擊", # Password Guess Attack + ], + "c2server": ["c&c server", "attack controller"], "infected-system": ["irc-botnet"], - "malware": ["malware provider", "malware website", '\u60e1\u610f', "worm"], + "malware": ["malware provider", "malware website", '\u60e1\u610f', "worm", "malware proxy"], "scanner": ["scan"], "exploit": ["bash", "php-cgi", "phpmyadmin"], + "ddos": ["ddos"], + "application-compromise": ["injection"], # apache vulns, sql + "ids-alert": ["backdoor"], # ids-alert is exploitation of known vulnerability + "dos": ["dns", "dos", # must be after ddos + "超量連線", # google: "Excess connection" + ], } -class TaichungCityNetflowParserBot(Bot): +class TaichungNetflowRecentParserBot(ParserBot): def get_type(self, value): value = value.lower() @@ -23,41 +37,45 @@ def get_type(self, value): for keyword in keywords: if keyword in value: return event_type + warnings.warn("Unknown classification: %r. Please report this as bug." 
+ "" % value) return "unknown" - def process(self): - report = self.receive_message() - + def parse(self, report): raw_report = utils.base64_decode(report.get("raw")) for row in raw_report.split(''): + yield row - # Get IP and Type - info1 = re.search( - r">[\ ]*(\d+\.\d+\.\d+\.\d+)[\ ]*<.*([^<]+)", row) + def parse_line(self, row, report): + # Get IP Address and Type + info1 = re.search( + r">[\ ]*(\d+\.\d+\.\d+\.\d+)[\ ]*<.*([^<]+)", row) - if not info1: - continue + if not info1: + return # abort if no IP address found - # Get Timestamp - info2 = re.search( - r"[\ ]*(\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2})[\ ]*", - row) + # Get Timestamp + info2 = re.search( + r"[\ ]*(\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2})[\ ]*", + row) + cc_search = re.search(r'([a-z]+).gif"', row) - event = self.new_event(report) + event = self.new_event(report) - description = info1.group(2) - description = utils.decode(description) - event_type = self.get_type(description) - time_source = info2.group(1) + " UTC-8" + description = info1.group(2) + event_type = self.get_type(description) # without decoding here, b/c of the unicode signs + description = utils.decode(description) + time_source = info2.group(1) + " UTC-8" - event.add("time.source", time_source) - event.add("source.ip", info1.group(1)) - event.add('classification.type', event_type) - event.add('event_description.text', description) - event.add("raw", row) + event.add("time.source", time_source) + event.add("source.ip", info1.group(1)) + event.add('classification.type', event_type) + event.add('event_description.text', description) + if cc_search: + event.add('source.geolocation.cc', cc_search.group(1)) + event.add("raw", row) - self.send_message(event) - self.acknowledge_message() + yield event -BOT = TaichungCityNetflowParserBot +BOT = TaichungNetflowRecentParserBot diff --git a/intelmq/bots/parsers/twitter/parser.py b/intelmq/bots/parsers/twitter/parser.py index 3b9eaeea2..ba9cb5fdf 100644 --- a/intelmq/bots/parsers/twitter/parser.py +++ b/intelmq/bots/parsers/twitter/parser.py @@ -24,6 +24,7 @@ from intelmq.lib.bot import Bot, utils from intelmq.lib.exceptions import InvalidArgument from intelmq.lib.harmonization import ClassificationType +from intelmq.lib.exceptions import MissingDependencyError try: from url_normalize import url_normalize @@ -42,13 +43,13 @@ class TwitterParserBot(Bot): def init(self): if url_normalize is None: - raise ValueError("Could not import 'url-normalize'. Please install it.") + raise MissingDependencyError("url-normalize") url_version = pkg_resources.get_distribution("url-normalize").version if tuple(int(v) for v in url_version.split('.')) < (1, 4, 1) and hasattr(self.parameters, 'default_scheme'): raise ValueError("Parameter 'default_scheme' given but 'url-normalize' version %r does not support it. " "Get at least version '1.4.1'." % url_version) if get_tld is None: - raise ValueError("Could not import 'tld'. 
Please install it.") + raise MissingDependencyError("tld") try: update_tld_names() except tld.exceptions.TldIOError: diff --git a/intelmq/bots/parsers/urlvir/parser.py b/intelmq/bots/parsers/urlvir/parser.py deleted file mode 100644 index a4425d23e..000000000 --- a/intelmq/bots/parsers/urlvir/parser.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- coding: utf-8 -*- -""" IntelMQ parser for URLVIR feeds """ - -import dateutil.parser - -from intelmq.lib.bot import ParserBot - - -class URLVirParserBot(ParserBot): - - IP_FEED = {'http://www.urlvir.com/export-ip-addresses/'} - HOST_FEED = {'http://www.urlvir.com/export-hosts/'} - - def parse_line(self, line, report): - if line.startswith('#') or len(line) == 0: - self.tempdata.append(line) - if '#Updated on' in line: - self.lastgenerated = line.strip('#Updated on ') - self.lastgenerated = dateutil.parser.parse(self.lastgenerated + ' -04:00').isoformat() - - else: - event = self.new_event(report) - value = line.strip() - if self.lastgenerated: - event.add('time.source', self.lastgenerated) - event.add('raw', line) - event.add('classification.type', 'malware') - - if report['feed.url'] in URLVirParserBot.IP_FEED: - event.add('source.ip', value) - event.add('event_description.text', 'Active Malicious IP Addresses Hosting Malware') - event.add('event_description.url', 'http://www.urlvir.com/search-ip-address/' + value + '/') - - elif report['feed.url'] in URLVirParserBot.HOST_FEED: - if event.is_valid('source.ip', value): - event.add('source.ip', value) - event.add('event_description.url', 'http://www.urlvir.com/search-ip-address/' + value + '/') - else: - event.add('source.fqdn', value) - event.add('event_description.url', 'http://www.urlvir.com/search-host/' + value + '/') - event.add('event_description.text', 'Active Malicious Hosts') - - else: - raise ValueError('Unknown data feed %s.' % report['feed.url']) - - yield event - - -BOT = URLVirParserBot diff --git a/intelmq/etc/feeds.yaml b/intelmq/etc/feeds.yaml index c9baf844d..5e494b219 100644 --- a/intelmq/etc/feeds.yaml +++ b/intelmq/etc/feeds.yaml @@ -8,7 +8,7 @@ providers: collector: module: intelmq.bots.collectors.http.collector_http parameters: - http_url: http://tracker.viriback.com/ + http_url: http://tracker.viriback.com/ rate_limit: 86400 name: __FEED__ provider: __PROVIDER__ @@ -19,9 +19,9 @@ providers: type: malware time_format: from_format_midnight|%d-%m-%Y html_parser: lxml - revision: 27-06-2018 - status: on - documentation: + revision: 2018-06-27 + documentation: https://viriback.com/ + public: yes WebInspektor: Unsafe sites: description: Latest detected unsafe sites. @@ -37,9 +37,9 @@ providers: parser: module: intelmq.bots.parsers.webinspektor.parser parameters: - revision: 09-03-2018 - status: on + revision: 2018-03-09 documentation: + public: yes Sucuri: Hidden IFrames: description: Latest hidden iframes identified on compromised web sites. @@ -55,9 +55,9 @@ providers: parser: module: intelmq.bots.parsers.sucuri.parser parameters: - revision: 28-01-2018 - status: on - documentation: + revision: 2018-01-28 + documentation: http://labs.sucuri.net/?malware + public: yes Surbl: Malicious Domains: description: Detected malicious domains. Note that you have to opened up Sponsored Datafeed Service (SDS) access to the SURBL data via rsync for your IP address. 
@@ -71,9 +71,9 @@ providers:
       parser:
         module: intelmq.bots.parsers.surbl.parser
         parameters:
-      revision: 04-09-2018
-      status: on
+      revision: 2018-09-04
       documentation:
+      public: no
   MalwarePatrol:
     DansGuardian:
       description: Malware block list with URLs
@@ -89,9 +89,9 @@ providers:
       parser:
         module: intelmq.bots.parsers.malwarepatrol.parser_dansguardian
         parameters:
-      revision: 20-01-2018
-      status: on
-      documentation:
+      revision: 2018-01-20
+      documentation: https://www.malwarepatrol.net/
+      public: no
   Malware Domains:
     Malicious:
       description: Malware Prevention through Domain Blocking (Black Hole DNS Sinkhole)
@@ -107,28 +107,9 @@ providers:
       parser:
         module: intelmq.bots.parsers.malwaredomains.parser
         parameters:
-      revision: 20-01-2018
-      status: on
-      documentation:
-  Bitcash:
-    Banned IPs:
-      description: IPs banned for serious abusing of our services (scanning, sniffing,
-        harvesting, dos attacks).
-      additional_information:
-      bots:
-        collector:
-          module: intelmq.bots.collectors.http.collector_http
-          parameters:
-            http_url: https://bitcash.cz/misc/log/blacklist
-            rate_limit: 3600
-            name: __FEED__
-            provider: __PROVIDER__
-        parser:
-          module: intelmq.bots.parsers.bitcash.parser
-          parameters:
-      revision: 20-01-2018
-      status: on
-      documentation:
+      revision: 2018-01-20
+      documentation: http://www.malwaredomains.com/
+      public: yes
   ZoneH:
     Defacements:
       description: all the information contained in Zone-H's cybercrime archive were
@@ -146,7 +127,7 @@ providers:
             sent_from: datazh@zone-h.org
             folder: INBOX
             subject_regex: Report
-            attach_unzip: false
+            extract_files: false
             attach_regex: csv
             rate_limit: 3600
             name: __FEED__
             provider: __PROVIDER__
       parser:
         module: intelmq.bots.parsers.zoneh.parser
         parameters:
-      revision: 20-01-2018
-      status: on
-      documentation:
+      revision: 2018-01-20
+      documentation: https://zone-h.org/
+      public: no
   OpenPhish:
-    Phishing:
+    Public feed:
       description: OpenPhish is a fully automated self-contained platform for phishing
         intelligence. It identifies phishing sites and performs intelligence analysis
         in real time without human intervention and without using any external resources,
@@ -175,16 +156,15 @@ providers:
       parser:
         module: intelmq.bots.parsers.openphish.parser
         parameters:
-      revision: 20-01-2018
-      status: on
-      documentation:
-  OpenPhish Commercial:
-    Phishing:
+      revision: 2018-01-20
+      documentation: https://www.openphish.com/
+      public: yes
+    Premium Feed:
       description: OpenPhish is a fully automated self-contained platform for phishing
         intelligence. It identifies phishing sites and performs intelligence analysis
         in real time without human intervention and without using any external resources,
         such as blacklists.
-      additional_information:
+      additional_information: Discounts available for Government and National CERTs as well as for Nonprofit and Not-for-Profit organizations.
       bots:
         collector:
           module: intelmq.bots.collectors.http.collector_http
@@ -198,131 +178,13 @@ providers:
       parser:
         module: intelmq.bots.parsers.openphish.parser_commercial
         parameters:
-      revision: 06-02-2018
-      status: on
-      documentation:
-  Nothink:
-    SNMP:
-      description: 'There are a number of feeds you can use to depend on how far back
-        you would like to go. The time.source will still be the date and time the
-        feed was generated at nothink. This feed provides IP addresses of systems
-        that have connected to a honeypot via SNMP in the last 24 hours.
reference: - http://www.nothink.org/honeypot_snmp.php' - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: http://www.nothink.org/blacklist/blacklist_snmp_day.txt - rate_limit: 86400 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.nothink.parser - parameters: - revision: 20-01-2018 - status: on - documentation: - SSH: - description: 'There are a number of feeds you can use to depend on how far back - you would like to go. The time.source will still be the date and time the - feed was generated at nothink. This feed provides IP addresses of systems - that have connected to a honeypot via SSH in the last 24 hours. Reference: - http://www.nothink.org/honeypots.php' - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: http://www.nothink.org/blacklist/blacklist_ssh_day.txt - rate_limit: 86400 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.nothink.parser - parameters: - revision: 20-01-2018 - status: on - documentation: - DNS Attack: - description: 'This feed provides attack information for attack information against - DNS honeypots. reference: http://www.nothink.org/honeypot_dns.php .' - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: http://www.nothink.org/honeypot_dns_attacks.txt - rate_limit: 3600 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.nothink.parser - parameters: - revision: 20-01-2018 - status: on - documentation: - Telnet: - description: 'There are a number of feeds you can use to depend on how far back - you would like to go. The time.source will still be the date and time the - feed was generated at nothink. This feed provides IP addresses of systems - that have connected to a honeypot via Telnet in the last 24 hours. reference: - http://www.nothink.org/honeypots.php' - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: http://www.nothink.org/blacklist/blacklist_telnet_day.txt - rate_limit: 86400 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.nothink.parser - parameters: - revision: 20-01-2018 - status: on - documentation: - URLVir: - Hosts: - description: This feed provides FQDN's or IP addresses for Active Malicious - Hosts. - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: http://www.urlvir.com/export-hosts/ - rate_limit: 129600 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.urlvir.parser - parameters: - revision: 20-01-2018 - status: on - documentation: - IPs: - description: This feed provides IP addresses hosting Malware. - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: http://www.urlvir.com/export-ip-addresses/ - rate_limit: 129600 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.urlvir.parser - parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-02-06 + documentation: https://www.openphish.com/phishing_feeds.html + public: no Netlab 360: Mirai Scanner: description: 'This feed provides IP addresses which actively scan for vulnerable - IoT devices and install Mirai Botnet. 
reference: http://data.netlab.360.com/mirai-scanner/' + IoT devices and install Mirai Botnet.' additional_information: bots: collector: @@ -335,13 +197,13 @@ providers: parser: module: intelmq.bots.parsers.netlab_360.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://data.netlab.360.com/mirai-scanner/ + public: yes Magnitude EK: description: 'This feed lists FQDN and possibly the URL used by Magnitude Exploit Kit. Information also includes the IP address used for the domain and last - time seen. reference: http://data.netlab.360.com/ek' + time seen.' additional_information: bots: collector: @@ -354,12 +216,12 @@ providers: parser: module: intelmq.bots.parsers.netlab_360.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://data.netlab.360.com/ek + public: yes DGA: description: 'This feed lists DGA family, Domain, Start and end of valid time(UTC) - of a number of DGA families. reference: http://data.netlab.360.com/dga' + of a number of DGA families.' additional_information: bots: collector: @@ -372,9 +234,9 @@ providers: parser: module: intelmq.bots.parsers.netlab_360.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://data.netlab.360.com/dga + public: yes Hajime Scanner: description: 'This feed lists IP address for know Hajime bots network. These IPs data are obtained by joining the DHT network and interacting with the Hajime node' additional_information: @@ -389,69 +251,10 @@ providers: parser: module: intelmq.bots.parsers.netlab_360.parser parameters: - revision: 01-08-2019 - status: on - documentation: + revision: 2019-08-01 + documentation: https://data.netlab.360.com/hajime/ + public: yes Abuse.ch: - Zeus Tracker IPs: - description: This list only includes IPv4 addresses that are used by the ZeuS - Trojan. It is the recommended list if you want to block only ZeuS IPs. It - excludes IP addresses that ZeuS Tracker believes to be hijacked (level 2) - or belong to a free web hosting provider (level 3). Hence the false positive - rate should be much lower compared to the standard ZeuS IP blocklist. - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: https://zeustracker.abuse.ch/blocklist.php?download=badips - rate_limit: 129600 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.abusech.parser_ip - parameters: - revision: 20-01-2018 - status: off - documentation: - Ransomware Tracker: - description: Ransomware Tracker feed includes FQDN's, URL's, and known IP addresses - that were used for said FQDN's and URL's for various ransomware families. - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: https://ransomwaretracker.abuse.ch/feeds/csv/ - rate_limit: 129600 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.abusech.parser_ransomware - parameters: - revision: 20-01-2018 - status: on - documentation: - Zeus Tracker Domains: - description: The ZeuS domain blocklist (BadDomains) is the recommended blocklist - if you want to block only ZeuS domain names. It has domain names that ZeuS - Tracker believes to be hijacked (level 2). Hence the false positive rate should - be much lower compared to the standard ZeuS domain blocklist. 
- additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: https://zeustracker.abuse.ch/blocklist.php?download=baddomains - rate_limit: 129600 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.abusech.parser_domain - parameters: - revision: 20-01-2018 - status: off - documentation: Feodo Tracker IPs: description: 'List of botnet Command&Control servers (C&Cs) tracked by Feodo Tracker, associated with Dridex and Emotet (aka Heodo).' @@ -468,9 +271,9 @@ providers: parser: module: intelmq.bots.parsers.abusech.parser_ip parameters: - revision: 25-03-2019 - status: on - documentation: + revision: 2019-03-25 + documentation: https://feodotracker.abuse.ch/ + public: yes URLhaus: description: URLhaus is a project from abuse.ch with the goal of sharing malicious URLs that are being used for malware distribution. URLhaus offers a country, ASN @@ -494,9 +297,9 @@ providers: default_url_protocol: http:// type_translation: '{"malware_download": "malware-distribution"}' columns: time.source,source.url,status,extra.urlhaus.threat_type,source.fqdn,source.ip,source.asn,source.geolocation.cc - revision: 14-02-2019 - status: on - documentation: + revision: 2019-02-14 + documentation: https://urlhaus.abuse.ch/ + public: yes Feodo Tracker Browse: description: '' additional_information: @@ -515,9 +318,9 @@ providers: ignore_values: ",,,,Not listed,," skip_table_head: True type: c2server - revision: 19-03-2019 - status: on - documentation: + revision: 2019-03-19 + documentation: https://feodotracker.abuse.ch/browse + public: yes Blueliv: CrimeServer: description: Blueliv Crimeserver Collector is the bot responsible to get the @@ -548,9 +351,9 @@ providers: parser: module: intelmq.bots.parsers.blueliv.parser_crimeserver parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.blueliv.com/ + public: no Team Cymru: CAP: description: Team Cymru provides daily lists of compromised or abused devices @@ -577,10 +380,10 @@ providers: parser: module: intelmq.bots.parsers.cymru.parser_cap_program parameters: - revision: 20-01-2018 - status: on - documentation: - Full Bogons: + revision: 2018-01-20 + documentation: https://www.team-cymru.com/CSIRT-AP.html https://www.cymru.com/$certname/report_info.txt + public: no + Full Bogons IPv4: description: Fullbogons are a larger set which also includes IP space that has been allocated to an RIR, but not assigned by that RIR to an actual ISP or other end-user. IANA maintains a convenient IPv4 summary page listing allocated @@ -600,28 +403,49 @@ providers: parser: module: intelmq.bots.parsers.cymru.parser_full_bogons parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.team-cymru.com/bogon-reference-http.html + public: yes + Full Bogons IPv6: + description: Fullbogons are a larger set which also includes IP space that has + been allocated to an RIR, but not assigned by that RIR to an actual ISP or + other end-user. IANA maintains a convenient IPv4 summary page listing allocated + and reserved netblocks, and each RIR maintains a list of all prefixes that + they have assigned to end-users. Our bogon reference pages include additional + links and resources to assist those who wish to properly filter bogon prefixes + within their networks. 
+ additional_information: + bots: + collector: + module: intelmq.bots.collectors.http.collector_http + parameters: + http_url: https://www.team-cymru.org/Services/Bogons/fullbogons-ipv6.txt + rate_limit: 129600 + name: __FEED__ + provider: __PROVIDER__ + parser: + module: intelmq.bots.parsers.cymru.parser_full_bogons + parameters: + revision: 2018-01-20 + documentation: https://www.team-cymru.com/bogon-reference-http.html + public: yes Taichung: - Netflow: - description: Abnormal flows detected. + Netflow Recent: + description: "Abnormal flows detected: Attacking (DoS, Brute-Force, Scanners) and malicious hosts (C&C servers, hosting malware)" additional_information: bots: collector: module: intelmq.bots.collectors.http.collector_http parameters: - http_url: https://www.tc.edu.tw/net/netflow/lkout/recent/30 + http_url: https://www.tc.edu.tw/net/netflow/lkout/recent/ rate_limit: 3600 name: __FEED__ provider: __PROVIDER__ parser: module: intelmq.bots.parsers.taichung.parser - parameters: - error_log_message: 'false' - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.tc.edu.tw/net/netflow/lkout/recent/ + public: yes HPHosts: Hosts: description: hpHosts is a community managed and maintained hosts file that allows @@ -640,9 +464,9 @@ providers: module: intelmq.bots.parsers.hphosts.parser parameters: error_log_message: 'false' - revision: 20-01-2018 - status: off - documentation: + revision: 2018-01-20 + documentation: http://hosts-file.net/ + public: yes Dataplane: SSH Client Connection: description: Entries below consist of fields with identifying characteristics @@ -662,9 +486,9 @@ providers: parser: module: intelmq.bots.parsers.dataplane.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://dataplane.org/ + public: yes SSH Password Authentication: description: Entries below consist of fields with identifying characteristics of a source IP address that has been seen attempting to remotely login to @@ -683,9 +507,9 @@ providers: parser: module: intelmq.bots.parsers.dataplane.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://dataplane.org/ + public: yes SIP Query: description: Entries consist of fields with identifying characteristics of a source IP address that has been seen initiating a SIP OPTIONS query to a remote @@ -704,9 +528,9 @@ providers: parser: module: intelmq.bots.parsers.dataplane.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://dataplane.org/ + public: yes SIP Registration: description: Entries consist of fields with identifying characteristics of a source IP address that has been seen initiating a SIP REGISTER operation to @@ -725,9 +549,9 @@ providers: parser: module: intelmq.bots.parsers.dataplane.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://dataplane.org/ + public: yes Turris: Greylist: description: The data are processed and clasified every week and behaviour of @@ -748,9 +572,9 @@ providers: parser: module: intelmq.bots.parsers.turris.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://project.turris.cz/greylist-data/legend.txt + public: yes Malc0de: Bind Format: description: This feed includes FQDN's of malicious hosts, the file format is @@ -767,9 +591,9 @@ providers: parser: 
module: intelmq.bots.parsers.malc0de.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://malc0de.com/dashboard/ + public: yes Windows Format: description: This feed includes FQDN's of malicious hosts, the file format is in Windows Hosts file format. @@ -785,9 +609,9 @@ providers: parser: module: intelmq.bots.parsers.malc0de.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://malc0de.com/dashboard/ + public: yes IP Blacklist: description: This feed includes IP Addresses of malicious hosts. additional_information: @@ -802,12 +626,12 @@ providers: parser: module: intelmq.bots.parsers.malc0de.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://malc0de.com/dashboard/ + public: yes University of Toulouse: Blacklist: - description: 'The collections and feed description can be found on: https://dsi.ut-capitole.fr/blacklists/.' + description: Various blacklist feeds additional_information: bots: collector: @@ -824,9 +648,9 @@ providers: type: "{depends on a collection}" delimiter: 'false' columns: "{depends on a collection}" - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://dsi.ut-capitole.fr/blacklists/ + public: yes Autoshun: Shunlist: description: You need to register in order to use the list. @@ -842,9 +666,9 @@ providers: parser: module: intelmq.bots.parsers.autoshun.parser parameters: - revision: 20-01-2018 - status: off - documentation: + revision: 2018-01-20 + documentation: https://www.autoshun.org/ + public: no Danger Rulez: Bruteforce Blocker: description: Its main purpose is to block SSH bruteforce attacks via firewall. @@ -860,30 +684,9 @@ providers: parser: module: intelmq.bots.parsers.danger_rulez.parser parameters: - revision: 20-01-2018 - status: on - documentation: - SIP Invitation: - description: Entries consist of fields with identifying characteristics of a - source IP address that has been seen initiating a SIP INVITE operation to - a remote host. The report lists hosts that are suspicious of more than just - port scanning. These hosts may be SIP client cataloging or conducting various - forms of telephony abuse. Report is updated hourly. - additional_information: - bots: - collector: - module: intelmq.bots.collectors.http.collector_http - parameters: - http_url: http://dataplane.org/sipinvitation.txt - rate_limit: 3600 - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.dataplane.parser - parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://danger.rulez.sk/index.php/bruteforceblocker/ + public: yes Spamhaus: Drop: description: The DROP list will not include any IP address space under the control @@ -903,9 +706,9 @@ providers: parser: module: intelmq.bots.parsers.spamhaus.parser_drop parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.spamhaus.org/drop/ + public: yes ASN Drop: description: ASN-DROP contains a list of Autonomous System Numbers controlled by spammers or cyber criminals, as well as "hijacked" ASNs. 
ASN-DROP can be @@ -922,9 +725,9 @@ providers: parser: module: intelmq.bots.parsers.spamhaus.parser_drop parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.spamhaus.org/drop/ + public: yes Dropv6: description: The DROPv6 list includes IPv6 ranges allocated to spammers or cyber criminals. DROPv6 will only include IPv6 netblocks allocated directly by an @@ -942,12 +745,12 @@ providers: parser: module: intelmq.bots.parsers.spamhaus.parser_drop parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.spamhaus.org/drop/ + public: yes CERT: description: Spamhaus CERT Insight Portal. Access limited to CERTs and CSIRTs - with national or regional responsibility. https://www.spamhaus.org/news/article/705/spamhaus-launches-cert-insight-portal + with national or regional responsibility. . additional_information: bots: @@ -961,9 +764,9 @@ providers: parser: module: intelmq.bots.parsers.spamhaus.parser_cert parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.spamhaus.org/news/article/705/spamhaus-launches-cert-insight-portal + public: no EDrop: description: EDROP is an extension of the DROP list that includes sub-allocated netblocks controlled by spammers or cyber criminals. EDROP is meant to be @@ -980,9 +783,9 @@ providers: parser: module: intelmq.bots.parsers.spamhaus.parser_drop parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.spamhaus.org/drop/ + public: yes PhishTank: Online: description: PhishTank is a collaborative clearing house for data and information @@ -999,9 +802,9 @@ providers: parser: module: intelmq.bots.parsers.phishtank.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://data.phishtank.com/ + public: no CINSscore: Army List: description: 'The CINS Army list is a subset of the CINS Active Threat Intelligence @@ -1021,9 +824,9 @@ providers: parser: module: intelmq.bots.parsers.ci_army.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://cinsscore.com/#list + public: yes Blocklist.de: IRC Bots: description: No description provided by feed provider. @@ -1039,9 +842,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes Strong IPs: description: Blocklist.DE Strong IPs Collector is the bot responsible to get the report from source of information. All IPs which are older then 2 month @@ -1058,9 +861,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes Mail: description: Blocklist.DE Mail Collector is the bot responsible to get the report from source of information. 
All IP addresses which have been reported within @@ -1077,9 +880,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes Apache: description: Blocklist.DE Apache Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported @@ -1097,9 +900,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes FTP: description: Blocklist.DE FTP Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within @@ -1116,9 +919,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes SSH: description: Blocklist.DE SSH Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within @@ -1135,9 +938,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes Brute-force Logins: description: Blocklist.DE Brute-force Login Collector is the bot responsible to get the report from source of information. All IPs which attacks Joomlas, @@ -1154,9 +957,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes Bots: description: Blocklist.DE Bots Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within @@ -1175,9 +978,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes IMAP: description: Blocklist.DE IMAP Collector is the bot responsible to get the report from source of information. All IP addresses which have been reported within @@ -1194,9 +997,9 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes SIP: description: Blocklist.DE SIP Collector is the bot responsible to get the report from source of information. All IP addresses that tried to login in a SIP-, @@ -1214,15 +1017,15 @@ providers: parser: module: intelmq.bots.parsers.blocklistde.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.blocklist.de/en/export.html + public: yes CERT.PL: N6 Stomp Stream: description: N6 Collector - CERT.pl's N6 Collector - N6 feed via STOMP interface. Note that rate_limit does not apply for this bot as it is waiting for messages on a stream. - additional_information: + additional_information: Contact cert.pl to get access to the feed. 
bots: collector: module: intelmq.bots.collectors.stomp.collector @@ -1240,9 +1043,9 @@ providers: parser: module: intelmq.bots.parsers.n6.parser_n6stomp parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://n6.cert.pl/en/ + public: no AlienVault: OTX: description: AlienVault OTX Collector is the bot responsible to get the report @@ -1258,9 +1061,9 @@ providers: parser: module: intelmq.bots.parsers.alienvault.parser_otx parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://otx.alienvault.com/ + public: no Reputation List: description: List of malicious IPs. additional_information: @@ -1275,9 +1078,9 @@ providers: parser: module: intelmq.bots.parsers.alienvault.parser parameters: - revision: 20-01-2018 - status: off + revision: 2018-01-20 documentation: + public: yes CleanMX: Virus: description: In order to download the CleanMX feed you need to use a custom @@ -1296,9 +1099,9 @@ providers: parser: module: intelmq.bots.parsers.cleanmx.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://clean-mx.de/ + public: no Phishing: description: In order to download the CleanMX feed you need to use a custom user agent and register that user agent. @@ -1316,9 +1119,9 @@ providers: parser: module: intelmq.bots.parsers.cleanmx.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://clean-mx.de/ + public: no Malware Domain List: Blacklist: description: No description provided by feed provider. @@ -1334,13 +1137,12 @@ providers: parser: module: intelmq.bots.parsers.malwaredomainlist.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://www.malwaredomainlist.com/ + public: yes AnubisNetworks: Cyberfeed Stream: - description: AnubisNetworks Collector is the bot responsible to get AnubisNetworks - Cyberfeed Stream. + description: Fetches and parses the Cyberfeed data stream. additional_information: bots: collector: @@ -1353,63 +1155,68 @@ providers: parser: module: intelmq.bots.parsers.anubisnetworks.parser parameters: - revision: 20-01-2018 - status: on - documentation: + use_malware_familiy_as_classification_identifier: true + revision: 2020-06-15 + documentation: https://www.anubisnetworks.com/ https://www.bitsight.com/ + public: no Bambenek: C2 Domains: description: 'Master Feed of known, active and non-sinkholed C&Cs domain - names. License: https://osint.bambenekconsulting.com/license.txt' - additional_information: + names. Requires access credentials.'
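# The Bambenek feeds below move to faf.bambenekconsulting.com and now
# require HTTP basic authentication. A minimal standalone sketch of the
# equivalent request the generic HTTP collector issues when
# `http_username`/`http_password` are set; URL and credentials are the
# placeholders from the feed entry, not working values.
import requests

response = requests.get(
    'https://faf.bambenekconsulting.com/feeds/c2-dommasterlist.txt',
    auth=('__USERNAME__', '__PASSWORD__'),  # http_username / http_password
    timeout=60,
)
response.raise_for_status()
raw = response.text  # stored base64-encoded in the report's 'raw' field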
+ additional_information: 'License: https://osint.bambenekconsulting.com/license.txt' bots: collector: module: intelmq.bots.collectors.http.collector_http parameters: - http_url: https://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt + http_url: https://faf.bambenekconsulting.com/feeds/c2-dommasterlist.txt + http_username: __USERNAME__ + http_password: __PASSWORD__ rate_limit: 3600 name: __FEED__ provider: __PROVIDER__ parser: module: intelmq.bots.parsers.bambenek.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://osint.bambenekconsulting.com/feeds/ + public: no C2 IPs: - description: 'Master Feed of known, active and non-sinkholed C&Cs IP addresses - License: https://osint.bambenekconsulting.com/license.txt' - additional_information: + description: 'Master Feed of known, active and non-sinkholed C&Cs IP addresses. + Requires access credentials.' + additional_information: 'License: https://osint.bambenekconsulting.com/license.txt' bots: collector: module: intelmq.bots.collectors.http.collector_http parameters: - http_url: https://osint.bambenekconsulting.com/feeds/c2-ipmasterlist.txt + http_url: https://faf.bambenekconsulting.com/feeds/c2-ipmasterlist.txt + http_username: __USERNAME__ + http_password: __PASSWORD__ rate_limit: 3600 name: __FEED__ provider: __PROVIDER__ parser: module: intelmq.bots.parsers.bambenek.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://osint.bambenekconsulting.com/feeds/ + public: no DGA Domains: - description: 'Domain feed of known DGA domains from -2 to +3 days License: https://osint.bambenekconsulting.com/license.txt' - additional_information: + description: Domain feed of known DGA domains from -2 to +3 days + additional_information: 'License: https://osint.bambenekconsulting.com/license.txt' bots: collector: module: intelmq.bots.collectors.http.collector_http parameters: - http_url: https://osint.bambenekconsulting.com/feeds/dga-feed.txt + http_url: https://faf.bambenekconsulting.com/feeds/dga-feed.txt rate_limit: 3600 name: __FEED__ provider: __PROVIDER__ parser: module: intelmq.bots.parsers.bambenek.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://osint.bambenekconsulting.com/feeds/ + public: yes DynDNS: Infected Domains: description: DynDNS ponmocup. List of ponmocup malware redirection domains and @@ -1426,9 +1233,9 @@ providers: parser: module: intelmq.bots.parsers.dyn.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://security-research.dyndns.org/pub/malware-feeds/ + public: yes DShield: Suspicious Domains: description: There are many suspicious domains on the internet. In an effort @@ -1447,9 +1254,9 @@ providers: parser: module: intelmq.bots.parsers.dshield.parser_domain parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.dshield.org/reports.html + public: yes Block: description: This list summarizes the top 20 attacking class C (/24) subnets over the last three days. 
The number of 'attacks' indicates the number of @@ -1466,9 +1273,9 @@ providers: parser: module: intelmq.bots.parsers.dshield.parser_block parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.dshield.org/reports.html + public: yes AS Details: description: No description provided by feed provider. additional_information: @@ -1483,9 +1290,9 @@ providers: parser: module: intelmq.bots.parsers.dshield.parser_asn parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.dshield.org/reports.html + public: yes VXVault: URLs: description: This feed provides IP addresses hosting Malware. @@ -1501,9 +1308,9 @@ providers: parser: module: intelmq.bots.parsers.vxvault.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: http://vxvault.net/ViriList.php + public: yes ShadowServer: Via IMAP: description: Shadowserver sends out a variety of reports (see https://www.shadowserver.org/wiki/pmwiki.php/Services/Reports). @@ -1516,8 +1323,8 @@ providers: mail_password: __PASSWORD__ mail_ssl: true mail_user: __USERNAME__ - attach_unzip: true attach_regex: csv.zip + extract_files: true rate_limit: 86400 subject_regex: __REGEX__ folder: INBOX @@ -1526,9 +1333,9 @@ providers: parser: module: intelmq.bots.parsers.shadowserver.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.shadowserver.org/what-we-do/network-reporting/ + public: no Via Request Tracker: description: Shadowserver sends out a variety of reports (see https://www.shadowserver.org/wiki/pmwiki.php/Services/Reports). additional_information: The configuration retrieves the data from a RT/RTIR ticketing instance via the attachment or an download. @@ -1558,9 +1365,9 @@ providers: parser: module: intelmq.bots.parsers.shadowserver.parser parameters: - revision: 20-01-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://www.shadowserver.org/what-we-do/network-reporting/ + public: no Fraunhofer: DGA Archive: description: Fraunhofer DGA collector fetches data from Fraunhofer's domain @@ -1579,62 +1386,9 @@ providers: parser: module: intelmq.bots.parsers.fraunhofer.parser_dga parameters: - revision: 20-01-2018 - status: on - documentation: - DDoS Attack Feed (C&C): - description: The Fraunhofer DDoS attack feed provides information about - tracked C&C servers and detected attack commands from these C&Cs. You - may request access to the feed via email to infection-reporter@fkie.fraunhofer.de - additional_information: The source feed provides a stream of newline separated JSON objects. - Each line represents a single event observed by DDoS C&C trackers, e.g. attack - commands. The feed can be retrieved with either the generic HTTP Stream - Collector Bot for a streaming live feed or with the generic HTTP Collector Bot - for a polled feed. 
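# The ShadowServer mail entry earlier in this hunk replaces the deprecated
# `attach_unzip` with the generic `extract_files` parameter, which
# `Bot._parse_extract_file_parameter` (further down in this patch)
# normalizes. A hedged sketch of that normalization, assuming the
# documented semantics: a boolean extracts all files, while a
# comma-separated string or an iterable names the files to extract.
def parse_extract_files_sketch(value):
    """Hypothetical stand-in for Bot._parse_extract_file_parameter."""
    if value is None or isinstance(value, bool):
        return value  # True: extract everything from the archive
    if isinstance(value, str):
        return [name.strip() for name in value.split(',')]
    return list(value)

assert parse_extract_files_sketch(True) is True
assert parse_extract_files_sketch('a.csv,b.csv') == ['a.csv', 'b.csv']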
- bots: - collector: - module: intelmq.bots.collectors.http.collector_http_stream - parameters: - http_url: https://feed.caad.fkie.fraunhofer.de/ddosattackfeed - http_password: "{{ your password }}" - http_username: "{{ your username }}" - rate_limit: 10 - strip_lines: true - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.fraunhofer.parser_ddosattack_cnc - parameters: - unknown_messagetype_accuracy: 80 - revision: 01-07-2018 - status: on - documentation: - DDoS Attack Feed (Targets): - description: The Fraunhofer DDoS attack feed provides information about - tracked C&C servers and detected attack commands from these C&Cs. You - may request access to the feed via email to infection-reporter@fkie.fraunhofer.de - additional_information: The source feed provides a stream of newline separated JSON objects. - Each line represents a single event observed by DDoS C&C trackers, e.g. attack - commands. The feed can be retrieved with either the generic HTTP Stream - Collector Bot for a streaming live feed or with the generic HTTP Collector Bot - for a polled feed. - bots: - collector: - module: intelmq.bots.collectors.http.collector_http_stream - parameters: - http_url: https://feed.caad.fkie.fraunhofer.de/ddosattackfeed - http_password: "{{ your password }}" - http_username: "{{ your username }}" - rate_limit: 10 - strip_lines: true - name: __FEED__ - provider: __PROVIDER__ - parser: - module: intelmq.bots.parsers.fraunhofer.parser_ddosattack_target - parameters: - revision: 01-07-2018 - status: on - documentation: + revision: 2018-01-20 + documentation: https://dgarchive.caad.fkie.fraunhofer.de/welcome/ + public: no MalwareURL: Latest malicious activity: description: Latest malicious domains/IPs. @@ -1650,12 +1404,12 @@ providers: parser: module: intelmq.bots.parsers.malwareurl.parser parameters: - revision: 05-02-2018 - status: on - documentation: + revision: 2018-02-05 + documentation: https://www.malwareurl.com/ + public: yes Microsoft: - BingMURLs: - description: Collects Malicious URLs detected by Bing from the Interflow API. + BingMURLs via Interflow: + description: Collects Malicious URLs detected by Bing from the Interflow API. The feed is available via Microsoft’s Government Security Program (GSP). additional_information: Depending on the file sizes you may need to increase the parameter 'http_timeout_sec' of the collector. bots: collector: @@ -1671,11 +1425,11 @@ providers: parser: module: intelmq.bots.parsers.microsoft.parser_bingmurls parameters: - revision: 29-05-2018 - status: on - documentation: - CTIP: - description: Collects CTIP files from the Interflow API. + revision: 2018-05-29 + documentation: https://docs.microsoft.com/en-us/security/gsp/informationsharingandexchange + public: no + CTIP via Interflow: + description: Collects CTIP (Sinkhole data) files from the Interflow API. The feed is available via Microsoft’s Government Security Program (GSP). additional_information: Depending on the file sizes you may need to increase the parameter 'http_timeout_sec' of the collector. As many IPs occur very often in the data, you may want to use a deduplicator specifically for the feed.
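# The note above suggests a dedicated deduplicator for the CTIP feed. A
# hedged sketch of a matching runtime.conf entry, written as a Python
# dict; the bot id, cache db and TTL are illustrative choices only.
ctip_deduplicator = {
    "ctip-deduplicator-expert": {
        "group": "Expert",
        "module": "intelmq.bots.experts.deduplicator.expert",
        "parameters": {
            "redis_cache_host": "127.0.0.1",
            "redis_cache_port": 6379,
            "redis_cache_db": 6,         # separate db, not shared with other caches
            "redis_cache_ttl": 86400,    # one day; tune to the feed's repetition window
            "filter_type": "blacklist",  # hash all fields except the listed ones
            "filter_keys": "raw,time.observation",
        },
    },
}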
bots: collector: @@ -1691,9 +1445,31 @@ providers: parser: module: intelmq.bots.parsers.microsoft.parser_ctip parameters: - revision: 06-03-2018 - status: on - documentation: + revision: 2018-03-06 + documentation: https://docs.microsoft.com/en-us/security/gsp/informationsharingandexchange + public: no + CTIP via Azure: + description: Collects CTIP (Sinkhole data) files from a shared Azure Storage. The feed is available via Microsoft’s Government Security Program (GSP). + additional_information: The cache is needed for memorizing which files have already been processed, the TTL should be higher than the oldest file available in the storage (currently the last three days are available). The connection string contains endpoint as well as authentication information. + bots: + collector: + module: intelmq.bots.collectors.microsoft.collector_azure + parameters: + connection_string: "{{your connection string}}" + container_name: "ctip-infected-summary" + name: __FEED__ + provider: __PROVIDER__ + rate_limit: 3600 + redis_cache_db: 5 + redis_cache_host: 127.0.0.1 + redis_cache_port: 6379 + redis_cache_ttl: 864000 + parser: + module: intelmq.bots.parsers.microsoft.parser_ctip + parameters: + revision: 2020-05-29 + documentation: https://docs.microsoft.com/en-us/security/gsp/informationsharingandexchange + public: no Threatminer: Recent domains: description: Latest malicious domains. @@ -1709,13 +1485,13 @@ providers: parser: module: intelmq.bots.parsers.threatminer.parser parameters: - revision: 06-02-2018 - status: on + revision: 2018-02-06 documentation: https://www.threatminer.org/ + public: yes Calidog: CertStream: description: HTTP Websocket Stream from certstream.calidog.io providing data from Certificate Transparency Logs. - additional_information: + additional_information: Be aware that this feed provides a lot of data and may overload your system quickly. bots: collector: module: intelmq.bots.collectors.certstream.collector_certstream @@ -1725,9 +1501,9 @@ providers: parser: module: intelmq.bots.parses.certstream.parser_certstream parameters: - revision: 15-06-2018 - status: on - documentation: + revision: 2018-06-15 + documentation: https://medium.com/cali-dog-security/introducing-certstream-3fc13bb98067 + public: yes McAfee Advanced Threat Defense: Sandbox Reports: description: Processes reports from McAfee's sandboxing solution via the openDXL API. @@ -1742,9 +1518,9 @@ providers: module: intelmq.bots.parsers.mcafee.parser_atd parameters: verdict_severity: 4 - revision: 05-07-2018 - status: on - documentation: + revision: 2018-07-05 + documentation: https://www.mcafee.com/enterprise/en-us/products/advanced-threat-defense.html + public: no CyberCrime Tracker: Latest: description: C2 servers @@ -1764,9 +1540,9 @@ providers: skip_table_head: true default_url_protocol: http:// type: c2server - revision: 19-03-2019 - status: on - documentation: + revision: 2019-03-19 + documentation: https://cybercrime-tracker.net/index.php + public: yes PrecisionSec: Agent Tesla: description: Agent Tesla IoCs, URLs where the malware is hosted. @@ -1786,9 +1562,9 @@ providers: skip_table_head: true default_url_protocol: http:// type: malware - revision: 02-04-2019 - status: on + revision: 2019-04-02 documentation: https://precisionsec.com/threat-intelligence-feeds/agent-tesla/ + public: yes Have I Been Pwned: Enterprise Callback: description: With the Enterprise Subscription of 'Have I Been Pwned' you are able to provide a callback URL and any new leak data is submitted to it. 
It is recommended to put a webserver with Authorization check, TLS etc. in front of the API collector. @@ -1824,6 +1600,25 @@ providers: parser: module: intelmq.bots.parsers.hibp.parser_callback parameters: - revision: 11-09-2019 - status: on + revision: 2019-09-11 documentation: https://haveibeenpwned.com/EnterpriseSubscriber/ + public: no + Strangereal Intel: + DailyIOC: + description: Daily IOC from tweets and articles + additional_information: | + collector's `extra_fields` parameter may be any of fields from the github [content API response](https://developer.github.com/v3/repos/contents/) + bots: + collector: + module: intelmq.bots.collectors.github_api.collector_github_contents_api + parameters: + basic_auth_username: USERNAME + basic_auth_password: PASSWORD + repository: StrangerealIntel/DailyIOC + regex: .*.json + parser: + module: intelmq.bots.parsers.github_feed + parameters: + revision: 2019-12-05 + documentation: https://github.com/StrangerealIntel/DailyIOC + public: yes diff --git a/intelmq/etc/harmonization.conf b/intelmq/etc/harmonization.conf index c01cbf99f..a173a29b6 100644 --- a/intelmq/etc/harmonization.conf +++ b/intelmq/etc/harmonization.conf @@ -217,7 +217,7 @@ }, "protocol.transport": { "description": "e.g. tcp, udp, icmp.", - "iregex": "^(ip|icmp|igmp|ggp|ipencap|st2|tcp|cbt|egp|igp|bbn-rcc|nvp|pup|argus|emcon|xnet|chaos|udp|mux|dcn|hmp|prm|xns-idp|trunk-1|trunk-2|leaf-1|leaf-2|rdp|irtp|iso-tp4|netblt|mfe-nsp|merit-inp|sep|3pc|idpr|xtp|ddp|idpr-cmtp|tp\\+\\+|il|ipv6|sdrp|ipv6-route|ipv6-frag|idrp|rsvp|gre|mhrp|bna|esp|ah|i-nlsp|swipe|narp|mobile|tlsp|skip|ipv6-icmp|ipv6-nonxt|ipv6-opts|cftp|sat-expak|kryptolan|rvd|ippc|sat-mon|visa|ipcv|cpnx|cphb|wsn|pvp|br-sat-mon|sun-nd|wb-mon|wb-expak|iso-ip|vmtp|secure-vmtp|vines|ttp|nsfnet-igp|dgp|tcf|eigrp|ospf|sprite-rpc|larp|mtp|ax.25|ipip|micp|scc-sp|etherip|encap|gmtp|ifmp|pnni|pim|aris|scps|qnx|a/n|ipcomp|snp|compaq-peer|ipx-in-ip|vrrp|pgm|l2tp|ddx|iatp|st|srp|uti|smp|sm|ptp|isis|fire|crtp|crdup|sscopmce|iplt|sps|pipe|sctp|fc|divert)$", + "iregex": "^(ip|icmp|igmp|ggp|ipencap|st2|tcp|cbt|egp|igp|bbn-rcc|nvp(-ii)?|pup|argus|emcon|xnet|chaos|udp|mux|dcn|hmp|prm|xns-idp|trunk-1|trunk-2|leaf-1|leaf-2|rdp|irtp|iso-tp4|netblt|mfe-nsp|merit-inp|sep|3pc|idpr|xtp|ddp|idpr-cmtp|tp\\+\\+|il|ipv6|sdrp|ipv6-route|ipv6-frag|idrp|rsvp|gre|mhrp|bna|esp|ah|i-nlsp|swipe|narp|mobile|tlsp|skip|ipv6-icmp|ipv6-nonxt|ipv6-opts|cftp|sat-expak|kryptolan|rvd|ippc|sat-mon|visa|ipcv|cpnx|cphb|wsn|pvp|br-sat-mon|sun-nd|wb-mon|wb-expak|iso-ip|vmtp|secure-vmtp|vines|ttp|nsfnet-igp|dgp|tcf|eigrp|ospf|sprite-rpc|larp|mtp|ax.25|ipip|micp|scc-sp|etherip|encap|gmtp|ifmp|pnni|pim|aris|scps|qnx|a/n|ipcomp|snp|compaq-peer|ipx-in-ip|vrrp|pgm|l2tp|ddx|iatp|st|srp|uti|smp|sm|ptp|isis|fire|crtp|crdup|sscopmce|iplt|sps|pipe|sctp|fc|divert)$", "length": 11, "type": "LowercaseString" }, diff --git a/intelmq/etc/runtime.conf b/intelmq/etc/runtime.conf index 11d9953f4..13bde6a86 100644 --- a/intelmq/etc/runtime.conf +++ b/intelmq/etc/runtime.conf @@ -1,13 +1,14 @@ { "cymru-whois-expert": { "bot_id": "cymru-whois-expert", - "description": "Cymry Whois (IP to ASN) is the bot responsible to add network information to the events (BGP, ASN, AS Name, Country, etc..).", + "description": "Cymru Whois (IP to ASN) is the bot responsible to add network information to the events (BGP, ASN, AS Name, Country, etc..).", "enabled": true, "group": "Expert", "groupname": "experts", "module": "intelmq.bots.experts.cymru_whois.expert", "name": "Cymru Whois", "parameters": { + 
"overwrite": true, "redis_cache_db": 5, "redis_cache_host": "127.0.0.1", "redis_cache_password": null, diff --git a/intelmq/lib/bot.py b/intelmq/lib/bot.py index 208d2adf2..73e026306 100644 --- a/intelmq/lib/bot.py +++ b/intelmq/lib/bot.py @@ -1,6 +1,10 @@ # -*- coding: utf-8 -*- """ - +The bot library has the base classes for all bots. + * Bot: generic base class for all kind of bots + * CollectorBot: base class for collectors + * ParserBot: base class for parsers + * SQLBot: base classs for any bots using SQL """ import atexit import csv @@ -29,9 +33,9 @@ RUNTIME_CONF_FILE, __version__) from intelmq.lib import cache, exceptions, utils from intelmq.lib.pipeline import PipelineFactory -from intelmq.lib.utils import RewindableFileHandle +from intelmq.lib.utils import RewindableFileHandle, base64_decode -__all__ = ['Bot', 'CollectorBot', 'ParserBot', 'SQLBot'] +__all__ = ['Bot', 'CollectorBot', 'ParserBot', 'SQLBot', 'OutputBot'] class Bot(object): @@ -57,8 +61,8 @@ class Bot(object): # Collectors with an empty process() should set this to true, prevents endless loops (#1364) collector_empty_process = False - def __init__(self, bot_id: str, start=False, sighup_event=None, - disable_multithreading=None): + def __init__(self, bot_id: str, start: bool = False, sighup_event=None, + disable_multithreading: bool = None): self.__log_buffer = [] self.parameters = Parameters() @@ -249,8 +253,9 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, self.parameters.error_retry_delay) time.sleep(self.parameters.error_retry_delay) - if error_on_message: - error_on_message = False + starting = False + error_on_message = False + message_to_dump = None if error_on_pipeline: try: @@ -260,9 +265,6 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, else: error_on_pipeline = False - if starting: - starting = False - self.__handle_sighup() self.process() self.__error_retries_counter = 0 # reset counter @@ -277,6 +279,15 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, self.logger.error('Pipeline failed.') self.__disconnect_pipelines() + except exceptions.DecodingError as exc: + self.logger.exception('Could not decode message from pipeline. No retries useful.') + + # ensure that we do not re-process the faulty message + self.__error_retries_counter = self.parameters.error_max_retries + 1 + error_on_message = sys.exc_info() + + message_to_dump = exc.object + except Exception as exc: # in case of serious system issues, exit immediately if isinstance(exc, MemoryError): @@ -308,10 +319,6 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, self.stop(exitcode=0) finally: - if getattr(self.parameters, 'testing', False): - self.stop(exitcode=0) - break - do_rate_limit = False if error_on_message or error_on_pipeline: @@ -327,7 +334,7 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, if self.parameters.error_dump_message: error_traceback = traceback.format_exception(*error_on_message) self._dump_message(error_traceback, - message=self.__current_message) + message=message_to_dump if message_to_dump else self.__current_message) else: warnings.warn("Message will be removed from the pipeline and not dumped to the disk. " "Set `error_dump_message` to true to save the message on disk. 
" @@ -335,8 +342,8 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, if self.__destination_queues and '_on_error' in self.__destination_queues: self.send_message(self.__current_message, path='_on_error') - # remove message from pipeline - self.acknowledge_message() + if message_to_dump or self.__current_message: + self.acknowledge_message() # when bot acknowledge the message, # don't need to wait again @@ -369,6 +376,10 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, self.logger.info('Shutting down scheduled bot.') self.stop(exitcode=0) + if getattr(self.parameters, 'testing', False): + self.logger.debug('Testing environment detected, returning now.') + return + # Do rate_limit at the end on success and after the retries # counter has been reset: https://github.com/certtools/intelmq/issues/1431 if do_rate_limit: @@ -380,7 +391,7 @@ def start(self, starting: bool = True, error_on_pipeline: bool = True, self.__stats() self.__handle_sighup() - def __stats(self, force=False): + def __stats(self, force: bool = False): """ Flush stats to redis @@ -526,12 +537,14 @@ def __disconnect_pipelines(self): self.__destination_pipeline = None self.logger.debug("Disconnected from destination pipeline.") - def send_message(self, *messages, path="_default", auto_add=None, - path_permissive=False): + def send_message(self, *messages, path: str = "_default", auto_add=None, + path_permissive: bool = False): """ Parameters: messages: Instances of intelmq.lib.message.Message class auto_add: ignored + path_permissive: If true, do not raise an error if the path is + not configured """ for message in messages: if not message: @@ -541,7 +554,7 @@ def send_message(self, *messages, path="_default", auto_add=None, raise exceptions.ConfigurationError('pipeline', 'No destination pipeline given, ' 'but needed') - self.logger.debug("Sending message.") + self.logger.debug("Sending message to path %r.", path) self.__message_counter["since"] += 1 self.__message_counter["path"][path] += 1 if not self.__message_counter["start"]: @@ -618,11 +631,11 @@ def acknowledge_message(self): self.__current_message = None def _dump_message(self, error_traceback, message: dict): + self.logger.info('Dumping message to dump file.') + if message is None or getattr(self.parameters, 'testing', False): return - self.logger.info('Dumping message to dump file.') - dump_file = os.path.join(self.parameters.logging_path, self.__bot_id + ".dump") timestamp = datetime.utcnow() @@ -633,7 +646,12 @@ def _dump_message(self, error_traceback, message: dict): new_dump_data[timestamp]["source_queue"] = self.__source_queues new_dump_data[timestamp]["traceback"] = error_traceback - new_dump_data[timestamp]["message"] = message.serialize() + if isinstance(message, bytes): + # decoding errors + new_dump_data[timestamp]["message"] = utils.base64_encode(message) + new_dump_data[timestamp]["message_type"] = 'base64' + else: + new_dump_data[timestamp]["message"] = message.serialize() if os.path.exists(dump_file): # existing dump @@ -817,7 +835,7 @@ def _parse_common_parameters(self): """ self._parse_extract_file_parameter('extract_files') - def _parse_extract_file_parameter(self, parameter_name='extract_files'): + def _parse_extract_file_parameter(self, parameter_name: str = 'extract_files'): """ Parses and sanitizes commonly used parameters: @@ -842,8 +860,8 @@ class ParserBot(Bot): handle = None current_line = None - def __init__(self, bot_id: str, start=False, sighup_event=None, - disable_multithreading=None): + def 
__init__(self, bot_id: str, start: bool = False, sighup_event=None, + disable_multithreading: bool = None): super().__init__(bot_id=bot_id) if self.__class__.__name__ == 'ParserBot': self.logger.error('ParserBot can\'t be started itself. ' @@ -851,7 +869,7 @@ def __init__(self, bot_id: str, start=False, sighup_event=None, self.stop() self.group = 'Parser' - def parse_csv(self, report: dict): + def parse_csv(self, report: libmessage.Report): """ A basic CSV parser. """ @@ -866,7 +884,7 @@ def parse_csv(self, report: dict): self.current_line = self.handle.current_line yield line - def parse_csv_dict(self, report: dict): + def parse_csv_dict(self, report: libmessage.Report): """ A basic CSV Dictionary parser. """ @@ -887,15 +905,24 @@ def parse_csv_dict(self, report: dict): self.current_line = self.handle.current_line yield line - def parse_json(self, report: dict): + def parse_json(self, report: libmessage.Report): """ - A basic JSON parser + A basic JSON parser. Assumes a *list* of objects to be yielded """ raw_report = utils.base64_decode(report.get("raw")) for line in json.loads(raw_report): yield line - def parse(self, report: dict): + def parse_json_stream(self, report: libmessage.Report): + """ + A JSON stream parser (one JSON data structure per line) + """ + raw_report = utils.base64_decode(report.get("raw")) + for line in raw_report.splitlines(): + self.current_line = line + yield json.loads(line) + + def parse(self, report: libmessage.Report): """ A generator yielding the single elements of the data. @@ -916,7 +943,7 @@ def parse(self, report: dict): if not any([line.startswith(prefix) for prefix in self.ignore_lines_starting]): yield line - def parse_line(self, line, report): + def parse_line(self, line: Any, report: libmessage.Report): """ A generator which can yield one or more messages contained in line. @@ -970,18 +997,42 @@ def process(self): self.acknowledge_message() - def recover_line(self, line: str): + def recover_line(self, line: Optional[str] = None) -> str: """ - Reverse of parse for single lines. + Reverse of "parse" for single lines. + + Recovers a fully functional report with only the problematic line by + concatenating all strings in "self.tempdata" with "line" with LF + newlines. Works fine for most text files. + + Parameters + ---------- + line : Optional[str], optional + The currently processed line which should be transferred into its + original appearance. As fallback, "self.current_line" is used if + available (depending on self.parse). + The default is None. + + Raises + ------ + ValueError + If neither the parameter "line" nor the member "self.current_line" + is available. + + Returns + ------- + str + The reconstructed raw data. - Recovers a fully functional report with only the problematic line. """ if self.handle and self.handle.first_line and not self.tempdata: tempdata = [self.handle.first_line.strip()] else: tempdata = self.tempdata - if self.current_line: - line = self.current_line + if not line and not self.current_line: + raise ValueError('Parameter "line" is not given and ' '"self.current_line" is also None. Please give one of them.') + line = line if line else self.current_line return '\n'.join(tempdata + [line]) def recover_line_csv(self, line: str): @@ -1007,7 +1058,22 @@ def recover_line_json(self, line: dict): Recovers a fully functional report with only the problematic pulse.
""" - return json.dumps(line) + return json.dumps([line]) + + def recover_line_json_stream(self, line: dict) -> str: + """ + recover_line for json streams, just returns the current line, unparsed. + + Parameters + ---------- + line : dict + + Returns + ------- + str + unparsed JSON line. + """ + return self.current_line class CollectorBot(Bot): @@ -1019,8 +1085,8 @@ class CollectorBot(Bot): is_multithreadable = False - def __init__(self, bot_id: str, start=False, sighup_event=None, - disable_multithreading=None): + def __init__(self, bot_id: str, start: bool = False, sighup_event=None, + disable_multithreading: bool = None): super().__init__(bot_id=bot_id) if self.__class__.__name__ == 'CollectorBot': self.logger.error('CollectorBot can\'t be started itself. ' @@ -1028,22 +1094,16 @@ def __init__(self, bot_id: str, start=False, sighup_event=None, self.stop() self.group = 'Collector' - def __filter_empty_report(self, message: dict): + def __filter_empty_report(self, message: libmessage.Report): if 'raw' not in message: self.logger.warning('Ignoring report without raw field. ' 'Possible bug or misconfiguration of this bot.') return False return True - def __add_report_fields(self, report: dict): + def __add_report_fields(self, report: libmessage.Report): if hasattr(self.parameters, 'name'): report.add("feed.name", self.parameters.name) - if hasattr(self.parameters, 'feed'): - warnings.warn("The parameter 'feed' is deprecated and will be " - "removed in version 2.2. Use 'name' instead.", - DeprecationWarning) - if "feed.name" not in report: - report.add("feed.name", self.parameters.feed) if hasattr(self.parameters, 'code'): report.add("feed.code", self.parameters.code) if hasattr(self.parameters, 'documentation'): @@ -1053,7 +1113,7 @@ def __add_report_fields(self, report: dict): report.add("feed.accuracy", self.parameters.accuracy) return report - def send_message(self, *messages, path="_default", auto_add=True): + def send_message(self, *messages, path: str = "_default", auto_add: bool = True): """" Parameters: messages: Instances of intelmq.lib.message.Message class @@ -1094,7 +1154,7 @@ def init(self): else: raise ValueError("Wrong parameter 'engine' {0!r}, possible values are {1}".format(self.engine_name, engines)) - def _connect(self, engine, connect_args, autocommitable=False): + def _connect(self, engine, connect_args: dict, autocommitable: bool = False): self.engine = engine # imported external library that connects to the DB self.logger.debug("Connecting to database.") @@ -1113,7 +1173,7 @@ def _init_postgresql(self): import psycopg2 import psycopg2.extras except ImportError: - raise ValueError("Could not import 'psycopg2'. Please install it.") + raise exceptions.MissingDependencyError("psycopg2") self._connect(psycopg2, {"database": self.parameters.database, @@ -1130,7 +1190,7 @@ def _init_sqlite(self): try: import sqlite3 except ImportError: - raise ValueError("Could not import 'sqlite3'. Please install it.") + raise exceptions.MissingDependencyError("sqlite3") self._connect(sqlite3, {"database": self.parameters.database, @@ -1138,7 +1198,7 @@ def _init_sqlite(self): } ) - def execute(self, query, values, rollback=False): + def execute(self, query: str, values: tuple, rollback=False): try: self.logger.debug('Executing %r.', query, values) # note: this assumes, the DB was created with UTF-8 support! @@ -1167,5 +1227,75 @@ def execute(self, query, values, rollback=False): return False +class OutputBot(Bot): + """ + Base class for outputs. 
+ """ + + def __init__(self, bot_id: str, start: bool = False, sighup_event=None, + disable_multithreading: bool = None): + super().__init__(bot_id=bot_id) + if self.__class__.__name__ == 'OutputBot': + self.logger.error('OutputBot can\'t be started itself. ' + 'Possible Misconfiguration.') + self.stop() + self.group = 'Output' + + self.hierarchical = getattr(self.parameters, "hierarchical_output", # file and files + getattr(self.parameters, "message_hierarchical", # stomp and amqp code + getattr(self.parameters, "message_hierarchical_output", False))) # stomp and amqp docs + self.with_type = getattr(self.parameters, "message_with_type", False) + self.jsondict_as_string = getattr(self.parameters, "message_jsondict_as_string", False) + + self.single_key = getattr(self.parameters, 'single_key', None) + self.keep_raw_field = getattr(self.parameters, 'keep_raw_field', False) + + def export_event(self, event: libmessage.Event, + return_type: Optional[type] = None): + """ + exports an event according to the following parameters: + * message_hierarchical + * message_with_type + * message_jsondict_as_string + * single_key + * keep_raw_field + + Parameters: + return_type: Ensure that the returned value is of the given type. + Optional. For example: str + If the resulting value is not an instance of this type, the + given object is called with the value as parameter E.g. `str(retval)` + """ + if self.single_key: + if self.single_key == 'raw': + return base64_decode(event.get('raw', '')) + elif self.single_key == 'output': + retval = event.get(self.single_key) + if return_type is str: + loaded = json.loads(retval) + if isinstance(loaded, return_type): + return loaded + else: + retval = json.loads(retval) + else: + retval = event.get(self.single_key) + else: + if not self.keep_raw_field: + if 'raw' in event: + del event['raw'] + if return_type is str: + return event.to_json(hierarchical=self.hierarchical, + with_type=self.with_type, + jsondict_as_string=self.jsondict_as_string) + else: + retval = event.to_dict(hierarchical=self.hierarchical, + with_type=self.with_type, + jsondict_as_string=self.jsondict_as_string) + + if return_type and not isinstance(retval, return_type): + return return_type(retval) + return retval + + class Parameters(object): pass diff --git a/intelmq/lib/exceptions.py b/intelmq/lib/exceptions.py index 71d76049f..ef5021475 100644 --- a/intelmq/lib/exceptions.py +++ b/intelmq/lib/exceptions.py @@ -4,9 +4,12 @@ ''' import traceback +from typing import Any, Optional + __all__ = ['InvalidArgument', 'ConfigurationError', 'IntelMQException', 'IntelMQHarmonizationException', 'InvalidKey', 'InvalidValue', 'KeyExists', 'KeyNotExists', 'PipelineError', + 'MissingDependencyError', ] @@ -23,7 +26,8 @@ def __init__(self, message): class InvalidArgument(IntelMQException): - def __init__(self, argument, got=None, expected=None, docs=None): + def __init__(self, argument: Any, got: Any = None, expected=None, + docs: str = None): message = "Argument {} is invalid.".format(repr(argument)) if expected is list: message += " Should be one of: {}.".format(list) @@ -38,7 +42,7 @@ def __init__(self, argument, got=None, expected=None, docs=None): class PipelineError(IntelMQException): - def __init__(self, argument): + def __init__(self, argument: Exception): if type(argument) is type and issubclass(argument, Exception): message = "pipeline failed - %s" % traceback.format_exc(argument) else: @@ -48,7 +52,7 @@ def __init__(self, argument): class ConfigurationError(IntelMQException): - def __init__(self, 
config, argument): + def __init__(self, config: str, argument: str): message = "%s configuration failed - %s" % (config, argument) super().__init__(message) @@ -75,29 +79,96 @@ def __init__(self, message): class InvalidValue(IntelMQHarmonizationException): - def __init__(self, key, value, reason=None): + def __init__(self, key: str, value: str, reason: Any = None): message = ("invalid value {value!r} ({type}) for key {key!r}{reason}" "".format(value=value, type=type(value), key=key, reason=': ' + reason if reason else '')) super().__init__(message) -class InvalidKey(IntelMQHarmonizationException): +class InvalidKey(IntelMQHarmonizationException, KeyError): - def __init__(self, key): + def __init__(self, key: str): message = "invalid key %s" % repr(key) super().__init__(message) class KeyExists(IntelMQHarmonizationException): - def __init__(self, key): + def __init__(self, key: str): message = "key %s already exists" % repr(key) super().__init__(message) class KeyNotExists(IntelMQHarmonizationException): - def __init__(self, key): + def __init__(self, key: str): message = "key %s not exists" % repr(key) super().__init__(message) + + +class MissingDependencyError(IntelMQException): + """ + A missing dependency was detected. Log instructions on installation. + """ + def __init__(self, dependency: str, version: Optional[str] = None, + installed: Optional[str] = None, + additional_text: Optional[str] = None): + """ + Parameters + ---------- + dependency : str + The dependency name. + version : Optional[str], optional + The required version. The default is None. + installed : Optional[str], optional + The currently installed version. Requires 'version' to be given. The default is None. + additional_text : Optional[str], optional + Arbitrary additional text to show. The default is None. + + Returns + ------- + IntelMQException: with prepared text + + """ + appendix = "" + if version: + higher = " or higher" if not any(x in version for x in '<>=') else "" + appendix = (" Please note that this bot requires " + "{dependency} version {version}{higher}!" + "".format(dependency=dependency, + version=version, + higher=higher)) + if installed: + if isinstance(installed, tuple): + installed = ".".join(map(str, installed)) + appendix = appendix + (" Installed is version {installed!r}." + "".format(installed=installed)) + if additional_text: + appendix = "%s %s" % (appendix, additional_text) + message = ("Could not load dependency {dependency!r}, please install it " + "with apt/yum/dnf/zypper (possibly named " + "python3-{dependency}) or pip3.{appendix}" + "".format(dependency=dependency, + appendix=appendix)) + super().__init__(message) + + +class DecodingError(IntelMQException, ValueError): + """ + This is a separate Error to distinguish it from other exceptions as it is + unrecoverable. + """ + def __init__(self, encodings=None, exception: UnicodeDecodeError = None, + object: bytes = None): + self.object = object + suffix = [] + if encodings: + suffix.append("with given encodings %r" % encodings) + if exception: + suffix.append('at position %s with length %d (%r)' '' % (exception.start, exception.end, exception.object[exception.start:exception.end])) + suffix.append('with reason %r' % exception.reason) + suffix = (' ' + ' '.join(suffix)) if suffix else '' + super().__init__("Could not decode string%s."
% suffix) diff --git a/intelmq/lib/harmonization.py b/intelmq/lib/harmonization.py index 90acb60a9..b16a7e8bf 100644 --- a/intelmq/lib/harmonization.py +++ b/intelmq/lib/harmonization.py @@ -38,6 +38,8 @@ import intelmq.lib.utils as utils +from typing import Optional + __all__ = ['Base64', 'Boolean', 'ClassificationType', 'DateTime', 'FQDN', 'Float', 'Accuracy', 'GenericType', 'IPAddress', 'IPNetwork', 'Integer', 'JSON', 'JSONDict', 'LowercaseString', 'Registry', @@ -48,7 +50,7 @@ class GenericType(object): @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = GenericType().sanitize(value) @@ -64,7 +66,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value) -> Optional[str]: if not value: return None @@ -87,7 +89,7 @@ class String(GenericType): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = GenericType().sanitize(value) @@ -111,7 +113,7 @@ class Base64(String): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = Base64().sanitize(value) @@ -126,7 +128,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: try: value = utils.base64_encode(value) except AttributeError: # None @@ -142,7 +144,7 @@ class Boolean(GenericType): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: bool, sanitize: bool = False) -> bool: if isinstance(value, bool): return True else: @@ -153,7 +155,7 @@ def is_valid(value, sanitize=False): return False @staticmethod - def sanitize(value): + def sanitize(value: bool) -> Optional[bool]: if isinstance(value, (str, bytes)): value = value.strip().lower() if value == 'true': @@ -243,7 +245,7 @@ class ClassificationType(String): __doc__ += '\n * '.join(allowed_values) @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = ClassificationType().sanitize(value) @@ -259,7 +261,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: value = LowercaseString.sanitize(value) if not value: return None @@ -291,7 +293,7 @@ class DateTime(String): midnight = datetime.time(0, 0, 0, 0) @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = DateTime().sanitize(value) @@ -304,7 +306,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: try: value = DateTime.__parse(value) except TypeError: # None @@ -312,7 +314,7 @@ def sanitize(value): return GenericType().sanitize(value) @staticmethod - def __parse(value): + def __parse(value: str) -> Optional[str]: try: return utils.decode(DateTime.parse_utc_isoformat(value)) except ValueError: @@ -327,7 +329,7 @@ def __parse(value): return utils.decode(value) @staticmethod - def parse_utc_isoformat(value): + def parse_utc_isoformat(value: str) -> Optional[datetime.datetime]: """ Parse format generated by datetime.isoformat() method with UTC timezone. It is much faster than universal dateutil parser. 
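# parse_utc_isoformat() is fast because it only accepts the one fixed
# layout that datetime.isoformat() emits for UTC values, instead of the
# guess-everything dateutil parser. A standalone illustration of such a
# fast path; the exact format string IntelMQ matches may differ.
import datetime

def parse_utc_isoformat_sketch(value: str) -> datetime.datetime:
    # e.g. '2020-06-18T10:00:00+00:00', the shape isoformat() produces for UTC
    parsed = datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S+00:00')
    return parsed.replace(tzinfo=datetime.timezone.utc)

parse_utc_isoformat_sketch('2020-06-18T10:00:00+00:00')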
@@ -341,7 +343,7 @@ def parse_utc_isoformat(value): return value @staticmethod - def from_epoch_millis(tstamp, tzone='UTC'): + def from_epoch_millis(tstamp: str, tzone='UTC') -> datetime.datetime: """ Returns ISO formatted datetime from given epoch timestamp with milliseconds. It ignores the milliseconds, converts it into normal timestamp and processes it. @@ -356,7 +358,7 @@ def from_epoch_millis(tstamp, tzone='UTC'): return DateTime.from_timestamp(int_tstamp // 1000, tzone) @staticmethod - def from_timestamp(tstamp, tzone='UTC'): + def from_timestamp(tstamp: str, tzone='UTC') -> str: """ Returns ISO formatted datetime from given timestamp. You can give timezone for given timestamp, UTC by default. @@ -388,7 +390,7 @@ def from_windows_nt(tstamp: int) -> str: return dtime.isoformat() @staticmethod - def generate_datetime_now(): + def generate_datetime_now() -> str: value = datetime.datetime.now(pytz.timezone('UTC')) value = value.replace(microsecond=0) return value.isoformat() @@ -420,7 +422,7 @@ def convert_from_format_midnight(value: str, format: str) -> str: return value.isoformat() @staticmethod - def convert_fuzzy(value): + def convert_fuzzy(value) -> str: value = dateutil.parser.parse(value, fuzzy=True) if not value.tzinfo and sys.version_info <= (3, 6): value = pytz.utc.localize(value) @@ -429,7 +431,7 @@ def convert_fuzzy(value): return value.isoformat() @staticmethod - def convert(value, format='fuzzy'): + def convert(value, format='fuzzy') -> str: """ Converts date time strings according to the given format. If the timezone is not given or clear, the local time zone is assumed! @@ -474,7 +476,7 @@ class Float(GenericType): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: float, sanitize: bool = False) -> bool: if sanitize: value = Float().sanitize(value) if value is not None: @@ -489,7 +491,7 @@ def is_valid(value, sanitize=False): return False @staticmethod - def sanitize(value): + def sanitize(value: float) -> Optional[float]: try: return float(value) except (ValueError, TypeError): @@ -502,7 +504,7 @@ class Accuracy(Float): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: float, sanitize: bool = False) -> bool: if sanitize: value = Accuracy.sanitize(value) if value is not None and value >= 0 and value <= 100: @@ -517,7 +519,7 @@ def is_valid(value, sanitize=False): return False @staticmethod - def sanitize(value): + def sanitize(value: float) -> Optional[float]: try: if isinstance(value, bool): return float(value) * 100 @@ -541,7 +543,7 @@ class FQDN(String): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = FQDN().sanitize(value) @@ -565,7 +567,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: try: value = GenericType().sanitize(value) except ValueError: @@ -580,7 +582,7 @@ def sanitize(value): return @staticmethod - def to_ip(value): + def to_ip(value: str) -> Optional[str]: try: value = str(dns.resolver.query(value, 'A')[0]) except dns.resolver.NXDOMAIN: # domain not found @@ -597,7 +599,7 @@ class Integer(GenericType): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: int, sanitize: bool = False) -> bool: if sanitize: value = Integer().sanitize(value) if value is not None: @@ -612,7 +614,7 @@ def is_valid(value, sanitize=False): return False @staticmethod - def sanitize(value): + def sanitize(value: int) -> 
Optional[int]: try: return int(value) except (ValueError, TypeError): @@ -630,14 +632,14 @@ class ASN(Integer): > reserved and should not be used by operators. """ @staticmethod - def check_asn(value): + def check_asn(value: int) -> bool: if 0 < value <= 4294967295: return True else: return False @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: int, sanitize: bool = False) -> bool: if sanitize: value = ASN().sanitize(value) if not Integer.is_valid(value): @@ -647,7 +649,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: int) -> Optional[int]: if isinstance(value, str) and value.lower().startswith('as'): value = value[2:] value = Integer.sanitize(value) @@ -665,7 +667,7 @@ class IPAddress(String): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = IPAddress().sanitize(value) @@ -683,7 +685,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: try: value = GenericType().sanitize(value) @@ -712,7 +714,7 @@ def sanitize(value): return GenericType().sanitize(value) @staticmethod - def to_int(value): + def to_int(value: str) -> Optional[int]: try: ip_integer = socket.inet_pton(socket.AF_INET, value) except socket.error: @@ -725,11 +727,11 @@ def to_int(value): return ip_integer @staticmethod - def version(value): + def version(value: str) -> int: return ipaddress.ip_address(value).version @staticmethod - def to_reverse(ip_addr): + def to_reverse(ip_addr: str) -> str: return str(dns.reversename.from_address(ip_addr)) @@ -744,7 +746,7 @@ class IPNetwork(String): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = IPNetwork().sanitize(value) @@ -759,7 +761,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: try: value = GenericType().sanitize(value) @@ -770,7 +772,7 @@ def sanitize(value): return GenericType().sanitize(value) @staticmethod - def version(value): + def version(value: str) -> int: return ipaddress.ip_network(str(value)).version @@ -784,7 +786,7 @@ class JSON(String): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = JSON().sanitize(value) @@ -799,7 +801,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: if value is None: return None if isinstance(value, (str, bytes)): @@ -822,7 +824,7 @@ class JSONDict(JSON): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = JSONDict().sanitize(value) @@ -840,11 +842,11 @@ def is_valid(value, sanitize=False): return False @staticmethod - def is_valid_subitem(value): + def is_valid_subitem(value: str) -> bool: return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: if not value: return None if isinstance(value, (str, bytes)): @@ -857,7 +859,7 @@ def sanitize(value): return None @staticmethod - def sanitize_subitem(value): + def sanitize_subitem(value: str) -> str: return value @@ -869,7 +871,7 @@ class LowercaseString(String): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> 
bool: if sanitize: value = String().sanitize(value) value = LowercaseString().sanitize(value) @@ -883,7 +885,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: try: value = value.lower() except AttributeError: # None @@ -902,7 +904,7 @@ class URL(String): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = URL().sanitize(value) @@ -916,7 +918,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: value = GenericType().sanitize(value) if not value: return @@ -936,14 +938,14 @@ def sanitize(value): return value @staticmethod - def to_ip(url): + def to_ip(url: str) -> Optional[str]: value = parse.urlsplit(url) if value.netloc != "": return FQDN().to_ip(value.netloc) return None @staticmethod - def to_domain_name(url): + def to_domain_name(url: str) -> Optional[str]: value = parse.urlsplit(url) if value.netloc != "" and not IPAddress.is_valid(value.netloc): return value.netloc @@ -958,7 +960,7 @@ class UppercaseString(String): """ @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = UppercaseString().sanitize(value) @@ -971,7 +973,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: try: value = value.upper() except AttributeError: # None @@ -989,7 +991,7 @@ class Registry(UppercaseString): ENUM = ['AFRINIC', 'APNIC', 'ARIN', 'LACNIC', 'RIPE'] @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = Registry.sanitize(value) @@ -1002,7 +1004,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> str: value = UppercaseString.sanitize(value) if value in ['RIPENCC', 'RIPE-NCC']: value = 'RIPE' @@ -1021,7 +1023,7 @@ class TLP(UppercaseString): prefix_pattern = re.compile(r'^(TLP:?)?\s*', flags=re.IGNORECASE) @staticmethod - def is_valid(value, sanitize=False): + def is_valid(value: str, sanitize: bool = False) -> bool: if sanitize: value = TLP.sanitize(value) @@ -1034,7 +1036,7 @@ def is_valid(value, sanitize=False): return True @staticmethod - def sanitize(value): + def sanitize(value: str) -> Optional[str]: value = UppercaseString.sanitize(value) if value: value = TLP.prefix_pattern.sub('', value) diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py index 485a3b832..228f062a8 100644 --- a/intelmq/lib/message.py +++ b/intelmq/lib/message.py @@ -9,7 +9,7 @@ import re import warnings from collections import defaultdict -from typing import Any, Dict, Optional, Sequence, Union +from typing import Any, Dict, Iterable, Optional, Sequence, Union import intelmq.lib.exceptions as exceptions import intelmq.lib.harmonization @@ -109,7 +109,7 @@ def __init__(self, message: Union[dict, tuple] = (), auto: bool = False, if (classname == 'event' and 'extra' in self.harmonization_config and self.harmonization_config['extra']['type'] == 'JSON'): warnings.warn("Assuming harmonization type 'JSONDict' for harmonization field 'extra'. 
" - "This assumption will be removed in version 2.0.", DeprecationWarning) + "This assumption will be removed in version 3.0.", DeprecationWarning) self.harmonization_config['extra']['type'] = 'JSONDict' for harm_key in self.harmonization_config.keys(): if not re.match('^[a-z_](.[a-z_0-9]+)*$', harm_key) and harm_key != '__type': @@ -374,7 +374,7 @@ def __get_type_config(self, key: str): def __hash__(self): return int(self.hash(), 16) - def hash(self, *, filter_keys=frozenset(), filter_type="blacklist"): + def hash(self, *, filter_keys: Iterable = frozenset(), filter_type: str = "blacklist"): """Return a SHA256 hash of the message as a hexadecimal string. The hash is computed over almost all key/value pairs. Depending on filter_type parameter (blacklist or whitelist), the keys defined in @@ -468,11 +468,12 @@ def to_dict(self, hierarchical: bool = False, with_type: bool = False, def to_json(self, hierarchical=False, with_type=False, jsondict_as_string=False): json_dict = self.to_dict(hierarchical=hierarchical, with_type=with_type) - return json.dumps(json_dict, ensure_ascii=False) + return json.dumps(json_dict, ensure_ascii=False, sort_keys=True) - def __eq__(self, other) -> bool: + def __eq__(self, other: dict) -> bool: """ - Necessary as we have an additional member harmonization_config and types. + Wrapper is necessary as we have additional members + harmonization_config and types. The additional checks are only performed for subclasses of Message. Comparison with other types e.g. dicts does not check the harmonization_config. @@ -487,7 +488,7 @@ def __eq__(self, other) -> bool: return True return False - def __ne__(self, other) -> bool: + def __ne__(self, other: dict) -> bool: return not self.__eq__(other) def set_default_value(self, value: Any = None): @@ -497,7 +498,7 @@ def set_default_value(self, value: Any = None): self._default_value_set = True self.default_value = value - def __contains__(self, item) -> bool: + def __contains__(self, item: str) -> bool: if item == 'extra': return 'extra' in self.to_dict(hierarchical=True) return super().__contains__(item) diff --git a/intelmq/lib/pipeline.py b/intelmq/lib/pipeline.py index 513f71a4e..dc2e2e127 100644 --- a/intelmq/lib/pipeline.py +++ b/intelmq/lib/pipeline.py @@ -2,7 +2,7 @@ import time import warnings from itertools import chain -from typing import Optional, Union +from typing import Dict, Optional, Union import ssl import redis @@ -78,7 +78,7 @@ def connect(self): def disconnect(self): raise NotImplementedError - def set_queues(self, queues, queues_type): + def set_queues(self, queues: Optional[str], queues_type: str): """ :param queues: For source queue, it's just string. 
For destination queue, it can be one of the following: @@ -113,7 +113,8 @@ def set_queues(self, queues, queues_type): else: raise exceptions.InvalidArgument('queues_type', got=queues_type, expected=['source', 'destination']) - def send(self, message, path="_default", path_permissive=False): + def send(self, message: str, path: str = "_default", + path_permissive: bool = False): raise NotImplementedError def receive(self) -> str: @@ -123,12 +124,28 @@ def receive(self) -> str: retval = self._receive() self._has_message = True - return retval + return utils.decode(retval) - def _receive(self) -> str: + def _receive(self) -> bytes: raise NotImplementedError def acknowledge(self): + """ + Acknowledge/delete the current message from the source queue + + Parameters + ---------- + + Raises + ------ + exceptions + exceptions.PipelineError: If no message is held + + Returns + ------- + None. + + """ if not self._has_message: raise exceptions.PipelineError("No message to acknowledge.") self._acknowledge() @@ -204,7 +221,8 @@ def set_queues(self, queues, queues_type): self.load_configurations(queues_type) super().set_queues(queues, queues_type) - def send(self, message, path="_default", path_permissive=False): + def send(self, message: str, path: str = "_default", + path_permissive: bool = False): if path not in self.destination_queues and path_permissive: return @@ -232,7 +250,7 @@ def send(self, message, path="_default", path_permissive=False): 'Look at redis\'s logs.') raise exceptions.PipelineError(exc) - def _receive(self) -> str: + def _receive(self) -> bytes: if self.source_queue is None: raise exceptions.ConfigurationError('pipeline', 'No source queue given.') try: @@ -246,9 +264,10 @@ def _receive(self) -> str: if not retval: retval = self.pipe.brpoplpush(self.source_queue, self.internal_queue, 0) - return utils.decode(retval) except Exception as exc: raise exceptions.PipelineError(exc) + else: + return retval def _acknowledge(self): try: @@ -257,7 +276,7 @@ def _acknowledge(self): raise exceptions.PipelineError(e) else: if not retval: - raise exceptions.PipelineError("Could not pop message from internal queue" + raise exceptions.PipelineError("Could not pop message from internal queue " "for acknowledgement. Return value was %r." "" % retval) @@ -319,9 +338,6 @@ def connect(self): def disconnect(self): pass - def sleep(self, interval): - warnings.warn("'Pipeline.sleep' will be removed in version 2.0.", DeprecationWarning) - def set_queues(self, queues, queues_type): super().set_queues(queues, queues_type) self.state[self.internal_queue] = [] @@ -329,7 +345,8 @@ def set_queues(self, queues, queues_type): for destination_queue in chain.from_iterable(self.destination_queues.values()): self.state[destination_queue] = [] - def send(self, message, path="_default", path_permissive=False): + def send(self, message: str, path: str = "_default", + path_permissive: bool = False): """Sends a message to the destination queues""" if path not in self.destination_queues and path_permissive: return @@ -340,23 +357,22 @@ def send(self, message, path="_default", path_permissive=False): else: self.state[destination_queue] = [utils.encode(message)] - def _receive(self) -> str: + def _receive(self) -> bytes: """ Receives the last not yet acknowledged message. Does not block unlike the other pipelines. 
""" - if len(self.state.get(self.internal_queue, [])) > 0: - return utils.decode(self.state[self.internal_queue].pop(0)) - - first_msg = self.state[self.source_queue].pop(0) + if len(self.state[self.internal_queue]) > 0: + return utils.decode(self.state[self.internal_queue][0]) - if self.internal_queue in self.state: - self.state[self.internal_queue].append(first_msg) - else: - self.state[self.internal_queue] = [first_msg] + try: + first_msg = self.state[self.source_queue].pop(0) + except IndexError as exc: + raise exceptions.PipelineError(exc) + self.state[self.internal_queue].append(first_msg) - return utils.decode(first_msg) + return first_msg def _acknowledge(self): """Removes a message from the internal queue and returns it""" @@ -501,6 +517,9 @@ def _send(self, destination_queue, message, reconnect=True): ) except Exception as exc: # UnroutableError, NackError in 1.0.0 if reconnect and isinstance(exc, pika.exceptions.ConnectionClosed): + self.logger.debug('Error sending the message. ' + 'Will re-connect and re-send.', + exc_info=True) self.connect() self._send(destination_queue, message, reconnect=False) else: @@ -509,7 +528,8 @@ def _send(self, destination_queue, message, reconnect=True): if not self.publish_raises_nack and not retval: raise exceptions.PipelineError('Sent message was not confirmed.') - def send(self, message: str, path="_default", path_permissive=False) -> None: + def send(self, message: str, path: str = "_default", + path_permissive: bool = False): """ In principle we could use AMQP's exchanges here but that architecture is incompatible to the format of our pipeline.conf file. @@ -531,25 +551,31 @@ def send(self, message: str, path="_default", path_permissive=False) -> None: for destination_queue in queues: self._send(destination_queue, message) - def _receive(self) -> str: + def _receive(self) -> bytes: if self.source_queue is None: raise exceptions.ConfigurationError('pipeline', 'No source queue given.') try: method, header, body = next(self.channel.consume(self.source_queue)) if method: self.delivery_tag = method.delivery_tag - return utils.decode(body) except Exception as exc: raise exceptions.PipelineError(exc) + else: + return body def _acknowledge(self): try: self.channel.basic_ack(delivery_tag=self.delivery_tag) except pika.exceptions.ConnectionClosed: + self.logger.debug('Error sending the message. ' + 'Will re-connect and re-send.', + exc_info=True) self.connect() self.channel.basic_ack(delivery_tag=self.delivery_tag) except Exception as e: raise exceptions.PipelineError(e) + else: + self.delivery_tag = None def _get_queues(self) -> dict: if self.username and self.password: @@ -585,7 +611,7 @@ def count_queued_messages(self, *queues) -> dict: def clear_queue(self, queue: str) -> bool: try: self.channel.queue_delete(queue=queue) - except pika.exceptions.ChannelClosed as exc: # channel not found and similar + except pika.exceptions.ChannelClosed: # channel not found and similar pass def nonempty_queues(self) -> set: diff --git a/intelmq/lib/test.py b/intelmq/lib/test.py index 68892e1cd..c656554b4 100644 --- a/intelmq/lib/test.py +++ b/intelmq/lib/test.py @@ -5,15 +5,12 @@ The BotTestCase can be used as base class for unittests on bots. It includes some basic generic tests (logged errors, correct pipeline setup). 
""" -import copy import io import json -import logging import os import re import unittest import unittest.mock as mock -import sys from itertools import chain import pkg_resources @@ -22,24 +19,27 @@ import intelmq.lib.message as message import intelmq.lib.pipeline as pipeline import intelmq.lib.utils as utils -from intelmq import CONFIG_DIR, PIPELINE_CONF_FILE, RUNTIME_CONF_FILE +from intelmq import CONFIG_DIR, PIPELINE_CONF_FILE, RUNTIME_CONF_FILE, DEFAULTS_CONF_FILE __all__ = ['BotTestCase'] -BOT_CONFIG = {"http_proxy": None, - "https_proxy": None, - "broker": "pythonlist", - "rate_limit": 0, - "retry_delay": 0, - "error_retry_delay": 0, - "error_max_retries": 0, - "redis_cache_host": "localhost", - "redis_cache_port": 6379, - "redis_cache_db": 4, - "redis_cache_ttl": 10, - "redis_cache_password": os.environ.get('INTELMQ_TEST_REDIS_PASSWORD'), - "testing": True, - } +BOT_CONFIG = utils.load_configuration(pkg_resources.resource_filename('intelmq', + 'etc/defaults.conf')) +BOT_CONFIG.update({"destination_pipeline_broker": "pythonlist", + "logging_handler": "stream", + "logging_path": None, + "rate_limit": 0, + "retry_delay": 0, + "error_retry_delay": 0, + "error_max_retries": 0, + "redis_cache_host": "localhost", + "redis_cache_port": 6379, + "redis_cache_db": 4, + "redis_cache_ttl": 10, + "redis_cache_password": os.environ.get('INTELMQ_TEST_REDIS_PASSWORD'), + "source_pipeline_broker": "pythonlist", + "testing": True, + }) class Parameters(object): @@ -53,14 +53,14 @@ def mocked(conf_file): "destination-queues": dst_names}, } elif conf_file == RUNTIME_CONF_FILE: - conf = BOT_CONFIG.copy() - conf.update(sysconfig) return {bot_id: {'description': 'Instance of a bot for automated unit tests.', 'group': group, 'module': module, 'name': 'Test Bot', - 'parameters': conf, + 'parameters': sysconfig, }} + elif conf_file == DEFAULTS_CONF_FILE: + return BOT_CONFIG elif conf_file.startswith(CONFIG_DIR): confname = os.path.join('etc/', os.path.split(conf_file)[-1]) fname = pkg_resources.resource_filename('intelmq', @@ -73,16 +73,6 @@ def mocked(conf_file): return mocked -def mocked_logger(logger): - def log(name, log_path=None, log_level=None, stream=None, syslog=None): - # Return a copy as the bot may modify the logger and we should always return the intial logger - logger_new = copy.copy(logger) - logger_new.setLevel(log_level) - return logger_new - - return log - - def skip_database(): return unittest.skipUnless(os.environ.get('INTELMQ_TEST_DATABASES'), 'Skipping database tests.') @@ -129,6 +119,10 @@ def setUpClass(cls): """ Set default values and save original functions. """ + if not utils.drop_privileges(): + raise ValueError('IntelMQ and IntelMQ tests must not run as root for security reasons. ' + 'Dropping privileges did not work.') + cls.bot_id = 'test-bot' cls.bot_name = None cls.bot = None @@ -184,6 +178,12 @@ def new_report(self, auto=False, examples=False): def new_event(self): return message.Event(harmonization=self.harmonization) + def get_mocked_logger(self, logger): + def log(name, *args, **kwargs): + logger.handlers = self.logger_handlers_backup + return logger + return log + def prepare_bot(self, parameters={}, destination_queues=None): """ Reconfigures the bot with the changed attributes. 
@@ -213,16 +213,15 @@ def prepare_bot(self, parameters={}, destination_queues=None): module=self.bot_reference.__module__, ) - logger = logging.getLogger(self.bot_id) - logger.setLevel("INFO") - console_formatter = logging.Formatter(utils.LOG_FORMAT) - console_handler = logging.StreamHandler(self.log_stream) - console_handler.setFormatter(console_formatter) - logger.addHandler(console_handler) - self.mocked_log = mocked_logger(logger) - logging.captureWarnings(True) - warnings_logger = logging.getLogger("py.warnings") - warnings_logger.addHandler(console_handler) + self.resulting_config = BOT_CONFIG.copy() + self.resulting_config.update(self.sysconfig) + self.resulting_config.update(parameters) + + self.logger = utils.log(self.bot_id, + log_path=False, stream=self.log_stream, + log_format_stream=utils.LOG_FORMAT, + log_level=self.resulting_config['logging_level']) + self.logger_handlers_backup = self.logger.handlers parameters = Parameters() setattr(parameters, 'source_queue', src_name) @@ -230,16 +229,16 @@ def prepare_bot(self, parameters={}, destination_queues=None): with mock.patch('intelmq.lib.utils.load_configuration', new=self.mocked_config): - with mock.patch('intelmq.lib.utils.log', self.mocked_log): + with mock.patch('intelmq.lib.utils.log', self.get_mocked_logger(self.logger)): self.bot = self.bot_reference(self.bot_id) self.bot._Bot__stats_cache = None - self.pipe = pipeline.Pythonlist(parameters, logger=logger, bot=self.bot) + self.pipe = pipeline.Pythonlist(parameters, logger=self.logger, bot=self.bot) self.pipe.set_queues(parameters.source_queue, "source") self.pipe.set_queues(parameters.destination_queues, "destination") if self.input_message is not None: - if type(self.input_message) is not list: + if not isinstance(self.input_message, (list, tuple)): self.input_message = [self.input_message] self.input_queue = [] for msg in self.input_message: @@ -255,23 +254,32 @@ def prepare_bot(self, parameters={}, destination_queues=None): self.input_queue = [self.default_input_message] def run_bot(self, iterations: int = 1, error_on_pipeline: bool = False, - prepare=True, parameters={}): + prepare=True, parameters={}, + allowed_error_count=0, + allowed_warning_count=0): """ Call this method for actually doing a test run for the specified bot. Parameters: iterations: Bot instance will be run the given times, defaults to 1. parameters: passed to prepare_bot + allowed_error_count: maximum number of allowed errors in the logs + allowed_warning_count: maximum number of allowed warnings in the logs """ if prepare: self.prepare_bot(parameters=parameters) + elif parameters: + raise ValueError("Parameter 'parameters' is given, but parameter " + "'prepare' is false.
Parameters must be passed on " + "to 'prepare_bot' to be effective.") with mock.patch('intelmq.lib.utils.load_configuration', new=self.mocked_config): - with mock.patch('intelmq.lib.utils.log', self.mocked_log): + with mock.patch('intelmq.lib.utils.log', self.get_mocked_logger(self.logger)): for run in range(iterations): self.bot.start(error_on_pipeline=error_on_pipeline, source_pipeline=self.pipe, destination_pipeline=self.pipe) + self.bot.stop(exitcode=0) self.loglines_buffer = self.log_stream.getvalue() self.loglines = self.loglines_buffer.splitlines() @@ -308,8 +316,11 @@ def run_bot(self, iterations: int = 1, error_on_pipeline: bool = False, self.bot_id), "INFO") self.assertRegexpMatchesLog("INFO - Bot is starting.") self.assertLoglineEqual(-1, "Bot stopped.", "INFO") - self.assertNotRegexpMatchesLog("(ERROR.*?){%d}" % (self.allowed_error_count + 1)) - self.assertNotRegexpMatchesLog("(WARNING.*?){%d}" % (self.allowed_warning_count + 1)) + + allowed_error_count = max(allowed_error_count, self.allowed_error_count) + self.assertLessEqual(len(re.findall(' - ERROR - ', self.loglines_buffer)), allowed_error_count) + allowed_warning_count = max(allowed_warning_count, self.allowed_warning_count) + self.assertLessEqual(len(re.findall(' - WARNING - ', self.loglines_buffer)), allowed_warning_count) self.assertNotRegexpMatchesLog("CRITICAL") """ If no error happened (incl. tracebacks) we can check for formatting """ if not self.allowed_error_count: @@ -356,7 +367,6 @@ def test_bot_name(self): """ Test if Bot has a valid name. Must be CamelCase and end with CollectorBot etc. - Test class name must be Test{botclassname} """ counter = 0 for type_name, type_match in self.bot_types.items(): @@ -369,9 +379,6 @@ def test_bot_name(self): self.fail("Bot name {!r} does not match one of {!r}" "".format(self.bot_name, list(self.bot_types.values()))) # pragma: no cover - self.assertEqual('Test{}'.format(self.bot_name), - self.__class__.__name__.split('_')[0]) - def assertAnyLoglineEqual(self, message: str, levelname: str = "ERROR"): """ Asserts if any logline matches a specific requirement. @@ -403,9 +410,6 @@ def assertLoglineEqual(self, line_no: int, message: str, levelname: str = "ERROR message: Message text which is compared levelname: Log level of logline which is asserted """ - if sys.version_info >= (3, 7): - return True - self.assertIsNotNone(self.loglines) logline = self.loglines[line_no] fields = utils.parse_logline(logline) @@ -446,9 +450,6 @@ def assertLogMatches(self, pattern: str, levelname: str = "ERROR"): pattern: Message text which is compared, regular expression. levelname: Log level of the logline which is asserted, upper case. """ - if sys.version_info >= (3, 7): - return True - self.assertIsNotNone(self.loglines) for logline in self.loglines: fields = utils.parse_logline(logline) @@ -508,5 +509,7 @@ def assertMessageEqual(self, queue_pos, expected_msg, compare_raw=True, path="_d def tearDown(self): """ Check if the bot did consume all messages. + + Executed after every test run.
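A short sketch of how the new per-run thresholds are meant to be used: run_bot() takes the maximum of the per-run value and the class-wide attribute, so a single noisy test can opt in locally (the test method shown is hypothetical):

    def test_expected_warning(self):
        # one warning is tolerated for this run only; the class-wide
        # self.allowed_warning_count still applies to all other tests
        self.run_bot(allowed_warning_count=1)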
""" self.assertEqual(len(self.input_queue), 0) diff --git a/intelmq/lib/upgrades.py b/intelmq/lib/upgrades.py index aa5266706..eb559edb4 100644 --- a/intelmq/lib/upgrades.py +++ b/intelmq/lib/upgrades.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -© 2019 Sebastian Wagner +© 2020 Sebastian Wagner SPDX-License-Identifier: AGPL-3.0 """ @@ -20,6 +20,11 @@ 'v111_defaults_process_manager', 'v202_fixes', 'v210_deprecations', + 'v213_deprecations', + 'v213_feed_changes', + 'v220_configuration', + 'v220_azure_collector', + 'v220_feed_changes', ] @@ -220,7 +225,8 @@ def v111_defaults_process_manager(defaults, runtime, harmonization, dry_run): def v202_fixes(defaults, runtime, harmonization, dry_run): """ - Migrating collector parameter `feed` to `name`. RIPE expert set: `query_ripe_stat_ip` with `query_ripe_stat_asn` as default + Migrate Collector parameter `feed` to `name`. RIPE expert set `query_ripe_stat_ip` with `query_ripe_stat_asn` as default. + Set cymru whois expert `overwrite` to true. """ changed = None for bot_id, bot in runtime.items(): @@ -276,6 +282,60 @@ def v210_deprecations(defaults, runtime, harmonization, dry_run): return changed, defaults, runtime, harmonization +def v213_deprecations(defaults, runtime, harmonization, dry_run): + """ + migrate attach_unzip to extract_files for mail attachment collector + + """ + changed = None + for bot_id, bot in runtime.items(): + if bot["module"] == "intelmq.bots.collectors.mail.collector_mail_attach": + if "attach_unzip" not in bot["parameters"]: + continue + if "extract_files" in bot["parameters"] and "attach_unzip" in bot["parameters"]: + del bot["parameters"]["attach_unzip"] + changed = True + elif "extract_files" not in bot["parameters"] and "attach_unzip" in bot["parameters"]: + bot["parameters"]["extract_files"] = bot["parameters"]["attach_unzip"] + del bot["parameters"]["attach_unzip"] + changed = True + return changed, defaults, runtime, harmonization + + +def v220_configuration(defaults, runtime, harmonization, dry_run): + """ + Migrating configuration + """ + changed = None + for bot_id, bot in runtime.items(): + if bot["module"] == "intelmq.bots.collectors.misp.collector": + if "misp_verify" not in bot["parameters"]: + continue + if bot["parameters"]["misp_verify"] != defaults["http_verify_cert"]: + bot["parameters"]["http_verify_cert"] = bot["parameters"]["misp_verify"] + del bot["parameters"]["misp_verify"] + changed = True + elif bot["module"] == "intelmq.bots.outputs.elasticsearch.output": + if "elastic_doctype" in bot["parameters"]: + del bot["parameters"]["elastic_doctype"] + return changed, defaults, runtime, harmonization + + +def v220_azure_collector(defaults, runtime, harmonization, dry_run): + """ + Checking for the Microsoft Azure collector + """ + changed = None + for bot_id, bot in runtime.items(): + if bot["module"] == "intelmq.bots.collectors.microsoft.collector_azure": + if "connection_string" not in bot["parameters"]: + changed = ("The Microsoft Azure collector changed backwards-" + "incompatible in IntelMQ 2.2.0. 
Look at the bot's " "documentation and NEWS file to adapt the " "configuration.") return changed, defaults, runtime, harmonization + + def harmonization(defaults, runtime, harmonization, dry_run): """ Checks if all harmonization fields and types are correct @@ -296,9 +356,112 @@ def harmonization(defaults, runtime, harmonization, dry_run): if harmonization[msg_type][fieldname]['type'] != original[msg_type][fieldname]['type']: harmonization[msg_type][fieldname]['type'] = original[msg_type][fieldname]['type'] changed = True + installed_regex = harmonization[msg_type][fieldname].get('regex') + original_regex = original[msg_type][fieldname].get('regex') + if original_regex and original_regex != installed_regex: + harmonization[msg_type][fieldname]['regex'] = original[msg_type][fieldname]['regex'] + changed = True + installed_regex = harmonization[msg_type][fieldname].get('iregex') + original_regex = original[msg_type][fieldname].get('iregex') + if original_regex and original_regex != installed_regex: + harmonization[msg_type][fieldname]['iregex'] = original[msg_type][fieldname]['iregex'] + changed = True return changed, defaults, runtime, harmonization +def v213_feed_changes(defaults, runtime, harmonization, dry_run): + """ + Migrates feed configuration for changed feed parameters. + """ + found_zeus = [] + found_bitcash = [] + found_ddos_attack = [] + found_ransomware = [] + found_bambenek = [] + found_nothink = [] + found_nothink_parser = [] + changed = None + messages = [] + for bot_id, bot in runtime.items(): + if bot["module"] == "intelmq.bots.collectors.http.collector_http": + if "http_url" not in bot["parameters"]: + continue + if bot["parameters"]["http_url"] == 'https://www.tc.edu.tw/net/netflow/lkout/recent/30': + bot["parameters"]["http_url"] = "https://www.tc.edu.tw/net/netflow/lkout/recent/" + changed = True + if bot["parameters"]["http_url"].startswith("https://zeustracker.abuse.ch/"): + found_zeus.append(bot_id) + elif bot["parameters"]["http_url"].startswith("https://bitcash.cz/misc/log/blacklist"): + found_bitcash.append(bot_id) + elif bot["parameters"]["http_url"].startswith("https://ransomwaretracker.abuse.ch/feeds/csv/"): + found_ransomware.append(bot_id) + elif bot["parameters"]["http_url"] == "https://osint.bambenekconsulting.com/feeds/dga-feed.txt": + bot["parameters"]["http_url"] = "https://faf.bambenekconsulting.com/feeds/dga-feed.txt" + changed = True + elif bot["parameters"]["http_url"] in ("http://osint.bambenekconsulting.com/feeds/dga/c2-ipmasterlist.txt", + "https://osint.bambenekconsulting.com/feeds/dga/c2-ipmasterlist.txt", + "http://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt", + "https://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt"): + found_bambenek.append(bot_id) + elif (bot["parameters"]["http_url"].startswith("http://www.nothink.org/") or + bot["parameters"]["http_url"].startswith("https://www.nothink.org/")): + found_nothink.append(bot_id) + elif bot["module"] == "intelmq.bots.collectors.http.collector_http_stream": + if bot["parameters"].get("http_url", "").startswith("https://feed.caad.fkie.fraunhofer.de/ddosattackfeed"): + found_ddos_attack.append(bot_id) + elif bot['module'] == "intelmq.bots.parsers.nothink.parser": + found_nothink_parser.append(bot_id) + if found_zeus: + messages.append('The discontinued feed "Zeus Tracker" has been found ' + 'as bot %s.' % ', '.join(sorted(found_zeus))) + if found_bitcash: + messages.append('The discontinued feed "Bitcash.cz" has been found ' + 'as bot %s.'
% ', '.join(sorted(found_bitcash))) + if found_ddos_attack: + messages.append('The discontinued feed "Fraunhofer DDoS Attack" has been found ' + 'as bot %s.' % ', '.join(sorted(found_ddos_attack))) + if found_ransomware: + messages.append('The discontinued feed "Abuse.ch Ransomware Tracker" has been found ' + 'as bot %s.' % ', '.join(sorted(found_ransomware))) + if found_bambenek: + messages.append('Many Bambenek feeds now require a license, see https://osint.bambenekconsulting.com/feeds/' + ', potentially affected bots are %s.' % ', '.join(sorted(found_bambenek))) + if found_nothink: + messages.append('All Nothink Honeypot feeds are discontinued, ' + 'potentially affected bots are %s.' % ', '.join(sorted(found_nothink))) + if found_nothink_parser: + messages.append('The Nothink Parser has been removed, ' + 'affected bots are %s.' % ', '.join(sorted(found_nothink_parser))) + messages = ' '.join(messages) + return messages + ' Remove affected bots yourself.' if messages else changed, defaults, runtime, harmonization + + +def v220_feed_changes(defaults, runtime, harmonization, dry_run): + """ + Migrates feed configuration for changed feed parameters. + """ + found_urlvir_feed = [] + found_urlvir_parser = [] + changed = None + messages = [] + for bot_id, bot in runtime.items(): + if bot["module"] == "intelmq.bots.collectors.http.collector_http": + if "http_url" not in bot["parameters"]: + continue + if bot["parameters"]["http_url"].startswith("http://www.urlvir.com/export-"): + found_urlvir_feed.append(bot_id) + elif bot['module'] == "intelmq.bots.parsers.urlvir.parser": + found_urlvir_parser.append(bot_id) + if found_urlvir_feed: + messages.append('The discontinued feed "URLVir" has been found ' + 'as bot %s.' % ', '.join(sorted(found_urlvir_feed))) + if found_urlvir_parser: + messages.append('The removed parser "URLVir" has been found ' + 'as bot %s.' % ', '.join(sorted(found_urlvir_parser))) + messages = ' '.join(messages) + return messages + ' Remove affected bots yourself.'
if messages else changed, defaults, runtime, harmonization + + UPGRADES = OrderedDict([ ((1, 0, 0, 'dev7'), (v100_dev7_modify_syntax, )), ((1, 1, 0), (v110_shadowserver_feednames, v110_deprecations)), @@ -310,6 +473,10 @@ def harmonization(defaults, runtime, harmonization, dry_run): ((2, 0, 2), (v202_fixes, )), ((2, 1, 0), (v210_deprecations, )), ((2, 1, 1), ()), + ((2, 1, 2), ()), + ((2, 1, 3), (v213_deprecations, v213_feed_changes)), + ((2, 1, 4), ()), + ((2, 2, 0), (v220_configuration, v220_azure_collector, v220_feed_changes)), ]) ALWAYS = (harmonization, ) diff --git a/intelmq/lib/utils.py b/intelmq/lib/utils.py index 970a48d6b..38a617c80 100644 --- a/intelmq/lib/utils.py +++ b/intelmq/lib/utils.py @@ -28,12 +28,14 @@ import tarfile import traceback import zipfile -from typing import Any, Generator, Iterator, Optional, Sequence, Union +from typing import Any, Dict, Generator, Iterator, Optional, Sequence, Union import dateutil.parser from dateutil.relativedelta import relativedelta +from termstyle import red import intelmq +from intelmq.lib.exceptions import DecodingError __all__ = ['base64_decode', 'base64_encode', 'decode', 'encode', 'load_configuration', 'load_parameters', 'log', 'parse_logline', @@ -83,22 +85,22 @@ def decode(text: Union[bytes, str], encodings: Sequence[str] = ("utf-8",), """ if isinstance(text, str): return text + exception = None for encoding in encodings: try: return str(text.decode(encoding)) - except ValueError: - pass + except ValueError as exc: + exception = exc if force: for encoding in encodings: try: return str(text.decode(encoding, 'ignore')) - except ValueError: - pass + except ValueError as exc: + exception = exc - raise ValueError("Could not decode string with given encodings{!r}" - ".".format(encodings)) + raise DecodingError(encodings=encodings, exception=exception, object=text) def encode(text: Union[bytes, str], encodings: Sequence[str] = ("utf-8",), @@ -165,7 +167,7 @@ def base64_encode(value: Union[bytes, str]) -> str: return decode(base64.b64encode(encode(value, force=True)), force=True) -def flatten_queues(queues) -> Iterator[str]: +def flatten_queues(queues: Union[list, Dict]) -> Iterator[str]: """ Assure that the output value will be flattened.
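The UPGRADES table above maps version tuples to tuples of upgrade functions. As a sketch of the contract these functions follow (derived from the functions in this diff; the module name checked here is hypothetical): each function receives the four configuration objects plus dry_run and returns a 4-tuple whose first element is None (nothing to do), True (configuration was modified), or a string describing a problem the user must resolve.

    def v000_example_noop(defaults, runtime, harmonization, dry_run):
        """Hypothetical upgrade function, not part of this changeset."""
        changed = None
        for bot_id, bot in runtime.items():
            if bot["module"] == "intelmq.bots.example.nonexistent":  # hypothetical module
                changed = True
        return changed, defaults, runtime, harmonization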
@@ -232,6 +234,7 @@ def write_configuration(configuration_filepath: str, json.dump(content, fp=handle, indent=4, sort_keys=True, separators=(',', ': ')) + handle.write('\n') def load_parameters(*configs: dict) -> Parameters: @@ -252,6 +255,8 @@ def load_parameters(*configs: dict) -> Parameters: class FileHandler(logging.FileHandler): + shell_color_pattern = re.compile(r'\x1b\[\d+m') + def emit_print(self, record): print(record.msg, record.args) @@ -261,6 +266,13 @@ def handleError(self, record): self.emit = self.emit_print raise + def emit(self, record): + """ + Strips shell colorization from messages + """ + record.msg = self.shell_color_pattern.sub('', record.msg) + super().emit(record) + class StreamHandler(logging.StreamHandler): def emit(self, record): @@ -268,9 +280,10 @@ def emit(self, record): msg = self.format(record) if record.levelno < logging.WARNING: # debug, info stream = sys.stdout + stream.write(msg) else: # warning, error, critical stream = sys.stderr - stream.write(msg) + stream.write(red(msg)) stream.write(self.terminator) self.flush() except Exception: @@ -288,7 +301,7 @@ def emit(self, record): self.buffer.append((record.levelname.lower(), record.getMessage())) -def log(name: str, log_path: Union[str, bool] = intelmq.DEFAULT_LOGGING_PATH, log_level: str = "DEBUG", +def log(name: str, log_path: Union[str, bool] = intelmq.DEFAULT_LOGGING_PATH, log_level: str = intelmq.DEFAULT_LOGGING_LEVEL, stream: Optional[object] = None, syslog: Union[bool, str, list, tuple] = None, log_format_stream: str = LOG_FORMAT_STREAM, logging_level_stream: Optional[str] = None): @@ -300,7 +313,7 @@ def log(name: str, log_path: Union[str, bool] = intelmq.DEFAULT_LOGGING_PATH, lo name: filename for logfile or string preceding lines in stream log_path: Path to log directory, defaults to DEFAULT_LOGGING_PATH If False, nothing is logged to files. - log_level: default is "DEBUG" + log_level: default is %r stream: By default (None), stdout and stderr will be used depending on the level. If False, stream output is not used. For everything else, the argument is used as stream output. @@ -321,7 +334,7 @@ def log(name: str, log_path: Union[str, bool] = intelmq.DEFAULT_LOGGING_PATH, lo LOG_FORMAT: Default log format for file handler LOG_FORMAT_STREAM: Default log format for stream handler LOG_FORMAT_SYSLOG: Default log format for syslog - """ + """ % intelmq.DEFAULT_LOGGING_LEVEL logging.captureWarnings(True) warnings_logger = logging.getLogger("py.warnings") # set the name of the warnings logger to the bot name, see #1184 @@ -344,8 +357,6 @@ def log(name: str, log_path: Union[str, bool] = intelmq.DEFAULT_LOGGING_PATH, lo handler = logging.handlers.SysLogHandler(address=syslog) handler.setLevel(log_level) handler.setFormatter(logging.Formatter(LOG_FORMAT_SYSLOG)) - else: - raise ValueError("Invalid configuration, neither log_path is given nor syslog is used.") if log_path or syslog: logger.addHandler(handler) @@ -589,7 +600,7 @@ def __next__(self): return self.current_line -def object_pair_hook_bots(*args, **kwargs): +def object_pair_hook_bots(*args, **kwargs) -> Dict: """ An object_pair_hook function for the BOTS file to be used in the json's dump functions.
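The shell_color_pattern introduced above strips ANSI SGR colour codes (as now emitted for the stream via termstyle) before records reach the log file. A minimal, self-contained illustration of that pattern:

    import re

    shell_color_pattern = re.compile(r'\x1b\[\d+m')  # same pattern as above
    colored = '\x1b[31mERROR - something failed\x1b[0m'
    print(shell_color_pattern.sub('', colored))  # ERROR - something failed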
@@ -635,14 +646,15 @@ def drop_privileges() -> bool: try: os.setgid(grp.getgrnam('intelmq').gr_gid) os.setuid(pwd.getpwnam('intelmq').pw_uid) - except OSError: + except (OSError, KeyError): + # KeyError: User or group 'intelmq' does not exist return False if os.geteuid() != 0: # For the improbable possibility that intelmq is root return True return False -def setup_list_logging(name='intelmq', logging_level='INFO'): +def setup_list_logging(name: str = 'intelmq', logging_level: str = 'INFO'): check_logger = logging.getLogger('check') # name does not matter list_handler = ListHandler() list_handler.setLevel('INFO') @@ -679,7 +691,7 @@ def version_smaller(version1: tuple, version2: tuple) -> Optional[bool]: return None -def lazy_int(value: Any) -> Any: +def lazy_int(value: Any) -> Optional[Any]: """ Tries to convert the value to int if possible. Returns the original value otherwise """ diff --git a/intelmq/tests/assets/feeds.schema.json b/intelmq/tests/assets/feeds.schema.json index e0f93548b..1932adf0f 100644 --- a/intelmq/tests/assets/feeds.schema.json +++ b/intelmq/tests/assets/feeds.schema.json @@ -28,9 +28,9 @@ }, "revision": { "required": true, - "type": "string" + "type": "date" }, - "status": { + "public": { "required": true, "type": "boolean" }, diff --git a/intelmq/tests/bin/test_intelmqctl.py b/intelmq/tests/bin/test_intelmqctl.py index 6f8fb79a3..3424ca9ae 100644 --- a/intelmq/tests/bin/test_intelmqctl.py +++ b/intelmq/tests/bin/test_intelmqctl.py @@ -1,2 +1,42 @@ # -*- coding: utf-8 -*- -import intelmq.bin.intelmqctl +import unittest + +import intelmq.bin.intelmqctl as ctl + + +class TestIntelMQProcessManager(unittest.TestCase): + def test_interpret_commandline(self): + func = ctl.IntelMQProcessManager._interpret_commandline + self.assertTrue(func(1, ('/usr/bin/python3', '/usr/bin/intelmq.bots.collectors.http.collector_http', 'test-collector'), + 'intelmq.bots.collectors.http.collector_http', 'test-collector')) + self.assertTrue(func(1, ('/usr/bin/python3', '/usr/local/bin/intelmq.bots.collectors.http.collector_http', 'test-collector'), + 'intelmq.bots.collectors.http.collector_http', 'test-collector')) + self.assertFalse(func(1, ('/usr/bin/python3', '/usr/bin/intelmq.bots.collectors.http.collector_http', 'test-collector'), + 'intelmq.bots.collectors.http.collector_http', 'other-collector')) + + self.assertTrue(func(1, ('/usr/bin/python3', '/usr/bin/intelmqctl', 'run', 'test-collector'), + 'intelmq.bots.collectors.http.collector_http', 'test-collector')) + self.assertTrue(func(1, ('/usr/bin/python3', '/usr/local/bin/intelmqctl', 'run', 'test-collector'), + 'intelmq.bots.collectors.http.collector_http', 'test-collector')) + self.assertFalse(func(1, ('/usr/bin/python3', '/usr/bin/intelmqctl', 'run', 'test-collector'), + 'intelmq.bots.collectors.http.collector_http', 'other-collector')) + + self.assertIn('could not be interpreted', + func(1, ('/usr/bin/python3', '/usr/bin/intelmqctl', 'run'), + 'intelmq.bots.collectors.http.collector_http', 'other-collector')) + self.assertIn('could not be interpreted', + func(1, ('/usr/bin/python3', '/usr/bin/intelmqctl'), + 'intelmq.bots.collectors.http.collector_http', 'other-collector')) + self.assertIn('could not be interpreted', + func(1, ('/usr/bin/python3',), + 'intelmq.bots.collectors.http.collector_http', 'other-collector')) + self.assertIn('could not be interpreted', + func(1, ('/usr/bin/python3', '/usr/bin/intelmq.bots.collectors.http.collector_http'), + 'intelmq.bots.collectors.http.collector_http', 'test-collector')) + self.assertIn('error',
func(1, (), + 'intelmq.bots.collectors.http.collector_http', 'other-collector')) + + +if __name__ == '__main__': # pragma: nocover + unittest.main() diff --git a/intelmq/tests/bots/collectors/amqp/test_collector_amqp.py b/intelmq/tests/bots/collectors/amqp/test_collector_amqp.py index d146e7a1f..2fec2d7fd 100644 --- a/intelmq/tests/bots/collectors/amqp/test_collector_amqp.py +++ b/intelmq/tests/bots/collectors/amqp/test_collector_amqp.py @@ -1,18 +1,13 @@ # -*- coding: utf-8 -*- import json -import os import unittest -import intelmq.lib.message as message import intelmq.lib.test as test import intelmq.lib.utils as utils from intelmq.bots.collectors.amqp.collector_amqp import AMQPCollectorBot from intelmq.tests.bots.outputs.redis.test_output import EXAMPLE_EVENT from intelmq.tests.bots.outputs.amqptopic.test_output import TestAMQPTopicOutputBot -if os.environ.get('INTELMQ_TEST_EXOTIC'): - import pika - BODY_PLAIN = b'foobar This is a test' REPORT_PLAIN = {'__type': 'Report', diff --git a/intelmq/tests/bots/collectors/github_api/__init__.py b/intelmq/tests/bots/collectors/github_api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/tests/bots/collectors/github_api/example_github_repo_contents_response.json b/intelmq/tests/bots/collectors/github_api/example_github_repo_contents_response.json new file mode 100644 index 000000000..8e44a62bd --- /dev/null +++ b/intelmq/tests/bots/collectors/github_api/example_github_repo_contents_response.json @@ -0,0 +1,34 @@ +[ + { + "name": "test_file_1.md", + "path": "test_file_1.md", + "sha": "aaa", + "size": 50, + "url": "some_url", + "html_url": "some_html_url", + "git_url": "some_git_url", + "download_url": "https://a_download.url/test_file_1.md", + "type": "file", + "_links": { + "self": "I_url", + "git": "some_git_url", + "html": "some_html_url" + } + }, + { + "name": "contents.txt", + "path": "contents.txt", + "sha": "bbb", + "size": 100, + "url": "some_url", + "html_url": "some_html_url", + "git_url": "some_git_url", + "download_url": "https://a_download.url/contents.txt", + "type": "file", + "_links": { + "self": "I_url", + "git": "some_git_url", + "html": "some_html_url" + } + } +] \ No newline at end of file diff --git a/intelmq/tests/bots/collectors/github_api/test_collector.py b/intelmq/tests/bots/collectors/github_api/test_collector.py new file mode 100644 index 000000000..f804ff3c8 --- /dev/null +++ b/intelmq/tests/bots/collectors/github_api/test_collector.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +""" +Testing Github API Collectors +""" +import json +import os +from unittest import TestCase, main as unittest_main +from unittest.mock import MagicMock, patch + +import intelmq.lib.exceptions as exceptions +import intelmq.lib.test as test +import intelmq.lib.utils as utils +from intelmq.bots.collectors.github_api import collector_github_contents_api + +with open(os.path.join(os.path.dirname(__file__), 'example_github_repo_contents_response.json')) as handle: + RAW_CONTENTS = handle.read() + JSON_CONTENTS = json.loads(RAW_CONTENTS) + +EXAMPLE_CONTENT_JSON = [ + { + "Description": "md5", + "Identifier": "iubegr73b497fb398br9v3br98ufh3r" + }, + { + "Description": "", + "Identifier": "iubegr73b497iubegr73b497fb398br9v3br98ufh3rfb398br9v3br98ufh3r" + } +] +EXAMPLE_CONTENT_STR = str(EXAMPLE_CONTENT_JSON) + +SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST = { + 'CONFIG': { + 'name': 'Github feed', + 'basic_auth_username': 'dummy_user', + 'basic_auth_password': 'dummy_password', + 'repository': 'author/repository', 
+ 'extra_fields': 'size, sha', + 'regex': '.*.txt' + }, + 'EXPECTED_REPORTS': [ + { + "__type": "Report", + "feed.name": "Github feed", + "feed.accuracy": 100., + "feed.url": JSON_CONTENTS[1]['download_url'], + "raw": utils.base64_encode(EXAMPLE_CONTENT_STR), + "extra.file_metadata": { + "sha": JSON_CONTENTS[1]['sha'], + "size": JSON_CONTENTS[1]['size'] + } + } + ] +} + +SHOULD_FAIL_BECAUSE_REPOSITORY_IS_NOT_VALID_CONFIG = { + 'CONFIG': { + 'name': 'Github feed', + 'basic_auth_username': 'dummy_user', + 'basic_auth_password': 'dummy_password', + 'repository': 'author/', + 'extra_fields': 'size', + 'regex': '.*.txt' + } +} + +SHOULD_FAIL_WITH_BAD_CREDENTIALS = { + 'CONFIG': { + 'name': 'Github feed', + 'basic_auth_username': 'dummy_user', + 'basic_auth_password': 'bad_dummy_password', + 'repository': 'author/repo', + 'regex': '.*.txt' + } +} + + +def print_requests_get_parameters(url, *args, **kwargs): + if 'headers' in kwargs and kwargs['headers']['Accept'] == 'application/vnd.github.v3.text-match+json': + """ + mocking of Github API requests + """ + main_mock = MagicMock() + main_mock.return_value.json = MagicMock() + main_mock.return_value = RAW_CONTENTS + main_mock.json.return_value = JSON_CONTENTS + return main_mock + else: + """ + mocking of basic GET request + """ + main_mock = MagicMock(content=EXAMPLE_CONTENT_STR) + return main_mock + + +class TestGithubContentsAPICollectorBot(test.BotTestCase, TestCase): + """ + A TestCase for GithubContentsAPICollectorBot. + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = collector_github_contents_api.GithubContentsAPICollectorBot + + @patch('intelmq.bots.collectors.github_api.collector_github_contents_api.requests.get') + def test_message_queue_should_contain_the_right_fields(self, requests_get_mock): + requests_get_mock.side_effect = print_requests_get_parameters + + self.run_bot(parameters=SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG'], prepare=True) + + self.assertOutputQueueLen(len(SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['EXPECTED_REPORTS'])) + for i in range(len(self.get_output_queue())): + self.assertMessageEqual(i, SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['EXPECTED_REPORTS'][i]) + + @patch('intelmq.bots.collectors.github_api.collector_github_contents_api.requests.get') + def test_collector_should_fail_with_bad_repository_error(self, requests_get_mock): + import requests + requests_get_mock.side_effect = requests.RequestException() + + self.allowed_error_count = 1 # allow only single and final Error to be raised + self.run_bot(parameters=SHOULD_FAIL_BECAUSE_REPOSITORY_IS_NOT_VALID_CONFIG['CONFIG'], prepare=True) + self.assertRegexpMatchesLog(pattern=".*Unknown repository.*") # assert the expected ValueError msg + + @patch('intelmq.bots.collectors.github_api.collector_github_contents_api.requests.get') + def test_collector_should_fail_with_bad_credentials(self, requests_get_mock): + requests_get_mock.return_value.json = MagicMock(return_value={'message': 'Bad Credentials'}) + requests_get_mock.return_value.configure_mock(status_code=401) + + self.allowed_error_count = 1 + self.run_bot(parameters=SHOULD_FAIL_WITH_BAD_CREDENTIALS['CONFIG'], prepare=True) + self.assertRegexpMatchesLog(pattern=".*Bad Credentials.*") + + @patch('intelmq.bots.collectors.github_api.collector_github_contents_api.requests.get') + def test_adding_extra_fields_should_warn(self, requests_get_mock): + requests_get_mock.side_effect = print_requests_get_parameters + + custom_config = 
SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG'].copy() + custom_config['extra_fields'] = 'aaa,bbb' + + self.allowed_warning_count = 2 + self.run_bot(parameters=custom_config, prepare=True) + + self.assertRegexpMatchesLog(pattern=".*Field 'aaa' does not exist in the Github file data.*") + self.assertRegexpMatchesLog(pattern=".*Field 'bbb' does not exist in the Github file data.*") + self.assertMessageEqual(0, { + "__type": "Report", + "feed.name": "Github feed", + "feed.accuracy": 100., + "feed.url": JSON_CONTENTS[1]['download_url'], + "raw": utils.base64_encode(EXAMPLE_CONTENT_STR) + }) + + def test_collector_init_should_fail_with_invalid_argument(self): + custom_config = SHOULD_PASS_WITH_TXT_FILES_AND_EXTRA_FIELD_SIZE_TEST['CONFIG'].copy() + + config_with_wrong_regex = custom_config.copy() + with self.assertRaises(exceptions.InvalidArgument): + config_with_wrong_regex['regex'] = '*.txt' + self.run_bot(parameters=config_with_wrong_regex, prepare=True) + + config_with_missing_regex = custom_config.copy() + with self.assertRaises(exceptions.InvalidArgument): + del config_with_missing_regex['regex'] + self.run_bot(parameters=config_with_missing_regex, prepare=True) + + config_with_missing_repository = custom_config.copy() + with self.assertRaises(exceptions.InvalidArgument): + del config_with_missing_repository['repository'] + self.run_bot(parameters=config_with_missing_repository, prepare=True) + + +if __name__ == '__main__': # pragma: no cover + unittest_main() diff --git a/intelmq/tests/bots/collectors/mail/fake_attachment.eml b/intelmq/tests/bots/collectors/mail/fake_attachment.eml new file mode 100644 index 000000000..a1a8117f5 --- /dev/null +++ b/intelmq/tests/bots/collectors/mail/fake_attachment.eml @@ -0,0 +1,42 @@ +List-Post: +X-Spam-Status: No, score=-24.3 required=4 tests=[BAYES_00=-1.9, DCC_CHECK=1.1, DKIMWL_WL_HIGH=0.001, DKIM_SIGNED=0.1, DKIM_VALID=-0.1, DKIM_VALID_AU=-0.1, DKIM_VALID_EF=-0.1, MAILING_LIST_MULTI=-1, RCVD_IN_DNSWL_MED=-2.3, RCVD_IN_MSPIKE_H2=-0.001, SHADOW1=-20] autolearn=ham autolearn_force=no +Errors-To: austria-bounces@mail.shadowserver.org +List-Help: +X-Spam-Flag: NO +X-Mailman-Version: 2.1.12 +Content-Type: multipart/mixed; boundary="=-=U9qbhb6ctjJtRANyLAes3HswHs8H9QZAnQoA=-=" +Message-ID: <20200515063210.3Zjc-9Qor%autoreports@shadowserver.org> +X-Virus-Scanned: amavisd-new at cert.at +X-RT-Incoming-Encryption: Not encrypted +X-Spam-Score: -24.3 +Received: from cleophus.intern.cert.at (cleophus.intern.cert.at [172.21.47.118]) by buhmann.intern.cert.at (Postfix) with ESMTP id 3D2C220280 for ; Fri, 15 May 2020 08:32:32 +0200 (CEST) +Received: from trinli.cert.at (trinli.cert.at [83.136.38.186]) by cleophus.intern.cert.at (Postfix) with ESMTPS id 2B463220B70 for ; Fri, 15 May 2020 08:32:32 +0200 (CEST) +Received: from localhost (localhost [127.0.0.1]) by trinli.cert.at (Postfix) with ESMTP id 0871EABCE4 for ; Fri, 15 May 2020 08:32:32 +0200 (CEST) +Received: from trinli.cert.at ([127.0.0.1]) by localhost (trinli.cert.at [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id 42AGlVHWYUj1 for ; Fri, 15 May 2020 08:32:28 +0200 (CEST) +Received: from mail.shadowserver.org (mail.shadowserver.org [65.49.51.53]) by trinli.cert.at (Postfix) with ESMTP for ; Fri, 15 May 2020 08:32:12 +0200 (CEST) +Received: from [127.0.0.1] (ssmail-00 [127.0.0.1]) by mail.shadowserver.org (Postfix) with ESMTP id 0244B26C77CDF9 for ; Thu, 14 May 2020 23:32:12 -0700 (PDT) +Received: by report-02.shadowserver.org (Postfix, from userid 0) id 902F1106A0B4B; Fri, 15 May 2020 
06:32:10 +0000 (UTC) +Delivered-To: reports@rtir.cert.at +Delivered-To: austria@shadowserver.org +Subject: [Austria] Shadowserver Austria Open MQTT Report: 2020-05-14 +User-Agent: s-nail v14.8.6 +Return-Path: +X-Original-To: reports@rtir.cert.at +X-Original-To: austria@shadowserver.org +Dkim-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=shadowserver.org; s=mail; t=1589524332; bh=yPOndIndHnKoIgocwHI1FZjupJFBKjGItWcdPxjrKLY=; h=Date:From:To:Subject:List-Id:List-Unsubscribe:List-Post:List-Help: List-Subscribe:From; b=aOtH9D5tmu/oQPT8hSUH2het0dQgkQH1kwHZ/AowQpndGQoQ+D19qoLix0zNQ0XHe oZ5reJTC0skmax3CyXlLIhMy8NQerEyOfzVUDJIUUDrCJsaNirM5swLGrTQrpGXkUM aKK1A3lsjPx9CJvEyBP0CEmtPZnMpXSp/ULqu3FA= +List-Subscribe: , +Sender: austria-bounces@mail.shadowserver.org +Date: Fri, 15 May 2020 06:32:10 +0000 +X-Spam-Level: +Precedence: list +X-Beenthere: austria@mail.shadowserver.org +List-ID: +To: austria@shadowserver.org +List-Unsubscribe: , +From: autoreports@shadowserver.org +content-type: text/plain; charset="utf-8" +Content-Disposition: inline +X-RT-Original-Encoding: US-ASCII +content-transfer-encoding: 8bit + + diff --git a/intelmq/tests/bots/collectors/mail/lib.py b/intelmq/tests/bots/collectors/mail/lib.py index 54fe31dae..a93a494a7 100644 --- a/intelmq/tests/bots/collectors/mail/lib.py +++ b/intelmq/tests/bots/collectors/mail/lib.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Tue Sep 10 17:10:54 2019 @@ -6,6 +5,7 @@ @author: sebastian """ import os +from copy import deepcopy if os.getenv('INTELMQ_TEST_EXOTIC'): from imbox.parser import parse_email @@ -13,6 +13,8 @@ EMAIL_ZIP_FOOBAR = parse_email(handle.read()) with open(os.path.join(os.path.dirname(__file__), 'foobartxt.eml')) as handle: EMAIL_TXT_FOOBAR = parse_email(handle.read()) + with open(os.path.join(os.path.dirname(__file__), 'fake_attachment.eml')) as handle: + EMAIL_FAKE_ATTACHMENT = parse_email(handle.read()) class MockedImbox(): @@ -32,9 +34,15 @@ def logout(self): class MockedZipImbox(MockedImbox): def messages(self, *args, **kwargs): - yield 0, EMAIL_ZIP_FOOBAR + # without deepcopy only the first read() in the attachment works + yield 0, deepcopy(EMAIL_ZIP_FOOBAR) class MockedTxtImbox(MockedImbox): def messages(self, *args, **kwargs): - yield 0, EMAIL_TXT_FOOBAR + yield 0, deepcopy(EMAIL_TXT_FOOBAR) + + +class MockedBadAttachmentImbox(MockedImbox): + def messages(self, *args, **kwargs): + yield 0, deepcopy(EMAIL_FAKE_ATTACHMENT) diff --git a/intelmq/tests/bots/collectors/mail/test_collector_attach.py b/intelmq/tests/bots/collectors/mail/test_collector_attach.py index 399167c2c..24f887780 100644 --- a/intelmq/tests/bots/collectors/mail/test_collector_attach.py +++ b/intelmq/tests/bots/collectors/mail/test_collector_attach.py @@ -9,8 +9,9 @@ import intelmq.lib.test as test from intelmq.bots.collectors.mail.collector_mail_attach import MailAttachCollectorBot +from intelmq.lib.utils import base64_encode if os.getenv('INTELMQ_TEST_EXOTIC'): - from .lib import MockedZipImbox + from .lib import MockedZipImbox, MockedBadAttachmentImbox REPORT_FOOBARZIP = { '__type': 'Report', @@ -19,7 +20,8 @@ 'extra.email_subject': 'foobar zip', 'feed.accuracy': 100.0, 'feed.name': 'IMAP Feed', - 'raw': 'UEsDBAoAAAAAAG93AU+n9EgFCQAAAAkAAAAGABwAZm9vYmFyVVQJAAMx4kJdMeJCXXV4CwABBOgDAAAEZAAAAGJhciB0ZXh0ClBLAQIeAwoAAAAAAG93AU+n9EgFCQAAAAkAAAAGABgAAAAAAAEAAACkgQAAAABmb29iYXJVVAUAAzHiQl11eAsAAQToAwAABGQAAABQSwUGAAAAAAEAAQBMAAAASQAAAAAA', + 'raw': base64_encode('bar text\n'), + 'extra.file_name': 'foobar', } @@ -41,11 +43,25 @@ 
def set_bot(cls): 'name': 'IMAP Feed', } - def test_one(self): + def test_extract_files(self): with mock.patch('imbox.Imbox', new=MockedZipImbox): - self.run_bot() + self.run_bot(parameters={'extract_files': True}) + self.assertMessageEqual(0, REPORT_FOOBARZIP) + + def test_attach_unzip(self): + self.allowed_warning_count = 1 + with mock.patch('imbox.Imbox', new=MockedZipImbox): + self.run_bot(parameters={'attach_unzip': True}) self.assertMessageEqual(0, REPORT_FOOBARZIP) + def test_attach_no_filename(self): + """ + https://github.com/certtools/intelmq/issues/1538 + """ + with mock.patch('imbox.Imbox', new=MockedBadAttachmentImbox): + self.run_bot() + self.assertOutputQueueLen(0) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/collectors/misp/test_collector_attach.py b/intelmq/tests/bots/collectors/misp/test_collector.py similarity index 100% rename from intelmq/tests/bots/collectors/misp/test_collector_attach.py rename to intelmq/tests/bots/collectors/misp/test_collector.py diff --git a/intelmq/tests/bots/collectors/rsync/test_collector.py b/intelmq/tests/bots/collectors/rsync/test_collector.py index d57584834..267b9a3b6 100644 --- a/intelmq/tests/bots/collectors/rsync/test_collector.py +++ b/intelmq/tests/bots/collectors/rsync/test_collector.py @@ -33,6 +33,11 @@ def test_events(self): self.run_bot(iterations=1) self.assertMessageEqual(0, OUTPUT) + def test_fail(self): + self.allowed_error_count = 1 + self.run_bot(iterations=1, parameters={"rsync_path": "/foobar"}) + self.assertLogMatches('.*failed with exitcode.*') + @classmethod def tearDownClass(cls): cls.base_dir.cleanup() diff --git a/intelmq/tests/bots/collectors/tcp/test_collector.py b/intelmq/tests/bots/collectors/tcp/test_collector.py index 5e1f12968..bb35eaef7 100644 --- a/intelmq/tests/bots/collectors/tcp/test_collector.py +++ b/intelmq/tests/bots/collectors/tcp/test_collector.py @@ -3,7 +3,6 @@ """ import socket import struct -import unittest.mock as mock import sys import threading import unittest @@ -80,11 +79,6 @@ def _delayed_start(self): self.run_bot(iterations=len(self.input_message)) -major, minor, micro, *_ = sys.version_info - - -@unittest.skipIf((major, minor) == (3, 4) and micro < 8, "Travis CI failed with Python3.4.6. " - "However, the developer managed to successfully test it on 3.4.8.") class TestTCPCollectorBot(test.BotTestCase, unittest.TestCase): """ A TestCase for TCPCollectorBot. @@ -103,7 +97,6 @@ def test_random_input(self): """ Check how we handle a random input, coming from an unknown source. We should put all the data to report['raw']. 
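The deepcopy added to the mocked Imbox (lib.py) above guards against the parsed mails' file-like attachment payloads being exhausted after the first test; a minimal demonstration of the underlying behaviour:

    import io

    buf = io.BytesIO(b'payload')
    print(buf.read())  # b'payload'
    print(buf.read())  # b'' - a second consumer of the same object sees nothing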
""" thread = threading.Thread(target=Client().random_client) thread.start() - self.input_message = None self.run_bot() self.assertOutputQueueLen(2) generated_report = MessageFactory.unserialize(self.get_output_queue()[1], harmonization=self.harmonization, @@ -172,21 +165,8 @@ def test_multiple_bots(self): thread = threading.Thread(target=Client().random_client) thread.start() - self.input_message = None - - # can't use standard .bot_run(iteration) or .start() because shutdown() would be called - # and we need to handle multiple connections - self.prepare_bot() - self.bot._Bot__source_pipeline = self.pipe - self.bot._Bot__destination_pipeline = self.pipe - for _ in range(client_count + 1): - # every single calling of process() method will serve to a single connection - with mock.patch('intelmq.lib.utils.load_configuration', - new=self.mocked_config): - with mock.patch('intelmq.lib.utils.log', self.mocked_log): - self.bot.process() - self.bot.stop() # let's call shutdown() and free up bound address + self.run_bot(iterations=client_count + 1) self.assertOutputQueueLen(client_count * msg_count + 2) diff --git a/intelmq/tests/bots/experts/abusix/test_expert.py b/intelmq/tests/bots/experts/abusix/test_expert.py index d63b170aa..2bec92936 100644 --- a/intelmq/tests/bots/experts/abusix/test_expert.py +++ b/intelmq/tests/bots/experts/abusix/test_expert.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import os import unittest import intelmq.lib.test as test diff --git a/intelmq/tests/bots/experts/asn_lookup/test_expert.py b/intelmq/tests/bots/experts/asn_lookup/test_expert.py index 9f3c5f2ca..234c162e3 100644 --- a/intelmq/tests/bots/experts/asn_lookup/test_expert.py +++ b/intelmq/tests/bots/experts/asn_lookup/test_expert.py @@ -1,9 +1,6 @@ # -*- coding: utf-8 -*- """ -Testing asn_lookup. 
- -see asn_lookup README for how to download database -It is expected at /opt/intelmq/var/lib/bots/asn_lookup/ipasn.dat by default +Testing asn_lookup with a faked local database """ import unittest diff --git a/intelmq/tests/bots/experts/csv_converter/__init__.py b/intelmq/tests/bots/experts/csv_converter/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/tests/bots/experts/csv_converter/test_expert.py b/intelmq/tests/bots/experts/csv_converter/test_expert.py new file mode 100644 index 000000000..61aa5c291 --- /dev/null +++ b/intelmq/tests/bots/experts/csv_converter/test_expert.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +import unittest + +import intelmq.lib.test as test +from intelmq.bots.experts.csv_converter.expert import BOT + +EXAMPLE_INPUT = {"__type": "Event", + "source.ip": "93.184.216.34", + "destination.ip": "192.0.43.8", + "time.observation": "2015-01-01T00:00:00+00:00", + } +EXAMPLE_OUTPUT = {"__type": "Event", + "source.ip": "93.184.216.34", + "destination.ip": "192.0.43.8", + "time.observation": "2015-01-01T00:00:00+00:00", + "output": '"2015-01-01T00:00:00+00:00,93.184.216.34"', + } +DELIMITER_OUT = EXAMPLE_OUTPUT.copy() +DELIMITER_OUT['output'] = EXAMPLE_OUTPUT['output'].replace(',', ';') + + +class TestCSVConverterExpertBot(test.BotTestCase, unittest.TestCase): + @classmethod + def set_bot(cls): + cls.bot_reference = BOT + cls.sysconfig = {'fieldnames': 'time.observation,source.ip'} + + def test_default(self): + self.input_message = EXAMPLE_INPUT + self.run_bot() + self.assertMessageEqual(0, EXAMPLE_OUTPUT) + + def test_delimiter(self): + self.input_message = EXAMPLE_INPUT + self.run_bot(parameters={'delimiter': ';'}) + self.assertMessageEqual(0, DELIMITER_OUT) + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/experts/cymru_whois/test_expert.py b/intelmq/tests/bots/experts/cymru_whois/test_expert.py index 645e13ff7..a3b0420cf 100644 --- a/intelmq/tests/bots/experts/cymru_whois/test_expert.py +++ b/intelmq/tests/bots/experts/cymru_whois/test_expert.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import os import unittest import intelmq.lib.test as test @@ -16,7 +15,7 @@ "source.network": "93.184.216.0/24", "source.allocated": "2008-06-02T00:00:00+00:00", "source.asn": 15133, - "source.as_name": "EDGECAST - MCI Communications Services, Inc. 
d/b/a Verizon Business, US", + "source.as_name": "EDGECAST, US", "time.observation": "2015-01-01T00:00:00+00:00", } EXAMPLE_INPUT6 = {"__type": "Event", @@ -27,7 +26,7 @@ "destination.ip": "2001:500:88:200::8", # iana.org "destination.registry": "ARIN", "destination.allocated": "2010-02-18T00:00:00+00:00", - "destination.as_name": "ICANN-DC - ICANN, US", + "destination.as_name": "ICANN-DC, US", "destination.geolocation.cc": "US", "time.observation": "2015-01-01T00:00:00+00:00", "destination.asn": 16876, @@ -51,19 +50,6 @@ "source.ip": "198.105.125.77", # no result "time.observation": "2015-01-01T00:00:00+00:00", } -NO_ASN_INPUT = {"__type": "Event", - "source.ip": "212.92.127.126", - "time.observation": "2015-01-01T00:00:00+00:00", - } -NO_ASN_OUTPUT = {"__type": "Event", - "source.ip": "212.92.127.126", - "time.observation": "2015-01-01T00:00:00+00:00", - "source.asn": 23456, - "source.geolocation.cc": 'RU', - "source.ip": '212.92.127.126', - "source.network": '212.92.127.0/24', - "source.registry": 'RIPE', - } EXAMPLE_6TO4_INPUT = {"__type": "Event", "source.ip": "2002:3ee0:3972:0001::1", "time.observation": "2015-01-01T00:00:00+00:00", @@ -75,6 +61,13 @@ "source.as_name": "SURFNET-NL SURFnet, The Netherlands, NL", "time.observation": "2015-01-01T00:00:00+00:00", } +EXAMPLE_6TO4_OUTPUT_1 = {"__type": "Event", + "source.ip": "2002:3ee0:3972:0001::1", + "source.network": "2002::/16", + "source.asn": 6939, + "source.as_name": "HURRICANE, US", + "time.observation": "2015-01-01T00:00:00+00:00", + } OVERWRITE_OUT = {"__type": "Event", "source.ip": "93.184.216.34", "source.geolocation.cc": "AA", @@ -82,7 +75,7 @@ "source.network": "93.184.216.0/24", "source.allocated": "2008-06-02T00:00:00+00:00", "source.asn": 15133, - "source.as_name": "EDGECAST - MCI Communications Services, Inc. d/b/a Verizon Business, US", + "source.as_name": "EDGECAST, US", "time.observation": "2015-01-01T00:00:00+00:00", } @@ -122,9 +115,16 @@ def test_empty_result(self): self.assertMessageEqual(0, EMPTY_INPUT) def test_6to4_result(self): + """ + Test the whois for an IPv6 to IPv4 network range. + The result can vary, so we test for two possible expected results. + """ self.input_message = EXAMPLE_6TO4_INPUT self.run_bot() - self.assertMessageEqual(0, EXAMPLE_6TO4_OUTPUT) + try: + self.assertMessageEqual(0, EXAMPLE_6TO4_OUTPUT) + except AssertionError: + self.assertMessageEqual(0, EXAMPLE_6TO4_OUTPUT_1) def test_overwrite(self): self.input_message = EXAMPLE_INPUT.copy() @@ -133,17 +133,6 @@ def test_overwrite(self): self.run_bot(parameters={'overwrite' : False}) self.assertMessageEqual(0, OVERWRITE_OUT) - @unittest.expectedFailure - def test_missing_asn(self): - """ - No information for ASN. - - https://github.com/certtools/intelmq/issues/635 - """ - self.input_message = NO_ASN_INPUT - self.run_bot() - self.assertMessageEqual(0, NO_ASN_OUTPUT) - if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/experts/filter/test_expert_relative_after.py b/intelmq/tests/bots/experts/filter/test_expert_relative_after.py index 9f2599d61..eeb9bbf04 100644 --- a/intelmq/tests/bots/experts/filter/test_expert_relative_after.py +++ b/intelmq/tests/bots/experts/filter/test_expert_relative_after.py @@ -28,5 +28,16 @@ def test_Relative_After(self): self.run_bot() self.assertMessageEqual(0, EXAMPLE_INPUT) + def test_bug_1523(self): + """ + > For relative filter, if param not_after: x hours (x < 24) is given, instead of being parsed as datetime.timedelta + > ... 
+ > Leading to error + > TypeError: can't compare offset-naive and offset-aware datetimes + https://github.com/certtools/intelmq/issues/1523 + """ + self.run_bot(parameters={'not_after': '10 hours'}) + self.assertMessageEqual(0, EXAMPLE_INPUT) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/experts/filter/test_paths.py b/intelmq/tests/bots/experts/filter/test_paths.py index 33ad9a2f3..b87615f32 100644 --- a/intelmq/tests/bots/experts/filter/test_paths.py +++ b/intelmq/tests/bots/experts/filter/test_paths.py @@ -29,31 +29,45 @@ def set_bot(cls): 'filter_action': 'drop'} def test_extra_filter_drop(self): - self.prepare_bot(destination_queues=QUEUES) + self.prepare_bot(destination_queues=QUEUES, parameters={'logging_level': 'DEBUG'}) self.run_bot(prepare=False) self.assertOutputQueueLen(0, path="_default") + self.assertLogMatches(".*Sending message to path 'filter_match'.", + levelname='DEBUG') self.assertMessageEqual(0, EXAMPLE_INPUT, path="filter_match") self.assertOutputQueueLen(0, path="filter_no_match") + self.assertLogMatches(".*Sending message to path 'action_other'.", + levelname='DEBUG') self.assertMessageEqual(0, EXAMPLE_INPUT, path="action_other") def test_extra_filter_keep(self): - self.sysconfig = {'filter_key': 'extra.test2', - 'filter_value': 'bla', - 'filter_action': 'keep'} - self.prepare_bot(destination_queues=QUEUES) + self.prepare_bot(destination_queues=QUEUES, + parameters={'filter_key': 'extra.test2', + 'filter_value': 'bla', + 'filter_action': 'keep', + 'logging_level': 'DEBUG'}) self.run_bot(prepare=False) + self.assertLogMatches(".*Sending message to path '_default'.", + levelname='DEBUG') + self.assertLogMatches(".*Sending message to path 'filter_match'.", + levelname='DEBUG') self.assertMessageEqual(0, EXAMPLE_INPUT) self.assertMessageEqual(0, EXAMPLE_INPUT, path="filter_match") self.assertOutputQueueLen(0, path="filter_no_match") self.assertOutputQueueLen(0, path="action_other") def test_filter_no_match_keep(self): - self.sysconfig = {'filter_key': 'extra.test2', - 'filter_value': 'foo', - 'filter_action': 'keep'} - self.prepare_bot(destination_queues=QUEUES) + self.prepare_bot(destination_queues=QUEUES, + parameters={'filter_key': 'extra.test2', + 'filter_value': 'foo', + 'filter_action': 'keep', + 'logging_level': 'DEBUG'}) self.run_bot(prepare=False) + self.assertLogMatches(".*Sending message to path 'action_other'.", + levelname='DEBUG') self.assertMessageEqual(0, EXAMPLE_INPUT, path="action_other") + self.assertLogMatches(".*Sending message to path 'filter_no_match'.", + levelname='DEBUG') self.assertMessageEqual(0, EXAMPLE_INPUT, path="filter_no_match") self.assertOutputQueueLen(0, path="filter_match") self.assertOutputQueueLen(0) diff --git a/intelmq/tests/bots/experts/misp/__init__.py b/intelmq/tests/bots/experts/misp/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/tests/bots/experts/misp/test_expert.py b/intelmq/tests/bots/experts/misp/test_expert.py new file mode 100644 index 000000000..344fb4cfd --- /dev/null +++ b/intelmq/tests/bots/experts/misp/test_expert.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +from intelmq.bots.experts.misp.expert import MISPExpertBot +from intelmq.lib.test import BotTestCase + +import unittest + +class TestMISPExpertBot(BotTestCase, unittest.TestCase): + @classmethod + def set_bot(cls): + cls.bot_reference = MISPExpertBot diff --git a/intelmq/tests/bots/experts/modify/test_expert.py b/intelmq/tests/bots/experts/modify/test_expert.py index 
321cab33f..6e5e51849 100644 --- a/intelmq/tests/bots/experts/modify/test_expert.py +++ b/intelmq/tests/bots/experts/modify/test_expert.py @@ -19,18 +19,24 @@ "classification.type": "infected-system", "time.observation": "2015-01-01T00:00:00+00:00", } -INPUT = [{'feed.name': 'Abuse.ch', +INPUT = [ + # test_events: + {'feed.name': 'Abuse.ch', 'feed.url': 'https://feodotracker.abuse.ch/blocklist/?download=domainblocklist'}, {'malware.name': 'foobar', 'feed.name': 'Other Feed'}, {'source.port': 80, 'malware.name': 'zeus'}, {'malware.name': 'xcodeghost'}, {'malware.name': 'securityscorecard-someexample-value'}, - {'malware.name': 'anyvalue'}, # 5 + {'malware.name': 'anyvalue'}, + # rules 'Standard Protocols http' and 'Fraunhofer DGA' applied: + {'source.port': 80, 'feed.name': 'Fraunhofer DGA'}, + # test_overwrite: {}, + # test_types: {'source.tor_node': True}, {'source.tor_node': False}, {}, - {'feed.accuracy': 5.22}, # 10 + {'feed.accuracy': 5.22}, {'feed.accuracy': 100}, {'comment': 'integer value'}, ] @@ -41,11 +47,12 @@ {'classification.identifier': 'xcodeghost'}, {'classification.identifier': 'someexample-value'}, {'classification.identifier': 'anyvalue'}, # 5 + {'protocol.application': 'http', 'protocol.transport': 'tcp', 'classification.identifier': 'dga'}, {'classification.type': 'vulnerable service'}, {'event_description.text': 'This is a TOR node.'}, {'event_description.text': 'This is not a TOR node.'}, - {'event_description.text': 'We don\'t know if this is a TOR node.'}, - {'event_description.text': 'Accuracy is 10% or lower.'}, # 10 + {'event_description.text': 'We don\'t know if this is a TOR node.'}, # 10 + {'event_description.text': 'Accuracy is 10% or lower.'}, {'event_description.text': 'Accuracy is the highest.'}, {'extra.test': 1, 'event_description.text': 'We don\'t know if this is a TOR node.'}, ] @@ -74,11 +81,11 @@ def set_bot(cls): def test_events(self): """ Test if correct Events have been produced. 
""" - self.input_message = INPUT[:6] + self.input_message = INPUT[:7] self.allowed_warning_count = 1 - self.run_bot(iterations=6) + self.run_bot(iterations=7) - for position, event_out in enumerate(OUTPUT[:6]): + for position, event_out in enumerate(OUTPUT[:7]): self.assertMessageEqual(position, event_out) def test_conversion(self): @@ -100,10 +107,10 @@ def test_types(self): 'tests/bots/experts/modify/types.conf') parameters = {'configuration_path': config_path, 'overwrite': True} - self.input_message = INPUT[7:13] + self.input_message = INPUT[8:14] self.run_bot(parameters=parameters, - iterations=len(INPUT[7:13])) - for position, event_out in enumerate(OUTPUT[7:13]): + iterations=len(INPUT[8:14])) + for position, event_out in enumerate(OUTPUT[8:14]): self.assertMessageEqual(position, event_out) def test_overwrite(self): @@ -112,10 +119,10 @@ def test_overwrite(self): """ config_path = resource_filename('intelmq', 'tests/bots/experts/modify/overwrite.conf') - self.input_message = INPUT[6] + self.input_message = INPUT[7] self.allowed_warning_count = 1 self.run_bot(parameters={'configuration_path': config_path}) - self.assertMessageEqual(0, OUTPUT[6]) + self.assertMessageEqual(0, OUTPUT[7]) def test_overwrite_not(self): """ @@ -128,6 +135,22 @@ def test_overwrite_not(self): 'overwrite': False}) self.assertMessageEqual(0, EVENT_TEMPL) + def test_maximum_matches(self): + """test maximum_matches parameter """ + inp = EVENT_TEMPL.copy() + inp.update(INPUT[6]) + self.input_message = inp + self.run_bot(parameters={'overwrite': True, 'logging_level': 'DEBUG'}) + self.assertLogMatches('.*Apply rule Fraunhofer DGA\.$', 'DEBUG') + self.assertMessageEqual(0, OUTPUT[6]) + + self.input_message = inp + self.run_bot(parameters={'maximum_matches': 1, 'overwrite': True, 'logging_level': 'DEBUG'}) + self.assertLogMatches('Reached maximum number of matches, breaking\.$', 'DEBUG') + out = OUTPUT[6].copy() + del out['classification.identifier'] + self.assertMessageEqual(0, out) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/experts/national_cert_contact_certat/test_expert.py b/intelmq/tests/bots/experts/national_cert_contact_certat/test_expert.py index fc28ff7fe..cd33f4fd5 100644 --- a/intelmq/tests/bots/experts/national_cert_contact_certat/test_expert.py +++ b/intelmq/tests/bots/experts/national_cert_contact_certat/test_expert.py @@ -32,6 +32,9 @@ "source.geolocation.cc": "US", "time.observation": "2015-01-01T00:00:00+00:00", } +MISSING_RESULT = {"__type": "Event", + "source.ip": "45.8.126.3", + } @test.skip_internet() @@ -58,6 +61,11 @@ def test_ipv6_lookup(self): self.run_bot() self.assertMessageEqual(0, EXAMPLE_OUTPUT6) + def test_missing_result(self): + self.input_message = MISSING_RESULT + self.run_bot() + self.assertMessageEqual(0, MISSING_RESULT) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/experts/ripe/test_expert.py b/intelmq/tests/bots/experts/ripe/test_expert.py index d82f5d5fe..a225248ad 100644 --- a/intelmq/tests/bots/experts/ripe/test_expert.py +++ b/intelmq/tests/bots/experts/ripe/test_expert.py @@ -123,44 +123,54 @@ def test_empty_lookup(self): self.assertMessageEqual(0, EMPTY_INPUT) @test.skip_local_web() - def test_ripe_stat_errors(self): + def test_ripe_stat_error_json(self): """ Test RIPE stat for errors. 
""" - self.sysconfig = {'query_ripe_db_asn': False, - 'query_ripe_db_ip': False, - 'query_ripe_stat_asn': True, - 'query_ripe_stat_ip': True, - 'query_ripe_stat_geolocation': False, - } + parameters = {'query_ripe_db_asn': False, + 'query_ripe_db_ip': False, + 'query_ripe_stat_asn': True, + 'query_ripe_stat_ip': True, + 'query_ripe_stat_geolocation': False, + } self.input_message = EMPTY_INPUT - self.allowed_error_count = 1 - self.prepare_bot() + self.prepare_bot(parameters=parameters) old = self.bot.QUERY['stat'] self.bot.QUERY['stat'] = 'http://localhost/{}' - self.run_bot(prepare=False) + self.run_bot(prepare=False, allowed_error_count=1) # internal json in < and >= 3.5 and simplejson + self.bot.QUERY['stat'] = old self.assertLogMatches(pattern='.*(JSONDecodeError|ValueError|Expecting value|No JSON object could be decoded).*', levelname='ERROR') - self.bot.URL_STAT_CONTACT = 'http://localhost/{}' - self.run_bot(prepare=False) - self.bot.URL_STAT_CONTACT = old + @test.skip_local_web() + def test_ripe_stat_error_404(self): + """ Test RIPE stat for errors. """ + parameters = {'query_ripe_db_asn': False, + 'query_ripe_db_ip': False, + 'query_ripe_stat_asn': True, + 'query_ripe_stat_ip': True, + 'query_ripe_stat_geolocation': False, + } + self.input_message = EMPTY_INPUT + self.prepare_bot(parameters=parameters) + old = self.bot.QUERY['stat'] + self.bot.QUERY['stat'] = 'http://localhost/{}' + self.run_bot(prepare=False, allowed_error_count=1) + self.bot.QUERY['stat'] = old self.assertLogMatches(pattern='.*HTTP status code was 404.*', levelname='ERROR') - self.cache.flushdb() # collides with test_replace @test.skip_local_web() def test_ripe_db_as_errors(self): """ Test RIPE DB AS for errors. """ - self.sysconfig = {'query_ripe_db_asn': True, - 'query_ripe_db_ip': False, - 'query_ripe_stat_ip': False, - 'query_ripe_stat_asn': False, - 'query_ripe_stat_geolocation': False, - } self.input_message = EXAMPLE_INPUT self.allowed_error_count = 1 self.allowed_warning_count = 1 - self.prepare_bot() + self.prepare_bot(parameters={'query_ripe_db_asn': True, + 'query_ripe_db_ip': False, + 'query_ripe_stat_ip': False, + 'query_ripe_stat_asn': False, + 'query_ripe_stat_geolocation': False, + }) old = self.bot.QUERY['db_asn'] self.bot.QUERY['db_asn'] = 'http://localhost/{}' self.run_bot(prepare=False) @@ -171,16 +181,15 @@ def test_ripe_db_as_errors(self): @test.skip_local_web() def test_ripe_db_ip_errors(self): """ Test RIPE DB IP for errors. 
""" - self.sysconfig = {'query_ripe_db_asn': False, - 'query_ripe_db_ip': True, - 'query_ripe_stat_ip': False, - 'query_ripe_stat_asn': False, - 'query_ripe_stat_geolocation': False, - } self.input_message = EXAMPLE_INPUT self.allowed_error_count = 1 self.allowed_warning_count = 1 - self.prepare_bot() + self.prepare_bot(parameters={'query_ripe_db_asn': False, + 'query_ripe_db_ip': True, + 'query_ripe_stat_ip': False, + 'query_ripe_stat_asn': False, + 'query_ripe_stat_geolocation': False, + }) old = self.bot.QUERY['db_ip'] self.bot.QUERY['db_ip'] = 'http://localhost/{}' self.run_bot(prepare=False) @@ -189,60 +198,58 @@ def test_ripe_db_ip_errors(self): levelname='ERROR') def test_replace(self): - self.sysconfig = {'mode': 'replace', - 'query_ripe_db_asn': False, - 'query_ripe_db_ip': False, - 'query_ripe_stat_ip': True, - 'query_ripe_stat_asn': False, - 'query_ripe_stat_geolocation': False, - } self.input_message = EMPTY_INPUT - self.run_bot() + self.run_bot(parameters={'mode': 'replace', + 'query_ripe_db_asn': False, + 'query_ripe_db_ip': False, + 'query_ripe_stat_ip': True, + 'query_ripe_stat_asn': False, + 'query_ripe_stat_geolocation': False, + }) self.assertMessageEqual(0, EMPTY_REPLACED) self.assertEqual(self.cache.get('stat:127.0.0.1'), b'__no_contact') self.cache.flushdb() # collides with test_ripe_stat_errors def test_ripe_db_as_404(self): """ Server returns a 404 which should not be raised. """ - self.sysconfig = {'query_ripe_db_asn': True, - 'query_ripe_db_ip': False, - 'query_ripe_stat_ip': False, - 'query_ripe_stat_asn': False, - 'query_ripe_stat_geolocation': False, - } self.input_message = DB_404_AS - self.run_bot() + self.run_bot(parameters={'query_ripe_db_asn': True, + 'query_ripe_db_ip': False, + 'query_ripe_stat_ip': False, + 'query_ripe_stat_asn': False, + 'query_ripe_stat_geolocation': False, + }) self.assertMessageEqual(0, DB_404_AS) + @unittest.expectedFailure def test_geolocation(self): self.input_message = GEOLOCA_INPUT1 - self.sysconfig = {'query_ripe_db_asn': False, - 'query_ripe_db_ip': False, - 'query_ripe_stat_ip': False, - 'query_ripe_stat_asn': True, - } - self.run_bot() + self.run_bot(parameters={'query_ripe_db_asn': False, + 'query_ripe_db_ip': False, + 'query_ripe_stat_ip': False, + 'query_ripe_stat_asn': True, + }) self.assertMessageEqual(0, GEOLOCA_OUTPUT1) + @unittest.expectedFailure def test_geolocation_overwrite(self): self.input_message = GEOLOCA_INPUT2 - self.sysconfig = {'mode': 'replace', - 'query_ripe_db_asn': False, - 'query_ripe_db_ip': False, - 'query_ripe_stat_ip': False, - 'query_ripe_stat_asn': True, - } - self.run_bot() + self.run_bot(parameters={'mode': 'replace', + 'query_ripe_db_asn': False, + 'query_ripe_db_ip': False, + 'query_ripe_stat_ip': False, + 'query_ripe_stat_asn': True, + }) self.assertMessageEqual(0, GEOLOCA_OUTPUT1) + @unittest.expectedFailure def test_geolocation_not_overwrite(self): self.input_message = GEOLOCA_INPUT2 - self.sysconfig = {'query_ripe_db_asn': False, - 'query_ripe_db_ip': False, - 'query_ripe_stat_ip': False, - 'query_ripe_stat_asn': True, - } - self.run_bot() + self.run_bot(parameters={'query_ripe_db_asn': False, + 'query_ripe_db_ip': False, + 'query_ripe_stat_ip': False, + 'query_ripe_stat_asn': True, + }) self.assertMessageEqual(0, GEOLOCA_OUTPUT3) def test_index_error(self): @@ -256,13 +263,12 @@ def test_country_question_mark(self): https://stat.ripe.net/data/maxmind-geo-lite/data.json?resource=35.197.157.0 """ self.input_message = QUESTION_MARK - self.sysconfig = {'query_ripe_db_asn': False, - 
'query_ripe_db_ip': False, - 'query_ripe_stat_asn': False, - 'query_ripe_stat_ip': False, - 'query_ripe_stat_geolocation': True, - } - self.run_bot() + self.run_bot(parameters={'query_ripe_db_asn': False, + 'query_ripe_db_ip': False, + 'query_ripe_stat_asn': False, + 'query_ripe_stat_ip': False, + 'query_ripe_stat_geolocation': True, + }) self.assertMessageEqual(0, QUESTION_MARK_OUTPUT) diff --git a/intelmq/tests/bots/outputs/elasticsearch/test_output.py b/intelmq/tests/bots/outputs/elasticsearch/test_output.py index 005298192..835d3a8ae 100644 --- a/intelmq/tests/bots/outputs/elasticsearch/test_output.py +++ b/intelmq/tests/bots/outputs/elasticsearch/test_output.py @@ -11,26 +11,28 @@ if os.environ.get('INTELMQ_TEST_DATABASES'): import elasticsearch -INPUT1 = {"__type": "Event", - "classification.type": "infected-system", - "source.asn": 64496, - "source.ip": "192.0.2.1", - "feed.name": "Example Feed", - "extra": '{"foo.bar": "test"}' - } -OUTPUT1 = {'classification.type': 'infected-system', - 'extra.foo.bar': 'test', - 'feed.name': 'Example Feed', - 'source.asn': 64496, - 'source.ip': '192.0.2.1', - } +INPUT1 = { + "__type": "Event", + "classification.type": "infected-system", + "source.asn": 64496, + "source.ip": "192.0.2.1", + "feed.name": "Example Feed", + "extra": '{"foo.bar": "test"}' +} +OUTPUT1 = { + 'classification.type': 'infected-system', + 'extra.foo.bar': 'test', + 'feed.name': 'Example Feed', + 'source.asn': 64496, + 'source.ip': '192.0.2.1', +} OUTPUT1_REPLACEMENT_CHARS = { 'classification_type': 'infected-system', 'extra_foo_bar': 'test', 'feed_name': 'Example Feed', 'source_asn': 64496, 'source_ip': '192.0.2.1', - } +} ES_SEARCH = { "query": { "constant_score": { @@ -56,26 +58,24 @@ SAMPLE_TEMPLATE = { "mappings": { - "events": { - "properties": { - "time.observation": { - "type": "date" - }, - "time.source": { - "type": "date" - }, - "classification.type": { - "type": "keyword" - }, - "source.asn": { - "type": "integer" - }, - "feed.name": { - "type": "text" - }, - "source.ip": { - "type": "ip" - } + "properties": { + "time.observation": { + "type": "date" + }, + "time.source": { + "type": "date" + }, + "classification.type": { + "type": "keyword" + }, + "source.asn": { + "type": "integer" + }, + "feed.name": { + "type": "text" + }, + "source.ip": { + "type": "ip" } } }, @@ -132,19 +132,11 @@ def test_event(self): self.run_bot() time.sleep(1) # ES needs some time between inserting and searching result = self.con.search(index='intelmq', body=ES_SEARCH)['hits']['hits'][0] - self.con.delete(index='intelmq', doc_type='events', id=result['_id']) + self.con.delete(index='intelmq', + # doc_type='events', + id=result['_id']) self.assertDictEqual(OUTPUT1, result['_source']) - def test_raise_when_no_template(self): - """ - Test that a bot raises a RuntimeError if 'rotate_index' is set, but a matching template doesn't exist in ES. - """ - self.sysconfig = {"flatten_fields": "extra", - "elastic_index": "intelmq", - "elastic_doctype": "events", - "rotate_index": "daily"} - self.assertRaises(RuntimeError, self.run_bot()) - def test_get_event_date(self): """ Test whether get_event_date detects the time.source and time.observation fields in an event. 
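For context on the `doc_type` removals in the surrounding Elasticsearch hunks: Elasticsearch 7 removed mapping types, so index templates define `properties` directly under `mappings` instead of nesting them under a type name such as `events`, and document APIs are called with only `index` and `id`. A minimal sketch of the 7.x-style calls the tests migrate to, assuming Elasticsearch 7.x and the matching `elasticsearch-py` client (host, index name and template body here are illustrative, not taken from the bot):

    from elasticsearch import Elasticsearch

    con = Elasticsearch(['http://localhost:9200'])  # illustrative host

    # ES >= 7: no type level -- field definitions sit directly under "mappings"
    template = {
        "index_patterns": ["intelmq*"],
        "mappings": {
            "properties": {
                "time.source": {"type": "date"},
                "source.ip": {"type": "ip"},
            }
        },
    }
    con.indices.put_template(name="intelmq", body=template)

    # Document APIs drop doc_type as well: search and delete take only index and id
    result = con.search(index="intelmq", body={"query": {"match_all": {}}})
    for hit in result["hits"]["hits"]:
        con.delete(index=hit["_index"], id=hit["_id"])

This is the change behind stripping `doc_type='events'` from the `self.con.delete(...)` calls and removing the `events` level from `SAMPLE_TEMPLATE` in these hunks.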
@@ -160,7 +152,6 @@ def test_replacement_characters(self): """ self.sysconfig = {"flatten_fields": "extra", "elastic_index": "intelmq", - "elastic_doctype": "events", "replacement_char": "_", "rotate_index": "never"} self.run_bot() @@ -169,7 +160,6 @@ def test_replacement_characters(self): body=ES_SEARCH_REPLACEMENT_CHARS)['hits']['hits'][0] self.con.delete(index=self.sysconfig.get('elastic_index'), - doc_type=self.sysconfig.get('elastic_doctype'), id=result['_id']) self.assertDictEqual(OUTPUT1_REPLACEMENT_CHARS, result['_source']) @@ -180,7 +170,6 @@ def test_index_detected_from_time_source(self): """ self.sysconfig = {"flatten_fields": "extra", "elastic_index": "intelmq", - "elastic_doctype": "events", "rotate_index": "daily"} expected_index_name = "{}-1869-12-02".format(self.sysconfig.get('elastic_index')) self.base_check_expected_index_created(INPUT_TIME_SOURCE, expected_index_name) @@ -192,7 +181,6 @@ def test_index_detected_from_time_observation(self): """ self.sysconfig = {"flatten_fields": "extra", "elastic_index": "intelmq", - "elastic_doctype": "events", "rotate_index": "daily"} expected_index_name = "{}-2020-02-02".format(self.sysconfig.get('elastic_index')) self.base_check_expected_index_created(INPUT_TIME_OBSERVATION, expected_index_name) @@ -206,13 +194,13 @@ def test_index_falls_back_to_default_date(self): self.sysconfig = {"flatten_fields": "extra", "elastic_index": "intelmq", - "elastic_doctype": "events", "rotate_index": "daily"} class FakeDateTime(datetime): """ Passed to bot to force expected datetime value for test. """ + @classmethod def today(cls): return datetime.strptime('2018-09-09T01:23:45+00:00', '%Y-%m-%dT%H:%M:%S+00:00') @@ -232,7 +220,6 @@ def test_index_falls_back_to_default_string(self): self.sysconfig = {"flatten_fields": "extra", "elastic_index": "intelmq", - "elastic_doctype": "events", "rotate_index": "daily"} self.prepare_bot() @@ -257,7 +244,8 @@ def base_check_expected_index_created(self, input_event, expected_index_name): result_index_name = result["_index"] # Clean up test event and check that the index name was set correctly - self.con.delete(index=result_index_name, doc_type=self.sysconfig.get('elastic_doctype'), id=result['_id']) + self.con.delete(index=result_index_name, + id=result['_id']) self.assertEqual(result_index_name, expected_index_name) diff --git a/intelmq/tests/bots/outputs/files/test_output.py b/intelmq/tests/bots/outputs/files/test_output.py index 339c53c6e..9dcb965ae 100644 --- a/intelmq/tests/bots/outputs/files/test_output.py +++ b/intelmq/tests/bots/outputs/files/test_output.py @@ -20,7 +20,7 @@ def set_bot(cls): cls.tmp_path = pth.join(cls.base_dir.name, "incoming") def setUp(self): - self.test_output = '{"asdf":"ghjk"}' + self.test_output = json.dumps({"asdf": "ghjk"}) self.input_message = {"__type": "Event", "output": self.test_output} def tearDown(self): diff --git a/intelmq/tests/bots/outputs/misp/__init__.py b/intelmq/tests/bots/outputs/misp/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/tests/bots/outputs/misp/test_output_api.py b/intelmq/tests/bots/outputs/misp/test_output_api.py new file mode 100644 index 000000000..81016be17 --- /dev/null +++ b/intelmq/tests/bots/outputs/misp/test_output_api.py @@ -0,0 +1,10 @@ +import os +# import unittest + +# import intelmq.lib.test as test +if os.environ.get('INTELMQ_TEST_EXOTIC'): + from intelmq.bots.outputs.misp.output_api import MISPAPIOutputBot # noqa + +# This file is a stub +# We cannot do much more as we are missing a mock MISP instance to 
use +# to initialise pymisp diff --git a/intelmq/tests/bots/outputs/misp/test_output_feed.py b/intelmq/tests/bots/outputs/misp/test_output_feed.py new file mode 100644 index 000000000..efcd15d8f --- /dev/null +++ b/intelmq/tests/bots/outputs/misp/test_output_feed.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +import unittest +import sys +from tempfile import TemporaryDirectory + +import intelmq.lib.test as test +from intelmq.bots.outputs.misp.output_feed import MISPFeedOutputBot + +EXAMPLE_EVENT = {"classification.type": "malware", + "destination.port": 9796, + "feed.accuracy": 100.0, + "destination.ip": "52.18.196.169", + "malware.name": "salityp2p", + "event_description.text": "Sinkhole attempted connection", + "time.source": "2016-04-19T23:16:08+00:00", + "source.ip": "152.166.119.2", + "feed.url": "http://alerts.bitsighttech.com:8080/stream?", + "source.geolocation.country": "Dominican Republic", + "time.observation": "2016-04-19T23:16:08+00:00", + "source.port": 65118, + "__type": "Event", + "feed.name": "BitSight", + "extra.non_ascii": "ççãããã\x80\ua000 \164 \x80\x80 abcd \165\166", + "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTYWxpdHlwMnAiLCJlbnYiOnsic" + "mVtb3RlX2FkZHIiOiIxNTIuMTY2LjExOS4yIiwicmVtb3RlX3" + "BvcnQiOiI2NTExOCIsInNlcnZlcl9hZGRyIjoiNTIuMTguMTk" + "2LjE2OSIsInNlcnZlcl9wb3J0IjoiOTc5NiJ9LCJfdHMiOjE0" + "NjExMDc3NjgsIl9nZW9fZW52X3JlbW90ZV9hZGRyIjp7ImNvd" + "W50cnlfbmFtZSI6IkRvbWluaWNhbiBSZXB1YmxpYyJ9fQ==", + "__type": "Event", + } + + +@test.skip_exotic() +@unittest.skipIf(sys.version_info < (3, 6), + 'The MISP Feed Output Bot does require Python >= 3.6.') +class TestMISPFeedOutputBot(test.BotTestCase, unittest.TestCase): + + @classmethod + def set_bot(cls): + cls.bot_reference = MISPFeedOutputBot + cls.default_input_message = EXAMPLE_EVENT + cls.directory = TemporaryDirectory() + cls.sysconfig = {"misp_org_name": 'IntelMQTestOrg', + "misp_org_uuid": "b89da4c2-0f74-11ea-96a1-6fa873a0eb4d", + "output_dir": cls.directory.name, + "interval_event": '1 hour'} + + def test_event(self): + self.run_bot() + + @classmethod + def tearDownClass(cls): + cls.directory.cleanup() + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/abusech/test_parser_ip.py b/intelmq/tests/bots/parsers/abusech/test_parser_ip.py index f28282a7f..2c6a3d3be 100644 --- a/intelmq/tests/bots/parsers/abusech/test_parser_ip.py +++ b/intelmq/tests/bots/parsers/abusech/test_parser_ip.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -from datetime import datetime import os import unittest diff --git a/intelmq/tests/bots/parsers/abusech/test_parser_ransomware.py b/intelmq/tests/bots/parsers/abusech/test_parser_ransomware.py deleted file mode 100644 index a2df94f2d..000000000 --- a/intelmq/tests/bots/parsers/abusech/test_parser_ransomware.py +++ /dev/null @@ -1,96 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import unittest - -import intelmq.lib.test as test -import intelmq.lib.utils as utils -from intelmq.bots.parsers.abusech.parser_ransomware import AbuseCHRansomwaretrackerParserBot - -with open(os.path.join(os.path.dirname(__file__), 'ransomwaretracker.csv')) as handle: - EXAMPLE_FILE = handle.read() - - -EXAMPLE_REPORT = {'feed.url': 'https://ransomwaretracker.abuse.ch/feeds/csv', - 'feed.name': 'AbuseCH Ransomwaretracker', - '__type': 'Report', - 'raw': utils.base64_encode(EXAMPLE_FILE), - 'time.observation': '2016-11-16T10:18:00+00:00' - } - -EXAMPLE_EVENT = [{'feed.url': 'https://ransomwaretracker.abuse.ch/feeds/csv', - 'feed.name': 'AbuseCH Ransomwaretracker', - 
'__type': 'Event', - 'time.observation': '2016-11-16T10:18:00+00:00', - 'raw': 'MjAxNi0xMS0xMSAxNjowNjowMCxEaXN0cmlidXRpb24gU2l0ZSxMb2NreSxtb3RlZnVndWUuY29tLGh0dHA6Ly9tb3RlZnVndWUuY29tLzY0NWQ1LG9ubGluZSxQQUtOSUMgKFBSSVZBVEUpIExJTUlURUQsMjEzLjE3Ni4yNDEuMjMwLDEzMDU1fDc5MjIsUlV8VVM=', - 'classification.identifier': 'locky', - 'classification.type': 'c2server', - 'classification.taxonomy': "malicious code", - 'time.source': '2016-11-11T16:06:00+00:00', - 'status': 'online', - 'source.ip': '213.176.241.230', - 'source.fqdn': 'motefugue.com', - 'source.url': 'http://motefugue.com/645d5', - }, - {'feed.url': 'https://ransomwaretracker.abuse.ch/feeds/csv', - 'feed.name': 'AbuseCH Ransomwaretracker', - '__type': 'Event', - 'time.observation': '2016-11-16T10:18:00+00:00', - 'raw': 'MjAxNi0xMS0xMSAxNjowNjowMCxEaXN0cmlidXRpb24gU2l0ZSxMb2NreSxtb3RlZnVndWUuY29tLGh0dHA6Ly9tb3RlZnVndWUuY29tLzY0NWQ1LG9ubGluZSxQQUtOSUMgKFBSSVZBVEUpIExJTUlURUQsNjcuMTcxLjY1LjY0LDEzMDU1fDc5MjIsUlV8VVM=', - 'classification.identifier': 'locky', - 'classification.type': 'c2server', - 'classification.taxonomy': "malicious code", - 'time.source': '2016-11-11T16:06:00+00:00', - 'status': 'online', - 'source.ip': '67.171.65.64', - 'source.fqdn': 'motefugue.com', - 'source.url': 'http://motefugue.com/645d5', - }, - {'feed.url': 'https://ransomwaretracker.abuse.ch/feeds/csv', - 'feed.name': 'AbuseCH Ransomwaretracker', - '__type': 'Event', - 'time.observation': '2016-11-16T10:18:00+00:00', - 'raw': 'MjAxNi0xMS0xNSAxMDowNzo1OSxQYXltZW50IFNpdGUsVG9ycmVudExvY2tlcixvam1la3p3NG11anZxZWp1Lm1pbml0aWxpLmF0LGh0dHA6Ly9vam1la3p3NG11anZxZWp1Lm1pbml0aWxpLmF0LyxvbmxpbmUsLDUuNzkuOTYuMzMsNjA3ODF8MjkxODJ8MjA3MDI3fDEyNjk1LE5M', - 'classification.identifier': 'torrentlocker', - 'classification.type': 'c2server', - 'classification.taxonomy': "malicious code", - 'time.source': '2016-11-15T10:07:59+00:00', - 'status': 'online', - 'source.ip': '5.79.96.33', - 'source.fqdn': 'ojmekzw4mujvqeju.minitili.at', - 'source.url': 'http://ojmekzw4mujvqeju.minitili.at/', - }, - {'feed.url': 'https://ransomwaretracker.abuse.ch/feeds/csv', - 'feed.name': 'AbuseCH Ransomwaretracker', - '__type': 'Event', - 'time.observation': '2016-11-16T10:18:00+00:00', - 'raw': 'MjAxNi0xMS0xNSAxMDowNzo1OSxQYXltZW50IFNpdGUsVG9ycmVudExvY2tlcixvam1la3p3NG11anZxZWp1Lm1pbml0aWxpLmF0LGh0dHA6Ly9vam1la3p3NG11anZxZWp1Lm1pbml0aWxpLmF0LyxvbmxpbmUsLDAuMC4wLjAsNjA3ODF8MjkxODJ8MjA3MDI3fDEyNjk1LE5M', - 'classification.identifier': 'torrentlocker', - 'classification.type': 'c2server', - 'classification.taxonomy': "malicious code", - 'time.source': '2016-11-15T10:07:59+00:00', - 'status': 'online', - 'source.fqdn': 'ojmekzw4mujvqeju.minitili.at', - 'source.url': 'http://ojmekzw4mujvqeju.minitili.at/', - }] - - -class TestAbuseCHRansomwaretrackerParserBot(test.BotTestCase, unittest.TestCase): - """ A TestCase for AbuseCHRansomwaretrackerParserBot. """ - - @classmethod - def set_bot(cls): - cls.bot_reference = AbuseCHRansomwaretrackerParserBot - cls.default_input_message = EXAMPLE_REPORT - - def test_event(self): - """ Test if correct Event hs been produced. 
""" - self.run_bot() - self.assertMessageEqual(0, EXAMPLE_EVENT[0]) - self.assertMessageEqual(1, EXAMPLE_EVENT[1]) - self.assertMessageEqual(2, EXAMPLE_EVENT[2]) - self.assertMessageEqual(3, EXAMPLE_EVENT[3]) - - -if __name__ == '__main__': # pragma: no cover - unittest.main() diff --git a/intelmq/tests/bots/parsers/anubisnetworks/example_report.json b/intelmq/tests/bots/parsers/anubisnetworks/example_report.json new file mode 100644 index 000000000..b561ba58b --- /dev/null +++ b/intelmq/tests/bots/parsers/anubisnetworks/example_report.json @@ -0,0 +1,27 @@ +{ + "_geo_env_remote_addr": { + "path": "env.remote_addr", + "asn_name": "ExampleAS", + "asn": 65536, + "longitude": 13, + "latitude": 37, + "ip": "203.0.113.2", + "netmask": 24, + "country_code": "AT", + "country_name": "Austria", + "region": "Vienna", + "region_code": "01", + "city": "Vienna", + "postal_code": "1010" + }, + "_ts": 1484041530, + "qtype": "A", + "_origin": "dnsmalware", + "_provider": "spikens", + "trojanfamily": "Nivdort", + "env": { + "server_port": 80, + "remote_addr": "203.0.113.2", + "request_method": "POST" + } +} diff --git a/intelmq/tests/bots/parsers/anubisnetworks/example_report2.json b/intelmq/tests/bots/parsers/anubisnetworks/example_report2.json new file mode 100644 index 000000000..1565d0adb --- /dev/null +++ b/intelmq/tests/bots/parsers/anubisnetworks/example_report2.json @@ -0,0 +1,15 @@ +{ + "trojanfamily": "SpyApp", + "env": { + "remote_port": "52888", + "server_name": "example.net", + "server_addr": "190.124.67.212", + "request_method": "POST", + "remote_addr": "190.124.67.211", + "server_port": "80" + }, + "_ts": 1461107754, + "_geo_env_remote_addr": { + "country_name": "Dominican Republic" + } +} diff --git a/intelmq/tests/bots/parsers/anubisnetworks/example_report3.json b/intelmq/tests/bots/parsers/anubisnetworks/example_report3.json new file mode 100644 index 000000000..b93c851cc --- /dev/null +++ b/intelmq/tests/bots/parsers/anubisnetworks/example_report3.json @@ -0,0 +1,117 @@ +{ + "metadata": { + "flowbits": [ + "_mt_s", + "_mt_sa", + "_mt_a", + "_mt_p" + ] + }, + "app_proto": "http", + "_ts": 1586252714, + "src": { + "ip": "203.0.113.2", + "port": 59645 + }, + "dst": { + "ip": "203.0.113.1", + "port": 80 + }, + "malw": { + "severity": 2, + "family": "MalwName", + "variant": "MalwName", + "categories": [ + "Adware" + ] + }, + "comm": { + "proto": "http", + "http": { + "method": "GET", + "host": "example.com", + "path": "/path", + "user_agent": "Agent", + "more_headers": [ + "Connection: Keep-Alive" + ], + "cookies": "dummy=cookie|foo=bar", + "unverified_domain": "true", + "x_forwarded_for": [ + "10.26.116.22" + ] + }, + "method": "sinkhole" + }, + "tracking": { + "id": "6b49906822f2431894f295921b3c1647", + "tr": "bd592af93f1a4deca63ee18e87170374", + "last_ip": "203.0.113.3", + "first": 1572960032, + "seen": 1586318132, + "checkins": 2158, + "changes": 5, + "days": 154, + "same_ip": "true" + }, + "_origin": "infections", + "_geo_src_ip": { + "ip": "203.0.113.2", + "netmask": 24, + "country_code": "AT", + "country_name": "Austria", + "region": "Wien", + "region_code": "09", + "city": "Vienna", + "postal_code": "1210", + "latitude": 48.2993, + "longitude": 16.3479, + "asn": 1, + "asn_name": "Example AS Name", + "path": "src.ip" + }, + "_geo_tracking_last_ip": { + "ip": "203.0.113.3", + "netmask": 24, + "country_code": "AT", + "country_name": "Austria", + "region": "Wien", + "region_code": "09", + "city": "Vienna", + "postal_code": "1210", + "latitude": 48.2993, + "longitude": 16.3479, + "asn": 1, 
+ "asn_name": "Example AS Name", + "path": "tracking.last_ip", + "dma_code": 528, + "area_code": 305, + "metro_code": 528 + }, + "_geo_comm_http_host": { + "ip": "203.0.113.4", + "netmask": 21, + "country_code": "US", + "country_name": "United States", + "latitude": 37.751, + "longitude": -97.822, + "asn": 29791, + "asn_name": "Example AS Name", + "path": "comm.http.host" + }, + "_geo_comm_http_x_forwarded_for_#1": { + "ip": "10.26.116.22", + "netmask": 24, + "country_code": "AT", + "country_name": "Austria", + "region": "Salzburg", + "region_code": "05", + "city": "Salzburg", + "postal_code": "5020", + "latitude": 47.8007, + "longitude": 13.0442, + "asn": 1901, + "asn_name": "Example AS", + "path": "comm.http.x_forwarded_for.#1" + } +} diff --git a/intelmq/tests/bots/parsers/anubisnetworks/example_report_dns.json b/intelmq/tests/bots/parsers/anubisnetworks/example_report_dns.json new file mode 100644 index 000000000..62f2ba4b1 --- /dev/null +++ b/intelmq/tests/bots/parsers/anubisnetworks/example_report_dns.json @@ -0,0 +1,44 @@ +{ + "_ts": 1586319917, + "_origin": "infections", + "malw": { + "family": "Malware name DNS", + "variant": "Malware name DNS", + "severity": 2, + "categories": [ + "Adware", + "Trojan" + ] + }, + "src": { + "ip": "203.0.113.2", + "port": 11138 + }, + "dst": { + "ip": "203.0.113.1", + "port": 53 + }, + "comm": { + "dns": { + "name": "example.com", + "qtype": "A" + }, + "method": "sinkhole", + "proto": "dns" + }, + "_geo_src_ip": { + "ip": "203.0.113.2", + "netmask": 23, + "country_code": "AT", + "country_name": "Austria", + "region": "Steiermark", + "region_code": "06", + "city": "Graz", + "postal_code": "8000", + "latitude": 47.0832, + "longitude": 15.5666, + "asn": 1, + "asn_name": "Example AS Name", + "path": "src.ip" + } +} diff --git a/intelmq/tests/bots/parsers/anubisnetworks/test_parser.py b/intelmq/tests/bots/parsers/anubisnetworks/test_parser.py index 71a295361..c1a293507 100644 --- a/intelmq/tests/bots/parsers/anubisnetworks/test_parser.py +++ b/intelmq/tests/bots/parsers/anubisnetworks/test_parser.py @@ -1,20 +1,29 @@ # -*- coding: utf-8 -*- - +import os.path import unittest import intelmq.lib.test as test +import intelmq.lib.utils as utils from intelmq.bots.parsers.anubisnetworks.parser import AnubisNetworksParserBot +with open(os.path.join(os.path.dirname(__file__), 'example_report.json')) as handle: + EXAMPLE_RAW = handle.read() +with open(os.path.join(os.path.dirname(__file__), 'example_report2.json')) as handle: + EXAMPLE2_RAW = handle.read() +with open(os.path.join(os.path.dirname(__file__), 'example_report3.json')) as handle: + EXAMPLE3_RAW = handle.read() +with open(os.path.join(os.path.dirname(__file__), 'example_report_dns.json')) as handle: + EXAMPLE_DNS_RAW = handle.read() EXAMPLE_REPORT = {"feed.url": "https://prod.cyberfeed.net/stream?key=7b7cd29c7a424b2980ca", "feed.accuracy": 100.0, "__type": "Report", "feed.name": "AnubisNetworks", - "raw": "eyJfZ2VvX2Vudl9yZW1vdGVfYWRkciI6eyJwYXRoIjoiZW52LnJlbW90ZV9hZGRyIiwiYXNuX25hbWUiOiJFeGFtcGxlQVMiLCJhc24iOjY1NTM2LCJsb25naXR1ZGUiOjEzLCJsYXRpdHVkZSI6MzcsImlwIjoiMjAzLjAuMTEzLjIiLCJuZXRtYXNrIjoyNCwiY291bnRyeV9jb2RlIjoiQVQiLCJjb3VudHJ5X25hbWUiOiJBdXN0cmlhIiwicmVnaW9uIjoiVmllbm5hIiwicmVnaW9uX2NvZGUiOiIwMSIsImNpdHkiOiJWaWVubmEiLCJwb3N0YWxfY29kZSI6IjEwMTAifSwiX3RzIjoxNDg0MDQxNTMwLCJxdHlwZSI6IkEiLCJfb3JpZ2luIjoiZG5zbWFsd2FyZSIsIl9wcm92aWRlciI6InNwaWtlbnMiLCJ0cm9qYW5mYW1pbHkiOiJOaXZkb3J0IiwiZW52Ijp7InNlcnZlcl9wb3J0Ijo4MCwicmVtb3RlX2FkZHIiOiIyMDMuMC4xMTMuMiIsInJlcXVlc3RfbWV0aG9kIjoiUE9TVCJ9fQ==", + "raw": 
utils.base64_encode(EXAMPLE_RAW), "time.observation": "2016-04-19T23:16:08+00:00" } - -EXAMPLE_EVENT = {"classification.type": "malware", +EXAMPLE_EVENT = {"classification.taxonomy": "malicious code", + "classification.type": "malware", "destination.port": 80, "feed.accuracy": 100.0, "malware.name": "nivdort", @@ -38,29 +47,23 @@ 'extra._provider': 'spikens', 'extra.request_method': 'POST', 'extra._origin': 'dnsmalware', + 'extra.dns_query_type': 'A', } EXAMPLE_REPORT2 = {"feed.name": "AnubisNetworks", "feed.accuracy": 100.0, "feed.url": "https://prod.cyberfeed.net/stream?key=7b7cd29c7a424b2980ca", - "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTcHlBcHAiLCJlbnYiOnsicmVtb" - "3RlX3BvcnQiOiI1Mjg4OCIsInNlcnZlcl9uYW1lIjoiZGV2LX" - "VwZGF0ZS5pbmZvIiwic2VydmVyX2FkZHIiOiIxOTUuMjIuMjg" - "uMTk2IiwicmVxdWVzdF9tZXRob2QiOiJQT1NUIiwicmVtb3Rl" - "X2FkZHIiOiIxOTAuMTI0LjY3LjIxMSIsInNlcnZlcl9wb3J0I" - "joiODAifSwiX3RzIjoxNDYxMTA3NzU0LCJfZ2VvX2Vudl9yZW" - "1vdGVfYWRkciI6eyJjb3VudHJ5X25hbWUiOiJEb21pbmljYW4" - "gUmVwdWJsaWMifX0=", + "raw": utils.base64_encode(EXAMPLE2_RAW), "__type": "Report", "time.observation": "2016-04-19T23:16:10+00:00" } - -EXAMPLE_EVENT2 = {"feed.name": "AnubisNetworks", +EXAMPLE_EVENT2 = {"classification.taxonomy": "malicious code", + "feed.name": "AnubisNetworks", "malware.name": "spyapp", - "destination.fqdn": "dev-update.info", + "destination.fqdn": "example.net", "source.ip": "190.124.67.211", - "destination.ip": "195.22.28.196", + "destination.ip": "190.124.67.212", "__type": "Event", "source.geolocation.country": "Dominican Republic", "time.source": "2016-04-19T23:15:54+00:00", @@ -70,18 +73,154 @@ "feed.url": "https://prod.cyberfeed.net/stream", "destination.port": 80, "feed.accuracy": 100.0, - "raw": "eyJ0cm9qYW5mYW1pbHkiOiJTcHlBcHAiLCJlbnYiOnsicmVt" - "b3RlX3BvcnQiOiI1Mjg4OCIsInNlcnZlcl9uYW1lIjoiZGV2" - "LXVwZGF0ZS5pbmZvIiwic2VydmVyX2FkZHIiOiIxOTUuMjIu" - "MjguMTk2IiwicmVxdWVzdF9tZXRob2QiOiJQT1NUIiwicmVt" - "b3RlX2FkZHIiOiIxOTAuMTI0LjY3LjIxMSIsInNlcnZlcl9w" - "b3J0IjoiODAifSwiX3RzIjoxNDYxMTA3NzU0LCJfZ2VvX2Vu" - "dl9yZW1vdGVfYWRkciI6eyJjb3VudHJ5X25hbWUiOiJEb21p" - "bmljYW4gUmVwdWJsaWMifX0=", + "raw": EXAMPLE_REPORT2['raw'], "classification.type": "malware", "event_description.text": "Sinkhole attempted connection" } +EXAMPLE_REPORT3 = {"feed.url": "https://prod.cyberfeed.net/stream?key=7b7cd29c7a424b2980ca", + "raw": utils.base64_encode(EXAMPLE3_RAW), + "__type": "Report", + "time.observation": "2016-04-19T23:16:10+00:00" + } +EXAMPLE_EVENT3 = {"classification.taxonomy": "malicious code", + "malware.name": "malwname", + "source.ip": "203.0.113.2", + "source.port": 59645, + "__type": "Event", + "time.source": "2020-04-07T09:45:14+00:00", + "time.observation": "2016-04-19T23:16:10+00:00", + "feed.url": "https://prod.cyberfeed.net/stream", + "destination.ip": "203.0.113.1", + "destination.port": 80, + "raw": EXAMPLE_REPORT3['raw'], + "classification.type": "malware", + "event_description.text": "Sinkhole attempted connection", + "extra.metadata.flowbits": [ + "_mt_s", + "_mt_sa", + "_mt_a", + "_mt_p" + ], + "protocol.application": "http", + "extra.malware.severity": 2, + "extra.malware.categories": [ + "Adware" + ], + "extra.request_method": "GET", + "destination.fqdn": "example.com", + "destination.urlpath": "/path", + "destination.url": "http://example.com/path", + "extra.user_agent": "Agent", + "extra.communication.headers": [ + "Connection: Keep-Alive" + ], + "extra.communication.x_forwarded_for": [ + "10.26.116.22" + ], + "extra.communication.type": "sinkhole", + "extra._origin": 
"infections", + "source.network": "203.0.113.0/24", + "source.geolocation.cc": "AT", + "source.geolocation.country": "Austria", + "source.geolocation.region": "Wien", + "extra.source.geolocation.region_code": "09", + "source.geolocation.city": "Vienna", + "extra.source.geolocation.postal_code": "1210", + "source.geolocation.latitude": 48.2993, + "source.geolocation.longitude": 16.3479, + "source.asn": 1, + "source.as_name": "Example AS Name", + 'extra.communication.cookies': 'dummy=cookie|foo=bar', + "extra.tracking.id": "6b49906822f2431894f295921b3c1647", + "extra.tracking.last.ip": "203.0.113.3", + "extra.first_seen": 1572960032, + "extra.last_seen": 1586318132, + "extra.tracking.checkins": 2158, + "extra.tracking.changes": 5, + "extra.tracking.tr": "bd592af93f1a4deca63ee18e87170374", + "extra.days_seen": 154, + "extra.tracking.same_ip": "true", + 'extra.tracking.last.as_name': 'Example AS Name', + 'extra.tracking.last.asn': 1, + 'extra.tracking.last.geolocation.cc': 'AT', + 'extra.tracking.last.geolocation.city': 'Vienna', + 'extra.tracking.last.geolocation.country': 'Austria', + 'extra.tracking.last.geolocation.latitude': 48.2993, + 'extra.tracking.last.geolocation.longitude': 16.3479, + 'extra.tracking.last.geolocation.postal_code': '1210', + 'extra.tracking.last.geolocation.region': 'Wien', + 'extra.tracking.last.geolocation.region_code': '09', + 'extra.tracking.last.network': '203.0.113.3/24', + "extra.tracking.last.geolocation.dma_code": 528, + "extra.tracking.last.geolocation.area_code": 305, + "extra.tracking.last.geolocation.metro_code": 528, + 'extra.communication.unverified_domain': 'true', + 'extra.communication.http.host.as_name': 'Example AS Name', + 'extra.communication.http.host.asn': 29791, + 'extra.communication.http.host.geolocation.cc': 'US', + 'extra.communication.http.host.geolocation.country': 'United States', + 'extra.communication.http.host.geolocation.latitude': 37.751, + 'extra.communication.http.host.geolocation.longitude': -97.822, + 'extra.communication.http.host.network': '203.0.113.4/21', + "extra.communication.http.x_forwarded_for_#1.network": "10.26.116.22/24", + "extra.communication.http.x_forwarded_for_#1.geolocation.cc": "AT", + "extra.communication.http.x_forwarded_for_#1.geolocation.country": "Austria", + "extra.communication.http.x_forwarded_for_#1.geolocation.region": "Salzburg", + "extra.communication.http.x_forwarded_for_#1.geolocation.region_code": "05", + "extra.communication.http.x_forwarded_for_#1.geolocation.city": "Salzburg", + "extra.communication.http.x_forwarded_for_#1.geolocation.postal_code": "5020", + "extra.communication.http.x_forwarded_for_#1.geolocation.latitude": 47.8007, + "extra.communication.http.x_forwarded_for_#1.geolocation.longitude": 13.0442, + "extra.communication.http.x_forwarded_for_#1.asn": 1901, + "extra.communication.http.x_forwarded_for_#1.as_name": "Example AS", + } + + +EXAMPLE_REPORT_DNS = {"feed.url": "https://prod.cyberfeed.net/stream?key=7b7cd29c7a424b2980ca", + "raw": utils.base64_encode(EXAMPLE_DNS_RAW), + "__type": "Report", + "time.observation": "2016-04-19T23:16:10+00:00" + } +EXAMPLE_EVENT_DNS = {"classification.taxonomy": "malicious code", + "malware.name": "malware name dns", + "source.ip": "203.0.113.2", + "source.port": 11138, + "__type": "Event", + 'time.source': '2020-04-08T04:25:17+00:00', + "time.observation": "2016-04-19T23:16:10+00:00", + "feed.url": "https://prod.cyberfeed.net/stream", + "destination.ip": "203.0.113.1", + "destination.port": 53, + "raw": EXAMPLE_REPORT_DNS['raw'], + 
"classification.type": "malware", + "classification.identifier": "Malware name DNS", + "event_description.text": "Sinkhole attempted connection", + "protocol.application": "dns", + "extra.malware.severity": 2, + "extra.malware.categories": [ + "Adware", + "Trojan" + ], + "destination.fqdn": "example.com", + "extra.communication.type": "sinkhole", + "extra._origin": "infections", + "source.network": "203.0.112.0/23", + "source.geolocation.cc": "AT", + "source.geolocation.country": "Austria", + "source.geolocation.region": "Steiermark", + "extra.source.geolocation.region_code": "06", + "source.geolocation.city": "Graz", + "extra.source.geolocation.postal_code": "8000", + "source.geolocation.latitude": 47.0832, + "source.geolocation.longitude": 15.5666, + "source.asn": 1, + "source.as_name": "Example AS Name", + 'extra.dns_query_type': 'A', + } +EMPTY_REPORT = EXAMPLE_REPORT.copy() +EMPTY_REPORT['raw'] = 'Cg==' + class TestAnubisNetworksParserBot(test.BotTestCase, unittest.TestCase): @@ -93,7 +232,6 @@ def set_bot(cls): def test_event(self): """ Test: report without fqdn """ self.run_bot() - self.assertMessageEqual(0, EXAMPLE_EVENT) def test_with_fqdn(self): @@ -102,6 +240,24 @@ def test_with_fqdn(self): self.run_bot() self.assertMessageEqual(0, EXAMPLE_EVENT2) + def test_third(self): + """ Test: report from 2020 """ + self.input_message = EXAMPLE_REPORT3 + self.run_bot(parameters={'use_malware_familiy_as_classification_identifier': False}) + self.assertMessageEqual(0, EXAMPLE_EVENT3) + + def test_dns(self): + """ Test: report with DNS data """ + self.input_message = EXAMPLE_REPORT_DNS + self.run_bot() + self.assertMessageEqual(0, EXAMPLE_EVENT_DNS) + + def test_empty(self): + """ Test empty line as input """ + self.input_message = EMPTY_REPORT + self.run_bot() + self.assertOutputQueueLen(0) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/parsers/bitcash/blacklist b/intelmq/tests/bots/parsers/bitcash/blacklist deleted file mode 100644 index bfef40b33..000000000 --- a/intelmq/tests/bots/parsers/bitcash/blacklist +++ /dev/null @@ -1,4 +0,0 @@ -# Bitcash auto-blacklisting by carlos@bitcash.cz -# IPs banned for serious abusing of our services (scanning, sniffing, harvesting, dos attacks) -81.95.123.209 # npvpn.dco.fusa.be last access 2016-10-01 00:15:01 -194.213.39.138 # 194.213.39.138 last access 2016-10-01 17:10:01 diff --git a/intelmq/tests/bots/parsers/bitcash/test_parser.py b/intelmq/tests/bots/parsers/bitcash/test_parser.py deleted file mode 100644 index c3eb88478..000000000 --- a/intelmq/tests/bots/parsers/bitcash/test_parser.py +++ /dev/null @@ -1,57 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import unittest - -import intelmq.lib.test as test -import intelmq.lib.utils as utils - -from intelmq.bots.parsers.bitcash.parser import BitcashBlocklistParserBot - -with open(os.path.join(os.path.dirname(__file__), 'blacklist')) as handle: - EXAMPLE_FILE = handle.read() - -EXAMPLE_REPORT = {'feed.url': 'http://bitcash.cz/misc/log/blacklist', - 'feed.name': 'bitcash_blocklist', - '__type': 'Report', - 'raw': utils.base64_encode(EXAMPLE_FILE), - 'time.observation': '2016-11-21T20:05:54+00:00' - } - -EXAMPLE_EVENT = [{'feed.url': 'http://bitcash.cz/misc/log/blacklist', - 'feed.name': 'bitcash_blocklist', - 'time.source': '2016-10-01T00:15:01+00:00', - 'source.ip': '81.95.123.209', - 'source.reverse_dns': 'npvpn.dco.fusa.be', - 'classification.type': 'scanner', - 'event_description.text': 'IPs banned for serious abusing of Bitcash services (scanning, 
sniffing, harvesting, dos attacks)', - 'raw': 'ODEuOTUuMTIzLjIwOSwjLG5wdnBuLmRjby5mdXNhLmJlLGxhc3QsYWNjZXNzLDIwMTYtMTAtMDEsMDA6MTU6MDE=', - '__type': 'Event' - }, - {'feed.url': 'http://bitcash.cz/misc/log/blacklist', - 'feed.name': 'bitcash_blocklist', - 'time.source': '2016-10-01T17:10:01+00:00', - 'source.ip': '194.213.39.138', - 'classification.type': 'scanner', - 'event_description.text': 'IPs banned for serious abusing of Bitcash services (scanning, sniffing, harvesting, dos attacks)', - 'raw': 'MTk0LjIxMy4zOS4xMzgsIywxOTQuMjEzLjM5LjEzOCxsYXN0LGFjY2VzcywyMDE2LTEwLTAxLDE3OjEwOjAx', - '__type': 'Event' - }] - - -class TestBitcashBlocklistParserBot(test.BotTestCase, unittest.TestCase): - """ A TestCase of BitcashBlockListParserBot """ - - @classmethod - def set_bot(cls): - cls.bot_reference = BitcashBlocklistParserBot - cls.default_input_message = EXAMPLE_REPORT - - def test_event(self): - """ Test if correct Events have been produced """ - self.run_bot() - self.assertMessageEqual(0, EXAMPLE_EVENT[0]) - self.assertMessageEqual(1, EXAMPLE_EVENT[1]) - -if __name__ == '__main__': # pragma: no cover - unittest.main() diff --git a/intelmq/tests/bots/parsers/cert_eu/test_parser_csv.py b/intelmq/tests/bots/parsers/cert_eu/test_parser_csv.py index cb4798a31..e0b03a31a 100644 --- a/intelmq/tests/bots/parsers/cert_eu/test_parser_csv.py +++ b/intelmq/tests/bots/parsers/cert_eu/test_parser_csv.py @@ -30,7 +30,6 @@ "time.source": "2019-04-01T03:13:20+00:00", "tlp": "AMBER", "event_description.text": "A URL is the most common resource with reference to malware binary distribution.", - "source.geolocation.country": "Germany", 'source.asn': 65536, 'source.geolocation.city': 'Linz', 'source.geolocation.country': 'Austria', diff --git a/intelmq/tests/bots/parsers/cymru/certname_20190327.txt b/intelmq/tests/bots/parsers/cymru/certname_20190327.txt index b7f8d3bd1..36a5ba8f4 100644 --- a/intelmq/tests/bots/parsers/cymru/certname_20190327.txt +++ b/intelmq/tests/bots/parsers/cymru/certname_20190327.txt @@ -13,6 +13,7 @@ phishing|172.16.0.21|64496|2019-03-20 13:03:18|http://www.example.com/;|Example proxy|172.16.0.21|64496|2019-03-25 16:00:00|proxy_type: http-34320;|Example AS Name, AT proxy|172.16.0.21|64496|2019-03-25 10:38:00|socks4-61039;|Example AS Name, AT openresolvers|172.16.0.21|64496|2019-03-25 06:29:38||Example AS Name, AT +openresolver|172.16.0.21|64496|2020-06-08 18:28:35||Example AS Name, AT proxy|172.16.0.21|64496|2019-09-11 08:05:00|proxy_type: httppost;|Example AS Name, AT bot|172.16.0.21|64496|2019-09-11 16:39:57|srcport 61458 mwtype Conficker dstaddr 172.16.0.22|Example AS Name, AT bot|172.16.0.21|64496|2019-09-11 00:31:30|family: azorult;dest_addr: 172.16.0.22; dest_port: 80;port: 15390;protocol: 6;|Example AS Name, AT @@ -26,3 +27,5 @@ scanner|172.16.0.21|64496|2019-09-19 00:03:13|destination_port_numbers: 57518;po darknet|172.16.0.21|64496|2019-09-30 13:49:49|destination_port_numbers: 17875,24526,54449,9314,4903,1568,20749,30524,59316,60704 (total_count:19);port: 40434;protocol: 17;|Example AS Name, AT spam|172.16.0.21|64496|2019-10-02 23:00:17||Example AS Name, AT phishing|172.16.0.21|64496|2019-10-23 12:46:18||Example AS Name, AT +darknet|172.16.0.21|64496|2020-01-10 09:17:17|destination_port_numbers: 0;protocol: 11;|Example AS Name, AT +conficker|172.16.0.21|64496|2020-05-08 09:13:34|srcport: 1997; destaddr: 172.16.0.22;|Example AS Name, AT diff --git a/intelmq/tests/bots/parsers/cymru/test_cap_program.py b/intelmq/tests/bots/parsers/cymru/test_cap_program.py index 
4950a26f8..659b52dba 100644 --- a/intelmq/tests/bots/parsers/cymru/test_cap_program.py +++ b/intelmq/tests/bots/parsers/cymru/test_cap_program.py @@ -17,7 +17,6 @@ 'time.observation': '2015-11-01T00:01:45+00:05', } EVENT0 = {'__type': 'Event', - 'time.source': '2017-10-31 10:00:00', 'time.observation': '2015-11-01T00:01:45+00:05', 'classification.identifier': 'ssh', 'classification.type': 'brute-force', diff --git a/intelmq/tests/bots/parsers/cymru/test_cap_program_new.py b/intelmq/tests/bots/parsers/cymru/test_cap_program_new.py index 82929842a..52efac728 100644 --- a/intelmq/tests/bots/parsers/cymru/test_cap_program_new.py +++ b/intelmq/tests/bots/parsers/cymru/test_cap_program_new.py @@ -98,6 +98,11 @@ 'classification.identifier': 'dns-open-resolver', 'protocol.application': 'dns', }, + {'time.source': '2020-06-08T18:28:35+00:00', + 'classification.type': 'vulnerable service', + 'classification.identifier': 'dns-open-resolver', + 'protocol.application': 'dns', + }, {'time.source': '2019-09-11T08:05:00+00:00', 'classification.type': 'proxy', 'classification.identifier': 'openproxy', @@ -180,10 +185,24 @@ 'classification.type': 'phishing', 'classification.identifier': 'phishing', }, + {'classification.type': 'scanner', + 'classification.identifier': 'darknet', + 'protocol.transport': 'nvp-ii', + 'destination.port': 0, + 'time.source': '2020-01-10T09:17:17+00:00', + }, + {'classification.type': 'infected-system', + 'classification.identifier': 'conficker', + 'malware.name': 'conficker', + 'source.port': 1997, + 'destination.ip': '172.16.0.22', + 'time.source': '2020-05-08T09:13:34+00:00', + }, ] + # The number of events a single line in the raw data produces NUM_EVENTS = [1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 10, 1, 1] + 1, 1, 10, 1, 1, 1, 1] RAWS = [] for i, line in enumerate(RAW_LINES[3:]): for count in range(NUM_EVENTS[i]): diff --git a/intelmq/tests/bots/parsers/cymru/test_full_bogons.py b/intelmq/tests/bots/parsers/cymru/test_full_bogons.py index 1f9bdaaff..63ae52e5d 100644 --- a/intelmq/tests/bots/parsers/cymru/test_full_bogons.py +++ b/intelmq/tests/bots/parsers/cymru/test_full_bogons.py @@ -17,7 +17,7 @@ 'source.network': '0.0.0.0/8', 'classification.type': 'blacklist', 'time.observation': '2015-11-01T00:01:45+00:05', - 'raw': 'MC4wLjAuMC84', + 'raw': 'IyBsYXN0IHVwZGF0ZWQgMTQ1MDE5MzcwMiAoVHVlIERlYyAxNSAxNTozNTowMiAyMDE1IEdNVCkKMC4wLjAuMC84', } EVENT2 = {'__type': 'Event', 'feed.url': 'https://www.team-cymru.org/Services/Bogons/fullbogons-ipv4.txt', @@ -25,7 +25,20 @@ 'source.network': '2.56.0.0/14', 'classification.type': 'blacklist', 'time.observation': '2015-11-01T00:01:45+00:05', - 'raw': 'Mi41Ni4wLjAvMTQ=' + 'raw': 'IyBsYXN0IHVwZGF0ZWQgMTQ1MDE5MzcwMiAoVHVlIERlYyAxNSAxNTozNTowMiAyMDE1IEdNVCkKMi41Ni4wLjAvMTQ=' + } +V6REPO = {'__type': 'Report', + 'feed.url': 'https://www.team-cymru.org/Services/Bogons/fullbogons-ipv6.txt', + 'raw': 'IyBsYXN0IHVwZGF0ZWQgMTU4NTE0MDYwMSAoV2VkIE1hciAyNSAxMjo1MDowMSAyMDIwIEdNVCkKOjovOAoxMDA6Oi84Cg==', + 'time.observation': '2020-03-25T16:42:45+00:00', + } +V6EVEN = {'__type': 'Event', + 'feed.url': 'https://www.team-cymru.org/Services/Bogons/fullbogons-ipv6.txt', + 'time.source': '2020-03-25T12:50:01+00:00', + 'source.network': '::/8', + 'classification.type': 'blacklist', + 'time.observation': '2020-03-25T16:42:45+00:00', + 'raw': 'IyBsYXN0IHVwZGF0ZWQgMTU4NTE0MDYwMSAoV2VkIE1hciAyNSAxMjo1MDowMSAyMDIwIEdNVCkKOjovOA==', } @@ -37,14 +50,19 @@ class TestCymruFullBogonsParserBot(test.BotTestCase, unittest.TestCase): 
@classmethod def set_bot(cls): cls.bot_reference = CymruFullBogonsParserBot - cls.default_input_message = {'__type': 'Report', 'raw': 'Cg=='} - def test_events(self): - """ Test if correct Events have been produced. """ + def test_ipv4_events(self): + """ Test if correct IPv4 Events have been produced. """ self.input_message = REPORT self.run_bot() self.assertMessageEqual(0, EVENT1) self.assertMessageEqual(1, EVENT2) + def test_ipv6_events(self): + """ Test if correct IPv6 Events have been produced. """ + self.input_message = V6REPO + self.run_bot() + self.assertMessageEqual(0, V6EVEN) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/parsers/fraunhofer/ddosattack_tests_common.py b/intelmq/tests/bots/parsers/fraunhofer/ddosattack_tests_common.py index bf29e04d2..e55b9a3e7 100644 --- a/intelmq/tests/bots/parsers/fraunhofer/ddosattack_tests_common.py +++ b/intelmq/tests/bots/parsers/fraunhofer/ddosattack_tests_common.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import json -import unittest from intelmq.lib import test, utils diff --git a/intelmq/tests/bots/parsers/fraunhofer/test_parser_ddosattack_cnc.py b/intelmq/tests/bots/parsers/fraunhofer/test_parser_ddosattack_cnc.py deleted file mode 100644 index 4d6101ef0..000000000 --- a/intelmq/tests/bots/parsers/fraunhofer/test_parser_ddosattack_cnc.py +++ /dev/null @@ -1,112 +0,0 @@ -# -*- coding: utf-8 -*- -import unittest - -from intelmq.bots.parsers.fraunhofer.parser_ddosattack_cnc import \ - FraunhoferDdosAttackCncParserBot -from intelmq.tests.bots.parsers.fraunhofer.ddosattack_tests_common import \ - FraunhoferDdosAttackTestCase, ddos_message, create_event - - -CNC_EVENT_TEMPLATE = { - 'feed.url': 'https://feed.caad.fkie.fraunhofer.de/ddosattackfeed', - 'feed.name': 'Fraunhofer DDoS Attack Feed', - '__type': 'Event', - 'classification.type': 'c2server', - 'classification.taxonomy': 'malicious code', - 'malware.name': 'some_malware', - 'time.source': '2018-02-05T10:15:42+00:00', -} - - -class TestFraunhoferDdosAttackCncParserBot(FraunhoferDdosAttackTestCase, unittest.TestCase): - """ - A TestCase for a FraunhoferDdosAttackCncParserBot. 
- """ - - @classmethod - def set_bot(cls): - cls.bot_reference = FraunhoferDdosAttackCncParserBot - - def test_classic_cnc_message_results_correct_cnc_event(self): - message = ddos_message() - self.set_input_message(message) - - self.run_bot() - - self.assertOutputQueueLen(1) - self.assert_cnc_event(0, message, {'source.ip': '1.2.3.4', - 'source.port': 4711}) - - def test_set_fqdn_when_cnc_domain_given(self): - message = ddos_message(domain='evil.com') - self.set_input_message(message) - - self.run_bot() - - self.assert_cnc_event(0, message, { - 'source.fqdn': 'evil.com', - 'source.ip': '1.2.3.4', - 'source.port': 4711 - }) - - def test_classic_cnc_message_with_unknown_messagetype_results_in_cnc_event_with_unknown_message_type_accuracy(self): - self.sysconfig = {'unknown_messagetype_accuracy': 11.0} - message = ddos_message( - message='content of unknown message', - messagetype='unknown_messagetype', - ) - self.set_input_message(message) - - self.run_bot() - - self.assertOutputQueueLen(1) - self.assert_cnc_event( - queue_pos=0, - original_message=message, - expected_fields={'feed.accuracy': 11.0, 'source.ip': '1.2.3.4', - 'source.port': 4711} - ) - - def test_message_with_unknown_cnc_type_results_in_no_events(self): - message = ddos_message( - cnc='some_cnc_information', - cnctype='some_unknown_cnc_type', - message='content of unknown message', - messagetype='unknown_messagetype', - ) - self.set_input_message(message) - self.allowed_error_count = 1 - - self.run_bot() - - self.assertRegexpMatchesLog('(ValueError.*unsupported cnctype ' - 'some_unknown_cnc_type)') - self.assertOutputQueueLen(0) - - def test_multiple_messages_are_parsed_correctly(self): - single_target = ddos_message() - unknown_message = ddos_message( - message='content of unknown message', - messagetype='unknown_messagetype', - ) - self.set_input_message( - single_target, - unknown_message - ) - - self.run_bot(iterations=2) - - self.assertOutputQueueLen(2) - self.assert_cnc_event(0, single_target, {'source.ip': '1.2.3.4', - 'source.port': 4711}) - self.assert_cnc_event(1, unknown_message, {'source.ip': '1.2.3.4', - 'source.port': 4711}) - - def assert_cnc_event(self, queue_pos, original_message, expected_fields): - event = create_event(CNC_EVENT_TEMPLATE, expected_fields, - original_message) - self.assertMessageEqual(queue_pos, event) - - -if __name__ == '__main__': # pragma: no cover - unittest.main() diff --git a/intelmq/tests/bots/parsers/fraunhofer/test_parser_ddosattack_target.py b/intelmq/tests/bots/parsers/fraunhofer/test_parser_ddosattack_target.py deleted file mode 100644 index 2323dbeb7..000000000 --- a/intelmq/tests/bots/parsers/fraunhofer/test_parser_ddosattack_target.py +++ /dev/null @@ -1,97 +0,0 @@ -# -*- coding: utf-8 -*- -import unittest - -from intelmq.bots.parsers.fraunhofer.parser_ddosattack_target import \ - FraunhoferDdosAttackTargetParserBot -from intelmq.tests.bots.parsers.fraunhofer.ddosattack_tests_common import \ - FraunhoferDdosAttackTestCase, ddos_message, create_event - - -DDOS_TARGET_EVENT_TEMPLATE = { - 'feed.url': 'https://feed.caad.fkie.fraunhofer.de/ddosattackfeed', - 'feed.name': 'Fraunhofer DDoS Attack Feed', - '__type': 'Event', - 'classification.type': 'ddos', - 'malware.name': 'some_malware', - 'classification.taxonomy': 'availability', - 'time.source': '2018-02-05T10:15:42+00:00', -} - - -class TestFraunhoferDdosAttackTargetParserBot(FraunhoferDdosAttackTestCase, unittest.TestCase): - """ - A TestCase for a FraunhoferDdosAttackTargetParserBot. 
- """ - - @classmethod - def set_bot(cls): - cls.bot_reference = FraunhoferDdosAttackTargetParserBot - - def test_cnc_message_with_single_ddos_target_results_in_correct_ddos_event(self): - message = ddos_message() - self.set_input_message(message) - - self.run_bot() - - self.assertOutputQueueLen(1) - self.assert_ddos_event(0, message, {'destination.ip': '4.3.2.1'}) - - def test_cnc_message_with_multiple_ddos_targets_results_in_multiple_ddos_events(self): - message = ddos_message( - targets=['4.3.2.1/32', '4.3.2.2', '4.3.2.3/24', 'sometarget.com'] - ) - self.set_input_message(message) - - self.run_bot() - - self.assertOutputQueueLen(4) - self.assert_ddos_event(0, message, {'destination.ip': '4.3.2.1'}) - self.assert_ddos_event(1, message, {'destination.ip': '4.3.2.2'}) - self.assert_ddos_event(2, message, {'destination.network': - '4.3.2.0/24'}) - self.assert_ddos_event(3, message, {'destination.fqdn': - 'sometarget.com'}) - - def test_cnc_message_with_unknown_messagetype_results_in_no_ddos_event(self): - message = ddos_message( - message='content of unknown message', - messagetype='unknown_messagetype', - ) - self.set_input_message(message) - self.allowed_error_count = 1 - - self.run_bot() - - self.assertRegexpMatchesLog('(ValueError.*unsupported messagetype ' - 'unknown_messagetype)') - self.assertOutputQueueLen(0) - - def test_multiple_messages_are_parsed_correctly(self): - single_target = ddos_message() - multi_target = ddos_message( - targets=['4.3.2.1/32', '4.3.2.2', '4.3.2.3/24', 'sometarget.com'] - ) - self.set_input_message( - single_target, - multi_target - ) - - self.run_bot(iterations=2) - - self.assertOutputQueueLen(5) - self.assert_ddos_event(0, single_target, {'destination.ip': '4.3.2.1'}) - self.assert_ddos_event(1, multi_target, {'destination.ip': '4.3.2.1'}) - self.assert_ddos_event(2, multi_target, {'destination.ip': '4.3.2.2'}) - self.assert_ddos_event(3, multi_target, {'destination.network': - '4.3.2.0/24'}) - self.assert_ddos_event(4, multi_target, {'destination.fqdn': - 'sometarget.com'}) - - def assert_ddos_event(self, queue_pos, original_message, expected_fields): - event = create_event(DDOS_TARGET_EVENT_TEMPLATE, expected_fields, - original_message) - self.assertMessageEqual(queue_pos, event) - - -if __name__ == '__main__': # pragma: no cover - unittest.main() diff --git a/intelmq/tests/bots/parsers/generic/test_parser_csv_extra_regex.py b/intelmq/tests/bots/parsers/generic/test_parser_csv_extra_regex.py index 890d8c264..b592bfa6b 100644 --- a/intelmq/tests/bots/parsers/generic/test_parser_csv_extra_regex.py +++ b/intelmq/tests/bots/parsers/generic/test_parser_csv_extra_regex.py @@ -16,8 +16,7 @@ "__type": "Report", "time.observation": "2015-01-01T00:00:00+00:00", } -EXAMPLE_EVENT = {"feed.name": "Sample CSV Feed", - "__type": "Event", +EXAMPLE_EVENT = {"__type": "Event", "raw": utils.base64_encode(SAMPLE_SPLIT[0] + '\r\n' + SAMPLE_SPLIT[1].replace('"', '')+'\r\n'), "time.observation": "2015-01-01T00:00:00+00:00", @@ -37,8 +36,7 @@ "source.asn": 65536, "time.source": "2017-03-25T23:59:43+00:00" } -EXAMPLE_EVENT2 = {"feed.name": "Sample CSV Feed", - "__type": "Event", +EXAMPLE_EVENT2 = {"__type": "Event", "raw": utils.base64_encode(SAMPLE_SPLIT[0] + '\r\n' + SAMPLE_SPLIT[2].replace('"', '')+'\r\n'), "time.observation": "2015-01-01T00:00:00+00:00", diff --git a/intelmq/tests/bots/parsers/generic/test_parser_multivalue_cols.py b/intelmq/tests/bots/parsers/generic/test_parser_multivalue_cols.py index 25cc27e3c..f417acd4c 100644 --- 
a/intelmq/tests/bots/parsers/generic/test_parser_multivalue_cols.py +++ b/intelmq/tests/bots/parsers/generic/test_parser_multivalue_cols.py @@ -5,7 +5,6 @@ import intelmq.lib.utils as utils import intelmq.lib.test as test -import intelmq.lib.exceptions as exceptions from intelmq.bots.parsers.generic.parser_csv import GenericCsvParserBot with open(os.path.join(os.path.dirname(__file__), 'multivalue_columns.csv')) as handle: diff --git a/intelmq/tests/bots/parsers/github_feed/__init__.py b/intelmq/tests/bots/parsers/github_feed/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/intelmq/tests/bots/parsers/github_feed/example_ioc_strangereal_intel.json b/intelmq/tests/bots/parsers/github_feed/example_ioc_strangereal_intel.json new file mode 100644 index 000000000..ad3a93585 --- /dev/null +++ b/intelmq/tests/bots/parsers/github_feed/example_ioc_strangereal_intel.json @@ -0,0 +1,22 @@ +[ + { + "Indicator": "A7A849895CFB3C24A599ABA4CA4BE666D149E60765F7801C47C18D9D8035E826", + "Description": "Document" + }, + { + "Indicator": "35dc4564216d51e64e40bc0eef932d2e", + "Description": "payroll-123456.xls" + }, + { + "Indicator": "hack.mds.dom", + "Description": "" + }, + { + "Indicator": "10.0.0.1", + "Description": "IP C2" + }, + { + "Indicator": "https://10.2.2.1/malicious.docm", + "Description": "URL" + } +] \ No newline at end of file diff --git a/intelmq/tests/bots/parsers/github_feed/test_parser.py b/intelmq/tests/bots/parsers/github_feed/test_parser.py new file mode 100644 index 000000000..8d31d4007 --- /dev/null +++ b/intelmq/tests/bots/parsers/github_feed/test_parser.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 + +import json +import os +import unittest + +import intelmq.lib.test as test +import intelmq.lib.utils as utils +from intelmq.bots.parsers.github_feed.parser import GithubFeedParserBot + +with open(os.path.join(os.path.dirname(__file__), 'example_ioc_strangereal_intel.json')) as handle: + EXAMPLE_STRANGERINTEL_FILE_CONTENTS = handle.read() + EXAMPLE_STRANGERINTEL_FILE_JSON = json.loads(EXAMPLE_STRANGERINTEL_FILE_CONTENTS) + +EXAMPLE_STRANGEREALINTEL_REPORT = { + "feed.url": "https://raw.githubusercontent.com/StrangerealIntel/DailyIOC/master/02-12-19/JSON/IOC_TA505_Nov19_2.json", + "feed.name": "Strangereal Intel DailyIOC", + "time.observation": "2019-03-01T01:01:01+00:00", + "__type": "Report", + "raw": utils.base64_encode(EXAMPLE_STRANGERINTEL_FILE_CONTENTS) +} + +EXAMPLE_STRANGEREALINTEL_EVENT = { + "feed.url": "https://raw.githubusercontent.com/StrangerealIntel/DailyIOC/master/02-12-19/JSON/IOC_TA505_Nov19_2.json", + "feed.name": "Strangereal Intel DailyIOC", + "time.observation": "2019-03-01T01:01:01+00:00", + "classification.taxonomy": "other", + "classification.type": "unknown", + "__type": "Event" +} + + +@test.skip_exotic() +class TestGithubFeedParserBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for GithubFeedParserBot. 
+ """ + + @classmethod + def set_bot(cls): + cls.bot_reference = GithubFeedParserBot + cls.default_input_message = EXAMPLE_STRANGEREALINTEL_REPORT + + def test_no_processing_is_executed_for_the_feed_is_unknown(self): + wrong_report = EXAMPLE_STRANGEREALINTEL_REPORT.copy() + wrong_report['feed.url'] = 'https://raw.githubusercontent.com/DummyUser/NonexistingFeed/master/02-12-19/JSON/IOC_TA505_Nov19_2.json' + + self.input_message = wrong_report + self.allowed_error_count = 1 + self.run_bot() + + self.assertRegexpMatchesLog("Unknown feed '{}'.".format(wrong_report['feed.url'])) + + def test_extra_fields_are_present_in_generated_event(self): + custom_report = EXAMPLE_STRANGEREALINTEL_REPORT.copy() + custom_report['extra.file_metadata'] = { + 'sha': 'e345678934567893456789', + 'size': 111 + } + + self.input_message = custom_report + self.run_bot() + + for event in self.get_output_queue(): + assert 'extra.file_metadata.sha' in event and 'extra.file_metadata.size' in event + + def test_strangerealintel_feed_processing_is_successful(self): + self.run_bot() + + self.assertOutputQueueLen(len(EXAMPLE_STRANGERINTEL_FILE_JSON)) + + sha256_event = EXAMPLE_STRANGEREALINTEL_EVENT.copy() + sha256_event['malware.hash.sha256'] = EXAMPLE_STRANGERINTEL_FILE_JSON[0]['Indicator'] + sha256_event['event_description.text'] = EXAMPLE_STRANGERINTEL_FILE_JSON[0]['Description'] + sha256_event['classification.taxonomy'] = 'malicious code' + sha256_event['classification.type'] = 'malware' + sha256_event['raw'] = utils.base64_encode(str(EXAMPLE_STRANGERINTEL_FILE_JSON[0])) + self.assertMessageEqual(0, sha256_event) + + md5_event = EXAMPLE_STRANGEREALINTEL_EVENT.copy() + md5_event['malware.hash.md5'] = EXAMPLE_STRANGERINTEL_FILE_JSON[1]['Indicator'] + md5_event['event_description.text'] = EXAMPLE_STRANGERINTEL_FILE_JSON[1]['Description'] + md5_event['classification.taxonomy'] = 'malicious code' + md5_event['classification.type'] = 'malware' + md5_event['raw'] = utils.base64_encode(str(EXAMPLE_STRANGERINTEL_FILE_JSON[1])) + self.assertMessageEqual(1, md5_event) + + domain_event = EXAMPLE_STRANGEREALINTEL_EVENT.copy() + domain_event['source.fqdn'] = EXAMPLE_STRANGERINTEL_FILE_JSON[2]['Indicator'] + # description text is empty so no field is created + # domain_event['event_description.text'] = EXAMPLE_STRANGERINTEL_FILE_JSON[2]['Description'] + domain_event['raw'] = utils.base64_encode(str(EXAMPLE_STRANGERINTEL_FILE_JSON[2])) + self.assertMessageEqual(2, domain_event) + + ip_event = EXAMPLE_STRANGEREALINTEL_EVENT.copy() + ip_event['source.ip'] = EXAMPLE_STRANGERINTEL_FILE_JSON[3]['Indicator'] + ip_event['event_description.text'] = EXAMPLE_STRANGERINTEL_FILE_JSON[3]['Description'] + ip_event['raw'] = utils.base64_encode(str(EXAMPLE_STRANGERINTEL_FILE_JSON[3])) + self.assertMessageEqual(3, ip_event) + + url_event = EXAMPLE_STRANGEREALINTEL_EVENT.copy() + url_event['source.url'] = EXAMPLE_STRANGERINTEL_FILE_JSON[4]['Indicator'] + url_event['event_description.text'] = EXAMPLE_STRANGERINTEL_FILE_JSON[4]['Description'] + url_event['raw'] = utils.base64_encode(str(EXAMPLE_STRANGERINTEL_FILE_JSON[4])) + self.assertMessageEqual(4, url_event) + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/html_table/test_parser_column_split.py b/intelmq/tests/bots/parsers/html_table/test_parser_column_split.py index 3b4e6878a..9a5cba784 100644 --- a/intelmq/tests/bots/parsers/html_table/test_parser_column_split.py +++ b/intelmq/tests/bots/parsers/html_table/test_parser_column_split.py @@ -1,4 
+1,5 @@ # -*- coding: utf-8 -*- +import datetime import os import unittest @@ -10,6 +11,7 @@ with open(os.path.join(os.path.dirname(__file__), 'html_table_column_split.data')) as handle: SAMPLE_FILE = handle.read() +THIS_YEAR = datetime.date.today().year EXAMPLE_REPORT = {"feed.name": "HTML Table Feed", "feed.url": "http://vxvault.net/ViriList.php", "raw": utils.base64_encode(SAMPLE_FILE), @@ -20,7 +22,7 @@ "feed.url": "http://vxvault.net/ViriList.php", "__type": "Event", "source.url": "http://lingvaworld.ru/media/system/css/messg.jpg", - "time.source": "2019-02-15T00:00:00+00:00", + "time.source": "%d-02-15T00:00:00+00:00" % THIS_YEAR, "classification.type": "malware", "source.ip": "81.177.135.172", "time.observation": "2019-01-01T00:00:00+00:00", diff --git a/intelmq/tests/bots/parsers/html_table/test_parser_with_attribute.py b/intelmq/tests/bots/parsers/html_table/test_parser_with_attribute.py index c0be2ca4b..6bac311c8 100644 --- a/intelmq/tests/bots/parsers/html_table/test_parser_with_attribute.py +++ b/intelmq/tests/bots/parsers/html_table/test_parser_with_attribute.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import datetime import os import unittest @@ -10,6 +11,7 @@ with open(os.path.join(os.path.dirname(__file__), 'html_table_with_attribute.data')) as handle: SAMPLE_FILE = handle.read() +THIS_YEAR = datetime.date.today().year EXAMPLE_REPORT = {"feed.name": "HTML Table Feed", "raw": utils.base64_encode(SAMPLE_FILE), "__type": "Report", @@ -28,7 +30,7 @@ "jE3MsKgPC90ZD4KPC90cj4=", "source.ip": "81.177.135.172", "source.url": "http://lingvaworld.ru/media/system/css/messg.jpg", - "time.source": "2019-02-15T00:00:00+00:00"} + "time.source": "%d-02-15T00:00:00+00:00" % THIS_YEAR} EXAMPLE_EVENT1 = {"feed.name": "HTML Table Feed", "__type": "Event", diff --git a/intelmq/tests/bots/parsers/malwaredomains/domains.txt b/intelmq/tests/bots/parsers/malwaredomains/domains.txt index ce28444bd..ce7983178 100644 --- a/intelmq/tests/bots/parsers/malwaredomains/domains.txt +++ b/intelmq/tests/bots/parsers/malwaredomains/domains.txt @@ -4,3 +4,4 @@ ## notice notice duplication is not permitted #=comment example.com phishing openphish.com 20160527 20160108 example.invalid phishing openphish.com 20160527 20160108 + example.net C&C source.example.com 20171201 20160719 20160310 diff --git a/intelmq/tests/bots/parsers/malwaredomains/test_parser.py b/intelmq/tests/bots/parsers/malwaredomains/test_parser.py index b0a59ab04..a25fbc1c1 100644 --- a/intelmq/tests/bots/parsers/malwaredomains/test_parser.py +++ b/intelmq/tests/bots/parsers/malwaredomains/test_parser.py @@ -10,17 +10,26 @@ RAW = base64.b64encode(fh.read()).decode() OUTPUT1 = {'__type': 'Event', - 'classification.type': 'malware', + 'classification.type': 'phishing', 'event_description.text': 'phishing', + 'classification.identifier': 'phishing', 'raw': 'CQlleGFtcGxlLmNvbQlwaGlzaGluZwlvcGVucGhpc2guY29tCTIwMTYwNTI3CTIwMTYwMTA4', 'source.fqdn': 'example.com', 'time.source': '2016-05-27T00:00:00+00:00'} OUTPUT2 = {'__type': 'Event', - 'classification.type': 'malware', + 'classification.type': 'phishing', 'event_description.text': 'phishing', + 'classification.identifier': 'phishing', 'raw': 'CQlleGFtcGxlLmludmFsaWQJcGhpc2hpbmcJb3BlbnBoaXNoLmNvbQkyMDE2MDUyNwkyMDE2MDEwOA==', 'source.fqdn': 'example.invalid', 'time.source': '2016-05-27T00:00:00+00:00'} +OUTPUT3 = {'__type': 'Event', + 'classification.type': 'c2server', + 'event_description.text': 'C&C', + 'classification.identifier': 'C&C', + 'raw': 
'CQlleGFtcGxlLm5ldAlDJkMJc291cmNlLmV4YW1wbGUuY29tCTIwMTcxMjAxCTIwMTYwNzE5CTIwMTYwMzEw', + 'source.fqdn': 'example.net', + 'time.source': '2017-12-01T00:00:00+00:00'} class TestMalwareDomainsParserBot(test.BotTestCase, unittest.TestCase): @@ -37,6 +46,7 @@ def test_event(self): self.run_bot() self.assertMessageEqual(0, OUTPUT1) self.assertMessageEqual(1, OUTPUT2) + self.assertMessageEqual(2, OUTPUT3) if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/parsers/microsoft/ctip_azure.txt b/intelmq/tests/bots/parsers/microsoft/ctip_azure.txt new file mode 100644 index 000000000..c7f6f74dd --- /dev/null +++ b/intelmq/tests/bots/parsers/microsoft/ctip_azure.txt @@ -0,0 +1,2 @@ +{"DataFeed":"CTIP-Infected","SourcedFrom":"SinkHoleMessage","DateTimeReceivedUtc":132348339284870000,"DateTimeReceivedUtcTxt":"Sunday May 24 2020 22:45:28.4870","Malware":"Avalanche","ThreatCode":"B67-SS-TINBA","ThreatConfidence":"Low","TotalEncounters":3,"TLP":"Amber","SourceIp":"224.0.5.8","SourcePort":65116,"DestinationIp":"198.18.18.18","DestinationPort":80,"TargetIp":"203.0.113.45","TargetPort":80,"SourceIpInfo":{"SourceIpAsnNumber":"64496","SourceIpAsnOrgName":"Example AS 1","SourceIpCountryCode":"AT","SourceIpRegion":"","SourceIpCity":"","SourceIpPostalCode":"","SourceIpLatitude":48.2,"SourceIpLongitude":16.3667,"SourceIpMetroCode":0,"SourceIpAreaCode":0,"SourceIpConnectionType":""},"HttpInfo":{"HttpHost":"","HttpRequest":"","HttpMethod":"","HttpReferrer":"","HttpUserAgent":"","HttpVersion":""},"CustomInfo":{"CustomField1":"tinba","CustomField2":"","CustomField3":"","CustomField4":"","CustomField5":""},"Payload":"eyJ0cyI6MTU5MDM2MDMyOC40ODc0MiwiaXAiOiIxMjcuMC4wLjEiLCJwb3J0Ijo2NTExNiwic2VydmVySXAiOiIxOTguMTguMTg1LjE2MiIsInNlcnZlclBvcnQiOjgwLCJkb21haW4iOiJleGFtcGxlLmNvbSIsImZhbWlseSI6InRpbmJhIiwibWFsd2FyZSI6e30sInJlc3BvbnNlIjoiUmVzcG9uc2UiLCJoYW5kbGVyIjoidGluYmEiLCJ0eXBlIjoiSHR0cCJ9"} +{"DataFeed":"CTIP-Infected","SourcedFrom":"SinkHoleMessage","DateTimeReceivedUtc":132348340630510000,"DateTimeReceivedUtcTxt":"Sunday May 24 2020 22:47:43.0510","Malware":"Avalanche","ThreatCode":"B67-SS-MATSNU","ThreatConfidence":"High","TotalEncounters":5,"TLP":"Amber","SourceIp":"224.0.5.8","SourcePort":49296,"DestinationIp":"198.18.18.18","DestinationPort":80,"TargetIp":"203.0.113.45","TargetPort":80,"SourceIpInfo":{"SourceIpAsnNumber":"64497","SourceIpAsnOrgName":"Example AS 2","SourceIpCountryCode":"AT","SourceIpRegion":"Vienna","SourceIpCity":"Vienna","SourceIpPostalCode":"1060","SourceIpLatitude":48.1951,"SourceIpLongitude":16.3483,"SourceIpMetroCode":0,"SourceIpAreaCode":9,"SourceIpConnectionType":""},"HttpInfo":{"HttpHost":"","HttpRequest":"","HttpMethod":"","HttpReferrer":"","HttpUserAgent":"","HttpVersion":""},"CustomInfo":{"CustomField1":"matsnu5","CustomField2":"","CustomField3":"","CustomField4":"","CustomField5":""},"Payload":"dGhpcyBpcyBqdXN0IHNvbWUgdGV4dA=="} diff --git a/intelmq/tests/bots/parsers/microsoft/test_parser_ctip.py b/intelmq/tests/bots/parsers/microsoft/test_parser_ctip.py index b8ec46d14..fedbb456d 100644 --- a/intelmq/tests/bots/parsers/microsoft/test_parser_ctip.py +++ b/intelmq/tests/bots/parsers/microsoft/test_parser_ctip.py @@ -35,7 +35,7 @@ 'source.ip': '224.0.5.8', 'source.port': 1204, 'time.source': '2018-02-06T09:37:02+00:00', - "raw": base64_encode(json.dumps([EXAMPLE_PARSED[0]], sort_keys=True)), + "raw": base64_encode(json.dumps([EXAMPLE_PARSED[0]])), }, { "__type": "Event", 'classification.type': 'infected-system', @@ -51,7 +51,7 @@ 'source.ip': 
'10.0.0.5', 'source.port': 25310, 'time.source': '2018-02-06T09:38:46+00:00', - "raw": base64_encode(json.dumps([EXAMPLE_PARSED[1]], sort_keys=True)), + "raw": base64_encode(json.dumps([EXAMPLE_PARSED[1]])), }, { "__type": "Event", 'classification.type': 'infected-system', @@ -67,7 +67,7 @@ 'source.ip': '19.168.46.126', 'source.port': 49970, 'time.source': '2018-02-06T09:40:19+00:00', - "raw": base64_encode(json.dumps([EXAMPLE_PARSED[2]], sort_keys=True)), + "raw": base64_encode(json.dumps([EXAMPLE_PARSED[2]])), }, { "__type": "Event", 'classification.type': 'infected-system', @@ -83,7 +83,7 @@ 'source.ip': '198.51.100.100', 'source.port': 42996, 'time.source': '2018-02-06T09:43:19+00:00', - "raw": base64_encode(json.dumps([EXAMPLE_PARSED[3]], sort_keys=True)), + "raw": base64_encode(json.dumps([EXAMPLE_PARSED[3]])), }, { # ignore hostname if invalid "__type": "Event", 'classification.type': 'infected-system', @@ -101,7 +101,7 @@ 'source.ip': '224.34.234.52', 'source.port': 55522, 'time.source': '2019-03-17T09:05:50+00:00', - "raw": base64_encode(json.dumps([EXAMPLE_PARSED[4]], sort_keys=True)), + "raw": base64_encode(json.dumps([EXAMPLE_PARSED[4]])), }, ] diff --git a/intelmq/tests/bots/parsers/microsoft/test_parser_ctip_azure.py b/intelmq/tests/bots/parsers/microsoft/test_parser_ctip_azure.py new file mode 100644 index 000000000..6c7d6542b --- /dev/null +++ b/intelmq/tests/bots/parsers/microsoft/test_parser_ctip_azure.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +import json +import os +import unittest + +from intelmq.bots.parsers.microsoft.parser_ctip import MicrosoftCTIPParserBot +from intelmq.lib import test +from intelmq.lib.utils import base64_encode + + +with open(os.path.join(os.path.dirname(__file__), 'ctip_azure.txt')) as handle: + EXAMPLE_DATA = handle.read() +EXAMPLE_LINES = EXAMPLE_DATA.splitlines() +EXAMPLE_PARSED = [json.loads(EXAMPLE_LINES[0]), json.loads(EXAMPLE_LINES[1])] + + +EXAMPLE_REPORT = { + "__type": "Report", + "feed.accuracy": 100.0, + "time.observation": "2016-06-15T09:25:26+00:00", + "raw": base64_encode(EXAMPLE_DATA) +} + +EXAMPLE_TEMPLATE = { + "feed.name": "CTIP-Infected", + "event_description.text": "SinkHoleMessage", + "tlp": "AMBER", + } + + +EXAMPLE_EVENTS = [{ + "__type": "Event", + 'feed.accuracy': 20.0, + 'classification.type': 'infected-system', + 'destination.ip': '198.18.18.18', + 'destination.port': 80, + 'malware.name': 'b67-ss-tinba', + 'source.asn': 64496, + 'source.ip': '224.0.5.8', + "extra.total_encounters": 3, + "source.port": 65116, + "time.source": "2020-05-24T22:45:28.487000+00:00", + "source.as_name": "Example AS 1", + "source.geolocation.cc": "AT", + "source.geolocation.latitude": 48.2, + "source.geolocation.longitude": 16.3667, + "extra.custom_field1": "tinba", + "raw": base64_encode(EXAMPLE_LINES[0]), + "extra.payload.timestamp": '2020-05-24T22:45:28.487420+00:00', + "extra.payload.ip": "127.0.0.1", + "extra.payload.port": 65116, + "extra.payload.server.ip": "198.18.185.162", + "extra.payload.server.port": 80, + "extra.payload.domain": "example.com", + "extra.payload.family": "tinba", + "extra.payload.response": "Response", + "extra.payload.handler": "tinba", + "protocol.application": "http", + 'extra.malware': 'Avalanche', + }, { + "__type": "Event", + 'feed.accuracy': 100.0, + 'classification.type': 'infected-system', + 'destination.ip': '198.18.18.18', + 'destination.port': 80, + 'malware.name': 'b67-ss-matsnu', + "extra.total_encounters": 5, + 'source.asn': 64497, + 'source.ip': '224.0.5.8', + "source.port": 49296, + "time.source":
"2020-05-24T22:47:43.050999+00:00", + "source.as_name": "Example AS 2", + "source.geolocation.cc": "AT", + "source.geolocation.latitude": 48.1951, + "source.geolocation.longitude": 16.3483, +"extra.source.geolocation.area_code": 9, +"extra.source.geolocation.postal_code": '1060', +"source.geolocation.region": "Vienna", +"source.geolocation.city": "Vienna", + "extra.custom_field1": "matsnu5", + "raw": base64_encode(EXAMPLE_LINES[1]), + "extra.payload": 'this is just some text', + 'extra.malware': 'Avalanche', + }, + ] + +for index, data in enumerate(EXAMPLE_EVENTS): + EXAMPLE_EVENTS[index].update(EXAMPLE_TEMPLATE) + + +class TestMicrosoftCTIPParserBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for the MicrosoftCTIPParserBot. + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = MicrosoftCTIPParserBot + cls.default_input_message = EXAMPLE_REPORT + + def test_event(self): + """ Test with azure format. """ + self.run_bot() + for i in range(2): + self.assertMessageEqual(i, EXAMPLE_EVENTS[i]) + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/n6/test_parser.py b/intelmq/tests/bots/parsers/n6/test_parser.py index 4f9455f69..79918644c 100644 --- a/intelmq/tests/bots/parsers/n6/test_parser.py +++ b/intelmq/tests/bots/parsers/n6/test_parser.py @@ -59,12 +59,33 @@ "extra.confidence": "medium", "extra.feed_id": "14758f1afd44c09b7992073ccf00b43d", "time.source": "2018-09-26T08:05:19+00:00", + "malware.name": "some name", 'classification.identifier': 'c&c server', 'classification.taxonomy': 'malicious code', 'classification.type': 'c2server', 'extra.feed_source': 'hidden.64534', 'source.fqdn': 'secao.org', "raw": NO_ADDRESS_REPORT['raw']} +FURTHER_IOCS_REPORT = {"__type": "Report", + "time.observation": "2015-11-17T12:17:27.043452Z", + "raw": utils.base64_encode(""" +{"category": "cnc", "confidence": "medium", "name": "further iocs: text with invalid ’ char", "url": "http://example.net", "fqdn": "example.net", "source": "hidden", "time": "2020-05-04T10:54:15Z", "type": "event", "id": "2f3db54a45039180d452b73d780e5bed"} +""")} +FURTHER_IOCS_EVENT = {"__type": "Event", + "time.observation": "2015-11-17T12:17:27.043452Z", + "extra.confidence": "medium", + "extra.feed_id": "2f3db54a45039180d452b73d780e5bed", + "time.source": "2020-05-04T10:54:15+00:00", + "malware.name": "further iocs: text with invalid char", + "event_description.text": "further iocs: text with invalid ’ char", + 'classification.identifier': 'c&c server', + 'classification.taxonomy': 'malicious code', + 'classification.type': 'c2server', + 'extra.feed_source': 'hidden', + 'source.fqdn': 'example.net', + 'source.url': 'http://example.net', + "raw": FURTHER_IOCS_REPORT['raw']} + class TestN6StompParserBot(test.BotTestCase, unittest.TestCase): @@ -94,6 +115,12 @@ def test_no_address(self): self.run_bot() self.assertMessageEqual(0, NO_ADDRESS_EVENT) + def test_futher_ios(self): + """ Test an event with "further iocs""" + self.input_message = FURTHER_IOCS_REPORT + self.run_bot() + self.assertMessageEqual(0, FURTHER_IOCS_EVENT) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/parsers/nothink/blacklist_snmp_day.txt b/intelmq/tests/bots/parsers/nothink/blacklist_snmp_day.txt deleted file mode 100644 index d65d00728..000000000 --- a/intelmq/tests/bots/parsers/nothink/blacklist_snmp_day.txt +++ /dev/null @@ -1,4 +0,0 @@ -# Provided by nothink.org -# SNMP blacklist, last 24 hours (IP address) -# Generated 2016-11-14 
23:02:04 -185.128.40.162 \ No newline at end of file diff --git a/intelmq/tests/bots/parsers/nothink/blacklist_ssh_day.txt b/intelmq/tests/bots/parsers/nothink/blacklist_ssh_day.txt deleted file mode 100644 index 3997dbfd2..000000000 --- a/intelmq/tests/bots/parsers/nothink/blacklist_ssh_day.txt +++ /dev/null @@ -1,4 +0,0 @@ -# Provided by nothink.org -# SSH blacklist, last 24 hours (IP address) -# Generated 2016-11-14 23:02:04 UTC -185.128.40.162 diff --git a/intelmq/tests/bots/parsers/nothink/blacklist_telnet_day.txt b/intelmq/tests/bots/parsers/nothink/blacklist_telnet_day.txt deleted file mode 100644 index afc2745f0..000000000 --- a/intelmq/tests/bots/parsers/nothink/blacklist_telnet_day.txt +++ /dev/null @@ -1,4 +0,0 @@ -# Provided by nothink.org -# Telnet blacklist, last 24 hours (IP address) -# Generated 2016-11-14 23:02:04 UTC -185.128.40.162 diff --git a/intelmq/tests/bots/parsers/nothink/honeypot_dns_attacks.txt b/intelmq/tests/bots/parsers/nothink/honeypot_dns_attacks.txt deleted file mode 100644 index df7b2a4d3..000000000 --- a/intelmq/tests/bots/parsers/nothink/honeypot_dns_attacks.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Provided by nothink.org -# Honeypot DNS and amplification attacks -# Generated 2016-11-21 23:08:02#Dateins,IP ATTACKED,ASN,ASN org,DNS,Geo -"2016-11-11 15:13:20","186.2.167.14","262254","DANCOM LTD,,, BZ","ddos-guard.net","BZ" -"2016-01-24 16:21:18","131.221.47.210","264409","Yax Tecnologia e Informaç...","n/a","UNK" diff --git a/intelmq/tests/bots/parsers/nothink/test_parser.py b/intelmq/tests/bots/parsers/nothink/test_parser.py deleted file mode 100644 index 1c883ee01..000000000 --- a/intelmq/tests/bots/parsers/nothink/test_parser.py +++ /dev/null @@ -1,145 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import unittest - -import intelmq.lib.test as test -import intelmq.lib.utils as utils - -from intelmq.bots.parsers.nothink.parser import NothinkParserBot - -with open(os.path.join(os.path.dirname(__file__), 'blacklist_snmp_day.txt')) as handle: - SNMP_FILE = handle.read() - -with open(os.path.join(os.path.dirname(__file__), 'blacklist_ssh_day.txt')) as handle: - SSH_FILE = handle.read() - -with open(os.path.join(os.path.dirname(__file__), 'blacklist_telnet_day.txt')) as handle: - TELNET_FILE = handle.read() - -with open(os.path.join(os.path.dirname(__file__), 'honeypot_dns_attacks.txt'), encoding='utf-8') as handle: - DNS_ATTACK_FILE = handle.read() - -SNMP_REPORT = {'feed.name': 'SNMP Blacklist', - 'feed.url': 'http://www.nothink.org/blacklist/blacklist_snmp_day.txt', - '__type': 'Report', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': utils.base64_encode(SNMP_FILE) - } -SNMP_EVENT = {'feed.name': 'SNMP Blacklist', - 'feed.url': 'http://www.nothink.org/blacklist/blacklist_snmp_day.txt', - '__type': 'Event', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': 'MTg1LjEyOC40MC4xNjI=', - 'time.source': '2016-11-14T23:02:04+00:00', - 'source.ip': '185.128.40.162', - 'classification.type': 'scanner', - 'protocol.application': 'snmp', - } - -SSH_REPORT = {'feed.name': 'SSH Blacklist', - 'feed.url': 'http://www.nothink.org/blacklist/blacklist_ssh_day.txt', - '__type': 'Report', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': utils.base64_encode(SSH_FILE) - } -SSH_EVENT = {'feed.name': 'SSH Blacklist', - 'feed.url': 'http://www.nothink.org/blacklist/blacklist_ssh_day.txt', - '__type': 'Event', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': 'MTg1LjEyOC40MC4xNjI=', - 'time.source': '2016-11-14T23:02:04+00:00', - 'source.ip': 
'185.128.40.162', - 'classification.type': 'scanner', - 'protocol.application': 'ssh', - } - -TELNET_REPORT = {'feed.name': 'Telnet Blacklist', - 'feed.url': 'http://www.nothink.org/blacklist/blacklist_telnet_day.txt', - '__type': 'Report', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': utils.base64_encode(TELNET_FILE) - } -TELNET_EVENT = {'feed.name': 'Telnet Blacklist', - 'feed.url': 'http://www.nothink.org/blacklist/blacklist_telnet_day.txt', - '__type': 'Event', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': 'MTg1LjEyOC40MC4xNjI=', - 'time.source': '2016-11-14T23:02:04+00:00', - 'source.ip': '185.128.40.162', - 'classification.type': 'scanner', - 'protocol.application': 'telnet', - } - -DNS_REPORT = {'feed.name': 'DNS Attack', - 'feed.url': 'http://www.nothink.org/honeypot_dns_attacks.txt', - '__type': 'Report', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': utils.base64_encode(DNS_ATTACK_FILE) - } -DNS_EVENT = [{'feed.name': 'DNS Attack', - 'feed.url': 'http://www.nothink.org/honeypot_dns_attacks.txt', - '__type': 'Event', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': 'IjIwMTYtMTEtMTEgMTU6MTM6MjAiLCIxODYuMi4xNjcuMTQiLCIyNjIyNTQiLCJEQU5DT00gTFRELCwsIEJaIiwiZGRvcy1ndWFyZC5uZXQiLCJCWiI=', - 'time.source': '2016-11-11T15:13:20+00:00', - 'source.ip': '186.2.167.14', - 'source.asn': 262254, - 'source.as_name': 'DANCOM LTD,,, BZ', - 'source.reverse_dns': 'ddos-guard.net', - 'source.geolocation.cc': 'BZ', - 'protocol.application': 'dns', - 'classification.type': 'ddos', - 'event_description.text': 'On time.source the source.ip was seen performing ' - 'DNS amplification attacks against honeypots', - }, - {'feed.name': 'DNS Attack', - 'feed.url': 'http://www.nothink.org/honeypot_dns_attacks.txt', - '__type': 'Event', - 'time.observation': '2016-12-05T09:23:46+00:00', - 'raw': 'IjIwMTYtMDEtMjQgMTY6MjE6MTgiLCIxMzEuMjIxLjQ3LjIxMCIsIjI2NDQwOSIsIllheCBUZWNub2xvZ2lhIGUgSW5mb3JtYcODwqcuLi4iLCJuL2EiLCJVTksi', - 'time.source': '2016-01-24T16:21:18+00:00', - 'source.ip': '131.221.47.210', - 'source.asn': 264409, - 'source.as_name': 'Yax Tecnologia e Informaç...', - 'protocol.application': 'dns', - 'classification.type': 'ddos', - 'event_description.text': 'On time.source the source.ip was seen performing ' - 'DNS amplification attacks against honeypots', - }] - - -class TestNothinkParserBot(test.BotTestCase, unittest.TestCase): - """ A TestCase of Nothink Feeds. """ - - @classmethod - def set_bot(cls): - cls.bot_reference = NothinkParserBot - cls.default_input_message = SNMP_REPORT - - def test_snmp(self): - """ Test if correct SNMP event has been produced. """ - self.run_bot() - self.assertMessageEqual(0, SNMP_EVENT) - - def test_ssh(self): - """ Test if correct SSH event has been produced. """ - self.input_message = SSH_REPORT - self.run_bot() - self.assertMessageEqual(0, SSH_EVENT) - - def test_telnet(self): - """ Test if correct TELNET event has been produced. """ - self.input_message = TELNET_REPORT - self.run_bot() - self.assertMessageEqual(0, TELNET_EVENT) - - def test_dns(self): - """ Test if correct DNS event has been produced. 
""" - self.input_message = DNS_REPORT - self.run_bot() - self.assertMessageEqual(0, DNS_EVENT[0]) - self.assertMessageEqual(1, DNS_EVENT[1]) - -if __name__ == '__main__': # pragma: no cover - unittest.main() diff --git a/intelmq/tests/bots/parsers/openphish/test_parser_commercial.py b/intelmq/tests/bots/parsers/openphish/test_parser_commercial.py index 48ce70ae9..874e48bd0 100644 --- a/intelmq/tests/bots/parsers/openphish/test_parser_commercial.py +++ b/intelmq/tests/bots/parsers/openphish/test_parser_commercial.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import base64 import os import unittest diff --git a/intelmq/tests/bots/parsers/shadowserver/test_broken.py b/intelmq/tests/bots/parsers/shadowserver/test_broken.py index 93aa7e30f..ae58a1e18 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_broken.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_broken.py @@ -26,7 +26,7 @@ REPORT4 = {"raw": utils.base64_encode('adasdasdasdasd\nadasdasdafgf'), "__type": "Report", "time.observation": "2015-01-01T00:00:00+00:00", - "extra.file_name": "wrong-filename.csv", + "extra.file_name": "2020.wrong-filename.csv", } @@ -45,8 +45,7 @@ def test_broken(self): Test a report which does not have valid fields """ self.input_message = REPORT1 - self.allowed_error_count = 1 - self.run_bot() + self.run_bot(allowed_error_count=1) self.assertLogMatches(pattern="Detected report's file name: 'scan_http'.", levelname="DEBUG") self.assertLogMatches(pattern="Failed to parse line.") @@ -59,8 +58,7 @@ def test_half_broken(self): Test a report which does not have an optional field. """ self.input_message = REPORT2 - self.allowed_warning_count = 1 - self.run_bot() + self.run_bot(allowed_warning_count=54) self.assertLogMatches(pattern="Detected report's file name: 'scan_ftp'.", levelname="DEBUG") self.assertLogMatches(pattern="Optional key 'protocol' not found in feed 'Accessible-FTP'. Possible change in data format or misconfiguration.", @@ -73,8 +71,7 @@ def test_no_config(self): Test a report which does not have a valid extra.file_name """ self.input_message = REPORT3 - self.allowed_error_count = 1 - self.run_bot() + self.run_bot(allowed_error_count=1) self.assertLogMatches(pattern="ValueError: Could not get a config for 'some_string', check the documentation." ) def test_invalid_filename(self): @@ -82,11 +79,19 @@ def test_invalid_filename(self): Test a report which does not have a valid extra.file_name """ self.input_message = REPORT4 - self.allowed_error_count = 1 - self.run_bot() - self.assertLogMatches(pattern="ValueError: Report's 'extra.file_name' 'wrong-filename.csv' is not valid." ) - + self.run_bot(allowed_error_count=1) + self.assertLogMatches(pattern="ValueError: Report's 'extra.file_name' '2020.wrong-filename.csv' is not valid." ) + def test_no_report_name(self): + """ + Test a report without file_name and no given feedname as parameter. + Error message should be verbose. + """ + self.run_bot(allowed_error_count=1) + self.assertLogMatches(pattern="ValueError: No feedname given as parameter and the " + "processed report has no 'extra.file_name'. " + "Ensure that at least one is given. 
" + "Also have a look at the documentation of the bot.") if __name__ == '__main__': # pragma: no cover diff --git a/intelmq/tests/bots/parsers/shadowserver/test_mapping.py b/intelmq/tests/bots/parsers/shadowserver/test_mapping.py new file mode 100644 index 000000000..ae1013d8e --- /dev/null +++ b/intelmq/tests/bots/parsers/shadowserver/test_mapping.py @@ -0,0 +1,53 @@ +import unittest +import os + +import intelmq.lib.test as test +import intelmq.lib.utils as utils +from intelmq.bots.parsers.shadowserver.parser import ShadowserverParserBot + + +with open(os.path.join(os.path.dirname(__file__), + 'testdata/scan_telnet.csv')) as handle: + TELNET_FILE = handle.read() +EXAMPLE_TELNET = { + "raw": utils.base64_encode(TELNET_FILE), + "__type": "Report", + "time.observation": "2015-01-01T00:00:00+00:00", + "extra.file_name": "2019-01-01-scan_telnet.csv", +} +with open(os.path.join(os.path.dirname(__file__), + 'testdata/scan_vnc.csv')) as handle: + TELNET_FILE = handle.read() +EXAMPLE_VNC = { + "raw": utils.base64_encode(TELNET_FILE), + "__type": "Report", + "time.observation": "2015-01-01T00:00:00+00:00", + "extra.file_name": "2019-01-01-scan_vnc.csv", +} + + +class TestShadowserverMapping(test.BotTestCase, unittest.TestCase): + + def test_filename(self): + self.assertEqual('scan_chargen', + ShadowserverParserBot._ShadowserverParserBot__is_filename_regex.search('2020-01-01-scan_chargen.csv').group(1)) + self.assertEqual('scan_chargen', + ShadowserverParserBot._ShadowserverParserBot__is_filename_regex.search('scan_chargen.csv').group(1)) + + @classmethod + def set_bot(cls): + cls.bot_reference = ShadowserverParserBot + cls.sysconfig = {'feedname': ''} + + def test_changed_feed(self): + """ + Tests if the parser correctly re-detects the feed for the second received report + #1493 + """ + self.input_message = (EXAMPLE_TELNET, EXAMPLE_VNC) + self.run_bot(iterations=2) + + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/shadowserver/test_parameters.py b/intelmq/tests/bots/parsers/shadowserver/test_parameters.py index 025db894b..6c9744d48 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_parameters.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_parameters.py @@ -12,8 +12,7 @@ EXAMPLE_FILE = handle.read() EXAMPLE_LINES = EXAMPLE_FILE.splitlines() -EXAMPLE_REPORT = {'feed.name': 'DNS Open Resolvers', - "raw": utils.base64_encode(EXAMPLE_FILE), +EXAMPLE_REPORT = {"raw": utils.base64_encode(EXAMPLE_FILE), "__type": "Report", "time.observation": "2018-07-30T00:00:00+00:00", "extra.file_name": "2019-01-01-scan_dns-test-test.csv", diff --git a/intelmq/tests/bots/parsers/shadowserver/test_scan_chargen.py b/intelmq/tests/bots/parsers/shadowserver/test_scan_chargen.py index 0692e4876..e5082457a 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_scan_chargen.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_scan_chargen.py @@ -164,8 +164,7 @@ def test_event(self): def test_event_short(self): """ Test with short header. 
""" self.input_message = EXAMPLE_REPORT_SHORT - self.allowed_warning_count = 2 - self.run_bot() + self.run_bot(allowed_warning_count=3) self.assertMessageEqual(0, EVENT_SHORT) diff --git a/intelmq/tests/bots/parsers/shadowserver/test_scan_cwmp.py b/intelmq/tests/bots/parsers/shadowserver/test_scan_cwmp.py index 66cec25cd..7ee0bd3d6 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_scan_cwmp.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_scan_cwmp.py @@ -39,7 +39,6 @@ "protocol.transport": "tcp", 'raw': utils.base64_encode('\n'.join([EXAMPLE_LINES[0], EXAMPLE_LINES[1]])), - "source.asn": 6057, "source.asn": 5678, "source.geolocation.cc": "AA", "source.geolocation.city": "LOCATION", diff --git a/intelmq/tests/bots/parsers/shadowserver/test_scan_ipp.py b/intelmq/tests/bots/parsers/shadowserver/test_scan_ipp.py new file mode 100644 index 000000000..9fc033fa7 --- /dev/null +++ b/intelmq/tests/bots/parsers/shadowserver/test_scan_ipp.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +import os +import unittest + +import intelmq.lib.test as test +import intelmq.lib.utils as utils +from intelmq.bots.parsers.shadowserver.parser import ShadowserverParserBot + +with open(os.path.join(os.path.dirname(__file__), 'testdata/scan_ipp.csv')) as handle: + EXAMPLE_FILE = handle.read() +EXAMPLE_LINES = EXAMPLE_FILE.splitlines() + +EXAMPLE_REPORT = {'feed.name': 'Open-IPP', + "raw": utils.base64_encode(EXAMPLE_FILE), + "__type": "Report", + "time.observation": "2020-06-09T00:00:00+00:00", + "extra.file_name": "2020-06-08-scan_ipp-test-geo.csv", + } +EVENTS = [{'__type': 'Event', + 'feed.name': 'Open-IPP', + "classification.identifier": "open-ipp", + "classification.taxonomy": "vulnerable", + "classification.type": "vulnerable service", + "extra.naics": 517311, + "extra.tag": "ipp", + "extra.ipp_version": "IPP/2.1", + "extra.cups_version": "CUPS/2.0", + "extra.printer_uris": "ipp://123.45.67.89:631/ipp/print", + "extra.printer_name": "NPI3F0D22", + "extra.printer_info": "HP Color LaserJet MFP M277dw", + "extra.printer_more_info": "http://123.45.67.89:631/hp/device/info_config_AirPrint.html?tab=Networking&menu=AirPrintStatus", + "extra.printer_make_and_model": "HP Color LaserJet MFP M277dw", + "extra.printer_firmware_name": "20191203", + "extra.printer_firmware_string_version": "20191203", + "extra.printer_firmware_version": "20191203", + "extra.printer_organization": "org", + "extra.printer_organization_unit": "unit", + "extra.printer_uuid": "urn:uuid:456e4238-4a44-4643-4c42-10e1813f0a18", + "extra.printer_wifi_ssid": "wifissid", + "protocol.application": "ipp", + "protocol.transport": "tcp", + 'raw': utils.base64_encode('\n'.join([EXAMPLE_LINES[0], + EXAMPLE_LINES[1]])), + "source.asn": 12345, + "source.geolocation.cc": "AA", + "source.geolocation.city": "CITY", + "source.geolocation.region": "REGION", + "source.ip": "123.45.67.89", + "source.port": 631, + 'source.reverse_dns': 'some.host.com', + "time.observation": "2020-06-09T00:00:00+00:00", + "time.source": "2020-06-08T11:30:14+00:00" + }, + ] + +class TestShadowserverParserBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for a ShadowserverParserBot. + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = ShadowserverParserBot + cls.default_input_message = EXAMPLE_REPORT + + def test_event(self): + """ Test if correct Event has been produced. 
""" + self.run_bot() + for i, EVENT in enumerate(EVENTS): + self.assertMessageEqual(i, EVENT) + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/shadowserver/test_scan_mqtt.py b/intelmq/tests/bots/parsers/shadowserver/test_scan_mqtt.py new file mode 100644 index 000000000..0658e4592 --- /dev/null +++ b/intelmq/tests/bots/parsers/shadowserver/test_scan_mqtt.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- + +import os +import unittest + +import intelmq.lib.test as test +import intelmq.lib.utils as utils +from intelmq.bots.parsers.shadowserver.parser import ShadowserverParserBot + +with open(os.path.join(os.path.dirname(__file__), 'testdata/scan_mqtt.csv')) as handle: + EXAMPLE_FILE = handle.read() +EXAMPLE_LINES = EXAMPLE_FILE.splitlines() + +EXAMPLE_REPORT = {'feed.name': 'Open-MQTT', + "raw": utils.base64_encode(EXAMPLE_FILE), + "__type": "Report", + "time.observation": "2020-03-15T00:00:00+00:00", + "extra.file_name": "2020-03-14-scan_mqtt-test-geo.csv", + } +EVENTS = [{'__type': 'Event', + 'feed.name': 'Open-MQTT', + "classification.identifier": "open-mqtt", + "classification.taxonomy": "vulnerable", + "classification.type": "vulnerable service", + "extra.anonymous_access": True, + "extra.code": "Connection Accepted", + "extra.hex_code": "00", + "extra.naics": 518210, + "extra.raw_response": "20020000", + "extra.tag": "mqtt", + "protocol.application": "mqtt", + "protocol.transport": "tcp", + 'raw': utils.base64_encode('\n'.join([EXAMPLE_LINES[0], + EXAMPLE_LINES[1]])), + "source.asn": 12345, + "source.geolocation.cc": "AA", + "source.geolocation.city": "CITY", + "source.geolocation.region": "REGION", + "source.ip": "123.45.67.89", + "source.port": 1883, + 'source.reverse_dns': 'some.host.com', + "time.observation": "2020-03-15T00:00:00+00:00", + "time.source": "2020-03-14T05:45:48+00:00" + }, +{'__type': 'Event', + 'feed.name': 'Open-MQTT', + "classification.identifier": "open-mqtt", + "classification.taxonomy": "vulnerable", + "classification.type": "vulnerable service", + "extra.anonymous_access": False, + "extra.code": "Connection Refused, Server unavailable", + "extra.hex_code": "03", + "extra.naics": 454110, + "extra.raw_response": "20020003", + "extra.tag": "mqtt", + "protocol.application": "mqtt", + "protocol.transport": "tcp", + 'raw': utils.base64_encode('\n'.join([EXAMPLE_LINES[0], + EXAMPLE_LINES[2]])), + "source.asn": 12345, + "source.geolocation.cc": "AA", + "source.geolocation.city": "CITY", + "source.geolocation.region": "REGION", + "source.ip": "123.45.67.90", + "source.port": 1883, + 'source.reverse_dns': 'another.host.com', + "time.observation": "2020-03-15T00:00:00+00:00", + "time.source": "2020-03-14T05:45:51+00:00" + }, + ] + +class TestShadowserverParserBot(test.BotTestCase, unittest.TestCase): + """ + A TestCase for a ShadowserverParserBot. + """ + + @classmethod + def set_bot(cls): + cls.bot_reference = ShadowserverParserBot + cls.default_input_message = EXAMPLE_REPORT + + def test_event(self): + """ Test if correct Event has been produced. 
""" + self.run_bot() + for i, EVENT in enumerate(EVENTS): + self.assertMessageEqual(i, EVENT) + + +if __name__ == '__main__': # pragma: no cover + unittest.main() diff --git a/intelmq/tests/bots/parsers/shadowserver/test_scan_rdp.py b/intelmq/tests/bots/parsers/shadowserver/test_scan_rdp.py index 42298be18..c58040503 100644 --- a/intelmq/tests/bots/parsers/shadowserver/test_scan_rdp.py +++ b/intelmq/tests/bots/parsers/shadowserver/test_scan_rdp.py @@ -22,12 +22,12 @@ "classification.identifier": "open-rdp", "classification.taxonomy": "vulnerable", "classification.type": "vulnerable service", - "extra.bluekeep_vulnerable": "N", + "extra.bluekeep_vulnerable": False, "extra.cert_expiration_date": "2019-10-29 02:22:06", "extra.cert_issue_date": "2019-04-29 02:22:06", "extra.cert_length": 5678, "extra.cert_serial_number": "1EF2B37AF850C9BF4E88F18177001D6B", - "extra.cve20190708_vulnerable": "N", + "extra.cve20190708_vulnerable": False, "extra.issuer_common_name": "KABESRV.KABE.local", "extra.key_algorithm": "rsaEncryption", "extra.md5_fingerprint": "BC:6E:C3:E2:98:22:EC:BA:5B:30:E2:53:FD:4A:9D:FF", @@ -58,12 +58,12 @@ "classification.identifier": "open-rdp", "classification.taxonomy": "vulnerable", "classification.type": "vulnerable service", - "extra.bluekeep_vulnerable": "N", + "extra.bluekeep_vulnerable": False, "extra.cert_expiration_date": "2019-10-16 06:15:20", "extra.cert_issue_date": "2019-04-16 06:15:20", "extra.cert_length": 5678, "extra.cert_serial_number": "3FF3EBC5CF154BA54D128A8548C8AAF5", - "extra.cve20190708_vulnerable": "N", + "extra.cve20190708_vulnerable": False, "extra.issuer_common_name": "RAMBLA01.rambla.local", "extra.key_algorithm": "rsaEncryption", "extra.md5_fingerprint": "38:73:6A:B3:AA:41:69:C9:BA:E7:3D:D7:40:16:F8:AA", diff --git a/intelmq/tests/bots/parsers/shadowserver/testdata/scan_ipp.csv b/intelmq/tests/bots/parsers/shadowserver/testdata/scan_ipp.csv new file mode 100644 index 000000000..88173d6ba --- /dev/null +++ b/intelmq/tests/bots/parsers/shadowserver/testdata/scan_ipp.csv @@ -0,0 +1,2 @@ +"timestamp","ip","protocol","port","hostname","tag","asn","geo","region","city","naics","sic","ipp_version","cups_version","printer_uris","printer_name","printer_info","printer_more_info","printer_make_and_model","printer_firmware_name","printer_firmware_string_version","printer_firmware_version","printer_organization","printer_organization_unit","printer_uuid","printer_wifi_ssid" +"2020-06-08 11:30:14","123.45.67.89","tcp",631,"some.host.com","ipp",12345,"AA","REGION","CITY",517311,0,"IPP/2.1","CUPS/2.0","ipp://123.45.67.89:631/ipp/print","NPI3F0D22","HP Color LaserJet MFP M277dw","http://123.45.67.89:631/hp/device/info_config_AirPrint.html?tab=Networking&menu=AirPrintStatus","HP Color LaserJet MFP M277dw",20191203,20191203,20191203,"org","unit","urn:uuid:456e4238-4a44-4643-4c42-10e1813f0a18","wifissid" diff --git a/intelmq/tests/bots/parsers/shadowserver/testdata/scan_mqtt.csv b/intelmq/tests/bots/parsers/shadowserver/testdata/scan_mqtt.csv new file mode 100644 index 000000000..7e5c763a3 --- /dev/null +++ b/intelmq/tests/bots/parsers/shadowserver/testdata/scan_mqtt.csv @@ -0,0 +1,3 @@ +"timestamp","ip","protocol","port","hostname","tag","asn","geo","region","city","naics","sic","anonymous_access","raw_response","hex_code","code" +"2020-03-14 05:45:48","123.45.67.89","tcp",1883,"some.host.com","mqtt",12345,"AA","REGION","CITY",518210,0,"Y",20020000,00,"Connection Accepted" +"2020-03-14 
05:45:51","123.45.67.90","tcp",1883,"another.host.com","mqtt",12345,"AA","REGION","CITY",454110,0,"N",20020003,03,"Connection Refused, Server unavailable" diff --git a/intelmq/tests/bots/parsers/taichung/recent.html b/intelmq/tests/bots/parsers/taichung/recent.html new file mode 100644 index 000000000..83e53007a --- /dev/null +++ b/intelmq/tests/bots/parsers/taichung/recent.html @@ -0,0 +1,265 @@ + + + + + + + + + 臺中市政府教育局全球資訊網 - 網路管理系統 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
臺中市政府教育局
+ +
+
+ + +
+ +
+ + + + +
+
+ + +
+
+ +
+
公務帳號
+
+ 《已登錄》
+
+ 查看 +
+
人氣指數
+
+ » 線上數: 22
+ » 今日數: 174
+ » 本週數: 35545
+ » 本月數: 309184
+ » 今年數: 820531
+ » 總次數: 35489367
+
+
+
+ QRCode +
+
災害警示
+
+ +
+
+
+
+
+

臺中市教育網路中心 流量異常/資安回報

+
+ +
外部流量異常限制IP列表
+
+ [回上頁] 最近 [1日] + [3日] [7日] + [30日] +
+ + + + + + + + + + + + + + +
限制IP限制型態流量起限時間距日管理
2192.88.99.4Office 365 Attack手動設定2019-10-29 13:18:47147.49封鎖
1237192.0.0.5惡意程式儲存FTP站手動設定2013-04-24 00:01:592527.04封鎖
477410.0.0.1SCAN-PORT-RDP9052020-03-24 01:15:500.99封鎖
4800127.0.0.1SCAN-PORT-137-138-1397632020-03-24 01:14:240.99封鎖
+ +
+ + +
+ * 市網中心對於外部IP(非中市網連線學校登記之IP)與市內連線單位有異常流量者逕行封鎖24小時,24小時後自動解除封鎖
+ * 外部IP若被封鎖需要解除,請直接與網路中心連絡。
+
+
+ 台中市政府教育局logo +
+
+ 網頁設計 台中市網路中心網路組

+ 黃國順 張本和 2011.5.30 +
+
+
+
+ + + + + + + + + +
+
+
+
+

檢視 IPV6 網站認證訊息(另開新視窗)

+
+

臺中市政府教育局版權所有 Copyright © 2011 - 2020. All Rights Reserved.

+

最佳瀏覽解析度 1024x768px. 建議使用瀏覽器 Firefox5.0以上或IE8.0以上或google chrome版本瀏覽本站.

+

臺中市政府教育局地址:420018 臺中市豐原區陽明街36號 【交通位置及停車資訊】【地圖】

+

市話總機代表號: 04-22289111 【各科室電話分機】 【教育局組織架構圖】

+

辦公時間:8:00-17:00,中午休息時間:12:00-13:00 ‧彈性上下班時間:8:00-8:30、13:00-13:30、17:00-17:30

+

【網站資訊安全及個人隱私權宣告】 【網站服務條款】

+

最後更新時間: 2020-03-25 00:11:11

+
+
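[Aside (not part of the commit): the expected events in test_parser.py further below are built from these fixture rows. A minimal illustrative sketch of the row-to-event mapping, with hypothetical variable names, not the parser's actual implementation:]

# Illustrative only: one fixture table row (IP, description, local timestamp)
# becomes one IntelMQ event with these harmonization fields.
row = ('192.88.99.4', 'Office 365 Attack', '2019-10-29 13:18:47')
event = {'source.ip': row[0],
         'event_description.text': row[1]}
# 'time.source' additionally requires the UTC conversion sketched after the
# recent30.html hunk below.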
diff --git a/intelmq/tests/bots/parsers/taichung/recent30.html b/intelmq/tests/bots/parsers/taichung/recent30.html deleted file mode 100644 index db6dfb3ef..000000000 --- a/intelmq/tests/bots/parsers/taichung/recent30.html +++ /dev/null @@ -1,248 +0,0 @@
[Deleted HTML test fixture (248 lines), the older "recent30" variant of the same blocked-IP list page; its markup was likewise lost in extraction. It contained two table rows, 192.88.99.4 (Malware Provider, 手動設定, 2016-11-08 22:49:17) and 192.0.0.5 (Malware Provider, 手動設定, 2016-11-08 22:46:44), plus the same site boilerplate; it is superseded by recent.html above.]
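[Aside (not part of the commit): the expected 'time.source' values in the test below are the fixture's local timestamps converted from Asia/Taipei (UTC+8) to UTC, e.g. 2019-10-29 13:18:47 becomes 2019-10-29T05:18:47+00:00. A minimal sketch of that conversion; it assumes pytz, which the build already installs:]

import datetime
import pytz

# Interpret the fixture timestamp as Asia/Taipei local time, then express it in UTC.
local = pytz.timezone('Asia/Taipei').localize(datetime.datetime(2019, 10, 29, 13, 18, 47))
print(local.astimezone(pytz.utc).isoformat())  # 2019-10-29T05:18:47+00:00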
- - - - diff --git a/intelmq/tests/bots/parsers/taichung/test_parser.py b/intelmq/tests/bots/parsers/taichung/test_parser.py index ba975f942..1fabd4bca 100644 --- a/intelmq/tests/bots/parsers/taichung/test_parser.py +++ b/intelmq/tests/bots/parsers/taichung/test_parser.py @@ -4,48 +4,56 @@ import unittest import intelmq.lib.test as test -from intelmq.bots.parsers.taichung.parser import TaichungCityNetflowParserBot +from intelmq.bots.parsers.taichung.parser import TaichungNetflowRecentParserBot -with open(os.path.join(os.path.dirname(__file__), 'recent30.html'), 'rb') as fh: +with open(os.path.join(os.path.dirname(__file__), 'recent.html'), 'rb') as fh: RAW = base64.b64encode(fh.read()).decode() OUTPUT1 = {'__type': 'Event', - 'classification.type': 'malware', - 'event_description.text': 'Malware Provider', - 'raw': 'PHRkPjE8L3RkPjx0ZD48aW1nIHNyYz0icmVjZW50MzBfZmlsZXMvdXMuZ2lmIiBhbHQ9IiI+PHNwYW4' - 'gc3R5bGU9ImNvbG9yOiBibGFjazsiPjE5Mi44OC45OS40PC9zcGFuPjwvdGQ+PHRkPk1hbHdhcmUgUH' - 'JvdmlkZXI8L3RkPgogICAgICAgIDx0ZD7miYvli5XoqK3lrpo8L3RkPjx0ZD4yMDE2LTExLTA4IDIyO' - 'jQ5OjE3PC90ZD48dGQ+Mi4wMjwvdGQ+CiAgICAgICAgPHRkIHN0eWxlPSJjb2xvcjpyZWQ7Ij7lsIHp' - 'jpY8L3RkPjwvdHI+ICAgICAgICA=', + 'classification.type': 'brute-force', + 'event_description.text': 'Office 365 Attack', + 'raw': 'PHRkPjI8L3RkPjx0ZD48aW1nIHNyYz0iL2ltYWdlcy9mbGFncy9wbC5naWYiIGFsdD0iIj48c3BhbiBzdHlsZT0iY29sb3I6IGJsYWNrOyI+MTkyLjg4Ljk5LjQ8L3NwYW4+PC90ZD48dGQ+T2ZmaWNlIDM2NSBBdHRhY2s8L3RkPgogICAgICAgIDx0ZD7miYvli5XoqK3lrpo8L3RkPjx0ZD4yMDE5LTEwLTI5IDEzOjE4OjQ3PC90ZD48dGQ+MTQ3LjQ5PC90ZD4KICAgICAgICA8dGQgc3R5bGU9ImNvbG9yOnJlZDsiPuWwgemOljwvdGQ+PC90cj4gICAgICAgIA==', 'source.ip': '192.88.99.4', - 'time.source': '2016-11-08T14:49:17+00:00'} + 'source.geolocation.cc': 'PL', + 'time.source': '2019-10-29T05:18:47+00:00'} OUTPUT2 = {'__type': 'Event', 'classification.type': 'malware', - 'event_description.text': 'Malware Provider', - 'raw': 'PHRkPjI8L3RkPjx0ZD48aW1nIHNyYz0icmVjZW50MzBfZmlsZXMvcnUuZ2lmIiBhbHQ9IiI+PHNwYW4' - 'gc3R5bGU9ImNvbG9yOiBibGFjazsiPjE5Mi4wLjAuNTwvc3Bhbj48L3RkPjx0ZD5NYWx3YXJlIFByb3' - 'ZpZGVyPC90ZD4KICAgICAgICA8dGQ+5omL5YuV6Kit5a6aPC90ZD48dGQ+MjAxNi0xMS0wOCAyMjo0N' - 'jo0NDwvdGQ+PHRkPjIuMDI8L3RkPgogICAgICAgIDx0ZCBzdHlsZT0iY29sb3I6cmVkOyI+PC90ZD48' - 'L3RyPiAgPC90Ym9keT4KICA8L3RhYmxlPgoKICA8aHI+CiAgPHRhYmxlIGNsYXNzPSJuZXRib3R0b21' - '0YmwiIGJvcmRlcj0iMCI+PHRib2R5Pg==', + 'event_description.text': '惡意程式儲存FTP站', + 'raw': 'PHRkPjEyMzc8L3RkPjx0ZD48aW1nIHNyYz0iL2ltYWdlcy9mbGFncy9mci5naWYiIGFsdD0iIj48c3BhbiBzdHlsZT0iY29sb3I6IGJsYWNrOyI+MTkyLjAuMC41PC9zcGFuPjwvdGQ+PHRkPuaDoeaEj+eoi+W8j+WEsuWtmEZUUOermTwvdGQ+CiAgICAgICAgPHRkPuaJi+WLleioreWumjwvdGQ+PHRkPjIwMTMtMDQtMjQgMDA6MDE6NTk8L3RkPjx0ZD4yNTI3LjA0PC90ZD4KICAgICAgICA8dGQgc3R5bGU9ImNvbG9yOnJlZDsiPuWwgemOljwvdGQ+PC90cj4gICAgICAgIA==', 'source.ip': '192.0.0.5', - 'time.source': '2016-11-08T14:46:44+00:00'} - - -class TestTaichungCityNetflowParserBot(test.BotTestCase, unittest.TestCase): + 'source.geolocation.cc': 'FR', + 'time.source': '2013-04-23T16:01:59+00:00'} +OUTPUT3 = {'__type': 'Event', + 'classification.type': 'scanner', + 'event_description.text': 'SCAN-PORT-RDP', + 'raw': 'PHRkPjQ3NzQ8L3RkPjx0ZD48c3BhbiBzdHlsZT0iY29sb3I6IGJsYWNrOyI+PGEgaHJlZj0iaHR0cDovL3d3dy53aG9pczM2NS5jb20vdHcvaXAvMTAuMC4wLjEiIHRhcmdldD0iX2JsYW5rIj4xMC4wLjAuMTwvYT48L3NwYW4+PC90ZD48dGQ+U0NBTi1QT1JULVJEUDwvdGQ+CiAgICAgICAgPHRkPjkwNTwvdGQ+PHRkPjIwMjAtMDMtMjQgMDE6MTU6NTA8L3RkPjx0ZD4wLjk5PC90ZD4KICAgICAgICA8dGQgc3R5bGU9ImNvbG9yOnJlZDsiPuWwgemOljwvdGQ+PC90cj4gICAgICAgIA==', + 'source.ip': '10.0.0.1', + 
'time.source': '2020-03-23T17:15:50+00:00'} +OUTPUT4 = {'__type': 'Event', + 'classification.type': 'scanner', + 'event_description.text': 'SCAN-PORT-137-138-139', + 'raw': 'PHRkPjQ4MDA8L3RkPjx0ZD48aW1nIHNyYz0iL2ltYWdlcy9mbGFncy91cy5naWYiIGFsdD0iIj48c3BhbiBzdHlsZT0iY29sb3I6IGJsYWNrOyI+PGEgaHJlZj0iaHR0cDovL3d3dy53aG9pczM2NS5jb20vdHcvaXAvMTI3LjAuMC4xIiB0YXJnZXQ9Il9ibGFuayI+MTI3LjAuMC4xPC9hPjwvc3Bhbj48L3RkPjx0ZD5TQ0FOLVBPUlQtMTM3LTEzOC0xMzk8L3RkPgogICAgICAgIDx0ZD43NjM8L3RkPjx0ZD4yMDIwLTAzLTI0IDAxOjE0OjI0PC90ZD48dGQ+MC45OTwvdGQ+CiAgICAgICAgPHRkIHN0eWxlPSJjb2xvcjpyZWQ7Ij7lsIHpjpY8L3RkPjwvdHI+ICA8L3Rib2R5PgogIDwvdGFibGU+CgogIDxocj4KICA8dGFibGUgYm9yZGVyPSIwIiBjbGFzcz0ibmV0Ym90dG9tdGJsIj4=', + 'source.ip': '127.0.0.1', + 'source.geolocation.cc': 'US', + 'time.source': '2020-03-23T17:14:24+00:00'} + + +class TestTaichungNetflowRecentParserBot(test.BotTestCase, unittest.TestCase): """ - A TestCase for TaichungCityNetflowParserBot. + A TestCase for TaichungNetflowRecentParserBot. """ @classmethod def set_bot(cls): - cls.bot_reference = TaichungCityNetflowParserBot + cls.bot_reference = TaichungNetflowRecentParserBot cls.default_input_message = {'__type': 'Report', 'raw': RAW} def test_event(self): self.run_bot() self.assertMessageEqual(0, OUTPUT1) self.assertMessageEqual(1, OUTPUT2) + self.assertMessageEqual(2, OUTPUT3) + self.assertMessageEqual(3, OUTPUT4) if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/bots/parsers/urlvir/export-hosts.txt b/intelmq/tests/bots/parsers/urlvir/export-hosts.txt deleted file mode 100644 index eadf57ad9..000000000 --- a/intelmq/tests/bots/parsers/urlvir/export-hosts.txt +++ /dev/null @@ -1,7 +0,0 @@ -################################################################## -#URLVir Active Malicious Hosts -#Updated on December 4, 2016, 8:29 am -#Free for noncommercial use only, contact us for more information -################################################################## -indirlivexstore.com -188.138.68.177 diff --git a/intelmq/tests/bots/parsers/urlvir/export-ip-addresses.txt b/intelmq/tests/bots/parsers/urlvir/export-ip-addresses.txt deleted file mode 100644 index 7186c02f8..000000000 --- a/intelmq/tests/bots/parsers/urlvir/export-ip-addresses.txt +++ /dev/null @@ -1,6 +0,0 @@ -################################################################## -#URLVir Active Malicious IP Addresses Hosting Malware -#Updated on December 4, 2016, 8:29 am -#Free for noncommercial use only, contact us for more information -################################################################## -103.6.246.83 diff --git a/intelmq/tests/bots/parsers/urlvir/test_parser.py b/intelmq/tests/bots/parsers/urlvir/test_parser.py deleted file mode 100644 index 34fd7fe42..000000000 --- a/intelmq/tests/bots/parsers/urlvir/test_parser.py +++ /dev/null @@ -1,88 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import unittest - -import intelmq.lib.test as test -import intelmq.lib.utils as utils - -from intelmq.bots.parsers.urlvir.parser import URLVirParserBot - -with open(os.path.join(os.path.dirname(__file__), 'export-hosts.txt')) as handle: - HOSTS_FILE = handle.read() - -with open(os.path.join(os.path.dirname(__file__), 'export-ip-addresses.txt')) as handle: - IP_ADDRESSES_FILE = handle.read() - -HOSTS_REPORT = {'feed.name': 'URLVir Export Hosts', - 'feed.url': 'http://www.urlvir.com/export-hosts/', - '__type': 'Report', - 'time.observation': '2016-12-04T07:50:15+00:00', - 'raw': utils.base64_encode(HOSTS_FILE) - } - -HOSTS_EVENTS = [{'feed.name': 'URLVir Export Hosts', - 
'feed.url': 'http://www.urlvir.com/export-hosts/', - '__type': 'Event', - 'time.observation': '2016-12-04T07:50:15+00:00', - 'raw': 'aW5kaXJsaXZleHN0b3JlLmNvbQ==', - 'time.source': '2016-12-04T12:29:00+00:00', - 'source.fqdn': 'indirlivexstore.com', - 'classification.type': 'malware', - 'event_description.text': 'Active Malicious Hosts', - 'event_description.url': 'http://www.urlvir.com/search-host/indirlivexstore.com/' - }, - {'feed.name': 'URLVir Export Hosts', - 'feed.url': 'http://www.urlvir.com/export-hosts/', - '__type': 'Event', - 'time.observation': '2016-12-04T07:50:15+00:00', - 'raw': 'MTg4LjEzOC42OC4xNzc=', - 'time.source': '2016-12-04T12:29:00+00:00', - 'source.ip': '188.138.68.177', - 'classification.type': 'malware', - 'event_description.text': 'Active Malicious Hosts', - 'event_description.url': 'http://www.urlvir.com/search-ip-address/188.138.68.177/' - }] - -IP_ADDRESSES_REPORT = {'feed.name': 'URLVir Export IP Addresses', - 'feed.url': 'http://www.urlvir.com/export-ip-addresses/', - '__type': 'Report', - 'time.observation': '2016-12-04T07:50:15+00:00', - 'raw': utils.base64_encode(IP_ADDRESSES_FILE) - } - -IP_ADDRESS_EVENTS = {'feed.name': 'URLVir Export IP Addresses', - 'feed.url': 'http://www.urlvir.com/export-ip-addresses/', - '__type': 'Event', - 'time.observation': '2016-12-04T07:50:15+00:00', - 'raw': 'MTAzLjYuMjQ2Ljgz', - 'time.source': '2016-12-04T12:29:00+00:00', - 'source.ip': '103.6.246.83', - 'classification.type': 'malware', - 'event_description.text': 'Active Malicious IP Addresses Hosting Malware', - 'event_description.url': 'http://www.urlvir.com/search-ip-address/103.6.246.83/' - } - - -class TestURLVirParserBot(test.BotTestCase, unittest.TestCase): - """ A TestCase of URLVirParserBot with Host and IP Address Feeds. """ - - @classmethod - def set_bot(cls): - cls.bot_reference = URLVirParserBot - cls.default_input_message = HOSTS_REPORT - - def test_hosts(self): - """ Test if correct Host Events have been produced. """ - self.run_bot() - self.assertMessageEqual(0, HOSTS_EVENTS[0]) - self.assertMessageEqual(1, HOSTS_EVENTS[1]) - - def test_ip_addresses(self): - """ Test if coffect IP Address Events have been produced. """ - self.input_message = IP_ADDRESSES_REPORT - self.run_bot() - self.assertMessageEqual(0, IP_ADDRESS_EVENTS) - -if __name__ == '__main__': # pragma: no cover - unittest.main() diff --git a/intelmq/tests/lib/test_bot.py b/intelmq/tests/lib/test_bot.py index f02076217..333b691ab 100644 --- a/intelmq/tests/lib/test_bot.py +++ b/intelmq/tests/lib/test_bot.py @@ -3,52 +3,43 @@ Tests the Bot class itself. """ -import sys import unittest import intelmq.lib.test as test from intelmq.tests.lib import test_parser_bot -class TestBot(test.BotTestCase, unittest.TestCase): +class TestDummyParserBot(test.BotTestCase, unittest.TestCase): """ Testing generic functionalities of Bot base class. 
""" @classmethod def set_bot(cls): cls.bot_reference = test_parser_bot.DummyParserBot - cls.allowed_error_count = 1 - def test_bot_name(self): - pass - -# @test.skip_travis() - @unittest.skip("Strange blocking behavior") def test_pipeline_raising(self): self.default_input_message = None self.run_bot(parameters={"raise_on_connect": True}, - error_on_pipeline=True) + error_on_pipeline=True, + allowed_error_count=1) self.assertLogMatches(levelname='ERROR', pattern='Pipeline failed') def test_pipeline_empty(self): self.default_input_message = None - self.run_bot() - self.assertLogMatches(levelname='ERROR', pattern='Bot has found a problem') + self.run_bot(allowed_error_count=1) + self.assertLogMatches(levelname='ERROR', pattern='.*pipeline failed.*') - @unittest.skipIf(sys.version_info[:2] == (3, 7), - 'Unclear behavior with copies of logger in Python 3.7, see ' - 'https://bugs.python.org/issue9338 and https://github.com/certtools/intelmq/issues/1269') def test_logging_level_other(self): self.input_message = test_parser_bot.EXAMPLE_SHORT self.run_bot(parameters={"logging_level": "DEBUG"}) - self.assertLogMatches(levelname='DEBUG', pattern='test') + self.assertLogMatches(levelname='DEBUG', pattern='test!') def test_logging_catch_warnings(self): """ Test if the logger catches warnings issued by the warnings module. """ self.input_message = test_parser_bot.EXAMPLE_SHORT - self.allowed_warning_count = 1 - self.run_bot(parameters={'raise_warning': True}) + self.run_bot(parameters={'raise_warning': True}, + allowed_warning_count=1) self.assertLogMatches(levelname='WARNING', pattern='.*intelmq/tests/lib/test_parser_bot\.py\:[0-9]+\: UserWarning: This is a warning test.') def test_bot_group(self): @@ -59,6 +50,22 @@ def test_bot_group(self): self.prepare_bot() self.assertEqual(self.bot.group, 'Parser') + def test_invalid_input_message(self): + """ + Test if the bot is dumping / not retrying a message which is impossible to parse. + https://github.com/certtools/intelmq/issues/1494 + """ + self.input_message = b'foo\xc9bar' + self.run_bot(iterations=1, allowed_error_count=1) + self.assertLogMatches('.*intelmq\.lib\.exceptions\.DecodingError:.*') + self.assertLogMatches(pattern='Dumping message to dump file.', + levelname='INFO') + # raise ValueError(self.loglines) + # raise ValueError(self.input_queue) + self.assertEqual(self.pipe.state['test-bot-input-internal'], []) + self.assertEqual(self.pipe.state['test-bot-input'], []) + self.assertEqual(self.pipe.state['test-bot-output'], []) + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/lib/test_bot_output.py b/intelmq/tests/lib/test_bot_output.py new file mode 100644 index 000000000..c1cb41abb --- /dev/null +++ b/intelmq/tests/lib/test_bot_output.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +""" +Test OutputBot specifics +""" + +from intelmq.lib.bot import OutputBot +from intelmq.lib.test import BotTestCase + +from unittest import TestCase +from json import dumps + +RAW = {"__type": "Event", "raw": "Cg=="} +DICT = {"foo": "bar", "foobar": 1} +OUTPUT_DICT = {"__type": "Event", "output": dumps(DICT, sort_keys=True)} +STRING = "foobar!" 
+OUTPUT_STRING = {"__type": "Event", "output": dumps(STRING)} +INT = 123 +OUTPUT_INT = {"__type": "Event", "output": dumps(INT)} +INPUT = {"__type": "Event", "raw": "Cg==", "source.ip": "127.0.0.1"} +RAW_HIERARCHICAL = {"raw": "Cg==", "source": {"ip": "127.0.0.1"}} +NO_RAW_TYPE = {"__type": "Event", "source.ip": "127.0.0.1"} + +class DummyOutputBot(OutputBot): + + def process(self): + event = self.receive_message() + self.result = self.export_event(event, return_type=self.parameters.return_type) + + +class TestDummyOutputBot(BotTestCase, TestCase): + @classmethod + def set_bot(cls): + cls.sysconfig = {"return_type": None} + cls.bot_reference = DummyOutputBot + cls.default_input_message = RAW + cls.allowed_error_count = 1 + + def test_export_raw(self): + self.run_bot(parameters={"single_key": "raw"}) + self.assertEqual(self.bot.result, "\n") + + def test_export_output_dict(self): + self.input_message = OUTPUT_DICT + self.run_bot(parameters={"single_key": "output"}) + self.assertEqual(self.bot.result, DICT) + + def test_export_output_dict_string(self): + self.input_message = OUTPUT_DICT + self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.assertEqual(self.bot.result, OUTPUT_DICT['output']) + + def test_export_output_string(self): + self.input_message = OUTPUT_STRING + self.run_bot(parameters={"single_key": "output"}) + self.assertEqual(self.bot.result, STRING) + + def test_export_output_string_string(self): + self.input_message = OUTPUT_STRING + self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.assertEqual(self.bot.result, STRING) + + def test_export_output_int(self): + self.input_message = OUTPUT_INT + self.run_bot(parameters={"single_key": "output"}) + self.assertEqual(self.bot.result, INT) + + def test_export_output_int_string(self): + self.input_message = OUTPUT_INT + self.run_bot(parameters={"single_key": "output", "return_type": str}) + self.assertEqual(self.bot.result, OUTPUT_INT['output']) + + def test_export_keep_raw_hierarchical(self): + self.input_message = INPUT + self.run_bot(parameters={"keep_raw_field": True, + "message_hierarchical": True, + "message_with_type": False, + }) + self.assertEqual(self.bot.result, RAW_HIERARCHICAL) + + def test_export_keep_raw_hierarchical_string(self): + self.input_message = INPUT + self.run_bot(parameters={"keep_raw_field": True, + "message_hierarchical": True, + "message_with_type": False, + "return_type": str, + }) + self.assertEqual(self.bot.result, dumps(RAW_HIERARCHICAL, + sort_keys=True)) + + def test_export_now_raw_type(self): + self.input_message = INPUT + self.run_bot(parameters={"keep_raw_field": False, + "message_with_type": True, + }) + self.assertEqual(self.bot.result, NO_RAW_TYPE) diff --git a/intelmq/tests/lib/test_exceptions.py b/intelmq/tests/lib/test_exceptions.py new file mode 100755 index 000000000..2fa3aa3b0 --- /dev/null +++ b/intelmq/tests/lib/test_exceptions.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +""" +Testing the IntelMQ-specific exceptions +""" +import unittest + +import intelmq.lib.exceptions as excs + + +class TestUtils(unittest.TestCase): + + def test_MissingDependencyError(self): + depname = 'libname' + version = '1.2.3' + installed = '1.0.0' + additional = 'This is the end.' 
+
+        exc = str(excs.MissingDependencyError(depname))
+        self.assertIn(repr(depname), exc)
+
+        exc = str(excs.MissingDependencyError(depname, version))
+        self.assertIn(repr(depname), exc)
+        self.assertIn(version, exc)
+        self.assertIn('or higher', exc)
+
+        exc = str(excs.MissingDependencyError(depname, '>1.0,<2.0'))
+        self.assertIn(repr(depname), exc)
+        self.assertNotIn('or higher', exc)
+
+        exc = str(excs.MissingDependencyError(depname, version, installed))
+        self.assertIn(repr(depname), exc)
+        self.assertIn(version, exc)
+        self.assertIn(repr(installed), exc)
+
+        # installed should not show up if version is not given
+        exc = str(excs.MissingDependencyError(depname, installed=installed))
+        self.assertIn(repr(depname), exc)
+        self.assertNotIn(version, exc)
+        self.assertNotIn(repr(installed), exc)
+
+        # additional text at the end
+        exc = str(excs.MissingDependencyError(depname, additional_text=additional))
+        self.assertIn(repr(depname), exc)
+        self.assertTrue(exc.endswith(" %s" % additional))
+
+
+if __name__ == '__main__':  # pragma: no cover
+    unittest.main()
diff --git a/intelmq/tests/lib/test_harmonization.py b/intelmq/tests/lib/test_harmonization.py
index 9d8a1d451..d973fa9c4 100644
--- a/intelmq/tests/lib/test_harmonization.py
+++ b/intelmq/tests/lib/test_harmonization.py
@@ -295,10 +295,8 @@ def test_fqdn_sanitize_invalid(self):
 
     @test.skip_internet()
     def test_fqdn_to_ip(self):
-        """ Test FQDN.to_ip """
-        self.assertEqual(None, harmonization.FQDN.to_ip('localhost'))
-        self.assertEqual('93.184.216.34',
-                         harmonization.FQDN.to_ip('example.org'))
+        """ Test FQDN.to_ip, succeed if the answer was any valid IP address. """
+        self.assertTrue(harmonization.IPAddress.is_valid(harmonization.FQDN.to_ip('example.org')))
 
     def test_json_valid(self):
         """ Test JSON.is_valid with valid arguments. """
diff --git a/intelmq/tests/lib/test_message.py b/intelmq/tests/lib/test_message.py
index 0cca78e31..e60ac990b 100644
--- a/intelmq/tests/lib/test_message.py
+++ b/intelmq/tests/lib/test_message.py
@@ -55,10 +55,10 @@ def assertDictContainsSubset(self, actual, expected):
 
         https://docs.python.org/3/whatsnew/3.2.html?highlight=assertdictcontainssubset
-        http://stackoverflow.com/a/21058312/2851664
+        https://stackoverflow.com/a/57386339/2851664 cc by-sa 3.0 John1024
         """
-        self.assertTrue(set(expected.items()).issubset(set(actual.items())))
+        self.assertGreaterEqual(expected.items(), actual.items())
 
     def new_report(self, auto=False, examples=False):
         report = message.Report(harmonization=HARM, auto=auto)
@@ -446,9 +446,9 @@ def test_event_hash_method_blacklist(self):
                                      filter_keys={"feed.name"}))
 
         self.assertNotEqual(event1.hash(filter_type="blacklist",
-                                        filter_keys={"feed.url, raw"}),
+                                        filter_keys={"feed.url", "raw"}),
                             event2.hash(filter_type="blacklist",
-                                        filter_keys={"feed.url, raw"}))
+                                        filter_keys={"feed.url", "raw"}))
 
     def test_event_hash_method_whitelist(self):
         """ Test Event hash(blacklist) """
@@ -467,9 +467,9 @@ def test_event_hash_method_whitelist(self):
                                  filter_keys={"feed.name"}))
 
         self.assertEqual(event1.hash(filter_type="whitelist",
-                                     filter_keys={"feed.url, raw"}),
+                                     filter_keys={"feed.url", "raw"}),
                          event2.hash(filter_type="whitelist",
-                                     filter_keys={"feed.url, raw"}))
+                                     filter_keys={"feed.url", "raw"}))
 
     def test_event_dict(self):
         """ Test Event to_dict. """
""" diff --git a/intelmq/tests/lib/test_parser_bot.py b/intelmq/tests/lib/test_parser_bot.py index 01e14b2eb..96c0cff42 100644 --- a/intelmq/tests/lib/test_parser_bot.py +++ b/intelmq/tests/lib/test_parser_bot.py @@ -81,7 +81,7 @@ def parse_line(self, line, report): self.tempdata.append(line) else: event = self.new_event(report) - self.logger.debug('test') + self.logger.debug('test!') line = line.split(',') event['time.source'] = line[0] event['source.fqdn'] = line[1] @@ -137,12 +137,12 @@ def run_bot(self, *args, **kwargs): super().run_bot(*args, **kwargs) def test_event(self): - """ Test if correct Event has been produced. """ + """ Test DummyParserBot """ self.run_bot() self.assertMessageEqual(0, EXAMPLE_EVENT) def test_missing_raw(self): - """ Test if correct Event has been produced. """ + """ Test DummyParserBot with missing raw. """ self.input_message = EXAMPLE_EMPTY_REPORT self.allowed_warning_count = 1 self.run_bot() @@ -177,10 +177,50 @@ def set_bot(cls): cls.default_input_message = EXAMPLE_REPO_1 def test_event(self): - """ Test if correct Event has been produced. """ + """ Test DummyCSVParserBot. """ self.run_bot() self.assertMessageEqual(0, EXAMPLE_EVE_1) +EXAMPLE_JSON_STREAM_REPORT = {'__type': 'Report', + 'raw': utils.base64_encode('''{"a": 1} +{"a": 2}''')} +EXAMPLE_JSON_STREAM_EVENTS = [{'__type': 'Event', + 'raw': utils.base64_encode('{"a": 1}'), + 'event_description.text': '1', + 'classification.type': 'other', + }, + {'__type': 'Event', + 'raw': utils.base64_encode('{"a": 2}'), + 'event_description.text': '2', + 'classification.type': 'other', + }, + ] + + +class DummyJSONStreamParserBot(bot.ParserBot): + parse = bot.ParserBot.parse_json_stream + recover_line = bot.ParserBot.recover_line_json_stream + + def parse_line(self, line, report): + event = self.new_event(report) + event['event_description.text'] = line['a'] + event['classification.type'] = 'other' + event['raw'] = self.recover_line(line) + yield event + + +class TestJSONStreamParserBot(test.BotTestCase, unittest.TestCase): + @classmethod + def set_bot(cls): + cls.bot_reference = DummyJSONStreamParserBot + cls.default_input_message = EXAMPLE_JSON_STREAM_REPORT + + def test_event(self): + self.run_bot() + self.assertMessageEqual(0, EXAMPLE_JSON_STREAM_EVENTS[0]) + self.assertMessageEqual(1, EXAMPLE_JSON_STREAM_EVENTS[1]) + + if __name__ == '__main__': # pragma: no cover unittest.main() diff --git a/intelmq/tests/lib/test_pipeline.py b/intelmq/tests/lib/test_pipeline.py index fd0e142c0..479d297f1 100644 --- a/intelmq/tests/lib/test_pipeline.py +++ b/intelmq/tests/lib/test_pipeline.py @@ -5,7 +5,6 @@ We are testing sending and receiving on the same queue for Redis and Pythonlist. 
 TODO: clear_queues
-TODO: acknowledge
 TODO: check internal representation of data in redis (like with Pythonlist)
 """
 import logging
@@ -15,11 +14,14 @@
 
 import intelmq.lib.pipeline as pipeline
 import intelmq.lib.test as test
+import intelmq.lib.exceptions as exceptions
 
 SAMPLES = {'normal': [b'Lorem ipsum dolor sit amet',
                       'Lorem ipsum dolor sit amet'],
            'unicode': [b'\xc2\xa9\xc2\xab\xc2\xbb \xc2\xa4\xc2\xbc',
-                       '©«» ¤¼']}
+                       '©«» ¤¼'],
+           'badencoding': b'foo\xc9bar',
+           }
 
 
 class Parameters(object):
@@ -110,12 +112,32 @@ def test_reject(self):
         self.pipe.reject_message()
         self.assertEqual(SAMPLES['normal'][1], self.pipe.receive())
 
+    def test_acknowledge(self):
+        self.pipe.state['test-bot-input'] = [SAMPLES['normal'][0]]
+        self.pipe.receive()
+        self.pipe.acknowledge()
+        self.assertEqual(self.pipe.count_queued_messages('test-bot-input')['test-bot-input'], 0)
+        self.assertEqual(self.pipe.count_queued_messages('test-bot-input-internal')['test-bot-input-internal'], 0)
+
+    def test_bad_encoding_and_pop(self):
+        self.pipe.state['test-bot-input'] = [SAMPLES['badencoding']]
+        try:
+            self.pipe.receive()
+        except exceptions.DecodingError:
+            pass
+        self.pipe.acknowledge()
+        self.assertEqual(self.pipe.count_queued_messages('test-bot-input')['test-bot-input'], 0)
+        self.assertEqual(self.pipe.count_queued_messages('test-bot-input-internal')['test-bot-input-internal'], 0)
+
     def tearDown(self):
         self.pipe.state = {}
 
 
 @test.skip_redis()
 class TestRedis(unittest.TestCase):
+    """
+    We use the queue 'test' for both source and destination
+    """
 
     def setUp(self):
         params = Parameters()
@@ -166,6 +188,23 @@ def test_reject(self):
         self.pipe.reject_message()
         self.assertEqual(SAMPLES['normal'][1], self.pipe.receive())
 
+    def test_acknowledge(self):
+        self.pipe.send(SAMPLES['normal'][0])
+        self.pipe.receive()
+        self.pipe.acknowledge()
+        self.assertEqual(self.pipe.count_queued_messages('test')['test'], 0)
+        self.assertEqual(self.pipe.count_queued_messages('test-internal')['test-internal'], 0)
+
+    def test_bad_encoding_and_pop(self):
+        self.pipe.send(SAMPLES['badencoding'])
+        try:
+            self.pipe.receive()
+        except exceptions.DecodingError:
+            pass
+        self.pipe.acknowledge()
+        self.assertEqual(self.pipe.count_queued_messages('test')['test'], 0)
+        self.assertEqual(self.pipe.count_queued_messages('test-internal')['test-internal'], 0)
+
     def tearDown(self):
         self.pipe.disconnect()
         self.clear()
@@ -226,6 +265,23 @@ def test_reject(self):
         self.pipe.reject_message()
         self.assertEqual(SAMPLES['normal'][1], self.pipe.receive())
 
+    def test_acknowledge(self):
+        self.pipe.send(SAMPLES['normal'][0])
+        self.pipe.receive()
+        self.pipe.acknowledge()
+        self.assertEqual(self.pipe.count_queued_messages('test')['test'], 0)
+        self.assertEqual(self.pipe.count_queued_messages('test-internal')['test-internal'], 0)
+
+    def test_bad_encoding_and_pop(self):
+        self.pipe.send(SAMPLES['badencoding'])
+        try:
+            self.pipe.receive()
+        except exceptions.DecodingError:
+            pass
+        self.pipe.acknowledge()
+        self.assertEqual(self.pipe.count_queued_messages('test')['test'], 0)
+        self.assertEqual(self.pipe.count_queued_messages('test-internal')['test-internal'], 0)
+
     def tearDown(self):
         self.clear()
         self.pipe.disconnect()
diff --git a/intelmq/tests/lib/test_upgrades.py b/intelmq/tests/lib/test_upgrades.py
index e863183ec..1324c0d03 100644
--- a/intelmq/tests/lib/test_upgrades.py
+++ b/intelmq/tests/lib/test_upgrades.py
@@ -195,6 +195,58 @@
 }
 }
 }
+V213 = {"mail-collector": {
+    "group": "Collector",
"module": "intelmq.bots.collectors.mail.collector_mail_attach", + "parameters": { + "attach_unzip": True, + } +}, + "mail-collector2": { + "group": "Collector", + "module": "intelmq.bots.collectors.mail.collector_mail_attach", + "parameters": { + "attach_unzip": False, + "extract_files": True, + } +} +} +V213_EXP = {"mail-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.mail.collector_mail_attach", + "parameters": { + "extract_files": True, + } +}, + "mail-collector2": { + "group": "Collector", + "module": "intelmq.bots.collectors.mail.collector_mail_attach", + "parameters": { + "extract_files": True, + }, +} +} +V220_MISP_VERIFY_FALSE = { +"misp-collector": { + "module": "intelmq.bots.collectors.misp.collector", + "parameters": { + "misp_verify": False}}} +V220_MISP_VERIFY_NULL = { +"misp-collector": { + "module": "intelmq.bots.collectors.misp.collector", + "parameters": {}}} +V220_MISP_VERIFY_TRUE = { +"misp-collector": { + "module": "intelmq.bots.collectors.misp.collector", + "parameters": { + "misp_verify": True}}} +V220_HTTP_VERIFY_FALSE = { +"misp-collector": { + "module": "intelmq.bots.collectors.misp.collector", + "parameters": { + "http_verify_cert": False}}} +DEFAULTS_HTTP_VERIFY_TRUE = { + "http_verify_cert": True} HARM = load_configuration(pkg_resources.resource_filename('intelmq', 'etc/harmonization.conf')) V210_HARM = deepcopy(HARM) @@ -203,6 +255,96 @@ del MISSING_REPORT['report'] WRONG_TYPE = deepcopy(HARM) WRONG_TYPE['event']['source.asn']['type'] = 'String' +WRONG_REGEX = deepcopy(HARM) +WRONG_REGEX['event']['protocol.transport']['iregex'] = 'foobar' +V213_FEED = {"zeus-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "https://zeustracker.abuse.ch/blocklist.php?download=badips", + } +}, +"bitcash-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "https://bitcash.cz/misc/log/blacklist", + } +}, +"ddos-attack-c2-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http_stream", + "parameters": { + "http_url": "https://feed.caad.fkie.fraunhofer.de/ddosattackfeed/", + } +}, +"ddos-attack-targets-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http_stream", + "parameters": { + "http_url": "https://feed.caad.fkie.fraunhofer.de/ddosattackfeed/", + } +}, +"taichung-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "https://www.tc.edu.tw/net/netflow/lkout/recent/30", + }, +}, +"ransomware-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "https://ransomwaretracker.abuse.ch/feeds/csv/", + }, +}, +"bambenek-dga-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "https://osint.bambenekconsulting.com/feeds/dga-feed.txt", + }, +}, +"bambenek-c2dommasterlist-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "http://osint.bambenekconsulting.com/feeds/c2-dommasterlist.txt", + }, +}, +"nothink-dns-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "http://www.nothink.org/honeypot_dns_attacks.txt", + }, +}, +"nothink-ssh-collector": { + "group": 
"Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "http://www.nothink.org/blacklist/blacklist_ssh_day.txt", + }, +}, +"nothink-parser": { + "group": "Parser", + "module": "intelmq.bots.parsers.nothink.parser", +}, +} +V220_FEED = { +"urlvir-hosts-collector": { + "group": "Collector", + "module": "intelmq.bots.collectors.http.collector_http", + "parameters": { + "http_url": "http://www.urlvir.com/export-hosts/", + }, +}, +"urlvir-parser": { + "group": "Parser", + "module": "intelmq.bots.parsers.urlvir.parser", +}, +} def generate_function(function): @@ -258,6 +400,17 @@ def test_v211_harmonization(self): self.assertTrue(result[0]) self.assertEqual(HARM, result[3]) + def test_v220_configuration(self): + """ Test v220_configuration. """ + result = upgrades.v220_configuration(DEFAULTS_HTTP_VERIFY_TRUE, + V220_MISP_VERIFY_TRUE, {}, False) + self.assertTrue(result[0]) + self.assertEqual(V220_MISP_VERIFY_NULL, result[2]) + result = upgrades.v220_configuration(DEFAULTS_HTTP_VERIFY_TRUE, + V220_MISP_VERIFY_FALSE, {}, False) + self.assertTrue(result[0]) + self.assertEqual(V220_HTTP_VERIFY_FALSE, result[2]) + def test_missing_report_harmonization(self): """ Test missing report in harmonization """ result = upgrades.harmonization({}, {}, MISSING_REPORT, False) @@ -270,6 +423,54 @@ def test_wrong_type_harmonization(self): self.assertTrue(result[0]) self.assertEqual(HARM, result[3]) + def test_wrong_regex_harmonization(self): + """ Test wrong regex in harmonization """ + result = upgrades.harmonization({}, {}, WRONG_REGEX, False) + self.assertTrue(result[0]) + self.assertEqual(HARM, result[3]) + + def test_v213_deprecations(self): + """ Test v213_fixes """ + result = upgrades.v213_deprecations({}, V213, {}, False) + self.assertTrue(result[0]) + self.assertEqual(V213_EXP, result[2]) + + def test_v213_feed_changes(self): + """ Test v213_feed_changes """ + result = upgrades.v213_feed_changes({}, V213_FEED, {}, False) + self.assertEqual('A discontinued feed "Zeus Tracker" has been found ' + 'as bot zeus-collector. ' + 'The discontinued feed "Bitcash.cz" has been found ' + 'as bot bitcash-collector. ' + 'The discontinued feed "Fraunhofer DDos Attack" has ' + 'been found as bot ddos-attack-c2-collector, ' + 'ddos-attack-targets-collector. ' + 'The discontinued feed "Abuse.ch Ransomware Tracker" ' + 'has been found as bot ransomware-collector. ' + 'Many Bambenek feeds now require a license, see ' + 'https://osint.bambenekconsulting.com/feeds/ ' + 'potentially affected bots are ' + 'bambenek-c2dommasterlist-collector. ' + 'All Nothink Honeypot feeds are discontinued, ' + 'potentially affected bots are nothink-dns-collector, ' + 'nothink-ssh-collector. ' + 'The Nothink Parser has been removed, ' + 'affected bots are nothink-parser. ' + 'Remove affected bots yourself.', + result[0]) + self.assertEqual(V213_FEED, result[2]) + + def test_v220_feed_changes(self): + """ Test v213_feed_changes """ + result = upgrades.v220_feed_changes({}, V220_FEED, {}, False) + self.assertEqual('A discontinued feed "URLVir" has been found ' + 'as bot urlvir-hosts-collector. ' + 'The removed parser "URLVir" has been found ' + 'as bot urlvir-parser. 
+                         'Remove affected bots yourself.',
+                         result[0])
+        self.assertEqual(V220_FEED, result[2])
+
 
 for name in upgrades.__all__:
     setattr(TestUpgradeLib, 'test_function_%s' % name,
diff --git a/intelmq/tests/lib/test_utils.py b/intelmq/tests/lib/test_utils.py
index 1997e2255..6623b1376 100644
--- a/intelmq/tests/lib/test_utils.py
+++ b/intelmq/tests/lib/test_utils.py
@@ -6,12 +6,16 @@
 parsing.
 base64 de-/encoding is not tested yet, as we fully rely on the module.
 """
+import contextlib
 import datetime
 import io
 import os
 import tempfile
 import unittest
 import requests
+import sys
+
+import termstyle
 
 import intelmq.lib.utils as utils
 
@@ -79,7 +83,7 @@ def test_file_logger(self):
             logger = utils.log(name, log_path=tempfile.tempdir,
                                stream=io.StringIO())
-            logger.info(LINES['spare'][0])
+            logger.info(termstyle.green(LINES['spare'][0]))
             logger.error(LINES['spare'][1])
             logger.critical(LINES['spare'][2])
             handle.seek(0)
@@ -89,8 +93,11 @@ def test_file_logger(self):
         for ind, line in enumerate(file_lines):
             self.assertRegex(line.strip(), line_format[ind])
 
-    def test_stream_logger(self):
-        """Tests if a logger for a stream can be generated with log()."""
+    def test_stream_logger_given(self):
+        """
+        Tests if a logger for a stream can be generated with log()
+        if the stream is explicitly given.
+        """
         stream = io.StringIO()
 
         with tempfile.NamedTemporaryFile() as handle:
@@ -107,6 +114,21 @@ def test_stream_logger(self):
         line_format = [line.format(name) for line in LINES['short']]
         self.assertSequenceEqual(line_format, stream_lines)
 
+    def test_stream_logger(self):
+        stdout = io.StringIO()
+        stderr = io.StringIO()
+        with contextlib.redirect_stdout(stdout):
+            with contextlib.redirect_stderr(stderr):
+                logger = utils.log('test-bot', log_path=None)
+                logger.info(LINES['spare'][0])
+                logger.error(LINES['spare'][1])
+                logger.critical(LINES['spare'][2])
+        line_format = [line.format('test-bot') for line in LINES['short']]
+        self.assertEqual(stdout.getvalue(), line_format[0] + '\n')
+        self.assertEqual(stderr.getvalue(),
+                         '\n'.join((termstyle.red(line_format[1]),
+                                    termstyle.red(line_format[2]))) + '\n')
+
     def test_parse_logline(self):
         """Tests if the parse_logline() function works as expected"""
         line = ("2015-05-29 21:00:24,379 - malware-domain-list-collector - "
diff --git a/intelmq/tests/test_conf.py b/intelmq/tests/test_conf.py
index 2b78f2bb5..6132e9cd4 100644
--- a/intelmq/tests/test_conf.py
+++ b/intelmq/tests/test_conf.py
@@ -19,6 +19,9 @@
 
 import intelmq.lib.harmonization as harmonization
 
+from intelmq.lib.utils import lazy_int
+
+
 def to_json(obj):
     """
     Transforms object into JSON with intelmq-style.
@@ -68,7 +71,7 @@ def test_harmonization(self):
         event_copy = interpreted['event'].copy()
         del event_copy['raw']['description']
         del event_copy['extra']['description']
-        self.assertDictContainsSubset(interpreted['report'], event_copy)
+        self.assertGreaterEqual(event_copy.items(), interpreted['report'].items())
 
         # check for valid regex, length and type
         for value in interpreted['event'].values():
@@ -142,9 +145,26 @@ def test_modules_in_bots(self):
 
 
 class CerberusTests(unittest.TestCase):
+
+    cerberus_version = tuple(lazy_int(x) for x in cerberus.__version__.split('.'))
+
+    def convert_cerberus_schema(self, schema: str) -> str:
+        """
+        > [...] code using prior versions of cerberus would not break, but bring up wrong results!
+        > Rename keyschema to valueschema in your schemas. (0.9)
+        > Rename propertyschema to keyschema in your schemas. (1.0)
+
+        https://docs.python-cerberus.org/en/stable/upgrading.html
+        """
+        if self.cerberus_version >= (0, 9):
+            schema = schema.replace('"keyschema"', '"valueschema"')
+        if self.cerberus_version >= (1, 0):
+            schema = schema.replace('"propertyschema"', '"keyschema"')
+        return schema
+
     def test_bots(self):
         with open(os.path.join(os.path.dirname(__file__), 'assets/bots.schema.json')) as handle:
-            schema = json.load(handle)
+            schema = json.loads(self.convert_cerberus_schema(handle.read()))
 
         with open(pkg_resources.resource_filename('intelmq', 'bots/BOTS')) as handle:
             bots = json.load(handle)
@@ -156,9 +176,9 @@ def test_bots(self):
 
     def test_feeds(self):
         with open(os.path.join(os.path.dirname(__file__), 'assets/feeds.schema.json')) as handle:
-            schema = json.load(handle)
+            schema = json.loads(self.convert_cerberus_schema(handle.read()))
         with open(pkg_resources.resource_filename('intelmq',
-                                                  'etc/feeds.yaml')) as handle:
+                                                  'etc/feeds.yaml'), encoding='UTF-8') as handle:
             feeds = yaml.safe_load(handle)
 
         v = cerberus.Validator(schema)
diff --git a/intelmq/version.py b/intelmq/version.py
index cf6d810bf..6ccc22299 100644
--- a/intelmq/version.py
+++ b/intelmq/version.py
@@ -1,2 +1,2 @@
-__version_info__ = (2, 1, 1)
+__version_info__ = (2, 2, 0)
 __version__ = '.'.join(map(str, __version_info__))
diff --git a/setup.cfg b/setup.cfg
index 5e4d9fe1f..c9f27cebb 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,9 +1,6 @@
 [bdist_wheel]
 universal = 1
 
-[install]
-install-data=/opt/intelmq
-
 [build_sphinx]
 source-dir = docs/
 build-dir = docs/build
@@ -21,4 +18,11 @@ ignore = E221, E722, W504
 max-line-length = 132
 
 [metadata]
+name = intelmq
+description = IntelMQ is a solution for IT security teams for collecting and processing security feeds using a message queuing protocol.
+author = IntelMQ Community
+maintainer = Wagner Sebastian
+maintainer_email = wagner@cert.at
+url = https://github.com/certtools/intelmq/
+license = AGPL-3.0-only
 license_file = LICENSE
diff --git a/setup.py b/setup.py
index d7fee9843..037add8d1 100644
--- a/setup.py
+++ b/setup.py
@@ -13,23 +13,7 @@
     'python-termstyle>=0.1.10',
     'pytz>=2012c',
     'redis>=2.10',
-]
-if sys.version_info < (3, 5):
-    REQUIRES.append('typing')
-
-
-DATA = [
-    ('/opt/intelmq/etc/',
-     ['intelmq/bots/BOTS',
-      ],
-     ),
-    ('/opt/intelmq/etc/examples',
-     ['intelmq/etc/defaults.conf',
-      'intelmq/etc/harmonization.conf',
-      'intelmq/etc/pipeline.conf',
-      'intelmq/etc/runtime.conf',
-      ],
-     ),
+    'requests>=2.2.0',
 ]
 
 exec(open(os.path.join(os.path.dirname(__file__),
@@ -50,11 +34,12 @@
       version=__version__,
       maintainer='Sebastian Wagner',
       maintainer_email='wagner@cert.at',
-      python_requires='>=3.4',
+      python_requires='>=3.5',
       install_requires=REQUIRES,
       tests_require=[
          'Cerberus!=1.3',
          'pyyaml',
+         'requests_mock',
      ],
      test_suite='intelmq.tests',
      extras_require={
@@ -85,16 +70,15 @@
           'Operating System :: POSIX :: Linux',
           'Programming Language :: Python',
           'Programming Language :: Python :: 3',
-          'Programming Language :: Python :: 3.4',
           'Programming Language :: Python :: 3.5',
           'Programming Language :: Python :: 3.6',
           'Programming Language :: Python :: 3.7',
+          'Programming Language :: Python :: 3.8',
           'Programming Language :: Python :: 3 :: Only',
          'Programming Language :: Python :: Implementation :: CPython',
          'Topic :: Security',
      ],
      keywords='incident handling cert csirt',
-      data_files=DATA,
      entry_points={
          'console_scripts': [
              'intelmqctl = intelmq.bin.intelmqctl:main',