Skip to content

Commit 3d6a87e

Browse files
committed
Updates in response to feedback.
1 parent b279b69 commit 3d6a87e

File tree

6 files changed

+130
-37
lines changed

6 files changed

+130
-37
lines changed

intelmq/bots/collectors/shadowserver/collector_reports_api.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,19 @@ def init(self):
6868

6969
if self.file_format is not None:
7070
if not (self.file_format == 'csv'):
71-
raise ValueError('Invalid file_format')
71+
raise ValueError("Invalid file_format '%s'. Must be 'csv'." % self.file_format)
7272
else:
7373
self.file_format = 'csv'
7474

7575
self.preamble = f'{{ "apikey": "{self.api_key}" '
7676

77+
def check(parameters: dict):
    """
    Validate the bot's configured parameters without starting the bot.

    Every problem is reported, so a deprecated parameter no longer hides an
    invalid one (or the other way round) depending on dictionary order.

    Parameters:
        parameters (dict): the configured bot parameters

    Returns:
        A list of ``[level, message]`` pairs, one per problem found, or None
        when there is nothing to report.
    """
    results = []

    # init() accepts an unset (None) file_format and falls back to 'csv', so
    # only an explicitly configured different value is an error.
    file_format = parameters.get('file_format')
    if file_format is not None and file_format != 'csv':
        # one-element tuple keeps the formatting safe for tuple values
        results.append(["error", "Invalid file_format '%s'. Must be 'csv'." % (file_format,)])

    if 'country' in parameters:
        results.append(["warning", "Deprecated parameter 'country' found. Please use 'reports' instead. The backwards-compatibility will be removed in IntelMQ version 4.0.0."])

    # Preserve the previous contract: None (not an empty list) when all is well.
    return results or None
83+
7784
def _headers(self, data):
    """
    Build the request header that carries the signature of *data*.

    Parameters:
        data (str): the request body to sign

    Returns:
        dict: a single ``HMAC2`` entry holding the hex-encoded HMAC-SHA256
        digest of *data*, keyed with ``self.secret``.
    """
    key = self.secret.encode()
    payload = data.encode('utf-8')
    signature = hmac.new(key, payload, digestmod=hashlib.sha256)
    return {'HMAC2': signature.hexdigest()}
7986

intelmq/bots/parsers/shadowserver/README.md

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,28 @@ This module is maintained by [The Shadowserver Foundation](https://www.shadowser
77

88
Please contact intelmq@shadowserver.org with any issues or concerns.
99

10-
The report configuration is now stored in a _schema.json_ file downloaded from https://interchange.shadowserver.org/intelmq/v1/schema.
10+
The report configuration is now stored in a _shadowserver-schema.json_ file downloaded from https://interchange.shadowserver.org/intelmq/v1/schema.
1111

12-
For environments that have internet connectivity the `update_schema.py` script should be called from a cron job to obtain the latest revision.
13-
The parser will attempt to download a schema update on startup unless INTELMQ_SKIP_INTERNET is set.
12+
The parser will attempt to download a schema update on startup when the *auto_update* option is enabled.
1413

15-
For air-gapped systems automation will be required to download and copy the _schema.json_ file into this directory.
14+
Schema downloads can also be scheduled as a cron job:
15+
16+
```
17+
02 01 * * * intelmq.bots.parsers.shadowserver.parser --update-schema
18+
```
19+
20+
For air-gapped systems, automation will be required to download the file and copy it to VAR_STATE_PATH/shadowserver-schema.json.
1621

1722
The parser will automatically reload the configuration when the file changes.
1823

1924

25+
## Schema contract
26+
27+
Once set, the `classification.identifier`, `classification.taxonomy`, and `classification.type` fields will remain static.
28+
29+
Once set, report fields will not be deleted.
30+
31+
2032
## Sample configuration:
2133

2234
```
@@ -46,6 +58,7 @@ shadowserver-parser:
4658
parameters:
4759
destination_queues:
4860
_default: [file-output-queue]
61+
auto_update: true
4962
run_mode: continuous
5063
```
5164

intelmq/bots/parsers/shadowserver/_config.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -82,20 +82,23 @@
8282
import base64
8383
import binascii
8484
import json
85-
import urllib.request
8685
import tempfile
8786
from typing import Optional, Dict, Tuple, Any
8887

8988
import intelmq.lib.harmonization as harmonization
89+
from intelmq.lib.utils import create_request_session
90+
from intelmq import VAR_STATE_PATH
9091

9192

9293
class __Container:
9394
pass
9495

9596

9697
__config = __Container()
97-
__config.schema_file = os.path.join(os.path.dirname(__file__), 'schema.json')
98+
__config.schema_file = os.path.join(VAR_STATE_PATH, 'shadowserver-schema.json')
99+
__config.schema_base = os.path.join(os.path.dirname(__file__), 'schema.json.test')
98100
__config.schema_mtime = 0.0
101+
__config.auto_update = False
99102
__config.feedname_mapping = {}
100103
__config.filename_mapping = {}
101104

@@ -105,13 +108,16 @@ def set_logger(logger):
105108
__config.logger = logger
106109

107110

111+
def enable_auto_update(enable):
    """
    Turn the automatic schema update on or off.

    Parameters:
        enable: stored as-is in the module configuration's ``auto_update`` flag
    """
    __config.auto_update = enable
114+
115+
108116
def get_feed_by_feedname(given_feedname: str) -> Optional[Dict[str, Any]]:
    """
    Look up a feed in the currently loaded feed-name mapping.

    Returns the entry stored under *given_feedname*, or None when the name is
    not in the mapping.
    """
    mapping = __config.feedname_mapping
    return mapping.get(given_feedname)
111118

112119

113120
def get_feed_by_filename(given_filename: str) -> Optional[Tuple[str, Dict[str, Any]]]:
    """
    Look up a feed in the currently loaded file-name mapping.

    Returns the entry stored under *given_filename*, or None when the file
    name is not in the mapping.
    """
    mapping = __config.filename_mapping
    return mapping.get(given_filename)
116122

117123

@@ -289,19 +295,18 @@ def reload():
289295
else:
290296
__config.logger.info("The schema file does not exist.")
291297

292-
if __config.schema_mtime == 0.0 and mtime == 0.0 and not os.environ.get('INTELMQ_SKIP_INTERNET'):
293-
__config.logger.info("Attempting to download schema.")
298+
if __config.schema_mtime == 0.0 and mtime == 0.0 and __config.auto_update:
294299
update_schema()
295300

296301
__config.feedname_mapping.clear()
297302
__config.filename_mapping.clear()
298-
for schema_file in [__config.schema_file, ".".join([__config.schema_file, 'test'])]:
303+
for schema_file in [__config.schema_file, __config.schema_base]:
299304
if os.path.isfile(schema_file):
300305
with open(schema_file) as fh:
301306
schema = json.load(fh)
302307
for report in schema:
303308
if report == "_meta":
304-
__config.logger.info("Loading schema %s." % schema[report]['date_created'])
309+
__config.logger.info("Loading schema %r." % schema[report]['date_created'])
305310
for msg in schema[report]['change_log']:
306311
__config.logger.info(msg)
307312
else:
@@ -313,37 +318,55 @@ def reload():
313318
def update_schema():
    """
    Download the latest report schema and install it as the active schema file.

    The schema is fetched into a temporary file in VAR_STATE_PATH, checked to
    be JSON carrying a ``_meta.date_created`` version, and moved over the
    current schema file when that version differs from the installed one.
    The previous schema file is kept next to it with a ``.bak`` suffix.

    Returns:
        bool: True when a new schema version was installed. False when
        downloads are disabled through INTELMQ_SKIP_INTERNET, when the
        download or its validation failed, when the existing schema file
        could not be read or moved aside, or when the downloaded version is
        the one already installed.
    """
    if os.environ.get('INTELMQ_SKIP_INTERNET'):
        return False

    # download the schema to a temp file
    try:
        (th, tmp) = tempfile.mkstemp(dir=VAR_STATE_PATH)
    except OSError as e:
        __config.logger.error("Unable to create a temp file in %r: %s" % (VAR_STATE_PATH, e))
        return False
    # mkstemp() returns an already open descriptor; the file is reopened by
    # name below, so close it here instead of leaking one per call.
    os.close(th)

    url = 'https://interchange.shadowserver.org/intelmq/v1/schema'
    __config.logger.info("Attempting to download schema from %r" % url)
    __config.logger.debug("Using temp file %r for the download." % tmp)
    try:
        with create_request_session() as session:
            with session.get(url, stream=True) as r:
                r.raise_for_status()
                with open(tmp, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
    except Exception as e:
        __config.logger.error("Failed to download %r: %s" % (url, e))
        # An empty or partial download has no diagnostic value; remove it so
        # repeated failures do not pile up files in the state directory.
        try:
            os.unlink(tmp)
        except OSError:
            pass
        return False
    __config.logger.info("Download successful.")

    new_version = ''
    old_version = ''

    try:
        # validate the downloaded file
        with open(tmp, encoding='utf-8') as fh:
            schema = json.load(fh)
        new_version = schema['_meta']['date_created']
    except Exception as e:
        # leave tempfile behind for diagnosis
        __config.logger.error("Failed to validate %r: %s" % (tmp, e))
        return False

    if os.path.exists(__config.schema_file):
        # compare the new version against the old; rename the existing file
        try:
            with open(__config.schema_file, encoding='utf-8') as fh:
                schema = json.load(fh)
            old_version = schema['_meta']['date_created']
            if new_version != old_version:
                os.replace(__config.schema_file, ".".join([__config.schema_file, 'bak']))
        except Exception as e:
            __config.logger.error("Unable to replace schema file: %s" % str(e))
            return False

    if new_version != old_version:
        os.replace(tmp, __config.schema_file)
        __config.logger.info("New schema version is %r." % new_version)
        return True

    # same version as the installed schema: discard the download
    os.unlink(tmp)
    return False

intelmq/bots/parsers/shadowserver/parser.py

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626

2727
from intelmq.lib.bot import ParserBot
2828
from intelmq.lib.exceptions import InvalidKey, InvalidValue
29+
from intelmq.bin.intelmqctl import IntelMQController
30+
import intelmq.lib.utils as utils
2931
import intelmq.bots.parsers.shadowserver._config as config
3032

3133

@@ -34,8 +36,7 @@ class ShadowserverParserBot(ParserBot):
3436
Parse all ShadowServer feeds
3537
3638
Parameters:
37-
schema_file (str): Path to the report schema file
38-
39+
auto_update (boolean): Enable automatic schema download
3940
"""
4041

4142
recover_line = ParserBot.recover_line_csv_dict
@@ -45,13 +46,15 @@ class ShadowserverParserBot(ParserBot):
4546
feedname = None
4647
_mode = None
4748
overwrite = False
49+
auto_update = False
4850

4951
def init(self):
5052
config.set_logger(self.logger)
51-
try:
52-
config.update_schema()
53-
except Exception as e:
54-
self.logger.warning("Schema update failed: %s." % e)
53+
if self.auto_update:
54+
config.enable_auto_update(True)
55+
self.logger.debug("Feature 'auto_update' is enabled.")
56+
config.reload()
57+
5558
if self.feedname is not None:
5659
self._sparser_config = config.get_feed_by_feedname(self.feedname)
5760
if self._sparser_config:
@@ -228,5 +231,35 @@ def parse_line(self, row, report):
228231
def shutdown(self):
229232
self.feedname = None
230233

234+
@classmethod
def _create_argparser(cls):
    """
    Extend the inherited argument parser with the schema maintenance flags.

    Adds two boolean switches: ``--update-schema`` and ``--verbose``.

    Returns:
        the parser obtained from the parent class, with both flags added
    """
    parser = super()._create_argparser()
    extra_flags = (
        ("--update-schema", 'downloads latest report schema'),
        ("--verbose", 'be verbose'),
    )
    for flag, help_text in extra_flags:
        parser.add_argument(flag, action='store_true', help=help_text)
    return parser
240+
241+
@classmethod
def run(cls, parsed_args=None):
    """
    Entry point: either start the bot or just refresh the report schema.

    Without ``--update-schema`` the call is handed to the parent class
    unchanged. With it, the schema is downloaded and, if a new version was
    installed, every configured bot of this module whose ``auto_update``
    parameter is not set to a false value is asked to reload.

    Parameters:
        parsed_args: already parsed command line arguments; parsed from the
            command line when not given
    """
    args = parsed_args if parsed_args else cls._create_argparser().parse_args()

    if not args.update_schema:
        # regular operation
        super().run(parsed_args=args)
        return

    # schema maintenance mode: log to the console only, quiet unless --verbose
    logger = utils.log(__name__, log_path=None)
    logger.setLevel('INFO' if args.verbose else 'ERROR')
    config.set_logger(logger)

    if not config.update_schema():
        # nothing new was installed, so there is nothing to reload
        return

    runtime_conf = utils.get_bots_settings()
    try:
        ctl = IntelMQController()
        for bot_id in runtime_conf:
            bot_conf = runtime_conf[bot_id]
            if bot_conf["module"] != __name__:
                continue
            if bot_conf['parameters'].get('auto_update', True):
                ctl.bot_reload(bot_id)
    except Exception as e:
        logger.error("Failed to signal bot: %r" % str(e))
263+
231264

232265
BOT = ShadowserverParserBot

intelmq/bots/parsers/shadowserver/update_schema.py

Lines changed: 0 additions & 11 deletions
This file was deleted.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# SPDX-FileCopyrightText: 2023 The Shadowserver Foundation
2+
#
3+
# SPDX-License-Identifier: AGPL-3.0-or-later
4+
5+
# -*- coding: utf-8 -*-
6+
"""
7+
Created on Thu Jul 27 19:44:44 2023
8+
9+
"""
10+
11+
import unittest
12+
import os
13+
import logging
14+
from intelmq import VAR_STATE_PATH
15+
import intelmq.bots.parsers.shadowserver._config as config
16+
import intelmq.lib.utils as utils
17+
import intelmq.lib.test as test
18+
19+
@test.skip_internet()
class TestShadowserverSchemaDownload(unittest.TestCase):
    """
    Online test for the Shadowserver schema download.

    NOTE(review): presumably skipped by the decorator when internet access is
    not available for tests - confirm against intelmq.lib.test.
    """

    def test_download(self):
        """A download without an installed schema must succeed and create the file."""
        config.set_logger(utils.log('test-bot', log_path=None))
        target = os.path.join(VAR_STATE_PATH, 'shadowserver-schema.json')

        # Remove any installed schema first, so the result does not depend
        # on a schema file left over from an earlier run.
        if os.path.exists(target):
            os.unlink(target)

        self.assertTrue(config.update_schema())
        self.assertTrue(os.path.exists(target))

0 commit comments

Comments
 (0)