Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial test suite #142

Open
wants to merge 16 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
- id: name-tests-test
- id: requirements-txt-fixer
- id: flake8

- repo: git://github.com/asottile/reorder_python_imports
sha: 3d86483455ab5bd06cc1069fdd5ac57be5463f10
hooks:
Expand Down
2 changes: 1 addition & 1 deletion combine/baler.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def bale_CRITs_indicator(base_url, data, indicator_que):
logger.info("don't yet know what to do with: %s[%s]" % (indicator['indicator_type'], indicator['indicator']))


def bale_CRITs(harvest, filename):
def bale_CRITs(harvest):
""" taking the output from combine and pushing it to the CRITs web API"""
# checking the minimum requirements for parameters
# it would be nice to have some metadata on the feeds that can be imported in the intel library:
Expand Down
68 changes: 50 additions & 18 deletions combine/winnower.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@


def load_gi_org(filename):
    """Fill the module-level ``gi_org`` mapping from a CSV file and return it.

    Each CSV row is read as three positional columns named ``start``,
    ``end`` and ``org``.  For every row an entry keyed by the ``start``
    value is stored, holding a tuple of ``(IPRange(start, end), org)``.
    The ``org`` value is decoded to unicode with undecodable bytes
    replaced rather than raising.

    :param filename: path of the CSV file to read (opened in binary mode)
    :returns: the same module-level ``gi_org`` mapping that was updated
    """
    with open(filename, 'rb') as handle:
        for record in csv.DictReader(handle, fieldnames=['start', 'end', 'org']):
            first = record['start']
            # Build the range before decoding the name, matching the
            # evaluation order the callers have always seen on bad rows.
            span = IPRange(first, record['end'])
            org_name = unicode(record['org'], errors='replace')
            gi_org[first] = (span, org_name)

    return gi_org


def org_by_addr(address):
as_num = None
Expand All @@ -52,7 +52,6 @@ def maxhits(dns_records):
hmax = 0
hostname = None
for record in dns_records:
# logger.info("examining %s" % record)
if record['count'] > hmax:
hmax = record['count']
hostname = record['rrname'].rstrip('.')
Expand Down Expand Up @@ -119,44 +118,44 @@ def reserved(address):


def is_ipv4(address):
    """Return True if *address* is a dotted-quad IPv4 address, else False.

    Exactly four octets separated by dots are required and each octet must
    be in the range 0-255 (a leading zero such as ``01`` is tolerated).
    The whole string must match: trailing characters, including a trailing
    newline, make the address invalid.

    :param address: the candidate string to validate
    :returns: ``True`` for a valid IPv4 address, ``False`` otherwise
    """
    octet = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    # \Z anchors at the true end of the string; '$' would also match just
    # before a trailing newline and let '1.2.3.4\n' through.
    pattern = r'(?:%s\.){3}%s\Z' % (octet, octet)
    return re.match(pattern, address) is not None


def is_fqdn(address):
    """Return True if *address* looks like a fully qualified domain name.

    The name must be 4-255 characters long and consist of one or more
    dot-terminated labels followed by a top-level label.  Each leading
    label is 1-63 characters of letters, digits or hyphens and may not
    begin or end with a hyphen; the top-level label is 2-63 letters.
    The whole string must match, so a trailing newline is rejected.

    :param address: the candidate string to validate
    :returns: ``True`` for a syntactically valid FQDN, ``False`` otherwise
    """
    # \Z (not '$') in both the length look-ahead and the main pattern, so
    # that 'example.com\n' is not accepted as a valid name.
    pattern = (
        r'(?=^.{4,255}\Z)'
        r'(^((?!-)[a-zA-Z0-9-]{1,63}(?<!-)\.)+[a-zA-Z]{2,63}\Z)'
    )
    return re.match(pattern, address) is not None


def _ipv6_pattern():
    """Assemble an anchored regex for the RFC 3986 ``IPv6address`` grammar."""
    parts = {'h16': r'[0-9A-Fa-f]{1,4}'}
    octet = r'(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
    # ls32: the last 32 bits, written as two hex groups or a dotted quad.
    parts['ls32'] = r'(?:%s:%s|(?:%s\.){3}%s)' % (
        parts['h16'], parts['h16'], octet, octet)
    # One alternative per position at which "::" may compress zero groups.
    alternatives = (
        r'(?:%(h16)s:){6}%(ls32)s',
        r'::(?:%(h16)s:){5}%(ls32)s',
        r'(?:%(h16)s)?::(?:%(h16)s:){4}%(ls32)s',
        # One OR two leading groups are allowed here; requiring exactly two
        # would reject valid addresses such as '1::2:3:4:5:6'.
        r'(?:(?:%(h16)s:){0,1}%(h16)s)?::(?:%(h16)s:){3}%(ls32)s',
        r'(?:(?:%(h16)s:){0,2}%(h16)s)?::(?:%(h16)s:){2}%(ls32)s',
        r'(?:(?:%(h16)s:){0,3}%(h16)s)?::%(h16)s:%(ls32)s',
        r'(?:(?:%(h16)s:){0,4}%(h16)s)?::%(ls32)s',
        r'(?:(?:%(h16)s:){0,5}%(h16)s)?::%(h16)s',
        r'(?:(?:%(h16)s:){0,6}%(h16)s)?::',
    )
    # \Z rather than '$' so that a trailing newline is not silently accepted.
    return r'^(?:%s)\Z' % '|'.join(alt % parts for alt in alternatives)


# Compiled once at import time; the pattern never changes.
_IPV6_RE = re.compile(_ipv6_pattern())


def is_ipv6(address):
    """Return True if *address* is a textual IPv6 address, else False.

    Full, ``::``-compressed and IPv4-embedded forms (for example
    ``::ffff:192.168.0.1``) are accepted.  Zone identifiers and prefix
    lengths are not part of the grammar and are rejected, as is any
    trailing character including a newline.

    :param address: the candidate string to validate
    :returns: ``True`` for a valid IPv6 address, ``False`` otherwise
    """
    return _IPV6_RE.match(address) is not None


def winnow(in_file, out_file, enr_file):
config = ConfigParser.SafeConfigParser(allow_no_value=True)
cfg_success = config.read('combine.cfg')
if not cfg_success:
logger.error('Winnower: Could not read combine.cfg.')
logger.error('HINT: edit combine-example.cfg and save as combine.cfg.')
return

if not os.path.isfile('./tld-list.txt'):
uniaccept.refreshtlddb("./tld-list.txt")

server = config.get('Winnower', 'dnsdb_server')
api = config.get('Winnower', 'dnsdb_api')
def check_enrich_ip(config):
    """Report whether IPv4 enrichment is switched on in the configuration.

    Reads the boolean ``enrich_ip`` option from the ``Winnower`` section of
    *config*, logs the outcome at info level and hands the value back.

    :param config: a parsed configuration object exposing ``getboolean``
    :returns: the boolean value of ``Winnower.enrich_ip``
    """
    enabled = config.getboolean('Winnower', 'enrich_ip')
    status = 'Enriching IPv4 indicators: TRUE' if enabled else 'Enriching IPv4 indicators: FALSE'
    logger.info(status)
    return enabled


def check_enrich_dns(config):
    """Report whether DNS enrichment is switched on in the configuration.

    Reads the boolean ``enrich_dns`` option from the ``Winnower`` section of
    *config*, logs the outcome at info level and hands the value back.

    :param config: a parsed configuration object exposing ``getboolean``
    :returns: the boolean value of ``Winnower.enrich_dns``
    """
    enabled = config.getboolean('Winnower', 'enrich_dns')
    status = 'Enriching DNS indicators: TRUE' if enabled else 'Enriching DNS indicators: FALSE'
    logger.info(status)
    return enabled


def check_enrich_hash(config):
enrich_hash = config.getboolean('Winnower', 'enrich_hash')
if enrich_hash:
logger.info('Enriching Hash indicators: TRUE')
Expand All @@ -165,11 +164,44 @@ def winnow(in_file, out_file, enr_file):

logger.info('Setting up DNSDB client')


def setup_dnsdb(server, api):
    """Create a DNSDB client, or return None when the setup is unusable.

    The client is always constructed first.  It is then discarded when the
    API key is still the ``YOUR_API_KEY_HERE`` placeholder, or when a probe
    rdata-name query for ``google.com`` comes back empty.

    :param server: DNSDB server value passed straight to the client
    :param api: DNSDB API key passed straight to the client
    :returns: a ``dnsdb_query.DnsdbClient`` instance, or ``None`` if the
        configuration was judged invalid
    """
    logger.info('Setting up DNSDB client')
    client = dnsdb_query.DnsdbClient(server, api)

    # The placeholder check comes first so that no probe query is issued
    # when the key was obviously never configured.
    key_is_placeholder = api == 'YOUR_API_KEY_HERE'
    if not key_is_placeholder and len(client.query_rdata_name('google.com')) != 0:
        return client

    logger.info('Invalid DNSDB configuration found')
    return None


def winnow(in_file, out_file, enr_file):
config = ConfigParser.SafeConfigParser(allow_no_value=True)
cfg_success = config.read('combine.cfg')
if not cfg_success:
logger.error('Winnower: Could not read combine.cfg.')
logger.error('HINT: edit combine-example.cfg and save as combine.cfg.')
return

if not os.path.isfile('./tld-list.txt'):
uniaccept.refreshtlddb("./tld-list.txt")

server = config.get('Winnower', 'dnsdb_server')
api = config.get('Winnower', 'dnsdb_api')

enrich_ip = check_enrich_ip(config)
enrich_dns = check_enrich_dns(config)
if enrich_dns or enrich_ip:
server = config.get('Winnower', 'dnsdb_server')
api = config.get('Winnower', 'dnsdb_api')
else:
server = None
api = None

if server and api:
dnsdb = setup_dnsdb(server, api)
else:
dnsdb = None

with open(in_file, 'rb') as f:
crop = json.load(f)
Expand Down Expand Up @@ -207,7 +239,7 @@ def winnow(in_file, out_file, enr_file):
else:
enriched.append(dict(each.items() + enrich_IPv4(ipaddr, geo_data).items()))
else:
logger.error('Found invalid address: %s from: %s' % (indicator, each['source']))
logger.error('Found invalid address: %s', indicator)
elif (indicator_type == 'IPv4' or indicator_type == 'IPv6') and is_ipv6(indicator): # generic cleanup
each['indicator_type'] = 'IPv6'
wheat.append(each)
Expand Down