diff --git a/README.md b/README.md
index 584b70c..92ad25b 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ The output data might be used to:
 ## How to run
 
-`$ python3 asprofiler.py -if asns.csv -of export.json`
+`$ python3 asprofiler.py -if test.csv -of export.json`
 
-_asns.csv_: contains all the candidate ASes to build their profile (with format 1,2,3)
+_test.csv_: contains all the candidate ASes for which a profile is built (with format 1,2,3)
 
 _export.json_: contains all the exported information
diff --git a/analyse.py b/analyse.py
new file mode 100644
index 0000000..20092ea
--- /dev/null
+++ b/analyse.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+
+import lib
+import ujson
+import bz2
+from collections import defaultdict
+import requests
+
+class Analyse():
+
+    def __init__(self, ixp_filename, as_to_ixp_filename, as_to_facility_filename, as_to_relationship_v4_filename, as_to_relationship_v6_filename, customer_cone_filename, asns):
+
+        self.asns = asns
+        self.ixp_info = {}
+        self.as_to_ixp_info = defaultdict(set)
+        self.as_to_facility_info = defaultdict(set)
+        self.p2c = {'v4': defaultdict(set), 'v6': defaultdict(set)}
+        self.c2p = {'v4': defaultdict(set), 'v6': defaultdict(set)}
+        self.p2p = {'v4': defaultdict(set), 'v6': defaultdict(set)}
+        self.customer_cone = {}
+        self.as2org = {}
+
+        self.ixp_filename = ixp_filename
+        self.as_to_ixp_filename = as_to_ixp_filename
+        self.as_to_facility_filename = as_to_facility_filename
+
+        self.as_to_relationship_v4_filename = as_to_relationship_v4_filename
+        self.as_to_relationship_v6_filename = as_to_relationship_v6_filename
+        self.customer_cone_filename = customer_cone_filename
+
+        self.import_ixps_info()
+        self.import_as_to_ixps_info()
+        self.import_as_facility_info()
+        self.import_as_to_customer_cone()
+        self.import_as_relationship(self.as_to_relationship_v4_filename, 'v4')
+        self.import_as_relationship(self.as_to_relationship_v6_filename, 'v6')
+        self.get_as_to_organisations()
+
+    def export_data(self, output_filename):
+
+        print('Exporting data to: ' + output_filename)
+
+        data_to_export = defaultdict(dict)
+        for asn in self.asns:
+            asn = int(asn)
+            data_to_export[asn]['asn'] = asn
+            data_to_export[asn]['orgname'] = self.as2org.get(asn)
+            # Sets are not JSON-serialisable, so convert them to lists before the export.
+            data_to_export[asn]['ixps'] = list(self.as_to_ixp_info.get(asn, []))
+            data_to_export[asn]['facilities'] = list(self.as_to_facility_info.get(asn, []))
+            data_to_export[asn]['providers_v4'] = [(neighbour, self.as2org.get(neighbour)) for neighbour in self.c2p['v4'].get(asn, [])]
+            data_to_export[asn]['providers_v6'] = [(neighbour, self.as2org.get(neighbour)) for neighbour in self.c2p['v6'].get(asn, [])]
+            data_to_export[asn]['customers_v4'] = [(neighbour, self.as2org.get(neighbour)) for neighbour in self.p2c['v4'].get(asn, [])]
+            data_to_export[asn]['customers_v6'] = [(neighbour, self.as2org.get(neighbour)) for neighbour in self.p2c['v6'].get(asn, [])]
+            data_to_export[asn]['peers_v4'] = [(neighbour, self.as2org.get(neighbour)) for neighbour in self.p2p['v4'].get(asn, [])]
+            data_to_export[asn]['peers_v6'] = [(neighbour, self.as2org.get(neighbour)) for neighbour in self.p2p['v6'].get(asn, [])]
+            data_to_export[asn]['customer_cone'] = self.customer_cone.get(asn, None)
+
+        lib.export_json(data_to_export, output_filename)
+        print('Finished')
+
+    def get_as_to_organisations(self):
+        # Fetch for each candidate ASN its corresponding organisation name.
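+        # Organisation names are resolved not only for the candidate ASNs but also
+        # for all of their providers, customers and peers, so that the exported
+        # neighbour tuples can carry a name next to each ASN.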
+
+        print('Fetching organisation names for each ASN...')
+
+        extra_asns = set()
+        for asn in self.asns:
+            extra_asns = extra_asns.union(self.c2p['v4'].get(asn, set()))
+            extra_asns = extra_asns.union(self.c2p['v6'].get(asn, set()))
+            extra_asns = extra_asns.union(self.p2c['v4'].get(asn, set()))
+            extra_asns = extra_asns.union(self.p2c['v6'].get(asn, set()))
+            extra_asns = extra_asns.union(self.p2p['v4'].get(asn, set()))
+            extra_asns = extra_asns.union(self.p2p['v6'].get(asn, set()))
+
+        extra_asns = [str(asn) for asn in extra_asns.union(self.asns)]
+
+        # Query the as2org API in batches of `chunk` ASNs per request.
+        chunk = 400
+        caida_api_url = 'https://api.data.caida.org/as2org/v1/asns/'
+        for i in range(0, len(extra_asns), chunk):
+
+            query_url = caida_api_url + '_'.join(extra_asns[i:i + chunk])
+            request_response = requests.get(url=query_url).json()
+
+            for entry in request_response['data']:
+                asn = int(entry['asn'])
+                orgname = entry['orgName']
+                self.as2org[asn] = orgname
+
+    def import_ixps_info(self):
+
+        with open(self.ixp_filename) as f:
+            next(f)
+            for line in f:
+                data = ujson.loads(line)
+                country = data.get('country') if not isinstance(data.get('country'), list) else data.get('country')[0]
+                city = data.get('city') if not isinstance(data.get('city'), list) else data.get('city')[0]
+                self.ixp_info[int(data['ix_id'])] = (data['name'], country, city)
+
+    def import_as_to_ixps_info(self):
+        with open(self.as_to_ixp_filename) as f:
+            next(f)
+            for line in f:
+                data = ujson.loads(line)
+                self.as_to_ixp_info[int(data['asn'])].add(self.ixp_info[int(data['ix_id'])])
+
+    def import_as_facility_info(self):
+        data = lib.import_json(self.as_to_facility_filename)
+        for entry in data['netfac']['data']:
+            self.as_to_facility_info[int(entry['local_asn'])].add((entry['name'], entry.get('country'), entry.get('city')))
+
+    def import_as_relationship(self, filename, version):
+        with bz2.open(filename, 'rt') as file:
+            for line in file:
+                if not line.startswith('#'):
+                    line = line.strip().split('|')
+                    provider = int(line[0])
+                    customer = int(line[1])
+                    relationship = line[2]
+
+                    # provider-to-customer relations
+                    if relationship == '-1':
+                        self.p2c[version][provider].add(customer)
+                        self.c2p[version][customer].add(provider)
+                    # peer-to-peer relations
+                    elif relationship == '0':
+                        self.p2p[version][provider].add(customer)
+                        self.p2p[version][customer].add(provider)
+
+    def import_as_to_customer_cone(self):
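+        # Each line of the ppdc-ases file lists an ASN followed by the ASNs in its
+        # customer cone; the AS itself is dropped from its own cone after parsing.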
+        with bz2.open(self.customer_cone_filename, 'rt') as file:
+            for line in file:
+                if not line.startswith('#'):
+                    line = line.strip().split()
+                    asn = int(line[0])
+                    cone = [int(cone_asn) for cone_asn in line[1:]]
+                    if asn in cone:
+                        cone.remove(asn)
+                    self.customer_cone[asn] = cone
diff --git a/asprofiler.py b/asprofiler.py
new file mode 100644
index 0000000..7cfe96f
--- /dev/null
+++ b/asprofiler.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+import analyse
+import download
+import argparse
+import lib
+
+def main():
+    parser = argparse.ArgumentParser(description="A tool to profile Autonomous Systems based on publicly available datasets")
+    parser.add_argument('-if', '--input_file', type=str, help='csv file containing the total list of ASNs (e.g. 1,2,3,4)', required=True)
+    parser.add_argument('-of', '--output_file', type=str, help='.json output filename containing all the available data for the profiled ASes', required=True)
+    args = parser.parse_args()
+
+    # Step 1: Import all the ASes based on the given input file
+    asns = lib.import_csv(args.input_file)
+
+    # Step 2: Download all the appropriate datasets from the publicly available databases
+    download_ = download.Download()
+
+    # Step 3: Sort and categorize the downloaded datasets
+    analyse_ = analyse.Analyse(
+        download_.ixp_filename,
+        download_.as_to_ixp_filename,
+        download_.peeringdb_filename,
+        download_.as_relations_v4_filename,
+        download_.as_relations_v6_filename,
+        download_.as_cust_cone_filename,
+        asns)
+
+    # Step 4: Profile the ASes and export the available data
+    analyse_.export_data(args.output_file)
+
+if __name__ == '__main__':
+    main()
diff --git a/download.py b/download.py
new file mode 100644
index 0000000..66488da
--- /dev/null
+++ b/download.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+
+import datetime
+from bs4 import BeautifulSoup
+import requests
+import wget
+import os
+
+class Download():
+
+    def __init__(self):
+        self.ixp_filename = None
+        self.as_to_ixp_filename = None
+        self.peeringdb_filename = None
+        self.as_relations_v4_filename = None
+        self.as_relations_v6_filename = None
+        self.as_cust_cone_filename = None
+
+        print('Searching for the most recent datasets...')
+        self.download_caida_relationship_cone_datasets()
+        self.download_peeringdb()
+        self.download_caida_ixp_asn_datasets()
+
+    def download_file(self, webpage, filename):
+        if os.path.exists('./' + filename):
+            print('Skipping, already exists:', filename)
+        else:
+            print('Downloading: ' + webpage + filename)
+            wget.download(webpage + filename)
+            print()
+
+    def download_caida_ixp_asn_datasets(self):
+        webpage = 'https://publicdata.caida.org/datasets/ixps/'
+        html_page = requests.get(webpage)
+        soup = BeautifulSoup(html_page.text, "lxml")
+
+        ixs = []
+        ix_asns = []
+        for link in soup.findAll('a'):
+            if link.has_attr('href'):
+                if 'ixs_' in link['href']:
+                    ixs.append(link['href'])
+                elif 'ix-asns_' in link['href']:
+                    ix_asns.append(link['href'])
+
+        # To find the most recent snapshot (the filenames embed the date, so the latest sorts last)
+        ixs.sort()
+        ix_asns.sort()
+
+        self.download_file(webpage, ixs[-1])
+        self.download_file(webpage, ix_asns[-1])
+        self.ixp_filename = ixs[-1]
+        self.as_to_ixp_filename = ix_asns[-1]
+
+    def download_peeringdb(self):
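+        # Dumps are mirrored under <year>/<month>/ and named by date; the previous
+        # day's dump is requested because the current day's file may not exist yet.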
+
+        webpage = 'https://publicdata.caida.org/datasets/peeringdb/'
+
+        previous_date = (datetime.datetime.today() - datetime.timedelta(days=1))
+        year = str(previous_date.year)
+        month = '{:02d}'.format(previous_date.month)
+        day = '{:02d}'.format(previous_date.day)
+
+        webpage += year + '/' + month + '/'
+        file = 'peeringdb_2_dump_' + year + '_' + month + '_' + day + '.json'
+        self.download_file(webpage, file)
+        self.peeringdb_filename = file
+
+    def download_caida_relationship_cone_datasets(self):
+
+        webpage = 'https://publicdata.caida.org/datasets/as-relationships/serial-2/'
+        html_page = requests.get(webpage)
+        soup = BeautifulSoup(html_page.text, "lxml")
+
+        as_relations_v4 = []
+        for link in soup.findAll('a'):
+            if link.has_attr('href'):
+                if 'as-rel2.txt.bz2' in link['href']:
+                    as_relations_v4.append(link['href'])
+
+        # To find the most recent snapshot
+        as_relations_v4.sort()
+
+        self.download_file(webpage, as_relations_v4[-1])
+        self.as_relations_v4_filename = as_relations_v4[-1]
+
+        webpage = 'https://publicdata.caida.org/datasets/2013-asrank-data-supplement/data/'
+        html_page = requests.get(webpage)
+        soup = BeautifulSoup(html_page.text, "lxml")
+
+        as_relations_v6 = []
+        as_cust_cone = []
+        for link in soup.findAll('a'):
+            if link.has_attr('href'):
+                if 'as-rel.v6-stable.txt.bz2' in link['href']:
+                    as_relations_v6.append(link['href'])
+                elif 'ppdc-ases.txt.bz2' in link['href']:
+                    as_cust_cone.append(link['href'])
+
+        # To find the most recent snapshot
+        as_relations_v6.sort()
+        as_cust_cone.sort()
+
+        self.download_file(webpage, as_relations_v6[-1])
+        self.download_file(webpage, as_cust_cone[-1])
+        self.as_relations_v6_filename = as_relations_v6[-1]
+        self.as_cust_cone_filename = as_cust_cone[-1]
diff --git a/lib.py b/lib.py
new file mode 100644
index 0000000..895e97c
--- /dev/null
+++ b/lib.py
@@ -0,0 +1,32 @@
+import ujson as json
+import csv
+
+def import_csv(filename):
+    try:
+        asns = []
+        with open(filename, 'r') as fp:
+            reader = csv.reader(fp)
+            for row in reader:
+                asns += row
+        return set([int(asn) for asn in asns])
+    except Exception:
+        print('Import Error with file:', filename)
+        exit()
+
+def import_json(filename):
+    try:
+        with open(filename, 'r') as fp:
+            data = json.load(fp)
+        return data
+    except Exception:
+        print('Import Error with file:', filename)
+        exit()
+
+def export_json(data, filename):
+    try:
+        with open(filename, 'w') as fp:
+            json.dump(data, fp)
+    except Exception:
+        print('Export Error with file:', filename)
+        exit()
diff --git a/test.csv b/test.csv
new file mode 100644
index 0000000..b0246d5
--- /dev/null
+++ b/test.csv
@@ -0,0 +1 @@
+1,2,3