first version

gnomikos · Jul 18, 2023 · 2c8ce73 · 2c8ce73
1 parent 1552b1d
commit 2c8ce73
Show file tree

Hide file tree

Showing 6 changed files with 333 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -33,7 +33,7 @@ The output data might be used to:
 ## How to run
 `$ python3 asprofiler.py -if asns.csv -of export.json`
 
-_asns.csv_: contains all the candidate ASes to build their profile (with format 1,2,3)
+_test.csv_: contains the candidate ASes whose profiles will be built, as comma-separated AS numbers (e.g. 1,2,3)
 
 _export.json_: contains all the exported information
 

diff --git a/analyse.py b/analyse.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+
+import lib
+import ujson
+import bz2
+from collections import defaultdict
+import requests
+
+class Analyse():
+    """Load the downloaded datasets and export a profile for each candidate AS.
+
+    Constructing an instance reads every dataset file into in-memory lookup
+    tables and queries the CAIDA as2org API for the organisation names of the
+    candidate ASNs and of their direct neighbours. ``export_data`` then
+    writes one profile per candidate ASN to a JSON file.
+    """
+
+    # Endpoint of the CAIDA as2org API; the ASNs to resolve are appended to
+    # it, joined with '_'.
+    CAIDA_AS2ORG_URL = 'https://api.data.caida.org/as2org/v1/asns/'
+    # Maximum number of ASNs sent in a single as2org request.
+    AS2ORG_CHUNK_SIZE = 400
+    # Seconds to wait for an as2org response before giving up.
+    REQUEST_TIMEOUT = 60
+
+    def __init__(self, ixp_filename, as_to_ixp_filename, as_to_facility_filename, as_to_relationship_v4_filename, as_to_relationship_v6_filename, customer_cone_filename, asns):
+        """Remember the dataset locations and load all of them.
+
+        Args:
+            ixp_filename: text file with one JSON object per line describing
+                an IXP (keys ``ix_id``, ``name``, ``country``, ``city``).
+            as_to_ixp_filename: text file with one JSON object per line
+                linking an ``asn`` to an ``ix_id``.
+            as_to_facility_filename: JSON file whose ``netfac`` -> ``data``
+                list links a ``local_asn`` to a facility.
+            as_to_relationship_v4_filename: bz2 text file of ``a|b|relation``
+                lines for IPv4.
+            as_to_relationship_v6_filename: same format, for IPv6.
+            customer_cone_filename: bz2 text file where each line holds an
+                ASN followed by the members of its customer cone.
+            asns: the candidate ASNs to profile (ints or numeric strings).
+        """
+
+        self.asns = asns
+        # ix_id -> (name, country, city)
+        self.ixp_info = {}
+        # asn -> set of (name, country, city) tuples
+        self.as_to_ixp_info = defaultdict(set)
+        self.as_to_facility_info = defaultdict(set)
+        # Relationship tables, keyed by IP version and then by ASN.
+        self.p2c = {'v4': defaultdict(set), 'v6': defaultdict(set)}
+        self.c2p = {'v4': defaultdict(set), 'v6': defaultdict(set)}
+        self.p2p = {'v4': defaultdict(set), 'v6': defaultdict(set)}
+        # asn -> list of ASNs in its customer cone (the AS itself excluded)
+        self.customer_cone = {}
+        # asn -> organisation name
+        self.as2org = {}
+
+        self.ixp_filename = ixp_filename
+        self.as_to_ixp_filename = as_to_ixp_filename
+        self.as_to_facility_filename = as_to_facility_filename
+
+        self.as_to_relationship_v4_filename = as_to_relationship_v4_filename
+        self.as_to_relationship_v6_filename = as_to_relationship_v6_filename
+        self.customer_cone_filename = customer_cone_filename
+
+        # The IXP table must be loaded before the AS-to-IXP links, and the
+        # relationships before the organisation lookup, which needs the
+        # neighbours of every candidate AS.
+        self.import_ixps_info()
+        self.import_as_to_ixps_info()
+        self.import_as_faciiity_info()
+        self.import_as_to_customer_cone()
+        self.import_as_relationship(self.as_to_relationship_v4_filename, 'v4')
+        self.import_as_relationship(self.as_to_relationship_v6_filename, 'v6')
+        self.get_as_to_organisations()
+
+    @staticmethod
+    def _first_if_list(value):
+        """Return the first element when ``value`` is a list, else ``value``.
+
+        An empty list yields ``None`` instead of raising ``IndexError``.
+        """
+        if isinstance(value, list):
+            return value[0] if value else None
+        return value
+
+    @staticmethod
+    def _as_list(values):
+        """Return ``values`` as a list, keeping ``None`` as ``None``.
+
+        JSON has no set type and not every encoder accepts Python sets, so
+        sets are converted before they are handed to the exporter.
+        """
+        return None if values is None else list(values)
+
+    def _with_orgnames(self, neighbours):
+        """Pair every neighbour ASN with its organisation name (or ``None``)."""
+        return [(neighbour, self.as2org.get(neighbour)) for neighbour in neighbours]
+
+    def export_data(self, output_filename):
+        """Write the profile of every candidate ASN to ``output_filename``.
+
+        Args:
+            output_filename: path of the JSON file to create.
+        """
+
+        print('Exporting data to: '+output_filename)
+
+        data_to_export = {}
+        for asn in self.asns:
+            asn = int(asn)
+            data_to_export[asn] = {
+                'asn': asn,
+                'orgname': self.as2org.get(asn),
+                'ixps': self._as_list(self.as_to_ixp_info.get(asn)),
+                'facilities': self._as_list(self.as_to_facility_info.get(asn)),
+                'providers_v4': self._with_orgnames(self.c2p['v4'].get(asn, [])),
+                'providers_v6': self._with_orgnames(self.c2p['v6'].get(asn, [])),
+                'customers_v4': self._with_orgnames(self.p2c['v4'].get(asn, [])),
+                'customers_v6': self._with_orgnames(self.p2c['v6'].get(asn, [])),
+                'peers_v4': self._with_orgnames(self.p2p['v4'].get(asn, [])),
+                'peers_v6': self._with_orgnames(self.p2p['v6'].get(asn, [])),
+                'customer_cone': self.customer_cone.get(asn),
+            }
+
+        lib.export_json(data_to_export, output_filename)
+        print('Finished')
+
+    def get_as_to_organisations(self):
+        """Fetch the organisation name of each candidate ASN and its neighbours.
+
+        The providers, customers and peers (IPv4 and IPv6) of every candidate
+        are included so that the exported neighbour lists can carry names.
+        Results are stored in ``self.as2org``.
+
+        Raises:
+            requests.RequestException: when a request fails, times out or
+                returns an HTTP error status.
+        """
+
+        print('Fetching organisation names for each ASN...')
+
+        # The relationship tables are keyed by int, so normalise the
+        # candidates the same way export_data does.
+        candidates = {int(asn) for asn in self.asns}
+        wanted = set(candidates)
+        for relations in (self.c2p, self.p2c, self.p2p):
+            for version in ('v4', 'v6'):
+                table = relations[version]
+                for asn in candidates:
+                    wanted.update(table.get(asn, ()))
+
+        wanted = [str(asn) for asn in wanted]
+
+        # Query in chunks to keep each request URL at a bounded length.
+        for start in range(0, len(wanted), self.AS2ORG_CHUNK_SIZE):
+            chunk = wanted[start:start + self.AS2ORG_CHUNK_SIZE]
+            response = requests.get(url=self.CAIDA_AS2ORG_URL + '_'.join(chunk), timeout=self.REQUEST_TIMEOUT)
+            response.raise_for_status()
+
+            for entry in response.json()['data']:
+                self.as2org[int(entry['asn'])] = entry['orgName']
+
+    def import_ixps_info(self):
+        """Fill ``self.ixp_info`` with ``ix_id -> (name, country, city)``."""
+
+        with open(self.ixp_filename) as f:
+            # The first line is not parsed (presumably a header line; confirm
+            # against the dataset format). The default keeps an empty file
+            # from raising StopIteration.
+            next(f, None)
+            for line in f:
+                data = ujson.loads(line)
+                # country/city may be given either as a value or as a list.
+                country = self._first_if_list(data.get('country'))
+                city = self._first_if_list(data.get('city'))
+                self.ixp_info[int(data['ix_id'])] = (data['name'], country, city)
+
+    def import_as_to_ixps_info(self):
+        """Fill ``self.as_to_ixp_info`` with the IXPs each AS is linked to.
+
+        Raises:
+            KeyError: when a link references an ``ix_id`` that is missing
+                from ``self.ixp_info``.
+        """
+        with open(self.as_to_ixp_filename) as f:
+            # First line skipped, as in import_ixps_info.
+            next(f, None)
+            for line in f:
+                data = ujson.loads(line)
+                self.as_to_ixp_info[int(data['asn'])].add(self.ixp_info[int(data['ix_id'])])
+
+    def import_as_faciiity_info(self):
+        """Fill ``self.as_to_facility_info`` with ``(name, country, city)`` tuples."""
+        data = lib.import_json(self.as_to_facility_filename)
+        for entry in data['netfac']['data']:
+            facility = (entry['name'], entry.get('country'), entry.get('city'))
+            self.as_to_facility_info[int(entry['local_asn'])].add(facility)
+
+    def import_as_relationship(self, filename, version):
+        """Load provider/customer and peer links from a bz2 relationship file.
+
+        Each data line is ``a|b|relation``: ``-1`` means ``a`` is a provider
+        of ``b``, ``0`` means ``a`` and ``b`` are peers. Blank lines and
+        lines starting with ``#`` are ignored.
+
+        Args:
+            filename: path of the bz2-compressed relationship file.
+            version: ``'v4'`` or ``'v6'``; selects which tables are filled.
+        """
+        with bz2.open(filename, 'rt') as file:
+            for line in file:
+                line = line.strip()
+                if not line or line.startswith('#'):
+                    continue
+
+                fields = line.split('|')
+                provider = int(fields[0])
+                customer = int(fields[1])
+                relationship = fields[2]
+
+                # provider-to-customer relations
+                if relationship == '-1':
+                    self.p2c[version][provider].add(customer)
+                    self.c2p[version][customer].add(provider)
+                # peer-to-peer relations
+                elif relationship == '0':
+                    self.p2p[version][provider].add(customer)
+                    self.p2p[version][customer].add(provider)
+
+    def import_as_to_customer_cone(self):
+        """Fill ``self.customer_cone`` with ``asn -> list of cone members``.
+
+        Each data line holds an ASN followed by the ASNs of its cone. The AS
+        itself is left out of the stored list; a line that does not list it
+        is accepted as well. Blank lines and ``#`` lines are ignored.
+        """
+        with bz2.open(self.customer_cone_filename, 'rt') as file:
+            for line in file:
+                fields = line.split()
+                if not fields or fields[0].startswith('#'):
+                    continue
+
+                asn = int(fields[0])
+                members = (int(field) for field in fields[1:])
+                # Filtering (rather than list.remove) cannot raise when the
+                # AS is absent from its own cone.
+                self.customer_cone[asn] = [member for member in members if member != asn]
+
+
diff --git a/asprofiler.py b/asprofiler.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+
+import analyse
+import download
+import argparse
+import lib
+
+def main():
+    """Parse the command line, fetch the datasets and export the AS profiles."""
+    cli = argparse.ArgumentParser(description="A tool to profile Autonomous systems based on publicly available datasets")
+    cli.add_argument('-if', '--input_file', type=str, required=True, help='csv file containing the total list of ASNs (e.g. 1,2,3,4)')
+    cli.add_argument('-of', '--output_file', type=str, required=True, help='.json output filename containing all the available data for the profiled ASes')
+    options = cli.parse_args()
+
+    # Step 1: read the candidate ASNs from the input file
+    candidate_asns = lib.import_csv(options.input_file)
+
+    # Step 2: download the required datasets from the publicly available databases
+    datasets = download.Download()
+
+    # Step 3: load and index the downloaded datasets
+    profiler = analyse.Analyse(
+        ixp_filename=datasets.ixp_filename,
+        as_to_ixp_filename=datasets.as_to_ixp_filename,
+        as_to_facility_filename=datasets.peeringdb_filename,
+        as_to_relationship_v4_filename=datasets.as_relations_v4_filename,
+        as_to_relationship_v6_filename=datasets.as_relations_v6_filename,
+        customer_cone_filename=datasets.as_cust_cone_filename,
+        asns=candidate_asns,
+    )
+
+    # Step 4: build the profiles and write them to the output file
+    profiler.export_data(options.output_file)
+
+# Run the profiler only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()
diff --git a/download.py b/download.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+
+import datetime
+from bs4 import BeautifulSoup
+import requests
+import wget
+import os
+
+class Download():
+    """Find and download the newest snapshot of each dataset the profiler uses.
+
+    Instantiating the class performs every download (a file that already
+    exists in the current working directory is not fetched again) and stores
+    the resulting file names on the instance:
+    ``ixp_filename``, ``as_to_ixp_filename``, ``peeringdb_filename``,
+    ``as_relations_v4_filename``, ``as_relations_v6_filename`` and
+    ``as_cust_cone_filename``.
+    """
+
+    # Seconds to wait for a directory listing before giving up.
+    REQUEST_TIMEOUT = 60
+
+    def __init__(self):
+        """Download all datasets and record their file names."""
+        self.ixp_filename = None
+        self.as_to_ixp_filename = None
+        self.peeringdb_filename = None
+        self.as_relations_v4_filename = None
+        self.as_relations_v6_filename = None
+        self.as_cust_cone_filename = None
+
+        print('Searching for the most recent datasets...')
+        self.download_caida_relationship_cone_datasets()
+        self.download_peeringdb()
+        self.download_caida_ixp_asn_datasets()
+
+    def download_file(self, webpage, filename):
+        """Download ``webpage + filename`` unless ``filename`` already exists.
+
+        Args:
+            webpage: URL of the directory that holds the file (with trailing
+                slash).
+            filename: name of the file inside that directory.
+        """
+        if os.path.exists('./' + filename):
+            print('Skipping, already exists:', filename)
+        else:
+            print('Downloading: ' + webpage+filename)
+            wget.download(webpage + filename)
+            # End the line of wget's progress output.
+            print()
+
+    def _list_links(self, webpage):
+        """Return the ``href`` of every anchor found on ``webpage``.
+
+        Raises:
+            requests.RequestException: when the page cannot be fetched or
+                the server answers with an HTTP error status.
+        """
+        html_page = requests.get(webpage, timeout=self.REQUEST_TIMEOUT)
+        html_page.raise_for_status()
+        soup = BeautifulSoup(html_page.text, "lxml")
+        return [link['href'] for link in soup.find_all('a') if link.has_attr('href')]
+
+    @staticmethod
+    def _latest(hrefs, marker, webpage, exclude=None):
+        """Return the last href, in sorted order, that contains ``marker``.
+
+        Hrefs containing ``exclude`` are ignored. The last name in sorted
+        order is taken as the most recent snapshot (this assumes the file
+        names embed a sortable date).
+
+        Raises:
+            IndexError: when no href matches; the message names the marker
+                and the page that was searched.
+        """
+        matches = sorted(
+            href for href in hrefs
+            if marker in href and (exclude is None or exclude not in href)
+        )
+        if not matches:
+            raise IndexError('No file matching ' + repr(marker) + ' found at ' + webpage)
+        return matches[-1]
+
+    def download_caida_ixp_asn_datasets(self):
+        """Download the newest CAIDA IXP and IXP-member snapshot files."""
+        webpage = 'https://publicdata.caida.org/datasets/ixps/'
+        hrefs = self._list_links(webpage)
+
+        ixs = self._latest(hrefs, 'ixs_', webpage)
+        ix_asns = self._latest(hrefs, 'ix-asns_', webpage, exclude='ixs_')
+
+        self.download_file(webpage, ixs)
+        self.download_file(webpage, ix_asns)
+        self.ixp_filename = ixs
+        self.as_to_ixp_filename = ix_asns
+
+    def download_peeringdb(self):
+        """Download yesterday's PeeringDB dump from the CAIDA mirror.
+
+        The date is taken from the local clock; no check is made here that a
+        dump for that day has actually been published.
+        """
+
+        previous_date = (datetime.datetime.today() - datetime.timedelta(days=1))
+
+        # Dumps are stored under <year>/<month>/ on the mirror.
+        webpage = 'https://publicdata.caida.org/datasets/peeringdb/' + previous_date.strftime('%Y/%m/')
+        file = previous_date.strftime('peeringdb_2_dump_%Y_%m_%d.json')
+        self.download_file(webpage, file)
+        self.peeringdb_filename = file
+
+    def download_caida_relationship_cone_datasets(self):
+        """Download the newest AS-relationship (v4, v6) and customer-cone files."""
+
+        # IPv4 relationships
+        webpage = 'https://publicdata.caida.org/datasets/as-relationships/serial-2/'
+        as_relations_v4 = self._latest(self._list_links(webpage), 'as-rel2.txt.bz2', webpage)
+
+        self.download_file(webpage, as_relations_v4)
+        self.as_relations_v4_filename = as_relations_v4
+
+        # IPv6 relationships and customer cones live in another directory.
+        webpage = 'https://publicdata.caida.org/datasets/2013-asrank-data-supplement/data/'
+        hrefs = self._list_links(webpage)
+
+        as_relations_v6 = self._latest(hrefs, 'as-rel.v6-stable.txt.bz2', webpage)
+        as_cust_cone = self._latest(hrefs, 'ppdc-ases.txt.bz2', webpage, exclude='as-rel.v6-stable.txt.bz2')
+
+        self.download_file(webpage, as_relations_v6)
+        self.download_file(webpage, as_cust_cone)
+        self.as_relations_v6_filename = as_relations_v6
+        self.as_cust_cone_filename = as_cust_cone
+
+
+
+
diff --git a/lib.py b/lib.py
@@ -0,0 +1,32 @@
+
+import ujson as json
+import csv
+
+def import_csv(filename):
+    """Read AS numbers from a CSV file and return them as a set of ints.
+
+    Every field of every row is taken as one ASN. Empty fields (for example
+    after a trailing comma) are ignored.
+
+    If the file cannot be read or a field is not an integer, a message is
+    printed and the process exits with status 1.
+    """
+    try:
+        asns = set()
+        # newline='' is what the csv module expects from the file object.
+        with open(filename, 'r', newline='') as fp:
+            for row in csv.reader(fp):
+                asns.update(int(field) for field in row if field.strip())
+        return asns
+    except Exception:
+        print('Import Error with file:', filename)
+        # A non-zero status lets callers and shells detect the failure; the
+        # site-provided exit() would have reported success.
+        raise SystemExit(1)
+
+
+def import_json(filename):
+    """Load a JSON file and return the decoded data.
+
+    If the file cannot be read or decoded, a message is printed and the
+    process exits with status 1.
+    """
+    try:
+        with open(filename, 'r') as fp:
+            return json.load(fp)
+    except Exception:
+        print('Import Error with file:', filename)
+        # Exit with a failure status; the site-provided exit() would have
+        # reported success.
+        raise SystemExit(1)
+
+
+def export_json(data, filename):
+    """Serialize ``data`` as JSON into ``filename``.
+
+    If the file cannot be written or ``data`` cannot be encoded, a message
+    is printed and the process exits with status 1.
+    """
+    try:
+        with open(filename, 'w') as fp:
+            json.dump(data, fp)
+    except Exception:
+        print('Export Error with file:', filename)
+        # Exit with a failure status; the site-provided exit() would have
+        # reported success.
+        raise SystemExit(1)
diff --git a/test.csv b/test.csv
@@ -0,0 +1 @@
+1,2,3