Skip to content

Commit

Permalink
first version
Browse files Browse the repository at this point in the history
  • Loading branch information
george committed Jul 18, 2023
1 parent 1552b1d commit 2c8ce73
Show file tree
Hide file tree
Showing 6 changed files with 333 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ The output data might be used to:
## How to run
`$ python3 asprofiler.py -if asns.csv -of export.json`

_asns.csv_: contains all the candidate ASes to build their profile (with format 1,2,3)
_test.csv_: contains all the candidate ASes to build their profile (with format 1,2,3)

_export.json_: contains all the exported information

Expand Down
146 changes: 146 additions & 0 deletions analyse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/env python3

import lib
import ujson
import bz2
from collections import defaultdict
import requests

class Analyse():
    """Build per-AS profiles from previously downloaded dataset files.

    Constructing an instance loads every dataset and then queries the CAIDA
    AS-to-organisation API, so it performs file and network I/O.

    Attributes populated by the loaders:
        ixp_info: ``{ix_id: (name, country, city)}``.
        as_to_ixp_info: ``{asn: {(name, country, city), ...}}`` of IXPs.
        as_to_facility_info: ``{asn: {(name, country, city), ...}}``.
        p2c / c2p / p2p: ``{'v4'|'v6': {asn: {neighbour_asn, ...}}}``.
        customer_cone: ``{asn: [member_asn, ...]}`` without the AS itself.
        as2org: ``{asn: organisation_name}``.
    """

    # Endpoint of the AS-to-organisation API; the queried ASNs are appended
    # to it joined by underscores.
    CAIDA_AS2ORG_URL = 'https://api.data.caida.org/as2org/v1/asns/'
    # Number of ASNs sent with a single API request.
    AS2ORG_CHUNK_SIZE = 400
    # Seconds to wait for one API response before giving up.
    AS2ORG_TIMEOUT = 120

    def __init__(self, ixp_filename, as_to_ixp_filename, as_to_facility_filename, as_to_relationship_v4_filename, as_to_relationship_v6_filename, customer_cone_filename, asns):
        """Load all datasets for the candidate ASNs.

        Args:
            ixp_filename: JSON-lines file describing the IXPs.
            as_to_ixp_filename: JSON-lines file mapping ASNs to IXP ids.
            as_to_facility_filename: JSON dump holding ``netfac`` records.
            as_to_relationship_v4_filename: bz2 AS-relationship file (IPv4).
            as_to_relationship_v6_filename: bz2 AS-relationship file (IPv6).
            customer_cone_filename: bz2 customer-cone file.
            asns: iterable of candidate AS numbers to profile.
        """
        self.asns = asns
        self.ixp_info = {}
        self.as_to_ixp_info = defaultdict(set)
        self.as_to_facility_info = defaultdict(set)
        self.p2c = {'v4': defaultdict(set), 'v6': defaultdict(set)}
        self.c2p = {'v4': defaultdict(set), 'v6': defaultdict(set)}
        self.p2p = {'v4': defaultdict(set), 'v6': defaultdict(set)}
        self.customer_cone = {}
        self.as2org = {}

        self.ixp_filename = ixp_filename
        self.as_to_ixp_filename = as_to_ixp_filename
        self.as_to_facility_filename = as_to_facility_filename

        self.as_to_relationship_v4_filename = as_to_relationship_v4_filename
        self.as_to_relationship_v6_filename = as_to_relationship_v6_filename
        self.customer_cone_filename = customer_cone_filename

        # The IXP table must exist before the AS-to-IXP mapping is resolved,
        # and the relationships must be loaded before organisation names are
        # fetched, because the neighbours' names are requested as well.
        self.import_ixps_info()
        self.import_as_to_ixps_info()
        self.import_as_faciiity_info()
        self.import_as_to_customer_cone()
        self.import_as_relationship(self.as_to_relationship_v4_filename, 'v4')
        self.import_as_relationship(self.as_to_relationship_v6_filename, 'v6')
        self.get_as_to_organisations()

    @staticmethod
    def _first_if_list(value):
        """Return the first element of a list (None if empty), else the value."""
        if isinstance(value, list):
            return value[0] if value else None
        return value

    @staticmethod
    def _as_list(values):
        """Return *values* as a list, keeping None as None."""
        return None if values is None else list(values)

    def _with_orgname(self, neighbours):
        """Pair every neighbour ASN with its organisation name (or None)."""
        return [(neighbour, self.as2org.get(neighbour)) for neighbour in neighbours]

    def export_data(self, output_filename):
        """Write the profile of every candidate ASN to a JSON file.

        Args:
            output_filename: path of the JSON file to create.
        """
        print('Exporting data to: '+output_filename)

        data_to_export = defaultdict(dict)
        for asn in self.asns:
            asn = int(asn)
            profile = data_to_export[asn]
            profile['asn'] = asn
            profile['orgname'] = self.as2org.get(asn)
            # Sets are turned into lists so the export does not depend on
            # the JSON encoder being able to serialise sets.
            profile['ixps'] = self._as_list(self.as_to_ixp_info.get(asn))
            profile['facilities'] = self._as_list(self.as_to_facility_info.get(asn))
            profile['providers_v4'] = self._with_orgname(self.c2p['v4'].get(asn, []))
            profile['providers_v6'] = self._with_orgname(self.c2p['v6'].get(asn, []))
            profile['customers_v4'] = self._with_orgname(self.p2c['v4'].get(asn, []))
            profile['customers_v6'] = self._with_orgname(self.p2c['v6'].get(asn, []))
            profile['peers_v4'] = self._with_orgname(self.p2p['v4'].get(asn, []))
            profile['peers_v6'] = self._with_orgname(self.p2p['v6'].get(asn, []))
            profile['customer_cone'] = self.customer_cone.get(asn, None)

        lib.export_json(data_to_export, output_filename)
        print('Finished')

    def get_as_to_organisations(self):
        """Fetch the organisation name of every candidate ASN and neighbour.

        The candidates plus all of their providers, customers and peers
        (IPv4 and IPv6) are queried in chunks and stored in ``self.as2org``.

        Raises:
            requests.HTTPError: if the API answers with an error status.
        """
        print('Fetching organisation names for each ASN...')

        # Normalise to int so the lookups below match the integer keys of the
        # relationship tables, exactly as export_data does.
        wanted = {int(asn) for asn in self.asns}
        for asn in list(wanted):
            for relation in (self.c2p, self.p2c, self.p2p):
                for version in ('v4', 'v6'):
                    wanted |= relation[version].get(asn, set())

        queries = [str(asn) for asn in sorted(wanted)]

        for start in range(0, len(queries), self.AS2ORG_CHUNK_SIZE):
            chunk = queries[start:start + self.AS2ORG_CHUNK_SIZE]
            response = requests.get(url=self.CAIDA_AS2ORG_URL + '_'.join(chunk), timeout=self.AS2ORG_TIMEOUT)
            # Fail with the HTTP error instead of a confusing parse error.
            response.raise_for_status()

            for entry in response.json()['data']:
                self.as2org[int(entry['asn'])] = entry['orgName']

    def import_ixps_info(self):
        """Load ``self.ixp_info`` from the JSON-lines IXP file.

        The first line of the file is skipped, as are blank lines.
        """
        with open(self.ixp_filename) as f:
            # Skip the first line; the default avoids StopIteration on an
            # empty file.
            next(f, None)
            for line in f:
                if not line.strip():
                    continue
                data = ujson.loads(line)
                country = self._first_if_list(data.get('country'))
                city = self._first_if_list(data.get('city'))
                self.ixp_info[int(data['ix_id'])] = (data['name'], country, city)

    def import_as_to_ixps_info(self):
        """Load ``self.as_to_ixp_info`` from the JSON-lines AS-to-IXP file.

        The first line of the file is skipped, as are blank lines. Records
        whose ``ix_id`` is not present in ``self.ixp_info`` are ignored.
        """
        with open(self.as_to_ixp_filename) as f:
            next(f, None)
            for line in f:
                if not line.strip():
                    continue
                data = ujson.loads(line)
                ixp = self.ixp_info.get(int(data['ix_id']))
                if ixp is None:
                    # The IXP table comes from a separate file and may not
                    # describe every id referenced here.
                    continue
                self.as_to_ixp_info[int(data['asn'])].add(ixp)

    def import_as_faciiity_info(self):
        """Load ``self.as_to_facility_info`` from the facility JSON dump."""
        data = lib.import_json(self.as_to_facility_filename)
        for entry in data['netfac']['data']:
            facility = (entry['name'], entry.get('country'), entry.get('city'))
            self.as_to_facility_info[int(entry['local_asn'])].add(facility)

    def import_as_relationship(self, filename, version):
        """Load AS relationships from a bz2-compressed, '|'-separated file.

        Lines starting with '#' and blank lines are skipped. The third field
        selects the relationship: '-1' means the first AS is a provider of
        the second, '0' means the two ASes are peers. Other values are
        ignored.

        Args:
            filename: path of the bz2 relationship file.
            version: 'v4' or 'v6', the table to fill.
        """
        with bz2.open(filename, 'rt') as file:
            for line in file:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                fields = line.split('|')
                first = int(fields[0])
                second = int(fields[1])
                relationship = fields[2]

                # provider-to-customer relations
                if relationship == '-1':
                    self.p2c[version][first].add(second)
                    self.c2p[version][second].add(first)
                # peer-to-peer relations
                elif relationship == '0':
                    self.p2p[version][first].add(second)
                    self.p2p[version][second].add(first)

    def import_as_to_customer_cone(self):
        """Load ``self.customer_cone`` from the bz2 customer-cone file.

        Each data line is whitespace separated: the first token is the AS and
        the remaining tokens are the members of its cone. One occurrence of
        the AS itself is dropped from its own cone when present. Lines
        starting with '#' and blank lines are skipped.
        """
        with bz2.open(self.customer_cone_filename, 'rt') as file:
            for line in file:
                if line.startswith('#'):
                    continue
                fields = line.split()
                if not fields:
                    continue
                asn = int(fields[0])
                cone = [int(member) for member in fields[1:]]
                # list.remove raises when the value is missing, so check first.
                if asn in cone:
                    cone.remove(asn)
                self.customer_cone[asn] = cone

34 changes: 34 additions & 0 deletions asprofiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env python3

import analyse
import download
import argparse
import lib

def main():
    """Profile the ASes listed in the input CSV and export them as JSON.

    Reads the command-line options, imports the candidate ASNs, downloads
    the datasets, analyses them and writes the result to the output file.
    """
    arg_parser = argparse.ArgumentParser(
        description="A tool to profile Autonomous systems based on publicly available datasets",
    )
    arg_parser.add_argument(
        '-if', '--input_file',
        type=str,
        required=True,
        help='csv file containing the total list of ASNs (e.g. 1,2,3,4)',
    )
    arg_parser.add_argument(
        '-of', '--output_file',
        type=str,
        required=True,
        help='.json output filename containing all the available data for the profiled ASes',
    )
    options = arg_parser.parse_args()

    # Step 1: import the candidate ASes from the given input file.
    candidate_asns = lib.import_csv(options.input_file)

    # Step 2: download the required datasets from the publicly available
    # databases.
    datasets = download.Download()

    # Step 3: sort and categorise the downloaded datasets.
    profiler = analyse.Analyse(
        ixp_filename=datasets.ixp_filename,
        as_to_ixp_filename=datasets.as_to_ixp_filename,
        as_to_facility_filename=datasets.peeringdb_filename,
        as_to_relationship_v4_filename=datasets.as_relations_v4_filename,
        as_to_relationship_v6_filename=datasets.as_relations_v6_filename,
        customer_cone_filename=datasets.as_cust_cone_filename,
        asns=candidate_asns,
    )

    # Step 4: profile the ASes and export the available data.
    profiler.export_data(options.output_file)


if __name__ == '__main__':
    main()
119 changes: 119 additions & 0 deletions download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env python3

import datetime
from bs4 import BeautifulSoup
import requests
import wget
import os

class Download():
    """Find and download the most recent snapshot of every dataset.

    Constructing an instance performs all the downloads (network I/O) and
    records the local file name of each dataset in an attribute:
    ``ixp_filename``, ``as_to_ixp_filename``, ``peeringdb_filename``,
    ``as_relations_v4_filename``, ``as_relations_v6_filename`` and
    ``as_cust_cone_filename``.
    """

    # Seconds to wait for a directory listing before giving up.
    LISTING_TIMEOUT = 60

    def __init__(self):
        """Download every dataset and remember the resulting file names."""
        self.ixp_filename = None
        self.as_to_ixp_filename = None
        self.peeringdb_filename = None
        self.as_relations_v4_filename = None
        self.as_relations_v6_filename = None
        self.as_cust_cone_filename = None

        print('Searching for the most recent datasets...')
        self.download_caida_relationship_cone_datasets()
        self.download_peeringdb()
        self.download_caida_ixp_asn_datasets()

    def download_file(self, webpage, filename):
        """Download ``webpage + filename`` unless the file already exists.

        Args:
            webpage: URL of the directory holding the file.
            filename: name of the file, also used as the local file name
                checked in the current directory.
        """
        if os.path.exists('./' + filename):
            print('Skipping, already exists:', filename)
        else:
            print('Downloading: ' + webpage+filename)
            wget.download(webpage + filename)
            print()

    def _list_hrefs(self, webpage):
        """Return the href of every anchor found on the listing page.

        Raises:
            requests.HTTPError: if the listing cannot be retrieved.
        """
        response = requests.get(webpage, timeout=self.LISTING_TIMEOUT)
        # Without this check an error page would be parsed like a listing
        # and only fail later with an unrelated IndexError.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")
        return [link['href'] for link in soup.find_all('a') if link.has_attr('href')]

    @staticmethod
    def _latest_matching(hrefs, markers, source=''):
        """Return, for each marker, the greatest href containing it.

        Every href is assigned to the first marker it contains. Snapshots are
        compared as plain strings, so the last one in sorted order wins.

        Args:
            hrefs: iterable of link targets.
            markers: substrings identifying each wanted dataset.
            source: page the hrefs came from, used in the error message.

        Returns:
            A list with one href per marker, in the order of *markers*.

        Raises:
            IndexError: if no href contains one of the markers.
        """
        buckets = {marker: [] for marker in markers}
        for href in hrefs:
            for marker in markers:
                if marker in href:
                    buckets[marker].append(href)
                    break

        latest = []
        for marker in markers:
            if not buckets[marker]:
                raise IndexError('No file matching ' + repr(marker) + ' found at ' + repr(source))
            latest.append(max(buckets[marker]))
        return latest

    def download_caida_ixp_asn_datasets(self):
        """Download the latest IXP and IXP-to-ASN snapshots."""
        webpage = 'https://publicdata.caida.org/datasets/ixps/'
        ixs, ix_asns = self._latest_matching(self._list_hrefs(webpage), ('ixs_', 'ix-asns_'), webpage)

        self.download_file(webpage, ixs)
        self.download_file(webpage, ix_asns)
        self.ixp_filename = ixs
        self.as_to_ixp_filename = ix_asns

    def download_peeringdb(self):
        """Download the PeeringDB dump dated one day before today."""
        previous_date = datetime.datetime.today() - datetime.timedelta(days=1)
        year = str(previous_date.year)
        month = '{:02d}'.format(previous_date.month)
        day = '{:02d}'.format(previous_date.day)

        # Dumps are stored in one directory per year and month.
        webpage = 'https://publicdata.caida.org/datasets/peeringdb/' + year + '/' + month + '/'
        file = 'peeringdb_2_dump_' + year + '_' + month + '_' + day + '.json'
        self.download_file(webpage, file)
        self.peeringdb_filename = file

    def download_caida_relationship_cone_datasets(self):
        """Download the latest AS-relationship (v4, v6) and cone snapshots."""
        webpage = 'https://publicdata.caida.org/datasets/as-relationships/serial-2/'
        (as_relations_v4,) = self._latest_matching(self._list_hrefs(webpage), ('as-rel2.txt.bz2',), webpage)

        self.download_file(webpage, as_relations_v4)
        self.as_relations_v4_filename = as_relations_v4

        webpage = 'https://publicdata.caida.org/datasets/2013-asrank-data-supplement/data/'
        as_relations_v6, as_cust_cone = self._latest_matching(
            self._list_hrefs(webpage),
            ('as-rel.v6-stable.txt.bz2', 'ppdc-ases.txt.bz2'),
            webpage,
        )

        self.download_file(webpage, as_relations_v6)
        self.download_file(webpage, as_cust_cone)
        self.as_relations_v6_filename = as_relations_v6
        self.as_cust_cone_filename = as_cust_cone


32 changes: 32 additions & 0 deletions lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@

import ujson as json
import csv

def import_csv(filename):
    """Read a CSV file of AS numbers and return them as a set of ints.

    Every cell of every row is one AS number; empty cells (for example
    after a trailing comma) are ignored.

    Args:
        filename: path of the CSV file.

    Returns:
        The set of AS numbers found in the file.

    Raises:
        SystemExit: with status 1 if the file cannot be read or parsed.
    """
    try:
        asns = set()
        with open(filename, 'r', newline='') as fp:
            for row in csv.reader(fp):
                asns.update(int(cell) for cell in row if cell.strip())
        return asns
    except (OSError, ValueError, csv.Error):
        print('Import Error with file:', filename)
        # A non-zero status lets callers and shells detect the failure; the
        # site-provided exit() would have reported success.
        raise SystemExit(1)

def import_json(filename):
    """Load and return the content of a JSON file.

    Args:
        filename: path of the JSON file.

    Returns:
        The decoded JSON document.

    Raises:
        SystemExit: with status 1 if the file cannot be read or decoded.
    """
    try:
        with open(filename, 'r') as fp:
            return json.load(fp)
    except (OSError, ValueError):
        print('Import Error with file:', filename)
        # A non-zero status lets callers and shells detect the failure; the
        # site-provided exit() would have reported success.
        raise SystemExit(1)

def export_json(data, filename):
    """Serialise *data* as JSON into a file.

    Args:
        data: the object to serialise.
        filename: path of the file to create or overwrite.

    Raises:
        SystemExit: with status 1 if the file cannot be written or the data
            cannot be serialised.
    """
    try:
        with open(filename, 'w') as fp:
            json.dump(data, fp)
    except (OSError, TypeError, ValueError, OverflowError):
        print('Export Error with file:', filename)
        # A non-zero status lets callers and shells detect the failure; the
        # site-provided exit() would have reported success.
        raise SystemExit(1)
1 change: 1 addition & 0 deletions test.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1,2,3

0 comments on commit 2c8ce73

Please sign in to comment.