-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_processing.py
120 lines (96 loc) · 3.41 KB
/
data_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import csv
import os
import copy
__location__ = os.path.realpath(
os.path.join(os.getcwd(), os.path.dirname(__file__)))
cities = []
with open(os.path.join(__location__, 'Cities.csv')) as f:
rows = csv.DictReader(f)
for r in rows:
cities.append(dict(r))
countries = []
with open(os.path.join(__location__, 'Countries.csv')) as f:
rows = csv.DictReader(f)
for r in rows:
countries.append(dict(r))
class DB:
def __init__(self):
self.database = []
def insert(self, table):
self.database.append(table)
def search(self, table_name):
for table in self.database:
if table.table_name == table_name:
return table
return None
class Table:
def __init__(self, table_name, table):
self.table_name = table_name
self.table = table
def join(self, other_table, common_key):
joined_table = Table(self.table_name + '_joins_' + other_table.table_name, [])
for item1 in self.table:
for item2 in other_table.table:
if item1[common_key] == item2[common_key]:
dict1 = copy.deepcopy(item1)
dict2 = copy.deepcopy(item2)
dict1.update(dict2)
joined_table.table.append(dict1)
return joined_table
def filter(self, condition):
filtered_table = Table(self.table_name + '_filtered', [])
for item1 in self.table:
if condition(item1):
filtered_table.table.append(item1)
return filtered_table
def aggregate(self, function, aggregation_key):
temps = []
for item1 in self.table:
temps.append(float(item1[aggregation_key]))
return function(temps)
def select(self, attributes_list):
temps = []
for item1 in self.table:
dict_temp = {}
for key in item1:
if key in attributes_list:
dict_temp[key] = item1[key]
temps.append(dict_temp)
return temps
def __str__(self):
return self.table_name + ':' + str(self.table)
table1 = Table('cities', cities)
table2 = Table('countries', countries)
my_DB = DB()
my_DB.insert(table1)
my_DB.insert(table2)
my_table1 = my_DB.search('cities')
my_table1_filtered = my_table1.filter(lambda x: x['country'] == 'Italy')
my_table1_selected = my_table1.select(['city', 'latitude'])
print(my_table1)
print()
print(my_table1_selected)
temps = []
for item in my_table1_filtered.table:
temps.append(float(item['temperature']))
print(sum(temps) / len(temps))
print("Using aggregation")
print(my_table1_filtered.aggregate(lambda x: sum(x) / len(x), 'temperature'))
print()
my_table2 = my_DB.search('countries')
my_table3 = my_table1.join(my_table2, 'country')
my_table3_filtered = my_table3.filter(lambda x: x['EU'] == 'no').filter(lambda x: float(x['temperature']) < 5.0)
print(my_table3_filtered.table)
print()
city_table = my_DB.search('cities')
country_table = my_DB.search('countries')
big_table = city_table.join(country_table, 'country')
eu_no_cl_table = big_table.filter(lambda eu: eu['EU'] == 'yes').filter(lambda cl: cl['coastline'] == 'no')
print("Min")
print(eu_no_cl_table.aggregate(min, 'temperature'))
print("Max")
print(eu_no_cl_table.aggregate(max, 'temperature'))
print("Min latitude")
print(big_table.aggregate(min, 'latitude'))
print("Max latitude")
print(big_table.aggregate(max, 'latitude'))