-
Notifications
You must be signed in to change notification settings - Fork 0
/
MyData.py
94 lines (88 loc) · 3.2 KB
/
MyData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#Abolfazl Asudeh Jan 2020
# http://asudeh.github.io
import numpy as np
import csv
Items = [] # the indices/names of the items
Sets = [] # the indices/names of sets
data = None # the dataset: every row is an item; sets are the columns
color = [] # color of items
n = 0 # No items
m = 0 # No sets
S = [] # the items each set covers
H = [] # the sets each item hits
q = [] # this is only used for proportional fairness
def readCSVfile(filename,delim=',', header=1, SkipBeg=1, SkipEnd=1, colorIndex=[0],nrows=-1,ncols=-1): # the last columns are the sensitive attributes
# header: 1 if the dataset contains header
# SkipBeg: number of (ID) columns to skip from the beginning
# SkipEnd: number of columns to skip at the beginning
# colorIndex: the index(es) of the sensitive attribute to use (if it is right after the last set the index is 0)
# nrows: number of itmes: -1 = use all
# ncols: number of sets: -1 = use all
global Items, Sets, data, color, n, m, S, H
data_raw = np.genfromtxt(filename, delimiter=delim,skip_header=header)
(n,m) = data_raw.shape
n = int(n); m = int(m)-(SkipEnd + SkipBeg)
Sets = [i for i in range(m)]
Items = [i for i in range(n)]
data = data_raw[:, SkipBeg : m + SkipBeg]
if (type(colorIndex) is int) or len(colorIndex)==1:
color = list(data_raw[:,m + SkipBeg + colorIndex].astype(int))
else:
color = _getColors(data_raw[:,[m+SkipBeg+i for i in colorIndex]].astype(int))
if nrows>-1:
data = data[:nrows,:]
color = color[:nrows]
n = nrows
if ncols>-1:
data = data[:,:ncols]
m = ncols
H = [[] for i in range(n)]
S = [set([]) for i in range(m)]
for i in range(n):
for j in range(m):
if data[i,j] == 1:
H[i].append(j)
S[j].add(i)
def readChicago(nrow=3000,nset=56): # the last columns are the sensitive attributes
global Items, Sets, data, color, n, m, S, H,q
reader = csv.reader(open("ChicagoZipFMC.csv","r",newline=''), delimiter=',',quoting=csv.QUOTE_ALL)
_items = []
_cnt = 0
for row in reader:
_items.append(row)
_cnt+=1
if _cnt==nrow: break
m = nset; n = len(_items)
Sets = [i for i in range(m)]
Items = [i for i in range(n)]
H = [[] for i in range(n)]
S = [set([]) for i in range(m)]
for i in range(n):
for j in range(len(_items[i])-1): # the last index is color
k = int(_items[i][j])
if k>=m: continue #skip the sets that are not in this setting
H[i].append(k)
S[k].add(i)
color.append(int(_items[i][len(_items[i])-1]))
q=[.58,.42]
def _getColors(A):
A = A.astype(int)
maxvals = np.amax(A, axis=0)
n,m = A.shape
card = 1
for i in maxvals: card=card*(i+1)
colorIndex = {}
temp = [0 for i in range(m)]
for i in range(int(card)):
colorIndex[tuple(temp)]=i
temp = _next(temp,maxvals)
c = [colorIndex[tuple(A[i,:])] for i in range(n)]
return c
def _next(current,maxvals):
next = current[:]
for i in range(len(maxvals)):
if next[i]+1<= maxvals[i]:
next[i]= next[i] + 1
break
else: next[i]=0
return next