-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcorrelation_rank.py
50 lines (40 loc) · 1.78 KB
/
correlation_rank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/python
import sys, getopt
import pandas as pd
from time import time
def main(argv):
ground_truth = ''
corr_calc = ''
column_to_calculate = ''
corr_method=''
try:
opts, args = getopt.getopt(argv,"g:c:b:m:",["gtruthFile=","corrFile=","columnCalc=","method="])
except getopt.GetoptError:
print('test.py -g <ground_truth> -c <correlation_variant> -b <based column calculation> -m <method>')
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print('test.py -g <ground_truth> -c <correlation_variant> -b <based column calculation -m <method>')
sys.exit()
elif opt in ("-g", "--gtruthFile"):
ground_truth = arg
elif opt in ("-c", "--corrFile"):
corr_calc = arg
elif opt in ("-b", "--columnCalc"):
column_to_calculate = arg
elif opt in ("-m", "--method"):
corr_method = arg
ground_truth_df = pd.read_csv(ground_truth, header=[0], index_col=0)
corr_var_df = pd.read_csv(corr_calc, header=[0], index_col=0)
print('\nCorrelation Dataframe: ', corr_var_df)
print('\nCorrelation Method: ', corr_method)
COLUMN_TO_GET = column_to_calculate
print('\nGround Truth Dataframe: ', ground_truth_df)
columns_to_correlate = ground_truth_df.join(corr_var_df if COLUMN_TO_GET == '' else corr_var_df[COLUMN_TO_GET])
print('\nColumns to correlate: ', columns_to_correlate)
# ground_truth_df_norm.to_csv('ground_truth_df_norm{}.csv'.format(int(time())))
correllation_result = columns_to_correlate.corr(method=corr_method)
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
print('\nCorrelation Result: ', correllation_result)
if __name__ == "__main__":
main(sys.argv[1:])