-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil_functions.py
167 lines (112 loc) · 3.36 KB
/
util_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import pandas as pd
from tabulate import tabulate
from scipy.spatial.distance import euclidean
def print_df(df):
    """
    print a data frame as a psql-style table using its keys as headers
    :param df: object handed to ``tabulate`` (a pandas data frame in this module)
    :return: None
    """
    rendered = tabulate(df, headers='keys', tablefmt='psql')
    print(rendered)
def remove_columns(df, col_list):
    """
    return a copy of the data frame without the given columns
    :param df: pandas data frame (left unmodified)
    :param col_list: column label or list of column labels to drop
    :return: new data frame lacking the listed columns
    """
    trimmed = df.drop(columns=col_list)
    return trimmed
def select_data_filter(df, filter_dict):
    """
    keep only the rows that match every filter
    :param df: pandas data frame (left unmodified, a copy is filtered)
    :param filter_dict: dictionary mapping a column name to the value
        that column must equal
    :return: data frame holding the rows that satisfy all filters
    """
    selected = df.copy()
    # narrow the selection one column at a time
    for column, wanted in filter_dict.items():
        matches = selected[column] == wanted
        selected = selected.loc[matches]
    return selected
def get_intersection_on(df_x, df_y, on_col_labels_list):
    """
    inner-join two data frames on the given column labels
    useful for getting the data points common to both data frames,
    e.g. wifi data recorded at the same points
    :param df_x: left pandas data frame
    :param df_y: right pandas data frame
    :param on_col_labels_list: column labels to join on
    :return: merged data frame with only the rows present in both inputs
    """
    common = pd.merge(
        left=df_x,
        right=df_y,
        on=on_col_labels_list,
        how='inner',
    )
    return common
def remove_wap_in_column_name(df, wap_str="WAP"):
    """
    strip the wap marker from every column name, in place
    each name is replaced by the text following the last occurrence of
    wap_str; names without the marker stay as they are
    :param df: pandas data frame whose column names are strings (modified in place)
    :param wap_str: marker text to cut away
    :return: the same data frame object with renamed columns
    """
    df.columns = [label.split(wap_str)[-1] for label in df.columns]
    return df
def remove_extra_columns(main_df, sub_df):
    """
    placeholder: intended to make the columns in sub_df a subset of the
    columns in main_df, but not implemented yet
    :param main_df: currently unused
    :param sub_df: currently unused
    :return: None (no work is performed)
    """
    return None
def set_df_index_begin(df, start_index=1):
    """
    shift every index label of the data frame by start_index, in place
    :param df: pandas data frame with a numeric index (modified in place)
    :param start_index: offset added to each index label
    :return: the same data frame object with the shifted index
    """
    shifted = df.index + start_index
    df.index = shifted
    return df
def spearman_footrule_distance(coordinate_a, coordinate_b):
    """
    sum of the absolute element-wise differences of two sequences
    :param coordinate_a: indexable sequence; its length sets how many
        positions are compared
    :param coordinate_b: indexable sequence, read at the same positions
    :return: accumulated absolute difference (0 for an empty coordinate_a)
    """
    # index-based access on purpose: a shorter coordinate_b must still fail
    positions = range(len(coordinate_a))
    return sum(abs(coordinate_b[k] - coordinate_a[k]) for k in positions)
def euclideans(list_tuples1, list_tuples2):
    """
    pairwise euclidean distances between two lists of 2-element points
    points are paired up in order; pairing stops at the shorter list
    :param list_tuples1: iterable of 2-element points
    :param list_tuples2: iterable of 2-element points
    :return: list with one distance per point pair
    """
    return [
        euclidean((x_a, y_a), (x_b, y_b))
        for (x_a, y_a), (x_b, y_b) in zip(list_tuples1, list_tuples2)
    ]
def average_euclidean(list_tuples1, list_tuples2):
    """
    mean of the pairwise euclidean distances between two point lists
    :param list_tuples1: iterable of 2-element points
    :param list_tuples2: iterable of 2-element points
    :return: arithmetic mean of the distances from ``euclideans``
    :raises ZeroDivisionError: when there is no point pair to average
    """
    distances = euclideans(list_tuples1, list_tuples2)
    total = sum(distances)
    return total / len(distances)
def convert_unit(number, unit):
    """
    divide a number by a unit size
    :param number: value to convert
    :param unit: divisor, i.e. how many source units make one target unit
    :return: number / unit
    """
    scaled = number / unit
    return scaled
def pick_rows_on_interval(df, interval, begin=None):
    """
    pick only rows whose index label falls on a sampling interval
    kept labels are ``interval * i`` for ``i = begin, begin + 1, ...`` as
    long as the product stays strictly below the largest index label;
    every other row is dropped and the result is re-indexed from 0
    :param df: pandas data frame with integer index labels (left unmodified)
    :param begin: first value of the running counter ``i``; when None the
        smallest index label is used
    :param interval: step multiplied with the running counter
    :return: new data frame holding the kept rows, indexed 0..n-1
    """
    # fall back to the smallest label only when begin was really omitted,
    # so that an explicit begin=0 is honoured
    if begin is None:
        begin = int(min(df.index))
    end = int(max(df.index))

    keep_labels = []
    for i in range(begin, end):
        label = interval * i
        if label >= end:
            break
        keep_labels.append(label)
    # NOTE(review): because of the strict bound above the row at the largest
    # label is never kept -- preserved as found, confirm this is intended

    drop_labels = list(set(df.index) - set(keep_labels))
    print("dropping", str(len(drop_labels)), "of", str(len(df.index)))
    # drop_labels holds index labels, not positions: drop by label directly
    # instead of using them to index into df.index (which breaks on any
    # index that does not start at 0)
    df = df.drop(index=drop_labels)
    # re-index
    df.index = range(0, len(df.index))
    return df
def pick_label_num_by_multiple(df, mul, pos_label_col):
    """
    keep the rows whose position label is a positive multiple of mul
    :param df: pandas data frame (left unmodified)
    :param mul: labels divisible by this number are kept
    :param pos_label_col: name of the column holding numeric position labels
    :return: data frame with the rows whose label is in
        ``mul, 2 * mul, ...`` up to and including the largest label
    """
    if df.empty:
        # nothing to pick from; avoids max() failing on an empty column
        return df.copy()
    top_label = int(max(df[pos_label_col].tolist()))
    # range end is exclusive, so add 1 to let the largest label itself
    # qualify when it is a multiple of mul
    picks = [label for label in range(1, top_label + 1) if label % mul == 0]
    return df[df[pos_label_col].isin(picks)]
def avg_error_factored(actual_pos, found_pos, conversion_factor=20):
    """
    average euclidean error between actual and found positions, scaled
    down by conversion_factor
    presumably conversion_factor is the number of coordinate units per
    meter, so the result is in meters -- confirm against the data source
    :param actual_pos: iterable of 2-element ground-truth points
    :param found_pos: iterable of 2-element estimated points
    :param conversion_factor: divisor applied to the mean distance
    :return: mean distance divided by conversion_factor
    """
    mean_distance = average_euclidean(actual_pos, found_pos)
    return convert_unit(mean_distance, conversion_factor)