-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_collector.py
143 lines (126 loc) · 5.29 KB
/
data_collector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import csv
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn import preprocessing
class DataCollector:
    """
    Helpers for recording training samples to a csv file and for loading,
    splitting and normalizing that data afterwards.

    Every method is a static method; the class only groups them together with
    the csv schema (``csv_columns``) and a blank sample (``training_data_note``).
    """

    # Blank sample holding one entry per csv column with its default value.
    # NOTE: this dict is a shared class attribute - copy it before filling it in.
    training_data_note = {
        "vector_from_food_x": 0,
        "vector_from_food_y": 0,
        "vector_from_wall_x": 0,
        "vector_from_wall_y": 0,
        "is_food_on_top": 0,
        "is_food_on_bottom": 0,
        "is_food_on_left": 0,
        "is_food_on_right": 0,
        "is_obstacle_on_top": 0,
        "is_obstacle_on_bottom": 0,
        "is_obstacle_on_left": 0,
        "is_obstacle_on_right": 0,
        "score": 0,
        "action": ""
    }

    # Column order used for the csv file; same keys as training_data_note.
    csv_columns = [
        "vector_from_food_x",
        "vector_from_food_y",
        "vector_from_wall_x",
        "vector_from_wall_y",
        "is_food_on_top",
        "is_food_on_bottom",
        "is_food_on_left",
        "is_food_on_right",
        "is_obstacle_on_top",
        "is_obstacle_on_bottom",
        "is_obstacle_on_left",
        "is_obstacle_on_right",
        "score",
        "action"
    ]

    @staticmethod
    def save_header_to_csv_file(csv_columns, filename="data_set.csv"):
        """
        Appends a header row made of csv_columns to the given csv file.

        The file is opened in append mode, so calling this on a file that
        already has content adds another header line at its end.
        :param csv_columns: csv column names in list format to be written as the header.
        :param filename: csv file path.
        """
        # newline="" lets the csv module control line endings; without it
        # every row is followed by an empty line on Windows.
        with open(filename, "a", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()

    @staticmethod
    def save_data_row_to_csv_file(data_row, csv_columns, filename="data_set.csv"):
        """
        Appends a single data row to the given csv file.
        :param data_row: Mapping of column name to value to be written to the csv file.
        :param csv_columns: Csv column names, defining the order of the written values.
        :param filename: Path to the csv file.
        """
        # newline="" lets the csv module control line endings (see csv docs).
        with open(filename, "a", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writerow(data_row)

    @staticmethod
    def load_data_from_csv_to_np_array(file_path, delimiter=",", keep_header=True):
        """
        Loads the whole csv data and returns it as a numpy array type.
        :param file_path: Path to the csv file.
        :param delimiter: delimiter used in the csv file
        :param keep_header: Passed to numpy.genfromtxt as ``names``; when True the first line is used for the
        field names of the returned array.
        :return: Numpy array with the values, parsed as floats.
        """
        # NOTE(review): with keep_header=False the header line is not skipped,
        # it is parsed like any other row - confirm this is what callers expect.
        return np.genfromtxt(file_path, dtype=float, delimiter=delimiter, names=keep_header)

    @staticmethod
    def load_data_from_csv_to_data_frame(file_path):
        """
        Loads data specified in csv file path as param and returns it as pandas data frame.
        :param file_path: Path to the csv file
        :return: Data frame with values from csv file.
        """
        return pd.read_csv(file_path)

    @staticmethod
    def split_data_frame_to_train_and_test(data_frame, percent_test_ratio=20, shuffle_rows=False):
        """
        Splits the data set into disjoint training and test parts by a given ratio.

        The training part is taken from the beginning of the data set and the
        test part from its end.
        :param data_frame: Data frame containing values to be split.
        :param percent_test_ratio: Percentage (0-100) of the rows that shall end up in the test part.
        :param shuffle_rows: If true, then we will shuffle rows inside given data set before split.
        :return: Tuple of data frame objects (training, test).
        :raises ValueError: If percent_test_ratio is not within 0-100.
        """
        if not 0 <= percent_test_ratio <= 100:
            raise ValueError("percent_test_ratio must be between 0 and 100")

        if shuffle_rows:
            data_frame = shuffle(data_frame)

        total_number_of_rows = data_frame.shape[0]
        test_rows_number = int(total_number_of_rows * percent_test_ratio / 100)
        train_rows_number = total_number_of_rows - test_rows_number

        # Both slices are cut at train_rows_number so that the two parts do
        # not overlap and together cover every row exactly once.
        df_train = data_frame[:train_rows_number]
        df_test = data_frame[train_rows_number:]
        return (df_train, df_test)

    @staticmethod
    def extract_labels_from_data_frame(data_frame, column_number=-1, column_name=""):
        """
        Extracts labels from the data frame and deletes the whole labels column, returning the data set without it.
        You can provide either column name or the column number to this function in order to cut it out, no need for
        two at the same time. If both are given, the column number is used.

        The labels column is removed from the passed data frame itself (in place).
        :param data_frame: data set containing labels column.
        :param column_number: Position of the column where labels are located (-1 means "not provided").
        :param column_name: Column name with labels ("" means "not provided").
        :return: data set without labels column, and labels as the second data set. When neither column number nor
        column name is provided, an error is printed and (data_frame, 0) is returned.
        """
        if column_name == "" and column_number == -1:
            print("Error provide column number or column name!")
            return (data_frame, 0)

        # Translate the position into a column label so both lookups share
        # the same removal code.
        if column_number != -1:
            label_column = data_frame.columns[column_number]
        else:
            label_column = column_name

        # pop() returns the column and drops it from the frame in one step.
        labels = data_frame.pop(label_column)
        return (data_frame, labels)

    @staticmethod
    def normalize_data_frame_data(data_frame):
        """
        Performs min-max normalization of values of a given data frame.

        The result is a new data frame built from the scaled values only, so
        the column labels and index of the input are not carried over.
        :param data_frame: data set on which normalization of values shall be performed.
        :return: data set with scaled values after normalization.
        """
        min_max_scaler = preprocessing.MinMaxScaler()
        data_frame_scaled = min_max_scaler.fit_transform(data_frame)
        return pd.DataFrame(data_frame_scaled)