forked from VivekPa/AIAlpha
-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_processing.py
62 lines (50 loc) · 2.82 KB
/
data_processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy as np
import pandas as pd
class DataProcessing:
    """Slice the autoencoded features and their targets into train/test CSVs.

    All tables are partitioned by row position, using
    ``n = len(self.auto_train)`` as the row count for every table (this
    assumes ``test`` and ``test_stock`` are row-aligned with ``auto_train``
    -- TODO confirm against the preprocessing step):

    * rows up to and including ``int(feature_split * n)`` are skipped
      (presumably the span already consumed upstream when the feature
      extractor was fitted -- confirm against the rest of the pipeline);
    * the training slice starts at ``int(feature_split * n) + 1`` and stops
      just before the boundary index
      ``int((1 - feature_split) * split * n + feature_split * n)``;
    * the test slice starts one row after that boundary and runs to the end.
      The boundary row itself is written to neither set; this keeps the test
      start index identical to the one the test exports have always used.

    Every ``make_*`` method writes one CSV under ``features/`` and returns
    ``None``.  Because the rows are round-tripped through a NumPy array, the
    written frames carry a fresh 0-based index and integer column labels.
    """

    def __init__(self, split, feature_split):
        """Store the split fractions and load the three input tables.

        Args:
            split: Fraction of the rows remaining after the skipped prefix
                that goes to the training slice.
            feature_split: Fraction of all rows skipped at the front.

        Reads ``preprocessing/test_data.csv``, ``preprocessing/test_stock.csv``
        and ``features/autoencoded_data.csv`` relative to the current working
        directory, using the first CSV column as the index.
        """
        self.split = split
        self.feature_split = feature_split
        self.test = pd.read_csv("preprocessing/test_data.csv", index_col=0)
        self.test_stock = pd.read_csv("preprocessing/test_stock.csv", index_col=0)
        self.auto_train = pd.read_csv("features/autoencoded_data.csv", index_col=0)

    def _boundary(self):
        """Return the row index that separates the training and test slices."""
        n_rows = len(self.auto_train)
        return int((1 - self.feature_split) * self.split * n_rows
                   + self.feature_split * n_rows)

    def _train_rows(self, table):
        """Return the training slice of *table* as a freshly indexed frame."""
        start = int(self.feature_split * len(self.auto_train)) + 1
        # The stop index must include the skipped-prefix offset; without it
        # the slice ends early (or is empty) and leaves unused rows between
        # the training and test sets.
        return pd.DataFrame(np.array(table)[start:self._boundary()])

    def _test_rows(self, table):
        """Return the test slice of *table* as a freshly indexed frame."""
        return pd.DataFrame(np.array(table)[self._boundary() + 1:])

    def make_train_data(self):
        """Write the training features to features/autoencoded_train_data.csv."""
        self._train_rows(self.auto_train).to_csv("features/autoencoded_train_data.csv")

    def make_test_data(self):
        """Write the test features to features/autoencoded_test_data.csv."""
        self._test_rows(self.auto_train).to_csv("features/autoencoded_test_data.csv")

    def make_train_y(self):
        """Write the training targets to features/autoencoded_train_y.csv."""
        self._train_rows(self.test).to_csv("features/autoencoded_train_y.csv")

    def make_test_y(self):
        """Write the test targets to features/autoencoded_test_y.csv."""
        self._test_rows(self.test).to_csv("features/autoencoded_test_y.csv")

    def make_stock_train_y(self):
        """Write the training stock targets to features/nn_stock_train_y.csv."""
        self._train_rows(self.test_stock).to_csv("features/nn_stock_train_y.csv")

    def make_stock_test_y(self):
        """Write the test stock targets to features/nn_stock_test_y.csv.

        Uses the same start row as the other test exports so the stock
        targets stay row-aligned with the test features.
        """
        self._test_rows(self.test_stock).to_csv("features/nn_stock_test_y.csv")
if __name__ == "__main__":
    # Script entry point: load the inputs with an 80% train share and a 25%
    # skipped prefix, then write the feature and target CSVs in the same
    # order as before (test features, train features, train y, test y).
    processor = DataProcessing(split=0.8, feature_split=0.25)
    for export in (
        processor.make_test_data,
        processor.make_train_data,
        processor.make_train_y,
        processor.make_test_y,
    ):
        export()