-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcleanup_features.py
42 lines (28 loc) · 1.03 KB
/
cleanup_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pandas as pd
# Separate the features and the output
def separateFeatures_Output(data):
    """Split a frame into the feature matrix and the target column.

    Args:
        data: DataFrame that contains a 'gross_tonnage' column.

    Returns:
        A ``(X, y)`` tuple: ``X`` is a copy of ``data`` without the
        'gross_tonnage' column, ``y`` is the 'gross_tonnage' column itself.
    """
    target_column = 'gross_tonnage'
    # Drop first so a missing target column fails on the drop call,
    # exactly as before.
    features = data.drop([target_column], axis='columns')
    features = features.copy()
    target = data[target_column]
    return features, target
# The ship name has no influence on the prediction
def removeShipname(data):
    """Return a copy of ``data`` without the 'Ship_name' column.

    Prints two progress lines to stdout once the column has been dropped.

    Args:
        data: DataFrame that contains a 'Ship_name' column.

    Returns:
        A new DataFrame with every column of ``data`` except 'Ship_name'.
    """
    without_name = data.drop(['Ship_name'], axis='columns').copy()
    for progress_line in ("Shipname removed", "........"):
        print(progress_line)
    return without_name
# Change all object columns to categorical
def objectToCategorical(data):
    """Replace every object-dtype column with its integer category codes.

    The previous version computed the codes but never stored them, so the
    frame came back unchanged. The codes are now written back into the
    column.

    Args:
        data: DataFrame to convert. It is modified in place.

    Returns:
        The same ``data`` object, with each object-dtype column replaced by
        the integer codes of its categorical encoding (pandas uses -1 for
        missing values). Columns of any other dtype are left untouched.
    """
    # Collect the names first so the dtype check is done against the
    # original dtypes, not against columns already rewritten below.
    object_columns = [
        column_name
        for column_name, dtype in data.dtypes.items()
        if dtype == "object"
    ]
    for column_name in object_columns:
        data[column_name] = data[column_name].astype('category').cat.codes
    print("Number of converted columns: ", len(object_columns))
    return data
def addFeatures(X):
    """Add four derived columns to the feature frame.

    Each assignment is a whole-column operation that reads only the
    'cabins', 'passengers', 'length' and 'crew' columns, so one pass is
    enough. The earlier fixed 158-iteration loop recomputed the same values
    every time and has been removed; the resulting values are unchanged.

    Args:
        X: DataFrame with numeric 'cabins', 'passengers', 'length' and
            'crew' columns. It is modified in place.

    Returns:
        The same ``X`` object with the columns 'passenger_per_cabin',
        'length_per_cabin', 'passengers_per_length' and 'total_people'
        added (or overwritten).
    """
    # NOTE(review): the column names read as the inverse of the ratios that
    # are actually computed (e.g. 'length_per_cabin' is cabins / length).
    # Formulas are kept as-is so existing consumers see the same values;
    # confirm the intended definitions before renaming or inverting.
    X["passenger_per_cabin"] = X["cabins"] / (X["passengers"] / 10)
    X["length_per_cabin"] = X["cabins"] / X["length"]
    X["passengers_per_length"] = X["length"] / X["passengers"]
    X["total_people"] = X["crew"] + X["passengers"]
    return X