-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmodel.py
37 lines (31 loc) · 1.34 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import pandas as pd
from sklearn.preprocessing import scale
from sklearn.neighbors import KNeighborsClassifier
import pickle
# Train a k-nearest-neighbours classifier on the e-commerce shipping data and
# save the fitted model to disk with pickle.

# Load dataset
ecomm = pd.read_csv("https://raw.githubusercontent.com/jadanpl/E-Commerce-Shipping/main/raw_data.csv")

# Rename columns: the first column becomes 'ID', the last becomes 'arrival',
# and every column in between keeps its name but lower-cased.
cols = ['ID'] + [name.lower() for name in ecomm.columns[1:-1]] + ['arrival']
ecomm.columns = cols

# Data preprocessing
# Encode gender as 0/1; any value other than 'F'/'M' becomes NaN via map().
ecomm['gender'] = ecomm['gender'].map({'F': 0, 'M': 1})
# Collapse the 1-5 rating into a binary target: 1 only for a rating of 1.
ecomm['customer_rating'] = ecomm['customer_rating'].map({5: 0, 4: 0, 3: 0, 2: 0, 1: 1})

# One-hot encode the categorical columns (get_dummies already returns a
# DataFrame, so no extra wrapping is needed).
dummy = pd.get_dummies(
    ecomm[['warehouse_block', 'mode_of_shipment', 'product_importance']]
)

# Standardise the continuous columns.
# NOTE(review): scale() fits on this data and the fitted statistics are not
# saved alongside the model, so whatever loads the pickle presumably has to
# reproduce the same scaling itself -- confirm against the consumer.
numeric_cols = ['cost_of_the_product', 'weight_in_gms', 'discount_offered']
ecomm1 = pd.DataFrame(
    scale(ecomm[numeric_cols]),
    columns=numeric_cols,
    index=ecomm.index,  # keep rows aligned with the other frames in concat
)

ecomm_final = pd.concat(
    [
        ecomm1,
        dummy,
        ecomm[['customer_care_calls', 'prior_purchases', 'gender',
               'arrival', 'customer_rating']],
    ],
    axis=1,
)

# Split data into output and input
# Select the target by name rather than by position so the split does not
# depend on the order in which the frames were concatenated above.
X = ecomm_final.drop(columns='customer_rating')  # inputs
Y = ecomm_final['customer_rating']  # outputs

# Model building
KNN_model = KNeighborsClassifier(n_neighbors=11, metric='euclidean')
KNN_model.fit(X, Y)

# Save the model
filename = 'finalized_knn.pkl'
# Use a context manager so the file is flushed and closed deterministically.
with open(filename, 'wb') as model_file:
    pickle.dump(KNN_model, model_file)