-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpokemonSVM.py
138 lines (124 loc) · 5.42 KB
/
pokemonSVM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# reading and writing data
import pandas as pd
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import train_test_split
from matplotlib import cm
from pandas.plotting import scatter_matrix
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
# Import pokemon data
pokemon = pd.read_csv('Pokemon.csv', sep=',')
# Spaces in the CSV headers become underscores so every column name is a
# valid identifier (e.g. 'Sp. Atk' -> 'Sp._Atk').
pokemon.columns = pokemon.columns.str.replace(' ', '_')
# Nine predictor columns; the target is the boolean 'Legendary' flag.
# NOTE(review): preprocessing.scale requires numeric input — Type_1/Type_2
# are presumably numerically encoded in this CSV; confirm, since the
# canonical Pokemon.csv ships them as strings.
X = pokemon[['Type_1', 'Type_2', 'Total', 'HP', 'Attack', 'Defense', 'Sp._Atk', 'Sp._Def', 'Speed']]
X_scaled = preprocessing.scale(X)  # standardize: zero mean, unit variance per column
y = pokemon['Legendary']
# Hold out 10% for testing (the default split would be 25%); random_state
# pins the shuffle seed so runs are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.10, random_state=42)
n_attributes = len(pokemon.columns) - 1
print("Our data set consists of " + str(n_attributes) + " attributes, however we only use 9 of them.")
print("Distribution of Legendary Pokemon:")
print(pokemon['Legendary'].value_counts())
print("Testing Split: 10% testing and 90% training")
# Optional exploratory visualization of the (unscaled) training data.
seeData = True
if seeData:
    # Scatter matrix of all feature pairs, colored by the Legendary label.
    # Pass the colormap by name: matplotlib.cm.get_cmap() was deprecated in
    # matplotlib 3.7 and removed in 3.9, so building the Colormap object up
    # front is no longer portable; scatter accepts a registered cmap name.
    scatter = scatter_matrix(X_train, c=y_train, marker='o', s=40,
                             hist_kwds={'bins': 15}, figsize=(9, 9),
                             cmap='gnuplot')
    # 3D scatter of three key stats; the axes3d import registers the '3d'
    # projection with matplotlib (side-effect import).
    from mpl_toolkits.mplot3d import axes3d  # must keep
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X_train['Total'], X_train['Attack'], X_train['Defense'],
               c=y_train, marker='o', s=100)
    ax.set_xlabel('Total')
    ax.set_ylabel('Attack')
    ax.set_zlabel('Defense')
    plt.show()
# Re-split, this time on the standardized feature matrix (same seed and
# test ratio, so the row partition matches the earlier split).
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=.1, random_state=42)
# kNN baseline: 5 nearest neighbours, uniform (unweighted) votes.
knn = KNeighborsClassifier(n_neighbors=5, weights='uniform')
knn.fit(X_train, y_train)
knn_train_score = 100 * knn.score(X_train, y_train)
knn_test_score = 100 * knn.score(X_test, y_test)
print("\nkNN Training set score: {:.2f}%".format(knn_train_score))
print("kNN Test set score: {:.2f}%".format(knn_test_score))
print()
# Linear SVM classifier.
# C=5 loosens the regularization relative to the default C=1; max_iter is
# raised from the default so the solver can converge on this data.
svcL = LinearSVC(C=5, random_state=10, tol=1e-4, max_iter=10000)
svcL.fit(X_train, y_train)
lin_train_score = 100 * svcL.score(X_train, y_train)
lin_test_score = 100 * svcL.score(X_test, y_test)
print("Linear SVM Training set score: {:.2f}%".format(lin_train_score))
print("Linear SVM Test set score: {:.2f}%".format(lin_test_score))
# Learned hyperplane: one coefficient per feature, plus an intercept.
print("LSvm.coef_: {}".format(svcL.coef_))
print("LSvm.intercept_: {}".format(svcL.intercept_))
print()
# Non-linear SVM with a polynomial kernel.
# degree=3 is the scikit-learn default for kernel='poly', stated explicitly
# here; C=10 weakens regularization relative to the default C=1.
svcP = SVC(C=10, degree=3, kernel='poly', random_state=1)
svcP.fit(X_train, y_train)
poly_train_score = 100 * svcP.score(X_train, y_train)
poly_test_score = 100 * svcP.score(X_test, y_test)
print("SVM Poly Training set score: {:.2f}%".format(poly_train_score))
print("SVM Poly Test set score: {:.2f}%".format(poly_test_score))
print()
# Non-linear SVM with the default RBF (Gaussian) kernel; only C is tuned.
svcG = SVC(C=10)
svcG.fit(X_train, y_train)
rbf_train_score = 100 * svcG.score(X_train, y_train)
rbf_test_score = 100 * svcG.score(X_test, y_test)
print("SVM Gaussian Training set score: {:.2f}%".format(rbf_train_score))
print("SVM Gaussian Test set score: {:.2f}%".format(rbf_test_score))
print()
# Logistic Regression baseline.
LR = LogisticRegression(random_state=5)
LR.fit(X_train, y_train)
lr_train_score = 100 * LR.score(X_train, y_train)
lr_test_score = 100 * LR.score(X_test, y_test)
print("Logistic Regression set score: {:.2f}%".format(lr_train_score))
print("Logistic Regression Test set score: {:.2f}%".format(lr_test_score))
# Coefficients of the linear model (b_1,...,b_p):
#   log(p/(1-p)) = b0 + b_1*x_1 + ... + b_p*x_p
print("lr.coef_: {}".format(LR.coef_))
print("lr.intercept_: {}".format(LR.intercept_))
print()
# --- Non-normalized confusion matrices for each fitted classifier ---
# sklearn.metrics.plot_confusion_matrix was deprecated in scikit-learn 1.0
# and removed in 1.2; ConfusionMatrixDisplay.from_estimator is the drop-in
# replacement (imported locally here because the legacy import still sits at
# the top of the file).
from sklearn.metrics import ConfusionMatrixDisplay


def show_confusion_matrix(estimator, title):
    # Plot the confusion matrix of `estimator` on the held-out test set,
    # labelled with the two Legendary classes, and display it.
    disp = ConfusionMatrixDisplay.from_estimator(
        estimator, X_test, y_test,
        display_labels=["Non-Legendary", "Legendary"],
        cmap=plt.cm.Blues)
    disp.ax_.set_title(title)
    plt.show()


# KNN matrix
show_confusion_matrix(knn, "KNN")
# Linear matrix
show_confusion_matrix(svcL, "SVM Linear")
# Poly matrix — off by default; flip the flag to see it.
seePoly = False
if seePoly:
    show_confusion_matrix(svcP, "SVM Poly")
# Gaussian matrix — off by default; flip the flag to see it.
seeGaussian = False
if seeGaussian:
    show_confusion_matrix(svcG, "SVM Gaussian")
# Logistic Regression matrix
show_confusion_matrix(LR, "Logistic Regression")