# Chapter11AlgorithmEvaluationMat.py
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 13 21:40:11 2018
@author: Administrator
"""
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
# Load the diabetes data set from a CSV file in the working directory.
filename = 'diabetes.csv'
# Short labels for the eight feature columns plus the class column.
# NOTE(review): `names` is not passed to read_csv below, so the first row of
# the CSV is taken as the header -- confirm the file actually has one.
names = [
    'preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class',
]
data = pd.read_csv(filename)
# Separate the raw values into inputs (first eight columns) and the
# output (ninth column).
array = data.values
X, y = array[:, :8], array[:, 8]
# ---------------------------------------------------------------------------
# K-fold cross-validation: classification accuracy
# ---------------------------------------------------------------------------
# Number of folds, and the seed that makes the shuffled split reproducible.
num_folds = 10
seed = 7
# random_state only has an effect when shuffling is enabled; newer
# scikit-learn releases raise ValueError if it is set while shuffle=False.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = LogisticRegression()
# cross_val_score trains and evaluates the model once per fold. With no
# `scoring` argument it falls back to the estimator's own score method,
# which is accuracy for classifiers.
result = cross_val_score(model, X, y, cv=kfold)
print('k折分类准确度算法评估准确度为:{0},标准差为:{1}'.format(result.mean(),result.std()))
# ---------------------------------------------------------------------------
# K-fold cross-validation: logarithmic loss
# ---------------------------------------------------------------------------
n_splits = 10
seed = 7
# random_state only has an effect when shuffling is enabled; newer
# scikit-learn releases raise ValueError if it is set while shuffle=False.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
model = LogisticRegression()
# scikit-learn exposes log loss as a negated score so that, like every other
# scorer, a larger value means a better model.
scoring = 'neg_log_loss'
result = cross_val_score(model, X, y, cv=kfold, scoring=scoring)
print('对数损失函数算法评估准确度为:{0},标准差为:{1}'.format(result.mean(),result.std()))
# ---------------------------------------------------------------------------
# K-fold cross-validation: area under the ROC curve
# ---------------------------------------------------------------------------
n_splits = 10
seed = 7
# random_state only has an effect when shuffling is enabled; newer
# scikit-learn releases raise ValueError if it is set while shuffle=False.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
model = LogisticRegression()
scoring = 'roc_auc'
result = cross_val_score(model, X, y, scoring=scoring, cv=kfold)
print('AUC图算法评估准确度为:{0},标准差为:{1}'.format(result.mean(),result.std()))
# ---------------------------------------------------------------------------
# Confusion matrix
# ---------------------------------------------------------------------------
# Hold out 33% of the samples for testing; the fixed seed keeps the split
# reproducible.
test_size = 0.33
seed = 49
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=seed, test_size=test_size
)
# fit() returns the estimator itself, so the trained model can be bound in
# one step.
model = LogisticRegression().fit(X_train, y_train)
predicted = model.predict(X_test)
# Build the confusion matrix from the true and predicted labels and wrap it
# in a labelled DataFrame for printing.
matrix = confusion_matrix(y_test, predicted)
classes = list('01')
dataframe = pd.DataFrame(matrix, index=classes, columns=classes)
print(dataframe)
# ---------------------------------------------------------------------------
# Classification report
# ---------------------------------------------------------------------------
# Same 33% hold-out split and seed as used for the confusion matrix.
test_size = 0.33
seed = 49
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=seed, test_size=test_size
)
# fit() returns the estimator itself, so the trained model can be bound in
# one step.
model = LogisticRegression().fit(X_train, y_train)
predicted = model.predict(X_test)
# Text summary of the per-class metrics computed by classification_report.
report = classification_report(y_test, predicted)
print(report)
# ---------------------------------------------------------------------------
# Regression metrics: Boston house-price data set
# ---------------------------------------------------------------------------
# Mean absolute error (MAE)
#
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this section (and the module-level import) only works with an older
# scikit-learn -- an alternative data source is needed for current releases.
datasets = load_boston()
X = datasets.data
y = datasets.target
n_splits = 10
seed = 7
# random_state only has an effect when shuffling is enabled; newer
# scikit-learn releases raise ValueError if it is set while shuffle=False.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
model = LinearRegression()
# Error metrics are exposed as negated scores so that a larger value always
# means a better model.
scoring = 'neg_mean_absolute_error'
result = cross_val_score(model, X, y, scoring=scoring, cv=kfold)
print('MAE算法评估准确度为:{0},标准差为:{1}'.format(result.mean(),result.std()))
# ---------------------------------------------------------------------------
# Mean squared error (MSE)
# ---------------------------------------------------------------------------
# X and y are the regression inputs/targets assigned earlier in the script.
n_splits = 10
seed = 7
# random_state only has an effect when shuffling is enabled; newer
# scikit-learn releases raise ValueError if it is set while shuffle=False.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
model = LinearRegression()
# Error metrics are exposed as negated scores so that a larger value always
# means a better model.
scoring = 'neg_mean_squared_error'
result = cross_val_score(model, X, y, scoring=scoring, cv=kfold)
print('MSE算法评估准确度为:{0},标准差为:{1}'.format(result.mean(),result.std()))
# ---------------------------------------------------------------------------
# Coefficient of determination (R^2)
# ---------------------------------------------------------------------------
# X and y are the regression inputs/targets assigned earlier in the script.
n_splits = 10
seed = 7
# random_state only has an effect when shuffling is enabled; newer
# scikit-learn releases raise ValueError if it is set while shuffle=False.
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
model = LinearRegression()
scoring = 'r2'
result = cross_val_score(model, X, y, scoring=scoring, cv=kfold)
print('r2算法评估准确度为:{0},标准差为:{1}'.format(result.mean(),result.std()))