-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkidnry_disease_prediction.py
208 lines (120 loc) · 3.68 KB
/
kidnry_disease_prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import keras as k
from keras.models import Sequential,load_model
import numpy as np
import pandas as pd
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
import matplotlib.pyplot as plt
# In[2]:
# Load the raw dataset from the current working directory.
data = pd.read_csv('kidney_disease.csv')
data.columns
# Abbreviations used for the dataset's columns:
#   age   - age
#   bp    - blood pressure
#   sg    - specific gravity
#   al    - albumin
#   su    - sugar
#   rbc   - red blood cells
#   pc    - pus cell
#   pcc   - pus cell clumps
#   ba    - bacteria
#   bgr   - blood glucose random
#   bu    - blood urea
#   sc    - serum creatinine
#   sod   - sodium
#   pot   - potassium
#   hemo  - hemoglobin
#   pcv   - packed cell volume
#   wc    - white blood cell count
#   rc    - red blood cell count
#   htn   - hypertension
#   dm    - diabetes mellitus
#   cad   - coronary artery disease
#   appet - appetite
#   pe    - pedal edema
#   ane   - anemia
#   class - class
# In[3]:
# Shape of the raw table (rows, columns).
data.shape
# In[4]:
data.head()
# In[5]:
# Columns kept for modelling; the last entry is the prediction target.
data_column = ['sg', 'al' , 'sc' , 'hemo' , 'pcv' , 'wc' , 'rc' , 'htn' , 'classification']
# Discard every column that is not listed in data_column.
unwanted = [name for name in data.columns if name not in data_column]
df = data.drop(columns=unwanted)
df.head()
# In[6]:
# Count the missing values per column ...
df.isna().sum()
# ... then keep only the rows that have no missing value.
df = df.dropna(axis='index')
# In[7]:
# Transform non-numeric data: every column that is not already numeric is
# replaced by integer codes from a fresh LabelEncoder.
#
# The numeric check uses pandas' dtype predicate. The previous test,
# `dtype == np.number`, compared a concrete dtype with NumPy's abstract
# scalar class, which does not reliably identify numeric columns across
# NumPy versions, so numeric features could be label-encoded by mistake.
for column in df.columns:
    if pd.api.types.is_numeric_dtype(df[column]):
        continue
    df[column] = LabelEncoder().fit_transform(df[column])
# In[8]:
df.head()
# In[9]:
# Separate the predictors (x) from the target column (y).
x = df.drop(['classification'] ,axis= 1)
y = df['classification']
# In[10]:
# Split the data into train and test sets BEFORE scaling, so the scaler
# never sees the held-out rows (fitting it on all of x would leak the test
# set's min/max into training).
x_train ,x_test,y_train ,y_test = train_test_split(x , y ,test_size=0.2 ,shuffle=True )
# In[11]:
# Scaling: MinMaxScaler maps each training feature to the range 0..1.
# It is fit on the training rows only; the test rows are transformed with
# those same parameters and may therefore fall slightly outside 0..1.
x_scaler = MinMaxScaler()
column_name = x.columns #stores the columns name
x_train = pd.DataFrame(
    x_scaler.fit_transform(x_train),
    columns=column_name,
    index=x_train.index,
)
x_test = pd.DataFrame(
    x_scaler.transform(x_test),
    columns=column_name,
    index=x_test.index,
)
# In[12]:
# Build the model.
# Sequential groups a linear stack of layers into a single Keras model and
# provides training and inference on it.
# Dense is the regular fully connected layer.
# The initializer sets how the hidden layer's starting weights are drawn;
# it is seeded here with 13.
hidden_layer = Dense(
    256,
    input_dim=len(x.columns),
    kernel_initializer=k.initializers.random_normal(seed=13),
    activation='relu',
)
# NOTE(review): 'hard_sigmoid' is kept as written; plain 'sigmoid' is the more
# common pairing with binary cross-entropy - confirm this choice is intended.
output_layer = Dense(1, activation='hard_sigmoid')
model = Sequential()
for layer in (hidden_layer, output_layer):
    model.add(layer)
# In[13]:
# Compile the model with a binary cross-entropy loss, the Adam optimizer and
# accuracy as the reported metric.
# For more info on loss functions:
# https://data-flair.training/blogs/compile-evaluate-predict-model-in-keras/
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# In[14]:
# Train the model for 2000 epochs. The batch size equals the number of
# training rows, so each epoch is one full-batch update.
training = model.fit(
    x_train,
    y_train,
    epochs=2000,
    batch_size=len(x_train),
)
# In[15]:
# Save the trained model.
# NOTE(review): newer Keras releases may require a `.keras` or `.h5` suffix
# for this path - confirm against the installed version.
model.save('chronic_kidney_disease.model')
# In[16]:
# Visualize the per-epoch training accuracy and loss on one set of axes.
# Each curve is labelled and a legend is drawn so the two lines can be told
# apart.
plt.plot(training.history['accuracy'], label='accuracy')
plt.plot(training.history['loss'], label='loss')
plt.title('MODEL ACCURACY AND LOSS')
plt.xlabel('EPOCH')
plt.ylabel('ACCURACY AND LOSS')
plt.legend()
plt.show()
# In[17]:
# Report the sizes of both splits (the second line previously mislabelled the
# test split as "training").
print(f'training data shape :{x_train.shape}')
print(f'testing data shape :{x_test.shape}')
# In[18]:
# Predict on the held-out rows and threshold the model output at 0.5 to get
# hard 0/1 labels. The output is flattened first so each prediction is a
# scalar, and no loop variable shadows the module-level target `y`.
pred = model.predict(x_test)
pred = (pred.ravel() > 0.5).astype(int).tolist()
pred
# In[19]:
# Print the true labels next to the predicted ones for a manual comparison.
y_test = list(y_test)
print(f'ACTUAL VALUES: {y_test}')
print(f'PREDICTED VALUE: {pred}')
# In[ ]: