-
Notifications
You must be signed in to change notification settings - Fork 6
/
model_long_v8.py
302 lines (224 loc) · 9.16 KB
/
model_long_v8.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
#####################################################################
# This is the long model created to compare with the proposed model #
#####################################################################
## Import the libraries ##
from __future__ import division, print_function, absolute_import
from skimage import color, io
from scipy.misc import imresize, toimage
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
import os
from glob import glob
import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import local_response_normalization
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
from tflearn.metrics import Accuracy
import decimal
from six.moves import cPickle
import pickle
import h5py
np.set_printoptions(suppress=True)
########################################
### Imports picture files
########################################
# TumorA = astrocytoma = 0
# TumorB = glioblastoma_multiforme = 1
# TumorC = oligodendroglioma = 2
# healthy = 3
# unknown = 4
# Load the pre-pickled dataset: allX = image arrays, allY = integer labels
# (label meanings are listed in the legend comments above).
# Use a context manager so the file handle is closed even if pickle.load raises;
# the original opened/closed the file manually and would leak the handle on error.
with open('full_dataset_final.pkl', 'rb') as f:
    print("pickle file open")
    allX, allY = pickle.load(f)
    print("pickle opened")
# Side length of the (square) input images, in pixels.
size_image = 64
###################################
# Define model architecture
###################################
# Input placeholder: 64x64 images with 3 channels.
# NOTE(review): the original comment here claimed "1 color channel (grayscale
# image)", but the shape below declares 3 channels -- confirm which one the
# pickled dataset actually provides.
network = input_data(shape=[None, size_image, size_image, 3])
# 1: Convolution layer with 16 filters, size 5x5
conv_1 = conv_2d(network, nb_filter=16, filter_size=5, activation='relu', name='conv_1')
print("layer 1")
# 2: Max pooling layer (2x2, halves spatial resolution)
network = max_pool_2d(conv_1, 2)
print("layer 2")
# 3: Convolution layer with 16 filters, size 3x3
conv_2 = conv_2d(network, nb_filter=16, filter_size=3, activation='relu', name='conv_2')
print("layer 3")
# 4: Convolution layer with 32 filters, size 3x3
conv_3 = conv_2d(conv_2, nb_filter=32, filter_size=3, activation='relu', name='conv_3')
print("layer 4")
# 5: Max pooling layer
network = max_pool_2d(conv_3, 2)
print("layer 5")
# 6: Convolution layer with 32 filters, size 3x3
conv_4 = conv_2d(network, nb_filter=32, filter_size=3, activation='relu', name='conv_4')
print("layer 6")
# 7: Max pooling layer
network = max_pool_2d(conv_4, 2)
print("layer 7")
# 8: Convolution layer with 64 filters, size 3x3
conv_5 = conv_2d(network, nb_filter=64, filter_size=3, activation='relu', name='conv_5')
print("layer 8")
# 9: Convolution layer with 64 filters, size 2x2
conv_6 = conv_2d(conv_5, nb_filter=64, filter_size=2, activation='relu', name='conv_6')
print("layer 9")
# 10: Max pooling layer
network = max_pool_2d(conv_6, 2)
# 11: Convolution layer with 128 filters, size 2x2
conv_7 = conv_2d(network, nb_filter=128, filter_size=2, activation='relu', name='conv_7')
print("layer 11")
# 12: Max pooling layer
network = max_pool_2d(conv_7, 2)
print("layer 12")
# 13: Fully-connected layer, 512 nodes
network = fully_connected(network, 512, activation='relu')
print("layer 13")
# 14: Dropout layer (keep probability 0.5) to combat overfitting
network = dropout(network, 0.5)
print("layer 14")
# 15: Fully-connected output layer: 5 classes (softmax probabilities)
network = fully_connected(network, 5, activation='softmax')
print("layer 15")
# Training configuration: Adam optimizer, categorical cross-entropy loss.
network = regression(network, optimizer='adam',
                     loss='categorical_crossentropy',
                     learning_rate=0.0001)
# Wrap the network in a model object
model = tflearn.DNN(network, tensorboard_verbose = 0)
print("model created done")
###################################################
# Prepare train & test samples and train the model
###################################################
## Using 6-fold cross validation
no_folds = 6 # for 6 fold cross validation
accuracy_array = np.zeros((no_folds), dtype='float64') # accuracies of the test dataset for each split in cross validation
accuracy_array2 = np.zeros((no_folds), dtype='float64') # accuracies for the complete dataset for each split in cross validation
i=0 # counter
split_no = 1 # counter for each split
kf = KFold(n_splits=no_folds, shuffle = True, random_state=42) # create split criteria using KFold in Sklearn.model_selection
#train_splits = []
#test_splits = []
###################################
# Train model: 10 epochs per fold x 6 folds = 60 epochs total
# (the original header here said "100 epochs", which did not match the code)
###################################
# NOTE(review): the same `model` instance keeps its weights across all six
# folds -- it is never re-initialised between iterations. Each fold's "test"
# split has therefore already influenced training in earlier folds, so the
# per-fold test accuracies are optimistic rather than true cross-validation
# scores. Confirm whether this accumulation is intentional.
for train_index, test_index in kf.split(allX):
    # split dataset using kf criteria into train and test dataset
    X, X_test = allX[train_index], allX[test_index]
    Y, Y_test = allY[train_index], allY[test_index]
    # one-hot encode the labels (5 classes) for train and test splits
    Y = to_categorical(Y, 5)
    Y_test = to_categorical(Y_test, 5)
    print("train split: " , split_no)
    split_no += 1 # iterate split no
    # Train the network for 10 epochs per split (shuffles data) -> total no of training epochs=60
    model.fit(X, Y, n_epoch=10, run_id='cancer_detector', shuffle=True,
              show_metric=True)
    #model.save('model_cancer_detector.tflearn')
    print("Network trained")
    # Calculate accuracies for test dataset and whole dataset in each split run.
    # NOTE(review): assumes model.evaluate returns a list whose first element
    # is mean accuracy in [0, 1] -- verify against the tflearn DNN API.
    score = model.evaluate(X_test, Y_test)
    score2 = model.evaluate(X, Y)
    # populate the accuracy arrays (stored as percentages)
    accuracy_array[i] = score[0] * 100
    accuracy_array2[i] = score2[0] * 100
    i += 1 # iterate
    print("accuracy checked")
print("")
print("accuracy for test dataset: ", accuracy_array) # print accuracy for the test dataset
print("")
print("accuracy for whole dataset: ", accuracy_array2) # print accuracy for the whole dataset
print("done training using 6 fold validation")
# Retrieve the maximum per-fold accuracy from each array, rounded to 3 d.p.
# np.max(arr) is the direct idiom for the original arr[np.argmax(arr)].
max_accuracy = round(np.max(accuracy_array), 3)
max_accuracy2 = round(np.max(accuracy_array2), 3)
print("")
###################################################
## Test the model to predict labels ###############
###################################################
#no_iteration = 100
#kf = KFold(n_splits=no_iteration)
#x_splits = kf.split(allX)
# Predict labels for the entire dataset in 90 chunks (memory-friendly
# batching) and collect predicted vs. true labels for the metrics below.
# Label meanings (0-4) are listed in the legend at the top of the file.
y_pred = np.zeros((len(allY)), dtype='int32')
y_true = np.zeros((len(allY)), dtype='int32')
# split allX and allY into 90 roughly-equal sections (array_split tolerates
# a length that is not divisible by 90)
x_list = np.array_split(allX, 90)
y_list = np.array_split(allY, 90)
# c is the running write index into y_pred / y_true; it equals the total
# sample count when the loop finishes (printed later as a sanity check).
# The original version drove this loop with four parallel manual counters
# (i, j, b, k) and an unused y_label=0 initialiser; zip expresses the same
# pairing directly.
c = 0
for x_test, y_test in zip(x_list, y_list):
    # class-probability vectors for every sample in this chunk
    y_label = model.predict(x_test)
    print("running here")
    for probs, true_label in zip(y_label, y_test):
        y_pred[c] = np.argmax(probs)  # index of the highest probability = predicted class
        y_true[c] = true_label        # ground-truth label for the same sample
        c += 1
##################################
# Test prints ####################
##################################
# Sanity check: total predictions written, and both label arrays' lengths.
print("Prediction finished", c)
print("")
print(len(y_true), " bla bla ", len(y_pred))
print("")
# Per-class F1 score over the whole dataset (average=None -> one score per label).
print("calculate f1 score")
f1Score = f1_score(y_true, y_pred, average=None)
print(f1Score)
print("")
# 5x5 confusion matrix over the whole dataset, restricted to the known labels.
print("calculate confusion matrix")
class_labels = [0, 1, 2, 3, 4]
confusionMatrix = confusion_matrix(y_true, y_pred, labels=class_labels)
print("confusion Matrix Created")
print(confusionMatrix)
##################################
## Print the Results #############
##################################
# Final summary of the run: per-fold accuracies, best accuracies, F1 scores,
# and the confusion matrix computed above.
print("")
print("")
print("-----------------------------------------------------------------------------")
# This script runs 6-fold cross validation (no_folds = 6); the banner
# previously said "3-Fold", which did not match the actual configuration.
print(" Cancer Tumor detector using Convolutional Neural Networks - 6-Fold cross validation")
print("Author - Narmada Balasooriya")
print("-----------------------------------------------------------------------------")
print("")
print("accuracy for the test dataset")
print(accuracy_array)
print("")
print("accuracy for the whole dataset")
print(accuracy_array2)
print("")
print("Maximum accuracy for test dataset: ", max_accuracy, '%')
print("")
print("Maximum accuracy for whole dataset: ", max_accuracy2, '%')
print("")
print("F1 score for the whole dataset")
print(f1Score)
print("")
print("confusion Matrix")
print(confusionMatrix)
print("")
print("-----------------------------------------------------------------------------")