Predicting Blindness:

An Exploration of Diabetic Retinopathy and Transfer Learning

Sections

  1. EDA
  2. Preprocessing
  3. Baseline CNN
  4. Transfer Learning Exploration
  5. Tuning the Model
  6. Conclusion
In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os
general_path = "/blindness_2/blind/train_images"

training_data = pd.read_csv('/content/drive/My Drive/blindness 2/train_test_dfs/train.csv')
In [0]:
#### train test split
import pandas as pd
from sklearn.model_selection import train_test_split

training_data = pd.read_csv('/content/drive/My Drive/blindness 2/aptos2019-blindness-detection/train.csv')
training_data.id_code = training_data.id_code + '.png' 
x_train, x_test, y_train, y_test = train_test_split(training_data.id_code, training_data.diagnosis, test_size = .1, stratify=training_data.diagnosis)

training_data = pd.concat([x_train, y_train], axis=1)
test_data = pd.concat([x_test, y_test], axis=1)

del(x_train, x_test, y_train, y_test)  ### the .png suffix is used for file lookup in the generators

training_data.to_csv('/content/drive/My Drive/blindness 2/train_test_dfs/train.csv')
test_data.to_csv('/content/drive/My Drive/blindness 2/train_test_dfs/test.csv')





# training_data=training_data[1:200]  #### used for test purposes

This chunk of code is strictly for Colab.

The code below speeds up processing in Colab. Transferring the data into the actual instance is much better because it removes the overhead that reading from Google Drive creates.

In [2]:
### bringing data into the instance ####
are_you_sure = input("are you sure you want to do this? y/n\n")
###### copying and unzipping the archive into the instance directory
if are_you_sure == 'y':
  import shutil
  new_path = '/blindness_2/blind.zip'
  current_loc = "/content/drive/My Drive/blindness 2/aptos2019-blindness-detection.zip"
  shutil.copyfile(current_loc, new_path) 

  from zipfile import ZipFile
  zf = ZipFile('/blindness_2/blind.zip', 'r')
  zf.extractall('/blindness_2/blind')
  zf.close()
  print("finshed moving files")
else:
  print('it did not run')
are you sure you want to do this? y/n
y
finished moving files

Exploratory Data Analysis


  • Class Balance
  • An Example of One of the Most Severe Cases
  • Examples from No DR to the Most Severe Case
  • Changing to Black and White
  • Black and White with Gaussian Blur
  • Color with Gaussian Blur

Class Balance

In [8]:
class_balance = training_data.diagnosis.value_counts().sort_index()
test_balance = test_data.diagnosis.value_counts().sort_index()
col_names = ['No DR', 'Mild', 'Moderate','Severe','Proliferative DR']
fig, ax = plt.subplots(figsize = (10,10))
class_balance.index = col_names
test_balance.index = col_names
sns.barplot(x = class_balance.index, y = class_balance, ax=ax, color = 'green')
sns.barplot(x = test_balance.index, y = test_balance, ax=ax, color= 'red')
ax.set_title('Class Balance', size=20)
ax.set_xlabel('Diagnosis')
ax.set_ylabel('Class Counts')
Out[8]:
Text(0, 0.5, 'Class Counts')

Severe Case of DR

In [20]:
image = training_data[training_data.diagnosis == 4].iloc[10]
path = f"{general_path}/{image.id_code}"
fig = plt.figure(figsize=(25, 16))
plt.grid(False)
plt.axis('off')

# fig.add_subplot(1,5)
img= cv2.imread(path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = cv2.resize(img, (256, 256))

plt.imshow(img)
Out[20]:
<matplotlib.image.AxesImage at 0x7f31f0662908>

Least Severe DR to Most Severe DR

In [21]:
#### Shows the basic image for least to most severe

def image_change(severity):
  image = training_data[training_data.diagnosis == severity].iloc[50]
  path = f"{general_path}/{image.id_code}"
  img = cv2.imread(path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  return img

fig, (ax0, ax1, ax2, ax3, ax4) = plt.subplots(1,5, figsize=(25, 16))
ax0.imshow(image_change(0))
ax0.set_title('No DR', size=10)
ax1.imshow(image_change(1))
ax1.set_title('Mild', size=10)
ax2.imshow(image_change(2))
ax2.set_title('Moderate', size=10)
ax3.imshow(image_change(3))
ax3.set_title('Severe', size=10)
ax4.imshow(image_change(4))
ax4.set_title('Proliferative DR', size=10)
Out[21]:
Text(0.5, 1.0, 'Proliferative DR')

Black and White Rendering

In [22]:
#### Shows the black and white images for least to most severe
def image_change(severity):
  image = training_data[training_data.diagnosis == severity].iloc[50]
  path = f"{general_path}/{image.id_code}"
  img = cv2.imread(path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  return img

fig, (ax0, ax1, ax2, ax3, ax4) = plt.subplots(1,5, figsize=(25, 16))
ax0.imshow(image_change(0))
ax0.set_title('No DR', size=10)
ax1.imshow(image_change(1))
ax1.set_title('Mild', size=10)
ax2.imshow(image_change(2))
ax2.set_title('Moderate', size=10)
ax3.imshow(image_change(3))
ax3.set_title('Severe', size=10)
ax4.imshow(image_change(4))
ax4.set_title('Proliferative DR', size=10)
Out[22]:
Text(0.5, 1.0, 'Proliferative DR')

Black and White with Gaussian Blur
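
The two cells below use Ben Graham's preprocessing trick from the 2015 Diabetic Retinopathy Kaggle competition: cv2.addWeighted(img, 4, blur, -4, 128) computes 4*img - 4*blur + 128, subtracting a Gaussian-blurred local average from each pixel. This flattens lighting differences between cameras and makes the vessels and lesions stand out.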

In [23]:
#### Shows Ben Graham's method (black and white) for least to most severe
def image_change(severity, IMG_SIZE):
  image = training_data[training_data.diagnosis == severity].iloc[10]
  path = f"{general_path}/{image.id_code}"
  img = cv2.imread(path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
  img = cv2.addWeighted(img, 4, cv2.GaussianBlur(img, (0, 0), IMG_SIZE / 10), -4, 128)
  return img

IMG_SIZE = 1000
fig, (ax0, ax1, ax2, ax3, ax4) = plt.subplots(1,5, figsize=(25, 16))
ax0.imshow(image_change(0,IMG_SIZE),cmap = 'gray')
ax0.set_title('No DR', size=10)
ax1.imshow(image_change(1,IMG_SIZE),cmap = 'gray')
ax1.set_title('Mild', size=10)
ax2.imshow(image_change(2,IMG_SIZE),cmap = 'gray')
ax2.set_title('Moderate', size=10)
ax3.imshow(image_change(3,IMG_SIZE),cmap = 'gray')
ax3.set_title('Severe', size=10)
ax4.imshow(image_change(4,IMG_SIZE),cmap = 'gray')
ax4.set_title('Proliferative DR', size=10)
Out[23]:
Text(0.5, 1.0, 'Proliferative DR')

Color with Gaussian Blur

In [24]:
#### Shows Ben Graham's method (color) for least to most severe
def image_change(severity, IMG_SIZE):
  image = training_data[training_data.diagnosis == severity].iloc[50]
  path = f"{general_path}/{image.id_code}"
  img = cv2.imread(path)
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
  img = cv2.addWeighted(img, 4, cv2.GaussianBlur(img, (0, 0), IMG_SIZE / 10), -4, 128)
  return img

IMG_SIZE = 500
fig, (ax0, ax1, ax2, ax3, ax4) = plt.subplots(1,5, figsize=(25, 16))
ax0.imshow(image_change(0,IMG_SIZE))
ax0.set_title('No DR', size=10)
ax1.imshow(image_change(1,IMG_SIZE))
ax1.set_title('Mild', size=10)
ax2.imshow(image_change(2,IMG_SIZE))
ax2.set_title('Moderate', size=10)
ax3.imshow(image_change(3,IMG_SIZE))
ax3.set_title('Severe', size=10)
ax4.imshow(image_change(4,IMG_SIZE))
ax4.set_title('Proliferative DR', size=10)
Out[24]:
Text(0.5, 1.0, 'Proliferative DR')

Preprocessing Pipeline


  • Generators with Image Preprocessing
  • An Example of the Generators' Effect

Parameters that will be used for both the baseline CNN and the transfer learning models

In [0]:
BATCH_SIZE = 10
IMG_SIZE = 246
train_dir = "/blindness_2/blind/train_images"
train_df  = training_data
train_df.diagnosis = train_df.diagnosis.astype(str)
image_shift = .05
multiclass = True
epochs = 50
weight_classes = True
unfrezz_some_layers = True
trainable_layers = 5

###### converts to binary classification if multiclass == False
training_data = pd.read_csv('/content/drive/My Drive/blindness 2/aptos2019-blindness-detection/train.csv')
training_data.id_code = training_data.id_code + '.png'
if multiclass == False:
  training_data.diagnosis = training_data.diagnosis.apply(lambda x: '0' if x == 0 else '1')
  classes = 2
  class_mode = 'binary'
  loss = 'binary_crossentropy'
else:
  classes = 5
  class_mode = 'categorical'
  loss = 'categorical_crossentropy'

if weight_classes == True:
  from sklearn.utils import class_weight
  weights = class_weight.compute_class_weight('balanced', np.unique(training_data.diagnosis), training_data.diagnosis)
  class_weight = {i: weight for i, weight in enumerate(weights)}
  del(weights)
else:
  class_weight = {i: 1 for i in range(classes)}
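
For intuition, compute_class_weight('balanced', ...) assigns each class the weight n_samples / (n_classes * class_count). A toy illustration (the labels here are made up, not the APTOS distribution), using the same positional call style as above:

In [0]:
import numpy as np
from sklearn.utils import class_weight as cw

toy_labels = np.array([0] * 8 + [1] * 2)   # an 80/20 imbalance
weights = cw.compute_class_weight('balanced', np.unique(toy_labels), toy_labels)
print(dict(enumerate(weights)))            # {0: 0.625, 1: 2.5} -- the rare class is upweighted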

Generators with Preprocessing

Items

  • image_processesor: preprocesses each image, applying the color Gaussian blur
  • ImageDataGenerator: generator that uses the dataframe to look up files and adds augmentation noise
  • train_gen/val_gen: generators for training and validation
In [30]:
#### Generators
import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2



#### Preprocessing of the Image ####
def image_processesor(img):
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
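  # note: flow_from_dataframe loads images through PIL, which already yields RGB,
  # so this conversion effectively swaps the R and B channels; it is applied
  # identically at train, validation, and test time, so training stays consistent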
  img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
  img = cv2.addWeighted(img,4, cv2.GaussianBlur(img , (0,0) , IMG_SIZE/10) ,-4 ,128) 
  return img


datagen_train = ImageDataGenerator(
    validation_split = .10,
    horizontal_flip = True,
    vertical_flip = True,
    preprocessing_function = image_processesor,
    zoom_range = .1,
    rotation_range = 360,
    rescale= 1 / 255., ### rescales pixel values to [0, 1]
    height_shift_range=[-image_shift, image_shift],
    width_shift_range=[-image_shift,image_shift]
)

train_gen = datagen_train.flow_from_dataframe(
    train_df,
    directory = train_dir,
    x_col =  'id_code',
    y_col = 'diagnosis',
    class_mode = class_mode,
    target_size=(IMG_SIZE, IMG_SIZE), 
    shuffle = True, 
    batch_size = BATCH_SIZE,
    subset = 'training'
    )

val_gen = datagen_train.flow_from_dataframe(
    train_df,
    directory = train_dir,
    x_col =  'id_code',
    y_col = 'diagnosis',
    class_mode = class_mode, 
    shuffle = True, 
    target_size=(IMG_SIZE, IMG_SIZE), 
    batch_size = BATCH_SIZE,
    subset = 'validation'
    )
Found 3296 validated image filenames belonging to 5 classes.
Found 366 validated image filenames belonging to 5 classes.

Below shows the effect of the image generator

In [29]:
#### Generator check #######
# note: class_mode above must be changed to 'input' in order to run this cell!
from google.colab.patches import cv2_imshow

testing = train_gen.next()[1]

fig, (ax0, ax1, ax2, ax3, ax4) = plt.subplots(1,5, figsize=(25, 16))
ax0.imshow(cv2.cvtColor(testing[1], cv2.COLOR_RGB2BGR))
ax1.imshow(cv2.cvtColor(testing[2], cv2.COLOR_RGB2BGR))
ax2.imshow(cv2.cvtColor(testing[3], cv2.COLOR_RGB2BGR))
ax3.imshow(cv2.cvtColor(testing[4], cv2.COLOR_RGB2BGR))
ax4.imshow(cv2.cvtColor(testing[5], cv2.COLOR_RGB2BGR))
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Out[29]:
<matplotlib.image.AxesImage at 0x7f30e4260f98>

CNN Design and Preprocessing Parameters


CNN Build

In [32]:
################## importing Dependencies #################
import tensorflow as tf

############################ Keras ###########################
from tensorflow.keras.models import Sequential   #### a sequence of layers, not a graph
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense

#### Initializing CNN
classifier = Sequential()

### convolution layer
classifier.add(Convolution2D(32, (3, 3) , input_shape=(IMG_SIZE, IMG_SIZE, 3), activation='relu'))

#### Max Pooling Layer
classifier.add(MaxPooling2D(pool_size= (2,2)))

### Flattening layer 
classifier.add(Flatten())

#### fully connected layer
classifier.add(Dense(units=128, activation='relu')) #### units = number of neurons

#### output layer
classifier.add(Dense(units=5, activation = 'softmax')) #### one unit per severity class

#### compile
classifier.compile(optimizer='adam', loss = loss, metrics= ['accuracy',loss])
classifier.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 244, 244, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 122, 122, 32)      0         
_________________________________________________________________
flatten (Flatten)            (None, 476288)            0         
_________________________________________________________________
dense (Dense)                (None, 128)               60964992  
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 645       
=================================================================
Total params: 60,966,533
Trainable params: 60,966,533
Non-trainable params: 0
_________________________________________________________________

Fitting CNN Model

In [0]:
classifier.fit_generator(
    train_gen,
    validation_data = val_gen,
    epochs = epochs,
    steps_per_epoch = train_gen.samples // BATCH_SIZE,
    validation_steps = val_gen.samples // BATCH_SIZE,
    use_multiprocessing=True,
    class_weight = class_weight
    )

Graphing CNN

In [71]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

cnn = pd.read_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/cnnhist.csv')

fig, (ax1,ax2) = plt.subplots(2,1, figsize=(20,20))


df = cnn
sns.lineplot(x=df.index, y=df.accuracy, ax=ax1, color='blue')
sns.lineplot(x=df.index, y=df.val_accuracy, ax=ax1, color='red')

sns.lineplot(x=df.index, y=df.loss, ax=ax2, color='blue')
sns.lineplot(x=df.index, y=df.val_loss, ax=ax2, color='red')
Out[71]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30cf714860>

Transfer Learning


  • Functions for Transfer Learning
  • ResNet152V2 (Done)
  • VGG19 (Done)
  • DenseNet201
  • InceptionResNet
  • EfficientNetB7

Transfer Learning Models
Note: this training process was run across multiple instances at the same time, and therefore the output from '.fit' is not available.
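
Each instance instead saved its history out to Google Drive, which is what the graphing cells below read back in. The pattern, mirroring the commented-out lines in those cells, was:

In [0]:
import pandas as pd
# hist = classifier.fit(...)   ### fit on this instance first
pd.DataFrame(hist.history).to_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/res_hist.csv')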

Functions for Transfer Learning

In [0]:
import tensorflow as tf

############################ Keras ###########################
from tensorflow.keras.models import Sequential   #### a sequence of layers, not a graph
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense, GlobalAveragePooling2D, BatchNormalization, Dropout, ReLU
from tensorflow.keras.metrics import Recall, Precision

###### metrics
recall = Recall(name='recall')
precision = Precision(name='precision')

def get_transfer_block(tranfer_learning_model, weights = 'imagenet', img_size = 246, unfrezz_some_layer=True, trainable = 5):
  """
  Description: Prepares a transfer learning block from keras\n
  Parameters:
  \t tranfer_learning_model: keras transfer learning model class
  \t weights: pretrained weights to use
  \t img_size: the size of the training image
  \t unfrezz_some_layer: make some layers of the transfer model trainable
  \t trainable: if unfrezz_some_layer is True, how many layers are trainable on the transfer model
  """
  ### instantiating the transfer learning model
  tranfer_learning_model = tranfer_learning_model(
      include_top=False,
      weights= weights,
      input_shape=(img_size, img_size, 3),
  )
  ### freeze every layer by default
  for layer in tranfer_learning_model.layers:
      layer.trainable = False
  ### then unfreeze the last `trainable` layers if requested
  if unfrezz_some_layer:
      for i in range(-trainable, 0):
        tranfer_learning_model.layers[i].trainable = True
  return tranfer_learning_model



def transfer_model(tranfer_learning_block, globalpooling = True, relu_1 = 1000, relu_2 = 500):
  """
  Description: Takes a keras transfer learning block and builds out the back end for image training.\n
  Parameters:
  \t tranfer_learning_block: the prepared transfer learning block
  \t globalpooling: whether to use global average pooling to feed the dense layers; if False, flatten is used
  \t relu_1: how many neurons to use in the first dense layer
  \t relu_2: how many neurons to use in the second dense layer
  """
  #### Initializing CNN
  classifier = Sequential()
  classifier.add(tranfer_learning_block)  ### adding in the transfer learning block
  ### global average pooling for skip-based structures, flatten for all others
  if globalpooling == True:
    classifier.add(GlobalAveragePooling2D())
  else:
    classifier.add(Flatten())
  ## block one
  classifier.add(BatchNormalization())
  classifier.add(Dropout(0.4))
  classifier.add(Dense(relu_1, activation='relu'))
  ## block two
  classifier.add(BatchNormalization())
  classifier.add(Dropout(0.4))
  classifier.add(Dense(relu_2, activation='relu'))
  # output layer and compile
  classifier.add(Dense(5, activation="softmax"))
  classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics= ['accuracy', loss, recall, precision])
  return classifier

ResNet152V2

  • Building ResNet
  • Fitting ResNet
  • Graphs for ResNet
In [34]:
from tensorflow.keras.applications import ResNet152V2
classifier = transfer_model(get_transfer_block(ResNet152V2, unfrezz_some_layer=True, trainable=5))
classifier.summary()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5
234553344/234545216 [==============================] - 8s 0us/step
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
resnet152v2 (Model)          (None, 8, 8, 2048)        58331648  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 2048)              8192      
_________________________________________________________________
dropout (Dropout)            (None, 2048)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 1000)              2049000   
_________________________________________________________________
batch_normalization_1 (Batch (None, 1000)              4000      
_________________________________________________________________
dropout_1 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 500)               500500    
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 2505      
=================================================================
Total params: 60,895,845
Trainable params: 3,612,821
Non-trainable params: 57,283,024
_________________________________________________________________
In [0]:
hist = classifier.fit(
    train_gen,
    validation_data = val_gen,
    epochs = 50,
    steps_per_epoch = train_gen.samples // BATCH_SIZE,
    validation_steps = val_gen.samples // BATCH_SIZE,
    use_multiprocessing=True,
    class_weight = class_weight)
In [72]:
# res_df = pd.DataFrame(hist.history) #### putting history into Dataframe
# res_df.to_csv("/content/drive/My Drive/blindness 2/transfer_learning_dfs/res_hist.csv")#### save the file


import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

res = pd.read_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/res_hist.csv')

fig, (ax1,ax2) = plt.subplots(2,1, figsize=(20,20))


df = res
sns.lineplot(x=df.index, y=df.accuracy, ax=ax1, color='blue')
sns.lineplot(x=df.index, y=df.val_accuracy, ax=ax1, color='red')

sns.lineplot(x=df.index, y=df.loss, ax=ax2, color='blue')
sns.lineplot(x=df.index, y=df.val_loss, ax=ax2, color='red')
Out[72]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30cf654668>

VGG19

  • Building VGG
  • Fitting VGG
  • Graphs for VGG
In [36]:
from tensorflow.keras.applications import VGG19
classifier = transfer_model(get_transfer_block(VGG19, unfrezz_some_layer=False), globalpooling=False)
classifier.summary()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5
80142336/80134624 [==============================] - 2s 0us/step
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
vgg19 (Model)                (None, 7, 7, 512)         20024384  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 25088)             100352    
_________________________________________________________________
dropout_2 (Dropout)          (None, 25088)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 1000)              25089000  
_________________________________________________________________
batch_normalization_3 (Batch (None, 1000)              4000      
_________________________________________________________________
dropout_3 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 500)               500500    
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 2505      
=================================================================
Total params: 45,720,741
Trainable params: 25,644,181
Non-trainable params: 20,076,560
_________________________________________________________________
In [0]:
hist = classifier.fit(
    train_gen,
    validation_data = val_gen,
    epochs = 50,
    steps_per_epoch = train_gen.samples // BATCH_SIZE,
    validation_steps = val_gen.samples // BATCH_SIZE,
    use_multiprocessing=True,
    class_weight = class_weight)
In [73]:
# df = pd.DataFrame(hist.history) #### putting history into Dataframe
# df.to_csv("/content/drive/My Drive/blindness 2/transfer_learning_dfs/vgg_hist.csv")#### save the file

vgg = pd.read_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/vgg_hist.csv')

fig, (ax1,ax2) = plt.subplots(2,1, figsize=(20,20))


df = vgg
sns.lineplot(x=df.index, y=df.accuracy, ax=ax1, color='blue')
sns.lineplot(x=df.index, y=df.val_accuracy, ax=ax1, color='red')

sns.lineplot(x=df.index, y=df.loss, ax=ax2, color='blue')
sns.lineplot(x=df.index, y=df.val_loss, ax=ax2, color='red')


### all of the models are compared in a single graph later on
Out[73]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30cf589f28>

DenseNet


  • Building DenseNet
  • Fitting DenseNet
  • Graphs for DenseNet
In [38]:
from tensorflow.keras.applications import DenseNet201
classifier = transfer_model(get_transfer_block(DenseNet201, unfrezz_some_layer=True), globalpooling=True)
classifier.summary()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
74842112/74836368 [==============================] - 1s 0us/step
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
densenet201 (Model)          (None, 7, 7, 1920)        18321984  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1920)              0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 1920)              7680      
_________________________________________________________________
dropout_4 (Dropout)          (None, 1920)              0         
_________________________________________________________________
dense_8 (Dense)              (None, 1000)              1921000   
_________________________________________________________________
batch_normalization_5 (Batch (None, 1000)              4000      
_________________________________________________________________
dropout_5 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 500)               500500    
_________________________________________________________________
dense_10 (Dense)             (None, 5)                 2505      
=================================================================
Total params: 20,757,669
Trainable params: 2,470,549
Non-trainable params: 18,287,120
_________________________________________________________________
In [0]:
hist = classifier.fit(
    train_gen,
    validation_data = val_gen,
    epochs = 25,
    steps_per_epoch = train_gen.samples // BATCH_SIZE,
    validation_steps = val_gen.samples // BATCH_SIZE,
    use_multiprocessing=True,
    class_weight = class_weight)
In [74]:
# df = pd.DataFrame(hist.history) #### putting history into Dataframe
# df.to_csv("/content/drive/My Drive/blindness 2/transfer_learning_dfs/dense_hist.csv")#### save the file

den = pd.read_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/dense_hist.csv')

fig, (ax1,ax2) = plt.subplots(2,1, figsize=(20,20))


df = den
sns.lineplot(x=df.index, y=df.accuracy, ax=ax1, color='blue')
sns.lineplot(x=df.index, y=df.val_accuracy, ax=ax1, color='red')

sns.lineplot(x=df.index, y=df.loss, ax=ax2, color='blue')
sns.lineplot(x=df.index, y=df.val_loss, ax=ax2, color='red')
Out[74]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30cf4d6048>

EfficientNet

  • Building EfficientNet
  • Fitting EfficientNet
  • Graphs for EfficientNet
In [0]:
!pip install efficientnet
In [42]:
import efficientnet.tfkeras as efn 

efficientnet = efn.EfficientNetB7

classifier = transfer_model(get_transfer_block(efficientnet, unfrezz_some_layer=True, trainable=5), globalpooling=True)
classifier.summary()
Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b7_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
258441216/258434480 [==============================] - 3s 0us/step
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
efficientnet-b7 (Model)      (None, 8, 8, 2560)        64097680  
_________________________________________________________________
global_average_pooling2d_2 ( (None, 2560)              0         
_________________________________________________________________
batch_normalization_6 (Batch (None, 2560)              10240     
_________________________________________________________________
dropout_6 (Dropout)          (None, 2560)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 1000)              2561000   
_________________________________________________________________
batch_normalization_7 (Batch (None, 1000)              4000      
_________________________________________________________________
dropout_7 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 500)               500500    
_________________________________________________________________
dense_13 (Dense)             (None, 5)                 2505      
=================================================================
Total params: 67,175,925
Trainable params: 4,714,645
Non-trainable params: 62,461,280
_________________________________________________________________
In [0]:
hist_efficeint = classifier.fit(
    train_gen,
    validation_data = val_gen,
    epochs = 50,
    steps_per_epoch = train_gen.samples // BATCH_SIZE,
    validation_steps = val_gen.samples // BATCH_SIZE,
    use_multiprocessing=True,
    class_weight = class_weight)
In [75]:
# df = pd.DataFrame(hist.history) #### putting history into Dataframe
# df.to_csv("/content/drive/My Drive/blindness 2/transfer_learning_dfs/eff_hist.csv")#### save the file


eff = pd.read_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/eff_hist.csv')

fig, (ax1,ax2) = plt.subplots(2,1, figsize=(20,20))


df = eff
sns.lineplot(x=df.index, y=df.accuracy, ax=ax1, color='blue')
sns.lineplot(x=df.index, y=df.val_accuracy, ax=ax1, color='red')

sns.lineplot(x=df.index, y=df.loss, ax=ax2, color='blue')
sns.lineplot(x=df.index, y=df.val_loss, ax=ax2, color='red')
Out[75]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30cf410668>

InceptionResNetV2

  • Building InceptionResNetV2
  • Fitting InceptionResNetV2
  • Graphs for InceptionResNetV2
In [44]:
from tensorflow.keras.applications import InceptionResNetV2
classifier = transfer_model(get_transfer_block(InceptionResNetV2, unfrezz_some_layer=True), globalpooling=True)
classifier.summary()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
219062272/219055592 [==============================] - 4s 0us/step
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
inception_resnet_v2 (Model)  (None, 6, 6, 1536)        54336736  
_________________________________________________________________
global_average_pooling2d_3 ( (None, 1536)              0         
_________________________________________________________________
batch_normalization_211 (Bat (None, 1536)              6144      
_________________________________________________________________
dropout_8 (Dropout)          (None, 1536)              0         
_________________________________________________________________
dense_14 (Dense)             (None, 1000)              1537000   
_________________________________________________________________
batch_normalization_212 (Bat (None, 1000)              4000      
_________________________________________________________________
dropout_9 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_15 (Dense)             (None, 500)               500500    
_________________________________________________________________
dense_16 (Dense)             (None, 5)                 2505      
=================================================================
Total params: 56,386,885
Trainable params: 6,175,413
Non-trainable params: 50,211,472
_________________________________________________________________
In [0]:
hist = classifier.fit(
    train_gen,
    validation_data = val_gen,
    epochs = 25,
    steps_per_epoch = train_gen.samples // BATCH_SIZE,
    validation_steps = val_gen.samples // BATCH_SIZE,
    use_multiprocessing=True,
    class_weight = class_weight)
In [76]:
# df = pd.DataFrame(hist.history) #### putting history into Dataframe
# df.to_csv("/content/drive/My Drive/blindness 2/transfer_learning_dfs/inception_hist.csv")#### save the file

incep = pd.read_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/inception_hist.csv')

fig, (ax1,ax2) = plt.subplots(2,1, figsize=(20,20))


df = incep
sns.lineplot(x=df.index, y=df.accuracy, ax=ax1, color='blue')
sns.lineplot(x=df.index, y=df.val_accuracy, ax=ax1, color='red')

sns.lineplot(x=df.index, y=df.loss, ax=ax2, color='blue')
sns.lineplot(x=df.index, y=df.val_loss, ax=ax2, color='red')
Out[76]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30cf359940>

Xception

  • Building Xception
  • Fitting Xception
  • Graphs for Xception
In [46]:
from tensorflow.keras.applications import Xception
classifier = transfer_model(get_transfer_block(Xception, unfrezz_some_layer=True), globalpooling=True)
classifier.summary()
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
83689472/83683744 [==============================] - 1s 0us/step
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
xception (Model)             (None, 8, 8, 2048)        20861480  
_________________________________________________________________
global_average_pooling2d_4 ( (None, 2048)              0         
_________________________________________________________________
batch_normalization_217 (Bat (None, 2048)              8192      
_________________________________________________________________
dropout_10 (Dropout)         (None, 2048)              0         
_________________________________________________________________
dense_17 (Dense)             (None, 1000)              2049000   
_________________________________________________________________
batch_normalization_218 (Bat (None, 1000)              4000      
_________________________________________________________________
dropout_11 (Dropout)         (None, 1000)              0         
_________________________________________________________________
dense_18 (Dense)             (None, 500)               500500    
_________________________________________________________________
dense_19 (Dense)             (None, 5)                 2505      
=================================================================
Total params: 23,425,677
Trainable params: 5,724,821
Non-trainable params: 17,700,856
_________________________________________________________________
In [0]:
hist = classifier.fit(
    train_gen,
    validation_data = val_gen,
    epochs = 25,
    steps_per_epoch = train_gen.samples // BATCH_SIZE,
    validation_steps = val_gen.samples // BATCH_SIZE,
    use_multiprocessing=True,
    class_weight = class_weight)
In [77]:
# df = pd.DataFrame(hist.history) #### putting history into Dataframe
# df.to_csv("/content/drive/My Drive/blindness 2/transfer_learning_dfs/xcept_hist.csv")#### save the file



xcept = pd.read_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/xcept_hist.csv')

fig, (ax1,ax2) = plt.subplots(2,1, figsize=(20,20))


df = xcept
sns.lineplot(x=df.index, y=df.accuracy, ax=ax1, color='blue')
sns.lineplot(x=df.index, y=df.val_accuracy, ax=ax1, color='red')

sns.lineplot(x=df.index, y=df.loss, ax=ax2, color='blue')
sns.lineplot(x=df.index, y=df.val_loss, ax=ax2, color='red')
Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30cf29f828>

Comparing the Models

In [0]:
### creating a master hist
models = [eff, den, xcept, vgg, res, incep, cnn]
names = ['eff', 'den', 'xcept', 'vgg', 'res', 'incep', 'cnn']

new_list = []
for i, model in enumerate(models):
  model.columns = names[i] + '_' + model.columns  ### prefix columns with the model name
  new_list.append(model)

master_hist = pd.concat(new_list, axis=1)
In [0]:
!pip install matplotlib-label-lines  ### package for getting inline labels
In [82]:
# Graphing the validation history of each model
from labellines import labelLine, labelLines
import seaborn as sns
names = ['eff', 'den', 'xcept', 'vgg', 'res','incep', 'cnn']

fig, (ax1, ax2) = plt.subplots(2,1, figsize=(20,20))

sns.lineplot(master_hist.index, master_hist.eff_val_accuracy, ax= ax1, label='Efficient Net')
sns.lineplot(master_hist.index, master_hist.den_val_accuracy, ax= ax1, label='DenseNet')
sns.lineplot(master_hist.index, master_hist.xcept_val_accuracy, ax= ax1, label='XceptionNet')
sns.lineplot(master_hist.index, master_hist.vgg_val_accuracy, ax= ax1, label='VGG')
sns.lineplot(master_hist.index, master_hist.res_val_accuracy, ax= ax1, label='ResNet')
sns.lineplot(master_hist.index, master_hist.incep_val_accuracy, ax= ax1, label='InceptionNet')
sns.lineplot(master_hist.index, master_hist.cnn_val_accuracy, ax= ax1, label='Baseline CNN')


ax1.legend(title = "Transfer Learning Methods", fontsize = 'large', title_fontsize ='large')
ax1.set_xlabel('Epochs', size=20)
ax1.set_ylabel('Validation Accuracy', size=20)
labelLines(ax1.get_lines())


sns.lineplot(master_hist.index, master_hist.eff_val_loss, ax= ax2, label='Efficient Net')
sns.lineplot(master_hist.index, master_hist.den_val_loss, ax= ax2, label='DenseNet')
sns.lineplot(master_hist.index, master_hist.xcept_val_loss, ax= ax2,label='XceptionNet')
sns.lineplot(master_hist.index, master_hist.vgg_val_loss, ax= ax2,label='VGG')
sns.lineplot(master_hist.index, master_hist.res_val_loss, ax= ax2,label='ResNet')
sns.lineplot(master_hist.index, master_hist.incep_val_loss, ax= ax2, label='InceptionNet')
sns.lineplot(master_hist.index, master_hist.cnn_val_loss, ax= ax2, label='Baseline CNN')

ax2.legend(title = "Transfer Learning Methods", fontsize = 'large', title_fontsize ='large')

ax2.set_xlabel('Epochs', size=20)
ax2.set_ylabel('Validation Loss', size=20)

labelLines(ax2.get_lines())

fig.suptitle("Transfer Learning Comparison", size=30)
# plt.tight_layout()
fig.subplots_adjust(top=.95)
# fig.tight_layout()

Final Model


The code below was adjusted and re-adjusted several times to find the best model. The configuration currently in use was found to perform best.

  • Model tuning
  • Analyzing final results

Best Model Tuning

We used a three-block design to find the best model: the blocks were commented out one at a time and the model was re-run. The learning rate was found, through trial and error, to work best at .0001. In addition, each dense layer had its activation function switched between relu, selu, and elu while the number of blocks was reduced.
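
A minimal sketch of the grid this manual search walks over (the build_model helper is hypothetical and written only for illustration; the actual runs edited the cells below by hand):

In [0]:
#### hypothetical ablation loop equivalent to the manual search described above
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization, Dropout, Dense
from tensorflow.keras.optimizers import Adam

def build_model(activation, widths):
  ### stacks one dense block per entry in `widths` on top of a DenseNet121 base
  base = DenseNet121(include_top=False, weights='imagenet', input_shape=(246, 246, 3))
  model = Sequential([base, GlobalAveragePooling2D()])
  for w in widths:
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(Dense(w, activation=activation))
  model.add(Dense(5, activation='softmax'))
  model.compile(optimizer=Adam(lr=0.0001, amsgrad=True),
                loss='categorical_crossentropy', metrics=['accuracy'])
  return model

for activation in ['relu', 'selu', 'elu']:
  for widths in [[500, 100, 32], [500, 100], [500]]:   ### three blocks down to one
    classifier = build_model(activation, widths)
    # classifier.fit(train_gen, validation_data=val_gen, ...) as in the fitting cell below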

In [0]:
#### Loading in the transfer learning model
from tensorflow.keras.applications import DenseNet121

densenet121 = DenseNet121(
  include_top=False,
  weights= 'imagenet',
  input_shape=(246, 246, 3),
)
### Ensuring all layers are trainable
# for i in range(0, len(densenet121.layers)):
#   densenet121.layers[i].trainable = True
In [11]:
### building the transfer model with the dense back end
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization, Dense, GaussianNoise, Dropout

adam=Adam(lr=0.0001, amsgrad=True)


### Model
classifier = Sequential()

classifier.add(densenet121)
classifier.add(GlobalAveragePooling2D())

### block one
classifier.add(BatchNormalization())
classifier.add(Dropout(0.4))
classifier.add(Dense(500, activation='elu'))

### block two
# classifier.add(BatchNormalization())
# classifier.add(Dropout(0.3))
# classifier.add(Dense(100, activation='elu'))
### block three
# classifier.add(BatchNormalization())
# classifier.add(Dropout(0.2))
# classifier.add(Dense(32, activation='elu'))

classifier.add(Dense(5, activation="softmax"))
classifier.compile(optimizer = adam, loss='categorical_crossentropy', metrics= ['accuracy'])

classifier.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
densenet121 (Model)          (None, 7, 7, 1024)        7037504   
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1024)              0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 1024)              4096      
_________________________________________________________________
dropout_3 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 500)               512500    
_________________________________________________________________
batch_normalization_4 (Batch (None, 500)               2000      
_________________________________________________________________
dropout_4 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 100)               50100     
_________________________________________________________________
batch_normalization_5 (Batch (None, 100)               400       
_________________________________________________________________
dropout_5 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 32)                3232      
_________________________________________________________________
dense_7 (Dense)              (None, 5)                 165       
=================================================================
Total params: 7,609,997
Trainable params: 7,523,101
Non-trainable params: 86,896
_________________________________________________________________

Fitting the Final Model

In [0]:
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
#### Callbacks #####
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=10)
check_point = ModelCheckpoint(filepath='/content/drive/My Drive/blindness 2/models/model.rd_32_3blk_elu_1blk_EP_{epoch:02d}-Loss_{val_loss:.2f}.h5', save_best_only=True)
tensor_board = TensorBoard(log_dir='./logs_1', histogram_freq=0, write_graph=True, update_freq='batch')

### fitting the model ###
hist = classifier.fit(
    train_gen,
    validation_data = val_gen,
    epochs = 25,
    steps_per_epoch = train_gen.samples // BATCH_SIZE,
    validation_steps = val_gen.samples // BATCH_SIZE,
    use_multiprocessing=True,
    class_weight = class_weight,
    callbacks= [early_stopping, check_point, tensor_board])

Graphing Final Model

In [81]:
import pandas as pd
### final model history
# df = pd.DataFrame(hist.history) #### putting history into Dataframe
# df.to_csv("/content/drive/My Drive/blindness 2/transfer_learning_dfs/elu_1blk_bloakc.csv")#### save the file

dense_4 = pd.read_csv('/content/drive/My Drive/blindness 2/transfer_learning_dfs/finale_elu_1blk_decrese_250.csv')

fig, (ax1,ax2) = plt.subplots(2,1, figsize=(20,20))


df = dense_4
sns.lineplot(x=df.index, y=df.accuracy, ax=ax1, color='blue')
sns.lineplot(x=df.index, y=df.val_accuracy, ax=ax1, color='red')

sns.lineplot(x=df.index, y=df.loss, ax=ax2, color='blue')
sns.lineplot(x=df.index, y=df.val_loss, ax=ax2, color='red')
Out[81]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f30cf009a58>

To run TensorBoard, the cell below must be run before fitting the model above.

In [13]:
%load_ext tensorboard
%tensorboard --logdir logs_1

Analysis of Results

  • Test Generator
  • Results
In [3]:
#### creating a test generator in order to check how our model performed on the test data
import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix, classification_report
from tensorflow.keras.models import load_model

test = pd.read_csv('/content/drive/My Drive/blindness 2/train_test_dfs/test.csv')
test.diagnosis = test.diagnosis.astype(str)

best = load_model("/content/drive/My Drive/blindness 2/models/model.rd_45_elu_1blk_EP_11-Loss_0.49.h5")
#### parameters needed for this cell ####
BATCH_SIZE = 10
IMG_SIZE = 246

#### Preprocessing of the image ####

def image_processesor(img):
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
  img = cv2.addWeighted(img,4, cv2.GaussianBlur(img , (0,0) , IMG_SIZE/10) ,-4 ,128) 
  return img


### Directories 

train_dir = "/blindness_2/blind/train_images"


#### generators
test_datagen = ImageDataGenerator(
    preprocessing_function = image_processesor,
    rescale= 1 / 255., ### rescales pixel values to [0, 1]
)

test_generator = test_datagen.flow_from_dataframe(
    test,
    directory = train_dir,
    x_col =  'id_code',
    y_col = 'diagnosis',
    class_mode = 'categorical',
    target_size=(246, 246), 
    shuffle = False, 
    batch_size = BATCH_SIZE,
    )


test_generator.reset()  #### it's recommended to reset the generator before making predictions
pred = best.predict(test_generator, steps=test_generator.samples / BATCH_SIZE, verbose=1)
Found 367 validated image filenames belonging to 5 classes.
37/36 [==============================] - 41s 1s/step
In [5]:
##### creating a confusion matrix and classification report

import numpy as np
actual = test.diagnosis.astype(int).values
pred_2 = np.argmax(pred,axis=1)

print(confusion_matrix(actual,pred_2))

print(classification_report(actual,pred_2))
[[179   2   0   0   0]
 [  3  31   3   0   0]
 [  2  17  64  17   0]
 [  0   1   1  17   0]
 [  1   3   5   5  16]]
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       181
           1       0.57      0.84      0.68        37
           2       0.88      0.64      0.74       100
           3       0.44      0.89      0.59        19
           4       1.00      0.53      0.70        30

    accuracy                           0.84       367
   macro avg       0.77      0.78      0.74       367
weighted avg       0.88      0.84      0.84       367

In [6]:
from sklearn.metrics import cohen_kappa_score
#### checking Cohen's kappa with quadratic weights

cohen_kappa_score(actual, pred_2, weights = 'quadratic')
Out[6]:
0.8958874853256269
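
With quadratic weights, a confusion between true grade i and predicted grade j is penalized in proportion to (i - j)^2, so calling a Proliferative DR eye "No DR" costs far more than confusing adjacent grades. A score of roughly 0.90 therefore indicates strong agreement that respects the ordering of the severity scale.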