Note: If the author has requested for "Expert Guidance" and you can help, please start a New Topic in the "Discussions" Tab

Vaibhav Mali's other Models Reports

Major Concepts


Sign-Up/Login to access Several ML Models and also Deploy & Monetize your own ML solutions for free

Models Home » Domain Usecases » Health Care and Pharmaceuticals » Classification of gastrointestinal abnormalities by endoscopic imaging with deep learning

Classification of gastrointestinal abnormalities by endoscopic imaging with deep learning

Models Status

Model Overview

Gastrointestinal endoscopy deals with the endoscopic examination, therapy or surgery of the gastrointestinal tract. The gastrointestinal tract generally refers to the digestive structures stretching from the mouth to the anus, but does not include the accessory glandular organs such as the liver, biliary tract and pancreas. The gastrointestinal tract is categorized into various sections, these include:

The dataset contains 8 classes of gastrointestinal findings (class name : label index):
1)dyed-lifted-polyps : 0
2)dyed-resection-margins : 1
3)esophagitis : 2
4)normal-cecum : 3
5)normal-pylorus : 4
6)normal-z-line : 5
7)polyps : 6
8)ulcerative-colitis : 7

Dataset Link:-

Required Libraries
Here we import all the required libraries

import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# to split the data into random training and testing subsets.
from sklearn.model_selection import train_test_split

# to calculate the confusion matrix
from sklearn.metrics import confusion_matrix

# to reduce the learning rate when a metric has stopped improving.
from keras.callbacks import ReduceLROnPlateau

# to be able to use the pre-trained VGG19 model
from keras.applications import VGG19

# to convert class vector (y) (integers) to binary class matrix.
from keras.utils import to_categorical

# to generate altered images
from keras.preprocessing.image import ImageDataGenerator

# to initialize the gradient descent optimizers
from keras.optimizers import SGD, Adam
from keras.models import Sequential, load_model
from keras.layers import Flatten, Dense, Dropout
import glob

import warnings

1.1. Define dataset categories
Here we define the categories (classes) of the dataset from the names of the folders of the dataset.
The dataset consists of 8 folders (one for each class) named according to their category, each folder contains 500 images.


# Root folder of the dataset: it holds one sub-folder per class.
# NOTE: machine-specific absolute path - point it at your own copy of the dataset.
dataset_dir = r"C:/Users/vaibhav1.mali/PycharmProjects/Gastrointestinal Disease Detection/kvasir-dataset"

def get_dataCategories(dataset_dir):
    """Return the class folders of a dataset and their image counts.

    Every sub-directory of ``dataset_dir`` is treated as one category; the
    number of ``*.jpg`` files directly inside it is its image count.

    Args:
        dataset_dir: Path of the dataset root directory.

    Returns:
        A ``(names, counts)`` tuple of two parallel lists sorted by folder
        name: the category names (``str``) and the number of ``.jpg`` files
        found in each one (``int``). Both lists are empty when the directory
        contains no sub-directory.
    """
    # Counts are kept in a plain dict: routing them through a NumPy array of
    # [name, count] pairs would silently turn every count into a string.
    counts_by_name = {}
    for folder_name in os.listdir(dataset_dir):
        folder_path = os.path.join(dataset_dir, folder_name)
        if os.path.isdir(folder_path):
            jpg_files = glob.glob(os.path.join(folder_path, "*.jpg"))
            counts_by_name[folder_name] = len(jpg_files)

    names = sorted(counts_by_name)
    return names, [counts_by_name[name] for name in names]

# Discover the classes and how many images each one holds.
categories, nbr_files = get_dataCategories(dataset_dir)

# Summary table: one row per category with its image count.
summary_columns = {"categorie": categories, "number of files": nbr_files}
df = pd.DataFrame(summary_columns)
print("number of categories: ", len(categories))

1.2. Create all X features and y labels
To do this we read each image of the dataset and we put it in X, and we save the class of the image read in y.
The read images are resized to 100x100 to speed up the learning step

def create_dataset(datadir, categories, img_wid, img_high):
    """Load every image of the dataset into arrays of features and labels.

    Args:
        datadir: Dataset root directory; it holds one sub-folder per category.
        categories: List of category (sub-folder) names. The position of a
            name in this list is used as its integer class label.
        img_wid: Width in pixels every image is resized to.
        img_high: Height in pixels every image is resized to.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_images, img_high, img_wid, 3)`` and ``y`` has shape
        ``(n_images,)`` and holds the class index of each image.
    """
    X, y = [], []
    for class_num, category in enumerate(categories):
        path = os.path.join(datadir, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path)
            # cv2.imread does not raise on a file it cannot decode, it returns
            # None; skip such files explicitly instead of hiding the failure.
            if img_array is None:
                warnings.warn(f"skipping unreadable image: {img_path}")
                continue
            X.append(cv2.resize(img_array, (img_wid, img_high)))
            y.append(class_num)

    y = np.array(y)
    # cv2.resize takes (width, height) but returns arrays laid out as
    # (height, width, channels), so the height comes first here.
    X = np.array(X).reshape(y.shape[0], img_high, img_wid, 3)
    return X, y

# Target width and height (in pixels) of every loaded image.
img_wid = img_high = 100
X, y = create_dataset(dataset_dir, categories, img_wid, img_high)

# Report the shape of the feature and label arrays.
for array_name, array in (("X", X), ("y", y)):
    print(f"{array_name}: {array.shape}")

1.3. Display a random image for each category
We display for each class a random image

# Show one randomly chosen image per class in a grid that is 4 columns wide.
n_classes = len(categories)
n_cols = 4
n_rows = -(-n_classes // n_cols)  # ceiling division

plt.figure(figsize=(12, 5))
for class_idx in range(n_classes):
    # Draw among the images that really carry this label rather than assuming
    # that each class owns a contiguous run of exactly 500 images: that
    # assumption breaks as soon as one file is missing or fails to load.
    candidates = np.flatnonzero(y == class_idx)
    if candidates.size == 0:
        continue
    idx = np.random.choice(candidates)

    plt.subplot(n_rows, n_cols, class_idx + 1)
    # Images were read with OpenCV (BGR channel order); reversing the last
    # axis gives the RGB order matplotlib expects.
    plt.imshow(X[idx][:, :, ::-1])
    plt.title(f"{class_idx}. {categories[y[idx]]}")

2. Creation of the model and learning
2.1. Create the training, testing and validation set
In this step we create:
a)The x_train/y_train training set
b)The x_test/y_test test set
c)The validation set x_val/y_val

2.1.1. The set of training (X/y)train and test (X/y)test
We divide the data (X, y) into training and testing sets using 80% of the data for training and the remaining 20% for testing.

# Labels as a column vector of shape (n_samples, 1)
Y = y.reshape(len(y), 1)

# split dataset to train and test set (80% / 20%); the seed is fixed
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, train_size=0.8, random_state=42
)

# Report the shape of each subset
for split_name, split in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{split_name}: {split.shape}")

2.1.2. Create validation set x_val/y_val
Here we divide 30% of the training set into a validation set

# defining training and test sets
# Carve the validation set out of the training data (30% of it).
# random_state is fixed, as in the train/test split, so runs are reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.3, random_state=42
)
x_test = X_test

# Dimension of the dataset (each line is labelled with the subset it describes)
print(f"x_train:{x_train.shape}, y_train:{y_train.shape}")
print(f"x_val:{x_val.shape}, y_val:{y_val.shape}")
print(f"x_test:{x_test.shape}, y_test:{y_test.shape}")

2.1.3. OneHot Encoding

We have to apply one-hot encoding with to_categorical to transform all the labels (y_train, y_val and y_test) so that each example is represented by a vector.
Because we have 8 classes, we expect the second dimension of y_train, y_val and y_test to go from 1 to 8.

# One Hot Encoding
# The number of columns is given explicitly so that the three label matrices
# always have one column per category, even when a subset happens not to
# contain the class with the highest index.
n_classes = len(categories)
y_train = to_categorical(y_train, num_classes=n_classes)
y_val = to_categorical(y_val, num_classes=n_classes)
y_test = to_categorical(y_test, num_classes=n_classes)

# Verifying the dimension after one hot encoding
# (each line is labelled with the subset it describes)
print(f"x_train:{x_train.shape}, y_train:{y_train.shape}")
print(f"x_val:{x_val.shape}, y_val:{y_val.shape}")
print(f"x_test:{x_test.shape}, y_test:{y_test.shape}")

2.2. Generating images

Here we will perform the image data augmentation. This is the technique used to increase the size of a training dataset by creating modified versions of images in the dataset.
These modified images are created by applying small random rotations (up to 2 degrees), random horizontal flips and random zooms (up to 10%) to the original images.

First, we'll define one ImageDataGenerator instance for each of the training, validation and testing datasets.

# Image Data Augmentation
# The three generators use the same settings, so they are declared once.
augmentation_settings = dict(rotation_range=2, horizontal_flip=True, zoom_range=0.1)

train_generator = ImageDataGenerator(**augmentation_settings)
val_generator = ImageDataGenerator(**augmentation_settings)
test_generator = ImageDataGenerator(**augmentation_settings)

# Fitting the augmentation defined above to the data

2.3. Download the VGG19 pre-trained model
Now we will instantiate the VGG19 model which is a pre-trained convolutional neural network as a transfer learning model.
The biggest advantage of this network is that it has been pre-trained on over a million images from the ImageNet database.
A pre-trained network can classify images into thousands of object categories. Because of this advantage, we will apply this model to our dataset which has 8 categories,
adding other layers to it.

# if it has already been created and saved --> read the model
vgg19_path = "./saved_model/vgg19_model.h5"

if os.path.isfile(vgg19_path):
    # load model
    base_model = load_model(vgg19_path)
else:
    # if it has not already been created --> create the model and save it
    # NOTE(review): only `input_shape` survived in the original call; the
    # other arguments are reconstructed. include_top=False drops VGG19's own
    # classifier, which is required for a 100x100 input and for stacking new
    # dense layers on top. Confirm against the original notebook.
    base_model = VGG19(
        include_top=False,
        weights="imagenet",
        input_shape=(100, 100, 3),
    )

    # save model (the target folder may not exist on a first run)
    os.makedirs(os.path.dirname(vgg19_path), exist_ok=True)
    base_model.save(vgg19_path)

We will now define VGG19 as a deep learning architecture. For this, it will be defined as a sequential model of Keras with several dense layers.

model = Sequential()
# NOTE(review): the source kept only the line above, which leaves the dense
# layers without any image input. Adding the convolutional base followed by
# Flatten (imported at the top of the file but otherwise unused) is the
# presumed missing step; confirm against the original notebook.
model.add(base_model)
model.add(Flatten())

Now to add more layers we need to see the dimension of our model.

# Model summary

Add dense layers with batch activation and normalization

# Fully connected head: ReLU layers of decreasing width.
model.add(Dense(1024, activation="relu", input_dim=512))
for hidden_units in (512, 256, 128):
    model.add(Dense(hidden_units, activation="relu"))

# Output layer: one softmax unit per column of the one-hot encoded labels.
n_outputs = y_train.shape[1]
model.add(Dense(n_outputs, activation="softmax"))


2.4. Hyperparameterization
Hyperparameters are tunable parameters that allow us to control the model training process. Model performance is highly dependent on hyperparameters.
As we have defined our model, now we need to initialize the necessary hyperparameters to train the model, and then finally, we will compile our model.

""" Initializing the hyperparameters """
batch_size = 100  # images per gradient step
epochs = 50  # passes over the training set
learn_rate = 0.001  # initial learning rate shared by both optimizers

sgd = SGD(learning_rate=learn_rate, momentum=0.9, nesterov=False)

# `epsilon=None` and `decay=0.0` only restated default behaviour and are
# dropped: newer Keras optimizers no longer accept `decay`, so passing it
# makes this line fail there.
# NOTE: `adam` is defined as an alternative; the model is compiled with `sgd`.
adam = Adam(learning_rate=learn_rate, beta_1=0.9, beta_2=0.999, amsgrad=False)

model.compile(optimizer=sgd, loss="categorical_crossentropy", metrics=["accuracy"])

Learning rate reduction decreases the learning rate after a certain number of iterations if the error rate does not change.
Here, thanks to this technique, we monitor the validation loss and, if it stops improving for 3 epochs, we multiply the learning rate by 0.01 (never going below 1e-5).

# Learning Rate Annealer
# Watches val_loss; the learning rate is scaled by `factor` once `patience`
# epochs pass without improvement, and is never lowered below `min_lr`.
lrr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.01,
    patience=3,
    min_lr=1e-5,
)

2.5. Model training
Now we start training our model.

# if model has already been trained and saved ---> read model
model_path = "./saved_model/model.h5"
history_path = "./saved_model/model_history.npy"

if os.path.isfile(model_path):
    # load the model
    model = load_model(model_path)

    # if model history was saved --> read history
    if os.path.isfile(history_path):
        # The history dict was written with np.save, i.e. as a pickled object
        # array, hence allow_pickle. Only load history files you created
        # yourself: unpickling untrusted files can execute arbitrary code.
        history = np.load(history_path, allow_pickle=True).item()
    else:
        history = None
else:
    # if the model has not already been trained --> train the model
    # NOTE(review): the original call was truncated. `epochs` and the `lrr`
    # callback are defined earlier in the file and are assumed to have been
    # passed here; confirm against the original notebook.
    history = model.fit(
        train_generator.flow(x_train, y_train, batch_size=batch_size),
        epochs=epochs,
        steps_per_epoch=x_train.shape[0] // batch_size,
        validation_data=val_generator.flow(x_val, y_val, batch_size=batch_size),
        callbacks=[lrr],
    )
    history = history.history

    # save the history and the model (the folder may not exist on a first run)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    np.save(history_path, history)
    model.save(model_path)

3. Model evaluation
3.1. Evaluate accuracy on the test set
Evaluate the accuracy as well as the loss of the model on the test set

# Loss and accuracy of the model on the held-out test images.
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print("Test loss:", round(test_loss, 3))
print("Test accuracy:", round(test_accuracy, 3))

Test loss: 0.788
Test accuracy: 0.85

3.2. Confusion Matrix
Finally, we will visualize classification performance on test data using confusion matrices.

def cm_plt(ax, cm, classes, cmap, title, normalize):
    """Draw one confusion matrix as an annotated heat map on ``ax``.

    Args:
        ax: Matplotlib axes to draw on.
        cm: 2-D array holding the confusion matrix (rows: true labels,
            columns: predicted labels).
        classes: Tick labels, one per class, in label order.
        cmap: Matplotlib colormap (or colormap name) used for the heat map.
        title: Title of the axes.
        normalize: If true, cell values are printed with two decimals;
            otherwise they are printed as integers.

    Returns:
        The axes that was passed in.
    """
    im = ax.imshow(cm, interpolation="nearest", cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    # ... and label them with the respective list entries
    # NOTE(review): the tick arguments were lost in the source and are
    # reconstructed from the two comments above.
    ax.set(
        xticks=np.arange(cm.shape[1]),
        yticks=np.arange(cm.shape[0]),
        xticklabels=classes,
        yticklabels=classes,
        title=title,
        ylabel="True label",
        xlabel="Predicted label",
    )

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = ".2f" if normalize else "d"
    # Cells above half of the maximum get white text so it stays readable.
    thresh = cm.max() / 2.0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(
                j,
                i,
                format(cm[i, j], fmt),
                ha="center",
                va="center",
                color="white" if cm[i, j] > thresh else "black",
            )

    return ax

# ----------------------------------------------------------------------
# Defining function for confusion matrix plot
def plt_confusion_mat(cm, classes, fig_size, cmap="Blues"):
    """Plot a confusion matrix twice, side by side: raw counts and row-normalized.

    Args:
        cm: 2-D integer array, the confusion matrix (rows: true labels).
        classes: Class names used as tick labels, in label order.
        fig_size: ``(width, height)`` of the figure, passed to ``plt.subplots``.
        cmap: Colormap for both heat maps.
            NOTE(review): the original signature was truncated after
            ``fig_size``; this parameter and its default are reconstructed.
    """
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=fig_size)
    cm_plt(
        ax1,
        cm,
        classes,
        cmap,
        title="Confusion matrix, without normalization",
        normalize=False,
    )

    # Divide every row by its total; rows without any sample stay at 0
    # instead of producing NaN through a division by zero.
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    cmn = np.divide(
        cm.astype("float"),
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    )
    cm_plt(
        ax2,
        cmn,
        classes,
        cmap,
        title="Normalized confusion matrix",
        normalize=True,
    )

Predict test set classes

# Making prediction
# A row-wise argmax turns both the predicted scores and the one-hot labels
# back into class indices.
class_scores = model.predict(x_test)
y_pred = np.argmax(class_scores, axis=1)
y_true = np.argmax(y_test, axis=1)

Displays the normalized and unnormalized confusion matrix.
We will see the exact number of correct and incorrect classifications using the unnormalized confusion matrix, and then we will see the same in percentage using the normalized confusion matrix.
As can be seen by classifying the images into 8 classes, the model gave a minimum accuracy of 79% and a maximum accuracy of 97%.
We can further refine the training parameters and retrain our model to see any possible scaling in the classification

# get confusion matrix
confuision_mat = confusion_matrix(y_true=y_true, y_pred=y_pred)

# plot confusion_mat
plt_confusion_mat(cm=confuision_mat, classes=categories, fig_size=(20, 7))

3.3. Random prediction
Predict randomly chosen images and compare the prediction with the ground truth

def predict_categorie_img(img, model, categories):
    """Predict the category of a single image.

    Args:
        img: Image array with exactly three dimensions; a leading batch axis
            of size 1 is added before it is given to the model.
        model: Object with a ``predict`` method that returns one row of class
            scores per input image.
        categories: Sequence of category names indexed by class number.

    Returns:
        A tuple ``(class_index, class_name)`` for the highest-scoring class.

    Raises:
        TypeError: If ``img`` does not have exactly 3 dimensions.
    """
    # Reject wrong inputs up front; only a 3-D image can be turned into a
    # batch of one with the indexing below.
    if np.ndim(img) != 3:
        raise TypeError("test image dimension != 3")

    batch = np.asarray(img)[None, :, :, :]
    predict = model.predict(batch)
    idx_cat = int(np.argmax(predict, axis=1)[0])
    return idx_cat, categories[idx_cat]

# Predict 10 randomly drawn images and show them in a 2 x 5 grid.
plt.figure(figsize=(20, 8))
for i in range(10):
    idx = np.random.randint(len(y))
    img = X[idx]
    # Both values are (class index, class name) pairs.
    pred_class = predict_categorie_img(img, model, categories)
    true_class = y[idx], categories[y[idx]]

    plt.subplot(2, 5, i + 1)
    # Reverse the channel axis (images were read with OpenCV) before display.
    plt.imshow(img[:, :, ::-1])
    # NOTE(review): the source computed both classes but the line displaying
    # them was lost; this title is a reconstruction.
    plt.title(f"pred: {pred_class[1]}\ntrue: {true_class[1]}")

3.4 Classification Report
Now we check the classification report of the model

from sklearn.metrics import classification_report

# Class names in label order (0 to 7), as listed in the overview of the dataset.
class_names = [
    "dyed-lifted-polyps",
    "dyed-resection-margins",
    "esophagitis",
    "normal-cecum",
    "normal-pylorus",
    "normal-z-line",
    "polyps",
    "ulcerative-colitis",
]
print(classification_report(y_true, y_pred, target_names=class_names))