Cat/Non-Cat classifier¶
Objective: Create and compare a deep neural network and a convolutional neural network for cat recognition.
Import libraries¶
In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from keras import Sequential, Input, layers, optimizers
import tensorflow as tf
2024-06-24 13:09:23.594539: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
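The cells below rely on random sampling and random augmentation; a minimal sketch for making reruns more repeatable (assumes keras.utils.set_random_seed is available, as in Keras ≥ 2.7; the seed value is arbitrary):

In [ ]:
from keras.utils import set_random_seed

# Seed Python, NumPy, and TensorFlow in one call so reruns are repeatable.
set_random_seed(42)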
Download the dataset¶
In [2]:
%%bash
mkdir -p data
wget -q https://raw.githubusercontent.com/LuisAngelMendozaVelasco/Deep_Learning_Specialization/main/Neural_Networks_and_Deep_Learning/Week2/Labs/datasets/train_catvnoncat.h5 -P ./data
wget -q https://raw.githubusercontent.com/LuisAngelMendozaVelasco/Deep_Learning_Specialization/main/Neural_Networks_and_Deep_Learning/Week2/Labs/datasets/test_catvnoncat.h5 -P ./data
Load the dataset¶
In [3]:
train_dataset = h5py.File('./data/train_catvnoncat.h5', "r")
train_X = train_dataset["train_set_x"][:]
train_y = train_dataset["train_set_y"][:]
train_ds = tf.data.Dataset.from_tensor_slices((train_X, train_y)).batch(32)
validation_dataset = h5py.File('./data/test_catvnoncat.h5', "r")
validation_X = validation_dataset["test_set_x"][:]
validation_y = validation_dataset["test_set_y"][:]
validation_ds = tf.data.Dataset.from_tensor_slices((validation_X, validation_y)).batch(32)
classes = validation_dataset["list_classes"][:].astype(str)
2024-06-24 13:09:26.826292: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 989 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5
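The HDF5 files expose their arrays as named keys; a quick sketch to confirm what each file contains, plus an optional tf.data prefetch (a standard pipeline tweak, not required for correctness):

In [ ]:
# Inspect the datasets stored in each HDF5 file.
print(list(train_dataset.keys()))       # expected: ['list_classes', 'train_set_x', 'train_set_y']
print(list(validation_dataset.keys()))  # expected: ['list_classes', 'test_set_x', 'test_set_y']

# Optional: overlap input preparation with training.
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
validation_ds = validation_ds.prefetch(tf.data.AUTOTUNE)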
In [4]:
print("Number of training examples: " + str(train_X.shape[0]))
print("Number of testing examples: " + str(validation_X.shape[0]))
print("Each image is of size: (" + str(train_X.shape[1]) + ", " + str(train_X.shape[2]) + ", 3)")
Number of training examples: 209
Number of testing examples: 50
Each image is of size: (64, 64, 3)
Visualize the dataset¶
In [5]:
samples = [(train_X[i], train_y[i]) for i in np.random.randint(low=0, high=train_X.shape[0], size=16)]
fig, axs = plt.subplots(4, 4, figsize=(8, 8))
fig.suptitle('Random samples')
for i, ax in enumerate(axs.flat):
    ax.imshow(samples[i][0])
    ax.axis("off")
    ax.set_title(classes[samples[i][1]])

plt.tight_layout()
plt.show()
In [6]:
sns.set_style("whitegrid")
classes_frequency = pd.DataFrame(data={"Class": 2 * list(classes),
                                       "Frequency": list(100 * np.unique(train_y, return_counts=True)[1] / train_y.shape[0]) + list(100 * np.unique(validation_y, return_counts=True)[1] / validation_y.shape[0]),
                                       "Dataset": 2 * ["Train"] + 2 * ["Validation"]})
plt.figure()
sns.barplot(data=classes_frequency, x="Class", y="Frequency", hue="Dataset")
plt.xlabel("Class")
plt.ylabel("Percentage frequency [%]")
plt.title("Class distribution")
plt.show()
Deep neural network¶
Create the model¶
In [7]:
model1 = Sequential([Input(shape=(64, 64, 3)),
                     # Rescale
                     layers.Rescaling(scale=1 / 255),
                     # Data augmentation
                     layers.RandomFlip(mode="horizontal"),
                     layers.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
                     layers.RandomRotation(factor=0.2, fill_mode="nearest"),
                     layers.RandomZoom(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
                     # Deep layers
                     layers.Flatten(),
                     layers.BatchNormalization(),
                     layers.Dropout(0.5),
                     layers.Dense(32, activation="relu"),
                     layers.BatchNormalization(),
                     layers.Dropout(0.5),
                     layers.Dense(64, activation="relu"),
                     layers.BatchNormalization(),
                     layers.Dropout(0.5),
                     layers.Dense(128, activation="relu"),
                     layers.BatchNormalization(),
                     layers.Dense(1, activation="sigmoid")])
model1.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ rescaling (Rescaling)           │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_flip (RandomFlip)        │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_translation              │ (None, 64, 64, 3)      │             0 │
│ (RandomTranslation)             │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_rotation                 │ (None, 64, 64, 3)      │             0 │
│ (RandomRotation)                │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_zoom (RandomZoom)        │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten (Flatten)               │ (None, 12288)          │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization             │ (None, 12288)          │        49,152 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 12288)          │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 32)             │       393,248 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_1           │ (None, 32)             │           128 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 32)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 64)             │         2,112 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_2           │ (None, 64)             │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_2 (Dropout)             │ (None, 64)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 128)            │         8,320 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_3           │ (None, 128)            │           512 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_3 (Dense)                 │ (None, 1)              │           129 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 453,857 (1.73 MB)
Trainable params: 428,833 (1.64 MB)
Non-trainable params: 25,024 (97.75 KB)
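The parameter counts can be checked by hand; for instance, the first Dense layer sees the 12288 flattened pixels, and the first BatchNormalization carries four vectors of that width:

In [ ]:
# First Dense layer: one weight per (input, unit) pair plus one bias per unit.
inputs, units = 64 * 64 * 3, 32
print(inputs * units + units)  # 393248, matching the summary
# BatchNormalization: gamma, beta, moving mean, and moving variance per feature.
print(4 * inputs)              # 49152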
Compile the model¶
In [8]:
model1.compile(optimizer="adam", loss='binary_crossentropy', metrics=['accuracy'])
Fit the model¶
In [9]:
history1 = model1.fit(train_ds, epochs=1000, verbose=0, validation_data=validation_ds)
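A fixed budget of 1000 epochs is generous for 209 images; a hedged alternative is to stop once the validation loss plateaus, sketched here with Keras's EarlyStopping callback (the patience value is an assumption, not tuned):

In [ ]:
from keras.callbacks import EarlyStopping

# Stop when val_loss has not improved for `patience` epochs, keeping the best weights.
early_stop = EarlyStopping(monitor="val_loss", patience=50, restore_best_weights=True)
# history1 = model1.fit(train_ds, epochs=1000, verbose=0,
#                       validation_data=validation_ds, callbacks=[early_stop])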
In [10]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
ax1.plot(history1.history['accuracy'])
ax1.plot(history1.history['val_accuracy'])
ax1.set_xlabel("Epochs")
ax1.set_ylabel("Accuracy")
ax1.legend(["Training", "Validation"])
ax2.plot(history1.history['loss'])
ax2.plot(history1.history['val_loss'])
ax2.set_xlabel("Epochs")
ax2.set_ylabel("Loss")
ax2.legend(["Training", "Validation"])
plt.show()
Evaluate the model¶
In [11]:
data_augmentation = Sequential([layers.RandomFlip(mode="horizontal"),
                                layers.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
                                layers.RandomRotation(factor=0.2, fill_mode="nearest"),
                                layers.RandomZoom(height_factor=0.2, width_factor=0.2, fill_mode="nearest")])
fig, axs = plt.subplots(4, 4, figsize=(8, 8))
# Take a random number of validation batches, augment them, and plot predictions for the last batch taken.
for images, labels in validation_ds.take(np.random.randint(low=1, high=validation_ds.cardinality().numpy() + 1)).map(lambda x, y: (data_augmentation(x), y)):
    for i, ax in enumerate(axs.flat):
        prediction_proba = model1.predict(np.expand_dims(images[i], axis=0), verbose=0)
        ax.imshow(images[i] / 255)
        ax.axis("off")
        ax.set_title("Prediction: " + classes[int(prediction_proba.squeeze().round())])

plt.tight_layout()
plt.show()
2024-06-24 13:10:14.494116: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
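The grid above only spot-checks 16 augmented samples; for an aggregate score on the unaugmented validation set, a one-line sketch:

In [ ]:
# Overall loss and accuracy across all 50 validation images.
val_loss, val_acc = model1.evaluate(validation_ds, verbose=0)
print(f"Validation accuracy: {val_acc:.3f}")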
Convolutional neural network¶
Create the model¶
In [12]:
model2 = Sequential([Input(shape=(64, 64, 3)),
                     # Rescale
                     layers.Rescaling(scale=1 / 255),
                     # Data augmentation
                     layers.RandomFlip(mode="horizontal"),
                     layers.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
                     layers.RandomRotation(factor=0.2, fill_mode="nearest"),
                     layers.RandomZoom(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
                     # Convolutional layers
                     layers.Conv2D(16, 3, padding='same', activation='relu'),
                     layers.MaxPooling2D(),
                     layers.Conv2D(32, 3, padding='same', activation='relu'),
                     layers.MaxPooling2D(),
                     layers.Conv2D(64, 3, padding='same', activation='relu'),
                     layers.MaxPooling2D(),
                     # Deep layers
                     layers.Flatten(),
                     layers.BatchNormalization(),
                     layers.Dropout(0.5),
                     layers.Dense(128, activation="relu"),
                     layers.BatchNormalization(),
                     layers.Dense(1, activation="sigmoid")])
model2.summary()
model2.summary()
Model: "sequential_2"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ rescaling_1 (Rescaling)         │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_flip_2 (RandomFlip)      │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_translation_2            │ (None, 64, 64, 3)      │             0 │
│ (RandomTranslation)             │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_rotation_2               │ (None, 64, 64, 3)      │             0 │
│ (RandomRotation)                │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_zoom_2 (RandomZoom)      │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d (Conv2D)                 │ (None, 64, 64, 16)     │           448 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 32, 32, 16)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 32, 32, 32)     │         4,640 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 16, 16, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 16, 16, 64)     │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_2 (MaxPooling2D)  │ (None, 8, 8, 64)       │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten_1 (Flatten)             │ (None, 4096)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_4           │ (None, 4096)           │        16,384 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_3 (Dropout)             │ (None, 4096)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_4 (Dense)                 │ (None, 128)            │       524,416 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_5           │ (None, 128)            │           512 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_5 (Dense)                 │ (None, 1)              │           129 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 565,025 (2.16 MB)
Trainable params: 556,577 (2.12 MB)
Non-trainable params: 8,448 (33.00 KB)
Compile the model¶
In [13]:
model2.compile(optimizer=optimizers.Adam(5e-5), loss='binary_crossentropy', metrics=['accuracy'])
Fit the model¶
In [14]:
history2 = model2.fit(train_ds, epochs=1000, verbose=0, validation_data=validation_ds)
2024-06-24 13:10:19.537496: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907
In [15]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
ax1.plot(history2.history['accuracy'])
ax1.plot(history2.history['val_accuracy'])
ax1.set_xlabel("Epochs")
ax1.set_ylabel("Accuracy")
ax1.legend(["Training", "Validation"])
ax2.plot(history2.history['loss'])
ax2.plot(history2.history['val_loss'])
ax2.set_xlabel("Epochs")
ax2.set_ylabel("Loss")
ax2.legend(["Training", "Validation"])
plt.show()
Evaluate the model¶
In [16]:
data_augmentation = Sequential([layers.RandomFlip(mode="horizontal"),
                                layers.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
                                layers.RandomRotation(factor=0.2, fill_mode="nearest"),
                                layers.RandomZoom(height_factor=0.2, width_factor=0.2, fill_mode="nearest")])
fig, axs = plt.subplots(4, 4, figsize=(8, 8))
# Take a random number of validation batches, augment them, and plot predictions for the last batch taken.
for images, labels in validation_ds.take(np.random.randint(low=1, high=validation_ds.cardinality().numpy() + 1)).map(lambda x, y: (data_augmentation(x), y)):
    for i, ax in enumerate(axs.flat):
        prediction_proba = model2.predict(np.expand_dims(images[i], axis=0), verbose=0)
        ax.imshow(images[i] / 255)
        ax.axis("off")
        ax.set_title("Prediction: " + classes[int(prediction_proba.squeeze().round())])

plt.tight_layout()
plt.show()
2024-06-24 13:11:18.516243: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
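For a per-class view of the errors, a sketch of a confusion matrix over the validation set (assumes scikit-learn is available; predict preserves batch order, so the labels align):

In [ ]:
from sklearn.metrics import confusion_matrix

# Thresholded sigmoid outputs vs. true labels (rows: true, columns: predicted).
probas = model2.predict(validation_ds, verbose=0).squeeze()
print(confusion_matrix(validation_y, (probas > 0.5).astype(int)))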
Model comparison¶
In [17]:
train_accuracies = [history1.history['accuracy'], history2.history['accuracy']]
test_accuracies = [history1.history['val_accuracy'], history2.history['val_accuracy']]
train_losses = [history1.history['loss'], history2.history['loss']]
test_losses = [history1.history['val_loss'], history2.history['val_loss']]
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
for train_accuracy in train_accuracies:
    axs[0, 0].plot(train_accuracy)
axs[0, 0].set_ylabel("Accuracy")
axs[0, 0].set_title("Training accuracy")
axs[0, 0].legend(["Deep neural network", "Convolutional neural network"])

for test_accuracy in test_accuracies:
    axs[0, 1].plot(test_accuracy)
axs[0, 1].set_title("Validation accuracy")

for train_loss in train_losses:
    axs[1, 0].plot(train_loss)
axs[1, 0].set_xlabel("Epochs")
axs[1, 0].set_ylabel("Loss")
axs[1, 0].set_title("Training loss")

for test_loss in test_losses:
    axs[1, 1].plot(test_loss)
axs[1, 1].set_xlabel("Epochs")
axs[1, 1].set_title("Validation loss")
plt.show()
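For a compact side-by-side summary to accompany the curves, a small sketch tabulating each model's best validation metrics from the training histories:

In [ ]:
# Best validation accuracy and lowest validation loss per model.
summary = pd.DataFrame({"Model": ["Deep neural network", "Convolutional neural network"],
                        "Best val. accuracy": [max(h["val_accuracy"]) for h in (history1.history, history2.history)],
                        "Lowest val. loss": [min(h["val_loss"]) for h in (history1.history, history2.history)]})
print(summary)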
Conclusions¶
- The convolutional neural network performs much better during training; however, its validation metrics are not better than those of the deep neural network.
- The pronounced fluctuations in validation accuracy and validation loss could be due to the very small number of samples in the datasets and the class imbalance; one possible mitigation for the imbalance is sketched below.
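A hedged mitigation for the imbalance is to weight the loss by inverse class frequency, computed here from the training labels as n_samples / (n_classes * count):

In [ ]:
# Inverse-frequency class weights for the binary labels (0: non-cat, 1: cat).
counts = np.bincount(train_y)
class_weight = {i: train_y.shape[0] / (2 * c) for i, c in enumerate(counts)}
print(class_weight)
# Passing class_weight=class_weight to model.fit(...) would upweight the rarer class.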
In [18]:
!rm -rf ./data