import numpy as np
import h5py
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from keras import Sequential, Input, layers, applications, Model, optimizers, callbacks
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")

2025-01-02 15:23:51.896386: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
E0000 00:00:1735853031.910198  186891 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1735853031.914356  186891 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-02 15:23:51.928496: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

%%bash

# Fetch both dataset files into /tmp; `wget -nc` skips any file that is already there.
base_url=https://raw.githubusercontent.com/LuisAngelMendozaVelasco/Deep_Learning_Specialization/main/Convolutional_Neural_Networks/Week1/Labs/datasets

for file in test_signs.h5 train_signs.h5.gz; do
    wget -nc --progress=bar:force:noscroll "${base_url}/${file}" -P /tmp
done

# Unpack the training set, keeping the archive (-k) and overwriting any previous extract (-f).
gunzip -kf /tmp/train_signs.h5.gz

File '/tmp/test_signs.h5' already there; not retrieving.

File '/tmp/train_signs.h5.gz' already there; not retrieving.

# Load the training images/labels. The file is opened in a context manager so
# the HDF5 handle is closed once the arrays have been copied into memory.
with h5py.File('/tmp/train_signs.h5', "r") as dataset:
    X = dataset["train_set_x"][:]
    y = dataset["train_set_y"][:]

# Hold out 10% of the training data for validation (fixed seed for reproducibility).
X_train, X_validation, y_train, y_validation = train_test_split(X, y, test_size=0.1, random_state=0)

# The test split and the class names live in a separate file.
with h5py.File('/tmp/test_signs.h5', "r") as dataset_test:
    X_test = dataset_test["test_set_x"][:]
    y_test = dataset_test["test_set_y"][:]
    classes = dataset_test["list_classes"][:].astype(str)

# Labels are one-hot encoded over the 6 classes and the data is served in batches of 32.
# The training set is reshuffled every epoch: fit() does not shuffle a tf.data.Dataset itself,
# so without this every epoch would see exactly the same batches.
ds_train = (
    tf.data.Dataset.from_tensor_slices((X_train, tf.one_hot(y_train, 6)))
    .shuffle(len(X_train))
    .batch(32)
)
# Validation must use the held-out split, NOT the test set: early stopping monitors this
# dataset, so feeding it the test data would leak test information into model selection.
ds_validation = tf.data.Dataset.from_tensor_slices((X_validation, tf.one_hot(y_validation, 6))).batch(32)

I0000 00:00:1735853033.887471  186891 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1966 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5

# Report the size of each split and the per-image shape.
for description, value in (
    ("Number of training samples:", X_train.shape[0]),
    ("Number of validation samples:", X_validation.shape[0]),
    ("Number of test samples:", X_test.shape[0]),
    ("Each image has a shape of", X_train.shape[1:]),
):
    print(description, value)

Number of training samples: 972
Number of validation samples: 108
Number of test samples: 120
Each image has a shape of (64, 64, 3)

# Draw 16 distinct training indices and show the matching images in a 4x4 grid,
# each titled with its class name.
indexes = np.random.choice(X_train.shape[0], size=16, replace=False)
samples = zip(X_train[indexes], y_train[indexes])

fig, axs = plt.subplots(4, 4, figsize=(8, 8))
fig.suptitle('Random samples')

for ax, (image, label) in zip(axs.flat, samples):
    ax.imshow(image)
    ax.set_title(classes[label])
    ax.axis("off")

plt.tight_layout()
plt.show()

# Count how many training samples carry each label.
labels, sizes = np.unique(y_train, return_counts=True)


def _slice_caption(pct):
    """Caption a pie slice with its percentage and the sample count that percentage represents."""
    count = round(pct / 100 * sum(sizes))
    return "{:.2f}%\n({:d})".format(pct, count)


# Pie chart of the label counts, with the label values shown in the legend.
fig, ax = plt.subplots()
ax.pie(sizes, textprops={'color': "w", 'fontsize': '12'}, autopct=_slice_caption)
ax.legend(labels)
ax.set_title("Class")
plt.show()

# The network is assembled from four stages, created in the order they are applied.

# 1) Input and rescaling of pixel values by 1/255.
input_stage = [Input(shape=(64, 64, 3)),
               layers.Rescaling(scale=1 / 255)]

# 2) Random data augmentation (flip, shift, rotate, zoom).
augmentation_stage = [layers.RandomFlip(mode="horizontal"),
                      layers.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
                      layers.RandomRotation(factor=0.2, fill_mode="nearest"),
                      layers.RandomZoom(height_factor=0.2, width_factor=0.2, fill_mode="nearest")]

# 3) Three convolution + max-pooling blocks with 16, 32 and 64 filters.
convolution_stage = []
for filters in (16, 32, 64):
    convolution_stage.append(layers.Conv2D(filters, 3, padding='same', activation='relu'))
    convolution_stage.append(layers.MaxPooling2D())

# 4) Dense classifier ending in a 6-way softmax.
classifier_stage = [layers.Flatten(),
                    layers.Dense(128, activation="relu"),
                    layers.Dense(6, activation="softmax")]

model_CNN = Sequential(input_stage + augmentation_stage + convolution_stage + classifier_stage)

model_CNN.summary()

Model: "sequential"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ rescaling (Rescaling)           │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_flip (RandomFlip)        │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_translation              │ (None, 64, 64, 3)      │             0 │
│ (RandomTranslation)             │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_rotation                 │ (None, 64, 64, 3)      │             0 │
│ (RandomRotation)                │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ random_zoom (RandomZoom)        │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d (Conv2D)                 │ (None, 64, 64, 16)     │           448 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 32, 32, 16)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 32, 32, 32)     │         4,640 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 16, 16, 32)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 16, 16, 64)     │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_2 (MaxPooling2D)  │ (None, 8, 8, 64)       │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten (Flatten)               │ (None, 4096)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 128)            │       524,416 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 6)              │           774 │
└─────────────────────────────────┴────────────────────────┴───────────────┘

 Total params: 548,774 (2.09 MB)

 Trainable params: 548,774 (2.09 MB)

 Non-trainable params: 0 (0.00 B)

class CustomVerbose(callbacks.Callback):
    """Keras callback that prints a short training summary for selected epochs only.

    Meant to be combined with ``verbose=0`` in ``fit`` so that only the chosen
    epochs produce output.

    Args:
        epochs_to_show: Sequence of zero-based epoch indices to report. The last
            element plus one is printed as the total number of epochs.
    """

    def __init__(self, epochs_to_show):
        # Let the base Callback initialise its own state before adding ours.
        super().__init__()
        self.epochs_to_show = epochs_to_show
        self.epoch_start_time = None
        self.epoch_stop_time = None

    def on_epoch_begin(self, epoch, logs=None):
        """Remember when a reported epoch started."""
        if epoch in self.epochs_to_show:
            self.epoch_start_time = datetime.now()

    def on_epoch_end(self, epoch, logs=None):
        """Print elapsed time and the accuracy/loss metrics for a reported epoch.

        Metrics missing from ``logs`` (for example when training without
        validation data) are printed as ``nan`` instead of raising ``KeyError``.
        """
        if epoch not in self.epochs_to_show:
            return

        logs = logs or {}
        nan = float("nan")
        self.epoch_stop_time = datetime.now()

        # The start time is only missing if on_epoch_begin was never called for this epoch.
        if self.epoch_start_time is None:
            elapsed = nan
        else:
            elapsed = (self.epoch_stop_time - self.epoch_start_time).total_seconds()

        print(f"Epoch {epoch+1}/{self.epochs_to_show[-1] + 1}")
        print(f"\telapsed time: {elapsed:.3f}s - accuracy: {logs.get('categorical_accuracy', nan):.4f} - loss: {logs.get('loss', nan):.4f} - val_accuracy: {logs.get('val_categorical_accuracy', nan):.4f} - val_loss: {logs.get('val_loss', nan):.4f}")

model_CNN.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

epochs = 500
# Report progress (and set the early-stopping patience) every tenth of the run.
# Clamped to at least 1 so that runs shorter than 10 epochs do not hand range() a zero step.
report_every = max(1, epochs // 10)
# Always show the first epoch, then the last epoch of every tenth.
epochs_to_show = sorted({0, *range(report_every - 1, epochs, report_every)})
custom_verbose = CustomVerbose(epochs_to_show)
# restore_best_weights=True leaves the model with the weights of the best val_loss epoch
# rather than those from `patience` epochs later, when training was stopped.
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=report_every, restore_best_weights=True, verbose=1)
history_CNN = model_CNN.fit(ds_train, epochs=epochs, verbose=0, validation_data=ds_validation, callbacks=[custom_verbose, early_stopping])

I0000 00:00:1735853037.700099  186973 cuda_dnn.cc:529] Loaded cuDNN version 90300

Epoch 1/500
	elapsed time: 3.509s - accuracy: 0.1770 - loss: 1.7999 - val_accuracy: 0.1667 - val_loss: 1.7859
Epoch 50/500
	elapsed time: 0.197s - accuracy: 0.8086 - loss: 0.4918 - val_accuracy: 0.8583 - val_loss: 0.3117
Epoch 100/500
	elapsed time: 0.193s - accuracy: 0.9033 - loss: 0.2863 - val_accuracy: 0.9667 - val_loss: 0.0991
Epoch 150/500
	elapsed time: 0.198s - accuracy: 0.9300 - loss: 0.1889 - val_accuracy: 0.9833 - val_loss: 0.0784
Epoch 200/500
	elapsed time: 0.191s - accuracy: 0.9342 - loss: 0.1791 - val_accuracy: 0.9750 - val_loss: 0.0640
Epoch 215: early stopping

# Learning curves of the CNN: accuracy on the left panel, loss on the right one.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

panels = (
    (ax1, 'categorical_accuracy', 'val_categorical_accuracy', "Accuracy"),
    (ax2, 'loss', 'val_loss', "Loss"),
)

for axis, train_key, validation_key, y_label in panels:
    axis.plot(history_CNN.history[train_key])
    axis.plot(history_CNN.history[validation_key])
    axis.set_xlabel("Epochs")
    axis.set_ylabel(y_label)
    axis.legend(["Training", "Validation"])

plt.show()

# Pick 16 distinct test images at random.
indexes = np.random.choice(range(0, X_test.shape[0]), size=16, replace=False)
images = X_test[indexes]

# Classify all of them in a single batched call instead of one predict() call per image,
# which is much slower and makes TensorFlow re-dispatch the prediction function each time.
prediction_proba = model_CNN.predict(images, verbose=0)
predicted_labels = np.argmax(prediction_proba, axis=1)

fig, axs = plt.subplots(4, 4, figsize=(8, 8))
fig.suptitle('Random samples')

# Show every image together with the class name the model predicted for it.
for image, label, ax in zip(images, predicted_labels, axs.flatten()):
    ax.imshow(image)
    ax.set_title("Prediction: " + classes[label])
    ax.axis("off")

plt.tight_layout()
plt.show()

# Predict the whole test set with one batched call (predict() batches internally)
# rather than invoking predict() once per image, then take the most probable class.
prediction_proba = model_CNN.predict(X_test, verbose=0)
y_pred = np.argmax(prediction_proba, axis=1)

# Per-class precision/recall/F1 on the test set.
print(classification_report(y_test, y_pred, digits=4))

# Confusion matrix; the seaborn grid is switched off so it does not overlay the cells.
ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
plt.grid(False)
plt.show()

              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        20
           1     0.9524    1.0000    0.9756        20
           2     1.0000    0.9500    0.9744        20
           3     1.0000    1.0000    1.0000        20
           4     1.0000    1.0000    1.0000        20
           5     1.0000    1.0000    1.0000        20

    accuracy                         0.9917       120
   macro avg     0.9921    0.9917    0.9917       120
weighted avg     0.9921    0.9917    0.9917       120

# Build a classifier on top of a ResNet50V2 backbone.
# NOTE: the statement order below also determines Keras' auto-generated layer names,
# so it is kept exactly as is.

# Backbone without its classification head (include_top=False), sized for 64x64 RGB inputs.
# No `weights` argument is passed, so the Keras default applies
# (presumably ImageNet weights - confirm against the Keras docs).
ResNet50V2 = applications.ResNet50V2(include_top=False, input_shape=(64, 64, 3))
# Random augmentation pipeline: horizontal flip, translation, rotation and zoom.
data_augmentation = Sequential([layers.RandomFlip(mode="horizontal"),
                                layers.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
                                layers.RandomRotation(factor=0.2, fill_mode="nearest"),
                                layers.RandomZoom(height_factor=0.2, width_factor=0.2, fill_mode="nearest")])

inputs = Input(shape=(64, 64, 3))
x = data_augmentation(inputs)
# ResNetV2 input preprocessing; it shows up in the model summary as a divide followed by a subtract.
x = applications.resnet_v2.preprocess_input(x)
# The backbone is not frozen here, so its weights are updated during training as well.
x = ResNet50V2(x)
# Collapse the backbone's spatial feature map into one feature vector per image.
x = layers.GlobalAveragePooling2D()(x)
# 6-way softmax classifier.
outputs = layers.Dense(6, activation="softmax")(x)

model_ResNet = Model(inputs, outputs)
model_ResNet.summary()

Model: "functional_2"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer_2 (InputLayer)      │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ sequential_1 (Sequential)       │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ true_divide (TrueDivide)        │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ subtract (Subtract)             │ (None, 64, 64, 3)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ resnet50v2 (Functional)         │ (None, 2, 2, 2048)     │    23,564,800 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d        │ (None, 2048)           │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_2 (Dense)                 │ (None, 6)              │        12,294 │
└─────────────────────────────────┴────────────────────────┴───────────────┘

 Total params: 23,577,094 (89.94 MB)

 Trainable params: 23,531,654 (89.77 MB)

 Non-trainable params: 45,440 (177.50 KB)

# A small learning rate is used because the whole backbone is being updated.
model_ResNet.compile(optimizer=optimizers.Adam(5e-5), loss='categorical_crossentropy', metrics=['categorical_accuracy'])

epochs = 100
# Report progress (and set the early-stopping patience) every tenth of the run.
# Clamped to at least 1 so that runs shorter than 10 epochs do not hand range() a zero step.
report_every = max(1, epochs // 10)
# Always show the first epoch, then the last epoch of every tenth.
epochs_to_show = sorted({0, *range(report_every - 1, epochs, report_every)})
custom_verbose = CustomVerbose(epochs_to_show)
# restore_best_weights=True leaves the model with the weights of the best val_loss epoch
# rather than those from `patience` epochs later, when training was stopped.
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=report_every, restore_best_weights=True, verbose=1)
history_ResNet = model_ResNet.fit(ds_train, epochs=epochs, verbose=0, validation_data=ds_validation, callbacks=[custom_verbose, early_stopping])

Epoch 1/100
	elapsed time: 31.244s - accuracy: 0.2521 - loss: 1.9734 - val_accuracy: 0.2750 - val_loss: 2.3893
Epoch 10/100
	elapsed time: 3.806s - accuracy: 0.7891 - loss: 0.5622 - val_accuracy: 0.8083 - val_loss: 0.6463
Epoch 20/100
	elapsed time: 3.808s - accuracy: 0.8909 - loss: 0.2906 - val_accuracy: 0.9250 - val_loss: 0.2013
Epoch 30/100
	elapsed time: 3.790s - accuracy: 0.9414 - loss: 0.1715 - val_accuracy: 0.9583 - val_loss: 0.1158
Epoch 39: early stopping

# Learning curves of the ResNet model: accuracy on the left panel, loss on the right one.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

panels = (
    (ax1, 'categorical_accuracy', 'val_categorical_accuracy', "Accuracy"),
    (ax2, 'loss', 'val_loss', "Loss"),
)

for axis, train_key, validation_key, y_label in panels:
    axis.plot(history_ResNet.history[train_key])
    axis.plot(history_ResNet.history[validation_key])
    axis.set_xlabel("Epochs")
    axis.set_ylabel(y_label)
    axis.legend(["Training", "Validation"])

plt.show()

# Pick 16 distinct test images at random.
indexes = np.random.choice(range(0, X_test.shape[0]), size=16, replace=False)
images = X_test[indexes]

# Classify all of them in a single batched call instead of one predict() call per image,
# which is much slower and makes TensorFlow re-dispatch the prediction function each time.
prediction_proba = model_ResNet.predict(images, verbose=0)
predicted_labels = np.argmax(prediction_proba, axis=1)

fig, axs = plt.subplots(4, 4, figsize=(8, 8))
fig.suptitle('Random samples')

# Show every image together with the class name the model predicted for it.
for image, label, ax in zip(images, predicted_labels, axs.flatten()):
    ax.imshow(image)
    ax.set_title("Prediction: " + classes[label])
    ax.axis("off")

plt.tight_layout()
plt.show()

# Predict the whole test set with one batched call (predict() batches internally)
# rather than invoking predict() once per image, then take the most probable class.
prediction_proba = model_ResNet.predict(X_test, verbose=0)
y_pred = np.argmax(prediction_proba, axis=1)

# Per-class precision/recall/F1 on the test set.
print(classification_report(y_test, y_pred, digits=4))

# Confusion matrix; the seaborn grid is switched off so it does not overlay the cells.
ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
plt.grid(False)
plt.show()

              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        20
           1     0.9000    0.9000    0.9000        20
           2     0.8889    0.8000    0.8421        20
           3     0.9091    1.0000    0.9524        20
           4     1.0000    0.9500    0.9744        20
           5     0.9524    1.0000    0.9756        20

    accuracy                         0.9417       120
   macro avg     0.9417    0.9417    0.9407       120
weighted avg     0.9417    0.9417    0.9407       120

Finger Signs

Import libraries

Download the dataset

Load the dataset

Visualize the dataset

Visualize the class distribution

Build a Convolutional Neural Network

Create a custom callback

Compile and train the model

Evaluate the model

Build a Residual Neural Network using the ResNet50V2 architecture as a base

Compile and train the model

Evaluate the model