MNIST database¶

Run in Google Colab

Objective: Train a basic Convolutional Neural Network (CNN) to classify handwritten digits.

The MNIST database is a large collection of handwritten digits that is widely used as a benchmark for image processing and machine learning systems.

It contains 60,000 training images and 10,000 test images of digits written by high school students and employees of the United States Census Bureau.

Import libraries¶

In [1]:
import tensorflow as tf
from keras import Sequential, Input, layers, datasets, utils
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

Load the dataset¶

In [2]:
(X_train, y_train), (X_right, y_right) = datasets.mnist.load_data()
# Split the original 10,000-image test set in half: 5,000 for validation and 5,000 for final testing
X_validation, X_test, y_validation, y_test = train_test_split(X_right, y_right, train_size=0.5, random_state=0)

# Add a channel dimension, one-hot encode the labels, and batch into tf.data pipelines
ds_train = tf.data.Dataset.from_tensor_slices((X_train[..., np.newaxis], utils.to_categorical(y_train))).batch(32)
ds_validation = tf.data.Dataset.from_tensor_slices((X_validation[..., np.newaxis], utils.to_categorical(y_validation))).batch(32)

print("X_train shape:", X_train.shape)
print("X_validation shape:", X_validation.shape)
print("X_test shape:", X_test.shape)
X_train shape: (60000, 28, 28)
X_validation shape: (5000, 28, 28)
X_test shape: (5000, 28, 28)
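
The labels returned by load_data are integer class indices from 0 to 9; utils.to_categorical turns them into one-hot vectors so they match the categorical cross-entropy loss used when the model is compiled later. A minimal sanity check, reusing the arrays loaded above:

print(y_train[:3])                                        # e.g. [5 0 4]
print(utils.to_categorical(y_train[:3], num_classes=10))  # one-hot rows, one per label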

Visualize the dataset¶

In [3]:
indexes = np.random.choice(range(0, X_train.shape[0]), size=16, replace=False)
samples = zip(X_train[indexes], y_train[indexes])

fig, axs = plt.subplots(4, 4, figsize=(8, 8))
fig.suptitle('Random samples')

for ax, sample in zip(axs.flatten(), samples):
    ax.imshow(sample[0], cmap="gray")
    ax.set_title(sample[1])
    ax.axis("off")

plt.tight_layout()
plt.show()
[Figure: 4×4 grid of random training digits, each titled with its label]
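
The images come out of load_data as 8-bit grayscale arrays, so pixel values lie in the 0–255 range; a quick check (a small sketch) confirms this, which is why the model below starts with a Rescaling(1/255) layer:

print(X_train.dtype, X_train.min(), X_train.max())  # expected: uint8 0 255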

Visualize the class distribution¶

In [4]:
plt.figure()
sns.countplot(x=y_train, hue=y_train, palette="tab10", stat="percent", legend=False)
plt.title("Class")
plt.show()
[Figure: bar chart of the training-set class distribution, in percent per digit]
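
For exact figures rather than a visual estimate, the per-class counts can also be computed directly with NumPy (a small sketch):

classes, counts = np.unique(y_train, return_counts=True)
for c, n in zip(classes, counts):
    print(f"{c}: {n} images ({n / y_train.size:.1%})")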

Build a CNN¶

In [5]:
number_classes = np.unique(y_train).size

model = Sequential()
model.add(Input(shape=(28, 28, 1)))
model.add(layers.Rescaling(scale=1 / 255))
model.add(layers.Conv2D(16, kernel_size=(5, 5), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(layers.Conv2D(8, kernel_size=(2, 2), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(100, activation='relu'))
model.add(layers.Dense(number_classes, activation="softmax"))

model.summary()
Model: "sequential"
ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”³ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”³ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”ā”“
ā”ƒ Layer (type)                    ā”ƒ Output Shape           ā”ƒ       Param # ā”ƒ
└━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ rescaling (Rescaling)           │ (None, 28, 28, 1)      │             0 │
ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤
│ conv2d (Conv2D)                 │ (None, 24, 24, 16)     │           416 │
ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤
│ max_pooling2d (MaxPooling2D)    │ (None, 12, 12, 16)     │             0 │
ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤
│ conv2d_1 (Conv2D)               │ (None, 11, 11, 8)      │           520 │
ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 5, 5, 8)        │             0 │
ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤
│ flatten (Flatten)               │ (None, 200)            │             0 │
ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤
│ dense (Dense)                   │ (None, 100)            │        20,100 │
ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¼ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤
│ dense_1 (Dense)                 │ (None, 10)             │         1,010 │
ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”“ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜
 Total params: 22,046 (86.12 KB)
 Trainable params: 22,046 (86.12 KB)
 Non-trainable params: 0 (0.00 B)
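
The parameter counts in the summary can be verified by hand: a Conv2D layer has kernel_height × kernel_width × input_channels × filters weights plus one bias per filter, and a Dense layer has inputs × units weights plus one bias per unit. A short sketch reproducing the table's numbers:

conv1  = 5 * 5 * 1 * 16 + 16     # 416
conv2  = 2 * 2 * 16 * 8 + 8      # 520
dense1 = 200 * 100 + 100         # 20,100
dense2 = 100 * 10 + 10           # 1,010
print(conv1 + conv2 + dense1 + dense2)  # 22,046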

Compile and train the CNN¶

In [6]:
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['categorical_accuracy'])

epochs = 10
history_CNN = model.fit(ds_train, epochs=epochs, validation_data=ds_validation)
Epoch 1/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 6s 2ms/step - categorical_accuracy: 0.8502 - loss: 0.4882 - val_categorical_accuracy: 0.9678 - val_loss: 0.1043
Epoch 2/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - categorical_accuracy: 0.9744 - loss: 0.0848 - val_categorical_accuracy: 0.9750 - val_loss: 0.0790
Epoch 3/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - categorical_accuracy: 0.9816 - loss: 0.0592 - val_categorical_accuracy: 0.9798 - val_loss: 0.0656
Epoch 4/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - categorical_accuracy: 0.9858 - loss: 0.0454 - val_categorical_accuracy: 0.9812 - val_loss: 0.0568
Epoch 5/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - categorical_accuracy: 0.9886 - loss: 0.0368 - val_categorical_accuracy: 0.9844 - val_loss: 0.0450
Epoch 6/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - categorical_accuracy: 0.9910 - loss: 0.0296 - val_categorical_accuracy: 0.9866 - val_loss: 0.0424
Epoch 7/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - categorical_accuracy: 0.9925 - loss: 0.0244 - val_categorical_accuracy: 0.9870 - val_loss: 0.0422
Epoch 8/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - categorical_accuracy: 0.9941 - loss: 0.0201 - val_categorical_accuracy: 0.9890 - val_loss: 0.0397
Epoch 9/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - categorical_accuracy: 0.9940 - loss: 0.0176 - val_categorical_accuracy: 0.9854 - val_loss: 0.0477
Epoch 10/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 3s 1ms/step - categorical_accuracy: 0.9949 - loss: 0.0157 - val_categorical_accuracy: 0.9868 - val_loss: 0.0526
In [7]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

ax1.plot(history_CNN.history['categorical_accuracy'])
ax1.plot(history_CNN.history['val_categorical_accuracy'])
ax1.set_xticks(range(epochs))
ax1.set_xlabel("Epochs")
ax1.set_ylabel("Accuracy")
ax1.legend(["Training", "Validation"])

ax2.plot(history_CNN.history['loss'])
ax2.plot(history_CNN.history['val_loss'])
ax2.set_xticks(range(epochs))
ax2.set_xlabel("Epochs")
ax2.set_ylabel("Loss")
ax2.legend(["Training", "Validation"])

plt.show()
[Figure: training and validation accuracy (left) and loss (right) per epoch]
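
The validation loss bottoms out around epoch 8 and then creeps back up while the training loss keeps falling, a hint of mild overfitting. One common remedy, not used in this run, is an EarlyStopping callback that restores the best weights; a minimal sketch:

from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor="val_loss", patience=2, restore_best_weights=True)
# model.fit(ds_train, epochs=epochs, validation_data=ds_validation, callbacks=[early_stop])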

Evaluate the CNN¶

In [8]:
indexes = np.random.choice(range(0, X_test.shape[0]), size=16, replace=False)

fig, axs = plt.subplots(4, 4, figsize=(8, 8))
fig.suptitle('Random test samples')

for image, ax in zip(X_test[indexes], axs.flatten()):
    prediction_proba = model.predict(np.expand_dims(image, axis=0), verbose=0)
    ax.imshow(image, cmap="gray")
    ax.set_title("Prediction: " + str(np.argmax(prediction_proba)))
    ax.axis("off")

plt.tight_layout()
plt.show()
[Figure: 4×4 grid of random test digits, each titled with the model's predicted label]
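
Calling model.predict on one image at a time keeps the loop simple but is slow; the same probabilities can be obtained in a single batched call (a sketch equivalent to the loop above):

batch_proba = model.predict(X_test[indexes], verbose=0)  # shape (16, 10)
batch_predictions = np.argmax(batch_proba, axis=1)
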
In [9]:
y_pred = np.argmax(model.predict(X_test, verbose=0), axis=1)
print(classification_report(y_test, y_pred, digits=4))

ConfusionMatrixDisplay.from_predictions(y_test, y_pred)
plt.grid(False)
plt.show()
              precision    recall  f1-score   support

           0     0.9650    1.0000    0.9822       496
           1     0.9947    0.9876    0.9911       565
           2     0.9692    0.9941    0.9815       506
           3     0.9959    0.9739    0.9848       499
           4     0.9939    0.9781    0.9859       502
           5     0.9780    0.9867    0.9823       451
           6     0.9933    0.9803    0.9868       456
           7     0.9886    0.9886    0.9886       526
           8     0.9959    0.9837    0.9897       490
           9     0.9784    0.9784    0.9784       509

    accuracy                         0.9852      5000
   macro avg     0.9853    0.9851    0.9851      5000
weighted avg     0.9854    0.9852    0.9852      5000

[Figure: confusion matrix of the test-set predictions]
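
As a cross-check of the classification report, the overall test accuracy can also be obtained from model.evaluate (a sketch that one-hot encodes the labels to match the compiled loss); it should agree with the 0.9852 accuracy above:

test_loss, test_acc = model.evaluate(X_test[..., np.newaxis], utils.to_categorical(y_test), verbose=0)
print(f"Test accuracy: {test_acc:.4f}")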