Apple and Orange Classifier¶
Objective: Train a basic Convolutional Neural Network (CNN) for apple and orange classification.
A Convolutional Neural Network (CNN) is a type of Deep Neural Network that is specifically designed for image and video processing tasks. It uses convolutional layers to extract features from input data, and is particularly useful for tasks such as image classification, object detection, and image segmentation. CNNs are designed to take advantage of the spatial hierarchies and local feature extraction in images, and are often used in computer vision applications.
Import libraries¶
In [1]:
import numpy as np
import os
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
from keras import Sequential, Input, layers, callbacks
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
2025-05-30 15:59:56.602073: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered WARNING: All log messages before absl::InitializeLog() is called are written to STDERR E0000 00:00:1748642396.614836 110604 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered E0000 00:00:1748642396.619027 110604 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered 2025-05-30 15:59:56.631737: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Download the dataset¶
In [2]:
%%bash
# Download the CycleGAN apple2orange dataset archive into /tmp.
# -nc (no-clobber) skips the download if the file is already there.
wget -nc --progress=bar:force:noscroll https://efrosgans.eecs.berkeley.edu/cyclegan/datasets/apple2orange.zip -P /tmp
# Extract quietly (-q) and never overwrite existing files (-n).
unzip -qn /tmp/apple2orange.zip -d /tmp
File '/tmp/apple2orange.zip' already there; not retrieving.
Load the dataset¶
In [3]:
def get_file_paths(path):
    """Return the sorted paths of all ``.jpg`` files directly inside *path*.

    Sorting makes the result deterministic across runs and machines:
    ``os.listdir`` yields entries in an arbitrary, filesystem-dependent
    order, which would otherwise make the train/test split irreproducible.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).

    Returns
    -------
    list[str]
        Full paths (``path`` joined with the file name) of the JPEG files.
    """
    return sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if name.endswith(".jpg")
    )
def load_image(file_path, image_size):
    """Read the JPEG at *file_path* and resize it to *image_size* (height, width).

    Returns a 3-channel float tensor (``tf.image.resize`` converts to float).
    """
    raw = tf.io.read_file(file_path)
    decoded = tf.io.decode_jpeg(raw, channels=3)
    return tf.image.resize(decoded, image_size)
In [4]:
# Use the first 500 images of each class from the CycleGAN train splits.
apple_file_paths = get_file_paths("/tmp/apple2orange/trainA")[:500]
orange_file_paths = get_file_paths("/tmp/apple2orange/trainB")[:500]

# Load every image at 128x128; labels: 0 = apple, 1 = orange.
X = np.array([load_image(path, (128, 128)) for path in apple_file_paths + orange_file_paths])
y = np.array([0] * len(apple_file_paths) + [1] * len(orange_file_paths))

# 80% training; the remaining 20% is split evenly into test and validation.
X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size=0.2, random_state=0)
X_test, X_validation, y_test, y_validation = train_test_split(X_rest, y_rest, test_size=0.5, random_state=0)

ds_train = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)
ds_validation = tf.data.Dataset.from_tensor_slices((X_validation, y_validation)).batch(32)

classes = ["apple", "orange"]
I0000 00:00:1748642398.751423 110604 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1691 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5
In [5]:
# Report split sizes and the per-image shape.
print(f"Number of training samples: {X_train.shape[0]}")
print(f"Number of validation samples: {X_validation.shape[0]}")
print(f"Number of test samples: {X_test.shape[0]}")
print(f"Each image has a shape of {X_train.shape[1:]}")
Number of training samples: 800 Number of validation samples: 100 Number of test samples: 100 Each image has a shape of (128, 128, 3)
Visualize the dataset¶
In [6]:
# Show a 4x4 grid of random training images titled with their class names.
indexes = np.random.choice(range(0, X_train.shape[0]), size=16, replace=False)
fig, axs = plt.subplots(4, 4, figsize=(8, 8))
fig.suptitle('Random samples')
for ax, (image, label) in zip(axs.flatten(), zip(X_train[indexes], y_train[indexes])):
    # Pixel values are still 0-255 here; scale to [0, 1] for imshow.
    ax.imshow(image / 255)
    ax.set_title(classes[label])
    ax.axis("off")
plt.tight_layout()
plt.show()
Visualize the class distribution¶
In [7]:
# Pie chart of the training-set class balance.
labels, sizes = np.unique(y_train, return_counts=True)
total = sum(sizes)

def _wedge_label(pct):
    """Render a wedge as 'xx.xx%' over the absolute sample count."""
    return "{:.2f}%\n({:d})".format(pct, round(pct / 100 * total))

fig, ax = plt.subplots()
ax.pie(sizes, textprops={'color': "w", 'fontsize': '12'}, autopct=_wedge_label)
ax.legend([f"{label} ({classes[label]})" for label in labels])
ax.set_title("Class")
plt.show()
Define a custom callback¶
In [8]:
class CustomVerbose(callbacks.Callback):
    """Keras callback that prints timing and metrics only for selected epochs.

    Used with ``model.fit(..., verbose=0)`` to keep the log short during a
    long training run.

    Parameters
    ----------
    epochs_to_show : list[int]
        Zero-based epoch indices for which a progress line is printed.
        Must be non-empty; the last entry is used as the displayed total.
    """

    def __init__(self, epochs_to_show):
        # Bug fix: the base Callback __init__ was never called, so attributes
        # Keras expects to initialize on the callback were skipped.
        super().__init__()
        self.epochs_to_show = epochs_to_show

    def on_epoch_begin(self, epoch, logs=None):
        # Start the timer only for the epochs we are going to report.
        if epoch in self.epochs_to_show:
            self.epoch_start_time = datetime.now()

    def on_epoch_end(self, epoch, logs=None):
        if epoch in self.epochs_to_show:
            self.epoch_stop_time = datetime.now()
            print(f"Epoch {epoch + 1}/{self.epochs_to_show[-1] + 1}")
            print(f"\telapsed time: {(self.epoch_stop_time - self.epoch_start_time).total_seconds():.3f}s - accuracy: {logs['binary_accuracy']:.4f} - loss: {logs['loss']:.4f} - val_accuracy: {logs['val_binary_accuracy']:.4f} - val_loss: {logs['val_loss']:.4f}")
Build a CNN¶
In [9]:
# Augmentation layers are active only during training (no-ops at inference).
data_augmentation = [
    layers.RandomFlip(mode="horizontal"),
    layers.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
    layers.RandomRotation(factor=0.2, fill_mode="nearest"),
    layers.RandomZoom(height_factor=0.2, width_factor=0.2, fill_mode="nearest"),
]

# Three conv/pool stages: 128->64->32->16 spatial, 16->32->64 channels.
feature_extractor = [
    layers.Conv2D(16, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, padding='same', activation='relu'),
    layers.MaxPooling2D(),
]

# Dense head; a single sigmoid unit outputs P(class 1 = orange).
classifier_head = [
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
]

model = Sequential(
    [Input(shape=(128, 128, 3)), layers.Rescaling(scale=1 / 255)]
    + data_augmentation
    + feature_extractor
    + classifier_head
)
model.summary()
Model: "sequential"
āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā³āāāāāāāāāāāāāāāāāāāāāāāāā³āāāāāāāāāāāāāāāā ā Layer (type) ā Output Shape ā Param # ā ā”āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā© ā rescaling (Rescaling) ā (None, 128, 128, 3) ā 0 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā random_flip (RandomFlip) ā (None, 128, 128, 3) ā 0 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā random_translation ā (None, 128, 128, 3) ā 0 ā ā (RandomTranslation) ā ā ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā random_rotation ā (None, 128, 128, 3) ā 0 ā ā (RandomRotation) ā ā ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā random_zoom (RandomZoom) ā (None, 128, 128, 3) ā 0 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā conv2d (Conv2D) ā (None, 128, 128, 16) ā 448 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā max_pooling2d (MaxPooling2D) ā (None, 64, 64, 16) ā 0 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā conv2d_1 (Conv2D) ā (None, 64, 64, 32) ā 4,640 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā max_pooling2d_1 (MaxPooling2D) ā (None, 32, 32, 32) ā 0 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā conv2d_2 (Conv2D) ā (None, 32, 32, 64) ā 18,496 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā max_pooling2d_2 (MaxPooling2D) ā (None, 16, 16, 64) ā 0 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā flatten (Flatten) ā (None, 16384) ā 0 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā dense (Dense) ā (None, 128) ā 2,097,280 ā āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāāāāāāāāāāāā¼āāāāāāāāāāāāāāā⤠ā dense_1 (Dense) ā (None, 1) ā 129 ā 
āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā“āāāāāāāāāāāāāāāāāāāāāāāāā“āāāāāāāāāāāāāāāā
Total params: 2,120,993 (8.09 MB)
Trainable params: 2,120,993 (8.09 MB)
Non-trainable params: 0 (0.00 B)
Compile and train the CNN¶
In [10]:
# Binary classification: sigmoid output + binary cross-entropy + binary accuracy.
model.compile(optimizer="adam", loss='binary_crossentropy', metrics=['binary_accuracy'])

epochs = 200
patience = int(epochs / 10)  # stop after this many epochs without val_loss improvement

# Print progress on the first epoch, then every `patience` epochs.
epochs_to_show = [0] + list(range(patience - 1, epochs, patience))
custom_verbose = CustomVerbose(epochs_to_show)
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=1)

history = model.fit(ds_train, epochs=epochs, verbose=0, validation_data=ds_validation, callbacks=[custom_verbose, early_stopping])
I0000 00:00:1748642404.529830 110675 cuda_dnn.cc:529] Loaded cuDNN version 91001
Epoch 1/200 elapsed time: 4.101s - accuracy: 0.5838 - loss: 0.7129 - val_accuracy: 0.6200 - val_loss: 0.6239 Epoch 20/200 elapsed time: 0.438s - accuracy: 0.9375 - loss: 0.1638 - val_accuracy: 0.9200 - val_loss: 0.1953 Epoch 40/200 elapsed time: 0.433s - accuracy: 0.9613 - loss: 0.1099 - val_accuracy: 0.9400 - val_loss: 0.2195 Epoch 47: early stopping
In [11]:
# Training curves: accuracy on the left axis, loss on the right.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(15, 5))
for ax, metric, label in ((ax_acc, 'binary_accuracy', "Accuracy"), (ax_loss, 'loss', "Loss")):
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_xlabel("Epochs")
    ax.set_ylabel(label)
    ax.legend(["Training", "Validation"])
plt.show()
Evaluate the CNN¶
In [12]:
# Show model predictions for 16 random test images.
indexes = np.random.choice(range(0, X_test.shape[0]), size=16, replace=False)
sample_images = X_test[indexes]

# Performance fix: predict the whole 16-image batch in one call instead of
# calling model.predict once per image inside the plotting loop.
prediction_probas = model.predict(sample_images, verbose=0).squeeze()

fig, axs = plt.subplots(4, 4, figsize=(8, 8))
fig.suptitle('Random samples')
for image, proba, ax in zip(sample_images, prediction_probas, axs.flatten()):
    # Pixel values are 0-255; scale to [0, 1] for imshow.
    ax.imshow(image / 255)
    # Threshold the sigmoid output at 0.5 via round() to pick the class name.
    ax.set_title("Prediction: " + classes[int(proba.round())])
    ax.axis("off")
plt.tight_layout()
plt.show()
In [13]:
# Hard predictions: sigmoid probabilities are rounded, so >= 0.5 -> class 1 (orange).
y_pred = model.predict(X_test, verbose=0).squeeze().round()

print(classification_report(y_test, y_pred, digits=4))

display_labels = ["0 (apple)", "1 (orange)"]
ConfusionMatrixDisplay.from_predictions(y_test, y_pred, display_labels=display_labels)
plt.grid(False)
plt.show()
precision recall f1-score support
0 0.9000 0.9574 0.9278 47
1 0.9600 0.9057 0.9320 53
accuracy 0.9300 100
macro avg 0.9300 0.9316 0.9299 100
weighted avg 0.9318 0.9300 0.9301 100
Run in Google Colab