Compressive Strength of Concrete¶
Objective: Build a regression model using Keras to predict the compressive strength of concrete.
Keras is a high-level API for building deep learning models. It has gained favor for its ease of use and syntactic simplicity, which facilitate fast development. With Keras, even a very complex deep learning network can be built in only a few lines of code.
Import libraries¶
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras import Sequential, Input, layers, callbacks
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
Load the dataset¶
# Location of the concrete dataset CSV.
# NOTE(review): the value is an empty string here, so nothing can be loaded
# until the real path or URL is filled in — confirm the intended source.
file_url = ''
# Read the CSV into a DataFrame; the rest of the notebook works on `df`.
df = pd.read_csv(file_url)
Cement | Blast Furnace Slag | Fly Ash | Water | Superplasticizer | Coarse Aggregate | Fine Aggregate | Age | Strength | |
0 | 540.0 | 0.0 | 0.0 | 162.0 | 2.5 | 1040.0 | 676.0 | 28 | 79.99 |
1 | 540.0 | 0.0 | 0.0 | 162.0 | 2.5 | 1055.0 | 676.0 | 28 | 61.89 |
2 | 332.5 | 142.5 | 0.0 | 228.0 | 0.0 | 932.0 | 594.0 | 270 | 40.27 |
3 | 332.5 | 142.5 | 0.0 | 228.0 | 0.0 | 932.0 | 594.0 | 365 | 41.05 |
4 | 198.6 | 132.4 | 0.0 | 192.0 | 0.0 | 978.4 | 825.5 | 360 | 44.30 |
Understand the dataset¶
The dataset is about the compressive strength of different samples of concrete based on the volumes of the different ingredients that were used to make them. Ingredients include:
- Cement
- Blast Furnace Slag
- Fly Ash
- Water
- Superplasticizer
- Coarse Aggregate
- Fine Aggregate
So the first concrete sample has 540 cubic meters of cement, 0 cubic meters of blast furnace slag, 0 cubic meters of fly ash, 162 cubic meters of water, 2.5 cubic meters of superplasticizer, 1040 cubic meters of coarse aggregate, and 676 cubic meters of fine aggregate. This concrete mix, at 28 days old, has a compressive strength of 79.99 MPa.
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1030 entries, 0 to 1029 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Cement 1030 non-null float64 1 Blast Furnace Slag 1030 non-null float64 2 Fly Ash 1030 non-null float64 3 Water 1030 non-null float64 4 Superplasticizer 1030 non-null float64 5 Coarse Aggregate 1030 non-null float64 6 Fine Aggregate 1030 non-null float64 7 Age 1030 non-null int64 8 Strength 1030 non-null float64 dtypes: float64(8), int64(1) memory usage: 72.6 KB
Visualize the dataset¶
# Draw one panel per column on a 3x3 grid (the dataset has nine columns).
fig, axs = plt.subplots(3, 3, figsize=(15, 15))
for ax, feature in zip(axs.flatten(), df.columns):
    if len(df[feature].unique()) <= 10:
        # Few distinct values: show how often each value occurs.
        labels, sizes = np.unique(df[feature], return_counts=True)
        sns.barplot(x=labels, y=sizes, hue=labels, ax=ax, palette="tab10", legend=False)
    else:
        # Many distinct values: show the distribution as a histogram instead.
        # The two plot types are mutually exclusive; drawing both on the same
        # axes would overlay unrelated scales.
        sns.histplot(data=df, x=feature, ax=ax)
    # Label the panel so the bar-plot case (which has no axis label) is
    # still identifiable.
    ax.set_title(feature)
Analyze the target feature¶
# Show the target column two ways, side by side.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
hist_ax, box_ax = axs
# Left panel: histogram with a kernel density estimate overlaid.
sns.histplot(data=df, x="Strength", kde=True, ax=hist_ax)
# Right panel: horizontal box plot of the same column.
sns.boxplot(data=df, x="Strength", orient="h", ax=box_ax, legend=False)
box_ax.set_title("Box plot")
Preprocess the dataset¶
# Separate the predictors from the target column.
X = df.drop("Strength", axis=1)
y = df["Strength"]

# Standardize every predictor.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/validation/test split, so statistics of the held-out samples leak
# into training. Consider fitting on the training subset only.
scaler = StandardScaler()
# fit_transform returns a bare array, so rebuild the DataFrame using the
# predictors' own column labels and index instead of assuming that
# "Strength" is the last column of `df`.
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)
Cement | Blast Furnace Slag | Fly Ash | Water | Superplasticizer | Coarse Aggregate | Fine Aggregate | Age | |
0 | 2.477915 | -0.856888 | -0.847144 | -0.916764 | -0.620448 | 0.863154 | -1.217670 | -0.279733 |
1 | 2.477915 | -0.856888 | -0.847144 | -0.916764 | -0.620448 | 1.056164 | -1.217670 | -0.279733 |
2 | 0.491425 | 0.795526 | -0.847144 | 2.175461 | -1.039143 | -0.526517 | -2.240917 | 3.553066 |
3 | 0.491425 | 0.795526 | -0.847144 | 2.175461 | -1.039143 | -0.526517 | -2.240917 | 5.057677 |
4 | -0.790459 | 0.678408 | -0.847144 | 0.488793 | -1.039143 | 0.070527 | 0.647884 | 4.978487 |
Split the dataset into train, validation and test subsets¶
# Keep 80% of the samples for training, then halve the remaining 20% into
# validation and test subsets. The fixed random_state makes both splits
# reproducible.
X_train, X_right, y_train, y_right = train_test_split(X, y, train_size=0.8, random_state=0)
X_validation, X_test, y_validation, y_test = train_test_split(X_right, y_right, train_size=0.5, random_state=0)
print("X_train shape:", X_train.shape)
print("X_validation shape:", X_validation.shape)
# Report the shape of the test subset itself, not the validation subset.
print("X_test shape:", X_test.shape)
X_train shape: (824, 8) X_validation shape: (103, 8) X_test shape: (103, 8)
Build a Neural Network¶
# Fully connected regression network: two hidden ReLU layers of 50 units
# followed by a single output unit for the predicted strength.
model = Sequential()
# Declare the input width (one value per predictor column) so the model is
# built immediately and its summary can be printed before training.
model.add(Input(shape=(X_train.shape[1],)))
model.add(layers.Dense(50, activation="relu"))
model.add(layers.Dense(50, activation="relu"))
# One output with no activation: the target is a single real value per
# sample, so the last layer must not be a 50-unit hidden layer.
model.add(layers.Dense(1))
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ dense (Dense) │ (None, 50) │ 450 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_1 (Dense) │ (None, 50) │ 2,550 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense_2 (Dense) │ (None, 1) │ 51 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 3,051 (11.92 KB)
Trainable params: 3,051 (11.92 KB)
Non-trainable params: 0 (0.00 B)
Create a custom callback¶
class CustomVerbose(callbacks.Callback):
    """Print timing and metrics for a chosen subset of training epochs.

    Args:
        epochs_to_show: Zero-based epoch indices to report. The last entry
            (plus one) is printed as the total in the "Epoch i/N" header, so
            the sequence is expected to end with the final epoch index.
    """

    def __init__(self, epochs_to_show):
        super().__init__()
        self.epochs_to_show = epochs_to_show
        # Set in on_epoch_begin / on_epoch_end for reported epochs only.
        self.epoch_start_time = None
        self.epoch_stop_time = None

    def on_epoch_begin(self, epoch, logs=None):
        """Record the wall-clock start time of an epoch that will be reported."""
        if epoch in self.epochs_to_show:
            self.epoch_start_time = datetime.now()

    def on_epoch_end(self, epoch, logs=None):
        """Print elapsed time and train/validation metrics for a reported epoch.

        Reads the 'r2_score', 'loss', 'val_r2_score' and 'val_loss' entries of
        ``logs``, so the model must be compiled with the 'r2_score' metric and
        trained with validation data.
        """
        if epoch not in self.epochs_to_show:
            return
        self.epoch_stop_time = datetime.now()
        elapsed = (self.epoch_stop_time - self.epoch_start_time).total_seconds()
        print(f"Epoch {epoch+1}/{self.epochs_to_show[-1] + 1}")
        print(f"\telapsed time: {elapsed:.3f}s - r2_score: {logs['r2_score']:.4f} - loss: {logs['loss']:.4f} - val_r2_score: {logs['val_r2_score']:.4f} - val_loss: {logs['val_loss']:.4f}")
Compile and train the model¶
# Mean squared error is the training objective; R² is tracked as a metric so
# it appears in the per-epoch logs and in the training history.
model.compile(optimizer="adam", loss='mean_squared_error', metrics=['r2_score'])
epochs = 300
# Report the first epoch and then every tenth of the run (zero-based indices).
report_every = epochs // 10
epochs_to_show = [0] + list(range(report_every - 1, epochs, report_every))
custom_verbose = CustomVerbose(epochs_to_show)
# Stop once the validation loss has not improved for a tenth of the run.
early_stopping = callbacks.EarlyStopping(monitor='val_loss', patience=report_every, verbose=1)
# verbose=0 silences the built-in progress output; the custom callback
# prints the selected epochs instead.
history = model.fit(x=X_train, y=y_train, epochs=epochs, verbose=0, validation_data=(X_validation, y_validation), callbacks=[custom_verbose, early_stopping])
Epoch 1/300 elapsed time: 2.982s - r2_score: -4.4259 - loss: 1531.5669 - val_r2_score: -4.3278 - val_loss: 1318.7324 Epoch 30/300 elapsed time: 0.099s - r2_score: 0.5445 - loss: 128.5662 - val_r2_score: 0.4962 - val_loss: 124.6939 Epoch 60/300 elapsed time: 0.094s - r2_score: 0.7863 - loss: 60.3318 - val_r2_score: 0.7695 - val_loss: 57.0544 Epoch 90/300 elapsed time: 0.096s - r2_score: 0.8723 - loss: 36.0539 - val_r2_score: 0.8702 - val_loss: 32.1337 Epoch 120/300 elapsed time: 0.092s - r2_score: 0.9047 - loss: 26.9075 - val_r2_score: 0.8895 - val_loss: 27.3493 Epoch 150/300 elapsed time: 0.093s - r2_score: 0.9197 - loss: 22.6718 - val_r2_score: 0.8983 - val_loss: 25.1747 Epoch 180/300 elapsed time: 0.093s - r2_score: 0.9325 - loss: 19.0631 - val_r2_score: 0.9032 - val_loss: 23.9478 Epoch 210/300 elapsed time: 0.099s - r2_score: 0.9363 - loss: 17.9921 - val_r2_score: 0.9048 - val_loss: 23.5635 Epoch 233: early stopping
# Plot the training curves: R² on the left, loss on the right. Each panel
# shows the training series first and the validation series second, which is
# the order the legend labels rely on.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
ax1.plot(history.history["r2_score"])
ax1.plot(history.history["val_r2_score"])
ax1.set_xlabel("Epoch")
ax1.set_ylabel("R² Score")
ax1.legend(["Training", "Validation"])
ax2.plot(history.history["loss"])
ax2.plot(history.history["val_loss"])
ax2.set_xlabel("Epoch")
ax2.set_ylabel("Loss")
ax2.legend(["Training", "Validation"])
Evaluate the model¶
# Predict on the test subset and report mean squared error and R².
y_pred = model.predict(X_test, verbose=0)
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
print(f"MSE = {test_mse:.2f}")
print(f"R² = {test_r2:.2f}")
MSE = 30.78 R² = 0.89