The first thing we need is a dataset (a sample of photos) of people with and without masks, which we will use to fine-tune the publicly available MobileNetV2 neural network. I had a dataset of 981 photographs of people wearing masks and the same number of photographs of the same people without masks.
I would like to note one important point: the MobileNetV2 neural network can be retrained for almost any classification task. For example, it could be retrained to determine gender, or to detect automatically whether a person is wearing glasses. That is why we freeze all the base layers of the model and train only the new top layers on whatever needs to be classified. Here we will focus on detecting a medical mask, as the most relevant task at the present time.
So, let's place our dataset of 1962 photos into two directories inside the dataset folder: photos with masks go in "WithMask" and photos without masks go in "Withoutmask". Each directory contains 981 photographs. Another important note: we retrain the network on faces, rather than on whole images of a person who is or is not wearing a mask, although the latter would also have been possible.
Next, we import the required libraries:
import argparse
import os

import matplotlib.pyplot as plt
import numpy as np
from imutils import paths
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import InverseTimeDecay
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.utils import to_categorical
# Specify initial hyperparameters
# Initial learning rate handed to the Adam optimizer.
# (Written with a decimal point: "0,004" is not a valid Python number.)
INIT_LR = 0.004
# Number of training epochs
EPOCHS = 20
# Batch size used for training and prediction
BS = 32
# Collect the paths of every image under the dataset folder, then build two
# parallel lists: the preprocessed images and their class labels.
imagePaths = list(paths.list_images(r'C:\dataset'))
if not imagePaths:
    # Fail early with a clear message instead of a cryptic error from the
    # label encoder / train_test_split further down.
    raise FileNotFoundError(r"No images found under C:\dataset")
data, labels = [], []
for imagePath in imagePaths:
    # The class label is the name of the directory that contains the image
    # (the second-to-last path component)
    label = imagePath.split(os.path.sep)[-2]
    # Load the image resized to 224x224, the input size the model is built with
    image = load_img(imagePath, target_size=(224, 224))
    image = img_to_array(image)
    image = preprocess_input(image)
    # Accumulate the image and its label
    data.append(image)
    labels.append(label)
# Convert both lists to NumPy arrays
data = np.array(data, dtype="float32")
labels = np.array(labels)
# Encode the text labels as 0 / 1, then one-hot encode them
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
labels = to_categorical(labels)
# Split into 80% training and 20% test data; stratify keeps the class
# proportions equal in both parts, random_state makes the split repeatable
(trainX, testX, trainY, testY) = train_test_split(
    data, labels, test_size=0.20, stratify=labels, random_state=42)
# Image generator that augments the training data with random rotations,
# zooms, shifts, shears and horizontal flips
aug = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest")
# Load MobileNetV2 from a local weights file, without its top (classifier)
# layers, for 224x224 three-channel input
path_weights = 'mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5'
baseModel = MobileNetV2(weights=path_weights, include_top=False,
                        input_tensor=Input(shape=(224, 224, 3)))
# Build the new head that is stacked on top of the base model and ends in a
# two-unit softmax (one unit per class)
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(7, 7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(128, activation="relu")(headModel)
headModel = Dropout(0.5)(headModel)
headModel = Dense(2, activation="softmax")(headModel)
model = Model(inputs=baseModel.input, outputs=headModel)
# Freeze every layer of the base model so that only the new head is trained
for layer in baseModel.layers:
    layer.trainable = False
# Compile the model. The `lr=` / `decay=` optimizer arguments are rejected by
# current Keras optimizers, so the same per-step decay
# (lr / (1 + decay * step)) is expressed as an InverseTimeDecay schedule.
lr_schedule = InverseTimeDecay(INIT_LR, decay_steps=1,
                               decay_rate=INIT_LR / EPOCHS)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])
# Train the head of the network on augmented batches of the training data,
# validating on the held-out test split
H = model.fit(
    aug.flow(trainX, trainY, batch_size=BS),
    steps_per_epoch=len(trainX) // BS,
    validation_data=(testX, testY),
    validation_steps=len(testX) // BS,
    epochs=EPOCHS)
# Predict class probabilities for the test split
predIdxs = model.predict(testX, batch_size=BS)
# For each test image keep the index of the class with the highest probability
predIdxs = np.argmax(predIdxs, axis=1)
# Print the per-class classification report
print(classification_report(testY.argmax(axis=1), predIdxs,
                            target_names=lb.classes_))
# Save the trained model to disk in HDF5 format, then load it back
model.save('model_mask_FACE', save_format="h5")
model_mask = load_model('model_mask_FACE')
# Read the test image and find the face on it with MTCNN.
# NOTE(review): `cv2`, `Image` (presumably PIL.Image) and the `mtcnn` detector
# object are not imported or created in the code shown here — confirm they are
# defined before this point.
frame = cv2.cvtColor(cv2.imread('house.png'), cv2.COLOR_BGR2RGB)
frame_image = Image.fromarray(frame)
boxes, probs, landmarks = mtcnn.detect(frame_image, landmarks=True)
# Presumably the detector reports "no face" as None or an empty result —
# TODO confirm; either way fail with a clear message instead of on boxes[0].
if boxes is None or len(boxes) == 0:
    raise RuntimeError("No face detected in house.png")
# Use the first detected box, converted to integer pixel coordinates
x1, y1, x2, y2 = [int(bx) for bx in boxes[0]]
# Crop the face and resize it to the 224x224 input size of the classifier
image = Image.fromarray(frame[y1:y2, x1:x2]).resize((224, 224))
face = img_to_array(image)
# Apply the same preprocessing that was used for the training images
face = preprocess_input(face)
face = np.expand_dims(face, axis=0)
# Run the classifier; the two outputs are unpacked as (mask, withoutMask)
(mask, withoutMask) = model_mask.predict(face)[0]
image = cv2.imread('house.png')
# Annotate the image only when the winning class is confident enough.
# The original condition also required `mask > withoutMask`, which made the
# "No Mask" branch below unreachable; it is dropped so both results are drawn.
if max(mask, withoutMask) > 0.8:
    label = "Mask" if mask > withoutMask else "No Mask"
    color = (0, 122, 0) if label == "Mask" else (0, 0, 122)
    label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)
    cv2.putText(image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 2,
                color, 5)
    cv2.rectangle(image, (x1, y1), (x2, y2), color, 5)
# Convert the annotated BGR image back to RGB for display
y = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
So, in this article we have shown how to retrain the MobileNetV2 neural network to classify images of people with and without medical masks.