II. Objectives (still unresolved)
Face-mask recognition: tune the model for mask wearing, and complete the labeling of specific faces (so that it can identify who is wearing the mask).
Method
Step 1: object detection. Check whether the person in the image is wearing a mask, and output the classification label and the location. Step 2: extract the detected region and run face recognition on it.
Part One: Preparation
Plenty of face data is available online.
1. Install Anaconda (tutorials can be found online).
2. Install labelImg (search Baidu for a tutorial).
3. Environment configuration
import os import glob from xml.etree import ElementTree as ET import numpy as
np import skimage.color import skimage.io import skimage.transform import torch
from torch.utils.data import Dataset, DataLoader
4. Data loading
# NOTE(review): the three functions below take ``self`` and read
# ``self.path``, ``self.img_Info``, ``self.img_Cont_Info`` and
# ``self.transforms``. They appear to be methods of the dataset class whose
# header and ``__init__`` are not shown in this section -- indent them into
# that class when assembling the file.


def xmlData_Read(self):
    """Load every ``*.xml`` annotation found under ``self.path``.

    For each annotation file this method:

    * records the image metadata (``filename``, ``path`` and the ``size``
      fields) in ``self.img_Info``;
    * if the annotated size is not 256x256, rescales the image on disk to
      256x256 RGB, rescales the ``bndbox`` coordinates by the same factors
      and rewrites the XML file so image and annotation stay in sync;
    * records the label (``'1'`` for ``face_with_mask``, ``'0'`` for
      ``face_without_mask``) and the ``[xmin, ymin, xmax, ymax]`` box in
      ``self.img_Cont_Info``.

    Only one object per image is kept: when an annotation holds several
    ``object`` elements, the last one wins.  Entries are appended, so calling
    this method twice duplicates them.
    """
    side = 256  # every image is normalised to side x side pixels
    side_text = str(side)

    for index, xml_path in enumerate(glob.glob(self.path + '/*.xml')):
        target = {'id': '', 'IsMasked': "", 'boxes': ''}
        img_info = {'id': '', 'path': '', 'fileName': '',
                    'width': '', 'height': '', 'depth': ''}
        tree = ET.parse(xml_path)
        root = tree.getroot()

        # Picture name and 1-based running number.
        target['id'] = str(index + 1)
        img_info['id'] = str(index + 1)
        img_info['fileName'] = root.find('filename').text
        img_info['path'] = root.find('path').text

        # Picture size as stored in the annotation (kept as text).
        for size in root.findall('size'):
            img_info['width'] = size.find('width').text
            img_info['height'] = size.find('height').text
            img_info['depth'] = size.find('depth').text

        # Resize when EITHER dimension differs.  Both values are strings, so
        # they must be compared against a string.
        if img_info['width'] != side_text or img_info['height'] != side_text:
            image = skimage.io.imread(img_info['path'])
            old_height, old_width = image.shape[:2]

            image = skimage.transform.resize(image, (side, side))
            # If grayscale, convert to RGB for consistency.
            if image.ndim != 3:
                image = skimage.color.gray2rgb(image)
            # Drop an alpha channel so the file matches the depth of 3
            # written to the annotation below.
            if image.shape[-1] == 4:
                image = image[..., :3]
            # ``resize`` returns floats in [0, 1]; store 8-bit pixels.
            image = np.clip(image * 255.0, 0, 255).round().astype(np.uint8)
            skimage.io.imsave(img_info['path'], image)

            # Keep the bounding boxes aligned with the rescaled pixels.
            scale_x = side / old_width
            scale_y = side / old_height
            for bndbox in root.iter('bndbox'):
                for tag, scale in (('xmin', scale_x), ('ymin', scale_y),
                                   ('xmax', scale_x), ('ymax', scale_y)):
                    node = bndbox.find(tag)
                    node.text = str(int(round(int(node.text) * scale)))

            for size in root.findall('size'):
                size.find('width').text = side_text
                size.find('height').text = side_text
                size.find('depth').text = '3'
            tree.write(xml_path)

            img_info['width'] = side_text
            img_info['height'] = side_text
            img_info['depth'] = '3'

        # Two classes: face_with_mask and face_without_mask.
        for ob in root.findall('object'):
            name = ob.find('name').text
            if name == 'face_with_mask':
                target['IsMasked'] = '1'
            elif name == 'face_without_mask':
                target['IsMasked'] = '0'
            for bndbox in ob.iter('bndbox'):
                target['boxes'] = [
                    int(bndbox.find(tag).text)
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                ]

        self.img_Cont_Info.append(target)
        self.img_Info.append(img_info)


def resize(self, path):
    """Rescale every file in directory ``path`` to 256x256, in place.

    Every directory entry is read as an image, so ``path`` must contain
    image files only.
    """
    for file_name in os.listdir(path):
        file_path = path + '/' + file_name
        image = skimage.transform.resize(
            skimage.io.imread(file_path), (256, 256))
        # ``resize`` returns floats in [0, 1]; a bare ``astype(np.uint8)``
        # would truncate them all to 0 and save a black image.
        image = np.clip(image * 255.0, 0, 255).round().astype(np.uint8)
        skimage.io.imsave(file_path, image)


def __getitem__(self, image_id):
    """Return ``(image, target)`` for one sample.

    ``image_id`` is used as ``image_id - 1`` to index the lists filled by
    ``xmlData_Read`` (so ``0`` addresses the last entry).  ``target`` is an
    ``int64`` tensor ``[IsMasked, xmin, ymin, xmax, ymax]``.  The image is
    passed through ``self.transforms`` when that is not ``None``.
    """
    image_id = int(image_id)
    info = self.img_Info[image_id - 1]
    annotation = self.img_Cont_Info[image_id - 1]

    image = skimage.io.imread(info['path'])
    if image.shape != (256, 256, 3):
        image = skimage.transform.resize(image, (256, 256))
        # Back to 8-bit so resized and untouched images share one dtype.
        image = np.clip(image * 255.0, 0, 255).round().astype(np.uint8)
    # If grayscale, convert to RGB for consistency.
    if image.ndim != 3:
        image = skimage.color.gray2rgb(image)
    # If it has an alpha channel, remove it for consistency.
    if image.shape[-1] == 4:
        image = image[..., :3]

    # Convert the annotation to a single tensor: label first, then the box.
    box = torch.as_tensor(annotation['boxes'], dtype=torch.float32)
    label = torch.as_tensor(int(annotation['IsMasked']), dtype=torch.int64)
    target = torch.cat([label.reshape(1), box.long()])

    if self.transforms is not None:
        image = self.transforms(image)
    return image, target
Part Two: Network construction (VGG11)
# N is the batch size; D_in is the input dimension;
# H is the hidden dimension; D_out is the output dimension.
N, D_in, H, D_out = 64, 65536, 100, 5

# Layer layout per architecture name: an int is the output width of a
# 3x3 convolution, 'M' is a 2x2 max-pooling step.
cfg = {
    'VGG11': [32, 'M', 64, 'M', 128, 128, 'M', 256, 256, 'M', 256, 256, 'M'],
}


class VGG_Net(nn.Module):
    """Convolutional feature extractor followed by one linear layer.

    Args:
        vgg_name: key into ``cfg`` selecting the layer layout.
        num_classes: number of outputs of the linear layer (default 5).
        input_size: side length, in pixels, of the square input images
            (default 256).  It determines the input width of the linear
            layer; the defaults give ``nn.Linear(16384, 5)``.

    Raises:
        KeyError: if ``vgg_name`` is not a key of ``cfg``.
    """

    def __init__(self, vgg_name, num_classes=5, input_size=256):
        super(VGG_Net, self).__init__()
        layout = cfg[vgg_name]
        self.features = self._make_layers(layout)
        self.classifier = nn.Linear(
            self._classifier_inputs(layout, input_size), num_classes)

    def forward(self, x):
        """Run the feature stack, flatten per sample, apply the classifier."""
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def num_flat_features(self, x):
        """Return the number of elements per sample of ``x``."""
        num_features = 1
        for s in x.size()[1:]:  # all dimensions except the batch dimension
            num_features *= s
        return num_features

    @staticmethod
    def _classifier_inputs(layout, input_size):
        """Return the flattened feature count produced by ``layout``."""
        side = input_size
        channels = 3
        for item in layout:
            if item == 'M':
                side //= 2  # MaxPool2d(kernel_size=2, stride=2) halves, flooring
            else:
                channels = item  # padding=1 with a 3x3 kernel keeps the side
        return channels * side * side

    def _make_layers(self, cfg):
        """Build the ``nn.Sequential`` feature stack described by ``cfg``."""
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
Part Three: Model training
def train(epoch, net, trainloader, use_cuda, optimizer, criterion):
    """Train ``net`` for one pass over ``trainloader``.

    Args:
        epoch: epoch number, used only for the progress printout.
        net: the model to train.
        trainloader: iterable yielding ``(inputs, targets)`` batches.
            ``targets`` may be class indices of shape ``(N,)`` or rows of
            shape ``(N, K)`` whose first column is the class index (for
            example ``[IsMasked, xmin, ymin, xmax, ymax]``).
        use_cuda: move each batch to the GPU when true.
        optimizer: optimizer stepping the parameters of ``net``.
        criterion: loss called as ``criterion(outputs, class_indices)``.

    Only the class index takes part in the loss.  A classification loss
    such as ``nn.CrossEntropyLoss`` expects one class index per sample, so
    the remaining target columns (the box coordinates) are ignored here;
    learning the box would need its own output head and a regression loss.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Move the data to the GPU.
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        # Keep only the class index when the target also carries the box.
        labels = targets[:, 0] if targets.dim() > 1 else targets

        # Reset the gradients accumulated by the previous step.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # Back-propagate and update the parameters.
        loss.backward()
        optimizer.step()

        # Accumulate a plain Python number, not the loss tensor: adding the
        # tensor itself would keep every batch's graph alive.
        train_loss += loss.item()

        # Running statistics.
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        progress_bar(batch_idx, len(trainloader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1),
                        100. * correct / total, correct, total))


def main():
    """Build the datasets, the model and the optimizer, then train.

    The model weights are saved to ``./checkpoint/ckpt.pth`` after every
    epoch, overwriting the previous file.
    """
    import os  # local import: only needed for the checkpoint directory

    # from utils import progress_bar
    use_cuda = torch.cuda.is_available()
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    # Get the datasets and preprocess them first.
    # Labels: 1 = 'face_with_mask', 0 = 'face_without_mask'.
    print('==> Preparing data..')
    tran = transforms.ToTensor()
    # Raw string: the path mixes '/' and '\' and must not be escape-processed.
    trainset = DR.customDataset(r'C:/Users/ Small z\Desktop\label_data', tran)
    trainset.xmlData_Read()
    trainloader = data.DataLoader(trainset, batch_size=5,
                                  shuffle=False, num_workers=2)
    testset = DR.customDataset('C:/Users/ Small z/Desktop/test', tran)
    testset.xmlData_Read()
    testloader = data.DataLoader(testset, batch_size=2, num_workers=2)

    net = model.VGG_Net('VGG11')
    if use_cuda:
        # Move parameters and buffers to the GPU.
        net.cuda()
        # Use the available GPUs in parallel.
        net = torch.nn.DataParallel(net)
        # Speeds things up slightly.
        cudnn.benchmark = True

    # Define the loss and the optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(),
                          lr=0.1, momentum=0.9, weight_decay=5e-4)

    # torch.save needs a file path, and the directory must exist.
    checkpoint_dir = './checkpoint'
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, 'ckpt.pth')

    # Run the model.
    for epoch in range(start_epoch, start_epoch + 200):
        train(epoch, net, trainloader, use_cuda, optimizer, criterion)
        torch.save(net.state_dict(), checkpoint_path)
        # test(epoch)
        # Release cached GPU memory that is no longer in use.
        torch.cuda.empty_cache()
I am stuck at this stage; there is a problem.
The first printed value is the model output, and the second is the target (IsMasked, xmin, ymin, xmax, ymax). Something is wrong with these two outputs; I would appreciate any advice.
Technology