1.遮挡物检测简介
不同的目标检测应用场景有不同的检测难点,如小目标、多尺度以及背景复杂等问题,而被遮挡的物体至今仍是最先进的物体检测器面临的挑战。本文尝试解决待测目标相互遮挡带来的检测困难:针对人脸遮挡,提出了一个名为 SEAM
(分离和增强注意力)的注意力模块,并引入排斥损失(Repulsion Loss)加以解决;将 SEAM 置于 Neck
层输出之后,用于增强被遮挡人脸的响应能力。
1.1 Separated and Enhancement Attention Module (SEAM)
即不同人脸之间的遮挡,以及其他物体对人脸的遮挡。前者使得检测精度对 NMS
阈值非常敏感,从而导致漏检。作者使用排斥损失进行人脸检测,它惩罚预测框转移到其他真实目标,并要求每个预测框远离具有不同指定目标的其他预测框,以使检测结果对 NMS
不太敏感。后者会造成特征消失,进而导致定位不准确,为此设计了注意力模块 SEAM
来增强人脸特征的学习。
1.2 MultiSEAM
解决多尺度问题的主要方法是构建金字塔来融合人脸的多尺度特征。例如,在 YOLOv5
中,FPN
融合了 P3
、P4
和 P5
层的特征。但是对于小尺度的目标,经过多层卷积后信息很容易丢失,保留的像素信息很少,即使在较浅的P3
层也是如此。因此,提高特征图的分辨率无疑有利于小目标的检测。
1.3遮挡感知排斥损失
RepGT Loss
的作用是使当前边界框尽可能远离周围的ground truth box
。 这里的周围ground truth box
是指除了bounding box
本身要回归的对象之外,与人脸IoU
最大的人脸标签。
2.YoloV8加入 SEAM、MultiSEAM注意力机制
2.1 SEAM、MultiSEAM加入modules.py
中:
核心代码:
class SEAM(nn.Module):
    """Separated and Enhancement Attention Module.

    Stacks ``n`` depthwise-residual + pointwise stages (``self.DCovN``), then
    squeezes the result with global average pooling and a two-layer bottleneck
    MLP to produce per-channel attention weights. The sigmoid output is passed
    through ``exp`` so every channel is scaled by a factor >= 1 before
    re-weighting the input.

    Args:
        c1 (int): input channel count.
        c2 (int): requested output channel count; forced back to ``c1`` when
            they differ, since the module re-weights its own input.
        n (int): number of depthwise/pointwise stages in ``DCovN``.
        reduction (int): bottleneck ratio of the channel-attention MLP.
    """

    def __init__(self, c1, c2, n, reduction=16):
        super().__init__()
        # The module multiplies the attention map back onto x, so the
        # working channel count must match the input channel count.
        if c1 != c2:
            c2 = c1
        self.DCovN = nn.Sequential(
            *[nn.Sequential(
                # Depthwise 3x3 (groups=c2) wrapped in a residual connection.
                Residual(nn.Sequential(
                    nn.Conv2d(in_channels=c2, out_channels=c2, kernel_size=3, stride=1, padding=1, groups=c2),
                    nn.GELU(),
                    nn.BatchNorm2d(c2)
                )),
                # Pointwise 1x1 mixes channels after the depthwise stage.
                nn.Conv2d(in_channels=c2, out_channels=c2, kernel_size=1, stride=1, padding=0, groups=1),
                nn.GELU(),
                nn.BatchNorm2d(c2)
            ) for _ in range(n)]
        )
        self.avg_pool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(c2, c2 // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(c2 // reduction, c2, bias=False),
            nn.Sigmoid()
        )
        self._initialize_weights()
        # BUG FIX: the original called self.initialize_layer(self.fc), which
        # was a silent no-op because initialize_layer only acts on a single
        # nn.Conv2d/nn.Linear, not on an nn.Sequential container. Apply it to
        # each sub-layer so the fc Linear weights actually get the intended
        # small-normal initialization.
        for layer in self.fc:
            self.initialize_layer(layer)

    def forward(self, x):
        """Return ``x`` re-weighted per channel by exp(sigmoid(attention))."""
        b, c, _, _ = x.size()
        y = self.DCovN(x)
        y = self.avg_pool(y).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        # exp maps the sigmoid output (0, 1) to (1, e): channels are only
        # ever amplified, never suppressed below identity.
        y = torch.exp(y)
        return x * y.expand_as(x)

    def _initialize_weights(self):
        """Xavier-init all convs and reset all batch norms in the module."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_uniform_(m.weight, gain=1)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def initialize_layer(self, layer):
        """Init a single Conv2d/Linear layer with N(0, 0.001) weights, zero bias."""
        if isinstance(layer, (nn.Conv2d, nn.Linear)):
            torch.nn.init.normal_(layer.weight, mean=0., std=0.001)
            if layer.bias is not None:
                torch.nn.init.constant_(layer.bias, 0)
def DcovN(c1, c2, depth, kernel_size=3, patch_size=3):
    """Build the patch-embedding branch used by MultiSEAM.

    A strided ``patch_size`` conv embeds non-overlapping patches, followed by
    ``depth`` stages of (residual depthwise conv -> pointwise conv), each with
    SiLU activation and batch norm.

    Args:
        c1 (int): input channels.
        c2 (int): embedding / working channels.
        depth (int): number of depthwise/pointwise stages after the stem.
        kernel_size (int): spatial size of the depthwise conv.
        patch_size (int): both kernel size and stride of the embedding conv.

    Returns:
        nn.Sequential: the assembled branch.
    """
    stem = [
        nn.Conv2d(c1, c2, kernel_size=patch_size, stride=patch_size),
        nn.SiLU(),
        nn.BatchNorm2d(c2),
    ]
    stages = []
    for _ in range(depth):
        stages.append(
            nn.Sequential(
                Residual(nn.Sequential(
                    nn.Conv2d(in_channels=c2, out_channels=c2, kernel_size=kernel_size, stride=1, padding=1, groups=c2),
                    nn.SiLU(),
                    nn.BatchNorm2d(c2),
                )),
                nn.Conv2d(in_channels=c2, out_channels=c2, kernel_size=1, stride=1, padding=0, groups=1),
                nn.SiLU(),
                nn.BatchNorm2d(c2),
            )
        )
    return nn.Sequential(*stem, *stages)
class MultiSEAM(nn.Module):
    """Multi-scale variant of SEAM.

    Runs three ``DcovN`` branches with different patch sizes over the input,
    global-average-pools each branch (plus the raw input), averages the four
    channel descriptors, and turns the mean into per-channel attention weights
    via a bottleneck MLP, sigmoid and exp — the same re-weighting scheme as
    :class:`SEAM`.

    Args:
        c1 (int): input channel count.
        c2 (int): requested output channels; forced back to ``c1`` when they
            differ, since the module re-weights its own input.
        depth (int): number of stages inside each ``DcovN`` branch.
        kernel_size (int): depthwise kernel size passed to ``DcovN``.
        patch_size (list[int]): the three patch-embedding sizes, one per branch.
        reduction (int): bottleneck ratio of the channel-attention MLP.
    """

    def __init__(self, c1, c2, depth, kernel_size=3, patch_size=[3, 5, 7], reduction=16):
        super().__init__()
        if c1 != c2:
            c2 = c1
        self.DCovN0 = DcovN(c1, c2, depth, kernel_size=kernel_size, patch_size=patch_size[0])
        self.DCovN1 = DcovN(c1, c2, depth, kernel_size=kernel_size, patch_size=patch_size[1])
        self.DCovN2 = DcovN(c1, c2, depth, kernel_size=kernel_size, patch_size=patch_size[2])
        self.avg_pool = torch.nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(c2, c2 // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(c2 // reduction, c2, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return ``x`` re-weighted by multi-scale channel attention."""
        b, c, _, _ = x.size()
        y0 = self.DCovN0(x)
        y1 = self.DCovN1(x)
        y2 = self.DCovN2(x)
        y0 = self.avg_pool(y0).view(b, c)
        y1 = self.avg_pool(y1).view(b, c)
        y2 = self.avg_pool(y2).view(b, c)
        y4 = self.avg_pool(x).view(b, c)
        # BUG FIX: the original line read `y = (y0 y1 y2 y4) / 4`, which is a
        # SyntaxError — the `+` operators were lost. The intent (dividing by 4)
        # is the mean of the three branch descriptors and the identity one.
        y = (y0 + y1 + y2 + y4) / 4
        y = self.fc(y).view(b, c, 1, 1)
        # As in SEAM, exp maps sigmoid's (0, 1) range to (1, e): amplify only.
        y = torch.exp(y)
        return x * y.expand_as(x)
by CSDN AI小怪兽 http://cv2023.blog.csdn.net
我正在参与2023腾讯技术创作特训营第三期有奖征文,组队打卡瓜分大奖!