VGG在我之前的博客中已经做过详解,详情见:https://blog.csdn.net/muye_IT/article/details/123797416
代码已提交github,详情见(麻烦Star!):回复:深度学习3 即可获取链接
1. VGG16
VGG网络有多个版本,一般常用的是VGG-16模型,其网络结构如下图所示:
2. 使用PyTorch搭建VGG网络
2.1 model.py
VGG网络分为 卷积层提取特征 和 全连接层进行分类 这两个模块
import torch.nn as nn
import torch
class VGG(nn.Module):
    """VGG-style classifier: a feature extractor followed by a 3-layer head.

    Args:
        features: Module applied first in ``forward``. Its output, flattened
            per sample, must contain ``512 * 7 * 7`` values to match the
            first linear layer of the classifier.
        num_classes: Output size of the final linear layer.
        init_weights: When true, re-initialise every Conv2d/Linear weight
            with Xavier-uniform and set their biases to zero.
    """

    def __init__(self, features, num_classes=1000, init_weights=False):
        super().__init__()
        self.features = features  # convolutional feature extractor
        # Three fully connected layers that perform the classification.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(512 * 7 * 7, 2048),
            nn.ReLU(True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(True),
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        # Intended input: N x 3 x 224 x 224
        x = self.features(x)
        # Intended feature map: N x 512 x 7 x 7
        x = torch.flatten(x, start_dim=1)
        # Flattened: N x (512*7*7)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Xavier-uniform all Conv2d/Linear weights and zero their biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.xavier_uniform_(m.weight)
                # Layers created with bias=False have no bias tensor to fill.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
但是VGG网络有 VGG-13、VGG-16等多种网络结构,我们能不能将这几种结构通过代码集合成一个模型呢? 以上图的A、B、D、E模型为例,其全连接层完全一样,卷积层只有卷积核个数稍有不同。
# VGG configuration table: an integer is the number of kernels of a conv layer, 'M' is a max-pooling layer
# Layer layouts keyed by model name. An integer entry is a conv layer with
# that many kernels; the string "M" is a max-pooling layer.
cfgs = dict(
    # configuration A
    vgg11=[64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"],
    # configuration B
    vgg13=[64, 64, "M", 128, 128, "M", 256, 256, "M",
           512, 512, "M", 512, 512, "M"],
    # configuration D
    vgg16=[64, 64, "M", 128, 128, "M", 256, 256, 256, "M",
           512, 512, 512, "M", 512, 512, 512, "M"],
    # configuration E
    vgg19=[64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M",
           512, 512, 512, 512, "M", 512, 512, 512, 512, "M"],
)
# 卷积层提取特征
def make_features(cfg: list):
    """Build the convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Layer layout for one model. An integer ``v`` adds a 3x3
            convolution (padding 1) with ``v`` output channels followed by
            an in-place ReLU; the string ``"M"`` adds a 2x2 max-pool with
            stride 2.

    Returns:
        An ``nn.Sequential`` containing every layer in ``cfg`` order.
    """
    layers = []
    in_channels = 3  # RGB input image
    for v in cfg:
        if v == "M":
            # Max-pooling layer. Append to the list: assigning a fresh list
            # here would discard every layer built so far.
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            # Convolution + ReLU; the next conv consumes this one's output.
            layers.append(nn.Conv2d(in_channels, v, kernel_size=3, padding=1))
            layers.append(nn.ReLU(True))
            in_channels = v
    # The single star unpacks the list into positional arguments.
    return nn.Sequential(*layers)
def vgg(model_name="vgg16", **kwargs):
    """Instantiate the VGG variant named ``model_name``.

    Args:
        model_name: Key into the module-level ``cfgs`` table.
        **kwargs: Forwarded unchanged to the ``VGG`` constructor (the double
            star collects the keyword arguments into a dict).

    Returns:
        A ``VGG`` model whose feature extractor follows ``cfgs[model_name]``.

    Raises:
        SystemExit: With status -1, after printing a warning, when
            ``model_name`` is not a usable key of ``cfgs``.
    """
    try:
        cfg = cfgs[model_name]
    except (KeyError, TypeError):
        # Only a failed lookup is handled here; a bare ``except`` would also
        # hide unrelated errors such as KeyboardInterrupt.
        print("Warning: model number {} not in cfgs dict!".format(model_name))
        # Same effect as exit(-1), without depending on the site-provided builtin.
        raise SystemExit(-1)
    return VGG(make_features(cfg), **kwargs)
扩展:Python 函数参数前面一个星号(*)和两个星号(**)的区别
在 Python 的函数中经常能看到输入的参数前面有一个或者两个星号,例如:
def foo(param1, *param2):
def bar(param1, **param2):
这两种用法其实都是用来将任意个数的参数导入到 Python 函数中。 单星号(*):*args 将所有参数以元组(tuple)的形式导入:
def foo(param1, *param2):
    """Print the first argument, then the tuple of remaining positionals."""
    print(param1)
    print(param2)


foo(1, 2, 3, 4, 5)
以上代码输出结果为:
代码语言:javascript复制1
(2, 3, 4, 5)
双星号(**):**kwargs 将所有参数以字典的形式导入:
def bar(param1, **param2):
    """Print the first argument, then the dict of keyword arguments."""
    print(param1)
    print(param2)


bar(1, a=2, b=3)
以上代码输出结果为:
代码语言:javascript复制1
{'a': 2, 'b': 3}
此外,单星号的另一个用法是解压参数列表:
代码语言:javascript复制
def foo(runoob_1, runoob_2):
    """Print both arguments on one line, separated by a space."""
    print(runoob_1, runoob_2)


pair = [1, 2]
# The single star unpacks the list into two positional arguments.
foo(*pair)
以上代码输出结果为:
代码语言:javascript复制1 2
当然这两个用法可以同时出现在一个函数中:
def foo(a, b=10, *args, **kwargs):
    """Print ``a``, ``b``, the extra positionals and the extra keywords."""
    print(a)
    print(b)
    print(args)
    print(kwargs)


foo(1, 2, 3, 4, e=5, f=6, g=7)
以上代码输出结果为:
代码语言:javascript复制1
2
(3, 4)
{'e': 5, 'f': 6, 'g': 7}
2.2 train.py
训练脚本跟上一篇AlexNet基本一致,需要注意的是实例化网络的过程:
model_name = "vgg16"
net = vgg(model_name=model_name, num_classes=5, init_weights=True)
函数调用关系:
net = vgg(model_name="vgg16", num_classes=5, init_weights=True)
    cfg = cfgs[model_name]
        = cfgs["vgg16"] = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
    model = VGG(make_features(cfg), num_classes=5, init_weights=True)
        make_features(cfg: list)
2.3 train.py 详解
train.py ——加载数据集并训练,训练集计算loss,测试集计算accuracy,保存训练好的网络参数
2.3.1 相关包的加载
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
2.3.2 数据预处理
# Preprocessing pipelines keyed by dataset split.
data_transform = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(224),  # random crop
        transforms.RandomHorizontalFlip(),  # random horizontal flip
        transforms.ToTensor(),  # convert to Tensor
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # normalise
    ]),
    "val": transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
}
2.3.3 加载训练集
# Resolve the image dataset path relative to the current working directory.
data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

# Load the training split and apply the "train" preprocessing.
train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                     transform=data_transform["train"])
train_num = len(train_dataset)

# Store the index -> class-name mapping in a JSON file so that prediction
# code can read it back later.
# class_to_idx maps class name -> index, e.g.
# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
# Swap keys and values to get index -> class name.
cla_dict = {val: key for key, val in flower_list.items()}
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
    json_file.write(json_str)

batch_size = 64
nw = 0  # number of workers
print('Using {} dataloader workers every process'.format(nw))

# Serve the training set in shuffled batches of batch_size.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=batch_size, shuffle=True,
                                           num_workers=nw)
2.3.4 加载验证集
# Load the validation split with the "val" preprocessing; sample order is
# kept (shuffle=False).
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                        transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=4, shuffle=False,
                                              num_workers=nw)
2.3.5 训练网络与验证网络
# Build the network, then alternate one training pass and one validation
# pass per epoch, saving the weights whenever validation accuracy improves.
model_name = "vgg16"
net = vgg(model_name=model_name, num_classes=5, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)

epochs = 30
best_acc = 0.0
save_path = './{}Net.pth'.format(model_name)
train_steps = len(train_loader)
for epoch in range(epochs):
    # train
    net.train()
    running_loss = 0.0
    train_bar = tqdm(train_loader, file=sys.stdout)
    for images, labels in train_bar:
        optimizer.zero_grad()
        outputs = net(images.to(device))
        loss = loss_function(outputs, labels.to(device))
        loss = loss.requires_grad_(True)
        loss.backward()
        optimizer.step()

        # print statistics
        # Accumulate across batches so the epoch average below is meaningful.
        running_loss += loss.item()
        train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                 epochs,
                                                                 loss)

    # validate
    net.eval()
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
        val_bar = tqdm(validate_loader, file=sys.stdout)
        for val_images, val_labels in val_bar:
            outputs = net(val_images.to(device))
            predict_y = torch.max(outputs, dim=1)[1]
            # Sum correct predictions over every validation batch.
            acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

    val_accurate = acc / val_num
    print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
          (epoch + 1, running_loss / train_steps, val_accurate))

    # Keep only the weights of the best epoch so far.
    if val_accurate > best_acc:
        best_acc = val_accurate
        torch.save(net.state_dict(), save_path)

print('Finished Training')
if hasattr(torch.cuda, 'empty_cache'):
    torch.cuda.empty_cache()


if __name__ == '__main__':
    main()
2.3.6 完整代码
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torch.optim as optim
from tqdm import tqdm
from model import vgg
def main():
    """Train VGG-16 on the flower image folders and keep the best weights.

    Steps: pick the device, build the train/val ``ImageFolder`` datasets and
    loaders, write the index -> class-name mapping to ``class_indices.json``,
    then run 30 epochs of Adam + cross-entropy training. After every epoch
    the model is evaluated on the validation split and its state dict is
    saved to ``./vgg16Net.pth`` whenever validation accuracy improves.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
        "val": transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
    }

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # class_to_idx maps class name -> index, e.g.
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert it to index -> class name and write the dict into a json file.
    cla_dict = {val: key for key, val in flower_list.items()}
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32
    # os.cpu_count() may return None, which min() cannot compare with ints.
    cpu_total = os.cpu_count() or 1
    nw = min([cpu_total, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)
    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    model_name = "vgg16"
    net = vgg(model_name=model_name, num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 30
    best_acc = 0.0
    save_path = './{}Net.pth'.format(model_name)
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            optimizer.zero_grad()
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss = loss.requires_grad_(True)
            loss.backward()
            optimizer.step()

            # print statistics
            # Accumulate across batches so the epoch average below is meaningful.
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                outputs = net(val_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                # Sum correct predictions over every validation batch.
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the weights of the best epoch so far.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
    if hasattr(torch.cuda, 'empty_cache'):
        torch.cuda.empty_cache()


if __name__ == '__main__':
    main()