In the previous article, CNN实战(一):pytorch处理图像数据(Dataset和Dataloader), we walked through how to use PyTorch to turn the cat/dog images into data a CNN can consume. In this article, we use that data to train and test a CNN model we define ourselves.
Before reading on, you may want to review the basics of CNNs: 一文读懂卷积神经网络(CNN).
• First, import the required packages:
import torch
from torch import optim
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
• Define the CNN network:
class cnn(nn.Module):
    def __init__(self):
        super(cnn, self).__init__()  # initialize the parent nn.Module
        # First convolutional block:
        # 3 input channels, 32 output channels, kernel size 3, stride 1, followed by max pooling
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=3,
                out_channels=32,
                kernel_size=3,
                stride=1,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Second convolutional block:
        # 32 input channels, 64 output channels, kernel size 3, stride 1, followed by max pooling
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=32,
                out_channels=64,
                kernel_size=3,
                stride=1,
                padding=0
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Third convolutional block:
        # 64 input channels, 128 output channels, kernel size 3, stride 1, followed by max pooling
        self.conv3 = nn.Sequential(
            nn.Conv2d(
                in_channels=64,
                out_channels=128,
                kernel_size=3,
                stride=1,
                padding=0
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            # nn.Dropout(0.1)
        )
        # Fourth convolutional block:
        # 128 input channels, 256 output channels, kernel size 3, stride 1, followed by max pooling
        self.conv4 = nn.Sequential(
            nn.Conv2d(
                in_channels=128,
                out_channels=256,
                kernel_size=3,
                stride=1,
                padding=0
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Classification head: flattened 256 * 14 * 14 features -> 2 classes (cat / dog)
        self.output = nn.Linear(in_features=256 * 14 * 14, out_features=2)

    # Forward pass: apply the four blocks, flatten, then classify
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        temp = x.view(x.shape[0], -1)
        output = self.output(temp)
        return output, x
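With 256 × 256 inputs (the size produced by the Resize in load_data), each unpadded 3 × 3 convolution trims 2 pixels from the spatial size and each 2 × 2 max pool halves it, so after the four blocks the feature map is 256 × 14 × 14. That is where in_features=256 * 14 * 14 in the linear layer comes from. A quick sanity check with a dummy batch (a sketch, not part of the original code):
# Verify the flattened feature size with a fake 256 x 256 RGB image.
dummy = torch.randn(1, 3, 256, 256)
logits, feature_map = cnn()(dummy)
print(feature_map.shape)   # expected: torch.Size([1, 256, 14, 14])
print(logits.shape)        # expected: torch.Size([1, 2])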
• Training (GPU)
def train():
    train_loader, test_loader = load_data()
    epoch_num = 30
    # train on GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = cnn().to(device)
    # Adam optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.00005)
    # cross-entropy loss
    criterion = nn.CrossEntropyLoss().to(device)
    for epoch in range(epoch_num):
        for batch_idx, (data, target) in enumerate(train_loader, 0):
            data, target = Variable(data).to(device), Variable(target.long()).to(device)
            optimizer.zero_grad()               # clear gradients
            output = model(data)[0]             # forward pass
            loss = criterion(output, target)    # compute loss
            loss.backward()                     # backward pass
            optimizer.step()                    # update parameters
            if batch_idx % 10 == 0:
                print('Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
    # save the trained model
    torch.save(model, 'cnn.pkl')
We train for 30 epochs in total. The training procedure is as follows:
1. Initialize the model:
model = cnn().to(device)
2. Choose the optimizer; here we use Adam:
optimizer = optim.Adam(model.parameters(), lr=0.00005)
3. Choose the loss function; here we use cross-entropy (a small illustration follows these steps):
criterion = nn.CrossEntropyLoss().to(device)
4. For each batch, move the data to the GPU and cast the labels to the long type the loss expects:
data, target = Variable(data).to(device), Variable(target.long()).to(device)
5. Zero the gradients, run the forward pass, compute the loss, backpropagate, and update the parameters:
optimizer.zero_grad()               # clear gradients
output = model(data)[0]             # forward pass
loss = criterion(output, target)    # compute loss
loss.backward()                     # backward pass
optimizer.step()                    # update parameters
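To illustrate steps 3 and 4: nn.CrossEntropyLoss takes the raw logits produced by the network (softmax is applied internally) together with integer class indices of type long, which is why the targets are converted with target.long(). A minimal sketch with made-up values (0 = cat, 1 = dog, matching find_label in the full code below):
# Made-up logits for two images and their true labels.
logits = torch.tensor([[2.0, 0.5], [0.1, 1.5]])   # raw network outputs
labels = torch.tensor([0, 1]).long()              # class indices: cat, dog
loss = nn.CrossEntropyLoss()(logits, labels)
print(loss.item())                                # a single scalar loss value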
• Testing (GPU)
def test():
    train_loader, test_loader = load_data()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.load('cnn.pkl')  # load the trained model
    total = 0
    current = 0
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)[0]
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)                        # accumulate sample count
        current += (predicted == labels).sum().item()  # accumulate correct predictions
    print('Accuracy: %d%%' % (100 * current / total))
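Because total and current accumulate across the loop, the final print reports accuracy over the entire test set. During evaluation it is also common practice to switch the model to eval mode and disable gradient tracking; a sketch of how the loop above could be wrapped (same names as in test(), not part of the original code):
model.eval()                 # e.g. makes Dropout layers behave deterministically
with torch.no_grad():        # gradients are not needed for testing
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)[0]
        predicted = torch.max(outputs, 1)[1]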
The model was first trained for only 3 epochs, then for 20 epochs, and finally for 30 epochs.
The complete code is attached below:
# -*- coding: utf-8 -*-
"""
@Time :2021/8/18 9:11
@Author :KI
@File :CNN.py
@Motto:Hungry And Humble
"""
import torch
from torch import optim
import torch.nn as nn
from torch.autograd import Variable
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
def Myloader(path):
    return Image.open(path).convert('RGB')


# Build a list of [path, label] pairs
def init_process(path, lens):
    data = []
    name = find_label(path)
    for i in range(lens[0], lens[1]):
        data.append([path % i, name])
    return data


class MyDataset(Dataset):
    def __init__(self, data, transform, loder):
        self.data = data
        self.transform = transform
        self.loader = loder

    def __getitem__(self, item):
        img, label = self.data[item]
        img = self.loader(img)
        img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.data)


# Infer the label from the path pattern: 1 = dog, 0 = cat
def find_label(str):
    first, last = 0, 0
    for i in range(len(str) - 1, -1, -1):
        if str[i] == '%' and str[i - 1] == '.':
            last = i - 1
        if (str[i] == 'c' or str[i] == 'd') and str[i - 1] == '/':
            first = i
            break
    name = str[first:last]
    if name == 'dog':
        return 1
    else:
        return 0


def load_data():
    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.3),
        transforms.RandomVerticalFlip(p=0.3),
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))  # normalize
    ])
    path1 = 'cnn_data/data/training_data/cats/cat.%d.jpg'
    data1 = init_process(path1, [0, 500])
    path2 = 'cnn_data/data/training_data/dogs/dog.%d.jpg'
    data2 = init_process(path2, [0, 500])
    path3 = 'cnn_data/data/testing_data/cats/cat.%d.jpg'
    data3 = init_process(path3, [1000, 1200])
    path4 = 'cnn_data/data/testing_data/dogs/dog.%d.jpg'
    data4 = init_process(path4, [1000, 1200])
    # 1300 training samples
    train_data = data1 + data2 + data3[0:150] + data4[0:150]
    train = MyDataset(train_data, transform=transform, loder=Myloader)
    # 100 test samples
    test_data = data3[150:200] + data4[150:200]
    test = MyDataset(test_data, transform=transform, loder=Myloader)
    train_data = DataLoader(dataset=train, batch_size=10, shuffle=True, num_workers=0)
    test_data = DataLoader(dataset=test, batch_size=1, shuffle=True, num_workers=0)
    return train_data, test_data
class cnn(nn.Module):
    def __init__(self):
        super(cnn, self).__init__()  # initialize the parent nn.Module
        # first convolutional block
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=3,
                out_channels=32,
                kernel_size=3,
                stride=1,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # second convolutional block
        self.conv2 = nn.Sequential(
            nn.Conv2d(
                in_channels=32,
                out_channels=64,
                kernel_size=3,
                stride=1,
                padding=0
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # third convolutional block
        self.conv3 = nn.Sequential(
            nn.Conv2d(
                in_channels=64,
                out_channels=128,
                kernel_size=3,
                stride=1,
                padding=0
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        # fourth convolutional block
        self.conv4 = nn.Sequential(
            nn.Conv2d(
                in_channels=128,
                out_channels=256,
                kernel_size=3,
                stride=1,
                padding=0
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.output = nn.Linear(in_features=256 * 14 * 14, out_features=2)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        temp = x.view(x.shape[0], -1)
        output = self.output(temp)
        return output, x
def train():
    train_loader, test_loader = load_data()
    epoch_num = 30
    # train on GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = cnn().to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.00005)
    criterion = nn.CrossEntropyLoss().to(device)
    for epoch in range(epoch_num):
        for batch_idx, (data, target) in enumerate(train_loader, 0):
            data, target = Variable(data).to(device), Variable(target.long()).to(device)
            optimizer.zero_grad()               # clear gradients
            output = model(data)[0]             # forward pass
            loss = criterion(output, target)    # compute loss
            loss.backward()                     # backward pass
            optimizer.step()                    # update parameters
            if batch_idx % 10 == 0:
                print('Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.item()))
    torch.save(model, 'cnn.pkl')
def test():
    train_loader, test_loader = load_data()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.load('cnn.pkl')  # load the trained model
    total = 0
    current = 0
    for data in test_loader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)[0]
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)                        # accumulate sample count
        current += (predicted == labels).sum().item()  # accumulate correct predictions
    print('Accuracy: %d%%' % (100 * current / total))


if __name__ == '__main__':
    train()
    test()
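Once cnn.pkl has been saved, the model can also be used to classify a single image. Below is a minimal sketch, not part of the original code: the file name my_cat.jpg is just a placeholder, and the transform mirrors the deterministic part of the one in load_data.
# Single-image inference sketch (assumes cnn.pkl was saved by train() above;
# 'my_cat.jpg' is a placeholder path).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = torch.load('cnn.pkl').to(device)
model.eval()
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])
img = transform(Image.open('my_cat.jpg').convert('RGB')).unsqueeze(0).to(device)
with torch.no_grad():
    logits = model(img)[0]
pred = torch.max(logits, 1)[1].item()
print('dog' if pred == 1 else 'cat')   # 1 = dog, 0 = cat, matching find_label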