Fashion-MNIST is the dataset we used earlier when introducing TensorFlow. Below we run it again with PyTorch, as an introductory PyTorch example.
Training and monitoring the deep neural network -- FashionMNIST_train.py:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor(),
)
# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)
# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=4, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=256)
# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        conv1_outs = 16
        # in_channels of the first conv layer equals the number of image channels (1 for grayscale, 3 or 4 for color)
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=conv1_outs, kernel_size=(2, 2), stride=(1, 1)),  # out 16*27*27
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),  # the pooling stride defaults to the kernel size  # out 16*13*13
            nn.BatchNorm2d(num_features=conv1_outs, eps=1e-5, momentum=0.1, affine=True),  # num_features follows the conv layer's out_channels
            nn.ReLU(inplace=True)  # does not change the size
        )
        conv2_outs = 32
"""机器学习中,进行模型训练之前,需对数据做归一化处理,使其分布一致。在深度神经网络训练过程中,通常一次训练是一个batch,而非全体数据。
每个batch具有不同的分布产生了internal covarivate shift问题——在训练过程中,数据分布会发生变化,对下一层网络的学习带来困难。Batch
Normalization将数据拉回到标准正态分布上(归一化),一方面使得数据分布一致,另一方面避免梯度消失、梯度爆炸。BatchNorm2d 不改变 size"""
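        # For reference (formula added here, not in the original post): BatchNorm2d normalizes each channel over the (N, H, W) dimensions:
        #   y = (x - mean) / sqrt(var + eps) * gamma + beta, where gamma and beta are learned when affine=True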
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=conv1_outs, out_channels=conv2_outs, kernel_size=(2, 2), stride=(1, 1)),  # out 32*12*12
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),  # the pooling stride defaults to the kernel size  # out 32*6*6
            nn.BatchNorm2d(num_features=conv2_outs, eps=1e-5, momentum=0.1, affine=True),
            nn.ReLU(inplace=True)  # inplace saves memory
        )
        # self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(2, 2))
        self.flatten = nn.Flatten()
        self.fcs = nn.Sequential(
            nn.Linear(1152, 512),  # 1152 = 32*6*6  # the input size of the first fully connected layer has to be computed by hand
            nn.ReLU(inplace=True),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 10),
            nn.ReLU(inplace=True)
        )
        # self.relu = nn.ReLU()
        # nn.Sequential is optional; the layers can also be defined one by one:
        # self.fc1 = nn.Linear(2304, 512)  # 2304 = 16*12*12
        # self.fc2 = nn.Linear(512, 512)
        # self.fc3 = nn.Linear(512, 10)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.flatten(x)  # Flatten keeps the first (batch) dimension and flattens the rest, giving a 2-D tensor
        x = self.fcs(x)
"""nn.ReLu is a class, not a function. When you do x=nn.ReLu(x) you are instantiating the class nn.ReLu,
not computing a relu. You can either replace nn.ReLu by it’s corresponding nn.functional.relu or to instantiate
the activation in the init:self.relu=nn.ReLu() and replace x = nn.ReLU(x) by ``x=self.relu(x)"""
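        # Functional alternative (equivalent to calling self.relu): x = torch.nn.functional.relu(self.fc1(x))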
        # x = self.relu(self.fc1(x))
        # x = self.relu(self.fc2(x))
        # x = self.relu(self.fc3(x))
        return x
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss, correct = 0, 0
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()  # accumulated only for monitoring
        # if batch % 100 == 0:  # training progress monitoring
        #     loss, current = loss.item(), batch * len(X)
        #     print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
    train_loss /= num_batches
    correct /= size
    train_Avg_loss.append(train_loss)
    train_Accuracy.append(100*correct)
    print(f"training Accuracy: {(100*correct):>0.1f}")
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    test_Avg_loss.append(test_loss)
    test_Accuracy.append(100*correct)
    print(f"Test Accuracy: {(100*correct):>0.1f}%, Test Avg loss: {test_loss:>8f}\n")
if __name__ == "__main__":
    for X, y in test_dataloader:
        print(f"Shape of X [N, C, H, W]: {X.shape}")
        print(f"Shape of y: {y.shape} {y.dtype}")
        break
    # Get cpu or gpu device for training.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")
    # device = "cpu"
    model = NeuralNetwork().to(device)
    print(model)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
    # Dynamic learning rate: after every step_size epochs, lr *= gamma.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
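    # With lr=1e-3, step_size=10, gamma=0.5: epochs 1-10 train with lr=1e-3, epochs 11-20 with 5e-4, epochs 21-30 with 2.5e-4, and so on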
    from matplotlib import pyplot as plt
    from matplotlib import ticker
    train_Accuracy = []
    train_Avg_loss = []
    test_Accuracy = []
    test_Avg_loss = []
    epochs = 40
    for t in range(epochs):
        print(f"Epoch {t + 1}:")
        print(f"current learning rate is {scheduler.get_last_lr()[0]}")
        train(train_dataloader, model, loss_fn, optimizer)
        scheduler.step()
        test(test_dataloader, model, loss_fn)
    print("Done.")
    torch.save(model.state_dict(), "FashionMNIST_model.pth")
    print("Saved PyTorch Model State to FashionMNIST_model.pth")
    # Plot the accuracy and the average loss
    plt.subplot(2, 1, 1)
    plt.plot(range(1, epochs + 1), train_Accuracy, "r-", label="train_Accuracy")
    plt.plot(range(1, epochs + 1), test_Accuracy, "b-", label="test_Accuracy")
    plt.xlabel("Epoch")
    xticker_formatter = ticker.FuncFormatter(lambda x, pos: "%d" % x)
    plt.gca().xaxis.set_major_formatter(xticker_formatter)
    plt.ylabel("Accuracy[%]")
    plt.legend()
    plt.grid()
    plt.subplot(2, 1, 2)
    plt.plot(range(1, epochs + 1), train_Avg_loss, "r-", label="train_Avg_loss")
    plt.plot(range(1, epochs + 1), test_Avg_loss, "b-", label="test_Avg_loss")
    plt.xlabel("Epoch")
    plt.gca().xaxis.set_major_formatter(xticker_formatter)
    plt.ylabel("Avg_loss")
    plt.legend()
    plt.grid()
    plt.savefig("Accuracy and loss plot.png")
    plt.show()
The network in the code consists of two convolutional blocks (each with a convolution, pooling, batch normalization and ReLU) and one fully connected block (three fully connected layers with ReLU):
NeuralNetwork(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(2, 2), stride=(1, 1))
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(2, 2), stride=(1, 1))
    (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU(inplace=True)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fcs): Sequential(
    (0): Linear(in_features=1152, out_features=512, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU(inplace=True)
  )
)
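The in_features=1152 of the first fully connected layer can be checked by pushing a dummy tensor through the two convolutional blocks; a minimal sketch (this helper snippet is not part of the training script):
import torch
from FashionMNIST_train import NeuralNetwork

model = NeuralNetwork()
dummy = torch.randn(1, 1, 28, 28)          # one grayscale 28x28 image
out = model.conv2(model.conv1(dummy))      # -> torch.Size([1, 32, 6, 6])
print(out.shape, out.flatten(1).shape[1])  # 32*6*6 = 1152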
After 30 training epochs, the accuracy on the test set reaches 98.0%.
Using the trained network to classify images:
import torch
from FashionMNIST_train import NeuralNetwork, test_data
model = NeuralNetwork()
model.load_state_dict(torch.load("FashionMNIST_model.pth"))
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]
model.eval()
with torch.no_grad():
    for i in range(20):  # predict the first 20 images of the test set
        x, y = test_data[i][0], test_data[i][1]
        x = torch.unsqueeze(x, dim=0)
        # lift from 3-D to 4-D. Not needed for a purely fully connected net, but with conv layers omitting it raises an error like:
        # Expected 4-dimensional input for 4-dimensional weight [16, 1, 2, 2], but got 3-dimensional input of size [1, 28, 28] instead
        pred = model(x)
        predicted, actual = classes[pred[0].argmax(0)], classes[y]
        print(f'Predicted: "{predicted}", Actual: "{actual}"')
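The same idea extends from 20 single images to the whole test set by evaluating in batches with a DataLoader; a minimal sketch, reusing the objects defined above:
import torch
from torch.utils.data import DataLoader
from FashionMNIST_train import NeuralNetwork, test_data

model = NeuralNetwork()
model.load_state_dict(torch.load("FashionMNIST_model.pth"))
model.eval()

loader = DataLoader(test_data, batch_size=256)
correct = 0
with torch.no_grad():
    for X, y in loader:
        correct += (model(X).argmax(1) == y).sum().item()  # count correct predictions per batch
print(f"Accuracy over the test set: {100 * correct / len(test_data):.1f}%")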