Contents
- 1. The nn Module
- 2. The torch.optim Optimizer
- 3. Custom nn Modules
- 4. Weight Sharing
Reference: http://pytorch123.com/
1. The nn Module
Use torch.nn.Sequential to build the model, much like in Keras:

import torch

N, D_in, Hidden_size, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, Hidden_size),
    torch.nn.ReLU(),
    torch.nn.Linear(Hidden_size, D_out)
)

# loss function
loss_fn = torch.nn.MSELoss(reduction='sum')
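As a quick sanity check (an illustrative addition, not from the referenced tutorial), printing a Sequential model lists its layers in order, and named_parameters() shows the tensors that will be trained; the printed form looks roughly like the comments below:

print(model)
# Sequential(
#   (0): Linear(in_features=1000, out_features=100, bias=True)
#   (1): ReLU()
#   (2): Linear(in_features=100, out_features=10, bias=True)
# )
for name, p in model.named_parameters():
    print(name, p.shape)   # 0.weight, 0.bias, 2.weight, 2.bias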
learning_rate = 1e-4
loss_list = []
for t in range(500):
    y_pred = model(x)              # forward pass
    loss = loss_fn(y_pred, y)      # compute the loss
    loss_list.append(loss.item())
    print(t, loss.item())

    model.zero_grad()              # zero the gradients
    loss.backward()                # backward pass: compute gradients
    with torch.no_grad():          # update parameters outside the autograd graph
        for param in model.parameters():
            param -= learning_rate * param.grad  # gradient descent step
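The torch.no_grad() context matters here: the parameters require gradients, and autograd rejects an in-place update on such a leaf tensor outside of no_grad(). A minimal illustration (not part of the original code):

import torch

w = torch.randn(3, requires_grad=True)
try:
    w -= 0.1 * torch.ones(3)      # in-place update tracked by autograd
except RuntimeError as e:
    print(e)                      # complains that a leaf Variable requiring grad is used in an in-place operation

with torch.no_grad():
    w -= 0.1 * torch.ones(3)      # fine: the update is not recorded in the graph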
# plot the loss curve
import pandas as pd

loss_curve = pd.DataFrame(loss_list, columns=['loss'])
loss_curve.plot()
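loss_curve.plot() renders directly in a notebook; when running the code as a plain script, you would typically add a matplotlib call to show or save the figure (a small sketch, assuming matplotlib is available):

import matplotlib.pyplot as plt

loss_curve.plot()
plt.xlabel('iteration')
plt.ylabel('loss')
plt.show()                        # or plt.savefig('loss.png')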
2. The torch.optim Optimizer
Use torch.optim.Adam to manage the parameter updates: optimizer.zero_grad() zeroes the gradients, and optimizer.step() updates the parameters.
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_list = []
for t in range(500):
    y_pred = model(x)              # forward pass
    loss = loss_fn(y_pred, y)      # compute the loss
    loss_list.append(loss.item())
    print(t, loss.item())

    optimizer.zero_grad()          # zero the gradients
    loss.backward()                # backward pass: compute gradients
    optimizer.step()               # update the parameters
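The same training loop works unchanged with other optimizers; only the constructor differs. For example (illustrative hyperparameters, not from the tutorial):

# plain SGD with momentum; zero_grad() / step() are used exactly as above
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

# or RMSprop
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-4)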
3. Custom nn Modules
- Subclass nn.Module and define the forward method (the forward pass).
import torch

class myModel(torch.nn.Module):
    def __init__(self, D_in, Hidden_size, D_out):
        super(myModel, self).__init__()
        self.fc1 = torch.nn.Linear(D_in, Hidden_size)
        self.fc2 = torch.nn.Linear(Hidden_size, D_out)

    def forward(self, x):
        x = self.fc1(x).clamp(min=0)  # clamp limits values to [min, max]; with min=0 it acts as ReLU
        x = self.fc2(x)
        return x
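clamp(min=0) is exactly an element-wise ReLU, so torch.nn.functional.relu could be used instead; a quick check of the equivalence (an illustrative addition):

import torch
import torch.nn.functional as F

t = torch.randn(5)
print(torch.equal(t.clamp(min=0), F.relu(t)))  # True: clamp(min=0) and ReLU agree element-wise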
N, D_in, Hidden_size, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = myModel(D_in, Hidden_size, D_out)  # the custom model
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

loss_val = []
for t in range(500):
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss_val.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

import pandas as pd
loss_val = pd.DataFrame(loss_val, columns=['loss'])
loss_val.plot()
4. Weight Sharing
- Build a toy model with three kinds of fully connected layers; the middle shareFC layer is applied 0-3 times (chosen at random) in a for loop, and every one of those applications shares the same parameters.
import random
import torch

class shareParamsModel(torch.nn.Module):
    def __init__(self, D_in, Hidden_size, D_out):
        super(shareParamsModel, self).__init__()
        self.inputFC = torch.nn.Linear(D_in, Hidden_size)
        self.shareFC = torch.nn.Linear(Hidden_size, Hidden_size)
        self.outputFC = torch.nn.Linear(Hidden_size, D_out)
        self.sharelayers = 0  # records how many shared layers were drawn in the last forward pass

    def forward(self, x):
        x = self.inputFC(x).clamp(min=0)
        self.sharelayers = 0
        for _ in range(random.randint(0, 3)):
            x = self.shareFC(x).clamp(min=0)  # the same shareFC weights are reused on every pass through the loop
            self.sharelayers += 1
        x = self.outputFC(x)
        return x
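Because the same shareFC module is called repeatedly inside one forward pass, there is still only a single hidden-to-hidden weight and bias, and autograd accumulates the gradient contributions from every application into those tensors. A quick check (illustrative, using the class defined above):

m = shareParamsModel(1000, 100, 10)
out = m(torch.randn(4, 1000))
out.sum().backward()

print(m.sharelayers)                       # how many times shareFC was applied in this pass
print(m.shareFC.weight.shape)              # torch.Size([100, 100]) -- one shared weight matrix
print(m.shareFC.weight.grad is not None)   # True whenever sharelayers > 0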
N, D_in, Hidden_size, D_out = 64, 1000, 100, 10
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = shareParamsModel(D_in, Hidden_size, D_out)
loss_fn = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

loss_val = []
for t in range(500):
    y_pred = model(x)
    print('share layers: ', model.sharelayers)
    loss = loss_fn(y_pred, y)
    loss_val.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

for p in model.parameters():
    print(p.size())

import pandas as pd
loss_val = pd.DataFrame(loss_val, columns=['loss'])
loss_val.plot()
Output:
share layers: 1
share layers: 0
share layers: 2
share layers: 1
share layers: 2
share layers: 1
share layers: 0
share layers: 1
share layers: 0
share layers: 0
share layers: 3
share layers: 3
... (omitted)
The parameter shapes, over multiple runs, are always the following:
torch.Size([100, 1000])
torch.Size([100])
torch.Size([100, 100])
torch.Size([100])
torch.Size([10, 100])
torch.Size([10])
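These six tensors are just the weight and bias of inputFC, shareFC and outputFC; repeating shareFC in the forward pass reuses them rather than creating new parameters, so the total parameter count is fixed. A small check (illustrative):

total = sum(p.numel() for p in model.parameters())
print(total)   # 100*1000 + 100 + 100*100 + 100 + 10*100 + 10 = 111210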