RNN
RNN Training
- s_t = tanh(U x_t + W s_{t-1})
- The classifier outputs a probability for each class.
- Cross-entropy loss function:
- E_t(y_t, ŷ_t) = -y_t log(ŷ_t), where y_t is the ground truth at time t, a one-hot vector (a single 1, all other entries 0), while ŷ_t has the same dimension but is a probability vector, so the per-step losses are accumulated over the whole sequence:
- E(y, ŷ) = Σ_t E_t(y_t, ŷ_t)
- Optimization uses BPTT (backpropagation through time), because the RNN's memory at each step depends not only on the current input but also on the memory carried over from previous steps, so the gradient has to flow back through the unrolled sequence (see the numpy sketch below).
- ![](https://raw.githubusercontent.com/errolyan/tuchuang/master/uPic/BRjOxP.png)
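The formulas above map directly onto a few lines of numpy. The sketch below is only an illustration of the forward step and the cross-entropy loss, separate from the full implementation in the next section: the output matrix V, the softmax classifier and the names softmax / forward_and_loss are assumptions for illustration, while U and W play the roles they have in s_t = tanh(U x_t + W s_{t-1}).

```python
import numpy as np

def softmax(z):
    # numerically stable softmax
    e = np.exp(z - np.max(z))
    return e / e.sum()

def forward_and_loss(xs, ys, U, W, V):
    """xs: list of input vectors x_t; ys: list of one-hot targets y_t."""
    s = np.zeros(W.shape[0])                  # s_0 = 0
    loss = 0.0
    for x_t, y_t in zip(xs, ys):
        s = np.tanh(U @ x_t + W @ s)          # s_t = tanh(U x_t + W s_{t-1})
        y_hat = softmax(V @ s)                # class probabilities ŷ_t
        loss += -np.sum(y_t * np.log(y_hat))  # E_t(y_t, ŷ_t) = -y_t · log(ŷ_t)
    return loss                               # E = Σ_t E_t

# toy usage: 3 time steps, 4-dim inputs, 5 hidden units, 3 classes
rng = np.random.default_rng(0)
U = rng.normal(size=(5, 4))
W = rng.normal(size=(5, 5))
V = rng.normal(size=(3, 5))   # V is an assumed output matrix, not named in the text above
xs = [rng.normal(size=4) for _ in range(3)]
ys = [np.eye(3)[rng.integers(3)] for _ in range(3)]
print(forward_and_loss(xs, ys, U, W, V))
```

Note that the numpy reproduction below uses a sigmoid output with a squared-error delta rather than a softmax with cross-entropy; the sketch here follows the formulas, not the script.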
numpy implementation
```python
# -*- coding:utf-8 -*-
# /usr/bin/python
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
class rnn():
    def __init__(self, epoch=10000, ALPHA=0.1, HIDDEN_DIM=10, checkpoint_path="./"):
        '''Initialize hyperparameters'''
        # data-dependent dimensions, filled in by train()
        self.BIN_DIM = 0
        self.INPUT_DIM = 0
        self.HIDDEN_DIM = HIDDEN_DIM
        self.OUTPUT_DIM = 0
        self.ALPHA = ALPHA
        self.ITER_NUM = epoch
        self.LOG_ITER = self.ITER_NUM // 10
        self.PLOT_ITER = self.ITER_NUM // 200
        self.checkpoint_path = checkpoint_path

    def init(self):
        '''Initialize weights and gradient accumulators'''
        self.w0 = np.random.normal(0, 1, [self.INPUT_DIM, self.HIDDEN_DIM])   # input -> hidden
        self.w1 = np.random.normal(0, 1, [self.HIDDEN_DIM, self.OUTPUT_DIM])  # hidden -> output
        self.wh = np.random.normal(0, 2, [self.HIDDEN_DIM, self.HIDDEN_DIM])  # hidden -> hidden (recurrent)
        self.d0 = np.zeros_like(self.w0)
        self.d1 = np.zeros_like(self.w1)
        self.dh = np.zeros_like(self.wh)

    def saveweight(self):
        '''Save the weights'''
        weight = (self.w0, self.w1, self.wh, self.d0, self.d1, self.dh)
        name = os.path.join(self.checkpoint_path, "weight.pkl")
        with open(name, "wb") as wg:
            pickle.dump(weight, wg)

    def sigmoid(self, x):
        '''Activation function'''
        return 1 / (1 + np.exp(-x))

    def deriv_sigmoid(self, out):
        '''Derivative of the activation, expressed via its output'''
        return out * (1 - out)

    def bin2dec(self, b):
        '''Convert a binary array (MSB first) to a decimal integer'''
        out = 0
        for i, x in enumerate(b[::-1]):
            out += x * pow(2, i)
        return out
    def forward_propagation(self, x_train, y_train):
        '''Forward propagation'''
        overall_err = 0  # total error accumulated over all time steps
        pred = np.zeros_like(y_train)
        output_deltas = list()
        hidden_values = list()
        hidden_values.append(np.zeros(self.HIDDEN_DIM))
        for pos in range(self.BIN_DIM)[::-1]:
            X = np.array([x_train[pos]])  # shape=(1, 2)
            Y = np.array([y_train[pos]])  # shape=(1, 1)
            hidden = self.sigmoid(np.dot(X, self.w0) + np.dot(hidden_values[-1], self.wh))
            output = self.sigmoid(np.dot(hidden, self.w1))
            pred[pos] = np.round(output[0][0])
            # squared-error delta at the output
            output_err = Y - output
            output_deltas.append(output_err * self.deriv_sigmoid(output))
            hidden_values.append(hidden)
            overall_err += np.abs(output_err[0])
        return hidden_values, output_deltas, overall_err, pred

    def backpropagation(self, x_train, hidden_values, output_deltas):
        '''Backpropagation through time (BPTT)'''
        future_delta = np.zeros(self.HIDDEN_DIM)
        for pos in range(self.BIN_DIM):
            X = np.array([x_train[pos]])
            hidden = hidden_values[-(pos + 1)]
            prev_hidden = hidden_values[-(pos + 2)]
            output_delta = output_deltas[-(pos + 1)]
            hidden_delta = (np.dot(future_delta, self.wh.T) + np.dot(output_delta, self.w1.T)) * self.deriv_sigmoid(hidden)
            # accumulate gradients over all time steps
            self.d1 += np.dot(np.atleast_2d(hidden).T, output_delta)
            self.dh += np.dot(np.atleast_2d(prev_hidden).T, hidden_delta)
            self.d0 += np.dot(X.T, hidden_delta)
            future_delta = hidden_delta
        # apply the accumulated updates, then reset the accumulators
        self.w1 += self.ALPHA * self.d1
        self.w0 += self.ALPHA * self.d0
        self.wh += self.ALPHA * self.dh
        self.d1 *= 0
        self.d0 *= 0
        self.dh *= 0

    def acc(self, pred, c_dec):
        '''Accuracy check: does the decoded prediction match the target value?'''
        if self.bin2dec(pred) == c_dec:
            rnn.accuracy = 1
    def train(self, x_train, y_train):
        '''Training loop'''
        self.BIN_DIM = x_train.shape[0]
        self.INPUT_DIM = x_train.shape[1]
        self.OUTPUT_DIM = y_train.shape[1]
        self.init()
        self.errs = list()
        self.accs = list()
        error = 0
        accuracy = 0
        for i in range(self.ITER_NUM + 1):
            hidden_values, output_deltas, overall_err, pred = self.forward_propagation(x_train, y_train)
            self.backpropagation(x_train, hidden_values, output_deltas)
            error += overall_err
            if i % self.PLOT_ITER == 0:
                self.errs.append(error / self.PLOT_ITER)
                self.accs.append(accuracy / self.PLOT_ITER)
                error = 0
                accuracy = 0
            if i % self.LOG_ITER == 0:
                print('Iter', i)
                print("Error :", overall_err)
                print('----------')
        self.saveweight()

    def predict(self):
        '''Prediction'''
        # load the weights
        name = os.path.join(self.checkpoint_path, "weight.pkl")
        with open(name, "rb") as wg:
            (self.w0, self.w1, self.wh, self.d0, self.d1, self.dh) = pickle.load(wg)
        # predict a single bit for the input pair (0, 0)
        X = np.array([[0, 0]])  # shape=(1, 2)
        hidden = self.sigmoid(np.dot(X, self.w0) + np.dot(np.zeros(self.HIDDEN_DIM), self.wh))
        output = self.sigmoid(np.dot(hidden, self.w1))
        result = np.round(output[0][0])
        print("Prediction:", result)

    def show(self):
        '''Plot the training curves'''
        plt.plot(self.errs, label='error')
        plt.plot(self.accs, label='accuracy')
        plt.legend()
        plt.show()
def generate_data():
    # a + b = c, encoded as 8-bit binary numbers
    BIN_DIM = 8
    largest = pow(2, BIN_DIM)
    a_dec = np.random.randint(largest // 2)
    b_dec = np.random.randint(largest // 2)
    c_dec = a_dec + b_dec
    # lookup table: decimal value -> 8-bit binary representation
    decimal = np.array([range(largest)]).astype(np.uint8).T
    binary = np.unpackbits(decimal, axis=1)
    a_bin = binary[a_dec]
    b_bin = binary[b_dec]
    c_bin = binary[c_dec]
    x_train = np.array([[0, 0]])
    y_train = np.array([[0]])
    for pos in range(BIN_DIM)[::-1]:
        X = np.array([[a_bin[pos], b_bin[pos]]])  # shape=(1, 2)
        x_train = np.concatenate((x_train, X), axis=0)
        Y = np.array([[c_bin[pos]]])  # shape=(1, 1)
        y_train = np.concatenate((y_train, Y), axis=0)
    return x_train, y_train

x_train, y_train = generate_data()
newrnn = rnn(epoch=10000, ALPHA=0.2, HIDDEN_DIM=10, checkpoint_path="./")
newrnn.train(x_train, y_train)
newrnn.predict()
```
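The script's predict() only scores a single (0, 0) input bit. A quick way to sanity-check the trained adder, assuming newrnn, x_train and y_train from the script above are still in scope, is to run one more forward pass on the same training pair and decode the predicted bit string with the class's own bin2dec:

```python
# Usage sketch (not part of the original script): decode the network's output
# on the training pair and compare it with the true sum a + b = c.
hidden_values, output_deltas, overall_err, pred = newrnn.forward_propagation(x_train, y_train)
print("predicted bits:", pred.ravel())
print("target bits   :", y_train.ravel())
print("predicted sum :", newrnn.bin2dec(pred.ravel()))
print("target sum    :", newrnn.bin2dec(y_train.ravel()))
```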