测试环境，看看效果：
% Demo: drive the pong environment with uniformly random actions and draw
% every step into figure 1.
env = single_pong_env(30, 30, 1);   % 30x30 field; render() pauses 1 s per frame
fig.fig  = figure(1);               % figure window the environment draws into
fig.base = 0;                       % 0 -> render() still has to create the graphics objects
fig.show = 1;                       % 1 -> rendering enabled
fig = env.render(fig);
for i = 1:70
    % Pick one of the advertised actions at random.
    env = env.step(randi(numel(env.actions)));
    % step() only writes `observation`; copy it to `agent` so that render()
    % and the next step() see the new paddle position.
    env.agent = env.observation;
    fig = env.render(fig);
    if env.done
        % The paddle missed the ball: start a fresh episode instead of
        % continuing to step a finished one.
        env = env.reset();
    end
end
最终目标是通过挡板的移动，保证小球一直在屏幕上来回运动。
classdef single_pong_env
    % SINGLE_PONG_ENV  Two-dimensional single-paddle pong environment.
    %
    %   The state is a 5x1 column vector
    %       [ball_x; ball_y; ball_vx; ball_vy; paddle_x]
    %   kept in both `agent` and `observation`. step() reads the ball from
    %   `observation` and the paddle from `agent`, and writes its result to
    %   `observation` only; the caller is expected to copy `observation`
    %   into `agent` between steps.
    %
    %   Actions: 1 = keep paddle, 2 = move paddle by +5, 3 = move paddle by -5.
    %   Reward:  +1 for every step, -1000 (and done = 1) when the ball
    %            reaches the bottom and the paddle is not under it.
    %
    %   This is a value class: every method returns the updated object,
    %   e.g.  env = env.step(a);

    % Properties that outside code may read and modify.
    properties
        weight          % horizontal extent of the field (x runs from 0 to weight)
        height          % vertical extent of the field (top wall at y = height)
        actions         % list of valid action codes, [1 2 3]
        fresh_time      % seconds passed to pause() before each rendered frame
        agent           % 5x1 state vector; step()/render() read the paddle from element 5
        observation     % 5x1 state vector produced by the latest step()
        reward          % reward of the latest step()
        done            % 1 when the latest step() ended the episode, else 0
        max_episodes    % set to 1000 by the constructor; not used inside this class
        step_counter    % set to 0 on construction/reset; not updated inside this class
    end

    % Properties that only the class itself may modify.
    properties (SetAccess = private)
        weight_         % construction-time copy of weight, restored by reset()
        height_         % construction-time copy of height, restored by reset()
        fresh_time_     % construction-time copy of fresh_time, restored by reset()
        score           % number of top-wall hits in the current episode
    end

    % Publicly callable methods.
    methods
        function self = single_pong_env(weight, height, fresh_time)
            % Create a new environment.
            %   single_pong_env(w)        square w-by-w field, 0.05 s frame pause
            %   single_pong_env(w, h)     w-by-h field, 0.05 s frame pause
            %   single_pong_env(w, h, t)  w-by-h field, t seconds frame pause
            % Raises an error when called without arguments.
            if nargin < 1
                error('缺少参数');
            end
            if nargin < 2
                height = weight;
            end
            if nargin < 3
                fresh_time = 0.05;
            end
            self.weight = weight;
            self.height = height;
            self.fresh_time = fresh_time;

            self.max_episodes = 1000;
            self.step_counter = 0;
            self.actions = [1 2 3];

            % agent and observation must start from the very same state,
            % so draw the random state once and share it.
            state = single_pong_env.initial_state(self.weight);
            self.agent = state;
            self.observation = state;

            self.reward = 0;
            self.done = 0;
            self.score = 0;

            % Remember the construction parameters for reset().
            self.weight_ = self.weight;
            self.height_ = self.height;
            self.fresh_time_ = self.fresh_time;
        end

        function fig = render(self, fig)
            % Draw the current state into the figure described by `fig`.
            %   fig.show   0 disables rendering (fig is returned untouched)
            %   fig.fig    handle of the figure to draw into
            %   fig.base   0 on the first call; set to 1 once the graphics
            %              objects exist
            %   fig.ball / fig.block   handles created on the first call and
            %              updated on later calls
            % Returns the (possibly updated) fig struct.
            if fig.show == 0
                return
            end
            pause(self.fresh_time)
            set(0, 'CurrentFigure', fig.fig)

            ballPos = self.observation(1:2);
            % Paddle: a 10-wide, 1-high rectangle centred on x, just below y = 0.
            blockVertices = @(x) [x - 5, -1; x + 5, -1; x + 5, 0; x - 5, 0];

            if fig.base
                % Graphics objects already exist: just move them.
                set(fig.ball, 'XData', ballPos(1), 'YData', ballPos(2));
                set(fig.block, 'Vertices', blockVertices(self.agent(5)));
            else
                % First call: configure figure and axes, then create the objects.
                set(gcf, 'units', 'normal', 'position', [.3 .3 .5 .5], ...
                    'color', [.6 .6 .8]);
                % Axis limits leave a margin of 5 around the field.
                set(gca, 'color', 'black', 'position', [.05 .05 .9 .9], ...
                    'XLim', [-5, self.weight + 5], ...
                    'YLim', [-5, self.height + 5], ...
                    'XTick', [], 'YTick', [], 'nextplot', 'add')
                fig.base = 1;
                % Bottom paddle.
                fig.block = patch('Vertices', blockVertices(self.agent(5)), ...
                    'Faces', [1 2 3 4], 'FaceColor', [.6 .8 .6]);
                % Ball.
                fig.ball = plot(ballPos(1), ballPos(2), '.', ...
                    'MarkerSize', 50, 'color', [.8 .6 .6]);
            end
            title(num2str(self.score));
        end

        function self = step(self, action)
            % Advance the environment by one time step.
            %   action  1 = keep paddle, 2 = paddle +5, 3 = paddle -5;
            %           any other value leaves the paddle where it is.
            % Updates observation, reward, done and score and returns the
            % modified object. `agent` is NOT updated here.
            self.done = 0;
            self.reward = 1;            % survival reward; overwritten on a miss

            % --- paddle ---------------------------------------------------
            paddle = self.agent(5);
            if action == 2
                paddle = paddle + 5;
            elseif action == 3
                paddle = paddle - 5;
            end
            paddle = min(max(paddle, 0), self.weight);   % keep paddle on the field

            % --- ball -----------------------------------------------------
            ballPos = self.observation(1:2);
            ballVel = self.observation(3:4);

            % The two axes are handled independently so that a corner hit
            % is resolved in a single step. Reflections force the velocity
            % to point back into the field (instead of blindly negating it)
            % so a ball that is already heading inward is never flipped
            % back out.
            if ballPos(1) > self.weight             % right wall
                ballPos(1) = self.weight;
                ballVel(1) = -abs(ballVel(1));
            elseif ballPos(1) < 0                   % left wall
                ballPos(1) = 0;
                ballVel(1) = abs(ballVel(1));
            end

            if ballPos(2) >= self.height            % top wall: counts as a point
                ballPos(2) = self.height;
                ballVel(2) = -abs(ballVel(2));
                self.score = self.score + 1;
            elseif ballPos(2) < 1                   % bottom row
                % The paddle position from before this step's move decides
                % whether the ball is caught.
                if abs(self.agent(5) - ballPos(1)) < 5
                    ballVel(2) = abs(ballVel(2));   % bounced off the paddle
                else
                    self.reward = -1000;            % paddle missed the ball
                    self.done = 1;
                end
            end

            ballPos = ballPos + ballVel;
            self.observation = [ballPos; ballVel; paddle];
        end

        function self = reset(self)
            % Restore the construction-time parameters and start a new
            % episode with a freshly drawn random initial state.
            self.weight = self.weight_;
            self.height = self.height_;
            self.fresh_time = self.fresh_time_;
            self.actions = [1 2 3];
            self.step_counter = 0;

            state = single_pong_env.initial_state(self.weight);
            self.agent = state;
            self.observation = state;

            self.reward = 0;
            self.done = 0;
            self.score = 0;
        end
    end

    methods (Static, Access = private)
        function state = initial_state(weight)
            % Draw a random initial state [x; y; vx; vy; paddle_x].
            % The paddle starts on a multiple of 5; floor/max keep randi's
            % argument a positive integer even when the width is not a
            % multiple of 5 or is smaller than 5.
            slots = max(1, floor(weight / 5));
            paddle = randi(slots) * 5;
            % NOTE(review): the ball start (10 + paddle) can lie outside
            % the field for large paddle values; step() clamps it on the
            % first update. Confirm this offset is intended.
            vx = randi(2) * 2 - 3;      % -1 or +1, chosen at random
            state = [10 + paddle; 10 + paddle; vx; 1; paddle];
        end
    end
end