% Train the agent with the policy gradient (PG) method; the options for parallel training also need to be configured.
%% Load the environment
clc; clear; close all;   % reset the command window, workspace, and figures
env = rlPredefinedEnv("CartPole-Discrete");   % predefined cart-pole environment with a discrete action space
obsInfo = getObservationInfo(env);
numObservations = obsInfo.Dimension(1);       % size of the observation vector
actInfo = getActionInfo(env);
rng(0)                                        % fix the random seed for reproducibility
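% Optional sanity check (not in the original script): list the discrete
% action set so the actor's output size below can be verified against it.
disp(actInfo.Elements)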
%% Create the agent
actorNetwork = [
    imageInputLayer([numObservations 1 1],'Normalization','none','Name','state')
    fullyConnectedLayer(2,'Name','action')];   % one output per discrete action
actorOpts = rlRepresentationOptions('LearnRate',1e-2,'GradientThreshold',1);
% rlRepresentation is the pre-R2019b representation API; newer releases use
% rlStochasticActorRepresentation (or rlDiscreteCategoricalActor) instead.
actor = rlRepresentation(actorNetwork,obsInfo,actInfo,'Observation',{'state'},'Action',{'action'},actorOpts);
agent = rlPGAgent(actor);
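% A minimal optional sketch (assumption, left commented out): agent behaviour
% such as the discount factor can be customized through rlPGAgentOptions
% before constructing the agent.
% agentOpts = rlPGAgentOptions('UseBaseline',false,'DiscountFactor',0.99);
% agent = rlPGAgent(actor,agentOpts);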
%% Configure the training options
trainOpts = rlTrainingOptions(...
    'MaxEpisodes', 1000, ...
    'MaxStepsPerEpisode', 200, ...
    'Verbose', false, ...
    'Plots','training-progress',...
    'StopTrainingCriteria','AverageReward',...
    'StopTrainingValue',195,...
    'ScoreAveragingWindowLength',100);   % stop when the 100-episode average reward reaches 195
plot(env)   % visualize the cart-pole environment
%% Parallel training options
trainOpts.UseParallel = true;                                          % enable training on parallel workers
trainOpts.ParallelizationOptions.Mode = "async";                       % asynchronous updates: workers do not wait for each other
trainOpts.ParallelizationOptions.DataToSendFromWorkers = "Gradients";  % workers send computed gradients back to the host
trainOpts.ParallelizationOptions.StepsUntilDataIsSent = -1;            % -1: send data at the end of each episode
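% Optional sketch (assumption: the Parallel Computing Toolbox is installed):
% start a parallel pool explicitly; otherwise train opens the default pool
% automatically when UseParallel is true.
if isempty(gcp('nocreate'))
    parpool;
end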
%% Train the agent
trainingStats = train(agent,env,trainOpts);
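% Optional: save the trained agent for later reuse (the file name below is a
% hypothetical choice, not part of the original script).
save('trainedCartPolePGAgent.mat','agent');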
%% Simulate the trained agent
simOptions = rlSimulationOptions('MaxSteps',500);   % simulate one episode of up to 500 steps
experience = sim(env,agent,simOptions);
totalReward = sum(experience.Reward);               % total reward collected during the simulation
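% Optional check (assumption): display the total reward and plot the per-step
% reward trace returned by sim (experience.Reward is a timeseries).
disp(totalReward)
figure
plot(experience.Reward)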