Error while running my reinforcement learning model in R2021a: the observation names do not match the input names of the deep neural network (see the m-file below)
clc
clear
warning('off','all')
mdl = 'wind_DFIG_RL';
open_system(mdl)
Ts = 7; % agent sample time (s)
Tf = 7; % simulation stop time (s)
set_param(mdl,'Solver','ode23tb','StopTime',num2str(Tf))
agentblk = 'wind_DFIG_RL/DFIG Wind Turbine/Control/Rotor-side Controls/WG_RL1/RL Agent';
% Define the observation specification observationInfo and the action specification actionInfo.
numObservations = 4;
observationInfo = rlNumericSpec([numObservations 1]);
observationInfo.Name = 'observations';
observationInfo.Description = 'information on error and reference signal';
% Create the action specification.
numActions = 2;
actionInfo = rlNumericSpec([numActions 1]);
actionInfo.Name = 'vqdRef';
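% NOTE (assumption): the valid range of vqdRef is not given in the post.
% If the physical limits are known, specifying finite LowerLimit/UpperLimit on
% actionInfo lets the SAC agent squash its Gaussian policy to that range
% (see the note on the bounded distribution further below). The values here
% are placeholders only:
% actionInfo.LowerLimit = [-1; -1];
% actionInfo.UpperLimit = [ 1;  1];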
% Define the environment.
env = rlSimulinkEnv(mdl,agentblk,observationInfo,actionInfo);
% Randomize the reference rpm at the start of each episode
% (a sketch of localResetFcn is given at the end of this script).
env.ResetFcn = @(in)localResetFcn(in);
%% Create Network Architecture and SAC Agent
% Set the random number generator seed for reproducibility.
rng(0)
% First, create a critic deep neural network structure and representation.
%critic network
% create a network to be used as underlying critic approximator
L = [400 300]; % number of neurons
statePath = [
    imageInputLayer([numObservations 1 1],'Normalization','none','Name','state')
    fullyConnectedLayer(L(1),'Name','CriticStateFC1')
    clippedReluLayer(10,'Name','CriticClip1')
    fullyConnectedLayer(L(2),'Name','CriticStateFC2')];
actionPath = [
    imageInputLayer([numActions 1 1],'Normalization','none','Name','Action')
    fullyConnectedLayer(L(2),'Name','CriticActionFC1')];
commonPath = [
    additionLayer(2,'Name','add')
    clippedReluLayer(10,'Name','CriticCommonClip')
    fullyConnectedLayer(1,'Name','CriticOutput')];
criticNetwork = layerGraph();
criticNetwork = addLayers(criticNetwork,statePath);
criticNetwork = addLayers(criticNetwork,actionPath);
criticNetwork = addLayers(criticNetwork,commonPath);
criticNetwork = connectLayers(criticNetwork,'CriticStateFC2','add/in1');
criticNetwork = connectLayers(criticNetwork,'CriticActionFC1','add/in2');
plot(criticNetwork);
% view the critic network configuration.
% figure plot(criticNetwork)
% Create the critic representations. Use the same network structure for both
% critics. The Soft Actor-Critic (SAC) agent initializes the two networks
% using different default parameters.
criticOpts = rlRepresentationOptions('LearnRate',1e-3, ...
    'GradientThreshold',1,'L2RegularizationFactor',1e-4);
% The 'Observation' and 'Action' names must match the input layer names of
% criticNetwork ('state' and 'Action'), not the spec names or layer sizes.
critic = rlQValueRepresentation(criticNetwork,observationInfo,actionInfo, ...
    'Observation',{'state'},'Action',{'Action'},criticOpts);
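% rlSACAgent below expects two critics, but only one is created in the post.
% Following the note above (same network structure for both critics), a second
% critic is created here from the same layer graph; the agent initializes the
% two sets of weights with different default parameters.
critic2 = rlQValueRepresentation(criticNetwork,observationInfo,actionInfo, ...
    'Observation',{'state'},'Action',{'Action'},criticOpts);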
% Create an actor deep neural network. Do not add a tanhLayer or scalingLayer
% in the mean output path: the SAC agent internally transforms the unbounded
% Gaussian distribution to the bounded distribution to compute the probability
% density function and entropy properly.
statePath = [
    featureInputLayer(numObservations,'Normalization','none','Name','observation')
    fullyConnectedLayer(400,'Name','commonFC1')
    reluLayer('Name','CommonRelu')];
meanPath = [
    fullyConnectedLayer(300,'Name','MeanFC1')
    reluLayer('Name','MeanRelu')
    fullyConnectedLayer(numActions,'Name','Mean')];
stdPath = [
    fullyConnectedLayer(300,'Name','StdFC1')
    reluLayer('Name','StdRelu')
    fullyConnectedLayer(numActions,'Name','StdFC2')
    softplusLayer('Name','StandardDeviation')];
concatPath = concatenationLayer(1,2,'Name','GaussianParameters');
actorNetwork = layerGraph(statePath);
actorNetwork = addLayers(actorNetwork,meanPath);
actorNetwork = addLayers(actorNetwork,stdPath);
actorNetwork = addLayers(actorNetwork,concatPath);
actorNetwork = connectLayers(actorNetwork,'CommonRelu','MeanFC1/in');
actorNetwork = connectLayers(actorNetwork,'CommonRelu','StdFC1/in');
actorNetwork = connectLayers(actorNetwork,'Mean','GaussianParameters/in1');
actorNetwork = connectLayers(actorNetwork,'StandardDeviation','GaussianParameters/in2');
% Create a stochastic actor representation using the deep neural network.
plot(actorNetwork)
actorOptions = rlRepresentationOptions('Optimizer','adam','LearnRate',1e-3, ...
    'GradientThreshold',1,'L2RegularizationFactor',1e-5);
actor = rlStochasticActorRepresentation(actorNetwork,observationInfo,actionInfo, ...
    actorOptions,'Observation',{'observation'});
% Specify the agent options.
agentOptions = rlSACAgentOptions;
agentOptions.SampleTime = Ts;
agentOptions.DiscountFactor = 0.99;
agentOptions.TargetSmoothFactor = 1e-3;
agentOptions.ExperienceBufferLength = 1e6;
agentOptions.MiniBatchSize = 32;
% Create the SAC agent using the actor, the two critics, and the agent options.
agent_DFIG_WG = rlSACAgent(actor,[critic critic2],agentOptions);
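% Optional sanity check (an assumption, not part of the original post): query
% the agent once with a random observation to confirm that the observation and
% action wiring is consistent before starting training.
act = getAction(agent_DFIG_WG,{rand(numObservations,1)});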
%% Train Agent
% To train the agent, first specify the following training options:
% Run the training for at most maxepisodes episodes, with each episode lasting
% at most ceil(Tf/Ts) time steps.
% Display the training progress in the Episode Manager (Plots option) and in
% the command line (Verbose option).
% Stop training when the agent receives an average cumulative reward greater
% than StopTrainingValue over ScoreAveragingWindowLength consecutive episodes.
% At that point, the agent can control and regulate the measured signal.
% For more information, see rlTrainingOptions.
maxepisodes = 20;
maxsteps = ceil(Tf/Ts);
trainOpts = rlTrainingOptions( ...
    'MaxEpisodes',maxepisodes, ...
    'MaxStepsPerEpisode',maxsteps, ...
    'ScoreAveragingWindowLength',5, ...
    'Verbose',true, ...
    'StopTrainingCriteria','AverageReward', ...
    'StopTrainingValue',480, ...
    'Plots','training-progress');
doTraining = true; %true or false
if doTraining
% Train the agent.
Trainingstats = train(agent_DFIG_WG,env,trainOpts);
save('agent_DFIG_WG.mat','agent_DFIG_WG')
else
% Load pretrained agent for the example.
% load('D:\RL files\agent_DFIG_WG.mat','agent_DFIG_WG')
end
%% Validate Trained Agent
% Validate the learned agent against the model by simulation.
simOpts = rlSimulationOptions('MaxSteps',maxsteps);
experiences = sim(env,agent_DFIG_WG,simOpts);
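%% Local reset function
% localResetFcn is referenced when setting env.ResetFcn but is not shown in
% the post. The sketch below is an assumption of what it might look like:
% the variable name 'ref_rpm' and the randomization range are placeholders
% for whatever the model actually randomizes each episode.
function in = localResetFcn(in)
    % Randomize the reference rpm for each training episode (placeholder values).
    refRpm = 1500 + 100*(rand - 0.5);       % hypothetical reference value
    in = setVariable(in,'ref_rpm',refRpm);  % workspace variable assumed to be read by the model
end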