I have a question about backpropagation
I'm trying to build a neural network with 2 hidden layers and one neuron in the output layer, without any toolboxes, using only matrix and vector multiplications. To do this, I created simple artificial data, shown below, to help me with this task:
%Data
x = 1:1000;
y1 = sind(x);
y2 = sind(x+30);
y3 = cosd(x);
y4 = cosd(x+30);
y5 = cosd(x+45);
% y6 will be the desired output data that I would like my neural network
% to try to predict
y6 = (y1 + y2 + y3 + y4 + y5);
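Just for reference, here is an optional sketch (using only the variables defined above) to visualize the target the network should fit:
% Optional: visualize the target signal the network should learn
figure;
plot(x, y6, 'k');
hold on;
plot(x, y1, 'b--');   % one of the five input signals, for comparison
legend('y6 (target)', 'y1 (input)');
xlabel('x (degrees)');
ylabel('amplitude');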
Then I coded what I thought was the right way, but my neural network can't reach a good result.
My question is whether the result isn't good because my implementation is wrong, or because I need to add more mechanisms to my neural network (like momentum, regularization, etc.).
I will post my code below. Sorry about the names of some variables; I originally wrote this code in Portuguese. I have commented the code to help in understanding it.
%Neural network architecture
n_h1 = 10;
n_h2 = 11;
n_out = 1;
%Adjustable parameters
w1 = rand(5,n_h1);
b1 = ones(1,n_h1)*rand(1,1);
w2 = rand(n_h1,n_h2);
b2 = ones(1,n_h2)*rand(1,1);
w_out = rand(n_h2,n_out);
b_out = ones(1,n_out)*rand(1,1);
sig_a = 1;
learning_rate = 0.001;
limiar = 0.002;
%Helpful variables
max_epocas = 1000;
conj_entrada = [y1;y2;y3;y4;y5];
erros_epoca = [];
%Backpropagation
for epoch = 1:max_epocas
for i = 1:size(conj_entrada,2)
if i == 1
soma = 0;
end
enter = conj_entrada(:,i);
h1_in = [w1;b1]'*[enter;1];
h1_out = sig(h1_in,sig_a,'False');
h2_in = [w2;b2]'*[h1_out;1];
h2_out = sig(h2_in,sig_a,'False');
saida_in = [w_out;b_out]'*[h2_out;1];
saida_out = saida_in;
erro = y6(i) - saida_out;
soma = soma + (erro^2);
%Here starts the part of the code where the gradients are calculated.
%Note that I tried to follow the chain rule here.
%Let me help with the naming: "saida" in Portuguese means "output" in
%English, so when you read, for example, d_erro_d_saida_out, this is the
%derivative of the error with respect to the output of the output layer.
%In the same way, "entrada" means input and "pesos" means weights.
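%(Illustrative worked example of this chain rule, not part of the original
% script: for one output weight w_out(k), with E = 0.5*erro^2 and a linear
% output neuron,
%   dE/dw_out(k) = dE/d_saida_out * d_saida_out/d_saida_in * d_saida_in/dw_out(k)
%                = (-erro)        * 1                       * h2_out(k)
% which is what d_erro_d_pesos_out computes below.)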
%output layer
%chain rule
d_erro_d_saida_out = -1*erro;
d_saida_d_entrada_out = 1; %linear
grad_saida = erro*d_saida_d_entrada_out;
d_entrada_d_pesos_out = h2_out;
d_erro_d_pesos_out = d_erro_d_saida_out*d_saida_d_entrada_out*d_entrada_d_pesos_out;
% Update the weights and bias
w_out = w_out -learning_rate*d_erro_d_pesos_out;
b_out = b_out -learning_rate*d_erro_d_saida_out*d_saida_d_entrada_out;
%Second hidden layer (The neighbor layer of the output layer)
%chain rule
d_erro_d_saida_h2 = -1*w_out*grad_saida;
d_saida_d_entrada_h2 = sig(h2_in,sig_a,'True');
grad_h2 = sum(grad_saida)*d_saida_d_entrada_h2;
d_entrada_d_pesos_h2 = h1_out;
d_erro_d_pesos_h2 = d_entrada_d_pesos_h2*grad_h2';
% Update the weights and bias
w2 = w2 -1*learning_rate*d_erro_d_pesos_h2;
b2 = b2 -1*learning_rate*sum(d_erro_d_saida_h2.*d_saida_d_entrada_h2,1);
%First hidden layer (The neighbor layer of the second hidden layer)
%chain rule
d_erro_d_saida_h1 = -1*w2*grad_h2;
d_saida_d_entrada_h1 = sig(h1_in,sig_a,'True');
grad_h1 = sum(grad_h2)*d_saida_d_entrada_h1; % so a 3x1 has to come out of here
d_entrada_d_pesos_h1 = enter;
d_erro_d_pesos_h1 = d_entrada_d_pesos_h1*grad_h1'; % the second variable has to result in a 1x3
% Update the weights and bias
w1 = w1 -1*learning_rate*d_erro_d_pesos_h1;
b1 = b1 -1*learning_rate*sum(d_erro_d_saida_h1.*d_saida_d_entrada_h1,1);
end
erro_atual = (soma/(2*size(x,2)));
erros_epoca = [erros_epoca;erro_atual];
if erros_epoca(epoch) < limiar
break
end
end
%Testing the output of the neural network
vetor_teste = 1:1000;
resposta_teste = zeros(1,size(vetor_teste,2));
for i = 1:size(vetor_teste,2)
enter_teste = conj_entrada(:,i);
h1_in_teste = [w1;b1]'*[enter_teste;1];
h1_out_teste = sig(h1_in_teste,sig_a,'False');
h2_in_teste = [w2;b2]'*[h1_out_teste;1];
h2_out_teste = sig(h2_in_teste,sig_a,'False');
saida_in_teste = [w_out;b_out]'*[h2_out_teste;1];
saida_out_teste = saida_in_teste; % the output function is linear
resposta_teste(i) = saida_out_teste;
end
plot(1:size(erros_epoca,1),erros_epoca);
% plot(x,y3,'b',vetor_teste,resposta_teste,'r');
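One way I could check whether these analytic gradients are right is a finite-difference (numerical) gradient check. Below is only a minimal sketch for w_out; it assumes the variables from the script above (conj_entrada, y6, w1, b1, w2, b2, w_out, b_out, sig_a) are still in the workspace and reuses the sig function shown further down. The names fwd, eps_fd and num_grad are just illustrative.
% Minimal numerical gradient check (sketch) for the output-layer weights.
% fwd computes the network output for a candidate w_out on one input u.
fwd = @(w_try,u) [w_try;b_out]'*[sig([w2;b2]'*[sig([w1;b1]'*[u;1],sig_a,'False');1],sig_a,'False');1];
i = 1;                                   % sample to check
u = conj_entrada(:,i);
t = y6(i);
eps_fd = 1e-6;                           % finite-difference step
num_grad = zeros(size(w_out));
for k = 1:numel(w_out)
wp = w_out; wp(k) = wp(k) + eps_fd;      % perturb one weight up
wm = w_out; wm(k) = wm(k) - eps_fd;      % ...and down
num_grad(k) = (0.5*(t - fwd(wp,u))^2 - 0.5*(t - fwd(wm,u))^2)/(2*eps_fd);
end
% num_grad should closely match the analytic d_erro_d_pesos_out computed
% for the same sample, before any weights are updated.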
The code of my sigmoid activation function is below:
function [vetor_saida] = sig(vetor_entrada, const1, derivative)
if strcmp(derivative, 'False') == 1
vetor_saida = 1 ./ (1 + exp(-const1 * vetor_entrada));
else
sig_value = sig(vetor_entrada, const1, 'False');
vetor_saida = const1 * sig_value .* (1 - sig_value);
end
end
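And a quick, optional sanity check of the derivative branch (just a sketch; it assumes sig_a from the script above and uses the sig function exactly as defined): the analytic derivative can be compared against a central finite difference.
z = linspace(-5,5,11);                              % a few test inputs
dz = 1e-6;
analytic = sig(z, sig_a, 'True');
numeric = (sig(z + dz, sig_a, 'False') - sig(z - dz, sig_a, 'False'))/(2*dz);
max(abs(analytic - numeric))                        % should be close to zero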