fminunc limit step size
I am exploring fminunc with a 2-D but challenging function (see below), with a narrow and curved "canyon". Some times it works fine, descending to the bottom of the canyon and following it until the minimum. But at other starting points it makes a huge initial step that brings it nowhere. Isn’t there any way to limit the step size? Notice that first and second derivatives are analytic, i.e. no finite differences.
% spiralFuncion.m
% A challenging function to minimize in 2D
% JMS, Jun.2024
clear all
% Find function in a mesh
dx = 0.05;
xmax = 2.5;
x = -xmax:dx:xmax;
nx = numel(x);
[x,y] = ndgrid(x,x);
z = myfunc([x(:)’;y(:)’]);
z = reshape(z,nx,nx);
% Find minimum value at mesh points
i0 = find(z(:)==min(z(:)));
xmin = [x(i0),y(i0)]
zmin = z(i0)
% Plot function using surf and contour
figure(1)
contour(x’,y’,z’,20), axis equal
hold on, plot(xmin(1),xmin(2),’x’,’MarkerSize’,10), hold off
grid on
figure(2)
surf(x’,y’,z’)
grid on
% Set minimization options
opt = optimoptions(‘fminunc’);
opt = optimoptions(opt,’Algorithm’,’trust-region’);
opt = optimoptions(opt,’SubproblemAlgorithm’,’factorization’);
opt = optimoptions(opt,’SpecifyObjectiveGradient’,true);
opt = optimoptions(opt,’HessianFcn’,’objective’);
opt = optimoptions(opt,’MaxIterations’,1e3);
opt = optimoptions(opt,’MaxFunctionEvaluations’,1e3);
opt = optimoptions(opt,’Display’,’none’);
% Minimize function starting from a random point
x0 = randn(2,1);
[x,f,exitflag,output] = fminunc(@myfunc,x0,opt);
niter = output.iterations
fprintf(‘n%sn’,output.message)
% Find minimization path
for iter = 1:niter
opt = optimoptions(opt,’MaxIterations’,iter);
xpath(:,iter) = fminunc(@myfunc,x0,opt);
end
% opt = optimoptions(opt,’MaxIterations’,1);
% xpath(:,1) = fminunc(@myfunc,x0,opt);
% for iter = 2:niter
% xpath(:,iter) = fminunc(@myfunc,xpath(:,iter-1),opt);
% end
% Plot minimization path
figure(1)
hold on,
plot([x0(1),xpath(1,:)],[x0(2),xpath(2,:)],’r.-‘,’LineWidth’,2,’MarkerSize’,15)
plot(x(1),x(2),’.b’,’MarkerSize’,20)
hold off
%———————————
function [f,DfDx,D2fDx2] = myfunc(x)
a = 1; % smaller a => harder to minimize
r = sqrt(x(1,:).^2+x(2,:).^2);
s = atan2(x(2,:),x(1,:));
rs = 2*pi*r+s;
f = r.*exp(-a*r).*cos(rs);
% Firts derivatives
ts = x(2,:)./x(1,:); % ts=tan(s)
trs = tan(rs);
DtsDs = 1+ts.^2;
DtrsDrs = 1+trs.^2;
DsDts = 1./DtsDs;
DrsDr = 2*pi;
DrsDs = 1;
DrDx = x./r;
DtsDx = [ -x(2,:)./x(1,:).^2; 1./x(1,:) ];
DsDx = DsDts.*DtsDx;
DfDr = f./r – a*f – f.*trs.*DrsDr;
DfDs = -f.*trs.*DrsDs;
DfDx = DfDr.*DrDx + DfDs.*DsDx;
% Second derivatives
nx = size(x,2);
D2tsDs2 = 2*ts.*DtsDs;
D2trsDrs2 = 2*trs.*DtrsDrs;
D2rsDr2 = 0;
D2rsDs2 = 0;
D2rsDrDs = 0;
% note: d2x/dy2 = d(dy/dx)^-1/dy = d(dy/dx)^-1/dx * dx/dy =
% = -(dy/dx)^-2 * d2y/dx2 * (dy/dx)^-1 = -(dy/dx)^-3 * d2y/dx2
D2sDts2 = -D2tsDs2./DtsDs.^3;
D2fDr2 = DfDr./r – f./r.^2 – a*DfDr – DfDr.*trs.*DrsDr …
– f.*DtrsDrs.*DrsDr.^2 – f.*trs.*D2rsDr2;
D2fDs2 = – DfDs.*trs.*DrsDs – f.*DtrsDrs.*DrsDs.^2 …
– f.*trs.*D2rsDs2;
D2fDrDs = – DfDr.*trs.*DrsDs – f.*DtrsDrs.*DrsDr.*DrsDs …
– f.*trs.*D2rsDrDs;
for ix = 1:nx
D2rDx2 = eye(2)/r(ix) – x(:,ix).*x(:,ix)’/r(ix)^3;
D2tsDx2 = [ +2*x(2,ix)/x(1,ix).^3, -1/x(1,ix)^2
-1/x(1,ix).^2, 0 ];
D2sDx2 = D2sDts2(ix)*DtsDx(:,ix).*DtsDx(:,ix)’ + DsDts(ix)*D2tsDx2;
D2fDx2(:,:,ix) = D2fDr2(ix) *DrDx(:,ix).*DrDx(:,ix)’ …
+ D2fDs2(ix) *DsDx(:,ix).*DsDx(:,ix)’ …
+ D2fDrDs(ix)*DrDx(:,ix).*DsDx(:,ix)’ …
+ D2fDrDs(ix)*DsDx(:,ix).*DrDx(:,ix)’ …
+ DfDr(ix)*D2rDx2 + DfDs(ix)*D2sDx2;
end
endI am exploring fminunc with a 2-D but challenging function (see below), with a narrow and curved "canyon". Some times it works fine, descending to the bottom of the canyon and following it until the minimum. But at other starting points it makes a huge initial step that brings it nowhere. Isn’t there any way to limit the step size? Notice that first and second derivatives are analytic, i.e. no finite differences.
% spiralFuncion.m
% A challenging function to minimize in 2D
% JMS, Jun.2024
clear all
% Find function in a mesh
dx = 0.05;
xmax = 2.5;
x = -xmax:dx:xmax;
nx = numel(x);
[x,y] = ndgrid(x,x);
z = myfunc([x(:)’;y(:)’]);
z = reshape(z,nx,nx);
% Find minimum value at mesh points
i0 = find(z(:)==min(z(:)));
xmin = [x(i0),y(i0)]
zmin = z(i0)
% Plot function using surf and contour
figure(1)
contour(x’,y’,z’,20), axis equal
hold on, plot(xmin(1),xmin(2),’x’,’MarkerSize’,10), hold off
grid on
figure(2)
surf(x’,y’,z’)
grid on
% Set minimization options
opt = optimoptions(‘fminunc’);
opt = optimoptions(opt,’Algorithm’,’trust-region’);
opt = optimoptions(opt,’SubproblemAlgorithm’,’factorization’);
opt = optimoptions(opt,’SpecifyObjectiveGradient’,true);
opt = optimoptions(opt,’HessianFcn’,’objective’);
opt = optimoptions(opt,’MaxIterations’,1e3);
opt = optimoptions(opt,’MaxFunctionEvaluations’,1e3);
opt = optimoptions(opt,’Display’,’none’);
% Minimize function starting from a random point
x0 = randn(2,1);
[x,f,exitflag,output] = fminunc(@myfunc,x0,opt);
niter = output.iterations
fprintf(‘n%sn’,output.message)
% Find minimization path
for iter = 1:niter
opt = optimoptions(opt,’MaxIterations’,iter);
xpath(:,iter) = fminunc(@myfunc,x0,opt);
end
% opt = optimoptions(opt,’MaxIterations’,1);
% xpath(:,1) = fminunc(@myfunc,x0,opt);
% for iter = 2:niter
% xpath(:,iter) = fminunc(@myfunc,xpath(:,iter-1),opt);
% end
% Plot minimization path
figure(1)
hold on,
plot([x0(1),xpath(1,:)],[x0(2),xpath(2,:)],’r.-‘,’LineWidth’,2,’MarkerSize’,15)
plot(x(1),x(2),’.b’,’MarkerSize’,20)
hold off
%———————————
function [f,DfDx,D2fDx2] = myfunc(x)
a = 1; % smaller a => harder to minimize
r = sqrt(x(1,:).^2+x(2,:).^2);
s = atan2(x(2,:),x(1,:));
rs = 2*pi*r+s;
f = r.*exp(-a*r).*cos(rs);
% Firts derivatives
ts = x(2,:)./x(1,:); % ts=tan(s)
trs = tan(rs);
DtsDs = 1+ts.^2;
DtrsDrs = 1+trs.^2;
DsDts = 1./DtsDs;
DrsDr = 2*pi;
DrsDs = 1;
DrDx = x./r;
DtsDx = [ -x(2,:)./x(1,:).^2; 1./x(1,:) ];
DsDx = DsDts.*DtsDx;
DfDr = f./r – a*f – f.*trs.*DrsDr;
DfDs = -f.*trs.*DrsDs;
DfDx = DfDr.*DrDx + DfDs.*DsDx;
% Second derivatives
nx = size(x,2);
D2tsDs2 = 2*ts.*DtsDs;
D2trsDrs2 = 2*trs.*DtrsDrs;
D2rsDr2 = 0;
D2rsDs2 = 0;
D2rsDrDs = 0;
% note: d2x/dy2 = d(dy/dx)^-1/dy = d(dy/dx)^-1/dx * dx/dy =
% = -(dy/dx)^-2 * d2y/dx2 * (dy/dx)^-1 = -(dy/dx)^-3 * d2y/dx2
D2sDts2 = -D2tsDs2./DtsDs.^3;
D2fDr2 = DfDr./r – f./r.^2 – a*DfDr – DfDr.*trs.*DrsDr …
– f.*DtrsDrs.*DrsDr.^2 – f.*trs.*D2rsDr2;
D2fDs2 = – DfDs.*trs.*DrsDs – f.*DtrsDrs.*DrsDs.^2 …
– f.*trs.*D2rsDs2;
D2fDrDs = – DfDr.*trs.*DrsDs – f.*DtrsDrs.*DrsDr.*DrsDs …
– f.*trs.*D2rsDrDs;
for ix = 1:nx
D2rDx2 = eye(2)/r(ix) – x(:,ix).*x(:,ix)’/r(ix)^3;
D2tsDx2 = [ +2*x(2,ix)/x(1,ix).^3, -1/x(1,ix)^2
-1/x(1,ix).^2, 0 ];
D2sDx2 = D2sDts2(ix)*DtsDx(:,ix).*DtsDx(:,ix)’ + DsDts(ix)*D2tsDx2;
D2fDx2(:,:,ix) = D2fDr2(ix) *DrDx(:,ix).*DrDx(:,ix)’ …
+ D2fDs2(ix) *DsDx(:,ix).*DsDx(:,ix)’ …
+ D2fDrDs(ix)*DrDx(:,ix).*DsDx(:,ix)’ …
+ D2fDrDs(ix)*DsDx(:,ix).*DrDx(:,ix)’ …
+ DfDr(ix)*D2rDx2 + DfDs(ix)*D2sDx2;
end
end I am exploring fminunc with a 2-D but challenging function (see below), with a narrow and curved "canyon". Some times it works fine, descending to the bottom of the canyon and following it until the minimum. But at other starting points it makes a huge initial step that brings it nowhere. Isn’t there any way to limit the step size? Notice that first and second derivatives are analytic, i.e. no finite differences.
% spiralFuncion.m
% A challenging function to minimize in 2D
% JMS, Jun.2024
clear all
% Find function in a mesh
dx = 0.05;
xmax = 2.5;
x = -xmax:dx:xmax;
nx = numel(x);
[x,y] = ndgrid(x,x);
z = myfunc([x(:)’;y(:)’]);
z = reshape(z,nx,nx);
% Find minimum value at mesh points
i0 = find(z(:)==min(z(:)));
xmin = [x(i0),y(i0)]
zmin = z(i0)
% Plot function using surf and contour
figure(1)
contour(x’,y’,z’,20), axis equal
hold on, plot(xmin(1),xmin(2),’x’,’MarkerSize’,10), hold off
grid on
figure(2)
surf(x’,y’,z’)
grid on
% Set minimization options
opt = optimoptions(‘fminunc’);
opt = optimoptions(opt,’Algorithm’,’trust-region’);
opt = optimoptions(opt,’SubproblemAlgorithm’,’factorization’);
opt = optimoptions(opt,’SpecifyObjectiveGradient’,true);
opt = optimoptions(opt,’HessianFcn’,’objective’);
opt = optimoptions(opt,’MaxIterations’,1e3);
opt = optimoptions(opt,’MaxFunctionEvaluations’,1e3);
opt = optimoptions(opt,’Display’,’none’);
% Minimize function starting from a random point
x0 = randn(2,1);
[x,f,exitflag,output] = fminunc(@myfunc,x0,opt);
niter = output.iterations
fprintf(‘n%sn’,output.message)
% Find minimization path
for iter = 1:niter
opt = optimoptions(opt,’MaxIterations’,iter);
xpath(:,iter) = fminunc(@myfunc,x0,opt);
end
% opt = optimoptions(opt,’MaxIterations’,1);
% xpath(:,1) = fminunc(@myfunc,x0,opt);
% for iter = 2:niter
% xpath(:,iter) = fminunc(@myfunc,xpath(:,iter-1),opt);
% end
% Plot minimization path
figure(1)
hold on,
plot([x0(1),xpath(1,:)],[x0(2),xpath(2,:)],’r.-‘,’LineWidth’,2,’MarkerSize’,15)
plot(x(1),x(2),’.b’,’MarkerSize’,20)
hold off
%———————————
function [f,DfDx,D2fDx2] = myfunc(x)
a = 1; % smaller a => harder to minimize
r = sqrt(x(1,:).^2+x(2,:).^2);
s = atan2(x(2,:),x(1,:));
rs = 2*pi*r+s;
f = r.*exp(-a*r).*cos(rs);
% Firts derivatives
ts = x(2,:)./x(1,:); % ts=tan(s)
trs = tan(rs);
DtsDs = 1+ts.^2;
DtrsDrs = 1+trs.^2;
DsDts = 1./DtsDs;
DrsDr = 2*pi;
DrsDs = 1;
DrDx = x./r;
DtsDx = [ -x(2,:)./x(1,:).^2; 1./x(1,:) ];
DsDx = DsDts.*DtsDx;
DfDr = f./r – a*f – f.*trs.*DrsDr;
DfDs = -f.*trs.*DrsDs;
DfDx = DfDr.*DrDx + DfDs.*DsDx;
% Second derivatives
nx = size(x,2);
D2tsDs2 = 2*ts.*DtsDs;
D2trsDrs2 = 2*trs.*DtrsDrs;
D2rsDr2 = 0;
D2rsDs2 = 0;
D2rsDrDs = 0;
% note: d2x/dy2 = d(dy/dx)^-1/dy = d(dy/dx)^-1/dx * dx/dy =
% = -(dy/dx)^-2 * d2y/dx2 * (dy/dx)^-1 = -(dy/dx)^-3 * d2y/dx2
D2sDts2 = -D2tsDs2./DtsDs.^3;
D2fDr2 = DfDr./r – f./r.^2 – a*DfDr – DfDr.*trs.*DrsDr …
– f.*DtrsDrs.*DrsDr.^2 – f.*trs.*D2rsDr2;
D2fDs2 = – DfDs.*trs.*DrsDs – f.*DtrsDrs.*DrsDs.^2 …
– f.*trs.*D2rsDs2;
D2fDrDs = – DfDr.*trs.*DrsDs – f.*DtrsDrs.*DrsDr.*DrsDs …
– f.*trs.*D2rsDrDs;
for ix = 1:nx
D2rDx2 = eye(2)/r(ix) – x(:,ix).*x(:,ix)’/r(ix)^3;
D2tsDx2 = [ +2*x(2,ix)/x(1,ix).^3, -1/x(1,ix)^2
-1/x(1,ix).^2, 0 ];
D2sDx2 = D2sDts2(ix)*DtsDx(:,ix).*DtsDx(:,ix)’ + DsDts(ix)*D2tsDx2;
D2fDx2(:,:,ix) = D2fDr2(ix) *DrDx(:,ix).*DrDx(:,ix)’ …
+ D2fDs2(ix) *DsDx(:,ix).*DsDx(:,ix)’ …
+ D2fDrDs(ix)*DrDx(:,ix).*DsDx(:,ix)’ …
+ D2fDrDs(ix)*DsDx(:,ix).*DrDx(:,ix)’ …
+ DfDr(ix)*D2rDx2 + DfDs(ix)*D2sDx2;
end
end fminunc, step size MATLAB Answers — New Questions