Algorithm-LBFGS
view release on metacpan or search on metacpan
} else if (0. < x[i]) {
/* Differentiable. */
d[i] = -g[i] - param->orthantwise_c;
} else {
if (g[i] < -param->orthantwise_c) {
/* Take the right partial derivative. */
d[i] = -g[i] - param->orthantwise_c;
} else if (param->orthantwise_c < g[i]) {
/* Take the left partial derivative. */
d[i] = -g[i] + param->orthantwise_c;
} else {
d[i] = 0.;
}
}
}
}
/* Compute the initial step:
step = 1.0 / sqrt(vecdot(d, d, n))
*/
vecrnorm(&step, d, n);
k = 1;
end = 0;
for (;;) {
/* Store the current position and gradient vectors. */
veccpy(xp, x, n);
veccpy(gp, g, n);
/* Search for an optimal step. */
ls = line_search(
n, x, &fx, g, d, &step, w, proc_evaluate, instance, param);
if (ls < 0) {
ret = ls;
goto lbfgs_exit;
}
/* Compute x and g norms. */
vecnorm(&gnorm, g, n);
vecnorm(&xnorm, x, n);
/* Report the progress. */
if (proc_progress) {
if (ret = proc_progress(instance, x, g, fx, xnorm, gnorm, step, n, k, ls)) {
goto lbfgs_exit;
}
}
/*
Convergence test.
The criterion is given by the following formula:
|g(x)| / \max(1, |x|) < \epsilon
*/
if (xnorm < 1.0) xnorm = 1.0;
if (gnorm / xnorm <= param->epsilon) {
/* Convergence. */
ret = 0;
break;
}
if (param->max_iterations != 0 && param->max_iterations < k+1) {
/* Maximum number of iterations. */
ret = LBFGSERR_MAXIMUMITERATION;
break;
}
/*
Update vectors s and y:
s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
y_{k+1} = g_{k+1} - g_{k}.
*/
it = &lm[end];
vecdiff(it->s, x, xp, n);
vecdiff(it->y, g, gp, n);
/*
Compute scalars ys and yy:
ys = y^t \cdot s = 1 / \rho.
yy = y^t \cdot y.
Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
*/
vecdot(&ys, it->y, it->s, n);
vecdot(&yy, it->y, it->y, n);
it->ys = ys;
/*
Recursive formula to compute dir = -(H \cdot g).
This is described in page 779 of:
Jorge Nocedal.
Updating Quasi-Newton Matrices with Limited Storage.
Mathematics of Computation, Vol. 35, No. 151,
pp. 773--782, 1980.
*/
bound = (m <= k) ? m : k;
++k;
end = (end + 1) % m;
if (param->orthantwise_c == 0.) {
/* Compute the negative of gradients. */
vecncpy(d, g, n);
} else {
/* Compute the negative of psuedo-gradients. */
for (i = 0;i < n;++i) {
if (x[i] < 0.) {
/* Differentiable. */
d[i] = -g[i] + param->orthantwise_c;
} else if (0. < x[i]) {
/* Differentiable. */
d[i] = -g[i] - param->orthantwise_c;
} else {
if (g[i] < -param->orthantwise_c) {
/* Take the right partial derivative. */
d[i] = -g[i] - param->orthantwise_c;
} else if (param->orthantwise_c < g[i]) {
/* Take the left partial derivative. */
d[i] = -g[i] + param->orthantwise_c;
} else {
d[i] = 0.;
}
}
}
/* Store the steepest direction.*/
( run in 1.226 second using v1.01-cache-2.11-cpan-71847e10f99 )