iterations results from the CPAN

iterations
Algorithm-LBFGS
view release on metacpan or search on metacpan
			} else if (0. < x[i]) {
				/* Differentiable. */
				d[i] = -g[i] - param->orthantwise_c;
			} else {
				if (g[i] < -param->orthantwise_c) {
					/* Take the right partial derivative. */
					d[i] = -g[i] - param->orthantwise_c;
				} else if (param->orthantwise_c < g[i]) {
					/* Take the left partial derivative. */
					d[i] = -g[i] + param->orthantwise_c;
				} else {
					d[i] = 0.;
				}
			}
		}
	}

	/* Compute the initial step:
		step = 1.0 / sqrt(vecdot(d, d, n))
	 */
	vecrnorm(&step, d, n);

	k = 1;
	end = 0;
	for (;;) {
		/* Store the current position and gradient vectors. */
		veccpy(xp, x, n);
		veccpy(gp, g, n);

		/* Search for an optimal step. */
		ls = line_search(
			n, x, &fx, g, d, &step, w, proc_evaluate, instance, param);
		if (ls < 0) {
			ret = ls;
			goto lbfgs_exit;
		}

		/* Compute x and g norms. */
		vecnorm(&gnorm, g, n);
		vecnorm(&xnorm, x, n);

		/* Report the progress. */
		if (proc_progress) {
			if (ret = proc_progress(instance, x, g, fx, xnorm, gnorm, step, n, k, ls)) {
				goto lbfgs_exit;
			}
		}

		/*
			Convergence test.
			The criterion is given by the following formula:
				|g(x)| / \max(1, |x|) < \epsilon
		 */
		if (xnorm < 1.0) xnorm = 1.0;
		if (gnorm / xnorm <= param->epsilon) {
			/* Convergence. */
			ret = 0;
			break;
		}

		if (param->max_iterations != 0 && param->max_iterations < k+1) {
			/* Maximum number of iterations. */
			ret = LBFGSERR_MAXIMUMITERATION;
			break;
		}

		/*
			Update vectors s and y:
				s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
				y_{k+1} = g_{k+1} - g_{k}.
		 */
		it = &lm[end];
		vecdiff(it->s, x, xp, n);
		vecdiff(it->y, g, gp, n);

		/*
			Compute scalars ys and yy:
				ys = y^t \cdot s = 1 / \rho.
				yy = y^t \cdot y.
			Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
		 */
		vecdot(&ys, it->y, it->s, n);
		vecdot(&yy, it->y, it->y, n);
		it->ys = ys;

		/*
			Recursive formula to compute dir = -(H \cdot g).
				This is described in page 779 of:
				Jorge Nocedal.
				Updating Quasi-Newton Matrices with Limited Storage.
				Mathematics of Computation, Vol. 35, No. 151,
				pp. 773--782, 1980.
		 */
		bound = (m <= k) ? m : k;
		++k;
		end = (end + 1) % m;

		if (param->orthantwise_c == 0.) {
			/* Compute the negative of gradients. */
			vecncpy(d, g, n);
		} else {
			/* Compute the negative of psuedo-gradients. */
			for (i = 0;i < n;++i) {
				if (x[i] < 0.) {
					/* Differentiable. */
					d[i] = -g[i] + param->orthantwise_c;
				} else if (0. < x[i]) {
					/* Differentiable. */
					d[i] = -g[i] - param->orthantwise_c;
				} else {
					if (g[i] < -param->orthantwise_c) {
						/* Take the right partial derivative. */
						d[i] = -g[i] - param->orthantwise_c;
					} else if (param->orthantwise_c < g[i]) {
						/* Take the left partial derivative. */
						d[i] = -g[i] + param->orthantwise_c;
					} else {
						d[i] = 0.;
					}
				}
			}
			/* Store the steepest direction.*/
( run in 1.226 second using v1.01-cache-2.11-cpan-71847e10f99 )