Algorithm-LibLinear

 view release on metacpan or  search on metacpan

src/liblinear/linear.cpp  view on Meta::CPAN


				if(maxG-minG <= 1e-12)
					continue;
				else
					stopping = max(maxG - minG, stopping);

				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;

				solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				xi = prob->x[i];
				while(xi->index != -1)
				{
					double *w_i = &w[(xi->index-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m]*xi->value;
					xi++;
				}
			}
		}

		iter++;
		if(iter % 10 == 0)
		{
			info(".");
		}

		if(stopping < eps_shrink)
		{
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;
				info("*");
				eps_shrink = max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;
	}

	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0;i<w_size*nr_class;i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0;i<l*nr_class;i++)
	{
		v += alpha[i];
		if(fabs(alpha[i]) > 0)
			nSV++;
	}
	for(i=0;i<l;i++)
		v -= alpha[i*nr_class+(int)prob->y[i]];
	info("Objective value = %lf\n",v);
	info("nSV = %d\n",nSV);

	delete [] alpha;
	delete [] alpha_new;
	delete [] index;
	delete [] QD;
	delete [] d_ind;
	delete [] d_val;
	delete [] alpha_index;
	delete [] y_index;
	delete [] active_size_i;
}

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
//  min_\alpha  0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//              upper_bound_i = Cp if y_i = 1
//              upper_bound_i = Cn if y_i = -1
//              D_ii = 0
// In L2-SVM case:
//              upper_bound_i = INF
//              D_ii = 1/(2*Cp) if y_i = 1
//              D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Algorithm 3 of Hsieh et al., ICML 2008

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static int solve_l2r_l1l2_svc(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300)
{
	int l = prob->l;
	int w_size = prob->n;
	double eps = param->eps;
	int solver_type = param->solver_type;
	int i, s, iter = 0;
	double C, d, G;
	double *QD = new double[l];
	int *index = new int[l];
	double *alpha = new double[l];
	schar *y = new schar[l];
	int active_size = l;

	// PG: projected gradient, for shrinking and stopping
	double PG;
	double PGmax_old = INF;
	double PGmin_old = -INF;
	double PGmax_new, PGmin_new;

	// default solver_type: L2R_L2LOSS_SVC_DUAL
	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
	double upper_bound[3] = {INF, 0, INF};
	if(solver_type == L2R_L1LOSS_SVC_DUAL)
	{
		diag[0] = 0;
		diag[2] = 0;
		upper_bound[0] = Cn;
		upper_bound[2] = Cp;
	}

	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
	}

	// Initial alpha can be set here. Note that
	// 0 <= alpha[i] <= upper_bound[GETI(i)]
	for(i=0; i<l; i++)
		alpha[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		QD[i] = diag[GETI(i)];

src/liblinear/linear.cpp  view on Meta::CPAN

				PGmin_old = -INF;
				continue;
			}
		}
		PGmax_old = PGmax_new;
		PGmin_old = PGmin_new;
		if (PGmax_old <= 0)
			PGmax_old = INF;
		if (PGmin_old >= 0)
			PGmin_old = -INF;
	}

	info("\noptimization finished, #iter = %d\n",iter);

	// calculate objective value

	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	for(i=0; i<l; i++)
	{
		v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
		if(alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n",v/2);
	info("nSV = %d\n",nSV);

	delete [] QD;
	delete [] alpha;
	delete [] y;
	delete [] index;

	return iter;
}


// A coordinate descent algorithm for
// L1-loss and L2-loss epsilon-SVR dual problem
//
//  min_\beta  0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i,
//    s.t.      -upper_bound_i <= \beta_i <= upper_bound_i,
//
//  where Qij = xi^T xj and
//  D is a diagonal matrix
//
// In L1-SVM case:
//              upper_bound_i = C
//              lambda_i = 0
// In L2-SVM case:
//              upper_bound_i = INF
//              lambda_i = 1/(2*C)
//
// Given:
// x, y, p, C
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Algorithm 4 of Ho and Lin, 2012

#undef GETI
#define GETI(i) (0)
// To support weights for instances, use GETI(i) (i)

static int solve_l2r_l1l2_svr(const problem *prob, const parameter *param, double *w, int max_iter=300)
{
	const int solver_type = param->solver_type;
	int l = prob->l;
	double C = param->C;
	double p = param->p;
	int w_size = prob->n;
	double eps = param->eps;
	int i, s, iter = 0;
	int active_size = l;
	int *index = new int[l];

	double d, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	double *beta = new double[l];
	double *QD = new double[l];
	double *y = prob->y;

	// L2R_L2LOSS_SVR_DUAL
	double lambda[1], upper_bound[1];
	lambda[0] = 0.5/C;
	upper_bound[0] = INF;

	if(solver_type == L2R_L1LOSS_SVR_DUAL)
	{
		lambda[0] = 0;
		upper_bound[0] = C;
	}

	// Initial beta can be set here. Note that
	// -upper_bound <= beta[i] <= upper_bound
	for(i=0; i<l; i++)
		beta[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		QD[i] = sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(beta[i], xi, w);

		index[i] = i;
	}


	while(iter < max_iter)
	{
		Gmax_new = 0;
		Gnorm1_new = 0;

src/liblinear/linear.cpp  view on Meta::CPAN

			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == l)
				break;
			else
			{
				active_size = l;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);

	// calculate objective value
	double v = 0;
	int nSV = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	v = 0.5*v;
	for(i=0; i<l; i++)
	{
		v += p*fabs(beta[i]) - y[i]*beta[i] + 0.5*lambda[GETI(i)]*beta[i]*beta[i];
		if(beta[i] != 0)
			nSV++;
	}

	info("Objective value = %lf\n", v);
	info("nSV = %d\n",nSV);

	delete [] beta;
	delete [] QD;
	delete [] index;

	return iter;
}


// A coordinate descent algorithm for
// the dual of L2-regularized logistic regression problems
//
//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - \alpha_i) log (upper_bound_i - \alpha_i),
//    s.t.      0 <= \alpha_i <= upper_bound_i,
//
//  where Qij = yi yj xi^T xj and
//  upper_bound_i = Cp if y_i = 1
//  upper_bound_i = Cn if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Algorithm 5 of Yu et al., MLJ 2010

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static int solve_l2r_lr_dual(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300)
{
	int l = prob->l;
	int w_size = prob->n;
	double eps = param->eps;
	int i, s, iter = 0;
	double *xTx = new double[l];
	int *index = new int[l];
	double *alpha = new double[2*l]; // store alpha and C - alpha
	schar *y = new schar[l];
	int max_inner_iter = 100; // for inner Newton
	double innereps = 1e-2;
	double innereps_min = min(1e-8, eps);
	double upper_bound[3] = {Cn, 0, Cp};

	for(i=0; i<l; i++)
	{
		if(prob->y[i] > 0)
		{
			y[i] = +1;
		}
		else
		{
			y[i] = -1;
		}
	}

	// Initial alpha can be set here. Note that
	// 0 < alpha[i] < upper_bound[GETI(i)]
	// alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
	for(i=0; i<l; i++)
	{
		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
	}

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		xTx[i] = sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(y[i]*alpha[2*i], xi, w);
		index[i] = i;
	}

	while (iter < max_iter)
	{
		for (i=0; i<l; i++)
		{
			int j = i+rand()%(l-i);
			swap(index[i], index[j]);
		}

src/liblinear/linear.cpp  view on Meta::CPAN

					z *= eta;
				else // tmpz in (0, C)
					z = tmpz;
				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
				newton_iter++;
				inner_iter++;
			}

			if(inner_iter > 0) // update w
			{
				alpha[ind1] = z;
				alpha[ind2] = C-z;
				sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w);
			}
		}

		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gmax < eps)
			break;

		if(newton_iter <= l/10)
			innereps = max(innereps_min, 0.1*innereps);

	}

	info("\noptimization finished, #iter = %d\n",iter);

	// calculate objective value

	double v = 0;
	for(i=0; i<w_size; i++)
		v += w[i] * w[i];
	v *= 0.5;
	for(i=0; i<l; i++)
		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
	info("Objective value = %lf\n", v);

	delete [] xTx;
	delete [] alpha;
	delete [] y;
	delete [] index;

	return iter;
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
//  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
//
// To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
// must have been added to the original data. (see -B and -R option)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static int solve_l1r_l2_svc(const problem *prob_col, const parameter* param, double *w, double Cp, double Cn, double eps)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int regularize_bias = param->regularize_bias;
	int j, s, iter = 0;
	int max_iter = 1000;
	int active_size = w_size;
	int max_num_linesearch = 20;

	double sigma = 0.01;
	double d, G_loss, G, H;
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	double d_old, d_diff;
	double loss_old = 0, loss_new;
	double appxcond, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *b = new double[l]; // b = 1-ywTx
	double *xj_sq = new double[w_size];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	// Initial w can be set here.
	for(j=0; j<w_size; j++)
		w[j] = 0;

	for(j=0; j<l; j++)
	{
		b[j] = 1;
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;
	}
	for(j=0; j<w_size; j++)
	{
		index[j] = j;
		xj_sq[j] = 0;
		x = prob_col->x[j];
		while(x->index != -1)
		{
			int ind = x->index-1;
			x->value *= y[ind]; // x->value stores yi*xij
			double val = x->value;
			b[ind] -= w[j]*val;

src/liblinear/linear.cpp  view on Meta::CPAN

						b[ind] = b_new;
						if(b_new > 0)
							loss_new += C[GETI(ind)]*b_new*b_new;
						x++;
					}
				}

				cond = cond + loss_new - loss_old;
				if(cond <= 0)
					break;
				else
				{
					d_old = d;
					d *= 0.5;
					delta *= 0.5;
				}
			}

			w[j] += d;

			// recompute b[] if line search takes too many steps
			if(num_linesearch >= max_num_linesearch)
			{
				info("#");
				for(int i=0; i<l; i++)
					b[i] = 1;

				for(int i=0; i<w_size; i++)
				{
					if(w[i]==0) continue;
					x = prob_col->x[i];
					sparse_operator::axpy(-w[i], x, b);
				}
			}
		}

		if(iter == 0)
			Gnorm1_init = Gnorm1_new;
		iter++;
		if(iter % 10 == 0)
			info(".");

		if(Gnorm1_new <= eps*Gnorm1_init)
		{
			if(active_size == w_size)
				break;
			else
			{
				active_size = w_size;
				info("*");
				Gmax_old = INF;
				continue;
			}
		}

		Gmax_old = Gmax_new;
	}

	info("\noptimization finished, #iter = %d\n", iter);
	if(iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
	{
		x = prob_col->x[j];
		while(x->index != -1)
		{
			x->value *= prob_col->y[x->index-1]; // restore x->value
			x++;
		}
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	}
	if (regularize_bias == 0)
		v -= fabs(w[w_size-1]);
	for(j=0; j<l; j++)
		if(b[j] > 0)
			v += C[GETI(j)]*b[j]*b[j];

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] b;
	delete [] xj_sq;

	return iter;
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
//  min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w
//
// this function returns the number of iterations
//
// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
//
// To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
// must have been added to the original data. (see -B and -R option)

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

static int solve_l1r_lr(const problem *prob_col, const parameter *param, double *w, double Cp, double Cn, double eps)
{
	int l = prob_col->l;
	int w_size = prob_col->n;
	int regularize_bias = param->regularize_bias;
	int j, s, newton_iter=0, iter=0;
	int max_newton_iter = 100;
	int max_iter = 1000;
	int max_num_linesearch = 20;
	int active_size;
	int QP_active_size;

	double nu = 1e-12;
	double inner_eps = 1;
	double sigma = 0.01;
	double w_norm, w_norm_new;
	double z, G, H;
	double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration
	double Gmax_old = INF;
	double Gmax_new, Gnorm1_new;
	double QP_Gmax_old = INF;
	double QP_Gmax_new, QP_Gnorm1_new;
	double delta, negsum_xTd, cond;

	int *index = new int[w_size];
	schar *y = new schar[l];
	double *Hdiag = new double[w_size];
	double *Grad = new double[w_size];
	double *wpd = new double[w_size];
	double *xjneg_sum = new double[w_size];
	double *xTd = new double[l];
	double *exp_wTx = new double[l];
	double *exp_wTx_new = new double[l];
	double *tau = new double[l];
	double *D = new double[l];
	feature_node *x;

	double C[3] = {Cn,0,Cp};

	// Initial w can be set here.
	for(j=0; j<w_size; j++)
		w[j] = 0;

	for(j=0; j<l; j++)
	{
		if(prob_col->y[j] > 0)
			y[j] = 1;
		else
			y[j] = -1;

src/liblinear/linear.cpp  view on Meta::CPAN

					{
						if(Gp < 0)
							violation = -Gp;
						else if(Gn > 0)
							violation = Gn;
						//inner-level shrinking
						else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
						{
							QP_active_size--;
							swap(index[s], index[QP_active_size]);
							s--;
							continue;
						}
					}
					else if(wpd[j] > 0)
						violation = fabs(Gp);
					else
						violation = fabs(Gn);

					// obtain solution of one-variable problem
					if(Gp < H*wpd[j])
						z = -Gp/H;
					else if(Gn > H*wpd[j])
						z = -Gn/H;
					else
						z = -wpd[j];
				}
				QP_Gmax_new = max(QP_Gmax_new, violation);
				QP_Gnorm1_new += violation;

				if(fabs(z) < 1.0e-12)
					continue;
				z = min(max(z,-10.0),10.0);

				wpd[j] += z;

				x = prob_col->x[j];
				sparse_operator::axpy(z, x, xTd);
			}

			iter++;

			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
			{
				//inner stopping
				if(QP_active_size == active_size)
					break;
				//active set reactivation
				else
				{
					QP_active_size = active_size;
					QP_Gmax_old = INF;
					continue;
				}
			}

			QP_Gmax_old = QP_Gmax_new;
		}

		if(iter >= max_iter)
			info("WARNING: reaching max number of inner iterations\n");

		delta = 0;
		w_norm_new = 0;
		for(j=0; j<w_size; j++)
		{
			delta += Grad[j]*(wpd[j]-w[j]);
			if(wpd[j] != 0)
				w_norm_new += fabs(wpd[j]);
		}
		if (regularize_bias == 0)
			w_norm_new -= fabs(wpd[w_size-1]);
		delta += (w_norm_new-w_norm);

		negsum_xTd = 0;
		for(int i=0; i<l; i++)
			if(y[i] == -1)
				negsum_xTd += C[GETI(i)]*xTd[i];

		int num_linesearch;
		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
		{
			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;

			for(int i=0; i<l; i++)
			{
				double exp_xTd = exp(xTd[i]);
				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
			}

			if(cond <= 0)
			{
				w_norm = w_norm_new;
				for(j=0; j<w_size; j++)
					w[j] = wpd[j];
				for(int i=0; i<l; i++)
				{
					exp_wTx[i] = exp_wTx_new[i];
					double tau_tmp = 1/(1+exp_wTx[i]);
					tau[i] = C[GETI(i)]*tau_tmp;
					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
				}
				break;
			}
			else
			{
				w_norm_new = 0;
				for(j=0; j<w_size; j++)
				{
					wpd[j] = (w[j]+wpd[j])*0.5;
					if(wpd[j] != 0)
						w_norm_new += fabs(wpd[j]);
				}
				if (regularize_bias == 0)
					w_norm_new -= fabs(wpd[w_size-1]);
				delta *= 0.5;
				negsum_xTd *= 0.5;
				for(int i=0; i<l; i++)
					xTd[i] *= 0.5;
			}
		}

		// Recompute some info due to too many line search steps
		if(num_linesearch >= max_num_linesearch)
		{
			for(int i=0; i<l; i++)
				exp_wTx[i] = 0;

			for(int i=0; i<w_size; i++)
			{
				if(w[i]==0) continue;
				x = prob_col->x[i];
				sparse_operator::axpy(w[i], x, exp_wTx);
			}

			for(int i=0; i<l; i++)
				exp_wTx[i] = exp(exp_wTx[i]);
		}

		if(iter == 1)
			inner_eps *= 0.25;

		newton_iter++;
		Gmax_old = Gmax_new;

		info("iter %3d  #CD cycles %d\n", newton_iter, iter);
	}

	info("=========================\n");
	info("optimization finished, #iter = %d\n", newton_iter);
	if(newton_iter >= max_newton_iter)
		info("WARNING: reaching max number of iterations\n");

	// calculate objective value

	double v = 0;
	int nnz = 0;
	for(j=0; j<w_size; j++)
		if(w[j] != 0)
		{
			v += fabs(w[j]);
			nnz++;
		}
	if (regularize_bias == 0)
		v -= fabs(w[w_size-1]);
	for(j=0; j<l; j++)
		if(y[j] == 1)
			v += C[GETI(j)]*log(1+1/exp_wTx[j]);
		else
			v += C[GETI(j)]*log(1+exp_wTx[j]);

	info("Objective value = %lf\n", v);
	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

	delete [] index;
	delete [] y;
	delete [] Hdiag;
	delete [] Grad;
	delete [] wpd;
	delete [] xjneg_sum;
	delete [] xTd;
	delete [] exp_wTx;
	delete [] exp_wTx_new;
	delete [] tau;
	delete [] D;

	return newton_iter;
}

static int compare_feature_node(const void *a, const void *b)
{
	double a_value = (*(feature_node *)a).value;
	double b_value = (*(feature_node *)b).value;
	int a_index = (*(feature_node *)a).index;
	int b_index = (*(feature_node *)b).index;

	if(a_value < b_value)
		return -1;
	else if(a_value == b_value)
	{
		if(a_index < b_index)
			return -1;
		else if(a_index == b_index)
			return 0;
	}
	return 1;
}

// elements before the returned index are < pivot, while those after are >= pivot
static int partition(feature_node *nodes, int low, int high)
{
	int i;
	int index;

	swap(nodes[low + rand()%(high-low+1)], nodes[high]); // select and move pivot to the end

	index = low;
	for(i = low; i < high; i++)
		if (compare_feature_node(&nodes[i], &nodes[high]) == -1)
		{
			swap(nodes[index], nodes[i]);
			index++;
		}

	swap(nodes[high], nodes[index]);
	return index;
}

// rearrange nodes so that nodes[:k] contains nodes with the k smallest values.
static void quick_select_min_k(feature_node *nodes, int low, int high, int k)
{
	int pivot;
	if(low == high)
		return;
	pivot = partition(nodes, low, high);
	if(pivot == k)
		return;
	else if(k-1 < pivot)
		return quick_select_min_k(nodes, low, pivot-1, k);
	else
		return quick_select_min_k(nodes, pivot+1, high, k);
}

// A two-level coordinate descent algorithm for
// a scaled one-class SVM dual problem
//
//  min_\alpha  0.5(\alpha^T Q \alpha),
//    s.t.      0 <= \alpha_i <= 1 and
//              e^T \alpha = \nu l
//
//  where Qij = xi^T xj
//
// Given:
// x, nu
// eps is the stopping tolerance
//
// solution will be put in w and rho
//
// this function returns the number of iterations
//
// See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.

static int solve_oneclass_svm(const problem *prob, const parameter *param, double *w, double *rho)
{
	int l = prob->l;
	int w_size = prob->n;
	double eps = param->eps;
	double nu = param->nu;
	int i, j, s, iter = 0;
	double Gi, Gj;
	double Qij, quad_coef, delta, sum;
	double old_alpha_i;
	double *QD = new double[l];
	double *G = new double[l];
	int *index = new int[l];
	double *alpha = new double[l];
	int max_inner_iter;
	int max_iter = 1000;
	int active_size = l;

	double negGmax;                 // max { -grad(f)_i | i in Iup }
	double negGmin;                 // min { -grad(f)_i | i in Ilow }
	// Iup = { i | alpha_i < 1 }, Ilow = { i | alpha_i > 0 }
	feature_node *max_negG_of_Iup = new feature_node[l];
	feature_node *min_negG_of_Ilow = new feature_node[l];
	feature_node node;

	int n = (int)(nu*l);            // # of alpha's at upper bound
	for(i=0; i<n; i++)
		alpha[i] = 1;
	if (n<l)
		alpha[i] = nu*l-n;
	for(i=n+1; i<l; i++)
		alpha[i] = 0;

	for(i=0; i<w_size; i++)
		w[i] = 0;
	for(i=0; i<l; i++)
	{
		feature_node * const xi = prob->x[i];
		QD[i] = sparse_operator::nrm2_sq(xi);
		sparse_operator::axpy(alpha[i], xi, w);

		index[i] = i;
	}

	while (iter < max_iter)
	{
		negGmax = -INF;
		negGmin = INF;

		for (s=0; s<active_size; s++)
		{
			i = index[s];
			feature_node * const xi = prob->x[i];
			G[i] = sparse_operator::dot(w, xi);
			if (alpha[i] < 1)
				negGmax = max(negGmax, -G[i]);
			if (alpha[i] > 0)

src/liblinear/linear.cpp  view on Meta::CPAN

			int violating_pair = 0;
			if (alpha[i] < 1 && alpha[j] > 0 && -Gj + 1e-12 < -Gi)
				violating_pair = 1;
			else
				if (alpha[i] > 0 && alpha[j] < 1 && -Gi + 1e-12 < -Gj)
					violating_pair = 1;
			if (violating_pair == 0)
				continue;

			Qij = sparse_operator::sparse_dot(xi, xj);
			quad_coef = QD[i] + QD[j] - 2*Qij;
			if(quad_coef <= 0)
				quad_coef = 1e-12;
			delta = (Gi - Gj) / quad_coef;
			old_alpha_i = alpha[i];
			sum = alpha[i] + alpha[j];
			alpha[i] = alpha[i] - delta;
			alpha[j] = alpha[j] + delta;
			if (sum > 1)
			{
				if (alpha[i] > 1)
				{
					alpha[i] = 1;
					alpha[j] = sum - 1;
				}
			}
			else
			{
				if (alpha[j] < 0)
				{
					alpha[j] = 0;
					alpha[i] = sum;
				}
			}
			if (sum > 1)
			{
				if (alpha[j] > 1)
				{
					alpha[j] = 1;
					alpha[i] = sum - 1;
				}
			}
			else
			{
				if (alpha[i] < 0)
				{
					alpha[i] = 0;
					alpha[j] = sum;
				}
			}
			delta = alpha[i] - old_alpha_i;
			sparse_operator::axpy(delta, xi, w);
			sparse_operator::axpy(-delta, xj, w);
		}
		iter++;
		if (iter % 10 == 0)
			info(".");
	}
	info("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		info("\nWARNING: reaching max number of iterations\n\n");

	// calculate object value
	double v = 0;
	for(i=0; i<w_size; i++)
		v += w[i]*w[i];
	int nSV = 0;
	for(i=0; i<l; i++)
	{
		if (alpha[i] > 0)
			++nSV;
	}
	info("Objective value = %lf\n", v/2);
	info("nSV = %d\n", nSV);

	// calculate rho
	double nr_free = 0;
	double ub = INF, lb = -INF, sum_free = 0;
	for(i=0; i<l; i++)
	{
		double G = sparse_operator::dot(w, prob->x[i]);
		if (alpha[i] == 1)
			lb = max(lb, G);
		else if (alpha[i] == 0)
			ub = min(ub, G);
		else
		{
			++nr_free;
			sum_free += G;
		}
	}

	if (nr_free > 0)
		*rho = sum_free/nr_free;
	else
		*rho = (ub + lb)/2;
	info("rho = %lf\n", *rho);

	delete [] QD;
	delete [] G;
	delete [] index;
	delete [] alpha;
	delete [] max_negG_of_Iup;
	delete [] min_negG_of_Ilow;

	return iter;
}

// transpose matrix X from row format to column format
static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
{
	int i;
	int l = prob->l;
	int n = prob->n;
	size_t nnz = 0;
	size_t *col_ptr = new size_t [n+1];
	feature_node *x_space;
	prob_col->l = l;
	prob_col->n = n;
	prob_col->y = new double[l];
	prob_col->x = new feature_node*[n];

src/liblinear/linear.cpp  view on Meta::CPAN

}

static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
{
	int solver_type = param->solver_type;
	int dual_solver_max_iter = 300;
	int iter;

	bool is_regression = (solver_type==L2R_L2LOSS_SVR ||
				solver_type==L2R_L1LOSS_SVR_DUAL ||
				solver_type==L2R_L2LOSS_SVR_DUAL);

	// Some solvers use Cp,Cn but not C array; extensions possible but no plan for now
	double *C = new double[prob->l];
	double primal_solver_tol = param->eps;
	if(is_regression)
	{
		for(int i=0;i<prob->l;i++)
			C[i] = param->C;
	}
	else
	{
		int pos = 0;
		for(int i=0;i<prob->l;i++)
		{
			if(prob->y[i] > 0)
			{
				pos++;
				C[i] = Cp;
			}
			else
				C[i] = Cn;
		}
		int neg = prob->l - pos;
		primal_solver_tol = param->eps*max(min(pos,neg), 1)/prob->l;
	}

	switch(solver_type)
	{
		case L2R_LR:
		{
			l2r_lr_fun fun_obj(prob, param, C);
			NEWTON newton_obj(&fun_obj, primal_solver_tol);
			newton_obj.set_print_string(liblinear_print_string);
			newton_obj.newton(w);
			break;
		}
		case L2R_L2LOSS_SVC:
		{
			l2r_l2_svc_fun fun_obj(prob, param, C);
			NEWTON newton_obj(&fun_obj, primal_solver_tol);
			newton_obj.set_print_string(liblinear_print_string);
			newton_obj.newton(w);
			break;
		}
		case L2R_L2LOSS_SVC_DUAL:
		{
			iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
			{
				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 2\n\n");
				// primal_solver_tol obtained from eps for dual may be too loose
				primal_solver_tol *= 0.1;
				l2r_l2_svc_fun fun_obj(prob, param, C);
				NEWTON newton_obj(&fun_obj, primal_solver_tol);
				newton_obj.set_print_string(liblinear_print_string);
				newton_obj.newton(w);
			}
			break;
		}
		case L2R_L1LOSS_SVC_DUAL:
		{
			iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
				info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");			
			break;
		}
		case L1R_L2LOSS_SVC:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_l2_svc(&prob_col, param, w, Cp, Cn, primal_solver_tol);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L1R_LR:
		{
			problem prob_col;
			feature_node *x_space = NULL;
			transpose(prob, &x_space ,&prob_col);
			solve_l1r_lr(&prob_col, param, w, Cp, Cn, primal_solver_tol);
			delete [] prob_col.y;
			delete [] prob_col.x;
			delete [] x_space;
			break;
		}
		case L2R_LR_DUAL:
		{
			iter = solve_l2r_lr_dual(prob, param, w, Cp, Cn, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
			{
				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 0\n\n");
				// primal_solver_tol obtained from eps for dual may be too loose
				primal_solver_tol *= 0.1;
				l2r_lr_fun fun_obj(prob, param, C);
				NEWTON newton_obj(&fun_obj, primal_solver_tol);
				newton_obj.set_print_string(liblinear_print_string);
				newton_obj.newton(w);
			}
			break;
		}
		case L2R_L2LOSS_SVR:
		{
			l2r_l2_svr_fun fun_obj(prob, param, C);
			NEWTON newton_obj(&fun_obj, primal_solver_tol);
			newton_obj.set_print_string(liblinear_print_string);
			newton_obj.newton(w);
			break;
		}
		case L2R_L1LOSS_SVR_DUAL:
		{
			iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
				info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster (also see FAQ)\n\n");			

			break;
		}
		case L2R_L2LOSS_SVR_DUAL:
		{
			iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter);
			if(iter >= dual_solver_max_iter)
			{
				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 11\n\n");
				// primal_solver_tol obtained from eps for dual may be too loose
				primal_solver_tol *= 0.001;
				l2r_l2_svr_fun fun_obj(prob, param, C);
				NEWTON newton_obj(&fun_obj, primal_solver_tol);
				newton_obj.set_print_string(liblinear_print_string);
				newton_obj.newton(w);
			}
			break;
		}
		default:
			fprintf(stderr, "ERROR: unknown solver_type\n");
			break;
	}

	delete[] C;
}

// Calculate the initial C for parameter selection
static double calc_start_C(const problem *prob, const parameter *param)
{
	int i;
	double xTx, max_xTx;
	max_xTx = 0;
	for(i=0; i<prob->l; i++)
	{
		xTx = 0;
		feature_node *xi=prob->x[i];
		while(xi->index != -1)
		{
			double val = xi->value;
			xTx += val*val;
			xi++;
		}
		if(xTx > max_xTx)
			max_xTx = xTx;
	}

	double min_C = 1.0;
	if(param->solver_type == L2R_LR)
		min_C = 1.0 / (prob->l * max_xTx);
	else if(param->solver_type == L2R_L2LOSS_SVC)
		min_C = 1.0 / (2 * prob->l * max_xTx);
	else if(param->solver_type == L2R_L2LOSS_SVR)
	{
		double sum_y, loss, y_abs;
		double delta2 = 0.1;
		sum_y = 0, loss = 0;
		for(i=0; i<prob->l; i++)
		{
			y_abs = fabs(prob->y[i]);
			sum_y += y_abs;
			loss += max(y_abs - param->p, 0.0) * max(y_abs - param->p, 0.0);
		}
		if(loss > 0)
			min_C = delta2 * delta2 * loss / (8 * sum_y * sum_y * max_xTx);
		else
			min_C = INF;
	}

	return pow( 2, floor(log(min_C) / log(2.0)) );



( run in 2.529 seconds using v1.01-cache-2.11-cpan-96521ef73a4 )