Algorithm-LibLinear


README.pod


=head2 find_cost_parameter(data_set => $data_set, num_folds => $num_folds [, initial => -1.0] [, update => 0])

Deprecated. Use C<find_parameters> instead.

A shorthand alias for C<find_parameters> that works only on the C<cost> parameter.
Note that C<loss_sensitivity> is also affected when C<update> is set, as shown in the sketch below.
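
For example, the deprecated call maps onto C<find_parameters> like this (a sketch only; C<$learner> and C<$data_set> stand for an Algorithm::LibLinear instance and a training data set):

  # Deprecated form:
  my $result = $learner->find_cost_parameter(
      data_set => $data_set, num_folds => 5,
  );

  # Preferred replacement:
  my $result = $learner->find_parameters(
      data_set => $data_set, num_folds => 5,
  );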

=head2 find_parameters(data_set => $data_set, num_folds => $num_folds [, initial_cost => -1.0] [, initial_loss_sensitivity => -1.0] [, update => 0])

Finds the best parameters by N-fold cross validation. If C<initial_cost> or C<initial_loss_sensitivity> is negative, the value is calculated automatically.
Works only with 3 solvers: C<'L2R_LR'>, C<'L2R_L2LOSS_SVC'>, and C<'L2R_L2LOSS_SVR'>; an error is thrown for any other solver.

When C<update> is set to true, the instance is updated to use the found parameters. This behaviour is disabled by default.

The return value is an ArrayRef containing 3 values: the found C<cost>, the found C<loss_sensitivity> (defined only when the solver is C<'L2R_L2LOSS_SVR'>), and the mean cross-validation score with the found parameters (classification accuracy for the classification solvers, mean squared error for C<'L2R_L2LOSS_SVR'>).
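
For illustration, a minimal sketch (assuming C<$data_set> is an L<Algorithm::LibLinear::DataSet> built elsewhere):

  my $learner = Algorithm::LibLinear->new(solver => 'L2R_L2LOSS_SVR');
  my ($cost, $loss_sensitivity, $score) = @{ $learner->find_parameters(
      data_set  => $data_set,
      num_folds => 5,
      update    => 1,  # adopt the found parameters on this instance
  ) };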

=head2 train(data_set => $data_set)

Executes training and returns a trained L<Algorithm::LibLinear::Model> instance.
C<data_set> is the same as in C<cross_validation>.
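
A minimal end-to-end sketch (assuming the C<feature>/C<label> data set form and the C<predict> interface documented elsewhere in this distribution):

  use Algorithm::LibLinear;
  use Algorithm::LibLinear::DataSet;

  my $data_set = Algorithm::LibLinear::DataSet->new(data_set => [
      +{ feature => +{ 1 => 0.7, 2 => 0.1 }, label => 1 },
      +{ feature => +{ 1 => 0.2, 2 => 0.9 }, label => 2 },
  ]);
  my $learner = Algorithm::LibLinear->new(solver => 'L2R_L2LOSS_SVC');
  my $model   = $learner->train(data_set => $data_set);
  my $label   = $model->predict(feature => +{ 1 => 0.5, 2 => 0.4 });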

src/liblinear.xs


AV *
ll_find_parameters(self, problem_, num_folds, initial_C, initial_p, update)
    struct parameter *self;
    struct problem *problem_;
    int num_folds;
    double initial_C;
    double initial_p;
    bool update;
CODE:
    double best_C, best_p, accuracy;
    find_parameters(
        problem_, self, num_folds, initial_C, initial_p, &best_C, &best_p,
        &accuracy);
    // LIBLINEAR 2.0 resets the default print function during the call to
    // find_parameter_C(), so disable it again.
    set_print_string_function(dummy_puts);
    bool is_regression_model = self->solver_type == L2R_L2LOSS_SVR;
    if (update) {
        self->C = best_C;
        if (is_regression_model) {
          self->p = best_p;
        }
    }
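    // Return a 3-element array: [best_C, best_p (or undef for
    // non-regression solvers), CV accuracy/score].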
    RETVAL = newAV();
    av_push(RETVAL, newSVnv(best_C));
    av_push(
        RETVAL,
        is_regression_model ? newSVnv(best_p) : newSVsv(&PL_sv_undef));
    av_push(RETVAL, newSVnv(accuracy));
OUTPUT:
    RETVAL

bool
ll_is_regression_solver(self)
    struct parameter *self;
CODE:
    RETVAL = is_regression_solver(self);
OUTPUT:
    RETVAL

src/liblinear/linear.cpp

static double calc_max_p(const problem *prob)
{
	int i;
	double max_p = 0.0;
	for(i = 0; i < prob->l; i++)
		max_p = max(max_p, fabs(prob->y[i]));

	return max_p;
}

static void find_parameter_C(const problem *prob, parameter *param_tmp, double start_C, double max_C, double *best_C, double *best_score, const int *fold_start, const int *perm, const problem *subprob, int nr_fold)
{
	// variables for CV
	int i;
	double *target = Malloc(double, prob->l);

	// variables for warm start
	double ratio = 2;
	double **prev_w = Malloc(double*, nr_fold);
	for(i = 0; i < nr_fold; i++)
		prev_w[i] = NULL;
	int num_unchanged_w = 0;
	void (*default_print_string) (const char *) = liblinear_print_string;

	if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
		*best_score = 0.0;
	else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
		*best_score = INF;
	*best_C = start_C;

	param_tmp->C = start_C;
	while(param_tmp->C <= max_C)
	{
		// Output disabled for running CV at a particular C
		set_print_string_function(&print_null);

		for(i=0; i<nr_fold; i++)
		{
			int j;

src/liblinear/linear.cpp

		}
		set_print_string_function(default_print_string);

		if(param_tmp->solver_type == L2R_LR || param_tmp->solver_type == L2R_L2LOSS_SVC)
		{
			int total_correct = 0;
			for(i=0; i<prob->l; i++)
				if(target[i] == prob->y[i])
					++total_correct;
			double current_rate = (double)total_correct/prob->l;
			if(current_rate > *best_score)
			{
				*best_C = param_tmp->C;
				*best_score = current_rate;
			}

			info("log2c=%7.2f\trate=%g\n",log(param_tmp->C)/log(2.0),100.0*current_rate);
		}
		else if(param_tmp->solver_type == L2R_L2LOSS_SVR)
		{
			double total_error = 0.0;
			for(i=0; i<prob->l; i++)
			{
				double y = prob->y[i];
				double v = target[i];
				total_error += (v-y)*(v-y);
			}
			double current_error = total_error/prob->l;
			if(current_error < *best_score)
			{
				*best_C = param_tmp->C;
				*best_score = current_error;
			}

			info("log2c=%7.2f\tp=%7.2f\tMean squared error=%g\n",log(param_tmp->C)/log(2.0),param_tmp->p,current_error);
		}

		num_unchanged_w++;
		if(num_unchanged_w == 5)
			break;
		param_tmp->C = param_tmp->C*ratio;
	}

src/liblinear/linear.cpp

			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
		free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}


void find_parameters(const problem *prob, const parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score)
{
	// prepare CV folds

	int i;
	int *fold_start;
	int l = prob->l;
	int *perm = Malloc(int, l);
	struct problem *subprob = Malloc(problem,nr_fold);

	if (nr_fold > l)

src/liblinear/linear.cpp

		}
		for(j=end;j<l;j++)
		{
			subprob[i].x[k] = prob->x[perm[j]];
			subprob[i].y[k] = prob->y[perm[j]];
			++k;
		}
	}

	struct parameter param_tmp = *param;
	*best_p = -1;
	if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
	{
		if(start_C <= 0)
			start_C = calc_start_C(prob, &param_tmp);
		double max_C = 1024;
		start_C = min(start_C, max_C);
		double best_C_tmp, best_score_tmp;

		find_parameter_C(prob, &param_tmp, start_C, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);

		*best_C = best_C_tmp;
		*best_score = best_score_tmp;
	}
	else if(param->solver_type == L2R_L2LOSS_SVR)
	{
		double max_p = calc_max_p(prob);
		int num_p_steps = 20;
		double max_C = 1048576;
		*best_score = INF;

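		// Sweep p over a descending grid, p = i*max_p/num_p_steps;
		// when start_p > 0, begin at the grid point at or below it.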
		i = num_p_steps-1;
		if(start_p > 0)
			i = min((int)(start_p/(max_p/num_p_steps)), i);
		for(; i >= 0; i--)
		{
			param_tmp.p = i*max_p/num_p_steps;
			double start_C_tmp;
			if(start_C <= 0)
				start_C_tmp = calc_start_C(prob, &param_tmp);
			else
				start_C_tmp = start_C;
			start_C_tmp = min(start_C_tmp, max_C);
			double best_C_tmp, best_score_tmp;

			find_parameter_C(prob, &param_tmp, start_C_tmp, max_C, &best_C_tmp, &best_score_tmp, fold_start, perm, subprob, nr_fold);

			if(best_score_tmp < *best_score)
			{
				*best_p = param_tmp.p;
				*best_C = best_C_tmp;
				*best_score = best_score_tmp;
			}
		}
	}

	free(fold_start);
	free(perm);
	for(i=0; i<nr_fold; i++)
	{
		free(subprob[i].x);
		free(subprob[i].y);

src/liblinear/linear.h

	int nr_class;           /* number of classes */
	int nr_feature;
	double *w;
	int *label;             /* label of each class */
	double bias;
	double rho;             /* one-class SVM only */
};

struct model* train(const struct problem *prob, const struct parameter *param);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
void find_parameters(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double start_p, double *best_C, double *best_p, double *best_score);

double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
double predict(const struct model *model_, const struct feature_node *x);
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);

int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);

int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);


