Algorithm-SVM
view release on metacpan or search on metacpan
return(G[i] > Gmax1);
}
else
return(false);
}
void Solver::do_shrinking()
{
int i;
double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
// find maximal violating pair first
for(i=0;i<active_size;i++)
{
if(y[i]==+1)
{
if(!is_upper_bound(i))
{
if(-G[i] >= Gmax1)
Gmax1 = -G[i];
}
if(!is_lower_bound(i))
{
if(G[i] >= Gmax2)
Gmax2 = G[i];
}
}
else
{
if(!is_upper_bound(i))
{
if(-G[i] >= Gmax2)
Gmax2 = -G[i];
}
if(!is_lower_bound(i))
{
if(G[i] >= Gmax1)
Gmax1 = G[i];
}
}
}
// shrink
for(i=0;i<active_size;i++)
if (be_shrunken(i, Gmax1, Gmax2))
{
active_size--;
while (active_size > i)
{
if (!be_shrunken(active_size, Gmax1, Gmax2))
{
swap_index(i,active_size);
break;
}
active_size--;
}
}
// unshrink, check all variables again before final iterations
if(unshrinked || Gmax1 + Gmax2 > eps*10) return;
unshrinked = true;
reconstruct_gradient();
for(i=l-1;i>=active_size;i--)
if (!be_shrunken(i, Gmax1, Gmax2))
{
while (active_size < i)
{
if (be_shrunken(active_size, Gmax1, Gmax2))
{
swap_index(i,active_size);
break;
}
active_size++;
}
active_size++;
}
}
double Solver::calculate_rho()
{
double r;
int nr_free = 0;
double ub = INF, lb = -INF, sum_free = 0;
for(int i=0;i<active_size;i++)
{
double yG = y[i]*G[i];
if(is_upper_bound(i))
{
if(y[i]==-1)
ub = min(ub,yG);
else
lb = max(lb,yG);
}
else if(is_lower_bound(i))
{
if(y[i]==+1)
ub = min(ub,yG);
else
lb = max(lb,yG);
}
else
{
++nr_free;
sum_free += yG;
}
}
if(nr_free>0)
r = sum_free/nr_free;
else
r = (ub+lb)/2;
return r;
}
else
return(-G[i] > Gmax4);
}
else if(is_lower_bound(i))
{
if(y[i]==+1)
return(G[i] > Gmax2);
else
return(G[i] > Gmax3);
}
else
return(false);
}
void Solver_NU::do_shrinking()
{
double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
// find maximal violating pair first
int i;
for(i=0;i<active_size;i++)
{
if(!is_upper_bound(i))
{
if(y[i]==+1)
{
if(-G[i] > Gmax1) Gmax1 = -G[i];
}
else if(-G[i] > Gmax4) Gmax4 = -G[i];
}
if(!is_lower_bound(i))
{
if(y[i]==+1)
{
if(G[i] > Gmax2) Gmax2 = G[i];
}
else if(G[i] > Gmax3) Gmax3 = G[i];
}
}
// shrinking
for(i=0;i<active_size;i++)
if (be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
{
active_size--;
while (active_size > i)
{
if (!be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
{
swap_index(i,active_size);
break;
}
active_size--;
}
}
// unshrink, check all variables again before final iterations
if(unshrinked || max(Gmax1+Gmax2,Gmax3+Gmax4) > eps*10) return;
unshrinked = true;
reconstruct_gradient();
for(i=l-1;i>=active_size;i--)
if (!be_shrunken(i, Gmax1, Gmax2, Gmax3, Gmax4))
{
while (active_size < i)
{
if (be_shrunken(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
{
swap_index(i,active_size);
break;
}
active_size++;
}
active_size++;
}
}
double Solver_NU::calculate_rho()
{
int nr_free1 = 0,nr_free2 = 0;
double ub1 = INF, ub2 = INF;
double lb1 = -INF, lb2 = -INF;
double sum_free1 = 0, sum_free2 = 0;
for(int i=0;i<active_size;i++)
{
if(y[i]==+1)
{
if(is_upper_bound(i))
lb1 = max(lb1,G[i]);
else if(is_lower_bound(i))
ub1 = min(ub1,G[i]);
else
{
++nr_free1;
sum_free1 += G[i];
}
}
else
{
if(is_upper_bound(i))
lb2 = max(lb2,G[i]);
else if(is_lower_bound(i))
ub2 = min(ub2,G[i]);
else
{
++nr_free2;
sum_free2 += G[i];
}
}
}
double r1,r2;
if(nr_free1 > 0)
r1 = sum_free1/nr_free1;
if(fabs(alpha[i]) > 0)
{
++nSV;
if(prob->y[i] > 0)
{
if(fabs(alpha[i]) >= si.upper_bound_p)
++nBSV;
}
else
{
if(fabs(alpha[i]) >= si.upper_bound_n)
++nBSV;
}
}
}
info("nSV = %d, nBSV = %d\n",nSV,nBSV);
decision_function f;
f.alpha = alpha;
f.rho = si.rho;
return f;
}
//
// svm_model
//
struct svm_model
{
svm_parameter param; // parameter
int nr_class; // number of classes, = 2 in regression/one class svm
int l; // total #SV
svm_node **SV; // SVs (SV[l])
double **sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
double *rho; // constants in decision functions (rho[k*(k-1)/2])
double *probA; // pariwise probability information
double *probB;
// for classification only
int *label; // label of each class (label[k])
int *nSV; // number of SVs for each class (nSV[k])
// nSV[0] + nSV[1] + ... + nSV[k-1] = l
// XXX
int free_sv; // 1 if svm_model is created by svm_load_model
// 0 if svm_model is created by svm_train
};
// Platt's binary SVM Probablistic Output: an improvement from Lin et al.
void sigmoid_train(
int l, const double *dec_values, const double *labels,
double& A, double& B)
{
double prior1=0, prior0 = 0;
int i;
for (i=0;i<l;i++)
if (labels[i] > 0) prior1+=1;
else prior0+=1;
int max_iter=100; // Maximal number of iterations
double min_step=1e-10; // Minimal step taken in line search
double sigma=1e-12; // For numerically strict PD of Hessian
double eps=1e-5;
double hiTarget=(prior1+1.0)/(prior1+2.0);
double loTarget=1/(prior0+2.0);
double *t=Malloc(double,l);
double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
double newA,newB,newf,d1,d2;
int iter;
// Initial Point and Initial Fun Value
A=0.0; B=log((prior0+1.0)/(prior1+1.0));
double fval = 0.0;
for (i=0;i<l;i++)
{
if (labels[i]>0) t[i]=hiTarget;
else t[i]=loTarget;
fApB = dec_values[i]*A+B;
if (fApB>=0)
fval += t[i]*fApB + log(1+exp(-fApB));
else
fval += (t[i] - 1)*fApB +log(1+exp(fApB));
}
for (iter=0;iter<max_iter;iter++)
{
// Update Gradient and Hessian (use H' = H + sigma I)
h11=sigma; // numerically ensures strict PD
h22=sigma;
h21=0.0;g1=0.0;g2=0.0;
for (i=0;i<l;i++)
{
fApB = dec_values[i]*A+B;
if (fApB >= 0)
{
p=exp(-fApB)/(1.0+exp(-fApB));
q=1.0/(1.0+exp(-fApB));
}
else
{
p=1.0/(1.0+exp(fApB));
q=exp(fApB)/(1.0+exp(fApB));
}
d2=p*q;
h11+=dec_values[i]*dec_values[i]*d2;
h22+=d2;
h21+=dec_values[i]*d2;
d1=t[i]-p;
g1+=dec_values[i]*d1;
g2+=d1;
}
// Stopping Criteria
if (fabs(g1)<eps && fabs(g2)<eps)
break;
// Finding Newton direction: -inv(H') * g
det=h11*h22-h21*h21;
dA=-(h22*g1 - h21 * g2) / det;
dB=-(-h21*g1+ h11 * g2) / det;
gd=g1*dA+g2*dB;
stepsize = 1; // Line Search
while (stepsize >= min_step)
{
newA = A + stepsize * dA;
newB = B + stepsize * dB;
// New function value
newf = 0.0;
for (i=0;i<l;i++)
{
fApB = dec_values[i]*newA+newB;
if (fApB >= 0)
newf += t[i]*fApB + log(1+exp(-fApB));
else
newf += (t[i] - 1)*fApB +log(1+exp(fApB));
}
// Check sufficient decrease
if (newf<fval+0.0001*stepsize*gd)
{
A=newA;B=newB;fval=newf;
break;
}
else
stepsize = stepsize / 2.0;
}
if (stepsize < min_step)
{
info("Line search fails in two-class probability estimates\n");
break;
}
}
if (iter>=max_iter)
info("Reaching maximal iterations in two-class probability estimates\n");
free(t);
}
double sigmoid_predict(double decision_value, double A, double B)
{
double fApB = decision_value*A+B;
if (fApB >= 0)
return exp(-fApB)/(1.0+exp(-fApB));
else
return 1.0/(1+exp(fApB)) ;
}
// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
void multiclass_probability(int k, double **r, double *p)
{
int t,j;
int iter = 0, max_iter=max(100,k);
double **Q=Malloc(double *,k);
double *Qp=Malloc(double,k);
double pQp, eps=0.005/k;
for (t=0;t<k;t++)
{
p[t]=1.0/k; // Valid if k = 1
Q[t]=Malloc(double,k);
Q[t][t]=0;
for (j=0;j<t;j++)
{
Q[t][t]+=r[j][t]*r[j][t];
Q[t][j]=Q[j][t];
}
for (j=t+1;j<k;j++)
{
Q[t][t]+=r[j][t]*r[j][t];
Q[t][j]=-r[j][t]*r[t][j];
}
}
for (iter=0;iter<max_iter;iter++)
{
// stopping condition, recalculate QP,pQP for numerical accuracy
pQp=0;
for (t=0;t<k;t++)
{
Qp[t]=0;
for (j=0;j<k;j++)
Qp[t]+=Q[t][j]*p[j];
pQp+=p[t]*Qp[t];
}
double max_error=0;
for (t=0;t<k;t++)
{
double error=fabs(Qp[t]-pQp);
if (error>max_error)
max_error=error;
}
if (max_error<eps) break;
for (t=0;t<k;t++)
{
double diff=(-Qp[t]+pQp)/Q[t][t];
( run in 0.972 second using v1.01-cache-2.11-cpan-96521ef73a4 )