#include #include #include #include #include #include #include #include #include #include "handler.h" // new: signalhandling for aborting calculations int canceled = 0; #ifdef _WIN32 #include BOOL WINAPI CtrlHandler( DWORD fdwCtrlType ) { fflush(stdout); canceled = 1; return TRUE ; } void setup_handler() { canceled = 0; SetConsoleCtrlHandler( (PHANDLER_ROUTINE) CtrlHandler, TRUE ); } #else #include void sig_die(int ignore__) { fflush(stdout); canceled = 1; }; void setup_handler() { canceled = 0; signal(SIGINT, sig_die); signal(SIGTERM, sig_die); signal(SIGABRT, sig_die); } #endif #include "linear.h" #include "tron.h" typedef signed char schar; template inline void swap(T& x, T& y) { T t=x; x=y; y=t; } #ifndef min template inline T min(T x,T y) { return (x inline T max(T x,T y) { return (x>y)?x:y; } #endif template inline void clone(T*& dst, S* src, int n) { dst = new T[n]; memcpy((void *)dst,(void *)src,sizeof(T)*n); } #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) #define INF HUGE_VAL #if 1 void info(const char *fmt,...) { va_list ap; va_start(ap,fmt); vprintf(fmt,ap); va_end(ap); } void info_flush() { fflush(stdout); } #else void info(char *fmt,...) {} void info_flush() {} #endif class l2_lr_fun : public function { public: l2_lr_fun(const problem *prob, double Cp, double Cn); ~l2_lr_fun(); double fun(double *w); void grad(double *w, double *g); void Hv(double *s, double *Hs); int get_nr_variable(void); private: void Xv(double *v, double *Xv); void XTv(double *v, double *XTv); double *C; double *z; double *D; const problem *prob; }; l2_lr_fun::l2_lr_fun(const problem *prob, double Cp, double Cn) { int i; int l=prob->l; int *y=prob->y; this->prob = prob; z = new double[l]; D = new double[l]; C = new double[l]; for (i=0; iy; int l=prob->l; int n=prob->n; Xv(w, z); for(i=0;i= 0) f += C[i]*log(1 + exp(-yz)); else f += C[i]*(-yz+log(1 + exp(yz))); } f = 2*f; for(i=0;iy; int l=prob->l; int n=prob->n; for(i=0;in; } void l2_lr_fun::Hv(double *s, double *Hs) { int i; int l=prob->l; int n=prob->n; double *wa = new double[l]; Xv(s, wa); for(i=0;il; feature_node **x=prob->x; for(i=0;iindex!=-1) { Xv[i]+=v[s->index-1]*s->value; s++; } } } void l2_lr_fun::XTv(double *v, double *XTv) { int i; int l=prob->l; int n=prob->n; feature_node **x=prob->x; for(i=0;iindex!=-1) { XTv[s->index-1]+=v[i]*s->value; s++; } } } class l2loss_svm_fun : public function { public: l2loss_svm_fun(const problem *prob, double Cp, double Cn); ~l2loss_svm_fun(); double fun(double *w); void grad(double *w, double *g); void Hv(double *s, double *Hs); int get_nr_variable(void); private: void Xv(double *v, double *Xv); void subXv(double *v, double *Xv); void subXTv(double *v, double *XTv); double *C; double *z; double *D; int *I; int sizeI; const problem *prob; }; l2loss_svm_fun::l2loss_svm_fun(const problem *prob, double Cp, double Cn) { int i; int l=prob->l; int *y=prob->y; this->prob = prob; z = new double[l]; D = new double[l]; C = new double[l]; I = new int[l]; for (i=0; iy; int l=prob->l; int n=prob->n; Xv(w, z); for(i=0;i 0) f += C[i]*d*d; } f = 2*f; for(i=0;iy; int l=prob->l; int n=prob->n; sizeI = 0; for (i=0;in; } void l2loss_svm_fun::Hv(double *s, double *Hs) { int i; int l=prob->l; int n=prob->n; double *wa = new double[l]; subXv(s, wa); for(i=0;il; feature_node **x=prob->x; for(i=0;iindex!=-1) { Xv[i]+=v[s->index-1]*s->value; s++; } } } void l2loss_svm_fun::subXv(double *v, double *Xv) { int i; feature_node **x=prob->x; for(i=0;iindex!=-1) { Xv[i]+=v[s->index-1]*s->value; s++; } } } void l2loss_svm_fun::subXTv(double *v, double *XTv) { int i; int n=prob->n; feature_node **x=prob->x; for(i=0;iindex!=-1) { XTv[s->index-1]+=v[i]*s->value; s++; } } } // A coordinate descent algorithm for // multi-class support vector machines by Crammer and Singer // // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i // // where e^m_i = 0 if y_i = m, // e^m_i = 1 if y_i != m, // C^m_i = C if m = y_i, // C^m_i = 0 if m != y_i, // and w_m(\alpha) = \sum_i \alpha^m_i x_i // // Given: // x, y, C // eps is the stopping tolerance // // solution will be put in w class Solver_MCSVM_CS { public: Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000); ~Solver_MCSVM_CS(); int Solve(double *w, int verbose); private: void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new); bool be_shrunken(int m, int yi, double alpha_i, double minG); double *B, *C, *G; int n, l; int nr_class; int max_iter; double eps; const problem *prob; }; Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps, int max_iter) { this->n = prob->n; this->l = prob->l; this->nr_class = nr_class; this->eps = eps; this->max_iter = max_iter; this->prob = prob; this->C = C; this->B = new double[nr_class]; this->G = new double[nr_class]; } Solver_MCSVM_CS::~Solver_MCSVM_CS() { delete[] B; delete[] G; } int compare_double(const void *a, const void *b) { if(*(double *)a > *(double *)b) return -1; if(*(double *)a < *(double *)b) return 1; return 0; } void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new) { int r; double *D; clone(D, B, active_i); if(yi < active_i) D[yi] += A_i*C_yi; qsort(D, active_i, sizeof(double), compare_double); double beta = D[0] - A_i*C_yi; for(r=1;rx[i]; QD[i] = 0; while(xi->index != -1) { QD[i] += (xi->value)*(xi->value); xi++; } active_size_i[i] = nr_class; y_index[i] = prob->y[i]; index[i] = i; } setup_handler(); while(iter < max_iter) { if (canceled) break; double stopping = -INF; for(i=0;i 0) { for(m=0;mx[i]; while(xi->index!= -1) { double *w_i = &w[(xi->index-1)*nr_class]; for(m=0;mvalue); xi++; } double minG = INF; double maxG = -INF; for(m=0;m maxG) maxG = G[m]; } if(y_index[i] < active_size_i[i]) if(alpha_i[prob->y[i]] < C[prob->y[i]] && G[y_index[i]] < minG) minG = G[y_index[i]]; for(m=0;mm) { if(!be_shrunken(active_size_i[i], y_index[i], alpha_i[alpha_index_i[active_size_i[i]]], minG)) { swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); swap(G[m], G[active_size_i[i]]); if(y_index[i] == active_size_i[i]) y_index[i] = m; else if(y_index[i] == m) y_index[i] = active_size_i[i]; break; } active_size_i[i]--; } } } if(active_size_i[i] <= 1) { active_size--; swap(index[s], index[active_size]); s--; continue; } if(maxG-minG <= 1e-12) continue; else stopping = max(maxG - minG, stopping); for(m=0;my[i]], active_size_i[i], alpha_new); int nz_d = 0; for(m=0;m= 1e-12) { d_ind[nz_d] = alpha_index_i[m]; d_val[nz_d] = d; nz_d++; } } xi = prob->x[i]; while(xi->index != -1) { double *w_i = &w[(xi->index-1)*nr_class]; for(m=0;mvalue; xi++; } } } iter++; if((iter % 10 == 0 )&& verbose) { info("."); info_flush(); } if(stopping < eps_shrink) { if(stopping < eps && start_from_all == true) break; else { active_size = l; for(i=0;i= max_iter) info("Warning: reaching max number of iterations\n"); } // calculate objective value double v = 0; int nSV = 0; for(i=0;i 0) nSV++; } for(i=0;iy[i]]; if (verbose) { info("Objective value = %lf\n",v); info("nSV = %d\n",nSV); } delete [] alpha; delete [] alpha_new; delete [] index; delete [] QD; delete [] d_ind; delete [] d_val; delete [] alpha_index; delete [] y_index; delete [] active_size_i; return canceled; } // A coordinate descent algorithm for // L1-loss and L2-loss SVM dual problems // // min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha, // s.t. 0 <= alpha_i <= upper_bound_i, // // where Qij = yi yj xi^T xj and // D is a diagonal matrix // // In L1-SVM case: // upper_bound_i = Cp if y_i = 1 // upper_bound_i = Cn if y_i = -1 // D_ii = 0 // In L2-Svm case: // upper_bound_i = INF // D_ii = 1/(2*Cp) if y_i = 1 // D_ii = 1/(2*Cn) if y_i = -1 // // Given: // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w static int solve_linear_c_svc( const problem *prob, double *w, double eps, double Cp, double Cn, int solver_type, int verbose) { int l = prob->l; int n = prob->n; int i, s, iter = 0; double C, d, G; double *QD = new double[l]; int max_iter = 20000; int *index = new int[l]; double *alpha = new double[l]; schar *y = new schar[l]; int active_size = l; // PG: projected gradient, for shrinking and stopping double PG; double PGmax_old = INF; double PGmin_old = -INF; double PGmax_new, PGmin_new; // default solver_type: L2LOSS_SVM_DUAL double diag_p = 0.5/Cp, diag_n = 0.5/Cn; double upper_bound_p = INF, upper_bound_n = INF; if(solver_type == L1LOSS_SVM_DUAL) { diag_p = 0; diag_n = 0; upper_bound_p = Cp; upper_bound_n = Cn; } for(i=0; iy[i] > 0) { y[i] = +1; QD[i] = diag_p; } else { y[i] = -1; QD[i] = diag_n; } feature_node *xi = prob->x[i]; while (xi->index != -1) { QD[i] += (xi->value)*(xi->value); xi++; } index[i] = i; } setup_handler(); while (iter < max_iter) { if (canceled) break; PGmax_new = -INF; PGmin_new = INF; for (i=0; ix[i]; while(xi->index!= -1) { G += w[xi->index-1]*(xi->value); xi++; } G = G*yi-1; if(yi == 1) { C = upper_bound_p; G += alpha[i]*diag_p; } else { C = upper_bound_n; G += alpha[i]*diag_n; } PG = 0; if (alpha[i] == 0) { if (G > PGmax_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G < 0) PG = G; } else if (alpha[i] == C) { if (G < PGmin_old) { active_size--; swap(index[s], index[active_size]); s--; continue; } else if (G > 0) PG = G; } else PG = G; PGmax_new = max(PGmax_new, PG); PGmin_new = min(PGmin_new, PG); if(fabs(PG) > 1.0e-12) { double alpha_old = alpha[i]; alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C); d = (alpha[i] - alpha_old)*yi; xi = prob->x[i]; while (xi->index != -1) { w[xi->index-1] += d*xi->value; xi++; } } } iter++; if(iter % 10 == 0 && verbose) { info("."); info_flush(); } if(PGmax_new - PGmin_new <= eps) { if(active_size == l) break; else { active_size = l; if (verbose) { info("*"); info_flush(); } PGmax_old = INF; PGmin_old = -INF; continue; } } PGmax_old = PGmax_new; PGmin_old = PGmin_new; if (PGmax_old <= 0) PGmax_old = INF; if (PGmin_old >= 0) PGmin_old = -INF; } if (verbose) { if (canceled) info("\nCANCELED"); info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) info("Warning: reaching max number of iterations\n"); } // calculate objective value double v = 0; int nSV = 0; for(i=0; i 0) ++nSV; } if (verbose) { info("Objective value = %lf\n",v/2); info("nSV = %d\n",nSV); } delete [] QD; delete [] alpha; delete [] y; delete [] index; return canceled; } // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data // perm, length l, must be allocated before calling this subroutine void group_classes(const problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm) { int l = prob->l; int max_nr_class = 16; int nr_class = 0; int *label = Malloc(int,max_nr_class); int *count = Malloc(int,max_nr_class); int *data_label = Malloc(int,l); int i; for(i=0;iy[i]; int j; for(j=0;jeps; int pos = 0; int neg = 0; for(int i=0;il;i++) if(prob->y[i]==+1) pos++; neg = prob->l - pos; function *fun_obj=NULL; switch(param->solver_type) { case L2_LR: { fun_obj=new l2_lr_fun(prob, Cp, Cn); TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l, 1000, verbose); tron_obj.tron(w); delete fun_obj; break; } case L2LOSS_SVM: { fun_obj=new l2loss_svm_fun(prob, Cp, Cn); TRON tron_obj(fun_obj, eps*min(pos,neg)/prob->l, 1000, verbose); tron_obj.tron(w); delete fun_obj; break; } case L2LOSS_SVM_DUAL: solve_linear_c_svc(prob, w, eps, Cp, Cn, L2LOSS_SVM_DUAL, verbose); break; case L1LOSS_SVM_DUAL: solve_linear_c_svc(prob, w, eps, Cp, Cn, L1LOSS_SVM_DUAL, verbose); break; default: fprintf(stderr, "Error: unknown solver_type\n"); break; } } // // Interface functions // model* train(const problem *prob, const parameter *param, int verbose) { int i,j; int l = prob->l; int n = prob->n; model *model_ = Malloc(model,1); if(prob->bias>=0) model_->nr_feature=n-1; else model_->nr_feature=n; model_->param = *param; model_->bias = prob->bias; int nr_class; int *label = NULL; int *start = NULL; int *count = NULL; int *perm = Malloc(int,l); // group training data of the same class group_classes(prob,&nr_class,&label,&start,&count,perm); model_->nr_class=nr_class; model_->label = Malloc(int,nr_class); for(i=0;ilabel[i] = label[i]; // calculate weighted C double *weighted_C = Malloc(double, nr_class); for(i=0;iC; for(i=0;inr_weight;i++) { for(j=0;jweight_label[i] == label[j]) break; if(j == nr_class) fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]); else weighted_C[j] *= param->weight[i]; } // constructing the subproblem feature_node **x = Malloc(feature_node *,l); for(i=0;ix[perm[i]]; int k; problem sub_prob; sub_prob.l = l; sub_prob.n = n; sub_prob.x = Malloc(feature_node *,sub_prob.l); sub_prob.y = Malloc(int,sub_prob.l); for(k=0; ksolver_type == MCSVM_CS) { model_->w=Malloc(double, n*nr_class); for(i=0;ieps); Solver.Solve(model_->w, verbose); } else { if(nr_class == 2) { model_->w=Malloc(double, n); int e0 = start[0]+count[0]; k=0; for(; kw[0], weighted_C[0], weighted_C[1], verbose); } else { model_->w=Malloc(double, n*nr_class); double *w=Malloc(double, n); for(i=0;iC, verbose); for(int j=0;jw[j*nr_class+i] = w[j]; } free(w); } } free(x); free(label); free(start); free(count); free(perm); free(sub_prob.x); free(sub_prob.y); free(weighted_C); return model_; } void destroy_model(struct model *model_) { if(model_->w != NULL) free(model_->w); if(model_->label != NULL) free(model_->label); free(model_); } const char *solver_type_table[]= { "L2_LR", "L2LOSS_SVM_DUAL", "L2LOSS_SVM","L1LOSS_SVM_DUAL","MCSVM_CS", NULL }; int to_stream( std::ostream &os, const struct model *model_) { int i; int nr_feature=model_->nr_feature; int n; const parameter& param = model_->param; if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; int nr_w; if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) nr_w=1; else nr_w=model_->nr_class; os << "solver_type " << solver_type_table[param.solver_type] << std::endl; os << "nr_class " << model_->nr_class << std::endl; os << "label "; for(i=0; inr_class; i++) os << model_->label[i] << " "; os << std::endl; os << "nr_feature " << nr_feature << std::endl; os << std::setprecision(16); os << "bias " << model_->bias << std::endl; os << "w " << std::endl; for(i=0; iw[i*nr_w+j]<<" "; os << std::endl; } os << std::endl ; return os.fail()==0; } char * to_string(const struct model *model_) { std::ostringstream ss; if (to_stream(ss, model_)==0) return NULL; std::string res = ss.str(); int len = res.size(); char * rv = (char *) malloc((len+1)*sizeof(char)); strncpy(rv, res.c_str(), len+1); return rv; } int save_model(const char *model_file_name, const struct model *model_) { std::ofstream os(model_file_name); return to_stream(os, model_); } struct model *from_stream(std::istream &is) { int i; int nr_feature; int n; int nr_class; double bias; model *model_ = Malloc(model,1); parameter& param = model_->param; model_->label = NULL; char buff[81]; char cmd[81]; while(1) { is.getline(buff, 80); std::stringstream line(buff); line >> cmd; if(strcmp(cmd,"solver_type")==0) { line >> cmd; int i; for(i=0;solver_type_table[i];i++) { if(strcmp(solver_type_table[i],cmd)==0) { param.solver_type=i; break; } } if(solver_type_table[i] == NULL) { fprintf(stderr,"unknown solver type.\n"); free(model_->label); free(model_); return NULL; } } else if(strcmp(cmd,"nr_class")==0) { line >> nr_class; model_->nr_class=nr_class; } else if(strcmp(cmd,"nr_feature")==0) { line >> nr_feature; model_->nr_feature=nr_feature; } else if(strcmp(cmd,"bias")==0) { line >> bias; model_->bias=bias; } else if(strcmp(cmd,"w")==0) { break; } else if(strcmp(cmd,"label")==0) { int nr_class = model_->nr_class; model_->label = Malloc(int,nr_class); for(int i=0;i> model_->label[i]; } else { fprintf(stderr,"unknown text in model file: [%s]\n",cmd); free(model_); return NULL; } } nr_feature=model_->nr_feature; if(model_->bias>=0) n=nr_feature+1; else n=nr_feature; int nr_w; if(nr_class==2 && param.solver_type != MCSVM_CS) nr_w = 1; else nr_w = nr_class; model_->w=Malloc(double, n*nr_w); for(i=0; i> model_->w[i*nr_w+j]; } if (is.fail()) return NULL; return model_; } struct model *from_string(char *inp) { std::stringstream ss(inp); return from_stream(ss); } struct model *load_model(const char *model_file_name) { std::ifstream is(model_file_name); return from_stream(is); } int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values) { int idx; int n; if(model_->bias>=0) n=model_->nr_feature+1; else n=model_->nr_feature; double *w=model_->w; int nr_class=model_->nr_class; int i; int nr_w; if(nr_class==2 && model_->param.solver_type != MCSVM_CS) nr_w = 1; else nr_w = nr_class; const feature_node *lx=x; for(i=0;iindex)!=-1; lx++) { // the dimension of testing data may exceed that of training if(idx<=n) for(i=0;ivalue; } if(nr_class==2) return (dec_values[0]>0)?model_->label[0]:model_->label[1]; else { int dec_max_idx = 0; for(i=1;i dec_values[dec_max_idx]) dec_max_idx = i; } return model_->label[dec_max_idx]; } } int predict(const model *model_, const feature_node *x) { double *dec_values = Malloc(double, model_->nr_class); int label=predict_values(model_, x, dec_values); free(dec_values); return label; } int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) { if(model_->param.solver_type==L2_LR) { int i; int nr_class=model_->nr_class; int nr_w; if(nr_class==2) nr_w = 1; else nr_w = nr_class; int label=predict_values(model_, x, prob_estimates); for(i=0;iweight_label != NULL) free(param->weight_label); if(param->weight != NULL) free(param->weight); } const char *check_parameter(const problem *prob, const parameter *param) { if(param->eps <= 0) return "eps <= 0"; if(param->C <= 0) return "C <= 0"; if(param->solver_type != L2_LR && param->solver_type != L2LOSS_SVM_DUAL && param->solver_type != L2LOSS_SVM && param->solver_type != L1LOSS_SVM_DUAL && param->solver_type != MCSVM_CS) return "unknown solver type"; // if(param->solver_type == L1_LR) // return "sorry! sover_type = 1 (L1_LR) is not supported yet"; return NULL; } void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target, int verbose) { int i; int *fold_start = Malloc(int,nr_fold+1); int l = prob->l; int *perm = Malloc(int,l); for(i=0;ibias; subprob.n = prob->n; subprob.l = l-(end-begin); subprob.x = Malloc(struct feature_node*,subprob.l); subprob.y = Malloc(int,subprob.l); k=0; for(j=0;jx[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } for(j=end;jx[perm[j]]; subprob.y[k] = prob->y[perm[j]]; ++k; } struct model *submodel = train(&subprob,param, verbose); for(j=begin;jx[perm[j]]); destroy_model(submodel); free(subprob.x); free(subprob.y); } if (verbose && canceled) info("CANCELED"); free(fold_start); free(perm); } int get_nr_feature(const model *model_) { return model_->nr_feature; } int get_nr_class(const model *model_) { return model_->nr_class; } void get_labels(const model *model_, int* label) { if (model_->label != NULL) for(int i=0;inr_class;i++) label[i] = model_->label[i]; }