LibLinear.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2007-2010 Soeren Sonnenburg
 * Copyright (c) 2007-2009 The LIBLINEAR Project.
 * Copyright (C) 2007-2010 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/classifier/svm/SVM_linear.h>
#include <shogun/classifier/svm/Tron.h>
#include <shogun/features/DotFeatures.h>

using namespace shogun;

CLibLinear::CLibLinear()
: CLinearMachine()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
    init();
    set_liblinear_solver_type(l);
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}

void CLibLinear::init()
{
    set_liblinear_solver_type(L2R_L1LOSS_SVC_DUAL);
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
            MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
            MS_NOT_AVAILABLE);
    SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
            "Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
}

bool CLibLinear::train_machine(CFeatures* data)
{
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }


    int32_t num_train_labels=labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type==L1R_L2LOSS_SVC ||
            liblinear_solver_type==L1R_LR)
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                "number of features %d does not match number of "
                "training labels %d\n",
                num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);

    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
        }
    }
    SG_FREE(w);
    if (use_bias)
        w=SG_MALLOC(float64_t, num_feat+1);
    else
        w=SG_MALLOC(float64_t, num_feat+0);
    w_dim=num_feat;

    problem prob;
    if (use_bias)
    {
        prob.n=w_dim+1;
        memset(w, 0, sizeof(float64_t)*(w_dim+1));
    }
    else
    {
        prob.n=w_dim;
        memset(w, 0, sizeof(float64_t)*(w_dim+0));
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(int, prob.l);
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=labels->get_int_label(i);

    int pos = 0;
    int neg = 0;
    for(int i=0;i<prob.l;i++)
    {
        if(prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

    function *fun_obj=NULL;
    double Cp=C1;
    double Cn=C2;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w, max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w, max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            //ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            //ASSUME FEATURES ARE TRANSPOSED ALREADY
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case MCSVM_CS:
        {
            /* TODO...
            model_->w=Malloc(double, n*nr_class);
            for(i=0;i<nr_class;i++)
                for(j=start[i];j<start[i]+count[i];j++)
                    sub_prob.y[j] = i;
            Solver_MCSVM_CS Solver(&sub_prob, nr_class, weighted_C, param->eps);
            Solver.Solve(model_->w);
            */
        }
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w_dim]);
    else
        set_bias(0);

    SG_FREE(prob.y);

    return true;
}
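
// Illustrative usage sketch (not part of this file): it assumes the
// surrounding Shogun 1.x API (CSimpleFeatures, CLabels, CMachine::train()
// and apply(), SG_UNREF); the data variables are placeholders.
//
//   // dense training data wrapped as dot-features, plus +/-1 labels
//   CSimpleFeatures<float64_t>* feats=new CSimpleFeatures<float64_t>(train_matrix);
//   CLabels* lab=new CLabels(train_labels);
//
//   // C=1 regularization; this constructor also enables the bias term
//   CLibLinear* svm=new CLibLinear(1.0, feats, lab);
//   svm->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
//   svm->set_epsilon(1e-3);
//   svm->train();
//
//   CLabels* out=svm->apply();
//   SG_UNREF(svm);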

// A coordinate descent algorithm for
// L1-loss and L2-loss SVM dual problems
//
// min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha,
// s.t. 0 <= alpha_i <= upper_bound_i,
//
// where Qij = yi yj xi^T xj and
// D is a diagonal matrix
//
// In L1-SVM case:
// upper_bound_i = Cp if y_i = 1
// upper_bound_i = Cn if y_i = -1
// D_ii = 0
// In L2-SVM case:
// upper_bound_i = INF
// D_ii = 1/(2*Cp) if y_i = 1
// D_ii = 1/(2*Cn) if y_i = -1
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

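// Sketch of the per-coordinate step used below (standard dual coordinate
// descent, in the notation above): restricted to a single alpha_i, the
// objective is a one-dimensional quadratic with curvature
//
//   QD[i] = Q_ii + D_ii = x_i^T x_i + D_ii
//
// and gradient
//
//   G = y_i w^T x_i - 1 + D_ii alpha_i,   where w = sum_j y_j alpha_j x_j
//
// (with -1 replaced by the linear term when one is set). Minimizing it under
// the box constraint 0 <= alpha_i <= C gives the projected Newton update
//
//   alpha_i <- min(max(alpha_i - G/QD[i], 0), C),
//
// after which w is updated incrementally by (alpha_i_new - alpha_i_old)*y_i*x_i.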
void CLibLinear::solve_l2r_l1l2_svc(
    const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

    // PG: projected gradient, for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default solver_type: L2R_L2LOSS_SVC_DUAL
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if(st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for(i=0; i<w_size; i++)
        w[i] = 0;

    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
        {
            y[i] = +1;
        }
        else
        {
            y[i] = -1;
        }
        QD[i] = diag[GETI(i)];

        QD[i] += prob->x->dot(i, prob->x,i);
        index[i] = i;
    }


    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0;s<active_size;s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w, n);
            if (prob->use_bias)
                G+=w[n];

            if (m_linear_term.vector)
                G = G*yi + m_linear_term.vector[i];
            else
                G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if(fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w, n);

                if (prob->use_bias)
                    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if(gap <= eps)
        {
            if(active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n",iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster "
            "(also see liblinear FAQ)\n\n");
    }

    // calculate objective value

    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w[i]*w[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n",v/2);
    SG_INFO("nSV = %d\n",nSV);

    SG_FREE(QD);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}

// A coordinate descent algorithm for
// L1-regularized L2-loss support vector classification
//
// min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

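// Sketch of the coordinate step used below: for coordinate j the smooth part
// of the objective, L(w) = C sum_i max(0, b_i)^2 with b_i = 1 - y_i w^T x_i,
// has gradient G along x_j and a curvature bound H. Adding the subgradient of
// |w_j| gives Gp = G+1 and Gn = G-1, and the regularized Newton direction is
//
//   d = -Gp/H  if Gp <= H*w_j,
//   d = -Gn/H  if Gn >= H*w_j,
//   d = -w_j   otherwise (the step lands exactly on w_j = 0),
//
// which is then backtracked (d and delta halved) until the sufficient-decrease
// condition |w_j+d| - |w_j| + (loss_new - loss_old) <= sigma*delta holds.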
void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l); // b = 1-ywTx
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = (CDotFeatures*) prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }


    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute b[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}

// A coordinate descent algorithm for
// L1-regularized logistic regression problems
//
// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)),
//
// Given:
// x, y, Cp, Cn
// eps is the stopping tolerance
//
// solution will be put in w

#undef GETI
#define GETI(i) (y[i]+1)
// To support weights for instances, use GETI(i) (i)

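// Sketch of the bookkeeping used below: the solver maintains
// exp_wTx[i] = exp(w^T x_i) for every instance, so the logistic terms needed
// for coordinate j can be accumulated in one pass over the nonzeros of that
// feature,
//
//   G = -sum_i C_i x_ij / (1+exp(w^T x_i)) + xjneg_sum[j]
//   H =  sum_i C_i x_ij^2 exp(w^T x_i) / (1+exp(w^T x_i))^2
//
// where xjneg_sum[j] = sum_{i: y_i=-1} C_i x_ij is precomputed. After a step
// of size d on w_j, exp_wTx is updated multiplicatively by exp(d*x_ij).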
void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);
    double *exp_wTx_new = SG_MALLOC(double, l);
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int ind;
    double val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }

                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {

                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

            // recompute exp_wTx[] if line search takes too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w[i]==0) continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("\nWARNING: reaching max number of iterations\n");

    // calculate objective value

    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}

SGVector<float64_t> CLibLinear::get_linear_term()
{
    if (!m_linear_term.vlen || !m_linear_term.vector)
        SG_ERROR("Please assign linear term first!\n");

    return m_linear_term;
}

void CLibLinear::init_linear_term()
{
    if (!labels)
        SG_ERROR("Please assign labels first!\n");

    m_linear_term=SGVector<float64_t>(labels->get_num_labels());
    CMath::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
}

#endif //HAVE_LAPACK
