SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SubGradientSVM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2009 Soeren Sonnenburg
8  * Written (W) 2007-2008 Vojtech Franc
9  * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/config.h>
14 #include <shogun/lib/Signal.h>
15 #include <shogun/lib/Time.h>
20 #include <shogun/features/Labels.h>
21 
22 #undef DEBUG_SUBGRADIENTSVM
23 
24 using namespace shogun;
25 
// Default constructor: regularization C1=C2=1, target epsilon 1e-5,
// QP working-set size 42 (capped at 2000), bias term disabled.
// NOTE(review): the constructor signature line was lost in extraction.
27 : CLinearMachine(), C1(1), C2(1), epsilon(1e-5), qpsize(42),
28  qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
29 {
30 }
31 
// Constructor taking the regularization constant C (used for both C1 and
// C2) plus training features and labels, which are stored on the machine.
// NOTE(review): the first line of the signature was lost in extraction.
33  float64_t C, CDotFeatures* traindat, CLabels* trainlab)
34 : CLinearMachine(), C1(C), C2(C), epsilon(1e-5), qpsize(42),
35  qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0)
36 {
37  set_features(traindat);
38  set_labels(trainlab);
39 }
40 
41 
// Destructor: empty body — the work buffers are released in cleanup(),
// which train_machine() calls before returning.
// NOTE(review): the destructor signature line was lost in extraction.
43 {
44 }
45 
46 /*
47 int32_t CSubGradientSVM::find_active(int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
48 {
49  int32_t delta_active=0;
50  num_active=0;
51  num_bound=0;
52 
53  for (int32_t i=0; i<num_vec; i++)
54  {
55  active[i]=0;
56 
57  //within margin/wrong side
58  if (proj[i] < 1-work_epsilon)
59  {
60  idx_active[num_active++]=i;
61  active[i]=1;
62  }
63 
64  //on margin
65  if (CMath::abs(proj[i]-1) <= work_epsilon)
66  {
67  idx_bound[num_bound++]=i;
68  active[i]=2;
69  }
70 
71  if (active[i]!=old_active[i])
72  delta_active++;
73  }
74 
75  return delta_active;
76 }
77 */
78 
// Classify every training vector by margin status against the current
// autoselected_epsilon: active[i]=1 if proj[i] < 1-autoselected_epsilon
// (inside the margin / wrong side, stored in idx_active), active[i]=2 if
// on the margin band (stored in idx_bound). Counts changes against
// old_active into delta_active/delta_bound. Returns 0 on convergence
// (no changes and work_epsilon <= epsilon), otherwise re-selects epsilon
// from the sorted |proj-1| values, enforces the qpsize_max cap, redoes
// the classification and returns the new delta_active.
// NOTE(review): the signature line (orig. 79) and several statements
// were lost in extraction; each gap is flagged inline below.
80  int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound)
81 {
82  delta_bound=0;
83  delta_active=0;
84  num_active=0;
85  num_bound=0;
86 
87  for (int32_t i=0; i<num_vec; i++)
88  {
89  active[i]=0;
90 
91  //within margin/wrong side
92  if (proj[i] < 1-autoselected_epsilon)
93  {
94  idx_active[num_active++]=i;
95  active[i]=1;
96  }
97 
98  //on margin
// NOTE(review): orig. line 99 lost — presumably the on-margin test
// `if (CMath::abs(proj[i]-1) <= autoselected_epsilon)` as in the second
// pass below; confirm against upstream source.
100  {
101  idx_bound[num_bound++]=i;
102  active[i]=2;
103  }
104 
105  if (active[i]!=old_active[i])
106  delta_active++;
107 
108  if (active[i]==2 && old_active[i]==2)
109  delta_bound++;
110  }
111 
112 
113  if (delta_active==0 && work_epsilon<=epsilon) //we converged
114  return 0;
115  else if (delta_active==0) //lets decrease work_epsilon
116  {
// NOTE(review): orig. lines 117-118 lost — presumably the statements
// that shrink work_epsilon before forcing num_bound to qpsize.
119  num_bound=qpsize;
120  }
121 
122  delta_bound=0;
123  delta_active=0;
124  num_active=0;
125  num_bound=0;
126 
// Build |proj[i]-1| with indices, used to auto-select the next epsilon.
127  for (int32_t i=0; i<num_vec; i++)
128  {
129  tmp_proj[i]=CMath::abs(proj[i]-1);
130  tmp_proj_idx[i]=i;
131  }
132 
// NOTE(review): orig. lines 133 and 135 lost — presumably a sort of
// tmp_proj (with tmp_proj_idx) and the assignment of
// autoselected_epsilon from the sorted values; confirm upstream.
134 
136 
137 #ifdef DEBUG_SUBGRADIENTSVM
138  //SG_PRINT("autoseleps: %15.15f\n", autoselected_epsilon);
139 #endif
140 
// NOTE(review): orig. lines 141-147 partially lost — the condition
// guarding this epsilon/qpsize-limit adjustment block is missing.
143 
145  {
147 
148  int32_t i=0;
149  while (i < num_vec && tmp_proj[i] <= autoselected_epsilon)
150  i++;
151 
152  //SG_PRINT("lower bound on epsilon requires %d variables in qp\n", i);
153 
154  if (i>=qpsize_max && autoselected_epsilon>epsilon) //qpsize limit
155  {
156  SG_INFO("qpsize limit (%d) reached\n", qpsize_max);
157  int32_t num_in_qp=i;
158  while (--i>=0 && num_in_qp>=qpsize_max)
159  {
// NOTE(review): orig. lines 160 and 162 lost — the inner condition and
// the epsilon update that shrink the working set are missing.
161  {
163  num_in_qp--;
164  }
165  }
166 
167  //SG_PRINT("new qpsize will be %d, autoeps:%15.15f\n", num_in_qp, autoselected_epsilon);
168  }
169  }
170 
// Second pass: reclassify with the (possibly reduced) epsilon.
171  for (int32_t i=0; i<num_vec; i++)
172  {
173  active[i]=0;
174 
175  //within margin/wrong side
176  if (proj[i] < 1-autoselected_epsilon)
177  {
178  idx_active[num_active++]=i;
179  active[i]=1;
180  }
181 
182  //on margin
183  if (CMath::abs(proj[i]-1) <= autoselected_epsilon)
184  {
185  idx_bound[num_bound++]=i;
186  active[i]=2;
187  }
188 
189  if (active[i]!=old_active[i])
190  delta_active++;
191 
192  if (active[i]==2 && old_active[i]==2)
193  delta_bound++;
194  }
195 
196  //SG_PRINT("delta_bound: %d of %d (%02.2f)\n", delta_bound, num_bound, 100.0*delta_bound/num_bound);
197  return delta_active;
198 }
199 
200 
// Incrementally maintain the aggregates over the active set when a
// vector's status changes: vectors that just became active (1) are added,
// vectors that just left the active set are removed.
// NOTE(review): the actual update statements (orig. lines 207, 209, 213,
// 215) were lost in extraction — presumably they add/subtract
// C1*y_i*x_i into sum_CXy_active and y_i into sum_Cy_active (those
// aggregates are consumed by compute_min_subgradient); line 219 is
// presumably the copy of active[] into old_active[]. Confirm upstream.
201 void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec)
202 {
203  for (int32_t i=0; i<num_vec; i++)
204  {
205  if (active[i]==1 && old_active[i]!=1)
206  {
208  if (use_bias)
210  }
211  else if (old_active[i]==1 && active[i]!=1)
212  {
214  if (use_bias)
216  }
217  }
218 
220 }
221 
// One-dimensional exact line search along -grad_w/-grad_b for the
// piecewise-quadratic objective. The derivative of the objective along
// the search direction is piecewise linear with breakpoints ("hinge
// points") where an example's margin crosses 1; the search walks the
// breakpoints until the derivative turns positive and interpolates the
// zero crossing. Returns the step size alpha.
// NOTE(review): orig. line 246 was lost in extraction — presumably a
// sort of hinge_point/hinge_idx (the loop below assumes ascending
// breakpoints). Also note hinge_point[0] is read without a num_hinge>0
// guard; whether num_hinge==0 can occur here is not visible. Confirm
// both against upstream source.
222 float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec)
223 {
224  float64_t sum_B = 0;
225  float64_t A_zero = 0.5*CMath::dot(grad_w, grad_w, num_feat);
226  float64_t B_zero = -CMath::dot(w, grad_w, num_feat);
227 
228  int32_t num_hinge=0;
229 
// Project the gradient direction onto each example; breakpoint where
// proj[i] + alpha*p crosses 1, i.e. alpha = (proj[i]-1)/p.
230  for (int32_t i=0; i<num_vec; i++)
231  {
232  float64_t p=get_label(i)*(features->dense_dot(i, grad_w, num_feat)+grad_b);
233  grad_proj[i]=p;
234  if (p!=0)
235  {
236  hinge_point[num_hinge]=(proj[i]-1)/p;
237  hinge_idx[num_hinge]=i;
238  num_hinge++;
239 
240  if (p<0)
241  sum_B+=p;
242  }
243  }
244  sum_B*=C1;
245 
247 
248 
249  float64_t alpha = hinge_point[0];
250  float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B;
251 
252  //CMath::display_vector(grad_w, num_feat, "grad_w");
253  //CMath::display_vector(grad_proj, num_vec, "grad_proj");
254  //CMath::display_vector(hinge_point, num_vec, "hinge_point");
255  //SG_PRINT("A_zero=%f\n", A_zero);
256  //SG_PRINT("B_zero=%f\n", B_zero);
257  //SG_PRINT("sum_B=%f\n", sum_B);
258  //SG_PRINT("alpha=%f\n", alpha);
259  //SG_PRINT("grad_val=%f\n", grad_val);
260 
261  float64_t old_grad_val = grad_val;
262  float64_t old_alpha = alpha;
263 
// Walk the breakpoints while the directional derivative stays negative.
264  for (int32_t i=1; i < num_hinge && grad_val < 0; i++)
265  {
266  alpha = hinge_point[i];
267  grad_val = 2*A_zero*alpha + B_zero + sum_B;
268 
269  if (grad_val > 0)
270  {
// Derivative changed sign: linearly interpolate the zero crossing
// between the previous and current breakpoint.
271  ASSERT(old_grad_val-grad_val != 0);
272  float64_t gamma = -grad_val/(old_grad_val-grad_val);
273  alpha = old_alpha*gamma + (1-gamma)*alpha;
274  }
275  else
276  {
277  old_grad_val = grad_val;
278  old_alpha = alpha;
279 
// Passing a breakpoint flips one hinge term on: update the slope sum.
280  sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]);
281  grad_val = 2*A_zero*alpha + B_zero + sum_B;
282  }
283  }
284 
285  return alpha;
286 }
287 
// Compute the minimum-norm subgradient (grad_w, grad_b) of the objective.
// When vectors sit exactly on the margin (num_bound > 0) the subgradient
// is not unique: the convex combination coefficients beta over the bound
// set are found by solving a box-constrained QP (Z beta = Zv) via
// CQPBSVMLib — or chosen randomly when the QP would exceed qpsize_max.
// Otherwise the gradient is unique: w - sum_CXy_active. Returns the
// directional derivative used by train_machine's stopping test.
// NOTE(review): the signature line (orig. 288) and orig. lines 365, 369,
// 377, 384 were lost in extraction — presumably the grad_b base
// assignment, the beta-weighted accumulation of bound vectors into
// grad_w, and the per-bound-vector `val` definition used in the
// dir_deriv loop. Confirm against upstream source.
289  int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound)
290 {
291  float64_t dir_deriv=0;
292 
293  if (num_bound > 0)
294  {
295 
296  CTime t2;
// v = w - sum_CXy_active (gradient contribution of the active set).
297  CMath::add(v, 1.0, w, -1.0, sum_CXy_active, num_feat);
298 
299  if (num_bound>=qpsize_max && num_it_noimprovement!=10) // if qp gets to large, lets just choose a random beta
300  {
301  //SG_PRINT("qpsize too large (%d>=%d) choosing random subgradient/beta\n", num_bound, qpsize_max);
302  for (int32_t i=0; i<num_bound; i++)
303  beta[i]=CMath::random(0.0,1.0);
304  }
305  else
306  {
307  memset(beta, 0, sizeof(float64_t)*num_bound);
308 
309  float64_t bias_const=0;
310 
311  if (use_bias)
312  bias_const=1;
313 
// Build the symmetric QP matrix Z (Gram matrix of bound vectors scaled
// by labels and C1^2) and the linear term Zv.
314  for (int32_t i=0; i<num_bound; i++)
315  {
316  for (int32_t j=i; j<num_bound; j++)
317  {
318  Z[i*num_bound+j]= 2.0*C1*C1*get_label(idx_bound[i])*get_label(idx_bound[j])*
319  (features->dot(idx_bound[i], features, idx_bound[j]) + bias_const);
320 
321  Z[j*num_bound+i]=Z[i*num_bound+j];
322 
323  }
324 
325  Zv[i]=-2.0*C1*get_label(idx_bound[i])*
326  (features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active);
327  }
328 
329  //CMath::display_matrix(Z, num_bound, num_bound, "Z");
330  //CMath::display_vector(Zv, num_bound, "Zv");
331  t2.stop();
332 #ifdef DEBUG_SUBGRADIENTSVM
333  t2.time_diff_sec(true);
334 #endif
335 
336  CTime t;
337  CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0);
338  //solver.set_solver(QPB_SOLVER_GRADDESC);
339  //solver.set_solver(QPB_SOLVER_GS);
340 #ifdef USE_CPLEX
341  solver.set_solver(QPB_SOLVER_CPLEX);
342 #else
343  solver.set_solver(QPB_SOLVER_SCAS);
344 #endif
345 
346  solver.solve_qp(beta, num_bound);
347 
// tim accumulates solver wall time; reported by train_machine.
348  t.stop();
349 #ifdef DEBUG_SUBGRADIENTSVM
350  tim+=t.time_diff_sec(true);
351 #else
352  tim+=t.time_diff_sec(false);
353 #endif
354 
355  //CMath::display_vector(beta, num_bound, "beta gs");
356  //solver.set_solver(QPB_SOLVER_CPLEX);
357  //solver.solve_qp(beta, num_bound);
358  //CMath::display_vector(beta, num_bound, "beta cplex");
359 
360  //CMath::display_vector(grad_w, num_feat, "grad_w");
361  //SG_PRINT("grad_b:%f\n", grad_b);
362  }
363 
364  CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
366 
367  for (int32_t i=0; i<num_bound; i++)
368  {
370  if (use_bias)
371  grad_b -= C1 * get_label(idx_bound[i])*beta[i];
372  }
373 
374  dir_deriv = CMath::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active;
375  for (int32_t i=0; i<num_bound; i++)
376  {
378  dir_deriv += CMath::max(0.0, val);
379  }
380  }
381  else
382  {
// No vectors on the margin: the (sub)gradient is unique.
383  CMath::add(grad_w, 1.0, w, -1.0, sum_CXy_active, num_feat);
385 
386  dir_deriv = CMath::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b;
387  }
388 
389  return dir_deriv;
390 }
391 
392 float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec)
393 {
394  float64_t result= 0.5 * CMath::dot(w,w, num_feat);
395 
396  for (int32_t i=0; i<num_vec; i++)
397  {
398  if (proj[i]<1.0)
399  result += C1 * (1.0-proj[i]);
400  }
401 
402  return result;
403 }
404 
405 void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec)
406 {
407  for (int32_t i=0; i<num_vec; i++)
408  proj[i]=get_label(i)*(features->dense_dot(i, w, num_feat)+bias);
409 }
410 
// Incrementally update the margin cache proj[] after a step of size
// alpha along the negative gradient.
// NOTE(review): the body statement (orig. line 413) was lost in
// extraction — presumably proj -= alpha*grad_proj elementwise (train_machine
// calls this right after updating w by -alpha*grad_w). Confirm upstream.
411 void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec)
412 {
414 }
415 
416 void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat)
417 {
418  // alloc normal and bias inited with 0
419  SG_FREE(w);
420  w=SG_MALLOC(float64_t, num_feat);
421  w_dim=num_feat;
422  memset(w,0,sizeof(float64_t)*num_feat);
423  //CMath::random_vector(w, num_feat, -1.0, 1.0);
424  bias=0;
426  grad_b=0;
427  qpsize_limit=5000;
428 
429  grad_w=SG_MALLOC(float64_t, num_feat);
430  memset(grad_w,0,sizeof(float64_t)*num_feat);
431 
433  memset(sum_CXy_active,0,sizeof(float64_t)*num_feat);
434 
435  v=SG_MALLOC(float64_t, num_feat);
436  memset(v,0,sizeof(float64_t)*num_feat);
437 
438  old_v=SG_MALLOC(float64_t, num_feat);
439  memset(old_v,0,sizeof(float64_t)*num_feat);
440 
441  sum_Cy_active=0;
442 
443  proj= SG_MALLOC(float64_t, num_vec);
444  memset(proj,0,sizeof(float64_t)*num_vec);
445 
446  tmp_proj=SG_MALLOC(float64_t, num_vec);
447  memset(proj,0,sizeof(float64_t)*num_vec);
448 
449  tmp_proj_idx= SG_MALLOC(int32_t, num_vec);
450  memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec);
451 
452  grad_proj= SG_MALLOC(float64_t, num_vec);
453  memset(grad_proj,0,sizeof(float64_t)*num_vec);
454 
455  hinge_point= SG_MALLOC(float64_t, num_vec);
456  memset(hinge_point,0,sizeof(float64_t)*num_vec);
457 
458  hinge_idx= SG_MALLOC(int32_t, num_vec);
459  memset(hinge_idx,0,sizeof(int32_t)*num_vec);
460 
461  active=SG_MALLOC(uint8_t, num_vec);
462  memset(active,0,sizeof(uint8_t)*num_vec);
463 
464  old_active=SG_MALLOC(uint8_t, num_vec);
465  memset(old_active,0,sizeof(uint8_t)*num_vec);
466 
467  idx_bound=SG_MALLOC(int32_t, num_vec);
468  memset(idx_bound,0,sizeof(int32_t)*num_vec);
469 
470  idx_active=SG_MALLOC(int32_t, num_vec);
471  memset(idx_active,0,sizeof(int32_t)*num_vec);
472 
474  memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
475 
476  Zv=SG_MALLOC(float64_t, qpsize_limit);
477  memset(Zv,0,sizeof(float64_t)*qpsize_limit);
478 
479  beta=SG_MALLOC(float64_t, qpsize_limit);
480  memset(beta,0,sizeof(float64_t)*qpsize_limit);
481 
482  old_Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit);
483  memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit);
484 
485  old_Zv=SG_MALLOC(float64_t, qpsize_limit);
486  memset(old_Zv,0,sizeof(float64_t)*qpsize_limit);
487 
488  old_beta=SG_MALLOC(float64_t, qpsize_limit);
489  memset(old_beta,0,sizeof(float64_t)*qpsize_limit);
490 
491 }
492 
// Release all training work buffers allocated by init() and null the
// corresponding member pointers so stale handles are never reused.
// NOTE(review): the signature line (orig. 493) and several SG_FREE lines
// (orig. 495-497, 500, 502-505 — evidently hinge_idx, hinge_point,
// grad_proj, tmp_proj_idx, old_active, idx_bound, idx_active and
// sum_CXy_active, matching the pointers nulled below) were lost in
// extraction. Confirm upstream.
494 {
498  SG_FREE(proj);
499  SG_FREE(tmp_proj);
501  SG_FREE(active);
506  SG_FREE(grad_w);
507  SG_FREE(v);
508  SG_FREE(Z);
509  SG_FREE(Zv);
510  SG_FREE(beta);
511  SG_FREE(old_v);
512  SG_FREE(old_Z);
513  SG_FREE(old_Zv);
514  SG_FREE(old_beta);
515 
516  hinge_idx=NULL;
517  proj=NULL;
518  active=NULL;
519  old_active=NULL;
520  idx_bound=NULL;
521  idx_active=NULL;
522  sum_CXy_active=NULL;
523  grad_w=NULL;
524  v=NULL;
525  Z=NULL;
526  Zv=NULL;
527  beta=NULL;
528 }
529 
// Main training loop. Validates features/labels, allocates buffers via
// init(), then iterates: classify vectors (find_active), refresh the
// active-set aggregates (update_active), compute the minimum-norm
// subgradient, line-search the step size, and update w/bias/proj.
// Stops on convergence (work_epsilon <= epsilon, no active-set change,
// tiny alpha*||grad||) or external cancellation; frees buffers via
// cleanup() and returns true.
// NOTE(review): the signature line (orig. 530) and several statements
// (orig. 559, 562, 583, 611, 617, 625, 628, 648, 660 — including the
// initialization of num_it_noimprovement/last_it_noimprovement, the
// progress output in the non-debug branches, the no-improvement counter
// updates, and the max-train-time check feeding the `break` at 649)
// were lost in extraction. Confirm against upstream source.
531 {
532  tim=0;
533  SG_INFO("C=%f epsilon=%f\n", C1, epsilon);
534  ASSERT(labels);
535 
536  if (data)
537  {
538  if (!data->has_property(FP_DOT))
539  SG_ERROR("Specified features are not of type CDotFeatures\n");
540  set_features((CDotFeatures*) data);
541  }
542  ASSERT(get_features());
543 
544  int32_t num_iterations=0;
545  int32_t num_train_labels=labels->get_num_labels();
546  int32_t num_feat=features->get_dim_feature_space();
547  int32_t num_vec=features->get_num_vectors();
548 
549  ASSERT(num_vec==num_train_labels);
550 
551  init(num_vec, num_feat);
552 
553  int32_t num_active=0;
554  int32_t num_bound=0;
555  float64_t alpha=0;
556  float64_t dir_deriv=0;
557  float64_t obj=0;
// Force a full first pass: everything counts as changed initially.
558  delta_active=num_vec;
560 
561  work_epsilon=0.99;
563 
564  compute_projection(num_feat, num_vec);
565 
566  CTime time;
567 #ifdef DEBUG_SUBGRADIENTSVM
568  float64_t loop_time=0;
569 #endif
570  while (!(CSignal::cancel_computations()))
571  {
572  CTime t;
573  delta_active=find_active(num_feat, num_vec, num_active, num_bound);
574 
575  update_active(num_feat, num_vec);
576 
577 #ifdef DEBUG_SUBGRADIENTSVM
578  SG_PRINT("==================================================\niteration: %d ", num_iterations);
579  obj=compute_objective(num_feat, num_vec);
580  SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n",
581  obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time);
582 #else
584 #endif
585  //CMath::display_vector(w, w_dim, "w");
586  //SG_PRINT("bias: %f\n", bias);
587  //CMath::display_vector(proj, num_vec, "proj");
588  //CMath::display_vector(idx_active, num_active, "idx_active");
589  //SG_PRINT("num_active: %d\n", num_active);
590  //CMath::display_vector(idx_bound, num_bound, "idx_bound");
591  //SG_PRINT("num_bound: %d\n", num_bound);
592  //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active");
593  //SG_PRINT("sum_Cy_active: %f\n", sum_Cy_active);
594  //CMath::display_vector(grad_w, num_feat, "grad_w");
595  //SG_PRINT("grad_b:%f\n", grad_b);
596 
597  dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound);
598 
599  alpha=line_search(num_feat, num_vec);
600 
// Optimality check: only meaningful when the QP was actually solved
// (num_bound<qpsize_max) or after 10 iterations without improvement.
601  if (num_it_noimprovement==10 || num_bound<qpsize_max)
602  {
603  float64_t norm_grad=CMath::dot(grad_w, grad_w, num_feat) +
604  grad_b*grad_b;
605 
606 #ifdef DEBUG_SUBGRADIENTSVM
607  SG_PRINT("CHECKING OPTIMALITY CONDITIONS: "
608  "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n",
609  work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad));
610 #else
612 #endif
613 
614  if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6)
615  break;
616  else
618  }
619 
620  if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0))
621  {
622  if (last_it_noimprovement==num_iterations-1)
623  {
624  SG_PRINT("no improvement...\n");
626  }
627  else
629 
630  last_it_noimprovement=num_iterations;
631  }
632 
// Take the step: w <- w - alpha*grad_w, bias <- bias - alpha*grad_b.
633  CMath::vec1_plus_scalar_times_vec2(w, -alpha, grad_w, num_feat);
634  bias-=alpha*grad_b;
635 
636  update_projection(alpha, num_vec);
637  //compute_projection(num_feat, num_vec);
638  //CMath::display_vector(w, w_dim, "w");
639  //SG_PRINT("bias: %f\n", bias);
640  //CMath::display_vector(proj, num_vec, "proj");
641 
642  t.stop();
643 #ifdef DEBUG_SUBGRADIENTSVM
644  loop_time=t.time_diff_sec();
645 #endif
646  num_iterations++;
647 
649  break;
650  }
651  SG_DONE();
652 
653  SG_INFO("converged after %d iterations\n", num_iterations);
654 
655  obj=compute_objective(num_feat, num_vec);
656  SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n",
657  obj, alpha, dir_deriv, num_bound, num_active);
658 
659 #ifdef DEBUG_SUBGRADIENTSVM
661  SG_PRINT("bias: %f\n", bias);
662 #endif
663  SG_INFO("solver time:%f s\n", tim);
664 
665  cleanup();
666 
667  return true;
668 }

SHOGUN Machine Learning Toolbox - Documentation