SHOGUN  v1.1.0
SVMSGD.cpp
/*
   SVM with stochastic gradient
   Copyright (C) 2007- Leon Bottou

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
   $Id: svmsgd.cpp,v 1.13 2007/10/02 20:40:06 cvs Exp $

   Shogun adjustments (w) 2008-2009 Soeren Sonnenburg
*/

#include <shogun/classifier/svm/SVMSGD.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/loss/HingeLoss.h>

using namespace shogun;

CSVMSGD::CSVMSGD()
: CLinearMachine()
{
    init();
}

CSVMSGD::CSVMSGD(float64_t C)
: CLinearMachine()
{
    init();

    C1=C;
    C2=C;
}

CSVMSGD::CSVMSGD(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;

    set_features(traindat);
    set_labels(trainlab);
}

CSVMSGD::~CSVMSGD()
{
    SG_UNREF(loss);
}

void CSVMSGD::set_loss_function(CLossFunction* loss_func)
{
    if (loss)
        SG_UNREF(loss);
    loss=loss_func;
    SG_REF(loss);
}

bool CSVMSGD::train_machine(CFeatures* data)
{
    // allocate memory for w and initialize everything (w and bias) with 0
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }

    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    w_dim=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels);
    ASSERT(num_vec>0);

    SG_FREE(w);
    w=SG_MALLOC(float64_t, w_dim);
    memset(w, 0, w_dim*sizeof(float64_t));
    bias=0;

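    // the SVM cost C translates into the SGD regularization constant
    // lambda = 1/(C*num_vec): a larger cost means weaker weight decay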
    float64_t lambda= 1.0/(C1*num_vec);

    // Shift t in order to have a
    // reasonable initial learning rate.
    // This assumes |x| \approx 1.
    float64_t maxw = 1.0 / sqrt(lambda);
    float64_t typw = sqrt(maxw);
    float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
    t = 1 / (eta0 * lambda);
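    // with eta = 1/(lambda*t) in the loop below, this choice of t
    // makes the very first learning rate exactly eta0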

    SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

    // estimate bscale and the weight decay skip, then do the sgd
    calibrate();

    SG_INFO("Training on %d vectors\n", num_vec);
    CSignal::clear_cancel();

    ELossType loss_type = loss->get_loss_type();
    bool is_log_loss = false;
    if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
        is_log_loss = true;

    for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
    {
        count = skip;
        for (int32_t i=0; i<num_vec; i++)
        {
            float64_t eta = 1.0 / (lambda * t);
            float64_t y = labels->get_label(i);
            float64_t z = y * (features->dense_dot(i, w, w_dim) + bias);

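            // hinge-type losses have zero derivative once the margin
            // z >= 1 is met, so only violators trigger an update;
            // log-type losses contribute a gradient on every example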
            if (z < 1 || is_log_loss)
            {
                float64_t etd = -eta * loss->first_derivative(z,1);
                features->add_to_dense_vec(etd * y / wscale, i, w, w_dim);

                if (use_bias)
                {
                    if (use_regularized_bias)
                        bias *= 1 - eta * lambda * bscale;
                    bias += etd * y * bscale;
                }
            }

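            // weight decay is applied lazily: every `skip` iterations the
            // accumulated shrinkage (1 - eta*lambda)^skip is applied in one
            // pass, using the linear approximation while it stays close to 1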
            if (--count <= 0)
            {
                float64_t r = 1 - eta * lambda * skip;
                if (r < 0.8)
                    r = pow(1 - eta * lambda, skip);
                CMath::scale_vector(r, w, w_dim);
                count = skip;
            }
            t++;
        }
    }

    float64_t wnorm = CMath::dot(w,w, w_dim);
    SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias);

    return true;
}

void CSVMSGD::calibrate()
{
    ASSERT(features);
    int32_t num_vec=features->get_num_vectors();
    int32_t c_dim=features->get_dim_feature_space();

    ASSERT(num_vec>0);
    ASSERT(c_dim>0);

    float64_t* c=SG_MALLOC(float64_t, c_dim);
    memset(c, 0, c_dim*sizeof(float64_t));

    SG_INFO("Estimating sparsity and bscale num_vec=%d num_feat=%d.\n", num_vec, c_dim);

    // compute average gradient size
    int32_t n = 0;
    float64_t m = 0;
    float64_t r = 0;
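    // n counts examples scanned, m tracks the largest accumulated
    // per-dimension feature mass (drives bscale), r sums the nonzero
    // feature counts (drives the weight decay skip)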

    for (int32_t j=0; j<num_vec && m<=1000; j++, n++)
    {
        r += features->get_nnz_features_for_vector(j);
        features->add_to_dense_vec(1, j, c, c_dim, true);

        //waste cpu cycles for readability
        //(only changed dims need checking)
        m=CMath::max(c, c_dim);
    }

    // bias update scaling
    bscale = 0.5*m/n;

    // compute weight decay skip
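    // r/n is the average number of nonzeros per example, so denser
    // data yields a smaller skip, i.e. more frequent decay passes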
    skip = (int32_t) ((16 * n * c_dim) / r);
    SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale);

    SG_FREE(c);
}

void CSVMSGD::init()
{
    t=1;
    C1=1;
    C2=1;
    wscale=1;
    bscale=1;
    epochs=5;
    skip=1000;
    count=1000;
    use_bias=true;

    use_regularized_bias=false;

    loss=new CHingeLoss();
    SG_REF(loss);

    m_parameters->add(&C1, "C1", "Cost constant 1.");
    m_parameters->add(&C2, "C2", "Cost constant 2.");
    m_parameters->add(&wscale, "wscale", "W scale");
    m_parameters->add(&bscale, "bscale", "b scale");
    m_parameters->add(&epochs, "epochs", "epochs");
    m_parameters->add(&skip, "skip", "skip");
    m_parameters->add(&count, "count", "count");
    m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
    m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
}
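
A minimal usage sketch for this class. The CSVMSGD(C, features, labels) constructor is the one defined above; the CSimpleFeatures/CLabels construction, init_shogun()/exit_shogun(), set_epochs() and apply() calls are assumed from the 1.x API and may differ between releases.

#include <cstring>
#include <shogun/base/init.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/features/Labels.h>
#include <shogun/classifier/svm/SVMSGD.h>

using namespace shogun;

int main()
{
    init_shogun();

    // four 2-d training points stored column-major, labels in {-1,+1}
    SGMatrix<float64_t> x(2, 4);
    float64_t pts[8]={ -1,-1,  -1,1,  1,-1,  1,1 };
    memcpy(x.matrix, pts, sizeof(pts));

    SGVector<float64_t> y(4);
    y[0]=-1; y[1]=-1; y[2]=1; y[3]=1;

    // C=1.0, so train_machine() uses lambda = 1/(C*num_vec) = 0.25
    CSVMSGD* svm=new CSVMSGD(1.0,
        new CSimpleFeatures<float64_t>(x), new CLabels(y));
    svm->set_epochs(10);
    svm->train();

    CLabels* out=svm->apply(); // predictions on the training data
    SG_UNREF(out);
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}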
