SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TOPFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
13 #include <shogun/io/SGIO.h>
15 
16 using namespace shogun;
17 
// NOTE(review): the signature line for this body is absent from this listing;
// presumably this is the default constructor - confirm against the original file.
// The body only delegates to init().
19 {
20  init();
21 }
22 
24  int32_t size, CHMM* p, CHMM* n, bool neglin, bool poslin)
26 {
 // NOTE(review): the first line of this signature is absent from this listing.
 // Resets all members via init(), then records the two linear-model flags
 // before handing both models to set_models(). The flags are stored first,
 // so they are already in place while set_models() runs.
 // `size` is not referenced in the lines visible here.
27  init();
28  neglinear=neglin;
29  poslinear=poslin;
30 
31  set_models(p,n);
32 }
33 
// NOTE(review): the signature line for this body is absent from this listing;
// it copies state from `orig`, so presumably it is the copy constructor - confirm.
36 {
 // Start from the defaults set by init(), then take over the model pointers
 // and the linear-model flags of orig. The relevant-index tables are not
 // copied here; they stay zeroed as left by init().
 // NOTE(review): pos/neg are copied without a visible SG_REF, while cleanup
 // code elsewhere in this file calls SG_UNREF(pos)/SG_UNREF(neg) - this looks
 // like a reference-count imbalance; verify.
37  init();
38  pos=orig.pos;
39  neg=orig.neg;
40  neglinear=orig.neglinear;
41  poslinear=orig.poslinear;
42 }
43 
// NOTE(review): the signature line for this body is absent from this listing;
// presumably this is the destructor - confirm against the original file.
// Releases the a/b index arrays of both index tables and drops the
// references held on the two models.
45 {
 // NOTE(review): no SG_FREE of idx_p / idx_q is visible for either table
 // (listing numbers 46-47 and 53-54 are absent) - confirm they are freed.
48  SG_FREE(pos_relevant_indizes.idx_a_cols);
49  SG_FREE(pos_relevant_indizes.idx_a_rows);
50  SG_FREE(pos_relevant_indizes.idx_b_cols);
51  SG_FREE(pos_relevant_indizes.idx_b_rows);
52 
55  SG_FREE(neg_relevant_indizes.idx_a_cols);
56  SG_FREE(neg_relevant_indizes.idx_a_rows);
57  SG_FREE(neg_relevant_indizes.idx_b_cols);
58  SG_FREE(neg_relevant_indizes.idx_b_rows);
59 
60  SG_UNREF(pos);
61  SG_UNREF(neg);
62 }
63 
// NOTE(review): the signature line for this body is absent from this listing;
// the constructor above calls set_models(p,n), which matches the p/n used here.
// Installs p and n as the positive and negative model.
65 {
 // Both models are required; a reference is taken on each before storing.
 // NOTE(review): any models already held in pos/neg are overwritten without
 // a visible SG_UNREF - confirm whether repeated calls leak a reference.
66  ASSERT(p && n);
67  SG_REF(p);
68  SG_REF(n);
69 
70  pos=p;
71  neg=n;
 // Vector count is reset to 0 and the cached matrix pointer is cleared.
 // NOTE(review): listing number 74 is absent; whether the old matrix is
 // released before the pointer is cleared cannot be seen from here.
72  set_num_vectors(0);
73 
75  feature_matrix=NULL ;
76 
77 
 // NOTE(review): the statement controlled by this `if` and listing numbers
 // 79-83 are absent, so as listed the `if` would govern the SG_DEBUG below.
78  if (pos && pos->get_observations())
80 
84 
 // Debug summary of N, N, N*N and N*M for each model plus num_features.
85  SG_DEBUG( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(),num_features) ;
86 }
87 
89  int32_t num, int32_t &len, float64_t* target)
90 {
 // NOTE(review): the first line of this signature is absent from this listing.
 // Returns the feature vector for example `num`, with its length in `len`.
 // If `target` is non-NULL it is filled and returned; otherwise a buffer of
 // get_num_features() elements is allocated with SG_MALLOC and returned.
 // Nothing in this block frees that buffer.
 // NOTE(review): presumably the caller owns it - confirm.
91  float64_t* featurevector=target;
92 
93  if (!featurevector)
94  featurevector=SG_MALLOC(float64_t, get_num_features());
95 
 // Still NULL here means the allocation above did not yield a buffer.
96  if (!featurevector)
97  return NULL;
98 
99  compute_feature_vector(featurevector, num, len);
100 
101  return featurevector;
102 }
103 
105  float64_t* featurevector, int32_t num, int32_t& len)
106 {
 // NOTE(review): the first line of this signature is absent from this listing.
 // Fills `featurevector` for example index `num` and stores
 // get_num_features() in `len`. Layout written below:
 //   [0]      posx-negx
 //   then     positive-model entries  exp(derivative - posx)
 //   then     negative-model entries -exp(derivative - negx)
 // `p` is the running write position; `x` is an alias for `num`.
107  int32_t i,j,p=0,x=num;
108  int32_t idx=0;
109 
 // NOTE(review): the right-hand sides of posx and negx are cut off in this
 // listing (the line after each `?` is absent), so their exact definitions
 // cannot be documented from here.
110  float64_t posx=(poslinear) ?
112  float64_t negx=(neglinear) ?
114 
115  len=get_num_features();
116 
117  featurevector[p++]=(posx-negx);
118 
119  //first do positive model
120  if (poslinear)
121  {
 // Linear model: one entry per (i,j) pair, i over get_N(), j over get_M().
122  for (i=0; i<pos->get_N(); i++)
123  {
124  for (j=0; j<pos->get_M(); j++)
125  featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
126  }
127  }
128  else
129  {
 // Otherwise: only the parameters listed in pos_relevant_indizes,
 // in the fixed order p, q, a, b.
130  for (idx=0; idx< pos_relevant_indizes.num_p; idx++)
131  featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);
132 
133  for (idx=0; idx< pos_relevant_indizes.num_q; idx++)
134  featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);
135 
136  for (idx=0; idx< pos_relevant_indizes.num_a; idx++)
137  featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);
138 
139  for (idx=0; idx< pos_relevant_indizes.num_b; idx++)
140  featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);
141 
142 
 // Disabled variant that walks every state and parameter instead of the
 // precomputed relevant indices.
143  //for (i=0; i<pos->get_N(); i++)
144  //{
145  // featurevector[p++]=exp(pos->model_derivative_p(i, x)-posx);
146  // featurevector[p++]=exp(pos->model_derivative_q(i, x)-posx);
147 
148  // for (j=0; j<pos->get_N(); j++)
149  // featurevector[p++]=exp(pos->model_derivative_a(i, j, x)-posx);
150 
151  // for (j=0; j<pos->get_M(); j++)
152  // featurevector[p++]=exp(pos->model_derivative_b(i, j, x)-posx);
153  //}
154  }
155 
156  //then do negative
 // Same structure as the positive part, but every entry is negated and
 // negx / neg_relevant_indizes are used.
157  if (neglinear)
158  {
159  for (i=0; i<neg->get_N(); i++)
160  {
161  for (j=0; j<neg->get_M(); j++)
162  featurevector[p++]= - exp(neg->linear_model_derivative(i, j, x)-negx);
163  }
164  }
165  else
166  {
167  for (idx=0; idx< neg_relevant_indizes.num_p; idx++)
168  featurevector[p++]= - exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);
169 
170  for (idx=0; idx< neg_relevant_indizes.num_q; idx++)
171  featurevector[p++]= - exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);
172 
173  for (idx=0; idx< neg_relevant_indizes.num_a; idx++)
174  featurevector[p++]= - exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);
175 
176  for (idx=0; idx< neg_relevant_indizes.num_b; idx++)
177  featurevector[p++]= - exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);
178 
 // Disabled variant for the negative model, mirroring the one above.
179  //for (i=0; i<neg->get_N(); i++)
180  //{
181  // featurevector[p++]= - exp(neg->model_derivative_p(i, x)-negx);
182  // featurevector[p++]= - exp(neg->model_derivative_q(i, x)-negx);
183 
184  // for (j=0; j<neg->get_N(); j++)
185  // featurevector[p++]= - exp(neg->model_derivative_a(i, j, x)-negx);
186 
187  // for (j=0; j<neg->get_M(); j++)
188  // featurevector[p++]= - exp(neg->model_derivative_b(i, j, x)-negx);
189  //}
190  }
191 }
192 
// NOTE(review): the signature line for this body is absent from this listing,
// as are several body lines (listing numbers 197-198, 200, 202, 204-205, 221
// and 227). Visible behaviour: logs progress while iterating over all
// vectors and returns the member feature_matrix, or NULL if it is unset.
194 {
195  int32_t len=0;
196 
199  ASSERT(pos);
201 
 // Reports the cache size in units of 1024*1024 bytes.
203  SG_INFO( "allocating top feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0);
 // NOTE(review): the allocation this check refers to is not visible here.
206  if (!feature_matrix)
207  {
208  SG_ERROR( "allocation not successful!");
209  return NULL ;
210  } ;
211 
212  SG_INFO( "calculating top feature matrix\n");
213 
214  for (int32_t x=0; x<num_vectors; x++)
215  {
 // Progress output: a percentage about every tenth of the vectors and a
 // dot about every 200th; the +1 keeps the modulus non-zero when
 // num_vectors is small.
216  if (!(x % (num_vectors/10+1)))
217  SG_DEBUG( "%02d%%.", (int) (100.0*x/num_vectors));
218  else if (!(x % (num_vectors/200+1)))
219  SG_DEBUG( ".");
220 
 // NOTE(review): the per-vector computation (listing number 221) is
 // absent from this listing.
222  }
223 
224  SG_DONE();
225 
226  num_vectors=get_num_vectors() ;
228 
229  return feature_matrix;
230 }
231 
232 bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
233 {
234  int32_t i=0;
235  int32_t j=0;
236 
237  hmm_idx->num_p=0;
238  hmm_idx->num_q=0;
239  hmm_idx->num_a=0;
240  hmm_idx->num_b=0;
241 
242  for (i=0; i<hmm->get_N(); i++)
243  {
244  if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
245  hmm_idx->num_p++;
246 
247  if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
248  hmm_idx->num_q++;
249 
250  for (j=0; j<hmm->get_N(); j++)
251  {
252  if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
253  hmm_idx->num_a++;
254  }
255 
256  for (j=0; j<pos->get_M(); j++)
257  {
258  if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
259  hmm_idx->num_b++;
260  }
261  }
262 
263  if (hmm_idx->num_p > 0)
264  {
265  hmm_idx->idx_p=SG_MALLOC(int32_t, hmm_idx->num_p);
266  ASSERT(hmm_idx->idx_p);
267  }
268 
269  if (hmm_idx->num_q > 0)
270  {
271  hmm_idx->idx_q=SG_MALLOC(int32_t, hmm_idx->num_q);
272  ASSERT(hmm_idx->idx_q);
273  }
274 
275  if (hmm_idx->num_a > 0)
276  {
277  hmm_idx->idx_a_rows=SG_MALLOC(int32_t, hmm_idx->num_a);
278  hmm_idx->idx_a_cols=SG_MALLOC(int32_t, hmm_idx->num_a);
279  ASSERT(hmm_idx->idx_a_rows);
280  ASSERT(hmm_idx->idx_a_cols);
281  }
282 
283  if (hmm_idx->num_b > 0)
284  {
285  hmm_idx->idx_b_rows=SG_MALLOC(int32_t, hmm_idx->num_b);
286  hmm_idx->idx_b_cols=SG_MALLOC(int32_t, hmm_idx->num_b);
287  ASSERT(hmm_idx->idx_b_rows);
288  ASSERT(hmm_idx->idx_b_cols);
289  }
290 
291 
292  int32_t idx_p=0;
293  int32_t idx_q=0;
294  int32_t idx_a=0;
295  int32_t idx_b=0;
296 
297  for (i=0; i<hmm->get_N(); i++)
298  {
299  if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
300  {
301  ASSERT(idx_p < hmm_idx->num_p);
302  hmm_idx->idx_p[idx_p++]=i;
303  }
304 
305  if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
306  {
307  ASSERT(idx_q < hmm_idx->num_q);
308  hmm_idx->idx_q[idx_q++]=i;
309  }
310 
311  for (j=0; j<hmm->get_N(); j++)
312  {
313  if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
314  {
315  ASSERT(idx_a < hmm_idx->num_a);
316  hmm_idx->idx_a_rows[idx_a]=i;
317  hmm_idx->idx_a_cols[idx_a++]=j;
318  }
319  }
320 
321  for (j=0; j<pos->get_M(); j++)
322  {
323  if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
324  {
325  ASSERT(idx_b < hmm_idx->num_b);
326  hmm_idx->idx_b_rows[idx_b]=i;
327  hmm_idx->idx_b_cols[idx_b++]=j;
328  }
329  }
330  }
331 
332  return true;
333 }
334 
// NOTE(review): the signature line for this body is absent from this listing.
// Returns the feature count: 0 unless both models are set, otherwise 1 for
// the leading component plus a contribution from each model.
336 {
337  int32_t num=0;
338 
339  if (pos && neg)
340  {
341  num+=1; //zeroth- component
342 
 // A linear model contributes get_N()*get_M() entries.
 // NOTE(review): the non-linear branches below are empty in this listing
 // (listing numbers 347 and 354 are absent), so their contribution
 // cannot be documented from here.
343  if (poslinear)
344  num+=pos->get_N()*pos->get_M();
345  else
346  {
348  }
349 
350  if (neglinear)
351  num+=neg->get_N()*neg->get_M();
352  else
353  {
355  }
356 
 // Disabled formula counting every parameter of a non-linear model.
357  //num+=1; //zeroth- component
358  //num+= (poslinear) ? (pos->get_N()*pos->get_M()) : (pos->get_N()*(1+pos->get_N()+1+pos->get_M()));
359  //num+= (neglinear) ? (neg->get_N()*neg->get_M()) : (neg->get_N()*(1+neg->get_N()+1+neg->get_M()));
360  }
361  return num;
362 }
363 
364 void CTOPFeatures::init()
365 {
366  pos = NULL;
367  neg = NULL;
368  neglinear = false;
369  poslinear = false;
370 
371  memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
372  memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));
373 
374  unset_generic();
375  //TODO serialize HMMs
376  //m_parameters->add((CSGObject**) &pos, "pos", "HMM for positive class.");
377  //m_parameters->add((CSGObject**) &neg, "neg", "HMM for negative class.");
378  m_parameters->add(&neglinear, "neglinear", "If negative HMM is a LinearHMM");
379  m_parameters->add(&poslinear, "poslinear", "If positive HMM is a LinearHMM");
380 }

SHOGUN Machine Learning Toolbox - Documentation