SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Features.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Subset support written (W) 2011 Heiko Strathmann
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
15 #include <shogun/io/SGIO.h>
16 #include <shogun/base/Parameter.h>
17 
18 #include <string.h>
19 
20 using namespace shogun;
21 
22 CFeatures::CFeatures(int32_t size)
23 : CSGObject()
24 {
25  init();
26  cache_size = size;
27 }
28 
30 : CSGObject(orig)
31 {
32  init();
33 
34  preproc = orig.preproc;
35  num_preproc = orig.num_preproc;
36 
37  preprocessed=SG_MALLOC(bool, orig.num_preproc);
38  memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc);
39 }
40 
42 : CSGObject()
43 {
44  init();
45 
46  load(loader);
47  SG_INFO("Feature object loaded (%p)\n",this) ;
48 }
49 
51 {
53  delete m_subset;
54 }
55 
56 void
57 CFeatures::init()
58 {
59  m_parameters->add(&properties, "properties",
60  "Feature properties.");
61  m_parameters->add(&cache_size, "cache_size",
62  "Size of cache in MB.");
63 
64  m_parameters->add_vector((CSGObject***) &preproc,
65  &num_preproc, "preproc",
66  "List of preprocessors.");
67  m_parameters->add_vector(&preprocessed,
68  &num_preproc, "preprocessed",
69  "Feature[i] is already preprocessed.");
70 
71  m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object");
72 
73  m_subset=NULL;
74  properties = FP_NONE;
75  cache_size = 0;
76  preproc = NULL;
77  num_preproc = 0;
78  preprocessed = NULL;
79 }
80 
83 {
84  SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc);
85  ASSERT(p);
86 
87  bool* preprocd=SG_MALLOC(bool, num_preproc+1);
88  CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1);
89  for (int32_t i=0; i<num_preproc; i++)
90  {
91  pps[i]=preproc[i];
92  preprocd[i]=preprocessed[i];
93  }
94  SG_FREE(preproc);
95  SG_FREE(preprocessed);
96  preproc=pps;
97  preprocessed=preprocd;
98  preproc[num_preproc]=p;
99  preprocessed[num_preproc]=false;
100 
101  num_preproc++;
102 
103  for (int32_t i=0; i<num_preproc; i++)
104  SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ;
105 
106  SG_REF(p);
107 
108  return num_preproc;
109 }
110 
113 {
114  if (num<num_preproc)
115  {
116  SG_REF(preproc[num]);
117  return preproc[num];
118  }
119  else
120  return NULL;
121 }
122 
125 {
126  int32_t num=0;
127 
128  for (int32_t i=0; i<num_preproc; i++)
129  {
130  if (preprocessed[i])
131  num++;
132  }
133 
134  return num;
135 }
136 
139 {
140  while (del_preprocessor(0));
141 }
142 
145 {
146  CPreprocessor** pps=NULL;
147  bool* preprocd=NULL;
148  CPreprocessor* removed_preproc=NULL;
149 
150  if (num_preproc>0 && num<num_preproc)
151  {
152  removed_preproc=preproc[num];
153 
154  if (num_preproc>1)
155  {
156  pps= SG_MALLOC(CPreprocessor*, num_preproc-1);
157  preprocd= SG_MALLOC(bool, num_preproc-1);
158 
159  if (pps && preprocd)
160  {
161  int32_t j=0;
162  for (int32_t i=0; i<num_preproc; i++)
163  {
164  if (i!=num)
165  {
166  pps[j]=preproc[i];
167  preprocd[j]=preprocessed[i];
168  j++;
169  }
170  }
171  }
172  }
173 
174  SG_FREE(preproc);
175  preproc=pps;
176  SG_FREE(preprocessed);
177  preprocessed=preprocd;
178 
179  num_preproc--;
180 
181  for (int32_t i=0; i<num_preproc; i++)
182  SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ;
183  }
184 
185  SG_UNREF(removed_preproc);
186  return removed_preproc;
187 }
188 
190 {
191  preprocessed[num]=true;
192 }
193 
194 bool CFeatures::is_preprocessed(int32_t num)
195 {
196  return preprocessed[num];
197 }
198 
200 {
201  return num_preproc;
202 }
203 
205 {
206  return cache_size;
207 }
208 
209 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors)
210 {
212  return false;
213 }
214 
216 {
217  SG_INFO( "%p - ", this);
218  switch (get_feature_class())
219  {
220  case C_UNKNOWN:
221  SG_INFO( "C_UNKNOWN ");
222  break;
223  case C_SIMPLE:
224  SG_INFO( "C_SIMPLE ");
225  break;
226  case C_SPARSE:
227  SG_INFO( "C_SPARSE ");
228  break;
229  case C_STRING:
230  SG_INFO( "C_STRING ");
231  break;
232  case C_COMBINED:
233  SG_INFO( "C_COMBINED ");
234  break;
235  case C_COMBINED_DOT:
236  SG_INFO( "C_COMBINED_DOT ");
237  break;
238  case C_WD:
239  SG_INFO( "C_WD ");
240  break;
241  case C_SPEC:
242  SG_INFO( "C_SPEC ");
243  break;
244  case C_WEIGHTEDSPEC:
245  SG_INFO( "C_WEIGHTEDSPEC ");
246  break;
247  case C_STREAMING_SIMPLE:
248  SG_INFO( "C_STREAMING_SIMPLE ");
249  break;
250  case C_STREAMING_SPARSE:
251  SG_INFO( "C_STREAMING_SPARSE ");
252  break;
253  case C_STREAMING_STRING:
254  SG_INFO( "C_STREAMING_STRING ");
255  break;
256  case C_STREAMING_VW:
257  SG_INFO( "C_STREAMING_VW ");
258  break;
259  case C_ANY:
260  SG_INFO( "C_ANY ");
261  break;
262  default:
263  SG_ERROR( "ERROR UNKNOWN FEATURE CLASS");
264  }
265 
266  switch (get_feature_type())
267  {
268  case F_UNKNOWN:
269  SG_INFO( "F_UNKNOWN \n");
270  break;
271  case F_CHAR:
272  SG_INFO( "F_CHAR \n");
273  break;
274  case F_BYTE:
275  SG_INFO( "F_BYTE \n");
276  break;
277  case F_SHORT:
278  SG_INFO( "F_SHORT \n");
279  break;
280  case F_WORD:
281  SG_INFO( "F_WORD \n");
282  break;
283  case F_INT:
284  SG_INFO( "F_INT \n");
285  break;
286  case F_UINT:
287  SG_INFO( "F_UINT \n");
288  break;
289  case F_LONG:
290  SG_INFO( "F_LONG \n");
291  break;
292  case F_ULONG:
293  SG_INFO( "F_ULONG \n");
294  break;
295  case F_SHORTREAL:
296  SG_INFO( "F_SHORTEAL \n");
297  break;
298  case F_DREAL:
299  SG_INFO( "F_DREAL \n");
300  break;
301  case F_LONGREAL:
302  SG_INFO( "F_LONGREAL \n");
303  break;
304  case F_ANY:
305  SG_INFO( "F_ANY \n");
306  break;
307  default:
308  SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n");
309  }
310 }
311 
312 
313 void CFeatures::load(CFile* loader)
314 {
318 }
319 
320 void CFeatures::save(CFile* writer)
321 {
325 }
326 
328 {
329  bool result=false;
330 
331  if (f)
332  result= ( (this->get_feature_class() == f->get_feature_class()) &&
333  (this->get_feature_type() == f->get_feature_type()));
334  return result;
335 }
336 
338 {
339  return (properties & p) != 0;
340 }
341 
343 {
344  properties |= p;
345 }
346 
348 {
349  properties &= (properties | p) ^ p;
350 }
351 
353 {
355  m_subset=subset;
356  SG_REF(subset);
358 }
359 
361 {
362  return m_subset ? m_subset->subset_idx_conversion(idx) : idx;
363 }
364 
366 {
367  return m_subset!=NULL;
368 }
369 
371 {
372  set_subset(NULL);
373 }
374 
376 {
377  SG_ERROR("copy_subset and therefore model storage of CMachine "
378  "(required for cross-validation and model-selection is ",
379  "not yet implemented for feature type %s\n", get_name());
380  return NULL;
381 }

SHOGUN Machine Learning Toolbox - Documentation