SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
WeightedDegreeStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
13 #include <shogun/io/SGIO.h>
14 #include <shogun/lib/Signal.h>
15 #include <shogun/lib/Trie.h>
16 #include <shogun/base/Parameter.h>
17 #include <shogun/base/Parallel.h>
18 
23 
24 #ifndef WIN32
25 #include <pthread.h>
26 #endif
27 
28 using namespace shogun;
29 
30 #ifndef DOXYGEN_SHOULD_SKIP_THIS
31 struct S_THREAD_PARAM
32 {
33 
34  int32_t* vec;
35  float64_t* result;
36  float64_t* weights;
38  CTrie<DNATrie>* tries;
39  float64_t factor;
40  int32_t j;
41  int32_t start;
42  int32_t end;
43  int32_t length;
44  int32_t* vec_idx;
45 };
46 #endif // DOXYGEN_SHOULD_SKIP_THIS
47 
49 : CStringKernel<char>()
50 {
51  init();
52 }
53 
54 
56  int32_t d, EWDKernType t)
57 : CStringKernel<char>()
58 {
59  init();
60 
61  degree=d;
62  type=t;
63 
64  if (type!=E_EXTERNAL)
66 }
67 
69  float64_t *w, int32_t d)
70 : CStringKernel<char>(10)
71 {
72  init();
73 
75  degree=d;
76 
80 
81  for (int32_t i=0; i<degree*(1+max_mismatch); i++)
82  weights[i]=w[i];
83 }
84 
87 : CStringKernel<char>(10)
88 {
89  init();
90  degree=d;
91  type=E_WD;
94  init(l, r);
95 }
96 
98 {
99  cleanup();
100 
101  SG_FREE(weights);
102  weights=NULL;
103  weights_degree=0;
104  weights_length=0;
105 
107  block_weights=NULL;
108 
110  position_weights=NULL;
111 
113  weights_buffer=NULL;
114 }
115 
116 
118 {
119  SG_DEBUG( "deleting CWeightedDegreeStringKernel optimization\n");
121 
122  if (tries!=NULL)
123  tries->destroy();
124 
126 }
127 
129 {
130  ASSERT(lhs);
131 
132  seq_length=((CStringFeatures<char>*) lhs)->get_max_vector_length();
133 
134  if (tries!=NULL)
135  {
136  tries->destroy() ;
138  }
139 }
140 
/**
 * Initialize the kernel for the feature pair (l, r).
 *
 * Visible behaviour:
 *  - records whether l / r differ from the currently stored lhs / rhs,
 *  - raises SG_ERROR when a changed side does not consist of strings that all
 *    have the maximum vector length of the left-hand side,
 *  - fetches the alphabets of both sides, asserts they are of the same kind
 *    and drops KP_LINADD and KP_BATCHEVALUATION from `properties` unless the
 *    alphabet is DNA or RNA,
 *  - releases an existing `tries` object.
 *
 * NOTE(review): several lines of this function are absent from this listing
 * (the listing numbers skip 146, 151-152, 161, 172, 175-176 and 178). The
 * declarations of `sf_l` / `sf_r` and whatever re-creates `tries` are among
 * the missing lines -- confirm against the real source before editing.
 *
 * @param l features for the left-hand side
 * @param r features for the right-hand side
 * @return result of init_normalizer()
 */
141 bool CWeightedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
142 {
     // Compare against the members before anything overwrites them.
143  int32_t lhs_changed=(lhs!=l);
144  int32_t rhs_changed=(rhs!=r);
145 
147 
148  SG_DEBUG("lhs_changed: %i\n", lhs_changed);
149  SG_DEBUG("rhs_changed: %i\n", rhs_changed);
150 
153 
     // Only sides that actually changed are re-validated; both are checked
     // against the maximum vector length of the left-hand side.
154  int32_t len=sf_l->get_max_vector_length();
155  if (lhs_changed && !sf_l->have_same_length(len))
156  SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n");
157 
158  if (rhs_changed && !sf_r->have_same_length(len))
159  SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n");
160 
162  alphabet=sf_l->get_alphabet();
163  CAlphabet* ralphabet=sf_r->get_alphabet();
164 
     // For anything but DNA/RNA clear the linadd and batch-evaluation bits.
165  if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
166  properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
167 
     // Both sides must use the same alphabet type; ralphabet was only needed
     // for this check and is released right away.
168  ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet());
169  SG_UNREF(ralphabet);
170 
171  if (tries!=NULL) {
173  SG_UNREF(tries);
174  }
177 
179 
180  return init_normalizer();
181 }
182 
184 {
185  SG_DEBUG("deleting CWeightedDegreeStringKernel optimization\n");
187 
189  block_weights=NULL;
190 
191  if (tries!=NULL)
192  {
193  tries->destroy();
194  SG_UNREF(tries);
195  tries=NULL;
196  }
197 
198  seq_length=0;
199  tree_initialized = false;
200 
202  alphabet=NULL;
203 
205 }
206 
/**
 * Add `count` weighted examples to the tries.
 *
 * With tree_num<0 every example is added to all trees (with progress output);
 * otherwise each example is added only to the single tree `tree_num`. In both
 * modes the mismatch variant of the insertion routine is used when
 * max_mismatch is non-zero.
 *
 * NOTE(review): listing line 212 is absent from this view; the preceding
 * "deleting ... optimization" message suggests an existing optimization is
 * cleared there -- confirm against the real source.
 *
 * @param count    number of entries in IDX and alphas
 * @param IDX      indices of the examples to add
 * @param alphas   weight of each example, parallel to IDX
 * @param tree_num tree to fill, or a negative value for all trees
 * @return always true
 */
207 bool CWeightedDegreeStringKernel::init_optimization(int32_t count, int32_t* IDX, float64_t* alphas, int32_t tree_num)
208 {
209  if (tree_num<0)
210  SG_DEBUG( "deleting CWeightedDegreeStringKernel optimization\n");
211 
213 
214  if (tree_num<0)
215  SG_DEBUG( "initializing CWeightedDegreeStringKernel optimization\n") ;
216 
217  for (int32_t i=0; i<count; i++)
218  {
219  if (tree_num<0)
220  {
     // Report progress roughly every tenth of the way; the +1 keeps the
     // modulus non-zero when count<10.
221  if ( (i % (count/10+1)) == 0)
222  SG_PROGRESS(i, 0, count);
223 
224  if (max_mismatch==0)
225  add_example_to_tree(IDX[i], alphas[i]) ;
226  else
227  add_example_to_tree_mismatch(IDX[i], alphas[i]) ;
228 
229  //SG_DEBUG( "number of used trie nodes: %i\n", tries.get_num_used_nodes()) ;
230  }
231  else
232  {
     // Single-tree mode: no progress output, only tree `tree_num` is touched.
233  if (max_mismatch==0)
234  add_example_to_single_tree(IDX[i], alphas[i], tree_num) ;
235  else
236  add_example_to_single_tree_mismatch(IDX[i], alphas[i], tree_num) ;
237  }
238  }
239 
240  if (tree_num<0)
241  SG_DONE();
242 
243  //tries.compact_nodes(NO_CHILD, 0, weights) ;
244 
245  set_is_initialized(true) ;
246  return true ;
247 }
248 
250 {
251  if (get_is_initialized())
252  {
253  if (tries!=NULL)
255  set_is_initialized(false);
256  return true;
257  }
258 
259  return false;
260 }
261 
262 
264  char* avec, int32_t alen, char* bvec, int32_t blen)
265 {
266  float64_t sum = 0.0;
267 
268  for (int32_t i=0; i<alen; i++)
269  {
270  float64_t sumi = 0.0;
271  int32_t mismatches=0;
272 
273  for (int32_t j=0; (i+j<alen) && (j<degree); j++)
274  {
275  if (avec[i+j]!=bvec[i+j])
276  {
277  mismatches++ ;
278  if (mismatches>max_mismatch)
279  break ;
280  } ;
281  sumi += weights[j+degree*mismatches];
282  }
283  if (position_weights!=NULL)
284  sum+=position_weights[i]*sumi ;
285  else
286  sum+=sumi ;
287  }
288  return sum ;
289 }
290 
292  char* avec, int32_t alen, char* bvec, int32_t blen)
293 {
294  ASSERT(alen==blen);
295 
296  float64_t sum=0;
297  int32_t match_len=-1;
298 
299  for (int32_t i=0; i<alen; i++)
300  {
301  if (avec[i]==bvec[i])
302  match_len++;
303  else
304  {
305  if (match_len>=0)
306  sum+=block_weights[match_len];
307  match_len=-1;
308  }
309  }
310 
311  if (match_len>=0)
312  sum+=block_weights[match_len];
313 
314  return sum;
315 }
316 
318  char* avec, int32_t alen, char* bvec, int32_t blen)
319 {
320  float64_t sum = 0.0;
321 
322  for (int32_t i=0; i<alen; i++)
323  {
324  float64_t sumi = 0.0;
325 
326  for (int32_t j=0; (i+j<alen) && (j<degree); j++)
327  {
328  if (avec[i+j]!=bvec[i+j])
329  break ;
330  sumi += weights[j];
331  }
332  if (position_weights!=NULL)
333  sum+=position_weights[i]*sumi ;
334  else
335  sum+=sumi ;
336  }
337  return sum ;
338 }
339 
341  char* avec, int32_t alen, char* bvec, int32_t blen)
342 {
343  float64_t sum = 0.0;
344 
345  for (int32_t i=0; i<alen; i++)
346  {
347  float64_t sumi=0.0;
348  for (int32_t j=0; (i+j<alen) && (j<degree); j++)
349  {
350  if (avec[i+j]!=bvec[i+j])
351  break;
352  sumi += weights[i*degree+j];
353  }
354  if (position_weights!=NULL)
355  sum += position_weights[i]*sumi ;
356  else
357  sum += sumi ;
358  }
359 
360  return sum ;
361 }
362 
363 
364 float64_t CWeightedDegreeStringKernel::compute(int32_t idx_a, int32_t idx_b)
365 {
366  int32_t alen, blen;
367  bool free_avec, free_bvec;
368  char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
369  char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
370  float64_t result=0;
371 
372  if (max_mismatch==0 && length==0 && block_computation)
373  result=compute_using_block(avec, alen, bvec, blen);
374  else
375  {
376  if (max_mismatch>0)
377  result=compute_with_mismatch(avec, alen, bvec, blen);
378  else if (length==0)
379  result=compute_without_mismatch(avec, alen, bvec, blen);
380  else
381  result=compute_without_mismatch_matrix(avec, alen, bvec, blen);
382  }
383  ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
384  ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
385 
386  return result;
387 }
388 
389 
391  int32_t idx, float64_t alpha)
392 {
393  ASSERT(alphabet);
395 
396  int32_t len=0;
397  bool free_vec;
398  char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
399  ASSERT(max_mismatch==0);
400  int32_t *vec=SG_MALLOC(int32_t, len);
401 
402  for (int32_t i=0; i<len; i++)
403  vec[i]=alphabet->remap_to_bin(char_vec[i]);
404  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
405 
406  if (length == 0 || max_mismatch > 0)
407  {
408  for (int32_t i=0; i<len; i++)
409  {
410  float64_t alpha_pw=alpha;
411  /*if (position_weights!=NULL)
412  alpha_pw *= position_weights[i] ;*/
413  if (alpha_pw==0.0)
414  continue;
415  ASSERT(tries);
416  tries->add_to_trie(i, 0, vec, normalizer->normalize_lhs(alpha_pw, idx), weights, (length!=0));
417  }
418  }
419  else
420  {
421  for (int32_t i=0; i<len; i++)
422  {
423  float64_t alpha_pw=alpha;
424  /*if (position_weights!=NULL)
425  alpha_pw = alpha*position_weights[i] ;*/
426  if (alpha_pw==0.0)
427  continue ;
428  ASSERT(tries);
429  tries->add_to_trie(i, 0, vec, normalizer->normalize_lhs(alpha_pw, idx), weights, (length!=0));
430  }
431  }
432  SG_FREE(vec);
433  tree_initialized=true ;
434 }
435 
437  int32_t idx, float64_t alpha, int32_t tree_num)
438 {
439  ASSERT(alphabet);
441 
442  int32_t len;
443  bool free_vec;
444  char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
445  ASSERT(max_mismatch==0);
446  int32_t *vec = SG_MALLOC(int32_t, len);
447 
448  for (int32_t i=tree_num; i<tree_num+degree && i<len; i++)
449  vec[i]=alphabet->remap_to_bin(char_vec[i]);
450  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
451 
452 
453  ASSERT(tries);
454  if (alpha!=0.0)
455  tries->add_to_trie(tree_num, 0, vec, normalizer->normalize_lhs(alpha, idx), weights, (length!=0));
456 
457  SG_FREE(vec);
458  tree_initialized=true ;
459 }
460 
462 {
463  ASSERT(tries);
464  ASSERT(alphabet);
466 
467  int32_t len ;
468  bool free_vec;
469  char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
470 
471  int32_t *vec = SG_MALLOC(int32_t, len);
472 
473  for (int32_t i=0; i<len; i++)
474  vec[i]=alphabet->remap_to_bin(char_vec[i]);
475  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
476 
477  for (int32_t i=0; i<len; i++)
478  {
479  if (alpha!=0.0)
480  tries->add_example_to_tree_mismatch_recursion(NO_CHILD, i, normalizer->normalize_lhs(alpha, idx), &vec[i], len-i, 0, 0, max_mismatch, weights);
481  }
482 
483  SG_FREE(vec);
484  tree_initialized=true ;
485 }
486 
488  int32_t idx, float64_t alpha, int32_t tree_num)
489 {
490  ASSERT(tries);
491  ASSERT(alphabet);
493 
494  int32_t len=0;
495  bool free_vec;
496  char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
497  int32_t *vec=SG_MALLOC(int32_t, len);
498 
499  for (int32_t i=tree_num; i<len && i<tree_num+degree; i++)
500  vec[i]=alphabet->remap_to_bin(char_vec[i]);
501  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
502 
503  if (alpha!=0.0)
504  {
506  NO_CHILD, tree_num, normalizer->normalize_lhs(alpha, idx), &vec[tree_num], len-tree_num,
507  0, 0, max_mismatch, weights);
508  }
509 
510  SG_FREE(vec);
511  tree_initialized=true;
512 }
513 
514 
516 {
517  ASSERT(alphabet);
519 
520  int32_t len=0;
521  bool free_vec;
522  char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
523  ASSERT(char_vec && len>0);
524  int32_t *vec=SG_MALLOC(int32_t, len);
525 
526  for (int32_t i=0; i<len; i++)
527  vec[i]=alphabet->remap_to_bin(char_vec[i]);
528  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
529 
530  float64_t sum=0;
531  ASSERT(tries);
532  for (int32_t i=0; i<len; i++)
533  sum+=tries->compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0));
534 
535  SG_FREE(vec);
536  return normalizer->normalize_rhs(sum, idx);
537 }
538 
540  int32_t idx, float64_t* LevelContrib)
541 {
542  ASSERT(alphabet);
544 
545  int32_t len ;
546  bool free_vec;
547  char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
548 
549  int32_t *vec = SG_MALLOC(int32_t, len);
550 
551  for (int32_t i=0; i<len; i++)
552  vec[i]=alphabet->remap_to_bin(char_vec[i]);
553  ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
554 
555  ASSERT(tries);
556  for (int32_t i=0; i<len; i++)
557  {
558  tries->compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
559  normalizer->normalize_rhs(1.0, idx),
560  mkl_stepsize, weights, (length!=0));
561  }
562 
563  SG_FREE(vec);
564 }
565 
567 {
568  ASSERT(tries);
569  return tries->compute_abs_weights(len);
570 }
571 
573 {
574  ASSERT(degree>0);
575  ASSERT(p_type==E_WD);
576 
577  SG_FREE(weights);
580  weights_length=1;
581 
582  if (weights)
583  {
584  int32_t i;
585  float64_t sum=0;
586  for (i=0; i<degree; i++)
587  {
588  weights[i]=degree-i;
589  sum+=weights[i];
590  }
591  for (i=0; i<degree; i++)
592  weights[i]/=sum;
593 
594  for (i=0; i<degree; i++)
595  {
596  for (int32_t j=1; j<=max_mismatch; j++)
597  {
598  if (j<i+1)
599  {
600  int32_t nk=CMath::nchoosek(i+1, j);
601  weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j));
602  }
603  else
604  weights[i+j*degree]= 0;
605  }
606  }
607 
608  if (which_degree>=0)
609  {
610  ASSERT(which_degree<degree);
611  for (i=0; i<degree; i++)
612  {
613  if (i!=which_degree)
614  weights[i]=0;
615  else
616  weights[i]=1;
617  }
618  }
619  return true;
620  }
621  else
622  return false;
623 }
624 
626 {
627  float64_t* ws=new_weights.matrix;
628  int32_t d=new_weights.num_rows;
629  int32_t len=new_weights.num_cols;
630 
631  if (d!=degree || len<0)
632  SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree);
633 
634  degree=d;
635  length=len;
636 
637  if (len <= 0)
638  len=1;
639 
642 
643 
644  SG_DEBUG("Creating weights of size %dx%d\n", weights_degree, weights_length);
645  int32_t num_weights=weights_degree*weights_length;
646  SG_FREE(weights);
647  weights=SG_MALLOC(float64_t, num_weights);
648 
649  for (int32_t i=0; i<degree*len; i++)
650  weights[i]=ws[i];
651 
652  return true;
653 }
654 
656  float64_t* pws, int32_t len)
657 {
658  if (len==0)
659  {
661  position_weights=NULL;
662  ASSERT(tries);
664  }
665 
666  if (seq_length!=len)
667  SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
668 
672  ASSERT(tries);
674 
675  if (position_weights)
676  {
677  for (int32_t i=0; i<len; i++)
678  position_weights[i]=pws[i];
679  return true;
680  }
681  else
682  return false;
683 }
684 
686 {
689 
690  int32_t k;
691  float64_t d=degree; // use float to evade rounding errors below
692 
693  for (k=0; k<degree; k++)
694  block_weights[k]=
695  (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
696  for (k=degree; k<seq_length; k++)
697  block_weights[k]=(-d+3*k+4)/3;
698 
699  return true;
700 }
701 
703 {
704  ASSERT(weights);
707 
708  int32_t i=0;
709  block_weights[0]=weights[0];
710  for (i=1; i<CMath::max(seq_length,degree); i++)
711  block_weights[i]=0;
712 
713  for (i=1; i<CMath::max(seq_length,degree); i++)
714  {
716 
717  float64_t contrib=0;
718  for (int32_t j=0; j<CMath::min(degree,i+1); j++)
719  contrib+=weights[j];
720 
721  block_weights[i]+=contrib;
722  }
723  return true;
724 }
725 
727 {
730 
731  for (int32_t i=1; i<seq_length+1 ; i++)
732  block_weights[i-1]=1.0/seq_length;
733  return true;
734 }
735 
737 {
740 
741  for (int32_t i=1; i<seq_length+1 ; i++)
742  block_weights[i-1]=degree*i;
743 
744  return true;
745 }
746 
748 {
751 
752  for (int32_t i=1; i<degree+1 ; i++)
753  block_weights[i-1]=((float64_t) i)*i;
754 
755  for (int32_t i=degree+1; i<seq_length+1 ; i++)
756  block_weights[i-1]=i;
757 
758  return true;
759 }
760 
762 {
765 
766  for (int32_t i=1; i<degree+1 ; i++)
767  block_weights[i-1]=((float64_t) i)*i*i;
768 
769  for (int32_t i=degree+1; i<seq_length+1 ; i++)
770  block_weights[i-1]=i;
771  return true;
772 }
773 
775 {
778 
779  for (int32_t i=1; i<degree+1 ; i++)
780  block_weights[i-1]=exp(((float64_t) i/10.0));
781 
782  for (int32_t i=degree+1; i<seq_length+1 ; i++)
783  block_weights[i-1]=i;
784 
785  return true;
786 }
787 
789 {
792 
793  for (int32_t i=1; i<degree+1 ; i++)
795 
796  for (int32_t i=degree+1; i<seq_length+1 ; i++)
797  block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2);
798 
799  return true;
800 }
801 
803 {
804  switch (type)
805  {
806  case E_WD:
808  case E_EXTERNAL:
810  case E_BLOCK_CONST:
811  return init_block_weights_const();
812  case E_BLOCK_LINEAR:
813  return init_block_weights_linear();
814  case E_BLOCK_SQPOLY:
815  return init_block_weights_sqpoly();
816  case E_BLOCK_CUBICPOLY:
818  case E_BLOCK_EXP:
819  return init_block_weights_exp();
820  case E_BLOCK_LOG:
821  return init_block_weights_log();
822  };
823  return false;
824 }
825 
826 
828 {
829  S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
830  int32_t j=params->j;
831  CWeightedDegreeStringKernel* wd=params->kernel;
832  CTrie<DNATrie>* tries=params->tries;
833  float64_t* weights=params->weights;
834  int32_t length=params->length;
835  int32_t* vec=params->vec;
836  float64_t* result=params->result;
837  float64_t factor=params->factor;
838  int32_t* vec_idx=params->vec_idx;
839 
841  CAlphabet* alpha=wd->alphabet;
842 
843  for (int32_t i=params->start; i<params->end; i++)
844  {
845  int32_t len=0;
846  bool free_vec;
847  char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec);
848  for (int32_t k=j; k<CMath::min(len,j+wd->get_degree()); k++)
849  vec[k]=alpha->remap_to_bin(char_vec[k]);
850  rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec);
851 
852  ASSERT(tries);
853 
854  result[i]+=factor*
855  wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
856  }
857 
858  SG_UNREF(rhs_feat);
859 
860  return NULL;
861 }
862 
864  int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
865  int32_t* IDX, float64_t* alphas, float64_t factor)
866 {
867  ASSERT(tries);
868  ASSERT(alphabet);
870  ASSERT(rhs);
871  ASSERT(num_vec<=rhs->get_num_vectors());
872  ASSERT(num_vec>0);
873  ASSERT(vec_idx);
874  ASSERT(result);
876 
877  int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
878  ASSERT(num_feat>0);
879  int32_t num_threads=parallel->get_num_threads();
880  ASSERT(num_threads>0);
881  int32_t* vec=SG_MALLOC(int32_t, num_threads*num_feat);
882 
883  if (num_threads < 2)
884  {
885 #ifdef CYGWIN
886  for (int32_t j=0; j<num_feat; j++)
887 #else
889  for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
890 #endif
891  {
892  init_optimization(num_suppvec, IDX, alphas, j);
893  S_THREAD_PARAM params;
894  params.vec=vec;
895  params.result=result;
896  params.weights=weights;
897  params.kernel=this;
898  params.tries=tries;
899  params.factor=factor;
900  params.j=j;
901  params.start=0;
902  params.end=num_vec;
903  params.length=length;
904  params.vec_idx=vec_idx;
905  compute_batch_helper((void*) &params);
906 
907  SG_PROGRESS(j,0,num_feat);
908  }
909  }
910 #ifndef WIN32
911  else
912  {
914  for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
915  {
916  init_optimization(num_suppvec, IDX, alphas, j);
917  pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
918  S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, num_threads);
919  int32_t step= num_vec/num_threads;
920  int32_t t;
921 
922  for (t=0; t<num_threads-1; t++)
923  {
924  params[t].vec=&vec[num_feat*t];
925  params[t].result=result;
926  params[t].weights=weights;
927  params[t].kernel=this;
928  params[t].tries=tries;
929  params[t].factor=factor;
930  params[t].j=j;
931  params[t].start = t*step;
932  params[t].end = (t+1)*step;
933  params[t].length=length;
934  params[t].vec_idx=vec_idx;
935  pthread_create(&threads[t], NULL, CWeightedDegreeStringKernel::compute_batch_helper, (void*)&params[t]);
936  }
937  params[t].vec=&vec[num_feat*t];
938  params[t].result=result;
939  params[t].weights=weights;
940  params[t].kernel=this;
941  params[t].tries=tries;
942  params[t].factor=factor;
943  params[t].j=j;
944  params[t].start=t*step;
945  params[t].end=num_vec;
946  params[t].length=length;
947  params[t].vec_idx=vec_idx;
948  compute_batch_helper((void*) &params[t]);
949 
950  for (t=0; t<num_threads-1; t++)
951  pthread_join(threads[t], NULL);
952  SG_PROGRESS(j,0,num_feat);
953 
954  SG_FREE(params);
955  SG_FREE(threads);
956  }
957  }
958 #endif
959 
960  SG_FREE(vec);
961 
962  //really also free memory as this can be huge on testing especially when
963  //using the combined kernel
965 }
966 
968 {
969  if (type==E_EXTERNAL && max!=0)
970  return false;
971 
972  max_mismatch=max;
973 
974  if (lhs!=NULL && rhs!=NULL)
975  return init(lhs, rhs);
976  else
977  return true;
978 }
979 
/**
 * Reset all members to their defaults and register the kernel's parameters
 * with m_parameters. Takes no arguments and returns nothing.
 *
 * Defaults set here: degree=1, mkl_stepsize=1, max_mismatch=0, length=0,
 * seq_length=0, block_computation=true, type=E_WD, which_degree=-1; all
 * pointer members are set to NULL.
 *
 * NOTE(review): listing lines 987, 1009, 1011, 1013 and 1015 are absent from
 * this view, so the two registrations for "weights" and "position_weights"
 * appear only as argument tails -- confirm against the real source.
 */
980 void CWeightedDegreeStringKernel::init()
981 {
982  weights=NULL;
983  weights_degree=0;
984  weights_length=0;
985 
986  position_weights=NULL;
988 
989  weights_buffer=NULL;
990  mkl_stepsize=1;
991  degree=1;
992  length=0;
993 
994  max_mismatch=0;
995  seq_length=0;
996 
997  block_weights=NULL;
998  block_computation=true;
999  type=E_WD;
     // -1 means no single degree is selected.
1000  which_degree=-1;
1001  tries=NULL;
1002 
1003  tree_initialized=false;
1004  alphabet=NULL;
1005 
1006  lhs=NULL;
1007  rhs=NULL;
1008 
1010 
1012 
      // Tail of a registration call whose first line is missing from the listing.
1014  "weights", "WD Kernel weights.");
      // Tail of a second registration call, likewise truncated.
1016  "position_weights",
1017  "Weights per position.");
1018  m_parameters->add(&mkl_stepsize, "mkl_stepsize", "MKL step size.");
1019  m_parameters->add(&degree, "degree", "Order of WD kernel.");
1020  m_parameters->add(&max_mismatch, "max_mismatch",
1021  "Number of allowed mismatches.");
1022  m_parameters->add(&block_computation, "block_computation",
1023  "If block computation shall be used.");
      // The enum is registered through a machine_int_t pointer cast.
1024  m_parameters->add((machine_int_t*) &type, "type",
1025  "WeightedDegree kernel type.");
      // NOTE(review): the description string below contains the typo
      // "Unqueal" (presumably "Unequal"); it is a runtime string and is left
      // untouched here.
1026  m_parameters->add(&which_degree, "which_degree",
1027  "Unqueal -1 if just a single degree is selected.");
1028  m_parameters->add((CSGObject**) &alphabet, "alphabet",
1029  "Alphabet of Features.");
1030 }

SHOGUN Machine Learning Toolbox - Documentation