SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
GUIHMM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/ui/GUIHMM.h>
13 #include <shogun/ui/SGInterface.h>
14 
15 #include <shogun/lib/config.h>
16 #include <shogun/lib/common.h>
18 #include <shogun/features/Labels.h>
19 
20 #include <unistd.h>
21 
22 using namespace shogun;
23 
24 CGUIHMM::CGUIHMM(CSGInterface* ui_)
25 : CSGObject(), ui(ui_)
26 {
27  working=NULL;
28 
29  pos=NULL;
30  neg=NULL;
31  test=NULL;
32 
33  PSEUDO=1e-10;
34  M=4;
35 }
36 
38 {
40 }
41 
42 bool CGUIHMM::new_hmm(int32_t n, int32_t m)
43 {
45  working=new CHMM(n, m, NULL, PSEUDO);
46  M=m;
47  return true;
48 }
49 
51 {
52  if (!working)
53  SG_ERROR("Create HMM first.\n");
54 
55  CFeatures* trainfeatures=ui->ui_features->get_train_features();
56  if (!trainfeatures)
57  SG_ERROR("Assign train features first.\n");
58  if (trainfeatures->get_feature_type()!=F_WORD ||
59  trainfeatures->get_feature_class()!=C_STRING)
60  SG_ERROR("Features must be STRING of type WORD.\n");
61 
63  SG_DEBUG("Stringfeatures have %ld orig_symbols %ld symbols %d order %ld max_symbols\n", (int64_t) sf->get_original_num_symbols(), (int64_t) sf->get_num_symbols(), sf->get_order(), (int64_t) sf->get_max_num_symbols());
64 
66 
68 }
69 
70 
72 {
73  if (!working)
74  SG_ERROR("Create HMM first.\n");
75 
76  CFeatures* trainfeatures=ui->ui_features->get_train_features();
77  if (!trainfeatures)
78  SG_ERROR("Assign train features first.\n");
79  if (trainfeatures->get_feature_type()!=F_WORD ||
80  trainfeatures->get_feature_class()!=C_STRING)
81  SG_ERROR("Features must be STRING of type WORD.\n");
82 
84 
86 }
87 
88 
90 {
91  if (!working)
92  SG_ERROR("Create HMM first.\n");
93  if (!working->get_observations())
94  SG_ERROR("Assign observation first.\n");
95 
97 }
98 
100 {
101  if (!working)
102  SG_ERROR("Create HMM first.\n");
103  if (!working->get_observations())
104  SG_ERROR("Assign observation first.\n");
105 
107 }
108 
110 {
111  if (!working)
112  SG_ERROR("Create HMM first.\n");
113  if (!working->get_observations())
114  SG_ERROR("Assign observation first.\n");
115 
117 }
118 
119 bool CGUIHMM::linear_train(char align)
120 {
121  if (!working)
122  SG_ERROR("Create HMM first.\n");
123 
124  CFeatures* trainfeatures=ui->ui_features->get_train_features();
125  if (!trainfeatures)
126  SG_ERROR("Assign train features first.\n");
127  if (trainfeatures->get_feature_type()!=F_WORD ||
128  trainfeatures->get_feature_class()!=C_STRING)
129  SG_ERROR("Features must be STRING of type WORD.\n");
130 
132  ui_features->get_train_features());
133 
134  bool right_align=false;
135  if (align=='r')
136  {
137  SG_INFO("Using alignment to right.\n");
138  right_align=true;
139  }
140  else
141  SG_INFO("Using alignment to left.\n");
142  working->linear_train(right_align);
143 
144  return true;
145 }
146 
148 {
150  ui_features->get_test_features();
151  ASSERT(obs);
152  int32_t num_vec=obs->get_num_vectors();
153 
154  //CStringFeatures<uint16_t>* old_pos=pos->get_observations();
155  //CStringFeatures<uint16_t>* old_neg=neg->get_observations();
156 
157  pos->set_observations(obs);
158  neg->set_observations(obs);
159 
160  if (!result)
161  result=new CLabels(num_vec);
162 
163  for (int32_t i=0; i<num_vec; i++)
164  result->set_label(i, pos->model_probability(i) - neg->model_probability(i));
165 
166  //pos->set_observations(old_pos);
167  //neg->set_observations(old_neg);
168  return result;
169 }
170 
172 {
174  ui_features->get_test_features();
175  ASSERT(obs);
176 
177  //CStringFeatures<uint16_t>* old_pos=pos->get_observations();
178  //CStringFeatures<uint16_t>* old_neg=neg->get_observations();
179 
180  pos->set_observations(obs);
181  neg->set_observations(obs);
182 
183  float64_t result=pos->model_probability(idx) - neg->model_probability(idx);
184  //pos->set_observations(old_pos);
185  //neg->set_observations(old_neg);
186  return result;
187 }
188 
190 {
191  ASSERT(working);
192 
194  ui_features->get_test_features();
195  ASSERT(obs);
196  int32_t num_vec=obs->get_num_vectors();
197 
198  //CStringFeatures<uint16_t>* old_pos=working->get_observations();
200 
201  if (!result)
202  result=new CLabels(num_vec);
203 
204  for (int32_t i=0; i<num_vec; i++)
205  result->set_label(i, working->model_probability(i));
206 
207  //working->set_observations(old_pos);
208  return result;
209 }
210 
212 {
213  ASSERT(working);
214 
216  ui_features->get_test_features();
217  ASSERT(obs);
218  int32_t num_vec=obs->get_num_vectors();
219 
220  //CStringFeatures<uint16_t>* old_pos=working->get_observations();
222 
223  if (!result)
224  result=new CLabels(num_vec);
225 
226  for (int32_t i=0; i<num_vec; i++)
228 
229  //working->set_observations(old_pos);
230  return result;
231 }
232 
233 
235 {
236  ASSERT(working);
237 
239  ui_features->get_test_features();
240  ASSERT(obs);
241 
242  //CStringFeatures<uint16_t>* old_pos=pos->get_observations();
243 
244  pos->set_observations(obs);
245  neg->set_observations(obs);
246 
247  float64_t result=working->model_probability(idx);
248  //working->set_observations(old_pos);
249  return result;
250 }
251 
252 bool CGUIHMM::append_model(char* filename, int32_t base1, int32_t base2)
253 {
254  if (!working)
255  SG_ERROR("Create HMM first.\n");
256  if (!filename)
257  SG_ERROR("Invalid filename.\n");
258 
259  FILE* model_file=fopen(filename, "r");
260  if (!model_file)
261  SG_ERROR("Opening file %s failed.\n", filename);
262 
263  CHMM* h=new CHMM(model_file,PSEUDO);
264  if (!h || !h->get_status())
265  {
266  SG_UNREF(h);
267  fclose(model_file);
268  SG_ERROR("Reading file %s failed.\n", filename);
269  }
270 
271  fclose(model_file);
272  SG_INFO("File %s successfully read.\n", filename);
273 
274  SG_DEBUG("h %d , M: %d\n", h, h->get_M());
275  if (base1!=-1 && base2!=-1)
276  {
277  float64_t* cur_o=SG_MALLOC(float64_t, h->get_M());
278  float64_t* app_o=SG_MALLOC(float64_t, h->get_M());
279 
280  for (int32_t i=0; i<h->get_M(); i++)
281  {
282  if (i==base1)
283  cur_o[i]=0;
284  else
285  cur_o[i]=-1000;
286 
287  if (i==base2)
288  app_o[i]=0;
289  else
290  app_o[i]=-1000;
291  }
292 
293  working->append_model(h, cur_o, app_o);
294 
295  SG_FREE(cur_o);
296  SG_FREE(app_o);
297  }
298  else
299  working->append_model(h);
300 
301  SG_UNREF(h);
302  SG_INFO("New model has %i states.\n", working->get_N());
303  return true;
304 }
305 
306 bool CGUIHMM::add_states(int32_t num_states, float64_t value)
307 {
308  if (!working)
309  SG_ERROR("Create HMM first.\n");
310 
311  working->add_states(num_states, value);
312  SG_INFO("New model has %i states, value %f.\n", working->get_N(), value);
313  return true;
314 }
315 
317 {
318  PSEUDO=pseudo;
319  SG_INFO("Current setting: pseudo=%e.\n", PSEUDO);
320  return true;
321 }
322 
323 bool CGUIHMM::convergence_criteria(int32_t num_iterations, float64_t epsilon)
324 {
325  if (!working)
326  SG_ERROR("Create HMM first.\n");
327 
328  working->set_iterations(num_iterations);
329  working->set_epsilon(epsilon);
330 
331  SG_INFO("Current HMM convergence criteria: iterations=%i, epsilon=%e\n", working->get_iterations(), working->get_epsilon());
332  return true;
333 }
334 
335 bool CGUIHMM::set_hmm_as(char* target)
336 {
337  if (!working)
338  SG_ERROR("Create HMM first!\n");
339 
340  if (strncmp(target, "POS", 3)==0)
341  {
342  SG_UNREF(pos);
343  pos=working;
344  working=NULL;
345  }
346  else if (strncmp(target, "NEG", 3)==0)
347  {
348  SG_UNREF(neg);
349  neg=working;
350  working=NULL;
351  }
352  else if (strncmp(target, "TEST", 4)==0)
353  {
354  SG_UNREF(test);
355  test=working;
356  working=NULL;
357  }
358  else
359  SG_ERROR("Target POS|NEG|TEST is missing.\n");
360 
361  return true;
362 }
363 
364 bool CGUIHMM::load(char* filename)
365 {
366  bool result=false;
367 
368  FILE* model_file=fopen(filename, "r");
369  if (!model_file)
370  SG_ERROR("Opening file %s failed.\n", filename);
371 
372  SG_UNREF(working);
373  working=new CHMM(model_file, PSEUDO);
374  fclose(model_file);
375 
376  if (working && working->get_status())
377  {
378  SG_INFO("Loaded HMM successfully from file %s.\n", filename);
379  result=true;
380  }
381 
382  M=working->get_M();
383 
384  return result;
385 }
386 
387 bool CGUIHMM::save(char* filename, bool is_binary)
388 {
389  bool result=false;
390 
391  if (!working)
392  SG_ERROR("Create HMM first.\n");
393 
394  FILE* file=fopen(filename, "w");
395  if (file)
396  {
397  if (is_binary)
398  result=working->save_model_bin(file);
399  else
400  result=working->save_model(file);
401  }
402 
403  if (!file || !result)
404  SG_ERROR("Writing to file %s failed!\n", filename);
405  else
406  SG_INFO("Successfully written model into %s!\n", filename);
407 
408  if (file)
409  fclose(file);
410 
411  return result;
412 }
413 
414 bool CGUIHMM::load_definitions(char* filename, bool do_init)
415 {
416  if (!working)
417  SG_ERROR("Create HMM first.\n");
418 
419  bool result=false;
420  FILE* def_file=fopen(filename, "r");
421  if (!def_file)
422  SG_ERROR("Opening file %s failed\n", filename);
423 
424  if (working->load_definitions(def_file, true, do_init))
425  {
426  SG_INFO("Definitions successfully read from %s.\n", filename);
427  result=true;
428  }
429  else
430  SG_ERROR("Couldn't load definitions form file %s.\n", filename);
431 
432  fclose(def_file);
433  return result;
434 }
435 
436 bool CGUIHMM::save_likelihood(char* filename, bool is_binary)
437 {
438  bool result=false;
439 
440  if (!working)
441  SG_ERROR("Create HMM first\n");
442 
443  FILE* file=fopen(filename, "w");
444  if (file)
445  {
447  //if (binary)
448  // result=working->save_model_bin(file);
449  //else
450 
451  result=working->save_likelihood(file);
452  }
453 
454  if (!file || !result)
455  SG_ERROR("Writing to file %s failed!\n", filename);
456  else
457  SG_INFO("Successfully written likelihoods into %s!\n", filename);
458 
459  if (file)
460  fclose(file);
461 
462  return result;
463 }
464 
465 bool CGUIHMM::save_path(char* filename, bool is_binary)
466 {
467  bool result=false;
468  if (!working)
469  SG_ERROR("Create HMM first.\n");
470 
471  FILE* file=fopen(filename, "w");
472  if (file)
473  {
475  //if (binary)
476  //_train()/ result=working->save_model_bin(file);
477  //else
479  ui_features->get_test_features();
480  ASSERT(obs);
482 
483  result=working->save_path(file);
484  }
485 
486  if (!file || !result)
487  SG_ERROR("Writing to file %s failed!\n", filename);
488  else
489  SG_INFO("Successfully written path into %s!\n", filename);
490 
491  if (file)
492  fclose(file);
493 
494  return result;
495 }
496 
498 {
499  if (!working)
500  SG_ERROR("Create HMM first.\n");
501 
502  working->chop(value);
503  return true;
504 }
505 
507 {
508  if (!working)
509  SG_ERROR("Create HMM first!\n");
510 
511  working->output_model(false);
512  return true;
513 }
514 
516 {
517  if (!working)
518  SG_ERROR("Create HMM first!\n");
519 
520  working->output_model(true);
521  return true;
522 }
523 
525 {
526  if (!working)
527  SG_ERROR("Create HMM first!\n");
528 
530  return true;
531 }
532 
533 bool CGUIHMM::best_path(int32_t from, int32_t to)
534 {
535  // FIXME: from unused???
536 
537  if (!working)
538  SG_ERROR("Create HMM first.\n");
539 
540  //get path
541  working->best_path(0);
542 
543  for (int32_t t=0; t<working->get_observations()->get_vector_length(0)-1 && t<to; t++)
544  SG_PRINT("%d ", working->get_best_path_state(0, t));
545  SG_PRINT("\n");
546 
547  //for (t=0; t<p_observations->get_vector_length(0)-1 && t<to; t++)
548  // SG_PRINT( "%d ", PATH(0)[t]);
549  //
550  return true;
551 }
552 
553 bool CGUIHMM::normalize(bool keep_dead_states)
554 {
555  if (!working)
556  SG_ERROR("Create HMM first.\n");
557 
558  working->normalize(keep_dead_states);
559  return true;
560 }
561 
562 bool CGUIHMM::relative_entropy(float64_t*& values, int32_t& len)
563 {
564  if (!pos || !neg)
565  SG_ERROR("Set pos and neg HMM first!\n");
566 
567  int32_t pos_N=pos->get_N();
568  int32_t neg_N=neg->get_N();
569  int32_t pos_M=pos->get_M();
570  int32_t neg_M=neg->get_M();
571  if (pos_M!=neg_M || pos_N!=neg_N)
572  SG_ERROR("Pos and neg HMM's differ in number of emissions or states.\n");
573 
574  float64_t* p=SG_MALLOC(float64_t, pos_M);
575  float64_t* q=SG_MALLOC(float64_t, neg_M);
576 
577  SG_FREE(values);
578  values=SG_MALLOC(float64_t, pos_N);
579 
580  for (int32_t i=0; i<pos_N; i++)
581  {
582  for (int32_t j=0; j<pos_M; j++)
583  {
584  p[j]=pos->get_b(i, j);
585  q[j]=neg->get_b(i, j);
586  }
587 
588  values[i]=CMath::relative_entropy(p, q, pos_M);
589  }
590  SG_FREE(p);
591  SG_FREE(q);
592 
593  len=pos_N;
594  return true;
595 }
596 
597 bool CGUIHMM::entropy(float64_t*& values, int32_t& len)
598 {
599  if (!working)
600  SG_ERROR("Create HMM first!\n");
601 
602  int32_t n=working->get_N();
603  int32_t m=working->get_M();
605 
606  SG_FREE(values);
607  values=SG_MALLOC(float64_t, n);
608 
609  for (int32_t i=0; i<n; i++)
610  {
611  for (int32_t j=0; j<m; j++)
612  p[j]=working->get_b(i, j);
613 
614  values[i]=CMath::entropy(p, m);
615  }
616  SG_FREE(p);
617 
618  len=m;
619  return true;
620 }
621 
622 bool CGUIHMM::permutation_entropy(int32_t width, int32_t seq_num)
623 {
624  if (!working)
625  SG_ERROR("Create hmm first.\n");
626 
627  if (!working->get_observations())
628  SG_ERROR("Set observations first.\n");
629 
630  return working->permutation_entropy(width, seq_num);
631 }

SHOGUN Machine Learning Toolbox - Documentation