23 using namespace shogun;
37 for (int32_t i=0; i<n; i++)
55 for (int32_t i=0; i<components.
vlen; i++)
67 for (int32_t i=0; i<components.
vlen; i++)
121 SG_ERROR(
"Specified features are not of type CDotFeatures\n");
131 SG_ERROR(
"No features to train on.\n");
141 init_k_means->
train(dotdata);
144 alpha=alpha_init(init_means);
164 while (iter<max_iter)
166 log_likelihood_prev=log_likelihood_cur;
167 log_likelihood_cur=0;
169 for (int32_t i=0; i<num_vectors; i++)
180 log_likelihood_cur+=logPx[i];
190 if (iter>0 && log_likelihood_cur-log_likelihood_prev<min_change)
203 return log_likelihood_cur;
209 SG_ERROR(
"No features to train on.\n");
212 SG_ERROR(
"Can't run SMEM with less than 3 component mixture model.\n");
231 while (iter<max_iter)
236 for (int32_t i=0; i<num_vectors; i++)
273 for (int32_t j=0; j<num_vectors; j++)
288 bool better_found=
false;
289 int32_t candidates_checked=0;
296 candidates_checked++;
302 if (cand_likelihood>cur_likelihood)
304 cur_likelihood=cand_likelihood;
319 if (candidates_checked>=max_cand)
323 if (better_found || candidates_checked>=max_cand)
342 return cur_likelihood;
345 void CGMM::partial_em(int32_t comp1, int32_t comp2, int32_t comp3,
float64_t min_cov, int32_t max_em_iter,
float64_t min_change)
355 for (int32_t i=0; i<num_vectors; i++)
365 if (j!=comp1 && j!=comp2 && j!=comp3)
386 float64_t coef_sum=coefficients.vector[0]+coefficients.vector[1]+coefficients.vector[2];
388 int32_t dim_n=components.vector[0]->get_mean().vlen;
390 float64_t alpha1=coefficients.vector[1]/(coefficients.vector[1]+coefficients.vector[2]);
391 float64_t alpha2=coefficients.vector[2]/(coefficients.vector[1]+coefficients.vector[2]);
396 CMath::add(components.vector[1]->get_mean().vector, alpha1, components.vector[1]->get_mean().vector, alpha2,
397 components.vector[2]->get_mean().vector, dim_n);
399 for (int32_t i=0; i<dim_n; i++)
401 components.vector[2]->get_mean().vector[i]=components.vector[0]->get_mean().vector[i]+
CMath::randn_double()*noise_mag;
402 components.vector[0]->get_mean().vector[i]=components.vector[0]->get_mean().vector[i]+
CMath::randn_double()*noise_mag;
405 coefficients.vector[1]=coefficients.vector[1]+coefficients.vector[2];
406 coefficients.vector[2]=coefficients.vector[0]*0.5;
407 coefficients.vector[0]=coefficients.vector[0]*0.5;
409 if (components.vector[0]->get_cov_type()==
FULL)
416 components.vector[1]->set_u(c1);
421 for (int32_t i=0; i<dim_n; i++)
423 new_d+=
CMath::log(components.vector[0]->get_d().vector[i]);
424 for (int32_t j=0; j<dim_n; j++)
428 components.vector[0]->get_u().matrix[i*dim_n+j]=1;
429 components.vector[2]->get_u().matrix[i*dim_n+j]=1;
433 components.vector[0]->get_u().matrix[i*dim_n+j]=0;
434 components.vector[2]->get_u().matrix[i*dim_n+j]=0;
439 for (int32_t i=0; i<dim_n; i++)
441 components.vector[0]->get_d().vector[i]=new_d;
442 components.vector[2]->get_d().vector[i]=new_d;
445 else if(components.vector[0]->get_cov_type()==
DIAG)
447 CMath::add(components.vector[1]->get_d().vector, alpha1, components.vector[1]->get_d().vector,
448 alpha2, components.vector[2]->get_d().vector, dim_n);
451 for (int32_t i=0; i<dim_n; i++)
453 new_d+=
CMath::log(components.vector[0]->get_d().vector[i]);
456 for (int32_t i=0; i<dim_n; i++)
458 components.vector[0]->get_d().vector[i]=new_d;
459 components.vector[2]->get_d().vector[i]=new_d;
462 else if(components.vector[0]->get_cov_type()==
SPHERICAL)
464 components.vector[1]->get_d().vector[0]=alpha1*components.vector[1]->get_d().vector[0]+
465 alpha2*components.vector[2]->get_d().vector[0];
467 components.vector[2]->get_d().vector[0]=components.vector[0]->get_d().vector[0];
470 CGMM* partial_candidate=
new CGMM(components, coefficients);
481 while (iter<max_em_iter)
483 log_likelihood_prev=log_likelihood_cur;
484 log_likelihood_cur=0;
486 for (int32_t i=0; i<num_vectors; i++)
490 for (int32_t j=0; j<3; j++)
492 logPxy[i*3+j]=components.
vector[j]->compute_log_PDF(v)+
CMath::log(coefficients.vector[j]);
496 logPx[i]=
CMath::log(logPx[i]+init_logPx_fix[i]);
497 log_likelihood_cur+=logPx[i];
500 for (int32_t j=0; j<3; j++)
503 alpha.matrix[i*3+j]=
CMath::exp(logPxy[i*3+j]-logPx[i]+post_add[i]);
507 if (iter>0 && log_likelihood_cur-log_likelihood_prev<min_change)
521 delete partial_candidate;
522 alpha.destroy_matrix();
541 for (int32_t i=0; i<alpha.
num_cols; i++)
545 memset(mean_sum, 0, num_dim*
sizeof(
float64_t));
547 for (int32_t j=0; j<alpha.
num_rows; j++)
555 for (int32_t j=0; j<num_dim; j++)
556 mean_sum[j]/=alpha_sum;
565 memset(cov_sum, 0, num_dim*num_dim*
sizeof(
float64_t));
567 else if(cov_type==
DIAG)
570 memset(cov_sum, 0, num_dim*
sizeof(
float64_t));
578 for (int32_t j=0; j<alpha.
num_rows; j++)
587 1, (
double*) cov_sum, num_dim);
591 for (int32_t k=0; k<num_dim; k++)
598 for (int32_t k=0; k<num_dim; k++)
611 for (int32_t j=0; j<num_dim*num_dim; j++)
612 cov_sum[j]/=alpha_sum;
616 for (int32_t j=0; j<num_dim; j++)
624 for (int32_t j=0; j<num_dim; j++)
626 cov_sum[j]/=alpha_sum;
634 cov_sum[0]/=alpha_sum*num_dim;
643 alpha_sum_sum+=alpha_sum;
646 for (int32_t i=0; i<alpha.
num_cols; i++)
742 for (int32_t i=0; i<init_means.
num_cols; i++)
743 label_num.vector[i]=i;
752 for (int32_t i=0; i<num_vectors; i++)
768 if (cum_sum>=rand_num)
789 void CGMM::register_params()