SHOGUN
v1.1.0
|
The WeightedCommWordString kernel may be used to compute the weighted spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer length $k$ is weighted by some coefficient $\beta_k$) from strings that have been mapped into unsigned 16bit integers.
These 16bit integers correspond to k-mers. To be applicable in this kernel they need to be sorted (e.g. via the SortWordString pre-processor).
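It basically uses the algorithm in the unix "comm" command (hence the name) to compute:
$$k({\bf x},{\bf x'}) = \sum_{k=1}^{K} \beta_k \, \Phi_k({\bf x}) \cdot \Phi_k({\bf x'})$$
where $\beta_k$ is the coefficient weighting the k-mers of length $k$ and $\Phi_k$ maps a sequence ${\bf x}$ that consists of letters in $\Sigma$ to a feature vector of size $|\Sigma|^k$. In this feature vector each entry denotes how often the corresponding k-mer appears in ${\bf x}$.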
Note that this representation is especially tuned to small alphabets (like the 2-bit alphabet DNA), for which it enables spectrum kernels of order 8.
For this kernel the linadd speedups are quite efficiently implemented using direct maps.
Definition at line 50 of file WeightedCommWordStringKernel.h.
Public Member Functions | |
CWeightedCommWordStringKernel () | |
CWeightedCommWordStringKernel (int32_t size, bool use_sign) | |
CWeightedCommWordStringKernel (CStringFeatures< uint16_t > *l, CStringFeatures< uint16_t > *r, bool use_sign=false, int32_t size=10) | |
virtual | ~CWeightedCommWordStringKernel () |
virtual bool | init (CFeatures *l, CFeatures *r) |
virtual void | cleanup () |
virtual float64_t | compute_optimized (int32_t idx) |
virtual void | add_to_normal (int32_t idx, float64_t weight) |
void | merge_normal () |
bool | set_wd_weights () |
bool | set_weights (float64_t *w, int32_t d) |
virtual EKernelType | get_kernel_type () |
virtual const char * | get_name () const |
virtual EFeatureType | get_feature_type () |
virtual float64_t * | compute_scoring (int32_t max_degree, int32_t &num_feat, int32_t &num_sym, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, bool do_init=true) |
Public Member Functions inherited from CCommWordStringKernel | |
CCommWordStringKernel () | |
CCommWordStringKernel (int32_t size, bool use_sign) | |
CCommWordStringKernel (CStringFeatures< uint16_t > *l, CStringFeatures< uint16_t > *r, bool use_sign=false, int32_t size=10) | |
virtual | ~CCommWordStringKernel () |
virtual bool | init_dictionary (int32_t size) |
virtual bool | init_optimization (int32_t count, int32_t *IDX, float64_t *weights) |
virtual bool | delete_optimization () |
virtual void | clear_normal () |
void | get_dictionary (int32_t &dsize, float64_t *&dweights) |
char * | compute_consensus (int32_t &num_feat, int32_t num_suppvec, int32_t *IDX, float64_t *alphas) |
void | set_use_dict_diagonal_optimization (bool flag) |
bool | get_use_dict_diagonal_optimization () |
Public Member Functions inherited from CStringKernel< uint16_t > | |
CStringKernel (int32_t cachesize=0) | |
CStringKernel (CFeatures *l, CFeatures *r) | |
virtual EFeatureClass | get_feature_class () |
Public Member Functions inherited from CKernel | |
CKernel () | |
CKernel (int32_t size) | |
CKernel (CFeatures *l, CFeatures *r, int32_t size) | |
virtual | ~CKernel () |
float64_t | kernel (int32_t idx_a, int32_t idx_b) |
SGMatrix< float64_t > | get_kernel_matrix () |
virtual SGVector< float64_t > | get_kernel_col (int32_t j) |
virtual SGVector< float64_t > | get_kernel_row (int32_t i) |
template<class T > | |
SGMatrix< T > | get_kernel_matrix () |
virtual bool | set_normalizer (CKernelNormalizer *normalizer) |
virtual CKernelNormalizer * | get_normalizer () |
virtual bool | init_normalizer () |
void | load (CFile *loader) |
void | save (CFile *writer) |
CFeatures * | get_lhs () |
CFeatures * | get_rhs () |
virtual int32_t | get_num_vec_lhs () |
virtual int32_t | get_num_vec_rhs () |
virtual bool | has_features () |
bool | get_lhs_equals_rhs () |
virtual void | remove_lhs_and_rhs () |
virtual void | remove_lhs () |
virtual void | remove_rhs () |
takes all necessary steps if the rhs is removed from kernel | |
void | set_cache_size (int32_t size) |
int32_t | get_cache_size () |
void | list_kernel () |
bool | has_property (EKernelProperty p) |
EOptimizationType | get_optimization_type () |
virtual void | set_optimization_type (EOptimizationType t) |
bool | get_is_initialized () |
bool | init_optimization_svm (CSVM *svm) |
virtual void | compute_batch (int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0) |
float64_t | get_combined_kernel_weight () |
void | set_combined_kernel_weight (float64_t nw) |
virtual int32_t | get_num_subkernels () |
virtual void | compute_by_subkernel (int32_t vector_idx, float64_t *subkernel_contrib) |
virtual const float64_t * | get_subkernel_weights (int32_t &num_weights) |
virtual void | set_subkernel_weights (SGVector< float64_t > weights) |
Public Member Functions inherited from CSGObject | |
CSGObject () | |
CSGObject (const CSGObject &orig) | |
virtual | ~CSGObject () |
virtual bool | is_generic (EPrimitiveType *generic) const |
template<class T > | |
void | set_generic () |
void | unset_generic () |
virtual void | print_serializable (const char *prefix="") |
virtual bool | save_serializable (CSerializableFile *file, const char *prefix="") |
virtual bool | load_serializable (CSerializableFile *file, const char *prefix="") |
void | set_global_io (SGIO *io) |
SGIO * | get_global_io () |
void | set_global_parallel (Parallel *parallel) |
Parallel * | get_global_parallel () |
void | set_global_version (Version *version) |
Version * | get_global_version () |
SGVector< char * > | get_modelsel_names () |
char * | get_modsel_param_descr (const char *param_name) |
index_t | get_modsel_param_index (const char *param_name) |
Protected Member Functions | |
virtual float64_t | compute_helper (int32_t idx_a, int32_t idx_b, bool do_sort) |
Protected Member Functions inherited from CCommWordStringKernel | |
virtual float64_t | compute (int32_t idx_a, int32_t idx_b) |
virtual float64_t | compute_diag (int32_t idx_a) |
Protected Attributes | |
int32_t | degree |
float64_t * | weights |
Protected Attributes inherited from CCommWordStringKernel | |
int32_t | dictionary_size |
float64_t * | dictionary_weights |
bool | use_sign |
bool | use_dict_diagonal_optimization |
int32_t * | dict_diagonal_optimization |
Additional Inherited Members | |
Public Attributes inherited from CSGObject | |
SGIO * | io |
Parallel * | parallel |
Version * | version |
Parameter * | m_parameters |
Parameter * | m_model_selection_parameters |
Static Protected Member Functions inherited from CKernel | |
template<class T > | |
static void * | get_kernel_matrix_helper (void *p) |
default constructor
Definition at line 18 of file WeightedCommWordStringKernel.cpp.
CWeightedCommWordStringKernel | ( | int32_t | size, |
bool | use_sign | ||
) |
constructor
size | cache size |
use_sign | if sign shall be used |
Definition at line 24 of file WeightedCommWordStringKernel.cpp.
CWeightedCommWordStringKernel | ( | CStringFeatures< uint16_t > * | l, |
CStringFeatures< uint16_t > * | r, | ||
bool | use_sign = false , |
||
int32_t | size = 10 |
||
) |
constructor
l | features of left-hand side |
r | features of right-hand side |
use_sign | if sign shall be used |
size | cache size |
Definition at line 32 of file WeightedCommWordStringKernel.cpp.
|
virtual |
Definition at line 43 of file WeightedCommWordStringKernel.cpp.
|
virtual |
add to normal
idx | where to add |
weight | what to add |
Reimplemented from CCommWordStringKernel.
Definition at line 191 of file WeightedCommWordStringKernel.cpp.
|
virtual |
clean up kernel
Reimplemented from CCommWordStringKernel.
Definition at line 59 of file WeightedCommWordStringKernel.cpp.
|
protected virtual |
helper for compute
idx_a | index a |
idx_b | index b |
do_sort | if sorting shall be performed |
Reimplemented from CCommWordStringKernel.
Definition at line 96 of file WeightedCommWordStringKernel.cpp.
|
virtual |
compute optimized
idx | index to compute |
Reimplemented from CCommWordStringKernel.
Definition at line 253 of file WeightedCommWordStringKernel.cpp.
|
virtual |
compute scoring
max_degree | maximum degree |
num_feat | number of features |
num_sym | number of symbols |
target | target |
num_suppvec | number of support vectors |
IDX | IDX |
alphas | alphas |
do_init | if initialization shall be performed |
Reimplemented from CCommWordStringKernel.
Definition at line 288 of file WeightedCommWordStringKernel.cpp.
|
virtual |
return feature type the kernel can deal with
Reimplemented from CCommWordStringKernel.
Definition at line 134 of file WeightedCommWordStringKernel.h.
|
virtual |
return what type of kernel we are
Reimplemented from CCommWordStringKernel.
Definition at line 122 of file WeightedCommWordStringKernel.h.
|
virtual |
return the kernel's name
Reimplemented from CCommWordStringKernel.
Definition at line 128 of file WeightedCommWordStringKernel.h.
initialize kernel
l | features of left-hand side |
r | features of right-hand side |
Reimplemented from CCommWordStringKernel.
Definition at line 48 of file WeightedCommWordStringKernel.cpp.
void merge_normal | ( | ) |
merge normal
Definition at line 221 of file WeightedCommWordStringKernel.cpp.
bool set_wd_weights | ( | ) |
set weighted degree weights
Definition at line 67 of file WeightedCommWordStringKernel.cpp.
bool set_weights | ( | float64_t * | w, |
int32_t | d | ||
) |
set custom weights (swig compatible)
w | weights |
d | degree (must match number of weights) |
Definition at line 85 of file WeightedCommWordStringKernel.cpp.
|
protected |
degree
Definition at line 168 of file WeightedCommWordStringKernel.h.
|
protected |
weights for each of the subkernels of degree 1...d
Definition at line 171 of file WeightedCommWordStringKernel.h.