SHOGUN
v1.1.0
|
The CommUlongString kernel may be used to compute the spectrum kernel from strings that have been mapped into unsigned 64-bit integers.
These 64-bit integers correspond to k-mers. To be applicable in this kernel they need to be sorted (e.g. via the SortUlongString pre-processor).
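It basically uses the algorithm in the unix "comm" command (hence the name) to compute:
\[ k({\bf x}, {\bf x'}) = \Phi_k({\bf x}) \cdot \Phi_k({\bf x'}) \]
where $\Phi_k$ maps a sequence ${\bf x}$ that consists of letters in $\Sigma$ to a feature vector of size $|\Sigma|^k$. In this feature vector each entry denotes how often the corresponding k-mer appears in ${\bf x}$.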
Note that this representation enables spectrum kernels of order 8 for 8-bit alphabets (like binaries) and order 32 for 2-bit alphabets like DNA.
For this kernel the linadd speedups are implemented (though there is room for improvement here when a whole set of sequences is ADDed) using sorted lists.
Definition at line 48 of file CommUlongStringKernel.h.
Public Member Functions | |
CCommUlongStringKernel (int32_t size=10, bool use_sign=false) | |
CCommUlongStringKernel (CStringFeatures< uint64_t > *l, CStringFeatures< uint64_t > *r, bool use_sign=false, int32_t size=10) | |
virtual | ~CCommUlongStringKernel () |
virtual bool | init (CFeatures *l, CFeatures *r) |
virtual void | cleanup () |
virtual EKernelType | get_kernel_type () |
virtual const char * | get_name () const |
virtual bool | init_optimization (int32_t count, int32_t *IDX, float64_t *weights) |
virtual bool | delete_optimization () |
virtual float64_t | compute_optimized (int32_t idx) |
void | merge_dictionaries (int32_t &t, int32_t j, int32_t &k, uint64_t *vec, uint64_t *dic, float64_t *dic_weights, float64_t weight, int32_t vec_idx) |
virtual void | add_to_normal (int32_t idx, float64_t weight) |
virtual void | clear_normal () |
virtual void | remove_lhs () |
virtual void | remove_rhs () |
virtual EFeatureType | get_feature_type () |
void | get_dictionary (int32_t &dsize, uint64_t *&dict, float64_t *&dweights) |
![]() | |
CStringKernel (int32_t cachesize=0) | |
CStringKernel (CFeatures *l, CFeatures *r) | |
virtual EFeatureClass | get_feature_class () |
![]() | |
CKernel () | |
CKernel (int32_t size) | |
CKernel (CFeatures *l, CFeatures *r, int32_t size) | |
virtual | ~CKernel () |
float64_t | kernel (int32_t idx_a, int32_t idx_b) |
SGMatrix< float64_t > | get_kernel_matrix () |
virtual SGVector< float64_t > | get_kernel_col (int32_t j) |
virtual SGVector< float64_t > | get_kernel_row (int32_t i) |
template<class T > | |
SGMatrix< T > | get_kernel_matrix () |
virtual bool | set_normalizer (CKernelNormalizer *normalizer) |
virtual CKernelNormalizer * | get_normalizer () |
virtual bool | init_normalizer () |
void | load (CFile *loader) |
void | save (CFile *writer) |
CFeatures * | get_lhs () |
CFeatures * | get_rhs () |
virtual int32_t | get_num_vec_lhs () |
virtual int32_t | get_num_vec_rhs () |
virtual bool | has_features () |
bool | get_lhs_equals_rhs () |
virtual void | remove_lhs_and_rhs () |
void | set_cache_size (int32_t size) |
int32_t | get_cache_size () |
void | list_kernel () |
bool | has_property (EKernelProperty p) |
EOptimizationType | get_optimization_type () |
virtual void | set_optimization_type (EOptimizationType t) |
bool | get_is_initialized () |
bool | init_optimization_svm (CSVM *svm) |
virtual void | compute_batch (int32_t num_vec, int32_t *vec_idx, float64_t *target, int32_t num_suppvec, int32_t *IDX, float64_t *alphas, float64_t factor=1.0) |
float64_t | get_combined_kernel_weight () |
void | set_combined_kernel_weight (float64_t nw) |
virtual int32_t | get_num_subkernels () |
virtual void | compute_by_subkernel (int32_t vector_idx, float64_t *subkernel_contrib) |
virtual const float64_t * | get_subkernel_weights (int32_t &num_weights) |
virtual void | set_subkernel_weights (SGVector< float64_t > weights) |
![]() | |
CSGObject () | |
CSGObject (const CSGObject &orig) | |
virtual | ~CSGObject () |
virtual bool | is_generic (EPrimitiveType *generic) const |
template<class T > | |
void | set_generic () |
void | unset_generic () |
virtual void | print_serializable (const char *prefix="") |
virtual bool | save_serializable (CSerializableFile *file, const char *prefix="") |
virtual bool | load_serializable (CSerializableFile *file, const char *prefix="") |
void | set_global_io (SGIO *io) |
SGIO * | get_global_io () |
void | set_global_parallel (Parallel *parallel) |
Parallel * | get_global_parallel () |
void | set_global_version (Version *version) |
Version * | get_global_version () |
SGVector< char * > | get_modelsel_names () |
char * | get_modsel_param_descr (const char *param_name) |
index_t | get_modsel_param_index (const char *param_name) |
Protected Member Functions | |
float64_t | compute (int32_t idx_a, int32_t idx_b) |
Protected Attributes | |
CDynamicArray< uint64_t > | dictionary |
CDynamicArray< float64_t > | dictionary_weights |
bool | use_sign |
Additional Inherited Members | |
![]() | |
SGIO * | io |
Parallel * | parallel |
Version * | version |
Parameter * | m_parameters |
Parameter * | m_model_selection_parameters |
![]() | |
template<class T > | |
static void * | get_kernel_matrix_helper (void *p) |
CCommUlongStringKernel | ( | int32_t | size = 10 , |
bool | use_sign = false |
||
) |
constructor
size | cache size |
use_sign | if sign shall be used |
Definition at line 19 of file CommUlongStringKernel.cpp.
CCommUlongStringKernel | ( | CStringFeatures< uint64_t > * | l, |
CStringFeatures< uint64_t > * | r, | ||
bool | use_sign = false , |
||
int32_t | size = 10 |
||
) |
constructor
l | features of left-hand side |
r | features of right-hand side |
use_sign | if sign shall be used |
size | cache size |
Definition at line 28 of file CommUlongStringKernel.cpp.
|
virtual |
Definition at line 39 of file CommUlongStringKernel.cpp.
|
virtual |
add to normal
idx | where to add |
weight | what to add |
Reimplemented from CKernel.
Definition at line 145 of file CommUlongStringKernel.cpp.
|
virtual |
clean up kernel
Reimplemented from CKernel.
Definition at line 73 of file CommUlongStringKernel.cpp.
|
virtual |
|
protected virtual |
compute kernel function for features a and b. idx_{a,b} denote the indices of the feature vectors in the corresponding feature object
idx_a | index a |
idx_b | index b |
Implements CKernel.
Definition at line 80 of file CommUlongStringKernel.cpp.
|
virtual |
compute optimized
idx | index to compute |
Reimplemented from CKernel.
Definition at line 254 of file CommUlongStringKernel.cpp.
|
virtual |
delete optimization
Reimplemented from CKernel.
Definition at line 245 of file CommUlongStringKernel.cpp.
void get_dictionary | ( | int32_t & | dsize, |
uint64_t *& | dict, | ||
float64_t *& | dweights | ||
) |
get dictionary
dsize | dictionary size will be stored in here |
dict | dictionary will be stored in here |
dweights | dictionary weights will be stored in here |
Definition at line 183 of file CommUlongStringKernel.h.
|
virtual |
return feature type the kernel can deal with
Reimplemented from CStringKernel< uint64_t >.
Definition at line 175 of file CommUlongStringKernel.h.
|
virtual |
return what type of kernel we are
Implements CStringKernel< uint64_t >.
Definition at line 87 of file CommUlongStringKernel.h.
|
virtual |
return the kernel's name
Reimplemented from CStringKernel< uint64_t >.
Definition at line 93 of file CommUlongStringKernel.h.
initialize kernel
l | features of left-hand side |
r | features of right-hand side |
Reimplemented from CStringKernel< uint64_t >.
Definition at line 67 of file CommUlongStringKernel.cpp.
|
virtual |
initialize optimization
count | count |
IDX | index |
weights | weights |
Reimplemented from CKernel.
Definition at line 217 of file CommUlongStringKernel.cpp.
void merge_dictionaries | ( | int32_t & | t, |
int32_t | j, | ||
int32_t & | k, | ||
uint64_t * | vec, | ||
uint64_t * | dic, | ||
float64_t * | dic_weights, | ||
float64_t | weight, | ||
int32_t | vec_idx | ||
) |
merge dictionaries
t | t |
j | j |
k | k |
vec | vector |
dic | dictionary |
dic_weights | dictionary weights |
weight | weight |
vec_idx | vector index |
Definition at line 129 of file CommUlongStringKernel.h.
|
virtual |
remove lhs from kernel
Reimplemented from CKernel.
Definition at line 44 of file CommUlongStringKernel.cpp.
|
virtual |
remove rhs from kernel
Reimplemented from CKernel.
Definition at line 57 of file CommUlongStringKernel.cpp.
|
protected |
dictionary
Definition at line 204 of file CommUlongStringKernel.h.
|
protected |
dictionary weights
Definition at line 206 of file CommUlongStringKernel.h.
|
protected |
if sign shall be used
Definition at line 209 of file CommUlongStringKernel.h.