22 using namespace shogun;
63 getExpFunctionCache(max_len);
// NOTE(review): sparse excerpt. The function header, the braces and several
// statements (including the declarations of oligo_value, factor and counter,
// and any updates to counter) are not visible here. The parameter list matches
// the encodeOligo(...) call made elsewhere in this file, so this is presumably
// that function's definition; confirm against the full source.
//
// Visible parameters:
//   sequence            string to encode
//   k_mer_length        width of each k-mer window
//   allowed_characters  alphabet; its size is used as the numeric base below
//   values              output, resized to one (int32_t, float64_t) pair per
//                       k-mer window: .first is a 1-based window start
//                       position, .second is the window's numeric encoding
68 const std::string& sequence, uint32_t k_mer_length,
69 const std::string& allowed_characters,
70 std::vector< std::pair<int32_t, float64_t> >& values)
// Lookup table from an alphabet character to the integer used as its digit.
74 std::map<std::string::value_type, uint32_t> residue_values;
76 uint32_t number_of_residues = allowed_characters.size();
77 uint32_t sequence_length = sequence.size();
78 bool sequence_ok =
true;
// Scan every character of the sequence for membership in the alphabet.
// The statement executed on a miss is not visible; presumably it clears
// sequence_ok. TODO confirm.
81 for (uint32_t i = 0; i < sequence.size(); ++i)
83 if (allowed_characters.find(sequence.at(i)) == std::string::npos)
// Encode only valid sequences that are at least one window long. The length
// check also keeps the unsigned expression
// sequence_length - k_mer_length + 1 from wrapping around.
87 if (sequence_ok && k_mer_length <= sequence_length)
89 values.resize(sequence_length - k_mer_length + 1,
90 std::pair<int32_t, float64_t>());
// Register each alphabet character with the current value of counter.
// How counter advances between insertions is not visible in this excerpt.
91 for (uint32_t i = 0; i < number_of_residues; ++i)
93 residue_values.insert(std::make_pair(allowed_characters[i], counter));
// Encode the first window: walk its characters from last to first, adding
// digit * factor and multiplying factor by the alphabet size each step, i.e.
// a positional number in base number_of_residues.
96 for (int32_t
k = k_mer_length - 1;
k >= 0;
k--)
98 oligo_value += factor * residue_values[sequence[
k]];
99 factor *= number_of_residues;
// Undo the final multiplication so factor is the weight that was applied to
// the window's leading character (used below to remove that character).
101 factor /= number_of_residues;
// Record the first window at 1-based position 1.
103 values[counter].first = 1;
104 values[counter].second = oligo_value;
// Rolling update for the remaining windows: drop the contribution of the
// character leaving the window, shift by one digit, and append the character
// entering the window.
107 for (uint32_t j = 1; j < sequence_length - k_mer_length + 1; j++)
109 oligo_value -= factor * residue_values[sequence[j - 1]];
110 oligo_value = oligo_value * number_of_residues +
111 residue_values[sequence[j + k_mer_length - 1]];
113 values[counter].first = j + 1;
114 values[counter].second = oligo_value ;
// Order the pairs with cmpOligos_ (defined outside this excerpt; its ordering
// criterion is not visible here). stable_sort keeps equal elements in their
// insertion order.
117 stable_sort(values.begin(), values.end(), cmpOligos_);
// NOTE(review): sparse excerpt. The function header and braces are not visible.
// What is visible: a batch encoder that runs encodeOligo on every input
// sequence.
//
// Visible parameters:
//   sequences           input strings
//   k_mer_length        forwarded unchanged to encodeOligo
//   allowed_characters  forwarded unchanged to encodeOligo
//   encoded_sequences   output, resized to sequences.size(); entry i receives
//                       the encoding produced for sequences[i]
126 const std::vector<std::string>& sequences, uint32_t k_mer_length,
127 const std::string& allowed_characters,
128 std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences)
// Scratch buffer handed to encodeOligo on every iteration.
130 std::vector< std::pair<int32_t, float64_t> > temp_vector;
131 encoded_sequences.resize(sequences.size(),
132 std::vector< std::pair<int32_t, float64_t> >());
134 for (uint32_t i = 0; i < sequences.size(); ++i)
// Encode into the scratch buffer, then copy the result into slot i.
136 encodeOligo(sequences[i], k_mer_length, allowed_characters, temp_vector);
137 encoded_sequences[i] = temp_vector;
// NOTE(review): only the signature and one loop header are visible; the loop
// body and any setup before it are missing from this excerpt.
// Judging by the name, and by gauss_table being indexed with an absolute
// position difference elsewhere in this file, this presumably precomputes
// that table for distances below sequence_length. TODO confirm.
141 void COligoStringKernel::getExpFunctionCache(uint32_t sequence_length)
// Iterates i = 1 .. sequence_length - 1; how index 0 is handled is not
// visible here.
147 for (uint32_t i = 1; i < sequence_length; i++)
// NOTE(review): sparse excerpt. The function header, the declarations of i1,
// i2 and result, and the bodies of every branch are not visible. Only the
// conditions below can be described with certainty.
//
// Visible parameters:
//   x, y          sequences of (int32_t, float64_t) pairs; the code compares
//                 .second values for equality/ordering and takes differences
//                 of .first values
//   max_distance  upper bound on |x.first - y.first| in the visible clause
//                 below
154 const std::vector< std::pair<int32_t, float64_t> >& x,
155 const std::vector< std::pair<int32_t, float64_t> >& y,
156 int32_t max_distance)
162 uint32_t x_size = x.size();
163 uint32_t y_size = y.size();
// Walk both inputs together. The "+ 1" in each bound guarantees that
// x[i1 + 1] and y[i2 + 1], which are read inside the loop, stay in range.
165 while ((uint32_t) i1 + 1 < x_size && (uint32_t) i2 + 1 < y_size)
// Case: both current entries carry the same .second value.
167 if (x[i1].second == y[i2].second)
// Second operand of an || whose first operand is not visible in this excerpt:
// true when the two .first values differ by at most max_distance.
170 || (abs(x[i1].first - y[i2].first)) <= max_distance)
// Accumulate the gauss_table entry for the absolute difference of the two
// .first values.
172 result +=
gauss_table[abs((x[i1].first - y[i2].first))];
// Look-ahead tests: does the next entry of x (or of y) repeat the current
// .second value? The actions taken in each case are not visible here.
173 if (x[i1].second == x[i1 + 1].second)
178 else if (y[i2].second == y[i2 + 1].second)
// Comparison of the two .first values, followed by the same look-ahead tests.
192 if (x[i1].first < y[i2].first)
194 if (x[i1].second == x[i1 + 1].second)
198 else if (y[i2].second == y[i2 + 1].second)
// Loop over consecutive y entries that share the same .second value.
// NOTE(review): no bound check on i2 + 1 is visible on this line; confirm
// that the hidden loop body cannot run past the end of y.
200 while (y[i2].second == y[i2+1].second)
// Case: .second values differ; this tests whether x's is the smaller one.
// The resulting action is not visible.
222 if (x[i1].second < y[i2].second)
239 std::vector< std::pair<int32_t, float64_t> > aenc;
240 std::vector< std::pair<int32_t, float64_t> > benc;
249 void COligoStringKernel::init()