1
2
3
4
5
6
7
8
9
10 """Kernels for Gaussian Process Regression and Classification."""
11
12
13 _DEV__DOC__ = """
14 Make use of Parameter Collections to keep parameters of the
15 kernels. Then we would get a uniform .reset() functionality. Now reset
16 is provided just for parts which are failing in the unittests, but
17 there is many more places where they are not reset properly if
18 classifier gets trained on some new data of different dimensionality
19 """
20
21 __docformat__ = 'restructuredtext'
22
23
24 import numpy as N
25
26 from mvpa.misc.exceptions import InvalidHyperparameterError
27 from mvpa.clfs.distance import squared_euclidean_distance
28
29 if __debug__:
30 from mvpa.base import debug, warning
31
32
34 """Kernel function base class.
35
36 """
37
40
43
def compute(self, data1, data2=None):
    """Compute the kernel matrix between *data1* and *data2*.

    Abstract method -- every concrete kernel must override it.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    raise NotImplementedError
48 """Resets the kernel dropping internal variables to the original values"""
49 pass
50
52 raise NotImplementedError
53
55 raise NotImplementedError
56
58 raise NotImplementedError
59
60 pass
61
63 """The constant kernel class.
64 """
def __init__(self, sigma_0=1.0, **kwargs):
    """Create a constant kernel.

    :Parameters:
      sigma_0 : float
        Standard deviation of the Gaussian prior probability
        N(0, sigma_0**2) of the intercept of the constant regression.
        (Defaults to 1.0)
    """
    Kernel.__init__(self, **kwargs)
    self.kernel_matrix = None   # cache of the most recent compute() result
    self.sigma_0 = sigma_0      # prior std of the intercept
79
81 return "%s(sigma_0=%s)" % (self.__class__.__name__, str(self.sigma_0))
82
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    The constant kernel is sigma_0**2 for every pair of samples, so the
    data influences the result only through the number of rows.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    n_rows = data1.shape[0]
    n_cols = n_rows if data2 is None else data2.shape[0]
    # ones * scalar keeps a float matrix regardless of sigma_0's type
    self.kernel_matrix = N.ones((n_rows, n_cols)) * (self.sigma_0 ** 2)
    return self.kernel_matrix
99
105
107 K_grad_sigma_0 = 2*self.sigma_0
108
109
110
111 self.lml_gradient = 0.5*N.array(K_grad_sigma_0*alphaalphaT_Kinv.sum())
112 return self.lml_gradient
113
115 K_grad_sigma_0 = 2*self.sigma_0**2
116 self.lml_gradient = 0.5*N.array(K_grad_sigma_0*alphaalphaT_Kinv.sum())
117 return self.lml_gradient
118
119 pass
120
121
123 """The linear kernel class.
124 """
def __init__(self, Sigma_p=None, sigma_0=1.0, **kwargs):
    """Initialize the linear kernel instance.

    :Parameters:
      Sigma_p : numpy.ndarray
        Covariance matrix of the Gaussian prior probability N(0, Sigma_p)
        on the weights of the linear regression.
        (Defaults to None)
      sigma_0 : float
        The standard deviation of the Gaussian prior N(0, sigma_0**2)
        of the intercept of the linear regression.
        (Defaults to 1.0)
    """
    Kernel.__init__(self, **kwargs)
    # Keep the value passed by the caller so it can be restored after
    # compute() substitutes a concrete default for None.
    self.Sigma_p_orig = Sigma_p
    self.Sigma_p = Sigma_p
    self.sigma_0 = sigma_0
    self.kernel_matrix = None   # cache of the most recent compute() result
147
149 return "%s(Sigma_p=%s, sigma_0=%s)" \
150 % (self.__class__.__name__, str(self.Sigma_p), str(self.sigma_0))
151
152
156
157
def compute(self, data1, data2=None):
    """Compute kernel matrix.
    Set Sigma_p to correct dimensions and default value if necessary.

    K(x, y) = x Sigma_p y^T + sigma_0**2

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    if data2 is None:
        data2 = data1

    # No prior covariance supplied: fall back to the identity (scalar 1.0).
    if self.Sigma_p is None:
        self.Sigma_p = 1.0

    if N.isscalar(self.Sigma_p):
        # Isotropic prior: multiplying by 1.0 is a no-op, so skip it.
        if self.Sigma_p == 1.0:
            data2_sc = data2.T
        else:
            data2_sc = self.Sigma_p * data2.T
    elif len(self.Sigma_p.shape) == 1 and \
            self.Sigma_p.shape[0] == data1.shape[1]:
        # Diagonal prior given as a vector of per-feature variances.
        # BUG FIX: scale data2, not data1 -- the original scaled data1,
        # which yields a wrongly-shaped (and wrong-valued) kernel matrix
        # whenever data2 differs from data1.
        data2_sc = (self.Sigma_p * data2).T
    else:
        # Full covariance matrix.
        data2_sc = N.dot(self.Sigma_p, data2.T)

    self.kernel_matrix = N.dot(data1, data2_sc) + self.sigma_0 ** 2
    return self.kernel_matrix
207
209
210
211
212
213
214
215
216 if N.any(hyperparameter < 0):
217 raise InvalidHyperparameterError()
218 self.sigma_0 = N.array(hyperparameter[0])
219 self.Sigma_p = N.diagflat(hyperparameter[1:])
220 return
221
223 def lml_grad(K_grad_i):
224
225
226 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
227 self.lml_gradient = []
228 self.lml_gradient.append(2*self.sigma_0*alphaalphaT_Kinv.sum())
229 for i in range(self.Sigma_p.shape[0]):
230
231
232 K_grad_i = N.multiply.outer(data[:,i],data[:,i])
233 self.lml_gradient.append(lml_grad(K_grad_i))
234 pass
235 self.lml_gradient = 0.5*N.array(self.lml_gradient)
236 return self.lml_gradient
237
239 def lml_grad(K_grad_i):
240
241
242 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
243 self.lml_gradient = []
244 self.lml_gradient.append(2*self.sigma_0**2*alphaalphaT_Kinv.sum())
245 for i in range(self.Sigma_p.shape[0]):
246
247
248 K_grad_log_i = self.Sigma_p[i,i]*N.multiply.outer(data[:,i],data[:,i])
249 self.lml_gradient.append(lml_grad(K_grad_log_i))
250 pass
251 self.lml_gradient = 0.5*N.array(self.lml_gradient)
252 return self.lml_gradient
253
254 pass
255
256
258 """The Exponential kernel class.
259
260 Note that it can handle a length scale for each dimension for
261 Automtic Relevance Determination.
262
263 """
def __init__(self, length_scale=1.0, sigma_f=1.0, **kwargs):
    """Initialize an Exponential kernel instance.

    :Parameters:
      length_scale : float OR numpy.ndarray
        The characteristic length-scale (or length-scales) of the
        phenomenon under investigation. (Defaults to 1.0)
      sigma_f : float
        Signal standard deviation. (Defaults to 1.0)
    """
    Kernel.__init__(self, **kwargs)
    self.kernel_matrix = None   # cache of the most recent compute() result
    self.sigma_f = sigma_f
    self.length_scale = length_scale
282
283
285 return "%s(length_scale=%s, sigma_f=%s)" \
286 % (self.__class__.__name__, str(self.length_scale), str(self.sigma_f))
287
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    K(x, y) = sigma_f**2 * exp(-|x - y| / length_scale)

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    # Weighted distance matrix, kept as an attribute because the
    # gradient computations reuse it.
    weight = self.length_scale ** -2
    self.wdm = N.sqrt(squared_euclidean_distance(data1, data2,
                                                 weight=weight))
    self.kernel_matrix = self.sigma_f ** 2 * N.exp(-self.wdm)
    return self.kernel_matrix
307
309 """Compute gradient of the kernel matrix. A must for fast
310 model selection with high-dimensional data.
311 """
312 raise NotImplementedError
313
315 """Set hyperaparmeters from a vector.
316
317 Used by model selection.
318 """
319 if N.any(hyperparameter < 0):
320 raise InvalidHyperparameterError()
321 self.sigma_f = hyperparameter[0]
322 self.length_scale = hyperparameter[1:]
323 return
324
326 """Compute grandient of the kernel and return the portion of
327 log marginal likelihood gradient due to the kernel.
328 Shorter formula. Allows vector of lengthscales (ARD)
329 BUT THIS LAST OPTION SEEMS NOT TO WORK FOR (CURRENTLY)
330 UNKNOWN REASONS.
331 """
332 self.lml_gradient = []
333 def lml_grad(K_grad_i):
334
335
336 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
337 grad_sigma_f = 2.0/self.sigma_f*self.kernel_matrix
338 self.lml_gradient.append(lml_grad(grad_sigma_f))
339 if N.isscalar(self.length_scale) or self.length_scale.size==1:
340
341 K_grad_l = self.wdm*self.kernel_matrix*(self.length_scale**-1)
342 self.lml_gradient.append(lml_grad(K_grad_l))
343 else:
344
345 for i in range(self.length_scale.size):
346 K_grad_i = (self.length_scale[i]**-3)*(self.wdm**-1)*self.kernel_matrix*N.subtract.outer(data[:,i],data[:,i])**2
347 self.lml_gradient.append(lml_grad(K_grad_i))
348 pass
349 pass
350 self.lml_gradient = 0.5*N.array(self.lml_gradient)
351 return self.lml_gradient
352
354 """Compute grandient of the kernel and return the portion of
355 log marginal likelihood gradient due to the kernel.
356 Shorter formula. Allows vector of lengthscales (ARD).
357 BUT THIS LAST OPTION SEEMS NOT TO WORK FOR (CURRENTLY)
358 UNKNOWN REASONS.
359 """
360 self.lml_gradient = []
361 def lml_grad(K_grad_i):
362
363
364 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
365 grad_log_sigma_f = 2.0*self.kernel_matrix
366 self.lml_gradient.append(lml_grad(grad_log_sigma_f))
367 if N.isscalar(self.length_scale) or self.length_scale.size==1:
368
369 K_grad_l = self.wdm*self.kernel_matrix
370 self.lml_gradient.append(lml_grad(K_grad_l))
371 else:
372
373 for i in range(self.length_scale.size):
374 K_grad_i = (self.length_scale[i]**-2)*(self.wdm**-1)*self.kernel_matrix*N.subtract.outer(data[:,i],data[:,i])**2
375 self.lml_gradient.append(lml_grad(K_grad_i))
376 pass
377 pass
378 self.lml_gradient = 0.5*N.array(self.lml_gradient)
379 return self.lml_gradient
380
381 pass
382
383
385 """The Squared Exponential kernel class.
386
387 Note that it can handle a length scale for each dimension for
388 Automtic Relevance Determination.
389
390 """
def __init__(self, length_scale=1.0, sigma_f=1.0, **kwargs):
    """Initialize a Squared Exponential kernel instance.

    :Parameters:
      length_scale : float OR numpy.ndarray
        The characteristic length-scale (or length-scales) of the
        phenomenon under investigation. (Defaults to 1.0)
      sigma_f : float
        Signal standard deviation. (Defaults to 1.0)
    """
    Kernel.__init__(self, **kwargs)
    # Remember the initial length scale so reset() can restore it.
    self.length_scale_orig = length_scale
    self.length_scale = length_scale
    self.sigma_f = sigma_f
    self.kernel_matrix = None   # cache of the most recent compute() result
411
415
416
418 return "%s(length_scale=%s, sigma_f=%s)" \
419 % (self.__class__.__name__, str(self.length_scale), str(self.sigma_f))
420
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    K(x, y) = sigma_f**2 * exp(-0.5 * ||x - y||**2 / length_scale**2)

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    # Weighted squared distance matrix, cached as an attribute for the
    # gradient computations.
    self.wdm2 = squared_euclidean_distance(
        data1, data2, weight=self.length_scale ** -2)
    self.kernel_matrix = self.sigma_f ** 2 * N.exp(-0.5 * self.wdm2)
    return self.kernel_matrix
439
441 """Set hyperaparmeters from a vector.
442
443 Used by model selection.
444 """
445 if N.any(hyperparameter < 0):
446 raise InvalidHyperparameterError()
447 self.sigma_f = hyperparameter[0]
448 self.length_scale = hyperparameter[1:]
449 return
450
452 """Compute grandient of the kernel and return the portion of
453 log marginal likelihood gradient due to the kernel.
454 Shorter formula. Allows vector of lengthscales (ARD).
455 """
456 self.lml_gradient = []
457 def lml_grad(K_grad_i):
458
459
460 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
461 grad_sigma_f = 2.0/self.sigma_f*self.kernel_matrix
462 self.lml_gradient.append(lml_grad(grad_sigma_f))
463 if N.isscalar(self.length_scale) or self.length_scale.size==1:
464
465 K_grad_l = self.wdm2*self.kernel_matrix*(1.0/self.length_scale)
466 self.lml_gradient.append(lml_grad(K_grad_l))
467 else:
468
469 for i in range(self.length_scale.size):
470 K_grad_i = 1.0/(self.length_scale[i]**3)*self.kernel_matrix*N.subtract.outer(data[:,i],data[:,i])**2
471 self.lml_gradient.append(lml_grad(K_grad_i))
472 pass
473 pass
474 self.lml_gradient = 0.5*N.array(self.lml_gradient)
475 return self.lml_gradient
476
478 """Compute grandient of the kernel and return the portion of
479 log marginal likelihood gradient due to the kernel.
480 Hyperparameters are in log scale which is sometimes more
481 stable. Shorter formula. Allows vector of lengthscales (ARD).
482 """
483 self.lml_gradient = []
484 def lml_grad(K_grad_i):
485
486
487 return (alphaalphaT_Kinv*(K_grad_i.T)).sum()
488 K_grad_log_sigma_f = 2.0*self.kernel_matrix
489 self.lml_gradient.append(lml_grad(K_grad_log_sigma_f))
490 if N.isscalar(self.length_scale) or self.length_scale.size==1:
491
492 K_grad_log_l = self.wdm2*self.kernel_matrix
493 self.lml_gradient.append(lml_grad(K_grad_log_l))
494 else:
495
496 for i in range(self.length_scale.size):
497 K_grad_log_l_i = 1.0/(self.length_scale[i]**2)*self.kernel_matrix*N.subtract.outer(data[:,i],data[:,i])**2
498 self.lml_gradient.append(lml_grad(K_grad_log_l_i))
499 pass
500 pass
501 self.lml_gradient = 0.5*N.array(self.lml_gradient)
502 return self.lml_gradient
503
504 pass
505
507 """The Matern kernel class for the case ni=3/2 or ni=5/2.
508
509 Note that it can handle a length scale for each dimension for
510 Automtic Relevance Determination.
511
512 """
def __init__(self, length_scale=1.0, sigma_f=1.0, numerator=3.0, **kwargs):
    """Initialize a Matern kernel instance.

    (Doc fix: the original docstring said "Squared Exponential", a
    copy-paste slip from another kernel class.)

    :Parameters:
      length_scale : float OR numpy.ndarray
        the characteristic length-scale (or length-scales) of the
        phenomenon under investigation.
        (Defaults to 1.0)
      sigma_f : float
        Signal standard deviation.
        (Defaults to 1.0)
      numerator: float
        the numerator of parameter ni of Matern covariance functions.
        Currently only numerator=3.0 and numerator=5.0 are implemented.
        (Defaults to 3.0)

    :Raises:
      NotImplementedError when numerator is neither 3.0 nor 5.0.
    """
    Kernel.__init__(self, **kwargs)
    self.length_scale = length_scale
    self.sigma_f = sigma_f
    self.kernel_matrix = None
    # Only ni=3/2 and ni=5/2 have closed forms implemented in compute().
    if numerator not in (3.0, 5.0):
        raise NotImplementedError
    self.numerator = numerator
539
541 return "%s(length_scale=%s, ni=%d/2)" \
542 % (self.__class__.__name__, str(self.length_scale), self.numerator)
543
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    # Squared distances, pre-scaled by 1 / (2 * length_scale**2).
    d2 = squared_euclidean_distance(
        data1, data2, weight=0.5 / (self.length_scale ** 2))
    if self.numerator == 3.0:
        sqrt3_d = N.sqrt(3.0) * N.sqrt(d2)
        self.kernel_matrix = \
            self.sigma_f ** 2 * (1.0 + sqrt3_d) * N.exp(-sqrt3_d)
    elif self.numerator == 5.0:
        sqrt5_d = N.sqrt(5.0) * N.sqrt(d2)
        self.kernel_matrix = \
            self.sigma_f ** 2 * (1.0 + sqrt5_d + 5.0 / 3.0 * d2) \
            * N.exp(-sqrt5_d)
    return self.kernel_matrix
567
569 """Compute gradient of the kernel matrix. A must for fast
570 model selection with high-dimensional data.
571 """
572
573
574
575 raise NotImplementedError
576
578 """Set hyperaparmeters from a vector.
579
580 Used by model selection.
581 Note: 'numerator' is not considered as an hyperparameter.
582 """
583 if N.any(hyperparameter < 0):
584 raise InvalidHyperparameterError()
585 self.sigma_f = hyperparameter[0]
586 self.length_scale = hyperparameter[1:]
587 return
588
589 pass
590
591
593 """The Matern kernel class for the case ni=5/2.
594
595 This kernel is just KernelMatern_3_2(numerator=5.0).
596 """
598 """Initialize a Squared Exponential kernel instance.
599
600 :Parameters:
601 length_scale : float OR numpy.ndarray
602 the characteristic length-scale (or length-scales) of the
603 phenomenon under investigation.
604 (Defaults to 1.0)
605 """
606 KernelMatern_3_2.__init__(self, numerator=5.0, **kwargs)
607 pass
608
609
611 """The Rational Quadratic (RQ) kernel class.
612
613 Note that it can handle a length scale for each dimension for
614 Automtic Relevance Determination.
615
616 """
def __init__(self, length_scale=1.0, sigma_f=1.0, alpha=0.5, **kwargs):
    """Initialize a Rational Quadratic kernel instance.

    (Doc fix: the original docstring misnamed the kernel "Squared
    Exponential" and claimed alpha "(Defaults to 2.0)" while the actual
    signature default is 0.5.)

    :Parameters:
      length_scale : float OR numpy.ndarray
        the characteristic length-scale (or length-scales) of the
        phenomenon under investigation.
        (Defaults to 1.0)
      sigma_f : float
        Signal standard deviation.
        (Defaults to 1.0)
      alpha: float
        The parameter of the RQ functions family.
        (Defaults to 0.5)
    """
    Kernel.__init__(self, **kwargs)
    self.length_scale = length_scale
    self.sigma_f = sigma_f
    self.alpha = alpha
    self.kernel_matrix = None
639
641 return "%s(length_scale=%s, alpha=%f)" \
642 % (self.__class__.__name__, str(self.length_scale), self.alpha)
643
def compute(self, data1, data2=None):
    """Compute kernel matrix.

    K(x, y) = sigma_f**2 * (1 + d2 / (2 * alpha)) ** (-alpha)
    where d2 is the length-scale-weighted squared distance.

    :Parameters:
      data1 : numpy.ndarray
        data
      data2 : numpy.ndarray
        data
        (Defaults to None)
    """
    d2 = squared_euclidean_distance(
        data1, data2, weight=1.0 / (self.length_scale ** 2))
    base = 1.0 + d2 / (2.0 * self.alpha)
    self.kernel_matrix = self.sigma_f ** 2 * base ** -self.alpha
    return self.kernel_matrix
659
661 """Compute gradient of the kernel matrix. A must for fast
662 model selection with high-dimensional data.
663 """
664
665
666
667 raise NotImplementedError
668
670 """Set hyperaparmeters from a vector.
671
672 Used by model selection.
673 Note: 'alpha' is not considered as an hyperparameter.
674 """
675 if N.any(hyperparameter < 0):
676 raise InvalidHyperparameterError()
677 self.sigma_f = hyperparameter[0]
678 self.length_scale = hyperparameter[1:]
679 return
680
681 pass
682
683
684
# Mapping from human-readable kernel names to the classes implementing
# them; lets callers select a kernel family by string.
kernel_dictionary = {
    'constant': KernelConstant,
    'linear': KernelLinear,
    'exponential': KernelExponential,
    'squared exponential': KernelSquaredExponential,
    'Matern ni=3/2': KernelMatern_3_2,
    'Matern ni=5/2': KernelMatern_5_2,
    'rational quadratic': KernelRationalQuadratic,
}
692