# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### #
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### #
9 """Base class for data measures: algorithms that quantify properties of
10 datasets.
11
12 Besides the `DatasetMeasure` base class this module also provides the
13 (abstract) `FeaturewiseDatasetMeasure` class. The difference between a general
14 measure and the output of the `FeaturewiseDatasetMeasure` is that the latter
15 returns a 1d map (one value per feature in the dataset). In contrast there are
16 no restrictions on the returned value of `DatasetMeasure` except for that it
17 has to be in some iterable container.
18
19 """

__docformat__ = 'restructuredtext'

import numpy as N
import mvpa.support.copy as copy

from mvpa.misc.state import StateVariable, ClassWithCollections
from mvpa.misc.args import group_kwargs
from mvpa.misc.transformers import FirstAxisMean, SecondAxisSumOfAbs
from mvpa.base.dochelpers import enhancedDocString
from mvpa.base import externals
from mvpa.clfs.stats import autoNullDist

if __debug__:
    from mvpa.base import debug


class DatasetMeasure(ClassWithCollections):
    """A measure computed from a `Dataset`

    All dataset measures support arbitrary transformation of the measure
    after it has been computed. Transformations are done by processing the
    measure with a functor that is specified via the `transformer` keyword
    argument of the constructor. Upon request, the raw measure (before
    transformations are applied) is stored in the `raw_result` state variable.

    Additionally all dataset measures support the estimation of the
    probability(ies) of a measure under some distribution. Typically this
    will be the NULL distribution (no signal), which can be estimated with
    permutation tests. If a distribution estimator instance is passed to the
    `null_dist` keyword argument of the constructor, the respective
    probabilities are automatically computed and stored in the `null_prob`
    state variable.

    .. note::
      For developers: All subclasses shall get all necessary parameters via
      their constructor, so it is possible to get the same type of measure
      for multiple datasets by passing them to the __call__() method
      successively.
    """

    raw_result = StateVariable(enabled=False,
        doc="Computed results before applying any " +
            "transformation algorithm")
    null_prob = StateVariable(enabled=True)
    """Stores the probability of a measure under the NULL hypothesis"""
    null_t = StateVariable(enabled=False)
    """Stores the t-score corresponding to null_prob under the assumption
    of a Normal distribution"""
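
    # Sketch of state handling (hypothetical `measure` instance): state
    # variables are filled as a side effect of __call__() and can be toggled:
    #
    #   measure.states.enable('raw_result')
    #   res = measure(dataset)
    #   raw = measure.raw_result    # value before the transformer was applied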

    def __init__(self, transformer=None, null_dist=None, **kwargs):
        """Does nothing special.

        :Parameters:
          transformer: Functor
            This functor is called in `__call__()` to perform a final
            processing step on the dataset measure before it is returned.
            If None, no transformation is applied.
          null_dist: instance of distribution estimator
            The estimated distribution is used to assign a probability for a
            certain value of the computed measure.
        """
        ClassWithCollections.__init__(self, **kwargs)

        self.__transformer = transformer
        """Functor to be called in return statement of all subclass __call__()
        methods."""
        null_dist_ = autoNullDist(null_dist)
        if __debug__:
            debug('SA', 'Assigning null_dist %s whenever original given was %s'
                  % (null_dist_, null_dist))
        self.__null_dist = null_dist_

    __doc__ = enhancedDocString('DatasetMeasure', locals(),
                                ClassWithCollections)


    def __call__(self, dataset):
        """Compute measure on a given `Dataset`.

        Each implementation has to handle a single argument: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container,
        applying the transformer if one is defined.
        """
        result = self._call(dataset)
        result = self._postcall(dataset, result)
        return result


    def _call(self, dataset):
        """Actually compute measure on a given `Dataset`.

        Each implementation has to handle a single argument: the source
        dataset.

        Returns the computed measure in some iterable (list-like) container.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Some postprocessing on the result
        """
        self.raw_result = result
        if self.__transformer is not None:
            if __debug__:
                debug("SA_", "Applying transformer %s" % self.__transformer)
            result = self.__transformer(result)

        if self.__null_dist is not None:
            if __debug__:
                debug("SA_", "Estimating NULL distribution using %s"
                      % self.__null_dist)

            # we need a matching measure instance, but we have to disable
            # the estimation of the null distribution in that child to
            # prevent infinite looping
            measure = copy.copy(self)
            measure.__null_dist = None
            self.__null_dist.fit(measure, dataset)

            if self.states.isEnabled('null_t'):
                # get the probability under the NULL hypothesis, and for
                # each value whether it belongs to the right tail
                null_prob, null_right_tail = \
                           self.__null_dist.p(result, return_tails=True)
                self.null_prob = null_prob

                externals.exists('scipy', raiseException=True)
                from scipy.stats import norm

                # TODO: the following logic should migrate into the
                #       distribution estimator itself
                tail = self.null_dist.tail
                if tail == 'left':
                    acdf = N.abs(null_prob)
                elif tail == 'right':
                    acdf = 1.0 - N.abs(null_prob)
                elif tail in ['any', 'both']:
                    acdf = 1.0 - N.clip(N.abs(null_prob), 0, 0.5)
                else:
                    raise RuntimeError, 'Unhandled tail %s' % tail

                # clip to avoid infinities from norm.ppf for cdf values of
                # exactly 0 or 1, then convert the clipped cdf into t-scores
                # under the assumption of a Normal distribution
                clip = 1e-16
                null_t = norm.ppf(N.clip(acdf, clip, 1.0 - clip))
                null_t[~null_right_tail] *= -1.0  # left tail -> negative t
                self.null_t = null_t
            else:
                # get the probability of the result under the NULL
                # hypothesis, without tail information
                self.null_prob = self.__null_dist.p(result)

        return result
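
    # Worked example of the null_t conversion above (values illustrative):
    # for tail == 'right' and null_prob == 0.05, acdf == 0.95 and
    #
    #   >>> from scipy.stats import norm
    #   >>> norm.ppf(0.95)
    #   1.6448536269514722
    #
    # i.e. t ~ +1.645; entries that fell into the left tail get their sign
    # flipped via null_right_tail.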


    def __repr__(self, prefixes=[]):
        """String representation of DatasetMeasure

        Includes only arguments which differ from default ones
        """
        prefixes = prefixes[:]
        if self.__transformer is not None:
            prefixes.append("transformer=%s" % self.__transformer)
        if self.__null_dist is not None:
            prefixes.append("null_dist=%s" % self.__null_dist)
        return super(DatasetMeasure, self).__repr__(prefixes=prefixes)

    def untrain(self):
        """'Untrain' measure

        Some derived classes might use classifiers, so we need to
        untrain those.
        """
        pass

    @property
    def null_dist(self):
        """Return Null Distribution estimator"""
        return self.__null_dist

    @property
    def transformer(self):
        """Return transformer"""
        return self.__transformer
215 """A per-feature-measure computed from a `Dataset` (base class).
216
217 Should behave like a DatasetMeasure.
218 """
219
220 base_sensitivities = StateVariable(enabled=False,
221 doc="Stores basic sensitivities if the sensitivity " +
222 "relies on combining multiple ones")
237 """Initialize
238
239 :Parameters:
240 combiner : Functor
241 The combiner is only applied if the computed featurewise dataset
242 measure is more than one-dimensional. This is different from a
243 `transformer`, which is always applied. By default, the sum of
244 absolute values along the second axis is computed.
245 """
246 DatasetMeasure.__init__(self, **kwargs)
247
248 self.__combiner = combiner
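
    # Sketch of the combiner semantics (hypothetical shapes): a measure that
    # returns one value per feature and per class, e.g. an array of shape
    # (nfeatures, nclasses), is reduced to a 1d per-feature map. Assuming
    # SecondAxisSumOfAbs sums absolute values along the second axis:
    #
    #   >>> SecondAxisSumOfAbs(N.array([[1, -2], [3, 4]]))
    #   array([3, 7])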


    def _call(self, dataset):
        """Computes a per-feature-measure on a given `Dataset`.

        Behaves like a `DatasetMeasure`, but computes and returns a 1d ndarray
        with one value per feature.
        """
        raise NotImplementedError


    def _postcall(self, dataset, result):
        """Adjusts per-feature-measure for computed `result`

        TODO: overlaps in what it does heavily with
          CombinedSensitivityAnalyzer, thus this one might make use of
          CombinedSensitivityAnalyzer yoh thinks, and here
          base_sensitivities doesn't sound appropriate.
          MH: There is indeed some overlap, but also significant differences.
          This one operates on a single sensana and combines over second
          axis, CombinedFeaturewiseDatasetMeasure uses first axis.
          Additionally, 'Sensitivity' base class is
          FeaturewiseDatasetMeasure, which would have to be changed to
          CombinedFeaturewiseDatasetMeasure to deal with stuff like
          SMLRWeights that return multiple sensitivity values by default.
          Not sure if unification of both (and/or removal of functionality
          here) does not lead to an overall more complicated situation,
          without any real gain -- after all this one works ;-)
        """
        result_sq = result.squeeze()
        if len(result_sq.shape) > 1:
            n_base = result.shape[1]
            """Number of base sensitivities"""
            if self.states.isEnabled('base_sensitivities'):
                b_sensitivities = []
                if not self.states.isKnown('biases'):
                    biases = None
                else:
                    biases = self.biases
                    if len(self.biases) != n_base:
                        raise ValueError, \
                              "Number of biases %d is different from the " \
                              "number of base sensitivities %d" \
                              % (len(self.biases), n_base)
                for i in xrange(n_base):
                    if biases is not None:
                        bias = biases[i]
                    else:
                        bias = None
                    b_sensitivities.append(StaticDatasetMeasure(
                        measure=result[:, i], bias=bias))
                self.base_sensitivities = b_sensitivities

            # after storing the base sensitivities, reduce the multiple
            # per-feature maps into a single 1d map with the combiner
            if self.__combiner is not None:
                result = self.__combiner(result)
        else:
            # remove bogus dimensions
            result = result_sq

        # call base class postcall (transformer, NULL distribution, ...)
        result = DatasetMeasure._postcall(self, dataset, result)

        return result

    @property
    def combiner(self):
        """Return combiner"""
        return self.__combiner


class StaticDatasetMeasure(DatasetMeasure):
    """A static (assigned) sensitivity measure.

    Since the implementation is generic it might be per feature or
    per whole dataset.
    """

    def __init__(self, measure=None, bias=None, *args, **kwargs):
        """Initialize.

        :Parameters:
          measure
            actual sensitivity to be returned
          bias
            optionally available bias
        """
        DatasetMeasure.__init__(self, *args, **kwargs)
        if measure is None:
            raise ValueError, "Sensitivity measure has to be provided"
        self.__measure = measure
        self.__bias = bias

    def _call(self, dataset):
        """Returns assigned sensitivity
        """
        return self.__measure

    bias = property(fget=lambda self: self.__bias)
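
    # Usage sketch (hypothetical values): the measure is fixed at
    # construction time, so the dataset argument of __call__() is
    # effectively ignored:
    #
    #   >>> sm = StaticDatasetMeasure(measure=N.array([0.1, 0.9, 0.3]))
    #   >>> sm(None)
    #   array([ 0.1,  0.9,  0.3])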


class Sensitivity(FeaturewiseDatasetMeasure):

    _LEGAL_CLFS = []
    """If a Sensitivity is classifier-specific, the allowed classes of
    classifiers should be listed here.
    """

    def __init__(self, clf, force_training=True, **kwargs):
        """Initialize the analyzer with the classifier it shall use.

        :Parameters:
          clf : :class:`Classifier`
            classifier to use.
          force_training : Bool
            if True (the default) the classifier is (re)trained on every
            call, even if it was trained already
        """
        FeaturewiseDatasetMeasure.__init__(self, **kwargs)

        _LEGAL_CLFS = self._LEGAL_CLFS
        if len(_LEGAL_CLFS) > 0:
            found = False
            for clf_class in _LEGAL_CLFS:
                if isinstance(clf, clf_class):
                    found = True
                    break
            if not found:
                raise ValueError, \
                      "Classifier %s has to be of allowed class (%s), " \
                      "but is %s" % (clf, _LEGAL_CLFS, `type(clf)`)

        self.__clf = clf
        """Classifier used to compute sensitivity"""

        self._force_training = force_training
        """Whether to force (re)training of the classifier"""

    def __repr__(self, prefixes=None):
        if prefixes is None:
            prefixes = []
        prefixes.append("clf=%s" % repr(self.clf))
        if not self._force_training:
            prefixes.append("force_training=%s" % self._force_training)
        return super(Sensitivity, self).__repr__(prefixes=prefixes)


    def __call__(self, dataset=None):
        """Train classifier on `dataset` and then compute actual sensitivity.

        If the classifier is already trained it is possible to extract the
        sensitivities without passing a dataset.
        """
        # local binding
        clf = self.__clf
        if not clf.trained or self._force_training:
            if dataset is None:
                raise ValueError, \
                      "Training classifier to compute sensitivities " \
                      "requires a dataset."
            if __debug__:
                debug("SA", "Training classifier %s %s" %
                      (`clf`,
                       {False: "since it wasn't yet trained",
                        True: "although it was trained previously"}
                       [clf.trained]))
            clf.train(dataset)

        return FeaturewiseDatasetMeasure.__call__(self, dataset)
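
    # Usage sketch (hypothetical classifier/dataset): with
    # force_training=False an already trained classifier is not retrained,
    # so sensitivities can be extracted without passing a dataset:
    #
    #   sana = clf.getSensitivityAnalyzer(force_training=False)
    #   clf.train(dataset)
    #   sens_map = sana()           # 1d array, one value per feature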

    def _setClassifier(self, clf):
        self.__clf = clf

    def untrain(self):
        """Untrain corresponding classifier for Sensitivity
        """
        if self.__clf is not None:
            self.__clf.untrain()

    @property
    def feature_ids(self):
        """Return feature_ids used by the underlying classifier
        """
        return self.__clf._getFeatureIds()

    clf = property(fget=lambda self: self.__clf,
                   fset=_setClassifier)


class CombinedFeaturewiseDatasetMeasure(FeaturewiseDatasetMeasure):
    """Set sensitivity analyzers to be merged into a single output"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced by each analyzer")

    def __init__(self, analyzers=None,
                 combiner=None,
                 **kwargs):
        """Initialize CombinedFeaturewiseDatasetMeasure

        :Parameters:
          analyzers : list or None
            List of analyzers to be used. There is no logic to populate
            such a list in __call__(), so it must either be provided to
            the constructor or assigned to .analyzers prior to calling.
        """
        if analyzers is None:
            analyzers = []

        FeaturewiseDatasetMeasure.__init__(self, **kwargs)
        self.__analyzers = analyzers
        """List of analyzers to use"""

        self.__combiner = combiner
        """Which functor to use to combine all sensitivities"""

    def _call(self, dataset):
        # run every analyzer on the dataset, store their outputs in the
        # 'sensitivities' state, and optionally combine them
        sensitivities = []
        for ind, analyzer in enumerate(self.__analyzers):
            if __debug__:
                debug("SA", "Computing sensitivity for SA#%d:%s" %
                      (ind, analyzer))
            sensitivities.append(analyzer(dataset))

        self.sensitivities = sensitivities
        if self.__combiner is not None:
            sensitivities = self.__combiner(sensitivities)
        return sensitivities
517 """Untrain CombinedFDM
518 """
519 if self.__analyzers is not None:
520 for anal in self.__analyzers:
521 anal.untrain()

    def _setAnalyzers(self, analyzers):
        """Set the analyzers
        """
        self.__analyzers = analyzers
        """Analyzers to use"""

    analyzers = property(fget=lambda x: x.__analyzers,
                         fset=_setAnalyzers,
                         doc="Used analyzers")


class SplitFeaturewiseDatasetMeasure(FeaturewiseDatasetMeasure):
    """Compute measures across splits for a specific analyzer"""

    sensitivities = StateVariable(enabled=False,
        doc="Sensitivities produced for each split")

    splits = StateVariable(enabled=False, doc=
        """Store the actual splits of the data. Can be memory expensive""")

    def __init__(self, splitter, analyzer,
                 insplit_index=0, combiner=None, **kwargs):
        """Initialize SplitFeaturewiseDatasetMeasure

        :Parameters:
          splitter : Splitter
            Splitter to use to split the dataset
          analyzer : DatasetMeasure
            Measure to be used. Could be analyzer as well (XXX)
          insplit_index : int
            splitter generates tuples of dataset on each iteration
            (usually 0th for training, 1st for testing).
            On what split index in that tuple to operate.
        """

        # the base class combiner is disabled on purpose: results of the
        # per-split analyzer are combined by this class' own combiner
        FeaturewiseDatasetMeasure.__init__(self, combiner=None, **kwargs)

        self.__analyzer = analyzer
        """Analyzer to use per split"""

        self.__combiner = combiner
        """Which functor to use to combine all sensitivities"""

        self.__splitter = splitter
        """Splitter to be used on the dataset"""

        self.__insplit_index = insplit_index
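
    # Usage sketch (hypothetical; NFoldSplitter is PyMVPA's n-fold splitter):
    # compute the analyzer on the training part (insplit_index=0) of each
    # split and get one sensitivity map per split:
    #
    #   sfdm = SplitFeaturewiseDatasetMeasure(
    #       splitter=NFoldSplitter(),
    #       analyzer=clf.getSensitivityAnalyzer())
    #   sens_per_split = sfdm(dataset)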

    def untrain(self):
        """Untrain SplitFeaturewiseDatasetMeasure
        """
        if self.__analyzer is not None:
            self.__analyzer.untrain()

    def _call(self, dataset):
        # compute the sensitivity for one part of every split generated by
        # the splitter, optionally storing the splits themselves
        sensitivities = []
        self.splits = splits = []
        store_splits = self.states.isEnabled("splits")

        for ind, split in enumerate(self.__splitter(dataset)):
            ds = split[self.__insplit_index]
            if __debug__:
                debug("SA", "Computing sensitivity for split %d on "
                      "dataset %s using %s" % (ind, ds, self.__analyzer))
            sensitivities.append(self.__analyzer(ds))
            if store_splits:
                splits.append(split)

        self.sensitivities = sensitivities

        result = N.array(sensitivities)
        if self.__combiner is not None:
            result = self.__combiner(result)
        return result


class BoostedClassifierSensitivityAnalyzer(Sensitivity):
    """Set sensitivity analyzers to be merged into a single output"""

    @group_kwargs(prefixes=['slave_'], assign=True)
    def __init__(self,
                 clf,
                 analyzer=None,
                 combined_analyzer=None,
                 slave_kwargs={},
                 **kwargs):
        """Initialize Sensitivity Analyzer for `BoostedClassifier`

        :Parameters:
          clf : `BoostedClassifier`
            Classifier to be used
          analyzer : analyzer
            Is used to populate combined_analyzer
          slave_*
            Arguments to pass to created analyzer if analyzer is None
        """
        Sensitivity.__init__(self, clf, **kwargs)
        if combined_analyzer is None:
            # sanitize kwargs: do not pass classifier-specific arguments
            # on to the combined analyzer
            kwargs.pop('force_training', None)
            combined_analyzer = CombinedFeaturewiseDatasetMeasure(**kwargs)
        self.__combined_analyzer = combined_analyzer
        """Combined analyzer to use"""

        if analyzer is not None and len(self._slave_kwargs):
            raise ValueError, \
                  "Provide either analyzer or slave_* arguments, not both"
        self.__analyzer = analyzer
        """Analyzer to use for basic classifiers within boosted classifier"""

    def untrain(self):
        """Untrain BoostedClassifierSensitivityAnalyzer
        """
        if self.__analyzer is not None:
            self.__analyzer.untrain()
        self.__combined_analyzer.untrain()

    def _call(self, dataset):
        analyzers = []
        # create an analyzer for each basic classifier of the boosted one
        for clf in self.clf.clfs:
            if self.__analyzer is None:
                analyzer = clf.getSensitivityAnalyzer(**(self._slave_kwargs))
                if analyzer is None:
                    raise ValueError, \
                          "Wasn't able to figure basic analyzer for clf %s" \
                          % `clf`
                if __debug__:
                    debug("SA", "Selected analyzer %s for clf %s" %
                          (`analyzer`, `clf`))
            else:
                # shallow copy since we have to assign a classifier to it
                analyzer = copy.copy(self.__analyzer)

            # assign corresponding classifier
            analyzer.clf = clf
            # if clf was trained already -- don't train again
            if clf.trained:
                analyzer._force_training = False
            analyzers.append(analyzer)

        self.__combined_analyzer.analyzers = analyzers

        return self.__combined_analyzer(dataset)

    combined_analyzer = property(fget=lambda x: x.__combined_analyzer)


class ProxyClassifierSensitivityAnalyzer(Sensitivity):
    """Set sensitivity analyzer output just to pass through"""

    clf_sensitivities = StateVariable(enabled=False,
        doc="Stores sensitivities of the proxied classifier")

    @group_kwargs(prefixes=['slave_'], assign=True)
    def __init__(self,
                 clf,
                 analyzer=None,
                 **kwargs):
        """Initialize Sensitivity Analyzer for a proxy classifier
        """
        Sensitivity.__init__(self, clf, **kwargs)

        if analyzer is not None and len(self._slave_kwargs):
            raise ValueError, \
                  "Provide either analyzer or slave_* arguments, not both"

        self.__analyzer = analyzer
        """Analyzer to use for the basic classifier within the proxy"""

    def untrain(self):
        """Untrain ProxyClassifierSensitivityAnalyzer
        """
        if self.__analyzer is not None:
            self.__analyzer.untrain()

    def _call(self, dataset):
        # lazily figure out an analyzer for the proxied (slave) classifier
        analyzer = self.__analyzer
        if analyzer is None:
            analyzer = self.clf.clf.getSensitivityAnalyzer(
                **(self._slave_kwargs))
            if analyzer is None:
                raise ValueError, \
                      "Wasn't able to figure basic analyzer for clf %s" \
                      % `self.clf.clf`
            if __debug__:
                debug("SA", "Selected analyzer %s for clf %s" %
                      (`analyzer`, `self.clf.clf`))
            # bind the analyzer to this instance
            self.__analyzer = analyzer

        # assign the slave classifier and avoid retraining it if it is
        # trained already
        analyzer.clf = self.clf.clf
        if self.clf.clf.trained:
            analyzer._force_training = False

        result = analyzer._call(dataset)
        self.clf_sensitivities = result

        return result

    analyzer = property(fget=lambda x: x.__analyzer)


class MappedClassifierSensitivityAnalyzer(ProxyClassifierSensitivityAnalyzer):
    """Set sensitivity analyzer output to be reverse-mapped using the mapper
    of the slave classifier"""

    def _call(self, dataset):
        sens = super(MappedClassifierSensitivityAnalyzer,
                     self)._call(dataset)
        # sensitivities may come as nfeatures x nclasses, so transpose
        # before reverse-mapping and revert afterwards
        return self.clf.mapper.reverse(sens.T).T


class FeatureSelectionClassifierSensitivityAnalyzer(ProxyClassifierSensitivityAnalyzer):
    """Set sensitivity analyzer output to be reverse-mapped using the mapper
    of the slave classifier"""

    def _call(self, dataset):
        sens = super(FeatureSelectionClassifierSensitivityAnalyzer,
                     self)._call(dataset)
        # sensitivities may come as nfeatures x nclasses, so transpose
        # before reverse-mapping and revert afterwards
        return self.clf.maskclf.mapper.reverse(sens.T).T