1
2
3
4
5
6
7
8
9 """Unit tests for PyMVPA SplittingSensitivityAnalyzer"""
10
11 from mvpa.base import externals
12 from mvpa.featsel.base import FeatureSelectionPipeline, \
13 SensitivityBasedFeatureSelection, CombinedFeatureSelection
14 from mvpa.clfs.transerror import TransferError
15 from mvpa.algorithms.cvtranserror import CrossValidatedTransferError
16 from mvpa.featsel.helpers import FixedNElementTailSelector, \
17 FractionTailSelector, RangeElementSelector
18
19 from mvpa.featsel.rfe import RFE
20
21 from mvpa.clfs.meta import SplitClassifier, MulticlassClassifier, \
22 FeatureSelectionClassifier
23 from mvpa.clfs.smlr import SMLR, SMLRWeights
24 from mvpa.misc.transformers import Absolute
25 from mvpa.datasets.splitters import NFoldSplitter, NoneSplitter
26
27 from mvpa.misc.transformers import Absolute, FirstAxisMean, \
28 SecondAxisSumOfAbs, DistPValue
29
30 from mvpa.measures.base import SplitFeaturewiseDatasetMeasure
31 from mvpa.measures.anova import OneWayAnova, CompoundOneWayAnova
32 from mvpa.measures.irelief import IterativeRelief, IterativeReliefOnline, \
33 IterativeRelief_Devel, IterativeReliefOnline_Devel
34
35 from tests_warehouse import *
36 from tests_warehouse_clfs import *
37
# Featurewise dataset measures swept over by the basic measure test below.
_MEASURES_2_SWEEP = [
    OneWayAnova(),
    CompoundOneWayAnova(combiner=SecondAxisSumOfAbs),
    IterativeRelief(),
    IterativeReliefOnline(),
    IterativeRelief_Devel(),
    IterativeReliefOnline_Devel(),
]

# CorrCoef needs scipy -- include it only when scipy is importable.
# NOTE(review): listing lines 46-47 here were blank in the extracted dump;
# additional measure entries may have been lost -- verify against upstream.
if externals.exists('scipy'):
    from mvpa.measures.corrcoef import CorrCoef
    _MEASURES_2_SWEEP += [CorrCoef()]
49
51
54
55
# NOTE(review): incomplete fragment -- the decorated test method's `def`
# header (original line 57) was lost in extraction; only the body survives.
# It appears to run each featurewise measure over a small fixture dataset
# and sanity-check the resulting per-feature scores -- confirm against the
# original file before relying on this.
56 @sweepargs(dsm=_MEASURES_2_SWEEP)
58 data = datasets['dumbinv']
59
# Keep a copy of the samples so we can verify the measure does not
# mutate its input dataset.
60 datass = data.samples.copy()
61
62
# Apply the featurewise measure to the dataset.
63 f = dsm(data)
64
65
# Input samples must be untouched, and one score per feature (4 here).
66 self.failUnless(N.all(data.samples == datass))
67 self.failUnless(f.shape == (4,))
# Feature 1 is expected to carry (near-)zero sensitivity; feature 0 not.
68 self.failUnless(abs(f[1]) <= 1e-12,
69 msg="Failed test with value %g instead of != 0.0" % f[1])
70 self.failUnless(f[0] > 0.1)
71
72
# No NaNs are allowed anywhere in the result.
73 self.failUnless(not N.any(N.isnan(f)))
74
75
76
# NOTE(review): incomplete fragment -- the `def` header (original line 78)
# was lost in extraction; the docstring below suggests this tests
# sensitivity analyzers obtained from a SplitClassifier. Original lines
# 110-123 are also blank in this dump and may have lost code.
77 @sweepargs(clf=clfswh['has_sensitivity'])
79 """Test analyzers in split classifier
80 """
81
# Wrap the swept classifier in a SplitClassifier with confusion tracking
# enabled for both training and testing splits.
82 mclf = SplitClassifier(clf=clf,
83 enable_states=['training_confusion',
84 'confusion'])
# Request a sensitivity analyzer with Absolute as the transformer.
85 sana = mclf.getSensitivityAnalyzer(transformer=Absolute,
86 enable_states=["sensitivities"])
87
88
# The analyzer must expose exactly the transformer we asked for, and
# FirstAxisMean as its (default) combiner.
89 self.failUnless(sana.transformer is Absolute)
90 self.failUnless(sana.combiner is FirstAxisMean)
91
92
93
# Compute the sensitivity map -- one value per feature expected.
94 map_ = sana(self.dataset)
95 self.failUnlessEqual(len(map_), self.dataset.nfeatures)
96
# These accuracy checks are "labile" (data/classifier dependent), so they
# are gated behind the tests.labile config flag.
97 if cfg.getboolean('tests', 'labile', default='yes'):
98 for conf_matrix in [sana.clf.training_confusion] \
99 + sana.clf.confusion.matrices:
100 self.failUnless(
101 conf_matrix.percentCorrect>75,
102 msg="We must have trained on each one more or " \
103 "less correctly. Got %f%% correct on %d labels" %
104 (conf_matrix.percentCorrect,
105 len(self.dataset.uniquelabels)))
106
# Per-split accuracies (name `errors` is misleading -- these are
# percentCorrect values, not errors).
107 errors = [x.percentCorrect
108 for x in sana.clf.confusion.matrices]
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Meta-classifiers may yield too-sparse maps to meaningfully select
# features from -- bail out early in that case.
124 if 'meta' in clf._clf_internals and len(map_.nonzero()[0])<2:
125
126 return
# Select all but the known non-bogus features and verify exactly the
# non-bogus ones survive.
127 for map__ in [map_]:
128 selected = FixedNElementTailSelector(
129 self.dataset.nfeatures -
130 len(self.dataset.nonbogus_features))(map__)
131 if cfg.getboolean('tests', 'labile', default='yes'):
132 self.failUnlessEqual(
133 list(selected),
134 list(self.dataset.nonbogus_features),
135 msg="At the end we should have selected the right features")
136
137
# NOTE(review): three orphaned @sweepargs decorators. The functions they
# decorated (original lines 139-157, 162-172 and 176-186) were dropped
# entirely from this dump -- recover them from the original file.
138 @sweepargs(clf=clfswh['has_sensitivity'])
158
159
160
161 @sweepargs(svm=clfswh['linear', 'svm'])
173
174
175 @sweepargs(svm=clfswh['linear', 'svm'])
187
188
189
190
# NOTE(review): incomplete fragment -- the `def` header (original line 192)
# was lost in extraction. The body compares per-class split SVM weights
# against the combined (full) weights on a binary problem.
191 @sweepargs(svm=clfswh['linear', 'svm', 'libsvm', '!sg', '!meta'])
193
# Shared analyzer options: no combiner/transformer so raw weights come out.
194 kwargs = dict(combiner=None, transformer=None,
195 enable_states=["sensitivities"])
# One analyzer returning per-split (per-class-pair) weights ...
196 sana_split = svm.getSensitivityAnalyzer(
197 split_weights=True, **kwargs)
# ... and one returning the usual combined weight vector.
198 sana_full = svm.getSensitivityAnalyzer(
199 force_training=False, **kwargs)
200
201
# Reduce a 4-label dataset to a binary problem, z-scoring against the
# two labels (2, 3) that are then dropped.
202 ds2 = datasets['uni4large'].copy()
203 ds2.zscore(baselinelabels = [2, 3])
204 ds2 = ds2['labels', [0,1]]
205
206 map_split = sana_split(ds2)
207 map_full = sana_full(ds2)
208
# Split map: one weight column per class; full map: a single vector.
209 self.failUnlessEqual(map_split.shape, (ds2.nfeatures, 2))
210 self.failUnlessEqual(map_full.shape, (ds2.nfeatures, ))
211
212
213
# For a binary problem the difference of the two split columns must
# reproduce the full weight vector (up to numerical noise).
214 dmap = (-1*map_split[:, 1] + map_split[:, 0]) - map_full
215 self.failUnless((N.abs(dmap) <= 1e-10).all())
216
217
218
219
220
221
# split_weights is only implemented for binary problems -- a 3-label
# dataset must raise NotImplementedError.
222 self.failUnlessRaises(NotImplementedError,
223 sana_split, datasets['uni3medium'])
224
225
# NOTE(review): incomplete fragment -- the `def` header (original line 226)
# was lost in extraction. Exercises SplitFeaturewiseDatasetMeasure with
# two different splitters.
227 ds = datasets['uni3small']
# NFold splitting: one SMLR sensitivity per chunk is expected.
228 sana = SplitFeaturewiseDatasetMeasure(
229 analyzer=SMLR(
230 fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
231 splitter=NFoldSplitter(),
232 combiner=None)
233
234 sens = sana(ds)
235
# Shape: (#chunks, #features, #labels) since no combiner collapses axes.
236 self.failUnless(sens.shape == (
237 len(ds.uniquechunks), ds.nfeatures, len(ds.uniquelabels)))
238
239
240
# Second run: NoneSplitter sampling 25% per label, twice.
241 ds = datasets['uni3medium']
242 sana = SplitFeaturewiseDatasetMeasure(
243 analyzer=SMLR(
244 fit_all_weights=True).getSensitivityAnalyzer(combiner=None),
245 splitter=NoneSplitter(nperlabel=0.25, mode='first',
246 nrunspersplit=2),
247 combiner=None,
248 enable_states=['splits', 'sensitivities'])
249 sens = sana(ds)
250
# Two runs, all features, 3 labels; each split holds 1/4 of the samples.
251 self.failUnless(sens.shape == (2, ds.nfeatures, 3))
252 splits = sana.splits
253 self.failUnlessEqual(len(splits), 2)
254 self.failUnless(N.all([s[0].nsamples == ds.nsamples/4 for s in splits]))
255
# The two random splits should differ, and so should their sensitivities.
256 self.failUnless(N.any([splits[0][0].origids != splits[1][0].origids]))
257
258 self.failUnless(N.any(sens[0] != sens[1]))
259
260
# NOTE(review): incomplete fragment -- the `def` header was lost in
# extraction. Compares a plain DistPValue-transformed SVM sensitivity
# against a split/boosted variant. `SVM` is not imported above --
# presumably it arrives via the tests_warehouse* star imports; verify.
# DistPValue requires scipy, so skip without it.
261 if not externals.exists('scipy'):
262 return
263
264 ds = datasets['uni2medium']
265 plain_sana = SVM().getSensitivityAnalyzer(
266 combiner=None, transformer=DistPValue())
# Boosted variant: sensitivities from 2 random 80%-per-label splits,
# averaged across splits.
267 boosted_sana = SplitFeaturewiseDatasetMeasure(
268 analyzer=SVM().getSensitivityAnalyzer(
269 combiner=None, transformer=DistPValue(fpp=0.05)),
270 splitter=NoneSplitter(nperlabel=0.8, mode='first', nrunspersplit=2),
271 combiner=FirstAxisMean,
272 enable_states=['splits', 'sensitivities'])
273
# Select features whose p-values land in either tail (<=0.05 / >=0.95).
274 fsel = RangeElementSelector(upper=0.05, lower=0.95, inclusive=True)
275
# Run a full cross-validated classification using each analyzer for
# feature selection (py2 `iteritems` -- this file targets Python 2).
276 sanas = dict(plain=plain_sana, boosted=boosted_sana)
277 for k,sana in sanas.iteritems():
278 clf = FeatureSelectionClassifier(SVM(),
279 SensitivityBasedFeatureSelection(sana, fsel),
280 descr='SVM on p=0.01(both tails) using %s' % k)
281 ce = CrossValidatedTransferError(TransferError(clf),
282 NFoldSplitter())
283 error = ce(ds)
284
285 sens = boosted_sana(ds)
286 sens_plain = plain_sana(ds)
287
288
289
290
291
292
293
294
# NOTE(review): incomplete fragment -- original lines 296-317 (the function
# decorated here, and likely the header of the next test) were dropped
# from this dump. The surviving body below tests CombinedFeatureSelection
# with 'union' and 'intersection' combiners.
295 @sweepargs(basic_clf=clfswh['has_sensitivity'])
318
320
# Two independent sensitivity-based selections: top 5% by ANOVA, and
# SMLR weights through a RangeElementSelector.
321 fss = [SensitivityBasedFeatureSelection(
322 OneWayAnova(),
323 FractionTailSelector(0.05, mode='select', tail='upper')),
324 SensitivityBasedFeatureSelection(
325 SMLRWeights(SMLR(lm=1, implementation="C")),
326 RangeElementSelector(mode='select'))]
327
# Union of the two selections.
328 fs = CombinedFeatureSelection(fss, combiner='union',
329 enable_states=['selected_ids',
330 'selections_ids'])
331
332 od, otd = fs(self.dataset)
333
334 self.failUnless(fs.combiner == 'union')
# Non-empty, bounded by #features, and one id-list per child selection.
335 self.failUnless(len(fs.selections_ids))
336 self.failUnless(len(fs.selections_ids) <= self.dataset.nfeatures)
337
338 self.failUnless(len(fs.selections_ids) == len(fss))
339
# For a union, each child's selection is a subset of the final one.
340 for s in fs.selections_ids:
341 self.failUnless(len(s) <= len(fs.selected_ids))
342
# The output dataset carries exactly the selected feature columns.
343 self.failUnless(od.nfeatures == len(fs.selected_ids))
344 for i, id in enumerate(fs.selected_ids):
345 self.failUnless((od.samples[:,i]
346 == self.dataset.samples[:,id]).all())
347
348
# Repeat with intersection; assertions for it (original lines 354-358)
# appear to have been lost from this dump.
349 fs = CombinedFeatureSelection(fss, combiner='intersection',
350 enable_states=['selected_ids',
351 'selections_ids'])
352
353 od, otd = fs(self.dataset)
354
355
356
359
360
# Entry point: delegate to PyMVPA's test runner when executed directly.
if __name__ == '__main__':
    import runner