Package mvpa :: Package tests :: Module test_datameasure
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.tests.test_datameasure

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Unit tests for PyMVPA SplittingSensitivityAnalyzer""" 
 10   
 11  from mvpa.base import externals 
 12  from mvpa.featsel.base import FeatureSelectionPipeline, \ 
 13       SensitivityBasedFeatureSelection, CombinedFeatureSelection 
 14  from mvpa.clfs.transerror import TransferError 
 15  from mvpa.algorithms.cvtranserror import CrossValidatedTransferError 
 16  from mvpa.featsel.helpers import FixedNElementTailSelector, \ 
 17                                   FractionTailSelector, RangeElementSelector 
 18   
 19  from mvpa.featsel.rfe import RFE 
 20   
 21  from mvpa.clfs.meta import SplitClassifier, MulticlassClassifier, \ 
 22       FeatureSelectionClassifier 
 23  from mvpa.clfs.smlr import SMLR, SMLRWeights 
 24  from mvpa.misc.transformers import Absolute 
 25  from mvpa.datasets.splitters import NFoldSplitter, NoneSplitter 
 26   
 27  from mvpa.misc.transformers import Absolute, FirstAxisMean, \ 
 28       SecondAxisSumOfAbs, DistPValue 
 29   
 30  from mvpa.measures.base import SplitFeaturewiseDatasetMeasure 
 31  from mvpa.measures.anova import OneWayAnova, CompoundOneWayAnova 
 32  from mvpa.measures.irelief import IterativeRelief, IterativeReliefOnline, \ 
 33       IterativeRelief_Devel, IterativeReliefOnline_Devel 
 34   
 35  from tests_warehouse import * 
 36  from tests_warehouse_clfs import * 
 37   
 38  _MEASURES_2_SWEEP = [ OneWayAnova(), 
 39                        CompoundOneWayAnova(combiner=SecondAxisSumOfAbs), 
 40                        IterativeRelief(), IterativeReliefOnline(), 
 41                        IterativeRelief_Devel(), IterativeReliefOnline_Devel() 
 42                        ] 
 43  if externals.exists('scipy'): 
 44      from mvpa.measures.corrcoef import CorrCoef 
 45      _MEASURES_2_SWEEP += [ CorrCoef(), 
 46                             # that one is good when small... handle later 
 47                             #CorrCoef(pvalue=True) 
 48                             ] 
 49   
50 -class SensitivityAnalysersTests(unittest.TestCase):
51
52 - def setUp(self):
53 self.dataset = datasets['uni2large']
54 55 56 @sweepargs(dsm=_MEASURES_2_SWEEP)
57 - def testBasic(self, dsm):
58 data = datasets['dumbinv'] 59 60 datass = data.samples.copy() 61 62 # compute scores 63 f = dsm(data) 64 65 # check if nothing evil is done to dataset 66 self.failUnless(N.all(data.samples == datass)) 67 self.failUnless(f.shape == (4,)) 68 self.failUnless(abs(f[1]) <= 1e-12, # some small value 69 msg="Failed test with value %g instead of != 0.0" % f[1]) 70 self.failUnless(f[0] > 0.1) # some reasonably large value 71 72 # we should not have NaNs 73 self.failUnless(not N.any(N.isnan(f)))
74 75 76 # XXX meta should work too but doesn't 77 @sweepargs(clf=clfswh['has_sensitivity'])
78 - def testAnalyzerWithSplitClassifier(self, clf):
79 """Test analyzers in split classifier 80 """ 81 # assumming many defaults it is as simple as 82 mclf = SplitClassifier(clf=clf, 83 enable_states=['training_confusion', 84 'confusion']) 85 sana = mclf.getSensitivityAnalyzer(transformer=Absolute, 86 enable_states=["sensitivities"]) 87 88 # Test access to transformers and combiners 89 self.failUnless(sana.transformer is Absolute) 90 self.failUnless(sana.combiner is FirstAxisMean) 91 # and lets look at all sensitivities 92 93 # and we get sensitivity analyzer which works on splits 94 map_ = sana(self.dataset) 95 self.failUnlessEqual(len(map_), self.dataset.nfeatures) 96 97 if cfg.getboolean('tests', 'labile', default='yes'): 98 for conf_matrix in [sana.clf.training_confusion] \ 99 + sana.clf.confusion.matrices: 100 self.failUnless( 101 conf_matrix.percentCorrect>75, 102 msg="We must have trained on each one more or " \ 103 "less correctly. Got %f%% correct on %d labels" % 104 (conf_matrix.percentCorrect, 105 len(self.dataset.uniquelabels))) 106 107 errors = [x.percentCorrect 108 for x in sana.clf.confusion.matrices] 109 110 # XXX 111 # That is too much to ask if the dataset is easy - thus 112 # disabled for now 113 #self.failUnless(N.min(errors) != N.max(errors), 114 # msg="Splits should have slightly but different " \ 115 # "generalization") 116 117 # lets go through all sensitivities and see if we selected the right 118 # features 119 # XXX yoh: disabled checking of each map separately since in 120 # BoostedClassifierSensitivityAnalyzer and 121 # ProxyClassifierSensitivityAnalyzer 122 # we don't have yet way to provide transformers thus internal call 123 # to getSensitivityAnalyzer in _call of them is not parametrized 124 if 'meta' in clf._clf_internals and len(map_.nonzero()[0])<2: 125 # Some meta classifiers (5% of ANOVA) are too harsh ;-) 126 return 127 for map__ in [map_]: # + sana.combined_analyzer.sensitivities: 128 selected = FixedNElementTailSelector( 129 self.dataset.nfeatures - 130 
len(self.dataset.nonbogus_features))(map__) 131 if cfg.getboolean('tests', 'labile', default='yes'): 132 self.failUnlessEqual( 133 list(selected), 134 list(self.dataset.nonbogus_features), 135 msg="At the end we should have selected the right features")
136 137 138 @sweepargs(clf=clfswh['has_sensitivity'])
140 """Test sensitivity of the mapped classifier 141 """ 142 # Assuming many defaults it is as simple as 143 mclf = FeatureSelectionClassifier( 144 clf, 145 SensitivityBasedFeatureSelection( 146 OneWayAnova(), 147 FractionTailSelector(0.5, mode='select', tail='upper')), 148 enable_states=['training_confusion']) 149 150 sana = mclf.getSensitivityAnalyzer(transformer=Absolute, 151 enable_states=["sensitivities"]) 152 # and lets look at all sensitivities 153 154 dataset = datasets['uni2medium'] 155 # and we get sensitivity analyzer which works on splits 156 map_ = sana(dataset) 157 self.failUnlessEqual(len(map_), dataset.nfeatures)
158 159 160 161 @sweepargs(svm=clfswh['linear', 'svm'])
162 - def testLinearSVMWeights(self, svm):
163 # assumming many defaults it is as simple as 164 sana = svm.getSensitivityAnalyzer(enable_states=["sensitivities"] ) 165 166 # and lets look at all sensitivities 167 map_ = sana(self.dataset) 168 # for now we can do only linear SVM, so lets check if we raise 169 # a concern 170 svmnl = clfswh['non-linear', 'svm'][0] 171 self.failUnlessRaises(NotImplementedError, 172 svmnl.getSensitivityAnalyzer)
173 174 175 @sweepargs(svm=clfswh['linear', 'svm'])
176 - def testLinearSVMWeights(self, svm):
177 # assumming many defaults it is as simple as 178 sana = svm.getSensitivityAnalyzer(enable_states=["sensitivities"] ) 179 180 # and lets look at all sensitivities 181 map_ = sana(self.dataset) 182 # for now we can do only linear SVM, so lets check if we raise 183 # a concern 184 svmnl = clfswh['non-linear', 'svm'][0] 185 self.failUnlessRaises(NotImplementedError, 186 svmnl.getSensitivityAnalyzer)
187 188 # XXX doesn't work easily with meta since it would need 189 # to be explicitely passed to the slave classifier's 190 # getSengetSensitivityAnalyzer 191 @sweepargs(svm=clfswh['linear', 'svm', 'libsvm', '!sg', '!meta'])
192 - def testLinearSVMWeightsPerClass(self, svm):
193 # assumming many defaults it is as simple as 194 kwargs = dict(combiner=None, transformer=None, 195 enable_states=["sensitivities"]) 196 sana_split = svm.getSensitivityAnalyzer( 197 split_weights=True, **kwargs) 198 sana_full = svm.getSensitivityAnalyzer( 199 force_training=False, **kwargs) 200 201 # and lets look at all sensitivities 202 ds2 = datasets['uni4large'].copy() 203 ds2.zscore(baselinelabels = [2, 3]) 204 ds2 = ds2['labels', [0,1]] 205 206 map_split = sana_split(ds2) 207 map_full = sana_full(ds2) 208 209 self.failUnlessEqual(map_split.shape, (ds2.nfeatures, 2)) 210 self.failUnlessEqual(map_full.shape, (ds2.nfeatures, )) 211 212 # just to verify that we split properly and if we reconstruct 213 # manually we obtain the same 214 dmap = (-1*map_split[:, 1] + map_split[:, 0]) - map_full 215 self.failUnless((N.abs(dmap) <= 1e-10).all()) 216 #print "____" 217 #print map_split 218 #print SMLR().getSensitivityAnalyzer(combiner=None)(ds2) 219 220 # for now we can do split weights for binary tasks only, so 221 # lets check if we raise a concern 222 self.failUnlessRaises(NotImplementedError, 223 sana_split, datasets['uni3medium'])
224 225
227 ds = datasets['uni3small'] 228 sana = SplitFeaturewiseDatasetMeasure( 229 analyzer=SMLR( 230 fit_all_weights=True).getSensitivityAnalyzer(combiner=None), 231 splitter=NFoldSplitter(), 232 combiner=None) 233 234 sens = sana(ds) 235 236 self.failUnless(sens.shape == ( 237 len(ds.uniquechunks), ds.nfeatures, len(ds.uniquelabels))) 238 239 240 # Lets try more complex example with 'boosting' 241 ds = datasets['uni3medium'] 242 sana = SplitFeaturewiseDatasetMeasure( 243 analyzer=SMLR( 244 fit_all_weights=True).getSensitivityAnalyzer(combiner=None), 245 splitter=NoneSplitter(nperlabel=0.25, mode='first', 246 nrunspersplit=2), 247 combiner=None, 248 enable_states=['splits', 'sensitivities']) 249 sens = sana(ds) 250 251 self.failUnless(sens.shape == (2, ds.nfeatures, 3)) 252 splits = sana.splits 253 self.failUnlessEqual(len(splits), 2) 254 self.failUnless(N.all([s[0].nsamples == ds.nsamples/4 for s in splits])) 255 # should have used different samples 256 self.failUnless(N.any([splits[0][0].origids != splits[1][0].origids])) 257 # and should have got different sensitivities 258 self.failUnless(N.any(sens[0] != sens[1])) 259 260 261 if not externals.exists('scipy'): 262 return 263 # Most evil example 264 ds = datasets['uni2medium'] 265 plain_sana = SVM().getSensitivityAnalyzer( 266 combiner=None, transformer=DistPValue()) 267 boosted_sana = SplitFeaturewiseDatasetMeasure( 268 analyzer=SVM().getSensitivityAnalyzer( 269 combiner=None, transformer=DistPValue(fpp=0.05)), 270 splitter=NoneSplitter(nperlabel=0.8, mode='first', nrunspersplit=2), 271 combiner=FirstAxisMean, 272 enable_states=['splits', 'sensitivities']) 273 # lets create feature selector 274 fsel = RangeElementSelector(upper=0.05, lower=0.95, inclusive=True) 275 276 sanas = dict(plain=plain_sana, boosted=boosted_sana) 277 for k,sana in sanas.iteritems(): 278 clf = FeatureSelectionClassifier(SVM(), 279 SensitivityBasedFeatureSelection(sana, fsel), 280 descr='SVM on p=0.01(both tails) using %s' % k) 281 ce = 
CrossValidatedTransferError(TransferError(clf), 282 NFoldSplitter()) 283 error = ce(ds) 284 285 sens = boosted_sana(ds) 286 sens_plain = plain_sana(ds)
287 288 # TODO: make a really unittest out of it -- not just runtime 289 # bugs catcher 290 291 # TODO -- unittests for sensitivity analyzers which use combiners 292 # (linsvmweights for multi-class SVMs and smlrweights for SMLR) 293 294 295 @sweepargs(basic_clf=clfswh['has_sensitivity'])
297 #basic_clf = LinearNuSVMC() 298 multi_clf = MulticlassClassifier(clf=basic_clf) 299 #svm_weigths = LinearSVMWeights(svm) 300 301 # Proper RFE: aggregate sensitivities across multiple splits, 302 # but also due to multi class those need to be aggregated 303 # somehow. Transfer error here should be 'leave-1-out' error 304 # of split classifier itself 305 sclf = SplitClassifier(clf=basic_clf) 306 rfe = RFE(sensitivity_analyzer= 307 sclf.getSensitivityAnalyzer( 308 enable_states=["sensitivities"]), 309 transfer_error=trans_error, 310 feature_selector=FeatureSelectionPipeline( 311 [FractionTailSelector(0.5), 312 FixedNElementTailSelector(1)]), 313 train_clf=True) 314 315 # and we get sensitivity analyzer which works on splits and uses 316 # sensitivity 317 selected_features = rfe(self.dataset)
318
320 # two methods: 5% highes F-scores, non-zero SMLR weights 321 fss = [SensitivityBasedFeatureSelection( 322 OneWayAnova(), 323 FractionTailSelector(0.05, mode='select', tail='upper')), 324 SensitivityBasedFeatureSelection( 325 SMLRWeights(SMLR(lm=1, implementation="C")), 326 RangeElementSelector(mode='select'))] 327 328 fs = CombinedFeatureSelection(fss, combiner='union', 329 enable_states=['selected_ids', 330 'selections_ids']) 331 332 od, otd = fs(self.dataset) 333 334 self.failUnless(fs.combiner == 'union') 335 self.failUnless(len(fs.selections_ids)) 336 self.failUnless(len(fs.selections_ids) <= self.dataset.nfeatures) 337 # should store one set per methods 338 self.failUnless(len(fs.selections_ids) == len(fss)) 339 # no individual can be larger than union 340 for s in fs.selections_ids: 341 self.failUnless(len(s) <= len(fs.selected_ids)) 342 # check output dataset 343 self.failUnless(od.nfeatures == len(fs.selected_ids)) 344 for i, id in enumerate(fs.selected_ids): 345 self.failUnless((od.samples[:,i] 346 == self.dataset.samples[:,id]).all()) 347 348 # again for intersection 349 fs = CombinedFeatureSelection(fss, combiner='intersection', 350 enable_states=['selected_ids', 351 'selections_ids']) 352 # simply run it for now -- can't think of additional tests 353 od, otd = fs(self.dataset)
354 355 356
def suite():
    """Return the unittest suite covering the sensitivity analyzers."""
    tests = unittest.makeSuite(SensitivityAnalysersTests)
    return tests
359 360 361 if __name__ == '__main__': 362 import runner 363