# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Dataset container"""

__docformat__ = 'restructuredtext'

import random

import numpy as N

from mvpa.datasets.mapped import MappedDataset

if __debug__:
    from mvpa.base import debug, warning


class MetaDataset(object):
    """Dataset container

    The class is useful to combine several datasets of different origin and
    type and bind them together. Such a combined dataset can then be used,
    e.g. passed to a classifier.

    MetaDataset does not permanently duplicate the data stored in the
    datasets it contains. The combined samples matrix is built on demand and
    all other attribute access is redirected to the first dataset in the
    container.

    Currently operations other than samples or feature selection are not
    fully supported, e.g. passing a MetaDataset to detrend() will initially
    result in a detrended MetaDataset, but the combined and detrended samples
    matrix will be lost after the next call to selectSamples() or
    selectFeatures(), which freshly pulls samples from all datasets in the
    container.
    """

    # This class is intentionally _not_ implemented as a subclass of Dataset.
    # IMHO Dataset contains too much unnecessary logic.
    # XXX implement MappedMetaDataset along with a MetaMapper that simply
    # calls the mappers in the datasets in the container; or maybe just add a
    # flag to MetaDataset to behave like a MappedDataset

    def __init__(self, datasets):
        """Initialize dataset instance

        :Parameters:
          datasets : list
        """
        # XXX Maybe add checks that all datasets have identical samples
        # attributes
        self.__datasets = datasets

        # contains the combined samples matrix for caching
        self.__samples = None


    def rebuildSamples(self):
        """Update the combined samples matrix from all underlying datasets.
        """
        # note that hstack will copy _all_ data
        self.__samples = N.hstack([ds.samples for ds in self.__datasets])


    def __getattr__(self, name):
        """Implemented to redirect access to underlying datasets.
        """
        if name == 'samples':
            # combine (and cache) the samples arrays
            if self.__samples is None:
                self.rebuildSamples()
            return self.__samples

        else:
            # redirect all others to the first dataset
            # ??? maybe limit to some specific supported ones
            return self.__datasets[0].__getattribute__(name)


    def selectFeatures(self, ids):
        """Do feature selection on all underlying datasets at once.
        """
        # determine which features belong to what dataset
        # and call its selectFeatures() accordingly
        ids = N.asanyarray(ids)
        result = []
        fsum = 0
        for ds in self.__datasets:
            # boolean mask of the meta feature ids that belong to this dataset
            selector = N.logical_and(ids < fsum + ds.nfeatures, ids >= fsum)
            # make feature ids relative to this dataset
            selected = ids[selector] - fsum
            # do feature selection on underlying dataset
            # XXX not sure if we should keep empty datasets? (probably)
            result.append(ds.selectFeatures(selected))
            fsum += ds.nfeatures

        return MetaDataset(result)


    def applyMapper(self, *args, **kwargs):
        """Apply a mapper on all underlying datasets.
        """
        return MetaDataset([ds.applyMapper(*args, **kwargs)
                            for ds in self.__datasets])


    def selectSamples(self, *args, **kwargs):
        """Select samples from all underlying datasets at once.
        """
        return MetaDataset([ds.selectSamples(*args, **kwargs)
                            for ds in self.__datasets])


    def permuteLabels(self, *args, **kwargs):
        """Toggle label permutation.
        """
        # permute on first
        self.__datasets[0].permuteLabels(*args, **kwargs)

        # and copy the (possibly permuted) labels to all other datasets to
        # keep them consistent
        for ds in self.__datasets[1:]:
            ds.labels[:] = self.__datasets[0].labels


    def getRandomSamples(self, nperlabel):
        """Return a MetaDataset with a random subset of samples.
        """
        # if an integer is given take this value for all classes
        if isinstance(nperlabel, int):
            nperlabel = [nperlabel for i in self.__datasets[0].uniquelabels]

        sample = []
        # for each available class
        for i, r in enumerate(self.__datasets[0].uniquelabels):
            # get the list of pattern ids for this class
            sample += random.sample(
                (self.__datasets[0].labels == r).nonzero()[0],
                nperlabel[i])

        return MetaDataset([ds.selectSamples(sample)
                            for ds in self.__datasets])


    def getNSamples(self):
        """Number of samples.
        """
        # all contained datasets are assumed to have the same number of
        # samples, so asking the first one is sufficient
        return self.__datasets[0].nsamples


    def getNFeatures(self):
        """Number of features per sample.
        """
        return N.sum([ds.nfeatures for ds in self.__datasets])


    def setSamplesDType(self, dtype):
        """Set the data type of the samples array.
        """
        # reset cached samples
        self.__samples = None

        for ds in self.__datasets:
            if ds.samples.dtype != dtype:
                ds.samples = ds.samples.astype(dtype)


    def mapReverse(self, val):
        """Perform reverse mapping.

        :Return:
          List of results, one per contained dataset: the reverse-mapped
          part of the provided `val` for mapped datasets, the raw part
          otherwise.
        """
        # assure array and transpose for easy slicing
        # i.e. transpose of 1D does nothing, but of 2D puts features
        # along first dimension
        val = N.asanyarray(val).T

        # do we have multiple values or just one
        mflag = len(val.shape) > 1

        result = []
        fsum = 0
        for ds in self.__datasets:
            # calculate upper border
            fsum_new = fsum + ds.nfeatures

            # now map back if a mapper is present, otherwise just store
            # need to pass transposed!
            if isinstance(ds, MappedDataset):
                result.append(ds.mapReverse(val[fsum:fsum_new].T))
            else:
                result.append(val[fsum:fsum_new].T)

            fsum = fsum_new

        return result


    # read-only class properties
    nsamples = property(fget=getNSamples)
    nfeatures = property(fget=getNFeatures)
    datasets = property(fget=lambda self: self.__datasets)