1
2
3
4
5
6
7
8
9 """Misc function performing operations on datasets which are based on scipy
10 """
11
12 __docformat__ = 'restructuredtext'
13
14 from mvpa.base import externals
15
16 import numpy as N
17
18 from operator import isSequenceType
19
20 from mvpa.datasets.base import Dataset, datasetmethod
21 from mvpa.misc.support import getBreakPoints
22
23 if externals.exists('scipy', raiseException=True):
24 from scipy import signal
25 from scipy.linalg import lstsq
26 from scipy.special import legendre
27
28
29 @datasetmethod
30 -def detrend(dataset, perchunk=False, model='linear',
31 polyord=None, opt_reg=None):
32 """
33 Given a dataset, detrend the data inplace either entirely
34 or per each chunk
35
36 :Parameters:
37 `dataset` : `Dataset`
38 dataset to operate on
39 `perchunk` : bool
40 either to operate on whole dataset at once or on each chunk
41 separately
42 `model`
43 Type of detrending model to run. If 'linear' or 'constant',
44 scipy.signal.detrend is used to perform a linear or demeaning
45 detrend. Polynomial detrending is activated when 'regress' is
46 used or when polyord or opt_reg are specified.
47 `polyord` : int or list
48 Order of the Legendre polynomial to remove from the data. This
49 will remove every polynomial up to and including the provided
50 value. For example, 3 will remove 0th, 1st, 2nd, and 3rd order
51 polynomials from the data. N.B.: The 0th polynomial is the
52 baseline shift, the 1st is the linear trend.
53 If you specify a single int and perchunk is True, then this value
54 is used for each chunk. You can also specify a differnt polyord
55 value for each chunk by providing a list or ndarray of polyord
56 values the length of the number of chunks.
57 `opt_reg` : ndarray
58 Optional ndarray of additional information to regress out from the
59 dataset. One example would be to regress out motion parameters.
60 As with the data, time is on the first axis.
61
62 """
63 if polyord is not None or opt_reg is not None: model='regress'
64
65 if model in ['linear', 'constant']:
66
67 bp = 0
68
69 if perchunk:
70 try:
71 bp = getBreakPoints(dataset.chunks)
72 except ValueError, e:
73 raise ValueError, \
74 "Failed to assess break points between chunks. Often " \
75 "that is due to discontinuities within a chunk, which " \
76 "ruins idea of per-chunk detrending. Original " \
77 "exception was: %s" % str(e)
78
79 dataset.samples[:] = signal.detrend(dataset.samples, axis=0,
80 type=model, bp=bp)
81 elif model in ['regress']:
82
83 return __detrend_regress(dataset, perchunk=perchunk,
84 polyord=polyord, opt_reg=opt_reg)
85 else:
86
87 raise ValueError('Specified model type (%s) is unknown.'
88 % (model))
89
90
91
93 """
94 Given a dataset, perform a detrend inplace, regressing out polynomial
95 terms as well as optional regressors, such as motion parameters.
96
97 :Parameters:
98 `dataset`: `Dataset`
99 Dataset to operate on
100 `perchunk` : bool
101 Either to operate on whole dataset at once or on each chunk
102 separately. If perchunk is True, all the samples within a
103 chunk should be contiguous and the chunks should be sorted in
104 order from low to high.
105 `polyord` : int
106 Order of the Legendre polynomial to remove from the data. This
107 will remove every polynomial up to and including the provided
108 value. For example, 3 will remove 0th, 1st, 2nd, and 3rd order
109 polynomials from the data. N.B.: The 0th polynomial is the
110 baseline shift, the 1st is the linear trend.
111 If you specify a single int and perchunk is True, then this value
112 is used for each chunk. You can also specify a differnt polyord
113 value for each chunk by providing a list or ndarray of polyord
114 values the length of the number of chunks.
115 `opt_reg` : ndarray
116 Optional ndarray of additional information to regress out from the
117 dataset. One example would be to regress out motion parameters.
118 As with the data, time is on the first axis.
119 """
120
121
122 if not polyord is None:
123 if not isSequenceType(polyord):
124
125 polyord = [polyord]*len(dataset.uniquechunks)
126 elif perchunk and len(polyord) != len(dataset.uniquechunks):
127 raise ValueError("If you specify a sequence of polyord values " + \
128 "they sequence length must match the " + \
129 "number of unique chunks in the dataset.")
130
131
132 if perchunk:
133
134 uchunks = dataset.uniquechunks
135
136
137 reg = []
138 for n, chunk in enumerate(uchunks):
139
140 cinds = dataset.chunks == chunk
141
142
143 if not polyord is None:
144
145 x = N.linspace(-1, 1, cinds.sum())
146
147 for n in range(polyord[n] + 1):
148 newreg = N.zeros((dataset.nsamples, 1))
149 newreg[cinds, 0] = legendre(n)(x)
150 reg.append(newreg)
151 else:
152
153 reg = []
154
155 if not polyord is None:
156
157 x = N.linspace(-1, 1, dataset.nsamples)
158 for n in range(polyord[0] + 1):
159 reg.append(legendre(n)(x)[:, N.newaxis])
160
161
162 if not opt_reg is None:
163
164 reg.append(opt_reg)
165
166
167 if len(reg) > 0:
168 if len(reg) > 1:
169 regs = N.hstack(reg)
170 else:
171 regs = reg[0]
172 else:
173
174 raise ValueError("You must specify at least one " + \
175 "regressor to regress out.")
176
177
178 res = lstsq(regs, dataset.samples)
179
180
181 yhat = N.dot(regs, res[0])
182 dataset.samples -= yhat
183
184
185 return res, regs
186