9 """Least angle regression (LARS) classifier."""
10
11 __docformat__ = 'restructuredtext'
12
13
14 import numpy as N
15
16 import mvpa.base.externals as externals
17
18
19 if externals.exists('rpy', raiseException=True) and \
20 externals.exists('lars', raiseException=True):
21 import rpy
22 rpy.r.library('lars')
23
24
25
26 from mvpa.clfs.base import Classifier
27 from mvpa.measures.base import Sensitivity
28
29 if __debug__:
30 from mvpa.base import debug
31
32 known_models = ('lasso', 'stepwise', 'lar', 'forward.stagewise')
33
34 -class LARS(Classifier):
35 """Least angle regression (LARS) `Classifier`.
36
37 LARS is the model selection algorithm from:
38
39 Bradley Efron, Trevor Hastie, Iain Johnstone and Robert
40 Tibshirani, Least Angle Regression Annals of Statistics (with
41 discussion) (2004) 32(2), 407-499. A new method for variable
42 subset selection, with the lasso and 'epsilon' forward stagewise
43 methods as special cases.
44
45 Similar to SMLR, it performs a feature selection while performing
46 classification, but instead of starting with all features, it
47 starts with none and adds them in, which is similar to boosting.
48
49 This classifier behaves more like a ridge regression in that it
50 returns prediction values and it treats the training labels as
51 continuous.
52
53 In the true nature of the PyMVPA framework, this algorithm is
54 actually implemented in R by Trevor Hastie and wrapped via RPy.
55 To make use of LARS, you must have R and RPy installed as well as
56 the LARS contributed package. You can install the R and RPy with
57 the following command on Debian-based machines:
58
59 sudo aptitude install python-rpy python-rpy-doc r-base-dev
60
61 You can then install the LARS package by running R as root and
62 calling:
63
64 install.packages()
65
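
    A minimal usage sketch, assuming a labeled `Dataset` instance named
    `dataset` is already available (parameter values are only
    illustrative)::

      clf = LARS(model_type='lasso')
      clf.train(dataset)
      predictions = clf.predict(dataset.samples)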
66 """

    _clf_internals = [ 'lars', 'regression', 'linear', 'has_sensitivity',
                       'does_feature_selection',
                       ]

    def __init__(self, model_type="lasso", trace=False, normalize=True,
                 intercept=True, max_steps=None, use_Gram=False, **kwargs):
74 """
75 Initialize LARS.
76
77 See the help in R for further details on the following parameters:
78
79 :Parameters:
80 model_type : string
81 Type of LARS to run. Can be one of ('lasso', 'lar',
82 'forward.stagewise', 'stepwise').
83 trace : boolean
84 Whether to print progress in R as it works.
85 normalize : boolean
86 Whether to normalize the L2 Norm.
87 intercept : boolean
88 Whether to add a non-penalized intercept to the model.
89 max_steps : None or int
90 If not None, specify the total number of iterations to run. Each
91 iteration adds a feature, but leaving it none will add until
92 convergence.
93 use_Gram : boolean
94 Whether to compute the Gram matrix (this should be false if you
95 have more features than samples.)
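
        For example, with many more features than samples one might
        construct the classifier as (values below are purely
        illustrative)::

          clf = LARS(model_type='lar', max_steps=20, use_Gram=False)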
96 """

        Classifier.__init__(self, **kwargs)

        if not model_type in known_models:
            raise ValueError('Unknown model %s for LARS is specified. Known '
                             % model_type + 'are %s' % `known_models`)

        self.__type = model_type
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps
        self.__use_Gram = use_Gram

        self.__weights = None
        """The beta weights for each feature."""
        self.__trained_model = None
        """The model object after training that will be used for
        predictions."""

125 """String summary of the object
126 """
127 return "LARS(type='%s', normalize=%s, intercept=%s, trace=%s, " \
128 "max_steps=%s, use_Gram=%s, regression=%s, " \
129 "enable_states=%s)" % \
130 (self.__type,
131 self.__normalize,
132 self.__intercept,
133 self.__trace,
134 self.__max_steps,
135 self.__use_Gram,
136 self.regression,
137 str(self.states.enabled))
138
139
141 """Train the classifier using `data` (`Dataset`).
142 """
143 if self.__max_steps is None:
144
145 self.__trained_model = rpy.r.lars(data.samples,
146 data.labels[:,N.newaxis],
147 type=self.__type,
148 normalize=self.__normalize,
149 intercept=self.__intercept,
150 trace=self.__trace,
151 use_Gram=self.__use_Gram)
152 else:
153
154 self.__trained_model = rpy.r.lars(data.samples,
155 data.labels[:,N.newaxis],
156 type=self.__type,
157 normalize=self.__normalize,
158 intercept=self.__intercept,
159 trace=self.__trace,
160 use_Gram=self.__use_Gram,
161 max_steps=self.__max_steps)
162
163
164
165
166 Cp_vals = N.asarray([self.__trained_model['Cp'][str(x)]
167 for x in range(len(self.__trained_model['Cp']))])
168 if N.isnan(Cp_vals[0]):
169
170 self.__lowest_Cp_step = len(Cp_vals)-1
171 else:
172
173 self.__lowest_Cp_step = Cp_vals.argmin()
174
175
176 self.__weights = self.__trained_model['beta'][self.__lowest_Cp_step,:]
177
178
179
180
181
183 """
184 Predict the output for the provided data.
185 """
186
187
188 res = rpy.r.predict_lars(self.__trained_model,
189 data,
190 mode='step',
191 s=self.__lowest_Cp_step)
192
193
194 fit = N.asarray(res['fit'])
195 if len(fit.shape) == 0:
196
197 fit = fit.reshape( (1,) )
198
199 self.values = fit
200 return fit
201

    def _getFeatureIds(self):
        """Return ids of the used features
        """
        return N.where(N.abs(self.__weights)>0)[0]


    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for LARS."""
        return LARSWeights(self, **kwargs)

    weights = property(lambda self: self.__weights)


class LARSWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights LARS trained
    on a given `Dataset`.
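
    A usage sketch, assuming a trained `LARS` instance `clf` and the
    `Dataset` it was trained on, `dataset`::

      sensana = clf.getSensitivityAnalyzer()
      weights = sensana(dataset)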
221 """
222
223 _LEGAL_CLFS = [ LARS ]
224
    def _call(self, dataset=None):
        """Extract weights from LARS classifier.

        LARS always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights

        if __debug__:
            debug('LARS',
                  "Extracting weights for LARS - " +
                  "Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights