Package mvpa :: Package clfs :: Module lars
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.clfs.lars

  1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Least angle regression (LARS) classifier.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  # system imports 
 14  import numpy as N 
 15   
 16  import mvpa.base.externals as externals 
 17   
 18  # do conditional to be able to build module reference 
 19  if externals.exists('rpy', raiseException=True) and \ 
 20     externals.exists('lars', raiseException=True): 
 21      import rpy 
 22      rpy.r.library('lars') 
 23   
 24   
 25  # local imports 
 26  from mvpa.clfs.base import Classifier 
 27  from mvpa.measures.base import Sensitivity 
 28   
 29  if __debug__: 
 30      from mvpa.base import debug 
 31   
 32  known_models = ('lasso', 'stepwise', 'lar', 'forward.stagewise') 
 33   
class LARS(Classifier):
    """Least angle regression (LARS) `Classifier`.

    LARS is the model selection algorithm from:

    Bradley Efron, Trevor Hastie, Iain Johnstone and Robert
    Tibshirani, Least Angle Regression Annals of Statistics (with
    discussion) (2004) 32(2), 407-499. A new method for variable
    subset selection, with the lasso and 'epsilon' forward stagewise
    methods as special cases.

    Similar to SMLR, it performs a feature selection while performing
    classification, but instead of starting with all features, it
    starts with none and adds them in, which is similar to boosting.

    This classifier behaves more like a ridge regression in that it
    returns prediction values and it treats the training labels as
    continuous.

    In the true nature of the PyMVPA framework, this algorithm is
    actually implemented in R by Trevor Hastie and wrapped via RPy.
    To make use of LARS, you must have R and RPy installed as well as
    the LARS contributed package. You can install the R and RPy with
    the following command on Debian-based machines:

    sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the LARS package by running R as root and
    calling:

    install.packages()

    """

    # XXX from yoh: it is linear, isn't it?
    _clf_internals = [ 'lars', 'regression', 'linear', 'has_sensitivity',
                       'does_feature_selection',
                       ]

    def __init__(self, model_type="lasso", trace=False, normalize=True,
                 intercept=True, max_steps=None, use_Gram=False, **kwargs):
        """
        Initialize LARS.

        See the help in R for further details on the following parameters:

        :Parameters:
          model_type : string
            Type of LARS to run. Can be one of ('lasso', 'lar',
            'forward.stagewise', 'stepwise').
          trace : boolean
            Whether to print progress in R as it works.
          normalize : boolean
            Whether to normalize the L2 Norm.
          intercept : boolean
            Whether to add a non-penalized intercept to the model.
          max_steps : None or int
            If not None, specify the total number of iterations to run. Each
            iteration adds a feature, but leaving it none will add until
            convergence.
          use_Gram : boolean
            Whether to compute the Gram matrix (this should be false if you
            have more features than samples.)

        :Raises:
          ValueError
            If `model_type` is not one of the entries in `known_models`.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # BUG FIX: the original built the message by string concatenation
        # ('... Known' + 'are %s') which rendered as "Knownare"; also use
        # the idiomatic 'not in' and portable repr() instead of backticks.
        if model_type not in known_models:
            raise ValueError("Unknown model %s for LARS is specified. "
                             "Known are %s"
                             % (model_type, repr(known_models)))

        # set up the params
        self.__type = model_type
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps
        self.__use_Gram = use_Gram

        # pylint friendly initializations
        # beta weights for each feature; set by _train()
        self.__weights = None
        # R model object returned by rpy.r.lars(); used by _predict()
        self.__trained_model = None

        # It does not make sense to calculate a confusion matrix for a
        # regression
        # YOH: we do have summary statistics for regressions
        #self.states.enable('training_confusion', False)


    def __repr__(self):
        """String summary of the object
        """
        return "LARS(type='%s', normalize=%s, intercept=%s, trace=%s, " \
               "max_steps=%s, use_Gram=%s, regression=%s, " \
               "enable_states=%s)" % \
               (self.__type,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                self.__use_Gram,
                self.regression,
                str(self.states.enabled))


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).

        Fits the LARS path in R via RPy, then selects the step with the
        lowest Cp statistic and stores the corresponding beta weights.
        """
        if self.__max_steps is None:
            # train without specifying max_steps
            self.__trained_model = rpy.r.lars(data.samples,
                                              data.labels[:,N.newaxis],
                                              type=self.__type,
                                              normalize=self.__normalize,
                                              intercept=self.__intercept,
                                              trace=self.__trace,
                                              use_Gram=self.__use_Gram)
        else:
            # train with specifying max_steps
            self.__trained_model = rpy.r.lars(data.samples,
                                              data.labels[:,N.newaxis],
                                              type=self.__type,
                                              normalize=self.__normalize,
                                              intercept=self.__intercept,
                                              trace=self.__trace,
                                              use_Gram=self.__use_Gram,
                                              max_steps=self.__max_steps)

        # find the step with the lowest Cp (risk)
        # it is often the last step if you set a max_steps
        # must first convert dictionary (keyed by step as a string) to array
        Cp_vals = N.asarray([self.__trained_model['Cp'][str(x)]
                             for x in range(len(self.__trained_model['Cp']))])
        if N.isnan(Cp_vals[0]):
            # sometimes may come back nan, so just pick the last one
            self.__lowest_Cp_step = len(Cp_vals)-1
        else:
            # determine the lowest
            self.__lowest_Cp_step = Cp_vals.argmin()

        # set the weights to the lowest Cp step
        self.__weights = self.__trained_model['beta'][self.__lowest_Cp_step,:]

#        # set the weights to the final state
#        self.__weights = self.__trained_model['beta'][-1,:]


    def _predict(self, data):
        """
        Predict the output for the provided data.

        Uses the step along the LARS path with the lowest Cp statistic
        (determined during training).
        """
        # predict with the lowest Cp step
        res = rpy.r.predict_lars(self.__trained_model,
                                 data,
                                 mode='step',
                                 s=self.__lowest_Cp_step)
                                 #s=self.__trained_model['beta'].shape[0])

        fit = N.asarray(res['fit'])
        if len(fit.shape) == 0:
            # if we just got 1 sample with a scalar
            fit = fit.reshape( (1,) )

        self.values = fit
        return fit


    def _getFeatureIds(self):
        """Return ids of the used features (those with non-zero beta weights)
        """
        return N.where(N.abs(self.__weights)>0)[0]


    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for LARS."""
        return LARSWeights(self, **kwargs)

    # read-only access to the trained beta weights
    weights = property(lambda self: self.__weights)
215 216 217
class LARSWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights LARS trained
    on a given `Dataset`.
    """

    _LEGAL_CLFS = [ LARS ]

    def _call(self, dataset=None):
        """Extract weights from LARS classifier.

        LARS always has weights available, so nothing has to be computed here.
        """
        wts = self.clf.weights

        if __debug__:
            debug('LARS',
                  "Extracting weights for LARS - "
                  "Result: min=%f max=%f" % (N.min(wts), N.max(wts)))

        return wts
240