1
2
3
4
5
6
7
8
9
10 """Model selction."""
11
12 __docformat__ = 'restructuredtext'
13
14
15 import numpy as N
16 from mvpa.base import externals
17 from mvpa.misc.exceptions import InvalidHyperparameterError
18
19 if externals.exists("scipy", raiseException=True):
20 import scipy.linalg as SL
21
22
23 if externals.exists("openopt", raiseException=True):
24 from scikits.openopt import NLP
25
26 if __debug__:
27 from mvpa.base import debug
28
30
31
32 if __debug__:
33 da = debug.active
34 if 'OPENOPT' in da:
35 return 1
36 elif 'MOD_SEL' in da:
37 return 0
38 return -1
39
40
42 """Model selection facility.
43
44 Select a model among multiple models (i.e., a parametric model,
45 parametrized by a set of hyperparamenters).
46 """
47
48 - def __init__(self, parametric_model, dataset):
49 self.parametric_model = parametric_model
50 self.dataset = dataset
51 self.hyperparameters_best = None
52 self.log_marginal_likelihood_best = None
53 self.problem = None
54 pass
55
56 - def max_log_marginal_likelihood(self, hyp_initial_guess, maxiter=1,
57 optimization_algorithm="scipy_cg", ftol=1.0e-3, fixedHypers=None,
58 use_gradient=False, logscale=False):
59 """
60 Set up the optimization problem in order to maximize
61 the log_marginal_likelihood.
62
63 :Parameters:
64
65 parametric_model : Classifier
66 the actual parameteric model to be optimized.
67
68 hyp_initial_guess : numpy.ndarray
69 set of hyperparameters' initial values where to start
70 optimization.
71
72 optimization_algorithm : string
73 actual name of the optimization algorithm. See
74 http://scipy.org/scipy/scikits/wiki/NLP
75 for a comprehensive/updated list of available NLP solvers.
76 (Defaults to 'ralg')
77
78 ftol : float
79 threshold for the stopping criterion of the solver,
80 which is mapped in OpenOpt NLP.ftol
81 (Defaults to 1.0e-3)
82
83 fixedHypers : numpy.ndarray (boolean array)
84 boolean vector of the same size of hyp_initial_guess;
85 'False' means that the corresponding hyperparameter must
86 be kept fixed (so not optimized).
87 (Defaults to None, which during means all True)
88
89 NOTE: the maximization of log_marginal_likelihood is a non-linear
90 optimization problem (NLP). This fact is confirmed by Dmitrey,
91 author of OpenOpt.
92 """
93 self.problem = None
94 self.use_gradient = use_gradient
95 self.logscale = logscale
96 self.optimization_algorithm = optimization_algorithm
97 self.hyp_initial_guess = N.array(hyp_initial_guess)
98 self.hyp_initial_guess_log = N.log(self.hyp_initial_guess)
99 if fixedHypers is None:
100 fixedHypers = N.zeros(self.hyp_initial_guess.shape[0],dtype=bool)
101 pass
102 self.freeHypers = -fixedHypers
103 if self.logscale:
104 self.hyp_running_guess = self.hyp_initial_guess_log.copy()
105 else:
106 self.hyp_running_guess = self.hyp_initial_guess.copy()
107 pass
108 self.f_last_x = None
109
110 def f(x):
111 """
112 Wrapper to the log_marginal_likelihood to be
113 maximized.
114 """
115
116
117
118
119
120
121
122
123
124
125
126 self.f_last_x = x.copy()
127 self.hyp_running_guess[self.freeHypers] = x
128
129 try:
130 if self.logscale:
131 self.parametric_model.set_hyperparameters(N.exp(self.hyp_running_guess))
132 else:
133 self.parametric_model.set_hyperparameters(self.hyp_running_guess)
134 pass
135 except InvalidHyperparameterError:
136 if __debug__: debug("MOD_SEL","WARNING: invalid hyperparameters!")
137 return -N.inf
138 try:
139 self.parametric_model.train(self.dataset)
140 except (N.linalg.linalg.LinAlgError, SL.basic.LinAlgError, ValueError):
141
142 if __debug__: debug("MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!")
143 return -N.inf
144 log_marginal_likelihood = self.parametric_model.compute_log_marginal_likelihood()
145
146 return log_marginal_likelihood
147
148 def df(x):
149 """
150 Proxy to the log_marginal_likelihood first
151 derivative. Necessary for OpenOpt when using derivatives.
152 """
153 self.hyp_running_guess[self.freeHypers] = x
154
155
156
157
158
159
160
161
162 try:
163 if self.logscale:
164 self.parametric_model.set_hyperparameters(N.exp(self.hyp_running_guess))
165 else:
166 self.parametric_model.set_hyperparameters(self.hyp_running_guess)
167 pass
168 except InvalidHyperparameterError:
169 if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
170 return -N.inf
171
172
173
174
175 if N.any(x!=self.f_last_x):
176 if __debug__: debug("MOD_SEL","UNEXPECTED: recomputing train+log_marginal_likelihood.")
177 try:
178 self.parametric_model.train(self.dataset)
179 except (N.linalg.linalg.LinAlgError, SL.basic.LinAlgError, ValueError):
180 if __debug__: debug("MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!")
181
182
183 return N.zeros(x.size)
184 log_marginal_likelihood = self.parametric_model.compute_log_marginal_likelihood()
185 pass
186 if self.logscale:
187 gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood_logscale()
188 else:
189 gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood()
190 pass
191
192 return gradient_log_marginal_likelihood[self.freeHypers]
193
194
195 if self.logscale:
196
197 x0 = self.hyp_initial_guess_log[self.freeHypers]
198 else:
199 x0 = self.hyp_initial_guess[self.freeHypers]
200 pass
201 self.contol = 1.0e-20
202
203
204 if self.use_gradient:
205
206 self.problem = NLP(f, x0, df=df, contol=self.contol, goal='maximum')
207 else:
208 self.problem = NLP(f, x0, contol=self.contol, goal='maximum')
209 pass
210 self.problem.name = "Max LogMargLikelihood"
211 if not self.logscale:
212
213
214
215 self.problem.lb = N.zeros(self.problem.n)+self.contol
216 pass
217
218 self.problem.maxiter = maxiter
219
220
221 self.problem.checkdf = True
222
223 self.problem.ftol = ftol
224 self.problem.iprint = _openopt_debug()
225 return self.problem
226
227
228 - def solve(self, problem=None):
229 """Solve the maximization problem, check outcome and collect results.
230 """
231
232
233
234
235
236 if N.all(self.freeHypers==False):
237 self.hyperparameters_best = self.hyp_initial_guess.copy()
238 try:
239 self.parametric_model.set_hyperparameters(self.hyperparameters_best)
240 except InvalidHyperparameterError:
241 if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
242 self.log_marginal_likelihood_best = -N.inf
243 return self.log_marginal_likelihood_best
244 self.parametric_model.train(self.dataset)
245 self.log_marginal_likelihood_best = self.parametric_model.compute_log_marginal_likelihood()
246 return self.log_marginal_likelihood_best
247
248 result = self.problem.solve(self.optimization_algorithm)
249 if result.stopcase == -1:
250
251
252
253 print "Unable to find a maximum to log_marginal_likelihood"
254 elif result.stopcase == 0:
255 print "Limits exceeded"
256 elif result.stopcase == 1:
257 self.hyperparameters_best = self.hyp_initial_guess.copy()
258 if self.logscale:
259 self.hyperparameters_best[self.freeHypers] = N.exp(result.xf)
260 else:
261 self.hyperparameters_best[self.freeHypers] = result.xf
262 pass
263 self.log_marginal_likelihood_best = result.ff
264 pass
265 self.stopcase = result.stopcase
266 return self.log_marginal_likelihood_best
267
268 pass
269