Package mvpa :: Package misc :: Module state
[hide private]
[frames | no frames]

Source Code for Module mvpa.misc.state

   1  # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- 
   2  # vi: set ft=python sts=4 ts=4 sw=4 et: 
   3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
   4  # 
   5  #   See COPYING file distributed along with the PyMVPA package for the 
   6  #   copyright and license terms. 
   7  # 
   8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
   9  """Classes to control and store state information. 
  10   
  11  It was devised to provide conditional storage  
  12  """ 
  13   
  14  # XXX: MH: The use of `index` as variable name confuses me. IMHO `index` refers 
  15  #          to a position in a container (i.e. list access). However, in this 
  16  #          file it is mostly used in the context of a `key` for dictionary 
  17  #          access. Can we refactor that? 
  18  __docformat__ = 'restructuredtext' 
  19   
  20  import operator, copy 
  21  from sets import Set 
  22  from textwrap import TextWrapper 
  23   
  24  import numpy as N 
  25   
  26  from mvpa.misc.vproperty import VProperty 
  27  from mvpa.misc.exceptions import UnknownStateError 
  28  from mvpa.misc.attributes import CollectableAttribute, StateVariable 
  29  from mvpa.base.dochelpers import enhancedDocString 
  30   
  31  from mvpa.base import externals 
  32   
  33  if __debug__: 
  34      from mvpa.base import debug 
  35   
  36   
  37  _in_ipython = externals.exists('running ipython env') 
  38  # Separators around definitions, needed for ReST, but bogus for 
  39  # interactive sessions 
  40  _def_sep = ('`', '')[int(_in_ipython)] 
  41   
  42  _object_getattribute = object.__getattribute__ 
  43  _object_setattr = object.__setattr__ 
  44   
  45   
  46  ################################################################### 
  47  # Collections 
  48  # 
  49  # TODO: refactor into collections.py. state.py now has 
  50  #       little in common with the main part of this file 
  51  # 
class Collection(object):
    """Container of some CollectableAttributes.

    Items are kept in a dictionary keyed by the item name.  The *value*
    of an item is reachable as an attribute of the collection
    (``col.somename``), the item object itself via subscription
    (``col['somename']``).

    :Groups:
     - `Public Access Functions`: `isKnown`
     - `Access Implementors`: `_getListing`, `_getNames`
     - `Mutators`: `__init__`
     - `R/O Properties`: `listing`, `names`, `items`

    XXX Seems to be not used and duplicating functionality: `_getListing`
    (thus `listing` property)
    """

    def __init__(self, items=None, owner=None, name=None):
        """Initialize the Collection

        :Parameters:
          items : dict of CollectableAttribute's
            items to initialize with
          owner : object
            an object to which collection belongs
          name : basestring
            name of the collection (as seen in the owner, e.g. 'states')
        """
        # stored directly: no registration within the owner is done here
        # (that is what the `owner` property setter is for)
        self.__owner = owner

        if items is None:
            items = {}
        # dictionary: item name -> CollectableAttribute
        self._items = items
        self.__name = name


    def _setName(self, name):
        """Setter for the `name` property"""
        self.__name = name


    def __str__(self):
        """Short description: name, first few items, '...' if truncated"""
        if __debug__ and "ST" in debug.active:
            maxnumber = 1000            # I guess all
        else:
            maxnumber = 4
        if self.__name is not None:
            res = self.__name
        else:
            res = ""
        # take the values once instead of rebuilding them per iteration
        shown = list(self._items.values())[:maxnumber]
        res += "{"
        res += " ".join([str(item) for item in shown])
        if len(self._items) > maxnumber:
            res += "..."
        res += "}"
        if __debug__:
            if "ST" in debug.active:
                res += " owner:%s#%s" % (self.owner.__class__.__name__,
                                         id(self.owner))
        return res


    def _cls_repr(self):
        """Collection specific part of __repr__ for a class containing
        it, ie a part of __repr__ for the owner object

        :Return:
          list of items to be appended within __repr__ after a .join()

        :Raise `NotImplementedError`: always -- derived classes have to
          override this method
        """
        # XXX For now we do not expect any pure non-specialized
        # collection , thus just override in derived classes
        raise NotImplementedError("Class %s should override _cls_repr"
                                  % self.__class__.__name__)


    def _is_initializable(self, index):
        """Checks if index could be assigned within collection via
        _initialize

        :Return: bool value for a given `index`

        It is to facilitate dynamic assignment of collections' items
        within derived classes' __init__ depending on the present
        collections in the class.
        """
        # XXX Each collection has to provide what indexes it allows
        #     to be set within constructor. Custom handling of some
        #     arguments (like (dis|en)able_states) is to be performed
        #     in _initialize
        # YYY lets just check if it is among the registered items
        return index in self._items


    def _initialize(self, index, value):
        """Initialize `index` (no check performed) with `value`
        """
        # by default we just set corresponding value
        self.setvalue(index, value)


    def __repr__(self):
        """Constructor-like string listing item values and the owner.

        Values whose representation is longer than 50 characters are
        abbreviated.  Items whose value cannot be obtained are skipped.
        """
        s = "%s(" % self.__class__.__name__
        items_s = ""
        sep = ""
        for item in self._items:
            try:
                itemvalue = "%s" % repr(self._items[item].value)
                if len(itemvalue) > 50:
                    itemvalue = itemvalue[:10] + '...' + itemvalue[-10:]
                items_s += "%s'%s':%s" % (sep, item, itemvalue)
                sep = ', '
            except Exception:
                # best effort: presumably accessing the value of an item
                # may fail (e.g. if it was never assigned) -- skip such
                pass
        if items_s != "":
            s += "items={%s}" % items_s
        if self.owner is not None:
            s += "%sowner=%s" % (sep, repr(self.owner))
        s += ")"
        return s


    #
    # XXX TODO: figure out if there is a way to define proper
    #           __copy__'s for a hierarchy of classes. Probably we had
    #           to define __getinitargs__, etc... read more...
    #           (a disabled draft of a generic _copy_states_ used to live
    #           here; StateCollection provides a working one)
    #

    def isKnown(self, index):
        """Returns `True` if state `index` is known at all"""
        return index in self._items


    def __isSet1(self, index):
        """Returns `True` if state `index` has value set"""
        self._checkIndex(index)
        return self._items[index].isSet


    def isSet(self, index=None):
        """If item (or any in the present or listed) was set

        :Parameters:
          index : None or basestring or list of basestring
            What items to check if they were set in the collection

        :Raise `KeyError`: if any requested `index` is not known
        """
        _items = self._items
        if index is None:
            candidates = _items         # go through all the items
        elif isinstance(index, basestring):
            # process just that single index
            self._checkIndex(index)
            return _items[index].isSet
        else:
            candidates = index          # assume that we got some list

        for index_ in candidates:
            self._checkIndex(index_)
            if _items[index_].isSet:
                return True
        return False


    def whichSet(self):
        """Return list of indexes which were set"""
        return [index for index, v in self._items.iteritems() if v.isSet]


    def _checkIndex(self, index):
        """Verify that given `index` is a known/registered state.

        :Raise `KeyError`: if given `index` is not known
        """
        # OPT: lets not reuse isKnown, to don't incure 1 more function
        #      call
        if index not in self._items:
            raise KeyError("%s of %s has no key '%s' registered"
                           % (self.__class__.__name__,
                              self.__owner.__class__.__name__,
                              index))


    def add(self, item):
        """Add a new CollectableAttribute to the collection

        :Parameters:
          item : CollectableAttribute
            or of derived class. Must have 'name' assigned

        :Raise `ValueError`: if `item` is not a CollectableAttribute or
          has no name

        TODO: we should make it stricter to don't add smth of
              wrong type into Collection since it might lead to problems

              Also we might convert to __setitem__
        """
        if not isinstance(item, CollectableAttribute):
            raise ValueError("Collection can add only instances of " +
                             "CollectableAttribute-derived classes. Got %s"
                             % repr(item))
        if item.name is None:
            raise ValueError(
                "CollectableAttribute to be added %s must have 'name' set" %
                item)
        self._items[item.name] = item

        if self.owner is not None:
            self._updateOwner(item.name)


    def remove(self, index):
        """Remove item from the collection

        :Raise `KeyError`: if given `index` is not known
        """
        self._checkIndex(index)
        # as in `add`: there is nothing to unregister without an owner
        if self.owner is not None:
            self._updateOwner(index, register=False)
        self._items.pop(index)


    def __getattribute__(self, index):
        """Return the value of the item `index` if such is registered,
        otherwise fall back to regular attribute access
        """
        # return all private and protected ones first since we will not
        # have collectable's with _ (we should not have!)
        if index[0] == '_':
            return _object_getattribute(self, index)
        _items = _object_getattribute(self, '_items')
        if index in _items:
            return _items[index].value
        return _object_getattribute(self, index)


    def __setattr__(self, index, value):
        """Assign the value of the item `index` if such is registered,
        otherwise fall back to regular attribute assignment
        """
        if index[0] == '_':
            return _object_setattr(self, index, value)
        _items = _object_getattribute(self, '_items')
        if index in _items:
            _items[index].value = value
        else:
            _object_setattr(self, index, value)


    def __getitem__(self, index):
        """Return the item object (not its value) registered as `index`

        :Raise `AttributeError`: if `index` is not registered
        """
        _items = _object_getattribute(self, '_items')
        if index in _items:
            return _items[index]
        raise AttributeError("State collection %s has no %s attribute"
                             % (self, index))


    def getvalue(self, index):
        """Returns the value by index"""
        self._checkIndex(index)
        return self._items[index].value


    def get(self, index, default):
        """Access the value by a given index.

        Mimiquing regular dictionary behavior, if value cannot be obtained
        (i.e. if any exception is caught) return default value.
        """
        try:
            return self[index].value
        except Exception:
            return default


    def setvalue(self, index, value):
        """Sets the value by index"""
        self._checkIndex(index)
        self._items[index].value = value


    def _action(self, index, func, missingok=False, **kwargs):
        """Run specific func either on a single item or on all of them

        :Parameters:
          index : basestr or sequence of basestr
            Name of the state variable. 'all' (case-insensitive) stands
            for all registered items; a sequence is processed
            element-wise
          func
            Function (not bound) to call given an item, and **kwargs
          missingok : bool
            If True - do not complain about wrong index

        :Raise `KeyError`: if `index` is not known and `missingok` is False
        :Raise `ValueError`: if `index` is neither a string nor a sequence
        """
        if isinstance(index, basestring):
            if index.upper() == 'ALL':
                for index_ in self._items:
                    self._action(index_, func, missingok=missingok, **kwargs)
            else:
                if index not in self._items:
                    if missingok:
                        return
                    self._checkIndex(index)     # raises KeyError
                # `missingok` excuses only an unknown index -- failures
                # of `func` itself are not hidden
                func(self._items[index], **kwargs)
        elif operator.isSequenceType(index):
            for item in index:
                self._action(item, func, missingok=missingok, **kwargs)
        else:
            raise ValueError(
                "Don't know how to handle variable given by %s" % index)


    def reset(self, index=None):
        """Reset the state variable defined by `index`

        :Parameters:
          index : None or basestring or sequence of basestring
            What to reset. If None -- all registered items
        """
        if index is not None:
            indexes = [ index ]
        else:
            indexes = self.names

        def _reset_item(item):
            # dispatch on the item itself, so items of different classes
            # get their own `reset`
            item.reset()

        if len(self._items):
            for index_ in indexes:
                self._action(index_, _reset_item, missingok=False)


    def _getListing(self):
        """Return a list of registered states along with the documentation"""

        # lets assure consistent litsting order
        items = list(self._items.items())
        items.sort()
        return [ "%s%s%s: %s" % (_def_sep, str(x[1]), _def_sep, x[1].__doc__)
                 for x in items ]


    def _getNames(self):
        """Return ids for all registered state variables"""
        return self._items.keys()


    def _getOwner(self):
        """Getter for the `owner` property"""
        return self.__owner


    def _setOwner(self, owner):
        """Setter for the `owner` property.

        Unregisters the items from the previous owner (if any) and
        registers them within the new one.

        :Raise `ValueError`: if `owner` is not a ClassWithCollections
        """
        if not isinstance(owner, ClassWithCollections):
            raise ValueError(
                "Owner of the StateCollection must be ClassWithCollections object")
        if __debug__:
            # str() of the owner might fail (presumably if it is not
            # fully initialized yet) -- debug output must not break things
            try:
                strowner = str(owner)
            except Exception:
                strowner = "UNDEF: <%s#%s>" % (owner.__class__, id(owner))
            debug("ST", "Setting owner for %s to be %s" % (self, strowner))
        if self.__owner is not None:
            # Remove attributes which were registered to that owner previousely
            self._updateOwner(register=False)
        self.__owner = owner
        if self.__owner is not None:
            self._updateOwner(register=True)


    def _updateOwner(self, index=None, register=True):
        """Define an entry within owner's __dict__
         so ipython could easily complete it

        Besides the owner's `__dict__`, the item is (un)registered in the
        `__dict__` of the collection itself, and the name of this
        collection is recorded in the owner's `_known_attribs`.

        :Parameters:
          index : basestring or list of basestring
            Name of the attribute. If None -- all known get registered
          register : bool
            Register if True or unregister if False

        :Raise `ValueError`: if `index` is given but not known
        :Raise `RuntimeError`: if an attribute to be registered is
          already present in the owner or in the collection

        XXX Needs refactoring since we duplicate the logic of expansion of
        index value
        """
        if index is not None:
            if index not in self._items:
                raise ValueError(
                    "Attribute %s is not known to %s" % (index, self))
            indexes = [ index ]
        else:
            indexes = self.names

        ownerdict = self.owner.__dict__
        selfdict = self.__dict__
        owner_known = ownerdict['_known_attribs']
        for index_ in indexes:
            if register:
                if index_ in ownerdict:
                    raise RuntimeError(
                        "Cannot register attribute %s within %s " %
                        (index_, self.owner) + "since it has one already")
                ownerdict[index_] = self._items[index_]
                if index_ in selfdict:
                    raise RuntimeError(
                        "Cannot register attribute %s within %s " %
                        (index_, self) + "since it has one already")
                selfdict[index_] = self._items[index_]
                owner_known[index_] = self.__name
            else:
                if index_ in ownerdict:
                    # yoh doesn't think that we need to complain if False
                    ownerdict.pop(index_)
                    owner_known.pop(index_)
                if index_ in selfdict:
                    selfdict.pop(index_)


    # Properties
    names = property(fget=_getNames)
    items = property(fget=lambda x:x._items)
    owner = property(fget=_getOwner, fset=_setOwner)
    name = property(fget=lambda x:x.__name, fset=_setName)

    # Virtual properties
    listing = VProperty(fget=_getListing)
512 513 514
class ParameterCollection(Collection):
    """Container of Parameters for a stateful object.
    """

    def _cls_repr(self):
        """Part of __repr__ for the owner object

        :Return:
          list of 'name=value' strings for the parameters which are not
          at their default value
        """
        prefixes = []
        for k in self.names:
            param = self[k]
            # list only params with not default values
            if param.isDefault:
                continue
            prefixes.append("%s=%s" % (k, param.value))
        return prefixes


    def resetvalue(self, index, missingok=False):
        """Reset the parameter(s) given by `index` to default value

        :Parameters:
          index : basestring or sequence of basestring
            Name(s) of the parameters; 'all' selects every parameter
          missingok : bool
            If True - do not complain about wrong index
        """
        # imported here rather than at the module level (presumably to
        # avoid a circular import -- TODO confirm)
        from param import Parameter
        self._action(index, Parameter.resetvalue, missingok=missingok)
545 546
class SampleAttributesCollection(Collection):
    """Container for data and attributes of samples (ie data/labels/chunks/...)
    """

    def _cls_repr(self):
        """Part of __repr__ for the owner object

        :Return:
          empty list -- nothing is contributed at the moment
        """
        # TODO: return I guess samples/labels/chunks
        prefixes = []
        return prefixes
565 566 567
class StateCollection(Collection):
    """Container of StateVariables for a stateful object.

    :Groups:
     - `Public Access Functions`: `isKnown`, `isEnabled`, `isActive`
     - `Access Implementors`: `_getListing`, `_getNames`, `_getEnabled`
     - `Mutators`: `__init__`, `enable`, `disable`, `_setEnabled`
     - `R/O Properties`: `listing`, `names`, `items`
     - `R/W Properties`: `enabled`
    """

    def __init__(self, items=None, owner=None, name=None):
        """Initialize the state variables of a derived class

        :Parameters:
          items : dict
            dictionary of states
          owner : ClassWithCollections
            object which owns the collection
          name : basestring
            literal description. Usually just attribute name for the
            collection, e.g. 'states'
        """
        Collection.__init__(self, items=items, owner=owner, name=name)

        # stack of lists of enabled states, pushed by _changeTemporarily
        # and popped by _resetEnabledTemporarily
        self.__storedTemporarily = []

    #
    # XXX TODO: figure out if there is a way to define proper
    #           __copy__'s for a hierarchy of classes. Probably we had
    #           to define __getinitargs__, etc... read more...
    #
    #def __copy__(self):

    def _cls_repr(self):
        """Part of __repr__ for the owner object

        :Return:
          list with 'enable_states=...' and/or 'disable_states=...'
          entries for the states whose enabled status differs from
          their default
        """
        prefixes = []
        for name, invert in ( ('enable', False), ('disable', True) ):
            states = self._getEnabled(nondefault=False,
                                      invert=invert)
            if len(states):
                prefixes.append("%s_states=%s" % (name, str(states)))
        return prefixes


    def _is_initializable(self, index):
        """Checks if index could be assigned within collection via
        setvalue
        """
        return index in ['enable_states', 'disable_states']


    def _initialize(self, index, value):
        """Handle 'enable_states'/'disable_states' constructor arguments

        :Raise `ValueError`: for any other `index`
        """
        if value is None:
            value = []
        if index == 'enable_states':
            self.enable(value, missingok=True)
        elif index == 'disable_states':
            self.disable(value)
        else:
            raise ValueError("StateCollection can accept only enable_states "
                             "and disable_states arguments for the initialization. "
                             "Got %s" % index)


    def _copy_states_(self, fromstate, index=None, deep=False):
        """Copy known here states from `fromstate` object into current object

        :Parameters:
          fromstate : Collection or ClassWithCollections
            Source states to copy from
          index : None or list of basestring
            If not to copy all set state variables, index provides
            selection of what to copy
          deep : bool
            Optional control over the way to copy

        Crafted to overcome a problem mentioned above in the comment
        and is to be called from __copy__ of derived classes

        Probably sooner than later will get proper __getstate__,
        __setstate__
        """
        # TODO: FOR NOW NO TEST! But this beast needs to be fixed...
        if deep:
            operation = copy.deepcopy
        else:
            operation = copy.copy

        if isinstance(fromstate, ClassWithCollections):
            fromstate = fromstate.states

        _items, from_items = self._items, fromstate._items
        if index is None:
            # copy all set ones
            for name in fromstate.whichSet():
                _items[name] = operation(from_items[name])
        else:
            # copy only the requested ones which are known to the source
            isKnown = fromstate.isKnown
            for name in index:
                if isKnown(name):
                    _items[name] = operation(from_items[name])


    def isEnabled(self, index):
        """Returns `True` if state `index` is enabled"""
        self._checkIndex(index)
        return self._items[index].isEnabled


    def isActive(self, index):
        """Returns `True` if state `index` is known and is enabled"""
        return self.isKnown(index) and self.isEnabled(index)


    def enable(self, index, value=True, missingok=False):
        """Enable state variable given in `index`"""
        self._action(index, StateVariable.enable, missingok=missingok,
                     value=value)


    def disable(self, index):
        """Disable state variable defined by `index` id"""
        self._action(index, StateVariable.enable, missingok=False, value=False)


    # TODO XXX think about some more generic way to grab temporary
    # snapshot of CollectableAttributes to be restored later on...
    def _changeTemporarily(self, enable_states=None,
                           disable_states=None, other=None):
        """Temporarily enable/disable needed states for computation

        Enables states listed in `enable_states` plus those which are
        enabled in `other`, are known here and are not already listed
        in `enable_states`; disables the ones listed in
        `disable_states`.  Use `_resetEnabledTemporarily` to reset to
        the previous set of enabled states.

        `other` can be a ClassWithCollections object or StateCollection
        """
        if enable_states is None:
            enable_states = []
        if disable_states is None:
            disable_states = []
        self.__storedTemporarily.append(self.enabled)
        other_ = other
        if isinstance(other, ClassWithCollections):
            other = other.states

        if other is not None:
            # lets take states which are enabled in other but not
            # requested explicitly
            add_enable_states = list(Set(other.enabled).difference(
                 Set(enable_states)).intersection(self.names))
            if len(add_enable_states) > 0:
                if __debug__:
                    debug("ST",
                          "Adding states %s from %s to be enabled temporarily" %
                          (add_enable_states, other_) +
                          " since they are not enabled in %s" %
                          (self))
                if isinstance(enable_states, basestring):
                    enable_states = [enable_states]
                # build a new list: the argument of the caller must not
                # be modified in place
                enable_states = list(enable_states) + add_enable_states

        # Lets go one by one enabling only disabled once... but could be as
        # simple as
        self.enable(enable_states)
        self.disable(disable_states)


    def _resetEnabledTemporarily(self):
        """Reset to previousely stored set of enabled states

        :Raise `ValueError`: if there is nothing stored to restore from
        """
        if __debug__:
            debug("ST", "Resetting to previous set of enabled states")
        # it is the stack of stored sets which has to be non-empty, not
        # the list of currently enabled states
        if len(self.__storedTemporarily) > 0:
            self.enabled = self.__storedTemporarily.pop()
        else:
            raise ValueError("Trying to restore not-stored list of enabled " \
                             "states")


    def _getEnabled(self, nondefault=True, invert=False):
        """Return list of enabled states

        :Parameters:
          nondefault : bool
            If True, all matching states are returned.  If False, only
            those of them whose enabled status differs from their
            default one
          invert : bool
            Would invert the meaning, ie would return disabled states
        """
        if invert:
            fmatch = lambda y: not self.isEnabled(y)
        else:
            fmatch = lambda y: self.isEnabled(y)

        if nondefault:
            ffunc = fmatch
        else:
            ffunc = lambda y: fmatch(y) and \
                        self._items[y]._defaultenabled != self.isEnabled(y)
        return [name for name in self.names if ffunc(name)]


    def _setEnabled(self, indexlist):
        """Given `indexlist` make only those in the list enabled

        It might be handy to store set of enabled states and then to restore
        it later on. It can be easily accomplished now::

        >>> from mvpa.misc.state import ClassWithCollections, StateVariable
        >>> class Blah(ClassWithCollections):
        ...   bleh = StateVariable(enabled=False, doc='Example')
        ...
        >>> blah = Blah()
        >>> states_enabled = blah.states.enabled
        >>> blah.states.enabled = ['bleh']
        >>> blah.states.enabled = states_enabled
        """
        for index in self._items.keys():
            self.enable(index, index in indexlist)


    # Properties
    enabled = property(fget=_getEnabled, fset=_setEnabled)
##################################################################
# Base classes (and metaclass) which use collections
#


#
# Helper dictionaries for AttributesCollector
#

# class name of a collectable attribute ->
#   (name of the collection holding it, class of that collection)
_known_collections = {
    # Quite a generic one but mostly in classifiers
    'StateVariable': ("states", StateCollection),
    # For classifiers only
    'Parameter': ("params", ParameterCollection),
    'KernelParameter': ("kernel_params", ParameterCollection),
    # For datasets
    # XXX custom collections needed?
    'SampleAttribute': ("sa", SampleAttributesCollection),
    'FeatureAttribute': ("fa", SampleAttributesCollection),
    'DatasetAttribute': ("dsa", SampleAttributesCollection),
    }


# built from the (collection name, collection class) pairs above
_col2class = dict(_known_collections.values())
"""Mapping from collection name into Collection class"""


# order in which ClassWithCollections.__repr__ goes through collections
_COLLECTIONS_ORDER = ['sa', 'fa', 'dsa',
                      'params', 'kernel_params', 'states']
class AttributesCollector(type):
    """Intended to collect and compose StateCollection for any child
    class of this metaclass
    """


    def __init__(cls, name, bases, dict):
        """Compose `_collections_template` for the class being created

        CollectableAttributes found among the class attributes are
        grouped by the collection they belong to (see
        `_known_collections`), merged with the items of the collection
        templates of the base classes, and stored as instantiated
        collections in `cls._collections_template`.  `_paramscols`,
        `_statesdoc` and `_paramsdoc` are assigned as well, and the
        docstring of the class is extended if there is anything to add.

        :Raise `ValueError`: if `_ATTRIBUTE_COLLECTIONS` of the class
          requests an unknown collection
        """

        if __debug__:
            debug(
                "COLR",
                "AttributesCollector call for %s.%s, where bases=%s, dict=%s " \
                % (cls, name, bases, dict))

        super(AttributesCollector, cls).__init__(name, bases, dict)

        # collection name -> {attribute name -> CollectableAttribute}
        collections = {}
        for attr_name, value in dict.iteritems():
            if isinstance(value, CollectableAttribute):
                baseclassname = value.__class__.__name__
                col = _known_collections[baseclassname][0]
                # XXX should we allow to throw exceptions here?
                if col not in collections:
                    collections[col] = {}
                collections[col][attr_name] = value
                # and assign name if not yet was set
                if value.name is None:
                    value.name = attr_name

        # XXX can we first collect parent's states and then populate with ours?
        # TODO

        for base in bases:
            if hasattr(base, "__metaclass__") and \
                   base.__metaclass__ == AttributesCollector:
                # TODO take care about overriding one from super class
                # NOTE(review): since `update` is used below, an item of
                # a base replaces an item of the same name defined in
                # the class itself
                newcollections = base._collections_template
                if len(newcollections) == 0:
                    continue
                if __debug__:
                    debug("COLR",
                          "Collect collections %s for %s from %s" %
                          (newcollections, cls, base))
                for col, collection in newcollections.iteritems():
                    newitems = collection.items
                    if col in collections:
                        collections[col].update(newitems)
                    else:
                        # take a copy: otherwise items of any further
                        # base would get merged right into the
                        # template of this `base`
                        collections[col] = newitems.copy()


        if __debug__:
            debug("COLR",
                  "Creating StateCollection template %s with collections %s"
                  % (cls, collections.keys()))

        # if there is an explicit request for collections
        if hasattr(cls, "_ATTRIBUTE_COLLECTIONS"):
            for col in cls._ATTRIBUTE_COLLECTIONS:
                if col not in _col2class:
                    raise ValueError(
                          "Requested collection %s is unknown to collector" %
                          col)
                if col not in collections:
                    collections[col] = None

        # TODO: check on conflict in names of Collections' items!  since
        # otherwise even order is not definite since we use dict for
        # collections.
        # XXX should we switch to tuple?

        # replace dictionaries of items with actual collections
        for col in list(collections.keys()):
            collections[col] = _col2class[col](collections[col])

        setattr(cls, "_collections_template", collections)

        #
        # Expand documentation for the class based on the listed
        # parameters an if it is stateful
        #
        # TODO -- figure nice way on how to alter __init__ doc directly...
        #
        # NOTE(review): leading whitespace inside the string literals
        # below is reconstructed following the ReST layout of the
        # docstrings in this file -- verify against the pristine source

        # Parameters
        paramsdoc = ""
        paramscols = []
        for col in ('params', 'kernel_params'):
            if col in collections:
                paramscols.append(col)
                # lets at least sort the parameters for consistent output
                col_items = collections[col].items
                params = [(v._instance_index, k)
                          for k, v in col_items.iteritems()]
                params.sort()
                paramsdoc += '\n'.join(
                    [col_items[param].doc(indent='  ')
                     for index, param in params]) + '\n'

        # Parameters collection could be taked hash of to decide if
        # any were changed? XXX may be not needed at all?
        setattr(cls, "_paramscols", paramscols)

        # States doc
        statesdoc = ""
        if 'states' in collections:
            paramsdoc += """  enable_states : None or list of basestring
    Names of the state variables which should be enabled additionally
    to default ones
  disable_states : None or list of basestring
    Names of the state variables which should be disabled
"""
            statesdoc = "  * "
            statesdoc += '\n  * '.join(collections['states'].listing)
            statesdoc += "\n\n(States enabled by default are listed with `+`)"
            if __debug__:
                debug("COLR", "Assigning __statesdoc to be %s" % statesdoc)
            setattr(cls, "_statesdoc", statesdoc)

        if paramsdoc != "":
            if __debug__ and 'COLR' in debug.active:
                debug("COLR", "Assigning __paramsdoc to be %s" % paramsdoc)
            setattr(cls, "_paramsdoc", paramsdoc)

        if paramsdoc + statesdoc != "":
            cls.__doc__ = enhancedDocString(cls, *bases)
959 960 961
962 -class ClassWithCollections(object):
963 """Base class for objects which contain any known collection 964 965 Classes inherited from this class gain ability to access 966 collections and their items as simple attributes. Access to 967 collection items "internals" is done via <collection_name> attribute 968 and interface of a corresponding `Collection`. 969 """ 970 971 _DEV__doc__ = """ 972 TODO: rename 'descr'? -- it should simply 973 be 'doc' -- no need to drag classes docstring imho. 974 """ 975 976 __metaclass__ = AttributesCollector 977
978 - def __new__(cls, *args, **kwargs):
979 """Initialize ClassWithCollections object 980 981 :Parameters: 982 descr : basestring 983 Description of the instance 984 """ 985 self = super(ClassWithCollections, cls).__new__(cls) 986 987 s__dict__ = self.__dict__ 988 989 # init variable 990 # XXX: Added as pylint complained (rightfully) -- not sure if false 991 # is the proper default 992 self.__params_set = False 993 994 # need to check to avoid override of enabled states in the case 995 # of multiple inheritance, like both ClassWithCollectionsl and Harvestable 996 if not s__dict__.has_key('_collections'): 997 s__class__ = self.__class__ 998 999 collections = copy.deepcopy(s__class__._collections_template) 1000 s__dict__['_collections'] = collections 1001 s__dict__['_known_attribs'] = {} 1002 """Dictionary to contain 'links' to the collections from each 1003 known attribute. Is used to gain some speed up in lookup within 1004 __getattribute__ and __setattr__ 1005 """ 1006 1007 # Assign owner to all collections 1008 for col, collection in collections.iteritems(): 1009 if col in s__dict__: 1010 raise ValueError, \ 1011 "Object %s has already attribute %s" % \ 1012 (self, col) 1013 s__dict__[col] = collection 1014 collection.name = col 1015 collection.owner = self 1016 1017 self.__params_set = False 1018 1019 if __debug__: 1020 descr = kwargs.get('descr', None) 1021 debug("COL", "ClassWithCollections.__new__ was done " 1022 "for %s#%s with descr=%s" \ 1023 % (s__class__.__name__, id(self), descr)) 1024 1025 return self
1026 1027
1028 - def __init__(self, descr=None, **kwargs):
1029 1030 if not self.__params_set: 1031 self.__descr = descr 1032 """Set humane description for the object""" 1033 1034 # To avoid double initialization in case of multiple inheritance 1035 self.__params_set = True 1036 1037 collections = self._collections 1038 # Assign attributes values if they are given among 1039 # **kwargs 1040 for arg, argument in kwargs.items(): 1041 set = False 1042 for collection in collections.itervalues(): 1043 if collection._is_initializable(arg): 1044 collection._initialize(arg, argument) 1045 set = True 1046 break 1047 if set: 1048 trash = kwargs.pop(arg) 1049 else: 1050 known_params = reduce( 1051 lambda x,y:x+y, 1052 [x.items.keys() for x in collections.itervalues()], []) 1053 raise TypeError, \ 1054 "Unexpected keyword argument %s=%s for %s." \ 1055 % (arg, argument, self) \ 1056 + " Valid parameters are %s" % known_params 1057 1058 ## Initialize other base classes 1059 ## commented out since it seems to be of no use for now 1060 #if init_classes is not None: 1061 # # return back stateful arguments since they might be 1062 # # processed by underlying classes 1063 # kwargs.update(kwargs_stateful) 1064 # for cls in init_classes: 1065 # cls.__init__(self, **kwargs) 1066 #else: 1067 # if len(kwargs)>0: 1068 # known_params = reduce(lambda x, y: x + y, \ 1069 # [x.items.keys() for x in collections], 1070 # []) 1071 # raise TypeError, \ 1072 # "Unknown parameters %s for %s." % (kwargs.keys(), 1073 # self) \ 1074 # + " Valid parameters are %s" % known_params 1075 if __debug__: 1076 debug("COL", "ClassWithCollections.__init__ was done " 1077 "for %s#%s with descr=%s" \ 1078 % (self.__class__.__name__, id(self), descr))
1079 1080 1081 #__doc__ = enhancedDocString('ClassWithCollections', locals()) 1082 1083
def __getattribute__(self, index):
    """Look an attribute up, consulting the collections for public names.

    Names starting with an underscore are resolved by plain object
    lookup right away, since something like `__dict__` might be queried
    (e.g. by copy) before the instance is initialized.  For any other
    name the lookup order is: a collection of that name, a value of an
    attribute registered in `_known_attribs`, plain object lookup.
    """
    if index[0] != '_':
        instance_dict = _object_getattribute(self, '__dict__')

        # the name of a collection itself
        registry = instance_dict['_collections']
        if index in registry:
            return registry[index]

        # an attribute stored in one of the collections
        owners = instance_dict['_known_attribs']
        if index in owners:
            return registry[owners[index]].getvalue(index)

    # private names and everything not handled above
    return _object_getattribute(self, index)
1103 1104
def __setattr__(self, index, value):
    """Assign an attribute, storing collection members in their collection.

    A public name registered in `_known_attribs` is assigned through
    `setvalue` of the collection it is linked to.  Names starting with an
    underscore and unregistered names are assigned as regular attributes.
    """
    if index[0] != '_':
        instance_dict = _object_getattribute(self, '__dict__')
        owners = instance_dict['_known_attribs']
        if index in owners:
            registry = instance_dict['_collections']
            return registry[owners[index]].setvalue(index, value)

    # Generic setattr
    return _object_setattr(self, index, value)
1118 1119 1120 # XXX not sure if we shouldn't implement anything else...
def reset(self):
    """Reset all collections of the object.

    Calls `reset()` on every value stored in `self._collections`.
    """
    for owned in self._collections.values():
        owned.reset()
1124 1125
def __str__(self):
    """Return a short summary of the object.

    The summary consists of the class name, the description (if it is
    not None) and, for each collection, the number of its items, its
    name and its own string representation.
    """
    pieces = ["%s:" % (self.__class__.__name__)]

    description = self.__descr
    if description is not None:
        pieces.append("/%s " % description)

    # the collections might not be there yet
    if hasattr(self, "_collections"):
        for name, collection in self._collections.items():
            pieces.append(" %d %s:%s"
                          % (len(collection.items), name, str(collection)))

    return "".join(pieces)
1134 1135
def __repr__(self, prefixes=None, fullname=False):
    """String definition of the object of ClassWithCollections object

    :Parameters:
      fullname : bool
        Either to include full name of the module
      prefixes : list of strings
        What other prefixes to prepend to list of arguments
    """
    klass = self.__class__

    # operate on a copy, so the list given by the caller stays intact
    if prefixes is None:
        arguments = []
    else:
        arguments = prefixes[:]

    id_suffix = ""
    module_prefix = ""
    if __debug__:
        # active debug targets could enforce module name and/or id
        active = debug.active
        if 'MODULE_IN_REPR' in active:
            fullname = True
        if 'ID_IN_REPR' in active:
            id_suffix = '#%s' % id(self)

    if fullname:
        module = '%s' % klass.__module__
        if module != "__main__":
            module_prefix = "%s." % module

    # Collections' attributes, in the order of _COLLECTIONS_ORDER;
    # collections absent in this object are skipped
    registry = self._collections
    for name in _COLLECTIONS_ORDER:
        collection = registry.get(name, None)
        if collection is not None:
            arguments += collection._cls_repr()

    # Description if present
    description = self.__descr
    if description is not None:
        arguments.append("descr=%s" % repr(description))

    return "%s%s(%s)%s" % (module_prefix, klass.__name__,
                           ', '.join(arguments), id_suffix)
# Read-only property (no setter is defined) exposing the private
# `__descr` attribute which __init__ assigns from its `descr` argument
descr = property(lambda self: self.__descr,
                 doc="Description of the object if any")
1181 1182 1183
class Harvestable(ClassWithCollections):
    """Classes inherited from this class intend to collect attributes
    within internal processing.

    Subclassing Harvestable we gain ability to collect any internal
    data from the processing which is especially important if an
    object performs something in loop and discards some intermediate
    possibly interesting results (like in case of
    CrossValidatedTransferError and states of the trained classifier
    or TransferError).

    """

    harvested = StateVariable(enabled=False, doc=
       """Store specified attributes of classifiers at each split""")

    _KNOWN_COPY_METHODS = [ None, 'copy', 'deepcopy' ]


    def __init__(self, harvest_attribs=None, copy_attribs='copy', **kwargs):
        """Initialize state of harvestable

        :Parameters:
          harvest_attribs : list of basestr or dicts
            What attributes of call to store and return within
            harvested state variable. If an item is a dictionary,
            following keys are used ['name', 'copy']
          copy_attribs : None or basestr
            Default copying. If None -- no copying, 'copy'
            - shallow copying, 'deepcopy' -- deepcopying

        """
        ClassWithCollections.__init__(self, **kwargs)

        # has to be assigned prior to _setAttribs, which uses it as the
        # default 'copy' method of an attribute
        self.__copy_attribs = copy_attribs

        self._setAttribs(harvest_attribs)


    def _setAttribs(self, attribs):
        """Set attributes to harvest

        Each item of `attribs` is either a string (taken as 'name') or
        a dictionary.  Items are stored as dictionaries with following
        fields
         - name : functional (or arbitrary if 'obj' or 'attr' is set)
                  description of the thing to harvest,
                  e.g. 'transerror.clf.training_time'
         - obj : name of the object to harvest from (if empty,
                 'self' is assumed),
                 e.g 'transerror'
         - attr : attribute of 'obj' to harvest (None to harvest
                 'obj' itself),
                 e.g. 'clf.training_time'
         - copy : None, 'copy' or 'deepcopy' - way to copy attribute

        Dictionaries given by the caller are copied, not modified.

        :Raises ValueError: if a dictionary has no 'name', or 'copy'
          is not among `_KNOWN_COPY_METHODS`
        """
        if not attribs:
            # just to make sure it is not None or 0
            self.__attribs = []
            return

        # Validate everything first, so the object is left untouched
        # if any of the items is invalid
        processed = []
        for attrib in attribs:
            if isinstance(attrib, dict):
                if not 'name' in attrib:
                    raise ValueError(
                        "Harvestable: attribute must be a string or "
                        "a dictionary with 'name'")
                # do not modify the dictionary of the caller
                attrib = dict(attrib)
            else:
                attrib = {'name': attrib}

            # assign default method to copy
            if not 'copy' in attrib:
                attrib['copy'] = self.__copy_attribs

            # check copy method
            if not attrib['copy'] in self._KNOWN_COPY_METHODS:
                raise ValueError("Unknown method %s. Known are %s" %
                                 (attrib['copy'], self._KNOWN_COPY_METHODS))

            if not ('obj' in attrib or 'attr' in attrib):
                # Process the item to harvest
                # split into obj, attr. If obj is empty, then assume self
                split = attrib['name'].split('.', 1)
                if len(split) == 1:
                    obj, attr = split[0], None
                else:
                    obj, attr = split
                attrib.update({'obj': obj, 'attr': attr})
            else:
                # only one of the two might have been given: the other
                # one has to be present for _harvest
                attrib.setdefault('obj', '')
                attrib.setdefault('attr', None)

            if not attrib['obj']:
                attrib['obj'] = 'self'

            # TODO: may be enabling of the states??

            processed.append(attrib)

        # force the state
        self.states.enable('harvested')
        self.__attribs = processed


    def _harvest(self, vars):
        """The harvesting function: must obtain dictionary of variables
        from the caller.

        :Parameters:
          vars : dict
            Dictionary of available data. Most often locals() could be
            passed as `vars`. Private attributes which are desired to
            be harvested better be bound locally to some variable

        :Returns:
          nothing
        """

        if not self.states.isEnabled('harvested') or not self.__attribs:
            return

        if not self.states.isSet('harvested'):
            self.harvested = dict([(a['name'], []) for a in self.__attribs])

        # the same for all the attributes, so created just once
        copiers = {'copy': copy.copy,
                   'deepcopy': copy.deepcopy,
                   None: lambda x: x}

        for attrib in self.__attribs:
            attrv = vars[attrib['obj']]

            # access particular attribute if needed
            if not attrib['attr'] is None:
                # NOTE(review): eval executes arbitrary code, so
                # harvest_attribs must never originate from untrusted
                # input.  The local has to be named `attrv` since the
                # evaluated expression refers to it.
                attrv = eval('attrv.%s' % attrib['attr'])

            # copy the value if needed
            attrv = copiers[attrib['copy']](attrv)

            self.harvested[attrib['name']].append(attrv)


    # Assignment goes through _setAttribs, so new items get validated
    harvest_attribs = property(fget=lambda self:self.__attribs,
                               fset=_setAttribs)
1318