Package Bio :: Package Phylo :: Module BaseTree
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.BaseTree

   1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
   2  # This code is part of the Biopython distribution and governed by its 
   3  # license. Please see the LICENSE file that should have been included 
   4  # as part of this package. 
   5   
   6  """Base classes for Bio.Phylo objects. 
   7   
   8  All object representations for phylogenetic trees should derive from these base 
   9  classes in order to use the common methods defined on them. 
  10  """ 
  11  __docformat__ = "restructuredtext en" 
  12   
  13  import collections 
  14  import copy 
  15  import itertools 
  16  import random 
  17  import re 
  18   
  19  from Bio import _utils 
20 21 22 # General tree-traversal algorithms 23 24 -def _level_traverse(root, get_children):
25 """Traverse a tree in breadth-first (level) order.""" 26 Q = collections.deque([root]) 27 while Q: 28 v = Q.popleft() 29 yield v 30 Q.extend(get_children(v))
31
32 33 -def _preorder_traverse(root, get_children):
34 """Traverse a tree in depth-first pre-order (parent before children).""" 35 def dfs(elem): 36 yield elem 37 for v in get_children(elem): 38 for u in dfs(v): 39 yield u
40 for elem in dfs(root): 41 yield elem 42
43 44 -def _postorder_traverse(root, get_children):
45 """Traverse a tree in depth-first post-order (children before parent).""" 46 def dfs(elem): 47 for v in get_children(elem): 48 for u in dfs(v): 49 yield u 50 yield elem
51 for elem in dfs(root): 52 yield elem 53
54 55 -def _sorted_attrs(elem):
56 """Get a flat list of elem's attributes, sorted for consistency.""" 57 singles = [] 58 lists = [] 59 # Sort attributes for consistent results 60 for attrname, child in sorted(elem.__dict__.iteritems(), 61 key=lambda kv: kv[0]): 62 if child is None: 63 continue 64 if isinstance(child, list): 65 lists.extend(child) 66 else: 67 singles.append(child) 68 return (x for x in singles + lists 69 if isinstance(x, TreeElement))
70
71 72 # Factory functions to generalize searching for clades/nodes 73 74 -def _identity_matcher(target):
75 """Match a node to the target object by identity.""" 76 def match(node): 77 return (node is target)
78 return match 79
80 81 -def _class_matcher(target_cls):
82 """Match a node if it's an instance of the given class.""" 83 def match(node): 84 return isinstance(node, target_cls)
85 return match 86
87 88 -def _string_matcher(target):
89 def match(node): 90 return unicode(node) == target
91 return match 92
93 94 -def _attribute_matcher(kwargs):
95 """Match a node by specified attribute values. 96 97 ``terminal`` is a special case: True restricts the search to external (leaf) 98 nodes, False restricts to internal nodes, and None allows all tree elements 99 to be searched, including phyloXML annotations. 100 101 Otherwise, for a tree element to match the specification (i.e. for the 102 function produced by `_attribute_matcher` to return True when given a tree 103 element), it must have each of the attributes specified by the keys and 104 match each of the corresponding values -- think 'and', not 'or', for 105 multiple keys. 106 """ 107 def match(node): 108 if 'terminal' in kwargs: 109 # Special case: restrict to internal/external/any nodes 110 kwa_copy = kwargs.copy() 111 pattern = kwa_copy.pop('terminal') 112 if (pattern is not None and 113 (not hasattr(node, 'is_terminal') or 114 node.is_terminal() != pattern)): 115 return False 116 else: 117 kwa_copy = kwargs 118 for key, pattern in kwa_copy.iteritems(): 119 # Nodes must match all other specified attributes 120 if not hasattr(node, key): 121 return False 122 target = getattr(node, key) 123 if isinstance(pattern, basestring): 124 return (isinstance(target, basestring) and 125 re.match(pattern+'$', target)) 126 if isinstance(pattern, bool): 127 return (pattern == bool(target)) 128 if isinstance(pattern, int): 129 return (pattern == target) 130 if pattern is None: 131 return (target is None) 132 raise TypeError('invalid query type: %s' % type(pattern)) 133 return True
134 return match 135
136 137 -def _function_matcher(matcher_func):
138 """Safer attribute lookup -- returns False instead of raising an error.""" 139 def match(node): 140 try: 141 return matcher_func(node) 142 except (LookupError, AttributeError, ValueError, TypeError): 143 return False
144 return match 145
146 147 -def _object_matcher(obj):
148 """Retrieve a matcher function by passing an arbitrary object. 149 150 i.e. passing a `TreeElement` such as a `Clade` or `Tree` instance returns an 151 identity matcher, passing a type such as the `PhyloXML.Taxonomy` class 152 returns a class matcher, and passing a dictionary returns an attribute 153 matcher. 154 155 The resulting 'match' function returns True when given an object matching 156 the specification (identity, type or attribute values), otherwise False. 157 This is useful for writing functions that search the tree, and probably 158 shouldn't be used directly by the end user. 159 """ 160 if isinstance(obj, TreeElement): 161 return _identity_matcher(obj) 162 if isinstance(obj, type): 163 return _class_matcher(obj) 164 if isinstance(obj, basestring): 165 return _string_matcher(obj) 166 if isinstance(obj, dict): 167 return _attribute_matcher(obj) 168 if callable(obj): 169 return _function_matcher(obj) 170 raise ValueError("%s (type %s) is not a valid type for comparison." 171 % (obj, type(obj)))
172
173 174 -def _combine_matchers(target, kwargs, require_spec):
175 """Merge target specifications with keyword arguments. 176 177 Dispatch the components to the various matcher functions, then merge into a 178 single boolean function. 179 """ 180 if not target: 181 if not kwargs: 182 if require_spec: 183 raise ValueError("you must specify a target object or keyword " 184 "arguments.") 185 return lambda x: True 186 return _attribute_matcher(kwargs) 187 match_obj = _object_matcher(target) 188 if not kwargs: 189 return match_obj 190 match_kwargs = _attribute_matcher(kwargs) 191 return (lambda x: match_obj(x) and match_kwargs(x))
192
193 194 -def _combine_args(first, *rest):
195 """Convert ``[targets]`` or ``*targets`` arguments to a single iterable. 196 197 This helps other functions work like the built-in functions `max` and 198 `min`. 199 """ 200 # Background: is_monophyletic takes a single list or iterable (like the 201 # same method in Bio.Nexus.Trees); root_with_outgroup and common_ancestor 202 # take separate arguments. This mismatch was in the initial release and I 203 # didn't notice the inconsistency until after Biopython 1.55. I can think 204 # of cases where either style is more convenient, so let's support both 205 # (for backward compatibility and consistency between methods). 206 if hasattr(first, '__iter__') and not (isinstance(first, TreeElement) or 207 isinstance(first, type) or isinstance(first, basestring) or 208 isinstance(first, dict)): 209 # `terminals` is an iterable of targets 210 if rest: 211 raise ValueError("Arguments must be either a single list of " 212 "targets, or separately specified targets " 213 "(e.g. foo(t1, t2, t3)), but not both.") 214 return first 215 # `terminals` is a single target -- wrap in a container 216 return itertools.chain([first], rest)
217
218 219 # Class definitions 220 221 -class TreeElement(object):
222 """Base class for all Bio.Phylo classes.""" 223
224 - def __repr__(self):
225 """Show this object's constructor with its primitive arguments.""" 226 def pair_as_kwarg_string(key, val): 227 if isinstance(val, basestring): 228 return "%s='%s'" % (key, _utils.trim_str(unicode(val), 60, 229 u'...')) 230 return "%s=%s" % (key, val)
231 return u'%s(%s)' % (self.__class__.__name__, 232 ', '.join(pair_as_kwarg_string(key, val) 233 for key, val in self.__dict__.iteritems() 234 if val is not None and 235 type(val) in (str, int, float, bool, unicode) 236 ))
237 238 __str__ = __repr__ 239
240 241 -class TreeMixin(object):
242 """Methods for Tree- and Clade-based classes. 243 244 This lets `Tree` and `Clade` support the same traversal and searching 245 operations without requiring Clade to inherit from Tree, so Clade isn't 246 required to have all of Tree's attributes -- just ``root`` (a Clade 247 instance) and ``is_terminal``. 248 """ 249 # Traversal methods 250
251 - def _filter_search(self, filter_func, order, follow_attrs):
252 """Perform a BFS or DFS traversal through all elements in this tree. 253 254 :returns: generator of all elements for which `filter_func` is True. 255 """ 256 order_opts = {'preorder': _preorder_traverse, 257 'postorder': _postorder_traverse, 258 'level': _level_traverse} 259 try: 260 order_func = order_opts[order] 261 except KeyError: 262 raise ValueError("Invalid order '%s'; must be one of: %s" 263 % (order, tuple(order_opts.keys()))) 264 if follow_attrs: 265 get_children = _sorted_attrs 266 root = self 267 else: 268 get_children = lambda elem: elem.clades 269 root = self.root 270 return itertools.ifilter(filter_func, order_func(root, get_children))
271
272 - def find_any(self, *args, **kwargs):
273 """Return the first element found by find_elements(), or None. 274 275 This is also useful for checking whether any matching element exists in 276 the tree, and can be used in a conditional expression. 277 """ 278 hits = self.find_elements(*args, **kwargs) 279 try: 280 return hits.next() 281 except StopIteration: 282 return None
283
284 - def find_elements(self, target=None, terminal=None, order='preorder', 285 **kwargs):
286 """Find all tree elements matching the given attributes. 287 288 The arbitrary keyword arguments indicate the attribute name of the 289 sub-element and the value to match: string, integer or boolean. Strings 290 are evaluated as regular expression matches; integers are compared 291 directly for equality, and booleans evaluate the attribute's truth value 292 (True or False) before comparing. To handle nonzero floats, search with 293 a boolean argument, then filter the result manually. 294 295 If no keyword arguments are given, then just the class type is used for 296 matching. 297 298 The result is an iterable through all matching objects, by depth-first 299 search. (Not necessarily the same order as the elements appear in the 300 source file!) 301 302 :Parameters: 303 target : TreeElement instance, type, dict, or callable 304 Specifies the characteristics to search for. (The default, 305 TreeElement, matches any standard Bio.Phylo type.) 306 terminal : bool 307 A boolean value to select for or against terminal nodes (a.k.a. 308 leaf nodes). True searches for only terminal nodes, False 309 excludes terminal nodes, and the default, None, searches both 310 terminal and non-terminal nodes, as well as any tree elements 311 lacking the ``is_terminal`` method. 312 order : {'preorder', 'postorder', 'level'} 313 Tree traversal order: 'preorder' (default) is depth-first 314 search, 'postorder' is DFS with child nodes preceding parents, 315 and 'level' is breadth-first search. 316 317 Example 318 ------- 319 320 >>> from Bio.Phylo.IO import PhyloXMIO 321 >>> phx = PhyloXMLIO.read('phyloxml_examples.xml') 322 >>> matches = phx.phylogenies[5].find_elements(code='OCTVU') 323 >>> matches.next() 324 Taxonomy(code='OCTVU', scientific_name='Octopus vulgaris') 325 326 """ 327 if terminal is not None: 328 kwargs['terminal'] = terminal 329 is_matching_elem = _combine_matchers(target, kwargs, False) 330 return self._filter_search(is_matching_elem, order, True)
331
332 - def find_clades(self, target=None, terminal=None, order='preorder', 333 **kwargs):
334 """Find each clade containing a matching element. 335 336 That is, find each element as with find_elements(), but return the 337 corresponding clade object. (This is usually what you want.) 338 339 :returns: an iterable through all matching objects, searching 340 depth-first (preorder) by default. 341 """ 342 def match_attrs(elem): 343 orig_clades = elem.__dict__.pop('clades') 344 found = elem.find_any(target, **kwargs) 345 elem.clades = orig_clades 346 return (found is not None)
347 if terminal is None: 348 is_matching_elem = match_attrs 349 else: 350 def is_matching_elem(elem): 351 return ((elem.is_terminal() == terminal) and 352 match_attrs(elem))
353 return self._filter_search(is_matching_elem, order, False) 354
355 - def get_path(self, target=None, **kwargs):
356 """List the clades directly between this root and the given target. 357 358 :returns: list of all clade objects along this path, ending with the 359 given target, but excluding the root clade. 360 """ 361 # Only one path will work -- ignore weights and visits 362 path = [] 363 match = _combine_matchers(target, kwargs, True) 364 365 def check_in_path(v): 366 if match(v): 367 path.append(v) 368 return True 369 elif v.is_terminal(): 370 return False 371 for child in v: 372 if check_in_path(child): 373 path.append(v) 374 return True 375 return False
376 377 if not check_in_path(self.root): 378 return None 379 return path[-2::-1] 380
381 - def get_nonterminals(self, order='preorder'):
382 """Get a list of all of this tree's nonterminal (internal) nodes.""" 383 return list(self.find_clades(terminal=False, order=order))
384
385 - def get_terminals(self, order='preorder'):
386 """Get a list of all of this tree's terminal (leaf) nodes.""" 387 return list(self.find_clades(terminal=True, order=order))
388
389 - def trace(self, start, finish):
390 """List of all clade object between two targets in this tree. 391 392 Excluding `start`, including `finish`. 393 """ 394 mrca = self.common_ancestor(start, finish) 395 fromstart = mrca.get_path(start)[-2::-1] 396 to = mrca.get_path(finish) 397 return fromstart + [mrca] + to
398 399 # Information methods 400
401 - def common_ancestor(self, targets, *more_targets):
402 """Most recent common ancestor (clade) of all the given targets. 403 404 Edge cases: 405 - If no target is given, returns self.root 406 - If 1 target is given, returns the target 407 - If any target is not found in this tree, raises a ValueError 408 """ 409 paths = [self.get_path(t) 410 for t in _combine_args(targets, *more_targets)] 411 # Validation -- otherwise izip throws a spooky error below 412 for p, t in zip(paths, targets): 413 if p is None: 414 raise ValueError("target %s is not in this tree" % repr(t)) 415 mrca = self.root 416 for level in itertools.izip(*paths): 417 ref = level[0] 418 for other in level[1:]: 419 if ref is not other: 420 break 421 else: 422 mrca = ref 423 if ref is not mrca: 424 break 425 return mrca
426
427 - def count_terminals(self):
428 """Counts the number of terminal (leaf) nodes within this tree.""" 429 return _utils.iterlen(self.find_clades(terminal=True))
430
431 - def depths(self, unit_branch_lengths=False):
432 """Create a mapping of tree clades to depths (by branch length). 433 434 :Parameters: 435 unit_branch_lengths : bool 436 If True, count only the number of branches (levels in the tree). 437 By default the distance is the cumulative branch length leading 438 to the clade. 439 440 :returns: dict of {clade: depth}, where keys are all of the Clade 441 instances in the tree, and values are the distance from the root to 442 each clade (including terminals). 443 """ 444 if unit_branch_lengths: 445 depth_of = lambda c: 1 446 else: 447 depth_of = lambda c: c.branch_length or 0 448 depths = {} 449 450 def update_depths(node, curr_depth): 451 depths[node] = curr_depth 452 for child in node.clades: 453 new_depth = curr_depth + depth_of(child) 454 update_depths(child, new_depth)
455 456 update_depths(self.root, self.root.branch_length or 0) 457 return depths 458
459 - def distance(self, target1, target2=None):
460 """Calculate the sum of the branch lengths between two targets. 461 462 If only one target is specified, the other is the root of this tree. 463 """ 464 if target2 is None: 465 return sum(n.branch_length for n in self.get_path(target1) 466 if n.branch_length is not None) 467 mrca = self.common_ancestor(target1, target2) 468 return mrca.distance(target1) + mrca.distance(target2)
469
470 - def is_bifurcating(self):
471 """Return True if tree downstream of node is strictly bifurcating. 472 473 I.e., all nodes have either 2 or 0 children (internal or external, 474 respectively). The root may have 3 descendents and still be considered 475 part of a bifurcating tree, because it has no ancestor. 476 """ 477 # Root can be trifurcating 478 if isinstance(self, Tree) and len(self.root) == 3: 479 return (self.root.clades[0].is_bifurcating() and 480 self.root.clades[1].is_bifurcating() and 481 self.root.clades[2].is_bifurcating()) 482 if len(self.root) == 2: 483 return (self.root.clades[0].is_bifurcating() and 484 self.root.clades[1].is_bifurcating()) 485 if len(self.root) == 0: 486 return True 487 return False
488
489 - def is_monophyletic(self, terminals, *more_terminals):
490 """MRCA of terminals if they comprise a complete subclade, or False. 491 492 I.e., there exists a clade such that its terminals are the same set as 493 the given targets. 494 495 The given targets must be terminals of the tree. 496 497 To match both `Bio.Nexus.Trees` and the other multi-target methods in 498 Bio.Phylo, arguments to this method can be specified either of two ways: 499 (i) as a single list of targets, or (ii) separately specified targets, 500 e.g. is_monophyletic(t1, t2, t3) -- but not both. 501 502 For convenience, this method returns the common ancestor (MCRA) of the 503 targets if they are monophyletic (instead of the value True), and False 504 otherwise. 505 506 :returns: common ancestor if terminals are monophyletic, otherwise False. 507 """ 508 target_set = set(_combine_args(terminals, *more_terminals)) 509 current = self.root 510 while True: 511 if set(current.get_terminals()) == target_set: 512 return current 513 # Try a narrower subclade 514 for subclade in current.clades: 515 if set(subclade.get_terminals()).issuperset(target_set): 516 current = subclade 517 break 518 else: 519 return False
520
521 - def is_parent_of(self, target=None, **kwargs):
522 """True if target is a descendent of this tree. 523 524 Not required to be a direct descendent. 525 526 To check only direct descendents of a clade, simply use list membership 527 testing: ``if subclade in clade: ...`` 528 """ 529 return self.get_path(target, **kwargs) is not None
530
531 - def is_preterminal(self):
532 """True if all direct descendents are terminal.""" 533 if self.root.is_terminal(): 534 return False 535 for clade in self.root.clades: 536 if not clade.is_terminal(): 537 return False 538 return True
539
540 - def total_branch_length(self):
541 """Calculate the sum of all the branch lengths in this tree.""" 542 return sum(node.branch_length 543 for node in self.find_clades(branch_length=True))
544 545 # Tree manipulation methods 546
547 - def collapse(self, target=None, **kwargs):
548 """Deletes target from the tree, relinking its children to its parent. 549 550 :returns: the parent clade. 551 """ 552 path = self.get_path(target, **kwargs) 553 if not path: 554 raise ValueError("couldn't collapse %s in this tree" 555 % (target or kwargs)) 556 if len(path) == 1: 557 parent = self.root 558 else: 559 parent = path[-2] 560 popped = parent.clades.pop(parent.clades.index(path[-1])) 561 extra_length = popped.branch_length or 0 562 for child in popped: 563 child.branch_length += extra_length 564 parent.clades.extend(popped.clades) 565 return parent
566
567 - def collapse_all(self, target=None, **kwargs):
568 """Collapse all the descendents of this tree, leaving only terminals. 569 570 Total branch lengths are preserved, i.e. the distance to each terminal 571 stays the same. 572 573 For example, this will safely collapse nodes with poor bootstrap 574 support: 575 576 >>> tree.collapse_all(lambda c: c.confidence is not None and 577 ... c.confidence < 70) 578 579 This implementation avoids strange side-effects by using level-order 580 traversal and testing all clade properties (versus the target 581 specification) up front. In particular, if a clade meets the target 582 specification in the original tree, it will be collapsed. For example, 583 if the condition is: 584 585 >>> tree.collapse_all(lambda c: c.branch_length < 0.1) 586 587 Collapsing a clade's parent node adds the parent's branch length to the 588 child, so during the execution of collapse_all, a clade's branch_length 589 may increase. In this implementation, clades are collapsed according to 590 their properties in the original tree, not the properties when tree 591 traversal reaches the clade. (It's easier to debug.) If you want the 592 other behavior (incremental testing), modifying the source code of this 593 function is straightforward. 594 """ 595 # Read the iterable into a list to protect against in-place changes 596 matches = list(self.find_clades(target, False, 'level', **kwargs)) 597 if not matches: 598 # No matching nodes to collapse 599 return 600 # Skip the root node -- it can't be collapsed 601 if matches[0] == self.root: 602 matches.pop(0) 603 for clade in matches: 604 self.collapse(clade)
605
606 - def ladderize(self, reverse=False):
607 """Sort clades in-place according to the number of terminal nodes. 608 609 Deepest clades are last by default. Use ``reverse=True`` to sort clades 610 deepest-to-shallowest. 611 """ 612 self.root.clades.sort(key=lambda c: c.count_terminals(), 613 reverse=reverse) 614 for subclade in self.root.clades: 615 subclade.ladderize(reverse=reverse)
616
617 - def prune(self, target=None, **kwargs):
618 """Prunes a terminal clade from the tree. 619 620 If taxon is from a bifurcation, the connecting node will be collapsed 621 and its branch length added to remaining terminal node. This might be no 622 longer be a meaningful value. 623 624 :returns: parent clade of the pruned target 625 """ 626 if 'terminal' in kwargs and kwargs['terminal']: 627 raise ValueError("target must be terminal") 628 path = self.get_path(target, terminal=True, **kwargs) 629 if not path: 630 raise ValueError("can't find a matching target below this root") 631 if len(path) == 1: 632 parent = self.root 633 else: 634 parent = path[-2] 635 parent.clades.remove(path[-1]) 636 if len(parent) == 1: 637 # We deleted a branch from a bifurcation 638 if parent == self.root: 639 # If we're at the root, move the root upwards 640 # NB: This loses the length of the original branch 641 newroot = parent.clades[0] 642 newroot.branch_length = None 643 parent = self.root = newroot 644 else: 645 # If we're not at the root, collapse this parent 646 child = parent.clades[0] 647 if child.branch_length is not None: 648 child.branch_length += (parent.branch_length or 0.0) 649 if len(path) < 3: 650 grandparent = self.root 651 else: 652 grandparent = path[-3] 653 # Replace parent with child at the same place in grandparent 654 index = grandparent.clades.index(parent) 655 grandparent.clades.pop(index) 656 grandparent.clades.insert(index, child) 657 parent = grandparent 658 return parent
659
660 - def split(self, n=2, branch_length=1.0):
661 """Generate n (default 2) new descendants. 662 663 In a species tree, this is a speciation event. 664 665 New clades have the given branch_length and the same name as this 666 clade's root plus an integer suffix (counting from 0). For example, 667 splitting a clade named "A" produces sub-clades named "A0" and "A1". 668 If the clade has no name, the prefix "n" is used for child nodes, e.g. 669 "n0" and "n1". 670 """ 671 clade_cls = type(self.root) 672 base_name = self.root.name or 'n' 673 for i in range(n): 674 clade = clade_cls(name=base_name+str(i), 675 branch_length=branch_length) 676 self.root.clades.append(clade)
677
678 679 -class Tree(TreeElement, TreeMixin):
680 """A phylogenetic tree, containing global info for the phylogeny. 681 682 The structure and node-specific data is accessible through the 'root' 683 clade attached to the Tree instance. 684 685 :Parameters: 686 root : Clade 687 The starting node of the tree. If the tree is rooted, this will 688 usually be the root node. 689 rooted : bool 690 Whether or not the tree is rooted. By default, a tree is assumed to 691 be rooted. 692 id : str 693 The identifier of the tree, if there is one. 694 name : str 695 The name of the tree, in essence a label. 696 """
697 - def __init__(self, root=None, rooted=True, id=None, name=None):
698 self.root = root or Clade() 699 self.rooted = rooted 700 self.id = id 701 self.name = name
702 703 @classmethod
704 - def from_clade(cls, clade, **kwargs):
705 """Create a new Tree object given a clade. 706 707 Keyword arguments are the usual `Tree` constructor parameters. 708 """ 709 root = copy.deepcopy(clade) 710 return cls(root, **kwargs)
711 712 @classmethod
713 - def randomized(cls, taxa, branch_length=1.0, branch_stdev=None):
714 """Create a randomized bifurcating tree given a list of taxa. 715 716 :param taxa: Either an integer specifying the number of taxa to create 717 (automatically named taxon#), or an iterable of taxon names, as 718 strings. 719 720 :returns: a tree of the same type as this class. 721 """ 722 if isinstance(taxa, int): 723 taxa = ['taxon%s' % (i+1) for i in range(taxa)] 724 elif hasattr(taxa, '__iter__'): 725 taxa = list(taxa) 726 else: 727 raise TypeError("taxa argument must be integer (# taxa) or " 728 "iterable of taxon names.") 729 rtree = cls() 730 terminals = [rtree.root] 731 while len(terminals) < len(taxa): 732 newsplit = random.choice(terminals) 733 newsplit.split(branch_length=branch_length) 734 newterms = newsplit.clades 735 if branch_stdev: 736 # Add some noise to the branch lengths 737 for nt in newterms: 738 nt.branch_length = max(0, 739 random.gauss(branch_length, branch_stdev)) 740 terminals.remove(newsplit) 741 terminals.extend(newterms) 742 # Distribute taxon labels randomly 743 random.shuffle(taxa) 744 for node, name in zip(terminals, taxa): 745 node.name = name 746 return rtree
747 748 @property
749 - def clade(self):
750 """The first clade in this tree (not itself).""" 751 return self.root
752
753 - def as_phyloxml(self, **kwargs):
754 """Convert this tree to a PhyloXML-compatible Phylogeny. 755 756 This lets you use the additional annotation types PhyloXML defines, and 757 save this information when you write this tree as 'phyloxml'. 758 """ 759 from Bio.Phylo.PhyloXML import Phylogeny 760 return Phylogeny.from_tree(self, **kwargs)
761 762 # XXX Compatibility: In Python 2.6+, **kwargs can be replaced with the named 763 # keyword argument outgroup_branch_length=None
764 - def root_with_outgroup(self, outgroup_targets, *more_targets, **kwargs):
765 """Reroot this tree with the outgroup clade containing outgroup_targets. 766 767 Operates in-place. 768 769 Edge cases: 770 771 - If ``outgroup == self.root``, no change 772 - If outgroup is terminal, create new bifurcating root node with a 773 0-length branch to the outgroup 774 - If outgroup is internal, use the given outgroup node as the new 775 trifurcating root, keeping branches the same 776 - If the original root was bifurcating, drop it from the tree, 777 preserving total branch lengths 778 779 :param outgroup_branch_length: length of the branch leading to the 780 outgroup after rerooting. If not specified (None), then: 781 782 - If the outgroup is an internal node (not a single terminal taxon), 783 then use that node as the new root. 784 - Otherwise, create a new root node as the parent of the outgroup. 785 786 """ 787 # This raises a ValueError if any target is not in this tree 788 # Otherwise, common_ancestor guarantees outgroup is in this tree 789 outgroup = self.common_ancestor(outgroup_targets, *more_targets) 790 outgroup_path = self.get_path(outgroup) 791 if len(outgroup_path) == 0: 792 # Outgroup is the current root -- no change 793 return 794 795 prev_blen = outgroup.branch_length or 0.0 796 # Hideous kludge because Py2.x doesn't allow keyword args after *args 797 outgroup_branch_length = kwargs.get('outgroup_branch_length') 798 if outgroup_branch_length is not None: 799 assert 0 <= outgroup_branch_length <= prev_blen, \ 800 "outgroup_branch_length must be between 0 and the " \ 801 "original length of the branch leading to the outgroup." 802 803 if outgroup.is_terminal() or outgroup_branch_length is not None: 804 # Create a new root with a 0-length branch to the outgroup 805 outgroup.branch_length = outgroup_branch_length or 0.0 806 new_root = self.root.__class__( 807 branch_length=self.root.branch_length, clades=[outgroup]) 808 # The first branch reversal (see the upcoming loop) is modified 809 if len(outgroup_path) == 1: 810 # No nodes between the original root and outgroup to rearrange. 811 # Most of the code below will be skipped, but we still need 812 # 'new_parent' pointing at the new root. 813 new_parent = new_root 814 else: 815 parent = outgroup_path.pop(-2) 816 # First iteration of reversing the path to the outgroup 817 parent.clades.pop(parent.clades.index(outgroup)) 818 (prev_blen, parent.branch_length) = (parent.branch_length, 819 prev_blen - outgroup.branch_length) 820 new_root.clades.insert(0, parent) 821 new_parent = parent 822 else: 823 # Use the given outgroup node as the new (trifurcating) root 824 new_root = outgroup 825 new_root.branch_length = self.root.branch_length 826 new_parent = new_root 827 828 # Tracing the outgroup lineage backwards, reattach the subclades under a 829 # new root clade. Reverse the branches directly above the outgroup in 830 # the tree, but keep the descendants of those clades as they are. 831 for parent in outgroup_path[-2::-1]: 832 parent.clades.pop(parent.clades.index(new_parent)) 833 prev_blen, parent.branch_length = parent.branch_length, prev_blen 834 new_parent.clades.insert(0, parent) 835 new_parent = parent 836 837 # Finally, handle the original root according to number of descendents 838 old_root = self.root 839 if outgroup in old_root.clades: 840 assert len(outgroup_path) == 1 841 old_root.clades.pop(old_root.clades.index(outgroup)) 842 else: 843 old_root.clades.pop(old_root.clades.index(new_parent)) 844 if len(old_root) == 1: 845 # Delete the old bifurcating root & add branch lengths 846 ingroup = old_root.clades[0] 847 if ingroup.branch_length: 848 ingroup.branch_length += prev_blen 849 else: 850 ingroup.branch_length = prev_blen 851 new_parent.clades.insert(0, ingroup) 852 # ENH: If annotations are attached to old_root, do... something. 853 else: 854 # Keep the old trifurcating/polytomous root as an internal node 855 old_root.branch_length = prev_blen 856 new_parent.clades.insert(0, old_root) 857 858 self.root = new_root 859 self.rooted = True 860 return
861
862 - def root_at_midpoint(self):
863 """Root the tree at the midpoint of the two most distant taxa. 864 865 This operates in-place, leaving a bifurcating root. The topology of the 866 tree is otherwise retained, though no guarantees are made about the 867 stability of clade/node/taxon ordering. 868 """ 869 # Identify the largest pairwise distance 870 max_distance = 0.0 871 tips = self.get_terminals() 872 for tip in tips: 873 self.root_with_outgroup(tip) 874 new_max = max(self.depths().iteritems(), key=lambda nd: nd[1]) 875 if new_max[1] > max_distance: 876 tip1 = tip 877 tip2 = new_max[0] 878 max_distance = new_max[1] 879 self.root_with_outgroup(tip1) 880 # Depth to go from the ingroup tip toward the outgroup tip 881 root_remainder = 0.5 * (max_distance - (self.root.branch_length or 0)) 882 assert root_remainder >= 0 883 # Identify the midpoint and reroot there. 884 # Trace the path to the outgroup tip until all of the root depth has 885 # been traveled/accounted for. 886 for node in self.get_path(tip2): 887 root_remainder -= node.branch_length 888 if root_remainder < 0: 889 outgroup_node = node 890 outgroup_branch_length = -root_remainder 891 break 892 else: 893 raise ValueError("Somehow, failed to find the midpoint!") 894 self.root_with_outgroup(outgroup_node, 895 outgroup_branch_length=outgroup_branch_length)
896 897 # Method assumed by TreeMixin 898
899 - def is_terminal(self):
900 """True if the root of this tree is terminal.""" 901 return (not self.root.clades)
902 903 # Convention from SeqRecord and Alignment classes 904
905 - def __format__(self, format_spec):
906 """Serialize the tree as a string in the specified file format. 907 908 This method supports the ``format`` built-in function added in Python 909 2.6/3.0. 910 911 :param format_spec: a lower-case string supported by `Bio.Phylo.write` 912 as an output file format. 913 """ 914 if format_spec: 915 from StringIO import StringIO 916 from Bio.Phylo import _io 917 handle = StringIO() 918 _io.write([self], handle, format_spec) 919 return handle.getvalue() 920 else: 921 # Follow python convention and default to using __str__ 922 return str(self)
923
924 - def format(self, format):
925 """Serialize the tree as a string in the specified file format. 926 927 This duplicates the __format__ magic method for pre-2.6 Pythons. 928 """ 929 return self.__format__(format)
930 931 # Pretty-printer for the entire tree hierarchy 932
933 - def __str__(self):
934 """String representation of the entire tree. 935 936 Serializes each sub-clade recursively using ``repr`` to create a summary 937 of the object structure. 938 """ 939 TAB = ' ' 940 textlines = [] 941 942 def print_tree(obj, indent): 943 """Recursively serialize sub-elements. 944 945 This closes over textlines and modifies it in-place. 946 """ 947 textlines.append(TAB*indent + repr(obj)) 948 indent += 1 949 for attr in obj.__dict__: 950 child = getattr(obj, attr) 951 if isinstance(child, TreeElement): 952 print_tree(child, indent) 953 elif isinstance(child, list): 954 for elem in child: 955 if isinstance(elem, TreeElement): 956 print_tree(elem, indent)
957 958 print_tree(self, 0) 959 return '\n'.join(textlines)
960
961 962 -class Clade(TreeElement, TreeMixin):
963 """A recursively defined sub-tree. 964 965 :Parameters: 966 branch_length : str 967 The length of the branch leading to the root node of this clade. 968 name : str 969 The clade's name (a label). 970 clades : list 971 Sub-trees rooted directly under this tree's root. 972 confidence : number 973 Support. 974 color : BranchColor 975 The display color of the branch and descendents. 976 width : number 977 The display width of the branch and descendents. 978 """
979 - def __init__(self, branch_length=None, name=None, clades=None, 980 confidence=None, color=None, width=None):
981 self.branch_length = branch_length 982 self.name = name 983 self.clades = clades or [] 984 self.confidence = confidence 985 self.color = color 986 self.width = width
987 988 @property
989 - def root(self):
990 """Allow TreeMixin methods to traverse clades properly.""" 991 return self
992
993 - def is_terminal(self):
994 """True if this is a terminal (leaf) node.""" 995 return (not self.clades)
996 997 # Sequence-type behavior methods 998
999 - def __getitem__(self, index):
1000 """Get clades by index (integer or slice).""" 1001 if isinstance(index, int) or isinstance(index, slice): 1002 return self.clades[index] 1003 ref = self 1004 for idx in index: 1005 ref = ref[idx] 1006 return ref
1007
1008 - def __iter__(self):
1009 """Iterate through this tree's direct descendent clades (sub-trees).""" 1010 return iter(self.clades)
1011
1012 - def __len__(self):
1013 """Number of clades directy under the root.""" 1014 return len(self.clades)
1015
1016 - def __nonzero__(self):
1017 """Boolean value of an instance of this class. 1018 1019 NB: If this method is not defined, but ``__len__`` is, then the object 1020 is considered true if the result of ``__len__()`` is nonzero. We want 1021 Clade instances to always be considered True. 1022 """ 1023 return True
1024
1025 - def __str__(self):
1026 if self.name: 1027 return _utils.trim_str(self.name, 40, '...') 1028 return self.__class__.__name__
1029 1030 # Syntax sugar for setting the branch color
1031 - def _get_color(self):
1032 return self._color
1033
1034 - def _set_color(self, arg):
1035 if arg is None or isinstance(arg, BranchColor): 1036 self._color = arg 1037 elif isinstance(arg, basestring): 1038 if arg in BranchColor.color_names: 1039 # Known color name 1040 self._color = BranchColor.from_name(arg) 1041 elif arg.startswith('#') and len(arg) == 7: 1042 # HTML-style hex string 1043 self._color = BranchColor.from_hex(arg) 1044 else: 1045 raise ValueError("invalid color string %s" % arg) 1046 elif hasattr(arg, '__iter__') and len(arg) == 3: 1047 # RGB triplet 1048 self._color = BranchColor(*arg) 1049 else: 1050 raise ValueError("invalid color value %s" % arg)
1051 1052 color = property(_get_color, _set_color, doc="Branch color.")
1053
1054 1055 -class BranchColor(object):
1056 """Indicates the color of a clade when rendered graphically. 1057 1058 The color should be interpreted by client code (e.g. visualization 1059 programs) as applying to the whole clade, unless overwritten by the 1060 color(s) of sub-clades. 1061 1062 Color values must be integers from 0 to 255. 1063 """ 1064 1065 color_names = { 1066 'red': (255, 0, 0), 1067 'r': (255, 0, 0), 1068 'yellow': (255, 255, 0), 1069 'y': (255, 255, 0), 1070 'green': ( 0, 128, 0), 1071 'g': ( 0, 128, 0), 1072 'cyan': ( 0, 255, 255), 1073 'c': ( 0, 255, 255), 1074 'blue': ( 0, 0, 255), 1075 'b': ( 0, 0, 255), 1076 'magenta': (255, 0, 255), 1077 'm': (255, 0, 255), 1078 'black': ( 0, 0, 0), 1079 'k': ( 0, 0, 0), 1080 'white': (255, 255, 255), 1081 'w': (255, 255, 255), 1082 # Names standardized in HTML/CSS spec 1083 # http://w3schools.com/html/html_colornames.asp 1084 'maroon': (128, 0, 0), 1085 'olive': (128, 128, 0), 1086 'lime': ( 0, 255, 0), 1087 'aqua': ( 0, 255, 255), 1088 'teal': ( 0, 128, 128), 1089 'navy': ( 0, 0, 128), 1090 'fuchsia': (255, 0, 255), 1091 'purple': (128, 0, 128), 1092 'silver': (192, 192, 192), 1093 'gray': (128, 128, 128), 1094 # More definitions from matplotlib/gcolor2 1095 'grey': (128, 128, 128), 1096 'pink': (255, 192, 203), 1097 'salmon': (250, 128, 114), 1098 'orange': (255, 165, 0), 1099 'gold': (255, 215, 0), 1100 'tan': (210, 180, 140), 1101 'brown': (165, 42, 42), 1102 } 1103
1104 - def __init__(self, red, green, blue):
1105 for color in (red, green, blue): 1106 assert (isinstance(color, int) and 1107 0 <= color <= 255 1108 ), "Color values must be integers between 0 and 255." 1109 self.red = red 1110 self.green = green 1111 self.blue = blue
1112 1113 @classmethod
1114 - def from_hex(cls, hexstr):
1115 """Construct a BranchColor object from a hexadecimal string. 1116 1117 The string format is the same style used in HTML and CSS, such as 1118 '#FF8000' for an RGB value of (255, 128, 0). 1119 """ 1120 assert (isinstance(hexstr, basestring) and 1121 hexstr.startswith('#') and 1122 len(hexstr) == 7 1123 ), "need a 24-bit hexadecimal string, e.g. #000000" 1124 1125 def unpack(cc): 1126 return int('0x'+cc, base=16)
1127 1128 RGB = hexstr[1:3], hexstr[3:5], hexstr[5:] 1129 return cls(*map(unpack, RGB))
1130 1131 @classmethod
1132 - def from_name(cls, colorname):
1133 """Construct a BranchColor object by the color's name.""" 1134 return cls(*cls.color_names[colorname])
1135
1136 - def to_hex(self):
1137 """Return a 24-bit hexadecimal RGB representation of this color. 1138 1139 The returned string is suitable for use in HTML/CSS, as a color 1140 parameter in matplotlib, and perhaps other situations. 1141 1142 Example: 1143 1144 >>> bc = BranchColor(12, 200, 100) 1145 >>> bc.to_hex() 1146 '#0cc864' 1147 """ 1148 return '#' + hex( 1149 self.red * (16**4) 1150 + self.green * (16**2) 1151 + self.blue)[2:].zfill(6)
1152
1153 - def to_rgb(self):
1154 """Return a tuple of RGB values (0 to 255) representing this color. 1155 1156 Example: 1157 1158 >>> bc = BranchColor(255, 165, 0) 1159 >>> bc.to_rgb() 1160 (255, 165, 0) 1161 """ 1162 return (self.red, self.green, self.blue)
1163
1164 - def __repr__(self):
1165 """Preserve the standard RGB order when representing this object.""" 1166 return (u'%s(red=%d, green=%d, blue=%d)' 1167 % (self.__class__.__name__, self.red, self.green, self.blue))
1168
1169 - def __str__(self):
1170 """Show the color's RGB values.""" 1171 return "(%d, %d, %d)" % (self.red, self.green, self.blue)
1172