Package Bio :: Package Phylo :: Module BaseTree
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.BaseTree

   1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
   2  # This code is part of the Biopython distribution and governed by its 
   3  # license. Please see the LICENSE file that should have been included 
   4  # as part of this package. 
   5   
   6  """Base classes for Bio.Phylo objects. 
   7   
   8  All object representations for phylogenetic trees should derive from these base 
   9  classes in order to use the common methods defined on them. 
  10  """ 
  11  __docformat__ = "restructuredtext en" 
  12   
  13  from Bio._py3k import zip 
  14  from Bio._py3k import filter 
  15  from Bio._py3k import basestring 
  16  from Bio._py3k import unicode 
  17   
  18  import collections 
  19  import copy 
  20  import itertools 
  21  import random 
  22  import re 
  23   
  24  from Bio import _utils 
25 26 27 # General tree-traversal algorithms 28 29 -def _level_traverse(root, get_children):
30 """Traverse a tree in breadth-first (level) order.""" 31 Q = collections.deque([root]) 32 while Q: 33 v = Q.popleft() 34 yield v 35 Q.extend(get_children(v))
36
37 38 -def _preorder_traverse(root, get_children):
39 """Traverse a tree in depth-first pre-order (parent before children).""" 40 def dfs(elem): 41 yield elem 42 for v in get_children(elem): 43 for u in dfs(v): 44 yield u
45 for elem in dfs(root): 46 yield elem 47
48 49 -def _postorder_traverse(root, get_children):
50 """Traverse a tree in depth-first post-order (children before parent).""" 51 def dfs(elem): 52 for v in get_children(elem): 53 for u in dfs(v): 54 yield u 55 yield elem
56 for elem in dfs(root): 57 yield elem 58
59 60 -def _sorted_attrs(elem):
61 """Get a flat list of elem's attributes, sorted for consistency.""" 62 singles = [] 63 lists = [] 64 # Sort attributes for consistent results 65 for attrname, child in sorted(elem.__dict__.items(), 66 key=lambda kv: kv[0]): 67 if child is None: 68 continue 69 if isinstance(child, list): 70 lists.extend(child) 71 else: 72 singles.append(child) 73 return (x for x in singles + lists 74 if isinstance(x, TreeElement))
75
76 77 # Factory functions to generalize searching for clades/nodes 78 79 -def _identity_matcher(target):
80 """Match a node to the target object by identity.""" 81 def match(node): 82 return (node is target)
83 return match 84
85 86 -def _class_matcher(target_cls):
87 """Match a node if it's an instance of the given class.""" 88 def match(node): 89 return isinstance(node, target_cls)
90 return match 91
92 93 -def _string_matcher(target):
94 def match(node): 95 return unicode(node) == target
96 return match 97
98 99 -def _attribute_matcher(kwargs):
100 """Match a node by specified attribute values. 101 102 ``terminal`` is a special case: True restricts the search to external (leaf) 103 nodes, False restricts to internal nodes, and None allows all tree elements 104 to be searched, including phyloXML annotations. 105 106 Otherwise, for a tree element to match the specification (i.e. for the 107 function produced by `_attribute_matcher` to return True when given a tree 108 element), it must have each of the attributes specified by the keys and 109 match each of the corresponding values -- think 'and', not 'or', for 110 multiple keys. 111 """ 112 def match(node): 113 if 'terminal' in kwargs: 114 # Special case: restrict to internal/external/any nodes 115 kwa_copy = kwargs.copy() 116 pattern = kwa_copy.pop('terminal') 117 if (pattern is not None and 118 (not hasattr(node, 'is_terminal') or 119 node.is_terminal() != pattern)): 120 return False 121 else: 122 kwa_copy = kwargs 123 for key, pattern in kwa_copy.items(): 124 # Nodes must match all other specified attributes 125 if not hasattr(node, key): 126 return False 127 target = getattr(node, key) 128 if isinstance(pattern, basestring): 129 return (isinstance(target, basestring) and 130 re.match(pattern+'$', target)) 131 if isinstance(pattern, bool): 132 return (pattern == bool(target)) 133 if isinstance(pattern, int): 134 return (pattern == target) 135 if pattern is None: 136 return (target is None) 137 raise TypeError('invalid query type: %s' % type(pattern)) 138 return True
139 return match 140
141 142 -def _function_matcher(matcher_func):
143 """Safer attribute lookup -- returns False instead of raising an error.""" 144 def match(node): 145 try: 146 return matcher_func(node) 147 except (LookupError, AttributeError, ValueError, TypeError): 148 return False
149 return match 150
151 152 -def _object_matcher(obj):
153 """Retrieve a matcher function by passing an arbitrary object. 154 155 i.e. passing a `TreeElement` such as a `Clade` or `Tree` instance returns an 156 identity matcher, passing a type such as the `PhyloXML.Taxonomy` class 157 returns a class matcher, and passing a dictionary returns an attribute 158 matcher. 159 160 The resulting 'match' function returns True when given an object matching 161 the specification (identity, type or attribute values), otherwise False. 162 This is useful for writing functions that search the tree, and probably 163 shouldn't be used directly by the end user. 164 """ 165 if isinstance(obj, TreeElement): 166 return _identity_matcher(obj) 167 if isinstance(obj, type): 168 return _class_matcher(obj) 169 if isinstance(obj, basestring): 170 return _string_matcher(obj) 171 if isinstance(obj, dict): 172 return _attribute_matcher(obj) 173 if callable(obj): 174 return _function_matcher(obj) 175 raise ValueError("%s (type %s) is not a valid type for comparison." 176 % (obj, type(obj)))
177
178 179 -def _combine_matchers(target, kwargs, require_spec):
180 """Merge target specifications with keyword arguments. 181 182 Dispatch the components to the various matcher functions, then merge into a 183 single boolean function. 184 """ 185 if not target: 186 if not kwargs: 187 if require_spec: 188 raise ValueError("you must specify a target object or keyword " 189 "arguments.") 190 return lambda x: True 191 return _attribute_matcher(kwargs) 192 match_obj = _object_matcher(target) 193 if not kwargs: 194 return match_obj 195 match_kwargs = _attribute_matcher(kwargs) 196 return (lambda x: match_obj(x) and match_kwargs(x))
197
198 199 -def _combine_args(first, *rest):
200 """Convert ``[targets]`` or ``*targets`` arguments to a single iterable. 201 202 This helps other functions work like the built-in functions `max` and 203 `min`. 204 """ 205 # Background: is_monophyletic takes a single list or iterable (like the 206 # same method in Bio.Nexus.Trees); root_with_outgroup and common_ancestor 207 # take separate arguments. This mismatch was in the initial release and I 208 # didn't notice the inconsistency until after Biopython 1.55. I can think 209 # of cases where either style is more convenient, so let's support both 210 # (for backward compatibility and consistency between methods). 211 if hasattr(first, '__iter__') and not (isinstance(first, TreeElement) or 212 isinstance(first, type) or isinstance(first, basestring) or 213 isinstance(first, dict)): 214 # `terminals` is an iterable of targets 215 if rest: 216 raise ValueError("Arguments must be either a single list of " 217 "targets, or separately specified targets " 218 "(e.g. foo(t1, t2, t3)), but not both.") 219 return first 220 # `terminals` is a single target -- wrap in a container 221 return itertools.chain([first], rest)
222
223 224 # Class definitions 225 226 -class TreeElement(object):
227 """Base class for all Bio.Phylo classes.""" 228
229 - def __repr__(self):
230 """Show this object's constructor with its primitive arguments.""" 231 def pair_as_kwarg_string(key, val): 232 if isinstance(val, basestring): 233 return "%s='%s'" % (key, _utils.trim_str(unicode(val), 60, 234 u'...')) 235 return "%s=%s" % (key, val)
236 return u'%s(%s)' % (self.__class__.__name__, 237 ', '.join(pair_as_kwarg_string(key, val) 238 for key, val in sorted(self.__dict__.items()) 239 if val is not None and 240 type(val) in (str, int, float, bool, unicode) 241 ))
242 243 __str__ = __repr__ 244
245 246 -class TreeMixin(object):
247 """Methods for Tree- and Clade-based classes. 248 249 This lets `Tree` and `Clade` support the same traversal and searching 250 operations without requiring Clade to inherit from Tree, so Clade isn't 251 required to have all of Tree's attributes -- just ``root`` (a Clade 252 instance) and ``is_terminal``. 253 """ 254 # Traversal methods 255
256 - def _filter_search(self, filter_func, order, follow_attrs):
257 """Perform a BFS or DFS traversal through all elements in this tree. 258 259 :returns: generator of all elements for which `filter_func` is True. 260 """ 261 order_opts = {'preorder': _preorder_traverse, 262 'postorder': _postorder_traverse, 263 'level': _level_traverse} 264 try: 265 order_func = order_opts[order] 266 except KeyError: 267 raise ValueError("Invalid order '%s'; must be one of: %s" 268 % (order, tuple(order_opts))) 269 if follow_attrs: 270 get_children = _sorted_attrs 271 root = self 272 else: 273 get_children = lambda elem: elem.clades 274 root = self.root 275 return filter(filter_func, order_func(root, get_children))
276
277 - def find_any(self, *args, **kwargs):
278 """Return the first element found by find_elements(), or None. 279 280 This is also useful for checking whether any matching element exists in 281 the tree, and can be used in a conditional expression. 282 """ 283 hits = self.find_elements(*args, **kwargs) 284 try: 285 return next(hits) 286 except StopIteration: 287 return None
288
289 - def find_elements(self, target=None, terminal=None, order='preorder', 290 **kwargs):
291 """Find all tree elements matching the given attributes. 292 293 The arbitrary keyword arguments indicate the attribute name of the 294 sub-element and the value to match: string, integer or boolean. Strings 295 are evaluated as regular expression matches; integers are compared 296 directly for equality, and booleans evaluate the attribute's truth value 297 (True or False) before comparing. To handle nonzero floats, search with 298 a boolean argument, then filter the result manually. 299 300 If no keyword arguments are given, then just the class type is used for 301 matching. 302 303 The result is an iterable through all matching objects, by depth-first 304 search. (Not necessarily the same order as the elements appear in the 305 source file!) 306 307 :Parameters: 308 target : TreeElement instance, type, dict, or callable 309 Specifies the characteristics to search for. (The default, 310 TreeElement, matches any standard Bio.Phylo type.) 311 terminal : bool 312 A boolean value to select for or against terminal nodes (a.k.a. 313 leaf nodes). True searches for only terminal nodes, False 314 excludes terminal nodes, and the default, None, searches both 315 terminal and non-terminal nodes, as well as any tree elements 316 lacking the ``is_terminal`` method. 317 order : {'preorder', 'postorder', 'level'} 318 Tree traversal order: 'preorder' (default) is depth-first 319 search, 'postorder' is DFS with child nodes preceding parents, 320 and 'level' is breadth-first search. 321 322 Example 323 ------- 324 325 >>> from Bio.Phylo.IO import PhyloXMIO 326 >>> phx = PhyloXMLIO.read('phyloxml_examples.xml') 327 >>> matches = phx.phylogenies[5].find_elements(code='OCTVU') 328 >>> next(matches) 329 Taxonomy(code='OCTVU', scientific_name='Octopus vulgaris') 330 331 """ 332 if terminal is not None: 333 kwargs['terminal'] = terminal 334 is_matching_elem = _combine_matchers(target, kwargs, False) 335 return self._filter_search(is_matching_elem, order, True)
336
337 - def find_clades(self, target=None, terminal=None, order='preorder', 338 **kwargs):
339 """Find each clade containing a matching element. 340 341 That is, find each element as with find_elements(), but return the 342 corresponding clade object. (This is usually what you want.) 343 344 :returns: an iterable through all matching objects, searching 345 depth-first (preorder) by default. 346 """ 347 def match_attrs(elem): 348 orig_clades = elem.__dict__.pop('clades') 349 found = elem.find_any(target, **kwargs) 350 elem.clades = orig_clades 351 return (found is not None)
352 if terminal is None: 353 is_matching_elem = match_attrs 354 else: 355 def is_matching_elem(elem): 356 return ((elem.is_terminal() == terminal) and 357 match_attrs(elem))
358 return self._filter_search(is_matching_elem, order, False) 359
360 - def get_path(self, target=None, **kwargs):
361 """List the clades directly between this root and the given target. 362 363 :returns: list of all clade objects along this path, ending with the 364 given target, but excluding the root clade. 365 """ 366 # Only one path will work -- ignore weights and visits 367 path = [] 368 match = _combine_matchers(target, kwargs, True) 369 370 def check_in_path(v): 371 if match(v): 372 path.append(v) 373 return True 374 elif v.is_terminal(): 375 return False 376 for child in v: 377 if check_in_path(child): 378 path.append(v) 379 return True 380 return False
381 382 if not check_in_path(self.root): 383 return None 384 return path[-2::-1] 385
386 - def get_nonterminals(self, order='preorder'):
387 """Get a list of all of this tree's nonterminal (internal) nodes.""" 388 return list(self.find_clades(terminal=False, order=order))
389
390 - def get_terminals(self, order='preorder'):
391 """Get a list of all of this tree's terminal (leaf) nodes.""" 392 return list(self.find_clades(terminal=True, order=order))
393
394 - def trace(self, start, finish):
395 """List of all clade object between two targets in this tree. 396 397 Excluding `start`, including `finish`. 398 """ 399 mrca = self.common_ancestor(start, finish) 400 fromstart = mrca.get_path(start)[-2::-1] 401 to = mrca.get_path(finish) 402 return fromstart + [mrca] + to
403 404 # Information methods 405
406 - def common_ancestor(self, targets, *more_targets):
407 """Most recent common ancestor (clade) of all the given targets. 408 409 Edge cases: 410 - If no target is given, returns self.root 411 - If 1 target is given, returns the target 412 - If any target is not found in this tree, raises a ValueError 413 """ 414 paths = [self.get_path(t) 415 for t in _combine_args(targets, *more_targets)] 416 # Validation -- otherwise izip throws a spooky error below 417 for p, t in zip(paths, targets): 418 if p is None: 419 raise ValueError("target %s is not in this tree" % repr(t)) 420 mrca = self.root 421 for level in zip(*paths): 422 ref = level[0] 423 for other in level[1:]: 424 if ref is not other: 425 break 426 else: 427 mrca = ref 428 if ref is not mrca: 429 break 430 return mrca
431
432 - def count_terminals(self):
433 """Counts the number of terminal (leaf) nodes within this tree.""" 434 return _utils.iterlen(self.find_clades(terminal=True))
435
436 - def depths(self, unit_branch_lengths=False):
437 """Create a mapping of tree clades to depths (by branch length). 438 439 :Parameters: 440 unit_branch_lengths : bool 441 If True, count only the number of branches (levels in the tree). 442 By default the distance is the cumulative branch length leading 443 to the clade. 444 445 :returns: dict of {clade: depth}, where keys are all of the Clade 446 instances in the tree, and values are the distance from the root to 447 each clade (including terminals). 448 """ 449 if unit_branch_lengths: 450 depth_of = lambda c: 1 451 else: 452 depth_of = lambda c: c.branch_length or 0 453 depths = {} 454 455 def update_depths(node, curr_depth): 456 depths[node] = curr_depth 457 for child in node.clades: 458 new_depth = curr_depth + depth_of(child) 459 update_depths(child, new_depth)
460 461 update_depths(self.root, self.root.branch_length or 0) 462 return depths 463
464 - def distance(self, target1, target2=None):
465 """Calculate the sum of the branch lengths between two targets. 466 467 If only one target is specified, the other is the root of this tree. 468 """ 469 if target2 is None: 470 return sum(n.branch_length for n in self.get_path(target1) 471 if n.branch_length is not None) 472 mrca = self.common_ancestor(target1, target2) 473 return mrca.distance(target1) + mrca.distance(target2)
474
475 - def is_bifurcating(self):
476 """Return True if tree downstream of node is strictly bifurcating. 477 478 I.e., all nodes have either 2 or 0 children (internal or external, 479 respectively). The root may have 3 descendents and still be considered 480 part of a bifurcating tree, because it has no ancestor. 481 """ 482 # Root can be trifurcating 483 if isinstance(self, Tree) and len(self.root) == 3: 484 return (self.root.clades[0].is_bifurcating() and 485 self.root.clades[1].is_bifurcating() and 486 self.root.clades[2].is_bifurcating()) 487 if len(self.root) == 2: 488 return (self.root.clades[0].is_bifurcating() and 489 self.root.clades[1].is_bifurcating()) 490 if len(self.root) == 0: 491 return True 492 return False
493
494 - def is_monophyletic(self, terminals, *more_terminals):
495 """MRCA of terminals if they comprise a complete subclade, or False. 496 497 I.e., there exists a clade such that its terminals are the same set as 498 the given targets. 499 500 The given targets must be terminals of the tree. 501 502 To match both `Bio.Nexus.Trees` and the other multi-target methods in 503 Bio.Phylo, arguments to this method can be specified either of two ways: 504 (i) as a single list of targets, or (ii) separately specified targets, 505 e.g. is_monophyletic(t1, t2, t3) -- but not both. 506 507 For convenience, this method returns the common ancestor (MCRA) of the 508 targets if they are monophyletic (instead of the value True), and False 509 otherwise. 510 511 :returns: common ancestor if terminals are monophyletic, otherwise False. 512 """ 513 target_set = set(_combine_args(terminals, *more_terminals)) 514 current = self.root 515 while True: 516 if set(current.get_terminals()) == target_set: 517 return current 518 # Try a narrower subclade 519 for subclade in current.clades: 520 if set(subclade.get_terminals()).issuperset(target_set): 521 current = subclade 522 break 523 else: 524 return False
525
526 - def is_parent_of(self, target=None, **kwargs):
527 """True if target is a descendent of this tree. 528 529 Not required to be a direct descendent. 530 531 To check only direct descendents of a clade, simply use list membership 532 testing: ``if subclade in clade: ...`` 533 """ 534 return self.get_path(target, **kwargs) is not None
535
536 - def is_preterminal(self):
537 """True if all direct descendents are terminal.""" 538 if self.root.is_terminal(): 539 return False 540 for clade in self.root.clades: 541 if not clade.is_terminal(): 542 return False 543 return True
544
545 - def total_branch_length(self):
546 """Calculate the sum of all the branch lengths in this tree.""" 547 return sum(node.branch_length 548 for node in self.find_clades(branch_length=True))
549 550 # Tree manipulation methods 551
552 - def collapse(self, target=None, **kwargs):
553 """Deletes target from the tree, relinking its children to its parent. 554 555 :returns: the parent clade. 556 """ 557 path = self.get_path(target, **kwargs) 558 if not path: 559 raise ValueError("couldn't collapse %s in this tree" 560 % (target or kwargs)) 561 if len(path) == 1: 562 parent = self.root 563 else: 564 parent = path[-2] 565 popped = parent.clades.pop(parent.clades.index(path[-1])) 566 extra_length = popped.branch_length or 0 567 for child in popped: 568 child.branch_length += extra_length 569 parent.clades.extend(popped.clades) 570 return parent
571
572 - def collapse_all(self, target=None, **kwargs):
573 """Collapse all the descendents of this tree, leaving only terminals. 574 575 Total branch lengths are preserved, i.e. the distance to each terminal 576 stays the same. 577 578 For example, this will safely collapse nodes with poor bootstrap 579 support: 580 581 >>> tree.collapse_all(lambda c: c.confidence is not None and 582 ... c.confidence < 70) 583 584 This implementation avoids strange side-effects by using level-order 585 traversal and testing all clade properties (versus the target 586 specification) up front. In particular, if a clade meets the target 587 specification in the original tree, it will be collapsed. For example, 588 if the condition is: 589 590 >>> tree.collapse_all(lambda c: c.branch_length < 0.1) 591 592 Collapsing a clade's parent node adds the parent's branch length to the 593 child, so during the execution of collapse_all, a clade's branch_length 594 may increase. In this implementation, clades are collapsed according to 595 their properties in the original tree, not the properties when tree 596 traversal reaches the clade. (It's easier to debug.) If you want the 597 other behavior (incremental testing), modifying the source code of this 598 function is straightforward. 599 """ 600 # Read the iterable into a list to protect against in-place changes 601 matches = list(self.find_clades(target, False, 'level', **kwargs)) 602 if not matches: 603 # No matching nodes to collapse 604 return 605 # Skip the root node -- it can't be collapsed 606 if matches[0] == self.root: 607 matches.pop(0) 608 for clade in matches: 609 self.collapse(clade)
610
611 - def ladderize(self, reverse=False):
612 """Sort clades in-place according to the number of terminal nodes. 613 614 Deepest clades are last by default. Use ``reverse=True`` to sort clades 615 deepest-to-shallowest. 616 """ 617 self.root.clades.sort(key=lambda c: c.count_terminals(), 618 reverse=reverse) 619 for subclade in self.root.clades: 620 subclade.ladderize(reverse=reverse)
621
622 - def prune(self, target=None, **kwargs):
623 """Prunes a terminal clade from the tree. 624 625 If taxon is from a bifurcation, the connecting node will be collapsed 626 and its branch length added to remaining terminal node. This might be no 627 longer be a meaningful value. 628 629 :returns: parent clade of the pruned target 630 """ 631 if 'terminal' in kwargs and kwargs['terminal']: 632 raise ValueError("target must be terminal") 633 path = self.get_path(target, terminal=True, **kwargs) 634 if not path: 635 raise ValueError("can't find a matching target below this root") 636 if len(path) == 1: 637 parent = self.root 638 else: 639 parent = path[-2] 640 parent.clades.remove(path[-1]) 641 if len(parent) == 1: 642 # We deleted a branch from a bifurcation 643 if parent == self.root: 644 # If we're at the root, move the root upwards 645 # NB: This loses the length of the original branch 646 newroot = parent.clades[0] 647 newroot.branch_length = None 648 parent = self.root = newroot 649 else: 650 # If we're not at the root, collapse this parent 651 child = parent.clades[0] 652 if child.branch_length is not None: 653 child.branch_length += (parent.branch_length or 0.0) 654 if len(path) < 3: 655 grandparent = self.root 656 else: 657 grandparent = path[-3] 658 # Replace parent with child at the same place in grandparent 659 index = grandparent.clades.index(parent) 660 grandparent.clades.pop(index) 661 grandparent.clades.insert(index, child) 662 parent = grandparent 663 return parent
664
665 - def split(self, n=2, branch_length=1.0):
666 """Generate n (default 2) new descendants. 667 668 In a species tree, this is a speciation event. 669 670 New clades have the given branch_length and the same name as this 671 clade's root plus an integer suffix (counting from 0). For example, 672 splitting a clade named "A" produces sub-clades named "A0" and "A1". 673 If the clade has no name, the prefix "n" is used for child nodes, e.g. 674 "n0" and "n1". 675 """ 676 clade_cls = type(self.root) 677 base_name = self.root.name or 'n' 678 for i in range(n): 679 clade = clade_cls(name=base_name+str(i), 680 branch_length=branch_length) 681 self.root.clades.append(clade)
682
683 684 -class Tree(TreeElement, TreeMixin):
685 """A phylogenetic tree, containing global info for the phylogeny. 686 687 The structure and node-specific data is accessible through the 'root' 688 clade attached to the Tree instance. 689 690 :Parameters: 691 root : Clade 692 The starting node of the tree. If the tree is rooted, this will 693 usually be the root node. 694 rooted : bool 695 Whether or not the tree is rooted. By default, a tree is assumed to 696 be rooted. 697 id : str 698 The identifier of the tree, if there is one. 699 name : str 700 The name of the tree, in essence a label. 701 """
702 - def __init__(self, root=None, rooted=True, id=None, name=None):
703 self.root = root or Clade() 704 self.rooted = rooted 705 self.id = id 706 self.name = name
707 708 @classmethod
709 - def from_clade(cls, clade, **kwargs):
710 """Create a new Tree object given a clade. 711 712 Keyword arguments are the usual `Tree` constructor parameters. 713 """ 714 root = copy.deepcopy(clade) 715 return cls(root, **kwargs)
716 717 @classmethod
718 - def randomized(cls, taxa, branch_length=1.0, branch_stdev=None):
719 """Create a randomized bifurcating tree given a list of taxa. 720 721 :param taxa: Either an integer specifying the number of taxa to create 722 (automatically named taxon#), or an iterable of taxon names, as 723 strings. 724 725 :returns: a tree of the same type as this class. 726 """ 727 if isinstance(taxa, int): 728 taxa = ['taxon%s' % (i+1) for i in range(taxa)] 729 elif hasattr(taxa, '__iter__'): 730 taxa = list(taxa) 731 else: 732 raise TypeError("taxa argument must be integer (# taxa) or " 733 "iterable of taxon names.") 734 rtree = cls() 735 terminals = [rtree.root] 736 while len(terminals) < len(taxa): 737 newsplit = random.choice(terminals) 738 newsplit.split(branch_length=branch_length) 739 newterms = newsplit.clades 740 if branch_stdev: 741 # Add some noise to the branch lengths 742 for nt in newterms: 743 nt.branch_length = max(0, 744 random.gauss(branch_length, branch_stdev)) 745 terminals.remove(newsplit) 746 terminals.extend(newterms) 747 # Distribute taxon labels randomly 748 random.shuffle(taxa) 749 for node, name in zip(terminals, taxa): 750 node.name = name 751 return rtree
752 753 @property
754 - def clade(self):
755 """The first clade in this tree (not itself).""" 756 return self.root
757
758 - def as_phyloxml(self, **kwargs):
759 """Convert this tree to a PhyloXML-compatible Phylogeny. 760 761 This lets you use the additional annotation types PhyloXML defines, and 762 save this information when you write this tree as 'phyloxml'. 763 """ 764 from Bio.Phylo.PhyloXML import Phylogeny 765 return Phylogeny.from_tree(self, **kwargs)
766 767 # XXX Compatibility: In Python 2.6+, **kwargs can be replaced with the named 768 # keyword argument outgroup_branch_length=None
769 - def root_with_outgroup(self, outgroup_targets, *more_targets, **kwargs):
770 """Reroot this tree with the outgroup clade containing outgroup_targets. 771 772 Operates in-place. 773 774 Edge cases: 775 776 - If ``outgroup == self.root``, no change 777 - If outgroup is terminal, create new bifurcating root node with a 778 0-length branch to the outgroup 779 - If outgroup is internal, use the given outgroup node as the new 780 trifurcating root, keeping branches the same 781 - If the original root was bifurcating, drop it from the tree, 782 preserving total branch lengths 783 784 :param outgroup_branch_length: length of the branch leading to the 785 outgroup after rerooting. If not specified (None), then: 786 787 - If the outgroup is an internal node (not a single terminal taxon), 788 then use that node as the new root. 789 - Otherwise, create a new root node as the parent of the outgroup. 790 791 """ 792 # This raises a ValueError if any target is not in this tree 793 # Otherwise, common_ancestor guarantees outgroup is in this tree 794 outgroup = self.common_ancestor(outgroup_targets, *more_targets) 795 outgroup_path = self.get_path(outgroup) 796 if len(outgroup_path) == 0: 797 # Outgroup is the current root -- no change 798 return 799 800 prev_blen = outgroup.branch_length or 0.0 801 # Hideous kludge because Py2.x doesn't allow keyword args after *args 802 outgroup_branch_length = kwargs.get('outgroup_branch_length') 803 if outgroup_branch_length is not None: 804 assert 0 <= outgroup_branch_length <= prev_blen, \ 805 "outgroup_branch_length must be between 0 and the " \ 806 "original length of the branch leading to the outgroup." 807 808 if outgroup.is_terminal() or outgroup_branch_length is not None: 809 # Create a new root with a 0-length branch to the outgroup 810 outgroup.branch_length = outgroup_branch_length or 0.0 811 new_root = self.root.__class__( 812 branch_length=self.root.branch_length, clades=[outgroup]) 813 # The first branch reversal (see the upcoming loop) is modified 814 if len(outgroup_path) == 1: 815 # No nodes between the original root and outgroup to rearrange. 816 # Most of the code below will be skipped, but we still need 817 # 'new_parent' pointing at the new root. 818 new_parent = new_root 819 else: 820 parent = outgroup_path.pop(-2) 821 # First iteration of reversing the path to the outgroup 822 parent.clades.pop(parent.clades.index(outgroup)) 823 (prev_blen, parent.branch_length) = (parent.branch_length, 824 prev_blen - outgroup.branch_length) 825 new_root.clades.insert(0, parent) 826 new_parent = parent 827 else: 828 # Use the given outgroup node as the new (trifurcating) root 829 new_root = outgroup 830 new_root.branch_length = self.root.branch_length 831 new_parent = new_root 832 833 # Tracing the outgroup lineage backwards, reattach the subclades under a 834 # new root clade. Reverse the branches directly above the outgroup in 835 # the tree, but keep the descendants of those clades as they are. 836 for parent in outgroup_path[-2::-1]: 837 parent.clades.pop(parent.clades.index(new_parent)) 838 prev_blen, parent.branch_length = parent.branch_length, prev_blen 839 new_parent.clades.insert(0, parent) 840 new_parent = parent 841 842 # Finally, handle the original root according to number of descendents 843 old_root = self.root 844 if outgroup in old_root.clades: 845 assert len(outgroup_path) == 1 846 old_root.clades.pop(old_root.clades.index(outgroup)) 847 else: 848 old_root.clades.pop(old_root.clades.index(new_parent)) 849 if len(old_root) == 1: 850 # Delete the old bifurcating root & add branch lengths 851 ingroup = old_root.clades[0] 852 if ingroup.branch_length: 853 ingroup.branch_length += prev_blen 854 else: 855 ingroup.branch_length = prev_blen 856 new_parent.clades.insert(0, ingroup) 857 # ENH: If annotations are attached to old_root, do... something. 858 else: 859 # Keep the old trifurcating/polytomous root as an internal node 860 old_root.branch_length = prev_blen 861 new_parent.clades.insert(0, old_root) 862 863 self.root = new_root 864 self.rooted = True 865 return
866
867 - def root_at_midpoint(self):
868 """Root the tree at the midpoint of the two most distant taxa. 869 870 This operates in-place, leaving a bifurcating root. The topology of the 871 tree is otherwise retained, though no guarantees are made about the 872 stability of clade/node/taxon ordering. 873 """ 874 # Identify the largest pairwise distance 875 max_distance = 0.0 876 tips = self.get_terminals() 877 for tip in tips: 878 self.root_with_outgroup(tip) 879 new_max = max(self.depths().items(), key=lambda nd: nd[1]) 880 if new_max[1] > max_distance: 881 tip1 = tip 882 tip2 = new_max[0] 883 max_distance = new_max[1] 884 self.root_with_outgroup(tip1) 885 # Depth to go from the ingroup tip toward the outgroup tip 886 root_remainder = 0.5 * (max_distance - (self.root.branch_length or 0)) 887 assert root_remainder >= 0 888 # Identify the midpoint and reroot there. 889 # Trace the path to the outgroup tip until all of the root depth has 890 # been traveled/accounted for. 891 for node in self.get_path(tip2): 892 root_remainder -= node.branch_length 893 if root_remainder < 0: 894 outgroup_node = node 895 outgroup_branch_length = -root_remainder 896 break 897 else: 898 raise ValueError("Somehow, failed to find the midpoint!") 899 self.root_with_outgroup(outgroup_node, 900 outgroup_branch_length=outgroup_branch_length)
901 902 # Method assumed by TreeMixin 903
904 - def is_terminal(self):
905 """True if the root of this tree is terminal.""" 906 return (not self.root.clades)
907 908 # Convention from SeqRecord and Alignment classes 909
910 - def __format__(self, format_spec):
911 """Serialize the tree as a string in the specified file format. 912 913 This method supports the ``format`` built-in function added in Python 914 2.6/3.0. 915 916 :param format_spec: a lower-case string supported by `Bio.Phylo.write` 917 as an output file format. 918 """ 919 if format_spec: 920 from Bio._py3k import StringIO 921 from Bio.Phylo import _io 922 handle = StringIO() 923 _io.write([self], handle, format_spec) 924 return handle.getvalue() 925 else: 926 # Follow python convention and default to using __str__ 927 return str(self)
928
929 - def format(self, format):
930 """Serialize the tree as a string in the specified file format. 931 932 This duplicates the __format__ magic method for pre-2.6 Pythons. 933 """ 934 return self.__format__(format)
935 936 # Pretty-printer for the entire tree hierarchy 937
938 - def __str__(self):
939 """String representation of the entire tree. 940 941 Serializes each sub-clade recursively using ``repr`` to create a summary 942 of the object structure. 943 """ 944 TAB = ' ' 945 textlines = [] 946 947 def print_tree(obj, indent): 948 """Recursively serialize sub-elements. 949 950 This closes over textlines and modifies it in-place. 951 """ 952 textlines.append(TAB*indent + repr(obj)) 953 indent += 1 954 for attr in obj.__dict__: 955 child = getattr(obj, attr) 956 if isinstance(child, TreeElement): 957 print_tree(child, indent) 958 elif isinstance(child, list): 959 for elem in child: 960 if isinstance(elem, TreeElement): 961 print_tree(elem, indent)
962 963 print_tree(self, 0) 964 return '\n'.join(textlines)
965
966 967 -class Clade(TreeElement, TreeMixin):
968 """A recursively defined sub-tree. 969 970 :Parameters: 971 branch_length : str 972 The length of the branch leading to the root node of this clade. 973 name : str 974 The clade's name (a label). 975 clades : list 976 Sub-trees rooted directly under this tree's root. 977 confidence : number 978 Support. 979 color : BranchColor 980 The display color of the branch and descendents. 981 width : number 982 The display width of the branch and descendents. 983 """
984 - def __init__(self, branch_length=None, name=None, clades=None, 985 confidence=None, color=None, width=None):
986 self.branch_length = branch_length 987 self.name = name 988 self.clades = clades or [] 989 self.confidence = confidence 990 self.color = color 991 self.width = width
992 993 @property
994 - def root(self):
995 """Allow TreeMixin methods to traverse clades properly.""" 996 return self
997
998 - def is_terminal(self):
999 """True if this is a terminal (leaf) node.""" 1000 return (not self.clades)
1001 1002 # Sequence-type behavior methods 1003
1004 - def __getitem__(self, index):
1005 """Get clades by index (integer or slice).""" 1006 if isinstance(index, int) or isinstance(index, slice): 1007 return self.clades[index] 1008 ref = self 1009 for idx in index: 1010 ref = ref[idx] 1011 return ref
1012
1013 - def __iter__(self):
1014 """Iterate through this tree's direct descendent clades (sub-trees).""" 1015 return iter(self.clades)
1016
1017 - def __len__(self):
1018 """Number of clades directy under the root.""" 1019 return len(self.clades)
1020 1021 #Python 3:
1022 - def __bool__(self):
1023 """Boolean value of an instance of this class (True). 1024 1025 NB: If this method is not defined, but ``__len__`` is, then the object 1026 is considered true if the result of ``__len__()`` is nonzero. We want 1027 Clade instances to always be considered True. 1028 """ 1029 return True
1030 #Python 2: 1031 __nonzero__ = __bool__ 1032
1033 - def __str__(self):
1034 if self.name: 1035 return _utils.trim_str(self.name, 40, '...') 1036 return self.__class__.__name__
1037 1038 # Syntax sugar for setting the branch color
1039 - def _get_color(self):
1040 return self._color
1041
1042 - def _set_color(self, arg):
1043 if arg is None or isinstance(arg, BranchColor): 1044 self._color = arg 1045 elif isinstance(arg, basestring): 1046 if arg in BranchColor.color_names: 1047 # Known color name 1048 self._color = BranchColor.from_name(arg) 1049 elif arg.startswith('#') and len(arg) == 7: 1050 # HTML-style hex string 1051 self._color = BranchColor.from_hex(arg) 1052 else: 1053 raise ValueError("invalid color string %s" % arg) 1054 elif hasattr(arg, '__iter__') and len(arg) == 3: 1055 # RGB triplet 1056 self._color = BranchColor(*arg) 1057 else: 1058 raise ValueError("invalid color value %s" % arg)
1059 1060 color = property(_get_color, _set_color, doc="Branch color.")
1061
1062 1063 -class BranchColor(object):
1064 """Indicates the color of a clade when rendered graphically. 1065 1066 The color should be interpreted by client code (e.g. visualization 1067 programs) as applying to the whole clade, unless overwritten by the 1068 color(s) of sub-clades. 1069 1070 Color values must be integers from 0 to 255. 1071 """ 1072 1073 color_names = { 1074 'red': (255, 0, 0), 1075 'r': (255, 0, 0), 1076 'yellow': (255, 255, 0), 1077 'y': (255, 255, 0), 1078 'green': ( 0, 128, 0), 1079 'g': ( 0, 128, 0), 1080 'cyan': ( 0, 255, 255), 1081 'c': ( 0, 255, 255), 1082 'blue': ( 0, 0, 255), 1083 'b': ( 0, 0, 255), 1084 'magenta': (255, 0, 255), 1085 'm': (255, 0, 255), 1086 'black': ( 0, 0, 0), 1087 'k': ( 0, 0, 0), 1088 'white': (255, 255, 255), 1089 'w': (255, 255, 255), 1090 # Names standardized in HTML/CSS spec 1091 # http://w3schools.com/html/html_colornames.asp 1092 'maroon': (128, 0, 0), 1093 'olive': (128, 128, 0), 1094 'lime': ( 0, 255, 0), 1095 'aqua': ( 0, 255, 255), 1096 'teal': ( 0, 128, 128), 1097 'navy': ( 0, 0, 128), 1098 'fuchsia': (255, 0, 255), 1099 'purple': (128, 0, 128), 1100 'silver': (192, 192, 192), 1101 'gray': (128, 128, 128), 1102 # More definitions from matplotlib/gcolor2 1103 'grey': (128, 128, 128), 1104 'pink': (255, 192, 203), 1105 'salmon': (250, 128, 114), 1106 'orange': (255, 165, 0), 1107 'gold': (255, 215, 0), 1108 'tan': (210, 180, 140), 1109 'brown': (165, 42, 42), 1110 } 1111
1112 - def __init__(self, red, green, blue):
1113 for color in (red, green, blue): 1114 assert (isinstance(color, int) and 1115 0 <= color <= 255 1116 ), "Color values must be integers between 0 and 255." 1117 self.red = red 1118 self.green = green 1119 self.blue = blue
1120 1121 @classmethod
1122 - def from_hex(cls, hexstr):
1123 """Construct a BranchColor object from a hexadecimal string. 1124 1125 The string format is the same style used in HTML and CSS, such as 1126 '#FF8000' for an RGB value of (255, 128, 0). 1127 """ 1128 assert (isinstance(hexstr, basestring) and 1129 hexstr.startswith('#') and 1130 len(hexstr) == 7 1131 ), "need a 24-bit hexadecimal string, e.g. #000000" 1132 1133 RGB = hexstr[1:3], hexstr[3:5], hexstr[5:] 1134 return cls(*[int('0x'+cc, base=16) for cc in RGB])
1135 1136 @classmethod
1137 - def from_name(cls, colorname):
1138 """Construct a BranchColor object by the color's name.""" 1139 return cls(*cls.color_names[colorname])
1140
1141 - def to_hex(self):
1142 """Return a 24-bit hexadecimal RGB representation of this color. 1143 1144 The returned string is suitable for use in HTML/CSS, as a color 1145 parameter in matplotlib, and perhaps other situations. 1146 1147 Example: 1148 1149 >>> bc = BranchColor(12, 200, 100) 1150 >>> bc.to_hex() 1151 '#0cc864' 1152 """ 1153 return "#%02x%02x%02x" % (self.red, self.green, self.blue)
1154
1155 - def to_rgb(self):
1156 """Return a tuple of RGB values (0 to 255) representing this color. 1157 1158 Example: 1159 1160 >>> bc = BranchColor(255, 165, 0) 1161 >>> bc.to_rgb() 1162 (255, 165, 0) 1163 """ 1164 return (self.red, self.green, self.blue)
1165
1166 - def __repr__(self):
1167 """Preserve the standard RGB order when representing this object.""" 1168 return (u'%s(red=%d, green=%d, blue=%d)' 1169 % (self.__class__.__name__, self.red, self.green, self.blue))
1170
1171 - def __str__(self):
1172 """Show the color's RGB values.""" 1173 return "(%d, %d, %d)" % (self.red, self.green, self.blue)
1174