1
2
3
4
5
6 """Base classes for Bio.Phylo objects.
7
8 All object representations for phylogenetic trees should derive from these base
9 classes in order to use the common methods defined on them.
10 """
11 __docformat__ = "restructuredtext en"
12
13 import collections
14 import copy
15 import itertools
16 import random
17 import re
18
19 from Bio import _utils
25 """Traverse a tree in breadth-first (level) order."""
26 Q = collections.deque([root])
27 while Q:
28 v = Q.popleft()
29 yield v
30 Q.extend(get_children(v))
31
34 """Traverse a tree in depth-first pre-order (parent before children)."""
35 def dfs(elem):
36 yield elem
37 for v in get_children(elem):
38 for u in dfs(v):
39 yield u
40 for elem in dfs(root):
41 yield elem
42
43
44 -def _postorder_traverse(root, get_children):
45 """Traverse a tree in depth-first post-order (children before parent)."""
46 def dfs(elem):
47 for v in get_children(elem):
48 for u in dfs(v):
49 yield u
50 yield elem
51 for elem in dfs(root):
52 yield elem
53
56 """Get a flat list of elem's attributes, sorted for consistency."""
57 singles = []
58 lists = []
59
60 for attrname, child in sorted(elem.__dict__.iteritems(),
61 key=lambda kv: kv[0]):
62 if child is None:
63 continue
64 if isinstance(child, list):
65 lists.extend(child)
66 else:
67 singles.append(child)
68 return (x for x in singles + lists
69 if isinstance(x, TreeElement))
70
75 """Match a node to the target object by identity."""
76 def match(node):
77 return (node is target)
78 return match
79
82 """Match a node if it's an instance of the given class."""
83 def match(node):
84 return isinstance(node, target_cls)
85 return match
86
89 def match(node):
90 return unicode(node) == target
91 return match
92
95 """Match a node by specified attribute values.
96
97 ``terminal`` is a special case: True restricts the search to external (leaf)
98 nodes, False restricts to internal nodes, and None allows all tree elements
99 to be searched, including phyloXML annotations.
100
101 Otherwise, for a tree element to match the specification (i.e. for the
102 function produced by `_attribute_matcher` to return True when given a tree
103 element), it must have each of the attributes specified by the keys and
104 match each of the corresponding values -- think 'and', not 'or', for
105 multiple keys.
106 """
107 def match(node):
108 if 'terminal' in kwargs:
109
110 kwa_copy = kwargs.copy()
111 pattern = kwa_copy.pop('terminal')
112 if (pattern is not None and
113 (not hasattr(node, 'is_terminal') or
114 node.is_terminal() != pattern)):
115 return False
116 else:
117 kwa_copy = kwargs
118 for key, pattern in kwa_copy.iteritems():
119
120 if not hasattr(node, key):
121 return False
122 target = getattr(node, key)
123 if isinstance(pattern, basestring):
124 return (isinstance(target, basestring) and
125 re.match(pattern+'$', target))
126 if isinstance(pattern, bool):
127 return (pattern == bool(target))
128 if isinstance(pattern, int):
129 return (pattern == target)
130 if pattern is None:
131 return (target is None)
132 raise TypeError('invalid query type: %s' % type(pattern))
133 return True
134 return match
135
138 """Safer attribute lookup -- returns False instead of raising an error."""
139 def match(node):
140 try:
141 return matcher_func(node)
142 except (LookupError, AttributeError, ValueError, TypeError):
143 return False
144 return match
145
148 """Retrieve a matcher function by passing an arbitrary object.
149
150 i.e. passing a `TreeElement` such as a `Clade` or `Tree` instance returns an
151 identity matcher, passing a type such as the `PhyloXML.Taxonomy` class
152 returns a class matcher, and passing a dictionary returns an attribute
153 matcher.
154
155 The resulting 'match' function returns True when given an object matching
156 the specification (identity, type or attribute values), otherwise False.
157 This is useful for writing functions that search the tree, and probably
158 shouldn't be used directly by the end user.
159 """
160 if isinstance(obj, TreeElement):
161 return _identity_matcher(obj)
162 if isinstance(obj, type):
163 return _class_matcher(obj)
164 if isinstance(obj, basestring):
165 return _string_matcher(obj)
166 if isinstance(obj, dict):
167 return _attribute_matcher(obj)
168 if callable(obj):
169 return _function_matcher(obj)
170 raise ValueError("%s (type %s) is not a valid type for comparison."
171 % (obj, type(obj)))
172
175 """Merge target specifications with keyword arguments.
176
177 Dispatch the components to the various matcher functions, then merge into a
178 single boolean function.
179 """
180 if not target:
181 if not kwargs:
182 if require_spec:
183 raise ValueError("you must specify a target object or keyword "
184 "arguments.")
185 return lambda x: True
186 return _attribute_matcher(kwargs)
187 match_obj = _object_matcher(target)
188 if not kwargs:
189 return match_obj
190 match_kwargs = _attribute_matcher(kwargs)
191 return (lambda x: match_obj(x) and match_kwargs(x))
192
195 """Convert ``[targets]`` or ``*targets`` arguments to a single iterable.
196
197 This helps other functions work like the built-in functions `max` and
198 `min`.
199 """
200
201
202
203
204
205
206 if hasattr(first, '__iter__') and not (isinstance(first, TreeElement) or
207 isinstance(first, type) or isinstance(first, basestring) or
208 isinstance(first, dict)):
209
210 if rest:
211 raise ValueError("Arguments must be either a single list of "
212 "targets, or separately specified targets "
213 "(e.g. foo(t1, t2, t3)), but not both.")
214 return first
215
216 return itertools.chain([first], rest)
217
222 """Base class for all Bio.Phylo classes."""
223
225 """Show this object's constructor with its primitive arguments."""
226 def pair_as_kwarg_string(key, val):
227 if isinstance(val, basestring):
228 return "%s='%s'" % (key, _utils.trim_str(unicode(val), 60,
229 u'...'))
230 return "%s=%s" % (key, val)
231 return u'%s(%s)' % (self.__class__.__name__,
232 ', '.join(pair_as_kwarg_string(key, val)
233 for key, val in self.__dict__.iteritems()
234 if val is not None and
235 type(val) in (str, int, float, bool, unicode)
236 ))
237
238 __str__ = __repr__
239
242 """Methods for Tree- and Clade-based classes.
243
244 This lets `Tree` and `Clade` support the same traversal and searching
245 operations without requiring Clade to inherit from Tree, so Clade isn't
246 required to have all of Tree's attributes -- just ``root`` (a Clade
247 instance) and ``is_terminal``.
248 """
249
250
252 """Perform a BFS or DFS traversal through all elements in this tree.
253
254 :returns: generator of all elements for which `filter_func` is True.
255 """
256 order_opts = {'preorder': _preorder_traverse,
257 'postorder': _postorder_traverse,
258 'level': _level_traverse}
259 try:
260 order_func = order_opts[order]
261 except KeyError:
262 raise ValueError("Invalid order '%s'; must be one of: %s"
263 % (order, tuple(order_opts.keys())))
264 if follow_attrs:
265 get_children = _sorted_attrs
266 root = self
267 else:
268 get_children = lambda elem: elem.clades
269 root = self.root
270 return itertools.ifilter(filter_func, order_func(root, get_children))
271
273 """Return the first element found by find_elements(), or None.
274
275 This is also useful for checking whether any matching element exists in
276 the tree, and can be used in a conditional expression.
277 """
278 hits = self.find_elements(*args, **kwargs)
279 try:
280 return hits.next()
281 except StopIteration:
282 return None
283
284 - def find_elements(self, target=None, terminal=None, order='preorder',
285 **kwargs):
286 """Find all tree elements matching the given attributes.
287
288 The arbitrary keyword arguments indicate the attribute name of the
289 sub-element and the value to match: string, integer or boolean. Strings
290 are evaluated as regular expression matches; integers are compared
291 directly for equality, and booleans evaluate the attribute's truth value
292 (True or False) before comparing. To handle nonzero floats, search with
293 a boolean argument, then filter the result manually.
294
295 If no keyword arguments are given, then just the class type is used for
296 matching.
297
298 The result is an iterable through all matching objects, by depth-first
299 search. (Not necessarily the same order as the elements appear in the
300 source file!)
301
302 :Parameters:
303 target : TreeElement instance, type, dict, or callable
304 Specifies the characteristics to search for. (The default,
305 TreeElement, matches any standard Bio.Phylo type.)
306 terminal : bool
307 A boolean value to select for or against terminal nodes (a.k.a.
308 leaf nodes). True searches for only terminal nodes, False
309 excludes terminal nodes, and the default, None, searches both
310 terminal and non-terminal nodes, as well as any tree elements
311 lacking the ``is_terminal`` method.
312 order : {'preorder', 'postorder', 'level'}
313 Tree traversal order: 'preorder' (default) is depth-first
314 search, 'postorder' is DFS with child nodes preceding parents,
315 and 'level' is breadth-first search.
316
317 Example
318 -------
319
320 >>> from Bio.Phylo.IO import PhyloXMIO
321 >>> phx = PhyloXMLIO.read('phyloxml_examples.xml')
322 >>> matches = phx.phylogenies[5].find_elements(code='OCTVU')
323 >>> matches.next()
324 Taxonomy(code='OCTVU', scientific_name='Octopus vulgaris')
325
326 """
327 if terminal is not None:
328 kwargs['terminal'] = terminal
329 is_matching_elem = _combine_matchers(target, kwargs, False)
330 return self._filter_search(is_matching_elem, order, True)
331
332 - def find_clades(self, target=None, terminal=None, order='preorder',
333 **kwargs):
334 """Find each clade containing a matching element.
335
336 That is, find each element as with find_elements(), but return the
337 corresponding clade object. (This is usually what you want.)
338
339 :returns: an iterable through all matching objects, searching
340 depth-first (preorder) by default.
341 """
342 def match_attrs(elem):
343 orig_clades = elem.__dict__.pop('clades')
344 found = elem.find_any(target, **kwargs)
345 elem.clades = orig_clades
346 return (found is not None)
347 if terminal is None:
348 is_matching_elem = match_attrs
349 else:
350 def is_matching_elem(elem):
351 return ((elem.is_terminal() == terminal) and
352 match_attrs(elem))
353 return self._filter_search(is_matching_elem, order, False)
354
355 - def get_path(self, target=None, **kwargs):
356 """List the clades directly between this root and the given target.
357
358 :returns: list of all clade objects along this path, ending with the
359 given target, but excluding the root clade.
360 """
361
362 path = []
363 match = _combine_matchers(target, kwargs, True)
364
365 def check_in_path(v):
366 if match(v):
367 path.append(v)
368 return True
369 elif v.is_terminal():
370 return False
371 for child in v:
372 if check_in_path(child):
373 path.append(v)
374 return True
375 return False
376
377 if not check_in_path(self.root):
378 return None
379 return path[-2::-1]
380
382 """Get a list of all of this tree's nonterminal (internal) nodes."""
383 return list(self.find_clades(terminal=False, order=order))
384
386 """Get a list of all of this tree's terminal (leaf) nodes."""
387 return list(self.find_clades(terminal=True, order=order))
388
389 - def trace(self, start, finish):
390 """List of all clade object between two targets in this tree.
391
392 Excluding `start`, including `finish`.
393 """
394 mrca = self.common_ancestor(start, finish)
395 fromstart = mrca.get_path(start)[-2::-1]
396 to = mrca.get_path(finish)
397 return fromstart + [mrca] + to
398
399
400
402 """Most recent common ancestor (clade) of all the given targets.
403
404 Edge cases:
405 - If no target is given, returns self.root
406 - If 1 target is given, returns the target
407 - If any target is not found in this tree, raises a ValueError
408 """
409 paths = [self.get_path(t)
410 for t in _combine_args(targets, *more_targets)]
411
412 for p, t in zip(paths, targets):
413 if p is None:
414 raise ValueError("target %s is not in this tree" % repr(t))
415 mrca = self.root
416 for level in itertools.izip(*paths):
417 ref = level[0]
418 for other in level[1:]:
419 if ref is not other:
420 break
421 else:
422 mrca = ref
423 if ref is not mrca:
424 break
425 return mrca
426
428 """Counts the number of terminal (leaf) nodes within this tree."""
429 return _utils.iterlen(self.find_clades(terminal=True))
430
431 - def depths(self, unit_branch_lengths=False):
432 """Create a mapping of tree clades to depths (by branch length).
433
434 :Parameters:
435 unit_branch_lengths : bool
436 If True, count only the number of branches (levels in the tree).
437 By default the distance is the cumulative branch length leading
438 to the clade.
439
440 :returns: dict of {clade: depth}, where keys are all of the Clade
441 instances in the tree, and values are the distance from the root to
442 each clade (including terminals).
443 """
444 if unit_branch_lengths:
445 depth_of = lambda c: 1
446 else:
447 depth_of = lambda c: c.branch_length or 0
448 depths = {}
449
450 def update_depths(node, curr_depth):
451 depths[node] = curr_depth
452 for child in node.clades:
453 new_depth = curr_depth + depth_of(child)
454 update_depths(child, new_depth)
455
456 update_depths(self.root, self.root.branch_length or 0)
457 return depths
458
459 - def distance(self, target1, target2=None):
460 """Calculate the sum of the branch lengths between two targets.
461
462 If only one target is specified, the other is the root of this tree.
463 """
464 if target2 is None:
465 return sum(n.branch_length for n in self.get_path(target1)
466 if n.branch_length is not None)
467 mrca = self.common_ancestor(target1, target2)
468 return mrca.distance(target1) + mrca.distance(target2)
469
471 """Return True if tree downstream of node is strictly bifurcating.
472
473 I.e., all nodes have either 2 or 0 children (internal or external,
474 respectively). The root may have 3 descendents and still be considered
475 part of a bifurcating tree, because it has no ancestor.
476 """
477
478 if isinstance(self, Tree) and len(self.root) == 3:
479 return (self.root.clades[0].is_bifurcating() and
480 self.root.clades[1].is_bifurcating() and
481 self.root.clades[2].is_bifurcating())
482 if len(self.root) == 2:
483 return (self.root.clades[0].is_bifurcating() and
484 self.root.clades[1].is_bifurcating())
485 if len(self.root) == 0:
486 return True
487 return False
488
490 """MRCA of terminals if they comprise a complete subclade, or False.
491
492 I.e., there exists a clade such that its terminals are the same set as
493 the given targets.
494
495 The given targets must be terminals of the tree.
496
497 To match both `Bio.Nexus.Trees` and the other multi-target methods in
498 Bio.Phylo, arguments to this method can be specified either of two ways:
499 (i) as a single list of targets, or (ii) separately specified targets,
500 e.g. is_monophyletic(t1, t2, t3) -- but not both.
501
502 For convenience, this method returns the common ancestor (MCRA) of the
503 targets if they are monophyletic (instead of the value True), and False
504 otherwise.
505
506 :returns: common ancestor if terminals are monophyletic, otherwise False.
507 """
508 target_set = set(_combine_args(terminals, *more_terminals))
509 current = self.root
510 while True:
511 if set(current.get_terminals()) == target_set:
512 return current
513
514 for subclade in current.clades:
515 if set(subclade.get_terminals()).issuperset(target_set):
516 current = subclade
517 break
518 else:
519 return False
520
522 """True if target is a descendent of this tree.
523
524 Not required to be a direct descendent.
525
526 To check only direct descendents of a clade, simply use list membership
527 testing: ``if subclade in clade: ...``
528 """
529 return self.get_path(target, **kwargs) is not None
530
532 """True if all direct descendents are terminal."""
533 if self.root.is_terminal():
534 return False
535 for clade in self.root.clades:
536 if not clade.is_terminal():
537 return False
538 return True
539
544
545
546
547 - def collapse(self, target=None, **kwargs):
548 """Deletes target from the tree, relinking its children to its parent.
549
550 :returns: the parent clade.
551 """
552 path = self.get_path(target, **kwargs)
553 if not path:
554 raise ValueError("couldn't collapse %s in this tree"
555 % (target or kwargs))
556 if len(path) == 1:
557 parent = self.root
558 else:
559 parent = path[-2]
560 popped = parent.clades.pop(parent.clades.index(path[-1]))
561 extra_length = popped.branch_length or 0
562 for child in popped:
563 child.branch_length += extra_length
564 parent.clades.extend(popped.clades)
565 return parent
566
568 """Collapse all the descendents of this tree, leaving only terminals.
569
570 Total branch lengths are preserved, i.e. the distance to each terminal
571 stays the same.
572
573 For example, this will safely collapse nodes with poor bootstrap
574 support:
575
576 >>> tree.collapse_all(lambda c: c.confidence is not None and
577 ... c.confidence < 70)
578
579 This implementation avoids strange side-effects by using level-order
580 traversal and testing all clade properties (versus the target
581 specification) up front. In particular, if a clade meets the target
582 specification in the original tree, it will be collapsed. For example,
583 if the condition is:
584
585 >>> tree.collapse_all(lambda c: c.branch_length < 0.1)
586
587 Collapsing a clade's parent node adds the parent's branch length to the
588 child, so during the execution of collapse_all, a clade's branch_length
589 may increase. In this implementation, clades are collapsed according to
590 their properties in the original tree, not the properties when tree
591 traversal reaches the clade. (It's easier to debug.) If you want the
592 other behavior (incremental testing), modifying the source code of this
593 function is straightforward.
594 """
595
596 matches = list(self.find_clades(target, False, 'level', **kwargs))
597 if not matches:
598
599 return
600
601 if matches[0] == self.root:
602 matches.pop(0)
603 for clade in matches:
604 self.collapse(clade)
605
607 """Sort clades in-place according to the number of terminal nodes.
608
609 Deepest clades are last by default. Use ``reverse=True`` to sort clades
610 deepest-to-shallowest.
611 """
612 self.root.clades.sort(key=lambda c: c.count_terminals(),
613 reverse=reverse)
614 for subclade in self.root.clades:
615 subclade.ladderize(reverse=reverse)
616
617 - def prune(self, target=None, **kwargs):
618 """Prunes a terminal clade from the tree.
619
620 If taxon is from a bifurcation, the connecting node will be collapsed
621 and its branch length added to remaining terminal node. This might be no
622 longer be a meaningful value.
623
624 :returns: parent clade of the pruned target
625 """
626 if 'terminal' in kwargs and kwargs['terminal']:
627 raise ValueError("target must be terminal")
628 path = self.get_path(target, terminal=True, **kwargs)
629 if not path:
630 raise ValueError("can't find a matching target below this root")
631 if len(path) == 1:
632 parent = self.root
633 else:
634 parent = path[-2]
635 parent.clades.remove(path[-1])
636 if len(parent) == 1:
637
638 if parent == self.root:
639
640
641 newroot = parent.clades[0]
642 newroot.branch_length = None
643 parent = self.root = newroot
644 else:
645
646 child = parent.clades[0]
647 if child.branch_length is not None:
648 child.branch_length += (parent.branch_length or 0.0)
649 if len(path) < 3:
650 grandparent = self.root
651 else:
652 grandparent = path[-3]
653
654 index = grandparent.clades.index(parent)
655 grandparent.clades.pop(index)
656 grandparent.clades.insert(index, child)
657 parent = grandparent
658 return parent
659
660 - def split(self, n=2, branch_length=1.0):
661 """Generate n (default 2) new descendants.
662
663 In a species tree, this is a speciation event.
664
665 New clades have the given branch_length and the same name as this
666 clade's root plus an integer suffix (counting from 0). For example,
667 splitting a clade named "A" produces sub-clades named "A0" and "A1".
668 If the clade has no name, the prefix "n" is used for child nodes, e.g.
669 "n0" and "n1".
670 """
671 clade_cls = type(self.root)
672 base_name = self.root.name or 'n'
673 for i in range(n):
674 clade = clade_cls(name=base_name+str(i),
675 branch_length=branch_length)
676 self.root.clades.append(clade)
677
678
679 -class Tree(TreeElement, TreeMixin):
680 """A phylogenetic tree, containing global info for the phylogeny.
681
682 The structure and node-specific data is accessible through the 'root'
683 clade attached to the Tree instance.
684
685 :Parameters:
686 root : Clade
687 The starting node of the tree. If the tree is rooted, this will
688 usually be the root node.
689 rooted : bool
690 Whether or not the tree is rooted. By default, a tree is assumed to
691 be rooted.
692 id : str
693 The identifier of the tree, if there is one.
694 name : str
695 The name of the tree, in essence a label.
696 """
697 - def __init__(self, root=None, rooted=True, id=None, name=None):
702
703 @classmethod
705 """Create a new Tree object given a clade.
706
707 Keyword arguments are the usual `Tree` constructor parameters.
708 """
709 root = copy.deepcopy(clade)
710 return cls(root, **kwargs)
711
712 @classmethod
713 - def randomized(cls, taxa, branch_length=1.0, branch_stdev=None):
714 """Create a randomized bifurcating tree given a list of taxa.
715
716 :param taxa: Either an integer specifying the number of taxa to create
717 (automatically named taxon#), or an iterable of taxon names, as
718 strings.
719
720 :returns: a tree of the same type as this class.
721 """
722 if isinstance(taxa, int):
723 taxa = ['taxon%s' % (i+1) for i in range(taxa)]
724 elif hasattr(taxa, '__iter__'):
725 taxa = list(taxa)
726 else:
727 raise TypeError("taxa argument must be integer (# taxa) or "
728 "iterable of taxon names.")
729 rtree = cls()
730 terminals = [rtree.root]
731 while len(terminals) < len(taxa):
732 newsplit = random.choice(terminals)
733 newsplit.split(branch_length=branch_length)
734 newterms = newsplit.clades
735 if branch_stdev:
736
737 for nt in newterms:
738 nt.branch_length = max(0,
739 random.gauss(branch_length, branch_stdev))
740 terminals.remove(newsplit)
741 terminals.extend(newterms)
742
743 random.shuffle(taxa)
744 for node, name in zip(terminals, taxa):
745 node.name = name
746 return rtree
747
748 @property
750 """The first clade in this tree (not itself)."""
751 return self.root
752
754 """Convert this tree to a PhyloXML-compatible Phylogeny.
755
756 This lets you use the additional annotation types PhyloXML defines, and
757 save this information when you write this tree as 'phyloxml'.
758 """
759 from Bio.Phylo.PhyloXML import Phylogeny
760 return Phylogeny.from_tree(self, **kwargs)
761
762
763
765 """Reroot this tree with the outgroup clade containing outgroup_targets.
766
767 Operates in-place.
768
769 Edge cases:
770
771 - If ``outgroup == self.root``, no change
772 - If outgroup is terminal, create new bifurcating root node with a
773 0-length branch to the outgroup
774 - If outgroup is internal, use the given outgroup node as the new
775 trifurcating root, keeping branches the same
776 - If the original root was bifurcating, drop it from the tree,
777 preserving total branch lengths
778
779 :param outgroup_branch_length: length of the branch leading to the
780 outgroup after rerooting. If not specified (None), then:
781
782 - If the outgroup is an internal node (not a single terminal taxon),
783 then use that node as the new root.
784 - Otherwise, create a new root node as the parent of the outgroup.
785
786 """
787
788
789 outgroup = self.common_ancestor(outgroup_targets, *more_targets)
790 outgroup_path = self.get_path(outgroup)
791 if len(outgroup_path) == 0:
792
793 return
794
795 prev_blen = outgroup.branch_length or 0.0
796
797 outgroup_branch_length = kwargs.get('outgroup_branch_length')
798 if outgroup_branch_length is not None:
799 assert 0 <= outgroup_branch_length <= prev_blen, \
800 "outgroup_branch_length must be between 0 and the " \
801 "original length of the branch leading to the outgroup."
802
803 if outgroup.is_terminal() or outgroup_branch_length is not None:
804
805 outgroup.branch_length = outgroup_branch_length or 0.0
806 new_root = self.root.__class__(
807 branch_length=self.root.branch_length, clades=[outgroup])
808
809 if len(outgroup_path) == 1:
810
811
812
813 new_parent = new_root
814 else:
815 parent = outgroup_path.pop(-2)
816
817 parent.clades.pop(parent.clades.index(outgroup))
818 (prev_blen, parent.branch_length) = (parent.branch_length,
819 prev_blen - outgroup.branch_length)
820 new_root.clades.insert(0, parent)
821 new_parent = parent
822 else:
823
824 new_root = outgroup
825 new_root.branch_length = self.root.branch_length
826 new_parent = new_root
827
828
829
830
831 for parent in outgroup_path[-2::-1]:
832 parent.clades.pop(parent.clades.index(new_parent))
833 prev_blen, parent.branch_length = parent.branch_length, prev_blen
834 new_parent.clades.insert(0, parent)
835 new_parent = parent
836
837
838 old_root = self.root
839 if outgroup in old_root.clades:
840 assert len(outgroup_path) == 1
841 old_root.clades.pop(old_root.clades.index(outgroup))
842 else:
843 old_root.clades.pop(old_root.clades.index(new_parent))
844 if len(old_root) == 1:
845
846 ingroup = old_root.clades[0]
847 if ingroup.branch_length:
848 ingroup.branch_length += prev_blen
849 else:
850 ingroup.branch_length = prev_blen
851 new_parent.clades.insert(0, ingroup)
852
853 else:
854
855 old_root.branch_length = prev_blen
856 new_parent.clades.insert(0, old_root)
857
858 self.root = new_root
859 self.rooted = True
860 return
861
863 """Root the tree at the midpoint of the two most distant taxa.
864
865 This operates in-place, leaving a bifurcating root. The topology of the
866 tree is otherwise retained, though no guarantees are made about the
867 stability of clade/node/taxon ordering.
868 """
869
870 max_distance = 0.0
871 tips = self.get_terminals()
872 for tip in tips:
873 self.root_with_outgroup(tip)
874 new_max = max(self.depths().iteritems(), key=lambda nd: nd[1])
875 if new_max[1] > max_distance:
876 tip1 = tip
877 tip2 = new_max[0]
878 max_distance = new_max[1]
879 self.root_with_outgroup(tip1)
880
881 root_remainder = 0.5 * (max_distance - (self.root.branch_length or 0))
882 assert root_remainder >= 0
883
884
885
886 for node in self.get_path(tip2):
887 root_remainder -= node.branch_length
888 if root_remainder < 0:
889 outgroup_node = node
890 outgroup_branch_length = -root_remainder
891 break
892 else:
893 raise ValueError("Somehow, failed to find the midpoint!")
894 self.root_with_outgroup(outgroup_node,
895 outgroup_branch_length=outgroup_branch_length)
896
897
898
900 """True if the root of this tree is terminal."""
901 return (not self.root.clades)
902
903
904
923
930
931
932
934 """String representation of the entire tree.
935
936 Serializes each sub-clade recursively using ``repr`` to create a summary
937 of the object structure.
938 """
939 TAB = ' '
940 textlines = []
941
942 def print_tree(obj, indent):
943 """Recursively serialize sub-elements.
944
945 This closes over textlines and modifies it in-place.
946 """
947 textlines.append(TAB*indent + repr(obj))
948 indent += 1
949 for attr in obj.__dict__:
950 child = getattr(obj, attr)
951 if isinstance(child, TreeElement):
952 print_tree(child, indent)
953 elif isinstance(child, list):
954 for elem in child:
955 if isinstance(elem, TreeElement):
956 print_tree(elem, indent)
957
958 print_tree(self, 0)
959 return '\n'.join(textlines)
960
961
962 -class Clade(TreeElement, TreeMixin):
963 """A recursively defined sub-tree.
964
965 :Parameters:
966 branch_length : str
967 The length of the branch leading to the root node of this clade.
968 name : str
969 The clade's name (a label).
970 clades : list
971 Sub-trees rooted directly under this tree's root.
972 confidence : number
973 Support.
974 color : BranchColor
975 The display color of the branch and descendents.
976 width : number
977 The display width of the branch and descendents.
978 """
979 - def __init__(self, branch_length=None, name=None, clades=None,
980 confidence=None, color=None, width=None):
987
988 @property
990 """Allow TreeMixin methods to traverse clades properly."""
991 return self
992
994 """True if this is a terminal (leaf) node."""
995 return (not self.clades)
996
997
998
1000 """Get clades by index (integer or slice)."""
1001 if isinstance(index, int) or isinstance(index, slice):
1002 return self.clades[index]
1003 ref = self
1004 for idx in index:
1005 ref = ref[idx]
1006 return ref
1007
1009 """Iterate through this tree's direct descendent clades (sub-trees)."""
1010 return iter(self.clades)
1011
1013 """Number of clades directy under the root."""
1014 return len(self.clades)
1015
1017 """Boolean value of an instance of this class.
1018
1019 NB: If this method is not defined, but ``__len__`` is, then the object
1020 is considered true if the result of ``__len__()`` is nonzero. We want
1021 Clade instances to always be considered True.
1022 """
1023 return True
1024
1029
1030
1033
1035 if arg is None or isinstance(arg, BranchColor):
1036 self._color = arg
1037 elif isinstance(arg, basestring):
1038 if arg in BranchColor.color_names:
1039
1040 self._color = BranchColor.from_name(arg)
1041 elif arg.startswith('#') and len(arg) == 7:
1042
1043 self._color = BranchColor.from_hex(arg)
1044 else:
1045 raise ValueError("invalid color string %s" % arg)
1046 elif hasattr(arg, '__iter__') and len(arg) == 3:
1047
1048 self._color = BranchColor(*arg)
1049 else:
1050 raise ValueError("invalid color value %s" % arg)
1051
1052 color = property(_get_color, _set_color, doc="Branch color.")
1053
1056 """Indicates the color of a clade when rendered graphically.
1057
1058 The color should be interpreted by client code (e.g. visualization
1059 programs) as applying to the whole clade, unless overwritten by the
1060 color(s) of sub-clades.
1061
1062 Color values must be integers from 0 to 255.
1063 """
1064
1065 color_names = {
1066 'red': (255, 0, 0),
1067 'r': (255, 0, 0),
1068 'yellow': (255, 255, 0),
1069 'y': (255, 255, 0),
1070 'green': ( 0, 128, 0),
1071 'g': ( 0, 128, 0),
1072 'cyan': ( 0, 255, 255),
1073 'c': ( 0, 255, 255),
1074 'blue': ( 0, 0, 255),
1075 'b': ( 0, 0, 255),
1076 'magenta': (255, 0, 255),
1077 'm': (255, 0, 255),
1078 'black': ( 0, 0, 0),
1079 'k': ( 0, 0, 0),
1080 'white': (255, 255, 255),
1081 'w': (255, 255, 255),
1082
1083
1084 'maroon': (128, 0, 0),
1085 'olive': (128, 128, 0),
1086 'lime': ( 0, 255, 0),
1087 'aqua': ( 0, 255, 255),
1088 'teal': ( 0, 128, 128),
1089 'navy': ( 0, 0, 128),
1090 'fuchsia': (255, 0, 255),
1091 'purple': (128, 0, 128),
1092 'silver': (192, 192, 192),
1093 'gray': (128, 128, 128),
1094
1095 'grey': (128, 128, 128),
1096 'pink': (255, 192, 203),
1097 'salmon': (250, 128, 114),
1098 'orange': (255, 165, 0),
1099 'gold': (255, 215, 0),
1100 'tan': (210, 180, 140),
1101 'brown': (165, 42, 42),
1102 }
1103
1112
1113 @classmethod
1115 """Construct a BranchColor object from a hexadecimal string.
1116
1117 The string format is the same style used in HTML and CSS, such as
1118 '#FF8000' for an RGB value of (255, 128, 0).
1119 """
1120 assert (isinstance(hexstr, basestring) and
1121 hexstr.startswith('#') and
1122 len(hexstr) == 7
1123 ), "need a 24-bit hexadecimal string, e.g. #000000"
1124
1125 def unpack(cc):
1126 return int('0x'+cc, base=16)
1127
1128 RGB = hexstr[1:3], hexstr[3:5], hexstr[5:]
1129 return cls(*map(unpack, RGB))
1130
1131 @classmethod
1133 """Construct a BranchColor object by the color's name."""
1134 return cls(*cls.color_names[colorname])
1135
1137 """Return a 24-bit hexadecimal RGB representation of this color.
1138
1139 The returned string is suitable for use in HTML/CSS, as a color
1140 parameter in matplotlib, and perhaps other situations.
1141
1142 Example:
1143
1144 >>> bc = BranchColor(12, 200, 100)
1145 >>> bc.to_hex()
1146 '#0cc864'
1147 """
1148 return '#' + hex(
1149 self.red * (16**4)
1150 + self.green * (16**2)
1151 + self.blue)[2:].zfill(6)
1152
1154 """Return a tuple of RGB values (0 to 255) representing this color.
1155
1156 Example:
1157
1158 >>> bc = BranchColor(255, 165, 0)
1159 >>> bc.to_rgb()
1160 (255, 165, 0)
1161 """
1162 return (self.red, self.green, self.blue)
1163
1165 """Preserve the standard RGB order when representing this object."""
1166 return (u'%s(red=%d, green=%d, blue=%d)'
1167 % (self.__class__.__name__, self.red, self.green, self.blue))
1168
1170 """Show the color's RGB values."""
1171 return "(%d, %d, %d)" % (self.red, self.green, self.blue)
1172