Package Bio :: Package Nexus :: Module Trees
[hide private]
[frames] | no frames]

Source Code for Module Bio.Nexus.Trees

  1  # Copyright 2005-2008 by Frank Kauff & Cymon J. Cox. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bug reports welcome: fkauff@biologie.uni-kl.de or on Biopython's bugzilla. 
  7  """Tree class to handle phylogenetic trees. 
  8   
  9  Provides a set of methods to read and write newick-format tree descriptions, 
 10  get information about trees (monphyly of taxon sets, congruence between trees, 
 11  common ancestors,...) and to manipulate trees (reroot trees, split terminal 
 12  nodes). 
 13  """ 
 14   
 15  from __future__ import print_function 
 16   
 17  import random 
 18  import sys 
 19  from . import Nodes 
 20   
 21   
 22  PRECISION_BRANCHLENGTH = 6 
 23  PRECISION_SUPPORT = 6 
 24  NODECOMMENT_START = '[&' 
 25  NODECOMMENT_END = ']' 
 26   
 27   
28 -class TreeError(Exception):
29 pass
30 31
32 -class NodeData(object):
33 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
34 - def __init__(self, taxon=None, branchlength=0.0, support=None, comment=None):
35 self.taxon = taxon 36 self.branchlength = branchlength 37 self.support = support 38 self.comment = comment
39 40
41 -class Tree(Nodes.Chain):
42 """Represents a tree using a chain of nodes with on predecessor (=ancestor) 43 and multiple successors (=subclades). 44 """ 45 # A newick tree is parsed into nested list and then converted to a node list in two stages 46 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and 47 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much 48 # easier when parsing trees. 49 50 # NOTE: Tree should store its data class in something like self.dataclass=data, 51 # so that nodes that are generated have easy access to the data class 52 # Some routines use automatically NodeData, this needs to be more concise 53
54 - def __init__(self, tree=None, weight=1.0, rooted=False, name='', data=NodeData, values_are_support=False, max_support=1.0):
55 """Ntree(self,tree).""" 56 Nodes.Chain.__init__(self) 57 self.dataclass = data 58 self.__values_are_support = values_are_support 59 self.max_support = max_support 60 self.weight = weight 61 self.rooted = rooted 62 self.name = name 63 root = Nodes.Node(data()) 64 self.root = self.add(root) 65 if tree: # use the tree we have 66 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc 67 tree = tree.strip().replace('\n', '').replace('\r', '') 68 # there's discrepancy whether newick allows semicolons et the end 69 tree = tree.rstrip(';') 70 subtree_info, base_info = self._parse(tree) 71 root.data = self._add_nodedata(root.data, [[], base_info]) 72 self._add_subtree(parent_id=root.id, tree=subtree_info)
73
74 - def _parse(self, tree):
75 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively.""" 76 # Remove any leading/trailing white space - want any string starting 77 # with " (..." should be recognised as a leaf, "(..." 78 tree = tree.strip() 79 if tree.count('(') != tree.count(')'): 80 raise TreeError('Parentheses do not match in (sub)tree: ' + tree) 81 if tree.count('(') == 0: # a leaf 82 # check if there's a colon, or a special comment, or both after the taxon name 83 nodecomment = tree.find(NODECOMMENT_START) 84 colon = tree.find(':') 85 if colon == -1 and nodecomment == -1: # none 86 return [tree, [None]] 87 elif colon == -1 and nodecomment > -1: # only special comment 88 return [tree[:nodecomment], self._get_values(tree[nodecomment:])] 89 elif colon > -1 and nodecomment == -1: # only numerical values 90 return [tree[:colon], self._get_values(tree[colon + 1:])] 91 elif colon < nodecomment: # taxon name ends at first colon or with special comment 92 return [tree[:colon], self._get_values(tree[colon + 1:])] 93 else: 94 return [tree[:nodecomment], self._get_values(tree[nodecomment:])] 95 else: 96 closing = tree.rfind(')') 97 val = self._get_values(tree[closing + 1:]) 98 if not val: 99 val = [None] 100 subtrees = [] 101 plevel = 0 102 prev = 1 103 for p in range(1, closing): 104 if tree[p] == '(': 105 plevel += 1 106 elif tree[p] == ')': 107 plevel -= 1 108 elif tree[p] == ',' and plevel == 0: 109 subtrees.append(tree[prev:p]) 110 prev = p + 1 111 subtrees.append(tree[prev:closing]) 112 subclades = [self._parse(subtree) for subtree in subtrees] 113 return [subclades, val]
114
115 - def _add_subtree(self, parent_id=None, tree=None):
116 """Adds leaf or tree (in newick format) to a parent_id.""" 117 if parent_id is None: 118 raise TreeError('Need node_id to connect to.') 119 for st in tree: 120 nd = self.dataclass() 121 nd = self._add_nodedata(nd, st) 122 if isinstance(st[0], list): # it's a subtree 123 sn = Nodes.Node(nd) 124 self.add(sn, parent_id) 125 self._add_subtree(sn.id, st[0]) 126 else: # it's a leaf 127 nd.taxon = st[0] 128 leaf = Nodes.Node(nd) 129 self.add(leaf, parent_id)
130
131 - def _add_nodedata(self, nd, st):
132 """Add data to the node parsed from the comments, taxon and support. 133 """ 134 if isinstance(st[1][-1], str) and st[1][-1].startswith(NODECOMMENT_START): 135 nd.comment = st[1].pop(-1) 136 # if the first element is a string, it's the subtree node taxon 137 elif isinstance(st[1][0], str): 138 nd.taxon = st[1][0] 139 st[1] = st[1][1:] 140 if len(st) > 1: 141 if len(st[1]) >= 2: # if there's two values, support comes first. Is that always so? 142 nd.support = st[1][0] 143 if st[1][1] is not None: 144 nd.branchlength = st[1][1] 145 elif len(st[1]) == 1: # otherwise it could be real branchlengths or support as branchlengths 146 if not self.__values_are_support: # default 147 if st[1][0] is not None: 148 nd.branchlength = st[1][0] 149 else: 150 nd.support = st[1][0] 151 return nd
152
153 - def _get_values(self, text):
154 """Extracts values (support/branchlength) from xx[:yyy], xx.""" 155 156 if text == '': 157 return None 158 nodecomment = None 159 if NODECOMMENT_START in text: # if there's a [&....] comment, cut it out 160 nc_start = text.find(NODECOMMENT_START) 161 nc_end = text.find(NODECOMMENT_END) 162 if nc_end == -1: 163 raise TreeError('Error in tree description: Found %s without matching %s' 164 % (NODECOMMENT_START, NODECOMMENT_END)) 165 nodecomment = text[nc_start:nc_end + 1] 166 text = text[:nc_start] + text[nc_end + 1:] 167 168 # pase out supports and branchlengths, with internal node taxa info 169 values = [] 170 taxonomy = None 171 for part in [t.strip() for t in text.split(":")]: 172 if part: 173 try: 174 values.append(float(part)) 175 except ValueError: 176 assert taxonomy is None, "Two string taxonomies?" 177 taxonomy = part 178 if taxonomy: 179 values.insert(0, taxonomy) 180 if nodecomment: 181 values.append(nodecomment) 182 return values
183
184 - def _walk(self, node=None):
185 """Return all node_ids downwards from a node.""" 186 187 if node is None: 188 node = self.root 189 for n in self.node(node).succ: 190 yield n 191 for sn in self._walk(n): 192 yield sn
193
194 - def node(self, node_id):
195 """Return the instance of node_id. 196 197 node = node(self,node_id) 198 """ 199 if node_id not in self.chain: 200 raise TreeError('Unknown node_id: %d' % node_id) 201 return self.chain[node_id]
202
203 - def split(self, parent_id=None, n=2, branchlength=1.0):
204 """Speciation: generates n (default two) descendants of a node. 205 206 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0): 207 """ 208 if parent_id is None: 209 raise TreeError('Missing node_id.') 210 ids = [] 211 parent_data = self.chain[parent_id].data 212 for i in range(n): 213 node = Nodes.Node() 214 if parent_data: 215 node.data = self.dataclass() 216 # each node has taxon and branchlength attribute 217 if parent_data.taxon: 218 node.data.taxon = parent_data.taxon + str(i) 219 node.data.branchlength = branchlength 220 ids.append(self.add(node, parent_id)) 221 return ids
222
223 - def search_taxon(self, taxon):
224 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes. 225 226 node_id = search_taxon(self,taxon) 227 """ 228 for id, node in self.chain.items(): 229 if node.data.taxon == taxon: 230 return id 231 return None
232
233 - def prune(self, taxon):
234 """Prunes a terminal taxon from the tree. 235 236 id_of_previous_node = prune(self,taxon) 237 If taxon is from a bifurcation, the connectiong node will be collapsed 238 and its branchlength added to remaining terminal node. This might be no 239 longer a meaningful value' 240 """ 241 242 id = self.search_taxon(taxon) 243 if id is None: 244 raise TreeError('Taxon not found: %s' % taxon) 245 elif id not in self.get_terminals(): 246 raise TreeError('Not a terminal taxon: %s' % taxon) 247 else: 248 prev = self.unlink(id) 249 self.kill(id) 250 if len(self.node(prev).succ) == 1: 251 if prev == self.root: # we deleted one branch of a bifurcating root, then we have to move the root upwards 252 self.root = self.node(self.root).succ[0] 253 self.node(self.root).branchlength = 0.0 254 self.kill(prev) 255 else: 256 succ = self.node(prev).succ[0] 257 new_bl = self.node(prev).data.branchlength + self.node(succ).data.branchlength 258 self.collapse(prev) 259 self.node(succ).data.branchlength = new_bl 260 return prev
261
262 - def get_taxa(self, node_id=None):
263 """Return a list of all otus downwards from a node. 264 265 nodes = get_taxa(self,node_id=None) 266 """ 267 268 if node_id is None: 269 node_id = self.root 270 if node_id not in self.chain: 271 raise TreeError('Unknown node_id: %d.' % node_id) 272 if self.chain[node_id].succ == []: 273 if self.chain[node_id].data: 274 return [self.chain[node_id].data.taxon] 275 else: 276 return None 277 else: 278 list = [] 279 for succ in self.chain[node_id].succ: 280 list.extend(self.get_taxa(succ)) 281 return list
282
283 - def get_terminals(self):
284 """Return a list of all terminal nodes.""" 285 return [i for i in self.all_ids() if self.node(i).succ == []]
286
287 - def is_terminal(self, node):
288 """Returns True if node is a terminal node.""" 289 return self.node(node).succ == []
290
291 - def is_internal(self, node):
292 """Returns True if node is an internal node.""" 293 return len(self.node(node).succ) > 0
294
295 - def is_preterminal(self, node):
296 """Returns True if all successors of a node are terminal ones.""" 297 if self.is_terminal(node): 298 return False not in [self.is_terminal(n) for n in self.node(node).succ] 299 else: 300 return False
301
302 - def count_terminals(self, node=None):
303 """Counts the number of terminal nodes that are attached to a node.""" 304 if node is None: 305 node = self.root 306 return len([n for n in self._walk(node) if self.is_terminal(n)])
307
308 - def collapse_genera(self, space_equals_underscore=True):
309 """Collapses all subtrees which belong to the same genus (i.e share the same first word in their taxon name.)""" 310 311 while True: 312 for n in self._walk(): 313 if self.is_terminal(n): 314 continue 315 taxa = self.get_taxa(n) 316 genera = [] 317 for t in taxa: 318 if space_equals_underscore: 319 t = t.replace(' ', '_') 320 try: 321 genus = t.split('_', 1)[0] 322 except IndexError: 323 genus = 'None' 324 if genus not in genera: 325 genera.append(genus) 326 if len(genera) == 1: 327 self.node(n).data.taxon = genera[0] + ' <collapsed>' 328 # now we kill all nodes downstream 329 nodes2kill = [kn for kn in self._walk(node=n)] 330 for kn in nodes2kill: 331 self.kill(kn) 332 self.node(n).succ = [] 333 break # break out of for loop because node list from _walk will be inconsistent 334 else: # for loop exhausted: no genera to collapse left 335 break # while
336
337 - def sum_branchlength(self, root=None, node=None):
338 """Adds up the branchlengths from root (default self.root) to node. 339 340 sum = sum_branchlength(self,root=None,node=None) 341 """ 342 343 if root is None: 344 root = self.root 345 if node is None: 346 raise TreeError('Missing node id.') 347 blen = 0.0 348 while node is not None and node is not root: 349 blen += self.node(node).data.branchlength 350 node = self.node(node).prev 351 return blen
352
353 - def set_subtree(self, node):
354 """Return subtree as a set of nested sets. 355 356 sets = set_subtree(self,node) 357 """ 358 359 if self.node(node).succ == []: 360 return self.node(node).data.taxon 361 else: 362 try: 363 return frozenset(self.set_subtree(n) for n in self.node(node).succ) 364 except: 365 print(node) 366 print(self.node(node).succ) 367 for n in self.node(node).succ: 368 print("%s %s" % (n, self.set_subtree(n))) 369 print([self.set_subtree(n) for n in self.node(node).succ]) 370 raise
371
372 - def is_identical(self, tree2):
373 """Compare tree and tree2 for identity. 374 375 result = is_identical(self,tree2) 376 """ 377 return self.set_subtree(self.root) == tree2.set_subtree(tree2.root)
378
379 - def is_compatible(self, tree2, threshold, strict=True):
380 """Compares branches with support>threshold for compatibility. 381 382 result = is_compatible(self,tree2,threshold) 383 """ 384 385 # check if both trees have the same set of taxa. strict=True enforces this. 386 missing2 = set(self.get_taxa()) - set(tree2.get_taxa()) 387 missing1 = set(tree2.get_taxa()) - set(self.get_taxa()) 388 if strict and (missing1 or missing2): 389 if missing1: 390 print('Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1), self.name)) 391 if missing2: 392 print('Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2), tree2.name)) 393 raise TreeError('Can\'t compare trees with different taxon compositions.') 394 t1 = [(set(self.get_taxa(n)), self.node(n).data.support) for n in self.all_ids() if 395 self.node(n).succ and 396 (self.node(n).data and self.node(n).data.support and self.node(n).data.support >= threshold)] 397 t2 = [(set(tree2.get_taxa(n)), tree2.node(n).data.support) for n in tree2.all_ids() if 398 tree2.node(n).succ and 399 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support >= threshold)] 400 conflict = [] 401 for (st1, sup1) in t1: 402 for (st2, sup2) in t2: 403 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes 404 intersect, notin1, notin2 = st1 & st2, st2 - st1, st1 - st2 # all three are non-empty sets 405 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ??? 406 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa 407 conflict.append((st1, sup1, st2, sup2, intersect, notin1, notin2)) 408 return conflict
409
410 - def common_ancestor(self, node1, node2):
411 """Return the common ancestor that connects two nodes. 412 413 node_id = common_ancestor(self,node1,node2) 414 """ 415 416 l1 = [self.root] + self.trace(self.root, node1) 417 l2 = [self.root] + self.trace(self.root, node2) 418 return [n for n in l1 if n in l2][-1]
419
420 - def distance(self, node1, node2):
421 """Add and return the sum of the branchlengths between two nodes. 422 dist = distance(self,node1,node2) 423 """ 424 425 ca = self.common_ancestor(node1, node2) 426 return self.sum_branchlength(ca, node1) + self.sum_branchlength(ca, node2)
427
428 - def is_monophyletic(self, taxon_list):
429 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise. 430 431 result = is_monophyletic(self,taxon_list) 432 """ 433 if isinstance(taxon_list, str): 434 taxon_set = set([taxon_list]) 435 else: 436 taxon_set = set(taxon_list) 437 node_id = self.root 438 while True: 439 subclade_taxa = set(self.get_taxa(node_id)) 440 if subclade_taxa == taxon_set: # are we there? 441 return node_id 442 else: # check subnodes 443 for subnode in self.chain[node_id].succ: 444 if set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream 445 node_id = subnode 446 break # out of for loop 447 else: 448 return -1 # taxon set was not with successors, for loop exhausted
449
450 - def is_bifurcating(self, node=None):
451 """Return True if tree downstream of node is strictly bifurcating.""" 452 if node is None: 453 node = self.root 454 if node == self.root and len(self.node(node).succ) == 3: # root can be trifurcating, because it has no ancestor 455 return self.is_bifurcating(self.node(node).succ[0]) and \ 456 self.is_bifurcating(self.node(node).succ[1]) and \ 457 self.is_bifurcating(self.node(node).succ[2]) 458 if len(self.node(node).succ) == 2: 459 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1]) 460 elif len(self.node(node).succ) == 0: 461 return True 462 else: 463 return False
464
465 - def branchlength2support(self):
466 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0 467 468 This is necessary when support has been stored as branchlength (e.g. paup), and has thus 469 been read in as branchlength. 470 """ 471 472 for n in self.chain: 473 self.node(n).data.support = self.node(n).data.branchlength 474 self.node(n).data.branchlength = 0.0
475
476 - def convert_absolute_support(self, nrep):
477 """Convert absolute support (clade-count) to rel. frequencies. 478 479 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of 480 calculating relative frequencies.""" 481 482 for n in self._walk(): 483 if self.node(n).data.support: 484 self.node(n).data.support /= float(nrep)
485
486 - def has_support(self, node=None):
487 """Returns True if any of the nodes has data.support != None.""" 488 for n in self._walk(node): 489 if self.node(n).data.support: 490 return True 491 else: 492 return False
493
494 - def randomize(self, ntax=None, taxon_list=None, branchlength=1.0, branchlength_sd=None, bifurcate=True):
495 """Generates a random tree with ntax taxa and/or taxa from taxlabels. 496 497 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True) 498 Trees are bifurcating by default. (Polytomies not yet supported). 499 """ 500 501 if not ntax and taxon_list: 502 ntax = len(taxon_list) 503 elif not taxon_list and ntax: 504 taxon_list = ['taxon' + str(i + 1) for i in range(ntax)] 505 elif not ntax and not taxon_list: 506 raise TreeError('Either numer of taxa or list of taxa must be specified.') 507 elif ntax != len(taxon_list): 508 raise TreeError('Length of taxon list must correspond to ntax.') 509 # initiate self with empty root 510 self.__init__() 511 terminals = self.get_terminals() 512 # bifurcate randomly at terminal nodes until ntax is reached 513 while len(terminals) < ntax: 514 newsplit = random.choice(terminals) 515 new_terminals = self.split(parent_id=newsplit, branchlength=branchlength) 516 # if desired, give some variation to the branch length 517 if branchlength_sd: 518 for nt in new_terminals: 519 bl = random.gauss(branchlength, branchlength_sd) 520 if bl < 0: 521 bl = 0 522 self.node(nt).data.branchlength = bl 523 terminals.extend(new_terminals) 524 terminals.remove(newsplit) 525 # distribute taxon labels randomly 526 random.shuffle(taxon_list) 527 for (node, name) in zip(terminals, taxon_list): 528 self.node(node).data.taxon = name
529
530 - def display(self):
531 """Quick and dirty lists of all nodes.""" 532 table = [('#', 'taxon', 'prev', 'succ', 'brlen', 'blen (sum)', 'support', 'comment')] 533 # Sort this to be consistent across CPython, Jython, etc 534 for i in sorted(self.all_ids()): 535 n = self.node(i) 536 if not n.data: 537 table.append((str(i), '-', str(n.prev), str(n.succ), '-', '-', '-', '-')) 538 else: 539 tx = n.data.taxon 540 if not tx: 541 tx = '-' 542 blength = "%0.2f" % n.data.branchlength 543 if blength is None: 544 blength = '-' 545 sum_blength = '-' 546 else: 547 sum_blength = "%0.2f" % self.sum_branchlength(node=i) 548 support = n.data.support 549 if support is None: 550 support = '-' 551 else: 552 support = "%0.2f" % support 553 comment = n.data.comment 554 if comment is None: 555 comment = '-' 556 table.append((str(i), tx, str(n.prev), str(n.succ), 557 blength, sum_blength, support, comment)) 558 print('\n'.join('%3s %32s %15s %15s %8s %10s %8s %20s' % l for l in table)) 559 print('\nRoot: %s' % self.root)
560
561 - def to_string(self, support_as_branchlengths=False, branchlengths_only=False, plain=True, plain_newick=False, ladderize=None, ignore_comments=True):
562 """Return a paup compatible tree line.""" 563 # if there's a conflict in the arguments, we override plain=True 564 if support_as_branchlengths or branchlengths_only: 565 plain = False 566 self.support_as_branchlengths = support_as_branchlengths 567 self.branchlengths_only = branchlengths_only 568 self.ignore_comments = ignore_comments 569 self.plain = plain 570 571 def make_info_string(data, terminal=False): 572 """Creates nicely formatted support/branchlengths.""" 573 # CHECK FORMATTING 574 if self.plain: # plain tree only. That's easy. 575 info_string = '' 576 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths 577 if terminal: # terminal branches have 100% support 578 info_string = ':%1.2f' % self.max_support 579 elif data.support: 580 info_string = ':%1.2f' % (data.support) 581 else: 582 info_string = ':0.00' 583 elif self.branchlengths_only: # write only branchlengths, ignore support 584 info_string = ':%1.5f' % (data.branchlength) 585 else: # write suport and branchlengths (e.g. .con tree of mrbayes) 586 if terminal: 587 info_string = ':%1.5f' % (data.branchlength) 588 else: 589 if data.branchlength is not None and data.support is not None: # we have blen and suppport 590 info_string = '%1.2f:%1.5f' % (data.support, data.branchlength) 591 elif data.branchlength is not None: # we have only blen 592 info_string = '0.00000:%1.5f' % (data.branchlength) 593 elif data.support is not None: # we have only support 594 info_string = '%1.2f:0.00000' % (data.support) 595 else: 596 info_string = '0.00:0.00000' 597 if not ignore_comments and hasattr(data, 'nodecomment'): 598 info_string = str(data.nodecomment) + info_string 599 return info_string
600 601 def ladderize_nodes(nodes, ladderize=None): 602 """Sorts node numbers according to the number of terminal nodes.""" 603 if ladderize in ['left', 'LEFT', 'right', 'RIGHT']: 604 succnode_terminals = sorted((self.count_terminals(node=n), n) for n in nodes) 605 if (ladderize == 'right' or ladderize == 'RIGHT'): 606 succnode_terminals.reverse() 607 if succnode_terminals: 608 succnodes = zip(*succnode_terminals)[1] 609 else: 610 succnodes = [] 611 else: 612 succnodes = nodes 613 return succnodes
614 615 def newickize(node, ladderize=None): 616 """Convert a node tree to a newick tree recursively.""" 617 618 if not self.node(node).succ: # terminal 619 return self.node(node).data.taxon + make_info_string(self.node(node).data, terminal=True) 620 else: 621 succnodes = ladderize_nodes(self.node(node).succ, ladderize=ladderize) 622 subtrees = [newickize(sn, ladderize=ladderize) for sn in succnodes] 623 return '(%s)%s' % (','.join(subtrees), make_info_string(self.node(node).data)) 624 625 treeline = ['tree'] 626 if self.name: 627 treeline.append(self.name) 628 else: 629 treeline.append('a_tree') 630 treeline.append('=') 631 if self.weight != 1: 632 treeline.append('[&W%s]' % str(round(float(self.weight), 3))) 633 if self.rooted: 634 treeline.append('[&R]') 635 succnodes = ladderize_nodes(self.node(self.root).succ) 636 subtrees = [newickize(sn, ladderize=ladderize) for sn in succnodes] 637 treeline.append('(%s)' % ','.join(subtrees)) 638 if plain_newick: 639 return treeline[-1] 640 else: 641 return ' '.join(treeline) + ';' 642
643 - def __str__(self):
644 """Short version of to_string(), gives plain tree""" 645 return self.to_string(plain=True)
646
647 - def unroot(self):
648 """Defines a unrooted Tree structure, using data of a rooted Tree.""" 649 650 # travel down the rooted tree structure and save all branches and the nodes they connect 651 652 def _get_branches(node): 653 branches = [] 654 for b in self.node(node).succ: 655 branches.append([node, b, self.node(b).data.branchlength, self.node(b).data.support]) 656 branches.extend(_get_branches(b)) 657 return branches
658 659 self.unrooted = _get_branches(self.root) 660 # if root is bifurcating, then it is eliminated 661 if len(self.node(self.root).succ) == 2: 662 # find the two branches that connect to root 663 rootbranches = [b for b in self.unrooted if self.root in b[:2]] 664 b1 = self.unrooted.pop(self.unrooted.index(rootbranches[0])) 665 b2 = self.unrooted.pop(self.unrooted.index(rootbranches[1])) 666 # Connect them two each other. If both have support, it should be identical (or one set to None?). 667 # If both have branchlengths, they will be added 668 newbranch = [b1[1], b2[1], b1[2] + b2[2]] 669 if b1[3] is None: 670 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support 671 elif b2[3] is None: 672 newbranch.append(b1[3]) # dito 673 elif b1[3] == b2[3]: 674 newbranch.append(b1[3]) # identical support 675 elif b1[3] == 0 or b2[3] == 0: 676 newbranch.append(b1[3] + b2[3]) # one is 0, take the other 677 else: 678 raise TreeError('Support mismatch in bifurcating root: %f, %f' 679 % (float(b1[3]), float(b2[3]))) 680 self.unrooted.append(newbranch) 681
682 - def root_with_outgroup(self, outgroup=None):
683 684 def _connect_subtree(parent, child): 685 """Hook subtree starting with node child to parent.""" 686 for i, branch in enumerate(self.unrooted): 687 if parent in branch[:2] and child in branch[:2]: 688 branch = self.unrooted.pop(i) 689 break 690 else: 691 raise TreeError('Unable to connect nodes for rooting: nodes %d and %d are not connected' 692 % (parent, child)) 693 self.link(parent, child) 694 self.node(child).data.branchlength = branch[2] 695 self.node(child).data.support = branch[3] 696 # now check if there are more branches connected to the child, and if so, connect them 697 child_branches = [b for b in self.unrooted if child in b[:2]] 698 for b in child_branches: 699 if child == b[0]: 700 succ = b[1] 701 else: 702 succ = b[0] 703 _connect_subtree(child, succ)
704 705 # check the outgroup we're supposed to root with 706 if outgroup is None: 707 return self.root 708 outgroup_node = self.is_monophyletic(outgroup) 709 if outgroup_node == -1: 710 return -1 711 # if tree is already rooted with outgroup on a bifurcating root, 712 # or the outgroup includes all taxa on the tree, then we're fine 713 if (len(self.node(self.root).succ) == 2 and outgroup_node in self.node(self.root).succ) or outgroup_node == self.root: 714 return self.root 715 716 self.unroot() 717 # now we find the branch that connects outgroup and ingroup 718 # print(self.node(outgroup_node).prev) 719 for i, b in enumerate(self.unrooted): 720 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]: 721 root_branch = self.unrooted.pop(i) 722 break 723 else: 724 raise TreeError('Unrooted and rooted Tree do not match') 725 if outgroup_node == root_branch[1]: 726 ingroup_node = root_branch[0] 727 else: 728 ingroup_node = root_branch[1] 729 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup 730 for n in self.all_ids(): 731 self.node(n).prev = None 732 self.node(n).succ = [] 733 # now we just add both subtrees (outgroup and ingroup) branch for branch 734 root = Nodes.Node(data=NodeData()) # new root 735 self.add(root) # add to tree description 736 self.root = root.id # set as root 737 self.unrooted.append([root.id, ingroup_node, root_branch[2], root_branch[3]]) # add branch to ingroup to unrooted tree 738 self.unrooted.append([root.id, outgroup_node, 0.0, 0.0]) # add branch to outgroup to unrooted tree 739 _connect_subtree(root.id, ingroup_node) # add ingroup 740 _connect_subtree(root.id, outgroup_node) # add outgroup 741 # if theres still a lonely node in self.chain, then it's the old root, and we delete it 742 oldroot = [i for i in self.all_ids() if self.node(i).prev is None and i != self.root] 743 if len(oldroot) > 1: 744 raise TreeError('Isolated nodes in tree description: %s' 745 % ','.join(oldroot)) 746 elif len(oldroot) == 1: 747 self.kill(oldroot[0]) 748 return self.root 749
750 - def merge_with_support(self, bstrees=None, constree=None, threshold=0.5, outgroup=None):
751 """Merges clade support (from consensus or list of bootstrap-trees) with phylogeny. 752 753 tree=merge_bootstrap(phylo,bs_tree=<list_of_trees>) 754 or 755 tree=merge_bootstrap(phylo,consree=consensus_tree with clade support) 756 """ 757 758 if bstrees and constree: 759 raise TreeError('Specify either list of bootstrap trees or consensus tree, not both') 760 if not (bstrees or constree): 761 raise TreeError('Specify either list of bootstrap trees or consensus tree.') 762 # no outgroup specified: use the smallest clade of the root 763 if outgroup is None: 764 try: 765 succnodes = self.node(self.root).succ 766 smallest = min((len(self.get_taxa(n)), n) for n in succnodes) 767 outgroup = self.get_taxa(smallest[1]) 768 except: 769 raise TreeError("Error determining outgroup.") 770 else: # root with user specified outgroup 771 self.root_with_outgroup(outgroup) 772 773 if bstrees: # calculate consensus 774 constree = consensus(bstrees, threshold=threshold, outgroup=outgroup) 775 else: 776 if not constree.has_support(): 777 constree.branchlength2support() 778 constree.root_with_outgroup(outgroup) 779 # now we travel all nodes, and add support from consensus, if the clade is present in both 780 for pnode in self._walk(): 781 cnode = constree.is_monophyletic(self.get_taxa(pnode)) 782 if cnode > -1: 783 self.node(pnode).data.support = constree.node(cnode).data.support
784 785
786 -def consensus(trees, threshold=0.5, outgroup=None):
787 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.""" 788 789 total = len(trees) 790 if total == 0: 791 return None 792 # shouldn't we make sure that it's NodeData or subclass?? 793 dataclass = trees[0].dataclass 794 max_support = trees[0].max_support 795 clades = {} 796 # countclades={} 797 alltaxa = set(trees[0].get_taxa()) 798 # calculate calde frequencies 799 c = 0 800 for t in trees: 801 c += 1 802 # if c%100==0: 803 # print(c) 804 if alltaxa != set(t.get_taxa()): 805 raise TreeError('Trees for consensus must contain the same taxa') 806 t.root_with_outgroup(outgroup=outgroup) 807 for st_node in t._walk(t.root): 808 subclade_taxa = sorted(t.get_taxa(st_node)) 809 subclade_taxa = str(subclade_taxa) # lists are not hashable 810 if subclade_taxa in clades: 811 clades[subclade_taxa] += float(t.weight) / total 812 else: 813 clades[subclade_taxa] = float(t.weight) / total 814 # if subclade_taxa in countclades: 815 # countclades[subclade_taxa]+=t.weight 816 # else: 817 # countclades[subclade_taxa]=t.weight 818 # weed out clades below threshold 819 delclades = [c for c, p in clades.items() if round(p, 3) < threshold] # round can be necessary 820 for c in delclades: 821 del clades[c] 822 # create a tree with a root node 823 consensus = Tree(name='consensus_%2.1f' % float(threshold), data=dataclass) 824 # each clade needs a node in the new tree, add them as isolated nodes 825 for c, s in clades.items(): 826 node = Nodes.Node(data=dataclass()) 827 node.data.support = s 828 node.data.taxon = set(eval(c)) 829 consensus.add(node) 830 # set root node data 831 consensus.node(consensus.root).data.support = None 832 consensus.node(consensus.root).data.taxon = alltaxa 833 # we sort the nodes by no. of taxa in the clade, so root will be the last 834 consensus_ids = consensus.all_ids() 835 consensus_ids.sort(lambda x, y: len(consensus.node(x).data.taxon) - len(consensus.node(y).data.taxon)) 836 # now we just have to hook each node to the next smallest node that includes all taxa of the current 837 for i, current in enumerate(consensus_ids[:-1]): # skip the last one which is the root 838 # print('----') 839 # print('current: %s' % consensus.node(current).data.taxon) 840 # search remaining nodes 841 for parent in consensus_ids[i + 1:]: 842 # print('parent: %s' % consensus.node(parent).data.taxon) 843 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon): 844 break 845 else: 846 sys.exit('corrupt tree structure?') 847 # internal nodes don't have taxa 848 if len(consensus.node(current).data.taxon) == 1: 849 consensus.node(current).data.taxon = consensus.node(current).data.taxon.pop() 850 # reset the support for terminal nodes to maximum 851 # consensus.node(current).data.support=max_support 852 else: 853 consensus.node(current).data.taxon = None 854 consensus.link(parent, current) 855 # eliminate root taxon name 856 consensus.node(consensus_ids[-1]).data.taxon = None 857 if alltaxa != set(consensus.get_taxa()): 858 raise TreeError('FATAL ERROR: consensus tree is corrupt') 859 return consensus
860