Package Bio :: Package Nexus :: Module Trees
[hide private]
[frames] | no frames]

Source Code for Module Bio.Nexus.Trees

  1  # Copyright 2005-2008 by Frank Kauff & Cymon J. Cox. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bug reports welcome: fkauff@biologie.uni-kl.de or on Biopython's bugzilla. 
  7  """Tree class to handle phylogenetic trees. 
  8   
  9  Provides a set of methods to read and write newick-format tree descriptions, 
 10  get information about trees (monphyly of taxon sets, congruence between trees, 
 11  common ancestors,...) and to manipulate trees (reroot trees, split terminal 
 12  nodes). 
 13  """ 
 14   
 15  from __future__ import print_function 
 16   
 17  import random 
 18  import sys 
 19  from . import Nodes 
 20   
 21   
 22  PRECISION_BRANCHLENGTH = 6 
 23  PRECISION_SUPPORT = 6 
 24  NODECOMMENT_START = '[&' 
 25  NODECOMMENT_END = ']' 
 26   
 27   
28 -class TreeError(Exception):
29 pass
30 31
32 -class NodeData(object):
33 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
34 - def __init__(self, taxon=None, branchlength=0.0, support=None, comment=None):
35 self.taxon = taxon 36 self.branchlength = branchlength 37 self.support = support 38 self.comment = comment
39 40
41 -class Tree(Nodes.Chain):
42 """Represents a tree using a chain of nodes with on predecessor (=ancestor) 43 and multiple successors (=subclades). 44 """ 45 # A newick tree is parsed into nested list and then converted to a node list in two stages 46 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and 47 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much 48 # easier when parsing trees. 49 50 # NOTE: Tree should store its data class in something like self.dataclass=data, 51 # so that nodes that are generated have easy access to the data class 52 # Some routines use automatically NodeData, this needs to be more concise 53
54 - def __init__(self, tree=None, weight=1.0, rooted=False, name='', data=NodeData, values_are_support=False, max_support=1.0):
55 """Ntree(self,tree).""" 56 Nodes.Chain.__init__(self) 57 self.dataclass = data 58 self.__values_are_support = values_are_support 59 self.max_support = max_support 60 self.weight = weight 61 self.rooted = rooted 62 self.name = name 63 root = Nodes.Node(data()) 64 self.root = self.add(root) 65 if tree: # use the tree we have 66 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc 67 tree = tree.strip().replace('\n', '').replace('\r', '') 68 # there's discrepancy whether newick allows semicolons et the end 69 tree = tree.rstrip(';') 70 subtree_info, base_info = self._parse(tree) 71 root.data = self._add_nodedata(root.data, [[], base_info]) 72 self._add_subtree(parent_id=root.id, tree=subtree_info)
73
74 - def _parse(self, tree):
75 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively.""" 76 # Remove any leading/trailing white space - want any string starting 77 # with " (..." should be recognised as a leaf, "(..." 78 tree = tree.strip() 79 if tree.count('(') != tree.count(')'): 80 raise TreeError('Parentheses do not match in (sub)tree: ' + tree) 81 if tree.count('(') == 0: # a leaf 82 # check if there's a colon, or a special comment, or both after the taxon name 83 nodecomment = tree.find(NODECOMMENT_START) 84 colon = tree.find(':') 85 if colon == -1 and nodecomment == -1: # none 86 return [tree, [None]] 87 elif colon == -1 and nodecomment > -1: # only special comment 88 return [tree[:nodecomment], self._get_values(tree[nodecomment:])] 89 elif colon > -1 and nodecomment == -1: # only numerical values 90 return [tree[:colon], self._get_values(tree[colon + 1:])] 91 elif colon < nodecomment: # taxon name ends at first colon or with special comment 92 return [tree[:colon], self._get_values(tree[colon + 1:])] 93 else: 94 return [tree[:nodecomment], self._get_values(tree[nodecomment:])] 95 else: 96 closing = tree.rfind(')') 97 val = self._get_values(tree[closing + 1:]) 98 if not val: 99 val = [None] 100 subtrees = [] 101 plevel = 0 102 prev = 1 103 incomment = False 104 for p in range(1, closing): 105 if not incomment and tree[p] == '(': 106 plevel += 1 107 elif not incomment and tree[p] == ')': 108 plevel -= 1 109 elif tree[p:].startswith(NODECOMMENT_START): 110 incomment = True 111 elif incomment and tree[p] == NODECOMMENT_END: 112 incomment = False 113 elif not incomment and tree[p] == ',' and plevel == 0: 114 subtrees.append(tree[prev:p]) 115 prev = p + 1 116 117 subtrees.append(tree[prev:closing]) 118 subclades = [self._parse(subtree) for subtree in subtrees] 119 return [subclades, val]
120
121 - def _add_subtree(self, parent_id=None, tree=None):
122 """Adds leaf or tree (in newick format) to a parent_id.""" 123 if parent_id is None: 124 raise TreeError('Need node_id to connect to.') 125 for st in tree: 126 nd = self.dataclass() 127 nd = self._add_nodedata(nd, st) 128 if isinstance(st[0], list): # it's a subtree 129 sn = Nodes.Node(nd) 130 self.add(sn, parent_id) 131 self._add_subtree(sn.id, st[0]) 132 else: # it's a leaf 133 nd.taxon = st[0] 134 leaf = Nodes.Node(nd) 135 self.add(leaf, parent_id)
136
137 - def _add_nodedata(self, nd, st):
138 """Add data to the node parsed from the comments, taxon and support.""" 139 if isinstance(st[1][-1], str) and st[1][-1].startswith(NODECOMMENT_START): 140 nd.comment = st[1].pop(-1) 141 # if the first element is a string, it's the subtree node taxon 142 elif isinstance(st[1][0], str): 143 nd.taxon = st[1][0] 144 st[1] = st[1][1:] 145 if len(st) > 1: 146 if len(st[1]) >= 2: # if there's two values, support comes first. Is that always so? 147 nd.support = st[1][0] 148 if st[1][1] is not None: 149 nd.branchlength = st[1][1] 150 elif len(st[1]) == 1: # otherwise it could be real branchlengths or support as branchlengths 151 if not self.__values_are_support: # default 152 if st[1][0] is not None: 153 nd.branchlength = st[1][0] 154 else: 155 nd.support = st[1][0] 156 return nd
157
158 - def _get_values(self, text):
159 """Extracts values (support/branchlength) from xx[:yyy], xx.""" 160 if text == '': 161 return None 162 nodecomment = None 163 if NODECOMMENT_START in text: # if there's a [&....] comment, cut it out 164 nc_start = text.find(NODECOMMENT_START) 165 nc_end = text.find(NODECOMMENT_END) 166 if nc_end == -1: 167 raise TreeError('Error in tree description: Found %s without matching %s' 168 % (NODECOMMENT_START, NODECOMMENT_END)) 169 nodecomment = text[nc_start:nc_end + 1] 170 text = text[:nc_start] + text[nc_end + 1:] 171 172 # pase out supports and branchlengths, with internal node taxa info 173 values = [] 174 taxonomy = None 175 for part in [t.strip() for t in text.split(":")]: 176 if part: 177 try: 178 values.append(float(part)) 179 except ValueError: 180 assert taxonomy is None, "Two string taxonomies?" 181 taxonomy = part 182 if taxonomy: 183 values.insert(0, taxonomy) 184 if nodecomment: 185 values.append(nodecomment) 186 return values
187
188 - def _walk(self, node=None):
189 """Return all node_ids downwards from a node.""" 190 if node is None: 191 node = self.root 192 for n in self.node(node).succ: 193 yield n 194 for sn in self._walk(n): 195 yield sn
196
197 - def node(self, node_id):
198 """Return the instance of node_id. 199 200 node = node(self,node_id) 201 """ 202 if node_id not in self.chain: 203 raise TreeError('Unknown node_id: %d' % node_id) 204 return self.chain[node_id]
205
206 - def split(self, parent_id=None, n=2, branchlength=1.0):
207 """Speciation: generates n (default two) descendants of a node. 208 209 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0): 210 """ 211 if parent_id is None: 212 raise TreeError('Missing node_id.') 213 ids = [] 214 parent_data = self.chain[parent_id].data 215 for i in range(n): 216 node = Nodes.Node() 217 if parent_data: 218 node.data = self.dataclass() 219 # each node has taxon and branchlength attribute 220 if parent_data.taxon: 221 node.data.taxon = parent_data.taxon + str(i) 222 node.data.branchlength = branchlength 223 ids.append(self.add(node, parent_id)) 224 return ids
225
226 - def search_taxon(self, taxon):
227 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes. 228 229 node_id = search_taxon(self,taxon) 230 """ 231 for id, node in self.chain.items(): 232 if node.data.taxon == taxon: 233 return id 234 return None
235
236 - def prune(self, taxon):
237 """Prunes a terminal taxon from the tree. 238 239 id_of_previous_node = prune(self,taxon) 240 If taxon is from a bifurcation, the connectiong node will be collapsed 241 and its branchlength added to remaining terminal node. This might be no 242 longer a meaningful value' 243 """ 244 id = self.search_taxon(taxon) 245 if id is None: 246 raise TreeError('Taxon not found: %s' % taxon) 247 elif id not in self.get_terminals(): 248 raise TreeError('Not a terminal taxon: %s' % taxon) 249 else: 250 prev = self.unlink(id) 251 self.kill(id) 252 if len(self.node(prev).succ) == 1: 253 if prev == self.root: # we deleted one branch of a bifurcating root, then we have to move the root upwards 254 self.root = self.node(self.root).succ[0] 255 self.node(self.root).branchlength = 0.0 256 self.kill(prev) 257 else: 258 succ = self.node(prev).succ[0] 259 new_bl = self.node(prev).data.branchlength + self.node(succ).data.branchlength 260 self.collapse(prev) 261 self.node(succ).data.branchlength = new_bl 262 return prev
263
264 - def get_taxa(self, node_id=None):
265 """Return a list of all otus downwards from a node. 266 267 nodes = get_taxa(self,node_id=None) 268 """ 269 if node_id is None: 270 node_id = self.root 271 if node_id not in self.chain: 272 raise TreeError('Unknown node_id: %d.' % node_id) 273 if self.chain[node_id].succ == []: 274 if self.chain[node_id].data: 275 return [self.chain[node_id].data.taxon] 276 else: 277 return None 278 else: 279 list = [] 280 for succ in self.chain[node_id].succ: 281 list.extend(self.get_taxa(succ)) 282 return list
283
284 - def get_terminals(self):
285 """Return a list of all terminal nodes.""" 286 return [i for i in self.all_ids() if self.node(i).succ == []]
287
288 - def is_terminal(self, node):
289 """Returns True if node is a terminal node.""" 290 return self.node(node).succ == []
291
292 - def is_internal(self, node):
293 """Returns True if node is an internal node.""" 294 return len(self.node(node).succ) > 0
295
296 - def is_preterminal(self, node):
297 """Returns True if all successors of a node are terminal ones.""" 298 if self.is_terminal(node): 299 return False not in [self.is_terminal(n) for n in self.node(node).succ] 300 else: 301 return False
302
303 - def count_terminals(self, node=None):
304 """Counts the number of terminal nodes that are attached to a node.""" 305 if node is None: 306 node = self.root 307 return len([n for n in self._walk(node) if self.is_terminal(n)])
308
309 - def collapse_genera(self, space_equals_underscore=True):
310 """Collapses all subtrees which belong to the same genus. 311 312 (i.e share the same first word in their taxon name.) 313 """ 314 while True: 315 for n in self._walk(): 316 if self.is_terminal(n): 317 continue 318 taxa = self.get_taxa(n) 319 genera = [] 320 for t in taxa: 321 if space_equals_underscore: 322 t = t.replace(' ', '_') 323 try: 324 genus = t.split('_', 1)[0] 325 except IndexError: 326 genus = 'None' 327 if genus not in genera: 328 genera.append(genus) 329 if len(genera) == 1: 330 self.node(n).data.taxon = genera[0] + ' <collapsed>' 331 # now we kill all nodes downstream 332 nodes2kill = [kn for kn in self._walk(node=n)] 333 for kn in nodes2kill: 334 self.kill(kn) 335 self.node(n).succ = [] 336 break # break out of for loop because node list from _walk will be inconsistent 337 else: # for loop exhausted: no genera to collapse left 338 break # while
339
340 - def sum_branchlength(self, root=None, node=None):
341 """Adds up the branchlengths from root (default self.root) to node. 342 343 sum = sum_branchlength(self,root=None,node=None) 344 """ 345 if root is None: 346 root = self.root 347 if node is None: 348 raise TreeError('Missing node id.') 349 blen = 0.0 350 while node is not None and node is not root: 351 blen += self.node(node).data.branchlength 352 node = self.node(node).prev 353 return blen
354
355 - def set_subtree(self, node):
356 """Return subtree as a set of nested sets. 357 358 sets = set_subtree(self,node) 359 """ 360 if self.node(node).succ == []: 361 return self.node(node).data.taxon 362 else: 363 try: 364 return frozenset(self.set_subtree(n) for n in self.node(node).succ) 365 except: 366 print(node) 367 print(self.node(node).succ) 368 for n in self.node(node).succ: 369 print("%s %s" % (n, self.set_subtree(n))) 370 print([self.set_subtree(n) for n in self.node(node).succ]) 371 raise
372
373 - def is_identical(self, tree2):
374 """Compare tree and tree2 for identity. 375 376 result = is_identical(self,tree2) 377 """ 378 return self.set_subtree(self.root) == tree2.set_subtree(tree2.root)
379
380 - def is_compatible(self, tree2, threshold, strict=True):
381 """Compares branches with support>threshold for compatibility. 382 383 result = is_compatible(self,tree2,threshold) 384 """ 385 # check if both trees have the same set of taxa. strict=True enforces this. 386 missing2 = set(self.get_taxa()) - set(tree2.get_taxa()) 387 missing1 = set(tree2.get_taxa()) - set(self.get_taxa()) 388 if strict and (missing1 or missing2): 389 if missing1: 390 print('Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1), self.name)) 391 if missing2: 392 print('Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2), tree2.name)) 393 raise TreeError('Can\'t compare trees with different taxon compositions.') 394 t1 = [(set(self.get_taxa(n)), self.node(n).data.support) for n in self.all_ids() if 395 self.node(n).succ and 396 (self.node(n).data and self.node(n).data.support and self.node(n).data.support >= threshold)] 397 t2 = [(set(tree2.get_taxa(n)), tree2.node(n).data.support) for n in tree2.all_ids() if 398 tree2.node(n).succ and 399 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support >= threshold)] 400 conflict = [] 401 for (st1, sup1) in t1: 402 for (st2, sup2) in t2: 403 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes 404 intersect, notin1, notin2 = st1 & st2, st2 - st1, st1 - st2 # all three are non-empty sets 405 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ??? 406 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa 407 conflict.append((st1, sup1, st2, sup2, intersect, notin1, notin2)) 408 return conflict
409
410 - def common_ancestor(self, node1, node2):
411 """Return the common ancestor that connects two nodes. 412 413 node_id = common_ancestor(self,node1,node2) 414 """ 415 l1 = [self.root] + self.trace(self.root, node1) 416 l2 = [self.root] + self.trace(self.root, node2) 417 return [n for n in l1 if n in l2][-1]
418
419 - def distance(self, node1, node2):
420 """Add and return the sum of the branchlengths between two nodes. 421 422 dist = distance(self,node1,node2) 423 """ 424 ca = self.common_ancestor(node1, node2) 425 return self.sum_branchlength(ca, node1) + self.sum_branchlength(ca, node2)
426
427 - def is_monophyletic(self, taxon_list):
428 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise. 429 430 result = is_monophyletic(self,taxon_list) 431 """ 432 if isinstance(taxon_list, str): 433 taxon_set = set([taxon_list]) 434 else: 435 taxon_set = set(taxon_list) 436 node_id = self.root 437 while True: 438 subclade_taxa = set(self.get_taxa(node_id)) 439 if subclade_taxa == taxon_set: # are we there? 440 return node_id 441 else: # check subnodes 442 for subnode in self.chain[node_id].succ: 443 if set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream 444 node_id = subnode 445 break # out of for loop 446 else: 447 return -1 # taxon set was not with successors, for loop exhausted
448
449 - def is_bifurcating(self, node=None):
450 """Return True if tree downstream of node is strictly bifurcating.""" 451 if node is None: 452 node = self.root 453 if node == self.root and len(self.node(node).succ) == 3: # root can be trifurcating, because it has no ancestor 454 return self.is_bifurcating(self.node(node).succ[0]) and \ 455 self.is_bifurcating(self.node(node).succ[1]) and \ 456 self.is_bifurcating(self.node(node).succ[2]) 457 if len(self.node(node).succ) == 2: 458 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1]) 459 elif len(self.node(node).succ) == 0: 460 return True 461 else: 462 return False
463
464 - def branchlength2support(self):
465 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0 466 467 This is necessary when support has been stored as branchlength (e.g. paup), and has thus 468 been read in as branchlength. 469 """ 470 for n in self.chain: 471 self.node(n).data.support = self.node(n).data.branchlength 472 self.node(n).data.branchlength = 0.0
473
474 - def convert_absolute_support(self, nrep):
475 """Convert absolute support (clade-count) to rel. frequencies. 476 477 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of 478 calculating relative frequencies. 479 """ 480 for n in self._walk(): 481 if self.node(n).data.support: 482 self.node(n).data.support /= float(nrep)
483
484 - def has_support(self, node=None):
485 """Returns True if any of the nodes has data.support != None.""" 486 for n in self._walk(node): 487 if self.node(n).data.support: 488 return True 489 else: 490 return False
491
492 - def randomize(self, ntax=None, taxon_list=None, branchlength=1.0, branchlength_sd=None, bifurcate=True):
493 """Generates a random tree with ntax taxa and/or taxa from taxlabels. 494 495 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True) 496 Trees are bifurcating by default. (Polytomies not yet supported). 497 """ 498 if not ntax and taxon_list: 499 ntax = len(taxon_list) 500 elif not taxon_list and ntax: 501 taxon_list = ['taxon' + str(i + 1) for i in range(ntax)] 502 elif not ntax and not taxon_list: 503 raise TreeError('Either numer of taxa or list of taxa must be specified.') 504 elif ntax != len(taxon_list): 505 raise TreeError('Length of taxon list must correspond to ntax.') 506 # initiate self with empty root 507 self.__init__() 508 terminals = self.get_terminals() 509 # bifurcate randomly at terminal nodes until ntax is reached 510 while len(terminals) < ntax: 511 newsplit = random.choice(terminals) 512 new_terminals = self.split(parent_id=newsplit, branchlength=branchlength) 513 # if desired, give some variation to the branch length 514 if branchlength_sd: 515 for nt in new_terminals: 516 bl = random.gauss(branchlength, branchlength_sd) 517 if bl < 0: 518 bl = 0 519 self.node(nt).data.branchlength = bl 520 terminals.extend(new_terminals) 521 terminals.remove(newsplit) 522 # distribute taxon labels randomly 523 random.shuffle(taxon_list) 524 for (node, name) in zip(terminals, taxon_list): 525 self.node(node).data.taxon = name
526
527 - def display(self):
528 """Quick and dirty lists of all nodes.""" 529 table = [('#', 'taxon', 'prev', 'succ', 'brlen', 'blen (sum)', 'support', 'comment')] 530 # Sort this to be consistent across CPython, Jython, etc 531 for i in sorted(self.all_ids()): 532 n = self.node(i) 533 if not n.data: 534 table.append((str(i), '-', str(n.prev), str(n.succ), '-', '-', '-', '-')) 535 else: 536 tx = n.data.taxon 537 if not tx: 538 tx = '-' 539 blength = "%0.2f" % n.data.branchlength 540 if blength is None: 541 blength = '-' 542 sum_blength = '-' 543 else: 544 sum_blength = "%0.2f" % self.sum_branchlength(node=i) 545 support = n.data.support 546 if support is None: 547 support = '-' 548 else: 549 support = "%0.2f" % support 550 comment = n.data.comment 551 if comment is None: 552 comment = '-' 553 table.append((str(i), tx, str(n.prev), str(n.succ), 554 blength, sum_blength, support, comment)) 555 print('\n'.join('%3s %32s %15s %15s %8s %10s %8s %20s' % l for l in table)) 556 print('\nRoot: %s' % self.root)
557
558 - def to_string(self, support_as_branchlengths=False, 559 branchlengths_only=False, plain=True, plain_newick=False, 560 ladderize=None, ignore_comments=True):
561 """Return a paup compatible tree line.""" 562 # if there's a conflict in the arguments, we override plain=True 563 if support_as_branchlengths or branchlengths_only: 564 plain = False 565 self.support_as_branchlengths = support_as_branchlengths 566 self.branchlengths_only = branchlengths_only 567 self.ignore_comments = ignore_comments 568 self.plain = plain 569 570 def make_info_string(data, terminal=False): 571 """Creates nicely formatted support/branchlengths.""" 572 # CHECK FORMATTING 573 if self.plain: # plain tree only. That's easy. 574 info_string = '' 575 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths 576 if terminal: # terminal branches have 100% support 577 info_string = ':%1.2f' % self.max_support 578 elif data.support: 579 info_string = ':%1.2f' % (data.support) 580 else: 581 info_string = ':0.00' 582 elif self.branchlengths_only: # write only branchlengths, ignore support 583 info_string = ':%1.5f' % (data.branchlength) 584 else: # write suport and branchlengths (e.g. .con tree of mrbayes) 585 if terminal: 586 info_string = ':%1.5f' % (data.branchlength) 587 else: 588 if data.branchlength is not None and data.support is not None: # we have blen and suppport 589 info_string = '%1.2f:%1.5f' % (data.support, data.branchlength) 590 elif data.branchlength is not None: # we have only blen 591 info_string = '0.00000:%1.5f' % (data.branchlength) 592 elif data.support is not None: # we have only support 593 info_string = '%1.2f:0.00000' % (data.support) 594 else: 595 info_string = '0.00:0.00000' 596 if not ignore_comments and hasattr(data, 'nodecomment'): 597 info_string = str(data.nodecomment) + info_string 598 return info_string
599 600 def ladderize_nodes(nodes, ladderize=None): 601 """Sorts node numbers according to the number of terminal nodes.""" 602 if ladderize in ['left', 'LEFT', 'right', 'RIGHT']: 603 succnode_terminals = sorted((self.count_terminals(node=n), n) for n in nodes) 604 if (ladderize == 'right' or ladderize == 'RIGHT'): 605 succnode_terminals.reverse() 606 if succnode_terminals: 607 succnodes = zip(*succnode_terminals)[1] 608 else: 609 succnodes = [] 610 else: 611 succnodes = nodes 612 return succnodes
613 614 def newickize(node, ladderize=None): 615 """Convert a node tree to a newick tree recursively.""" 616 if not self.node(node).succ: # terminal 617 return self.node(node).data.taxon + make_info_string(self.node(node).data, terminal=True) 618 else: 619 succnodes = ladderize_nodes(self.node(node).succ, ladderize=ladderize) 620 subtrees = [newickize(sn, ladderize=ladderize) for sn in succnodes] 621 return '(%s)%s' % (','.join(subtrees), make_info_string(self.node(node).data)) 622 623 treeline = ['tree'] 624 if self.name: 625 treeline.append(self.name) 626 else: 627 treeline.append('a_tree') 628 treeline.append('=') 629 if self.weight != 1: 630 treeline.append('[&W%s]' % str(round(float(self.weight), 3))) 631 if self.rooted: 632 treeline.append('[&R]') 633 succnodes = ladderize_nodes(self.node(self.root).succ) 634 subtrees = [newickize(sn, ladderize=ladderize) for sn in succnodes] 635 treeline.append('(%s)' % ','.join(subtrees)) 636 if plain_newick: 637 return treeline[-1] 638 else: 639 return ' '.join(treeline) + ';' 640
641 - def __str__(self):
642 """Short version of to_string(), gives plain tree""" 643 return self.to_string(plain=True)
644
645 - def unroot(self):
646 """Defines a unrooted Tree structure, using data of a rooted Tree.""" 647 # travel down the rooted tree structure and save all branches and the nodes they connect 648 649 def _get_branches(node): 650 branches = [] 651 for b in self.node(node).succ: 652 branches.append([node, b, self.node(b).data.branchlength, self.node(b).data.support]) 653 branches.extend(_get_branches(b)) 654 return branches
655 656 self.unrooted = _get_branches(self.root) 657 # if root is bifurcating, then it is eliminated 658 if len(self.node(self.root).succ) == 2: 659 # find the two branches that connect to root 660 rootbranches = [b for b in self.unrooted if self.root in b[:2]] 661 b1 = self.unrooted.pop(self.unrooted.index(rootbranches[0])) 662 b2 = self.unrooted.pop(self.unrooted.index(rootbranches[1])) 663 # Connect them two each other. If both have support, it should be identical (or one set to None?). 664 # If both have branchlengths, they will be added 665 newbranch = [b1[1], b2[1], b1[2] + b2[2]] 666 if b1[3] is None: 667 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support 668 elif b2[3] is None: 669 newbranch.append(b1[3]) # dito 670 elif b1[3] == b2[3]: 671 newbranch.append(b1[3]) # identical support 672 elif b1[3] == 0 or b2[3] == 0: 673 newbranch.append(b1[3] + b2[3]) # one is 0, take the other 674 else: 675 raise TreeError('Support mismatch in bifurcating root: %f, %f' 676 % (float(b1[3]), float(b2[3]))) 677 self.unrooted.append(newbranch) 678
679 - def root_with_outgroup(self, outgroup=None):
680 681 def _connect_subtree(parent, child): 682 """Hook subtree starting with node child to parent.""" 683 for i, branch in enumerate(self.unrooted): 684 if parent in branch[:2] and child in branch[:2]: 685 branch = self.unrooted.pop(i) 686 break 687 else: 688 raise TreeError('Unable to connect nodes for rooting: nodes %d and %d are not connected' 689 % (parent, child)) 690 self.link(parent, child) 691 self.node(child).data.branchlength = branch[2] 692 self.node(child).data.support = branch[3] 693 # now check if there are more branches connected to the child, and if so, connect them 694 child_branches = [b for b in self.unrooted if child in b[:2]] 695 for b in child_branches: 696 if child == b[0]: 697 succ = b[1] 698 else: 699 succ = b[0] 700 _connect_subtree(child, succ)
701 702 # check the outgroup we're supposed to root with 703 if outgroup is None: 704 return self.root 705 outgroup_node = self.is_monophyletic(outgroup) 706 if outgroup_node == -1: 707 return -1 708 # if tree is already rooted with outgroup on a bifurcating root, 709 # or the outgroup includes all taxa on the tree, then we're fine 710 if (len(self.node(self.root).succ) == 2 and outgroup_node in self.node(self.root).succ) or outgroup_node == self.root: 711 return self.root 712 713 self.unroot() 714 # now we find the branch that connects outgroup and ingroup 715 # print(self.node(outgroup_node).prev) 716 for i, b in enumerate(self.unrooted): 717 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]: 718 root_branch = self.unrooted.pop(i) 719 break 720 else: 721 raise TreeError('Unrooted and rooted Tree do not match') 722 if outgroup_node == root_branch[1]: 723 ingroup_node = root_branch[0] 724 else: 725 ingroup_node = root_branch[1] 726 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup 727 for n in self.all_ids(): 728 self.node(n).prev = None 729 self.node(n).succ = [] 730 # now we just add both subtrees (outgroup and ingroup) branch for branch 731 root = Nodes.Node(data=NodeData()) # new root 732 self.add(root) # add to tree description 733 self.root = root.id # set as root 734 self.unrooted.append([root.id, ingroup_node, root_branch[2], root_branch[3]]) # add branch to ingroup to unrooted tree 735 self.unrooted.append([root.id, outgroup_node, 0.0, 0.0]) # add branch to outgroup to unrooted tree 736 _connect_subtree(root.id, ingroup_node) # add ingroup 737 _connect_subtree(root.id, outgroup_node) # add outgroup 738 # if theres still a lonely node in self.chain, then it's the old root, and we delete it 739 oldroot = [i for i in self.all_ids() if self.node(i).prev is None and i != self.root] 740 if len(oldroot) > 1: 741 raise TreeError('Isolated nodes in tree description: %s' 742 % ','.join(oldroot)) 743 elif len(oldroot) == 1: 744 self.kill(oldroot[0]) 745 return self.root 746
747 - def merge_with_support(self, bstrees=None, constree=None, threshold=0.5, outgroup=None):
748 """Merges clade support (from consensus or list of bootstrap-trees) with phylogeny. 749 750 tree=merge_bootstrap(phylo,bs_tree=<list_of_trees>) 751 or 752 tree=merge_bootstrap(phylo,consree=consensus_tree with clade support) 753 """ 754 if bstrees and constree: 755 raise TreeError('Specify either list of bootstrap trees or consensus tree, not both') 756 if not (bstrees or constree): 757 raise TreeError('Specify either list of bootstrap trees or consensus tree.') 758 # no outgroup specified: use the smallest clade of the root 759 if outgroup is None: 760 try: 761 succnodes = self.node(self.root).succ 762 smallest = min((len(self.get_taxa(n)), n) for n in succnodes) 763 outgroup = self.get_taxa(smallest[1]) 764 except: 765 raise TreeError("Error determining outgroup.") 766 else: # root with user specified outgroup 767 self.root_with_outgroup(outgroup) 768 769 if bstrees: # calculate consensus 770 constree = consensus(bstrees, threshold=threshold, outgroup=outgroup) 771 else: 772 if not constree.has_support(): 773 constree.branchlength2support() 774 constree.root_with_outgroup(outgroup) 775 # now we travel all nodes, and add support from consensus, if the clade is present in both 776 for pnode in self._walk(): 777 cnode = constree.is_monophyletic(self.get_taxa(pnode)) 778 if cnode > -1: 779 self.node(pnode).data.support = constree.node(cnode).data.support
780 781
782 -def consensus(trees, threshold=0.5, outgroup=None):
783 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.""" 784 total = len(trees) 785 if total == 0: 786 return None 787 # shouldn't we make sure that it's NodeData or subclass?? 788 dataclass = trees[0].dataclass 789 max_support = trees[0].max_support 790 clades = {} 791 # countclades={} 792 alltaxa = set(trees[0].get_taxa()) 793 # calculate calde frequencies 794 c = 0 795 for t in trees: 796 c += 1 797 # if c%100==0: 798 # print(c) 799 if alltaxa != set(t.get_taxa()): 800 raise TreeError('Trees for consensus must contain the same taxa') 801 t.root_with_outgroup(outgroup=outgroup) 802 for st_node in t._walk(t.root): 803 subclade_taxa = sorted(t.get_taxa(st_node)) 804 subclade_taxa = str(subclade_taxa) # lists are not hashable 805 if subclade_taxa in clades: 806 clades[subclade_taxa] += float(t.weight) / total 807 else: 808 clades[subclade_taxa] = float(t.weight) / total 809 # if subclade_taxa in countclades: 810 # countclades[subclade_taxa]+=t.weight 811 # else: 812 # countclades[subclade_taxa]=t.weight 813 # weed out clades below threshold 814 delclades = [c for c, p in clades.items() if round(p, 3) < threshold] # round can be necessary 815 for c in delclades: 816 del clades[c] 817 # create a tree with a root node 818 consensus = Tree(name='consensus_%2.1f' % float(threshold), data=dataclass) 819 # each clade needs a node in the new tree, add them as isolated nodes 820 for c, s in clades.items(): 821 node = Nodes.Node(data=dataclass()) 822 node.data.support = s 823 node.data.taxon = set(eval(c)) 824 consensus.add(node) 825 # set root node data 826 consensus.node(consensus.root).data.support = None 827 consensus.node(consensus.root).data.taxon = alltaxa 828 # we sort the nodes by no. of taxa in the clade, so root will be the last 829 consensus_ids = consensus.all_ids() 830 consensus_ids.sort(lambda x, y: len(consensus.node(x).data.taxon) - len(consensus.node(y).data.taxon)) 831 # now we just have to hook each node to the next smallest node that includes all taxa of the current 832 for i, current in enumerate(consensus_ids[:-1]): # skip the last one which is the root 833 # print('----') 834 # print('current: %s' % consensus.node(current).data.taxon) 835 # search remaining nodes 836 for parent in consensus_ids[i + 1:]: 837 # print('parent: %s' % consensus.node(parent).data.taxon) 838 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon): 839 break 840 else: 841 sys.exit('corrupt tree structure?') 842 # internal nodes don't have taxa 843 if len(consensus.node(current).data.taxon) == 1: 844 consensus.node(current).data.taxon = consensus.node(current).data.taxon.pop() 845 # reset the support for terminal nodes to maximum 846 # consensus.node(current).data.support=max_support 847 else: 848 consensus.node(current).data.taxon = None 849 consensus.link(parent, current) 850 # eliminate root taxon name 851 consensus.node(consensus_ids[-1]).data.taxon = None 852 if alltaxa != set(consensus.get_taxa()): 853 raise TreeError('FATAL ERROR: consensus tree is corrupt') 854 return consensus
855