Package Bio :: Package Nexus :: Module Trees
[hide private]
[frames] | no frames]

Source Code for Module Bio.Nexus.Trees

  1  # Copyright 2005-2008 by Frank Kauff & Cymon J. Cox. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bug reports welcome: fkauff@biologie.uni-kl.de or on Biopython's bugzilla. 
  7  """Tree class to handle phylogenetic trees. 
  8   
  9  Provides a set of methods to read and write newick-format tree descriptions, 
 10  get information about trees (monphyly of taxon sets, congruence between trees, 
 11  common ancestors,...) and to manipulate trees (reroot trees, split terminal 
 12  nodes). 
 13  """ 
 14   
 15  import random 
 16  import sys 
 17  import Nodes 
 18   
 19  PRECISION_BRANCHLENGTH=6 
 20  PRECISION_SUPPORT=6 
 21  NODECOMMENT_START='[&' 
 22  NODECOMMENT_END=']' 
 23   
 24   
25 -class TreeError(Exception):
26 pass
27 28
29 -class NodeData(object):
30 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
31 - def __init__(self,taxon=None,branchlength=0.0,support=None,comment=None):
32 self.taxon=taxon 33 self.branchlength=branchlength 34 self.support=support 35 self.comment=comment
36 37
38 -class Tree(Nodes.Chain):
39 """Represents a tree using a chain of nodes with on predecessor (=ancestor) 40 and multiple successors (=subclades). 41 """ 42 # A newick tree is parsed into nested list and then converted to a node list in two stages 43 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and 44 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much 45 # easier when parsing trees. 46 47 ## NOTE: Tree should store its data class in something like self.dataclass=data, 48 ## so that nodes that are generated have easy access to the data class 49 ## Some routines use automatically NodeData, this needs to be more concise 50
51 - def __init__(self,tree=None,weight=1.0,rooted=False,name='',data=NodeData,values_are_support=False,max_support=1.0):
52 """Ntree(self,tree).""" 53 Nodes.Chain.__init__(self) 54 self.dataclass=data 55 self.__values_are_support=values_are_support 56 self.max_support=max_support 57 self.weight=weight 58 self.rooted=rooted 59 self.name=name 60 root=Nodes.Node(data()) 61 self.root = self.add(root) 62 if tree: # use the tree we have 63 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc 64 tree=tree.strip().replace('\n','').replace('\r','') 65 # there's discrepancy whether newick allows semicolons et the end 66 tree=tree.rstrip(';') 67 subtree_info, base_info = self._parse(tree) 68 root.data = self._add_nodedata(root.data, [[], base_info]) 69 self._add_subtree(parent_id=root.id,tree=subtree_info)
70
71 - def _parse(self,tree):
72 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively.""" 73 #Remove any leading/trailing white space - want any string starting 74 #with " (..." should be recognised as a leaf, "(..." 75 tree = tree.strip() 76 if tree.count('(')!=tree.count(')'): 77 raise TreeError('Parentheses do not match in (sub)tree: '+tree) 78 if tree.count('(')==0: # a leaf 79 #check if there's a colon, or a special comment, or both after the taxon name 80 nodecomment=tree.find(NODECOMMENT_START) 81 colon=tree.find(':') 82 if colon==-1 and nodecomment==-1: # none 83 return [tree,[None]] 84 elif colon==-1 and nodecomment>-1: # only special comment 85 return [tree[:nodecomment],self._get_values(tree[nodecomment:])] 86 elif colon>-1 and nodecomment==-1: # only numerical values 87 return [tree[:colon],self._get_values(tree[colon+1:])] 88 elif colon < nodecomment: # taxon name ends at first colon or with special comment 89 return [tree[:colon],self._get_values(tree[colon+1:])] 90 else: 91 return [tree[:nodecomment],self._get_values(tree[nodecomment:])] 92 else: 93 closing=tree.rfind(')') 94 val=self._get_values(tree[closing+1:]) 95 if not val: 96 val=[None] 97 subtrees=[] 98 plevel=0 99 prev=1 100 for p in range(1,closing): 101 if tree[p]=='(': 102 plevel+=1 103 elif tree[p]==')': 104 plevel-=1 105 elif tree[p]==',' and plevel==0: 106 subtrees.append(tree[prev:p]) 107 prev=p+1 108 subtrees.append(tree[prev:closing]) 109 subclades=[self._parse(subtree) for subtree in subtrees] 110 return [subclades,val]
111
112 - def _add_subtree(self,parent_id=None,tree=None):
113 """Adds leaf or tree (in newick format) to a parent_id.""" 114 if parent_id is None: 115 raise TreeError('Need node_id to connect to.') 116 for st in tree: 117 nd=self.dataclass() 118 nd = self._add_nodedata(nd, st) 119 if type(st[0])==list: # it's a subtree 120 sn=Nodes.Node(nd) 121 self.add(sn,parent_id) 122 self._add_subtree(sn.id,st[0]) 123 else: # it's a leaf 124 nd.taxon=st[0] 125 leaf=Nodes.Node(nd) 126 self.add(leaf,parent_id)
127
128 - def _add_nodedata(self, nd, st):
129 """Add data to the node parsed from the comments, taxon and support. 130 """ 131 if isinstance(st[1][-1],str) and st[1][-1].startswith(NODECOMMENT_START): 132 nd.comment=st[1].pop(-1) 133 # if the first element is a string, it's the subtree node taxon 134 elif isinstance(st[1][0], str): 135 nd.taxon = st[1][0] 136 st[1] = st[1][1:] 137 if len(st)>1: 138 if len(st[1])>=2: # if there's two values, support comes first. Is that always so? 139 nd.support=st[1][0] 140 if st[1][1] is not None: 141 nd.branchlength=st[1][1] 142 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths 143 if not self.__values_are_support: # default 144 if st[1][0] is not None: 145 nd.branchlength=st[1][0] 146 else: 147 nd.support=st[1][0] 148 return nd
149
150 - def _get_values(self, text):
151 """Extracts values (support/branchlength) from xx[:yyy], xx.""" 152 153 if text=='': 154 return None 155 nodecomment = None 156 if NODECOMMENT_START in text: # if there's a [&....] comment, cut it out 157 nc_start=text.find(NODECOMMENT_START) 158 nc_end=text.find(NODECOMMENT_END) 159 if nc_end==-1: 160 raise TreeError('Error in tree description: Found %s without matching %s' 161 % (NODECOMMENT_START, NODECOMMENT_END)) 162 nodecomment=text[nc_start:nc_end+1] 163 text=text[:nc_start]+text[nc_end+1:] 164 165 # pase out supports and branchlengths, with internal node taxa info 166 values = [] 167 taxonomy = None 168 for part in [t.strip() for t in text.split(":")]: 169 if part: 170 try: 171 values.append(float(part)) 172 except ValueError: 173 assert taxonomy is None, "Two string taxonomies?" 174 taxonomy = part 175 if taxonomy: 176 values.insert(0, taxonomy) 177 if nodecomment: 178 values.append(nodecomment) 179 return values
180
181 - def _walk(self,node=None):
182 """Return all node_ids downwards from a node.""" 183 184 if node is None: 185 node=self.root 186 for n in self.node(node).succ: 187 yield n 188 for sn in self._walk(n): 189 yield sn
190
191 - def node(self,node_id):
192 """Return the instance of node_id. 193 194 node = node(self,node_id) 195 """ 196 if node_id not in self.chain: 197 raise TreeError('Unknown node_id: %d' % node_id) 198 return self.chain[node_id]
199
200 - def split(self,parent_id=None,n=2,branchlength=1.0):
201 """Speciation: generates n (default two) descendants of a node. 202 203 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0): 204 """ 205 if parent_id is None: 206 raise TreeError('Missing node_id.') 207 ids=[] 208 parent_data=self.chain[parent_id].data 209 for i in range(n): 210 node=Nodes.Node() 211 if parent_data: 212 node.data=self.dataclass() 213 # each node has taxon and branchlength attribute 214 if parent_data.taxon: 215 node.data.taxon=parent_data.taxon+str(i) 216 node.data.branchlength=branchlength 217 ids.append(self.add(node,parent_id)) 218 return ids
219
220 - def search_taxon(self,taxon):
221 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes. 222 223 node_id = search_taxon(self,taxon) 224 """ 225 for id,node in self.chain.iteritems(): 226 if node.data.taxon==taxon: 227 return id 228 return None
229
230 - def prune(self,taxon):
231 """Prunes a terminal taxon from the tree. 232 233 id_of_previous_node = prune(self,taxon) 234 If taxon is from a bifurcation, the connectiong node will be collapsed 235 and its branchlength added to remaining terminal node. This might be no 236 longer a meaningful value' 237 """ 238 239 id=self.search_taxon(taxon) 240 if id is None: 241 raise TreeError('Taxon not found: %s' % taxon) 242 elif id not in self.get_terminals(): 243 raise TreeError('Not a terminal taxon: %s' % taxon) 244 else: 245 prev=self.unlink(id) 246 self.kill(id) 247 if len(self.node(prev).succ)==1: 248 if prev==self.root: # we deleted one branch of a bifurcating root, then we have to move the root upwards 249 self.root=self.node(self.root).succ[0] 250 self.node(self.root).branchlength=0.0 251 self.kill(prev) 252 else: 253 succ=self.node(prev).succ[0] 254 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength 255 self.collapse(prev) 256 self.node(succ).data.branchlength=new_bl 257 return prev
258
259 - def get_taxa(self,node_id=None):
260 """Return a list of all otus downwards from a node. 261 262 nodes = get_taxa(self,node_id=None) 263 """ 264 265 if node_id is None: 266 node_id=self.root 267 if node_id not in self.chain: 268 raise TreeError('Unknown node_id: %d.' % node_id) 269 if self.chain[node_id].succ==[]: 270 if self.chain[node_id].data: 271 return [self.chain[node_id].data.taxon] 272 else: 273 return None 274 else: 275 list=[] 276 for succ in self.chain[node_id].succ: 277 list.extend(self.get_taxa(succ)) 278 return list
279
280 - def get_terminals(self):
281 """Return a list of all terminal nodes.""" 282 return [i for i in self.all_ids() if self.node(i).succ==[]]
283
284 - def is_terminal(self,node):
285 """Returns True if node is a terminal node.""" 286 return self.node(node).succ==[]
287
288 - def is_internal(self,node):
289 """Returns True if node is an internal node.""" 290 return len(self.node(node).succ)>0
291
292 - def is_preterminal(self,node):
293 """Returns True if all successors of a node are terminal ones.""" 294 if self.is_terminal(node): 295 return False not in [self.is_terminal(n) for n in self.node(node).succ] 296 else: 297 return False
298
299 - def count_terminals(self,node=None):
300 """Counts the number of terminal nodes that are attached to a node.""" 301 if node is None: 302 node=self.root 303 return len([n for n in self._walk(node) if self.is_terminal(n)])
304
305 - def collapse_genera(self,space_equals_underscore=True):
306 """Collapses all subtrees which belong to the same genus (i.e share the same first word in their taxon name.)""" 307 308 while True: 309 for n in self._walk(): 310 if self.is_terminal(n): 311 continue 312 taxa=self.get_taxa(n) 313 genera=[] 314 for t in taxa: 315 if space_equals_underscore: 316 t=t.replace(' ','_') 317 try: 318 genus=t.split('_',1)[0] 319 except: 320 genus='None' 321 if genus not in genera: 322 genera.append(genus) 323 if len(genera)==1: 324 self.node(n).data.taxon=genera[0]+' <collapsed>' 325 #now we kill all nodes downstream 326 nodes2kill=[kn for kn in self._walk(node=n)] 327 for kn in nodes2kill: 328 self.kill(kn) 329 self.node(n).succ=[] 330 break # break out of for loop because node list from _walk will be inconsistent 331 else: # for loop exhausted: no genera to collapse left 332 break # while
333
334 - def sum_branchlength(self,root=None,node=None):
335 """Adds up the branchlengths from root (default self.root) to node. 336 337 sum = sum_branchlength(self,root=None,node=None) 338 """ 339 340 if root is None: 341 root=self.root 342 if node is None: 343 raise TreeError('Missing node id.') 344 blen=0.0 345 while node is not None and node is not root: 346 blen+=self.node(node).data.branchlength 347 node=self.node(node).prev 348 return blen
349
350 - def set_subtree(self,node):
351 """Return subtree as a set of nested sets. 352 353 sets = set_subtree(self,node) 354 """ 355 356 if self.node(node).succ==[]: 357 return self.node(node).data.taxon 358 else: 359 try: 360 return frozenset([self.set_subtree(n) for n in self.node(node).succ]) 361 except: 362 print node 363 print self.node(node).succ 364 for n in self.node(node).succ: 365 print n, self.set_subtree(n) 366 print [self.set_subtree(n) for n in self.node(node).succ] 367 raise
368
369 - def is_identical(self,tree2):
370 """Compare tree and tree2 for identity. 371 372 result = is_identical(self,tree2) 373 """ 374 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
375
376 - def is_compatible(self,tree2,threshold,strict=True):
377 """Compares branches with support>threshold for compatibility. 378 379 result = is_compatible(self,tree2,threshold) 380 """ 381 382 # check if both trees have the same set of taxa. strict=True enforces this. 383 missing2=set(self.get_taxa())-set(tree2.get_taxa()) 384 missing1=set(tree2.get_taxa())-set(self.get_taxa()) 385 if strict and (missing1 or missing2): 386 if missing1: 387 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name) 388 if missing2: 389 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name) 390 raise TreeError('Can\'t compare trees with different taxon compositions.') 391 t1=[(set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if 392 self.node(n).succ and 393 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)] 394 t2=[(set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if 395 tree2.node(n).succ and 396 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)] 397 conflict=[] 398 for (st1,sup1) in t1: 399 for (st2,sup2) in t2: 400 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes 401 intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets 402 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ??? 403 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa 404 conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2)) 405 return conflict
406
407 - def common_ancestor(self,node1,node2):
408 """Return the common ancestor that connects two nodes. 409 410 node_id = common_ancestor(self,node1,node2) 411 """ 412 413 l1=[self.root]+self.trace(self.root,node1) 414 l2=[self.root]+self.trace(self.root,node2) 415 return [n for n in l1 if n in l2][-1]
416
417 - def distance(self,node1,node2):
418 """Add and return the sum of the branchlengths between two nodes. 419 dist = distance(self,node1,node2) 420 """ 421 422 ca=self.common_ancestor(node1,node2) 423 return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
424
425 - def is_monophyletic(self,taxon_list):
426 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise. 427 428 result = is_monophyletic(self,taxon_list) 429 """ 430 if isinstance(taxon_list,str): 431 taxon_set=set([taxon_list]) 432 else: 433 taxon_set=set(taxon_list) 434 node_id=self.root 435 while 1: 436 subclade_taxa=set(self.get_taxa(node_id)) 437 if subclade_taxa==taxon_set: # are we there? 438 return node_id 439 else: # check subnodes 440 for subnode in self.chain[node_id].succ: 441 if set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream 442 node_id=subnode 443 break # out of for loop 444 else: 445 return -1 # taxon set was not with successors, for loop exhausted
446
447 - def is_bifurcating(self,node=None):
448 """Return True if tree downstream of node is strictly bifurcating.""" 449 if node is None: 450 node=self.root 451 if node==self.root and len(self.node(node).succ)==3: # root can be trifurcating, because it has no ancestor 452 return self.is_bifurcating(self.node(node).succ[0]) and \ 453 self.is_bifurcating(self.node(node).succ[1]) and \ 454 self.is_bifurcating(self.node(node).succ[2]) 455 if len(self.node(node).succ)==2: 456 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1]) 457 elif len(self.node(node).succ)==0: 458 return True 459 else: 460 return False
461
462 - def branchlength2support(self):
463 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0 464 465 This is necessary when support has been stored as branchlength (e.g. paup), and has thus 466 been read in as branchlength. 467 """ 468 469 for n in self.chain: 470 self.node(n).data.support=self.node(n).data.branchlength 471 self.node(n).data.branchlength=0.0
472
473 - def convert_absolute_support(self,nrep):
474 """Convert absolute support (clade-count) to rel. frequencies. 475 476 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of 477 calculating relative frequencies.""" 478 479 for n in self._walk(): 480 if self.node(n).data.support: 481 self.node(n).data.support/=float(nrep)
482
483 - def has_support(self,node=None):
484 """Returns True if any of the nodes has data.support != None.""" 485 for n in self._walk(node): 486 if self.node(n).data.support: 487 return True 488 else: 489 return False
490
491 - def randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True):
492 """Generates a random tree with ntax taxa and/or taxa from taxlabels. 493 494 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True) 495 Trees are bifurcating by default. (Polytomies not yet supported). 496 """ 497 498 if not ntax and taxon_list: 499 ntax=len(taxon_list) 500 elif not taxon_list and ntax: 501 taxon_list=['taxon'+str(i+1) for i in range(ntax)] 502 elif not ntax and not taxon_list: 503 raise TreeError('Either numer of taxa or list of taxa must be specified.') 504 elif ntax != len(taxon_list): 505 raise TreeError('Length of taxon list must correspond to ntax.') 506 # initiate self with empty root 507 self.__init__() 508 terminals=self.get_terminals() 509 # bifurcate randomly at terminal nodes until ntax is reached 510 while len(terminals)<ntax: 511 newsplit=random.choice(terminals) 512 new_terminals=self.split(parent_id=newsplit,branchlength=branchlength) 513 # if desired, give some variation to the branch length 514 if branchlength_sd: 515 for nt in new_terminals: 516 bl=random.gauss(branchlength,branchlength_sd) 517 if bl<0: 518 bl=0 519 self.node(nt).data.branchlength=bl 520 terminals.extend(new_terminals) 521 terminals.remove(newsplit) 522 # distribute taxon labels randomly 523 random.shuffle(taxon_list) 524 for (node,name) in zip(terminals,taxon_list): 525 self.node(node).data.taxon=name
526
527 - def display(self):
528 """Quick and dirty lists of all nodes.""" 529 table=[('#','taxon','prev','succ','brlen','blen (sum)','support','comment')] 530 #Sort this to be consistent across CPython, Jython, etc 531 for i in sorted(self.all_ids()): 532 n=self.node(i) 533 if not n.data: 534 table.append((str(i),'-',str(n.prev),str(n.succ),'-','-','-','-')) 535 else: 536 tx=n.data.taxon 537 if not tx: 538 tx='-' 539 blength="%0.2f" % n.data.branchlength 540 if blength is None: 541 blength='-' 542 sum_blength='-' 543 else: 544 sum_blength="%0.2f" % self.sum_branchlength(node=i) 545 support=n.data.support 546 if support is None: 547 support='-' 548 else: 549 support="%0.2f" % support 550 comment=n.data.comment 551 if comment is None: 552 comment='-' 553 table.append((str(i),tx,str(n.prev),str(n.succ), 554 blength, sum_blength, support, comment)) 555 print '\n'.join(['%3s %32s %15s %15s %8s %10s %8s %20s' % l for l in table]) 556 print '\nRoot: ',self.root
557
558 - def to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True,plain_newick=False,ladderize=None,ignore_comments=True):
559 """Return a paup compatible tree line.""" 560 # if there's a conflict in the arguments, we override plain=True 561 if support_as_branchlengths or branchlengths_only: 562 plain=False 563 self.support_as_branchlengths=support_as_branchlengths 564 self.branchlengths_only=branchlengths_only 565 self.ignore_comments=ignore_comments 566 self.plain=plain 567 568 def make_info_string(data,terminal=False): 569 """Creates nicely formatted support/branchlengths.""" 570 # CHECK FORMATTING 571 if self.plain: # plain tree only. That's easy. 572 info_string= '' 573 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths 574 if terminal: # terminal branches have 100% support 575 info_string= ':%1.2f' % self.max_support 576 elif data.support: 577 info_string= ':%1.2f' % (data.support) 578 else: 579 info_string=':0.00' 580 elif self.branchlengths_only: # write only branchlengths, ignore support 581 info_string= ':%1.5f' % (data.branchlength) 582 else: # write suport and branchlengths (e.g. .con tree of mrbayes) 583 if terminal: 584 info_string= ':%1.5f' % (data.branchlength) 585 else: 586 if data.branchlength is not None and data.support is not None: # we have blen and suppport 587 info_string= '%1.2f:%1.5f' % (data.support,data.branchlength) 588 elif data.branchlength is not None: # we have only blen 589 info_string= '0.00000:%1.5f' % (data.branchlength) 590 elif data.support is not None: # we have only support 591 info_string= '%1.2f:0.00000' % (data.support) 592 else: 593 info_string= '0.00:0.00000' 594 if not ignore_comments and hasattr(data,'nodecomment'): 595 info_string=str(data.nodecomment)+info_string 596 return info_string
597 598 def ladderize_nodes(nodes,ladderize=None): 599 """Sorts node numbers according to the number of terminal nodes.""" 600 if ladderize in ['left','LEFT','right','RIGHT']: 601 succnode_terminals=[(self.count_terminals(node=n),n) for n in nodes] 602 succnode_terminals.sort() 603 if (ladderize=='right' or ladderize=='RIGHT'): 604 succnode_terminals.reverse() 605 if succnode_terminals: 606 succnodes=zip(*succnode_terminals)[1] 607 else: 608 succnodes=[] 609 else: 610 succnodes=nodes 611 return succnodes
612 613 def newickize(node,ladderize=None): 614 """Convert a node tree to a newick tree recursively.""" 615 616 if not self.node(node).succ: # terminal 617 return self.node(node).data.taxon+make_info_string(self.node(node).data,terminal=True) 618 else: 619 succnodes=ladderize_nodes(self.node(node).succ,ladderize=ladderize) 620 subtrees=[newickize(sn,ladderize=ladderize) for sn in succnodes] 621 return '(%s)%s' % (','.join(subtrees),make_info_string(self.node(node).data)) 622 623 treeline=['tree'] 624 if self.name: 625 treeline.append(self.name) 626 else: 627 treeline.append('a_tree') 628 treeline.append('=') 629 if self.weight != 1: 630 treeline.append('[&W%s]' % str(round(float(self.weight),3))) 631 if self.rooted: 632 treeline.append('[&R]') 633 succnodes=ladderize_nodes(self.node(self.root).succ) 634 subtrees=[newickize(sn,ladderize=ladderize) for sn in succnodes] 635 treeline.append('(%s)' % ','.join(subtrees)) 636 if plain_newick: 637 return treeline[-1] 638 else: 639 return ' '.join(treeline)+';' 640
641 - def __str__(self):
642 """Short version of to_string(), gives plain tree""" 643 return self.to_string(plain=True)
644
645 - def unroot(self):
646 """Defines a unrooted Tree structure, using data of a rooted Tree.""" 647 648 # travel down the rooted tree structure and save all branches and the nodes they connect 649 650 def _get_branches(node): 651 branches=[] 652 for b in self.node(node).succ: 653 branches.append([node,b,self.node(b).data.branchlength,self.node(b).data.support]) 654 branches.extend(_get_branches(b)) 655 return branches
656 657 self.unrooted=_get_branches(self.root) 658 # if root is bifurcating, then it is eliminated 659 if len(self.node(self.root).succ)==2: 660 # find the two branches that connect to root 661 rootbranches=[b for b in self.unrooted if self.root in b[:2]] 662 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0])) 663 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1])) 664 # Connect them two each other. If both have support, it should be identical (or one set to None?). 665 # If both have branchlengths, they will be added 666 newbranch=[b1[1],b2[1],b1[2]+b2[2]] 667 if b1[3] is None: 668 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support 669 elif b2[3] is None: 670 newbranch.append(b1[3]) # dito 671 elif b1[3]==b2[3]: 672 newbranch.append(b1[3]) # identical support 673 elif b1[3]==0 or b2[3]==0: 674 newbranch.append(b1[3]+b2[3]) # one is 0, take the other 675 else: 676 raise TreeError('Support mismatch in bifurcating root: %f, %f' 677 % (float(b1[3]),float(b2[3]))) 678 self.unrooted.append(newbranch) 679
680 - def root_with_outgroup(self,outgroup=None):
681 682 def _connect_subtree(parent,child): 683 """Hook subtree starting with node child to parent.""" 684 for i,branch in enumerate(self.unrooted): 685 if parent in branch[:2] and child in branch[:2]: 686 branch=self.unrooted.pop(i) 687 break 688 else: 689 raise TreeError('Unable to connect nodes for rooting: nodes %d and %d are not connected' 690 % (parent,child)) 691 self.link(parent,child) 692 self.node(child).data.branchlength=branch[2] 693 self.node(child).data.support=branch[3] 694 #now check if there are more branches connected to the child, and if so, connect them 695 child_branches=[b for b in self.unrooted if child in b[:2]] 696 for b in child_branches: 697 if child==b[0]: 698 succ=b[1] 699 else: 700 succ=b[0] 701 _connect_subtree(child,succ)
702 703 # check the outgroup we're supposed to root with 704 if outgroup is None: 705 return self.root 706 outgroup_node=self.is_monophyletic(outgroup) 707 if outgroup_node==-1: 708 return -1 709 # if tree is already rooted with outgroup on a bifurcating root, 710 # or the outgroup includes all taxa on the tree, then we're fine 711 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root: 712 return self.root 713 714 self.unroot() 715 # now we find the branch that connects outgroup and ingroup 716 #print self.node(outgroup_node).prev 717 for i,b in enumerate(self.unrooted): 718 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]: 719 root_branch=self.unrooted.pop(i) 720 break 721 else: 722 raise TreeError('Unrooted and rooted Tree do not match') 723 if outgroup_node==root_branch[1]: 724 ingroup_node=root_branch[0] 725 else: 726 ingroup_node=root_branch[1] 727 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup 728 for n in self.all_ids(): 729 self.node(n).prev=None 730 self.node(n).succ=[] 731 # now we just add both subtrees (outgroup and ingroup) branch for branch 732 root=Nodes.Node(data=NodeData()) # new root 733 self.add(root) # add to tree description 734 self.root=root.id # set as root 735 self.unrooted.append([root.id,ingroup_node,root_branch[2],root_branch[3]]) # add branch to ingroup to unrooted tree 736 self.unrooted.append([root.id,outgroup_node,0.0,0.0]) # add branch to outgroup to unrooted tree 737 _connect_subtree(root.id,ingroup_node) # add ingroup 738 _connect_subtree(root.id,outgroup_node) # add outgroup 739 # if theres still a lonely node in self.chain, then it's the old root, and we delete it 740 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root] 741 if len(oldroot)>1: 742 raise TreeError('Isolated nodes in tree description: %s' 743 % ','.join(oldroot)) 744 elif len(oldroot)==1: 745 self.kill(oldroot[0]) 746 return self.root 747
748 - def merge_with_support(self,bstrees=None,constree=None,threshold=0.5,outgroup=None):
749 """Merges clade support (from consensus or list of bootstrap-trees) with phylogeny. 750 751 tree=merge_bootstrap(phylo,bs_tree=<list_of_trees>) 752 or 753 tree=merge_bootstrap(phylo,consree=consensus_tree with clade support) 754 """ 755 756 if bstrees and constree: 757 raise TreeError('Specify either list of bootstrap trees or consensus tree, not both') 758 if not (bstrees or constree): 759 raise TreeError('Specify either list of bootstrap trees or consensus tree.') 760 # no outgroup specified: use the smallest clade of the root 761 if outgroup is None: 762 try: 763 succnodes=self.node(self.root).succ 764 smallest=min([(len(self.get_taxa(n)),n) for n in succnodes]) 765 outgroup=self.get_taxa(smallest[1]) 766 except: 767 raise TreeError("Error determining outgroup.") 768 else: # root with user specified outgroup 769 self.root_with_outgroup(outgroup) 770 771 if bstrees: # calculate consensus 772 constree=consensus(bstrees,threshold=threshold,outgroup=outgroup) 773 else: 774 if not constree.has_support(): 775 constree.branchlength2support() 776 constree.root_with_outgroup(outgroup) 777 # now we travel all nodes, and add support from consensus, if the clade is present in both 778 for pnode in self._walk(): 779 cnode=constree.is_monophyletic(self.get_taxa(pnode)) 780 if cnode>-1: 781 self.node(pnode).data.support=constree.node(cnode).data.support
782 783
784 -def consensus(trees, threshold=0.5,outgroup=None):
785 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.""" 786 787 total=len(trees) 788 if total==0: 789 return None 790 # shouldn't we make sure that it's NodeData or subclass?? 791 dataclass=trees[0].dataclass 792 max_support=trees[0].max_support 793 clades={} 794 #countclades={} 795 alltaxa=set(trees[0].get_taxa()) 796 # calculate calde frequencies 797 c=0 798 for t in trees: 799 c+=1 800 #if c%100==0: 801 # print c 802 if alltaxa!=set(t.get_taxa()): 803 raise TreeError('Trees for consensus must contain the same taxa') 804 t.root_with_outgroup(outgroup=outgroup) 805 for st_node in t._walk(t.root): 806 subclade_taxa=t.get_taxa(st_node) 807 subclade_taxa.sort() 808 subclade_taxa=str(subclade_taxa) # lists are not hashable 809 if subclade_taxa in clades: 810 clades[subclade_taxa]+=float(t.weight)/total 811 else: 812 clades[subclade_taxa]=float(t.weight)/total 813 #if subclade_taxa in countclades: 814 # countclades[subclade_taxa]+=t.weight 815 #else: 816 # countclades[subclade_taxa]=t.weight 817 # weed out clades below threshold 818 delclades=[c for c,p in clades.iteritems() if round(p,3)<threshold] # round can be necessary 819 for c in delclades: 820 del clades[c] 821 # create a tree with a root node 822 consensus=Tree(name='consensus_%2.1f' % float(threshold),data=dataclass) 823 # each clade needs a node in the new tree, add them as isolated nodes 824 for c, s in clades.iteritems(): 825 node=Nodes.Node(data=dataclass()) 826 node.data.support=s 827 node.data.taxon=set(eval(c)) 828 consensus.add(node) 829 # set root node data 830 consensus.node(consensus.root).data.support=None 831 consensus.node(consensus.root).data.taxon=alltaxa 832 # we sort the nodes by no. of taxa in the clade, so root will be the last 833 consensus_ids=consensus.all_ids() 834 consensus_ids.sort(lambda x,y:len(consensus.node(x).data.taxon)-len(consensus.node(y).data.taxon)) 835 # now we just have to hook each node to the next smallest node that includes all taxa of the current 836 for i,current in enumerate(consensus_ids[:-1]): # skip the last one which is the root 837 #print '----' 838 #print 'current: ',consensus.node(current).data.taxon 839 # search remaining nodes 840 for parent in consensus_ids[i+1:]: 841 #print 'parent: ',consensus.node(parent).data.taxon 842 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon): 843 break 844 else: 845 sys.exit('corrupt tree structure?') 846 # internal nodes don't have taxa 847 if len(consensus.node(current).data.taxon)==1: 848 consensus.node(current).data.taxon=consensus.node(current).data.taxon.pop() 849 # reset the support for terminal nodes to maximum 850 #consensus.node(current).data.support=max_support 851 else: 852 consensus.node(current).data.taxon=None 853 consensus.link(parent,current) 854 # eliminate root taxon name 855 consensus.node(consensus_ids[-1]).data.taxon=None 856 if alltaxa != set(consensus.get_taxa()): 857 raise TreeError('FATAL ERROR: consensus tree is corrupt') 858 return consensus
859