Package Bio :: Package Phylo :: Module NewickIO
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.NewickIO

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license. Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """I/O function wrappers for the Newick file format. 
  9   
 10  See: http://evolution.genetics.washington.edu/phylip/newick_doc.html 
 11  """ 
 12  __docformat__ = "restructuredtext en" 
 13   
 14  from cStringIO import StringIO 
 15   
 16  from Bio.Phylo import Newick 
 17   
 18  # Definitions retrieved from Bio.Nexus.Trees 
 19  NODECOMMENT_START = '[&' 
 20  NODECOMMENT_END = ']' 
class NewickError(Exception):
    """Signal that a Newick object could not be built from the input.

    Raised by the parser in this module when the tree text is malformed,
    for example unbalanced parentheses or an unterminated node comment.
    """
26
# ---------------------------------------------------------
# Public API

def parse(handle, **kwargs):
    """Read every tree found in an open Newick file handle.

    Any keyword arguments are forwarded unchanged to `Parser.parse`.

    :returns: generator of Bio.Phylo.Newick.Tree objects.
    """
    parser = Parser(handle)
    return parser.parse(**kwargs)
37
def write(trees, handle, plain=False, **kwargs):
    """Write the given trees to a file handle in Newick format.

    ``plain`` and any extra keyword arguments are forwarded to
    `Writer.write`.

    :returns: number of trees written.
    """
    writer = Writer(trees)
    return writer.write(handle, plain=plain, **kwargs)
45
# ---------------------------------------------------------
# Input

class Parser(object):
    """Parse a Newick tree given a file handle.

    Based on the parser in `Bio.Nexus.Trees`.
    """

    def __init__(self, handle):
        """Remember the line-iterable ``handle`` that `parse` will read."""
        self.handle = handle

    @classmethod
    def from_string(cls, treetext):
        """Create a parser that reads from the string ``treetext``."""
        handle = StringIO(treetext)
        return cls(handle)

    def parse(self, values_are_confidence=False, rooted=False):
        """Parse the text stream this object was initialized with.

        This is a generator: nothing is read (and the options are not
        stored on the instance) until iteration starts.  Lines are
        accumulated, with trailing whitespace removed, until one ends
        with ';', at which point the accumulated text is parsed as a tree.

        :param values_are_confidence: when a node carries a single numeric
            value, store it as the confidence instead of the branch length.
        :param rooted: value passed as ``rooted`` to each resulting tree.
        :returns: generator of tree objects, one per ';'-terminated chunk
            (plus one for trailing text that lacks the final ';').
        """
        self.values_are_confidence = values_are_confidence
        self.rooted = rooted    # XXX this attribute is useless
        # Collect the pieces in a list and join once per tree rather than
        # growing a string with '+=' on every line.
        pieces = []
        for line in self.handle:
            piece = line.rstrip()
            pieces.append(piece)
            # The buffer is emptied after every tree, so it can only end
            # with ';' if the piece just added does.
            if piece.endswith(';'):
                yield self._parse_tree(''.join(pieces), rooted)
                pieces = []
        buf = ''.join(pieces)
        if buf:
            # Last tree is missing a terminal ';' character -- that's OK
            yield self._parse_tree(buf, rooted)

    def _parse_tree(self, text, rooted):
        """Parse the text representation into a Tree object.

        ``rooted`` is used directly instead of ``self.rooted``, so that a
        second, lazily started `parse` call on the same instance cannot
        change the flag applied to this tree.
        """
        # XXX Pass **kwargs along from Parser.parse?
        return Newick.Tree(root=self._parse_subtree(text), rooted=rooted)

    def _parse_subtree(self, text):
        """Parse ``(a,b,c...)[[[xx]:]yy]`` into subcomponents, recursively.

        :returns: the clade for ``text``, with its child clades attached.
        :raises NewickError: if the numbers of '(' and ')' differ.
        """
        text = text.strip().rstrip(';')
        if text.count('(') != text.count(')'):
            raise NewickError("Parentheses do not match in (sub)tree: " + text)
        # Text is now "(...)..." (balanced parens) or "..." (leaf node)
        if text.count('(') == 0:
            # Leaf/terminal node -- recursion stops here
            return self._parse_tag(text)
        # Handle one layer of the nested subtree
        # XXX what if there's a paren in a comment or other string?
        close_posn = text.rfind(')')
        subtrees = []
        # Locate subtrees by counting nesting levels of parens; only commas
        # seen at nesting level 0 separate the direct children.
        plevel = 0
        prev = 1
        # Start at 1: index 0 is taken to be this subtree's own '('.
        for posn in range(1, close_posn):
            if text[posn] == '(':
                plevel += 1
            elif text[posn] == ')':
                plevel -= 1
            elif text[posn] == ',' and plevel == 0:
                subtrees.append(text[prev:posn])
                prev = posn + 1
        subtrees.append(text[prev:close_posn])
        # Construct a new clade from trailing text, then attach subclades
        clade = self._parse_tag(text[close_posn+1:])
        clade.clades = [self._parse_subtree(st) for st in subtrees]
        return clade

    def _parse_tag(self, text):
        """Extract the data for a node from text.

        The tag may hold one ``[&...]`` comment, and up to one name and two
        numbers separated by ':'.

        :returns: Clade instance containing any available data
        :raises NewickError: if a comment is opened but never closed, if
            the tag holds more than one non-numeric field, or if it holds
            more than two numeric fields.
        """
        # Extract the comment
        comment_start = text.find(NODECOMMENT_START)
        if comment_start != -1:
            # Look for the terminator only after the opener, so that a ']'
            # occurring earlier in the tag is not taken as the comment end.
            comment_end = text.find(NODECOMMENT_END, comment_start)
            if comment_end == -1:
                raise NewickError('Error in tree description: '
                                  'Found %s without matching %s'
                                  % (NODECOMMENT_START, NODECOMMENT_END))
            comment = text[comment_start+len(NODECOMMENT_START):comment_end]
            text = text[:comment_start] + text[comment_end+len(NODECOMMENT_END):]
        else:
            comment = None
        clade = Newick.Clade(comment=comment)
        # Extract name (taxon), and optionally support, branch length
        # Float values are support and branch length, the string is name/taxon
        values = []
        for part in (t.strip() for t in text.split(':')):
            if part:
                try:
                    values.append(float(part))
                except ValueError:
                    # Raise explicitly: an assert would vanish under -O and
                    # let the second name silently replace the first.
                    if clade.name is not None:
                        raise NewickError("Two string taxonomies?")
                    clade.name = part
        if len(values) == 1:
            # Real branch length, or support as branch length
            if self.values_are_confidence:
                clade.confidence = values[0]
            else:
                clade.branch_length = values[0]
        elif len(values) == 2:
            # Two non-taxon values: support comes first. (Is that always so?)
            clade.confidence, clade.branch_length = values
        elif len(values) > 2:
            raise NewickError("Too many colons in tag: " + text)
        return clade
153
# ---------------------------------------------------------
# Output

class Writer(object):
    """Based on the writer in Bio.Nexus.Trees (str, to_string)."""

    def __init__(self, trees):
        """Remember the iterable of trees to be serialized."""
        self.trees = trees

    def write(self, handle, **kwargs):
        """Write this instance's trees to a file handle.

        Each tree goes on its own line.  Keyword arguments are passed on
        to `to_strings`.

        :returns: number of trees written.
        """
        count = 0
        for treestr in self.to_strings(**kwargs):
            handle.write(treestr + '\n')
            count += 1
        return count

    def to_strings(self, confidence_as_branch_length=False,
                   branch_length_only=False, plain=False,
                   plain_newick=True, ladderize=None, max_confidence=1.0,
                   format_confidence='%1.2f', format_branch_length='%1.5f'):
        """Return an iterable of PAUP-compatible tree lines.

        :param confidence_as_branch_length: write each clade's confidence
            in the branch-length position; terminal clades get
            ``max_confidence``.
        :param branch_length_only: write branch lengths and omit confidence.
        :param plain: write names only; ignored if either of the two
            options above is set.
        :param plain_newick: if true, yield the bare Newick string;
            otherwise prefix it with ``tree <name> =`` and, where
            applicable, ``[&W...]`` / ``[&R]`` markers.
        :param ladderize: 'left'/'LEFT' or 'right'/'RIGHT' to call
            ``tree.ladderize`` on each tree before writing it.
        :param max_confidence: confidence written for terminal clades in
            ``confidence_as_branch_length`` mode.
        :param format_confidence: %-format applied to confidence values.
        :param format_branch_length: %-format applied to branch lengths.
        """
        # If there's a conflict in the arguments, we override plain=True
        if confidence_as_branch_length or branch_length_only:
            plain = False
        make_info_string = self._info_factory(plain,
                confidence_as_branch_length, branch_length_only, max_confidence,
                format_confidence, format_branch_length)

        def newickize(clade):
            """Convert a node tree to a Newick tree string, recursively."""
            if clade.is_terminal():    # terminal
                return ((clade.name or '')
                        + make_info_string(clade, terminal=True))
            else:
                subtrees = (newickize(sub) for sub in clade)
                return '(%s)%s' % (','.join(subtrees),
                        (clade.name or '') + make_info_string(clade))

        # Convert each tree to a string
        for tree in self.trees:
            if ladderize in ('left', 'LEFT', 'right', 'RIGHT'):
                # Nexus compatibility shim, kind of
                tree.ladderize(reverse=(ladderize in ('right', 'RIGHT')))
            rawtree = newickize(tree.root) + ';'
            if plain_newick:
                yield rawtree
                continue
            # Nexus-style (?) notation before the raw Newick tree
            treeline = ['tree', (tree.name or 'a_tree'), '=']
            if tree.weight != 1:
                treeline.append('[&W%s]' % round(float(tree.weight), 3))
            if tree.rooted:
                treeline.append('[&R]')
            treeline.append(rawtree)
            yield ' '.join(treeline)

    def _info_factory(self, plain, confidence_as_branch_length,
            branch_length_only, max_confidence, format_confidence,
            format_branch_length):
        """Return a function that creates a nicely formatted node tag.

        The returned function takes ``(clade, terminal=False)`` and returns
        the text that follows the clade's name in the Newick output.
        """
        if plain:
            # Plain tree only. That's easy.
            def make_info_string(clade, terminal=False):
                return ''

        elif confidence_as_branch_length:
            # Support as branchlengths (eg. PAUP), ignore actual branchlengths
            # NOTE(review): a non-terminal clade whose confidence is None
            # makes the float format below raise TypeError; no default for
            # missing support is defined in this module -- confirm intent.
            def make_info_string(clade, terminal=False):
                if terminal:
                    # terminal branches have 100% support
                    return ':' + format_confidence % max_confidence
                else:
                    return ':' + format_confidence % clade.confidence

        elif branch_length_only:
            # write only branchlengths, ignore support
            def make_info_string(clade, terminal=False):
                # A missing branch length is written as 0.0, the same
                # fallback the default mode below uses, instead of letting
                # the float format raise TypeError on None.
                return ':' + format_branch_length % (
                        clade.branch_length or 0.0)

        else:
            # write support and branchlengths (e.g. .con tree of mrbayes)
            def make_info_string(clade, terminal=False):
                if (terminal or
                        not hasattr(clade, 'confidence') or
                        clade.confidence is None):
                    return (':' + format_branch_length
                            ) % (clade.branch_length or 0.0)
                else:
                    return (format_confidence + ':' + format_branch_length
                            ) % (clade.confidence, clade.branch_length or 0.0)

        return make_info_string
220 221 elif confidence_as_branch_length: 222 # Support as branchlengths (eg. PAUP), ignore actual branchlengths 223 def make_info_string(clade, terminal=False): 224 if terminal: 225 # terminal branches have 100% support 226 return ':' + format_confidence % max_confidence 227 else: 228 return ':' + format_confidence % clade.confidence 229 230 elif branch_length_only: 231 # write only branchlengths, ignore support 232 def make_info_string(clade, terminal=False): 233 return ':' + format_branch_length % clade.branch_length 234 235 else: 236 # write support and branchlengths (e.g. .con tree of mrbayes) 237 def make_info_string(clade, terminal=False): 238 if (terminal or 239 not hasattr(clade, 'confidence') or 240 clade.confidence is None): 241 return (':' + format_branch_length 242 ) % (clade.branch_length or 0.0) 243 else: 244 return (format_confidence + ':' + format_branch_length 245 ) % (clade.confidence, clade.branch_length or 0.0) 246 247 return make_info_string 248