Package Bio :: Package Phylo :: Module NeXMLIO
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.NeXMLIO

  1  # Copyright (C) 2013 by Ben Morris (ben@bendmorris.com) 
  2  # Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox 
  3  # and Bio.Phylo.Newick, copyright 2009 by Eric Talevich. 
  4  # All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license. Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8   
  9  """I/O function wrappers for the NeXML file format. 
 10   
 11  See: http://www.nexml.org 
 12  """ 
 13   
 14   
 15  from Bio._py3k import StringIO 
 16   
 17  from Bio.Phylo import NeXML 
 18  from xml.dom import minidom 
 19  import sys 
 20  from ._cdao_owl import cdao_elements, cdao_namespaces, resolve_uri 
 21   
 22   
 23  # For speed try to use cElementTree rather than ElementTree 
 24  try: 
 25      if (3, 0) <= sys.version_info[:2] <= (3, 1): 
 26          # Workaround for bug in python 3.0 and 3.1, 
 27          # see http://bugs.python.org/issue9257 
 28          from xml.etree import ElementTree as ElementTree 
 29      else: 
 30          from xml.etree import cElementTree as ElementTree 
 31  except ImportError: 
 32      from xml.etree import ElementTree as ElementTree 
 33   
 34  NAMESPACES = { 
 35      'xsi': 'http://www.w3.org/2001/XMLSchema-instance', 
 36      'xml': 'http://www.w3.org/XML/1998/namespace', 
 37      'nex': 'http://www.nexml.org/2009', 
 38      'xsd': 'http://www.w3.org/2001/XMLSchema#', 
 39  } 
 40  NAMESPACES.update(cdao_namespaces) 
 41  DEFAULT_NAMESPACE = NAMESPACES['nex'] 
 42  VERSION = '0.9' 
 43  SCHEMA = 'http://www.nexml.org/2009/nexml/xsd/nexml.xsd' 
 44   
 45   
 46  try: 
 47      register_namespace = ElementTree.register_namespace 
 48  except AttributeError: 
 49      if not hasattr(ElementTree, '_namespace_map'): 
 50          # cElementTree needs the pure-Python xml.etree.ElementTree 
 51          from xml.etree import ElementTree as ET_py 
 52          ElementTree._namespace_map = ET_py._namespace_map 
53 54 - def register_namespace(prefix, uri):
55 ElementTree._namespace_map[uri] = prefix
56 57 for prefix, uri in NAMESPACES.items(): 58 register_namespace(prefix, uri)
def qUri(s):
    """Return the full URI for the prefixed URI *s*.

    The lookup is delegated to ``resolve_uri`` using the module-level
    ``NAMESPACES`` table with ``xml_style=True``.
    """
    full_uri = resolve_uri(s, namespaces=NAMESPACES, xml_style=True)
    return full_uri
64
def cdao_to_obo(s):
    """Convert a CDAO-prefixed URI into its OBO-prefixed equivalent.

    The first ``len('cdao:')`` characters of *s* are dropped (the prefix is
    not checked) and the remainder is looked up in ``cdao_elements``; the
    mapped value is returned behind an ``obo:`` prefix. An unknown term
    raises ``KeyError``.
    """
    term = s[len('cdao:'):]
    obo_id = cdao_elements[term]
    return 'obo:%s' % obo_id
69
def matches(s):
    """Return every spelling of term *s* that should count as a match.

    A ``cdao:``-prefixed term yields a 2-tuple of the term itself and its
    OBO translation; any other term yields a 1-tuple holding just *s*.
    """
    if not s.startswith('cdao:'):
        return (s,)
    return (s, cdao_to_obo(s))
77
class NeXMLError(Exception):
    """Signal that construction of a NeXML object cannot continue."""
83
# ---------------------------------------------------------
# Public API

def parse(handle, **kwargs):
    """Iterate over the trees found in a NeXML file handle.

    Any keyword arguments are handed on to ``Parser.parse``.

    :returns: generator of Bio.Phylo.NeXML.Tree objects.

    """
    parser = Parser(handle)
    return parser.parse(**kwargs)
95
def write(trees, handle, plain=False, **kwargs):
    """Write the given trees in NeXML format to a file handle.

    ``plain`` and any further keyword arguments are forwarded unchanged to
    ``Writer.write``.

    :returns: number of trees written.

    """
    writer = Writer(trees)
    return writer.write(handle, plain=plain, **kwargs)
104
# ---------------------------------------------------------
# Input

class Parser(object):
    """Parse a NeXML tree given a file handle.

    Based on the parser in `Bio.Nexus.Trees`.
    """

    def __init__(self, handle):
        """Store the file handle the NeXML document will be read from."""
        self.handle = handle

    @classmethod
    def from_string(cls, treetext):
        """Create a parser that reads the NeXML text in *treetext*.

        The string is wrapped in a StringIO object so it can be consumed
        like a file handle.
        """
        handle = StringIO(treetext)
        return cls(handle)

    def add_annotation(self, node_dict, meta_node):
        """Copy one ``meta`` element into the attribute dict of a node.

        :param node_dict: dict of keyword arguments being collected for one
            clade; it is modified in place.
        :param meta_node: the ``meta`` element to read.

        The key is the element's ``property`` attribute, or ``'meta'`` when
        the attribute is absent. A support-value property (in either its
        CDAO or OBO spelling) is stored as a float under ``'confidence'``;
        anything else is stored as the element's raw text.
        """
        prop = meta_node.attrib.get('property', 'meta')

        if prop in matches('cdao:has_Support_Value'):
            node_dict['confidence'] = float(meta_node.text)
        else:
            node_dict[prop] = meta_node.text

    @staticmethod
    def _pick_root(node_ids, srcs, tars):
        """Choose a root id for a tree that does not declare one.

        :param node_ids: node ids in document order.
        :param srcs: set of ids that appear as the source of an edge.
        :param tars: set of ids that appear as the target of an edge.
        :returns: the first id that has children but no parent; failing
            that, the first id with no parent at all (e.g. the only node of
            a tree without edges).
        :raises NeXMLError: if every node is the target of some edge, or
            there are no nodes.
        """
        fallback = None
        for node_id in node_ids:
            if node_id in tars:
                continue
            if node_id in srcs:
                return node_id
            if fallback is None:
                fallback = node_id
        if fallback is None:
            raise NeXMLError('Unable to determine the root of the tree: '
                             'no node without a parent was found.')
        return fallback

    def parse(self, values_are_confidence=False, rooted=False):
        """Parse the text stream this object was initialized with.

        Yields one NeXML.Tree per ``nex:tree`` element, as each element is
        completed by the incremental parser.

        :param values_are_confidence: accepted for interface compatibility;
            not used by this parser.
        :param rooted: accepted for interface compatibility; the rooted flag
            of each tree is derived from the document instead (True only if
            a node carries ``root="true"``).
        :raises NeXMLError: if a tree declares no root and none can be
            inferred from its edges.
        """
        # Resolve the qualified tag names once rather than per element.
        tree_tag = qUri('nex:tree')
        node_tag = qUri('nex:node')
        edge_tag = qUri('nex:edge')
        meta_tag = qUri('nex:meta')

        nexml_doc = ElementTree.iterparse(self.handle, events=('end',))

        for event, tree_elem in nexml_doc:
            if tree_elem.tag != tree_tag:
                continue

            node_dict = {}
            node_children = {}
            root = None

            # Iterate the element directly: Element.getchildren() no longer
            # exists on Python 3.9+.
            nodes = []
            edges = []
            for child in tree_elem:
                if child.tag == node_tag:
                    nodes.append(child)
                if child.tag == edge_tag:
                    edges.append(child)

            for node_elem in nodes:
                node_id = node_elem.attrib['id']
                this_node = node_dict[node_id] = {}
                if node_elem.attrib.get('otu'):
                    this_node['name'] = node_elem.attrib['otu']
                if node_elem.attrib.get('root') == 'true':
                    root = node_id

                for child in node_elem:
                    if child.tag == meta_tag:
                        self.add_annotation(this_node, child)

            srcs = set()
            tars = set()
            seen_edges = set()
            for edge in edges:
                src, tar = edge.attrib['source'], edge.attrib['target']
                srcs.add(src)
                tars.add(tar)

                # Keep children in document order (a list, not a set) so the
                # resulting clade order is reproducible between runs; a
                # repeated source/target pair is still only linked once.
                if (src, tar) not in seen_edges:
                    seen_edges.add((src, tar))
                    node_children.setdefault(src, []).append(tar)

                if 'length' in edge.attrib:
                    node_dict[tar]['branch_length'] = float(edge.attrib['length'])
                if ('property' in edge.attrib and
                        edge.attrib['property'] in matches('cdao:has_Support_Value')):
                    node_dict[tar]['confidence'] = float(edge.attrib['content'])

                for child in edge:
                    if child.tag == meta_tag:
                        self.add_annotation(node_dict[tar], child)

            if root is None:
                # if no root specified, start the recursive tree creation
                # function with the first node that's not a child of any
                # other nodes
                rooted = False
                root = self._pick_root(
                    [node_elem.attrib['id'] for node_elem in nodes],
                    srcs, tars)
            else:
                rooted = True

            yield NeXML.Tree(root=self._make_tree(root, node_dict, node_children),
                             rooted=rooted)

    @classmethod
    def _make_tree(cls, node, node_dict, children):
        """Traverse the tree creating a nested clade structure.

        Return a NeXML.Clade, and calls itself recursively for each child,
        traversing the entire tree and creating a nested structure of
        NeXML.Clade objects.

        :param node: id of the node to build a clade for.
        :param node_dict: maps node id -> dict of Clade keyword arguments.
        :param children: maps node id -> iterable of child node ids.
        """
        this_node = node_dict[node]
        clade = NeXML.Clade(**this_node)

        if node in children:
            clade.clades = [cls._make_tree(child, node_dict, children)
                            for child in children[node]]

        return clade
216
# ---------------------------------------------------------
# Output


class Writer(object):
    """Based on the writer in Bio.Nexus.Trees (str, to_string)."""

    def __init__(self, trees):
        """Store the trees to write and reset the id counters."""
        self.trees = trees

        # One counter per kind of element; see new_label().
        self.node_counter = 0
        self.edge_counter = 0
        self.tree_counter = 0

    def new_label(self, obj_type):
        """Return the next unused id for *obj_type*.

        *obj_type* must name one of the ``<obj_type>_counter`` attributes
        (``'node'``, ``'edge'`` or ``'tree'``). The counter is incremented
        and the id is the type name followed by the new count, e.g.
        ``'node1'``.
        """
        counter = '%s_counter' % obj_type
        value = getattr(self, counter) + 1
        setattr(self, counter, value)
        return '%s%s' % (obj_type, value)

    def write(self, handle, cdao_to_obo=True, **kwargs):
        """Write this instance's trees to a file handle.

        :param handle: object with a ``write`` method; text is tried first
            and UTF-8 bytes are used if that raises TypeError.
        :param cdao_to_obo: if true, CDAO terms are written in their OBO
            form.
        :param kwargs: accepted for interface compatibility and ignored.
        :returns: number of trees written.
        """
        self.cdao_to_obo = cdao_to_obo

        # set XML namespaces
        root_node = ElementTree.Element('nex:nexml')
        root_node.set('version', VERSION)
        root_node.set('xmlns', DEFAULT_NAMESPACE)
        root_node.set('xsi:schemaLocation', SCHEMA)

        for prefix, uri in NAMESPACES.items():
            root_node.set('xmlns:%s' % prefix, uri)

        otus = ElementTree.SubElement(root_node, 'otus',
                                      **{'id': 'tax', 'label': 'RootTaxaBlock'})

        # create trees
        trees = ElementTree.SubElement(root_node, 'trees',
                                       **{'id': 'Trees',
                                          'label': 'TreesBlockFromXML',
                                          'otus': 'tax'})
        count = 0
        tus = set()
        for tree in self.trees:
            this_tree = ElementTree.SubElement(trees, 'tree',
                                               **{'id': self.new_label('tree')})

            first_clade = tree.clade
            tus.update(self._write_tree(first_clade, this_tree, rooted=tree.rooted))

            count += 1

        # create OTUs; sorted so the output does not depend on set ordering
        for tu in sorted(tus):
            ElementTree.SubElement(otus, 'otu', **{'id': tu})

        # use xml.dom.minidom for pretty printing
        rough_string = ElementTree.tostring(root_node, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        pretty = reparsed.toprettyxml(indent=" ")
        try:
            handle.write(pretty)
        except TypeError:
            # for compatibility with Python 3
            handle.write(bytes(pretty, 'utf8'))

        return count

    def _write_tree(self, clade, tree, parent=None, rooted=False):
        """Recursively process tree, adding nodes and edges to Tree object.

        :param clade: clade to serialise, followed by its descendants.
        :param tree: ``tree`` element that receives the ``node`` and
            ``edge`` sub-elements.
        :param parent: clade whose child is being written, or None for the
            top-level clade.
        :param rooted: whether the top-level clade is marked
            ``root="true"``.

        Returns a set of all OTUs encountered.
        """
        tus = set()

        convert_uri = cdao_to_obo if self.cdao_to_obo else (lambda s: s)

        node_id = self.new_label('node')
        # Stored temporarily so child clades can name this node as their
        # edge source; removed again before returning.
        clade.node_id = node_id
        attrib = {'id': node_id, 'label': node_id}
        if rooted and parent is None:
            attrib['root'] = 'true'
        if clade.name:
            tus.add(clade.name)
            attrib['otu'] = clade.name
        ElementTree.SubElement(tree, 'node', **attrib)

        if parent is not None:
            edge_id = self.new_label('edge')
            attrib = {
                'id': edge_id, 'source': parent.node_id, 'target': node_id,
                'typeof': convert_uri('cdao:Edge'),
            }
            # Leave out a missing branch length instead of writing the
            # literal string 'None', which cannot be read back as a float.
            if clade.branch_length is not None:
                attrib['length'] = str(clade.branch_length)
            if getattr(clade, 'confidence', None) is not None:
                attrib.update({
                    'property': convert_uri('cdao:has_Support_Value'),
                    'datatype': 'xsd:float',
                    'content': '%1.2f' % clade.confidence,
                })
            ElementTree.SubElement(tree, 'edge', **attrib)

        if not clade.is_terminal():
            for new_clade in clade.clades:
                tus.update(self._write_tree(new_clade, tree, parent=clade))

        del clade.node_id

        return tus
331