Package Bio :: Package Phylo :: Module NeXMLIO
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.NeXMLIO

  1  # Copyright (C) 2013 by Ben Morris (ben@bendmorris.com) 
  2  # Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox 
  3  # and Bio.Phylo.Newick, copyright 2009 by Eric Talevich. 
  4  # All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license. Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8   
  9  """I/O function wrappers for the NeXML file format. 
 10   
 11  See: http://www.nexml.org 
 12  """ 
 13   
 14   
 15  from Bio._py3k import StringIO 
 16   
 17  from Bio.Phylo import NeXML 
 18  from xml.dom import minidom 
 19  import sys 
 20  from ._cdao_owl import cdao_elements, cdao_namespaces, resolve_uri 
 21   
 22   
 23  # For speed try to use cElementTree rather than ElementTree 
 24  try: 
 25      if (3, 0) <= sys.version_info[:2] <= (3, 1): 
 26          # Workaround for bug in python 3.0 and 3.1, 
 27          # see http://bugs.python.org/issue9257 
 28          from xml.etree import ElementTree as ElementTree 
 29      else: 
 30          from xml.etree import cElementTree as ElementTree 
 31  except ImportError: 
 32      from xml.etree import ElementTree as ElementTree 
 33   
 34  NAMESPACES = { 
 35      'xsi': 'http://www.w3.org/2001/XMLSchema-instance', 
 36      'xml': 'http://www.w3.org/XML/1998/namespace', 
 37      'nex': 'http://www.nexml.org/2009', 
 38      'xsd': 'http://www.w3.org/2001/XMLSchema#', 
 39  } 
 40  NAMESPACES.update(cdao_namespaces) 
 41  DEFAULT_NAMESPACE = NAMESPACES['nex'] 
 42  VERSION = '0.9' 
 43  SCHEMA = 'http://www.nexml.org/2009/nexml/xsd/nexml.xsd' 
 44   
 45   
 46  try: 
 47      register_namespace = ElementTree.register_namespace 
 48  except AttributeError: 
 49      if not hasattr(ElementTree, '_namespace_map'): 
 50          # cElementTree needs the pure-Python xml.etree.ElementTree 
 51          from xml.etree import ElementTree as ET_py 
 52          ElementTree._namespace_map = ET_py._namespace_map 
53 54 - def register_namespace(prefix, uri):
55 ElementTree._namespace_map[uri] = prefix
56 57 for prefix, uri in NAMESPACES.items(): 58 register_namespace(prefix, uri)
def qUri(s):
    """Expand a prefixed name (e.g. ``nex:tree``) into its full URI.

    Delegates to ``resolve_uri`` using the module-level NAMESPACES table and
    ``xml_style=True``.
    """
    expanded = resolve_uri(s, namespaces=NAMESPACES, xml_style=True)
    return expanded
64
def cdao_to_obo(s):
    """Translate a ``cdao:``-prefixed name into an ``obo:``-prefixed one.

    Everything after the first ``len('cdao:')`` characters of *s* is used as
    the lookup key into ``cdao_elements``.
    """
    prefix_length = len('cdao:')
    local_name = s[prefix_length:]
    return 'obo:%s' % cdao_elements[local_name]
69
def matches(s):
    """Return every spelling of *s* that should be accepted as a match.

    A ``cdao:``-prefixed name yields a 2-tuple of the name itself and its
    OBO equivalent; any other string yields a 1-tuple holding just *s*.
    """
    if not s.startswith('cdao:'):
        return (s,)
    return (s, cdao_to_obo(s))
77
class NeXMLError(Exception):
    """Signal that building a NeXML object cannot proceed."""
82
# ---------------------------------------------------------
# Public API

def parse(handle, **kwargs):
    """Iterate over the trees stored in a NeXML file handle.

    Keyword arguments are forwarded unchanged to ``Parser.parse``.

    :returns: generator of Bio.Phylo.NeXML.Tree objects.
    """
    parser = Parser(handle)
    return parser.parse(**kwargs)
93
def write(trees, handle, plain=False, **kwargs):
    """Write trees in NeXML format to the given file handle.

    ``plain`` and any further keyword arguments are forwarded to
    ``Writer.write``.

    :returns: number of trees written.
    """
    writer = Writer(trees)
    return writer.write(handle, plain=plain, **kwargs)
101
# ---------------------------------------------------------
# Input

class Parser(object):
    """Parse a NeXML tree given a file handle.

    Based on the parser in `Bio.Nexus.Trees`.
    """

    def __init__(self, handle):
        """Remember the file handle the NeXML document is read from."""
        self.handle = handle

    @classmethod
    def from_string(cls, treetext):
        """Create a parser that reads from the in-memory text *treetext*."""
        handle = StringIO(treetext)
        return cls(handle)

    def add_annotation(self, node_dict, meta_node):
        """Copy the content of one ``meta`` element into *node_dict*.

        The element's ``property`` attribute (``'meta'`` when absent) selects
        the key.  A support-value property is stored as a float under
        ``'confidence'``; anything else stores the element text verbatim
        under the property name.
        """
        prop = meta_node.attrib.get('property', 'meta')

        if prop in matches('cdao:has_Support_Value'):
            node_dict['confidence'] = float(meta_node.text)
        else:
            node_dict[prop] = meta_node.text

    def parse(self, values_are_confidence=False, rooted=False):
        """Parse the text stream this object was initialized with.

        Yields one ``NeXML.Tree`` for each ``nex:tree`` element found.

        :param values_are_confidence: accepted for interface compatibility;
            not used by this parser.
        :param rooted: accepted for interface compatibility; the rootedness
            of each tree is derived from the document (a tree is rooted when
            one of its nodes carries ``root="true"``).
        :raises NeXMLError: if a tree has no explicit root and no node is an
            edge source without also being an edge target.
        """
        nexml_doc = ElementTree.iterparse(self.handle, events=('end',))

        # Resolve the qualified names once instead of once per element.
        tree_tag = qUri('nex:tree')
        node_tag = qUri('nex:node')
        edge_tag = qUri('nex:edge')
        meta_tag = qUri('nex:meta')
        support_props = matches('cdao:has_Support_Value')

        for event, element in nexml_doc:
            if element.tag != tree_tag:
                continue

            node_dict = {}
            node_children = {}
            root = None

            # Element.getchildren() was removed in Python 3.9; iterating an
            # element yields its direct children on every Python version.
            nodes = [child for child in element if child.tag == node_tag]
            edges = [child for child in element if child.tag == edge_tag]

            for node_el in nodes:
                node_id = node_el.attrib['id']
                this_node = node_dict[node_id] = {}
                if node_el.attrib.get('otu'):
                    this_node['name'] = node_el.attrib['otu']
                if node_el.attrib.get('root') == 'true':
                    root = node_id

                for child in node_el:
                    if child.tag == meta_tag:
                        self.add_annotation(this_node, child)

            srcs = set()
            tars = set()
            for edge in edges:
                src, tar = edge.attrib['source'], edge.attrib['target']
                srcs.add(src)
                tars.add(tar)

                # Keep children in document order (a set would make the
                # clade order depend on string hashing) while still
                # ignoring a repeated source/target pair.
                siblings = node_children.setdefault(src, [])
                if tar not in siblings:
                    siblings.append(tar)

                if 'length' in edge.attrib:
                    node_dict[tar]['branch_length'] = float(edge.attrib['length'])
                if edge.attrib.get('property') in support_props:
                    node_dict[tar]['confidence'] = float(edge.attrib['content'])

                for child in edge:
                    if child.tag == meta_tag:
                        self.add_annotation(node_dict[tar], child)

            is_rooted = root is not None
            if not is_rooted:
                # if no root specified, start the recursive tree creation
                # function with the first node that's not a child of any
                # other nodes.  next() gets a default so that an exhausted
                # search cannot leak StopIteration out of this generator.
                root = next((node_el.attrib['id'] for node_el in nodes
                             if node_el.attrib['id'] in srcs and
                             node_el.attrib['id'] not in tars), None)
                if root is None:
                    raise NeXMLError(
                        'Could not determine a root node for tree %r'
                        % element.attrib.get('id'))

            yield NeXML.Tree(root=self._make_tree(root, node_dict, node_children),
                             rooted=is_rooted)

    @classmethod
    def _make_tree(cls, node, node_dict, children):
        """Traverse the tree creating a nested clade structure.

        Return a NeXML.Clade, and calls itself recursively for each child,
        traversing the entire tree and creating a nested structure of
        NeXML.Clade objects.

        :param node: id of the node to build a clade for.
        :param node_dict: maps node id -> keyword arguments for NeXML.Clade.
        :param children: maps node id -> iterable of child node ids.
        """
        this_node = node_dict[node]
        clade = NeXML.Clade(**this_node)

        if node in children:
            clade.clades = [cls._make_tree(child, node_dict, children)
                            for child in children[node]]

        return clade
210
# ---------------------------------------------------------
# Output


class Writer(object):
    """Based on the writer in Bio.Nexus.Trees (str, to_string)."""

    def __init__(self, trees):
        """Store the trees to be written and reset the id counters."""
        self.trees = trees

        self.node_counter = 0
        self.edge_counter = 0
        self.tree_counter = 0

    def new_label(self, obj_type):
        """Return a fresh id such as ``node3`` for the given object type.

        *obj_type* must name one of the ``<obj_type>_counter`` attributes
        (``'node'``, ``'edge'`` or ``'tree'``); that counter is incremented
        and appended to *obj_type*.
        """
        counter = '%s_counter' % obj_type
        value = getattr(self, counter) + 1
        setattr(self, counter, value)
        return '%s%s' % (obj_type, value)

    def write(self, handle, cdao_to_obo=True, **kwargs):
        """Write this instance's trees to a file handle.

        :param handle: object with a ``write`` method; if it rejects text
            with ``TypeError`` the document is written UTF-8 encoded instead.
        :param cdao_to_obo: when true, ``cdao:`` names are written in their
            ``obo:`` form.
        :param kwargs: accepted for interface compatibility; not used.
        :returns: number of trees written.
        """
        self.cdao_to_obo = cdao_to_obo

        # set XML namespaces
        root_node = ElementTree.Element('nex:nexml')
        root_node.set('version', VERSION)
        root_node.set('xmlns', DEFAULT_NAMESPACE)
        root_node.set('xsi:schemaLocation', SCHEMA)

        for prefix, uri in NAMESPACES.items():
            root_node.set('xmlns:%s' % prefix, uri)

        # The <otus> block is created first so that it precedes <trees> in
        # the document; it is filled in once all trees have been walked.
        otus = ElementTree.SubElement(root_node, 'otus',
                                      **{'id': 'tax', 'label': 'RootTaxaBlock'})

        # create trees
        trees = ElementTree.SubElement(root_node, 'trees',
                                       **{'id': 'Trees',
                                          'label': 'TreesBlockFromXML',
                                          'otus': 'tax'})
        count = 0
        tus = set()
        for tree in self.trees:
            this_tree = ElementTree.SubElement(trees, 'tree',
                                               **{'id': self.new_label('tree')})
            tus.update(self._write_tree(tree.clade, this_tree,
                                        rooted=tree.rooted))
            count += 1

        # create OTUs; sorted so the output does not depend on set ordering
        for tu in sorted(tus):
            ElementTree.SubElement(otus, 'otu', **{'id': tu})

        # use xml.dom.minidom for pretty printing
        rough_string = ElementTree.tostring(root_node, 'utf-8')
        pretty = minidom.parseString(rough_string).toprettyxml(indent=" ")
        try:
            handle.write(pretty)
        except TypeError:
            # the handle only accepts bytes
            handle.write(pretty.encode('utf8'))

        return count

    def _write_tree(self, clade, tree, parent=None, rooted=False):
        """Recursively process tree, adding nodes and edges to Tree object.

        :param clade: clade to emit; must provide ``name``,
            ``branch_length``, ``clades`` and ``is_terminal()``.
        :param tree: XML ``tree`` element that receives the new ``node`` and
            ``edge`` children.
        :param parent: clade one level up, or None for the top-level clade.
        :param rooted: whether the top-level clade is marked ``root="true"``.
        :returns: a set of all OTUs encountered.
        """
        tus = set()

        convert_uri = cdao_to_obo if self.cdao_to_obo else (lambda s: s)

        node_id = self.new_label('node')
        # Temporarily tag the clade so its children can name it as the
        # source of their edge; removed again in the ``finally`` below.
        clade.node_id = node_id
        try:
            attrib = {'id': node_id, 'label': node_id}
            if rooted and parent is None:
                attrib['root'] = 'true'
            if clade.name:
                tus.add(clade.name)
                attrib['otu'] = clade.name
            ElementTree.SubElement(tree, 'node', **attrib)

            if parent is not None:
                edge_id = self.new_label('edge')
                attrib = {
                    'id': edge_id,
                    'source': parent.node_id,
                    'target': node_id,
                }
                # Omit the attribute for a missing branch length rather
                # than writing the non-numeric text "None".
                if clade.branch_length is not None:
                    attrib['length'] = str(clade.branch_length)
                attrib['typeof'] = convert_uri('cdao:Edge')

                confidence = getattr(clade, 'confidence', None)
                if confidence is not None:
                    attrib.update({
                        'property': convert_uri('cdao:has_Support_Value'),
                        'datatype': 'xsd:float',
                        'content': '%1.2f' % confidence,
                    })
                ElementTree.SubElement(tree, 'edge', **attrib)

            if not clade.is_terminal():
                for new_clade in clade.clades:
                    tus.update(self._write_tree(new_clade, tree, parent=clade))
        finally:
            del clade.node_id

        return tus
323