Package Bio :: Package Phylo :: Module NeXMLIO
[hide private]
[frames] | [no frames]

Source Code for Module Bio.Phylo.NeXMLIO

  1  # Copyright (C) 2013 by Ben Morris (ben@bendmorris.com) 
  2  # Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox 
  3  # and Bio.Phylo.Newick, copyright 2009 by Eric Talevich. 
  4  # All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license. Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8   
  9  """I/O function wrappers for the NeXML file format. 
 10   
 11  See: http://www.nexml.org 
 12  """ 
 13   
 14   
 15  from Bio._py3k import StringIO 
 16   
 17  from Bio.Phylo import NeXML 
 18  from xml.dom import minidom 
 19  import sys 
 20  from ._cdao_owl import cdao_elements, cdao_namespaces, resolve_uri 
 21   
 22   
 23  # For speed try to use cElementTree rather than ElementTree 
 24  try: 
 25      if (3, 0) <= sys.version_info[:2] <= (3, 1): 
 26          # Workaround for bug in python 3.0 and 3.1, 
 27          # see http://bugs.python.org/issue9257 
 28          from xml.etree import ElementTree as ElementTree 
 29      else: 
 30          from xml.etree import cElementTree as ElementTree 
 31  except ImportError: 
 32      from xml.etree import ElementTree as ElementTree 
 33   
 34  NAMESPACES = { 
 35      'xsi': 'http://www.w3.org/2001/XMLSchema-instance', 
 36      'xml': 'http://www.w3.org/XML/1998/namespace', 
 37      'nex': 'http://www.nexml.org/2009', 
 38      'xsd': 'http://www.w3.org/2001/XMLSchema#', 
 39  } 
 40  NAMESPACES.update(cdao_namespaces) 
 41  DEFAULT_NAMESPACE = NAMESPACES['nex'] 
 42  VERSION = '0.9' 
 43  SCHEMA = 'http://www.nexml.org/2009/nexml/xsd/nexml.xsd' 
 44   
 45   
 46  try: 
 47      register_namespace = ElementTree.register_namespace 
 48  except AttributeError: 
 49      if not hasattr(ElementTree, '_namespace_map'): 
 50          # cElementTree needs the pure-Python xml.etree.ElementTree 
 51          from xml.etree import ElementTree as ET_py 
 52          ElementTree._namespace_map = ET_py._namespace_map 
53 54 - def register_namespace(prefix, uri):
55 ElementTree._namespace_map[uri] = prefix
56 57 for prefix, uri in NAMESPACES.items(): 58 register_namespace(prefix, uri)
def qUri(s):
    """Expand a prefixed name such as ``nex:tree`` into its full URI.

    Delegates to ``resolve_uri`` with the module-level ``NAMESPACES`` table
    and ``xml_style=True``.
    """
    full_uri = resolve_uri(s, namespaces=NAMESPACES, xml_style=True)
    return full_uri
64
def cdao_to_obo(s):
    """Translate a ``cdao:``-prefixed term into its ``obo:``-prefixed form.

    The text following the ``cdao:`` prefix is used as the key into
    ``cdao_elements``; the value found there becomes the OBO local name.
    """
    term = s[len('cdao:'):]
    return 'obo:%s' % cdao_elements[term]
69
def matches(s):
    """Return every spelling of ``s`` that should be accepted in a comparison.

    A ``cdao:`` term gives a 2-tuple holding the term itself and its OBO
    equivalent; any other string gives a 1-tuple holding only ``s``.
    """
    if not s.startswith('cdao:'):
        return (s,)
    return (s, cdao_to_obo(s))
77
class NeXMLError(Exception):
    """Raised when building a NeXML object cannot proceed."""
82
# ---------------------------------------------------------
# Public API

def parse(handle, **kwargs):
    """Lazily read the trees stored in a NeXML file handle.

    Any keyword arguments are forwarded to ``Parser.parse``.

    :returns: generator of Bio.Phylo.NeXML.Tree objects.
    """
    parser = Parser(handle)
    return parser.parse(**kwargs)
93
def write(trees, handle, plain=False, **kwargs):
    """Serialize trees in NeXML format to the given file handle.

    ``plain`` and any further keyword arguments are passed through to
    ``Writer.write``.

    :returns: number of trees written.
    """
    writer = Writer(trees)
    return writer.write(handle, plain=plain, **kwargs)
101
# ---------------------------------------------------------
# Input


class Parser(object):
    """Parse a NeXML tree given a file handle.

    Based on the parser in `Bio.Nexus.Trees`.
    """

    def __init__(self, handle):
        """Remember the file handle that ``parse`` will read from."""
        self.handle = handle

    @classmethod
    def from_string(cls, treetext):
        """Create a parser that reads from the given NeXML text."""
        handle = StringIO(treetext)
        return cls(handle)

    def add_annotation(self, node_dict, meta_node):
        """Store one ``meta`` element in a node's attribute dictionary.

        The key is the element's ``property`` attribute, or ``'meta'`` when
        that attribute is absent.  A support-value property (in either its
        CDAO or OBO spelling) is stored as a float under ``'confidence'``;
        anything else is stored as the element's raw text.
        """
        prop = meta_node.attrib.get('property', 'meta')

        if prop in matches('cdao:has_Support_Value'):
            node_dict['confidence'] = float(meta_node.text)
        else:
            node_dict[prop] = meta_node.text

    def parse(self, values_are_confidence=False, rooted=False):
        """Parse the text stream this object was initialized with.

        Yields one ``NeXML.Tree`` for every ``nex:tree`` element found.

        :param values_are_confidence: accepted for interface compatibility;
            not used by this parser.
        :param rooted: accepted for interface compatibility; the value is
            overridden for every tree, which is reported as rooted exactly
            when one of its nodes carries ``root="true"``.
        :raises NeXMLError: if a tree flags no root and none can be inferred.
        """
        # Resolve the qualified names once instead of once per element.
        tree_tag = qUri('nex:tree')
        node_tag = qUri('nex:node')
        edge_tag = qUri('nex:edge')
        meta_tag = qUri('nex:meta')
        support_props = matches('cdao:has_Support_Value')

        nexml_doc = ElementTree.iterparse(self.handle, events=('end',))

        for _event, tree_elem in nexml_doc:
            if tree_elem.tag != tree_tag:
                continue

            node_dict = {}
            node_children = {}
            root = None

            # Iterate the element directly: Element.getchildren() was
            # removed in Python 3.9.
            nodes = []
            edges = []
            for child in tree_elem:
                if child.tag == node_tag:
                    nodes.append(child)
                if child.tag == edge_tag:
                    edges.append(child)

            for node_elem in nodes:
                node_id = node_elem.attrib['id']
                this_node = node_dict[node_id] = {}
                if node_elem.attrib.get('otu'):
                    this_node['name'] = node_elem.attrib['otu']
                if node_elem.attrib.get('root') == 'true':
                    root = node_id

                for child in node_elem:
                    if child.tag == meta_tag:
                        self.add_annotation(this_node, child)

            srcs = set()
            tars = set()
            seen_edges = set()
            for edge in edges:
                src, tar = edge.attrib['source'], edge.attrib['target']
                srcs.add(src)
                tars.add(tar)

                # Keep children in document order so the resulting clade
                # order is deterministic, while still ignoring a repeated
                # (source, target) pair.
                if (src, tar) not in seen_edges:
                    seen_edges.add((src, tar))
                    node_children.setdefault(src, []).append(tar)

                if 'length' in edge.attrib:
                    node_dict[tar]['branch_length'] = float(edge.attrib['length'])
                if edge.attrib.get('property') in support_props:
                    node_dict[tar]['confidence'] = float(edge.attrib['content'])

                for child in edge:
                    if child.tag == meta_tag:
                        self.add_annotation(node_dict[tar], child)

            if root is None:
                # No root flagged in the file: infer a starting node for the
                # recursive tree construction.
                rooted = False
                root = self._find_root(
                    [node_elem.attrib['id'] for node_elem in nodes],
                    srcs, tars)
            else:
                rooted = True

            yield NeXML.Tree(
                root=self._make_tree(root, node_dict, node_children),
                rooted=rooted)

    @staticmethod
    def _find_root(node_ids, srcs, tars):
        """Choose the starting node for a tree that does not flag its root.

        Returns the first id (in the order given) that is the source of an
        edge and never a target.  When no such id exists, e.g. for a tree
        made of a single node and no edges, the first id that is never a
        target is returned instead.

        :raises NeXMLError: if there are no nodes or every node is the
            target of some edge.
        """
        fallback = None
        for node_id in node_ids:
            if node_id in tars:
                continue
            if node_id in srcs:
                return node_id
            if fallback is None:
                fallback = node_id
        if fallback is None:
            # Raising explicitly avoids a StopIteration escaping from the
            # ``parse`` generator.
            raise NeXMLError('Unable to determine the root node of the tree.')
        return fallback

    @classmethod
    def _make_tree(cls, node, node_dict, children):
        """Traverse the tree creating a nested clade structure.

        Return a NeXML.Clade built from ``node_dict[node]``, calling itself
        recursively for each id listed in ``children[node]`` so that the
        whole subtree becomes a nested structure of NeXML.Clade objects.
        """
        this_node = node_dict[node]
        clade = NeXML.Clade(**this_node)

        if node in children:
            clade.clades = [cls._make_tree(child, node_dict, children)
                            for child in children[node]]

        return clade
212
# ---------------------------------------------------------
# Output


class Writer(object):
    """Based on the writer in Bio.Nexus.Trees (str, to_string)."""

    def __init__(self, trees):
        """Store the trees to write and reset the id counters."""
        self.trees = trees

        self.node_counter = 0
        self.edge_counter = 0
        self.tree_counter = 0

    def new_label(self, obj_type):
        """Return the next unused id for ``obj_type``.

        ``obj_type`` is ``'node'``, ``'edge'`` or ``'tree'``; the matching
        ``<obj_type>_counter`` attribute is incremented and appended to the
        type name, giving ids such as ``node1``, ``node2``, ...
        """
        counter = '%s_counter' % obj_type
        value = getattr(self, counter) + 1
        setattr(self, counter, value)
        return '%s%s' % (obj_type, value)

    def write(self, handle, cdao_to_obo=True, **kwargs):
        """Write this instance's trees to a file handle.

        :param handle: writable file handle; if writing text raises
            ``TypeError`` the document is written as UTF-8 bytes instead.
        :param cdao_to_obo: if true, CDAO terms are written with their
            OBO-prefixed names.
        :param kwargs: accepted for interface compatibility; not used.
        :returns: number of trees written.
        """
        self.cdao_to_obo = cdao_to_obo

        # set XML namespaces
        root_node = ElementTree.Element('nex:nexml')
        root_node.set('version', VERSION)
        root_node.set('xmlns', DEFAULT_NAMESPACE)
        root_node.set('xsi:schemaLocation', SCHEMA)

        for prefix, uri in NAMESPACES.items():
            root_node.set('xmlns:%s' % prefix, uri)

        otus = ElementTree.SubElement(
            root_node, 'otus',
            **{'id': 'tax', 'label': 'RootTaxaBlock'})

        # create trees
        trees = ElementTree.SubElement(
            root_node, 'trees',
            **{'id': 'Trees', 'label': 'TreesBlockFromXML', 'otus': 'tax'})
        count = 0
        tus = set()
        for tree in self.trees:
            this_tree = ElementTree.SubElement(
                trees, 'tree', **{'id': self.new_label('tree')})

            first_clade = tree.clade
            tus.update(self._write_tree(first_clade, this_tree,
                                        rooted=tree.rooted))

            count += 1

        # create OTUs; sorted so that the output does not depend on set
        # iteration order
        for tu in sorted(tus):
            ElementTree.SubElement(otus, 'otu', **{'id': tu})

        # use xml.dom.minidom for pretty printing
        rough_string = ElementTree.tostring(root_node, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        pretty = reparsed.toprettyxml(indent=" ")
        try:
            handle.write(pretty)
        except TypeError:
            # for compatibility with Python 3 handles that expect bytes
            handle.write(pretty.encode('utf8'))

        return count

    def _write_tree(self, clade, tree, parent=None, rooted=False):
        """Recursively process tree, adding nodes and edges to Tree object.

        One ``node`` element is appended to ``tree`` for ``clade`` and, when
        ``parent`` is given, one ``edge`` element linking parent to clade.
        ``clade.node_id`` is set while the clade's subtree is being written
        (children read it as their edge source) and removed afterwards.

        Returns a set of all OTUs encountered.
        """
        tus = set()

        convert_uri = cdao_to_obo if self.cdao_to_obo else (lambda s: s)

        node_id = self.new_label('node')
        clade.node_id = node_id
        attrib = {'id': node_id, 'label': node_id}
        # Only the top-level clade of a rooted tree is flagged as root.
        if rooted and parent is None:
            attrib['root'] = 'true'
        if clade.name:
            tus.add(clade.name)
            attrib['otu'] = clade.name
        ElementTree.SubElement(tree, 'node', **attrib)

        if parent is not None:
            edge_id = self.new_label('edge')
            attrib = {
                'id': edge_id, 'source': parent.node_id, 'target': node_id,
            }
            # Leave out the length when it is unknown: writing the string
            # 'None' would produce a file whose length cannot be parsed back
            # as a float.
            if clade.branch_length is not None:
                attrib['length'] = str(clade.branch_length)
            attrib['typeof'] = convert_uri('cdao:Edge')
            if getattr(clade, 'confidence', None) is not None:
                attrib.update({
                    'property': convert_uri('cdao:has_Support_Value'),
                    'datatype': 'xsd:float',
                    'content': '%1.2f' % clade.confidence,
                })
            ElementTree.SubElement(tree, 'edge', **attrib)

        if not clade.is_terminal():
            for new_clade in clade.clades:
                tus.update(self._write_tree(new_clade, tree, parent=clade))

        del clade.node_id

        return tus
326