Package Bio :: Package Phylo :: Module NeXMLIO
[hide private]
[frames] | [no frames]

Source Code for Module Bio.Phylo.NeXMLIO

  1  # Copyright (C) 2013 by Ben Morris (ben@bendmorris.com) 
  2  # Based on Bio.Nexus, copyright 2005-2008 by Frank Kauff & Cymon J. Cox 
  3  # and Bio.Phylo.Newick, copyright 2009 by Eric Talevich. 
  4  # All rights reserved. 
  5  # This code is part of the Biopython distribution and governed by its 
  6  # license. Please see the LICENSE file that should have been included 
  7  # as part of this package. 
  8   
  9  """I/O function wrappers for the NeXML file format. 
 10   
 11  See: http://www.nexml.org 
 12  """ 
 13  __docformat__ = "restructuredtext en" 
 14   
 15  from Bio._py3k import StringIO 
 16   
 17  from Bio.Phylo import NeXML 
 18  from xml.dom import minidom 
 19  import sys 
 20  from ._cdao_owl import cdao_elements, cdao_namespaces, resolve_uri 
 21   
 22   
 23  #For speed try to use cElementTree rather than ElementTree 
 24  try: 
 25      if (3, 0) <= sys.version_info[:2] <= (3, 1): 
 26          # Workaround for bug in python 3.0 and 3.1, 
 27          # see http://bugs.python.org/issue9257 
 28          from xml.etree import ElementTree as ElementTree 
 29      else: 
 30          from xml.etree import cElementTree as ElementTree 
 31  except ImportError: 
 32      from xml.etree import ElementTree as ElementTree 
 33   
 34  NAMESPACES = { 
 35                    'xsi': 'http://www.w3.org/2001/XMLSchema-instance', 
 36                    'xml': 'http://www.w3.org/XML/1998/namespace', 
 37                    'nex': 'http://www.nexml.org/2009', 
 38                    'xsd': 'http://www.w3.org/2001/XMLSchema#', 
 39                    } 
 40  NAMESPACES.update(cdao_namespaces) 
 41  DEFAULT_NAMESPACE = NAMESPACES['nex'] 
 42  VERSION = '0.9' 
 43  SCHEMA = 'http://www.nexml.org/2009/nexml/xsd/nexml.xsd' 
 44   
 45   
 46  try: 
 47      register_namespace = ElementTree.register_namespace 
 48  except AttributeError: 
 49      if not hasattr(ElementTree, '_namespace_map'): 
 50          # cElementTree needs the pure-Python xml.etree.ElementTree 
 51          from xml.etree import ElementTree as ET_py 
 52          ElementTree._namespace_map = ET_py._namespace_map 
53 54 - def register_namespace(prefix, uri):
55 ElementTree._namespace_map[uri] = prefix
56 57 for prefix, uri in NAMESPACES.items(): 58 register_namespace(prefix, uri)
def qUri(s):
    """Given a prefixed URI, return the full URI.

    The prefix is resolved by ``resolve_uri`` against this module's
    NAMESPACES table with ``xml_style=True``; the result is what the parser
    compares against ElementTree element tags.
    """
    full_uri = resolve_uri(s, namespaces=NAMESPACES, xml_style=True)
    return full_uri
64
def cdao_to_obo(s):
    """Convert a CDAO-prefixed URI into an OBO-prefixed URI.

    The leading ``cdao:`` (five characters) is dropped and the remainder is
    looked up in ``cdao_elements``; a term missing from that table raises
    KeyError.
    """
    term = s[len('cdao:'):]
    return 'obo:%s' % cdao_elements[term]
68
def matches(s):
    """Check for matches in both CDAO and OBO namespaces.

    Returns a tuple of acceptable spellings of ``s``: just ``(s,)`` for
    anything that is not CDAO-prefixed, otherwise ``s`` followed by its
    OBO-prefixed equivalent.
    """
    if not s.startswith('cdao:'):
        return (s,)
    return (s, cdao_to_obo(s))
75
class NeXMLError(Exception):
    """Exception raised when NeXML object construction cannot continue."""
79
# ---------------------------------------------------------
# Public API

def parse(handle, **kwargs):
    """Iterate over the trees in a NeXML file handle.

    Keyword arguments are forwarded unchanged to ``Parser.parse``.

    :returns: generator of Bio.Phylo.NeXML.Tree objects.
    """
    parser = Parser(handle)
    return parser.parse(**kwargs)
90
def write(trees, handle, plain=False, **kwargs):
    """Write trees in NeXML format to the given file handle.

    ``plain`` and any extra keyword arguments are forwarded to
    ``Writer.write``.

    :returns: number of trees written.
    """
    writer = Writer(trees)
    return writer.write(handle, plain=plain, **kwargs)
98
# ---------------------------------------------------------
# Input

class Parser(object):
    """Parse a NeXML tree given a file handle.

    Based on the parser in `Bio.Nexus.Trees`.
    """

    def __init__(self, handle):
        """Remember the handle that ``parse`` will read from."""
        self.handle = handle

    @classmethod
    def from_string(cls, treetext):
        """Build a parser that reads from ``treetext`` via a StringIO handle."""
        handle = StringIO(treetext)
        return cls(handle)

    def add_annotation(self, node_dict, meta_node):
        """Copy one ``meta`` element into a node's attribute dictionary.

        The key is the element's ``property`` attribute, or ``'meta'`` when
        that attribute is absent. A support-value property (in either its
        CDAO or OBO spelling) is stored as a float under ``'confidence'``;
        anything else is stored as the element's raw text.
        """
        prop = meta_node.attrib.get('property', 'meta')

        if prop in matches('cdao:has_Support_Value'):
            node_dict['confidence'] = float(meta_node.text)
        else:
            node_dict[prop] = meta_node.text

    def parse(self, values_are_confidence=False, rooted=False):
        """Parse the text stream this object was initialized with.

        Generator yielding one ``NeXML.Tree`` for every ``nex:tree`` element
        in the document.

        ``values_are_confidence`` is accepted but not used here. The
        ``rooted`` argument is not used as given either: for each tree it is
        overwritten with True when some node carries ``root="true"`` and
        False otherwise.

        Raises NeXMLError when a tree names no root and no node is the
        source of an edge without also being the target of one.
        """
        # These lookups do not depend on the element being visited, so
        # resolve them once instead of once per element.
        tree_tag = qUri('nex:tree')
        node_tag = qUri('nex:node')
        edge_tag = qUri('nex:edge')
        meta_tag = qUri('nex:meta')
        support_props = matches('cdao:has_Support_Value')

        nexml_doc = ElementTree.iterparse(self.handle, events=('end',))

        for event, tree_elem in nexml_doc:
            if tree_elem.tag != tree_tag:
                continue

            node_dict = {}
            node_children = {}
            root = None

            # Iterate the element directly: Element.getchildren() was
            # deprecated and then removed in Python 3.9.
            nodes = [child for child in tree_elem if child.tag == node_tag]
            edges = [child for child in tree_elem if child.tag == edge_tag]

            for node_elem in nodes:
                node_id = node_elem.attrib['id']
                this_node = node_dict[node_id] = {}
                if node_elem.attrib.get('otu'):
                    this_node['name'] = node_elem.attrib['otu']
                if node_elem.attrib.get('root') == 'true':
                    root = node_id

                for child in node_elem:
                    if child.tag == meta_tag:
                        self.add_annotation(this_node, child)

            srcs = set()
            tars = set()
            for edge in edges:
                src, tar = edge.attrib['source'], edge.attrib['target']
                srcs.add(src)
                tars.add(tar)
                node_children.setdefault(src, set()).add(tar)

                # Edge attributes describe the clade at the edge's target.
                if 'length' in edge.attrib:
                    node_dict[tar]['branch_length'] = float(edge.attrib['length'])
                if edge.attrib.get('property') in support_props:
                    node_dict[tar]['confidence'] = float(edge.attrib['content'])

                for child in edge:
                    if child.tag == meta_tag:
                        self.add_annotation(node_dict[tar], child)

            if root is None:
                # if no root specified, start the recursive tree creation
                # function with the first node that's not a child of any
                # other nodes
                rooted = False
                for node_elem in nodes:
                    candidate = node_elem.attrib['id']
                    if candidate in srcs and candidate not in tars:
                        root = candidate
                        break
                else:
                    # A bare next() on an exhausted generator here would
                    # surface as an opaque RuntimeError (PEP 479).
                    raise NeXMLError(
                        'Could not determine a root node for tree %r'
                        % tree_elem.attrib.get('id'))
            else:
                rooted = True

            yield NeXML.Tree(root=self._make_tree(root, node_dict, node_children),
                             rooted=rooted)

    @classmethod
    def _make_tree(cls, node, node_dict, children):
        """Return a NeXML.Clade for ``node``, recursing into its children.

        ``node_dict`` maps node ids to the keyword arguments for each clade;
        ``children`` maps a node id to the ids of its child nodes. The result
        is a nested structure of NeXML.Clade objects covering everything
        reachable from ``node``.
        """
        this_node = node_dict[node]
        clade = NeXML.Clade(**this_node)

        if node in children:
            clade.clades = [cls._make_tree(child, node_dict, children)
                            for child in children[node]]

        return clade
206
# ---------------------------------------------------------
# Output

class Writer(object):
    """Based on the writer in Bio.Nexus.Trees (str, to_string)."""

    def __init__(self, trees):
        """Store the trees to write and reset the id counters."""
        self.trees = trees

        # Counters behind new_label(); one per kind of element id.
        self.node_counter = 0
        self.edge_counter = 0
        self.tree_counter = 0

    def new_label(self, obj_type):
        """Return the next unused id for ``obj_type``.

        ``obj_type`` must name one of the instance counters ('node', 'edge'
        or 'tree'). The counter is incremented and appended to the type
        name, e.g. 'node1', 'node2', ...
        """
        counter = '%s_counter' % obj_type
        setattr(self, counter, getattr(self, counter) + 1)
        return '%s%s' % (obj_type, getattr(self, counter))

    def write(self, handle, cdao_to_obo=True, **kwargs):
        """Write this instance's trees to a file handle.

        ``cdao_to_obo`` selects whether CDAO-prefixed terms are written in
        their OBO-prefixed form. Extra keyword arguments are accepted and
        ignored.

        Returns the number of trees written.
        """
        self.cdao_to_obo = cdao_to_obo

        # set XML namespaces
        root_node = ElementTree.Element('nex:nexml')
        root_node.set('version', VERSION)
        root_node.set('xmlns', DEFAULT_NAMESPACE)
        root_node.set('xsi:schemaLocation', SCHEMA)

        for prefix, uri in NAMESPACES.items():
            root_node.set('xmlns:%s' % prefix, uri)

        otus = ElementTree.SubElement(
            root_node, 'otus', {'id': 'tax', 'label': 'RootTaxaBlock'})

        # create trees
        trees = ElementTree.SubElement(
            root_node, 'trees',
            {'id': 'Trees', 'label': 'TreesBlockFromXML', 'otus': 'tax'})
        count = 0
        tus = set()
        for tree in self.trees:
            this_tree = ElementTree.SubElement(
                trees, 'tree', {'id': self.new_label('tree')})

            first_clade = tree.clade
            tus.update(self._write_tree(first_clade, this_tree,
                                        rooted=tree.rooted))

            count += 1

        # create OTUs; sorted so that repeated runs produce identical output
        for tu in sorted(tus):
            ElementTree.SubElement(otus, 'otu', {'id': tu})

        # use xml.dom.minidom for pretty printing
        rough_string = ElementTree.tostring(root_node, 'utf-8')
        reparsed = minidom.parseString(rough_string)
        pretty = reparsed.toprettyxml(indent=" ")
        try:
            handle.write(pretty)
        except TypeError:
            # for compatibility with Python 3 (handle opened in binary mode)
            handle.write(bytes(pretty, 'utf8'))

        return count

    def _write_tree(self, clade, tree, parent=None, rooted=False):
        """Recursively add ``clade`` and its descendants to ``tree``.

        A ``node`` element is appended for the clade and, unless it is the
        top-level clade, an ``edge`` element linking it to ``parent``. The
        edge carries a ``length`` only when the clade has a branch length,
        and the support-value attributes only when it has a confidence.

        Returns a set of all OTUs (clade names) encountered.
        """
        tus = set()

        convert_uri = cdao_to_obo if self.cdao_to_obo else (lambda s: s)

        node_id = self.new_label('node')
        # Temporarily tag the clade with its id so that its children can
        # name it as the source of their edges; removed again below.
        clade.node_id = node_id
        try:
            attrib = {'id': node_id, 'label': node_id}
            if rooted and parent is None:
                attrib['root'] = 'true'
            if clade.name:
                tus.add(clade.name)
                attrib['otu'] = clade.name
            ElementTree.SubElement(tree, 'node', attrib)

            if parent is not None:
                edge_id = self.new_label('edge')
                attrib = {
                    'id': edge_id,
                    'source': parent.node_id,
                    'target': node_id,
                    'typeof': convert_uri('cdao:Edge'),
                }
                # Skip the attribute instead of writing length="None",
                # which the parser in this module cannot convert to float.
                if clade.branch_length is not None:
                    attrib['length'] = str(clade.branch_length)
                if getattr(clade, 'confidence', None) is not None:
                    attrib.update({
                        'property': convert_uri('cdao:has_Support_Value'),
                        'datatype': 'xsd:float',
                        'content': '%1.2f' % clade.confidence,
                    })
                ElementTree.SubElement(tree, 'edge', attrib)

            if not clade.is_terminal():
                for new_clade in clade.clades:
                    tus.update(self._write_tree(new_clade, tree, parent=clade))
        finally:
            # Always drop the temporary attribute, even if writing failed.
            del clade.node_id

        return tus
314