Package Bio :: Package Phylo :: Module PhyloXMLIO
[hide private]
[frames] | [no frames]

Source Code for Module Bio.Phylo.PhyloXMLIO

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """PhyloXML reader/parser, writer, and associated functions. 
  7   
  8  Instantiates tree elements from a parsed PhyloXML file, and constructs an XML 
  9  file from a `Bio.Phylo.PhyloXML` object. 
 10   
 11  About capitalization: 
 12   - phyloXML means the file format specification 
 13   - PhyloXML means the Biopython module `Bio.Phylo.PhyloXML` and its classes 
 14   - Phyloxml means the top-level class used by `PhyloXMLIO.read` (but not 
 15     `Bio.Phylo.read`!), containing a list of Phylogenies (objects derived from 
 16     `BaseTree.Tree`) 
 17   
 18  """ 
 19   
 20  import sys 
 21   
 22  from Bio._py3k import basestring 
 23  from Bio._py3k import unicode 
 24   
 25  from Bio.Phylo import PhyloXML as PX 
 26   
 27  # For speed try to use cElementTree rather than ElementTree 
 28  try: 
 29      if (3, 0) <= sys.version_info[:2] <= (3, 1): 
 30          # Workaround for bug in python 3.0 and 3.1, 
 31          # see http://bugs.python.org/issue9257 
 32          from xml.etree import ElementTree as ElementTree 
 33      else: 
 34          from xml.etree import cElementTree as ElementTree 
 35  except ImportError: 
 36      from xml.etree import ElementTree as ElementTree 
 37   
 38   
 39  # Recognize the phyloXML namespace when parsing 
 40  # See http://effbot.org/zone/element-namespaces.htm 
 41  NAMESPACES = { 
 42      'phy': 'http://www.phyloxml.org', 
 43  } 
 44   
 45  try: 
 46      register_namespace = ElementTree.register_namespace 
 47  except AttributeError: 
 48      if not hasattr(ElementTree, '_namespace_map'): 
 49          # cElementTree needs the pure-Python xml.etree.ElementTree 
 50          from xml.etree import ElementTree as ET_py 
 51          ElementTree._namespace_map = ET_py._namespace_map 
 52   
53 - def register_namespace(prefix, uri):
54 ElementTree._namespace_map[uri] = prefix
55 56 for prefix, uri in NAMESPACES.items(): 57 register_namespace(prefix, uri) 58 59 # Tell ElementTree how to write to text handles 60 DEFAULT_ENCODING = ("unicode" if sys.version_info[0] >= 3 else "utf-8") 61 62
class PhyloXMLError(Exception):
    """Signal that a PhyloXML object cannot be constructed.

    Malformed XML is detected and reported by the underlying ElementTree
    module. This exception covers the other case: XML that is well-formed
    but does not follow the phyloXML specification.
    """

# ---------------------------------------------------------
# Public API

def read(file):
    """Parse a phyloXML file or stream into a tree of Biopython objects.

    The root node's children are the phylogenies, plus possibly other
    arbitrary (non-phyloXML) objects.

    :returns: a single `Bio.Phylo.PhyloXML.Phyloxml` object.

    """
    parser = Parser(file)
    return parser.read()
87 88
def parse(file):
    """Iterate over the phylogenetic trees in a phyloXML file.

    Additional data stored at the top level is ignored, but this may use
    less memory than the `read` function.

    :returns: a generator of `Bio.Phylo.PhyloXML.Phylogeny` objects.

    """
    parser = Parser(file)
    return parser.parse()
99 100
def write(obj, file, encoding=DEFAULT_ENCODING, indent=True):
    """Write a phyloXML file.

    :Parameters:
        obj
            an instance of `Phyloxml`, `Phylogeny` or `BaseTree.Tree`, or an
            iterable of either of the latter two. The object will be converted
            to a Phyloxml object before serialization.
        file
            either an open handle or a file name.

    :returns: whatever `Writer.write` returns for the converted object.

    """
    def fix_single(tree):
        # Most specific PhyloXML types are tested first; the generic BaseTree
        # types are promoted to a Phylogeny.
        if isinstance(tree, PX.Phylogeny):
            converted = tree
        elif isinstance(tree, PX.Clade):
            converted = tree.to_phylogeny()
        elif isinstance(tree, PX.BaseTree.Tree):
            converted = PX.Phylogeny.from_tree(tree)
        elif isinstance(tree, PX.BaseTree.Clade):
            wrapper = PX.BaseTree.Tree(root=tree)
            converted = PX.Phylogeny.from_tree(wrapper)
        else:
            raise ValueError("iterable must contain Tree or Clade types")
        return converted

    if not isinstance(obj, PX.Phyloxml):
        if isinstance(obj, (PX.BaseTree.Tree, PX.BaseTree.Clade)):
            obj = fix_single(obj).to_phyloxml()
        elif hasattr(obj, '__iter__'):
            obj = PX.Phyloxml({}, phylogenies=(fix_single(t) for t in obj))
        else:
            raise ValueError("First argument must be a Phyloxml, Phylogeny, "
                             "Tree, or iterable of Trees or Phylogenies.")
    writer = Writer(obj)
    return writer.write(file, encoding=encoding, indent=indent)


# ---------------------------------------------------------
# Functions I wish ElementTree had

140 -def _local(tag):
141 """Extract the local tag from a namespaced tag name (PRIVATE).""" 142 if tag[0] == '{': 143 return tag[tag.index('}') + 1:] 144 return tag
145 146
147 -def _split_namespace(tag):
148 """Split a tag into namespace and local tag strings (PRIVATE).""" 149 try: 150 return tag[1:].split('}', 1) 151 except ValueError: 152 return ('', tag)
153 154
def _ns(tag, namespace=NAMESPACES['phy']):
    """Build the ``{namespace}tag`` form of an XML tag (PRIVATE).

    The namespace defaults to the phyloXML namespace.
    """
    qualified = '{%s}%s' % (namespace, tag)
    return qualified
158 159
def _get_child_as(parent, tag, construct):
    """Look up one child by tag and run it through a constructor (PRIVATE).

    Returns None if no matching child is found.
    """
    node = parent.find(_ns(tag))
    if node is None:
        return None
    return construct(node)
168 169
def _get_child_text(parent, tag, construct=unicode):
    """Look up one child by tag and convert its text (PRIVATE).

    Returns None if no matching child is found, or if the child has no text.
    """
    node = parent.find(_ns(tag))
    if node is None or not node.text:
        return None
    return construct(node.text)
178 179
def _get_children_as(parent, tag, construct):
    """Collect all children with a tag, each run through a constructor (PRIVATE).

    Returns an empty list if no matching child is found.
    """
    results = []
    for node in parent.findall(_ns(tag)):
        results.append(construct(node))
    return results
187 188
def _get_children_text(parent, tag, construct=unicode):
    """Collect the converted text of all children with a tag (PRIVATE).

    Children without text are skipped. Returns an empty list if no matching
    child is found.
    """
    results = []
    for node in parent.findall(_ns(tag)):
        if not node.text:
            continue
        results.append(construct(node.text))
    return results
197 198
199 -def _indent(elem, level=0):
200 """Add line breaks and indentation to ElementTree in-place (PRIVATE). 201 202 Sources: 203 - http://effbot.org/zone/element-lib.htm#prettyprint 204 - http://infix.se/2007/02/06/gentlemen-indent-your-xml 205 206 """ 207 i = "\n" + level * " " 208 if len(elem): 209 if not elem.text or not elem.text.strip(): 210 elem.text = i + " " 211 for e in elem: 212 _indent(e, level + 1) 213 if not e.tail or not e.tail.strip(): 214 e.tail = i + " " 215 if not e.tail or not e.tail.strip(): 216 e.tail = i 217 else: 218 if level and (not elem.tail or not elem.tail.strip()): 219 elem.tail = i

# ---------------------------------------------------------
# INPUT
# ---------------------------------------------------------

226 -def _str2bool(text):
227 if text == 'true' or text == '1': 228 return True 229 if text == 'false' or text == '0': 230 return False 231 raise ValueError('String could not be converted to boolean: ' + text)
232 233
234 -def _dict_str2bool(dct, keys):
235 out = dct.copy() 236 for key in keys: 237 if key in out: 238 out[key] = _str2bool(out[key]) 239 return out
240 241
242 -def _int(text):
243 if text is not None: 244 try: 245 return int(text) 246 except Exception: 247 return None
248 249
250 -def _float(text):
251 if text is not None: 252 try: 253 return float(text) 254 except Exception: 255 return None
256 257
258 -def _collapse_wspace(text):
259 """Replace all spans of whitespace with a single space character (PRIVATE). 260 261 Also remove leading and trailing whitespace. See "Collapse Whitespace 262 Policy" in the phyloXML spec glossary: 263 http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary 264 """ 265 if text is not None: 266 return ' '.join(text.split())
267 268 269 # NB: Not currently used
270 -def _replace_wspace(text):
271 """Replace tab, LF and CR characters with spaces, but don't collapse (PRIVATE). 272 273 See "Replace Whitespace Policy" in the phyloXML spec glossary: 274 http://phyloxml.org/documentation/version_100/phyloxml.xsd.html#Glossary 275 """ 276 for char in ('\t', '\n', '\r'): 277 if char in text: 278 text = text.replace(char, ' ') 279 return text
280 281
class Parser(object):
    """Methods for parsing all phyloXML nodes from an XML stream.

    To minimize memory use, the tree of ElementTree parsing events is cleared
    after completing each phylogeny, clade, and top-level 'other' element.
    Elements below the clade level are kept in memory until parsing of the
    current clade is finished -- this shouldn't be a problem because clade is
    the only recursive element, and non-clade nodes below this level are of
    bounded size.
    """

    def __init__(self, file):
        """Start incremental parsing of ``file`` and remember the root node.

        ``file`` is handed directly to ``ElementTree.iterparse``. The first
        parsing event is consumed here to obtain the root element.
        """
        # Get an iterable context for XML parsing events
        context = iter(ElementTree.iterparse(file, events=('start', 'end')))
        event, root = next(context)
        self.root = root
        self.context = context

    def read(self):
        """Parse the phyloXML file and create a single Phyloxml object."""
        phyloxml = PX.Phyloxml(dict((_local(key), val)
                                    for key, val in self.root.items()))
        # Nesting depth inside elements that are not in the phyloXML namespace
        other_depth = 0
        for event, elem in self.context:
            namespace, localtag = _split_namespace(elem.tag)
            if event == 'start':
                if namespace != NAMESPACES['phy']:
                    other_depth += 1
                    continue
                if localtag == 'phylogeny':
                    phylogeny = self._parse_phylogeny(elem)
                    phyloxml.phylogenies.append(phylogeny)
            if event == 'end' and namespace != NAMESPACES['phy']:
                # Deal with items not specified by phyloXML
                other_depth -= 1
                if other_depth == 0:
                    # We're directly under the root node -- evaluate
                    otr = self.other(elem, namespace, localtag)
                    phyloxml.other.append(otr)
                    self.root.clear()
        return phyloxml

    def parse(self):
        """Parse the phyloXML file incrementally and return each phylogeny."""
        phytag = _ns('phylogeny')
        for event, elem in self.context:
            if event == 'start' and elem.tag == phytag:
                yield self._parse_phylogeny(elem)

    # Special parsing cases -- incremental, using self.context

    def _parse_phylogeny(self, parent):
        """Parse a single phylogeny within the phyloXML tree (PRIVATE).

        Recursively builds a phylogenetic tree with help from parse_clade, then
        clears the XML event history for the phylogeny element and returns
        control to the top-level parsing function.
        """
        phylogeny = PX.Phylogeny(**_dict_str2bool(parent.attrib,
                                                  ['rooted', 'rerootable']))
        list_types = {
            # XML tag, plural attribute
            'confidence': 'confidences',
            'property': 'properties',
            'clade_relation': 'clade_relations',
            'sequence_relation': 'sequence_relations',
        }
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start' and tag == 'clade':
                assert phylogeny.root is None, \
                    "Phylogeny object should only have 1 clade"
                phylogeny.root = self._parse_clade(elem)
                continue
            if event == 'end':
                if tag == 'phylogeny':
                    parent.clear()
                    break
                # Handle the other non-recursive children
                if tag in list_types:
                    getattr(phylogeny, list_types[tag]).append(
                        getattr(self, tag)(elem))
                # Complex types
                elif tag in ('date', 'id'):
                    setattr(phylogeny, tag, getattr(self, tag)(elem))
                # Simple types
                elif tag in ('name', 'description'):
                    setattr(phylogeny, tag, _collapse_wspace(elem.text))
                # Unknown tags
                elif namespace != NAMESPACES['phy']:
                    phylogeny.other.append(self.other(elem, namespace, tag))
                    parent.clear()
                else:
                    # NB: This shouldn't happen in valid files
                    raise PhyloXMLError('Misidentified tag: ' + tag)
        return phylogeny

    _clade_complex_types = ['color', 'events', 'binary_characters', 'date']
    _clade_list_types = {
        'confidence': 'confidences',
        'distribution': 'distributions',
        'reference': 'references',
        'property': 'properties',
    }
    _clade_tracked_tags = set(_clade_complex_types).union(_clade_list_types.keys()).union(
        ['branch_length', 'name', 'node_id', 'width'])

    def _parse_clade(self, parent):
        """Parse a Clade node and its children, recursively (PRIVATE).

        Raises PhyloXMLError if branch_length is given both as an attribute
        and as a child element, or if an unrecognized element in the phyloXML
        namespace is found directly under the clade.
        """
        clade = PX.Clade(**parent.attrib)
        if clade.branch_length is not None:
            clade.branch_length = float(clade.branch_length)
        # NB: Only evaluate nodes at the current level
        tag_stack = []
        # Nesting depth inside foreign-namespace elements, counted the same
        # way as in read(). Only counted while no tracked element is open.
        other_depth = 0
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'start':
                if tag == 'clade':
                    clade.clades.append(self._parse_clade(elem))
                    continue
                if tag == 'taxonomy':
                    clade.taxonomies.append(self._parse_taxonomy(elem))
                    continue
                if tag == 'sequence':
                    clade.sequences.append(self._parse_sequence(elem))
                    continue
                if tag in self._clade_tracked_tags:
                    tag_stack.append(tag)
                elif not tag_stack and namespace != NAMESPACES['phy']:
                    other_depth += 1
            if event == 'end':
                if tag == 'clade':
                    elem.clear()
                    break
                if not tag_stack:
                    # No tracked element is open. Previously this case hit
                    # tag_stack[-1] and raised IndexError.
                    if namespace != NAMESPACES['phy']:
                        other_depth -= 1
                        if other_depth == 0:
                            # Foreign element directly under this clade
                            clade.other.append(
                                self.other(elem, namespace, tag))
                            elem.clear()
                        continue
                    # NB: This shouldn't happen in valid files
                    raise PhyloXMLError('Misidentified tag: ' + tag)
                if tag != tag_stack[-1]:
                    continue
                tag_stack.pop()
                # Handle the other non-recursive children
                if tag in self._clade_list_types:
                    getattr(clade, self._clade_list_types[tag]).append(
                        getattr(self, tag)(elem))
                elif tag in self._clade_complex_types:
                    setattr(clade, tag, getattr(self, tag)(elem))
                elif tag == 'branch_length':
                    # NB: possible collision with the attribute
                    if clade.branch_length is not None:
                        raise PhyloXMLError(
                            'Attribute branch_length was already set '
                            'for this Clade.')
                    clade.branch_length = _float(elem.text)
                elif tag == 'width':
                    clade.width = _float(elem.text)
                elif tag == 'name':
                    clade.name = _collapse_wspace(elem.text)
                elif tag == 'node_id':
                    clade.node_id = PX.Id(elem.text.strip(),
                                          elem.attrib.get('provider'))
                else:
                    raise PhyloXMLError('Misidentified tag: ' + tag)
        return clade

    def _parse_sequence(self, parent):
        """Parse a sequence node, consuming events up to its end tag (PRIVATE)."""
        sequence = PX.Sequence(**parent.attrib)
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'end':
                if tag == 'sequence':
                    parent.clear()
                    break
                if tag in ('accession', 'mol_seq', 'uri',
                           'domain_architecture'):
                    setattr(sequence, tag, getattr(self, tag)(elem))
                elif tag == 'annotation':
                    sequence.annotations.append(self.annotation(elem))
                elif tag == 'name':
                    sequence.name = _collapse_wspace(elem.text)
                elif tag in ('symbol', 'location'):
                    setattr(sequence, tag, elem.text)
                elif namespace != NAMESPACES['phy']:
                    sequence.other.append(self.other(elem, namespace, tag))
                    parent.clear()
        return sequence

    def _parse_taxonomy(self, parent):
        """Parse a taxonomy node, consuming events up to its end tag (PRIVATE)."""
        taxonomy = PX.Taxonomy(**parent.attrib)
        for event, elem in self.context:
            namespace, tag = _split_namespace(elem.tag)
            if event == 'end':
                if tag == 'taxonomy':
                    parent.clear()
                    break
                if tag in ('id', 'uri'):
                    setattr(taxonomy, tag, getattr(self, tag)(elem))
                elif tag == 'common_name':
                    taxonomy.common_names.append(_collapse_wspace(elem.text))
                elif tag == 'synonym':
                    taxonomy.synonyms.append(elem.text)
                elif tag in ('code', 'scientific_name', 'authority', 'rank'):
                    # ENH: check_str on rank
                    setattr(taxonomy, tag, elem.text)
                elif namespace != NAMESPACES['phy']:
                    taxonomy.other.append(self.other(elem, namespace, tag))
                    parent.clear()
        return taxonomy

    def other(self, elem, namespace, localtag):
        """Build a PX.Other from an element and, recursively, its children."""
        # Stripped text, or None when the text is missing or only whitespace
        text = elem.text.strip() if elem.text else ''
        return PX.Other(localtag, namespace, elem.attrib,
                        value=text or None,
                        children=[self.other(child, *_split_namespace(child.tag))
                                  for child in elem])

    # Complex types

    def accession(self, elem):
        """Build a PX.Accession from the element text and 'source' attribute."""
        return PX.Accession(elem.text.strip(), elem.get('source'))

    def annotation(self, elem):
        """Build a PX.Annotation from the element's attributes and children."""
        return PX.Annotation(
            desc=_collapse_wspace(_get_child_text(elem, 'desc')),
            confidence=_get_child_as(elem, 'confidence', self.confidence),
            properties=_get_children_as(elem, 'property', self.property),
            uri=_get_child_as(elem, 'uri', self.uri),
            **elem.attrib)

    def binary_characters(self, elem):
        """Build a PX.BinaryCharacters from counts and character lists."""
        def bc_getter(elem):
            return _get_children_text(elem, 'bc')

        return PX.BinaryCharacters(
            type=elem.get('type'),
            gained_count=_int(elem.get('gained_count')),
            lost_count=_int(elem.get('lost_count')),
            present_count=_int(elem.get('present_count')),
            absent_count=_int(elem.get('absent_count')),
            # Flatten BinaryCharacterList sub-nodes into lists of strings
            gained=_get_child_as(elem, 'gained', bc_getter),
            lost=_get_child_as(elem, 'lost', bc_getter),
            present=_get_child_as(elem, 'present', bc_getter),
            absent=_get_child_as(elem, 'absent', bc_getter))

    def clade_relation(self, elem):
        """Build a PX.CladeRelation from the element's attributes."""
        # NOTE(review): 'distance' is passed through as text here, while
        # sequence_relation converts it with _float -- confirm intent.
        return PX.CladeRelation(
            elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
            distance=elem.get('distance'),
            confidence=_get_child_as(elem, 'confidence', self.confidence))

    def color(self, elem):
        """Build a PX.BranchColor from the red, green and blue children."""
        red, green, blue = (_get_child_text(elem, color, int) for color in
                            ('red', 'green', 'blue'))
        return PX.BranchColor(red, green, blue)

    def confidence(self, elem):
        """Build a PX.Confidence from the element text and 'type' attribute."""
        return PX.Confidence(
            _float(elem.text),
            elem.get('type'))

    def date(self, elem):
        """Build a PX.Date from the 'unit' attribute and child values."""
        return PX.Date(
            unit=elem.get('unit'),
            desc=_collapse_wspace(_get_child_text(elem, 'desc')),
            value=_get_child_text(elem, 'value', float),
            minimum=_get_child_text(elem, 'minimum', float),
            maximum=_get_child_text(elem, 'maximum', float),
        )

    def distribution(self, elem):
        """Build a PX.Distribution from desc, point and polygon children."""
        return PX.Distribution(
            desc=_collapse_wspace(_get_child_text(elem, 'desc')),
            points=_get_children_as(elem, 'point', self.point),
            polygons=_get_children_as(elem, 'polygon', self.polygon))

    def domain(self, elem):
        """Build a PX.ProteinDomain; the 'from' attribute is reduced by one."""
        return PX.ProteinDomain(elem.text.strip(),
                                int(elem.get('from')) - 1,
                                int(elem.get('to')),
                                confidence=_float(elem.get('confidence')),
                                id=elem.get('id'))

    def domain_architecture(self, elem):
        """Build a PX.DomainArchitecture from 'length' and domain children."""
        return PX.DomainArchitecture(
            length=int(elem.get('length')),
            domains=_get_children_as(elem, 'domain', self.domain))

    def events(self, elem):
        """Build a PX.Events from the element's child values."""
        return PX.Events(
            type=_get_child_text(elem, 'type'),
            duplications=_get_child_text(elem, 'duplications', int),
            speciations=_get_child_text(elem, 'speciations', int),
            losses=_get_child_text(elem, 'losses', int),
            confidence=_get_child_as(elem, 'confidence', self.confidence))

    def id(self, elem):
        """Build a PX.Id; the provider falls back to the 'type' attribute."""
        provider = elem.get('provider') or elem.get('type')
        return PX.Id(elem.text.strip(), provider)

    def mol_seq(self, elem):
        """Build a PX.MolSeq, converting 'is_aligned' to bool when present."""
        is_aligned = elem.get('is_aligned')
        if is_aligned is not None:
            is_aligned = _str2bool(is_aligned)
        return PX.MolSeq(elem.text.strip(), is_aligned=is_aligned)

    def point(self, elem):
        """Build a PX.Point from the lat, long and alt children."""
        return PX.Point(
            elem.get('geodetic_datum'),
            _get_child_text(elem, 'lat', float),
            _get_child_text(elem, 'long', float),
            alt=_get_child_text(elem, 'alt', float),
            alt_unit=elem.get('alt_unit'))

    def polygon(self, elem):
        """Build a PX.Polygon from the element's point children."""
        return PX.Polygon(
            points=_get_children_as(elem, 'point', self.point))

    def property(self, elem):
        """Build a PX.Property from the element text and attributes."""
        return PX.Property(elem.text.strip(),
                           elem.get('ref'),
                           elem.get('applies_to'),
                           elem.get('datatype'),
                           unit=elem.get('unit'),
                           id_ref=elem.get('id_ref'))

    def reference(self, elem):
        """Build a PX.Reference from the 'doi' attribute and desc child."""
        return PX.Reference(
            doi=elem.get('doi'),
            desc=_get_child_text(elem, 'desc'))

    def sequence_relation(self, elem):
        """Build a PX.SequenceRelation from the element's attributes."""
        return PX.SequenceRelation(
            elem.get('type'), elem.get('id_ref_0'), elem.get('id_ref_1'),
            distance=_float(elem.get('distance')),
            confidence=_get_child_as(elem, 'confidence', self.confidence))

    def uri(self, elem):
        """Build a PX.Uri from the element text and desc/type attributes."""
        return PX.Uri(elem.text.strip(),
                      desc=_collapse_wspace(elem.get('desc')),
                      type=elem.get('type'))


# ---------------------------------------------------------
# OUTPUT
# ---------------------------------------------------------

625 -def _serialize(value):
626 """Convert a Python primitive to a phyloXML-compatible Unicode string (PRIVATE).""" 627 if isinstance(value, float): 628 return unicode(value).upper() 629 elif isinstance(value, bool): 630 return unicode(value).lower() 631 return unicode(value)
632 633
634 -def _clean_attrib(obj, attrs):
635 """Create a dictionary from an object's specified, non-None attributes (PRIVATE).""" 636 out = {} 637 for key in attrs: 638 val = getattr(obj, key) 639 if val is not None: 640 out[key] = _serialize(val) 641 return out
642 643
644 -def _handle_complex(tag, attribs, subnodes, has_text=False):
645 def wrapped(self, obj): 646 elem = ElementTree.Element(tag, _clean_attrib(obj, attribs)) 647 for subn in subnodes: 648 if isinstance(subn, basestring): 649 # singular object: method and attribute names are the same 650 if getattr(obj, subn) is not None: 651 elem.append(getattr(self, subn)(getattr(obj, subn))) 652 else: 653 # list: singular method, pluralized attribute name 654 method, plural = subn 655 for item in getattr(obj, plural): 656 elem.append(getattr(self, method)(item)) 657 if has_text: 658 elem.text = _serialize(obj.value) 659 return elem
660 wrapped.__doc__ = "Serialize a %s and its subnodes, in order." % tag 661 return wrapped 662 663
664 -def _handle_simple(tag):
665 def wrapped(self, obj): 666 elem = ElementTree.Element(tag) 667 elem.text = _serialize(obj) 668 return elem
669 wrapped.__doc__ = "Serialize a simple %s node." % tag 670 return wrapped 671 672
class Writer(object):
    """Methods for serializing a PhyloXML object to XML."""

    def __init__(self, phyloxml):
        """Convert a Phyloxml object into an ElementTree held on the instance."""
        assert isinstance(phyloxml, PX.Phyloxml), "Not a Phyloxml object"
        root = self.phyloxml(phyloxml)
        self._tree = ElementTree.ElementTree(root)

    def write(self, file, encoding=DEFAULT_ENCODING, indent=True):
        """Write the tree to ``file``; return the number of root children."""
        root = self._tree.getroot()
        if indent:
            _indent(root)
        self._tree.write(file, encoding)
        return len(root)

    # Convert classes to ETree elements

    def phyloxml(self, obj):
        """Serialize the top-level phyloxml node, its phylogenies and others."""
        root = ElementTree.Element('phyloxml', obj.attributes)  # Namespaces
        for phylogeny in obj.phylogenies:
            root.append(self.phylogeny(phylogeny))
        for extra in obj.other:
            root.append(self.other(extra))
        return root

    def other(self, obj):
        """Serialize a non-phyloXML node and, recursively, its children."""
        node = ElementTree.Element(_ns(obj.tag, obj.namespace), obj.attributes)
        node.text = obj.value
        for sub in obj.children:
            node.append(self.other(sub))
        return node

    phylogeny = _handle_complex(
        'phylogeny',
        ('rooted', 'rerootable', 'branch_length_unit', 'type'),
        (
            'name',
            'id',
            'description',
            'date',
            ('confidence', 'confidences'),
            'clade',
            ('clade_relation', 'clade_relations'),
            ('sequence_relation', 'sequence_relations'),
            ('property', 'properties'),
            ('other', 'other'),
        ))

    clade = _handle_complex(
        'clade',
        ('id_source',),
        (
            'name',
            'branch_length',
            ('confidence', 'confidences'),
            'width',
            'color',
            'node_id',
            ('taxonomy', 'taxonomies'),
            ('sequence', 'sequences'),
            'events',
            'binary_characters',
            ('distribution', 'distributions'),
            'date',
            ('reference', 'references'),
            ('property', 'properties'),
            ('clade', 'clades'),
            ('other', 'other'),
        ))

    accession = _handle_complex('accession', ('source',), (), has_text=True)

    annotation = _handle_complex(
        'annotation',
        ('ref', 'source', 'evidence', 'type'),
        (
            'desc',
            'confidence',
            ('property', 'properties'),
            'uri',
        ))

    def binary_characters(self, obj):
        """Serialize a binary_characters node and its subnodes."""
        counts = _clean_attrib(obj,
                               ('type', 'gained_count', 'lost_count',
                                'present_count', 'absent_count'))
        node = ElementTree.Element('binary_characters', counts)
        for listname in ('gained', 'lost', 'present', 'absent'):
            sublist = ElementTree.Element(listname)
            for token in getattr(obj, listname):
                sublist.append(self.bc(token))
            node.append(sublist)
        return node

    clade_relation = _handle_complex(
        'clade_relation',
        ('id_ref_0', 'id_ref_1', 'distance', 'type'),
        ('confidence',))

    color = _handle_complex('color', (), ('red', 'green', 'blue'))

    confidence = _handle_complex('confidence', ('type',), (), has_text=True)

    date = _handle_complex(
        'date',
        ('unit',),
        ('desc', 'value', 'minimum', 'maximum'))

    distribution = _handle_complex(
        'distribution',
        (),
        (
            'desc',
            ('point', 'points'),
            ('polygon', 'polygons'),
        ))

    def domain(self, obj):
        """Serialize a domain node."""
        # 'from' is written as start + 1 and 'to' as end.
        span = {'from': str(obj.start + 1), 'to': str(obj.end)}
        node = ElementTree.Element('domain', span)
        if obj.confidence is not None:
            node.set('confidence', _serialize(obj.confidence))
        if obj.id is not None:
            node.set('id', obj.id)
        node.text = _serialize(obj.value)
        return node

    domain_architecture = _handle_complex(
        'domain_architecture',
        ('length',),
        (('domain', 'domains'),))

    events = _handle_complex(
        'events',
        (),
        (
            'type',
            'duplications',
            'speciations',
            'losses',
            'confidence',
        ))

    id = _handle_complex('id', ('provider',), (), has_text=True)

    mol_seq = _handle_complex('mol_seq', ('is_aligned',), (), has_text=True)

    node_id = _handle_complex('node_id', ('provider',), (), has_text=True)

    point = _handle_complex(
        'point',
        ('geodetic_datum', 'alt_unit'),
        ('lat', 'long', 'alt'))

    polygon = _handle_complex('polygon', (), (('point', 'points'),))

    property = _handle_complex(
        'property',
        ('ref', 'unit', 'datatype', 'applies_to', 'id_ref'),
        (),
        has_text=True)

    reference = _handle_complex('reference', ('doi',), ('desc',))

    sequence = _handle_complex(
        'sequence',
        ('type', 'id_ref', 'id_source'),
        (
            'symbol',
            'accession',
            'name',
            'location',
            'mol_seq',
            'uri',
            ('annotation', 'annotations'),
            'domain_architecture',
            ('other', 'other'),
        ))

    sequence_relation = _handle_complex(
        'sequence_relation',
        ('id_ref_0', 'id_ref_1', 'distance', 'type'),
        ('confidence',))

    taxonomy = _handle_complex(
        'taxonomy',
        ('id_source',),
        (
            'id',
            'code',
            'scientific_name',
            'authority',
            ('common_name', 'common_names'),
            ('synonym', 'synonyms'),
            'rank',
            'uri',
            ('other', 'other'),
        ))

    uri = _handle_complex('uri', ('desc', 'type'), (), has_text=True)

    # Primitive types

    # Floating point
    alt = _handle_simple('alt')
    branch_length = _handle_simple('branch_length')
    lat = _handle_simple('lat')
    long = _handle_simple('long')
    maximum = _handle_simple('maximum')
    minimum = _handle_simple('minimum')
    value = _handle_simple('value')
    width = _handle_simple('width')

    # Integers
    blue = _handle_simple('blue')
    duplications = _handle_simple('duplications')
    green = _handle_simple('green')
    losses = _handle_simple('losses')
    red = _handle_simple('red')
    speciations = _handle_simple('speciations')

    # Strings
    bc = _handle_simple('bc')
    code = _handle_simple('code')
    common_name = _handle_simple('common_name')
    desc = _handle_simple('desc')
    description = _handle_simple('description')
    location = _handle_simple('location')
    name = _handle_simple('name')
    rank = _handle_simple('rank')
    scientific_name = _handle_simple('scientific_name')
    symbol = _handle_simple('symbol')
    synonym = _handle_simple('synonym')
    type = _handle_simple('type')
890