1
2
3
4
5
6 """I/O function wrappers for phylogenetic tree formats.
7
8 This API follows the same semantics as Biopython's `SeqIO` and `AlignIO`.
9 """
10
11
12 from __future__ import with_statement
13 __docformat__ = "restructuredtext en"
14
15 from Bio import File
16 from Bio.Phylo import BaseTree, NewickIO, NexusIO, PhyloXMLIO
17
18
# Registry of tree file formats: maps the format name accepted by the
# functions in this module to the I/O submodule that implements it. The
# `parse` and `write` callables are looked up on the selected submodule.
supported_formats = dict(
    newick=NewickIO,
    nexus=NexusIO,
    phyloxml=PhyloXMLIO,
)
24
25
def parse(file, format, **kwargs):
    """Lazily yield every tree stored in a file.

    The result is always an iterator, even when the file holds exactly one
    tree (in which case it produces a single element).

    :Parameters:
        file
            Anything accepted by `File.as_handle`; it is opened in ``'r'``
            mode.
        format
            Key into `supported_formats` (e.g. ``'newick'``). An unknown
            name raises `KeyError` once iteration starts.
        kwargs
            Passed unchanged to the format module's ``parse`` function.

    Example
    -------

    >>> trees = parse('../../Tests/PhyloXML/apaf.xml', 'phyloxml')
    >>> for tree in trees:
    ...     print(tree.rooted)
    True
    """
    with File.as_handle(file, 'r') as handle:
        # Resolve the parser only after the handle is open, so errors
        # surface in the same order as they always have.
        parser = supported_formats[format].parse
        for tree in parser(handle, **kwargs):
            yield tree
43
44
def read(file, format, **kwargs):
    """Parse a file in the given format and return a single tree.

    :Parameters:
        file
            File name or handle, passed straight to `parse`.
        format
            Name of the tree format, passed straight to `parse`.
        kwargs
            Extra keyword arguments, passed straight to `parse`.

    :returns: the one tree contained in the file.

    Raises a `ValueError` if there are zero or multiple trees -- if this
    occurs, use `parse` instead to get the complete sequence of trees.
    """
    tree_gen = parse(file, format, **kwargs)
    try:
        # Use the built-in next() rather than the Python 2-only `.next()`
        # method, so this works on Python 3 as well.
        try:
            tree = next(tree_gen)
        except StopIteration:
            raise ValueError("There are no trees in this file.")
        try:
            next(tree_gen)
        except StopIteration:
            return tree
        raise ValueError(
            "There are multiple trees in this file; use parse() instead.")
    finally:
        # If a second tree was found, the generator is still suspended
        # inside the `with` block of `parse`; closing it unwinds that block
        # now instead of whenever the generator is garbage-collected.
        # Closing an already exhausted generator does nothing.
        tree_gen.close()
63
64
def write(trees, file, format, **kwargs):
    """Write one tree or a sequence of trees to a file in the given format.

    :Parameters:
        trees
            Either a single `BaseTree.Tree` / `BaseTree.Clade` object, or an
            iterable of trees. A single object is wrapped in a list first.
        file
            Anything accepted by `File.as_handle`; it is opened in ``'w+'``
            mode.
        format
            Key into `supported_formats` (e.g. ``'newick'``).
        kwargs
            Passed unchanged to the format module's ``write`` function.

    :returns: whatever the format module's ``write`` function returns
        (presumably the number of trees written -- confirm against the
        individual format modules).
    """
    if isinstance(trees, (BaseTree.Tree, BaseTree.Clade)):
        # Writers receive a sequence, so wrap a lone tree or clade.
        trees = [trees]
    with File.as_handle(file, 'w+') as handle:
        writer = supported_formats[format].write
        return writer(trees, handle, **kwargs)
73
74
def convert(in_file, in_format, out_file, out_format, **kwargs):
    """Convert between two tree file formats.

    Trees are read from `in_file` with `parse` (using `in_format`) and passed
    directly to `write`, which stores them in `out_file` as `out_format`.
    Extra keyword arguments are forwarded to `write` only; `parse` is called
    without them.

    :returns: the value returned by `write`.
    """
    return write(parse(in_file, in_format), out_file, out_format, **kwargs)
79