1
2
3
4
5
6 """Utilities for handling, displaying and exporting Phylo trees.
7
8 Third-party libraries are loaded when the corresponding function is called.
9 """
10 __docformat__ = "restructuredtext en"
11
12 import math
13 import sys
14
15
17 """Convert a Tree object to a networkx graph.
18
19 The result is useful for graph-oriented analysis, and also interactive
20 plotting with pylab, matplotlib or pygraphviz, though the resulting diagram
21 is usually not ideal for displaying a phylogeny.
22
23 Requires NetworkX version 0.99 or later.
24 """
25 try:
26 import networkx
27 except ImportError:
28 from Bio import MissingPythonDependencyError
29 raise MissingPythonDependencyError(
30 "Install NetworkX if you want to use to_networkx.")
31
32
33
34
35
36
37 if networkx.__version__ >= '1.0':
38 def add_edge(graph, n1, n2):
39 graph.add_edge(n1, n2, weight=n2.branch_length or 1.0)
40
41 if hasattr(n2, 'color') and n2.color is not None:
42 graph[n1][n2]['color'] = n2.color.to_hex()
43 elif hasattr(n1, 'color') and n1.color is not None:
44
45 graph[n1][n2]['color'] = n1.color.to_hex()
46 n2.color = n1.color
47
48 if hasattr(n2, 'width') and n2.width is not None:
49 graph[n1][n2]['width'] = n2.width
50 elif hasattr(n1, 'width') and n1.width is not None:
51
52 graph[n1][n2]['width'] = n1.width
53 n2.width = n1.width
54 elif networkx.__version__ >= '0.99':
55 def add_edge(graph, n1, n2):
56 graph.add_edge(n1, n2, (n2.branch_length or 1.0))
57 else:
58 def add_edge(graph, n1, n2):
59 graph.add_edge(n1, n2)
60
61 def build_subgraph(graph, top):
62 """Walk down the Tree, building graphs, edges and nodes."""
63 for clade in top:
64 graph.add_node(clade.root)
65 add_edge(graph, top.root, clade.root)
66 build_subgraph(graph, clade)
67
68 if tree.rooted:
69 G = networkx.DiGraph()
70 else:
71 G = networkx.Graph()
72 G.add_node(tree.root)
73 build_subgraph(G, tree.root)
74 return G
75
76
77 -def draw_graphviz(tree, label_func=str, prog='twopi', args='',
78 node_color='#c0deff', **kwargs):
79 """Display a tree or clade as a graph, using the graphviz engine.
80
81 Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot.
82
83 The third and fourth parameters apply to Graphviz, and the remaining
84 arbitrary keyword arguments are passed directly to networkx.draw(), which
85 in turn mostly wraps matplotlib/pylab. See the documentation for Graphviz
86 and networkx for detailed explanations.
87
88 The NetworkX/matplotlib parameters are described in the docstrings for
89 networkx.draw() and pylab.scatter(), but the most reasonable options to try
90 are: *alpha, node_color, node_size, node_shape, edge_color, style,
91 font_size, font_color, font_weight, font_family*
92
93 :Parameters:
94
95 label_func : callable
96 A function to extract a label from a node. By default this is str(),
97 but you can use a different function to select another string
98 associated with each node. If this function returns None for a node,
99 no label will be shown for that node.
100
101 The label will also be silently skipped if the throws an exception
102 related to ordinary attribute access (LookupError, AttributeError,
103 ValueError); all other exception types will still be raised. This
104 means you can use a lambda expression that simply attempts to look
105 up the desired value without checking if the intermediate attributes
106 are available:
107
108 >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code)
109
110 prog : string
111 The Graphviz program to use when rendering the graph. 'twopi'
112 behaves the best for large graphs, reliably avoiding crossing edges,
113 but for moderate graphs 'neato' looks a bit nicer. For small
114 directed graphs, 'dot' may produce a normal-looking cladogram, but
115 will cross and distort edges in larger graphs. (The programs 'circo'
116 and 'fdp' are not recommended.)
117 args : string
118 Options passed to the external graphviz program. Normally not
119 needed, but offered here for completeness.
120
121 Example
122 -------
123
124 >>> import pylab
125 >>> from Bio import Phylo
126 >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml')
127 >>> Phylo.draw_graphviz(tree)
128 >>> pylab.show()
129 >>> pylab.savefig('apaf.png')
130 """
131 try:
132 import networkx
133 except ImportError:
134 from Bio import MissingPythonDependencyError
135 raise MissingPythonDependencyError(
136 "Install NetworkX if you want to use to_networkx.")
137
138 G = to_networkx(tree)
139 Gi = networkx.convert_node_labels_to_integers(G, discard_old_labels=False)
140 try:
141 posi = networkx.graphviz_layout(Gi, prog, args=args)
142 except ImportError:
143 raise MissingPythonDependencyError(
144 "Install PyGraphviz or pydot if you want to use draw_graphviz.")
145
146 def get_label_mapping(G, selection):
147 for node in G.nodes():
148 if (selection is None) or (node in selection):
149 try:
150 label = label_func(node)
151 if label not in (None, node.__class__.__name__):
152 yield (node, label)
153 except (LookupError, AttributeError, ValueError):
154 pass
155
156 if 'nodelist' in kwargs:
157 labels = dict(get_label_mapping(G, set(kwargs['nodelist'])))
158 else:
159 labels = dict(get_label_mapping(G, None))
160 kwargs['nodelist'] = labels.keys()
161 if 'edge_color' not in kwargs:
162 kwargs['edge_color'] = [isinstance(e[2], dict) and
163 e[2].get('color', 'k') or 'k'
164 for e in G.edges(data=True)]
165 if 'width' not in kwargs:
166 kwargs['width'] = [isinstance(e[2], dict) and
167 e[2].get('width', 1.0) or 1.0
168 for e in G.edges(data=True)]
169
170 posn = dict((n, posi[Gi.node_labels[n]]) for n in G)
171 networkx.draw(G, posn, labels=labels, node_color=node_color, **kwargs)
172
173
174 -def draw_ascii(tree, file=sys.stdout, column_width=80):
175 """Draw an ascii-art phylogram of the given tree.
176
177 The printed result looks like::
178
179 _________ Orange
180 ______________|
181 | |______________ Tangerine
182 ______________|
183 | | _________________________ Grapefruit
184 _| |_________|
185 | |______________ Pummelo
186 |
187 |__________________________________ Apple
188
189
190 :Parameters:
191 file : file-like object
192 File handle opened for writing the output drawing.
193 column_width : int
194 Total number of text columns used by the drawing.
195 """
196 taxa = tree.get_terminals()
197
198 max_label_width = max(len(str(taxon)) for taxon in taxa)
199 drawing_width = column_width - max_label_width - 1
200 drawing_height = 2 * len(taxa) - 1
201
202 def get_col_positions(tree):
203 """Create a mapping of each clade to its column position."""
204 depths = tree.depths()
205
206 if not max(depths.itervalues()):
207 depths = tree.depths(unit_branch_lengths=True)
208
209 fudge_margin = int(math.ceil(math.log(len(taxa), 2)))
210 cols_per_branch_unit = ((drawing_width - fudge_margin)
211 / float(max(depths.itervalues())))
212 return dict((clade, int(round(blen*cols_per_branch_unit + 0.5)))
213 for clade, blen in depths.iteritems())
214
215 def get_row_positions(tree):
216 positions = dict((taxon, 2*idx) for idx, taxon in enumerate(taxa))
217
218 def calc_row(clade):
219 for subclade in clade:
220 if subclade not in positions:
221 calc_row(subclade)
222 positions[clade] = (positions[clade.clades[0]] +
223 positions[clade.clades[-1]]) / 2
224
225 calc_row(tree.root)
226 return positions
227
228 col_positions = get_col_positions(tree)
229 row_positions = get_row_positions(tree)
230 char_matrix = [[' ' for x in range(drawing_width)]
231 for y in range(drawing_height)]
232
233 def draw_clade(clade, startcol):
234 thiscol = col_positions[clade]
235 thisrow = row_positions[clade]
236
237 for col in range(startcol, thiscol):
238 char_matrix[thisrow][col] = '_'
239 if clade.clades:
240
241 toprow = row_positions[clade.clades[0]]
242 botrow = row_positions[clade.clades[-1]]
243 for row in range(toprow+1, botrow+1):
244 char_matrix[row][thiscol] = '|'
245
246 if (col_positions[clade.clades[0]] - thiscol) < 2:
247 char_matrix[toprow][thiscol] = ','
248
249 for child in clade:
250 draw_clade(child, thiscol+1)
251
252 draw_clade(tree.root, 0)
253
254 for idx, row in enumerate(char_matrix):
255 line = ''.join(row).rstrip()
256
257 if idx % 2 == 0:
258 line += ' ' + str(taxa[idx/2])
259 file.write(line + '\n')
260 file.write('\n')
261
262
263 -def draw(tree, label_func=str, do_show=True, show_confidence=True,
264
265 axes=None, branch_labels=None):
266 """Plot the given tree using matplotlib (or pylab).
267
268 The graphic is a rooted tree, drawn with roughly the same algorithm as
269 draw_ascii.
270
271 Visual aspects of the plot can be modified using pyplot's own functions and
272 objects (via pylab or matplotlib). In particular, the pyplot.rcParams
273 object can be used to scale the font size (rcParams["font.size"]) and line
274 width (rcParams["lines.linewidth"]).
275
276 :Parameters:
277 label_func : callable
278 A function to extract a label from a node. By default this is str(),
279 but you can use a different function to select another string
280 associated with each node. If this function returns None for a node,
281 no label will be shown for that node.
282 do_show : bool
283 Whether to show() the plot automatically.
284 show_confidence : bool
285 Whether to display confidence values, if present on the tree.
286 axes : matplotlib/pylab axes
287 If a valid matplotlib.axes.Axes instance, the phylogram is plotted
288 in that Axes. By default (None), a new figure is created.
289 branch_labels : dict or callable
290 A mapping of each clade to the label that will be shown along the
291 branch leading to it. By default this is the confidence value(s) of
292 the clade, taken from the ``confidence`` attribute, and can be
293 easily toggled off with this function's ``show_confidence`` option.
294 But if you would like to alter the formatting of confidence values,
295 or label the branches with something other than confidence, then use
296 this option.
297 """
298 try:
299 import matplotlib.pyplot as plt
300 except ImportError:
301 try:
302 import pylab as plt
303 except ImportError:
304 from Bio import MissingPythonDependencyError
305 raise MissingPythonDependencyError(
306 "Install matplotlib or pylab if you want to use draw.")
307
308
309 def conf2str(conf):
310 if int(conf) == conf:
311 return str(int(conf))
312 return str(conf)
313 if not branch_labels:
314 if show_confidence:
315 def format_branch_label(clade):
316 if hasattr(clade, 'confidences'):
317
318 return '/'.join(conf2str(cnf.value)
319 for cnf in clade.confidences)
320 if clade.confidence:
321 return conf2str(clade.confidence)
322 return None
323 else:
324 def format_branch_label(clade):
325 return None
326 elif isinstance(branch_labels, dict):
327 def format_branch_label(clade):
328 return branch_labels.get(clade)
329 else:
330 assert callable(branch_labels), \
331 "branch_labels must be either a dict or a callable (function)"
332 format_branch_label = branch_labels
333
334
335
336 def get_x_positions(tree):
337 """Create a mapping of each clade to its horizontal position.
338
339 Dict of {clade: x-coord}
340 """
341 depths = tree.depths()
342
343 if not max(depths.itervalues()):
344 depths = tree.depths(unit_branch_lengths=True)
345 return depths
346
347 def get_y_positions(tree):
348 """Create a mapping of each clade to its vertical position.
349
350 Dict of {clade: y-coord}.
351 Coordinates are negative, and integers for tips.
352 """
353 maxheight = tree.count_terminals()
354
355 heights = dict((tip, maxheight - i)
356 for i, tip in enumerate(reversed(tree.get_terminals())))
357
358
359 def calc_row(clade):
360 for subclade in clade:
361 if subclade not in heights:
362 calc_row(subclade)
363
364 heights[clade] = (heights[clade.clades[0]] +
365 heights[clade.clades[-1]]) / 2.0
366
367 if tree.root.clades:
368 calc_row(tree.root)
369 return heights
370
371 x_posns = get_x_positions(tree)
372 y_posns = get_y_positions(tree)
373
374 if axes is None:
375 fig = plt.figure()
376 axes = fig.add_subplot(1, 1, 1)
377 elif not isinstance(axes, plt.matplotlib.axes.Axes):
378 raise ValueError("Invalid argument for axes: %s" % axes)
379
380 def draw_clade(clade, x_start, color, lw):
381 """Recursively draw a tree, down from the given clade."""
382 x_here = x_posns[clade]
383 y_here = y_posns[clade]
384
385 if hasattr(clade, 'color') and clade.color is not None:
386 color = clade.color.to_hex()
387 if hasattr(clade, 'width') and clade.width is not None:
388 lw = clade.width * plt.rcParams['lines.linewidth']
389
390 axes.hlines(y_here, x_start, x_here, color=color, lw=lw)
391
392 label = label_func(clade)
393 if label not in (None, clade.__class__.__name__):
394 axes.text(x_here, y_here, ' %s' % label, verticalalignment='center')
395
396 conf_label = format_branch_label(clade)
397 if conf_label:
398 axes.text(0.5*(x_start + x_here), y_here, conf_label,
399 fontsize='small', horizontalalignment='center')
400 if clade.clades:
401
402 y_top = y_posns[clade.clades[0]]
403 y_bot = y_posns[clade.clades[-1]]
404
405 axes.vlines(x_here, y_bot, y_top, color=color)
406
407 for child in clade:
408 draw_clade(child, x_here, color, lw)
409
410 draw_clade(tree.root, 0, 'k', plt.rcParams['lines.linewidth'])
411
412
413
414 if hasattr(tree, 'name') and tree.name:
415 axes.set_title(tree.name)
416 axes.set_xlabel('branch length')
417 axes.set_ylabel('taxa')
418
419 xmax = max(x_posns.itervalues())
420 axes.set_xlim(-0.05 * xmax, 1.25 * xmax)
421
422
423 axes.set_ylim(max(y_posns.itervalues()) + 0.8, 0.2)
424 if do_show:
425 plt.show()
426