Package Bio :: Package Phylo :: Module _utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo._utils

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Utilities for handling, displaying and exporting Phylo trees. 
  7   
  8  Third-party libraries are loaded when the corresponding function is called. 
  9  """ 
 10  __docformat__ = "restructuredtext en" 
 11   
 12  import math 
 13  import sys 
 14   
 15   
16 -def to_networkx(tree):
17 """Convert a Tree object to a networkx graph. 18 19 The result is useful for graph-oriented analysis, and also interactive 20 plotting with pylab, matplotlib or pygraphviz, though the resulting diagram 21 is usually not ideal for displaying a phylogeny. 22 23 Requires NetworkX version 0.99 or later. 24 """ 25 try: 26 import networkx 27 except ImportError: 28 from Bio import MissingPythonDependencyError 29 raise MissingPythonDependencyError( 30 "Install NetworkX if you want to use to_networkx.") 31 32 # NB (1/2010): the networkx API stabilized at v.1.0 33 # 1.0+: edges accept arbitrary data as kwargs, weights are floats 34 # 0.99: edges accept weight as a string, nothing else 35 # pre-0.99: edges accept no additional data 36 # Ubuntu Lucid LTS uses v0.99, let's support everything 37 if networkx.__version__ >= '1.0': 38 def add_edge(graph, n1, n2): 39 graph.add_edge(n1, n2, weight=n2.branch_length or 1.0) 40 # Copy branch color value as hex, if available 41 if hasattr(n2, 'color') and n2.color is not None: 42 graph[n1][n2]['color'] = n2.color.to_hex() 43 elif hasattr(n1, 'color') and n1.color is not None: 44 # Cascading color attributes 45 graph[n1][n2]['color'] = n1.color.to_hex() 46 n2.color = n1.color 47 # Copy branch weight value (float) if available 48 if hasattr(n2, 'width') and n2.width is not None: 49 graph[n1][n2]['width'] = n2.width 50 elif hasattr(n1, 'width') and n1.width is not None: 51 # Cascading width attributes 52 graph[n1][n2]['width'] = n1.width 53 n2.width = n1.width
54 elif networkx.__version__ >= '0.99': 55 def add_edge(graph, n1, n2): 56 graph.add_edge(n1, n2, (n2.branch_length or 1.0)) 57 else: 58 def add_edge(graph, n1, n2): 59 graph.add_edge(n1, n2) 60 61 def build_subgraph(graph, top): 62 """Walk down the Tree, building graphs, edges and nodes.""" 63 for clade in top: 64 graph.add_node(clade.root) 65 add_edge(graph, top.root, clade.root) 66 build_subgraph(graph, clade) 67 68 if tree.rooted: 69 G = networkx.DiGraph() 70 else: 71 G = networkx.Graph() 72 G.add_node(tree.root) 73 build_subgraph(G, tree.root) 74 return G 75 76
77 -def draw_graphviz(tree, label_func=str, prog='twopi', args='', 78 node_color='#c0deff', **kwargs):
79 """Display a tree or clade as a graph, using the graphviz engine. 80 81 Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot. 82 83 The third and fourth parameters apply to Graphviz, and the remaining 84 arbitrary keyword arguments are passed directly to networkx.draw(), which 85 in turn mostly wraps matplotlib/pylab. See the documentation for Graphviz 86 and networkx for detailed explanations. 87 88 The NetworkX/matplotlib parameters are described in the docstrings for 89 networkx.draw() and pylab.scatter(), but the most reasonable options to try 90 are: *alpha, node_color, node_size, node_shape, edge_color, style, 91 font_size, font_color, font_weight, font_family* 92 93 :Parameters: 94 95 label_func : callable 96 A function to extract a label from a node. By default this is str(), 97 but you can use a different function to select another string 98 associated with each node. If this function returns None for a node, 99 no label will be shown for that node. 100 101 The label will also be silently skipped if the throws an exception 102 related to ordinary attribute access (LookupError, AttributeError, 103 ValueError); all other exception types will still be raised. This 104 means you can use a lambda expression that simply attempts to look 105 up the desired value without checking if the intermediate attributes 106 are available: 107 108 >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code) 109 110 prog : string 111 The Graphviz program to use when rendering the graph. 'twopi' 112 behaves the best for large graphs, reliably avoiding crossing edges, 113 but for moderate graphs 'neato' looks a bit nicer. For small 114 directed graphs, 'dot' may produce a normal-looking cladogram, but 115 will cross and distort edges in larger graphs. (The programs 'circo' 116 and 'fdp' are not recommended.) 117 args : string 118 Options passed to the external graphviz program. Normally not 119 needed, but offered here for completeness. 120 121 Example 122 ------- 123 124 >>> import pylab 125 >>> from Bio import Phylo 126 >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml') 127 >>> Phylo.draw_graphviz(tree) 128 >>> pylab.show() 129 >>> pylab.savefig('apaf.png') 130 """ 131 try: 132 import networkx 133 except ImportError: 134 from Bio import MissingPythonDependencyError 135 raise MissingPythonDependencyError( 136 "Install NetworkX if you want to use to_networkx.") 137 138 G = to_networkx(tree) 139 try: 140 # NetworkX version 1.8 or later (2013-01-20) 141 Gi = networkx.convert_node_labels_to_integers(G, 142 label_attribute='label') 143 int_labels = {} 144 for integer, nodeattrs in Gi.node.items(): 145 int_labels[nodeattrs['label']] = integer 146 except TypeError: 147 # Older NetworkX versions (before 1.8) 148 Gi = networkx.convert_node_labels_to_integers(G, 149 discard_old_labels=False) 150 int_labels = Gi.node_labels 151 152 try: 153 posi = networkx.graphviz_layout(Gi, prog, args=args) 154 except ImportError: 155 raise MissingPythonDependencyError( 156 "Install PyGraphviz or pydot if you want to use draw_graphviz.") 157 158 def get_label_mapping(G, selection): 159 """Apply the user-specified node relabeling.""" 160 for node in G.nodes(): 161 if (selection is None) or (node in selection): 162 try: 163 label = label_func(node) 164 if label not in (None, node.__class__.__name__): 165 yield (node, label) 166 except (LookupError, AttributeError, ValueError): 167 pass
168 169 if 'nodelist' in kwargs: 170 labels = dict(get_label_mapping(G, set(kwargs['nodelist']))) 171 else: 172 labels = dict(get_label_mapping(G, None)) 173 kwargs['nodelist'] = list(labels.keys()) 174 if 'edge_color' not in kwargs: 175 kwargs['edge_color'] = [isinstance(e[2], dict) and 176 e[2].get('color', 'k') or 'k' 177 for e in G.edges(data=True)] 178 if 'width' not in kwargs: 179 kwargs['width'] = [isinstance(e[2], dict) and 180 e[2].get('width', 1.0) or 1.0 181 for e in G.edges(data=True)] 182 183 posn = dict((n, posi[int_labels[n]]) for n in G) 184 networkx.draw(G, posn, labels=labels, node_color=node_color, **kwargs) 185 186
187 -def draw_ascii(tree, file=sys.stdout, column_width=80):
188 """Draw an ascii-art phylogram of the given tree. 189 190 The printed result looks like:: 191 192 _________ Orange 193 ______________| 194 | |______________ Tangerine 195 ______________| 196 | | _________________________ Grapefruit 197 _| |_________| 198 | |______________ Pummelo 199 | 200 |__________________________________ Apple 201 202 203 :Parameters: 204 file : file-like object 205 File handle opened for writing the output drawing. 206 column_width : int 207 Total number of text columns used by the drawing. 208 """ 209 taxa = tree.get_terminals() 210 # Some constants for the drawing calculations 211 max_label_width = max(len(str(taxon)) for taxon in taxa) 212 drawing_width = column_width - max_label_width - 1 213 drawing_height = 2 * len(taxa) - 1 214 215 def get_col_positions(tree): 216 """Create a mapping of each clade to its column position.""" 217 depths = tree.depths() 218 # If there are no branch lengths, assume unit branch lengths 219 if not max(depths.values()): 220 depths = tree.depths(unit_branch_lengths=True) 221 # Potential drawing overflow due to rounding -- 1 char per tree layer 222 fudge_margin = int(math.ceil(math.log(len(taxa), 2))) 223 cols_per_branch_unit = ((drawing_width - fudge_margin) 224 / float(max(depths.values()))) 225 return dict((clade, int(round(blen*cols_per_branch_unit + 0.5))) 226 for clade, blen in depths.items())
227 228 def get_row_positions(tree): 229 positions = dict((taxon, 2*idx) for idx, taxon in enumerate(taxa)) 230 231 def calc_row(clade): 232 for subclade in clade: 233 if subclade not in positions: 234 calc_row(subclade) 235 positions[clade] = ((positions[clade.clades[0]] + 236 positions[clade.clades[-1]]) // 2) 237 238 calc_row(tree.root) 239 return positions 240 241 col_positions = get_col_positions(tree) 242 row_positions = get_row_positions(tree) 243 char_matrix = [[' ' for x in range(drawing_width)] 244 for y in range(drawing_height)] 245 246 def draw_clade(clade, startcol): 247 thiscol = col_positions[clade] 248 thisrow = row_positions[clade] 249 # Draw a horizontal line 250 for col in range(startcol, thiscol): 251 char_matrix[thisrow][col] = '_' 252 if clade.clades: 253 # Draw a vertical line 254 toprow = row_positions[clade.clades[0]] 255 botrow = row_positions[clade.clades[-1]] 256 for row in range(toprow+1, botrow+1): 257 char_matrix[row][thiscol] = '|' 258 # NB: Short terminal branches need something to stop rstrip() 259 if (col_positions[clade.clades[0]] - thiscol) < 2: 260 char_matrix[toprow][thiscol] = ',' 261 # Draw descendents 262 for child in clade: 263 draw_clade(child, thiscol+1) 264 265 draw_clade(tree.root, 0) 266 # Print the complete drawing 267 for idx, row in enumerate(char_matrix): 268 line = ''.join(row).rstrip() 269 # Add labels for terminal taxa in the right margin 270 if idx % 2 == 0: 271 line += ' ' + str(taxa[idx//2]) 272 file.write(line + '\n') 273 file.write('\n') 274 275
276 -def draw(tree, label_func=str, do_show=True, show_confidence=True, 277 # For power users 278 axes=None, branch_labels=None, *args, **kwargs):
279 """Plot the given tree using matplotlib (or pylab). 280 281 The graphic is a rooted tree, drawn with roughly the same algorithm as 282 draw_ascii. 283 284 Additional keyword arguments passed into this function are used as pyplot 285 options. The input format should be in the form of: 286 pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict), or 287 pyplot_option_name=(dict). 288 289 Example using the pyplot options 'axhspan' and 'axvline': 290 291 >>> Phylo.draw(tree, axhspan=((0.25, 7.75), {'facecolor':'0.5'}), 292 ... axvline={'x':'0', 'ymin':'0', 'ymax':'1'}) 293 294 Visual aspects of the plot can also be modified using pyplot's own functions 295 and objects (via pylab or matplotlib). In particular, the pyplot.rcParams 296 object can be used to scale the font size (rcParams["font.size"]) and line 297 width (rcParams["lines.linewidth"]). 298 299 :Parameters: 300 label_func : callable 301 A function to extract a label from a node. By default this is str(), 302 but you can use a different function to select another string 303 associated with each node. If this function returns None for a node, 304 no label will be shown for that node. 305 do_show : bool 306 Whether to show() the plot automatically. 307 show_confidence : bool 308 Whether to display confidence values, if present on the tree. 309 axes : matplotlib/pylab axes 310 If a valid matplotlib.axes.Axes instance, the phylogram is plotted 311 in that Axes. By default (None), a new figure is created. 312 branch_labels : dict or callable 313 A mapping of each clade to the label that will be shown along the 314 branch leading to it. By default this is the confidence value(s) of 315 the clade, taken from the ``confidence`` attribute, and can be 316 easily toggled off with this function's ``show_confidence`` option. 317 But if you would like to alter the formatting of confidence values, 318 or label the branches with something other than confidence, then use 319 this option. 320 """ 321 322 try: 323 import matplotlib.pyplot as plt 324 except ImportError: 325 try: 326 import pylab as plt 327 except ImportError: 328 from Bio import MissingPythonDependencyError 329 raise MissingPythonDependencyError( 330 "Install matplotlib or pylab if you want to use draw.") 331 332 import matplotlib.collections as mpcollections 333 334 # Arrays that store lines for the plot of clades 335 horizontal_linecollections = [] 336 vertical_linecollections = [] 337 338 # Options for displaying branch labels / confidence 339 def conf2str(conf): 340 if int(conf) == conf: 341 return str(int(conf)) 342 return str(conf)
343 if not branch_labels: 344 if show_confidence: 345 def format_branch_label(clade): 346 if hasattr(clade, 'confidences'): 347 # phyloXML supports multiple confidences 348 return '/'.join(conf2str(cnf.value) 349 for cnf in clade.confidences) 350 if clade.confidence: 351 return conf2str(clade.confidence) 352 return None 353 else: 354 def format_branch_label(clade): 355 return None 356 elif isinstance(branch_labels, dict): 357 def format_branch_label(clade): 358 return branch_labels.get(clade) 359 else: 360 assert callable(branch_labels), \ 361 "branch_labels must be either a dict or a callable (function)" 362 format_branch_label = branch_labels 363 364 # Layout 365 366 def get_x_positions(tree): 367 """Create a mapping of each clade to its horizontal position. 368 369 Dict of {clade: x-coord} 370 """ 371 depths = tree.depths() 372 # If there are no branch lengths, assume unit branch lengths 373 if not max(depths.values()): 374 depths = tree.depths(unit_branch_lengths=True) 375 return depths 376 377 def get_y_positions(tree): 378 """Create a mapping of each clade to its vertical position. 379 380 Dict of {clade: y-coord}. 381 Coordinates are negative, and integers for tips. 382 """ 383 maxheight = tree.count_terminals() 384 # Rows are defined by the tips 385 heights = dict((tip, maxheight - i) 386 for i, tip in enumerate(reversed(tree.get_terminals()))) 387 388 # Internal nodes: place at midpoint of children 389 def calc_row(clade): 390 for subclade in clade: 391 if subclade not in heights: 392 calc_row(subclade) 393 # Closure over heights 394 heights[clade] = (heights[clade.clades[0]] + 395 heights[clade.clades[-1]]) / 2.0 396 397 if tree.root.clades: 398 calc_row(tree.root) 399 return heights 400 401 x_posns = get_x_positions(tree) 402 y_posns = get_y_positions(tree) 403 # The function draw_clade closes over the axes object 404 if axes is None: 405 fig = plt.figure() 406 axes = fig.add_subplot(1, 1, 1) 407 elif not isinstance(axes, plt.matplotlib.axes.Axes): 408 raise ValueError("Invalid argument for axes: %s" % axes) 409 410 def draw_clade_lines(use_linecollection=False, orientation='horizontal', 411 y_here=0, x_start=0, x_here=0, y_bot=0, y_top=0, 412 color='black', lw='.1'): 413 """Create a line with or without a line collection object. 414 415 Graphical formatting of the lines representing clades in the plot can be 416 customized by altering this function. 417 """ 418 if (use_linecollection==False and orientation=='horizontal'): 419 axes.hlines(y_here, x_start, x_here, color=color, lw=lw) 420 elif (use_linecollection==True and orientation=='horizontal'): 421 horizontal_linecollections.append(mpcollections.LineCollection( 422 [[(x_start, y_here), (x_here, y_here)]], color=color, lw=lw),) 423 elif (use_linecollection==False and orientation=='vertical'): 424 axes.vlines(x_here, y_bot, y_top, color=color) 425 elif (use_linecollection==True and orientation=='vertical'): 426 vertical_linecollections.append(mpcollections.LineCollection( 427 [[(x_here, y_bot), (x_here, y_top)]], color=color, lw=lw),) 428 429 def draw_clade(clade, x_start, color, lw): 430 """Recursively draw a tree, down from the given clade.""" 431 x_here = x_posns[clade] 432 y_here = y_posns[clade] 433 # phyloXML-only graphics annotations 434 if hasattr(clade, 'color') and clade.color is not None: 435 color = clade.color.to_hex() 436 if hasattr(clade, 'width') and clade.width is not None: 437 lw = clade.width * plt.rcParams['lines.linewidth'] 438 # Draw a horizontal line from start to here 439 draw_clade_lines(use_linecollection=True, orientation='horizontal', 440 y_here=y_here, x_start=x_start, x_here=x_here, color=color, lw=lw) 441 # Add node/taxon labels 442 label = label_func(clade) 443 if label not in (None, clade.__class__.__name__): 444 axes.text(x_here, y_here, ' %s' % label, verticalalignment='center') 445 # Add label above the branch (optional) 446 conf_label = format_branch_label(clade) 447 if conf_label: 448 axes.text(0.5*(x_start + x_here), y_here, conf_label, 449 fontsize='small', horizontalalignment='center') 450 if clade.clades: 451 # Draw a vertical line connecting all children 452 y_top = y_posns[clade.clades[0]] 453 y_bot = y_posns[clade.clades[-1]] 454 # Only apply widths to horizontal lines, like Archaeopteryx 455 draw_clade_lines(use_linecollection=True, orientation='vertical', 456 x_here=x_here, y_bot=y_bot, y_top=y_top, color=color, lw=lw) 457 # Draw descendents 458 for child in clade: 459 draw_clade(child, x_here, color, lw) 460 461 draw_clade(tree.root, 0, 'k', plt.rcParams['lines.linewidth']) 462 463 # If line collections were used to create clade lines, here they are added 464 # to the pyplot plot. 465 for i in horizontal_linecollections: 466 axes.add_collection(i) 467 for i in vertical_linecollections: 468 axes.add_collection(i) 469 470 # Aesthetics 471 472 if hasattr(tree, 'name') and tree.name: 473 axes.set_title(tree.name) 474 axes.set_xlabel('branch length') 475 axes.set_ylabel('taxa') 476 # Add margins around the tree to prevent overlapping the axes 477 xmax = max(x_posns.values()) 478 axes.set_xlim(-0.05 * xmax, 1.25 * xmax) 479 # Also invert the y-axis (origin at the top) 480 # Add a small vertical margin, but avoid including 0 and N+1 on the y axis 481 axes.set_ylim(max(y_posns.values()) + 0.8, 0.2) 482 483 # Parse and process key word arguments as pyplot options 484 for key, value in kwargs.items(): 485 try: 486 # Check that the pyplot option input is iterable, as required 487 [i for i in value] 488 except TypeError: 489 raise ValueError('Keyword argument "%s=%s" is not in the format ' 490 'pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict),' 491 ' or pyplot_option_name=(dict) ' 492 % (key, value)) 493 if isinstance(value, dict): 494 getattr(plt, str(key))(**dict(value)) 495 elif not (isinstance(value[0], tuple)): 496 getattr(plt, str(key))(*value) 497 elif (isinstance(value[0], tuple)): 498 getattr(plt, str(key))(*value[0], **dict(value[1])) 499 500 if do_show: 501 plt.show() 502