Package Bio :: Package Phylo :: Module _utils
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo._utils

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Utilities for handling, displaying and exporting Phylo trees. 
  7   
  8  Third-party libraries are loaded when the corresponding function is called. 
  9  """ 
 10   
 11  import math 
 12  import sys 
 13   
 14  from Bio import MissingPythonDependencyError 
 15   
 16   
def to_networkx(tree):
    """Build a NetworkX graph that mirrors the topology of a Tree object.

    Every clade becomes a node and every parent/child link becomes an edge.
    The graph is handy for graph-oriented analysis and for interactive
    plotting with pylab, matplotlib or pygraphviz, although such a diagram is
    usually not an ideal way to display a phylogeny.

    A rooted tree produces a ``networkx.DiGraph``, any other tree a
    ``networkx.Graph``.

    Note that with NetworkX 1.0+ the clades of ``tree`` are modified: a child
    clade without its own ``color``/``width`` inherits the value of its
    parent while the edges are being built.

    Requires NetworkX version 0.99 or later.
    """
    try:
        import networkx
    except ImportError:
        raise MissingPythonDependencyError(
            "Install NetworkX if you want to use to_networkx.")

    # NB (1/2010): the networkx API stabilized at v.1.0
    # 1.0+: edges accept arbitrary data as kwargs, weights are floats
    # 0.99: edges accept weight as a string, nothing else
    # pre-0.99: edges accept no additional data
    # Ubuntu Lucid LTS uses v0.99, let's support everything
    def _plain_edge(graph, parent, child):
        graph.add_edge(parent, child)

    def _weighted_edge(graph, parent, child):
        graph.add_edge(parent, child, (child.branch_length or 1.0))

    def _annotated_edge(graph, parent, child):
        graph.add_edge(parent, child, weight=child.branch_length or 1.0)
        # Colors are exported as hex strings, widths are copied as they are.
        exporters = (('color', lambda value: value.to_hex()),
                     ('width', lambda value: value))
        for attr, export in exporters:
            if hasattr(child, attr) and getattr(child, attr) is not None:
                graph[parent][child][attr] = export(getattr(child, attr))
            elif hasattr(parent, attr) and getattr(parent, attr) is not None:
                # Cascading attributes: the child takes over the parent's
                # value so that its own descendants can inherit it in turn.
                graph[parent][child][attr] = export(getattr(parent, attr))
                setattr(child, attr, getattr(parent, attr))

    installed = networkx.__version__
    if installed >= '1.0':
        add_edge = _annotated_edge
    elif installed >= '0.99':
        add_edge = _weighted_edge
    else:
        add_edge = _plain_edge

    def _descend(graph, parent):
        """Add all descendants of ``parent`` to the graph, depth first."""
        for child in parent:
            graph.add_node(child.root)
            add_edge(graph, parent.root, child.root)
            _descend(graph, child)

    result = networkx.DiGraph() if tree.rooted else networkx.Graph()
    result.add_node(tree.root)
    _descend(result, tree.root)
    return result
def draw_graphviz(tree, label_func=str, prog='twopi', args='',
                  node_color='#c0deff', **kwargs):
    """Display a tree or clade as a graph, using the graphviz engine.

    Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot.

    The third and fourth parameters apply to Graphviz, and the remaining
    arbitrary keyword arguments are passed directly to networkx.draw(), which
    in turn mostly wraps matplotlib/pylab.  See the documentation for Graphviz
    and networkx for detailed explanations.

    The NetworkX/matplotlib parameters are described in the docstrings for
    networkx.draw() and pylab.scatter(), but the most reasonable options to try
    are: *alpha, node_color, node_size, node_shape, edge_color, style,
    font_size, font_color, font_weight, font_family*

    If ``nodelist``, ``edge_color`` or ``width`` are not given, they are
    filled in from the labelled nodes and from the per-edge ``color`` and
    ``width`` attributes stored on the graph (defaults: ``'k'`` and ``1.0``).

    :Parameters:

        label_func : callable
            A function to extract a label from a node. By default this is str(),
            but you can use a different function to select another string
            associated with each node. If this function returns None for a node,
            no label will be shown for that node.

            The label will also be silently skipped if the function throws an
            exception related to ordinary attribute access (LookupError,
            AttributeError, ValueError); all other exception types will still
            be raised. This means you can use a lambda expression that simply
            attempts to look up the desired value without checking if the
            intermediate attributes are available:

                >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code)

        prog : string
            The Graphviz program to use when rendering the graph. 'twopi'
            behaves the best for large graphs, reliably avoiding crossing edges,
            but for moderate graphs 'neato' looks a bit nicer.  For small
            directed graphs, 'dot' may produce a normal-looking cladogram, but
            will cross and distort edges in larger graphs. (The programs 'circo'
            and 'fdp' are not recommended.)
        args : string
            Options passed to the external graphviz program.  Normally not
            needed, but offered here for completeness.
        node_color : matplotlib color
            Color passed to networkx.draw() for the nodes.

    :Raises:
        MissingPythonDependencyError if NetworkX, or the PyGraphviz/pydot
        backend needed for the layout, cannot be imported.

    Example
    -------

    >>> import pylab
    >>> from Bio import Phylo
    >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml')
    >>> Phylo.draw_graphviz(tree)
    >>> pylab.show()
    >>> pylab.savefig('apaf.png')
    """
    try:
        import networkx
    except ImportError:
        # Name the function the user actually called.
        raise MissingPythonDependencyError(
            "Install NetworkX if you want to use draw_graphviz.")

    G = to_networkx(tree)
    # The layout is computed on a copy of the graph whose nodes are plain
    # integers; int_labels maps each original node to its integer.
    try:
        # NetworkX version 1.8 or later (2013-01-20)
        Gi = networkx.convert_node_labels_to_integers(
            G, label_attribute='label')
        int_labels = {}
        for integer, nodeattrs in Gi.node.items():
            int_labels[nodeattrs['label']] = integer
    except TypeError:
        # Older NetworkX versions (before 1.8)
        Gi = networkx.convert_node_labels_to_integers(
            G, discard_old_labels=False)
        int_labels = Gi.node_labels

    try:
        posi = networkx.graphviz_layout(Gi, prog, args=args)
    except ImportError:
        raise MissingPythonDependencyError(
            "Install PyGraphviz or pydot if you want to use draw_graphviz.")

    def get_label_mapping(G, selection):
        """Apply the user-specified node relabeling.

        Yields (node, label) pairs for the nodes in ``selection`` (all nodes
        if it is None) that have a usable label.
        """
        for node in G.nodes():
            if (selection is None) or (node in selection):
                try:
                    label = label_func(node)
                except (LookupError, AttributeError, ValueError):
                    # Ordinary lookup failures mean "no label for this node"
                    continue
                # The bare class name is what str() gives for unnamed clades
                if label not in (None, node.__class__.__name__):
                    yield (node, label)

    def get_edge_attribute(edge, key, default):
        """Read ``key`` from the edge data, falling back on ``default``.

        The fallback also applies when the edge data is not a dict (old
        NetworkX versions) or when the stored value is false.
        """
        data = edge[2]
        if isinstance(data, dict):
            return data.get(key, default) or default
        return default

    if 'nodelist' in kwargs:
        labels = dict(get_label_mapping(G, set(kwargs['nodelist'])))
    else:
        labels = dict(get_label_mapping(G, None))
        # By default only the nodes that carry a label are drawn
        kwargs['nodelist'] = list(labels.keys())
    if 'edge_color' not in kwargs:
        kwargs['edge_color'] = [get_edge_attribute(e, 'color', 'k')
                                for e in G.edges(data=True)]
    if 'width' not in kwargs:
        kwargs['width'] = [get_edge_attribute(e, 'width', 1.0)
                           for e in G.edges(data=True)]

    # Translate the layout of the integer graph back to the original nodes
    posn = dict((n, posi[int_labels[n]]) for n in G)
    networkx.draw(G, posn, labels=labels, with_labels=True,
                  node_color=node_color, **kwargs)
def draw_ascii(tree, file=None, column_width=80):
    """Draw an ascii-art phylogram of the given tree.

    The printed result looks like::

                                        _________ Orange
                         ______________|
                        |              |______________ Tangerine
          ______________|
         |              |          _________________________ Grapefruit
        _|              |_________|
         |                        |______________ Pummelo
         |
         |__________________________________ Apple


    A tree that consists of a single node is drawn as a one-column stub
    followed by its label.

    :Parameters:
        file : file-like object
            File handle opened for writing the output drawing. (Default:
            standard output)
        column_width : int
            Total number of text columns used by the drawing.
    """
    if file is None:
        file = sys.stdout

    taxa = tree.get_terminals()
    # Some constants for the drawing calculations
    max_label_width = max(len(str(taxon)) for taxon in taxa)
    drawing_width = column_width - max_label_width - 1
    # One row per taxon plus one spacer row between neighbouring taxa
    drawing_height = 2 * len(taxa) - 1

    def get_col_positions(tree):
        """Create a mapping of each clade to its column position."""
        depths = tree.depths()
        max_depth = max(depths.values())
        # If there are no branch lengths, assume unit branch lengths
        if not max_depth:
            depths = tree.depths(unit_branch_lengths=True)
            max_depth = max(depths.values())
        # Potential drawing overflow due to rounding -- 1 char per tree layer
        fudge_margin = int(math.ceil(math.log(len(taxa), 2)))
        if max_depth:
            cols_per_branch_unit = ((drawing_width - fudge_margin) /
                                    float(max_depth))
        else:
            # Every clade sits at depth 0 (e.g. a single-node tree): there is
            # nothing to scale, and dividing by the depth would fail.
            cols_per_branch_unit = 0.0
        return dict((clade, int(blen * cols_per_branch_unit + 1.0))
                    for clade, blen in depths.items())

    def get_row_positions(tree):
        """Create a mapping of each clade to its row position."""
        # Tips occupy the even rows, in the order of get_terminals()
        positions = dict((taxon, 2 * idx) for idx, taxon in enumerate(taxa))

        def calc_row(clade):
            for subclade in clade:
                if subclade not in positions:
                    calc_row(subclade)
            # An internal node sits midway between its first and last child
            positions[clade] = ((positions[clade.clades[0]] +
                                 positions[clade.clades[-1]]) // 2)

        # A root without children is itself a tip and already has its row
        if tree.root.clades:
            calc_row(tree.root)
        return positions

    col_positions = get_col_positions(tree)
    row_positions = get_row_positions(tree)
    char_matrix = [[' ' for x in range(drawing_width)]
                   for y in range(drawing_height)]

    def draw_clade(clade, startcol):
        """Recursively draw the branch leading to ``clade`` and its subtree."""
        thiscol = col_positions[clade]
        thisrow = row_positions[clade]
        # Draw a horizontal line
        for col in range(startcol, thiscol):
            char_matrix[thisrow][col] = '_'
        if clade.clades:
            # Draw a vertical line
            toprow = row_positions[clade.clades[0]]
            botrow = row_positions[clade.clades[-1]]
            for row in range(toprow + 1, botrow + 1):
                char_matrix[row][thiscol] = '|'
            # NB: Short terminal branches need something to stop rstrip()
            if (col_positions[clade.clades[0]] - thiscol) < 2:
                char_matrix[toprow][thiscol] = ','
            # Draw descendents
            for child in clade:
                draw_clade(child, thiscol + 1)

    draw_clade(tree.root, 0)
    # Print the complete drawing
    for idx, row in enumerate(char_matrix):
        line = ''.join(row).rstrip()
        # Add labels for terminal taxa in the right margin
        if idx % 2 == 0:
            line += ' ' + str(taxa[idx // 2])
        file.write(line + '\n')
    file.write('\n')
def draw(tree, label_func=str, do_show=True, show_confidence=True,
         # For power users
         axes=None, branch_labels=None, label_colors=None, *args, **kwargs):
    """Plot the given tree using matplotlib (or pylab).

    The graphic is a rooted tree, drawn with roughly the same algorithm as
    draw_ascii.

    Additional keyword arguments passed into this function are used as pyplot
    options. The input format should be in the form of:
    pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict), or
    pyplot_option_name=(dict).

    Example using the pyplot options 'axhspan' and 'axvline':

    >>> Phylo.draw(tree, axhspan=((0.25, 7.75), {'facecolor':'0.5'}),
    ...     axvline={'x':'0', 'ymin':'0', 'ymax':'1'})

    Visual aspects of the plot can also be modified using pyplot's own functions
    and objects (via pylab or matplotlib). In particular, the pyplot.rcParams
    object can be used to scale the font size (rcParams["font.size"]) and line
    width (rcParams["lines.linewidth"]).

    :Parameters:
        label_func : callable
            A function to extract a label from a node. By default this is str(),
            but you can use a different function to select another string
            associated with each node. If this function returns None for a node,
            no label will be shown for that node.
        do_show : bool
            Whether to show() the plot automatically.
        show_confidence : bool
            Whether to display confidence values, if present on the tree.
            A confidence of 0 is a value like any other and is displayed.
        axes : matplotlib/pylab axes
            If a valid matplotlib.axes.Axes instance, the phylogram is plotted
            in that Axes. By default (None), a new figure is created.
        branch_labels : dict or callable
            A mapping of each clade to the label that will be shown along the
            branch leading to it. By default this is the confidence value(s) of
            the clade, taken from the ``confidence`` attribute, and can be
            easily toggled off with this function's ``show_confidence`` option.
            But if you would like to alter the formatting of confidence values,
            or label the branches with something other than confidence, then use
            this option.
        label_colors : dict or callable
            A function or a dictionary specifying the color of the tip label.
            If the tip label can't be found in the dict or label_colors is
            None, the label will be shown in black.

    :Raises:
        MissingPythonDependencyError if neither matplotlib nor pylab can be
        imported; TypeError if ``branch_labels`` is neither a dict nor a
        callable; ValueError if ``axes`` is not an Axes instance or if a
        pyplot keyword argument is not iterable.
    """

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        try:
            import pylab as plt
        except ImportError:
            raise MissingPythonDependencyError(
                "Install matplotlib or pylab if you want to use draw.")

    import matplotlib.collections as mpcollections

    # Arrays that store lines for the plot of clades
    horizontal_linecollections = []
    vertical_linecollections = []

    # Options for displaying branch labels / confidence
    def conf2str(conf):
        """Format a confidence value, dropping a redundant '.0'."""
        if int(conf) == conf:
            return str(int(conf))
        return str(conf)

    if not branch_labels:
        if show_confidence:
            def format_branch_label(clade):
                if hasattr(clade, 'confidences'):
                    # phyloXML supports multiple confidences
                    return '/'.join(conf2str(cnf.value)
                                    for cnf in clade.confidences)
                # Test against None explicitly: a plain truth test would
                # silently hide a confidence value of 0.
                if clade.confidence is not None:
                    return conf2str(clade.confidence)
                return None
        else:
            def format_branch_label(clade):
                return None
    elif isinstance(branch_labels, dict):
        def format_branch_label(clade):
            return branch_labels.get(clade)
    else:
        # Raise explicitly; an assert would vanish when Python runs with -O
        if not callable(branch_labels):
            raise TypeError(
                "branch_labels must be either a dict or a callable (function)")
        format_branch_label = branch_labels

    # options for displaying label colors.
    if label_colors:
        if callable(label_colors):
            def get_label_color(label):
                return label_colors(label)
        else:
            # label_colors is presumed to be a dict
            def get_label_color(label):
                return label_colors.get(label, 'black')
    else:
        def get_label_color(label):
            # if label_colors is not specified, use black
            return 'black'

    # Layout

    def get_x_positions(tree):
        """Create a mapping of each clade to its horizontal position.

        Dict of {clade: x-coord}
        """
        depths = tree.depths()
        # If there are no branch lengths, assume unit branch lengths
        if not max(depths.values()):
            depths = tree.depths(unit_branch_lengths=True)
        return depths

    def get_y_positions(tree):
        """Create a mapping of each clade to its vertical position.

        Dict of {clade: y-coord}.
        Tips get the integers 1..N in the order of get_terminals(); each
        internal node sits midway between its first and last child.
        """
        maxheight = tree.count_terminals()
        # Rows are defined by the tips
        heights = dict((tip, maxheight - i)
                       for i, tip in enumerate(reversed(tree.get_terminals())))

        # Internal nodes: place at midpoint of children
        def calc_row(clade):
            for subclade in clade:
                if subclade not in heights:
                    calc_row(subclade)
            # Closure over heights
            heights[clade] = (heights[clade.clades[0]] +
                              heights[clade.clades[-1]]) / 2.0

        # A root without children is itself a tip and already has its row
        if tree.root.clades:
            calc_row(tree.root)
        return heights

    x_posns = get_x_positions(tree)
    y_posns = get_y_positions(tree)
    # The function draw_clade closes over the axes object
    if axes is None:
        fig = plt.figure()
        axes = fig.add_subplot(1, 1, 1)
    elif not isinstance(axes, plt.matplotlib.axes.Axes):
        raise ValueError("Invalid argument for axes: %s" % axes)

    def draw_clade_lines(use_linecollection=False, orientation='horizontal',
                         y_here=0, x_start=0, x_here=0, y_bot=0, y_top=0,
                         color='black', lw='.1'):
        """Create a line with or without a line collection object.

        Graphical formatting of the lines representing clades in the plot can be
        customized by altering this function.
        """
        if not use_linecollection and orientation == 'horizontal':
            axes.hlines(y_here, x_start, x_here, color=color, lw=lw)
        elif use_linecollection and orientation == 'horizontal':
            horizontal_linecollections.append(mpcollections.LineCollection(
                [[(x_start, y_here), (x_here, y_here)]], color=color, lw=lw),)
        elif not use_linecollection and orientation == 'vertical':
            axes.vlines(x_here, y_bot, y_top, color=color)
        elif use_linecollection and orientation == 'vertical':
            vertical_linecollections.append(mpcollections.LineCollection(
                [[(x_here, y_bot), (x_here, y_top)]], color=color, lw=lw),)

    def draw_clade(clade, x_start, color, lw):
        """Recursively draw a tree, down from the given clade."""
        x_here = x_posns[clade]
        y_here = y_posns[clade]
        # phyloXML-only graphics annotations
        if hasattr(clade, 'color') and clade.color is not None:
            color = clade.color.to_hex()
        if hasattr(clade, 'width') and clade.width is not None:
            lw = clade.width * plt.rcParams['lines.linewidth']
        # Draw a horizontal line from start to here
        draw_clade_lines(use_linecollection=True, orientation='horizontal',
                         y_here=y_here, x_start=x_start, x_here=x_here,
                         color=color, lw=lw)
        # Add node/taxon labels
        label = label_func(clade)
        if label not in (None, clade.__class__.__name__):
            axes.text(x_here, y_here, ' %s' %
                      label, verticalalignment='center',
                      color=get_label_color(label))
        # Add label above the branch (optional)
        conf_label = format_branch_label(clade)
        if conf_label:
            axes.text(0.5 * (x_start + x_here), y_here, conf_label,
                      fontsize='small', horizontalalignment='center')
        if clade.clades:
            # Draw a vertical line connecting all children
            y_top = y_posns[clade.clades[0]]
            y_bot = y_posns[clade.clades[-1]]
            # Only apply widths to horizontal lines, like Archaeopteryx
            draw_clade_lines(use_linecollection=True, orientation='vertical',
                             x_here=x_here, y_bot=y_bot, y_top=y_top,
                             color=color, lw=lw)
            # Draw descendents
            for child in clade:
                draw_clade(child, x_here, color, lw)

    draw_clade(tree.root, 0, 'k', plt.rcParams['lines.linewidth'])

    # If line collections were used to create clade lines, here they are added
    # to the pyplot plot.
    for i in horizontal_linecollections:
        axes.add_collection(i)
    for i in vertical_linecollections:
        axes.add_collection(i)

    # Aesthetics

    if hasattr(tree, 'name') and tree.name:
        axes.set_title(tree.name)
    axes.set_xlabel('branch length')
    axes.set_ylabel('taxa')
    # Add margins around the tree to prevent overlapping the axes
    xmax = max(x_posns.values())
    axes.set_xlim(-0.05 * xmax, 1.25 * xmax)
    # Also invert the y-axis (origin at the top)
    # Add a small vertical margin, but avoid including 0 and N+1 on the y axis
    axes.set_ylim(max(y_posns.values()) + 0.8, 0.2)

    # Parse and process key word arguments as pyplot options
    for key, value in kwargs.items():
        try:
            # Check that the pyplot option input is iterable, as required
            iter(value)
        except TypeError:
            raise ValueError('Keyword argument "%s=%s" is not in the format '
                             'pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict),'
                             ' or pyplot_option_name=(dict) '
                             % (key, value))
        if isinstance(value, dict):
            # Keyword arguments only
            getattr(plt, str(key))(**dict(value))
        elif not (isinstance(value[0], tuple)):
            # Positional arguments only
            getattr(plt, str(key))(*value)
        elif (isinstance(value[0], tuple)):
            # (positional arguments, keyword arguments)
            getattr(plt, str(key))(*value[0], **dict(value[1]))

    if do_show:
        plt.show()