Package Bio :: Package Phylo :: Module _utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo._utils

  1  # Copyright (C) 2009 by Eric Talevich (eric.talevich@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Utilities for handling, displaying and exporting Phylo trees. 
  7   
  8  Third-party libraries are loaded when the corresponding function is called. 
  9  """ 
 10   
 11  import math 
 12  import sys 
 13   
 14  from Bio import MissingPythonDependencyError 
 15   
 16   
17 -def to_networkx(tree):
18 """Convert a Tree object to a networkx graph. 19 20 The result is useful for graph-oriented analysis, and also interactive 21 plotting with pylab, matplotlib or pygraphviz, though the resulting diagram 22 is usually not ideal for displaying a phylogeny. 23 24 Requires NetworkX version 0.99 or later. 25 """ 26 try: 27 import networkx 28 except ImportError: 29 raise MissingPythonDependencyError( 30 "Install NetworkX if you want to use to_networkx.") 31 32 # NB (1/2010): the networkx API stabilized at v.1.0 33 # 1.0+: edges accept arbitrary data as kwargs, weights are floats 34 # 0.99: edges accept weight as a string, nothing else 35 # pre-0.99: edges accept no additional data 36 # Ubuntu Lucid LTS uses v0.99, let's support everything 37 if networkx.__version__ >= '1.0': 38 def add_edge(graph, n1, n2): 39 graph.add_edge(n1, n2, weight=n2.branch_length or 1.0) 40 # Copy branch color value as hex, if available 41 if hasattr(n2, 'color') and n2.color is not None: 42 graph[n1][n2]['color'] = n2.color.to_hex() 43 elif hasattr(n1, 'color') and n1.color is not None: 44 # Cascading color attributes 45 graph[n1][n2]['color'] = n1.color.to_hex() 46 n2.color = n1.color 47 # Copy branch weight value (float) if available 48 if hasattr(n2, 'width') and n2.width is not None: 49 graph[n1][n2]['width'] = n2.width 50 elif hasattr(n1, 'width') and n1.width is not None: 51 # Cascading width attributes 52 graph[n1][n2]['width'] = n1.width 53 n2.width = n1.width
54 elif networkx.__version__ >= '0.99': 55 def add_edge(graph, n1, n2): 56 graph.add_edge(n1, n2, (n2.branch_length or 1.0)) 57 else: 58 def add_edge(graph, n1, n2): 59 graph.add_edge(n1, n2) 60 61 def build_subgraph(graph, top): 62 """Walk down the Tree, building graphs, edges and nodes.""" 63 for clade in top: 64 graph.add_node(clade.root) 65 add_edge(graph, top.root, clade.root) 66 build_subgraph(graph, clade) 67 68 if tree.rooted: 69 G = networkx.DiGraph() 70 else: 71 G = networkx.Graph() 72 G.add_node(tree.root) 73 build_subgraph(G, tree.root) 74 return G 75 76
77 -def draw_graphviz(tree, label_func=str, prog='twopi', args='', 78 node_color='#c0deff', **kwargs):
79 """Display a tree or clade as a graph, using the graphviz engine. 80 81 Requires NetworkX, matplotlib, Graphviz and either PyGraphviz or pydot. 82 83 The third and fourth parameters apply to Graphviz, and the remaining 84 arbitrary keyword arguments are passed directly to networkx.draw(), which 85 in turn mostly wraps matplotlib/pylab. See the documentation for Graphviz 86 and networkx for detailed explanations. 87 88 The NetworkX/matplotlib parameters are described in the docstrings for 89 networkx.draw() and pylab.scatter(), but the most reasonable options to try 90 are: *alpha, node_color, node_size, node_shape, edge_color, style, 91 font_size, font_color, font_weight, font_family* 92 93 :Parameters: 94 95 label_func : callable 96 A function to extract a label from a node. By default this is str(), 97 but you can use a different function to select another string 98 associated with each node. If this function returns None for a node, 99 no label will be shown for that node. 100 101 The label will also be silently skipped if the throws an exception 102 related to ordinary attribute access (LookupError, AttributeError, 103 ValueError); all other exception types will still be raised. This 104 means you can use a lambda expression that simply attempts to look 105 up the desired value without checking if the intermediate attributes 106 are available: 107 108 >>> Phylo.draw_graphviz(tree, lambda n: n.taxonomies[0].code) 109 110 prog : string 111 The Graphviz program to use when rendering the graph. 'twopi' 112 behaves the best for large graphs, reliably avoiding crossing edges, 113 but for moderate graphs 'neato' looks a bit nicer. For small 114 directed graphs, 'dot' may produce a normal-looking cladogram, but 115 will cross and distort edges in larger graphs. (The programs 'circo' 116 and 'fdp' are not recommended.) 117 args : string 118 Options passed to the external graphviz program. Normally not 119 needed, but offered here for completeness. 120 121 Example 122 ------- 123 124 >>> import pylab 125 >>> from Bio import Phylo 126 >>> tree = Phylo.read('ex/apaf.xml', 'phyloxml') 127 >>> Phylo.draw_graphviz(tree) 128 >>> pylab.show() 129 >>> pylab.savefig('apaf.png') 130 """ 131 # Deprecated in Biopython 1.70 (#1247) 132 import warnings 133 from Bio import BiopythonDeprecationWarning 134 warnings.warn("draw_graphviz is deprecated; use Bio.Phylo.draw instead", 135 BiopythonDeprecationWarning) 136 137 try: 138 import networkx 139 except ImportError: 140 raise MissingPythonDependencyError( 141 "Install NetworkX if you want to use to_networkx.") 142 143 G = to_networkx(tree) 144 try: 145 # NetworkX version 1.8 or later (2013-01-20) 146 Gi = networkx.convert_node_labels_to_integers(G, 147 label_attribute='label') 148 int_labels = {} 149 for integer, nodeattrs in Gi.node.items(): 150 int_labels[nodeattrs['label']] = integer 151 except TypeError: 152 # Older NetworkX versions (before 1.8) 153 Gi = networkx.convert_node_labels_to_integers(G, 154 discard_old_labels=False) 155 int_labels = Gi.node_labels 156 157 try: 158 if hasattr(networkx, 'graphviz_layout'): 159 # networkx versions before 1.11 (#1247) 160 graphviz_layout = networkx.graphviz_layout 161 else: 162 # networkx version 1.11 163 graphviz_layout = networkx.drawing.nx_agraph.graphviz_layout 164 posi = graphviz_layout(Gi, prog, args=args) 165 except ImportError: 166 raise MissingPythonDependencyError( 167 "Install PyGraphviz or pydot if you want to use draw_graphviz.") 168 169 def get_label_mapping(G, selection): 170 """Apply the user-specified node relabeling.""" 171 for node in G.nodes(): 172 if (selection is None) or (node in selection): 173 try: 174 label = label_func(node) 175 if label not in (None, node.__class__.__name__): 176 yield (node, label) 177 except (LookupError, AttributeError, ValueError): 178 pass
179 180 if 'nodelist' in kwargs: 181 labels = dict(get_label_mapping(G, set(kwargs['nodelist']))) 182 else: 183 labels = dict(get_label_mapping(G, None)) 184 kwargs['nodelist'] = list(labels.keys()) 185 if 'edge_color' not in kwargs: 186 kwargs['edge_color'] = [isinstance(e[2], dict) and 187 e[2].get('color', 'k') or 'k' 188 for e in G.edges(data=True)] 189 if 'width' not in kwargs: 190 kwargs['width'] = [isinstance(e[2], dict) and 191 e[2].get('width', 1.0) or 1.0 192 for e in G.edges(data=True)] 193 194 posn = dict((n, posi[int_labels[n]]) for n in G) 195 networkx.draw(G, posn, labels=labels, with_labels=True, 196 node_color=node_color, **kwargs) 197 198
199 -def draw_ascii(tree, file=None, column_width=80):
200 """Draw an ascii-art phylogram of the given tree. 201 202 The printed result looks like:: 203 204 _________ Orange 205 ______________| 206 | |______________ Tangerine 207 ______________| 208 | | _________________________ Grapefruit 209 _| |_________| 210 | |______________ Pummelo 211 | 212 |__________________________________ Apple 213 214 215 :Parameters: 216 file : file-like object 217 File handle opened for writing the output drawing. (Default: 218 standard output) 219 column_width : int 220 Total number of text columns used by the drawing. 221 222 """ 223 if file is None: 224 file = sys.stdout 225 226 taxa = tree.get_terminals() 227 # Some constants for the drawing calculations 228 max_label_width = max(len(str(taxon)) for taxon in taxa) 229 drawing_width = column_width - max_label_width - 1 230 drawing_height = 2 * len(taxa) - 1 231 232 def get_col_positions(tree): 233 """Create a mapping of each clade to its column position.""" 234 depths = tree.depths() 235 # If there are no branch lengths, assume unit branch lengths 236 if not max(depths.values()): 237 depths = tree.depths(unit_branch_lengths=True) 238 # Potential drawing overflow due to rounding -- 1 char per tree layer 239 fudge_margin = int(math.ceil(math.log(len(taxa), 2))) 240 cols_per_branch_unit = ((drawing_width - fudge_margin) / 241 float(max(depths.values()))) 242 return dict((clade, int(blen * cols_per_branch_unit + 1.0)) 243 for clade, blen in depths.items())
244 245 def get_row_positions(tree): 246 positions = dict((taxon, 2 * idx) for idx, taxon in enumerate(taxa)) 247 248 def calc_row(clade): 249 for subclade in clade: 250 if subclade not in positions: 251 calc_row(subclade) 252 positions[clade] = ((positions[clade.clades[0]] + 253 positions[clade.clades[-1]]) // 2) 254 255 calc_row(tree.root) 256 return positions 257 258 col_positions = get_col_positions(tree) 259 row_positions = get_row_positions(tree) 260 char_matrix = [[' ' for x in range(drawing_width)] 261 for y in range(drawing_height)] 262 263 def draw_clade(clade, startcol): 264 thiscol = col_positions[clade] 265 thisrow = row_positions[clade] 266 # Draw a horizontal line 267 for col in range(startcol, thiscol): 268 char_matrix[thisrow][col] = '_' 269 if clade.clades: 270 # Draw a vertical line 271 toprow = row_positions[clade.clades[0]] 272 botrow = row_positions[clade.clades[-1]] 273 for row in range(toprow + 1, botrow + 1): 274 char_matrix[row][thiscol] = '|' 275 # NB: Short terminal branches need something to stop rstrip() 276 if (col_positions[clade.clades[0]] - thiscol) < 2: 277 char_matrix[toprow][thiscol] = ',' 278 # Draw descendents 279 for child in clade: 280 draw_clade(child, thiscol + 1) 281 282 draw_clade(tree.root, 0) 283 # Print the complete drawing 284 for idx, row in enumerate(char_matrix): 285 line = ''.join(row).rstrip() 286 # Add labels for terminal taxa in the right margin 287 if idx % 2 == 0: 288 line += ' ' + str(taxa[idx // 2]) 289 file.write(line + '\n') 290 file.write('\n') 291 292
293 -def draw(tree, label_func=str, do_show=True, show_confidence=True, 294 # For power users 295 axes=None, branch_labels=None, label_colors=None, *args, **kwargs):
296 """Plot the given tree using matplotlib (or pylab). 297 298 The graphic is a rooted tree, drawn with roughly the same algorithm as 299 draw_ascii. 300 301 Additional keyword arguments passed into this function are used as pyplot 302 options. The input format should be in the form of: 303 pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict), or 304 pyplot_option_name=(dict). 305 306 Example using the pyplot options 'axhspan' and 'axvline': 307 308 >>> Phylo.draw(tree, axhspan=((0.25, 7.75), {'facecolor':'0.5'}), 309 ... axvline={'x':'0', 'ymin':'0', 'ymax':'1'}) 310 311 Visual aspects of the plot can also be modified using pyplot's own functions 312 and objects (via pylab or matplotlib). In particular, the pyplot.rcParams 313 object can be used to scale the font size (rcParams["font.size"]) and line 314 width (rcParams["lines.linewidth"]). 315 316 :Parameters: 317 label_func : callable 318 A function to extract a label from a node. By default this is str(), 319 but you can use a different function to select another string 320 associated with each node. If this function returns None for a node, 321 no label will be shown for that node. 322 do_show : bool 323 Whether to show() the plot automatically. 324 show_confidence : bool 325 Whether to display confidence values, if present on the tree. 326 axes : matplotlib/pylab axes 327 If a valid matplotlib.axes.Axes instance, the phylogram is plotted 328 in that Axes. By default (None), a new figure is created. 329 branch_labels : dict or callable 330 A mapping of each clade to the label that will be shown along the 331 branch leading to it. By default this is the confidence value(s) of 332 the clade, taken from the ``confidence`` attribute, and can be 333 easily toggled off with this function's ``show_confidence`` option. 334 But if you would like to alter the formatting of confidence values, 335 or label the branches with something other than confidence, then use 336 this option. 337 label_colors : dict or callable 338 A function or a dictionary specifying the color of the tip label. 339 If the tip label can't be found in the dict or label_colors is 340 None, the label will be shown in black. 341 342 """ 343 try: 344 import matplotlib.pyplot as plt 345 except ImportError: 346 try: 347 import pylab as plt 348 except ImportError: 349 raise MissingPythonDependencyError( 350 "Install matplotlib or pylab if you want to use draw.") 351 352 import matplotlib.collections as mpcollections 353 354 # Arrays that store lines for the plot of clades 355 horizontal_linecollections = [] 356 vertical_linecollections = [] 357 358 # Options for displaying branch labels / confidence 359 def conf2str(conf): 360 if int(conf) == conf: 361 return str(int(conf)) 362 return str(conf)
363 if not branch_labels: 364 if show_confidence: 365 def format_branch_label(clade): 366 if hasattr(clade, 'confidences'): 367 # phyloXML supports multiple confidences 368 return '/'.join(conf2str(cnf.value) 369 for cnf in clade.confidences) 370 if clade.confidence: 371 return conf2str(clade.confidence) 372 return None 373 else: 374 def format_branch_label(clade): 375 return None 376 elif isinstance(branch_labels, dict): 377 def format_branch_label(clade): 378 return branch_labels.get(clade) 379 else: 380 assert callable(branch_labels), \ 381 "branch_labels must be either a dict or a callable (function)" 382 format_branch_label = branch_labels 383 384 # options for displaying label colors. 385 if label_colors: 386 if callable(label_colors): 387 def get_label_color(label): 388 return label_colors(label) 389 else: 390 # label_colors is presumed to be a dict 391 def get_label_color(label): 392 return label_colors.get(label, 'black') 393 else: 394 def get_label_color(label): 395 # if label_colors is not specified, use black 396 return 'black' 397 398 # Layout 399 400 def get_x_positions(tree): 401 """Create a mapping of each clade to its horizontal position. 402 403 Dict of {clade: x-coord} 404 """ 405 depths = tree.depths() 406 # If there are no branch lengths, assume unit branch lengths 407 if not max(depths.values()): 408 depths = tree.depths(unit_branch_lengths=True) 409 return depths 410 411 def get_y_positions(tree): 412 """Create a mapping of each clade to its vertical position. 413 414 Dict of {clade: y-coord}. 415 Coordinates are negative, and integers for tips. 416 """ 417 maxheight = tree.count_terminals() 418 # Rows are defined by the tips 419 heights = dict((tip, maxheight - i) 420 for i, tip in enumerate(reversed(tree.get_terminals()))) 421 422 # Internal nodes: place at midpoint of children 423 def calc_row(clade): 424 for subclade in clade: 425 if subclade not in heights: 426 calc_row(subclade) 427 # Closure over heights 428 heights[clade] = (heights[clade.clades[0]] + 429 heights[clade.clades[-1]]) / 2.0 430 431 if tree.root.clades: 432 calc_row(tree.root) 433 return heights 434 435 x_posns = get_x_positions(tree) 436 y_posns = get_y_positions(tree) 437 # The function draw_clade closes over the axes object 438 if axes is None: 439 fig = plt.figure() 440 axes = fig.add_subplot(1, 1, 1) 441 elif not isinstance(axes, plt.matplotlib.axes.Axes): 442 raise ValueError("Invalid argument for axes: %s" % axes) 443 444 def draw_clade_lines(use_linecollection=False, orientation='horizontal', 445 y_here=0, x_start=0, x_here=0, y_bot=0, y_top=0, 446 color='black', lw='.1'): 447 """Create a line with or without a line collection object. 448 449 Graphical formatting of the lines representing clades in the plot can be 450 customized by altering this function. 451 """ 452 if not use_linecollection and orientation == 'horizontal': 453 axes.hlines(y_here, x_start, x_here, color=color, lw=lw) 454 elif use_linecollection and orientation == 'horizontal': 455 horizontal_linecollections.append(mpcollections.LineCollection( 456 [[(x_start, y_here), (x_here, y_here)]], color=color, lw=lw),) 457 elif not use_linecollection and orientation == 'vertical': 458 axes.vlines(x_here, y_bot, y_top, color=color) 459 elif use_linecollection and orientation == 'vertical': 460 vertical_linecollections.append(mpcollections.LineCollection( 461 [[(x_here, y_bot), (x_here, y_top)]], color=color, lw=lw),) 462 463 def draw_clade(clade, x_start, color, lw): 464 """Recursively draw a tree, down from the given clade.""" 465 x_here = x_posns[clade] 466 y_here = y_posns[clade] 467 # phyloXML-only graphics annotations 468 if hasattr(clade, 'color') and clade.color is not None: 469 color = clade.color.to_hex() 470 if hasattr(clade, 'width') and clade.width is not None: 471 lw = clade.width * plt.rcParams['lines.linewidth'] 472 # Draw a horizontal line from start to here 473 draw_clade_lines(use_linecollection=True, orientation='horizontal', 474 y_here=y_here, x_start=x_start, x_here=x_here, color=color, lw=lw) 475 # Add node/taxon labels 476 label = label_func(clade) 477 if label not in (None, clade.__class__.__name__): 478 axes.text(x_here, y_here, ' %s' % 479 label, verticalalignment='center', 480 color=get_label_color(label)) 481 # Add label above the branch (optional) 482 conf_label = format_branch_label(clade) 483 if conf_label: 484 axes.text(0.5 * (x_start + x_here), y_here, conf_label, 485 fontsize='small', horizontalalignment='center') 486 if clade.clades: 487 # Draw a vertical line connecting all children 488 y_top = y_posns[clade.clades[0]] 489 y_bot = y_posns[clade.clades[-1]] 490 # Only apply widths to horizontal lines, like Archaeopteryx 491 draw_clade_lines(use_linecollection=True, orientation='vertical', 492 x_here=x_here, y_bot=y_bot, y_top=y_top, color=color, lw=lw) 493 # Draw descendents 494 for child in clade: 495 draw_clade(child, x_here, color, lw) 496 497 draw_clade(tree.root, 0, 'k', plt.rcParams['lines.linewidth']) 498 499 # If line collections were used to create clade lines, here they are added 500 # to the pyplot plot. 501 for i in horizontal_linecollections: 502 axes.add_collection(i) 503 for i in vertical_linecollections: 504 axes.add_collection(i) 505 506 # Aesthetics 507 508 if hasattr(tree, 'name') and tree.name: 509 axes.set_title(tree.name) 510 axes.set_xlabel('branch length') 511 axes.set_ylabel('taxa') 512 # Add margins around the tree to prevent overlapping the axes 513 xmax = max(x_posns.values()) 514 axes.set_xlim(-0.05 * xmax, 1.25 * xmax) 515 # Also invert the y-axis (origin at the top) 516 # Add a small vertical margin, but avoid including 0 and N+1 on the y axis 517 axes.set_ylim(max(y_posns.values()) + 0.8, 0.2) 518 519 # Parse and process key word arguments as pyplot options 520 for key, value in kwargs.items(): 521 try: 522 # Check that the pyplot option input is iterable, as required 523 [i for i in value] 524 except TypeError: 525 raise ValueError('Keyword argument "%s=%s" is not in the format ' 526 'pyplot_option_name=(tuple), pyplot_option_name=(tuple, dict),' 527 ' or pyplot_option_name=(dict) ' 528 % (key, value)) 529 if isinstance(value, dict): 530 getattr(plt, str(key))(**dict(value)) 531 elif not (isinstance(value[0], tuple)): 532 getattr(plt, str(key))(*value) 533 elif (isinstance(value[0], tuple)): 534 getattr(plt, str(key))(*value[0], **dict(value[1])) 535 536 if do_show: 537 plt.show() 538