Package Bio :: Package Graphics :: Package GenomeDiagram :: Module _Graph
[hide private]
[frames | no frames]

Source Code for Module Bio.Graphics.GenomeDiagram._Graph

  1  # Copyright 2003-2008 by Leighton Pritchard.  All rights reserved. 
  2  # Revisions copyright 2008-2009 by Peter Cock. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  # 
  7  # Contact:       Leighton Pritchard, Scottish Crop Research Institute, 
  8  #                Invergowrie, Dundee, Scotland, DD2 5DA, UK 
  9  #                L.Pritchard@scri.ac.uk 
 10  ################################################################################ 
 11   
 12  """ Graph module 
 13   
 14      Provides: 
 15   
 16      o GraphData - Contains data from which a graph will be drawn, and 
 17                      information about its presentation 
 18   
 19      For drawing capabilities, this module uses reportlab to draw and write 
 20      the diagram: 
 21   
 22      http://www.reportlab.com 
 23   
 24      For dealing with biological information, the package expects Biopython 
 25      objects: 
 26   
 27      http://www.biopython.org 
 28  """ 
 29   
 30  # ReportLab imports 
 31  from __future__ import print_function 
 32   
 33  from reportlab.lib import colors 
 34   
 35  from math import sqrt 
 36   
 37   
class GraphData(object):
    """ GraphData

        Holds a set of data points, keyed by sequence position, together
        with the settings that describe how those points should be drawn.

        Methods:

        o __init__(self, id=None, data=None, name=None, style='bar',
                   color=colors.lightgreen, altcolor=colors.darkseagreen,
                   center=None, colour=None, altcolour=None)
                                    Called on instantiation

        o set_data(self, data)      Load the object with data to be plotted

        o get_data(self)            Returns the data to be plotted as a
                                    sorted list of (position, value) tuples

        o add_point(self, point)    Add a single point to the data set

        o quartiles(self)           Returns a tuple of the data quartiles

        o range(self)               Returns a tuple of the base range
                                    covered by the graph data

        o mean(self)                Returns a float of the mean data point
                                    value

        o stdev(self)               Returns the sample standard deviation of
                                    the data values

        o __len__(self)             Returns the number of points in the
                                    data set

        o __getitem__(self, index)  Returns the value at the base specified,
                                    or graph data in the base range

        o __str__(self)             Returns a formatted string describing
                                    the graph data

        Attributes:

        o id        Unique identifier for the data

        o data      Dictionary of data values, keyed by position

        o name      String describing the data

        o style     String ('bar', 'heat', 'line') describing how to draw
                    the data

        o poscolor  colors.Color for drawing high (some styles) or all
                    values

        o negcolor  colors.Color for drawing low values (some styles)

        o linewidth Int, thickness to draw the line in 'line' styles

        o center    Value at which the x-axis crosses the y-axis
    """

    def __init__(self, id=None, data=None, name=None, style='bar',
                 color=colors.lightgreen, altcolor=colors.darkseagreen,
                 center=None, colour=None, altcolour=None):
        """ __init__(self, id=None, data=None, name=None, style='bar',
                     color=colors.lightgreen, altcolor=colors.darkseagreen,
                     center=None, colour=None, altcolour=None)

            o id        Unique ID for the graph

            o data      List of (position, value) tuples

            o name      String describing the graph

            o style     String describing the presentation style ('bar',
                        'line', 'heat')

            o color     colors.Color describing the color to draw all or
                        the 'high' (some styles) values (overridden by the
                        backwards compatible argument with UK spelling,
                        colour).

            o altcolor  colors.Color describing the color to draw the 'low'
                        values (some styles only) (overridden by the
                        backwards compatible argument with UK spelling,
                        altcolour).

            o center    Value at which x-axis crosses y-axis.

            o colour    UK spelling of color; takes precedence over color
                        when it is not None.

            o altcolour UK spelling of altcolor; takes precedence over
                        altcolor when it is not None.
        """
        # Let the UK spelling (colour) override the USA spelling (color)
        if colour is not None:
            color = colour
        if altcolour is not None:
            altcolor = altcolour

        self.id = id            # Unique identifier for the graph
        self.data = {}          # holds values, keyed by sequence position
        if data is not None:
            self.set_data(data)
        self.name = name        # Descriptive string

        # Attributes describing how the graph will be drawn
        self.style = style          # One of 'bar', 'heat' or 'line'
        self.poscolor = color       # Color to draw all, or 'high' values
        self.negcolor = altcolor    # Color to draw 'low' values
        self.linewidth = 2          # linewidth to use in line graphs
        self.center = center        # value at which x-axis crosses y-axis

    def set_data(self, data):
        """ set_data(self, data)

            o data      List of (position, value) tuples

            Add data with a list of (position, value) tuples.  Existing
            points are kept; a point at an already stored position replaces
            the stored value.
        """
        for (pos, val) in data:     # Fill data dictionary
            self.data[pos] = val

    def get_data(self):
        """ get_data(self) -> [(int, float), (int, float), ...]

            Return data as a list of sorted (position, value) tuples
        """
        # Positions are dictionary keys and therefore unique, so sorting
        # the items orders the points by position.
        return sorted(self.data.items())

    def add_point(self, point):
        """ add_point(self, point)

            o point     (position, value) tuple

            Add a single point to the set of data
        """
        pos, val = point
        self.data[pos] = val

    def quartiles(self):
        """ quartiles(self) -> (float, float, float, float, float)

            Returns the (minimum, lowerQ, medianQ, upperQ, maximum) values
            as a tuple.  The quartiles are picked by integer index into the
            sorted values, with no interpolation.

            Raises IndexError if the graph holds no data.
        """
        data = sorted(self.data.values())
        datalen = len(data)
        return (data[0], data[datalen // 4], data[datalen // 2],
                data[3 * datalen // 4], data[-1])

    def range(self):
        """ range(self) -> (int, int)

            Returns the range of the data, i.e. its start and end points on
            the genome as a (start, end) tuple

            Raises IndexError if the graph holds no data.
        """
        positions = sorted(self.data)   # i.e. dict keys
        # Return first and last positions in graph
        return (positions[0], positions[-1])

    def mean(self):
        """ mean(self) -> Float

            Returns the mean value for the data points

            Raises ZeroDivisionError if the graph holds no data.
        """
        values = list(self.data.values())
        # Plain left-to-right accumulation, named so that the builtin sum()
        # is not shadowed.
        total = 0.
        for value in values:
            total += float(value)
        return total / len(values)

    def stdev(self):
        """ stdev(self) -> Float

            Returns the sample standard deviation for the data

            Raises ZeroDivisionError if the graph holds fewer than two
            points, for which the sample standard deviation is undefined.
        """
        data = list(self.data.values())
        m = self.mean()
        runtotal = 0.
        for entry in data:
            runtotal += float((entry - m) ** 2)
        # This is sample standard deviation; population stdev would involve
        # division by len(data), rather than len(data)-1
        return sqrt(runtotal / (len(data) - 1))

    def __len__(self):
        """ __len__(self) -> Int

            Returns the number of points in the data set
        """
        return len(self.data)

    def __getitem__(self, index):
        """ __getitem__(self, index) -> Float or list of tuples

            Given an integer representing position on the sequence
            returns a float - the data value at the passed position.

            If a slice, returns graph data from the region as a list of
            (position, value) tuples, sorted by position.  Both end points
            of the slice are treated as inclusive, and a missing start or
            stop leaves that side of the region unbounded.  Slices with
            step are not supported.

            Raises KeyError if there is no data at an integer position,
            ValueError for a slice with a step other than 1 and TypeError
            for any other kind of index.
        """
        if isinstance(index, int):
            return self.data[index]
        elif isinstance(index, slice):
            # TODO - Why does it treat the end points both as inclusive?
            # This doesn't match Python norms does it?
            low = index.start
            high = index.stop
            if index.step is not None and index.step != 1:
                raise ValueError("Slices with a step other than 1 are not "
                                 "supported")
            # A bound of None means the slice is open on that side; it must
            # not be compared with the positions themselves.
            return [(pos, self.data[pos]) for pos in sorted(self.data)
                    if (low is None or pos >= low) and
                    (high is None or pos <= high)]
        else:
            raise TypeError("Need an integer or a slice")

    def __str__(self):
        """ __str__(self) -> ""

            Returns a string describing the graph data.  Statistics that
            are undefined for the stored data (everything for an empty
            graph, the sample SD for a single point) are not computed, so
            that any graph can be printed.
        """
        outstr = ["\nGraphData: %s, ID: %s" % (self.name, self.id)]
        outstr.append("Number of points: %d" % len(self.data))
        if not self.data:
            # No points: mean, SD, quartiles and range are all undefined.
            return "\n".join(outstr)
        outstr.append("Mean data value: %s" % self.mean())
        if len(self.data) > 1:
            outstr.append("Sample SD: %.3f" % self.stdev())
        else:
            # stdev() divides by len(data) - 1, which is zero here.
            outstr.append("Sample SD: undefined (needs at least two points)")
        outstr.append("Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s" % self.quartiles())
        outstr.append("Sequence Range: %s..%s" % self.range())
        return "\n".join(outstr)
260