Package Bio :: Package Restriction :: Module PrintFormat
[hide private]
[frames | no frames]

Source Code for Module Bio.Restriction.PrintFormat

  1  #!/usr/bin/env python 
  2  # 
  3  #      Restriction Analysis Libraries. 
  4  #      Copyright (C) 2004. Frederic Sohm. 
  5  # 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10  r"""Print the results of restriction enzyme analysis. 
 11   
 12  PrintFormat prints the results from restriction analysis in 3 different 
 13  formats: list, number or map. 
 14   
 15  The easiest way to use it is: 
 16   
 17      >>> from Bio.Restriction.PrintFormat import PrintFormat 
 18      >>> from Bio.Restriction.Restriction import AllEnzymes 
 19      >>> from Bio import Entrez 
 20      >>> from Bio import SeqIO 
 21      >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322") 
 22      >>> pBR322 = SeqIO.read(handle, "fasta") 
 23      >>> handle.close() 
 24      >>> dct = AllEnzymes.search(pBR322.seq) 
 25      >>> new = PrintFormat() 
 26      >>> new.print_that(dct, 'My pBR322 analysis:\n', 'No site:\n') 
 27      My pBR322 analysis: 
 28      AasI       :  2169, 2582. 
 29      AatII      :  4289. 
 30      ... 
 31      ZraI       :  4287. 
 32      ZrmI       :  3847. 
 33      No site: 
 34      AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI 
 35      ... 
 36      Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI 
 37      Zsp2I 
 38      >>> new.sequence = pBR322.seq 
 39      >>> new.print_as("map") 
 40      >>> new.print_that(dct) 
 41      ... 
 42   
 43  Some of the methods of PrintFormat are meant to be overridden by derived 
 44  classes. 
 45  """ 
 46   
 47  from __future__ import print_function 
 48   
 49  import re 
 50   
 51  from Bio._py3k import range 
 52   
 53  from Bio.Restriction import RanaConfig as RanaConf 
 54   
 55   
class PrintFormat(object):
    """Format and print the results of a restriction analysis.

    The results can be rendered as an alphabetical list, grouped by number
    of sites, or as a map along the sequence; select the layout with
    print_as() and emit it with print_that().

    The map layout reads ``self.sequence``, and print_that() falls back to
    ``self.results`` when given an empty dictionary.  Neither attribute is
    set by this class; the caller (or a derived class) must provide them.
    """

    # Layout constants.  The raw values come from RanaConfig; PrefWidth is
    # ConsoleWidth rounded down to a multiple of NameWidth, and linesize is
    # what remains of PrefWidth after one name column.
    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth = RanaConf.NameWidth
    MaxSize = RanaConf.MaxSize
    Cmodulo = ConsoleWidth % NameWidth
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    linesize = PrefWidth - NameWidth

    def __init__(self):
        """PrintFormat() -> new PrintFormat Instance"""

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> print the results as specified.

        Valid format are:
            'list'      -> alphabetical order
            'number'    -> number of sites in the sequence
            'map'       -> a map representation of the sequence with the sites.

        Any other value selects the 'list' format.

        If you want more flexibility over-ride the virtual method make_format.
        """
        # Rebinding make_format on the instance shadows the class-level
        # default, so later print_that() calls use the chosen layout.
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search()
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        If dct is empty, self.results is used instead; an AttributeError is
        raised if the instance has no such attribute.

        The layout is whatever make_format currently points to (a list
        unless print_as() selected something else).
        """
        if not dct:
            dct = self.results
        ls, nc = [], []
        # Split the results into enzymes that cut (non-empty site list)
        # and enzymes that do not.
        for k, v in dct.items():
            if v:
                ls.append((k, v))
            else:
                nc.append(k)
        print(self.make_format(ls, title, nc, s1))

    def make_format(self, cut=(), title='', nc=(), s1=''):
        """PF.make_format(cut, title, nc, s1) -> string

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.
        """
        return self._make_list(cut, title, nc, s1)

    # _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls, title, nc, s1) -> string.

        return a string of form::

            title.

            enzyme1 : position1, position2.
            enzyme2 : position1, position2, position3.

        followed by the block of non cutting enzymes.

        Arguments:
         - ls is a tuple or list of cutting enzymes.
         - title is the title.
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls, title, nc, s1) -> string.

        return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        followed by the block of non cutting enzymes.

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls, title, nc, s1) -> string.

        Returns a string in the form::

            title.

            enzymes which cut 1 times :

            enzyme1 : position1.

            enzymes which cut 2 times :

            enzyme2 : position1, position2.
            ...

        followed by the block of non cutting enzymes.

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return (self._make_number_only(ls, title) +
                self._make_nocut_only(nc, s1))

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls, title, nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        Arguments:
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=(), title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        return a formatted string of the non cutting enzymes: the names,
        sorted, each left-justified to NameWidth and wrapped once a row
        grows past linesize.

        Arguments:
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes; a default
           heading is used when it is empty.
         - ls and title are accepted but not used.

        When nc is empty, s1 is returned unchanged.
        """
        if not nc:
            return s1
        st = ''
        stringsite = s1 or '\n Enzymes which do not cut the sequence.\n\n'
        for key in sorted(nc):
            st += str(key).ljust(self.NameWidth)
            if len(st) > self.linesize:
                # The row is full: flush it and start a new one.
                stringsite += st + '\n'
                st = ''
        # The last (possibly empty) row is always terminated by a newline.
        return stringsite + st + '\n'

    def _make_list_only(self, ls, title, nc=(), s1=''):
        """PF._make_list_only(ls, title) -> string.

        return a string of form::

            title.

            enzyme1 : position1, position2.
            enzyme2 : position1, position2, position3.
            ...

        Arguments:
         - ls is a tuple or list of results.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are ignored).

        When ls is empty, title is returned unchanged.
        """
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=(), s1=''):
        """PF._make_number_only(ls, title) -> string.

        return a string of form::

            title.

            enzymes which cut 1 times :

            enzyme1 : position1.

            enzymes which cut 2 times :

            enzyme2 : position1, position2.
            ...

        The section for enzymes cutting once is always emitted first, even
        when no enzyme cuts exactly once.

        Arguments:
         - ls is a tuple or list of results; it is not modified.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are ignored).

        When ls is empty, title is returned unchanged.
        """
        if not ls:
            return title
        # Order the results by number of sites.  A key function works on
        # both Python 2 and 3 (cmp-style comparators are Python 2 only) and
        # sorted() leaves the caller's sequence untouched.
        by_count = sorted(ls, key=lambda item: len(item[1]))
        cur_len = 1
        new_sect = []
        for name, sites in by_count:
            count = len(sites)
            if count > cur_len:
                # Site count went up: close the current section and open a
                # new one starting with this enzyme.
                title += "\n\nenzymes which cut %i times :\n\n" % cur_len
                title = self.__next_section(new_sect, title)
                new_sect, cur_len = [(name, sites)], count
                continue
            new_sect.append((name, sites))
        title += "\n\nenzymes which cut %i times :\n\n" % cur_len
        return self.__next_section(new_sect, title)

    def _make_map_only(self, ls, title, nc=(), s1=''):
        """PF._make_map_only(ls, title) -> string.

        return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        The sequence is read from self.sequence, which must support len(),
        str() and a complement() method, and is drawn in rows of 60 bases.

        Arguments:
         - ls is a list of results.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are ignored).

        When ls is empty, title is returned unchanged.
        """
        if not ls:
            return title
        # Output is collected in a list and joined once at the end.
        parts = [title or '']
        # Map each cut position to the names of the enzymes cutting there.
        enzymemap = {}
        for (enzyme, cut) in ls:
            for c in cut:
                if c in enzymemap:
                    enzymemap[c].append(str(enzyme))
                else:
                    enzymemap[c] = [str(enzyme)]
        mapping = sorted(enzymemap.keys())
        # cutloc[start] lists the cut positions drawn above the row whose
        # first base has 0-based offset 'start'.
        cutloc = {}
        x, counter, length = 0, 0, len(self.sequence)
        for x in range(60, length, 60):
            counter = x - 60
            row_keys = []
            cutloc[counter] = row_keys
            remaining = []
            for key in mapping:
                if key <= x:
                    row_keys.append(key)
                else:
                    remaining.append(key)
            mapping = remaining
        # Whatever is left belongs to the last (possibly partial) row.
        cutloc[x] = mapping
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())
        a = '|'
        base, counter = 0, 0
        emptyline = ' ' * 60
        Join = ''.join
        # Full rows of 60 bases.
        for base in range(60, length, 60):
            counter = base - 60
            line = emptyline
            for key in cutloc[counter]:
                s = ''
                if key == base:
                    # A cut on the last base of the row: key % 60 would be
                    # 0, so the marker column (59) is set explicitly.
                    for n in enzymemap[key]:
                        s = ' '.join((s, n))
                    head = line[0:59]
                    lineo = Join((head, str(key), s, '\n'))
                    line2 = Join((head, a, '\n'))
                    parts.append(Join((lineo, line2)))
                    break
                for n in enzymemap[key]:
                    s = ' '.join((s, n))
                k = key % 60
                lineo = Join((line[0:(k - 1)], str(key), s, '\n'))
                # Keep earlier markers so each new label row still shows
                # the bars of the cuts already labelled on this row.
                line = Join((line[0:(k - 1)], a, line[k:]))
                line2 = Join((line[0:(k - 1)], a, line[k:], '\n'))
                parts.append(Join((lineo, line2)))
            ruler = Join((str(counter + 1).ljust(15), ' ' * 30,
                          str(base).rjust(15), '\n\n'))
            parts.append('\n'.join((sequence[counter:base], a * 60,
                                    revsequence[counter:base], ruler)))
        # Last row, from offset 'base' to the end of the sequence.
        line = ' ' * 60
        for key in cutloc[base]:
            s = ''
            if key == length:
                for n in enzymemap[key]:
                    s = Join((s, ' ', n))
                # Column of the last base *within this row*.  Slicing by
                # the absolute position would misplace the marker for any
                # sequence longer than one row.
                head = line[0:(length - base - 1)]
                lineo = Join((head, str(key), s, '\n'))
                line2 = Join((head, a, '\n'))
                parts.append(Join((lineo, line2)))
                break
            for n in enzymemap[key]:
                s = Join((s, ' ', n))
            k = key % 60
            lineo = Join((line[0:(k - 1)], str(key), s, '\n'))
            line = Join((line[0:(k - 1)], a, line[k:]))
            line2 = Join((line[0:(k - 1)], a, line[k:], '\n'))
            parts.append(Join((lineo, line2)))
        parts.append(Join((sequence[base:length], '\n')))
        parts.append(Join((a * (length - base), '\n')))
        parts.append(Join((revsequence[base:length], '\n')))
        parts.append(Join((str(base + 1).ljust(15),
                           ' ' * (length - base - 30),
                           str(length).rjust(15), '\n\n')))
        return Join(parts)

    # private method to do lists:

    def __next_section(self, ls, into):
        """PF.__next_section(ls, into) -> string.

        Arguments:
         - ls is a tuple/list of tuple (string, [int, int]).
         - into is a string to which the formatted ls will be added.

        Format ls as a string of lines, sorted:
        The form is::

            enzyme1   :   position1.
            enzyme2   :   position2, position3.

        Site lists too long for one line are wrapped at a comma and the
        continuation lines are indented.

        then add the formatted ls to into
        return into.
        """
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Raw string: \w, \s and \. are regex escapes, not string escapes.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        Join = ''.join
        for name, sites in sorted(ls):
            positions = Join((', '.join(str(site) for site in sites), '.'))
            if len(positions) > linesize:
                #
                # cut where appropriate and add the indentation
                #
                chunks = [m.group() for m in pat.finditer(positions)]
                stringsite = indentation.join(chunks)
            else:
                stringsite = positions
            # NOTE(review): the sample output in the module docstring shows
            # two spaces after the colon; the separator below is reproduced
            # exactly as found here -- confirm against upstream.
            into = Join((into,
                         str(name).ljust(self.NameWidth), ' : ', stringsite,
                         '\n'))
        return into
427