Package Bio :: Package Restriction :: Module PrintFormat
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.PrintFormat

  1  #!/usr/bin/env python 
  2  # 
  3  #      Restriction Analysis Libraries. 
  4  #      Copyright (C) 2004. Frederic Sohm. 
  5  # 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10  r"""Print the results of restriction enzyme analysis. 
 11   
 12  PrintFormat prints the results from restriction analysis in 3 different 
 13  formats: list, number or map. 
 14   
 15  The easiest way to use it is: 
 16   
 17      >>> from Bio.Restriction.PrintFormat import PrintFormat 
 18      >>> from Bio.Restriction.Restriction import AllEnzymes 
 19      >>> from Bio import Entrez 
 20      >>> from Bio import SeqIO 
 21      >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322") 
 22      >>> pBR322 = SeqIO.read(handle, "fasta") 
 23      >>> handle.close() 
 24      >>> dct = AllEnzymes.search(pBR322.seq) 
 25      >>> new = PrintFormat() 
 26      >>> new.print_that(dct, 'My pBR322 analysis:\n', 'No site:\n') 
 27      My pBR322 analysis: 
 28      AasI       :  2169, 2582. 
 29      AatII      :  4289. 
 30      ... 
 31      ZraI       :  4287. 
 32      ZrmI       :  3847. 
 33      No site: 
 34      AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI 
 35      ... 
 36      Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI 
 37      Zsp2I 
 38      >>> new.sequence = pBR322.seq 
 39      >>> new.print_as("map") 
 40      >>> new.print_that(dct) 
 41      ... 
 42   
 43  Some of the methods of PrintFormat are meant to be overridden by derived 
 44  classes. 
 45  """ 
 46   
 47  from __future__ import print_function 
 48   
 49  import re 
 50   
 51  from Bio._py3k import range 
 52   
 53  from Bio.Restriction import RanaConfig as RanaConf 
 54   
 55   
class PrintFormat(object):
    """Format and print the results of a restriction analysis.

    The layout is chosen with print_as() ('list', 'number' or 'map'), which
    rebinds make_format on the instance.  The 'map' layout additionally
    reads self.sequence, which the caller must assign beforehand.
    """

    # Layout parameters, all taken from the RanaConfig module.
    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth = RanaConf.NameWidth
    MaxSize = RanaConf.MaxSize
    # PrefWidth is ConsoleWidth rounded down to a multiple of NameWidth.
    Cmodulo = ConsoleWidth % NameWidth
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    # Width left on a line once one name column has been used.
    linesize = PrefWidth - NameWidth

    def __init__(self):
        """PrintFormat() -> new PrintFormat Instance"""
        pass

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> select the output format.

        Valid formats are:
            'list'      -> alphabetical order
            'number'    -> number of sites in the sequence
            'map'       -> a map representation of the sequence with the sites.

        Any other value selects the 'list' format.

        If you want more flexibility over-ride the virtual method make_format.
        """
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def format_output(self, dct, title='', s1=''):
        """PF.format_output(dct, [title[, s1]]) -> string nicely formatted.

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search()
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        If dct is empty, self.results is used instead; this class never sets
        that attribute, so it must have been assigned beforehand.

        The entries of dct are split into enzymes with sites and enzymes
        without sites, then passed to make_format (a list by default).
        """
        if not dct:
            dct = self.results
        ls, nc = [], []
        for k, v in dct.items():
            if v:
                ls.append((k, v))
            else:
                nc.append(k)
        return self.make_format(ls, title, nc, s1)

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> print the formatted results.

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search()
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        This method prints the output of self.format_output() and it is here
        for backwards compatibility.
        """
        # format_output is a method: it has to be reached through self.
        print(self.format_output(dct, title, s1))

    def make_format(self, cut=(), title='', nc=(), s1=''):
        """PF.make_format(cut, title, nc, s1) -> string

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.
        """
        return self._make_list(cut, title, nc, s1)

    # _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls, title, nc, s1) -> string.

        return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.

        Arguments:
         - ls is a tuple or list of cutting enzymes.
         - title is the title.
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls, title, nc, s1) -> string.

        return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls, title, nc, s1) -> string.

        Returns a string in the form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls, title, nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        Arguments:
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=(), title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        Arguments:
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.

        ls and title are accepted but not used.  When nc is empty, s1 is
        returned unchanged.  When s1 is empty, a default heading is used.
        """
        if not nc:
            return s1
        pieces = [s1 or '\n   Enzymes which do not cut the sequence.\n\n']
        row = ''
        for key in sorted(nc):
            # One fixed-width column per enzyme name.
            row += str(key).ljust(self.NameWidth)
            if len(row) > self.linesize:
                pieces.append(row + '\n')
                row = ''
        # The last (possibly empty) row is always terminated.
        pieces.append(row + '\n')
        return ''.join(pieces)

    def _make_list_only(self, ls, title, nc=(), s1=''):
        """PF._make_list_only(ls, title) -> string.

        return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.
            ...

        Arguments:
         - ls is a tuple or list of results.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are ignored).
        """
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=(), s1=''):
        """PF._make_number_only(ls, title) -> string.

        return a string of form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list or tuple of results, i.e. (enzyme, sites) pairs.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are ignored).

        ls itself is left untouched: a sorted copy is used.  A heading is
        only written for a section that contains at least one enzyme.
        """
        if not ls:
            return title
        header = "\n\nenzymes which cut %i times :\n\n"
        # key= works on Python 2 and 3 (cmp-style comparators do not), and
        # sorted() accepts tuples and does not reorder the caller's data.
        by_count = sorted(ls, key=lambda item: len(item[1]))
        cur_len = 1
        new_sect = []
        for name, sites in by_count:
            n_sites = len(sites)
            if n_sites > cur_len:
                # Close the previous section; skip it if nothing cuts that
                # number of times (e.g. no enzyme cuts exactly once).
                if new_sect:
                    title += header % cur_len
                    title = self.__next_section(new_sect, title)
                new_sect, cur_len = [(name, sites)], n_sites
                continue
            new_sect.append((name, sites))
        title += header % cur_len
        return self.__next_section(new_sect, title)

    def _make_map_only(self, ls, title, nc=(), s1=''):
        """PF._make_map_only(ls, title) -> string.

        return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        Arguments:
         - ls is a list of results, i.e. (enzyme, sites) pairs.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are ignored).

        Reads self.sequence, which must be set before calling and must
        provide complement().  The sequence is drawn in rows of 60 bases.
        """
        if not ls:
            return title
        map_text = title or ''
        # Position -> names of the enzymes having a site at that position.
        enzymemap = {}
        for enzyme, cut in ls:
            for c in cut:
                enzymemap.setdefault(c, []).append(str(enzyme))
        mapping = sorted(enzymemap)
        # Row start offset -> positions drawn above that 60-base row.
        cutloc = {}
        x, length = 0, len(self.sequence)
        for x in range(60, length, 60):
            in_row, remaining = [], []
            for key in mapping:
                if key <= x:
                    in_row.append(key)
                else:
                    remaining.append(key)
            cutloc[x - 60] = in_row
            mapping = remaining
        # Whatever is left belongs to the last (possibly only) row.
        cutloc[x] = mapping
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())
        a = '|'
        base = 0
        Join = ''.join
        # Full rows of 60 bases.
        for base in range(60, length, 60):
            counter = base - 60
            line = ' ' * 60
            for key in cutloc[counter]:
                s = Join(' ' + n for n in enzymemap[key])
                if key == base:
                    # A site on the last base of the row: key % 60 is 0, so
                    # the column is given explicitly.  Positions are sorted,
                    # so this is the last one of the row.
                    chunk = line[0:59]
                    lineo = Join((chunk, str(key), s, '\n'))
                    line2 = Join((chunk, a, '\n'))
                    map_text = Join((map_text, lineo, line2))
                    break
                k = key % 60
                lineo = Join((line[0:(k - 1)], str(key), s, '\n'))
                # Keep the marker in line so later sites still show it.
                line = Join((line[0:(k - 1)], a, line[k:]))
                line2 = Join((line[0:(k - 1)], a, line[k:], '\n'))
                map_text = Join((map_text, lineo, line2))
            mapunit = '\n'.join((sequence[counter:base], a * 60,
                                 revsequence[counter:base],
                                 Join((str(counter + 1).ljust(15), ' ' * 30,
                                       str(base).rjust(15), '\n\n'))))
            map_text = Join((map_text, mapunit))
        # Last row, from base to the end of the sequence.
        line = ' ' * 60
        for key in cutloc[base]:
            s = Join(' ' + n for n in enzymemap[key])
            if key == length:
                # Column relative to the start of this row (not to the start
                # of the sequence), so the marker lines up with the last base.
                chunk = line[0:(length - base - 1)]
                lineo = Join((chunk, str(key), s, '\n'))
                line2 = Join((chunk, a, '\n'))
                map_text = Join((map_text, lineo, line2))
                break
            k = key % 60
            lineo = Join((line[0:(k - 1)], str(key), s, '\n'))
            line = Join((line[0:(k - 1)], a, line[k:]))
            line2 = Join((line[0:(k - 1)], a, line[k:], '\n'))
            map_text = Join((map_text, lineo, line2))
        mapunit = Join((sequence[base:length], '\n',
                        a * (length - base), '\n',
                        revsequence[base:length], '\n',
                        str(base + 1).ljust(15),
                        ' ' * (length - base - 30),
                        str(length).rjust(15), '\n\n'))
        map_text = Join((map_text, mapunit))
        return map_text

    # private method to do lists:

    def __next_section(self, ls, into):
        """PF.__next_section(ls, into) -> string.

        Arguments:
         - ls is a tuple/list of tuple (string, [int, int]).
         - into is a string to which the formatted ls will be added.

        Format ls as a string of lines:
        The form is::

            enzyme1     :   position1.
            enzyme2     :   position2, position3.

        The entries are written in sorted order.  A list of positions longer
        than the available width is wrapped onto indented continuation lines.
        Return into followed by the formatted lines.
        """
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Raw string: \w, \s and \. are regex escapes, not string escapes.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        rows = [into]
        for name, sites in sorted(ls):
            positions = ', '.join(str(site) for site in sites) + '.'
            if len(positions) > linesize:
                # Cut where appropriate and add the indentation.
                positions = indentation.join(
                    m.group() for m in pat.finditer(positions))
            rows.append(''.join((str(name).ljust(self.NameWidth), ' :  ',
                                 positions, '\n')))
        return ''.join(rows)
442