Package Bio :: Package Restriction :: Module PrintFormat
[hide private]
[frames | no frames]

Source Code for Module Bio.Restriction.PrintFormat

  1  #!/usr/bin/env python 
  2  # 
  3  #      Restriction Analysis Libraries. 
  4  #      Copyright (C) 2004. Frederic Sohm. 
  5  # 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10  r"""Print the results of restriction enzyme analysis. 
 11   
 12  PrintFormat prints the results of a restriction analysis in 3 different
 13  formats: list, number or map.
 14   
 15  The easiest way to use it is: 
 16   
 17      >>> from Bio.Restriction.PrintFormat import PrintFormat 
 18      >>> from Bio.Restriction.Restriction import AllEnzymes 
 19      >>> from Bio import Entrez 
 20      >>> from Bio import SeqIO 
 21      >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322") 
 22      >>> pBR322 = SeqIO.read(handle, "fasta") 
 23      >>> handle.close() 
 24      >>> dct = AllEnzymes.search(pBR322.seq) 
 25      >>> new = PrintFormat() 
 26      >>> new.print_that(dct, 'My pBR322 analysis:\n', 'No site:\n') 
 27      My pBR322 analysis: 
 28      AasI       :  2169, 2582. 
 29      AatII      :  4289. 
 30      ... 
 31      ZraI       :  4287. 
 32      ZrmI       :  3847. 
 33      No site: 
 34      AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI 
 35      ... 
 36      Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI 
 37      Zsp2I 
 38      >>> new.sequence = pBR322.seq 
 39      >>> new.print_as("map") 
 40      >>> new.print_that(dct) 
 41      ... 
 42   
 43  Some of the methods of PrintFormat are meant to be overridden by derived
 44  classes.
 45  """ 
 46   
 47  from __future__ import print_function 
 48   
 49  import re 
 50   
 51  from Bio._py3k import range 
 52   
 53  from Bio.Restriction import RanaConfig as RanaConf 
 54   
 55   
class PrintFormat(object):
    """PrintFormat allows the printing of results of restriction analysis.

    The layout is controlled by class attributes read from RanaConfig:

    - ConsoleWidth, NameWidth, MaxSize and Indent are taken as is.
    - PrefWidth is ConsoleWidth rounded down to a multiple of NameWidth.
    - linesize is PrefWidth minus one name column.

    The output style is selected with print_as(), which rebinds the
    make_format attribute of the instance to one of the _make_* methods.
    """

    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth = RanaConf.NameWidth
    MaxSize = RanaConf.MaxSize
    Cmodulo = ConsoleWidth % NameWidth
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    linesize = PrefWidth - NameWidth

    def __init__(self):
        """PrintFormat() -> new PrintFormat Instance."""

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> select the output format.

        Valid formats are:
         - 'list'   -> alphabetical order
         - 'number' -> number of sites in the sequence
         - 'map'    -> a map representation of the sequence with the sites.

        Any other value selects the 'list' format.

        If you want more flexibility over-ride the virtual method make_format.
        """
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def format_output(self, dct, title='', s1=''):
        """PF.format_output(dct, [title[, s1]]) -> string nicely formatted.

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search()
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        The entries of dct are split into enzymes with at least one site and
        enzymes without any, and both groups are handed to make_format.
        """
        if not dct:
            # NOTE(review): self.results is not set anywhere in this class;
            # presumably a subclass provides it - confirm against callers.
            dct = self.results
        ls, nc = [], []
        for enzyme, sites in dct.items():
            if sites:
                ls.append((enzyme, sites))
            else:
                nc.append(enzyme)
        return self.make_format(ls, title, nc, s1)

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> print the formatted results.

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search()
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        This method prints the output of format_output() and it is here
        for backwards compatibility.
        """
        print(self.format_output(dct, title, s1))

    def make_format(self, cut=(), title='', nc=(), s1=''):
        """PF.make_format(cut, title, nc, s1) -> string.

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.
        The default implementation uses the list format.
        """
        return self._make_list(cut, title, nc, s1)

    # _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls, title, nc, s1) -> string.

        Return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.

        Arguments:
         - ls is a tuple or list of cutting enzymes.
         - title is the title.
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls, title, nc, s1) -> string.

        Return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls, title, nc, s1) -> string.

        Return a string of form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return (self._make_number_only(ls, title) +
                self._make_nocut_only(nc, s1))

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls, title, nc, s1) -> string.

        Return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        Arguments:
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=(), title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        Return a formatted string of the non cutting enzymes: the names are
        sorted, left-justified to NameWidth and packed into rows which are
        closed as soon as they grow longer than linesize.

        Arguments:
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
           A default sentence is used when s1 is empty.
         - ls and title are not used.

        When nc is empty, s1 is returned unchanged.
        """
        if not nc:
            return s1
        # NOTE(review): the whitespace of this literal is reproduced as found;
        # the listing this file was recovered from may have collapsed runs of
        # spaces - confirm the leading indentation against upstream.
        pieces = [s1 or '\n Enzymes which do not cut the sequence.\n\n']
        row = ''
        for key in sorted(nc):
            row += str(key).ljust(self.NameWidth)
            if len(row) > self.linesize:
                pieces.extend((row, '\n'))
                row = ''
        # The last (possibly empty) row is always terminated as well.
        pieces.extend((row, '\n'))
        return ''.join(pieces)

    def _make_list_only(self, ls, title, nc=(), s1=''):
        """PF._make_list_only(ls, title) -> string.

        Return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.
            ...

        Arguments:
         - ls is a tuple or list of results.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are not used).
        """
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=(), s1=''):
        """PF._make_number_only(ls, title) -> string.

        Return a string of form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list or tuple of results; it is not modified.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are not used).

        A section header is only written for site counts that at least one
        enzyme actually has.
        """
        if not ls:
            return title
        header = "\n\nenzymes which cut %i times :\n\n"
        # sorted() is stable and leaves the caller's sequence untouched.
        by_count = sorted(ls, key=lambda item: len(item[1]))
        section, cur_len = [], 1
        for name, sites in by_count:
            count = len(sites)
            if count > cur_len:
                # Flush the finished section; it is empty only when no enzyme
                # has cur_len (the initial 1) sites, in which case no header
                # is emitted.
                if section:
                    title += header % cur_len
                    title = self.__next_section(section, title)
                section, cur_len = [], count
            section.append((name, sites))
        title += header % cur_len
        return self.__next_section(section, title)

    def _make_map_only(self, ls, title, nc=(), s1=''):
        """PF._make_map_only(ls, title) -> string.

        Return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        Arguments:
         - ls is a list of results.
         - title is a string.
         - Non cutting enzymes are not included (nc and s1 are not used).

        The instance attribute sequence must be set before calling: its
        length, its string form and its complement() are used to draw the
        map in rows of 60 bases.
        """
        if not ls:
            return title
        width = 60
        bar = '|'

        # position -> names of all the enzymes having a site there.
        enzymemap = {}
        for enzyme, cut in ls:
            for position in cut:
                enzymemap.setdefault(position, []).append(str(enzyme))

        # Distribute the sorted positions over the rows: cutloc maps the start
        # offset of a row to the positions drawn above it.  Every full row
        # takes the positions up to and including its end; whatever is left
        # belongs to the final row.
        length = len(self.sequence)
        pending = sorted(enzymemap)
        cutloc = {}
        last_start = 0
        for row_end in range(width, length, width):
            here, later = [], []
            for key in pending:
                if key <= row_end:
                    here.append(key)
                else:
                    later.append(key)
            cutloc[row_end - width] = here
            pending = later
            last_start = row_end
        cutloc[last_start] = pending

        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())
        parts = [title or '']

        # Full rows of `width` bases.
        for base in range(width, length, width):
            start = base - width
            parts.append(self.__site_marker_lines(
                cutloc[start], enzymemap, base, width - 1))
            ruler = ''.join((str(start + 1).ljust(15), ' ' * 30,
                             str(base).rjust(15), '\n\n'))
            parts.append('\n'.join((sequence[start:base], bar * width,
                                    revsequence[start:base], ruler)))

        # Final row, from last_start to the end of the sequence.
        base = last_start
        parts.append(self.__site_marker_lines(
            cutloc[base], enzymemap, length, length - 1))
        parts.append(''.join((
            sequence[base:length], '\n',
            bar * (length - base), '\n',
            revsequence[base:length], '\n',
            str(base + 1).ljust(15), ' ' * (length - base - 30),
            str(length).rjust(15), '\n\n')))
        return ''.join(parts)

    def __site_marker_lines(self, keys, enzymemap, end_key, end_col):
        """Return the label and marker lines drawn above one map row (PRIVATE).

        Arguments:
         - keys are the sorted site positions belonging to the row.
         - enzymemap maps a position to the list of enzyme names found there.
         - end_key is the position treated as the end of the row; it is
           drawn at column end_col and stops the processing of the row.
         - end_col is the number of padding characters put before end_key.

        Every position yields two lines: one holding the position followed
        by the enzyme names, and one holding a '|' for this position and for
        all the positions already drawn in the row.
        """
        bar = '|'
        line = ' ' * 60
        out = []
        for key in keys:
            names = ''.join(' ' + name for name in enzymemap[key])
            if key == end_key:
                pad = line[0:end_col]
                out.extend((pad, str(key), names, '\n', pad, bar, '\n'))
                break
            col = key % 60
            out.extend((line[0:(col - 1)], str(key), names, '\n'))
            # Remember the marker so that the following positions of the row
            # keep showing it on their own marker line.
            line = ''.join((line[0:(col - 1)], bar, line[col:]))
            out.extend((line[0:(col - 1)], bar, line[col:], '\n'))
        return ''.join(out)

    # private method to do lists:

    def __next_section(self, ls, into):
        """PF.__next_section(ls, into) -> string (PRIVATE).

        Arguments:
         - ls is a tuple/list of tuple (string, [int, int]).
         - into is a string to which the formatted ls will be added.

        Format ls, sorted, as a string of lines of the form::

            enzyme1     :   position1.
            enzyme2     :   position2, position3.

        A list of positions longer than linesize - MaxSize characters is
        broken after a ',' or the final '.' and continued on the next line,
        indented by NameWidth + Indent spaces.

        The formatted lines are appended to into and the result is returned.
        """
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Raw string: the pattern is unchanged but no longer depends on
        # invalid string escapes (backslash-w, backslash-s, backslash-dot).
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        pieces = [into]
        for name, sites in sorted(ls):
            positions = ', '.join(str(site) for site in sites) + '.'
            if len(positions) > linesize:
                # cut where appropriate and add the indentation
                positions = indentation.join(
                    match.group() for match in pat.finditer(positions))
            # Two spaces after the colon, matching the sample output shown
            # in the module docstring.
            pieces.extend((str(name).ljust(self.NameWidth), ' :  ',
                           positions, '\n'))
        return ''.join(pieces)
451