Package Bio :: Package Restriction :: Module PrintFormat
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.PrintFormat

  1  #!/usr/bin/env python 
  2  # 
  3  #      Restriction Analysis Libraries. 
  4  #      Copyright (C) 2004. Frederic Sohm. 
  5  # 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10  r"""Print the results of restriction enzyme analysis. 
 11   
 12  PrintFormat prints the results from restriction analysis in 3 different 
 13  formats: list, number or map. 
 14   
 15  The easiest way to use it is: 
 16   
 17      >>> from Bio.Restriction.PrintFormat import PrintFormat 
 18      >>> from Bio.Restriction.Restriction import AllEnzymes 
 19      >>> from Bio import Entrez 
 20      >>> from Bio import SeqIO 
 21      >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322") 
 22      >>> pBR322 = SeqIO.read(handle, "fasta") 
 23      >>> handle.close() 
 24      >>> dct = AllEnzymes.search(pBR322.seq) 
 25      >>> new = PrintFormat() 
 26      >>> new.print_that(dct, 'My pBR322 analysis:\n', 'No site:\n') 
 27      My pBR322 analysis: 
 28      AasI       :  2169, 2582. 
 29      AatII      :  4289. 
 30      ... 
 31      ZraI       :  4287. 
 32      ZrmI       :  3847. 
 33      No site: 
 34      AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI 
 35      ... 
 36      Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI 
 37      Zsp2I 
 38      >>> new.sequence = pBR322.seq 
 39      >>> new.print_as("map") 
 40      >>> new.print_that(dct) 
 41      ... 
 42   
 43  Some of the methods of PrintFormat are meant to be overridden by derived 
 44  classes. 
 45  """ 
 46   
 47  from __future__ import print_function 
 48   
 49  import re 
 50   
 51  from Bio._py3k import range 
 52   
 53  from Bio.Restriction import RanaConfig as RanaConf 
 54   
 55  __docformat__ = "restructuredtext en" 
 56   
 57   
class PrintFormat(object):
    """Format and print the results of a restriction analysis.

    Three layouts are available and are selected with ``print_as``:

    - 'list'   -> one line per enzyme, enzymes in sorted order (default)
    - 'number' -> enzymes grouped by their number of sites
    - 'map'    -> sites drawn above the sequence, 60 bases per row

    ``print_that`` splits a result dictionary into cutting and non cutting
    enzymes and prints whatever ``make_format`` returns.  ``make_format``
    is meant to be replaced, either through ``print_as`` or by a derived
    class.

    Two instance attributes are read but never set by this class:

    - ``sequence``: needed by the 'map' layout.  It must support ``len()``,
      ``str()`` and ``.complement()``.
    - ``results``: used by ``print_that`` when it is given an empty
      dictionary.
    """

    # Layout parameters taken from the RanaConfig module.
    # NOTE(review): presumably ConsoleWidth is the terminal width in
    # characters and NameWidth the width of one enzyme-name column --
    # confirm against RanaConfig.
    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth = RanaConf.NameWidth
    MaxSize = RanaConf.MaxSize
    # PrefWidth is ConsoleWidth rounded down to a multiple of NameWidth.
    Cmodulo = ConsoleWidth % NameWidth
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    linesize = PrefWidth - NameWidth

    def __init__(self):
        """PrintFormat() -> new PrintFormat Instance"""
        pass

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> select the output layout.

        Valid formats are:

        - 'list'   -> alphabetical order
        - 'number' -> grouped by number of sites in the sequence
        - 'map'    -> a map representation of the sequence with the sites.

        Any other value selects 'list'.

        The choice is stored by rebinding ``make_format`` on the instance.
        If you want more flexibility over-ride the virtual method
        make_format.
        """
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search().
           When it is empty (or None), ``self.results`` is used instead;
           this class does not define ``results``, so it has to be
           provided by a derived class or set on the instance.
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line
           break.
         - s1 is the title separating the list of enzymes that have sites
           from those without sites.
         - s1 must be a formatted string as well.

        The output layout is the one selected with print_as (a list by
        default).
        """
        if not dct:
            dct = self.results
        ls, nc = [], []
        for k, v in dct.items():
            if v:
                ls.append((k, v))
            else:
                nc.append(k)
        print(self.make_format(ls, title, nc, s1))

    def make_format(self, cut=None, title='', nc=None, s1=''):
        """PF.make_format(cut, title, nc, s1) -> string

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.

        Arguments:
         - cut is a list of (enzyme, sites) tuples for the cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        # None replaces the former mutable [] defaults.
        return self._make_list(cut or [], title, nc or [], s1)

    # _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls, title, nc, s1) -> string.

        return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls, title, nc, s1) -> string.

        return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls, title, nc, s1) -> string.

        Returns a string in the form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return (self._make_number_only(ls, title) +
                self._make_nocut_only(nc, s1))

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls, title, nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        Arguments:
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=None, title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        return a formatted string of the non cutting enzymes: the names in
        sorted order, each left-justified to NameWidth, several per line.

        Arguments:
         - nc is a list of non cutting enzymes.  It is not modified.
         - s1 is the sentence before the non cutting enzymes.  When nc is
           empty s1 is returned alone; when s1 is empty a default sentence
           is used.
         - ls and title are ignored; they are accepted so the signature
           stays compatible with the other _make_* methods.
        """
        if not nc:
            return s1
        pieces = [s1 or '\n Enzymes which do not cut the sequence.\n\n']
        row = ''
        # sorted() rather than nc.sort(): leave the caller's list alone.
        for key in sorted(nc):
            row += str(key).ljust(self.NameWidth)
            if len(row) > self.linesize:
                pieces.append(row + '\n')
                row = ''
        # The last (possibly empty) row is always terminated.
        pieces.append(row + '\n')
        return ''.join(pieces)

    def _make_list_only(self, ls, title, nc=None, s1=''):
        """PF._make_list_only(ls, title) -> string.

        return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.
            ...

        Arguments:
         - ls is a list of results, i.e. (enzyme, sites) tuples.
         - title is a string.
         - Non cutting enzymes are not included; nc and s1 are ignored.
        """
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=None, s1=''):
        """PF._make_number_only(ls, title) -> string.

        return a string of form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list of results, i.e. (enzyme, sites) tuples.  It is
           not modified.
         - title is a string.
         - Non cutting enzymes are not included; nc and s1 are ignored.
        """
        if not ls:
            return title
        header = "\n\nenzymes which cut %i times :\n\n"
        # A key function replaces the former cmp-style sort, which does
        # not exist on Python 3.  Both sorts are stable.
        by_count = sorted(ls, key=lambda item: len(item[1]))
        # Start from the smallest count actually present so that no empty
        # "cut 1 times" section is emitted when nothing cuts exactly once.
        cur_len = len(by_count[0][1])
        section = []
        for name, sites in by_count:
            count = len(sites)
            if count > cur_len:
                # Count changed: flush the section collected so far.
                title = self.__next_section(section, title + header % cur_len)
                section, cur_len = [], count
            section.append((name, sites))
        return self.__next_section(section, title + header % cur_len)

    def _make_map_only(self, ls, title, nc=None, s1=''):
        """PF._make_map_only(ls, title) -> string.

        return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        The sequence is drawn 60 bases per row.  Above each row, every cut
        position falling in that row gets two lines: the position followed
        by the enzyme names, then a '|' marker line.

        Requires ``self.sequence`` to be set; it must support ``len()``,
        ``str()`` and ``.complement()``.

        Arguments:
         - ls is a list of results, i.e. (enzyme, sites) tuples.
         - title is a string.
         - Non cutting enzymes are not included; nc and s1 are ignored.
        """
        if not ls:
            return title
        width = 60
        bar = '|'

        # cut position -> names of the enzymes cutting at that position
        enzymemap = {}
        for enzyme, cut in ls:
            for position in cut:
                enzymemap.setdefault(position, []).append(str(enzyme))

        length = len(self.sequence)
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())

        # Bucket the sorted cut positions by the row that displays them,
        # keyed by the 0-based offset of the row's first base.  Every row
        # gets a bucket, even an empty one, and each position lands in
        # exactly one bucket.
        cutloc = {}
        pending = sorted(enzymemap)
        base = 0
        for base in range(width, length, width):
            cutloc[base - width] = [key for key in pending if key <= base]
            pending = [key for key in pending if key > base]
        # Whatever is left belongs to the last (possibly partial) row.
        cutloc[base] = pending

        parts = [title or '']

        def mark(ruler, key):
            """Emit the label and marker lines for key; return new ruler."""
            # 1-based position -> 0-based column within the row.  This
            # also covers a site on the last base of a row (column 59)
            # and on the last base of the sequence.
            column = (key - 1) % width
            label = ''.join(' ' + name for name in enzymemap[key])
            head = ruler[:column]
            # The ruler keeps the bars of the sites already drawn in this
            # row, so each marker line shows them all.
            ruler = ''.join((head, bar, ruler[column + 1:]))
            parts.append(''.join((head, str(key), label, '\n', ruler, '\n')))
            return ruler

        # Full rows of 60 bases.
        for base in range(width, length, width):
            start = base - width
            ruler = ' ' * width
            for key in cutloc[start]:
                ruler = mark(ruler, key)
            parts.append('\n'.join((
                sequence[start:base],
                bar * width,
                revsequence[start:base],
                ''.join((str(start + 1).ljust(15), ' ' * 30,
                         str(base).rjust(15), '\n\n')),
            )))

        # Last row.  base is the offset where it starts (0 when the
        # sequence has at most 60 bases).
        ruler = ' ' * width
        for key in cutloc[base]:
            ruler = mark(ruler, key)
            if key == length:
                # Nothing past the end of the sequence is drawn once the
                # last base has been marked.
                break
        parts.append(''.join((
            sequence[base:length], '\n',
            bar * (length - base), '\n',
            revsequence[base:length], '\n',
            str(base + 1).ljust(15), ' ' * (length - base - 30),
            str(length).rjust(15), '\n\n',
        )))
        return ''.join(parts)

    # private method to do lists:

    def __next_section(self, ls, into):
        """PF.__next_section(ls, into) -> string.

        Arguments:
         - ls is a list of tuple (string, [int, int]).  It is not
           modified.
         - into is a string to which the formatted ls will be added.

        Format ls as a string of lines, in sorted order.
        The form is::

            enzyme1     :   position1.
            enzyme2     :   position2, position3.

        A position list longer than ``linesize - MaxSize`` characters is
        wrapped onto continuation lines indented by NameWidth + Indent
        spaces.

        then add the formatted ls to into
        return into.
        """
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Raw string: \w, \s and \. are regex escapes, not string escapes.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        pieces = [into]
        for name, sites in sorted(ls):
            positions = ', '.join(str(site) for site in sites) + '.'
            if len(positions) > linesize:
                # cut where appropriate and add the indentation
                positions = indentation.join(
                    match.group() for match in pat.finditer(positions))
            # Two spaces after the colon, as in the sample output of the
            # module docstring ("AasI       :  2169, 2582.").
            pieces.extend((str(name).ljust(self.NameWidth), ' :  ',
                           positions, '\n'))
        return ''.join(pieces)
430