Package Bio :: Package Restriction :: Module PrintFormat
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.PrintFormat

  1  #!/usr/bin/env python 
  2  # 
  3  #      Restriction Analysis Libraries. 
  4  #      Copyright (C) 2004. Frederic Sohm. 
  5  # 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10   
 11  from __future__ import print_function 
 12   
 13  import re 
 14   
 15  from Bio._py3k import range 
 16   
 17  from Bio.Restriction import RanaConfig as RanaConf 
 18   
 19  """ 
 20  Usage: 
 21   
 22      PrintFormat prints the results of a restriction analysis in one of 3
 23      different formats:
 24      list, number or map.
 25   
 26      The easiest way to use it is:
 27   
 28      >>> from Bio.Restriction.PrintFormat import PrintFormat 
 29      >>> from Bio.Restriction.Restriction import AllEnzymes 
 30      >>> from Bio import Entrez 
 31      >>> from Bio import SeqIO 
 32      >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322") 
 33      >>> pBR322 = SeqIO.read(handle, "fasta") 
 34      >>> handle.close() 
 35      >>> dct = AllEnzymes.search(pBR322.seq) 
 36      >>> new = PrintFormat() 
 37      >>> new.print_that(dct, '\n my pBR322 analysis\n\n', '\n no site :\n\n') 
 38   
 39       my pBR322 analysis 
 40   
 41      AasI       :  2169, 2582. 
 42      AatII      :  4289. 
 43      ... 
 44              More enzymes. 
 45      ... 
 46      ZraI       :  4287. 
 47      ZrmI       :  3847. 
 48   
 49       no site: 
 50   
 51      AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI 
 52      ... 
 53              More enzymes. 
 54      ... 
 55      Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI 
 56      Zsp2I 
 57   
 58      >>> new.sequence = pBR322.seq 
 59      >>> new.print_as("map") 
 60      >>> new.print_that(dct) 
 61      ... 
 62   
 63      Some of the methods of PrintFormat are meant to be overridden by derived
 64      classes.
 65  """ 
 66   
 67   
class PrintFormat(object):
    """Format and print the results of a restriction analysis.

    Three layouts are available and are selected with print_as():
    'list' (the default), 'number' and 'map'.  print_that() splits a
    result dictionary into cutting and non cutting enzymes and prints the
    string built by make_format().

    The 'map' layout reads self.sequence, which this class does not set:
    assign it before printing a map.
    """

    ConsoleWidth = RanaConf.ConsoleWidth
    NameWidth = RanaConf.NameWidth
    MaxSize = RanaConf.MaxSize
    # Columns left over once the console is divided into NameWidth cells.
    Cmodulo = ConsoleWidth % NameWidth
    # Console width rounded down to a whole number of NameWidth cells.
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    # Length a row of enzyme names may reach before it is flushed.
    linesize = PrefWidth - NameWidth

    def __init__(self):
        """PrintFormat() -> new PrintFormat Instance"""
        pass

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> select the output format.

        Valid formats are:
            'list'      -> alphabetical order
            'number'    -> grouped by number of sites in the sequence
            'map'       -> a map representation of the sequence with the
                           sites.

        Any other value selects 'list'.
        The choice is stored on the instance by rebinding make_format.
        If you want more flexibility override the virtual method
        make_format.
        """
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.

        dct is a dictionary as returned by a RestrictionBatch.search():
        keys are enzymes, values are the lists of cut positions.
        When dct is empty, self.results is used if the instance has such
        an attribute (this class does not define it); otherwise only the
        titles are printed.

        title is the title of the output.
        It must be a formatted string, i.e. you must include the line break.

        s1 is the title separating the list of enzymes that have sites from
        those without sites.
        s1 must be a formatted string as well.

        The layout is the one selected with print_as() (a list by default).
        """
        if not dct:
            # Fall back without raising when no 'results' attribute exists.
            dct = getattr(self, 'results', None) or {}
        cutting, non_cutting = [], []
        for enzyme, sites in dct.items():
            if sites:
                cutting.append((enzyme, sites))
            else:
                non_cutting.append(enzyme)
        print(self.make_format(cutting, title, non_cutting, s1))

    def make_format(self, cut=None, title='', nc=None, s1=''):
        """PF.make_format(cut, title, nc, s1) -> string

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.

        cut is a list of (enzyme, positions) tuples.
        nc is a list of non cutting enzymes.
        Both default to an empty list.
        """
        cut = [] if cut is None else cut
        nc = [] if nc is None else nc
        return self._make_list(cut, title, nc, s1)

    # ----- _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls, title, nc, s1) -> string.

        return a string of form:

        title.

        enzyme1     :   position1, position2.
        enzyme2     :   position1, position2, position3.

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls, title, nc, s1) -> string.

        return a string of form:

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls, title, nc, s1) -> string.

        title.

        enzyme which cut 1 time:

        enzyme1     :   position1.

        enzyme which cut 2 times:

        enzyme2     :   position1, position2.
        ...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return (self._make_number_only(ls, title)
                + self._make_nocut_only(nc, s1))

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls, title, nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=None, title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        return a formatted string of the non cutting enzymes, sorted and
        laid out in columns NameWidth wide.

        nc is a list of non cutting enzymes.  It is not modified.
        s1 is the sentence before the non cutting enzymes; a default
        sentence is used when s1 is empty.
        ls and title are not used.

        When nc is empty, s1 is returned unchanged.
        """
        if not nc:
            return s1
        pieces = [s1 or '\n Enzymes which do not cut the sequence.\n\n']
        row = ''
        for enzyme in sorted(nc):
            row += str(enzyme).ljust(self.NameWidth)
            if len(row) > self.linesize:
                # The row is full: flush it and start a new one.
                pieces.append(row + '\n')
                row = ''
        # The last row is always written, even when it is empty.
        pieces.append(row + '\n')
        return ''.join(pieces)

    def _make_list_only(self, ls, title, nc=None, s1=''):
        """PF._make_list_only(ls, title) -> string.

        return a string of form:

        title.

        enzyme1     :   position1, position2.
        enzyme2     :   position1, position2, position3.
        ...

        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are not used)."""
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=None, s1=''):
        """PF._make_number_only(ls, title) -> string.

        return a string of form:

        title.

        enzyme which cut 1 time:

        enzyme1     :   position1.

        enzyme which cut 2 times:

        enzyme2     :   position1, position2.
        ...

        ls is a list of results.  It is not modified.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are not used).
        A heading is only written for a number of cuts that at least one
        enzyme has."""
        if not ls:
            return title
        # key= works on Python 2 and 3; a cmp function only works on 2.
        by_count = sorted(ls, key=lambda item: len(item[1]))
        sections = []
        for item in by_count:
            count = len(item[1])
            if sections and sections[-1][0] == count:
                sections[-1][1].append(item)
            else:
                sections.append((count, [item]))
        for count, members in sections:
            title += "\n\nenzymes which cut %i times :\n\n" % count
            title = self.__next_section(members, title)
        return title

    def _make_map_only(self, ls, title, nc=None, s1=''):
        """PF._make_map_only(ls, title) -> string.

        return a string of form:

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...

        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are not used).

        The sequence is read from self.sequence, which must support len(),
        str() and complement().  It is drawn in rows of 60 positions; each
        cut is drawn above the row that holds its position.
        """
        if not ls:
            return title
        width = 60
        marker = '|'
        join = ''.join

        # Names of the enzymes cutting at each position.
        enzymemap = {}
        for enzyme, cuts in ls:
            for position in cuts:
                enzymemap.setdefault(position, []).append(str(enzyme))

        length = len(self.sequence)
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())

        # Start offset of every row; there is always at least one row.
        starts = list(range(0, length, width)) or [0]
        remaining = sorted(enzymemap)
        parts = [title or '']
        for start in starts:
            end = min(start + width, length)
            span = end - start
            if start == starts[-1]:
                # The last row takes every position still left.
                row_keys, remaining = remaining, []
            else:
                row_keys = [key for key in remaining if key <= end]
                remaining = [key for key in remaining if key > end]

            line = ' ' * width
            for key in row_keys:
                names = join(' ' + name for name in enzymemap[key])
                if key == end:
                    # Last column of the row: key % 60 can be 0 here, which
                    # the slicing below cannot express.
                    prefix = line[0:(span - 1)]
                    parts.append(join((prefix, str(key), names, '\n',
                                       prefix, marker, '\n')))
                    break
                col = key % width
                label = join((line[0:(col - 1)], str(key), names, '\n'))
                # Keep earlier markers so they stay visible on later lines.
                line = join((line[0:(col - 1)], marker, line[col:]))
                marks = join((line[0:(col - 1)], marker, line[col:], '\n'))
                parts.append(label)
                parts.append(marks)

            ruler = join((str(start + 1).ljust(15), ' ' * (span - 30),
                          str(end).rjust(15), '\n\n'))
            parts.append('\n'.join((sequence[start:end], marker * span,
                                    revsequence[start:end], ruler)))
        return join(parts)

    # ----- private method to do lists:

    def __next_section(self, ls, into):
        """FP.__next_section(ls, into) -> string.

        ls is a list of tuple (string, [int, int]).  It is not modified.
        into is a string to which the formatted ls will be added.

        Format ls, sorted, as a string of lines:
        The form is:

        enzyme1     :   position1.
        enzyme2     :   position2, position3.

        Position lists longer than linesize - MaxSize are wrapped and the
        continuation lines are indented by NameWidth + Indent spaces.

        then add the formatted ls to into
        return the result."""
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Raw string: \w, \s and \. are regex escapes, not string escapes.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        lines = [into]
        for name, sites in sorted(ls):
            stringsite = ', '.join(str(site) for site in sites) + '.'
            if len(stringsite) > linesize:
                # cut where appropriate and add the indentation
                chunks = [match.group() for match in pat.finditer(stringsite)]
                stringsite = indentation.join(chunks)
            # NOTE(review): the listing this was recovered from collapses
            # whitespace; the module usage example shows two spaces after
            # the colon ("AasI       :  2169") -- confirm the separator.
            lines.append(''.join((str(name).ljust(self.NameWidth), ' : ',
                                  stringsite, '\n')))
        return ''.join(lines)
422