Package Bio :: Package Restriction :: Module PrintFormat
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.PrintFormat

  1  #!/usr/bin/env python 
  2  # 
  3  #      Restriction Analysis Libraries. 
  4  #      Copyright (C) 2004. Frederic Sohm. 
  5  # 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license.  Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10   
 11  import re 
 12  from Bio.Restriction import RanaConfig as RanaConf 
 13   
 14  """ 
 15  Usage: 
 16   
 17      PrintFormat prints the results of a restriction analysis in one of 
 18      3 different formats: 
 19      list, number or map. 
 20   
 21      The easiest way to use it is: 
 22   
 23      >>> from Bio.Restriction.PrintFormat import PrintFormat 
 24      >>> from Bio.Restriction.Restriction import AllEnzymes 
 25      >>> from Bio import Entrez 
 26      >>> from Bio import SeqIO 
 27      >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322") 
 28      >>> pBR322 = SeqIO.read(handle, "fasta") 
 29      >>> handle.close() 
 30      >>> dct = AllEnzymes.search(pBR322.seq) 
 31      >>> new = PrintFormat() 
 32      >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n') 
 33   
 34       my pBR322 analysis 
 35   
 36      AasI       :  2169, 2582. 
 37      AatII      :  4289. 
 38      ... 
 39              More enzymes. 
 40      ... 
 41      ZraI       :  4287. 
 42      ZrmI       :  3847. 
 43   
 44       no site: 
 45   
 46      AarI      AatI      Acc65I    AcsI      AcvI      AdeI      AflII     AgeI 
 47      ... 
 48              More enzymes. 
 49      ... 
 50      Vha464I   XapI      XbaI      XcmI      XhoI      XmaCI     XmaI      XmaJI 
 51      Zsp2I 
 52   
 53      >>> new.sequence = pBR322.seq 
 54      >>> new.print_as("map") 
 55      >>> new.print_that(dct) 
 56      ... 
 57   
 58      Some of the methods of PrintFormat are meant to be overridden by derived 
 59      classes. 
 60  """ 
 61   
 62   
class PrintFormat(object):
    """Format and print the results of a restriction analysis.

    The results can be rendered as an alphabetical list, grouped by number
    of sites, or as a map drawn along the sequence (see print_as).

    Two attributes are read but never set by this class:

    * self.sequence -- needed by the map format; assign it before printing
      a map.
    * self.results  -- used by print_that when it is given an empty
      dictionary.
    """

    # Layout settings, all taken from the RanaConf configuration module.
    ConsoleWidth = RanaConf.ConsoleWidth
    # Width of the column an enzyme name is left-justified in.
    NameWidth = RanaConf.NameWidth
    MaxSize = RanaConf.MaxSize
    # PrefWidth is ConsoleWidth rounded down to a multiple of NameWidth.
    Cmodulo = ConsoleWidth % NameWidth
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = RanaConf.Indent
    # A row of enzyme names is flushed once it gets longer than this.
    linesize = PrefWidth - NameWidth

    def __init__(self):
        """PrintFormat() -> new PrintFormat Instance"""

    def print_as(self, what='list'):
        """PF.print_as([what='list']) -> select the output format.

        Valid formats are:
            'list'      -> alphabetical order
            'number'    -> number of sites in the sequence
            'map'       -> a map representation of the sequence with the sites.

        Any other value selects 'list'.

        The choice is stored by rebinding self.make_format on the instance.
        If you want more flexibility override the virtual method make_format.
        """
        if what == 'map':
            self.make_format = self._make_map
        elif what == 'number':
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def print_that(self, dct, title='', s1=''):
        """PF.print_that(dct, [title[, s1]]) -> Print dct nicely formatted.

        dct is a dictionary as returned by a RestrictionBatch.search().
        If it is empty (or None), self.results is used instead.

        title is the title of the output.
        It must be a formatted string, i.e. you must include the line break.

        s1 is the title separating the list of enzymes that have sites from
        those without sites.
        s1 must be a formatted string as well.

        The output format is the one selected with print_as (a list unless
        changed)."""
        if not dct:
            dct = self.results
        ls, nc = [], []
        # Separate the enzymes with at least one site from those with none.
        for k, v in dct.items():
            if v:
                ls.append((k, v))
            else:
                nc.append(k)
        # A parenthesised single argument prints the same on Python 2 and 3.
        print(self.make_format(ls, title, nc, s1))

    def make_format(self, cut=None, title='', nc=None, s1=''):
        """PF.make_format(cut, title, nc, s1) -> string

        Virtual method.
        Here to be pointed to one of the _make_* methods.
        You can as well create a new method and point make_format to it.

        cut is a list of (enzyme, sites) tuples, nc a list of enzymes
        without site; both default to an empty list."""
        cut = [] if cut is None else cut
        nc = [] if nc is None else nc
        return self._make_list(cut, title, nc, s1)

    ###### _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """PF._make_list(ls,title, nc,s1) -> string.

        return a string of form:

        title.

        enzyme1     :   position1, position2.
        enzyme2     :   position1, position2, position3.

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """PF._make_map(ls,title, nc,s1) -> string.

        return a string of form:

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """PF._make_number(ls,title, nc,s1) -> string.

        title.

        enzyme which cut 1 time:

        enzyme1     :   position1.

        enzyme which cut 2 times:

        enzyme2     :   position1, position2.
        ...

        ls is a list of cutting enzymes.
        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_nocut(self, ls, title, nc, s1):
        """PF._make_nocut(ls,title, nc,s1) -> string.

        return a formatted string of the non cutting enzymes.

        ls is a list of cutting enzymes -> will not be used.
        Here for compatibility with make_format.

        title is the title.
        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes."""
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=None, title=''):
        """PF._make_nocut_only(nc, s1) -> string.

        return a formatted string of the non cutting enzymes.

        nc is a list of non cutting enzymes.
        s1 is the sentence before the non cutting enzymes; a default
        sentence is used when it is empty.
        ls and title are not used, they are accepted for compatibility
        with make_format.

        The names are written sorted, in columns NameWidth wide.  When nc
        is empty s1 is returned unchanged.  nc itself is left untouched.
        """
        if not nc:
            return s1
        pieces = [s1 or '\n Enzymes which do not cut the sequence.\n\n']
        row = ''
        # Sort a copy so the list handed in by the caller keeps its order.
        for key in sorted(nc):
            row += str(key).ljust(self.NameWidth)
            if len(row) > self.linesize:
                pieces.append(row + '\n')
                row = ''
        # The last row is always written, even when it is empty.
        pieces.append(row + '\n')
        return ''.join(pieces)

    def _make_list_only(self, ls, title, nc=None, s1=''):
        """PF._make_list_only(ls, title) -> string.

        return a string of form:

        title.

        enzyme1     :   position1, position2.
        enzyme2     :   position1, position2, position3.
        ...

        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are ignored)."""
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=None, s1=''):
        """PF._make_number_only(ls, title) -> string.

        return a string of form:

        title.

        enzyme which cut 1 time:

        enzyme1     :   position1.

        enzyme which cut 2 times:

        enzyme2     :   position1, position2.
        ...

        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are ignored).

        A section is only written for a number of sites that occurs in ls.
        ls itself is left untouched."""
        if not ls:
            return title
        header = "\n\nenzymes which cut %i times :\n\n"
        # A key function gives the same order as the former cmp() on the
        # lengths and is available on both Python 2 and Python 3.
        ordered = sorted(ls, key=lambda result: len(result[1]))
        # Start from the smallest count actually present, so that no empty
        # section is written when every enzyme cuts more than once.
        current = len(ordered[0][1])
        section = []
        for name, sites in ordered:
            count = len(sites)
            if count > current:
                # The run of enzymes sharing `current` sites is complete.
                title = self.__next_section(section, title + header % current)
                section, current = [], count
            section.append((name, sites))
        return self.__next_section(section, title + header % current)

    def _make_map_only(self, ls, title, nc=None, s1=''):
        """PF._make_map_only(ls, title) -> string.

        return a string of form:

        title.

            enzyme1, position
            |
        AAAAAAAAAAAAAAAAAAAAA...
        |||||||||||||||||||||
        TTTTTTTTTTTTTTTTTTTTT...

        ls is a list of results.
        title is a string.
        Non cutting enzymes are not included (nc and s1 are ignored).

        The sequence is read from self.sequence (len(), str() and
        .complement() are used) and drawn in rows of 60 bases.
        """
        if not ls:
            return title
        text = title or ''
        # position -> names of the enzymes having a site at that position.
        enzymemap = {}
        for enzyme, cut in ls:
            for c in cut:
                enzymemap.setdefault(c, []).append(str(enzyme))
        mapping = sorted(enzymemap)
        length = len(self.sequence)
        # cutloc maps the offset of the first base of a row to the positions
        # drawn above that row.  Every full row gets an entry, even an empty
        # one, and a position is handed to exactly one row.
        cutloc = {}
        x = 0
        for x in range(60, length, 60):
            # mapping is sorted, so the positions of this row are a prefix.
            row = [key for key in mapping if key <= x]
            cutloc[x - 60] = row
            mapping = mapping[len(row):]
        # Whatever is left belongs to the last (possibly partial) row.
        cutloc[x] = mapping
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())
        a = '|'
        Join = ''.join
        base = 0
        for base in range(60, length, 60):
            counter = base - 60
            line = ' ' * 60
            for key in cutloc[counter]:
                s = Join([' ' + n for n in enzymemap[key]])
                if key == base:
                    # Last column of the row: key % 60 is 0 here, so the
                    # column is given explicitly.
                    prefix = line[0:59]
                    text = Join((text, prefix, str(key), s, '\n',
                                 prefix, a, '\n'))
                    break
                # Positions count from 1: the marker goes in column k - 1.
                k = key % 60
                lineo = Join((line[0:(k - 1)], str(key), s, '\n'))
                # line keeps the markers of the previous sites of this row.
                line = Join((line[0:(k - 1)], a, line[k:]))
                line2 = Join((line[0:(k - 1)], a, line[k:], '\n'))
                text = Join((text, lineo, line2))
            mapunit = '\n'.join((sequence[counter:base], a * 60,
                                 revsequence[counter:base],
                                 Join((str(counter + 1).ljust(15), ' ' * 30,
                                       str(base).rjust(15), '\n\n'))))
            text = Join((text, mapunit))
        # Last row: it starts at offset base and may be shorter than 60.
        line = ' ' * 60
        for key in cutloc[base]:
            s = Join([' ' + n for n in enzymemap[key]])
            if key == length:
                # Column of the last base within this row.
                prefix = line[0:(length - base - 1)]
                text = Join((text, prefix, str(key), s, '\n',
                             prefix, a, '\n'))
                break
            k = key % 60
            lineo = Join((line[0:(k - 1)], str(key), s, '\n'))
            line = Join((line[0:(k - 1)], a, line[k:]))
            line2 = Join((line[0:(k - 1)], a, line[k:], '\n'))
            text = Join((text, lineo, line2))
        mapunit = Join((sequence[base:length], '\n',
                        a * (length - base), '\n',
                        revsequence[base:length], '\n',
                        str(base + 1).ljust(15),
                        ' ' * (length - base - 30),
                        str(length).rjust(15), '\n\n'))
        return Join((text, mapunit))

    ###### private method to do lists:

    def __next_section(self, ls, into):
        """PF.__next_section(ls, into) -> string.

        ls is a list of tuple (string, [int, int]).
        into is a string to which the formatted ls will be added.

        Format ls as a string of lines:
        The form is:

        enzyme1     :   position1.
        enzyme2     :   position2, position3.

        then add the formatted ls to into and return the result.
        The lines are written in sorted order; ls itself is left untouched.
        """
        indentation = '\n' + (self.NameWidth + self.Indent) * ' '
        linesize = self.linesize - self.MaxSize
        # Matches a chunk of at most linesize characters followed by a
        # ',' or a '.', so a long list of sites is only cut after a site.
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        pieces = [into]
        for name, sites in sorted(ls):
            positions = ', '.join([str(site) for site in sites]) + '.'
            if len(positions) > linesize:
                # cut where appropriate and add the indentation
                chunks = [m.group() for m in pat.finditer(positions)]
                stringsite = indentation.join(chunks)
            else:
                stringsite = positions
            # NOTE(review): the usage example of this module shows two
            # spaces after the colon; the separator is kept as it appears
            # in this listing -- confirm its exact width.
            pieces.append(''.join((str(name).ljust(self.NameWidth), ' : ',
                                   stringsite, '\n')))
        return ''.join(pieces)