| Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 #
3 # Restriction Analysis Libraries.
4 # Copyright (C) 2004. Frederic Sohm.
5 #
6 # This code is part of the Biopython distribution and governed by its
7 # license. Please see the LICENSE file that should have been included
8 # as part of this package.
9 #
10
11 import re
12 from Bio.Restriction import RanaConfig as RanaConf
13
14 """
15 Usage:
16
17 PrintFormat prints the results of a restriction analysis in one of three
18 formats:
19 list, number or map.
20
21 The easiest way to use it is:
22
23 >>> from Bio.Restriction.PrintFormat import PrintFormat
24 >>> from Bio.Restriction.Restriction import AllEnzymes
25 >>> from Bio import Entrez
26 >>> from Bio import SeqIO
27 >>> handle = Entrez.efetch(db="nucleotide", rettype="fasta", id="SYNPBR322")
28 >>> pBR322 = SeqIO.read(handle, "fasta")
29 >>> handle.close()
30 >>> dct = AllEnzymes.search(pBR322.seq)
31 >>> new = PrintFormat()
32 >>> new.print_that(dct, '\n my pBR322 analysis\n\n','\n no site :\n\n')
33
34 my pBR322 analysis
35
36 AasI : 2169, 2582.
37 AatII : 4289.
38 ...
39 More enzymes.
40 ...
41 ZraI : 4287.
42 ZrmI : 3847.
43
44 no site:
45
46 AarI AatI Acc65I AcsI AcvI AdeI AflII AgeI
47 ...
48 More enzymes.
49 ...
50 Vha464I XapI XbaI XcmI XhoI XmaCI XmaI XmaJI
51 Zsp2I
52
53 >>> new.sequence = pBR322.seq
54 >>> new.print_as("map")
55 >>> new.print_that(dct)
56 ...
57
58 Some of the methods of PrintFormat are meant to be overridden by derived
59 classes.
60 """
61
62
"""PrintFormat allows the printing of results of restriction analysis."""

# NOTE(review): the enclosing `class` line was missing from the extracted
# listing; these are the class-level layout constants that follow it.
# The raw values are read from the RanaConfig module (imported as RanaConf).
ConsoleWidth = RanaConf.ConsoleWidth
NameWidth = RanaConf.NameWidth
MaxSize = RanaConf.MaxSize
# Columns left over once ConsoleWidth is divided into NameWidth-wide cells.
Cmodulo = ConsoleWidth % NameWidth
# ConsoleWidth rounded down to a whole number of NameWidth-wide cells.
PrefWidth = ConsoleWidth - Cmodulo
Indent = RanaConf.Indent
# Row length limit used when laying out enzyme names and site lists.
linesize = PrefWidth - NameWidth
73
77
# NOTE(review): the `def` line was missing from the extracted listing; the
# signature is reconstructed from the docstring -- confirm against the file.
def print_as(self, what='list'):
    """Select the layout used by later print_that() calls.

    PF.print_as([what='list']) -> None

    Valid formats are:
        'list'   -> alphabetical order
        'number' -> number of sites in the sequence
        'map'    -> a map representation of the sequence with the sites.

    Any other value selects the 'list' layout.

    The choice is recorded by rebinding self.make_format to the matching
    _make_* method. If you want more flexibility, override the virtual
    method make_format.
    """
    if what == 'map':
        self.make_format = self._make_map
    elif what == 'number':
        self.make_format = self._make_number
    else:
        # Unknown values deliberately fall back to the list layout.
        self.make_format = self._make_list
96
# NOTE(review): the `def` line was missing from the extracted listing; the
# signature is reconstructed from the docstring, and the '' defaults for the
# optional title and s1 are an assumption -- confirm against the file.
def print_that(self, dct, title='', s1=''):
    """Print dct nicely formatted with the currently selected layout.

    PF.print_that(dct, [title[, s1]]) -> None

    dct is a dictionary as returned by a RestrictionBatch.search().
    When dct is empty (or None), self.results is used instead.

    title is the title of the map.
    It must be a formatted string, i.e. you must include the line break.

    s1 is the title separating the list of enzymes that have sites from
    those without sites.
    s1 must be a formatted string as well.

    The entries of dct are split into enzymes with sites and enzymes
    without sites, passed to self.make_format, and the resulting string
    is printed.
    """
    if not dct:
        dct = self.results
    ls, nc = [], []
    # items() rather than iteritems(): works on Python 2 and Python 3.
    for enzyme, sites in dct.items():
        if sites:
            ls.append((enzyme, sites))
        else:
            nc.append(enzyme)
    # A single parenthesised argument prints identically with the Python 2
    # print statement and the Python 3 print function.
    print(self.make_format(ls, title, nc, s1))
120
# NOTE(review): the `def` line was missing from the extracted listing; the
# parameter order follows the call in print_that and the call made below
# (the old docstring listed them in a different order) -- confirm.
def make_format(self, cut, title, nc, s1):
    """Build the output string for one analysis.

    PF.make_format(cut, title, nc, s1) -> string

    Virtual method.
    Here to be pointed to one of the _make_* methods (print_as does this).
    You can as well create a new method and point make_format to it.

    cut is a list of cutting enzymes.
    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes.

    Unless rebound, the list layout is used.
    """
    return self._make_list(cut, title, nc, s1)
128
129 ###### _make_* methods to be used with the virtual method make_format
130
# NOTE(review): the `def` line was missing from the extracted listing; name
# and signature are reconstructed from print_as/make_format -- confirm.
def _make_list(self, ls, title, nc, s1):
    """PF._make_list(ls, title, nc, s1) -> string.

    Return a string of the form:

    title.

    enzyme1     :   position1, position2.
    enzyme2     :   position1, position2, position3.

    followed by the section listing the non cutting enzymes.

    ls is a list of cutting enzymes.
    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes.
    """
    sites_part = self._make_list_only(ls, title)
    return sites_part + self._make_nocut_only(nc, s1)
146
# NOTE(review): the `def` line was missing from the extracted listing; name
# and signature are reconstructed from print_as/make_format -- confirm.
def _make_map(self, ls, title, nc, s1):
    """PF._make_map(ls, title, nc, s1) -> string.

    Return a string of the form:

    title.

        enzyme1, position
        |
    AAAAAAAAAAAAAAAAAAAAA...
    |||||||||||||||||||||
    TTTTTTTTTTTTTTTTTTTTT...

    followed by the section listing the non cutting enzymes.

    ls is a list of cutting enzymes.
    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes.
    """
    map_part = self._make_map_only(ls, title)
    return map_part + self._make_nocut_only(nc, s1)
165
# NOTE(review): the `def` line was missing from the extracted listing; name
# and signature are reconstructed from print_as/make_format -- confirm.
def _make_number(self, ls, title, nc, s1):
    """PF._make_number(ls, title, nc, s1) -> string.

    Return a string of the form:

    title.

    enzyme which cut 1 time:

    enzyme1     :   position1.

    enzyme which cut 2 times:

    enzyme2     :   position1, position2.
    ...

    followed by the section listing the non cutting enzymes.

    ls is a list of cutting enzymes.
    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes.
    """
    number_part = self._make_number_only(ls, title)
    return number_part + self._make_nocut_only(nc, s1)
185
# NOTE(review): the `def` line was missing from the extracted listing; the
# signature is reconstructed from the docstring -- confirm against the file.
def _make_nocut(self, ls, title, nc, s1):
    """PF._make_nocut(ls, title, nc, s1) -> string.

    Return the title followed by a formatted string of the non cutting
    enzymes.

    ls is a list of cutting enzymes -> will not be used.
    It is only here so the method has the same signature as make_format.

    title is the title.
    nc is a list of non cutting enzymes.
    s1 is the sentence before the non cutting enzymes.
    """
    return title + self._make_nocut_only(nc, s1)
198
# NOTE(review): the `def` line was missing from the extracted listing; the
# signature is reconstructed from the docstring and call sites -- confirm.
def _make_nocut_only(self, nc, s1):
    """PF._make_nocut_only(nc, s1) -> string.

    Return a formatted string of the non cutting enzymes.

    nc is a list of non cutting enzymes. It is not modified; a sorted
    copy is used for the output.
    s1 is the sentence before the non cutting enzymes. When it is empty a
    default sentence is used.

    When nc is empty, s1 is returned unchanged.

    The names are written in sorted order, each left-justified to
    self.NameWidth; a row is closed as soon as it becomes longer than
    self.linesize.
    """
    if not nc:
        return s1
    header = s1 or '\n Enzymes which do not cut the sequence.\n\n'
    width, limit = self.NameWidth, self.linesize
    rows, row = [], ''
    # sorted() rather than nc.sort(): leave the caller's list untouched.
    for name in sorted(nc):
        row += str(name).ljust(width)
        if len(row) > limit:
            rows.append(row)
            row = ''
    # The trailing (possibly empty) row is always written, as before.
    rows.append(row)
    return header + '\n'.join(rows) + '\n'
220
# NOTE(review): the `def` line was missing from the extracted listing; the
# signature is reconstructed from the docstring and call sites -- confirm.
def _make_list_only(self, ls, title):
    """PF._make_list_only(ls, title) -> string.

    Return a string of the form:

    title.

    enzyme1     :   position1, position2.
    enzyme2     :   position1, position2, position3.
    ...

    ls is a list of results.
    title is a string.
    Non cutting enzymes are not included.

    When ls is empty, title is returned unchanged.
    """
    if not ls:
        return title
    return self.__next_section(ls, title)
238
# NOTE(review): the `def` line was missing from the extracted listing; the
# signature is reconstructed from the docstring and call sites -- confirm.
def _make_number_only(self, ls, title):
    """PF._make_number_only(ls, title) -> string.

    Return a string of the form:

    title.

    enzyme which cut 1 time:

    enzyme1     :   position1.

    enzyme which cut 2 times:

    enzyme2     :   position1, position2.
    ...

    ls is a list of results. It is not modified.
    title is a string.
    Non cutting enzymes are not included.

    When ls is empty, title is returned unchanged. Otherwise one section
    is written per distinct number of sites, in increasing order; a
    section is only written when at least one enzyme belongs to it.
    """
    if not ls:
        return title
    # Group by number of sites instead of sorting with a cmp function
    # (Python 2 only) and tracking the current length by hand: the manual
    # tracking started at 1 and so emitted an empty "1 times" section
    # whenever no enzyme cut exactly once.
    by_count = {}
    for name, sites in ls:
        by_count.setdefault(len(sites), []).append((name, sites))
    for count in sorted(by_count):
        title += "\n\nenzymes which cut %i times :\n\n" % count
        title = self.__next_section(by_count[count], title)
    return title
274
# NOTE(review): the `def` line was missing from the extracted listing; the
# signature is reconstructed from the docstring and call sites -- confirm.
def _make_map_only(self, ls, title):
    """PF._make_map_only(ls, title) -> string.

    Return a string of the form:

    title.

        enzyme1, position
        |
    AAAAAAAAAAAAAAAAAAAAA...
    |||||||||||||||||||||
    TTTTTTTTTTTTTTTTTTTTT...

    ls is a list of results, i.e. (enzyme, [position, ...]) pairs.
    title is a string.
    Non cutting enzymes are not included.

    When ls is empty, title is returned unchanged.

    The map is drawn from self.sequence in chunks of 60 bases. Each chunk
    is preceded by two rows per cut position (a label row and a tick row)
    and followed by a ruler giving its first and last base number.
    self.sequence must support len(), str() and .complement().
    """
    if not ls:
        return title

    # Cut position -> names of every enzyme cutting at that position.
    enzymemap = {}
    for enzyme, cuts in ls:
        for position in cuts:
            enzymemap.setdefault(position, []).append(str(enzyme))
    positions = sorted(enzymemap)

    width = 60
    length = len(self.sequence)
    sequence = str(self.sequence)
    revsequence = str(self.sequence.complement())

    # End coordinates of the full chunks; whatever follows the last of them
    # (or the whole sequence when it has at most `width` bases) is the
    # final chunk.
    chunk_ends = list(range(width, length, width))
    last_start = chunk_ends[-1] if chunk_ends else 0

    # Chunk start -> sorted cut positions drawn above that chunk. Every
    # chunk gets an entry, even when no position falls into it; leaving
    # entries out used to raise KeyError while drawing.
    cutloc = {}
    taken = 0
    for end in chunk_ends:
        first = taken
        while taken < len(positions) and positions[taken] <= end:
            taken += 1
        cutloc[end - width] = positions[first:taken]
    cutloc[last_start] = positions[taken:]

    def marker_rows(keys, chunk_end, end_col):
        """Return the label and tick rows for the cut positions of a chunk.

        keys are the positions to draw, chunk_end is the number of the last
        base of the chunk and end_col the column of that base.
        """
        rows = []
        ticks = ' ' * width
        for key in keys:
            label = ''.join(' ' + name for name in enzymemap[key])
            if key == chunk_end:
                # The last base of a chunk cannot be located with
                # key % width (it may be 0), so its column is passed in.
                prefix = ticks[:end_col]
                rows.append(prefix + str(key) + label + '\n')
                rows.append(prefix + '|\n')
                break
            col = key % width
            rows.append(ticks[:col - 1] + str(key) + label + '\n')
            # Ticks accumulate, so later rows keep the earlier markers.
            ticks = ticks[:col - 1] + '|' + ticks[col:]
            rows.append(ticks[:col - 1] + '|' + ticks[col:] + '\n')
        return ''.join(rows)

    parts = [title or '']
    for end in chunk_ends:
        start = end - width
        parts.append(marker_rows(cutloc[start], end, width - 1))
        ruler = (str(start + 1).ljust(15) + ' ' * 30
                 + str(end).rjust(15) + '\n\n')
        parts.append('\n'.join((sequence[start:end], '|' * width,
                                revsequence[start:end], ruler)))

    # Final chunk: its last base sits at column (length - last_start - 1),
    # not at column (length - 1) of the whole sequence.
    tail = length - last_start
    parts.append(marker_rows(cutloc[last_start], length, tail - 1))
    parts.append(sequence[last_start:length] + '\n')
    parts.append('|' * tail + '\n')
    parts.append(revsequence[last_start:length] + '\n')
    parts.append(str(last_start + 1).ljust(15) + ' ' * (tail - 30)
                 + str(length).rjust(15) + '\n\n')
    return ''.join(parts)
383
384 ###### private method to do lists:
385
# NOTE(review): the `def` line was missing from the extracted listing; the
# signature is reconstructed from the docstring and call sites -- confirm.
def __next_section(self, ls, into):
    """PF.__next_section(ls, into) -> string.

    ls is a list of tuples (name, [int, int, ...]). It is not modified;
    a sorted copy is used for the output.
    into is a string to which the formatted ls will be added.

    Format ls as a string of lines of the form:

    enzyme1     :   position1.
    enzyme2     :   position2, position3.

    One line is written per entry, in sorted order, with the name
    left-justified to self.NameWidth. A position list longer than
    self.linesize - self.MaxSize is broken after a comma and continued on
    lines indented by self.NameWidth + self.Indent spaces.

    The formatted lines are appended to into and the result is returned.
    """
    indentation = '\n' + (self.NameWidth + self.Indent) * ' '
    linesize = self.linesize - self.MaxSize
    # Up to `linesize` characters followed by a ',' or the final '.'.
    # Raw string so the regex escapes are not read as string escapes.
    pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
    lines = [into]
    for name, sites in sorted(ls):
        positions = ', '.join([str(site) for site in sites]) + '.'
        if len(positions) > linesize:
            # Cut where appropriate and add the indentation.
            pieces = [match.group() for match in pat.finditer(positions)]
            positions = indentation.join(pieces)
        lines.append(str(name).ljust(self.NameWidth))
        lines.append(' : ')
        lines.append(positions)
        lines.append('\n')
    return ''.join(lines)
419
| Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Feb 5 18:02:55 2013 | http://epydoc.sourceforge.net |