1
2
3
4
5
6
7
8
9
10
11 """ Notes about the various classes of the restriction enzyme implementation.
12
13 RestrictionType is the type of all restriction enzymes.
14 ----------------------------------------------------------------------------
15 AbstractCut implements some methods that are common to all enzymes.
16 ----------------------------------------------------------------------------
17 NoCut, OneCut,TwoCuts represent the number of double strand cuts
18 produced by the enzyme.
19 they correspond to the 4th field of the rebase
20 record emboss_e.NNN.
21 0->NoCut : the enzyme is not characterised.
22 2->OneCut : the enzyme produces one double strand cut.
23 4->TwoCuts : two double strand cuts.
24 ----------------------------------------------------------------------------
25 Meth_Dep, Meth_Undep represent the methylation susceptibility to
26 the enzyme.
27 Not implemented yet.
28 ----------------------------------------------------------------------------
29 Palindromic, if the site is palindromic or not.
30 NotPalindromic allow some optimisations of the code.
31 No need to check the reverse strand
32 with palindromic sites.
33 ----------------------------------------------------------------------------
34 Unknown, Blunt, represent the overhang.
35 Ov5, Ov3 Unknown is here for symmetry reasons and
36 corresponds to enzymes that are not characterised
37 in rebase.
38 ----------------------------------------------------------------------------
39 Defined, Ambiguous, represent the sequence of the overhang.
40 NotDefined
41 NotDefined is for enzymes not characterised in
42 rebase.
43
44 Defined correspond to enzymes that display a
45 constant overhang whatever the sequence.
46 ex : EcoRI. G^AATTC -> overhang :AATT
47 CTTAA^G
48
49 Ambiguous : the overhang varies with the
50 sequence restricted.
51 Typically enzymes which cut outside their
52 restriction site or (but not always)
53 inside an ambiguous site.
54 ex:
55 AcuI CTGAAG(22/20) -> overhang : NN
56 AasI GACNNN^NNNGTC -> overhang : NN
57 CTGN^NNNNNCAG
58
59 note : these 3 classes refer to the overhang, not the site.
60 So the enzyme ApoI (RAATTY) is defined even if its restriction
61 site is ambiguous.
62
63 ApoI R^AATTY -> overhang : AATT -> Defined
64 YTTAA^R
65 Accordingly, blunt enzymes are always Defined even
66 when they cut outside their restriction site.
67 ----------------------------------------------------------------------------
68 Not_available, as found in rebase file emboss_r.NNN files.
69 Commercially_available
70 allow the selection of the enzymes according to
71 their suppliers to reduce the quantity
72 of results.
73 Also will allow the implementation of buffer
74 compatibility tables. Not implemented yet.
75
76 the list of suppliers is extracted from
77 emboss_s.NNN
78 ----------------------------------------------------------------------------
79 """
80
81 import re
82 import itertools
83
84 from Bio.Seq import Seq, MutableSeq
85 from Bio.Alphabet import IUPAC
86
87 from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict
88 from Bio.Restriction.Restriction_Dictionary import typedict
89 from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict
90
91 from Bio.Restriction.RanaConfig import *
92 from Bio.Restriction.PrintFormat import PrintFormat
98 """Check characters in a string (PRIVATE).
99
100 Remove digits and white space present in string. Allows any valid ambiguous
101 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted).
102
103 Other characters (e.g. symbols) trigger a TypeError.
104
105 Returns the string WITH A LEADING SPACE (!). This is for backwards
106 compatibility, and may in part be explained by the fact that
107 Bio.Restriction doesn't use zero based counting.
108 """
109
110 seq_string = "".join(seq_string.split()).upper()
111
112 for c in "0123456789":
113 seq_string = seq_string.replace(c, "")
114
115 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")) :
116 raise TypeError("Invalid character found in %s" % repr(seq_string))
117 return " " + seq_string
118
119
# Compatibility table between IUPAC nucleotide codes: each key maps to a string
# holding every code that shares at least one concrete base with it
# (e.g. 'R' = A/G lists 'A', 'G', 'N', ... but none of 'C', 'T' or 'Y').
# NOTE(review): presumably used to compare ambiguous overhangs - confirm
# against the callers, which are not visible in this part of the file.
120 matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN',
121 'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY',
122 'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY',
123 'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY',
124 'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY',
125 'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'}
126
# DNA is an alias of the imported Seq class.
127 DNA = Seq
228
231 """RestrictionType. Type from which derives all enzyme classes.
232
233 Implement the operator methods."""
234
def __init__(cls, name='', bases=(), dct={}):
    """RE(name, bases, dct) -> RestrictionType instance.

    Not intended to be used in normal operation. The enzymes are
    instantiated when importing the module.

    Arguments:
     - name  - name of the enzyme; must not contain a hyphen.
     - bases - accepted as part of the signature, not used by this method.
     - dct   - accepted as part of the signature, not used by this method
               (it is never mutated, so the shared default is harmless).

    Side effect: cls.compsite is replaced by re.compile(cls.compsite).

    Raises ValueError if name contains a hyphen, or if cls.compsite
    cannot be compiled as a regular expression.
    """
    if "-" in name:
        raise ValueError("Problem with hyphen in %s as enzyme name"
                         % repr(name))
    try:
        cls.compsite = re.compile(cls.compsite)
    except Exception:
        # Any compilation failure is reported as a ValueError that names the
        # offending pattern. The "except X, err" form was dropped because it
        # is a syntax error on Python 3 and err was never used.
        raise ValueError("Problem with regular expression, re.compiled(%s)"
                         % repr(cls.compsite))
253
265
267 """RE.__div__(other) -> list.
268
269 RE/other
270 returns RE.search(other)."""
271 return cls.search(other)
272
274 """RE.__rdiv__(other) -> list.
275
276 other/RE
277 returns RE.search(other)."""
278 return cls.search(other)
279
281 """RE.__truediv__(other) -> list.
282
283 RE/other
284 returns RE.search(other)."""
285 return cls.search(other)
286
288 """RE.__rtruediv__(other) -> list.
289
290 other/RE
291 returns RE.search(other)."""
292 return cls.search(other)
293
295 """RE.__floordiv__(other) -> list.
296
297 RE//other
298 returns RE.catalyse(other)."""
299 return cls.catalyse(other)
300
302 """RE.__rfloordiv__(other) -> list.
303
304 other//RE
305 returns RE.catalyse(other)."""
306 return cls.catalyse(other)
307
309 """RE.__str__() -> str.
310
311 return the name of the enzyme."""
312 return cls.__name__
313
315 """RE.__repr__() -> str.
316
317 used with eval or exec will instantiate the enzyme."""
318 return "%s" % cls.__name__
319
321 """RE.__len__() -> int.
322
323 length of the recognition site."""
324 return cls.size
325
327
328
329 return id(cls)
330
332 """RE == other -> bool
333
334 True if RE and other are the same enzyme.
335
336 Specifically this checks they are the same Python object.
337 """
338
339 return id(cls)==id(other)
340
342 """RE != other -> bool.
343 isoschizomer strict, same recognition site, same restriction -> False
344 all the other-> True
345
346 WARNING - This is not the inverse of the __eq__ method.
347 """
348 if not isinstance(other, RestrictionType):
349 return True
350 elif cls.charac == other.charac:
351 return False
352 else:
353 return True
354
356 """RE >> other -> bool.
357
358 neoschizomer : same recognition site, different restriction. -> True
359 all the others : -> False"""
360 if not isinstance(other, RestrictionType):
361 return False
362 elif cls.site == other.site and cls.charac != other.charac:
363 return True
364 else:
365 return False
366
368 """a % b -> bool.
369
370 Test compatibility of the overhang of a and b.
371 True if a and b have compatible overhang."""
372 if not isinstance(other, RestrictionType):
373 raise TypeError(
374 'expected RestrictionType, got %s instead' % type(other))
375 return cls._mod1(other)
376
378 """a >= b -> bool.
379
380 a is greater or equal than b if the a site is longer than b site.
381 if their site have the same length sort by alphabetical order of their
382 names."""
383 if not isinstance(other, RestrictionType):
384 raise NotImplementedError
385 if len(cls) > len(other):
386 return True
387 elif cls.size == len(other) and cls.__name__ >= other.__name__:
388 return True
389 else:
390 return False
391
393 """a > b -> bool.
394
395 sorting order:
396 1. size of the recognition site.
397 2. if equal size, alphabetical order of the names."""
398 if not isinstance(other, RestrictionType):
399 raise NotImplementedError
400 if len(cls) > len(other):
401 return True
402 elif cls.size == len(other) and cls.__name__ > other.__name__:
403 return True
404 else:
405 return False
406
408 """a <= b -> bool.
409
410 sorting order:
411 1. size of the recognition site.
412 2. if equal size, alphabetical order of the names."""
413 if not isinstance(other, RestrictionType):
414 raise NotImplementedError
415 elif len(cls) < len(other):
416 return True
417 elif len(cls) == len(other) and cls.__name__ <= other.__name__:
418 return True
419 else:
420 return False
421
423 """a < b -> bool.
424
425 sorting order:
426 1. size of the recognition site.
427 2. if equal size, alphabetical order of the names."""
428 if not isinstance(other, RestrictionType):
429 raise NotImplementedError
430 elif len(cls) < len(other):
431 return True
432 elif len(cls) == len(other) and cls.__name__ < other.__name__:
433 return True
434 else:
435 return False
436
439 """Implement the methods that are common to all restriction enzymes.
440
441 All the methods are classmethod.
442
443 For internal use only. Not meant to be instantiated."""
444
445 @classmethod
446 - def search(cls, dna, linear=True):
447 """RE.search(dna, linear=True) -> list.
448
449 return a list of all the site of RE in dna. Compensate for circular
450 sequences and so on.
451
452 dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.
453
454 if linear is False, the restriction sites than span over the boundaries
455 will be included.
456
457 The positions are the first base of the 3' fragment,
458 i.e. the first base after the position the enzyme will cut. """
459
460
461
462
463
464
465
466 if isinstance(dna, FormattedSeq):
467 cls.dna = dna
468 return cls._search()
469 else:
470 cls.dna = FormattedSeq(