Package Bio :: Package Alphabet :: Module IUPAC
[hide private]
[frames | no frames]

Source Code for Module Bio.Alphabet.IUPAC

  1  # Copyright 2000-2001 by Andrew Dalke. 
  2  # Revisions copyright 2008 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """Standard nucleotide and protein alphabets defined by IUPAC.""" 
  9   
 10  from Bio import Alphabet 
 11  from Bio.Data import IUPACData 
 12   
 13  ##################### Protein 
 14   
 15  # From the IUPAC definition at: 
 16  #   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21 
 17   
 18  assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper() 
 19   
 20   
class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

    B = "Asx";  Aspartic acid (D) or Asparagine (N)
    X = "Xxx";  Unknown or 'other' amino acid
    Z = "Glx";  Glutamic acid (E) or Glutamine (Q)
    J = "Xle";  Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
    U = "Sec";  Selenocysteine
    O = "Pyl";  Pyrrolysine

    This alphabet is not intended to be used with X for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of U instead).
    """

    # The permitted letters come straight from the shared IUPAC data tables.
    letters = IUPACData.extended_protein_letters
# Module-level instance of ExtendedIUPACProtein.
extended_protein = ExtendedIUPACProtein()

# Sanity check on the data table: upper-casing the standard protein
# letters must leave them unchanged.
assert IUPACData.protein_letters.upper() == IUPACData.protein_letters
class IUPACProtein(ExtendedIUPACProtein):
    """Uppercase single letter IUPAC alphabet for the 20 standard amino acids.

    Derives from ExtendedIUPACProtein and replaces its letters with
    IUPACData.protein_letters.
    """

    # Overrides the letters inherited from ExtendedIUPACProtein.
    letters = IUPACData.protein_letters
# Module-level instance of IUPACProtein.
protein = IUPACProtein()

##################### DNA


# The next two are the IUPAC definitions, from:
#   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """DNA alphabet of uppercase IUPAC letters, ambiguity codes included.

    The permitted letters are IUPACData.ambiguous_dna_letters.
    """

    letters = IUPACData.ambiguous_dna_letters
# Module-level instance of IUPACAmbiguousDNA.
ambiguous_dna = IUPACAmbiguousDNA()
class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """Uppercase IUPAC DNA alphabet without ambiguity codes (GATC only).

    Derives from IUPACAmbiguousDNA and replaces its letters with
    IUPACData.unambiguous_dna_letters.
    """

    # Overrides the letters inherited from IUPACAmbiguousDNA.
    letters = IUPACData.unambiguous_dna_letters
# Module-level instance of IUPACUnambiguousDNA.
unambiguous_dna = IUPACUnambiguousDNA()


# Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """DNA alphabet extending GATC with four additional letter codes.

    Besides the standard letters G, A, T and C, the following are allowed:

    B = 5-bromouridine
    D = 5,6-dihydrouridine
    S = thiouridine
    W = wyosine

    The permitted letters are IUPACData.extended_dna_letters.
    """

    letters = IUPACData.extended_dna_letters
# Module-level instance of ExtendedIUPACDNA.
extended_dna = ExtendedIUPACDNA()

##################### RNA
class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """RNA alphabet of uppercase IUPAC letters, ambiguity codes included.

    The permitted letters are IUPACData.ambiguous_rna_letters.
    """

    letters = IUPACData.ambiguous_rna_letters
# Module-level instance of IUPACAmbiguousRNA.
ambiguous_rna = IUPACAmbiguousRNA()
class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """Uppercase IUPAC RNA alphabet without ambiguity codes (GAUC only).

    Derives from IUPACAmbiguousRNA and replaces its letters with
    IUPACData.unambiguous_rna_letters.
    """

    # Overrides the letters inherited from IUPACAmbiguousRNA.
    letters = IUPACData.unambiguous_rna_letters
# Module-level instance of IUPACUnambiguousRNA.
unambiguous_rna = IUPACUnambiguousRNA()

# are there extended forms?
#class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
#    letters = extended_rna_letters
#    # B == 5-bromouridine
#    # D == 5,6-dihydrouridine
#    # S == thiouridine
#    # W == wyosine