Package Bio :: Package Alphabet :: Module IUPAC
[hide private]
[frames | no frames]

Source Code for Module Bio.Alphabet.IUPAC

  1  # Copyright 2000-2001 by Andrew Dalke. 
  2  # Revisions copyright 2008 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """Standard nucleotide and protein alphabets defined by IUPAC.""" 
  9   
 10  from Bio import Alphabet 
 11  from Bio.Data import IUPACData 
 12   
 13   
 14  # #################### Protein 
 15   
 16  # From the IUPAC definition at: 
 17  #   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21 
 18   
 19  assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper() 
 20   
 21   
class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

     - `B` = "Asx"; Aspartic acid (D) or Asparagine (N)
     - `X` = "Xxx"; Unknown or 'other' amino acid
     - `Z` = "Glx"; Glutamic acid (E) or Glutamine (Q)
     - `J` = "Xle"; Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
     - `U` = "Sec"; Selenocysteine
     - `O` = "Pyl"; Pyrrolysine

    This alphabet is not intended to be used with `X` for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of `U` instead).
    """

    # The permitted letters are taken from the shared IUPAC data module.
    letters = IUPACData.extended_protein_letters
# Module-level instance of the extended protein alphabet.
extended_protein = ExtendedIUPACProtein()

# Import-time sanity check: the standard protein letters must already be
# uppercase.
assert (
    IUPACData.protein_letters
    == IUPACData.protein_letters.upper()
)
class IUPACProtein(ExtendedIUPACProtein):
    """Uppercase IUPAC single letter protein alphabet (20 standard amino acids).

    Subclasses ExtendedIUPACProtein and replaces its letter set with
    ``IUPACData.protein_letters``.
    """

    letters = IUPACData.protein_letters
# Module-level instance of the standard protein alphabet.
protein = IUPACProtein()

# #################### DNA


# The next two are the IUPAC definitions, from:
#   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """Uppercase IUPAC DNA alphabet including the ambiguity codes.

    The letter set is ``IUPACData.ambiguous_dna_letters``.
    """

    letters = IUPACData.ambiguous_dna_letters
# Module-level instance of the ambiguous DNA alphabet.
ambiguous_dna = IUPACAmbiguousDNA()
class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """Uppercase IUPAC DNA alphabet without ambiguity codes (letters GATC only).

    Subclasses IUPACAmbiguousDNA and replaces its letter set with
    ``IUPACData.unambiguous_dna_letters``.
    """

    letters = IUPACData.unambiguous_dna_letters
# Module-level instance of the unambiguous DNA alphabet.
unambiguous_dna = IUPACUnambiguousDNA()


# Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """Extended IUPAC DNA alphabet.

    Besides the standard letter codes GATC, the following letters are
    included:

     - `B` = 5-bromouridine
     - `D` = 5,6-dihydrouridine
     - `S` = thiouridine
     - `W` = wyosine

    The letter set is ``IUPACData.extended_dna_letters``.
    """

    letters = IUPACData.extended_dna_letters
# Module-level instance of the extended DNA alphabet.
extended_dna = ExtendedIUPACDNA()

# #################### RNA
class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """Uppercase IUPAC RNA alphabet including the ambiguity codes.

    The letter set is ``IUPACData.ambiguous_rna_letters``.
    """

    letters = IUPACData.ambiguous_rna_letters
# Module-level instance of the ambiguous RNA alphabet.
ambiguous_rna = IUPACAmbiguousRNA()
class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """Uppercase IUPAC RNA alphabet without ambiguity codes (letters GAUC only).

    Subclasses IUPACAmbiguousRNA and replaces its letter set with
    ``IUPACData.unambiguous_rna_letters``.
    """

    letters = IUPACData.unambiguous_rna_letters
# Module-level instance of the unambiguous RNA alphabet.
unambiguous_rna = IUPACUnambiguousRNA()

# are there extended forms?
# class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
#     letters = extended_rna_letters
#     #   B == 5-bromouridine
#     #   D == 5,6-dihydrouridine
#     #   S == thiouridine
#     #   W == wyosine