Package Bio :: Package Alphabet :: Module IUPAC
[hide private]
[frames | no frames]

Source Code for Module Bio.Alphabet.IUPAC

  1  # Copyright 2000-2001 by Andrew Dalke. 
  2  # Revisions copyright 2008 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """Standard nucleotide and protein alphabets defined by IUPAC.""" 
  9   
 10  from Bio import Alphabet 
 11  from Bio.Data import IUPACData 
 12   
 13   
 14  # #################### Protein 
 15   
 16  # From the IUPAC definition at: 
 17  #   http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21 
 18   
 19  assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper() 
 20   
 21   
class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Extended uppercase IUPAC protein single letter alphabet including X etc.

    In addition to the standard 20 single letter protein codes, this includes:

     - B = "Asx";  Aspartic acid (D) or Asparagine (N)
     - X = "Xxx";  Unknown or 'other' amino acid
     - Z = "Glx";  Glutamic acid (E) or Glutamine (Q)
     - J = "Xle";  Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
     - U = "Sec";  Selenocysteine
     - O = "Pyl";  Pyrrolysine

    This alphabet is not intended to be used with X for Selenocysteine
    (an ad-hoc standard prior to the IUPAC adoption of U instead).
    """

    # The permitted letters are taken verbatim from the shared IUPAC data table.
    letters = IUPACData.extended_protein_letters


# Ready-made module-level instance of the extended protein alphabet.
extended_protein = ExtendedIUPACProtein()

# Import-time sanity check: the standard protein letters must be upper case.
assert IUPACData.protein_letters == IUPACData.protein_letters.upper()


class IUPACProtein(ExtendedIUPACProtein):
    """Uppercase IUPAC protein alphabet restricted to the 20 standard amino acids.

    Subclasses ExtendedIUPACProtein and overrides only ``letters``.
    """

    # Narrower letter set than the parent class: standard residues only.
    letters = IUPACData.protein_letters


# Ready-made module-level instance of the standard protein alphabet.
protein = IUPACProtein()

# #################### DNA


# The next two are the IUPAC definitions, from:
#   http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """Uppercase IUPAC DNA alphabet that includes the ambiguity codes.

    The permitted letters are those in ``IUPACData.ambiguous_dna_letters``.
    """

    letters = IUPACData.ambiguous_dna_letters


# Ready-made module-level instance of the ambiguous DNA alphabet.
ambiguous_dna = IUPACAmbiguousDNA()


class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """Uppercase IUPAC DNA alphabet without ambiguity codes (letters GATC only).

    Subclasses IUPACAmbiguousDNA and overrides only ``letters``.
    """

    letters = IUPACData.unambiguous_dna_letters


# Ready-made module-level instance of the unambiguous DNA alphabet.
unambiguous_dna = IUPACUnambiguousDNA()


# Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """Extended IUPAC DNA alphabet.

    On top of the standard letter codes GATC, the following are included:

     - B = 5-bromouridine
     - D = 5,6-dihydrouridine
     - S = thiouridine
     - W = wyosine
    """

    # The permitted letters are taken verbatim from the shared IUPAC data table.
    letters = IUPACData.extended_dna_letters


# Ready-made module-level instance of the extended DNA alphabet.
extended_dna = ExtendedIUPACDNA()

# #################### RNA


class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """Uppercase IUPAC RNA alphabet that includes the ambiguity codes.

    The permitted letters are those in ``IUPACData.ambiguous_rna_letters``.
    """

    letters = IUPACData.ambiguous_rna_letters


# Ready-made module-level instance of the ambiguous RNA alphabet.
ambiguous_rna = IUPACAmbiguousRNA()


class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """Uppercase IUPAC RNA alphabet without ambiguity codes (letters GAUC only).

    Subclasses IUPACAmbiguousRNA and overrides only ``letters``.
    """

    letters = IUPACData.unambiguous_rna_letters


# Ready-made module-level instance of the unambiguous RNA alphabet.
unambiguous_rna = IUPACUnambiguousRNA()

# are there extended forms?
# class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
#     letters = extended_rna_letters
#     # B == 5-bromouridine
#     # D == 5,6-dihydrouridine
#     # S == thiouridine
#     # W == wyosine