1
2
3
4
5
6
7
8 """Standard nucleotide and protein alphabets defined by IUPAC."""
9
10 from Bio import Alphabet
11 from Bio.Data import IUPACData
12
13
14
15
16
17
# Import-time sanity check: the extended protein letters supplied by IUPACData
# must equal their own upper-cased form, i.e. contain no lower-case letters.
# NOTE: being an assert, this check is skipped when Python runs with -O.
18 assert IUPACData.extended_protein_letters == IUPACData.extended_protein_letters.upper()
19
20
22 """Extended uppercase IUPAC protein single letter alphabet including X etc.
23
24 In addition to the standard 20 single letter protein codes, this includes:
25
26 B = "Asx"; Aspartic acid (D) or Asparagine (N)
27 X = "Xxx"; Unknown or 'other' amino acid
28 Z = "Glx"; Glutamic acid (E) or Glutamine (Q)
29 J = "Xle"; Leucine (L) or Isoleucine (I), used in mass-spec (NMR)
30 U = "Sec"; Selenocysteine
31 O = "Pyl"; Pyrrolysine
32
33 This alphabet is not intended to be used with X for Selenocysteine
34 (an ad-hoc standard prior to the IUPAC adoption of U instead).
35 """
# The letter set is taken directly from IUPACData.extended_protein_letters.
# NOTE(review): the enclosing class statement is not visible in this listing;
# presumably this assignment belongs to ExtendedIUPACProtein -- confirm.
36 letters = IUPACData.extended_protein_letters
37
# Module-level instance of ExtendedIUPACProtein, bound to ``extended_protein``.
38 extended_protein = ExtendedIUPACProtein()
39
# Import-time sanity check: the standard protein letters supplied by IUPACData
# must equal their own upper-cased form, i.e. contain no lower-case letters.
# NOTE: being an assert, this check is skipped when Python runs with -O.
40 assert IUPACData.protein_letters == IUPACData.protein_letters.upper()
41
42
46
# Module-level instance of IUPACProtein, bound to ``protein``.
# NOTE(review): the IUPACProtein class statement is not visible in this
# listing (lines appear to have been dropped) -- confirm against the full file.
47 protein = IUPACProtein()
48
49
50
51
52
53
57
# Module-level instance of IUPACAmbiguousDNA, bound to ``ambiguous_dna``.
# NOTE(review): the class statement is not visible in this listing -- confirm
# its letter set against the full file.
58 ambiguous_dna = IUPACAmbiguousDNA()
59
60
64
# Module-level instance of IUPACUnambiguousDNA, bound to ``unambiguous_dna``.
# NOTE(review): the class statement is not visible in this listing.
65 unambiguous_dna = IUPACUnambiguousDNA()
66
67
68
70 """Extended IUPAC DNA alphabet.
71
72 In addition to the standard letter codes GATC, this includes:
73
74 B = 5-bromouridine
75 D = 5,6-dihydrouridine
76 S = thiouridine
77 W = wyosine
78 """
# The letter set is taken directly from IUPACData.extended_dna_letters.
# NOTE(review): the enclosing class statement is not visible in this listing;
# presumably this assignment belongs to ExtendedIUPACDNA -- confirm.
79 letters = IUPACData.extended_dna_letters
80
# Module-level instance of ExtendedIUPACDNA, bound to ``extended_dna``.
81 extended_dna = ExtendedIUPACDNA()
82
83
84
85
89
# Module-level instance of IUPACAmbiguousRNA, bound to ``ambiguous_rna``.
# NOTE(review): the class statement is not visible in this listing -- confirm
# its letter set against the full file.
90 ambiguous_rna = IUPACAmbiguousRNA()
91
92
96
# Module-level instance of IUPACUnambiguousRNA, bound to ``unambiguous_rna``.
# NOTE(review): the class statement is not visible in this listing.
97 unambiguous_rna = IUPACUnambiguousRNA()
98
99
100
101
102
103
104
105
106