Package Bio :: Package Data :: Module CodonTable
[hide private]
[frames | no frames]

Source Code for Module Bio.Data.CodonTable

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  """Codon tables based on those from the NCBI. 
  5   
  6  These tables are based on parsing the NCBI file 
  7  ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 
  8  using Scripts/update_ncbi_codon_table.py 
  9   
 10  Last updated at Version 4.0 
 11  """ 
 12   
 13  from __future__ import print_function 
 14   
 15  from Bio import Alphabet 
 16  from Bio.Alphabet import IUPAC 
 17  from Bio.Data import IUPACData 
 18   
 19   
# Registries of unambiguous codon tables, filled in by register_ncbi_table():
# the *_by_name dictionaries are keyed by table name, the *_by_id
# dictionaries by the table's numeric id.
unambiguous_dna_by_name = {}
unambiguous_dna_by_id = {}
unambiguous_rna_by_name = {}
unambiguous_rna_by_id = {}
generic_by_name = {}  # unambiguous DNA or RNA
generic_by_id = {}  # unambiguous DNA or RNA

# The matching registries for the AmbiguousCodonTable wrappers, which are
# also filled in by register_ncbi_table().
ambiguous_dna_by_name = {}
ambiguous_dna_by_id = {}
ambiguous_rna_by_name = {}
ambiguous_rna_by_id = {}
ambiguous_generic_by_name = {}  # ambiguous DNA or RNA
ambiguous_generic_by_id = {}  # ambiguous DNA or RNA

# standard IUPAC unambiguous codons
# (rebound by register_ncbi_table() when the table with id 1 is registered)
standard_dna_table = None
standard_rna_table = None
 37   
 38  # In the future, the back_table could return a statistically 
 39  # appropriate distribution of codons, so do not cache the results of 
 40  # back_table lookups! 
 41   
 42   
class TranslationError(Exception):
    """Raised when a codon has no valid or no unique translation."""
45 46
47 -class CodonTable(object):
48 """A codon-table, or genetic code.""" 49 nucleotide_alphabet = Alphabet.generic_nucleotide 50 protein_alphabet = Alphabet.generic_protein 51 52 forward_table = {} # only includes codons which actually code 53 back_table = {} # for back translations 54 start_codons = [] 55 stop_codons = [] 56 57 # Not always called from derived classes!
58 - def __init__(self, nucleotide_alphabet=nucleotide_alphabet, 59 protein_alphabet=protein_alphabet, 60 forward_table=forward_table, back_table=back_table, 61 start_codons=start_codons, stop_codons=stop_codons):
68
def __str__(self):
    """Return a simple text representation of the codon table.

    e.g.

    >>> import Bio.Data.CodonTable
    >>> print(Bio.Data.CodonTable.standard_dna_table)
    >>> print(Bio.Data.CodonTable.generic_by_id[1])

    A table that has no ``id`` or ``names`` attribute is rendered with
    the title "Table ID unknown" instead of raising AttributeError.
    """
    # Not every table carries NCBI metadata, so look it up defensively.
    table_id = getattr(self, "id", None)
    table_names = getattr(self, "names", None)
    if table_id:
        title = "Table %i" % table_id
    else:
        title = "Table ID unknown"
    if table_names:
        title += " " + ", ".join([x for x in table_names if x])

    # Use the main four letters (and the conventional ordering)
    # even for ambiguous tables
    letters = self.nucleotide_alphabet.letters
    if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \
            or (letters is not None and "T" in letters):
        letters = "TCAG"
    else:
        # Should be either RNA or generic nucleotides,
        # e.g. Bio.Data.CodonTable.generic_by_id[1]
        letters = "UCAG"

    # Every cell is nine characters wide so that it lines up with the
    # nine-dash column rule.
    rule = "--+" + "+".join("---------" for _ in letters) + "+--"
    lines = [title,
             "",
             "  |" + "|".join("  %s      " % c2 for c2 in letters) + "|",
             rule]
    for c1 in letters:
        for c3 in letters:
            line = c1 + " |"
            for c2 in letters:
                codon = c1 + c2 + c3
                line += " %s" % codon
                if codon in self.stop_codons:
                    line += " Stop|"
                else:
                    try:
                        amino = self.forward_table[codon]
                    except (KeyError, TranslationError):
                        # Codon is not covered by this table.
                        amino = "?"
                    if codon in self.start_codons:
                        line += " %s(s)|" % amino
                    else:
                        line += " %s   |" % amino
            lines.append(line + " " + c3)
        lines.append(rule)
    return "\n".join(lines)
123 124
def make_back_table(table, default_stop_codon):
    """Build a back-table (naive single codon mapping).

    ONLY RETURNS A SINGLE CODON per amino acid. When several codons in
    ``table`` map to the same amino acid, the codon that comes last in
    sort order is the one kept. The key ``None`` maps to
    ``default_stop_codon``.
    """
    # Walk the codons in sorted order so that the codon chosen for each
    # amino acid does not depend on the dictionary's own ordering.
    back_table = {table[codon]: codon for codon in sorted(table)}
    back_table[None] = default_stop_codon
    return back_table
138 139
class NCBICodonTable(CodonTable):
    """Codon table built from explicit id, names and codon data."""

    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = IUPAC.protein

    def __init__(self, id, names, table, start_codons, stop_codons):
        """Store the table data and derive the back-table from it.

        ``table`` becomes the forward table. The back-table is built by
        make_back_table() using the first entry of ``stop_codons`` as
        the default stop codon.
        """
        self.id, self.names, self.forward_table = id, names, table
        default_stop = stop_codons[0]
        self.back_table = make_back_table(table, default_stop)
        self.start_codons, self.stop_codons = start_codons, stop_codons
151 152
class NCBICodonTableDNA(NCBICodonTable):
    """NCBICodonTable whose codons use the unambiguous IUPAC DNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_dna
155 156
class NCBICodonTableRNA(NCBICodonTable):
    """NCBICodonTable whose codons use the unambiguous IUPAC RNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_rna
159 160 161 # ######## Deal with ambiguous forward translations 162
class AmbiguousCodonTable(CodonTable):
    """Wrap a codon table so that ambiguous nucleotide codes can be used.

    Forward lookups go through an AmbiguousForwardTable built from the
    wrapped table. Any attribute not found on the wrapper itself is
    looked up on the wrapped table.
    """

    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Build the ambiguous view of ``codon_table``.

        The ``*_values`` arguments map each ambiguity letter to the
        letters it stands for. The back-table is taken unchanged from
        ``codon_table``; the start and stop codon lists are extended
        with list_ambiguous_codons().
        """
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            AmbiguousForwardTable(codon_table.forward_table,
                                                  ambiguous_nucleotide_values,
                                                  ambiguous_protein_values),
                            codon_table.back_table,

                            # These two are WRONG!  I need to get the
                            # list of ambiguous codons which code for
                            # the stop codons  XXX
                            list_ambiguous_codons(codon_table.start_codons,
                                                  ambiguous_nucleotide_values),
                            list_ambiguous_codons(codon_table.stop_codons,
                                                  ambiguous_nucleotide_values)
                            )
        self._codon_table = codon_table

    # Be sneaky and forward attribute lookups to the original table.
    # This lets us get the names, if the original table is an NCBI
    # table.
    def __getattr__(self, name):
        """Forward a failed attribute lookup to the wrapped codon table."""
        # copy and pickle create instances without running __init__, and
        # then probe them for special methods. Without this guard the
        # lookup of the missing _codon_table would re-enter __getattr__
        # and recurse without bound.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
190 191
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """List the amino acids a (possibly ambiguous) codon can stand for.

    Each of the three letters of ``codon`` is expanded through
    ``ambiguous_nucleotide_values`` and every resulting concrete codon
    is looked up in ``forward_table``.

    Raises KeyError(codon) if none of the concrete codons is in the
    forward table (a true stop codon), and TranslationError if only
    some of them are.
    """
    first, second, third = codon
    options1 = ambiguous_nucleotide_values[first]
    options2 = ambiguous_nucleotide_values[second]
    options3 = ambiguous_nucleotide_values[third]
    concrete_codons = (n1 + n2 + n3
                       for n1 in options1
                       for n2 in options2
                       for n3 in options3)

    coded = {}  # used as a set of amino acids
    uncoded = []
    for concrete in concrete_codons:
        try:
            amino = forward_table[concrete]
        except KeyError:
            # If tripping over a stop codon
            uncoded.append(concrete)
        else:
            coded[amino] = 1

    if not uncoded:
        return list(coded)
    if coded:
        raise TranslationError("ambiguous codon %r codes for both"
                               " proteins and stop codons" % codon)
    # This is a true stop codon - tell the caller about it
    raise KeyError(codon)
214 215
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extend a codon list to include all possible ambiguous codons.

    e.g.::

         ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UAA', 'UGA'] -> ['UAA', 'UGA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'.
    Thus only two more codons are added in the following:

    e.g.::

        ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    An ambiguous codon is added only if every concrete codon it can
    stand for is already in ``codons``.

    Returns a new (longer) list of codon strings: the original codons
    in their given order, followed by the added ambiguous codons in
    sorted order.
    """
    # Note ambiguous_nucleotide_values['R'] = 'AG' (etc)
    # For each codon position, collect the letters whose meanings are all
    # seen at that position. Combining them will generate things like
    # 'TRR' from ['TAG', 'TGA'], which we don't want to include, so the
    # candidates are filtered again below.
    per_position = []
    for position in range(3):
        observed = set(codon[position] for codon in codons)
        per_position.append(sorted(
            letter for letter, meanings
            in ambiguous_nucleotide_values.items()
            if observed.issuperset(meanings)))
    c1_list, c2_list, c3_list = per_position

    known = set(codons)
    # candidates is a list (not a set) to preserve the iteration order;
    # the set alongside it only makes the duplicate check cheap.
    candidates = []
    seen = set()
    for c1 in c1_list:
        for c2 in c2_list:
            for c3 in c3_list:
                codon = c1 + c2 + c3
                if codon not in seen and codon not in known:
                    seen.add(codon)
                    candidates.append(codon)

    answer = codons[:]  # copy
    for ambig_codon in candidates:
        # e.g. 'TRR' -> 'TAA', 'TAG', 'TGA', 'TGG'
        expansions = (n1 + n2 + n3
                      for n1 in ambiguous_nucleotide_values[ambig_codon[0]]
                      for n2 in ambiguous_nucleotide_values[ambig_codon[1]]
                      for n3 in ambiguous_nucleotide_values[ambig_codon[2]])
        # Reject the candidate as soon as one expansion is not in the
        # original list (it would then also stand for some other codon).
        if all(codon in known for codon in expansions):
            answer.append(ambig_codon)
    return answer
# Sanity checks for list_ambiguous_codons, run once when the module is
# imported (assert statements are skipped when Python runs with -O).
assert list_ambiguous_codons(['TGA', 'TAA'], IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TRA']
assert list_ambiguous_codons(['TAG', 'TGA'], IUPACData.ambiguous_dna_values) == ['TAG', 'TGA']
assert list_ambiguous_codons(['TAG', 'TAA'], IUPACData.ambiguous_dna_values) == ['TAG', 'TAA', 'TAR']
assert list_ambiguous_codons(['UAG', 'UAA'], IUPACData.ambiguous_rna_values) == ['UAG', 'UAA', 'UAR']
assert list_ambiguous_codons(['TGA', 'TAA', 'TAG'],
                             IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

# Forward translation is "onto", that is, any given codon always maps
# to the same protein, or it doesn't map at all.  Thus, I can build
# off of an existing table to produce the ambiguous mappings.
#
# This handles the general case.  Perhaps it's overkill?
#  >>> t = CodonTable.ambiguous_dna_by_id[1]
#  >>> t.forward_table["AAT"]
#  'N'
#  >>> t.forward_table["GAT"]
#  'D'
#  >>> t.forward_table["RAT"]
#  'B'
#  >>> t.forward_table["YTA"]
#  'L'


class AmbiguousForwardTable(object):
    """Forward codon lookup that also resolves ambiguous codons.

    Indexing with a codon returns its amino acid (or an ambiguous
    amino-acid symbol covering every possibility). KeyError is raised
    for a stop codon and TranslationError when there is no unique
    translation. Results, including failures, are cached per codon.
    """

    def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
        """Store the tables and invert the ambiguous protein mapping.

        ``ambiguous_nucleotide`` and ``ambiguous_protein`` map each
        ambiguity symbol to the letters it stands for.
        """
        self.forward_table = forward_table
        self.ambiguous_nucleotide = ambiguous_nucleotide
        self.ambiguous_protein = ambiguous_protein

        # Invert the protein mapping: residue -> symbols that include it.
        covering = {}
        for symbol, residues in ambiguous_protein.items():
            for residue in residues:
                covering.setdefault(residue, {})[symbol] = 1
        self._inverted = {residue: list(symbols)
                          for residue, symbols in covering.items()}

        self._cache = {}

    def get(self, codon, failobj=None):
        """Return the translation of ``codon``, or ``failobj`` on KeyError."""
        try:
            return self[codon]
        except KeyError:
            return failobj

    def __getitem__(self, codon):
        """Translate ``codon``, resolving ambiguity letters where possible."""
        # Cached outcome first; the exception classes themselves are
        # stored as markers for cached failures.
        if codon in self._cache:
            cached = self._cache[codon]
            if cached is TranslationError:
                raise TranslationError(codon)  # no unique translation
            if cached is KeyError:
                raise KeyError(codon)  # it's a stop codon
            return cached

        # Plain codon present in the wrapped forward table.
        try:
            amino = self.forward_table[codon]
        except KeyError:
            pass
        else:
            self._cache[codon] = amino
            return amino

        # XXX Need to make part of this into a method which returns
        # a list of all possible encodings for a codon!
        try:
            candidates = list_possible_proteins(codon,
                                                self.forward_table,
                                                self.ambiguous_nucleotide)
        except KeyError:
            self._cache[codon] = KeyError
            raise KeyError(codon)  # stop codon
        except TranslationError:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)  # does not code
        assert len(candidates) > 0, "unambiguous codons must code"

        # Hah!  Only one possible protein, so use it
        if len(candidates) == 1:
            self._cache[codon] = candidates[0]
            return candidates[0]

        # See if there's an ambiguous protein encoding for the multiples.
        # Count, for each ambiguous symbol, how many of the candidate
        # residues it covers; keep the symbols that cover all of them.
        tally = {}
        for amino in candidates:
            for symbol in self._inverted[amino]:
                tally[symbol] = tally.get(symbol, 0) + 1
        needed = len(candidates)
        shared = [symbol for symbol, count in tally.items()
                  if count == needed]

        # No amino acid encoding for the results
        if not shared:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)  # no valid translation

        # All of these are valid, so choose one.
        # To be unique, pick the smallest ambiguity, then alphabetically.
        # Can get this if "X" encodes for everything.
        best = min(shared,
                   key=lambda symbol: (len(self.ambiguous_protein[symbol]),
                                       symbol))
        self._cache[codon] = best
        return best
390 391
def register_ncbi_table(name, alt_name, id,
                        table, start_codons, stop_codons):
    """Turn codon table data into objects and store them in the dictionaries (PRIVATE).

    Builds DNA, RNA and generic (DNA or RNA) tables from the DNA codon
    data, each with an ambiguity-aware wrapper, and registers all six
    in the module-level ``*_by_id`` and ``*_by_name`` dictionaries. The
    table with id 1 also becomes ``standard_dna_table`` and
    ``standard_rna_table``.
    """
    global standard_dna_table, standard_rna_table

    # In most cases names are divided by "; ", however there is also
    # Table 11 'Bacterial, Archaeal and Plant Plastid Code', previously
    # 'Bacterial and Plant Plastid' which used to be just 'Bacterial'
    normalised = name.replace(" and ", "; ").replace(", ", "; ")
    names = [part.strip() for part in normalised.split("; ")]

    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)

    ambig_dna = AmbiguousCodonTable(dna,
                                    IUPAC.ambiguous_dna,
                                    IUPACData.ambiguous_dna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    # replace all T's with U's for the RNA tables; the generic tables
    # hold both spellings of every codon
    rna_table = {}
    generic_table = {}
    for dna_codon, amino in table.items():
        rna_codon = dna_codon.replace("T", "U")
        generic_table[dna_codon] = amino
        generic_table[rna_codon] = amino
        rna_table[rna_codon] = amino

    rna_start_codons = [codon.replace("T", "U") for codon in start_codons]
    generic_start_codons = []
    for dna_codon, rna_codon in zip(start_codons, rna_start_codons):
        generic_start_codons.extend((dna_codon, rna_codon))

    rna_stop_codons = [codon.replace("T", "U") for codon in stop_codons]
    generic_stop_codons = []
    for dna_codon, rna_codon in zip(stop_codons, rna_stop_codons):
        generic_stop_codons.extend((dna_codon, rna_codon))

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)

    # The following isn't very elegant, but seems to work nicely.
    # The generic table accepts "T" as well as the RNA ambiguity letters.
    _merged_values = dict(IUPACData.ambiguous_rna_values)
    _merged_values["T"] = "U"
    ambig_generic = AmbiguousCodonTable(generic,
                                        Alphabet.NucleotideAlphabet(),
                                        _merged_values,
                                        IUPAC.extended_protein,
                                        IUPACData.extended_protein_values)

    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    ambig_rna = AmbiguousCodonTable(rna,
                                    IUPAC.ambiguous_rna,
                                    IUPACData.ambiguous_rna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    # (by-id registry, by-name registry, table object)
    registrations = (
        (unambiguous_dna_by_id, unambiguous_dna_by_name, dna),
        (unambiguous_rna_by_id, unambiguous_rna_by_name, rna),
        (generic_by_id, generic_by_name, generic),
        (ambiguous_dna_by_id, ambiguous_dna_by_name, ambig_dna),
        (ambiguous_rna_by_id, ambiguous_rna_by_name, ambig_rna),
        (ambiguous_generic_by_id, ambiguous_generic_by_name, ambig_generic),
    )

    for by_id, _by_name, codon_table in registrations:
        by_id[id] = codon_table

    # The alternative name is only a lookup key when one was given.
    if alt_name is not None:
        names.append(alt_name)

    for _by_id, by_name, codon_table in registrations:
        for label in names:
            by_name[label] = codon_table
475 476 477 ########################################################################## 478 # Start of auto-generated output from Scripts/update_ncbi_codon_table.py # 479 ########################################################################## 480 481 482 register_ncbi_table(name='Standard', 483 alt_name='SGC0', id=1, 484 table={ 485 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 486 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 487 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 488 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 489 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 490 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 491 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 492 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 493 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 494 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 495 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 496 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 497 'GGG': 'G', }, 498 stop_codons=['TAA', 'TAG', 'TGA'], 499 start_codons=['TTG', 'CTG', 'ATG']) 500 501 register_ncbi_table(name='Vertebrate Mitochondrial', 502 alt_name='SGC1', id=2, 503 table={ 504 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 505 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 506 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 507 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 508 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 509 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 510 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 511 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 512 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V', 513 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 514 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 
'E', 515 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 516 stop_codons=['TAA', 'TAG', 'AGA', 'AGG'], 517 start_codons=['ATT', 'ATC', 'ATA', 'ATG', 'GTG']) 518 519 register_ncbi_table(name='Yeast Mitochondrial', 520 alt_name='SGC2', id=3, 521 table={ 522 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 523 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 524 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T', 525 'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P', 526 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 527 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 528 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 529 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 530 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 531 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 532 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 533 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 534 'GGA': 'G', 'GGG': 'G', }, 535 stop_codons=['TAA', 'TAG'], 536 start_codons=['ATA', 'ATG']) 537 538 register_ncbi_table(name='Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma', 539 alt_name='SGC3', id=4, 540 table={ 541 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 542 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 543 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 544 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 545 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 546 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 547 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 548 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 549 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 550 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 551 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 
'GAT': 'D', 552 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 553 'GGA': 'G', 'GGG': 'G', }, 554 stop_codons=['TAA', 'TAG'], 555 start_codons=['TTA', 'TTG', 'CTG', 'ATT', 'ATC', 'ATA', 556 'ATG', 'GTG']) 557 558 register_ncbi_table(name='Invertebrate Mitochondrial', 559 alt_name='SGC4', id=5, 560 table={ 561 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 562 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 563 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 564 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 565 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 566 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 567 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 568 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 569 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 570 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 571 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 572 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 573 'GGA': 'G', 'GGG': 'G', }, 574 stop_codons=['TAA', 'TAG'], 575 start_codons=['TTG', 'ATT', 'ATC', 'ATA', 'ATG', 'GTG']) 576 577 register_ncbi_table(name='Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear', 578 alt_name='SGC5', id=6, 579 table={ 580 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 581 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 582 'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 583 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 584 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 585 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 586 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 587 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 588 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 589 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 590 'GTG': 'V', 
'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 591 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 592 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 593 stop_codons=['TGA'], 594 start_codons=['ATG']) 595 596 register_ncbi_table(name='Echinoderm Mitochondrial; Flatworm Mitochondrial', 597 alt_name='SGC8', id=9, 598 table={ 599 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 600 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 601 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 602 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 603 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 604 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 605 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 606 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 607 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 608 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 609 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 610 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 611 'GGA': 'G', 'GGG': 'G', }, 612 stop_codons=['TAA', 'TAG'], 613 start_codons=['ATG', 'GTG']) 614 615 register_ncbi_table(name='Euplotid Nuclear', 616 alt_name='SGC9', id=10, 617 table={ 618 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 619 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 620 'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L', 621 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 622 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 623 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 624 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 625 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 626 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 627 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 628 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 629 
'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 630 'GGA': 'G', 'GGG': 'G', }, 631 stop_codons=['TAA', 'TAG'], 632 start_codons=['ATG']) 633 634 register_ncbi_table(name='Bacterial, Archaeal and Plant Plastid', 635 alt_name=None, id=11, 636 table={ 637 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 638 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 639 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 640 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 641 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 642 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 643 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 644 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 645 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 646 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 647 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 648 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 649 'GGG': 'G', }, 650 stop_codons=['TAA', 'TAG', 'TGA'], 651 start_codons=['TTG', 'CTG', 'ATT', 'ATC', 'ATA', 'ATG', 652 'GTG']) 653 654 register_ncbi_table(name='Alternative Yeast Nuclear', 655 alt_name=None, id=12, 656 table={ 657 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 658 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 659 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 660 'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 661 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 662 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 663 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 664 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 665 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 666 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 667 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 668 'GAA': 'E', 'GAG': 'E', 'GGT': 
'G', 'GGC': 'G', 'GGA': 'G', 669 'GGG': 'G', }, 670 stop_codons=['TAA', 'TAG', 'TGA'], 671 start_codons=['CTG', 'ATG']) 672 673 register_ncbi_table(name='Ascidian Mitochondrial', 674 alt_name=None, id=13, 675 table={ 676 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 677 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 678 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 679 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 680 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 681 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 682 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 683 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 684 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G', 685 'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 686 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 687 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 688 'GGA': 'G', 'GGG': 'G', }, 689 stop_codons=['TAA', 'TAG'], 690 start_codons=['TTG', 'ATA', 'ATG', 'GTG']) 691 692 register_ncbi_table(name='Alternative Flatworm Mitochondrial', 693 alt_name=None, id=14, 694 table={ 695 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 696 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 697 'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 698 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 699 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 700 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 701 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 702 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 703 'AAC': 'N', 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 704 'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 705 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 706 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 707 'GGC': 'G', 'GGA': 'G', 
'GGG': 'G', }, 708 stop_codons=['TAG'], 709 start_codons=['ATG']) 710 711 register_ncbi_table(name='Blepharisma Macronuclear', 712 alt_name=None, id=15, 713 table={ 714 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 715 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 716 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 717 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 718 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 719 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 720 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 721 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 722 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 723 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 724 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 725 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 726 'GGA': 'G', 'GGG': 'G', }, 727 stop_codons=['TAA', 'TGA'], 728 start_codons=['ATG']) 729 730 register_ncbi_table(name='Chlorophycean Mitochondrial', 731 alt_name=None, id=16, 732 table={ 733 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 734 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 735 'TAG': 'L', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 736 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 737 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 738 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 739 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 740 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 741 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 742 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 743 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 744 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 745 'GGA': 'G', 'GGG': 'G', }, 746 stop_codons=['TAA', 'TGA'], 747 start_codons=['ATG']) 748 749 
register_ncbi_table(name='Trematode Mitochondrial', 750 alt_name=None, id=21, 751 table={ 752 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 753 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 754 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 755 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 756 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 757 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 758 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 759 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 760 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 761 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 762 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 763 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 764 'GGA': 'G', 'GGG': 'G', }, 765 stop_codons=['TAA', 'TAG'], 766 start_codons=['ATG', 'GTG']) 767 768 register_ncbi_table(name='Scenedesmus obliquus Mitochondrial', 769 alt_name=None, id=22, 770 table={ 771 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 772 'TCC': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TAG': 'L', 773 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 774 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 775 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 776 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 777 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 778 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 779 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 780 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 781 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 782 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 783 'GGG': 'G', }, 784 stop_codons=['TCA', 'TAA', 'TGA'], 785 start_codons=['ATG']) 786 787 register_ncbi_table(name='Thraustochytrium Mitochondrial', 788 
alt_name=None, id=23, 789 table={ 790 'TTT': 'F', 'TTC': 'F', 'TTG': 'L', 'TCT': 'S', 'TCC': 'S', 791 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 792 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 793 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 794 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 795 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 796 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 797 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 798 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 799 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 800 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 801 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 802 stop_codons=['TTA', 'TAA', 'TAG', 'TGA'], 803 start_codons=['ATT', 'ATG', 'GTG']) 804 805 register_ncbi_table(name='Pterobranchia Mitochondrial', 806 alt_name=None, id=24, 807 table={ 808 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 809 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 810 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 811 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 812 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 813 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 814 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 815 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 816 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 817 'AGG': 'K', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 818 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 819 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 820 'GGA': 'G', 'GGG': 'G', }, 821 stop_codons=['TAA', 'TAG'], 822 start_codons=['TTG', 'CTG', 'ATG', 'GTG']) 823 824 register_ncbi_table(name='Candidate Division SR1 and Gracilibacteria', 825 alt_name=None, id=25, 826 table={ 827 'TTT': 
'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 828 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 829 'TGT': 'C', 'TGC': 'C', 'TGA': 'G', 'TGG': 'W', 'CTT': 'L', 830 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 831 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 832 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 833 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 834 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 835 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 836 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 837 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 838 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 839 'GGA': 'G', 'GGG': 'G', }, 840 stop_codons=['TAA', 'TAG'], 841 start_codons=['TTG', 'ATG', 'GTG']) 842 843 844 ######################################################################## 845 # End of auto-generated output from Scripts/update_ncbi_codon_table.py # 846 ######################################################################## 847 848 849 # This is currently missing in Version 4.0 of 850 # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 851 # and was entered by hand based on 852 # http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG26 853 # 854 # Code 26 is used so far only for the ascomycete fungus Pachysolen 855 # tannophilus. The only difference to the standard code is the 856 # translation of CUG as alanine (as opposed to leucine). As of 857 # April 2016, there is no publication documenting this code. 
# NOTE(review): this table was entered by hand rather than generated from
# gc.prt; re-check the stop and start codons below against the NCBI listing
# for code 26 the next time the auto-generated tables are refreshed.
register_ncbi_table(name='Pachysolen tannophilus Nuclear Code',
                    alt_name=None, id=26,
                    table={
    'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
    'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
    'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
    'CTA': 'L', 'CTG': 'A', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
    'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
    'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
    'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
    'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
    'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
    'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
    'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
    'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
    'GGG': 'G', },
                    stop_codons=['TAA', 'TAG', 'TGA'],
                    start_codons=['TTG', 'CTG', 'ATG'])


# Basic import-time sanity checks on the module-level table registries.
# These are plain ``assert`` statements, so they are skipped under -O.

# Each name / id registered for an unambiguous table must also resolve to
# an ambiguous table carrying that same name / id.
for key in generic_by_name:
    assert key in ambiguous_generic_by_name[key].names
for key in generic_by_id:
    assert ambiguous_generic_by_id[key].id == key
# Do not leave the loop variable behind in the module namespace.
del key

for n in ambiguous_generic_by_id:
    assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
    if n != 23:
        # For table 23, UUN = F, L or stop.
        assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X"  # F or L
    # R = A or G, so URA = UAA or UGA / TRA = TAA or TGA = stop codons
    if ("UAA" in unambiguous_rna_by_id[n].stop_codons
            and "UGA" in unambiguous_rna_by_id[n].stop_codons):
        # The ambiguous codon must not translate to an amino acid: the
        # lookup is expected to raise KeyError. Only the lookup sits in the
        # ``try`` so that nothing else can be mistaken for that KeyError.
        try:
            ambiguous_dna_by_id[n].forward_table["TRA"]
        except KeyError:
            pass
        else:
            # Report the offending translation in the assertion message
            # instead of printing it to stdout during import.
            assert False, "Should be a stop only, but TRA translates to %r" \
                % (ambiguous_dna_by_id[n].forward_table["TRA"],)
        assert "URA" in ambiguous_generic_by_id[n].stop_codons
        assert "URA" in ambiguous_rna_by_id[n].stop_codons
        assert "TRA" in ambiguous_generic_by_id[n].stop_codons
        assert "TRA" in ambiguous_dna_by_id[n].stop_codons
# Do not leave the loop variable behind in the module namespace.
del n

# Looking a table up by id or by any of its names must give the same table.
assert ambiguous_generic_by_id[1] == ambiguous_generic_by_name["Standard"]
assert ambiguous_generic_by_id[4] == ambiguous_generic_by_name["SGC3"]
assert ambiguous_generic_by_id[11] == ambiguous_generic_by_name["Bacterial"]
assert ambiguous_generic_by_id[11] == ambiguous_generic_by_name["Archaeal"]
assert ambiguous_generic_by_id[11] == ambiguous_generic_by_name["Plant Plastid"]
assert ambiguous_generic_by_id[15] == ambiguous_generic_by_name['Blepharisma Macronuclear']
assert ambiguous_generic_by_id[24] == ambiguous_generic_by_name["Pterobranchia Mitochondrial"]
assert generic_by_id[1] == generic_by_name["Standard"]
assert generic_by_id[4] == generic_by_name["SGC3"]
assert generic_by_id[11] == generic_by_name["Bacterial"]
assert generic_by_id[11] == generic_by_name["Plant Plastid"]
assert generic_by_id[15] == generic_by_name['Blepharisma Macronuclear']
assert generic_by_id[24] == generic_by_name["Pterobranchia Mitochondrial"]