Package Bio :: Package Data :: Module CodonTable
[hide private]
[frames] | no frames]

Source Code for Module Bio.Data.CodonTable

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  """Codon tables based on those from the NCBI. 
  5   
  6  These tables are based on parsing the NCBI file 
  7  ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 
  8  using Scripts/update_ncbi_codon_table.py 
  9   
 10  Last updated at Version 4.0 
 11  """ 
 12   
 13  from __future__ import print_function 
 14   
 15  from Bio import Alphabet 
 16  from Bio.Alphabet import IUPAC 
 17  from Bio.Data import IUPACData 
 18   
 19   
# Registries of codon tables, filled in by register_ncbi_table().
# The *_by_name dictionaries are keyed by table name, the *_by_id
# dictionaries by the table's id.
unambiguous_dna_by_name = {}
unambiguous_dna_by_id = {}
unambiguous_rna_by_name = {}
unambiguous_rna_by_id = {}
generic_by_name = {}  # unambiguous DNA or RNA
generic_by_id = {}  # unambiguous DNA or RNA

# The same registries for the tables which also accept ambiguous codons.
ambiguous_dna_by_name = {}
ambiguous_dna_by_id = {}
ambiguous_rna_by_name = {}
ambiguous_rna_by_id = {}
ambiguous_generic_by_name = {}  # ambiguous DNA or RNA
ambiguous_generic_by_id = {}  # ambiguous DNA or RNA

# standard IUPAC unambiguous codons
# (set by register_ncbi_table() when the table with id 1 is registered)
standard_dna_table = None
standard_rna_table = None
 37   
 38  # In the future, the back_table could return a statistically 
 39  # appropriate distribution of codons, so do not cache the results of 
 40  # back_table lookups! 
 41   
 42   
class TranslationError(Exception):
    """Raised when a codon has no unique, valid translation.

    Used for ambiguous codons which could be either an amino acid or a
    stop codon, and for ambiguous codons whose possible amino acids have
    no common (ambiguous) protein letter.
    """
45 46
47 -class CodonTable(object):
48 """A codon-table, or genetic code.""" 49 nucleotide_alphabet = Alphabet.generic_nucleotide 50 protein_alphabet = Alphabet.generic_protein 51 52 forward_table = {} # only includes codons which actually code 53 back_table = {} # for back translations 54 start_codons = [] 55 stop_codons = [] 56 57 # Not always called from derived classes!
58 - def __init__(self, nucleotide_alphabet=nucleotide_alphabet, 59 protein_alphabet=protein_alphabet, 60 forward_table=forward_table, back_table=back_table, 61 start_codons=start_codons, stop_codons=stop_codons):
68
69 - def __str__(self):
70 """Returns a simple text representation of the codon table. 71 72 e.g. 73 74 >>> import Bio.Data.CodonTable 75 >>> print(Bio.Data.CodonTable.standard_dna_table) 76 >>> print(Bio.Data.CodonTable.generic_by_id[1]) 77 """ 78 if self.id: 79 answer = "Table %i" % self.id 80 else: 81 answer = "Table ID unknown" 82 if self.names: 83 answer += " " + ", ".join([x for x in self.names if x]) 84 85 # Use the main four letters (and the conventional ordering) 86 # even for ambiguous tables 87 letters = self.nucleotide_alphabet.letters 88 if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \ 89 or (letters is not None and "T" in letters): 90 letters = "TCAG" 91 else: 92 # Should be either RNA or generic nucleotides, 93 # e.g. Bio.Data.CodonTable.generic_by_id[1] 94 letters = "UCAG" 95 96 # Build the table... 97 answer += "\n\n |" + "|".join(" %s " % c2 for c2 in letters) + "|" 98 answer += "\n--+" + "+".join("---------" for c2 in letters) + "+--" 99 for c1 in letters: 100 for c3 in letters: 101 line = c1 + " |" 102 for c2 in letters: 103 codon = c1 + c2 + c3 104 line += " %s" % codon 105 if codon in self.stop_codons: 106 line += " Stop|" 107 else: 108 try: 109 amino = self.forward_table[codon] 110 except KeyError: 111 amino = "?" 112 except TranslationError: 113 amino = "?" 114 if codon in self.start_codons: 115 line += " %s(s)|" % amino 116 else: 117 line += " %s |" % amino 118 line += " " + c3 119 answer += "\n" + line 120 answer += "\n--+" + "+".join("---------" for c2 in letters) + "+--" 121 return answer
122 123
def make_back_table(table, default_stop_codon):
    """Make a back-table (naive single codon mapping).

    ONLY RETURNS A SINGLE CODON per amino acid: when several codons code
    for the same amino acid, the one which sorts last is kept.  The key
    None (used for "stop") is mapped to default_stop_codon.

    Returns a new dictionary; the forward table is not modified.
    """
    # Walk the codons in sorted order so the winner does not depend on
    # the dictionary's iteration order; later codons replace earlier ones.
    back_table = dict((table[codon], codon) for codon in sorted(table))
    back_table[None] = default_stop_codon
    return back_table
137 138
class NCBICodonTable(CodonTable):
    """Codon table built from NCBI genetic code data (generic nucleotides)."""

    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = IUPAC.protein

    def __init__(self, id, names, table, start_codons, stop_codons):
        """Store the table data and derive the back-table from it.

        Arguments:
         - id - identifier of the table
         - names - list of names for the table
         - table - forward table, mapping codon to amino acid
         - start_codons - list of start codons
         - stop_codons - list of stop codons; the first one is used as
           the default stop codon of the back-table
        """
        self.id, self.names = id, names
        self.start_codons, self.stop_codons = start_codons, stop_codons
        self.forward_table = table
        self.back_table = make_back_table(table, stop_codons[0])
150 151
class NCBICodonTableDNA(NCBICodonTable):
    """NCBI codon table using the IUPAC unambiguous DNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_dna
154 155
class NCBICodonTableRNA(NCBICodonTable):
    """NCBI codon table using the IUPAC unambiguous RNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_rna
158 159 160 # ######## Deal with ambiguous forward translations 161
class AmbiguousCodonTable(CodonTable):
    """Codon table which also accepts ambiguous codons.

    Wraps an existing (unambiguous) codon table.  Attributes which are not
    set on this object, such as the names of an NCBI table, are looked up
    on the wrapped table.
    """

    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Build the ambiguous table on top of codon_table.

        Arguments:
         - codon_table - the unambiguous table to extend
         - ambiguous_nucleotide_alphabet - nucleotide alphabet of the
           new table
         - ambiguous_nucleotide_values - mapping of each nucleotide
           letter to the unambiguous letters it may stand for
         - ambiguous_protein_alphabet - protein alphabet of the new table
         - ambiguous_protein_values - mapping of each protein letter to
           the amino acids it may stand for
        """
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            AmbiguousForwardTable(codon_table.forward_table,
                                                  ambiguous_nucleotide_values,
                                                  ambiguous_protein_values),
                            codon_table.back_table,

                            # These two are WRONG!  I need to get the
                            # list of ambiguous codons which code for
                            # the stop codons  XXX
                            list_ambiguous_codons(codon_table.start_codons,
                                                  ambiguous_nucleotide_values),
                            list_ambiguous_codons(codon_table.stop_codons,
                                                  ambiguous_nucleotide_values)
                            )
        self._codon_table = codon_table

    # Be sneaky and forward attribute lookups to the original table.
    # This lets us get the names, if the original table is an NCBI
    # table.
    def __getattr__(self, name):
        """Forward lookups of missing attributes to the wrapped table."""
        # __getattr__ is only called when normal lookup fails.  If
        # _codon_table itself is missing (the instance was created without
        # running __init__, e.g. while being copied or unpickled), looking
        # it up below would call this method again without end.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
189 190
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return the amino acids a (possibly ambiguous) codon can code for.

    Each of the three letters of codon is expanded with
    ambiguous_nucleotide_values and every resulting codon is looked up
    in forward_table.

    Returns a list of the distinct amino acids found.

    Raises KeyError(codon) if none of the expanded codons is in
    forward_table (a true stop codon), and TranslationError if only some
    of them are missing.
    """
    first, second, third = codon
    options1 = ambiguous_nucleotide_values[first]
    options2 = ambiguous_nucleotide_values[second]
    options3 = ambiguous_nucleotide_values[third]
    expansions = [n1 + n2 + n3
                  for n1 in options1
                  for n2 in options2
                  for n3 in options3]

    # A dictionary is used as an ordered collection of distinct values.
    coded = {}
    hit_stop = False
    for triplet in expansions:
        try:
            coded[forward_table[triplet]] = 1
        except KeyError:
            # If tripping over a stop codon
            hit_stop = True

    if not hit_stop:
        return list(coded)
    if coded:
        raise TranslationError("ambiguous codon %r codes for both"
                               " proteins and stop codons" % codon)
    # This is a true stop codon - tell the caller about it
    raise KeyError(codon)
213 214
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extend a codon list to include all possible ambiguous codons.

    An ambiguous codon is added only if every unambiguous codon it can
    stand for is already in the given list.

    e.g.::

         ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UGA', 'UAA'] -> ['UGA', 'UAA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'
    (which could also stand for 'TAA' or 'TGG').
    Thus only two more codons are added in the following:

    e.g.::

         ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    Arguments:
     - codons - sequence of codon strings, e.g. the stop codons
     - ambiguous_nucleotide_values - mapping of each nucleotide letter
       to the unambiguous letters it may stand for, e.g. 'R' -> 'AG'

    Returns a new (longer) list of codon strings: the original codons in
    their original order, followed by the added ambiguous codons in
    sorted order.  The input is not modified.
    """
    answer = list(codons)  # copy
    known = set(answer)

    def letters_at(position):
        """Sorted letters whose meanings all occur at this codon position."""
        seen = set(codon[position] for codon in answer)
        return sorted(letter for (letter, meanings)
                      in ambiguous_nucleotide_values.items()
                      if seen.issuperset(meanings))

    # Note ambiguous_nucleotide_values['R'] = 'AG' (etc)
    # This will generate things like 'TRR' from ['TAG', 'TGA'], which
    # we don't want to include, hence the check on the expansions below.
    c1_list = letters_at(0)
    c2_list = letters_at(1)
    c3_list = letters_at(2)

    tried = set()
    for c1 in c1_list:
        for c2 in c2_list:
            for c3 in c3_list:
                ambig_codon = c1 + c2 + c3
                if ambig_codon in known or ambig_codon in tried:
                    continue
                tried.add(ambig_codon)
                # e.g. 'TRR' -> 'TAA', 'TAG', 'TGA', 'TGG'
                # all() stops at the first expansion which is not one of
                # the given codons (the ambiguous codon is then rejected).
                if all(n1 + n2 + n3 in known
                       for n1 in ambiguous_nucleotide_values[ambig_codon[0]]
                       for n2 in ambiguous_nucleotide_values[ambig_codon[1]]
                       for n3 in ambiguous_nucleotide_values[ambig_codon[2]]):
                    answer.append(ambig_codon)
    return answer
# Self-checks of list_ambiguous_codons, run when the module is imported.
# They also document the order in which the ambiguous codons are added.
assert list_ambiguous_codons(['TGA', 'TAA'], IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TRA']
assert list_ambiguous_codons(['TAG', 'TGA'], IUPACData.ambiguous_dna_values) == ['TAG', 'TGA']
assert list_ambiguous_codons(['TAG', 'TAA'], IUPACData.ambiguous_dna_values) == ['TAG', 'TAA', 'TAR']
assert list_ambiguous_codons(['UAG', 'UAA'], IUPACData.ambiguous_rna_values) == ['UAG', 'UAA', 'UAR']
assert list_ambiguous_codons(['TGA', 'TAA', 'TAG'],
                             IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

# Forward translation is "onto", that is, any given codon always maps
# to the same protein, or it doesn't map at all.  Thus, I can build
# off of an existing table to produce the ambiguous mappings.
#
# This handles the general case.  Perhaps it's overkill?
#  >>> t = CodonTable.ambiguous_dna_by_id[1]
#  >>> t.forward_table["AAT"]
#  'N'
#  >>> t.forward_table["GAT"]
#  'D'
#  >>> t.forward_table["RAT"]
#  'B'
#  >>> t.forward_table["YTA"]
#  'L'
293 -class AmbiguousForwardTable(object):
294 - def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
295 self.forward_table = forward_table 296 297 self.ambiguous_nucleotide = ambiguous_nucleotide 298 self.ambiguous_protein = ambiguous_protein 299 300 inverted = {} 301 for name, val in ambiguous_protein.items(): 302 for c in val: 303 x = inverted.get(c, {}) 304 x[name] = 1 305 inverted[c] = x 306 for name, val in inverted.items(): 307 inverted[name] = list(val) 308 self._inverted = inverted 309 310 self._cache = {}
311
312 - def get(self, codon, failobj=None):
313 try: 314 return self.__getitem__(codon) 315 except KeyError: 316 return failobj
317
318 - def __getitem__(self, codon):
319 try: 320 x = self._cache[codon] 321 except KeyError: 322 pass 323 else: 324 if x is TranslationError: 325 raise TranslationError(codon) # no unique translation 326 if x is KeyError: 327 raise KeyError(codon) # it's a stop codon 328 return x 329 try: 330 x = self.forward_table[codon] 331 self._cache[codon] = x 332 return x 333 except KeyError: 334 pass 335 336 # XXX Need to make part of this into a method which returns 337 # a list of all possible encodings for a codon! 338 try: 339 possible = list_possible_proteins(codon, 340 self.forward_table, 341 self.ambiguous_nucleotide) 342 except KeyError: 343 self._cache[codon] = KeyError 344 raise KeyError(codon) # stop codon 345 except TranslationError: 346 self._cache[codon] = TranslationError 347 raise TranslationError(codon) # does not code 348 assert len(possible) > 0, "unambiguous codons must code" 349 350 # Hah! Only one possible protein, so use it 351 if len(possible) == 1: 352 self._cache[codon] = possible[0] 353 return possible[0] 354 355 # See if there's an ambiguous protein encoding for the multiples. 356 # Find residues which exist in every coding set. 357 ambiguous_possible = {} 358 for amino in possible: 359 for term in self._inverted[amino]: 360 ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1 361 362 n = len(possible) 363 possible = [] 364 for amino, val in ambiguous_possible.items(): 365 if val == n: 366 possible.append(amino) 367 368 # No amino acid encoding for the results 369 if len(possible) == 0: 370 self._cache[codon] = TranslationError 371 raise TranslationError(codon) # no valid translation 372 373 # All of these are valid, so choose one 374 # To be unique, sort by smallet ambiguity then alphabetically 375 # Can get this if "X" encodes for everything. 
376 # def _sort(x, y, table = self.ambiguous_protein): 377 # a = cmp(len(table[x]), len(table[y])) 378 # if a == 0: 379 # return cmp(x, y) 380 # return a 381 382 # Sort by key is 2.x and 3.x compatible 383 possible.sort(key=lambda x: (len(self.ambiguous_protein[x]), x)) 384 385 x = possible[0] 386 self._cache[codon] = x 387 return x
388 389
def register_ncbi_table(name, alt_name, id,
                        table, start_codons, stop_codons):
    """Turn codon table data into objects, and store them in the dictionaries (PRIVATE).

    Six tables are built from the DNA data: unambiguous and ambiguous
    versions for DNA, for RNA (every T replaced by U) and for generic
    nucleotides (both spellings).  Each is stored in the matching module
    level *_by_id dictionary under id, and in the *_by_name dictionary
    under every name.  The table with id 1 also becomes the standard
    DNA/RNA table.

    Arguments:
     - name - one or more names, separated by "; ", ", " or " and "
     - alt_name - alternative name, or None
     - id - identifier of the table
     - table - forward table using DNA codons
     - start_codons - list of DNA start codons
     - stop_codons - list of DNA stop codons
    """
    global standard_dna_table, standard_rna_table

    # In most cases names are divided by "; ", however there is also
    # Table 11 'Bacterial, Archaeal and Plant Plastid Code', previously
    # 'Bacterial and Plant Plastid' which used to be just 'Bacterial'
    unified = name.replace(" and ", "; ").replace(", ", "; ")
    names = [part.strip() for part in unified.split("; ")]

    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)
    ambig_dna = AmbiguousCodonTable(dna,
                                    IUPAC.ambiguous_dna,
                                    IUPACData.ambiguous_dna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    # replace all T's with U's for the RNA tables; the generic tables
    # hold both the DNA and the RNA spelling of every codon
    rna_table = {}
    generic_table = {}
    for dna_codon, amino in table.items():
        rna_codon = dna_codon.replace("T", "U")
        generic_table[dna_codon] = amino
        generic_table[rna_codon] = amino
        rna_table[rna_codon] = amino

    rna_start_codons = [codon.replace("T", "U") for codon in start_codons]
    generic_start_codons = []
    for spellings in zip(start_codons, rna_start_codons):
        generic_start_codons.extend(spellings)

    rna_stop_codons = [codon.replace("T", "U") for codon in stop_codons]
    generic_stop_codons = []
    for spellings in zip(stop_codons, rna_stop_codons):
        generic_stop_codons.extend(spellings)

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)

    # The following isn't very elegant, but seems to work nicely.
    merged_values = dict(IUPACData.ambiguous_rna_values)
    merged_values["T"] = "U"
    ambig_generic = AmbiguousCodonTable(generic,
                                        Alphabet.NucleotideAlphabet(),
                                        merged_values,
                                        IUPAC.extended_protein,
                                        IUPACData.extended_protein_values)

    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)
    ambig_rna = AmbiguousCodonTable(rna,
                                    IUPAC.ambiguous_rna,
                                    IUPACData.ambiguous_rna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    by_id = ((unambiguous_dna_by_id, dna),
             (unambiguous_rna_by_id, rna),
             (generic_by_id, generic),
             (ambiguous_dna_by_id, ambig_dna),
             (ambiguous_rna_by_id, ambig_rna),
             (ambiguous_generic_by_id, ambig_generic))
    for registry, codon_table in by_id:
        registry[id] = codon_table

    if alt_name is not None:
        names.append(alt_name)

    by_name = ((unambiguous_dna_by_name, dna),
               (unambiguous_rna_by_name, rna),
               (generic_by_name, generic),
               (ambiguous_dna_by_name, ambig_dna),
               (ambiguous_rna_by_name, ambig_rna),
               (ambiguous_generic_by_name, ambig_generic))
    for label in names:
        for registry, codon_table in by_name:
            registry[label] = codon_table
473 474 475 ########################################################################## 476 # Start of auto-generated output from Scripts/update_ncbi_codon_table.py # 477 ########################################################################## 478 479 480 register_ncbi_table(name='Standard', 481 alt_name='SGC0', id=1, 482 table={ 483 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 484 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 485 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 486 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 487 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 488 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 489 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 490 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 491 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 492 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 493 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 494 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 495 'GGG': 'G', }, 496 stop_codons=['TAA', 'TAG', 'TGA'], 497 start_codons=['TTG', 'CTG', 'ATG']) 498 499 register_ncbi_table(name='Vertebrate Mitochondrial', 500 alt_name='SGC1', id=2, 501 table={ 502 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 503 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 504 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 505 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 506 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 507 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 508 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 509 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 510 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V', 511 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 512 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 
'E', 513 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 514 stop_codons=['TAA', 'TAG', 'AGA', 'AGG'], 515 start_codons=['ATT', 'ATC', 'ATA', 'ATG', 'GTG']) 516 517 register_ncbi_table(name='Yeast Mitochondrial', 518 alt_name='SGC2', id=3, 519 table={ 520 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 521 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 522 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T', 523 'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P', 524 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 525 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 526 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 527 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 528 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 529 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 530 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 531 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 532 'GGA': 'G', 'GGG': 'G', }, 533 stop_codons=['TAA', 'TAG'], 534 start_codons=['ATA', 'ATG']) 535 536 register_ncbi_table(name='Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma', 537 alt_name='SGC3', id=4, 538 table={ 539 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 540 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 541 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 542 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 543 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 544 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 545 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 546 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 547 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 548 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 549 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 
'GAT': 'D', 550 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 551 'GGA': 'G', 'GGG': 'G', }, 552 stop_codons=['TAA', 'TAG'], 553 start_codons=['TTA', 'TTG', 'CTG', 'ATT', 'ATC', 'ATA', 554 'ATG', 'GTG']) 555 556 register_ncbi_table(name='Invertebrate Mitochondrial', 557 alt_name='SGC4', id=5, 558 table={ 559 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 560 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 561 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 562 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 563 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 564 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 565 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 566 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 567 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 568 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 569 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 570 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 571 'GGA': 'G', 'GGG': 'G', }, 572 stop_codons=['TAA', 'TAG'], 573 start_codons=['TTG', 'ATT', 'ATC', 'ATA', 'ATG', 'GTG']) 574 575 register_ncbi_table(name='Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear', 576 alt_name='SGC5', id=6, 577 table={ 578 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 579 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 580 'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 581 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 582 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 583 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 584 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 585 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 586 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 587 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 588 'GTG': 'V', 
'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 589 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 590 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 591 stop_codons=['TGA'], 592 start_codons=['ATG']) 593 594 register_ncbi_table(name='Echinoderm Mitochondrial; Flatworm Mitochondrial', 595 alt_name='SGC8', id=9, 596 table={ 597 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 598 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 599 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 600 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 601 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 602 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 603 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 604 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 605 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 606 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 607 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 608 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 609 'GGA': 'G', 'GGG': 'G', }, 610 stop_codons=['TAA', 'TAG'], 611 start_codons=['ATG', 'GTG']) 612 613 register_ncbi_table(name='Euplotid Nuclear', 614 alt_name='SGC9', id=10, 615 table={ 616 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 617 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 618 'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L', 619 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 620 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 621 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 622 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 623 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 624 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 625 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 626 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 627 
'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 628 'GGA': 'G', 'GGG': 'G', }, 629 stop_codons=['TAA', 'TAG'], 630 start_codons=['ATG']) 631 632 register_ncbi_table(name='Bacterial, Archaeal and Plant Plastid', 633 alt_name=None, id=11, 634 table={ 635 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 636 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 637 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 638 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 639 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 640 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 641 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 642 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 643 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 644 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 645 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 646 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 647 'GGG': 'G', }, 648 stop_codons=['TAA', 'TAG', 'TGA'], 649 start_codons=['TTG', 'CTG', 'ATT', 'ATC', 'ATA', 'ATG', 650 'GTG']) 651 652 register_ncbi_table(name='Alternative Yeast Nuclear', 653 alt_name=None, id=12, 654 table={ 655 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 656 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 657 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 658 'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 659 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 660 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 661 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 662 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 663 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 664 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 665 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 666 'GAA': 'E', 'GAG': 'E', 'GGT': 
'G', 'GGC': 'G', 'GGA': 'G', 667 'GGG': 'G', }, 668 stop_codons=['TAA', 'TAG', 'TGA'], 669 start_codons=['CTG', 'ATG']) 670 671 register_ncbi_table(name='Ascidian Mitochondrial', 672 alt_name=None, id=13, 673 table={ 674 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 675 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 676 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 677 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 678 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 679 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 680 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 681 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 682 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G', 683 'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 684 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 685 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 686 'GGA': 'G', 'GGG': 'G', }, 687 stop_codons=['TAA', 'TAG'], 688 start_codons=['TTG', 'ATA', 'ATG', 'GTG']) 689 690 register_ncbi_table(name='Alternative Flatworm Mitochondrial', 691 alt_name=None, id=14, 692 table={ 693 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 694 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 695 'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 696 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 697 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 698 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 699 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 700 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 701 'AAC': 'N', 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 702 'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 703 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 704 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 705 'GGC': 'G', 'GGA': 'G', 
'GGG': 'G', }, 706 stop_codons=['TAG'], 707 start_codons=['ATG']) 708 709 register_ncbi_table(name='Blepharisma Macronuclear', 710 alt_name=None, id=15, 711 table={ 712 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 713 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 714 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 715 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 716 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 717 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 718 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 719 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 720 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 721 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 722 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 723 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 724 'GGA': 'G', 'GGG': 'G', }, 725 stop_codons=['TAA', 'TGA'], 726 start_codons=['ATG']) 727 728 register_ncbi_table(name='Chlorophycean Mitochondrial', 729 alt_name=None, id=16, 730 table={ 731 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 732 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 733 'TAG': 'L', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 734 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 735 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 736 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 737 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 738 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 739 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 740 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 741 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 742 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 743 'GGA': 'G', 'GGG': 'G', }, 744 stop_codons=['TAA', 'TGA'], 745 start_codons=['ATG']) 746 747 
register_ncbi_table(name='Trematode Mitochondrial', 748 alt_name=None, id=21, 749 table={ 750 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 751 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 752 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 753 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 754 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 755 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 756 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 757 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 758 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 759 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 760 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 761 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 762 'GGA': 'G', 'GGG': 'G', }, 763 stop_codons=['TAA', 'TAG'], 764 start_codons=['ATG', 'GTG']) 765 766 register_ncbi_table(name='Scenedesmus obliquus Mitochondrial', 767 alt_name=None, id=22, 768 table={ 769 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 770 'TCC': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TAG': 'L', 771 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 772 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 773 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 774 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 775 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 776 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 777 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 778 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 779 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 780 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 781 'GGG': 'G', }, 782 stop_codons=['TCA', 'TAA', 'TGA'], 783 start_codons=['ATG']) 784 785 register_ncbi_table(name='Thraustochytrium Mitochondrial', 786 
alt_name=None, id=23, 787 table={ 788 'TTT': 'F', 'TTC': 'F', 'TTG': 'L', 'TCT': 'S', 'TCC': 'S', 789 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 790 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 791 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 792 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 793 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 794 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 795 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 796 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 797 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 798 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 799 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 800 stop_codons=['TTA', 'TAA', 'TAG', 'TGA'], 801 start_codons=['ATT', 'ATG', 'GTG']) 802 803 register_ncbi_table(name='Pterobranchia Mitochondrial', 804 alt_name=None, id=24, 805 table={ 806 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 807 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 808 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 809 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 810 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 811 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 812 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 813 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 814 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 815 'AGG': 'K', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 816 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 817 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 818 'GGA': 'G', 'GGG': 'G', }, 819 stop_codons=['TAA', 'TAG'], 820 start_codons=['TTG', 'CTG', 'ATG', 'GTG']) 821 822 register_ncbi_table(name='Candidate Division SR1 and Gracilibacteria', 823 alt_name=None, id=25, 824 table={ 825 'TTT': 
'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 826 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 827 'TGT': 'C', 'TGC': 'C', 'TGA': 'G', 'TGG': 'W', 'CTT': 'L', 828 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 829 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 830 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 831 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 832 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 833 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 834 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 835 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 836 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 837 'GGA': 'G', 'GGG': 'G', }, 838 stop_codons=['TAA', 'TAG'], 839 start_codons=['TTG', 'ATG', 'GTG']) 840 841 842 ######################################################################## 843 # End of auto-generated output from Scripts/update_ncbi_codon_table.py # 844 ######################################################################## 845 846 847 # This is currently missing in Version 4.0 of 848 # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 849 # and was entered by hand based on 850 # http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG26 851 # 852 # Code 26 is used so far only for the ascomycete fungus Pachysolen 853 # tannophilus. The only difference to the standard code is the 854 # translation of CUG as alanine (as opposed to leucine). As of 855 # April 2016, there is no publication documenting this code. 
register_ncbi_table(
    name='Pachysolen tannophilus Nuclear Code',
    alt_name=None,
    id=26,
    # One row per two-base prefix; codons absent from the mapping are the
    # ones listed in stop_codons below.
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L',
        'TCT': 'S', 'TCC': 'S', 'TCA': 'S', 'TCG': 'S',
        'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGG': 'W',
        'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'A',  # CTG -> alanine
        'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P',
        'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
        'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T',
        'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K',
        'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E',
        'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'TGA'],
    start_codons=['TTG', 'CTG', 'ATG'])


# Import-time consistency checks on the registries filled in above.

# Every name / id known to the unambiguous registry must resolve to a
# matching entry in the ambiguous registry.
for key in generic_by_name:
    assert key in ambiguous_generic_by_name[key].names
for key in generic_by_id:
    assert ambiguous_generic_by_id[key].id == key
del key

for n in ambiguous_generic_by_id:
    assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
    if n != 23:
        # Table 23 is skipped here because its UUN can be F, L or stop.
        assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X"  # F or L

    # The remaining checks only apply to tables where both UAA and UGA are
    # stop codons.
    if not all(codon in unambiguous_rna_by_id[n].stop_codons
               for codon in ("UAA", "UGA")):
        continue

    # R = A or G, so URA covers UAA and UGA (TRA covers TAA and TGA): it
    # must be a stop codon only, which the forward table signals by
    # raising KeyError on lookup.
    try:
        print(ambiguous_dna_by_id[n].forward_table["TRA"])
        assert False, "Should be a stop only"
    except KeyError:
        pass
    assert "URA" in ambiguous_rna_by_id[n].stop_codons
    assert "TRA" in ambiguous_dna_by_id[n].stop_codons
    assert "URA" in ambiguous_generic_by_id[n].stop_codons
    assert "TRA" in ambiguous_generic_by_id[n].stop_codons
del n

# Looking a table up by id or by any of its names must give an equal table.
_ambiguous_aliases = (
    (1, "Standard"),
    (4, "SGC3"),
    (11, "Bacterial"),
    (11, "Archaeal"),
    (11, "Plant Plastid"),
    (15, 'Blepharisma Macronuclear'),
    (24, "Pterobranchia Mitochondrial"),
)
_unambiguous_aliases = (
    (1, "Standard"),
    (4, "SGC3"),
    (11, "Bacterial"),
    (11, "Plant Plastid"),
    (15, 'Blepharisma Macronuclear'),
    (24, "Pterobranchia Mitochondrial"),
)
for _id, _name in _ambiguous_aliases:
    assert ambiguous_generic_by_id[_id] == ambiguous_generic_by_name[_name]
for _id, _name in _unambiguous_aliases:
    assert generic_by_id[_id] == generic_by_name[_name]
# Keep the helper names out of the module namespace.
del _id, _name, _ambiguous_aliases, _unambiguous_aliases