Package Bio :: Package Data :: Module CodonTable
[hide private]
[frames | no frames]

Source Code for Module Bio.Data.CodonTable

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  """Codon tables based on those from the NCBI. 
  5   
  6  These tables are based on parsing the NCBI file: 
  7  ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 
  8   
  9  Last updated at Version 4.0 
 10  """ 
 11   
 12  from __future__ import print_function 
 13   
 14  from Bio import Alphabet 
 15  from Bio.Alphabet import IUPAC 
 16  from Bio.Data import IUPACData 
 17   
 18   
 19  unambiguous_dna_by_name = {} 
 20  unambiguous_dna_by_id = {} 
 21  unambiguous_rna_by_name = {} 
 22  unambiguous_rna_by_id = {} 
 23  generic_by_name = {}  # unambiguous DNA or RNA 
 24  generic_by_id = {}  # unambiguous DNA or RNA 
 25   
 26  ambiguous_dna_by_name = {} 
 27  ambiguous_dna_by_id = {} 
 28  ambiguous_rna_by_name = {} 
 29  ambiguous_rna_by_id = {} 
 30  ambiguous_generic_by_name = {}  # ambiguous DNA or RNA 
 31  ambiguous_generic_by_id = {}  # ambiguous DNA or RNA 
 32   
 33  # standard IUPAC unambiguous codons 
 34  standard_dna_table = None 
 35  standard_rna_table = None 
 36   
 37  # In the future, the back_table could return a statistically 
 38  # appropriate distribution of codons, so do not cache the results of 
 39  # back_table lookups! 
 40   
 41   
class TranslationError(Exception):
    """Raised when a codon has no single, unique translation."""
44 45
46 -class CodonTable(object):
47 """A codon-table, or genetic code.""" 48 nucleotide_alphabet = Alphabet.generic_nucleotide 49 protein_alphabet = Alphabet.generic_protein 50 51 forward_table = {} # only includes codons which actually code 52 back_table = {} # for back translations 53 start_codons = [] 54 stop_codons = [] 55 56 # Not always called from derived classes!
57 - def __init__(self, nucleotide_alphabet=nucleotide_alphabet, 58 protein_alphabet=protein_alphabet, 59 forward_table=forward_table, back_table=back_table, 60 start_codons=start_codons, stop_codons=stop_codons):
67
def __str__(self):
    """Return a simple text representation of the codon table.

    e.g.

    >>> import Bio.Data.CodonTable
    >>> print(Bio.Data.CodonTable.standard_dna_table)
    >>> print(Bio.Data.CodonTable.generic_by_id[1])

    The result is a title line followed by a 4 x 16 grid: the row label
    is the first base, the column header is the second base and the third
    base is printed at the end of each row.  Stop codons are shown as
    "Stop", start codons are flagged with "(s)", and codons the forward
    table cannot translate are shown as "?".
    """
    # Not every table carries an id or a list of names, so fall back to
    # None instead of raising AttributeError; the "Table ID unknown"
    # branch below exists for exactly that case.
    table_id = getattr(self, "id", None)
    names = getattr(self, "names", None)

    if table_id:
        answer = "Table %i" % table_id
    else:
        answer = "Table ID unknown"
    if names:
        # The list may contain empty entries (e.g. None); leave them out.
        answer += " " + ", ".join([x for x in names if x])

    # Use the main four letters (and the conventional ordering)
    # even for ambiguous tables
    letters = self.nucleotide_alphabet.letters
    if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \
            or (letters is not None and "T" in letters):
        letters = "TCAG"
    else:
        # Should be either RNA or generic nucleotides,
        # e.g. Bio.Data.CodonTable.generic_by_id[1]
        letters = "UCAG"

    # Every cell is nine characters wide so that it lines up with the
    # "---------" rules drawn between the groups of rows.
    rule = "\n--+" + "+".join("---------" for c2 in letters) + "+--"

    # Build the table...
    answer += "\n\n  |" + "|".join("  %s      " % c2 for c2 in letters) + "|"
    answer += rule
    for c1 in letters:
        for c3 in letters:
            line = c1 + " |"
            for c2 in letters:
                codon = c1 + c2 + c3
                line += " %s" % codon
                if codon in self.stop_codons:
                    line += " Stop|"
                else:
                    try:
                        amino = self.forward_table[codon]
                    except (KeyError, TranslationError):
                        # Codon missing from the table, or no unique
                        # translation for it.
                        amino = "?"
                    if codon in self.start_codons:
                        line += " %s(s)|" % amino
                    else:
                        line += " %s   |" % amino
            line += " " + c3
            answer += "\n" + line
        answer += rule
    return answer
122 123
def make_back_table(table, default_stop_codon):
    """Build a back-table (naive single codon mapping).

    ONLY RETURNS A SINGLE CODON per amino acid: when several codons code
    for the same amino acid, the one that comes last in sort order is kept.
    The key None maps to default_stop_codon.
    """
    # Walk the codons in sorted order so that the surviving codon does not
    # depend on the iteration order of the input dictionary.
    back_table = dict((table[codon], codon) for codon in sorted(table))
    back_table[None] = default_stop_codon
    return back_table
137 138
class NCBICodonTable(CodonTable):
    """Codon table built from the data of one NCBI genetic code entry."""

    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = IUPAC.protein

    def __init__(self, id, names, table, start_codons, stop_codons):
        """Store the table data and derive the back-table from it.

        CodonTable.__init__ is not called; every attribute is set here.
        The first entry of stop_codons becomes the back-translation of a
        stop (the None key of back_table), so stop_codons must not be
        empty.
        """
        # Identification
        self.id = id
        self.names = names
        # Codon lists
        self.start_codons = start_codons
        self.stop_codons = stop_codons
        # Translation tables
        self.forward_table = table
        self.back_table = make_back_table(table, stop_codons[0])
150 151
class NCBICodonTableDNA(NCBICodonTable):
    """NCBI codon table whose codons use the unambiguous DNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_dna
154 155
class NCBICodonTableRNA(NCBICodonTable):
    """NCBI codon table whose codons use the unambiguous RNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_rna
158 159 160 # ######## Deal with ambiguous forward translations 161
class AmbiguousCodonTable(CodonTable):
    """Codon table that also accepts codons written with ambiguity codes.

    Wraps an existing codon table: forward translation goes through an
    AmbiguousForwardTable built on the wrapped table's forward table, and
    any attribute this object does not define itself is looked up on the
    wrapped table.
    """

    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Wrap codon_table.

        ambiguous_nucleotide_values maps each nucleotide letter to the
        plain letters it may stand for (e.g. 'R' -> 'AG');
        ambiguous_protein_values is the equivalent mapping for protein
        letters.  The two alphabets are passed on to CodonTable.__init__
        unchanged.
        """
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            AmbiguousForwardTable(codon_table.forward_table,
                                                  ambiguous_nucleotide_values,
                                                  ambiguous_protein_values),
                            codon_table.back_table,

                            # These two are WRONG!  I need to get the
                            # list of ambiguous codons which code for
                            # the stop codons  XXX
                            list_ambiguous_codons(codon_table.start_codons,
                                                  ambiguous_nucleotide_values),
                            list_ambiguous_codons(codon_table.stop_codons,
                                                  ambiguous_nucleotide_values)
                            )
        self._codon_table = codon_table

    # Be sneaky and forward attribute lookups to the original table.
    # This lets us get the names, if the original table is an NCBI
    # table.
    def __getattr__(self, name):
        """Look up a missing attribute on the wrapped codon table."""
        # __getattr__ only runs when normal lookup has failed.  If the
        # wrapped table itself is missing (an instance created without
        # __init__, e.g. while being copied or unpickled), reading
        # self._codon_table below would call __getattr__ again and recurse
        # without end, so report the attribute as missing instead.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
189 190
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return the amino acids a (possibly ambiguous) codon can code for.

    Each of the three letters of codon is expanded through
    ambiguous_nucleotide_values and every resulting plain codon is looked
    up in forward_table.  The distinct amino acids found are returned as a
    list.

    Raises KeyError(codon) if none of the expansions is in forward_table
    (a true stop codon), and TranslationError if only some of them are.
    """
    first, second, third = codon
    first_bases = ambiguous_nucleotide_values[first]
    second_bases = ambiguous_nucleotide_values[second]
    third_bases = ambiguous_nucleotide_values[third]

    amino_acids = {}  # dictionary keys double as a set of amino acids
    non_coding = []
    for b1 in first_bases:
        for b2 in second_bases:
            for b3 in third_bases:
                triplet = b1 + b2 + b3
                try:
                    amino = forward_table[triplet]
                except KeyError:
                    # If tripping over a stop codon
                    non_coding.append(triplet)
                else:
                    amino_acids[amino] = 1

    if not non_coding:
        return list(amino_acids)
    if amino_acids:
        raise TranslationError("ambiguous codon %r codes for both"
                               " proteins and stop codons" % codon)
    # This is a true stop codon - tell the caller about it
    raise KeyError(codon)
213 214
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extend a codon list to include all possible ambiguous codons.

    e.g.::

         ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UAG', 'UAA'] -> ['UAG', 'UAA', 'UAR']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'
    (which could also stand for 'TAA' or 'TGG').
    Thus only two more codons are added in the following:

    e.g.::

         ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    codons is a sequence of three-letter strings and
    ambiguous_nucleotide_values maps each letter to the plain letters it
    may stand for, e.g. ambiguous_nucleotide_values['R'] = 'AG'.

    Returns a new (longer) list of codon strings; the input is not
    modified.  The added codons follow the original ones, ordered by the
    sorted letters at each position.
    """
    known = set(codons)

    def letters_at(position):
        """Letters whose every meaning occurs at this codon position."""
        present = set(codon[position] for codon in codons)
        return sorted(letter for (letter, meanings)
                      in ambiguous_nucleotide_values.items()
                      if present.issuperset(meanings))

    c1_list = letters_at(0)
    c2_list = letters_at(1)
    c3_list = letters_at(2)

    # Combining the per-position letters will generate things like 'TRR'
    # from ['TAG', 'TGA'], which we don't want to include, so each
    # candidate is checked below.  The letters within each list are
    # distinct, hence so are the candidates.
    candidates = [c1 + c2 + c3
                  for c1 in c1_list
                  for c2 in c2_list
                  for c3 in c3_list
                  if c1 + c2 + c3 not in known]

    answer = list(codons)  # copy
    for ambig_codon in candidates:
        # e.g. 'TRR' -> 'TAA', 'TAG', 'TGA', 'TGG'
        expansions = (b1 + b2 + b3
                      for b1 in ambiguous_nucleotide_values[ambig_codon[0]]
                      for b2 in ambiguous_nucleotide_values[ambig_codon[1]]
                      for b3 in ambiguous_nucleotide_values[ambig_codon[2]])
        # Keep the candidate only if everything it can stand for was in
        # the input; all() stops at the first expansion that was not.
        if all(codon in known for codon in expansions):
            answer.append(ambig_codon)
    return answer
# Import-time checks of list_ambiguous_codons against known results.
assert list_ambiguous_codons(
    ['TGA', 'TAA'],
    IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TRA']
assert list_ambiguous_codons(
    ['TAG', 'TGA'],
    IUPACData.ambiguous_dna_values) == ['TAG', 'TGA']
assert list_ambiguous_codons(
    ['TAG', 'TAA'],
    IUPACData.ambiguous_dna_values) == ['TAG', 'TAA', 'TAR']
assert list_ambiguous_codons(
    ['UAG', 'UAA'],
    IUPACData.ambiguous_rna_values) == ['UAG', 'UAA', 'UAR']
assert list_ambiguous_codons(
    ['TGA', 'TAA', 'TAG'],
    IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

# Forward translation is "onto", that is, any given codon always maps
# to the same protein, or it doesn't map at all.  Thus, I can build
# off of an existing table to produce the ambiguous mappings.
#
# This handles the general case.  Perhaps it's overkill?
#  >>> t = CodonTable.ambiguous_dna_by_id[1]
#  >>> t.forward_table["AAT"]
#  'N'
#  >>> t.forward_table["GAT"]
#  'D'
#  >>> t.forward_table["RAT"]
#  'B'
#  >>> t.forward_table["YTA"]
#  'L'
class AmbiguousForwardTable(object):
    """Forward table (codon -> amino acid) that resolves ambiguous codons.

    Lookups that the plain forward table cannot answer are resolved by
    expanding the ambiguous codon.  Every outcome is remembered in a
    cache, including the two failure cases: KeyError (stop codon) and
    TranslationError (no unique translation).
    """

    def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
        """Keep the three mappings and precompute the inverted protein map.

        ambiguous_nucleotide maps a nucleotide letter to the letters it
        may stand for; ambiguous_protein does the same for protein letters.
        """
        self.forward_table = forward_table

        self.ambiguous_nucleotide = ambiguous_nucleotide
        self.ambiguous_protein = ambiguous_protein

        # Invert ambiguous_protein: for each residue, the protein letters
        # that can stand for it.  Inner dictionaries are used as sets.
        membership = {}
        for symbol, residues in ambiguous_protein.items():
            for residue in residues:
                membership.setdefault(residue, {})[symbol] = 1
        self._inverted = dict((residue, list(symbols))
                              for residue, symbols in membership.items())

        self._cache = {}

    def get(self, codon, failobj=None):
        """Return the translation of codon, or failobj for a stop codon.

        Only KeyError is turned into failobj; a TranslationError raised by
        the lookup is passed on to the caller.
        """
        try:
            return self[codon]
        except KeyError:
            return failobj

    def __getitem__(self, codon):
        """Translate codon, which may contain ambiguous letters.

        Raises KeyError for a stop codon and TranslationError when no
        single protein letter describes everything the codon could code
        for.
        """
        cache = self._cache
        if codon in cache:
            remembered = cache[codon]
            if remembered is TranslationError:
                raise TranslationError(codon)  # no unique translation
            if remembered is KeyError:
                raise KeyError(codon)  # it's a stop codon
            return remembered

        # Codons known to the underlying table need no further work.
        try:
            residue = self.forward_table[codon]
        except KeyError:
            pass
        else:
            cache[codon] = residue
            return residue

        # XXX Need to make part of this into a method which returns
        # a list of all possible encodings for a codon!
        try:
            residues = list_possible_proteins(codon,
                                              self.forward_table,
                                              self.ambiguous_nucleotide)
        except KeyError:
            cache[codon] = KeyError
            raise KeyError(codon)  # stop codon
        except TranslationError:
            cache[codon] = TranslationError
            raise TranslationError(codon)  # does not code
        assert len(residues) > 0, "unambiguous codons must code"

        # Hah!  Only one possible protein, so use it
        if len(residues) == 1:
            cache[codon] = residues[0]
            return residues[0]

        # See if there's an ambiguous protein encoding for the multiples:
        # count, for each protein letter, how many of the possible
        # residues it can stand for, and keep those that cover them all.
        coverage = {}
        for residue in residues:
            for symbol in self._inverted[residue]:
                coverage[symbol] = coverage.get(symbol, 0) + 1
        wanted = len(residues)
        shared = [symbol for symbol, count in coverage.items()
                  if count == wanted]

        # No amino acid encoding for the results
        if not shared:
            cache[codon] = TranslationError
            raise TranslationError(codon)  # no valid translation

        # All of these are valid, so choose one.
        # To be unique, take the smallest ambiguity, then alphabetically.
        # Can get this if "X" encodes for everything.
        best = min(shared,
                   key=lambda symbol: (len(self.ambiguous_protein[symbol]),
                                       symbol))
        cache[codon] = best
        return best
389 390
def _rna_and_generic_codons(codons):
    """Return (rna_codons, generic_codons) for a list of DNA codons (PRIVATE).

    rna_codons has every T replaced with U.  generic_codons holds each
    codon in both spellings, DNA first.
    """
    rna_codons = []
    generic_codons = []
    for codon in codons:
        rna_codon = codon.replace("T", "U")
        rna_codons.append(rna_codon)
        generic_codons.append(codon)
        if rna_codon != codon:
            # A codon without T (e.g. 'AGA') is spelled the same way in
            # DNA and RNA; list it once rather than twice.
            generic_codons.append(rna_codon)
    return rna_codons, generic_codons


def register_ncbi_table(name, alt_name, id,
                        table, start_codons, stop_codons):
    """Turns codon table data into objects, and stores them in the dictionaries (PRIVATE).

    name is the NCBI name string, possibly holding several names;
    alt_name is an alternative name or None; id is the table number;
    table maps DNA codons to amino acid letters; start_codons and
    stop_codons are lists of DNA codons.

    Six tables are built (DNA, RNA and generic, each in an unambiguous
    and an ambiguous flavour) and stored in the module level *_by_id and
    *_by_name dictionaries.  The table with id 1 also becomes
    standard_dna_table / standard_rna_table.
    """
    global standard_dna_table, standard_rna_table

    # In most cases names are divided by "; ", however there is also
    # 'Bacterial and Plant Plastid' (which used to be just 'Bacterial')
    names = [x.strip() for x in name.replace(" and ", "; ").split("; ")]

    # Each table gets its own copy of the name list with alt_name as the
    # last entry, even when alt_name is None.
    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)

    ambig_dna = AmbiguousCodonTable(dna,
                                    IUPAC.ambiguous_dna,
                                    IUPACData.ambiguous_dna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    # replace all T's with U's for the RNA tables
    rna_table = {}
    generic_table = {}
    for codon, val in table.items():
        rna_codon = codon.replace("T", "U")
        generic_table[codon] = val
        generic_table[rna_codon] = val
        rna_table[rna_codon] = val
    rna_start_codons, generic_start_codons = \
        _rna_and_generic_codons(start_codons)
    rna_stop_codons, generic_stop_codons = \
        _rna_and_generic_codons(stop_codons)

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)

    # The following isn't very elegant, but seems to work nicely:
    # the generic table reads T as just another way of writing U.
    _merged_values = dict(IUPACData.ambiguous_rna_values)
    _merged_values["T"] = "U"
    ambig_generic = AmbiguousCodonTable(generic,
                                        Alphabet.NucleotideAlphabet(),
                                        _merged_values,
                                        IUPAC.extended_protein,
                                        IUPACData.extended_protein_values)

    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    ambig_rna = AmbiguousCodonTable(rna,
                                    IUPAC.ambiguous_rna,
                                    IUPACData.ambiguous_rna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    unambiguous_dna_by_id[id] = dna
    unambiguous_rna_by_id[id] = rna
    generic_by_id[id] = generic
    ambiguous_dna_by_id[id] = ambig_dna
    ambiguous_rna_by_id[id] = ambig_rna
    ambiguous_generic_by_id[id] = ambig_generic

    if alt_name is not None:
        names.append(alt_name)

    for table_name in names:
        unambiguous_dna_by_name[table_name] = dna
        unambiguous_rna_by_name[table_name] = rna
        generic_by_name[table_name] = generic
        ambiguous_dna_by_name[table_name] = ambig_dna
        ambiguous_rna_by_name[table_name] = ambig_rna
        ambiguous_generic_by_name[table_name] = ambig_generic
473 474 475 # These tables created from the data file 476 # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 477 # using the following: 478 # import re 479 # for line in open("gc.prt").readlines(): 480 # if line[:2] == " {": 481 # names = [] 482 # id = None 483 # aa = None 484 # start = None 485 # bases = [] 486 # elif line[:6] == " name": 487 # names.append(re.search('"([^"]*)"', line).group(1)) 488 # elif line[:8] == " name": 489 # names.append(re.search('"(.*)$', line).group(1)) 490 # elif line == ' Mitochondrial; Mycoplasma; Spiroplasma" ,\n': 491 # names[-1] = names[-1] + " Mitochondrial; Mycoplasma; Spiroplasma" 492 # elif line[:4] == " id": 493 # id = int(re.search('(\d+)', line).group(1)) 494 # elif line[:10] == " ncbieaa ": 495 # aa = line[12:12+64] 496 # elif line[:10] == " sncbieaa": 497 # start = line[12:12+64] 498 # elif line[:9] == " -- Base": 499 # bases.append(line[12:12+64]) 500 # elif line[:2] == " }": 501 # assert names != [] and id is not None and aa is not None 502 # assert start is not None and bases != [] 503 # if len(names) == 1: 504 # names.append(None) 505 # print("register_ncbi_table(name=%s," % repr(names[0])) 506 # print(" alt_name=%s, id=%d," % \ 507 # (repr(names[1]), id)) 508 # print(" table={") 509 # s = " " 510 # for i in range(64): 511 # if aa[i] != "*": 512 # t = " '%s%s%s': '%s'," % (bases[0][i], bases[1][i], 513 # bases[2][i], aa[i]) 514 # if len(s) + len(t) > 75: 515 # print(s) 516 # s = " " + t 517 # else: 518 # s = s + t 519 # print("%s }," % s) 520 521 # s = " stop_codons=[" 522 # for i in range(64): 523 # if aa[i] == "*": 524 # t = "'%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i]) 525 # if len(s) + len(t) > 75: 526 # s_with_spaces = s.replace("','", "', '") 527 # print(s_with_spaces) 528 # s = " " + t 529 # else: 530 # s = s + t 531 # s_with_spaces = s.replace("','", "', '") 532 # print("%s ]," % s_with_spaces) 533 534 # s = " start_codons=[" 535 # for i in range(64): 536 # if start[i] == "M": 537 # t = "'%s%s%s'," % 
(bases[0][i], bases[1][i], bases[2][i]) 538 # if len(s) + len(t) > 75: 539 # s_with_spaces = s.replace("','", "', '") 540 # print(s_with_spaces) 541 # s = " " + t 542 # else: 543 # s = s + t 544 # s_with_spaces = s.replace("','", "', '") 545 # print("%s ]" % s_with_spaces) 546 # print(" )") 547 # elif line[:2] == "--" or line == "\n" or line == "}\n" or \ 548 # line == 'Genetic-code-table ::= {\n': 549 # pass 550 # else: 551 # raise Exception("Unparsed: " + repr(line)) 552 553 register_ncbi_table(name='Standard', 554 alt_name='SGC0', id=1, 555 table={ 556 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 557 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 558 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 559 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 560 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 561 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 562 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 563 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 564 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 565 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 566 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 567 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 568 'GGG': 'G', }, 569 stop_codons=['TAA', 'TAG', 'TGA', ], 570 start_codons=['TTG', 'CTG', 'ATG', ] 571 ) 572 register_ncbi_table(name='Vertebrate Mitochondrial', 573 alt_name='SGC1', id=2, 574 table={ 575 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 576 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 577 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 578 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 579 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 580 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 581 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 582 'ACC': 'T', 'ACA': 
'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 583 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V', 584 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 585 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 586 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 587 stop_codons=['TAA', 'TAG', 'AGA', 'AGG', ], 588 start_codons=['ATT', 'ATC', 'ATA', 'ATG', 'GTG', ] 589 ) 590 register_ncbi_table(name='Yeast Mitochondrial', 591 alt_name='SGC2', id=3, 592 table={ 593 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 594 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 595 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T', 596 'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P', 597 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 598 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 599 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 600 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 601 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 602 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 603 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 604 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 605 'GGA': 'G', 'GGG': 'G', }, 606 stop_codons=['TAA', 'TAG', ], 607 start_codons=['ATA', 'ATG', ] 608 ) 609 register_ncbi_table(name='Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma', 610 alt_name='SGC3', id=4, 611 table={ 612 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 613 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 614 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 615 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 616 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 617 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 618 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 619 'ACC': 
'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 620 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 621 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 622 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 623 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 624 'GGA': 'G', 'GGG': 'G', }, 625 stop_codons=['TAA', 'TAG', ], 626 start_codons=['TTA', 'TTG', 'CTG', 'ATT', 'ATC', 627 'ATA', 'ATG', 'GTG', ] 628 ) 629 register_ncbi_table(name='Invertebrate Mitochondrial', 630 alt_name='SGC4', id=5, 631 table={ 632 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 633 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 634 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 635 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 636 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 637 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 638 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 639 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 640 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 641 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 642 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 643 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 644 'GGA': 'G', 'GGG': 'G', }, 645 stop_codons=['TAA', 'TAG', ], 646 start_codons=['TTG', 'ATT', 'ATC', 'ATA', 'ATG', 647 'GTG', ] 648 ) 649 register_ncbi_table(name='Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear', 650 alt_name='SGC5', id=6, 651 table={ 652 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 653 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 654 'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 655 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 656 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 657 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 658 'CGG': 'R', 'ATT': 
'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 659 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 660 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 661 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 662 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 663 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 664 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 665 stop_codons=['TGA', ], 666 start_codons=['ATG', ] 667 ) 668 register_ncbi_table(name='Echinoderm Mitochondrial; Flatworm Mitochondrial', 669 alt_name='SGC8', id=9, 670 table={ 671 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 672 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 673 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 674 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 675 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 676 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 677 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 678 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 679 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 680 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 681 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 682 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 683 'GGA': 'G', 'GGG': 'G', }, 684 stop_codons=['TAA', 'TAG', ], 685 start_codons=['ATG', 'GTG', ] 686 ) 687 register_ncbi_table(name='Euplotid Nuclear', 688 alt_name='SGC9', id=10, 689 table={ 690 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 691 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 692 'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L', 693 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 694 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 695 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 696 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 697 
'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 698 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 699 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 700 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 701 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 702 'GGA': 'G', 'GGG': 'G', }, 703 stop_codons=['TAA', 'TAG', ], 704 start_codons=['ATG', ] 705 ) 706 register_ncbi_table(name='Bacterial and Plant Plastid', 707 alt_name=None, id=11, 708 table={ 709 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 710 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 711 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 712 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 713 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 714 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 715 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 716 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 717 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 718 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 719 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 720 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 721 'GGG': 'G', }, 722 stop_codons=['TAA', 'TAG', 'TGA', ], 723 start_codons=['TTG', 'CTG', 'ATT', 'ATC', 'ATA', 724 'ATG', 'GTG', ] 725 ) 726 register_ncbi_table(name='Alternative Yeast Nuclear', 727 alt_name=None, id=12, 728 table={ 729 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 730 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 731 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 732 'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 733 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 734 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 735 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 736 'ACA': 'T', 'ACG': 'T', 'AAT': 
'N', 'AAC': 'N', 'AAA': 'K', 737 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 738 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 739 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 740 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 741 'GGG': 'G', }, 742 stop_codons=['TAA', 'TAG', 'TGA', ], 743 start_codons=['CTG', 'ATG', ] 744 ) 745 register_ncbi_table(name='Ascidian Mitochondrial', 746 alt_name=None, id=13, 747 table={ 748 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 749 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 750 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 751 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 752 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 753 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 754 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 755 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 756 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G', 757 'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 758 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 759 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 760 'GGA': 'G', 'GGG': 'G', }, 761 stop_codons=['TAA', 'TAG', ], 762 start_codons=['TTG', 'ATA', 'ATG', 'GTG', ] 763 ) 764 register_ncbi_table(name='Alternative Flatworm Mitochondrial', 765 alt_name=None, id=14, 766 table={ 767 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 768 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 769 'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 770 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 771 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 772 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 773 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 774 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 775 'AAC': 'N', 
'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 776 'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 777 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 778 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 779 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 780 stop_codons=['TAG', ], 781 start_codons=['ATG', ] 782 ) 783 register_ncbi_table(name='Blepharisma Macronuclear', 784 alt_name=None, id=15, 785 table={ 786 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 787 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 788 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 789 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 790 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 791 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 792 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 793 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 794 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 795 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 796 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 797 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 798 'GGA': 'G', 'GGG': 'G', }, 799 stop_codons=['TAA', 'TGA', ], 800 start_codons=['ATG', ] 801 ) 802 register_ncbi_table(name='Chlorophycean Mitochondrial', 803 alt_name=None, id=16, 804 table={ 805 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 806 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 807 'TAG': 'L', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 808 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 809 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 810 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 811 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 812 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 813 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 814 'AGG': 'R', 
'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 815 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 816 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 817 'GGA': 'G', 'GGG': 'G', }, 818 stop_codons=['TAA', 'TGA', ], 819 start_codons=['ATG', ] 820 ) 821 register_ncbi_table(name='Trematode Mitochondrial', 822 alt_name=None, id=21, 823 table={ 824 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 825 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 826 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 827 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 828 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 829 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 830 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 831 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 832 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 833 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 834 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 835 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 836 'GGA': 'G', 'GGG': 'G', }, 837 stop_codons=['TAA', 'TAG', ], 838 start_codons=['ATG', 'GTG', ] 839 ) 840 register_ncbi_table(name='Scenedesmus obliquus Mitochondrial', 841 alt_name=None, id=22, 842 table={ 843 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 844 'TCC': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TAG': 'L', 845 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 846 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 847 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 848 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 849 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 850 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 851 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 852 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 853 'GCC': 
'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 854 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 855 'GGG': 'G', }, 856 stop_codons=['TCA', 'TAA', 'TGA', ], 857 start_codons=['ATG', ] 858 ) 859 register_ncbi_table(name='Thraustochytrium Mitochondrial', 860 alt_name=None, id=23, 861 table={ 862 'TTT': 'F', 'TTC': 'F', 'TTG': 'L', 'TCT': 'S', 'TCC': 'S', 863 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 864 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 865 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 866 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 867 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 868 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 869 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 870 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 871 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 872 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 873 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 874 stop_codons=['TTA', 'TAA', 'TAG', 'TGA', ], 875 start_codons=['ATT', 'ATG', 'GTG', ] 876 ) 877 register_ncbi_table(name='Pterobranchia Mitochondrial', 878 alt_name=None, id=24, 879 table={ 880 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 881 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 882 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 883 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 884 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 885 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 886 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 887 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 888 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 889 'AGG': 'K', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 890 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 891 'GAC': 'D', 
'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 892 'GGA': 'G', 'GGG': 'G', }, 893 stop_codons=['TAA', 'TAG', ], 894 start_codons=['TTG', 'CTG', 'ATG', 'GTG', ], 895 ) 896 897 register_ncbi_table(name='Candidate Division SR1 and Gracilibacteria', 898 alt_name=None, id=25, 899 table={ 900 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 901 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 902 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 903 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 904 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 905 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 906 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 907 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 908 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 909 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 910 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 911 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 912 'GGG': 'G', 'TGA': 'G', }, 913 stop_codons=['TAA', 'TAG', ], 914 start_codons=['TTG', 'CTG', 'ATG', ] 915 ) 916 917 # Basic sanity test, 918 for key, val in generic_by_name.items(): 919 assert key in ambiguous_generic_by_name[key].names 920 for key, val in generic_by_id.items(): 921 assert ambiguous_generic_by_id[key].id == key 922 del key, val 923 924 for n in ambiguous_generic_by_id: 925 assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V" 926 assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V" 927 if n != 23: 928 # For table 23, UUN = F, L or stop. 
929 assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X" # F or L 930 # R = A or G, so URR = UAA or UGA / TRA = TAA or TGA = stop codons 931 if "UAA" in unambiguous_rna_by_id[n].stop_codons \ 932 and "UGA" in unambiguous_rna_by_id[n].stop_codons: 933 try: 934 print(ambiguous_dna_by_id[n].forward_table["TRA"]) 935 assert False, "Should be a stop only" 936 except KeyError: 937 pass 938 assert "URA" in ambiguous_generic_by_id[n].stop_codons 939 assert "URA" in ambiguous_rna_by_id[n].stop_codons 940 assert "TRA" in ambiguous_generic_by_id[n].stop_codons 941 assert "TRA" in ambiguous_dna_by_id[n].stop_codons 942 del n 943 assert ambiguous_generic_by_id[1] == ambiguous_generic_by_name["Standard"] 944 assert ambiguous_generic_by_id[4] == ambiguous_generic_by_name["SGC3"] 945 assert ambiguous_generic_by_id[11] == ambiguous_generic_by_name["Bacterial"] 946 assert ambiguous_generic_by_id[11] == ambiguous_generic_by_name["Plant Plastid"] 947 assert ambiguous_generic_by_id[15] == ambiguous_generic_by_name['Blepharisma Macronuclear'] 948 assert ambiguous_generic_by_id[24] == ambiguous_generic_by_name["Pterobranchia Mitochondrial"] 949 assert generic_by_id[1] == generic_by_name["Standard"] 950 assert generic_by_id[4] == generic_by_name["SGC3"] 951 assert generic_by_id[11] == generic_by_name["Bacterial"] 952 assert generic_by_id[11] == generic_by_name["Plant Plastid"] 953 assert generic_by_id[15] == generic_by_name['Blepharisma Macronuclear'] 954 assert generic_by_id[24] == generic_by_name["Pterobranchia Mitochondrial"] 955