Package Bio :: Package Data :: Module CodonTable
[hide private]
[frames | no frames]

Source Code for Module Bio.Data.CodonTable

  1  # This code is part of the Biopython distribution and governed by its 
  2  # license.  Please see the LICENSE file that should have been included 
  3  # as part of this package. 
  4  """Codon tables based on those from the NCBI. 
  5   
  6  These tables are based on parsing the NCBI file: 
  7  ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 
  8   
  9  Last updated at Version 4.0 
 10  """ 
 11   
 12  from __future__ import print_function 
 13   
 14  from Bio import Alphabet 
 15  from Bio.Alphabet import IUPAC 
 16  from Bio.Data import IUPACData 
 17   
 18  __docformat__ = "restructuredtext en" 
 19   
 20  unambiguous_dna_by_name = {}  # table name -> NCBICodonTableDNA
 21  unambiguous_dna_by_id = {}  # NCBI table id -> NCBICodonTableDNA
 22  unambiguous_rna_by_name = {}  # table name -> NCBICodonTableRNA
 23  unambiguous_rna_by_id = {}  # NCBI table id -> NCBICodonTableRNA
 24  generic_by_name = {}  # unambiguous DNA or RNA: NCBICodonTable holding both the T and the U spelling of each codon
 25  generic_by_id = {}  # unambiguous DNA or RNA: NCBICodonTable holding both the T and the U spelling of each codon
 26  
 27  ambiguous_dna_by_name = {}  # table name -> AmbiguousCodonTable wrapping the DNA table
 28  ambiguous_dna_by_id = {}  # NCBI table id -> AmbiguousCodonTable wrapping the DNA table
 29  ambiguous_rna_by_name = {}  # table name -> AmbiguousCodonTable wrapping the RNA table
 30  ambiguous_rna_by_id = {}  # NCBI table id -> AmbiguousCodonTable wrapping the RNA table
 31  ambiguous_generic_by_name = {}  # ambiguous DNA or RNA: AmbiguousCodonTable wrapping the generic table
 32  ambiguous_generic_by_id = {}  # ambiguous DNA or RNA: AmbiguousCodonTable wrapping the generic table
 33  
 34  # standard IUPAC unambiguous codons; both are assigned by register_ncbi_table when the table with id 1 is registered
 35  standard_dna_table = None
 36  standard_rna_table = None
 37   
 38  # In the future, the back_table could return a statistically 
 39  # appropriate distribution of codons, so do not cache the results of 
 40  # back_table lookups! 
 41   
 42   
class TranslationError(Exception):
    """Raised when a codon has no single, unique translation.

    Used for ambiguous codons whose expansions code for both amino acids
    and stop codons, or for several amino acids that share no ambiguity
    letter.
    """
45 46
47 -class CodonTable(object):
48 """A codon-table, or genetic code.""" 49 nucleotide_alphabet = Alphabet.generic_nucleotide 50 protein_alphabet = Alphabet.generic_protein 51 52 forward_table = {} # only includes codons which actually code 53 back_table = {} # for back translations 54 start_codons = [] 55 stop_codons = [] 56 57 # Not always called from derived classes!
58 - def __init__(self, nucleotide_alphabet=nucleotide_alphabet, 59 protein_alphabet=protein_alphabet, 60 forward_table=forward_table, back_table=back_table, 61 start_codons=start_codons, stop_codons=stop_codons):
68
def __str__(self):
    """Return a simple text representation of the codon table.

    e.g.

    >>> import Bio.Data.CodonTable
    >>> print(Bio.Data.CodonTable.standard_dna_table)
    >>> print(Bio.Data.CodonTable.generic_by_id[1])

    The result is a title line followed by a grid: one row per
    (first base, third base) pair and one column per second base.
    Stop codons are shown as ``Stop``, start codons carry an ``(s)``
    suffix, and codons missing from the forward table are shown as ``?``.
    """
    # A table that never had ``id``/``names`` assigned should fall into
    # the "unknown" branch below rather than raise AttributeError.
    table_id = getattr(self, "id", None)
    names = getattr(self, "names", None)

    if table_id:
        title = "Table %i" % table_id
    else:
        title = "Table ID unknown"
    if names:
        title += " " + ", ".join([x for x in names if x])

    # Use the main four letters (and the conventional ordering)
    # even for ambiguous tables
    letters = self.nucleotide_alphabet.letters
    if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \
            or (letters is not None and "T" in letters):
        letters = "TCAG"
    else:
        # Should be either RNA or generic nucleotides,
        # e.g. Bio.Data.CodonTable.generic_by_id[1]
        letters = "UCAG"

    # Every cell is padded to the nine-character width of the
    # "---------" rule (and the header gutter to the three-character
    # row gutter) so that the columns line up.
    rule = "\n--+" + "+".join("---------" for _ in letters) + "+--"
    parts = [title,
             "\n\n  |" + "|".join("  %s      " % c2 for c2 in letters) + "|",
             rule]
    for c1 in letters:
        for c3 in letters:
            cells = [c1 + " |"]
            for c2 in letters:
                codon = c1 + c2 + c3
                if codon in self.stop_codons:
                    cells.append(" %s Stop|" % codon)
                    continue
                try:
                    amino = self.forward_table[codon]
                except (KeyError, TranslationError):
                    # Not a coding codon in this table
                    amino = "?"
                if codon in self.start_codons:
                    cells.append(" %s %s(s)|" % (codon, amino))
                else:
                    cells.append(" %s %s   |" % (codon, amino))
            cells.append(" " + c3)
            parts.append("\n" + "".join(cells))
        parts.append(rule)
    # Join once instead of growing the string with repeated "+=".
    return "".join(parts)
123 124
def make_back_table(table, default_stop_codon):
    """Make a back-table (naive single codon mapping).

    ONLY RETURNS A SINGLE CODON per amino acid.  Codons are visited in
    sorted order and later codons overwrite earlier ones, so the result
    does not depend on the dictionary's own ordering.  The key ``None``
    maps to ``default_stop_codon``.
    """
    back_table = {table[codon]: codon for codon in sorted(table)}
    back_table[None] = default_stop_codon
    return back_table
138 139
class NCBICodonTable(CodonTable):
    """Codon table built from one NCBI genetic code entry."""

    nucleotide_alphabet = Alphabet.generic_nucleotide
    protein_alphabet = IUPAC.protein

    def __init__(self, id, names, table, start_codons, stop_codons):
        """Store the table data and derive the back-translation table.

        Arguments:
         - id - identifier stored as ``self.id``
         - names - list of names stored as ``self.names``
         - table - dict mapping coding codons to amino acid letters
         - start_codons - list of start codons
         - stop_codons - list of stop codons; the first entry becomes
           the back-translation of ``None``
        """
        self.id, self.names = id, names
        self.forward_table = table
        # The first stop codon listed is the default one for back-translation.
        self.back_table = make_back_table(table, stop_codons[0])
        self.start_codons, self.stop_codons = start_codons, stop_codons
151 152
class NCBICodonTableDNA(NCBICodonTable):
    """NCBI codon table over the unambiguous IUPAC DNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_dna
155 156
class NCBICodonTableRNA(NCBICodonTable):
    """NCBI codon table over the unambiguous IUPAC RNA alphabet."""

    nucleotide_alphabet = IUPAC.unambiguous_rna
159 160 161 # ######## Deal with ambiguous forward translations 162
class AmbiguousCodonTable(CodonTable):
    """Codon table that wraps an unambiguous table and accepts ambiguous codons.

    Attributes not found on this object are looked up on the wrapped
    table, so e.g. ``id`` and ``names`` of an NCBI table stay available.
    """

    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Build the ambiguous view of ``codon_table``.

        Arguments:
         - codon_table - the unambiguous table to wrap; its
           ``forward_table``, ``back_table``, ``start_codons`` and
           ``stop_codons`` are read here
         - ambiguous_nucleotide_alphabet - alphabet passed on as the
           nucleotide alphabet
         - ambiguous_nucleotide_values - mapping of each nucleotide
           letter to the letters it can stand for (e.g. 'R' -> 'AG')
         - ambiguous_protein_alphabet - alphabet passed on as the
           protein alphabet
         - ambiguous_protein_values - mapping of each protein letter to
           the residues it can stand for
        """
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            AmbiguousForwardTable(codon_table.forward_table,
                                                  ambiguous_nucleotide_values,
                                                  ambiguous_protein_values),
                            codon_table.back_table,

                            # These two are WRONG!  I need to get the
                            # list of ambiguous codons which code for
                            # the stop codons  XXX
                            list_ambiguous_codons(codon_table.start_codons,
                                                  ambiguous_nucleotide_values),
                            list_ambiguous_codons(codon_table.stop_codons,
                                                  ambiguous_nucleotide_values)
                            )
        self._codon_table = codon_table

    # Be sneaky and forward attribute lookups to the original table.
    # This lets us get the names, if the original table is an NCBI
    # table.
    def __getattr__(self, name):
        """Forward a failed attribute lookup to the wrapped table.

        Raises AttributeError if the wrapped table lacks the attribute,
        or if the wrapped table itself has not been set yet.
        """
        # __getattr__ only runs when normal lookup fails.  If
        # ``_codon_table`` is the missing attribute (for example while
        # copy/pickle rebuild an instance with an empty __dict__), reading
        # self._codon_table below would re-enter this method endlessly and
        # end in RecursionError rather than AttributeError.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
190 191
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return the amino acids an (ambiguous) codon can code for.

    Each of the three letters of ``codon`` is expanded through
    ``ambiguous_nucleotide_values`` and every resulting concrete codon
    is looked up in ``forward_table``.

    Raises KeyError (with the codon) if at least one expansion is missing
    from ``forward_table`` and none codes, i.e. it is a true stop codon,
    and TranslationError if some expansions code while others do not.
    """
    first, second, third = codon
    choices1 = ambiguous_nucleotide_values[first]
    choices2 = ambiguous_nucleotide_values[second]
    choices3 = ambiguous_nucleotide_values[third]
    expansions = [y1 + y2 + y3
                  for y1 in choices1
                  for y2 in choices2
                  for y3 in choices3]

    coded = {}  # used as an ordered set of amino acids
    non_coding = []
    for concrete in expansions:
        try:
            amino = forward_table[concrete]
        except KeyError:
            # If tripping over a stop codon
            non_coding.append(concrete)
        else:
            coded[amino] = 1

    if not non_coding:
        return list(coded)
    if coded:
        raise TranslationError(
            "ambiguous codon '%s' codes for both proteins and stop codons"
            % codon)
    # This is a true stop codon - tell the caller about it
    raise KeyError(codon)
214 215
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extend a codon list to include all possible ambiguous codons.

    e.g.::

         ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UAA', 'UGA'] -> ['UAA', 'UGA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'
    (which could also mean 'TAA' or 'TGG').  Thus only two more codons
    are added in the following:

    e.g.::

         ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    Arguments:
     - codons - sequence of three-letter codon strings
     - ambiguous_nucleotide_values - mapping of each nucleotide letter
       to the letters it can stand for, e.g. 'R' -> 'AG'

    Returns a new (longer) list of codon strings: the input codons in
    their original order, followed by every ambiguous codon whose
    expansions are all among the input codons.
    """
    known = set(codons)

    def letters_covering(position):
        """Return sorted letters whose meanings all occur at ``position``."""
        # Build the set of letters seen at this position once, not once
        # per alphabet letter.
        seen = set(codon[position] for codon in codons)
        return sorted(letter for (letter, meanings)
                      in ambiguous_nucleotide_values.items()
                      if seen.issuperset(meanings))

    # Note ambiguous_nucleotide_values['R'] = 'AG' (etc)
    # Combining the per-position letters will generate things like 'TRR'
    # from ['TAG', 'TGA'], which we don't want to include; those are
    # filtered out below.
    c1_list = letters_covering(0)
    c2_list = letters_covering(1)
    c3_list = letters_covering(2)

    answer = list(codons)  # copy; also accepts tuples
    for c1 in c1_list:
        for c2 in c2_list:
            for c3 in c3_list:
                ambig_codon = c1 + c2 + c3
                if ambig_codon in known:
                    continue
                # e.g. 'TRR' -> 'TAA', 'TAG', 'TGA', 'TGG'
                # Keep the candidate only if every codon it could stand
                # for is in the input; all() stops at the first miss.
                if all(x1 + x2 + x3 in known
                       for x1 in ambiguous_nucleotide_values[c1]
                       for x2 in ambiguous_nucleotide_values[c2]
                       for x3 in ambiguous_nucleotide_values[c3]):
                    answer.append(ambig_codon)
    return answer

# Import-time self-checks for list_ambiguous_codons:
# (input codons, ambiguity mapping, expected result)
for _codons, _values, _expected in [
        (['TGA', 'TAA'], IUPACData.ambiguous_dna_values,
         ['TGA', 'TAA', 'TRA']),
        (['TAG', 'TGA'], IUPACData.ambiguous_dna_values,
         ['TAG', 'TGA']),
        (['TAG', 'TAA'], IUPACData.ambiguous_dna_values,
         ['TAG', 'TAA', 'TAR']),
        (['UAG', 'UAA'], IUPACData.ambiguous_rna_values,
         ['UAG', 'UAA', 'UAR']),
        (['TGA', 'TAA', 'TAG'], IUPACData.ambiguous_dna_values,
         ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']),
]:
    assert list_ambiguous_codons(_codons, _values) == _expected
del _codons, _values, _expected

# Forward translation is "onto", that is, any given codon always maps
# to the same protein, or it doesn't map at all.  Thus, I can build
# off of an existing table to produce the ambiguous mappings.
#
# This handles the general case.  Perhaps it's overkill?
#  >>> t = CodonTable.ambiguous_dna_by_id[1]
#  >>> t.forward_table["AAT"]
#  'N'
#  >>> t.forward_table["GAT"]
#  'D'
#  >>> t.forward_table["RAT"]
#  'B'
#  >>> t.forward_table["YTA"]
#  'L'


class AmbiguousForwardTable(object):
    """Forward table (codon -> amino acid) that also resolves ambiguous codons.

    Lookups are answered from the wrapped unambiguous ``forward_table``
    where possible; otherwise the codon is expanded and, if the
    expansions code for several amino acids, the smallest ambiguous
    protein letter covering all of them is returned.  Results, including
    failures, are cached per codon.
    """

    def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
        """Store the mappings and pre-compute the residue -> letters index.

        Arguments:
         - forward_table - unambiguous mapping of codon to amino acid
         - ambiguous_nucleotide - mapping of each nucleotide letter to
           the letters it can stand for
         - ambiguous_protein - mapping of each protein letter to the
           residues it can stand for
        """
        self.forward_table = forward_table
        self.ambiguous_nucleotide = ambiguous_nucleotide
        self.ambiguous_protein = ambiguous_protein

        # Invert ambiguous_protein: for every residue, collect the
        # protein letters whose meaning includes it.
        covering = {}
        for term, residues in ambiguous_protein.items():
            for residue in residues:
                covering.setdefault(residue, {})[term] = 1
        self._inverted = {residue: list(terms)
                          for residue, terms in covering.items()}

        # codon -> amino acid, or the KeyError / TranslationError class
        # itself as a marker for a remembered failure
        self._cache = {}

    def get(self, codon, failobj=None):
        """Return the translation of ``codon``, or ``failobj`` on KeyError.

        A TranslationError is not caught here and propagates.
        """
        try:
            return self.__getitem__(codon)
        except KeyError:
            return failobj

    def __getitem__(self, codon):
        """Return the (possibly ambiguous) amino acid letter for ``codon``.

        Raises KeyError for a stop codon and TranslationError when the
        codon has no unique translation.
        """
        if codon in self._cache:
            cached = self._cache[codon]
            if cached is TranslationError:
                raise TranslationError(codon)  # no unique translation
            if cached is KeyError:
                raise KeyError(codon)  # it's a stop codon
            return cached

        try:
            amino = self.forward_table[codon]
        except KeyError:
            pass
        else:
            self._cache[codon] = amino
            return amino

        # XXX Need to make part of this into a method which returns
        # a list of all possible encodings for a codon!
        try:
            candidates = list_possible_proteins(codon,
                                                self.forward_table,
                                                self.ambiguous_nucleotide)
        except KeyError:
            self._cache[codon] = KeyError
            raise KeyError(codon)  # stop codon
        except TranslationError:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)  # does not code
        assert len(candidates) > 0, "unambiguous codons must code"

        # Hah!  Only one possible protein, so use it
        if len(candidates) == 1:
            self._cache[codon] = candidates[0]
            return candidates[0]

        # See if there's an ambiguous protein encoding for the multiples.
        # Count, per protein letter, how many of the candidates it covers.
        coverage = {}
        for amino in candidates:
            for term in self._inverted[amino]:
                coverage[term] = coverage.get(term, 0) + 1

        # Keep the letters which cover every candidate.
        needed = len(candidates)
        shared = [term for term, hits in coverage.items() if hits == needed]

        # No amino acid encoding for the results
        if len(shared) == 0:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)  # no valid translation

        # All of these are valid, so choose one.
        # To be unique, take the smallest ambiguity, then alphabetically.
        # Can get this if "X" encodes for everything.
        best = min(shared,
                   key=lambda term: (len(self.ambiguous_protein[term]), term))
        self._cache[codon] = best
        return best
390 391
def register_ncbi_table(name, alt_name, id,
                        table, start_codons, stop_codons):
    """Turn codon table data into objects and store them in the registries (PRIVATE).

    Builds the DNA, RNA and generic (DNA or RNA) tables plus their
    ambiguous counterparts, and files all six under ``id`` and under
    every name in the ``*_by_id`` / ``*_by_name`` dictionaries.  The
    table with id 1 also becomes ``standard_dna_table`` /
    ``standard_rna_table``.
    """
    global standard_dna_table, standard_rna_table

    # In most cases names are divided by "; ", however there is also
    # 'Bacterial and Plant Plastid' (which used to be just 'Bacterial')
    names = [part.strip()
             for part in name.replace(" and ", "; ").split("; ")]

    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)
    ambig_dna = AmbiguousCodonTable(dna,
                                    IUPAC.ambiguous_dna,
                                    IUPACData.ambiguous_dna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    # replace all T's with U's for the RNA tables; the generic tables
    # hold the DNA spelling followed by the RNA spelling of each codon
    rna_table = {}
    generic_table = {}
    for dna_codon, amino in table.items():
        rna_codon = dna_codon.replace("T", "U")
        generic_table[dna_codon] = amino
        generic_table[rna_codon] = amino
        rna_table[rna_codon] = amino

    rna_start_codons = [codon.replace("T", "U") for codon in start_codons]
    generic_start_codons = []
    for dna_codon, rna_codon in zip(start_codons, rna_start_codons):
        generic_start_codons.extend((dna_codon, rna_codon))

    rna_stop_codons = [codon.replace("T", "U") for codon in stop_codons]
    generic_stop_codons = []
    for dna_codon, rna_codon in zip(stop_codons, rna_stop_codons):
        generic_stop_codons.extend((dna_codon, rna_codon))

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)

    # The following isn't very elegant, but seems to work nicely.
    _merged_values = dict(IUPACData.ambiguous_rna_values)
    _merged_values["T"] = "U"
    ambig_generic = AmbiguousCodonTable(generic,
                                        Alphabet.NucleotideAlphabet(),
                                        _merged_values,
                                        IUPAC.extended_protein,
                                        IUPACData.extended_protein_values)

    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)
    ambig_rna = AmbiguousCodonTable(rna,
                                    IUPAC.ambiguous_rna,
                                    IUPACData.ambiguous_rna_values,
                                    IUPAC.extended_protein,
                                    IUPACData.extended_protein_values)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    for registry, codon_table in ((unambiguous_dna_by_id, dna),
                                  (unambiguous_rna_by_id, rna),
                                  (generic_by_id, generic),
                                  (ambiguous_dna_by_id, ambig_dna),
                                  (ambiguous_rna_by_id, ambig_rna),
                                  (ambiguous_generic_by_id, ambig_generic)):
        registry[id] = codon_table

    if alt_name is not None:
        names.append(alt_name)

    for label in names:
        unambiguous_dna_by_name[label] = dna
        unambiguous_rna_by_name[label] = rna
        generic_by_name[label] = generic
        ambiguous_dna_by_name[label] = ambig_dna
        ambiguous_rna_by_name[label] = ambig_rna
        ambiguous_generic_by_name[label] = ambig_generic
474 475 476 # These tables created from the data file 477 # ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt 478 # using the following: 479 # import re 480 # for line in open("gc.prt").readlines(): 481 # if line[:2] == " {": 482 # names = [] 483 # id = None 484 # aa = None 485 # start = None 486 # bases = [] 487 # elif line[:6] == " name": 488 # names.append(re.search('"([^"]*)"', line).group(1)) 489 # elif line[:8] == " name": 490 # names.append(re.search('"(.*)$', line).group(1)) 491 # elif line == ' Mitochondrial; Mycoplasma; Spiroplasma" ,\n': 492 # names[-1] = names[-1] + " Mitochondrial; Mycoplasma; Spiroplasma" 493 # elif line[:4] == " id": 494 # id = int(re.search('(\d+)', line).group(1)) 495 # elif line[:10] == " ncbieaa ": 496 # aa = line[12:12+64] 497 # elif line[:10] == " sncbieaa": 498 # start = line[12:12+64] 499 # elif line[:9] == " -- Base": 500 # bases.append(line[12:12+64]) 501 # elif line[:2] == " }": 502 # assert names != [] and id is not None and aa is not None 503 # assert start is not None and bases != [] 504 # if len(names) == 1: 505 # names.append(None) 506 # print("register_ncbi_table(name=%s," % repr(names[0])) 507 # print(" alt_name=%s, id=%d," % \ 508 # (repr(names[1]), id)) 509 # print(" table={") 510 # s = " " 511 # for i in range(64): 512 # if aa[i] != "*": 513 # t = " '%s%s%s': '%s'," % (bases[0][i], bases[1][i], 514 # bases[2][i], aa[i]) 515 # if len(s) + len(t) > 75: 516 # print(s) 517 # s = " " + t 518 # else: 519 # s = s + t 520 # print("%s }," % s) 521 522 # s = " stop_codons=[" 523 # for i in range(64): 524 # if aa[i] == "*": 525 # t = "'%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i]) 526 # if len(s) + len(t) > 75: 527 # s_with_spaces = s.replace("','", "', '") 528 # print(s_with_spaces) 529 # s = " " + t 530 # else: 531 # s = s + t 532 # s_with_spaces = s.replace("','", "', '") 533 # print("%s ]," % s_with_spaces) 534 535 # s = " start_codons=[" 536 # for i in range(64): 537 # if start[i] == "M": 538 # t = "'%s%s%s'," % 
(bases[0][i], bases[1][i], bases[2][i]) 539 # if len(s) + len(t) > 75: 540 # s_with_spaces = s.replace("','", "', '") 541 # print(s_with_spaces) 542 # s = " " + t 543 # else: 544 # s = s + t 545 # s_with_spaces = s.replace("','", "', '") 546 # print("%s ]" % s_with_spaces) 547 # print(" )") 548 # elif line[:2] == "--" or line == "\n" or line == "}\n" or \ 549 # line == 'Genetic-code-table ::= {\n': 550 # pass 551 # else: 552 # raise Exception("Unparsed: " + repr(line)) 553 554 register_ncbi_table(name='Standard', 555 alt_name='SGC0', id=1, 556 table={ 557 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 558 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 559 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 560 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 561 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 562 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 563 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 564 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 565 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 566 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 567 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 568 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 569 'GGG': 'G', }, 570 stop_codons=['TAA', 'TAG', 'TGA', ], 571 start_codons=['TTG', 'CTG', 'ATG', ] 572 ) 573 register_ncbi_table(name='Vertebrate Mitochondrial', 574 alt_name='SGC1', id=2, 575 table={ 576 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 577 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 578 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 579 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 580 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 581 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 582 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 583 'ACC': 'T', 'ACA': 
'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 584 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V', 585 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 586 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 587 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 588 stop_codons=['TAA', 'TAG', 'AGA', 'AGG', ], 589 start_codons=['ATT', 'ATC', 'ATA', 'ATG', 'GTG', ] 590 ) 591 register_ncbi_table(name='Yeast Mitochondrial', 592 alt_name='SGC2', id=3, 593 table={ 594 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 595 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 596 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T', 597 'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P', 598 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 599 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 600 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 601 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 602 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 603 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 604 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 605 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 606 'GGA': 'G', 'GGG': 'G', }, 607 stop_codons=['TAA', 'TAG', ], 608 start_codons=['ATA', 'ATG', ] 609 ) 610 register_ncbi_table(name='Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma', 611 alt_name='SGC3', id=4, 612 table={ 613 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 614 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 615 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 616 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 617 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 618 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 619 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 620 'ACC': 
'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 621 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 622 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 623 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 624 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 625 'GGA': 'G', 'GGG': 'G', }, 626 stop_codons=['TAA', 'TAG', ], 627 start_codons=['TTA', 'TTG', 'CTG', 'ATT', 'ATC', 628 'ATA', 'ATG', 'GTG', ] 629 ) 630 register_ncbi_table(name='Invertebrate Mitochondrial', 631 alt_name='SGC4', id=5, 632 table={ 633 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 634 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 635 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 636 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 637 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 638 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 639 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 640 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 641 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 642 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 643 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 644 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 645 'GGA': 'G', 'GGG': 'G', }, 646 stop_codons=['TAA', 'TAG', ], 647 start_codons=['TTG', 'ATT', 'ATC', 'ATA', 'ATG', 648 'GTG', ] 649 ) 650 register_ncbi_table(name='Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear', 651 alt_name='SGC5', id=6, 652 table={ 653 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 654 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 655 'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 656 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 657 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 658 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 659 'CGG': 'R', 'ATT': 
'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 660 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 661 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 662 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 663 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 664 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 665 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 666 stop_codons=['TGA', ], 667 start_codons=['ATG', ] 668 ) 669 register_ncbi_table(name='Echinoderm Mitochondrial; Flatworm Mitochondrial', 670 alt_name='SGC8', id=9, 671 table={ 672 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 673 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 674 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 675 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 676 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 677 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 678 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 679 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 680 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 681 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 682 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 683 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 684 'GGA': 'G', 'GGG': 'G', }, 685 stop_codons=['TAA', 'TAG', ], 686 start_codons=['ATG', 'GTG', ] 687 ) 688 register_ncbi_table(name='Euplotid Nuclear', 689 alt_name='SGC9', id=10, 690 table={ 691 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 692 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 693 'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L', 694 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 695 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 696 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 697 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 698 
'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 699 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 700 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 701 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 702 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 703 'GGA': 'G', 'GGG': 'G', }, 704 stop_codons=['TAA', 'TAG', ], 705 start_codons=['ATG', ] 706 ) 707 register_ncbi_table(name='Bacterial and Plant Plastid', 708 alt_name=None, id=11, 709 table={ 710 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 711 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 712 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 713 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 714 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 715 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 716 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 717 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 718 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 719 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 720 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 721 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 722 'GGG': 'G', }, 723 stop_codons=['TAA', 'TAG', 'TGA', ], 724 start_codons=['TTG', 'CTG', 'ATT', 'ATC', 'ATA', 725 'ATG', 'GTG', ] 726 ) 727 register_ncbi_table(name='Alternative Yeast Nuclear', 728 alt_name=None, id=12, 729 table={ 730 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 731 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 732 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 733 'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 734 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 735 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 736 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 737 'ACA': 'T', 'ACG': 'T', 'AAT': 
'N', 'AAC': 'N', 'AAA': 'K', 738 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 739 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 740 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 741 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 742 'GGG': 'G', }, 743 stop_codons=['TAA', 'TAG', 'TGA', ], 744 start_codons=['CTG', 'ATG', ] 745 ) 746 register_ncbi_table(name='Ascidian Mitochondrial', 747 alt_name=None, id=13, 748 table={ 749 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 750 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 751 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 752 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 753 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 754 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 755 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 756 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 757 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G', 758 'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 759 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 760 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 761 'GGA': 'G', 'GGG': 'G', }, 762 stop_codons=['TAA', 'TAG', ], 763 start_codons=['TTG', 'ATA', 'ATG', 'GTG', ] 764 ) 765 register_ncbi_table(name='Alternative Flatworm Mitochondrial', 766 alt_name=None, id=14, 767 table={ 768 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 769 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 770 'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 771 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 772 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 773 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 774 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 775 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 776 'AAC': 'N', 
'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 777 'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 778 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 779 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 780 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 781 stop_codons=['TAG', ], 782 start_codons=['ATG', ] 783 ) 784 register_ncbi_table(name='Blepharisma Macronuclear', 785 alt_name=None, id=15, 786 table={ 787 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 788 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 789 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 790 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 791 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 792 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 793 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 794 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 795 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 796 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 797 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 798 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 799 'GGA': 'G', 'GGG': 'G', }, 800 stop_codons=['TAA', 'TGA', ], 801 start_codons=['ATG', ] 802 ) 803 register_ncbi_table(name='Chlorophycean Mitochondrial', 804 alt_name=None, id=16, 805 table={ 806 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 807 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 808 'TAG': 'L', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 809 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 810 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 811 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 812 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 813 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 814 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 815 'AGG': 'R', 
'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 816 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 817 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 818 'GGA': 'G', 'GGG': 'G', }, 819 stop_codons=['TAA', 'TGA', ], 820 start_codons=['ATG', ] 821 ) 822 register_ncbi_table(name='Trematode Mitochondrial', 823 alt_name=None, id=21, 824 table={ 825 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 826 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 827 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 828 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 829 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 830 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 831 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 832 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 833 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 834 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 835 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 836 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 837 'GGA': 'G', 'GGG': 'G', }, 838 stop_codons=['TAA', 'TAG', ], 839 start_codons=['ATG', 'GTG', ] 840 ) 841 register_ncbi_table(name='Scenedesmus obliquus Mitochondrial', 842 alt_name=None, id=22, 843 table={ 844 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 845 'TCC': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TAG': 'L', 846 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 847 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 848 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 849 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 850 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 851 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 852 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 853 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 854 'GCC': 
'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 855 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 856 'GGG': 'G', }, 857 stop_codons=['TCA', 'TAA', 'TGA', ], 858 start_codons=['ATG', ] 859 ) 860 register_ncbi_table(name='Thraustochytrium Mitochondrial', 861 alt_name=None, id=23, 862 table={ 863 'TTT': 'F', 'TTC': 'F', 'TTG': 'L', 'TCT': 'S', 'TCC': 'S', 864 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 'TGT': 'C', 865 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 866 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 867 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 868 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 869 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 870 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 871 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 872 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 873 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 874 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 875 stop_codons=['TTA', 'TAA', 'TAG', 'TGA', ], 876 start_codons=['ATT', 'ATG', 'GTG', ] 877 ) 878 register_ncbi_table(name='Pterobranchia Mitochondrial', 879 alt_name=None, id=24, 880 table={ 881 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 882 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 883 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 884 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 885 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 886 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 887 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 888 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 889 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 890 'AGG': 'K', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 891 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 892 'GAC': 'D', 
'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 893 'GGA': 'G', 'GGG': 'G', }, 894 stop_codons=['TAA', 'TAG', ], 895 start_codons=['TTG', 'CTG', 'ATG', 'GTG', ], 896 ) 897 898 # Basic sanity test, 899 for key, val in generic_by_name.items(): 900 assert key in ambiguous_generic_by_name[key].names 901 for key, val in generic_by_id.items(): 902 assert ambiguous_generic_by_id[key].id == key 903 del key, val 904 905 for n in ambiguous_generic_by_id: 906 assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V" 907 assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V" 908 if n != 23: 909 # For table 23, UUN = F, L or stop. 910 assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X" # F or L 911 # R = A or G, so URR = UAA or UGA / TRA = TAA or TGA = stop codons 912 if "UAA" in unambiguous_rna_by_id[n].stop_codons \ 913 and "UGA" in unambiguous_rna_by_id[n].stop_codons: 914 try: 915 print(ambiguous_dna_by_id[n].forward_table["TRA"]) 916 assert False, "Should be a stop only" 917 except KeyError: 918 pass 919 assert "URA" in ambiguous_generic_by_id[n].stop_codons 920 assert "URA" in ambiguous_rna_by_id[n].stop_codons 921 assert "TRA" in ambiguous_generic_by_id[n].stop_codons 922 assert "TRA" in ambiguous_dna_by_id[n].stop_codons 923 del n 924 assert ambiguous_generic_by_id[1] == ambiguous_generic_by_name["Standard"] 925 assert ambiguous_generic_by_id[4] == ambiguous_generic_by_name["SGC3"] 926 assert ambiguous_generic_by_id[11] == ambiguous_generic_by_name["Bacterial"] 927 assert ambiguous_generic_by_id[11] == ambiguous_generic_by_name["Plant Plastid"] 928 assert ambiguous_generic_by_id[15] == ambiguous_generic_by_name['Blepharisma Macronuclear'] 929 assert ambiguous_generic_by_id[24] == ambiguous_generic_by_name["Pterobranchia Mitochondrial"] 930 assert generic_by_id[1] == generic_by_name["Standard"] 931 assert generic_by_id[4] == generic_by_name["SGC3"] 932 assert generic_by_id[11] == generic_by_name["Bacterial"] 933 assert generic_by_id[11] == generic_by_name["Plant 
Plastid"] 934 assert generic_by_id[15] == generic_by_name['Blepharisma Macronuclear'] 935 assert generic_by_id[24] == generic_by_name["Pterobranchia Mitochondrial"] 936