Edit this page on GitHub

Methods for Degenerated Codons


Sometimes statistics on nucleotide sequences are limited to X-fold degenerated codons. The following code provides some functions to solve this problem, by generating a subsequence.


from Bio.Data.CodonTable import unambiguous_dna_by_id

def altcodons(codon, table):
    """List codons that code for the same aminonacid / are also stop.

    @param codon
    @table code table id
    @return list of codons

    tab = unambiguous_dna_by_id[table]

    if codon in tab.stop_codons:
        return tab.stop_codons

        aa = tab.forward_table[codon]
        return []

    return [k for (k, v) in tab.forward_table.iteritems()
            if v == aa and k[0] == codon[0] and k[1] == codon[1]]

def degeneration(codon, table):
    """Determine how many codons code for the same amino acid / are also stop

    @param codon the codon
    @param table code table id
    @param the number of codons also coding for the amino acid codon codes for

    return len(altcodons(codon, table))

def is_x_degenerated(x, codon, table):
    """Determine if codon is x-fold degenerated.

    @param codon the codon
    @param table code table id
    @param true if x <= the degeneration of the codon

    return (x <= len(altcodons(codon, table)))

def degenerated_subseq(seq, x, table):
    """Get a subsequence consisting of the x-fold degenerated codons only."""
    data = ""
    for i in range(0, len(seq), 3):
        codon = seq[i:i + 3].tostring()
        if isXdegenerated(x, codon, table):
            data += codon
    return data