Package Bio :: Package PopGen :: Package FDist :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Utils

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7  from Bio.PopGen.GenePop import FileParser 
  8  import Bio.PopGen.FDist 
  9   
 10   
 11  # Quite a few utility functions could be done (like remove pop, 
 12  # add locus, etc...). The recommended strategy is convert back 
 13  # and forth from/to GenePop and use GenePop Utils 
 14   
 15   
16 -def convert_genepop_to_fdist(gp_rec, report_pops=None):
17 """Converts a GenePop record to a FDist one. 18 19 Parameters: 20 gp_rec - Genepop Record (either standard or big) 21 22 Returns: 23 FDist record. 24 """ 25 if hasattr(gp_rec, "populations"): 26 return _convert_genepop_to_fdist(gp_rec) 27 else: 28 return _convert_genepop_to_fdist_big(gp_rec, report_pops)
29 30
31 -def _convert_genepop_to_fdist(gp_rec):
32 """Converts a standard GenePop record to a FDist one. 33 34 Parameters: 35 gp_rec - Genepop Record (Standard) 36 37 Returns: 38 FDist record. 39 """ 40 fd_rec = Bio.PopGen.FDist.Record() 41 42 fd_rec.data_org = 0 43 fd_rec.num_loci = len(gp_rec.loci_list) 44 fd_rec.num_pops = len(gp_rec.populations) 45 for lc_i in range(len(gp_rec.loci_list)): 46 alleles = [] 47 pop_data = [] 48 for pop_i in range(len(gp_rec.populations)): 49 for indiv in gp_rec.populations[pop_i]: 50 for al in indiv[1][lc_i]: 51 if al is not None and al not in alleles: 52 alleles.append(al) 53 alleles.sort() # Dominance requires this 54 55 # here we go again (necessary...) 56 for pop_i in range(len(gp_rec.populations)): 57 allele_counts = {} 58 for indiv in gp_rec.populations[pop_i]: 59 for al in indiv[1][lc_i]: 60 if al is not None: 61 count = allele_counts.get(al, 0) 62 allele_counts[al] = count + 1 63 allele_array = [] # We need the same order as in alleles 64 for allele in alleles: 65 allele_array.append(allele_counts.get(allele, 0)) 66 pop_data.append(allele_array) 67 fd_rec.loci_data.append((len(alleles), pop_data)) 68 return fd_rec
69 70
71 -def _convert_genepop_to_fdist_big(gp_rec, report_pops=None):
72 """Converts a big GenePop record to a FDist one. 73 74 Parameters: 75 gp_rec - Genepop Record (Big) 76 77 Returns: 78 FDist record. 79 """ 80 fd_rec = Bio.PopGen.FDist.Record() 81 82 fd_rec.data_org = 1 83 fd_rec.num_loci = len(gp_rec.loci_list) 84 num_loci = len(gp_rec.loci_list) 85 loci = [] 86 for i in range(num_loci): 87 loci.append(set()) 88 pops = [] 89 work_rec = FileParser.read(gp_rec.fname) 90 lParser = work_rec.get_individual() 91 92 def init_pop(): 93 my_pop = [] 94 for i in range(num_loci): 95 my_pop.append({}) 96 return my_pop
97 98 curr_pop = init_pop() 99 num_pops = 1 100 if report_pops: 101 report_pops(num_pops) 102 while lParser: 103 if lParser is not True: 104 for loci_pos in range(num_loci): 105 for al in lParser[1][loci_pos]: 106 if al is not None: 107 loci[loci_pos].add(al) 108 curr_pop[loci_pos][al] = curr_pop[loci_pos].get(al, 0) + 1 109 else: 110 pops.append(curr_pop) 111 num_pops += 1 112 if report_pops: 113 report_pops(num_pops) 114 curr_pop = init_pop() 115 lParser = work_rec.get_individual() 116 work_rec._handle.close() # TODO - Needs a proper fix 117 pops.append(curr_pop) 118 fd_rec.num_pops = num_pops 119 for loci_pos in range(num_loci): 120 alleles = sorted(loci[loci_pos]) 121 loci_rec = [len(alleles), []] 122 for pop in pops: 123 pop_rec = [] 124 for allele in alleles: 125 pop_rec.append(pop[loci_pos].get(allele, 0)) 126 loci_rec[1].append(pop_rec) 127 fd_rec.loci_data.append(tuple(loci_rec)) 128 return fd_rec 129 130
131 -def _convert_genepop_to_fdist_big_old(gp_rec, report_loci=None):
132 """Converts a big GenePop record to a FDist one. 133 134 Parameters: 135 gp_rec - Genepop Record (Big) 136 137 Returns: 138 FDist record. 139 """ 140 fd_rec = Bio.PopGen.FDist.Record() 141 142 def countPops(rec): 143 f2 = FileParser.read(rec.fname) 144 popCnt = 1 145 while f2.skip_population(): 146 popCnt += 1 147 return popCnt
148 149 fd_rec.data_org = 0 150 fd_rec.num_loci = len(gp_rec.loci_list) 151 work_rec0 = FileParser.read(gp_rec.fname) 152 fd_rec.num_pops = countPops(work_rec0) 153 154 num_loci = len(gp_rec.loci_list) 155 for lc_i in range(num_loci): 156 if report_loci: 157 report_loci(lc_i, num_loci) 158 work_rec = FileParser.read(gp_rec.fname) 159 work_rec2 = FileParser.read(gp_rec.fname) 160 161 alleles = [] 162 pop_data = [] 163 lParser = work_rec.get_individual() 164 while lParser: 165 if lParser is not True: 166 for al in lParser[1][lc_i]: 167 if al is not None and al not in alleles: 168 alleles.append(al) 169 lParser = work_rec.get_individual() 170 # here we go again (necessary...) 171 alleles.sort() 172 173 def process_pop(pop_data, alleles, allele_counts): 174 allele_array = [] # We need the same order as in alleles 175 for allele in alleles: 176 allele_array.append(allele_counts.get(allele, 0)) 177 pop_data.append(allele_array) 178 179 lParser = work_rec2.get_individual() 180 allele_counts = {} 181 for allele in alleles: 182 allele_counts[allele] = 0 183 allele_counts[None] = 0 184 while lParser: 185 if lParser is True: 186 process_pop(pop_data, alleles, allele_counts) 187 allele_counts = {} 188 for allele in alleles: 189 allele_counts[allele] = 0 190 allele_counts[None] = 0 191 else: 192 for al in lParser[1][lc_i]: 193 allele_counts[al] += 1 194 lParser = work_rec2.get_individual() 195 process_pop(pop_data, alleles, allele_counts) 196 fd_rec.loci_data.append((len(alleles), pop_data)) 197 return fd_rec 198 199
200 -def approximate_fst(desired_fst, simulated_fst, parameter_fst, 201 max_run_fst=1, min_run_fst=0, limit=0.005):
202 """Calculates the next Fst attempt in order to approximate a 203 desired Fst. 204 """ 205 if abs(simulated_fst - desired_fst) < limit: 206 return parameter_fst, max_run_fst, min_run_fst 207 if simulated_fst > desired_fst: 208 max_run_fst = parameter_fst 209 next_parameter_fst = (min_run_fst + parameter_fst) / 2 210 else: 211 min_run_fst = parameter_fst 212 next_parameter_fst = (max_run_fst + parameter_fst) / 2 213 return next_parameter_fst, max_run_fst, min_run_fst
214