Package Bio :: Package PopGen :: Package FDist :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Utils

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7  from Bio.PopGen.GenePop import FileParser 
  8  import Bio.PopGen.FDist 
  9   
 10  # Quite a few utility functions could be done (like remove pop, 
 11  # add locus, etc...). The recommended strategy is convert back 
 12  # and forth from/to GenePop and use GenePop Utils 
 13   
 14   
15 -def convert_genepop_to_fdist(gp_rec, report_pops=None):
16 """Converts a GenePop record to a FDist one. 17 18 Parameters: 19 gp_rec - Genepop Record (either standard or big) 20 21 Returns: 22 FDist record. 23 """ 24 if hasattr(gp_rec, "populations"): 25 return _convert_genepop_to_fdist(gp_rec) 26 else: 27 return _convert_genepop_to_fdist_big(gp_rec, report_pops)
28 29
30 -def _convert_genepop_to_fdist(gp_rec):
31 """Converts a standard GenePop record to a FDist one. 32 33 Parameters: 34 gp_rec - Genepop Record (Standard) 35 36 Returns: 37 FDist record. 38 """ 39 fd_rec = Bio.PopGen.FDist.Record() 40 41 fd_rec.data_org = 0 42 fd_rec.num_loci = len(gp_rec.loci_list) 43 fd_rec.num_pops = len(gp_rec.populations) 44 for lc_i in range(len(gp_rec.loci_list)): 45 alleles = [] 46 pop_data = [] 47 for pop_i in range(len(gp_rec.populations)): 48 for indiv in gp_rec.populations[pop_i]: 49 for al in indiv[1][lc_i]: 50 if al is not None and al not in alleles: 51 alleles.append(al) 52 alleles.sort() # Dominance requires this 53 54 # here we go again (necessary...) 55 for pop_i in range(len(gp_rec.populations)): 56 allele_counts = {} 57 for indiv in gp_rec.populations[pop_i]: 58 for al in indiv[1][lc_i]: 59 if al is not None: 60 count = allele_counts.get(al, 0) 61 allele_counts[al] = count + 1 62 allele_array = [] # We need the same order as in alleles 63 for allele in alleles: 64 allele_array.append(allele_counts.get(allele, 0)) 65 pop_data.append(allele_array) 66 fd_rec.loci_data.append((len(alleles), pop_data)) 67 return fd_rec
68 69
70 -def _convert_genepop_to_fdist_big(gp_rec, report_pops=None):
71 """Converts a big GenePop record to a FDist one. 72 73 Parameters: 74 gp_rec - Genepop Record (Big) 75 76 Returns: 77 FDist record. 78 """ 79 fd_rec = Bio.PopGen.FDist.Record() 80 81 fd_rec.data_org = 1 82 fd_rec.num_loci = len(gp_rec.loci_list) 83 num_loci = len(gp_rec.loci_list) 84 loci = [] 85 for i in range(num_loci): 86 loci.append(set()) 87 pops = [] 88 work_rec = FileParser.read(gp_rec.fname) 89 lParser = work_rec.get_individual() 90 91 def init_pop(): 92 my_pop = [] 93 for i in range(num_loci): 94 my_pop.append({}) 95 return my_pop
96 97 curr_pop = init_pop() 98 num_pops = 1 99 if report_pops: 100 report_pops(num_pops) 101 while lParser: 102 if lParser is not True: 103 for loci_pos in range(num_loci): 104 for al in lParser[1][loci_pos]: 105 if al is not None: 106 loci[loci_pos].add(al) 107 curr_pop[loci_pos][al]= curr_pop[loci_pos].get(al, 0)+1 108 else: 109 pops.append(curr_pop) 110 num_pops += 1 111 if report_pops: 112 report_pops(num_pops) 113 curr_pop = init_pop() 114 lParser = work_rec.get_individual() 115 work_rec._handle.close() # TODO - Needs a proper fix 116 pops.append(curr_pop) 117 fd_rec.num_pops = num_pops 118 for loci_pos in range(num_loci): 119 alleles = sorted(loci[loci_pos]) 120 loci_rec = [len(alleles), []] 121 for pop in pops: 122 pop_rec = [] 123 for allele in alleles: 124 pop_rec.append(pop[loci_pos].get(allele, 0)) 125 loci_rec[1].append(pop_rec) 126 fd_rec.loci_data.append(tuple(loci_rec)) 127 return fd_rec 128 129
130 -def _convert_genepop_to_fdist_big_old(gp_rec, report_loci=None):
131 """Converts a big GenePop record to a FDist one. 132 133 Parameters: 134 gp_rec - Genepop Record (Big) 135 136 Returns: 137 FDist record. 138 """ 139 fd_rec = Bio.PopGen.FDist.Record() 140 141 def countPops(rec): 142 f2 = FileParser.read(rec.fname) 143 popCnt = 1 144 while f2.skip_population(): 145 popCnt += 1 146 return popCnt
147 148 fd_rec.data_org = 0 149 fd_rec.num_loci = len(gp_rec.loci_list) 150 work_rec0 = FileParser.read(gp_rec.fname) 151 fd_rec.num_pops = countPops(work_rec0) 152 153 num_loci = len(gp_rec.loci_list) 154 for lc_i in range(num_loci): 155 if report_loci: 156 report_loci(lc_i, num_loci) 157 work_rec = FileParser.read(gp_rec.fname) 158 work_rec2 = FileParser.read(gp_rec.fname) 159 160 alleles = [] 161 pop_data = [] 162 lParser = work_rec.get_individual() 163 while lParser: 164 if lParser is not True: 165 for al in lParser[1][lc_i]: 166 if al is not None and al not in alleles: 167 alleles.append(al) 168 lParser = work_rec.get_individual() 169 # here we go again (necessary...) 170 alleles.sort() 171 172 def process_pop(pop_data, alleles, allele_counts): 173 allele_array = [] # We need the same order as in alleles 174 for allele in alleles: 175 allele_array.append(allele_counts.get(allele, 0)) 176 pop_data.append(allele_array) 177 178 lParser = work_rec2.get_individual() 179 allele_counts = {} 180 for allele in alleles: 181 allele_counts[allele] = 0 182 allele_counts[None]=0 183 while lParser: 184 if lParser is True: 185 process_pop(pop_data, alleles, allele_counts) 186 allele_counts = {} 187 for allele in alleles: 188 allele_counts[allele] = 0 189 allele_counts[None]=0 190 else: 191 for al in lParser[1][lc_i]: 192 allele_counts[al] += 1 193 lParser = work_rec2.get_individual() 194 process_pop(pop_data, alleles, allele_counts) 195 fd_rec.loci_data.append((len(alleles), pop_data)) 196 return fd_rec 197 198
199 -def approximate_fst(desired_fst, simulated_fst, parameter_fst, 200 max_run_fst=1, min_run_fst=0, limit=0.005):
201 """Calculates the next Fst attempt in order to approximate a 202 desired Fst. 203 """ 204 if abs(simulated_fst - desired_fst) < limit: 205 return parameter_fst, max_run_fst, min_run_fst 206 if simulated_fst > desired_fst: 207 max_run_fst = parameter_fst 208 next_parameter_fst = (min_run_fst + parameter_fst)/2 209 else: 210 min_run_fst = parameter_fst 211 next_parameter_fst = (max_run_fst + parameter_fst)/2 212 return next_parameter_fst, max_run_fst, min_run_fst
213