Package Bio :: Package PopGen :: Package FDist :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Utils

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7  from Bio.PopGen.GenePop import FileParser 
  8  import Bio.PopGen.FDist 
  9   
 10  # Quite a few utility functions could be done (like remove pop, 
 11  # add locus, etc...). The recommended strategy is convert back 
 12  # and forth from/to GenePop and use GenePop Utils 
 13   
 14   
15 -def convert_genepop_to_fdist(gp_rec, report_pops = None):
16 """Converts a GenePop record to a FDist one. 17 18 Parameters: 19 gp_rec - Genepop Record (either standard or big) 20 21 Returns: 22 FDist record. 23 """ 24 if hasattr(gp_rec, "populations"): 25 return _convert_genepop_to_fdist(gp_rec) 26 else: 27 return _convert_genepop_to_fdist_big(gp_rec, report_pops)
28 29
30 -def _convert_genepop_to_fdist(gp_rec):
31 """Converts a standard GenePop record to a FDist one. 32 33 Parameters: 34 gp_rec - Genepop Record (Standard) 35 36 Returns: 37 FDist record. 38 """ 39 fd_rec = Bio.PopGen.FDist.Record() 40 41 fd_rec.data_org = 0 42 fd_rec.num_loci = len(gp_rec.loci_list) 43 fd_rec.num_pops = len(gp_rec.populations) 44 for lc_i in range(len(gp_rec.loci_list)): 45 alleles = [] 46 pop_data = [] 47 for pop_i in range(len(gp_rec.populations)): 48 for indiv in gp_rec.populations[pop_i]: 49 for al in indiv[1][lc_i]: 50 if al is not None and al not in alleles: 51 alleles.append(al) 52 alleles.sort() # Dominance requires this 53 54 #here we go again (necessary...) 55 for pop_i in range(len(gp_rec.populations)): 56 allele_counts = {} 57 for indiv in gp_rec.populations[pop_i]: 58 for al in indiv[1][lc_i]: 59 if al is not None: 60 count = allele_counts.get(al, 0) 61 allele_counts[al] = count + 1 62 allele_array = [] # We need the same order as in alleles 63 for allele in alleles: 64 allele_array.append(allele_counts.get(allele, 0)) 65 pop_data.append(allele_array) 66 #if lc_i==3: 67 # print alleles, allele_counts#, pop_data 68 fd_rec.loci_data.append((len(alleles), pop_data)) 69 return fd_rec
70 71
72 -def _convert_genepop_to_fdist_big(gp_rec, report_pops = None):
73 """Converts a big GenePop record to a FDist one. 74 75 Parameters: 76 gp_rec - Genepop Record (Big) 77 78 Returns: 79 FDist record. 80 """ 81 fd_rec = Bio.PopGen.FDist.Record() 82 83 fd_rec.data_org = 1 84 fd_rec.num_loci = len(gp_rec.loci_list) 85 num_loci = len(gp_rec.loci_list) 86 loci = [] 87 for i in range(num_loci): 88 loci.append(set()) 89 pops = [] 90 work_rec = FileParser.read(gp_rec.fname) 91 lParser = work_rec.get_individual() 92 93 def init_pop(): 94 my_pop = [] 95 for i in range(num_loci): 96 my_pop.append({}) 97 return my_pop
98 99 curr_pop = init_pop() 100 num_pops = 1 101 if report_pops: 102 report_pops(num_pops) 103 while lParser: 104 if lParser is not True: 105 for loci_pos in range(num_loci): 106 for al in lParser[1][loci_pos]: 107 if al is not None: 108 loci[loci_pos].add(al) 109 curr_pop[loci_pos][al]= curr_pop[loci_pos].get(al,0)+1 110 else: 111 pops.append(curr_pop) 112 num_pops += 1 113 if report_pops: 114 report_pops(num_pops) 115 curr_pop = init_pop() 116 lParser = work_rec.get_individual() 117 work_rec._handle.close() # TODO - Needs a proper fix 118 pops.append(curr_pop) 119 fd_rec.num_pops = num_pops 120 for loci_pos in range(num_loci): 121 alleles = list(loci[loci_pos]) 122 alleles.sort() 123 loci_rec = [len(alleles), []] 124 for pop in pops: 125 pop_rec = [] 126 for allele in alleles: 127 pop_rec.append(pop[loci_pos].get(allele, 0)) 128 loci_rec[1].append(pop_rec) 129 fd_rec.loci_data.append(tuple(loci_rec)) 130 return fd_rec 131 132
133 -def _convert_genepop_to_fdist_big_old(gp_rec, report_loci = None):
134 """Converts a big GenePop record to a FDist one. 135 136 Parameters: 137 gp_rec - Genepop Record (Big) 138 139 Returns: 140 FDist record. 141 """ 142 fd_rec = Bio.PopGen.FDist.Record() 143 144 def countPops(rec): 145 f2 = FileParser.read(rec.fname) 146 popCnt = 1 147 while f2.skip_population(): 148 popCnt += 1 149 return popCnt
150 151 fd_rec.data_org = 0 152 fd_rec.num_loci = len(gp_rec.loci_list) 153 work_rec0 = FileParser.read(gp_rec.fname) 154 fd_rec.num_pops = countPops(work_rec0) 155 156 num_loci = len(gp_rec.loci_list) 157 for lc_i in range(num_loci): 158 if report_loci: 159 report_loci(lc_i, num_loci) 160 work_rec = FileParser.read(gp_rec.fname) 161 work_rec2 = FileParser.read(gp_rec.fname) 162 163 alleles = [] 164 pop_data = [] 165 lParser = work_rec.get_individual() 166 while lParser: 167 if lParser is not True: 168 for al in lParser[1][lc_i]: 169 if al is not None and al not in alleles: 170 alleles.append(al) 171 lParser = work_rec.get_individual() 172 #here we go again (necessary...) 173 alleles.sort() 174 175 def process_pop(pop_data, alleles, allele_counts): 176 allele_array = [] # We need the same order as in alleles 177 for allele in alleles: 178 allele_array.append(allele_counts.get(allele, 0)) 179 pop_data.append(allele_array) 180 181 lParser = work_rec2.get_individual() 182 allele_counts = {} 183 for allele in alleles: 184 allele_counts[allele] = 0 185 allele_counts[None]=0 186 while lParser: 187 if lParser is True: 188 process_pop(pop_data, alleles, allele_counts) 189 allele_counts = {} 190 for allele in alleles: 191 allele_counts[allele] = 0 192 allele_counts[None]=0 193 else: 194 for al in lParser[1][lc_i]: 195 allele_counts[al] += 1 196 lParser = work_rec2.get_individual() 197 process_pop(pop_data, alleles, allele_counts) 198 fd_rec.loci_data.append((len(alleles), pop_data)) 199 return fd_rec 200 201
202 -def approximate_fst(desired_fst, simulated_fst, parameter_fst, 203 max_run_fst = 1, min_run_fst = 0, limit = 0.005):
204 """Calculates the next Fst attempt in order to approximate a 205 desired Fst. 206 """ 207 if abs(simulated_fst - desired_fst) < limit: 208 return parameter_fst, max_run_fst, min_run_fst 209 if simulated_fst > desired_fst: 210 max_run_fst = parameter_fst 211 next_parameter_fst = (min_run_fst + parameter_fst)/2 212 else: 213 min_run_fst = parameter_fst 214 next_parameter_fst = (max_run_fst + parameter_fst)/2 215 return next_parameter_fst, max_run_fst, min_run_fst
216