Package Bio :: Package PopGen :: Package FDist :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Utils

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7  from __future__ import print_function 
  8   
  9  from Bio.PopGen.GenePop import FileParser 
 10  import Bio.PopGen.FDist 
 11   
 12  # Quite a few utility functions could be done (like remove pop, 
 13  # add locus, etc...). The recommended strategy is convert back 
 14  # and forth from/to GenePop and use GenePop Utils 
 15   
 16   
17 -def convert_genepop_to_fdist(gp_rec, report_pops = None):
18 """Converts a GenePop record to a FDist one. 19 20 Parameters: 21 gp_rec - Genepop Record (either standard or big) 22 23 Returns: 24 FDist record. 25 """ 26 if hasattr(gp_rec, "populations"): 27 return _convert_genepop_to_fdist(gp_rec) 28 else: 29 return _convert_genepop_to_fdist_big(gp_rec, report_pops)
30 31
32 -def _convert_genepop_to_fdist(gp_rec):
33 """Converts a standard GenePop record to a FDist one. 34 35 Parameters: 36 gp_rec - Genepop Record (Standard) 37 38 Returns: 39 FDist record. 40 """ 41 fd_rec = Bio.PopGen.FDist.Record() 42 43 fd_rec.data_org = 0 44 fd_rec.num_loci = len(gp_rec.loci_list) 45 fd_rec.num_pops = len(gp_rec.populations) 46 for lc_i in range(len(gp_rec.loci_list)): 47 alleles = [] 48 pop_data = [] 49 for pop_i in range(len(gp_rec.populations)): 50 for indiv in gp_rec.populations[pop_i]: 51 for al in indiv[1][lc_i]: 52 if al is not None and al not in alleles: 53 alleles.append(al) 54 alleles.sort() # Dominance requires this 55 56 #here we go again (necessary...) 57 for pop_i in range(len(gp_rec.populations)): 58 allele_counts = {} 59 for indiv in gp_rec.populations[pop_i]: 60 for al in indiv[1][lc_i]: 61 if al is not None: 62 count = allele_counts.get(al, 0) 63 allele_counts[al] = count + 1 64 allele_array = [] # We need the same order as in alleles 65 for allele in alleles: 66 allele_array.append(allele_counts.get(allele, 0)) 67 pop_data.append(allele_array) 68 #if lc_i==3: 69 # print alleles, allele_counts#, pop_data 70 fd_rec.loci_data.append((len(alleles), pop_data)) 71 return fd_rec
72 73
74 -def _convert_genepop_to_fdist_big(gp_rec, report_pops = None):
75 """Converts a big GenePop record to a FDist one. 76 77 Parameters: 78 gp_rec - Genepop Record (Big) 79 80 Returns: 81 FDist record. 82 """ 83 fd_rec = Bio.PopGen.FDist.Record() 84 85 fd_rec.data_org = 1 86 fd_rec.num_loci = len(gp_rec.loci_list) 87 num_loci = len(gp_rec.loci_list) 88 loci = [] 89 for i in range(num_loci): 90 loci.append(set()) 91 pops = [] 92 work_rec = FileParser.read(gp_rec.fname) 93 lParser = work_rec.get_individual() 94 95 def init_pop(): 96 my_pop = [] 97 for i in range(num_loci): 98 my_pop.append({}) 99 return my_pop
100 101 curr_pop = init_pop() 102 num_pops = 1 103 if report_pops: 104 report_pops(num_pops) 105 while lParser: 106 if lParser is not True: 107 for loci_pos in range(num_loci): 108 for al in lParser[1][loci_pos]: 109 if al is not None: 110 loci[loci_pos].add(al) 111 curr_pop[loci_pos][al]= curr_pop[loci_pos].get(al, 0)+1 112 else: 113 pops.append(curr_pop) 114 num_pops += 1 115 if report_pops: 116 report_pops(num_pops) 117 curr_pop = init_pop() 118 lParser = work_rec.get_individual() 119 work_rec._handle.close() # TODO - Needs a proper fix 120 pops.append(curr_pop) 121 fd_rec.num_pops = num_pops 122 for loci_pos in range(num_loci): 123 alleles = sorted(loci[loci_pos]) 124 loci_rec = [len(alleles), []] 125 for pop in pops: 126 pop_rec = [] 127 for allele in alleles: 128 pop_rec.append(pop[loci_pos].get(allele, 0)) 129 loci_rec[1].append(pop_rec) 130 fd_rec.loci_data.append(tuple(loci_rec)) 131 return fd_rec 132 133
134 -def _convert_genepop_to_fdist_big_old(gp_rec, report_loci = None):
135 """Converts a big GenePop record to a FDist one. 136 137 Parameters: 138 gp_rec - Genepop Record (Big) 139 140 Returns: 141 FDist record. 142 """ 143 fd_rec = Bio.PopGen.FDist.Record() 144 145 def countPops(rec): 146 f2 = FileParser.read(rec.fname) 147 popCnt = 1 148 while f2.skip_population(): 149 popCnt += 1 150 return popCnt
151 152 fd_rec.data_org = 0 153 fd_rec.num_loci = len(gp_rec.loci_list) 154 work_rec0 = FileParser.read(gp_rec.fname) 155 fd_rec.num_pops = countPops(work_rec0) 156 157 num_loci = len(gp_rec.loci_list) 158 for lc_i in range(num_loci): 159 if report_loci: 160 report_loci(lc_i, num_loci) 161 work_rec = FileParser.read(gp_rec.fname) 162 work_rec2 = FileParser.read(gp_rec.fname) 163 164 alleles = [] 165 pop_data = [] 166 lParser = work_rec.get_individual() 167 while lParser: 168 if lParser is not True: 169 for al in lParser[1][lc_i]: 170 if al is not None and al not in alleles: 171 alleles.append(al) 172 lParser = work_rec.get_individual() 173 #here we go again (necessary...) 174 alleles.sort() 175 176 def process_pop(pop_data, alleles, allele_counts): 177 allele_array = [] # We need the same order as in alleles 178 for allele in alleles: 179 allele_array.append(allele_counts.get(allele, 0)) 180 pop_data.append(allele_array) 181 182 lParser = work_rec2.get_individual() 183 allele_counts = {} 184 for allele in alleles: 185 allele_counts[allele] = 0 186 allele_counts[None]=0 187 while lParser: 188 if lParser is True: 189 process_pop(pop_data, alleles, allele_counts) 190 allele_counts = {} 191 for allele in alleles: 192 allele_counts[allele] = 0 193 allele_counts[None]=0 194 else: 195 for al in lParser[1][lc_i]: 196 allele_counts[al] += 1 197 lParser = work_rec2.get_individual() 198 process_pop(pop_data, alleles, allele_counts) 199 fd_rec.loci_data.append((len(alleles), pop_data)) 200 return fd_rec 201 202
203 -def approximate_fst(desired_fst, simulated_fst, parameter_fst, 204 max_run_fst = 1, min_run_fst = 0, limit = 0.005):
205 """Calculates the next Fst attempt in order to approximate a 206 desired Fst. 207 """ 208 if abs(simulated_fst - desired_fst) < limit: 209 return parameter_fst, max_run_fst, min_run_fst 210 if simulated_fst > desired_fst: 211 max_run_fst = parameter_fst 212 next_parameter_fst = (min_run_fst + parameter_fst)/2 213 else: 214 min_run_fst = parameter_fst 215 next_parameter_fst = (max_run_fst + parameter_fst)/2 216 return next_parameter_fst, max_run_fst, min_run_fst
217