Package Bio :: Package PopGen :: Package FDist :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Utils

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Utility functions for working with FDist (DEPRECATED).""" 
  7   
  8  from Bio.PopGen.GenePop import FileParser 
  9  import Bio.PopGen.FDist 
 10   
 11   
 12  # Quite a few utility functions could be done (like remove pop, 
 13  # add locus, etc...). The recommended strategy is convert back 
 14  # and forth from/to GenePop and use GenePop Utils 
 15   
 16   
17 -def convert_genepop_to_fdist(gp_rec, report_pops=None):
18 """Converts a GenePop record to a FDist one. 19 20 Parameters: 21 gp_rec - Genepop Record (either standard or big) 22 23 Returns: 24 FDist record. 25 """ 26 if hasattr(gp_rec, "populations"): 27 return _convert_genepop_to_fdist(gp_rec) 28 else: 29 return _convert_genepop_to_fdist_big(gp_rec, report_pops)
30 31
32 -def _convert_genepop_to_fdist(gp_rec):
33 """Converts a standard GenePop record to a FDist one. 34 35 Parameters: 36 gp_rec - Genepop Record (Standard) 37 38 Returns: 39 FDist record. 40 """ 41 fd_rec = Bio.PopGen.FDist.Record() 42 43 fd_rec.data_org = 0 44 fd_rec.num_loci = len(gp_rec.loci_list) 45 fd_rec.num_pops = len(gp_rec.populations) 46 for lc_i in range(len(gp_rec.loci_list)): 47 alleles = [] 48 pop_data = [] 49 for pop_i in range(len(gp_rec.populations)): 50 for indiv in gp_rec.populations[pop_i]: 51 for al in indiv[1][lc_i]: 52 if al is not None and al not in alleles: 53 alleles.append(al) 54 alleles.sort() # Dominance requires this 55 56 # here we go again (necessary...) 57 for pop_i in range(len(gp_rec.populations)): 58 allele_counts = {} 59 for indiv in gp_rec.populations[pop_i]: 60 for al in indiv[1][lc_i]: 61 if al is not None: 62 count = allele_counts.get(al, 0) 63 allele_counts[al] = count + 1 64 allele_array = [] # We need the same order as in alleles 65 for allele in alleles: 66 allele_array.append(allele_counts.get(allele, 0)) 67 pop_data.append(allele_array) 68 fd_rec.loci_data.append((len(alleles), pop_data)) 69 return fd_rec
70 71
72 -def _convert_genepop_to_fdist_big(gp_rec, report_pops=None):
73 """Converts a big GenePop record to a FDist one. 74 75 Parameters: 76 gp_rec - Genepop Record (Big) 77 78 Returns: 79 FDist record. 80 """ 81 fd_rec = Bio.PopGen.FDist.Record() 82 83 fd_rec.data_org = 1 84 fd_rec.num_loci = len(gp_rec.loci_list) 85 num_loci = len(gp_rec.loci_list) 86 loci = [] 87 for i in range(num_loci): 88 loci.append(set()) 89 pops = [] 90 work_rec = FileParser.read(gp_rec.fname) 91 lParser = work_rec.get_individual() 92 93 def init_pop(): 94 my_pop = [] 95 for i in range(num_loci): 96 my_pop.append({}) 97 return my_pop
98 99 curr_pop = init_pop() 100 num_pops = 1 101 if report_pops: 102 report_pops(num_pops) 103 while lParser: 104 if lParser is not True: 105 for loci_pos in range(num_loci): 106 for al in lParser[1][loci_pos]: 107 if al is not None: 108 loci[loci_pos].add(al) 109 curr_pop[loci_pos][al] = curr_pop[loci_pos].get(al, 0) + 1 110 else: 111 pops.append(curr_pop) 112 num_pops += 1 113 if report_pops: 114 report_pops(num_pops) 115 curr_pop = init_pop() 116 lParser = work_rec.get_individual() 117 work_rec._handle.close() # TODO - Needs a proper fix 118 pops.append(curr_pop) 119 fd_rec.num_pops = num_pops 120 for loci_pos in range(num_loci): 121 alleles = sorted(loci[loci_pos]) 122 loci_rec = [len(alleles), []] 123 for pop in pops: 124 pop_rec = [] 125 for allele in alleles: 126 pop_rec.append(pop[loci_pos].get(allele, 0)) 127 loci_rec[1].append(pop_rec) 128 fd_rec.loci_data.append(tuple(loci_rec)) 129 return fd_rec 130 131
132 -def _convert_genepop_to_fdist_big_old(gp_rec, report_loci=None):
133 """Converts a big GenePop record to a FDist one. 134 135 Parameters: 136 gp_rec - Genepop Record (Big) 137 138 Returns: 139 FDist record. 140 """ 141 fd_rec = Bio.PopGen.FDist.Record() 142 143 def countPops(rec): 144 f2 = FileParser.read(rec.fname) 145 popCnt = 1 146 while f2.skip_population(): 147 popCnt += 1 148 return popCnt
149 150 fd_rec.data_org = 0 151 fd_rec.num_loci = len(gp_rec.loci_list) 152 work_rec0 = FileParser.read(gp_rec.fname) 153 fd_rec.num_pops = countPops(work_rec0) 154 155 num_loci = len(gp_rec.loci_list) 156 for lc_i in range(num_loci): 157 if report_loci: 158 report_loci(lc_i, num_loci) 159 work_rec = FileParser.read(gp_rec.fname) 160 work_rec2 = FileParser.read(gp_rec.fname) 161 162 alleles = [] 163 pop_data = [] 164 lParser = work_rec.get_individual() 165 while lParser: 166 if lParser is not True: 167 for al in lParser[1][lc_i]: 168 if al is not None and al not in alleles: 169 alleles.append(al) 170 lParser = work_rec.get_individual() 171 # here we go again (necessary...) 172 alleles.sort() 173 174 def process_pop(pop_data, alleles, allele_counts): 175 allele_array = [] # We need the same order as in alleles 176 for allele in alleles: 177 allele_array.append(allele_counts.get(allele, 0)) 178 pop_data.append(allele_array) 179 180 lParser = work_rec2.get_individual() 181 allele_counts = {} 182 for allele in alleles: 183 allele_counts[allele] = 0 184 allele_counts[None] = 0 185 while lParser: 186 if lParser is True: 187 process_pop(pop_data, alleles, allele_counts) 188 allele_counts = {} 189 for allele in alleles: 190 allele_counts[allele] = 0 191 allele_counts[None] = 0 192 else: 193 for al in lParser[1][lc_i]: 194 allele_counts[al] += 1 195 lParser = work_rec2.get_individual() 196 process_pop(pop_data, alleles, allele_counts) 197 fd_rec.loci_data.append((len(alleles), pop_data)) 198 return fd_rec 199 200
201 -def approximate_fst(desired_fst, simulated_fst, parameter_fst, 202 max_run_fst=1, min_run_fst=0, limit=0.005):
203 """Calculates the next Fst attempt in order to approximate a 204 desired Fst. 205 """ 206 if abs(simulated_fst - desired_fst) < limit: 207 return parameter_fst, max_run_fst, min_run_fst 208 if simulated_fst > desired_fst: 209 max_run_fst = parameter_fst 210 next_parameter_fst = (min_run_fst + parameter_fst) / 2 211 else: 212 min_run_fst = parameter_fst 213 next_parameter_fst = (max_run_fst + parameter_fst) / 2 214 return next_parameter_fst, max_run_fst, min_run_fst
215