Package Bio :: Package PopGen :: Package GenePop
[hide private]
[frames] | no frames]

Source Code for Package Bio.PopGen.GenePop

  1  # Copyright 2007 by Tiago Antao.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code to work with GenePop. 
  7   
  8  See http://wbiomed.curtin.edu.au/genepop/ , the format is documented 
  9  here: http://wbiomed.curtin.edu.au/genepop/help_input.html . 
 10   
 11  Classes: 
 12  Record           Holds GenePop data. 
 13   
 14  Functions: 
 15  read             Parses a GenePop record (file) into a Record object. 
 16   
 17   
 18  Partially inspired by MedLine Code. 
 19   
 20  """ 
 21  from copy import deepcopy 
 22   
 23   
24 -def get_indiv(line):
25 def int_no_zero(val): 26 v = int(val) 27 if v == 0: 28 return None 29 return v
30 indiv_name, marker_line = line.split(',') 31 markers = marker_line.replace('\t', ' ').split(' ') 32 markers = [marker for marker in markers if marker != ''] 33 if len(markers[0]) in [2, 4]: # 2 digits per allele 34 marker_len = 2 35 else: 36 marker_len = 3 37 try: 38 allele_list = [(int_no_zero(marker[0:marker_len]), 39 int_no_zero(marker[marker_len:])) 40 for marker in markers] 41 except ValueError: # Haploid 42 allele_list = [(int_no_zero(marker[0:marker_len]),) 43 for marker in markers] 44 return indiv_name, allele_list, marker_len 45 46
47 -def read(handle):
48 """Parses a handle containing a GenePop file. 49 50 handle is a file-like object that contains a GenePop record. 51 """ 52 record = Record() 53 record.comment_line = str(next(handle)).rstrip() 54 # We can now have one loci per line or all loci in a single line 55 # separated by either space or comma+space... 56 # We will remove all commas on loci... that should not be a problem 57 sample_loci_line = str(next(handle)).rstrip().replace(',', '') 58 all_loci = sample_loci_line.split(' ') 59 record.loci_list.extend(all_loci) 60 for line in handle: 61 line = line.rstrip() 62 if line.upper() == 'POP': 63 break 64 record.loci_list.append(line) 65 else: 66 raise ValueError('No population data found, file probably not GenePop related') 67 record.populations.append([]) 68 for line in handle: 69 line = line.rstrip() 70 if line.upper() == 'POP': 71 record.populations.append([]) 72 else: 73 indiv_name, allele_list, record.marker_len = get_indiv(line) 74 record.populations[-1].append((indiv_name, allele_list)) 75 loci = record.loci_list 76 for pop in record.populations: 77 record.pop_list.append(pop[-1][0]) 78 for indiv in pop: 79 for mk_i in range(len(loci)): 80 mk_orig = indiv[1][mk_i] 81 mk_real = [] 82 for al in mk_orig: 83 if al == 0: 84 mk_real.append(None) 85 else: 86 mk_real.append(al) 87 indiv[1][mk_i] = tuple(mk_real) 88 return record
89 90
91 -class Record(object):
92 """Holds information from a GenePop record. 93 94 Members: 95 96 - marker_len The marker length (2 or 3 digit code per allele). 97 98 - comment_line Comment line. 99 100 - loci_list List of loci names. 101 102 - pop_list List of population names. 103 104 - populations List of population data. 105 106 In most genepop files, the population name is not trustable. 107 It is strongly recommended that populations are referred by index. 108 109 populations has one element per population. Each element is itself 110 a list of individuals, each individual is a pair composed by individual 111 name and a list of alleles (2 per marker or 1 for haploids): 112 Example:: 113 114 [ 115 [ 116 ('Ind1', [(1,2), (3,3), (200,201)], 117 ('Ind2', [(2,None), (3,3), (None,None)], 118 ], 119 [ 120 ('Other1', [(1,1), (4,3), (200,200)], 121 ] 122 ] 123 124 """ 125
126 - def __init__(self):
127 self.marker_len = 0 128 self.comment_line = "" 129 self.loci_list = [] 130 self.pop_list = [] 131 self.populations = []
132
133 - def __str__(self):
134 """Returns (reconstructs) a GenePop textual representation.""" 135 rep = [self.comment_line + '\n'] 136 rep.append('\n'.join(self.loci_list) + '\n') 137 for pop in self.populations: 138 rep.append('Pop\n') 139 for indiv in pop: 140 name, markers = indiv 141 rep.append(name) 142 rep.append(',') 143 for marker in markers: 144 rep.append(' ') 145 for al in marker: 146 if al is None: 147 al = '0' 148 aStr = str(al) 149 while len(aStr) < self.marker_len: 150 aStr = "".join(['0', aStr]) 151 rep.append(aStr) 152 rep.append('\n') 153 return "".join(rep)
154
155 - def split_in_pops(self, pop_names):
156 """Splits a GP record in a dictionary with 1 pop per entry. 157 158 Given a record with n pops and m loci returns a dictionary 159 of records (key pop_name) where each item is a record 160 with a single pop and m loci. 161 162 Arguments: 163 - pop_names - Population names 164 165 """ 166 gp_pops = {} 167 for i in range(len(self.populations)): 168 gp_pop = Record() 169 gp_pop.marker_len = self.marker_len 170 gp_pop.comment_line = self.comment_line 171 gp_pop.loci_list = deepcopy(self.loci_list) 172 gp_pop.populations = [deepcopy(self.populations[i])] 173 gp_pops[pop_names[i]] = gp_pop 174 return gp_pops
175
176 - def split_in_loci(self, gp):
177 """Splits a GP record in a dictionary with 1 locus per entry. 178 179 Given a record with n pops and m loci returns a dictionary 180 of records (key locus name) where each item is a record 181 with a single locus and n pops. 182 """ 183 gp_loci = {} 184 for i in range(len(self.loci_list)): 185 gp_pop = Record() 186 gp_pop.marker_len = self.marker_len 187 gp_pop.comment_line = self.comment_line 188 gp_pop.loci_list = [self.loci_list[i]] 189 gp_pop.populations = [] 190 for pop in self.populations: 191 my_pop = [] 192 for indiv in pop: 193 my_pop.append((indiv[0], [indiv[1][i]])) 194 gp_pop.populations.append(my_pop) 195 gp_loci[gp_pop.loci_list[0]] = gp_pop 196 return gp_loci
197
198 - def remove_population(self, pos):
199 """Removes a population (by position).""" 200 del self.populations[pos]
201
202 - def remove_locus_by_position(self, pos):
203 """Removes a locus by position.""" 204 del self.loci_list[pos] 205 for pop in self.populations: 206 for indiv in pop: 207 name, loci = indiv 208 del loci[pos]
209
210 - def remove_locus_by_name(self, name):
211 """Removes a locus by name.""" 212 for i in range(len(self.loci_list)): 213 if self.loci_list[i] == name: 214 self.remove_locus_by_position(i) 215 return
216 # If here than locus not existent... Maybe raise exception? 217 # Although it should be Ok... Just a boolean return, maybe? 218