Package Bio :: Package PopGen :: Package GenePop
[hide private]
[frames] | no frames]

Source Code for Package Bio.PopGen.GenePop

  1  # Copyright 2007 by Tiago Antao.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with GenePop. 
  8   
  9  See http://wbiomed.curtin.edu.au/genepop/ , the format is documented 
 10  here: http://wbiomed.curtin.edu.au/genepop/help_input.html . 
 11   
 12  Classes: 
 13  Record           Holds GenePop data. 
 14   
 15  Functions: 
 16  read             Parses a GenePop record (file) into a Record object. 
 17   
 18   
 19  Partially inspired by the MedLine code. 
 20   
 21  """ 
 22  from copy import deepcopy 
 23   
 24   
def get_indiv(line):
    """Parses one individual line of a GenePop file.

    line is expected to look like ``name, genotype genotype ...``: the
    text before the comma is the individual name, the text after it holds
    one genotype per locus, separated by spaces and/or tabs.

    Returns a tuple (indiv_name, allele_list, marker_len) where allele_list
    has one tuple per locus (two alleles, or a single allele for haploid
    data) and marker_len is the number of digits used per allele (2 or 3).
    An allele coded as zero is returned as None.

    Raises ValueError if the line does not contain exactly one comma or a
    genotype is not numeric, and IndexError if no genotype follows the comma.
    """
    def to_allele(digits):
        # Zero codes a missing allele, reported as None.
        number = int(digits)
        return None if number == 0 else number

    indiv_name, genotype_text = line.split(',')
    # Tabs count as separators; empty chunks come from repeated separators.
    chunks = genotype_text.replace('\t', ' ').split(' ')
    genotypes = [chunk for chunk in chunks if chunk != '']

    # The first genotype decides the width: 2 or 4 characters mean
    # 2 digits per allele, anything else means 3 digits per allele.
    marker_len = 2 if len(genotypes[0]) in [2, 4] else 3

    try:
        allele_list = []
        for genotype in genotypes:
            first = to_allele(genotype[0:marker_len])
            second = to_allele(genotype[marker_len:])
            allele_list.append((first, second))
    except ValueError:
        # A second allele that cannot be converted (e.g. it is empty)
        # means the data is haploid: redo everything with one allele.
        allele_list = [(to_allele(genotype[0:marker_len]),)
                       for genotype in genotypes]
    return indiv_name, allele_list, marker_len
def read(handle):
    """Parses a handle containing a GenePop file.

    handle is a file-like object (an iterator over lines) that contains
    a GenePop record.

    Returns a Record. Raises ValueError if no 'POP' line is found after
    the loci names.
    """
    record = Record()
    record.comment_line = str(next(handle)).rstrip()

    # Loci can come one per line or all in a single line separated by
    # either space or comma+space. Commas are simply dropped from the
    # first loci line, which is then split on single spaces.
    first_loci_line = str(next(handle)).rstrip().replace(',', '')
    record.loci_list.extend(first_loci_line.split(' '))

    # Every further line up to the first 'POP' marker is one locus name.
    found_pop = False
    for raw_line in handle:
        text = raw_line.rstrip()
        if text.upper() == 'POP':
            found_pop = True
            break
        record.loci_list.append(text)
    if not found_pop:
        raise ValueError('No population data found, file probably not GenePop related')

    # Individuals follow; each additional 'POP' line opens a new population.
    current_pop = []
    record.populations.append(current_pop)
    for raw_line in handle:
        text = raw_line.rstrip()
        if text.upper() == 'POP':
            current_pop = []
            record.populations.append(current_pop)
            continue
        indiv_name, allele_list, record.marker_len = get_indiv(text)
        current_pop.append((indiv_name, allele_list))

    num_loci = len(record.loci_list)
    for population in record.populations:
        # The name of the last individual is used as the population name.
        record.pop_list.append(population[-1][0])
        for _indiv_name, genotypes in population:
            # Normalise any zero allele to None, locus by locus.
            for locus_pos in range(num_loci):
                genotypes[locus_pos] = tuple(
                    None if allele == 0 else allele
                    for allele in genotypes[locus_pos])
    return record
90 91
class Record(object):
    """Holds information from a GenePop record.

    Members:

        - marker_len         The marker length (2 or 3 digit code per allele).

        - comment_line       Comment line.

        - loci_list          List of loci names.

        - pop_list           List of population names.

        - populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploids):
    Example::

        [
            [
                ('Ind1', [(1,2),    (3,3), (200,201)]),
                ('Ind2', [(2,None), (3,3), (None,None)]),
            ],
            [
                ('Other1', [(1,1),  (4,3), (200,200)]),
            ]
        ]

    """

    def __init__(self):
        """Creates an empty record (no loci, no populations)."""
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        The comment line comes first, then one locus name per line, then
        for each population a 'Pop' line followed by one line per
        individual. Alleles are left-padded with zeros to marker_len
        digits and a missing allele (None) is written as zero.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        code = '0' if al is None else str(al)
                        rep.append(code.rjust(self.marker_len, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
        pop_names - Population names, used as dictionary keys; must have
                    at least one entry per population.

        The returned records hold copies of the data. When pop_list has
        one name per population, each returned record also gets the name
        of its population in its own pop_list.
        """
        names_in_sync = len(self.pop_list) == len(self.populations)
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            if names_in_sync:
                gp_pop.pop_list = [self.pop_list[i]]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
        gp - Unused; kept (now optional) so existing callers that pass
             an argument keep working.

        The population names in pop_list are copied to every returned
        record, since each of them holds the same populations.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_locus = Record()
            gp_locus.marker_len = self.marker_len
            gp_locus.comment_line = self.comment_line
            gp_locus.loci_list = [locus]
            gp_locus.pop_list = list(self.pop_list)
            gp_locus.populations = [
                [(name, [markers[i]]) for name, markers in pop]
                for pop in self.populations]
            gp_loci[locus] = gp_locus
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).

        When pop_list has one name per population, the matching name is
        removed as well so that names and data stay aligned. Otherwise
        pop_list is left untouched.

        Raises IndexError if pos is out of range.
        """
        # Must be checked before the deletion changes the length.
        names_in_sync = len(self.pop_list) == len(self.populations)
        del self.populations[pos]
        if names_in_sync:
            del self.pop_list[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is deleted from loci_list and the corresponding
        genotype is deleted from every individual of every population.

        Raises IndexError if pos is out of range.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for _name, loci in pop:
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Removes a locus by name.

        Only the first locus with that name is removed. If no locus has
        that name, nothing happens.
        """
        try:
            pos = self.loci_list.index(name)
        except ValueError:
            # Locus does not exist: silently ignored, as always.
            # Maybe raise an exception or return a boolean instead?
            return
        self.remove_locus_by_position(pos)