Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module allows to control GenePop. 
  8   
  9  """ 
 10   
 11  import os 
 12  import re 
 13  import shutil 
 14   
 15  from Bio.Application import AbstractCommandline, _Argument 
 16   
 17   
18 -def _gp_float(tok):
19 """Gets a float from a token, if it fails, returns the string. 20 """ 21 try: 22 return float(tok) 23 except ValueError: 24 return str(tok)
25 26
27 -def _gp_int(tok):
28 """Gets a int from a token, if it fails, returns the string. 29 """ 30 try: 31 return int(tok) 32 except ValueError: 33 return str(tok)
34 35
36 -def _read_allele_freq_table(f):
37 l = f.readline() 38 while ' --' not in l: 39 if l == "": 40 raise StopIteration 41 if 'No data' in l: 42 return None, None 43 l = f.readline() 44 alleles = filter(lambda x: x != '', f.readline().rstrip().split(" ")) 45 alleles = map(lambda x: _gp_int(x), alleles) 46 l = f.readline().rstrip() 47 table = [] 48 while l != "": 49 line = filter(lambda x: x != '', l.split(" ")) 50 try: 51 table.append( 52 (line[0], 53 map(lambda x: _gp_float(x), line[1:-1]), 54 _gp_int(line[-1]))) 55 except ValueError: 56 table.append( 57 (line[0], 58 [None] * len(alleles), 59 0)) 60 l = f.readline().rstrip() 61 return alleles, table
62 63
64 -def _read_table(f, funs):
65 table = [] 66 l = f.readline().rstrip() 67 while '---' not in l: 68 l = f.readline().rstrip() 69 l = f.readline().rstrip() 70 while '===' not in l and '---' not in l and l != "": 71 toks = filter(lambda x: x != "", l.split(" ")) 72 line = [] 73 for i in range(len(toks)): 74 try: 75 line.append(funs[i](toks[i])) 76 except ValueError: 77 line.append(toks[i]) # Could not cast 78 table.append(tuple(line)) 79 l = f.readline().rstrip() 80 return table
81 82
83 -def _read_triangle_matrix(f):
84 matrix = [] 85 l = f.readline().rstrip() 86 while l != "": 87 matrix.append( 88 map(lambda x: _gp_float(x), 89 filter(lambda y: y != "", l.split(" ")))) 90 l = f.readline().rstrip() 91 return matrix
92 93
94 -def _read_headed_triangle_matrix(f):
95 matrix = {} 96 header = f.readline().rstrip() 97 if '---' in header or '===' in header: 98 header = f.readline().rstrip() 99 nlines = len(filter(lambda x:x != '', header.split(' '))) - 1 100 for line_pop in range(nlines): 101 l = f.readline().rstrip() 102 vals = filter(lambda x:x != '', l.split(' ')[1:]) 103 clean_vals = [] 104 for val in vals: 105 try: 106 clean_vals.append(_gp_float(val)) 107 except ValueError: 108 clean_vals.append(None) 109 for col_pop in range(len(clean_vals)): 110 matrix[(line_pop+1, col_pop)] = clean_vals[col_pop] 111 return matrix
112 113
114 -def _hw_func(stream, is_locus, has_fisher = False):
115 l = stream.readline() 116 if is_locus: 117 hook = "Locus " 118 else: 119 hook = " Pop : " 120 while l != "": 121 if l.startswith(hook): 122 stream.readline() 123 stream.readline() 124 stream.readline() 125 table = _read_table(stream,[str,_gp_float,_gp_float,_gp_float,_gp_float,_gp_int,str]) 126 #loci might mean pop if hook="Locus " 127 loci = {} 128 for entry in table: 129 if len(entry) < 3: 130 loci[entry[0]] = None 131 else: 132 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 133 if se == "-": 134 se = None 135 loci[locus] = p, se, fis_wc, fis_rh, steps 136 return loci 137 l = stream.readline() 138 #self.done = True 139 raise StopIteration
140 141
142 -class _FileIterator:
143 """Iterator which crawls over a stream of lines with a function. 144 145 The generator function is expected to yield a tuple, while 146 consuming input 147 """
148 - def __init__(self, func, stream, fname):
149 self.func = func 150 self.stream = stream 151 self.fname = fname 152 self.done = False
153
154 - def __iter__(self):
155 if self.done: 156 self.done = True 157 raise StopIteration 158 return self
159
160 - def next(self):
161 return self.func(self)
162
163 - def __del__(self):
164 self.stream.close() 165 try: 166 os.remove(self.fname) 167 except OSError: 168 #Jython seems to call the iterator twice 169 pass
170 171
172 -class _GenePopCommandline(AbstractCommandline):
173 """ Command Line Wrapper for GenePop. 174 """
175 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
176 self.parameters = [ 177 _Argument(["command"], 178 "GenePop option to be called", 179 is_required=True), 180 _Argument(["mode"], 181 "Should allways be batch", 182 is_required=True), 183 _Argument(["input"], 184 "Input file", 185 is_required=True), 186 _Argument(["Dememorization"], 187 "Dememorization step"), 188 _Argument(["BatchNumber"], 189 "Number of MCMC batches"), 190 _Argument(["BatchLength"], 191 "Length of MCMC chains"), 192 _Argument(["HWtests"], 193 "Enumeration or MCMC"), 194 _Argument(["IsolBDstatistic"], 195 "IBD statistic (a or e)"), 196 _Argument(["MinimalDistance"], 197 "Minimal IBD distance"), 198 _Argument(["GeographicScale"], 199 "Log or Linear"), 200 ] 201 AbstractCommandline.__init__(self, cmd, **kwargs) 202 self.set_parameter("mode", "Mode=Batch")
203
204 - def set_menu(self, option_list):
205 """Sets the menu option. 206 207 Example set_menu([6,1]) = get all F statistics (menu 6.1) 208 """ 209 self.set_parameter("command", "MenuOptions="+ 210 ".".join(map(lambda x:str(x),option_list)))
211
212 - def set_input(self, fname):
213 """Sets the input file name. 214 """ 215 self.set_parameter("input", "InputFile="+fname)
216 217
218 -class GenePopController(object):
219 - def __init__(self, genepop_dir = None):
220 """Initializes the controller. 221 222 genepop_dir is the directory where GenePop is. 223 224 The binary should be called Genepop (capital G) 225 """ 226 self.controller = _GenePopCommandline(genepop_dir)
227
228 - def _remove_garbage(self, fname_out):
229 try: 230 if fname_out is not None: 231 os.remove(fname_out) 232 except OSError: 233 pass # safe 234 try: 235 os.remove("genepop.txt") 236 except OSError: 237 pass # safe 238 try: 239 os.remove("fichier.in") 240 except OSError: 241 pass # safe 242 try: 243 os.remove("cmdline.txt") 244 except OSError: 245 pass # safe
246
247 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
248 opts = {} 249 opts["Dememorization"]=dememorization 250 opts["BatchNumber"]=batches 251 opts["BatchLength"]=iterations 252 if enum_test is not None: 253 if enum_test is True: 254 opts["HWtests"]="Enumeration" 255 else: 256 opts["HWtests"]="MCMC" 257 return opts
258
259 - def _run_genepop(self, extensions, option, fname, opts={}):
260 for extension in extensions: 261 self._remove_garbage(fname + extension) 262 self.controller.set_menu(option) 263 self.controller.set_input(fname) 264 for opt in opts: 265 self.controller.set_parameter(opt, opt+"="+str(opts[opt])) 266 self.controller() # checks error level is zero 267 self._remove_garbage(None) 268 return
269
270 - def _test_pop_hz_both(self, fname, type, ext, enum_test = True, 271 dememorization = 10000, batches = 20, 272 iterations = 5000):
273 """Hardy-Weinberg test for heterozygote deficiency/excess. 274 275 Returns a population iterator containg 276 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 277 Some loci have a None if the info is not available 278 SE might be none (for enumerations) 279 """ 280 opts = self._get_opts(dememorization, batches, iterations, enum_test) 281 self._run_genepop([ext], [1, type], fname, opts) 282 f = open(fname + ext) 283 284 def hw_func(self): 285 return _hw_func(self.stream, False)
286 287 return _FileIterator(hw_func, f, fname + ext)
288
289 - def _test_global_hz_both(self, fname, type, ext, enum_test = True, 290 dememorization = 10000, batches = 20, 291 iterations = 5000):
292 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 293 294 Returns a triple with: 295 A list per population containg 296 (pop_name, P-val, SE, switches) 297 Some pops have a None if the info is not available 298 SE might be none (for enumerations) 299 A list per loci containg 300 (locus_name, P-val, SE, switches) 301 Some loci have a None if the info is not available 302 SE might be none (for enumerations) 303 Overall results (P-val, SE, switches) 304 305 """ 306 opts = self._get_opts(dememorization, batches, iterations, enum_test) 307 self._run_genepop([ext], [1, type], fname, opts) 308 309 def hw_pop_func(self): 310 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
311 312 f1 = open(fname + ext) 313 l = f1.readline() 314 while "by population" not in l: 315 l = f1.readline() 316 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 317 f2 = open(fname + ext) 318 l = f2.readline() 319 while "by locus" not in l: 320 l = f2.readline() 321 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 322 f = open(fname + ext) 323 l = f.readline() 324 while "all locus" not in l: 325 l = f.readline() 326 f.readline() 327 f.readline() 328 f.readline() 329 f.readline() 330 l = f.readline().rstrip() 331 p, se, switches = tuple(map(lambda x: _gp_float(x), 332 filter(lambda y: y != "",l.split(" ")))) 333 f.close() 334 return pop_p, loc_p, (p, se, switches) 335 336 #1.1
337 - def test_pop_hz_deficiency(self, fname, enum_test = True, 338 dememorization = 10000, batches = 20, 339 iterations = 5000):
340 """Hardy-Weinberg test for heterozygote deficiency. 341 342 Returns a population iterator containg 343 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 344 Some loci have a None if the info is not available 345 SE might be none (for enumerations) 346 """ 347 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 348 dememorization, batches, iterations)
349 350 #1.2
351 - def test_pop_hz_excess(self, fname, enum_test = True, 352 dememorization = 10000, batches = 20, 353 iterations = 5000):
354 """Hardy-Weinberg test for heterozygote deficiency. 355 356 Returns a population iterator containg 357 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 358 Some loci have a None if the info is not available 359 SE might be none (for enumerations) 360 """ 361 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 362 dememorization, batches, iterations)
363 364 #1.3 P file
365 - def test_pop_hz_prob(self, fname, ext, enum_test = False, 366 dememorization = 10000, batches = 20, 367 iterations = 5000):
368 """Hardy-Weinberg test based on probability. 369 370 Returns 2 iterators and a final tuple: 371 372 1. Returns a loci iterator containing 373 b. A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps) 374 Some pops have a None if the info is not available 375 SE might be none (for enumerations) 376 c. Result of Fisher's test (Chi2, deg freedom, prob) 377 2. Returns a population iterator containg 378 a. A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 379 Some loci have a None if the info is not available 380 SE might be none (for enumerations) 381 b. Result of Fisher's test (Chi2, deg freedom, prob) 382 3. (Chi2, deg freedom, prob) 383 """ 384 opts = self._get_opts(dememorization, batches, iterations, enum_test) 385 self._run_genepop([ext], [1, 3], fname, opts) 386 387 def hw_prob_loci_func(self): 388 return _hw_func(self.stream, True, True)
389 390 def hw_prob_pop_func(self): 391 return _hw_func(self.stream, False, True) 392 393 shutil.copyfile(fname+".P", fname+".P2") 394 f1 = open(fname + ".P") 395 f2 = open(fname + ".P2") 396 return _FileIterator(hw_prob_loci_func, f1, fname + ".P"), _FileIterator(hw_prob_pop_func, f2, fname + ".P2") 397 398 #1.4
399 - def test_global_hz_deficiency(self, fname, enum_test = True, 400 dememorization = 10000, batches = 20, 401 iterations = 5000):
402 """Global Hardy-Weinberg test for heterozygote deficiency. 403 404 Returns a triple with: 405 An list per population containg 406 (pop_name, P-val, SE, switches) 407 Some pops have a None if the info is not available 408 SE might be none (for enumerations) 409 An list per loci containg 410 (locus_name, P-val, SE, switches) 411 Some loci have a None if the info is not available 412 SE might be none (for enumerations) 413 Overall results (P-val, SE, switches) 414 """ 415 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 416 dememorization, batches, iterations)
417 418 #1.5
419 - def test_global_hz_excess(self, fname, enum_test = True, 420 dememorization = 10000, batches = 20, 421 iterations = 5000):
422 """Global Hardy-Weinberg test for heterozygote excess. 423 424 Returns a triple with: 425 An list per population containg 426 (pop_name, P-val, SE, switches) 427 Some pops have a None if the info is not available 428 SE might be none (for enumerations) 429 An list per loci containg 430 (locus_name, P-val, SE, switches) 431 Some loci have a None if the info is not available 432 SE might be none (for enumerations) 433 Overall results (P-val, SE, switches) 434 """ 435 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 436 dememorization, batches, iterations)
437 438 #2.1
439 - def test_ld(self, fname, 440 dememorization = 10000, batches = 20, iterations = 5000):
441 opts = self._get_opts(dememorization, batches, iterations) 442 self._run_genepop([".DIS"], [2, 1], fname, opts) 443 444 def ld_pop_func(self): 445 current_pop = None 446 l = self.stream.readline().rstrip() 447 if l == "": 448 self.done = True 449 raise StopIteration 450 toks = filter(lambda x: x != "", l.split(" ")) 451 pop, locus1, locus2 = toks[0], toks[1], toks[2] 452 if not hasattr(self, "start_locus1"): 453 start_locus1, start_locus2 = locus1, locus2 454 current_pop = -1 455 if locus1 == start_locus1 and locus2 == start_locus2: 456 current_pop += 1 457 if toks[3] == "No": 458 return current_pop, pop, (locus1, locus2), None 459 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 460 return current_pop, pop, (locus1, locus2), (p, se, switches)
461 462 def ld_func(self): 463 l = self.stream.readline().rstrip() 464 if l == "": 465 self.done = True 466 raise StopIteration 467 toks = filter(lambda x: x != "", l.split(" ")) 468 locus1, locus2 = toks[0], toks[2] 469 try: 470 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 471 except ValueError: 472 return (locus1, locus2), None 473 return (locus1, locus2), (chi2, df, p) 474 475 f1 = open(fname + ".DIS") 476 l = f1.readline() 477 while "----" not in l: 478 l = f1.readline() 479 shutil.copyfile(fname + ".DIS", fname + ".DI2") 480 f2 = open(fname + ".DI2") 481 l = f2.readline() 482 while "Locus pair" not in l: 483 l = f2.readline() 484 while "----" not in l: 485 l = f2.readline() 486 return _FileIterator(ld_pop_func, f1, fname+".DIS"), _FileIterator(ld_func, f2, fname + ".DI2") 487 488 #2.2
489 - def create_contingency_tables(self, fname):
490 raise NotImplementedError
491 492 #3.1 PR/GE files
493 - def test_genic_diff_all(self, fname, 494 dememorization = 10000, batches = 20, iterations = 5000):
495 raise NotImplementedError
496 497 #3.2 PR2/GE2 files
498 - def test_genic_diff_pair(self, fname, 499 dememorization = 10000, batches = 20, iterations = 5000):
500 raise NotImplementedError
501 502 #3.3 G files
503 - def test_genotypic_diff_all(self, fname, 504 dememorization = 10000, batches = 20, iterations = 5000):
505 raise NotImplementedError
506 507 #3.4 2G2 files
508 - def test_genotypic_diff_pair(self, fname, 509 dememorization = 10000, batches = 20, iterations = 5000):
510 raise NotImplementedError
511 512 #4
513 - def estimate_nm(self, fname):
514 self._run_genepop(["PRI"], [4], fname) 515 f = open(fname + ".PRI") 516 lines = f.readlines() # Small file, it is ok 517 f.close() 518 for line in lines: 519 m = re.search("Mean sample size: ([.0-9]+)", line) 520 if m is not None: 521 mean_sample_size = _gp_float(m.group(1)) 522 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 523 if m is not None: 524 mean_priv_alleles = _gp_float(m.group(1)) 525 m = re.search("N=10: ([.0-9]+)", line) 526 if m is not None: 527 mig10 = _gp_float(m.group(1)) 528 m = re.search("N=25: ([.0-9]+)", line) 529 if m is not None: 530 mig25 = _gp_float(m.group(1)) 531 m = re.search("N=50: ([.0-9]+)", line) 532 if m is not None: 533 mig50 = _gp_float(m.group(1)) 534 m = re.search("for size= ([.0-9]+)", line) 535 if m is not None: 536 mig_corrected = _gp_float(m.group(1)) 537 os.remove(fname + ".PRI") 538 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
539 540 #5.1
541 - def calc_allele_genotype_freqs(self, fname):
542 """Calculates allele and genotype frequencies per locus and per sample. 543 544 Parameters: 545 fname - file name 546 547 Returns tuple with 2 elements: 548 Population iterator with 549 population name 550 Locus dictionary with key = locus name and content tuple as 551 Genotype List with 552 (Allele1, Allele2, observed, expected) 553 (expected homozygotes, observed hm, 554 expected heterozygotes, observed ht) 555 Allele frequency/Fis dictionary with allele as key and 556 (count, frequency, Fis Weir & Cockerham) 557 Totals as a pair 558 count 559 Fis Weir & Cockerham, 560 Fis Robertson & Hill 561 Locus iterator with 562 Locus name 563 allele list 564 Population list with a triple 565 population name 566 list of allele frequencies in the same order as allele list above 567 number of genes 568 569 Will create a file called fname.INF 570 """ 571 self._run_genepop(["INF"], [5,1], fname) 572 #First pass, general information 573 #num_loci = None 574 #num_pops = None 575 #f = open(fname + ".INF") 576 #l = f.readline() 577 #while (num_loci is None or num_pops is None) and l != '': 578 # m = re.search("Number of populations detected : ([0-9+])", l) 579 # if m is not None: 580 # num_pops = _gp_int(m.group(1)) 581 # m = re.search("Number of loci detected : ([0-9+])", l) 582 # if m is not None: 583 # num_loci = _gp_int(m.group(1)) 584 # l = f.readline() 585 #f.close() 586 587 def pop_parser(self): 588 if hasattr(self, "old_line"): 589 l = self.old_line 590 del self.old_line 591 else: 592 l = self.stream.readline() 593 loci_content = {} 594 while l != '': 595 l = l.rstrip() 596 if "Tables of allelic frequencies for each locus" in l: 597 return self.curr_pop, loci_content 598 match = re.match(".*Pop: (.+) Locus: (.+)", l) 599 if match is not None: 600 pop = match.group(1) 601 locus = match.group(2) 602 if not hasattr(self, "first_locus"): 603 self.first_locus = locus 604 if hasattr(self, "curr_pop"): 605 if self.first_locus == locus: 606 old_pop = self.curr_pop 607 #self.curr_pop = pop 608 self.old_line = l 609 del self.first_locus 610 del self.curr_pop 611 return old_pop, loci_content 612 self.curr_pop = pop 613 else: 614 l = self.stream.readline() 615 continue 616 geno_list = [] 617 l = self.stream.readline() 618 if "No data" in l: 619 continue 620 621 while "Genotypes Obs." not in l: 622 l = self.stream.readline() 623 624 while l != "\n": 625 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)",l) 626 if m2 is not None: 627 geno_list.append((_gp_int(m2.group(1)), _gp_int(m2.group(2)), 628 _gp_int(m2.group(3)), _gp_float(m2.group(4)))) 629 else: 630 l = self.stream.readline() 631 continue 632 l = self.stream.readline() 633 634 while "Expected number of ho" not in l: 635 l = self.stream.readline() 636 expHo = _gp_float(l[38:]) 637 l = self.stream.readline() 638 obsHo = _gp_int(l[38:]) 639 l = self.stream.readline() 640 expHe = _gp_float(l[38:]) 641 l = self.stream.readline() 642 obsHe = _gp_int(l[38:]) 643 l = self.stream.readline() 644 645 while "Sample count" not in l: 646 l = self.stream.readline() 647 l = self.stream.readline() 648 freq_fis={} 649 overall_fis = None 650 while "----" not in l: 651 vals = filter(lambda x: x!='', 652 l.rstrip().split(' ')) 653 if vals[0]=="Tot": 654 overall_fis = _gp_int(vals[1]), \ 655 _gp_float(vals[2]), _gp_float(vals[3]) 656 else: 657 freq_fis[_gp_int(vals[0])] = _gp_int(vals[1]), \ 658 _gp_float(vals[2]), _gp_float(vals[3]) 659 l = self.stream.readline() 660 loci_content[locus] = geno_list, \ 661 (expHo, obsHo, expHe, obsHe), \ 662 freq_fis, overall_fis 663 self.done = True 664 raise StopIteration
665 666 def locus_parser(self): 667 l = self.stream.readline() 668 while l != "": 669 l = l.rstrip() 670 match = re.match(" Locus: (.+)", l) 671 if match is not None: 672 locus = match.group(1) 673 alleles, table = _read_allele_freq_table(self.stream) 674 return locus, alleles, table 675 l = self.stream.readline() 676 self.done = True 677 raise StopIteration 678 679 popf = open(fname + ".INF") 680 shutil.copyfile(fname + ".INF", fname + ".IN2") 681 locf = open(fname + ".IN2") 682 pop_iter = _FileIterator(pop_parser, popf, fname + ".INF") 683 locus_iter = _FileIterator(locus_parser, locf, fname + ".IN2") 684 return (pop_iter, locus_iter) 685
686 - def _calc_diversities_fis(self, fname, ext):
687 self._run_genepop([ext], [5,2], fname) 688 f = open(fname + ext) 689 l = f.readline() 690 while l != "": 691 l = l.rstrip() 692 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 693 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 694 avg_Qintra = _read_table(f, [str, _gp_float]) 695 l = f.readline() 696 f.close() 697 698 def fis_func(self): 699 l = self.stream.readline() 700 while l != "": 701 l = l.rstrip() 702 m = re.search("Locus: (.+)", l) 703 if m is not None: 704 locus = m.group(1) 705 self.stream.readline() 706 if "No complete" in self.stream.readline(): 707 return locus, None 708 self.stream.readline() 709 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 710 self.stream.readline() 711 avg_qinter, avg_fis = tuple(map(lambda x: _gp_float(x), 712 filter(lambda y:y != "", self.stream.readline().split(" ")))) 713 return locus, fis_table, avg_qinter, avg_fis 714 l = self.stream.readline() 715 self.done = True 716 raise StopIteration
717 718 dvf = open(fname + ext) 719 return _FileIterator(fis_func, dvf, fname + ext), avg_fis, avg_Qintra 720 721 #5.2
722 - def calc_diversities_fis_with_identity(self, fname):
723 return self._calc_diversities_fis(fname, ".DIV")
724 725 #5.3
726 - def calc_diversities_fis_with_size(self, fname):
727 raise NotImplementedError
728 729 #6.1 Less genotype frequencies
730 - def calc_fst_all(self, fname):
731 """Executes GenePop and gets Fst/Fis/Fit (all populations) 732 733 Parameters: 734 fname - file name 735 736 Returns: 737 (multiLocusFis, multiLocusFst, multiLocus Fit), 738 Iterator of tuples 739 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 740 741 Will create a file called fname.FST . 742 743 This does not return the genotype frequencies. 744 """ 745 self._run_genepop([".FST"], [6,1], fname) 746 f = open(fname + ".FST") 747 l = f.readline() 748 while l != '': 749 if l.startswith(' All:'): 750 toks=filter(lambda x:x!="", l.rstrip().split(' ')) 751 try: 752 allFis = _gp_float(toks[1]) 753 except ValueError: 754 allFis = None 755 try: 756 allFst = _gp_float(toks[2]) 757 except ValueError: 758 allFst = None 759 try: 760 allFit = _gp_float(toks[3]) 761 except ValueError: 762 allFit = None 763 l = f.readline() 764 f.close() 765 f = open(fname + ".FST") 766 767 def proc(self): 768 if hasattr(self, "last_line"): 769 l = self.last_line 770 del self.last_line 771 else: 772 l = self.stream.readline() 773 locus = None 774 fis = None 775 fst = None 776 fit = None 777 qintra = None 778 qinter = None 779 while l != '': 780 l = l.rstrip() 781 if l.startswith(' Locus:'): 782 if locus is not None: 783 self.last_line = l 784 return locus, fis, fst, fit, qintra, qinter 785 else: 786 locus = l.split(':')[1].lstrip() 787 elif l.startswith('Fis^='): 788 fis = _gp_float(l.split(' ')[1]) 789 elif l.startswith('Fst^='): 790 fst = _gp_float(l.split(' ')[1]) 791 elif l.startswith('Fit^='): 792 fit = _gp_float(l.split(' ')[1]) 793 elif l.startswith('1-Qintra^='): 794 qintra = _gp_float(l.split(' ')[1]) 795 elif l.startswith('1-Qinter^='): 796 qinter = _gp_float(l.split(' ')[1]) 797 return locus, fis, fst, fit, qintra, qinter 798 l = self.stream.readline() 799 if locus is not None: 800 return locus, fis, fst, fit, qintra, qinter 801 self.stream.close() 802 self.done = True 803 raise StopIteration
804 return (allFis, allFst, allFit), _FileIterator(proc , f, fname + ".FST") 805 806 #6.2
807 - def calc_fst_pair(self, fname):
808 self._run_genepop([".ST2", ".MIG"], [6,2], fname) 809 f = open(fname + ".ST2") 810 l = f.readline() 811 while l != "": 812 l = l.rstrip() 813 if l.startswith("Estimates for all loci"): 814 avg_fst = _read_headed_triangle_matrix(f) 815 l = f.readline() 816 f.close() 817 818 def loci_func(self): 819 l = self.stream.readline() 820 while l != "": 821 l = l.rstrip() 822 m = re.search(" Locus: (.+)", l) 823 if m is not None: 824 locus = m.group(1) 825 matrix = _read_headed_triangle_matrix(self.stream) 826 return locus, matrix 827 l = self.stream.readline() 828 self.done = True 829 raise StopIteration
830 831 stf = open(fname + ".ST2") 832 os.remove(fname + ".MIG") 833 return _FileIterator(loci_func, stf, fname + ".ST2"), avg_fst 834 835 #6.3
836 - def calc_rho_all(self, fname):
837 raise NotImplementedError
838 839 #6.4
840 - def calc_rho_pair(self, fname):
841 raise NotImplementedError
842
843 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
844 """Calculates isolation by distance statistics 845 """ 846 self._run_genepop([".GRA", ".MIG", ".ISO"], [6,sub], 847 fname, opts = { 848 "MinimalDistance" : min_dist, 849 "GeographicScale" : scale, 850 "IsolBDstatistic" : stat, 851 }) 852 f = open(fname + ".ISO") 853 f.readline() 854 f.readline() 855 f.readline() 856 f.readline() 857 estimate = _read_triangle_matrix(f) 858 f.readline() 859 f.readline() 860 distance = _read_triangle_matrix(f) 861 f.readline() 862 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 863 a = _gp_float(match.group(1)) 864 b = _gp_float(match.group(2)) 865 f.readline() 866 f.readline() 867 match = re.match(" b=(.+)", f.readline().rstrip()) 868 bb = _gp_float(match.group(1)) 869 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 870 bblow = _gp_float(match.group(1)) 871 bbhigh = _gp_float(match.group(2)) 872 f.close() 873 os.remove(fname + ".MIG") 874 os.remove(fname + ".GRA") 875 os.remove(fname + ".ISO") 876 return estimate, distance, (a, b), (bb, bblow, bbhigh)
877 878 #6.5
879 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
880 """Calculates isolation by distance statistics for diploid data. 881 882 See _calc_ibd for parameter details. 883 Note that each pop can only have a single individual and 884 the individual name has to be the sample coordinates. 885 """ 886 return self._calc_ibd(fname, 5, stat, scale, min_dist)
887 888 #6.6
889 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
890 """Calculates isolation by distance statistics for haploid data. 891 892 See _calc_ibd for parameter details. 893 Note that each pop can only have a single individual and 894 the individual name has to be the sample coordinates. 895 """ 896 return self._calc_ibd(fname, 6, stat, scale, min_dist)
897