Package Bio :: Package Phylo :: Package PAML :: Module codeml
[hide private]
[frames] | no frames]

Source Code for Module Bio.Phylo.PAML.codeml

  1  # Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  from __future__ import print_function 
  7   
  8  import os 
  9  import os.path 
 10  from ._paml import Paml, _relpath 
 11  from . import _parse_codeml 
 12   
 13   
class CodemlError(EnvironmentError):
    """CODEML has failed.

    Run with verbose = True to view CODEML's error message.
    """
17 18
class Codeml(Paml):
    """This class implements an interface to CODEML, part of the PAML package."""

    def __init__(self, alignment=None, tree=None, working_dir=None,
                 out_file=None):
        """Initialize the codeml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file.

        Every CODEML control-file option known to this class is created
        in ``self._options`` with the value None, meaning "unset"; unset
        options are left out of the control file when it is written.

        Raises IOError if ``tree`` is given but does not exist on disk.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None:
            if not os.path.exists(tree):
                raise IOError("The specified tree file does not exist.")
        self.tree = tree
        self.ctl_file = "codeml.ctl"
        self._options = {"noisy": None,
                         "verbose": None,
                         "runmode": None,
                         "seqtype": None,
                         "CodonFreq": None,
                         "ndata": None,
                         "clock": None,
                         "aaDist": None,
                         "aaRatefile": None,
                         "model": None,
                         "NSsites": None,
                         "icode": None,
                         "Mgene": None,
                         "fix_kappa": None,
                         "kappa": None,
                         "fix_omega": None,
                         "omega": None,
                         "fix_alpha": None,
                         "alpha": None,
                         "Malpha": None,
                         "ncatG": None,
                         "getSE": None,
                         "RateAncestor": None,
                         "Small_Diff": None,
                         "cleandata": None,
                         "fix_blength": None,
                         "method": None,
                         "rho": None,
                         "fix_rho": None}

    def write_ctl_file(self):
        """Dynamically build a CODEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the codeml class. The seqfile, outfile and
        treefile entries are written first, followed by every option
        whose value is not None.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        # NOTE(review): self._rel_tree is only assigned by _set_rel_paths
        # when self.tree is not None; presumably callers always set a
        # tree before writing the control file -- confirm.
        with open(self.ctl_file, 'w') as ctl_handle:
            ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
            ctl_handle.write("outfile = %s\n" % self._rel_out_file)
            ctl_handle.write("treefile = %s\n" % self._rel_tree)
            for name, value in self._options.items():
                if value is None:
                    # If an option has a value of None, there's no need
                    # to write it in the control file; it's normally just
                    # commented out.
                    continue
                if name == "NSsites":
                    # NSsites is stored in Python as a list but in the
                    # control file it is specified as a series of numbers
                    # separated by spaces.
                    value = " ".join(str(site) for site in value)
                ctl_handle.write("%s = %s\n" % (name, value))

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Codeml instance.

        Text after a ``*`` on a line is treated as a comment. The
        seqfile, treefile and outfile entries are stored on the
        ``alignment``, ``tree`` and ``out_file`` attributes; NSsites is
        stored as a list of ints; every other option is converted to a
        float (if it contains a ".") or an int where possible and kept
        as a string otherwise. Options absent from the file are reset
        to None.

        Raises:
         - IOError if ``ctl_file`` is not an existing file.
         - AttributeError if a non-comment line has no "=".
         - TypeError if an NSsites entry is not an integer.
         - KeyError if an option name is not recognised.
        """
        temp_options = {}
        if not os.path.isfile(ctl_file):
            raise IOError("File not found: %r" % ctl_file)
        with open(ctl_file) as ctl_handle:
            for line in ctl_handle:
                line = line.strip()
                uncommented = line.split("*", 1)[0]
                if uncommented == "":
                    continue
                if "=" not in uncommented:
                    raise AttributeError(
                        "Malformed line in control file:\n%r" % line)
                # Split on the first "=" only, so that a value which
                # itself contains "=" does not break the unpacking.
                option, value = uncommented.split("=", 1)
                option = option.strip()
                value = value.strip()
                if option == "seqfile":
                    self.alignment = value
                elif option == "treefile":
                    self.tree = value
                elif option == "outfile":
                    self.out_file = value
                elif option == "NSsites":
                    # Split on any run of whitespace so that repeated
                    # spaces or tabs do not yield empty tokens.
                    tokens = value.split()
                    if not tokens:
                        # An empty value has always been rejected.
                        raise TypeError("Invalid site class: %s" % value)
                    site_classes = []
                    for token in tokens:
                        try:
                            site_classes.append(int(token))
                        except ValueError:
                            raise TypeError(
                                "Invalid site class: %s" % token)
                    temp_options["NSsites"] = site_classes
                elif option not in self._options:
                    raise KeyError("Invalid option: %s" % option)
                else:
                    # A "." selects float conversion, otherwise int;
                    # values that are not numeric stay as strings.
                    converter = float if "." in value else int
                    try:
                        converted_value = converter(value)
                    except ValueError:
                        converted_value = value
                    temp_options[option] = converted_value
        # Only commit the options once the whole file has been parsed;
        # anything the file did not mention becomes None.
        for option in self._options:
            self._options[option] = temp_options.get(option)

    def print_options(self):
        """Print out all of the options and their current settings."""
        for name, value in self._options.items():
            if name == "NSsites" and value is not None:
                # NSsites is stored in Python as a list but in the
                # control file it is specified as a series of numbers
                # separated by spaces.
                value = " ".join(str(site) for site in value)
            print("%s = %s" % (name, value))

    def _set_rel_paths(self):
        """Convert all file/directory locations to relative paths.

        CODEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths. The base class handles its own paths; here the
        tree path, when set, is made relative to the working directory
        and stored in ``self._rel_tree``.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = _relpath(self.tree, self.working_dir)

    def run(self, ctl_file=None, verbose=False, command="codeml",
            parse=True):
        """Run codeml using the current configuration and parse the results.

        Execution itself is delegated to ``Paml.run``. The arguments may
        be passed as either absolute or relative paths, despite the fact
        that CODEML requires relative paths.

        Returns the results parsed from ``self.out_file`` by ``read``
        when ``parse`` is true, otherwise None.

        Raises ValueError if no tree file has been specified and IOError
        if the specified tree file does not exist.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise IOError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            return read(self.out_file)
        return None
192 193
def read(results_file):
    """Parse a CODEML results file.

    The file is read into memory and handed in turn to the
    ``_parse_codeml`` helpers (basics, NSsites, pairwise, distances),
    each of which adds to the results.

    Returns the accumulated results.

    Raises IOError if ``results_file`` does not exist and ValueError if
    nothing could be parsed from it.
    """
    results = {}
    if not os.path.exists(results_file):
        raise IOError("Results file does not exist.")
    with open(results_file) as handle:
        lines = handle.readlines()
    (results, multi_models, multi_genes) = _parse_codeml.parse_basics(
        lines, results)
    results = _parse_codeml.parse_nssites(lines, results, multi_models,
                                          multi_genes)
    results = _parse_codeml.parse_pairwise(lines, results)
    results = _parse_codeml.parse_distances(lines, results)
    if not results:
        # None of the parsers recognised anything in the file.
        raise ValueError("Invalid results file")
    return results
210