Package Bio :: Package Phylo :: Package PAML :: Module codeml
[hide private]
[frames | no frames]

Source Code for Module Bio.Phylo.PAML.codeml

  1  # Copyright (C) 2011 by Brandon Invergo (b.invergo@gmail.com) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  from __future__ import print_function 
  7   
  8  import os.path 
  9  from ._paml import Paml 
 10  from . import _parse_codeml 
 11   
 12   
class CodemlError(EnvironmentError):
    """Exception signalling that a CODEML run failed.

    Re-run with verbose=True to see the error message printed by CODEML.
    """
15 16
class Codeml(Paml):
    """An interface to CODEML, part of the PAML package.

    Control-file options live in the ``_options`` dictionary. An option
    whose value is None is omitted when the control file is written.
    """

    def __init__(self, alignment=None, tree=None, working_dir=None,
                 out_file=None):
        """Initialize the codeml instance.

        The user may optionally pass in strings specifying the locations
        of the input alignment and tree files, the working directory and
        the final output file. Every other CODEML control-file option
        starts out as None (unset) and has to be filled in afterwards,
        e.g. with read_ctl_file.

        Raises IOError if a tree file is given but does not exist.
        """
        Paml.__init__(self, alignment, working_dir, out_file)
        if tree is not None:
            if not os.path.exists(tree):
                raise IOError("The specified tree file does not exist.")
        self.tree = tree
        self.ctl_file = "codeml.ctl"
        self._options = {"noisy": None,
                         "verbose": None,
                         "runmode": None,
                         "seqtype": None,
                         "CodonFreq": None,
                         "ndata": None,
                         "clock": None,
                         "aaDist": None,
                         "aaRatefile": None,
                         "model": None,
                         "NSsites": None,
                         "icode": None,
                         "Mgene": None,
                         "fix_kappa": None,
                         "kappa": None,
                         "fix_omega": None,
                         "omega": None,
                         "fix_alpha": None,
                         "alpha": None,
                         "Malpha": None,
                         "ncatG": None,
                         "getSE": None,
                         "RateAncestor": None,
                         "Small_Diff": None,
                         "cleandata": None,
                         "fix_blength": None,
                         "method": None,
                         "rho": None,
                         "fix_rho": None}

    @staticmethod
    def _format_option(name, value):
        """Return an option value in the form used in the control file.

        NSsites is stored in Python as a list but in the control file it
        is specified as a series of numbers separated by spaces. Every
        other value (and an unset NSsites) is returned unchanged.
        """
        if name == "NSsites" and value is not None:
            return " ".join(str(site) for site in value)
        return value

    def write_ctl_file(self):
        """Dynamically build a CODEML control file from the options.

        The control file is written to the location specified by the
        ctl_file property of the codeml class. Options whose value is
        None are skipped; they are normally just commented out in a
        control file.
        """
        # Make sure all paths are relative to the working directory
        self._set_rel_paths()
        with open(self.ctl_file, "w") as ctl_handle:
            ctl_handle.write("seqfile = %s\n" % self._rel_alignment)
            ctl_handle.write("outfile = %s\n" % self._rel_out_file)
            ctl_handle.write("treefile = %s\n" % self._rel_tree)
            for option, value in self._options.items():
                if value is None:
                    continue
                ctl_handle.write(
                    "%s = %s\n" % (option,
                                   self._format_option(option, value)))

    def read_ctl_file(self, ctl_file):
        """Parse a control file and load the options into the Codeml instance.

        Text after a "*" on a line is treated as a comment. The seqfile,
        treefile and outfile entries set the alignment, tree and out_file
        attributes; NSsites becomes a list of ints; any other value is
        converted to float (if it contains ".") or int where possible and
        otherwise kept as a string. Options not present in the file are
        reset to None.

        Raises IOError if ctl_file is not a file, AttributeError for a
        line without "=", TypeError for a non-integer (or empty) NSsites
        entry and KeyError for an unknown option name.
        """
        if not os.path.isfile(ctl_file):
            raise IOError("File not found: %r" % ctl_file)
        temp_options = {}
        with open(ctl_file) as ctl_handle:
            for line in ctl_handle:
                line = line.strip()
                uncommented = line.split("*", 1)[0]
                if uncommented == "":
                    continue
                if "=" not in uncommented:
                    raise AttributeError(
                        "Malformed line in control file:\n%r" % line)
                (option, value) = uncommented.split("=", 1)
                option = option.strip()
                value = value.strip()
                if option == "seqfile":
                    self.alignment = value
                elif option == "treefile":
                    self.tree = value
                elif option == "outfile":
                    self.out_file = value
                elif option == "NSsites":
                    # Split on any run of whitespace so that repeated
                    # spaces or tabs between site classes are accepted.
                    tokens = value.split()
                    if not tokens:
                        raise TypeError("Invalid site class: %s" % value)
                    site_classes = []
                    for token in tokens:
                        try:
                            site_classes.append(int(token))
                        except ValueError:
                            raise TypeError(
                                "Invalid site class: %s" % token)
                    temp_options["NSsites"] = site_classes
                elif option not in self._options:
                    raise KeyError("Invalid option: %s" % option)
                else:
                    # Values containing "." are tried as floats, all
                    # others as ints; anything else stays a string.
                    converter = float if "." in value else int
                    try:
                        converted_value = converter(value)
                    except ValueError:
                        converted_value = value
                    temp_options[option] = converted_value
        # Options missing from the file are cleared back to None.
        for option in self._options:
            self._options[option] = temp_options.get(option)

    def print_options(self):
        """Print out all of the options and their current settings."""
        for option, value in self._options.items():
            print("%s = %s" % (option, self._format_option(option, value)))

    def _set_rel_paths(self):
        """Convert all file/directory locations to relative paths.

        CODEML requires that all paths specified in the control file be
        relative to the directory from which it is called rather than
        absolute paths. The tree path is made relative to working_dir;
        the remaining paths are handled by Paml._set_rel_paths.
        """
        Paml._set_rel_paths(self)
        if self.tree is not None:
            self._rel_tree = os.path.relpath(self.tree, self.working_dir)

    def run(self, ctl_file=None, verbose=False, command="codeml", parse=True):
        """Run codeml using the current configuration and then parse the results.

        Return the parsed contents of the output file (as produced by the
        module-level read function) when parse is true, otherwise None.
        The arguments may be passed as either absolute or relative paths,
        despite the fact that CODEML requires relative paths.

        Raises ValueError if no tree file has been specified and IOError
        if the specified tree file does not exist.
        """
        if self.tree is None:
            raise ValueError("Tree file not specified.")
        if not os.path.exists(self.tree):
            raise IOError("The specified tree file does not exist.")
        Paml.run(self, ctl_file, verbose, command)
        if parse:
            return read(self.out_file)
        return None
189 190
def read(results_file):
    """Parse a CODEML results file.

    The file's lines are handed in turn to the ``_parse_codeml`` helpers
    (basics, NSsites, pairwise, distances) and the accumulated results
    object is returned.

    Raises IOError when ``results_file`` does not exist and ValueError
    when nothing could be extracted from it.
    """
    if not os.path.exists(results_file):
        raise IOError("Results file does not exist.")
    with open(results_file) as results_handle:
        content = results_handle.readlines()
    basics = _parse_codeml.parse_basics(content, {})
    (parsed, multi_models, multi_genes) = basics
    parsed = _parse_codeml.parse_nssites(content, parsed, multi_models,
                                         multi_genes)
    parsed = _parse_codeml.parse_pairwise(content, parsed)
    parsed = _parse_codeml.parse_distances(content, parsed)
    if len(parsed) == 0:
        raise ValueError("Invalid results file")
    return parsed
207