Package Bio :: Package Wise :: Module dnal
[hide private]
[frames] | no frames]

Source Code for Module Bio.Wise.dnal

  1  #!/usr/bin/env python 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  """Run and process output from the Wise2 package tool dnal. 
  7   
  8  Bio.Wise contains modules for running and processing the output of 
  9  some of the models in the Wise2 package by Ewan Birney available from: 
 10  ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ 
 11  http://www.ebi.ac.uk/Wise2/ 
 12   
 13  Bio.Wise.psw is for protein Smith-Waterman alignments 
 14  Bio.Wise.dnal is for Smith-Waterman DNA alignments 
 15  """ 
 16   
 17  from __future__ import print_function 
 18   
 19  import re 
 20   
 21  # Importing with leading underscore as not intended to be exposed 
 22  from Bio._py3k import getoutput as _getoutput 
 23  from Bio._py3k import zip 
 24  from Bio._py3k import map 
 25   
 26  from Bio import Wise 
 27   
 28   
 29  _SCORE_MATCH = 4 
 30  _SCORE_MISMATCH = -1 
 31  _SCORE_GAP_START = -5 
 32  _SCORE_GAP_EXTENSION = -1 
 33   
 34  _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"] 
 35   
 36   
37 -def _build_dnal_cmdline(match, mismatch, gap, extension):
38 res = _CMDLINE_DNAL[:] 39 res.extend(["-match", str(match)]) 40 res.extend(["-mis", str(mismatch)]) 41 res.extend(["-gap", str(-gap)]) # negative: convert score to penalty 42 res.extend(["-ext", str(-extension)]) # negative: convert score to penalty 43 44 return res
45 46 47 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s" 48 49
50 -def _fgrep_count(pattern, file):
51 return int(_getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
52 53 54 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):") 55 56
57 -def _alb_line2coords(line):
58 return tuple([int(coord) + 1 # one-based -> zero-based 59 for coord 60 in _re_alb_line2coords.match(line).groups()])
61 62
63 -def _get_coords(filename):
64 alb = open(filename) 65 66 start_line = None 67 end_line = None 68 69 for line in alb: 70 if line.startswith("["): 71 if not start_line: 72 start_line = line # rstrip not needed 73 else: 74 end_line = line 75 76 if end_line is None: # sequence is too short 77 return [(0, 0), (0, 0)] 78 79 return list(zip(*map(_alb_line2coords, [start_line, end_line]))) # returns [(start0, end0), (start1, end1)]
80 81
82 -class Statistics(object):
83 """Calculate statistics from an ALB report.""" 84
85 - def __init__(self, filename, match, mismatch, gap, extension):
86 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename) 87 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename) 88 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename) 89 90 if gap == extension: 91 self.extensions = 0 92 else: 93 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename) 94 95 self.score = (match * self.matches + 96 mismatch * self.mismatches + 97 gap * self.gaps + 98 extension * self.extensions) 99 100 if self.matches or self.mismatches or self.gaps or self.extensions: 101 self.coords = _get_coords(filename) 102 else: 103 self.coords = [(0, 0), (0, 0)]
104
105 - def identity_fraction(self):
106 return self.matches / (self.matches + self.mismatches)
107 108 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions" 109
110 - def __str__(self):
111 return "\t".join(str(x) for x in (self.identity_fraction(), 112 self.matches, self.mismatches, 113 self.gaps, self.extensions))
114 115
116 -def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
117 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension) 118 temp_file = Wise.align(cmdline, pair, **keywds) 119 try: 120 return Statistics(temp_file.name, match, mismatch, gap, extension) 121 except AttributeError: 122 try: 123 keywds['dry_run'] 124 return None 125 except KeyError: 126 raise
127 128
129 -def main():
130 import sys 131 stats = align(sys.argv[1:3]) 132 print("\n".join("%s: %s" % (attr, getattr(stats, attr)) 133 for attr in ("matches", "mismatches", "gaps", "extensions"))) 134 print("identity_fraction: %s" % stats.identity_fraction()) 135 print("coords: %s" % stats.coords)
136 137
138 -def _test(*args, **keywds):
139 import doctest 140 import sys 141 doctest.testmod(sys.modules[__name__], *args, **keywds)
142 143 144 if __name__ == "__main__": 145 if __debug__: 146 _test() 147 main() 148