Package Bio :: Package Wise :: Module dnal
[hide private]
[frames] | no frames]

Source Code for Module Bio.Wise.dnal

  1  #!/usr/bin/env python 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  """ 
  7  Bio.Wise contains modules for running and processing the output of 
  8  some of the models in the Wise2 package by Ewan Birney available from: 
  9  ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ 
 10  http://www.ebi.ac.uk/Wise2/ 
 11   
 12  Bio.Wise.psw is for protein Smith-Waterman alignments 
 13  Bio.Wise.dnal is for Smith-Waterman DNA alignments 
 14  """ 
 15   
 16  from __future__ import print_function 
 17   
 18  import re 
 19   
 20  # Importing with leading underscore as not intended to be exposed 
 21  from Bio._py3k import getoutput as _getoutput 
 22  from Bio._py3k import zip 
 23  from Bio._py3k import map 
 24   
 25  from Bio import Wise 
 26   
 27   
 28  _SCORE_MATCH = 4 
 29  _SCORE_MISMATCH = -1 
 30  _SCORE_GAP_START = -5 
 31  _SCORE_GAP_EXTENSION = -1 
 32   
 33  _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"] 
 34   
 35   
36 -def _build_dnal_cmdline(match, mismatch, gap, extension):
37 res = _CMDLINE_DNAL[:] 38 res.extend(["-match", str(match)]) 39 res.extend(["-mis", str(mismatch)]) 40 res.extend(["-gap", str(-gap)]) # negative: convert score to penalty 41 res.extend(["-ext", str(-extension)]) # negative: convert score to penalty 42 43 return res
44 45 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s" 46 47
48 -def _fgrep_count(pattern, file):
49 return int(_getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
50 51 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):") 52 53
54 -def _alb_line2coords(line):
55 return tuple([int(coord) + 1 # one-based -> zero-based 56 for coord 57 in _re_alb_line2coords.match(line).groups()])
58 59
60 -def _get_coords(filename):
61 alb = file(filename) 62 63 start_line = None 64 end_line = None 65 66 for line in alb: 67 if line.startswith("["): 68 if not start_line: 69 start_line = line # rstrip not needed 70 else: 71 end_line = line 72 73 if end_line is None: # sequence is too short 74 return [(0, 0), (0, 0)] 75 76 return list(zip(*map(_alb_line2coords, [start_line, end_line]))) # returns [(start0, end0), (start1, end1)]
77 78
79 -class Statistics(object):
80 """ 81 Calculate statistics from an ALB report 82 """
83 - def __init__(self, filename, match, mismatch, gap, extension):
84 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename) 85 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename) 86 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename) 87 88 if gap == extension: 89 self.extensions = 0 90 else: 91 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename) 92 93 self.score = (match * self.matches + 94 mismatch * self.mismatches + 95 gap * self.gaps + 96 extension * self.extensions) 97 98 if self.matches or self.mismatches or self.gaps or self.extensions: 99 self.coords = _get_coords(filename) 100 else: 101 self.coords = [(0, 0), (0, 0)]
102
103 - def identity_fraction(self):
104 return self.matches / (self.matches + self.mismatches)
105 106 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions" 107
108 - def __str__(self):
109 return "\t".join(str(x) for x in (self.identity_fraction(), 110 self.matches, self.mismatches, 111 self.gaps, self.extensions))
112 113
114 -def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
115 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension) 116 temp_file = Wise.align(cmdline, pair, **keywds) 117 try: 118 return Statistics(temp_file.name, match, mismatch, gap, extension) 119 except AttributeError: 120 try: 121 keywds['dry_run'] 122 return None 123 except KeyError: 124 raise
125 126
127 -def main():
128 import sys 129 stats = align(sys.argv[1:3]) 130 print("\n".join("%s: %s" % (attr, getattr(stats, attr)) 131 for attr in ("matches", "mismatches", "gaps", "extensions"))) 132 print("identity_fraction: %s" % stats.identity_fraction()) 133 print("coords: %s" % stats.coords)
134 135
136 -def _test(*args, **keywds):
137 import doctest 138 import sys 139 doctest.testmod(sys.modules[__name__], *args, **keywds)
140 141 if __name__ == "__main__": 142 if __debug__: 143 _test() 144 main() 145