Package Bio :: Package SearchIO :: Package ExonerateIO :: Module exonerate_cigar
[hide private]
[frames | no frames]

Source Code for Module Bio.SearchIO.ExonerateIO.exonerate_cigar

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO parser for Exonerate cigar output format.""" 
  7   
  8  import re 
  9   
 10  from Bio._py3k import _bytes_to_string 
 11   
 12  from ._base import _BaseExonerateParser, _STRAND_MAP 
 13  from .exonerate_vulgar import ExonerateVulgarIndexer 
 14   
 15   
 16  __all__ = ('ExonerateCigarParser', 'ExonerateCigarIndexer') 
 17   
 18   
 19  # precompile regex 
 20  _RE_CIGAR = re.compile(r"""^cigar:\s+ 
 21          (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+  # query: ID, start, end, strand 
 22          (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+  # hit: ID, start, end, strand 
 23          (\d+)(\s+.*)$                         # score, vulgar components 
 24          """, re.VERBOSE) 
 25   
 26   
class ExonerateCigarParser(_BaseExonerateParser):
    """Parser for Exonerate cigar strings."""

    _ALN_MARK = 'cigar'

    def parse_alignment_block(self, header):
        """Parse the next cigar line and fill in the header dictionaries.

        ``header`` must provide the ``'qresult'``, ``'hit'`` and ``'hsp'``
        mappings.  The parser advances to the next line starting with
        ``cigar`` and matches it against ``_RE_CIGAR``.

        When ``self.has_c4_alignment`` is true, the values already stored in
        the header are asserted to equal the ones captured from the cigar
        line; otherwise the captured values are written into the header.

        Afterwards the HSP strands are translated through ``_STRAND_MAP``,
        coordinates and score are cast to ``int``, coordinates are ordered so
        that start <= end, and the raw cigar components are stored under
        ``'cigar_comp'``.

        Returns a dictionary with the (mutated) ``'qresult'``, ``'hit'`` and
        ``'hsp'`` mappings.
        """
        qresult, hit, hsp = header['qresult'], header['hit'], header['hsp']

        self.read_until(lambda line: line.startswith('cigar'))
        cigars = re.search(_RE_CIGAR, self.line)

        # (container, key) pairs listed in regex capture-group order; the
        # n-th entry corresponds to cigars.group(n), counting from 1
        captured_fields = (
            (qresult, 'id'),
            (hsp, 'query_start'),
            (hsp, 'query_end'),
            (hsp, 'query_strand'),
            (hit, 'id'),
            (hsp, 'hit_start'),
            (hsp, 'hit_end'),
            (hsp, 'hit_strand'),
            (hsp, 'score'),
        )

        if self.has_c4_alignment:
            # the file has c4 alignments: the header values were parsed
            # earlier, so only verify that the cigar line agrees with them
            for group_no, (container, key) in enumerate(captured_fields, 1):
                assert container[key] == cigars.group(group_no)
        else:
            # no c4 alignment block: the cigar line is the only data source
            for group_no, (container, key) in enumerate(captured_fields, 1):
                container[key] = cigars.group(group_no)

        # adjust strands
        for strand_key in ('query_strand', 'hit_strand'):
            hsp[strand_key] = _STRAND_MAP[hsp[strand_key]]

        # cast all four coords into ints before writing any of them back,
        # then order each pair so that start <= end
        query_low, query_high = sorted(
            (int(hsp['query_start']), int(hsp['query_end'])))
        hit_low, hit_high = sorted(
            (int(hsp['hit_start']), int(hsp['hit_end'])))
        hsp['query_start'], hsp['query_end'] = query_low, query_high
        hsp['hit_start'], hsp['hit_end'] = hit_low, hit_high

        # cast score into int
        hsp['score'] = int(hsp['score'])
        # store cigar components
        hsp['cigar_comp'] = cigars.group(10)

        # HACK: since we can't really figure out exactly when a
        # HSP starts or ends, we set the entire alignment as one HSP
        hsp['query_ranges'] = [(query_low, query_high)]
        hsp['hit_ranges'] = [(hit_low, hit_high)]

        return dict(qresult=qresult, hit=hit, hsp=hsp)
84 85
class ExonerateCigarIndexer(ExonerateVulgarIndexer):
    """Indexer class for exonerate cigar lines."""

    _parser = ExonerateCigarParser
    _query_mark = b"cigar"

    def get_qresult_id(self, pos):
        """Return the query ID of the cigar line found at offset ``pos``.

        Seeks the underlying handle to ``pos``, reads one line, asserts that
        it starts with the cigar marker, and returns the first captured
        group (the query ID) of ``_RE_CIGAR`` applied to the decoded line.
        """
        stream = self._handle
        stream.seek(pos)
        # read the line, check that it is a cigar line, and pull the query ID
        raw_line = stream.readline()
        assert raw_line.startswith(self._query_mark), raw_line
        cigar_match = re.search(_RE_CIGAR, _bytes_to_string(raw_line))
        return cigar_match.group(1)
# Script entry point: when this file is executed directly instead of being
# imported as a module, run the doctest.
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()