1
2
3
4
5
6 """Bio.SearchIO parser for Exonerate cigar output format."""
7
8 import re
9
10 from Bio._py3k import _as_bytes, _bytes_to_string
11
12 from _base import _BaseExonerateParser, _STRAND_MAP
13 from exonerate_vulgar import ExonerateVulgarIndexer
14
15
16 __all__ = ['ExonerateCigarParser', 'ExonerateCigarIndexer']
17
18
19
20 _RE_CIGAR = re.compile(r"""^cigar:\s+
21 (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+ # query: ID, start, end, strand
22 (\S+)\s+(\d+)\s+(\d+)\s+([\+-\.])\s+ # hit: ID, start, end, strand
23 (\d+)(\s+.*)$ # score, vulgar components
24 """, re.VERBOSE)
25
26
28
29 """Parser for Exonerate cigar strings."""
30
# NOTE(review): the enclosing ``class`` and ``def`` header lines are not
# visible in this view.  The statements below use ``self`` and ``header``, so
# they are presumably the body of a parser method taking ``header`` -- confirm
# against the full file.
# Marker string for this output format; how the base class uses it is not
# visible here.
31 _ALN_MARK = 'cigar'
32
# ``header`` is indexed by the keys 'qresult', 'hit' and 'hsp'; the three
# values are updated below with item assignment and handed back at the end.
34 qresult = header['qresult']
35 hit = header['hit']
36 hsp = header['hsp']
# Ask the parser to read until a line starting with 'cigar' is reached
# (presumably this leaves that line in ``self.line`` -- verify in the base
# class), then split it into fields with _RE_CIGAR.
37 self.read_until(lambda line: line.startswith('cigar'))
# NOTE(review): re.search() returns None when the line does not match; the
# result is used below without a None check, so a malformed line would fail
# with AttributeError on ``.group``.
38 cigars = re.search(_RE_CIGAR, self.line)
39
40
# When ``self.has_c4_alignment`` is true, the values already present in the
# header are checked against the cigar line's fields instead of overwritten.
# The comparison is against the raw regex groups (strings), so the header
# values are presumably still strings at this point -- TODO confirm.
# NOTE(review): ``assert`` statements are removed when Python runs with -O,
# so these consistency checks are not enforced in optimized mode.
41 if self.has_c4_alignment:
42 assert qresult['id'] == cigars.group(1)
43 assert hsp['query_start'] == cigars.group(2)
44 assert hsp['query_end'] == cigars.group(3)
45 assert hsp['query_strand'] == cigars.group(4)
46 assert hit['id'] == cigars.group(5)
47 assert hsp['hit_start'] == cigars.group(6)
48 assert hsp['hit_end'] == cigars.group(7)
49 assert hsp['hit_strand'] == cigars.group(8)
50 assert hsp['score'] == cigars.group(9)
# Otherwise the cigar line is the only source: copy its fields (still as
# strings) into the header containers.
51 else:
52 qresult['id'] = cigars.group(1)
53 hsp['query_start'] = cigars.group(2)
54 hsp['query_end'] = cigars.group(3)
55 hsp['query_strand'] = cigars.group(4)
56 hit['id'] = cigars.group(5)
57 hsp['hit_start'] = cigars.group(6)
58 hsp['hit_end'] = cigars.group(7)
59 hsp['hit_strand'] = cigars.group(8)
60 hsp['score'] = cigars.group(9)
61
62
# Replace the strand characters with whatever _STRAND_MAP (imported from
# _base; its contents are not visible here) maps them to.
63 hsp['query_strand'] = _STRAND_MAP[hsp['query_strand']]
64 hsp['hit_strand'] = _STRAND_MAP[hsp['hit_strand']]
65
# Convert the coordinates to integers.
66 qstart = int(hsp['query_start'])
67 qend = int(hsp['query_end'])
68 hstart = int(hsp['hit_start'])
69 hend = int(hsp['hit_end'])
70
# Store each coordinate pair ordered so that start <= end, regardless of the
# order in which the two numbers appeared on the cigar line.
71 hsp['query_start'] = min(qstart, qend)
72 hsp['query_end'] = max(qstart, qend)
73 hsp['hit_start'] = min(hstart, hend)
74 hsp['hit_end'] = max(hstart, hend)
75
# The score is stored as an integer.
76 hsp['score'] = int(hsp['score'])
77
# Group 10 is the remainder of the cigar line after the score; the regex
# captures it together with its leading whitespace and it is stored unparsed.
78 hsp['cigar_comp'] = cigars.group(10)
79
80
# Each range list holds exactly one (start, end) tuple built from the
# normalized coordinates above.
81 hsp['query_ranges'] = [(hsp['query_start'], hsp['query_end'])]
82 hsp['hit_ranges'] = [(hsp['hit_start'], hsp['hit_end'])]
83
# Hand back the same three containers under the same keys they arrived with.
84 return {'qresult': qresult, 'hit': hit, 'hsp': hsp}
85
86
103
104
105
# Script entry point: when this file is executed directly (not imported),
# call Bio._utils.run_doctest() -- presumably to run this module's doctests.
if __name__ == "__main__":
    from Bio import _utils as _bio_utils
    _bio_utils.run_doctest()
109