Package Bio :: Package SearchIO :: Package BlastIO :: Module blast_text
[hide private]
[frames] | no frames]

Source Code for Module Bio.SearchIO.BlastIO.blast_text

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO parser for BLAST+ plain text output formats. 
  7   
  8  At the moment this is a wrapper around Biopython's NCBIStandalone text 
  9  parser (which is now deprecated). 
 10   
 11  """ 
 12   
 13  from Bio.Alphabet import generic_dna, generic_protein 
 14  from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment 
 15   
 16  import warnings 
 17  from Bio import BiopythonDeprecationWarning 
 18  with warnings.catch_warnings(): 
 19      warnings.simplefilter('ignore', BiopythonDeprecationWarning) 
 20      from Bio.Blast import NCBIStandalone 
 21   
 22   
 23  __all__ = ['BlastTextParser'] 
 24   
 25  __docformat__ = "restructuredtext en" 
 26   
 27   
28 -class BlastTextParser(object):
29 30 """Parser for the BLAST text format.""" 31
32 - def __init__(self, handle):
33 self.handle = handle 34 blast_parser = NCBIStandalone.BlastParser() 35 self.blast_iter = NCBIStandalone.Iterator(handle, blast_parser)
36
37 - def __iter__(self):
38 for rec in self.blast_iter: 39 # set attributes to SearchIO's 40 # get id and desc 41 if rec.query.startswith('>'): 42 rec.query = rec.query[1:] 43 try: 44 qid, qdesc = rec.query.split(' ', 1) 45 except ValueError: 46 qid, qdesc = rec.query, '' 47 qdesc = qdesc.replace('\n', '').replace('\r', '') 48 49 qresult = QueryResult(id=qid) 50 qresult.program = rec.application.lower() 51 qresult.target = rec.database 52 qresult.seq_len = rec.query_letters 53 qresult.version = rec.version 54 55 # determine alphabet based on program 56 if qresult.program == 'blastn': 57 alphabet = generic_dna 58 elif qresult.program in ['blastp', 'blastx', 'tblastn', 'tblastx']: 59 alphabet = generic_protein 60 61 # iterate over the 'alignments' (hits) and the hit table 62 for idx, aln in enumerate(rec.alignments): 63 # get id and desc 64 if aln.title.startswith('> '): 65 aln.title = aln.title[2:] 66 elif aln.title.startswith('>'): 67 aln.title = aln.title[1:] 68 try: 69 hid, hdesc = aln.title.split(' ', 1) 70 except ValueError: 71 hid, hdesc = aln.title, '' 72 hdesc = hdesc.replace('\n', '').replace('\r', '') 73 74 # iterate over the hsps and group them in a list 75 hsp_list = [] 76 for bhsp in aln.hsps: 77 frag = HSPFragment(hid, qid) 78 frag.alphabet = alphabet 79 # set alignment length 80 frag.aln_span = bhsp.identities[1] 81 # set frames 82 try: 83 frag.query_frame = int(bhsp.frame[0]) 84 except IndexError: 85 if qresult.program in ('blastp', 'tblastn'): 86 frag.query_frame = 0 87 else: 88 frag.query_frame = 1 89 try: 90 frag.hit_frame = int(bhsp.frame[1]) 91 except IndexError: 92 if qresult.program in ('blastp', 'tblastn'): 93 frag.hit_frame = 0 94 else: 95 frag.hit_frame = 1 96 # set query coordinates 97 frag.query_start = min(bhsp.query_start, 98 bhsp.query_end) - 1 99 frag.query_end = max(bhsp.query_start, bhsp.query_end) 100 # set hit coordinates 101 frag.hit_start = min(bhsp.sbjct_start, 102 bhsp.sbjct_end) - 1 103 frag.hit_end = max(bhsp.sbjct_start, bhsp.sbjct_end) 104 # set query, hit sequences and its annotation 105 qseq = '' 106 hseq = '' 107 midline = '' 108 for seqtrio in zip(bhsp.query, bhsp.sbjct, bhsp.match): 109 qchar, hchar, mchar = seqtrio 110 if qchar == ' ' or hchar == ' ': 111 assert all(' ' == x for x in seqtrio) 112 else: 113 qseq += qchar 114 hseq += hchar 115 midline += mchar 116 frag.query, frag.hit = qseq, hseq 117 frag.aln_annotation['similarity'] = midline 118 119 # create HSP object with the fragment 120 hsp = HSP([frag]) 121 hsp.evalue = bhsp.expect 122 hsp.bitscore = bhsp.bits 123 hsp.bitscore_raw = bhsp.score 124 # set gap 125 try: 126 hsp.gap_num = bhsp.gaps[0] 127 except IndexError: 128 hsp.gap_num = 0 129 # set identity 130 hsp.ident_num = bhsp.identities[0] 131 hsp.pos_num = bhsp.positives[0] 132 if hsp.pos_num is None: 133 hsp.pos_num = hsp[0].aln_span 134 135 hsp_list.append(hsp) 136 137 hit = Hit(hsp_list) 138 hit.seq_len = aln.length 139 hit.description = hdesc 140 qresult.append(hit) 141 142 qresult.description = qdesc 143 yield qresult
144 145 146 # if not used as a module, run the doctest 147 if __name__ == "__main__": 148 from Bio._utils import run_doctest 149 run_doctest() 150