Package Bio :: Package SearchIO :: Package BlastIO :: Module blast_text
[hide private]
[frames] | no frames]

Source Code for Module Bio.SearchIO.BlastIO.blast_text

  1  # Copyright 2012 by Wibowo Arindrarto.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Bio.SearchIO parser for BLAST+ plain text output formats. 
  7   
  8  At the moment this is a wrapper around Biopython's NCBIStandalone text 
  9  parser (which is now deprecated). 
 10   
 11  """ 
 12   
 13  from Bio.Alphabet import generic_dna, generic_protein 
 14  from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment 
 15   
 16  import warnings 
 17  from Bio import BiopythonDeprecationWarning 
 18  with warnings.catch_warnings(): 
 19      warnings.simplefilter('ignore', BiopythonDeprecationWarning) 
 20      from Bio.Blast import NCBIStandalone 
 21   
 22   
 23  __all__ = ['BlastTextParser'] 
 24   
 25   
26 -class BlastTextParser(object):
27 28 """Parser for the BLAST text format.""" 29
30 - def __init__(self, handle):
31 self.handle = handle 32 blast_parser = NCBIStandalone.BlastParser() 33 self.blast_iter = NCBIStandalone.Iterator(handle, blast_parser)
34
35 - def __iter__(self):
36 for rec in self.blast_iter: 37 # set attributes to SearchIO's 38 # get id and desc 39 if rec.query.startswith('>'): 40 rec.query = rec.query[1:] 41 try: 42 qid, qdesc = rec.query.split(' ', 1) 43 except ValueError: 44 qid, qdesc = rec.query, '' 45 qdesc = qdesc.replace('\n', '').replace('\r', '') 46 47 qresult = QueryResult(id=qid) 48 qresult.program = rec.application.lower() 49 qresult.target = rec.database 50 qresult.seq_len = rec.query_letters 51 qresult.version = rec.version 52 53 # determine alphabet based on program 54 if qresult.program == 'blastn': 55 alphabet = generic_dna 56 elif qresult.program in ['blastp', 'blastx', 'tblastn', 'tblastx']: 57 alphabet = generic_protein 58 59 # iterate over the 'alignments' (hits) and the hit table 60 for idx, aln in enumerate(rec.alignments): 61 # get id and desc 62 if aln.title.startswith('> '): 63 aln.title = aln.title[2:] 64 elif aln.title.startswith('>'): 65 aln.title = aln.title[1:] 66 try: 67 hid, hdesc = aln.title.split(' ', 1) 68 except ValueError: 69 hid, hdesc = aln.title, '' 70 hdesc = hdesc.replace('\n', '').replace('\r', '') 71 72 # iterate over the hsps and group them in a list 73 hsp_list = [] 74 for bhsp in aln.hsps: 75 frag = HSPFragment(hid, qid) 76 frag.alphabet = alphabet 77 # set alignment length 78 frag.aln_span = bhsp.identities[1] 79 # set frames 80 try: 81 frag.query_frame = int(bhsp.frame[0]) 82 except IndexError: 83 if qresult.program in ('blastp', 'tblastn'): 84 frag.query_frame = 0 85 else: 86 frag.query_frame = 1 87 try: 88 frag.hit_frame = int(bhsp.frame[1]) 89 except IndexError: 90 if qresult.program in ('blastp', 'tblastn'): 91 frag.hit_frame = 0 92 else: 93 frag.hit_frame = 1 94 # set query coordinates 95 frag.query_start = min(bhsp.query_start, 96 bhsp.query_end) - 1 97 frag.query_end = max(bhsp.query_start, bhsp.query_end) 98 # set hit coordinates 99 frag.hit_start = min(bhsp.sbjct_start, 100 bhsp.sbjct_end) - 1 101 frag.hit_end = max(bhsp.sbjct_start, bhsp.sbjct_end) 102 # set query, hit sequences and its annotation 103 qseq = '' 104 hseq = '' 105 midline = '' 106 for seqtrio in zip(bhsp.query, bhsp.sbjct, bhsp.match): 107 qchar, hchar, mchar = seqtrio 108 if qchar == ' ' or hchar == ' ': 109 assert all(' ' == x for x in seqtrio) 110 else: 111 qseq += qchar 112 hseq += hchar 113 midline += mchar 114 frag.query, frag.hit = qseq, hseq 115 frag.aln_annotation['homology'] = midline 116 117 # create HSP object with the fragment 118 hsp = HSP([frag]) 119 hsp.evalue = bhsp.expect 120 hsp.bitscore = bhsp.bits 121 hsp.bitscore_raw = bhsp.score 122 # set gap 123 try: 124 hsp.gap_num = bhsp.gaps[0] 125 except IndexError: 126 hsp.gap_num = 0 127 # set identity 128 hsp.ident_num = bhsp.identities[0] 129 hsp.pos_num = bhsp.positives[0] 130 if hsp.pos_num is None: 131 hsp.pos_num = hsp[0].aln_span 132 133 hsp_list.append(hsp) 134 135 hit = Hit(hsp_list) 136 hit.seq_len = aln.length 137 hit.description = hdesc 138 qresult.append(hit) 139 140 qresult.description = qdesc 141 yield qresult
142 143 144 # if not used as a module, run the doctest 145 if __name__ == "__main__": 146 from Bio._utils import run_doctest 147 run_doctest() 148