Package Bio :: Package ExPASy :: Module ScanProsite
[hide private]
[frames] | no frames]

Source Code for Module Bio.ExPASy.ScanProsite

  1  # Copyright 2009 by Michiel de Hoon. All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license. Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  # Importing these functions with leading underscore as not intended for reuse 
  7  from Bio._py3k import urlopen as _urlopen 
  8  from Bio._py3k import urlencode as _urlencode 
  9   
 10  from xml.sax import handler 
 11  from xml.sax.expatreader import ExpatParser 
 12   
 13   
class Record(list):
    """Represents search results returned by ScanProsite.

    This record is a list containing the search results returned by
    ScanProsite. The record also contains the data members n_match,
    n_seq, capped, and warning.
    """

    def __init__(self):
        """Create an empty record with every data member set to None."""
        # Run the base-class initializer explicitly so the list part of the
        # object is set up the same way as any other list subclass.
        list.__init__(self)
        # Filled in from the "n_match" attribute of the <matchset> element
        # when the XML is parsed.
        self.n_match = None
        # Filled in from the "n_seq" attribute of the <matchset> element
        # when the XML is parsed.
        self.n_seq = None
        # NOTE(review): capped and warning are not assigned anywhere in the
        # code visible in this module; they stay None unless a caller sets
        # them.
        self.capped = None
        self.warning = None
27 28
def scan(seq="", mirror='http://www.expasy.org', output='xml', **keywords):
    """Run a ScanProsite search and return a handle to the results.

    Arguments:
     - mirror:   The ScanProsite mirror to be used
                 (default: http://www.expasy.org).
     - seq:      The query sequence, or UniProtKB (Swiss-Prot,
                 TrEMBL) accession
     - output:   Format of the search results
                 (default: xml)

    Any additional keyword arguments are forwarded as search parameters,
    except those whose value is None, which are left out of the request.
    See the documentation for programmatic access to ScanProsite at
    http://www.expasy.org/tools/scanprosite/ScanPrositeREST.html
    for a description of such parameters.

    The return value is the handle obtained by opening the request URL.
    Search results in the XML format can be parsed into a Python object
    by using the Bio.ExPASy.ScanProsite.read function.
    """
    parameters = {'seq': seq, 'output': output}
    # Only forward the optional search parameters that were actually given.
    parameters.update(
        (key, value) for key, value in keywords.items() if value is not None
    )
    query = _urlencode(parameters)
    return _urlopen("%s/cgi-bin/prosite/PSScan.cgi?%s" % (mirror, query))
58 59
def read(handle):
    """Parse search results returned by ScanProsite into a Python object.

    The handle is run through the module's SAX Parser with a
    ContentHandler attached; the record built by that handler is returned.
    """
    xml_handler = ContentHandler()
    sax_reader = Parser()
    sax_reader.setContentHandler(xml_handler)
    sax_reader.parse(handle)
    return xml_handler.record
68 69 # The classes below are considered private 70 71
class Parser(ExpatParser):
    """Expat SAX parser that rejects replies which are not XML documents."""

    def __init__(self):
        """Initialize the parser; no data has been inspected yet."""
        ExpatParser.__init__(self)
        # True until the first chunk passed to feed() has been checked.
        self.firsttime = True

    def feed(self, data, isFinal=0):
        """Feed a chunk of data to the parser, checking the first chunk.

        Error messages returned by the ScanProsite server are formatted
        as plain text instead of an XML document. To catch such error
        messages, the feed method of the Expat parser is overridden: the
        first chunk must start with "<?xml". The error message is
        (hopefully) contained in the data that was just fed to the parser.

        Arguments:
         - data:    The chunk to parse. Both text (str) and bytes chunks
                    are accepted, because the SAX machinery feeds text
                    when the input is a character stream and bytes when
                    it is a byte stream.
         - isFinal: Passed through unchanged to ExpatParser.feed.

        Raises ValueError, carrying the offending data, if the first chunk
        does not start with the XML declaration.
        """
        if self.firsttime:
            # Compare against a literal of the same type as the chunk;
            # calling .decode() on a text chunk would fail, and decoding a
            # five-byte slice could split a multi-byte character.
            xml_prefix = "<?xml" if isinstance(data, str) else b"<?xml"
            if data[:5] != xml_prefix:
                raise ValueError(data)
            self.firsttime = False
        return ExpatParser.feed(self, data, isFinal)
89 90
class ContentHandler(handler.ContentHandler):
    """SAX content handler that collects ScanProsite matches in a Record.

    Each <match> element directly inside <matchset> becomes a dictionary
    appended to the record; each direct child element of <match> becomes
    an entry in that dictionary, keyed by the element name.
    """

    # Child elements of <match> whose text is converted to int.
    integers = ("start", "stop")
    # Child elements of <match> known to hold text. Unknown elements are
    # stored as text as well, so this tuple is informational.
    strings = ("sequence_ac",
               "sequence_id",
               "sequence_db",
               "signature_ac",
               "level",
               "level_tag")

    def __init__(self):
        """Start with an empty element stack and no collected text."""
        handler.ContentHandler.__init__(self)
        # Stack of the names of the elements that are currently open.
        self.element = []
        # Text collected since the most recent start tag.
        self.content = ""

    def startElement(self, name, attrs):
        """Track the opened element and create the record or a new match."""
        self.element.append(name)
        self.content = ""
        if self.element == ["matchset"]:
            self.record = Record()
            self.record.n_match = int(attrs["n_match"])
            self.record.n_seq = int(attrs["n_seq"])
        elif self.element == ["matchset", "match"]:
            self.record.append({})

    def endElement(self, name):
        """Close the current element and store its text in the match."""
        # Pop outside of the assert statement: assert statements are removed
        # under "python -O", which would otherwise leave the stack unpopped.
        closed = self.element.pop()
        assert name == closed, "unexpected end tag %r" % (name,)
        name = str(name)
        if self.element == ["matchset", "match"]:
            match = self.record[-1]
            if name in ContentHandler.integers:
                match[name] = int(self.content)
            else:
                # Known string fields and unknown fields alike are stored
                # as the collected text.
                match[name] = self.content

    def characters(self, content):
        """Accumulate character data for the element being read."""
        self.content += content
129