1
2
3
4
5
6 """Code to support writing parsers (OBSOLETE).
7
8
9
10 Classes:
11 AbstractParser Base class for parsers.
12 AbstractConsumer Base class of all Consumers.
13 TaggingConsumer Consumer that tags output with its event. For debugging
14 EventGenerator Generate Biopython Events from Martel XML output
15 (note that Martel is now DEPRECATED)
16
17 Functions:
18 safe_readline Read a line from a handle, with check for EOF.
19 safe_peekline Peek at next line, with check for EOF.
20 read_and_call Read a line from a handle and pass it to a method.
21 read_and_call_while Read many lines, as long as a condition is met.
22 read_and_call_until Read many lines, until a condition is met.
23 attempt_read_and_call Like read_and_call, but forgiving of errors.
24 is_blank_line Test whether a line is blank.
25
26 """
27
28
29 import warnings
30 warnings.warn("The module Bio.ParserSupport is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
31
32
33 import sys
34 try:
35 from types import InstanceType
36 except ImportError:
37
38 InstanceType = object
39 from types import MethodType
40 import StringIO
41
42 from Bio import File
43
44
45 try:
46 from xml.sax import handler
47 xml_support = 1
48 except ImportError:
49 sys.stderr.write("Warning: Could not import SAX for dealing with XML.\n" +
50 "This causes problems with some ParserSupport modules\n")
51 xml_support = 0
52
53
55 """Base class for other parsers.
56
57 """
59 raise NotImplementedError("Please implement in a derived class")
60
62 return self.parse(StringIO.StringIO(string))
63
65 h = open(filename)
66 try:
67 retval = self.parse(h)
68 finally:
69 h.close()
70 return retval
71
72
74 """Base class for other Consumers.
75
76 Derive Consumers from this class and implement appropriate
77 methods for each event that you want to receive.
78
79 """
82
85
92
93
95 """A Consumer that tags the data stream with the event and
96 prints it to a handle. Useful for debugging.
97
98 """
def __init__(self, handle=None, colwidth=15, maxwidth=80):
    """TaggingConsumer(handle=sys.stdout, colwidth=15, maxwidth=80)

    Remember the output handle and the two width settings on the
    instance.  When no handle is given, sys.stdout is used.
    """
    self._colwidth, self._maxwidth = colwidth, maxwidth
    # The default is resolved here rather than in the signature, so the
    # sys.stdout that is current when the consumer is created is the one
    # that gets used.
    self._handle = sys.stdout if handle is None else handle
111
114
117
def _print_name(self, name, data=None):
    """Write one tagged line for the event called name to the handle.

    Without data, the line is a row of asterisks (one per column of the
    tag field) followed by the event name.  With data, the line is the
    event name, truncated and left-justified to the tag column width,
    then ': ', then the data cut short and stripped of trailing
    whitespace.
    """
    if data is None:
        # Write the tag only, flagged with a row of asterisks.
        self._handle.write("%s %s\n" % ("*" * self._colwidth, name))
    else:
        # Write the tag and the data.  The data is cut to
        # maxwidth - colwidth - 2 characters; the 2 accounts for the
        # ': ' separator.
        self._handle.write("%-*s: %s\n" % (
            self._colwidth, name[:self._colwidth],
            data[:self._maxwidth - self._colwidth - 2].rstrip()))
127
129 if attr[:6] == 'start_' or attr[:4] == 'end_':
130 method = lambda a=attr, s=self: s._print_name(a)
131 else:
132 method = lambda x, a=attr, s=self: s._print_name(a, x)
133 return method
134
135
136
137 if xml_support:
139 """Handler to generate events associated with a Martel parsed file.
140
141 This acts like a normal SAX handler, and accepts XML generated by
142 Martel during parsing. These events are then converted into
143 'Biopython events', which can then be caught by a standard
144 biopython consumer.
145
146 Note that Martel is now DEPRECATED.
147 """
def __init__(self, consumer, interest_tags, callback_finalizer=None,
             exempt_tags=None):
    """Initialize to begin catching and firing off events.

    Arguments:
    o consumer - The consumer that we'll send Biopython events to.

    o interest_tags - A listing of all the tags we are interested in.

    o callback_finalizer - A function to deal with the collected
    information before passing it on to the consumer. By default
    the collected information is a list of all of the lines read
    for a particular tag -- if there are multiple tags in a row
    like:

    <some_info>Spam</some_info>
    <some_info>More Spam</some_info>

    In this case the list of information would be:

    ['Spam', 'More Spam']

    This list of lines will be passed to the callback finalizer if
    it is present. Otherwise the consumer will be called with the
    list of content information.

    o exempt_tags - A listing of particular tags that are exempt from
    being processed by the callback_finalizer. This allows you to
    use a finalizer to deal with most tags, but leave those you don't
    want touched. When omitted, no tags are exempt.
    """
    self._consumer = consumer
    self.interest_tags = interest_tags
    self._finalizer = callback_finalizer
    # Build the empty default here: a literal [] in the signature would
    # be a single list shared by every instance created without
    # exempt_tags.
    if exempt_tags is None:
        exempt_tags = []
    self._exempt_tags = exempt_tags

    # A dictionary of the collected content, keyed by tag name.  Each
    # tag of interest starts out with an empty list.
    self.info = {}
    for tag in self.interest_tags:
        self.info[tag] = []

    # The name of the most recently finished tag of interest.  Its
    # callback is held back until a different tag finishes or the
    # document ends.
    self._previous_tag = ''

    # The pieces of character data seen for the tag being read.
    self._cur_content = []
    # Flag: true while character data should be collected.
    self._collect_characters = 0
202
def startElement(self, name, attrs):
    """Determine if we should collect characters from this tag.

    Turns character collection on when name is one of the tags of
    interest.  attrs is accepted because the SAX handler interface
    passes it; it is not used.
    """
    if name in self.interest_tags:
        self._collect_characters = 1
208
def characters(self, content):
    """Extract the information if we are interested in it.

    While collection is switched on, each piece of character data is
    appended to the buffer for the current tag; otherwise it is dropped.
    """
    if self._collect_characters:
        self._cur_content.append(content)
214
def endElement(self, name):
    """Send the information to the consumer.

    Once we've got the end element we've collected up all of the
    character information we need, and we need to send this on to
    the consumer to do something with it.

    We have a delay of one tag on doing this, so that we can collect
    all of the info from multiple calls to the same element at once.
    """
    # Only deal with the tag if it is something we are interested in
    # and potentially have information for.
    if self._collect_characters:
        # Add all of the text collected inside this tag as one string.
        self.info[name].append("".join(self._cur_content))
        # Reset the buffer and the collection flag.
        self._cur_content = []
        self._collect_characters = 0

        # If we are at a new tag, pass on the info from the last tag.
        if self._previous_tag and self._previous_tag != name:
            self._make_callback(self._previous_tag)

        # Set this tag as the next to be passed.  This stays inside the
        # guard because the callback looks the tag up in self.info,
        # which only has entries for the tags of interest.
        self._previous_tag = name
240
def _make_callback(self, name):
    """Call the callback function with the info with the given name.

    The consumer method called name receives the list collected for
    that tag, first run through the finalizer when one is set and the
    tag is not exempt.  The stored list for the tag is then emptied.
    """
    # Strip off whitespace and call the consumer.
    callback_function = getattr(self._consumer, name)

    # Run the finalizer over the collected information unless this tag
    # has been exempted from it.
    if self._finalizer is not None and name not in self._exempt_tags:
        info_to_pass = self._finalizer(self.info[name])
    # Otherwise pass the collected list itself.
    else:
        info_to_pass = self.info[name]

    callback_function(info_to_pass)

    # Reset the information for the tag with a new list, so the one
    # just handed to the consumer is not touched again.
    self.info[name] = []
259
def endDocument(self):
    """Make sure all of our information has been passed.

    This just flushes out any stored tags that need to be passed: the
    last finished tag is still waiting for its delayed callback.
    """
    if self._previous_tag:
        self._make_callback(self._previous_tag)
267
268
def read_and_call(uhandle, method, **keywds):
    """read_and_call(uhandle, method[, start][, end][, contains][, blank][, has_re])

    Read a line from uhandle, check it, and pass it to the method.
    Raises a ValueError if the line does not pass the checks.

    start, end, contains, blank, and has_re specify optional conditions
    that the line must pass.  start and end specifies what the line must
    begin or end with (not counting EOL characters).  contains
    specifies a substring that must be found in the line.  If blank
    is a true value, then the line must be blank.  has_re should be
    a regular expression object with a pattern that the line must match
    somewhere.

    """
    line = safe_readline(uhandle)
    # The checker returns an error message, or None when the line passes.
    errmsg = _fails_conditions(line, **keywds)
    if errmsg is not None:
        raise ValueError(errmsg)
    method(line)
289
290
def read_and_call_while(uhandle, method, **keywds):
    """read_and_call_while(uhandle, method[, start][, end][, contains][, blank][, has_re]) -> number of lines

    Read a line from uhandle and pass it to the method as long as
    some condition is true.  Returns the number of lines that were read.

    The first line that does not pass the checks is handed back to
    uhandle with saveline() and is not counted.  Running out of lines
    before that happens raises the ValueError from safe_readline.

    See the docstring for read_and_call for a description of the parameters.

    """
    nlines = 0
    while 1:
        line = safe_readline(uhandle)
        # If the line doesn't match the criteria, save it and stop.
        if _fails_conditions(line, **keywds):
            uhandle.saveline(line)
            break
        method(line)
        nlines += 1
    return nlines
310
311
def read_and_call_until(uhandle, method, **keywds):
    """read_and_call_until(uhandle, method[, start][, end][, contains][, blank][, has_re]) -> number of lines

    Read a line from uhandle and pass it to the method until
    some condition is true.  Returns the number of lines that were read.

    The first line that passes the checks is handed back to uhandle
    with saveline() and is not counted.  Running out of lines before
    that happens raises the ValueError from safe_readline.

    See the docstring for read_and_call for a description of the parameters.

    """
    nlines = 0
    while 1:
        line = safe_readline(uhandle)
        # If the line matches the criteria, save it and stop.
        if not _fails_conditions(line, **keywds):
            uhandle.saveline(line)
            break
        method(line)
        nlines += 1
    return nlines
332
333
def attempt_read_and_call(uhandle, method, **keywds):
    """attempt_read_and_call(uhandle, method, **keywds) -> boolean

    Similar to read_and_call, but returns a boolean specifying
    whether the line has passed the checks.  A line that fails the
    checks is handed back to uhandle with saveline() instead of raising
    an exception.  Running out of lines still raises the ValueError
    from safe_readline.

    See docs for read_and_call for a description of the function
    arguments.

    """
    line = safe_readline(uhandle)
    passed = not _fails_conditions(line, **keywds)
    if passed:
        method(line)
    else:
        uhandle.saveline(line)
    return passed
352
353
def _fails_conditions(line, start=None, end=None, contains=None, blank=None,
                      has_re=None):
    """Check line against the given conditions.

    Returns None when every supplied condition holds, otherwise a
    message describing the first condition that failed.  Conditions
    left as None are not checked.

    start - string the line must begin with.
    end - string the line must end with, once trailing whitespace
    (including the end of line) has been stripped.
    contains - substring that must occur in the line.
    blank - if true the line must be blank, if false it must not be.
    has_re - compiled regular expression that must match somewhere.
    """
    if start is not None:
        if not line.startswith(start):
            return "Line does not start with '%s':\n%s" % (start, line)
    if end is not None:
        # endswith() rather than slicing with -len(end): for an empty
        # end the slice [-0:] is the whole line, which never equals ''.
        if not line.rstrip().endswith(end):
            return "Line does not end with '%s':\n%s" % (end, line)
    if contains is not None:
        if contains not in line:
            return "Line does not contain '%s':\n%s" % (contains, line)
    if blank is not None:
        if blank:
            if not is_blank_line(line):
                return "Expected blank line, but got:\n%s" % line
        else:
            if is_blank_line(line):
                return "Expected non-blank line, but got a blank one"
    if has_re is not None:
        if has_re.search(line) is None:
            return "Line does not match regex '%s':\n%s" % (
                has_re.pattern, line)
    return None
377
378
def is_blank_line(line, allow_spaces=0):
    """is_blank_line(line, allow_spaces=0) -> boolean

    Return whether a line is blank.  allow_spaces specifies whether to
    allow whitespaces in a blank line.  A true value signifies that a
    line containing whitespaces as well as end-of-line characters
    should be considered blank.

    An empty string counts as blank.

    """
    if not line:
        return True
    if allow_spaces:
        return line.rstrip() == ''
    # Strict mode: the line is blank only if it starts with an
    # end-of-line character.
    return line[0] == '\n' or line[0] == '\r'
393
394
def safe_readline(handle):
    """safe_readline(handle) -> line

    Read a line from an UndoHandle and return it.  If there are no more
    lines to read, I will raise a ValueError.

    """
    line = handle.readline()
    # readline() hands back an empty value once the stream is exhausted.
    if not line:
        raise ValueError("Unexpected end of stream.")
    return line
406
407
def safe_peekline(handle):
    """safe_peekline(handle) -> line

    Peek at the next line in an UndoHandle and return it.  If there are no
    more lines to peek, I will raise a ValueError.

    """
    line = handle.peekline()
    # An empty value from peekline() is treated as end of stream.
    if not line:
        raise ValueError("Unexpected end of stream.")
    return line
419