1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """ FeatureSet module
17
18 Provides:
19
20 o FeatureSet - container for Feature objects
21
22 For drawing capabilities, this module uses reportlab to draw and write
23 the diagram:
24
25 http://www.reportlab.com
26
27 For dealing with biological information, the package expects BioPython
28 objects:
29
30 http://www.biopython.org
31 """
32
33
34
35
36
37 from reportlab.pdfbase import _fontdata
38 from reportlab.lib import colors
39
40
41 from _Feature import Feature
42
43
44 import re
45
46
47
48
49
50
51
52
54 """ FeatureSet
55
56 Provides:
57
58 Methods:
59
60 o __init__(self, set_id=None, name=None) Called on instantiation
61
62 o add_feature(self, feature, color=colors.lightgreen) Add a Feature
63 object to the set
64
65 o del_feature(self, feature_id) Remove a feature from the set, by id
66
67 o set_all_features(self, attr, value) Set the passed attribute to the
68 passed value in all features in the set
69
70 o get_features(self) Returns a list of Features from the set
71
72 o get_ids(self) Returns a list of unique ids for features in the set
73
74 o range(self) Returns the range of bases covered by features in
75 the set
76
77 o to_string(self, verbose=0) Returns a string describing the set
78
79 o __len__(self) Returns the length of sequence covered by the set
80
81 o __getitem__(self, key) Returns a feature from the set, keyed by id
82
83 o __str__(self) Returns a string describing the set
84
85 Attributes:
86
87 o id Unique id for the set
88
89 o name String describing the set
90 """
91 - def __init__(self, set_id=None, name=None, parent=None):
92 """ __init__(self, set_id=None, name=None)
93
94 o set_id Unique id for the set
95
96 o name String identifying the feature set
97 """
98 self.parent = parent
99 self.id = id
100 self.next_id = 0
101 self.features = {}
102 self.name = name
103
105 """ add_feature(self, feature, **args)
106
107 o feature Bio.SeqFeature object
108
109 o **kwargs Keyword arguments for Feature. Named attributes
110 of the Feature
111
112 Add a Bio.SeqFeature object to the diagram (will be stored
113 internally in a Feature wrapper
114 """
115 id = self.next_id
116 f = Feature(self, id, feature)
117 self.features[id] = f
118 for key in kwargs:
119 if key == "colour" or key == "color":
120
121
122
123
124
125 self.features[id].set_color(kwargs[key])
126 continue
127 setattr(self.features[id], key, kwargs[key])
128 self.next_id += 1
129 return f
130
132 """ del_feature(self, feature_id)
133
134 o feature_id Unique id of the feature to delete
135
136 Remove a feature from the set, indicated by its id
137 """
138 del self.features[feature_id]
139
141 """ set_all_features(self, attr, value)
142
143 o attr An attribute of the Feature class
144
145 o value The value to set that attribute
146
147 Set the passed attribute of all features in the set to the
148 passed value
149 """
150 changed = 0
151 for feature in self.features.values():
152
153 if hasattr(feature, attr):
154 if getattr(feature, attr) != value:
155 setattr(feature, attr, value)
156
157
158
159
160
161
162 - def get_features(self, attribute=None, value=None, comparator=None):
163 """ get_features(self, attribute=None, value=None, comparator=None) ->
164 [Feature, Feature, ...]
165
166 o attribute String, attribute of a Feature object
167
168 o value The value desired of the attribute
169
170 o comparator String, how to compare the Feature attribute to the
171 passed value
172
173 If no attribute or value is given, return a list of all features in the
174 feature set. If both an attribute and value are given, then depending
175 on the comparator, then a list of all features in the FeatureSet
176 matching (or not) the passed value will be returned. Allowed comparators
177 are: 'startswith', 'not', 'like'.
178
179 The user is expected to make a responsible decision about which feature
180 attributes to use with which passed values and comparator settings.
181 """
182
183 if attribute is None or value is None:
184 return self.features.values()
185
186
187 if comparator is None:
188 return [feature for feature in self.features.values() if
189 getattr(feature, attribute) == value]
190
191
192 elif comparator == 'not':
193 return [feature for feature in self.features.values() if
194 getattr(feature, attribute) != value]
195
196
197 elif comparator == 'startswith':
198 return [feature for feature in self.features.values() if
199 getattr(feature, attribute).startswith(value)]
200
201
202 elif comparator == 'like':
203 return [feature for feature in self.features.values() if
204 re.search(value, getattr(feature, attribute))]
205
206 return []
207
209 """ get_ids(self) -> [int, int, ...]
210
211 Return a list of all ids for the feature set
212 """
213 return self.features.keys()
214
216 """ range(self)
217
218 Returns the lowest and highest base (or mark) numbers as a tuple
219 """
220 lows, highs = [], []
221 for feature in self.features.values():
222 for start, end in feature.locations:
223 lows.append(start)
224 highs.append(end)
225 if len(lows) != 0 and len(highs) != 0:
226 return (min(lows), max(highs))
227 return 0, 0
228
230 """ to_string(self, verbose=0) -> ""
231
232 o verbose Boolean indicating whether a short or complete
233 account of the set is required
234
235 Returns a formatted string with information about the set
236 """
237 if not verbose:
238 return "%s" % self
239 else:
240 outstr = ["\n<%s: %s>" % (self.__class__, self.name)]
241 outstr.append("%d features" % len(self.features))
242 for key in self.features:
243 outstr.append("feature: %s" % self.features[key])
244 return "\n".join(outstr)
245
247 """ __len__(self) -> int
248
249 Return the number of features in the set
250 """
251 return len(self.features)
252
254 """ __getitem__(self, key) -> Feature
255
256 Return a feature, keyed by id
257 """
258 return self.features[key]
259
261 """ __str__(self) -> ""
262
263 Returns a formatted string with information about the feature set
264 """
265 outstr = ["\n<%s: %s %d features>" % (self.__class__, self.name,
266 len(self.features))]
267 return "\n".join(outstr)
268
269
270
271
272
if __name__ == '__main__':
    from Bio import SeqIO

    # Read the GenBank entry used as test data (fixed local path)
    genbank_entry = SeqIO.read(
        '/data/Genomes/Bacteria/Nanoarchaeum_equitans/NC_005213.gbk', 'gb')

    # Gather every CDS feature of the entry into a single FeatureSet
    gdfs = FeatureSet(0, 'Nanoarchaeum equitans CDS')
    coding_features = (entry for entry in genbank_entry.features
                       if entry.type == 'CDS')
    for entry in coding_features:
        gdfs.add_feature(entry)
283
284
285
286
287
288
289
290
291
292