1
2
3
4
5
6 """Polypeptide-related classes (construction and representation).
7
8 Simple example with multiple chains,
9
10 >>> from Bio.PDB.PDBParser import PDBParser
11 >>> from Bio.PDB.Polypeptide import PPBuilder
12 >>> structure = PDBParser().get_structure('2BEG', 'PDB/2BEG.pdb')
13 >>> ppb=PPBuilder()
14 >>> for pp in ppb.build_peptides(structure):
15 ... print pp.get_sequence()
16 LVFFAEDVGSNKGAIIGLMVGGVVIA
17 LVFFAEDVGSNKGAIIGLMVGGVVIA
18 LVFFAEDVGSNKGAIIGLMVGGVVIA
19 LVFFAEDVGSNKGAIIGLMVGGVVIA
20 LVFFAEDVGSNKGAIIGLMVGGVVIA
21
22 Example with non-standard amino acids using HETATM lines in the PDB file,
23 in this case selenomethionine (MSE):
24
25 >>> from Bio.PDB.PDBParser import PDBParser
26 >>> from Bio.PDB.Polypeptide import PPBuilder
27 >>> structure = PDBParser().get_structure('1A8O', 'PDB/1A8O.pdb')
28 >>> ppb=PPBuilder()
29 >>> for pp in ppb.build_peptides(structure):
30 ... print pp.get_sequence()
31 DIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNW
32 TETLLVQNANPDCKTILKALGPGATLEE
33 TACQG
34
35 If you want to, you can include non-standard amino acids in the peptides:
36
37 >>> for pp in ppb.build_peptides(structure, aa_only=False):
38 ... print pp.get_sequence()
39 ... print pp.get_sequence()[0], pp[0].get_resname()
40 ... print pp.get_sequence()[-7], pp[-7].get_resname()
41 ... print pp.get_sequence()[-6], pp[-6].get_resname()
42 MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG
43 M MSE
44 M MSE
45 M MSE
46
47 In this case the selenomethionines (the first and also seventh and sixth from
48 last residues) have been shown as M (methionine) by the get_sequence method.
49 """
50
51 import warnings
52
53 from Bio.Alphabet import generic_protein
54 from Bio.Seq import Seq
55 from Bio.SCOP.Raf import to_one_letter_code
56 from Bio.PDB.PDBExceptions import PDBException
57 from Bio.PDB.Vector import calc_dihedral, calc_angle
58
59
60 standard_aa_names=["ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS",
61 "LEU", "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL",
62 "TRP", "TYR"]
63
64
65 aa1="ACDEFGHIKLMNPQRSTVWY"
66 aa3=standard_aa_names
67
68 d1_to_index={}
69 dindex_to_1={}
70 d3_to_index={}
71 dindex_to_3={}
72
73
74 for i in range(0, 20):
75 n1=aa1[i]
76 n3=aa3[i]
77 d1_to_index[n1]=i
78 dindex_to_1[i]=n1
79 d3_to_index[n3]=i
80 dindex_to_3[i]=n3
81
82
84 """Index to corresponding one letter amino acid name.
85
86 >>> index_to_one(0)
87 'A'
88 >>> index_to_one(19)
89 'Y'
90 """
91 return dindex_to_1[index]
92
93
95 """One letter code to index.
96
97 >>> one_to_index('A')
98 0
99 >>> one_to_index('Y')
100 19
101 """
102 return d1_to_index[s]
103
104
106 """Index to corresponding three letter amino acid name.
107
108 >>> index_to_three(0)
109 'ALA'
110 >>> index_to_three(19)
111 'TYR'
112 """
113 return dindex_to_3[i]
114
115
117 """Three letter code to index.
118
119 >>> three_to_index('ALA')
120 0
121 >>> three_to_index('TYR')
122 19
123 """
124 return d3_to_index[s]
125
126
128 """Three letter code to one letter code.
129
130 >>> three_to_one('ALA')
131 'A'
132 >>> three_to_one('TYR')
133 'Y'
134
135 For non-standard amino acids, you get a KeyError:
136
137 >>> three_to_one('MSE')
138 Traceback (most recent call last):
139 ...
140 KeyError: 'MSE'
141 """
142 i=d3_to_index[s]
143 return dindex_to_1[i]
144
145
147 """One letter code to three letter code.
148
149 >>> one_to_three('A')
150 'ALA'
151 >>> one_to_three('Y')
152 'TYR'
153 """
154 i=d1_to_index[s]
155 return dindex_to_3[i]
156
157
158 -def is_aa(residue, standard=False):
159 """Return True if residue object/string is an amino acid.
160
161 @param residue: a L{Residue} object OR a three letter amino acid code
162 @type residue: L{Residue} or string
163
164 @param standard: flag to check for the 20 AA (default false)
165 @type standard: boolean
166
167 >>> is_aa('ALA')
168 True
169
170 Known three letter codes for modified amino acids are supported,
171
172 >>> is_aa('FME')
173 True
174 >>> is_aa('FME', standard=True)
175 False
176 """
177
178 if not isinstance(residue, basestring):
179 residue=residue.get_resname()
180 residue=residue.upper()
181 if standard:
182 return residue in d3_to_index
183 else:
184 return residue in to_one_letter_code
185
186
188 """A polypeptide is simply a list of L{Residue} objects."""
190 """Get list of C-alpha atoms in the polypeptide.
191
192 @return: the list of C-alpha atoms
193 @rtype: [L{Atom}, L{Atom}, ...]
194 """
195 ca_list=[]
196 for res in self:
197 ca=res["CA"]
198 ca_list.append(ca)
199 return ca_list
200
202 """Return the list of phi/psi dihedral angles."""
203 ppl=[]
204 lng=len(self)
205 for i in range(0, lng):
206 res=self[i]
207 try:
208 n=res['N'].get_vector()
209 ca=res['CA'].get_vector()
210 c=res['C'].get_vector()
211 except:
212
213
214 ppl.append((None, None))
215 res.xtra["PHI"]=None
216 res.xtra["PSI"]=None
217 continue
218
219 if i>0:
220 rp=self[i-1]
221 try:
222 cp=rp['C'].get_vector()
223 phi=calc_dihedral(cp, n, ca, c)
224 except:
225 phi=None
226 else:
227
228 phi=None
229
230 if i<(lng-1):
231 rn=self[i+1]
232 try:
233 nn=rn['N'].get_vector()
234 psi=calc_dihedral(n, ca, c, nn)
235 except:
236 psi=None
237 else:
238
239 psi=None
240 ppl.append((phi, psi))
241
242 res.xtra["PHI"]=phi
243 res.xtra["PSI"]=psi
244 return ppl
245
247 """List of tau torsions angles for all 4 consecutive Calpha atoms."""
248 ca_list=self.get_ca_list()
249 tau_list=[]
250 for i in range(0, len(ca_list)-3):
251 atom_list = (ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3])
252 v1, v2, v3, v4 = [a.get_vector() for a in atom_list]
253 tau=calc_dihedral(v1, v2, v3, v4)
254 tau_list.append(tau)
255
256 res=ca_list[i+2].get_parent()
257 res.xtra["TAU"]=tau
258 return tau_list
259
261 """List of theta angles for all 3 consecutive Calpha atoms."""
262 theta_list=[]
263 ca_list=self.get_ca_list()
264 for i in range(0, len(ca_list)-2):
265 atom_list = (ca_list[i], ca_list[i+1], ca_list[i+2])
266 v1, v2, v3 = [a.get_vector() for a in atom_list]
267 theta=calc_angle(v1, v2, v3)
268 theta_list.append(theta)
269
270 res=ca_list[i+1].get_parent()
271 res.xtra["THETA"]=theta
272 return theta_list
273
285
287 """Return string representation of the polypeptide.
288
289 Return <Polypeptide start=START end=END>, where START
290 and END are sequence identifiers of the outer residues.
291 """
292 start=self[0].get_id()[1]
293 end=self[-1].get_id()[1]
294 s="<Polypeptide start=%s end=%s>" % (start, end)
295 return s
296
297
299 """Base class to extract polypeptides.
300
301 It checks if two consecutive residues in a chain are connected.
302 The connectivity test is implemented by a subclass.
303
304 This assumes you want both standard and non-standard amino acids.
305 """
307 """
308 @param radius: distance
309 @type radius: float
310 """
311 self.radius=radius
312
313 - def _accept(self, residue, standard_aa_only):
314 """Check if the residue is an amino acid (PRIVATE)."""
315 if is_aa(residue, standard=standard_aa_only):
316 return True
317 elif not standard_aa_only and "CA" in residue.child_dict:
318
319
320
321 warnings.warn("Assuming residue %s is an unknown modified "
322 "amino acid" % residue.get_resname())
323 return True
324 else:
325
326 return False
327
329 """Build and return a list of Polypeptide objects.
330
331 @param entity: polypeptides are searched for in this object
332 @type entity: L{Structure}, L{Model} or L{Chain}
333
334 @param aa_only: if 1, the residue needs to be a standard AA
335 @type aa_only: int
336 """
337 is_connected=self._is_connected
338 accept=self._accept
339 level=entity.get_level()
340
341 if level=="S":
342 model=entity[0]
343 chain_list=model.get_list()
344 elif level=="M":
345 chain_list=entity.get_list()
346 elif level=="C":
347 chain_list=[entity]
348 else:
349 raise PDBException("Entity should be Structure, Model or Chain.")
350 pp_list=[]
351 for chain in chain_list:
352 chain_it=iter(chain)
353 try:
354 prev_res = chain_it.next()
355 while not accept(prev_res, aa_only):
356 prev_res = chain_it.next()
357 except StopIteration:
358
359 continue
360 pp=None
361 for next_res in chain_it:
362 if accept(prev_res, aa_only) \
363 and accept(next_res, aa_only) \
364 and is_connected(prev_res, next_res):
365 if pp is None:
366 pp=Polypeptide()
367 pp.append(prev_res)
368 pp_list.append(pp)
369 pp.append(next_res)
370 else:
371
372
373 pp=None
374 prev_res=next_res
375 return pp_list
376
377
379 """Use CA--CA distance to find polypeptides."""
382
384 for r in [prev_res, next_res]:
385 if not r.has_id("CA"):
386 return False
387 n=next_res["CA"]
388 p=prev_res["CA"]
389
390 if n.is_disordered():
391 nlist=n.disordered_get_list()
392 else:
393 nlist=[n]
394 if p.is_disordered():
395 plist=p.disordered_get_list()
396 else:
397 plist=[p]
398 for nn in nlist:
399 for pp in plist:
400 if (nn-pp)<self.radius:
401 return True
402 return False
403
404
406 """Use C--N distance to find polypeptides."""
409
444
446 """Return 1 if distance between atoms<radius (PRIVATE)."""
447 if (c-n)<self.radius:
448 return 1
449 else:
450 return 0
451
452
453 if __name__=="__main__":
454 import sys
455 from Bio.PDB.PDBParser import PDBParser
456
457 p=PDBParser(PERMISSIVE=True)
458
459 s=p.get_structure("scr", sys.argv[1])
460
461 ppb=PPBuilder()
462
463 print "C-N"
464 for pp in ppb.build_peptides(s):
465 print pp.get_sequence()
466 for pp in ppb.build_peptides(s[0]):
467 print pp.get_sequence()
468 for pp in ppb.build_peptides(s[0]["A"]):
469 print pp.get_sequence()
470
471 for pp in ppb.build_peptides(s):
472 for phi, psi in pp.get_phi_psi_list():
473 print phi, psi
474
475 ppb=CaPPBuilder()
476
477 print "CA-CA"
478 for pp in ppb.build_peptides(s):
479 print pp.get_sequence()
480 for pp in ppb.build_peptides(s[0]):
481 print pp.get_sequence()
482 for pp in ppb.build_peptides(s[0]["A"]):
483 print pp.get_sequence()
484