Source code for deeprank.tools.sasa

import numpy as np
import pdb2sql


[docs]class SASA(object): def __init__(self, pdbfile): """Simple class that computes Surface Accessible Solvent Area. The method follows some of the approaches presented in: Solvent accessible surface area approximations for rapid and accurate protein structure prediction https://link.springer.com/article/10.1007%2Fs00894-009-0454-9 Example: >>> sasa = SASA('1AK4_1w.pdb') >>> NV = sasa.neighbor_vector() >>> print(NV) Args: pdbfile (str): PDB file of the conformation """ self.pdbfile = pdbfile
[docs] def get_center(self, chain1='A', chain2='B', center='cb'): """Get the center of the resiudes. Args: chain1 (str, optional): Name of the first chain chain2 (str, optional): Name of the second chain center (str, optional): Specify the center. 'cb': the center locates on carbon beta of each residue 'center': average position of all atoms of the residue Raises: ValueError: If the center is not recpgnized """ if center == 'center': self.get_residue_center(chain1=chain1, chain2=chain2) elif center == 'cb': self.get_residue_carbon_beta(chain1=chain1, chain2=chain2) else: raise ValueError( 'Options %s not recognized in SASA.get_center' % center)
[docs] def get_residue_center(self, chain1='A', chain2='B'): """Compute the average position of all the residues. Args: chain1 (str, optional): Name of the first chain chain2 (str, optional): Name of the second chain """ sql = pdb2sql.pdb2sql(self.pdbfile) resA = np.array(sql.get('resSeq,resName', chainID=chain1)) resB = np.array(sql.get('resSeq,resName', chainID=chain2)) resSeqA = np.unique(resA[:, 0].astype(np.int32)) resSeqB = np.unique(resB[:, 0].astype(np.int32)) self.xyz = {} self.xyz[chain1] = [] for r in resSeqA: xyz = sql.get('x,y,z', chainID=chain1, resSeq=str(r)) self.xyz[chain1].append(np.mean(xyz)) self.xyz[chain2] = [] for r in resSeqB: xyz = sql.get('x,y,z', chainID=chain2, resSeq=str(r)) self.xyz[chain1].append(np.mean(xyz)) self.resinfo = {} self.resinfo[chain1] = [] for r in resA[:, :2]: if tuple(r) not in self.resinfo[chain1]: self.resinfo[chain1].append(tuple(r)) self.resinfo[chain2] = [] for r in resB[:, :2]: if tuple(r) not in self.resinfo[chain2]: self.resinfo[chain2].append(tuple(r)) sql._close()
[docs] def get_residue_carbon_beta(self, chain1='A', chain2='B'): """Extract the position of the carbon beta of each residue. Args: chain1 (str, optional): Name of the first chain chain2 (str, optional): Name of the second chain """ sql = pdb2sql.pdb2sql(self.pdbfile) resA = np.array( sql.get( 'resSeq,resName,x,y,z', name='CB', chainID=chain1)) resB = np.array( sql.get( 'resSeq,resName,x,y,z', name='CB', chainID=chain2)) sql._close() assert len(resA[:, 0].astype(np.int32).tolist()) == len( np.unique(resA[:, 0].astype(np.int32)).tolist()) assert len(resB[:, 0].astype(np.int32).tolist()) == len( np.unique(resB[:, 0].astype(np.int32)).tolist()) self.xyz = {} self.xyz[chain1] = resA[:, 2:].astype(np.float32) self.xyz[chain2] = resB[:, 2:].astype(np.float32) self.resinfo = {} self.resinfo[chain1] = resA[:, :2] self.resinfo[chain2] = resB[:, :2]
[docs] def neighbor_vector( self, lbound=3.3, ubound=11.1, chain1='A', chain2='B', center='cb'): """Compute teh SASA folowing the neighbour vector approach. The method is based on Eq on page 1097 of https://link.springer.com/article/10.1007%2Fs00894-009-0454-9 Args: lbound (float, optional): lower boubd ubound (float, optional): upper bound chain1 (str, optional): name of the first chain chain2 (str, optional): name of the second chain center (str, optional): specify the center (see get_residue_center) Returns: dict: neighbouring vectors """ # get the center self.get_center(chain1=chain1, chain2=chain2, center=center) NV = {} for chain in [chain1, chain2]: for i, xyz in enumerate(self.xyz[chain]): vect = self.xyz[chain] - xyz dist = np.sqrt(np.sum((self.xyz[chain] - xyz)**2, 1)) dist = np.delete(dist, i, 0) vect = np.delete(vect, i, 0) vect /= np.linalg.norm(vect, axis=1).reshape(-1, 1) weight = self.neighbor_weight( dist, lbound=lbound, ubound=ubound).reshape(-1, 1) vect *= weight vect = np.sum(vect, 0) vect /= np.sum(weight) resSeq, resName = self.resinfo[chain][i].tolist() key = tuple([chain, int(resSeq), resName]) value = np.linalg.norm(vect) NV[key] = value return NV
[docs] def neighbor_count( self, lbound=4.0, ubound=11.4, chain1='A', chain2='B', center='cb'): """Compute the neighbourhood count of each residue. The method is based on Eq on page 1097 of https://link.springer.com/article/10.1007%2Fs00894-009-0454-9 Args: lbound (float, optional): lower boubd ubound (float, optional): upper bound chain1 (str, optional): name of the first chain chain2 (str, optional): name of the second chain center (str, optional): specify the center (see get_residue_center) Returns: dict: Neighborhood count """ # get the center self.get_center(chain1=chain1, chain2=chain2, center=center) # dict of NC NC = {} for chain in [chain1, chain2]: for i, xyz in enumerate(self.xyz[chain]): dist = np.sqrt(np.sum((self.xyz[chain] - xyz)**2, 1)) resSeq, resName = self.resinfo[chain][i].tolist() key = tuple([chain, int(resSeq), resName]) value = np.sum(self.neighbor_weight(dist, lbound, ubound)) NC[key] = value return NC
[docs] @staticmethod def neighbor_weight(dist, lbound, ubound): """Neighboor weight. Args: dist (np.array): distance from neighboors lbound (float): lower bound ubound (float): upper bound Returns: float: distance """ ind = np.argwhere((dist > lbound) & (dist < ubound)) dist[ind] = 0.5 * \ (np.cos(np.pi * (dist[ind] - lbound) / (ubound - lbound)) + 1) dist[dist <= lbound] = 1 dist[dist >= ubound] = 0 return dist
if __name__ == '__main__': sasa = SASA('1AK4_1w.pdb') NV = sasa.neighbor_vector() print(NV)