Source code for deeprank.tools.sasa

import numpy as np
import pdb2sql


[docs]class SASA(object):

    def __init__(self, pdbfile):
        """Simple class that computes Surface Accessible Solvent Area.

        The method follows some of the approaches presented in:

        Solvent accessible surface area approximations for rapid and
        accurate protein structure prediction
        https://link.springer.com/article/10.1007%2Fs00894-009-0454-9

        Example:
            >>> sasa = SASA('1AK4_1w.pdb')
            >>> NV = sasa.neighbor_vector()
            >>> print(NV)

        Args:
            pdbfile (str): PDB file of the conformation
        """

        self.pdbfile = pdbfile

[docs]    def get_center(self, chain1='A', chain2='B', center='cb'):
        """Get the center of the resiudes.

        Args:
            chain1 (str, optional): Name of the first chain
            chain2 (str, optional): Name of the second chain
            center (str, optional): Specify the center.
                'cb': the center locates on carbon beta of each residue
                'center': average position of all atoms of the residue
        Raises:
            ValueError: If the center is not recpgnized
        """

        if center == 'center':
            self.get_residue_center(chain1=chain1, chain2=chain2)
        elif center == 'cb':
            self.get_residue_carbon_beta(chain1=chain1, chain2=chain2)
        else:
            raise ValueError(
                'Options %s not recognized in SASA.get_center' %
                center)

[docs]    def get_residue_center(self, chain1='A', chain2='B'):
        """Compute the average position of all the residues.

        Args:
            chain1 (str, optional): Name of the first chain
            chain2 (str, optional): Name of the second chain
        """

        sql = pdb2sql.pdb2sql(self.pdbfile)
        resA = np.array(sql.get('resSeq,resName', chainID=chain1))
        resB = np.array(sql.get('resSeq,resName', chainID=chain2))

        resSeqA = np.unique(resA[:, 0].astype(np.int32))
        resSeqB = np.unique(resB[:, 0].astype(np.int32))

        self.xyz = {}

        self.xyz[chain1] = []
        for r in resSeqA:
            xyz = sql.get('x,y,z', chainID=chain1, resSeq=str(r))
            self.xyz[chain1].append(np.mean(xyz))

        self.xyz[chain2] = []
        for r in resSeqB:
            xyz = sql.get('x,y,z', chainID=chain2, resSeq=str(r))
            self.xyz[chain1].append(np.mean(xyz))

        self.resinfo = {}
        self.resinfo[chain1] = []
        for r in resA[:, :2]:
            if tuple(r) not in self.resinfo[chain1]:
                self.resinfo[chain1].append(tuple(r))

        self.resinfo[chain2] = []
        for r in resB[:, :2]:
            if tuple(r) not in self.resinfo[chain2]:
                self.resinfo[chain2].append(tuple(r))
        sql._close()

[docs]    def get_residue_carbon_beta(self, chain1='A', chain2='B'):
        """Extract the position of the carbon beta of each residue.

        Args:
            chain1 (str, optional): Name of the first chain
            chain2 (str, optional): Name of the second chain
        """

        sql = pdb2sql.pdb2sql(self.pdbfile)
        resA = np.array(
            sql.get(
                'resSeq,resName,x,y,z',
                name='CB',
                chainID=chain1))
        resB = np.array(
            sql.get(
                'resSeq,resName,x,y,z',
                name='CB',
                chainID=chain2))
        sql._close()

        assert len(resA[:, 0].astype(np.int32).tolist()) == len(
            np.unique(resA[:, 0].astype(np.int32)).tolist())
        assert len(resB[:, 0].astype(np.int32).tolist()) == len(
            np.unique(resB[:, 0].astype(np.int32)).tolist())

        self.xyz = {}
        self.xyz[chain1] = resA[:, 2:].astype(np.float32)
        self.xyz[chain2] = resB[:, 2:].astype(np.float32)

        self.resinfo = {}
        self.resinfo[chain1] = resA[:, :2]
        self.resinfo[chain2] = resB[:, :2]

[docs]    def neighbor_vector(
            self,
            lbound=3.3,
            ubound=11.1,
            chain1='A',
            chain2='B',
            center='cb'):
        """Compute teh SASA folowing the neighbour vector approach.

        The method is based on Eq on page 1097 of
        https://link.springer.com/article/10.1007%2Fs00894-009-0454-9

        Args:
            lbound (float, optional): lower boubd
            ubound (float, optional): upper bound
            chain1 (str, optional): name of the first chain
            chain2 (str, optional): name of the second chain
            center (str, optional): specify the center
                (see get_residue_center)

        Returns:
            dict: neighbouring vectors
        """

        # get the center
        self.get_center(chain1=chain1, chain2=chain2, center=center)

        NV = {}

        for chain in [chain1, chain2]:

            for i, xyz in enumerate(self.xyz[chain]):

                vect = self.xyz[chain] - xyz
                dist = np.sqrt(np.sum((self.xyz[chain] - xyz)**2, 1))

                dist = np.delete(dist, i, 0)
                vect = np.delete(vect, i, 0)

                vect /= np.linalg.norm(vect, axis=1).reshape(-1, 1)

                weight = self.neighbor_weight(
                    dist, lbound=lbound, ubound=ubound).reshape(-1, 1)
                vect *= weight

                vect = np.sum(vect, 0)
                vect /= np.sum(weight)

                resSeq, resName = self.resinfo[chain][i].tolist()
                key = tuple([chain, int(resSeq), resName])
                value = np.linalg.norm(vect)
                NV[key] = value

        return NV

[docs]    def neighbor_count(
            self,
            lbound=4.0,
            ubound=11.4,
            chain1='A',
            chain2='B',
            center='cb'):
        """Compute the neighbourhood count of each residue.

        The method is based on Eq on page 1097 of
        https://link.springer.com/article/10.1007%2Fs00894-009-0454-9

        Args:
            lbound (float, optional): lower boubd
            ubound (float, optional): upper bound
            chain1 (str, optional): name of the first chain
            chain2 (str, optional): name of the second chain
            center (str, optional): specify the center
            (see get_residue_center)

        Returns:
            dict: Neighborhood count
        """

        # get the center
        self.get_center(chain1=chain1, chain2=chain2, center=center)

        # dict of NC
        NC = {}

        for chain in [chain1, chain2]:

            for i, xyz in enumerate(self.xyz[chain]):
                dist = np.sqrt(np.sum((self.xyz[chain] - xyz)**2, 1))
                resSeq, resName = self.resinfo[chain][i].tolist()
                key = tuple([chain, int(resSeq), resName])
                value = np.sum(self.neighbor_weight(dist, lbound, ubound))
                NC[key] = value

        return NC

[docs]    @staticmethod
    def neighbor_weight(dist, lbound, ubound):
        """Neighboor weight.

        Args:
            dist (np.array): distance from neighboors
            lbound (float): lower bound
            ubound (float): upper bound

        Returns:
            float: distance
        """
        ind = np.argwhere((dist > lbound) & (dist < ubound))
        dist[ind] = 0.5 * \
            (np.cos(np.pi * (dist[ind] - lbound) / (ubound - lbound)) + 1)
        dist[dist <= lbound] = 1
        dist[dist >= ubound] = 0
        return dist


if __name__ == '__main__':

    sasa = SASA('1AK4_1w.pdb')
    NV = sasa.neighbor_vector()
    print(NV)