Source code for deeprank.features.ResidueDensity

import itertools
import warnings
import pdb2sql

from deeprank.features import FeatureClass
from deeprank import config


[docs]class ResidueDensity(FeatureClass): def __init__(self, pdb_data, chain1='A', chain2='B'): """Compute the residue contacts between polar/apolar/charged residues. Args: pdb_data (list(byte) or str): pdb data or pdb filename chain1 (str): First chain ID. Defaults to 'A' chain2 (str): Second chain ID. Defaults to 'B' Example: >>> rcd = ResidueDensity('1EWY_100w.pdb') >>> rcd.get(cutoff=5.5) >>> rcd.extract_features() """ self.pdb_data = pdb_data self.sql = pdb2sql.interface(pdb_data) self.chains_label = [chain1, chain2] self.chain1 = chain1 self.chain2 = chain2 self.feature_data = {} self.feature_data_xyz = {} self.residue_types = config.AA_properties
[docs] def get(self, cutoff=5.5): """Get residue contacts. Raises: ValueError: No residue contact found. """ # res = {('chain1,resSeq,resName'): set( # ('chain2,res1Seq,res1Name), # ('chain2,res2Seq,res2Name'))} res = self.sql.get_contact_residues(chain1=self.chains_label[0], chain2=self.chains_label[1], cutoff=cutoff, return_contact_pairs=True) self.residue_contacts = {} for key, other_res in res.items(): # some residues are not amino acids if key[2] not in self.residue_types: continue if key not in self.residue_contacts: self.residue_contacts[key] = residue_pair( key, self.residue_types[key[2]]) self.residue_contacts[key].density['total'] += len(other_res) for key2 in other_res: # some residues are not amino acids if key2[2] not in self.residue_types: continue self.residue_contacts[key].density[ self.residue_types[key2[2]]] += 1 self.residue_contacts[key].connections[ self.residue_types[key2[2]]].append(key2) if key2 not in self.residue_contacts: self.residue_contacts[key2] = residue_pair( key2, self.residue_types[key2[2]]) self.residue_contacts[key2].density['total'] += 1 self.residue_contacts[key2].density[ self.residue_types[key[2]]] += 1 self.residue_contacts[key2].connections[ self.residue_types[key[2]]].append(key) # calculate the total number of contacts total_ctc = 0 for i in self.residue_contacts: total_ctc += self.residue_contacts[i].density['total'] total_ctc = total_ctc / 2 # handle with small interface or no interface if total_ctc == 0: # first close the sql self.sql._close() raise ValueError( f"No residue contact found with the cutoff {cutoff}Å. " f"Failed to calculate the feature residue contact " f"density.") elif total_ctc < 5: # this is an empirical value warnings.warn( f"Only {total_ctc} residue contacts found with " f" cutoff {cutoff}Å. Be careful with using the feature " f"residue contact density")
[docs] def extract_features(self): """Compute the feature of residue contacts between polar/apolar/charged residues. It generates following features: RCD_apolar-apolar RCD_apolar-charged RCD_charged-charged RCD_polar-apolar RCD_polar-charged RCD_polar-polar RCD_total """ self.feature_data['RCD_total'] = {} # raw data for human read self.feature_data_xyz['RCD_total'] = {} # for machine read restype = ['polar', 'apolar', 'charged'] pairtype = ['-'.join(p) for p in list(itertools.combinations_with_replacement(restype, 2))] for p in pairtype: self.feature_data['RCD_' + p] = {} self.feature_data_xyz['RCD_' + p] = {} for key, res in self.residue_contacts.items(): # total density in raw format self.feature_data['RCD_total'][key] = [res.density['total']] # get the center _, xyz = self.get_residue_center(self.sql, res=key) xyz_key = tuple([{self.chain1: 0, self.chain2: 1}[key[0]]] + xyz[0]) self.feature_data_xyz['RCD_total'][xyz_key] = [ res.density['total']] # iterate through all the connection for r in restype: pairtype = 'RCD_' + res.type + '-' + r if pairtype not in self.feature_data: pairtype = 'RCD_' + r + '-' + res.type self.feature_data[pairtype][key] = [res.density[r]] self.feature_data_xyz[pairtype][xyz_key] = [res.density[r]]
[docs]class residue_pair(object): def __init__(self, res, rtype): """Ancillary class that holds information for a given residue.""" self.res = res self.type = rtype self.density = {'total': 0, 'polar': 0, 'apolar': 0, 'charged': 0} self.connections = {'polar': [], 'apolar': [], 'charged': []}
######################################################################## # # THE MAIN FUNCTION CALLED IN THE INTERNAL FEATURE CALCULATOR # ########################################################################
[docs]def __compute_feature__(pdb_data, featgrp, featgrp_raw, chain1, chain2): """Main function called in deeprank for the feature calculations. Args: pdb_data (list(bytes)): pdb information featgrp (str): name of the group where to save xyz-val data featgrp_raw (str): name of the group where to save human readable data chain1 (str): First chain ID chain2 (str): Second chain ID """ # create instance resdens = ResidueDensity(pdb_data, chain1=chain1, chain2=chain2) # get the residue conacts resdens.get() # extract the features resdens.extract_features() # export in the hdf5 file resdens.export_dataxyz_hdf5(featgrp) resdens.export_data_hdf5(featgrp_raw) # close sql resdens.sql._close()
######################################################################## # # TEST THE CLASS # ######################################################################## if __name__ == '__main__': import os from pprint import pprint # get base path */deeprank, i.e. the path of deeprank package base_path = os.path.dirname(os.path.dirname(os.path.dirname( os.path.realpath(__file__)))) pdb_file = os.path.join(base_path, "test/1AK4/native/1AK4.pdb") # create instance resdens = ResidueDensity(pdb_file) resdens.get() resdens.extract_features() resdens.sql._close() pprint(resdens.feature_data) print() pprint(resdens.feature_data_xyz)