Source code for deeprank.generate.GridTools


import itertools
import sys
from time import time

import numpy as np
from scipy.signal import bspline
import pdb2sql

from deeprank.config import logger
from deeprank.tools import sparse

try:
    from tqdm import tqdm
except ImportError:
    def tqdm(x):
        return x


def logif(string, cond):
    return logger.info(string) if cond else None
class GridTools(object):

    def __init__(self, molgrp, chain1, chain2,
                 number_of_points=30, resolution=1.,
                 atomic_densities=None, atomic_densities_mode='ind',
                 feature=None, feature_mode='ind',
                 contact_distance=8.5,
                 cuda=False, gpu_block=None,
                 cuda_func=None, cuda_atomic=None,
                 prog_bar=False, time=False,
                 try_sparse=True):
        """Map the features of a complex on the grid.

        Args:
            molgrp(str): name of the group of the molecule in the HDF5 file.
            chain1 (str): First chain ID.
            chain2 (str): Second chain ID.
            number_of_points(int, optional): number of points we want in
                each direction of the grid.
            resolution(float, optional): distance (in Angs) between two points.
            atomic_densities(dict, optional): dictionary of element types with
                their vdw radius, see deeprank.config.atom_vdw_radius_noH
            atomic_densities_mode(str, optional): Mode for mapping
                (deprecated, must be 'ind').
            feature(None, optional): Name of the features to be mapped.
                By default all the features present in
                hdf5_file['<molgrp>/features/'] will be mapped.
            feature_mode(str, optional): Mode for mapping
                (deprecated, must be 'ind').
            contact_distance(float, optional): the maximum distance between
                two contact atoms, default 8.5 Angs.
            cuda(bool, optional): Use CUDA or not.
            gpu_block(tuple(int), optional): GPU block size to use.
            cuda_func(None, optional): Name of the CUDA function to be used
                for the mapping of the features.
                Must be present in kernel_cuda.c.
            cuda_atomic(None, optional): Name of the CUDA function to be used
                for the mapping of the atomic densities.
                Must be present in kernel_cuda.c.
            prog_bar(bool, optional): print progression bar for individual
                grid (default False).
            time(bool, optional): print timing statistic for individual grid
                (default False).
            try_sparse(bool, optional): Try to store the matrix in sparse
                format (default True).
        """

        # molecule and hdf5 file
        self.molgrp = molgrp
        self.mol_basename = molgrp.name

        # chain IDs
        self.chain1 = chain1
        self.chain2 = chain2

        # hdf5 file to store the data
        self.hdf5 = self.molgrp.file
        self.try_sparse = try_sparse

        # parameters of the grid
        if number_of_points is not None:
            if not isinstance(number_of_points, list):
                number_of_points = [number_of_points] * 3
            self.npts = np.array(number_of_points).astype('int')

        if resolution is not None:
            if not isinstance(resolution, list):
                resolution = [resolution] * 3
            self.res = np.array(resolution)

        # features requested
        self.atomic_densities = atomic_densities
        self.feature = feature

        # mapping modes
        self.feature_mode = feature_mode
        self.atomic_densities_mode = atomic_densities_mode

        # cuda support
        self.cuda = cuda
        if self.cuda:  # pragma: no cover
            self.gpu_block = gpu_block
            self.gpu_grid = [int(np.ceil(n / b))
                             for b, n in zip(self.gpu_block, self.npts)]

        # cuda kernels
        self.cuda_func = cuda_func
        self.cuda_atomic = cuda_atomic

        # parameters of the atomic system
        self.atom_xyz = None
        self.atom_index = None
        self.atom_type = None

        # grid points
        self.x = None
        self.y = None
        self.z = None

        # grids for the calculation of the atomic densities
        self.xgrid = None
        self.ygrid = None
        self.zgrid = None

        # dictionary of atomic densities
        self.atdens = {}

        # conversion from bohr to angstrom for VMD visualization
        self.bohr2ang = 0.52918

        # contact distance used to locate the interface
        self.contact_distance = contact_distance

        # progress bar
        self.local_tqdm = lambda x: tqdm(x) if prog_bar else x
        self.time = time

        # if we already have an output containing the grid
        # we update the existing features
        _update_ = False
        if self.mol_basename + '/grid_points/x' in self.hdf5:
            _update_ = True

        if _update_:
            logif(f'\n= Updating grid data for {self.mol_basename}.',
                  self.time)
            self.update_feature()
        else:
            logif(f'\n= Creating grid and grid data for {self.mol_basename}.',
                  self.time)
            self.create_new_data()

    ################################################################
    def create_new_data(self):
        """Create new features for a given complex."""

        # get the positions/atom types ... of the complex
        self.read_pdb()

        # get the contact atoms and the interface center
        self.get_contact_center()

        # define the grid
        self.define_grid_points()

        # save the grid points
        self.export_grid_points()

        # map the features
        self.add_all_features()

        # map the atomic densities if requested
        self.add_all_atomic_densities()

        # close the db file
        self.sqldb._close()
    ################################################################
    def update_feature(self):
        """Update existing features of a complex."""

        # get the positions/atom types ... of the complex
        # this sets self.sqldb
        self.read_pdb()

        # read the grid from the hdf5
        grid = self.hdf5.get(self.mol_basename + '/grid_points/')
        self.x, self.y, self.z = grid['x'][()], grid['y'][()], grid['z'][()]

        # create the grid
        self.ygrid, self.xgrid, self.zgrid = np.meshgrid(
            self.y, self.x, self.z)

        # set the resolution/dimension
        self.npts = np.array([len(self.x), len(self.y), len(self.z)])
        self.res = np.array([self.x[1] - self.x[0],
                             self.y[1] - self.y[0],
                             self.z[1] - self.z[0]])

        # map the features
        self.add_all_features()

        # map the atomic densities if requested
        self.add_all_atomic_densities()

        # close the db file
        self.sqldb._close()
    ################################################################
    def read_pdb(self):
        """Create a sql database for the pdb."""
        self.sqldb = pdb2sql.interface(self.molgrp['complex'][()])
    # get the contact atoms and interface center
    def get_contact_center(self):
        """Get the center of the contact atoms."""

        contact_atoms = self.sqldb.get_contact_atoms(
            cutoff=self.contact_distance,
            chain1=self.chain1,
            chain2=self.chain2)

        tmp = []
        for i in contact_atoms.values():
            tmp.extend(i)
        contact_atoms = list(set(tmp))

        # get the interface center
        self.center_contact = np.mean(
            np.array(self.sqldb.get('x,y,z', rowID=contact_atoms)), 0)
    ################################################################
    # shortcuts to add all the features
    # and atomic densities in just one line
    ################################################################

    # add all the residue features to the data
    def add_all_features(self):
        """Add all the features to a given molecule."""

        # map the features
        if self.feature is not None:

            # map the residue features
            dict_data = self.map_features(self.feature)

            # save to hdf5 if specified
            t0 = time()
            logif('-- Save Features to HDF5', self.time)
            self.hdf5_grid_data(dict_data, 'Feature_%s' % (self.feature_mode))
            logif('   Total %f ms' % ((time() - t0) * 1000), self.time)
    # add all the atomic densities to the data
    def add_all_atomic_densities(self):
        """Add all atomic densities."""

        # if we want the atomic densities
        if self.atomic_densities is not None:

            # compute the atomic densities
            self.map_atomic_densities()

            # save to hdf5
            t0 = time()
            logif('-- Save Atomic Densities to HDF5', self.time)
            self.hdf5_grid_data(
                self.atdens,
                'AtomicDensities_%s' % (self.atomic_densities_mode))
            logif('   Total %f ms' % ((time() - t0) * 1000), self.time)
    ################################################################
    # define the grid points
    # there may be an issue with the ordering:
    # in order to visualize the data in VMD the Y and X axes must be
    # inverted ...
    # it is kept like that for now as it should not matter for the CNN,
    # and maybe we don't need atomic densities as features
    ################################################################
    def define_grid_points(self):
        """Define the grid points."""

        logif('-- Define %dx%dx%d grid ' %
              (self.npts[0], self.npts[1], self.npts[2]), self.time)
        logif('-- Resolution of %1.2fx%1.2fx%1.2f Angs' %
              (self.res[0], self.res[1], self.res[2]), self.time)

        halfdim = 0.5 * (self.npts * self.res)
        center = self.center_contact

        low_lim = center - halfdim
        hgh_lim = low_lim + self.res * (np.array(self.npts) - 1)

        self.x = np.linspace(low_lim[0], hgh_lim[0], self.npts[0])
        self.y = np.linspace(low_lim[1], hgh_lim[1], self.npts[1])
        self.z = np.linspace(low_lim[2], hgh_lim[2], self.npts[2])

        # there is something fishy about the 3d meshgrid:
        # the axes are a bit screwy ....
        # it is not quite clear why the ordering is like that
        self.ygrid, self.xgrid, self.zgrid = np.meshgrid(
            self.y, self.x, self.z)
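    # Illustration (not part of the original code): with npts = [30, 30, 30]
    # and res = [1., 1., 1.], halfdim = 15 along each axis, so for a given
    # center c each axis runs from c - 15 to c - 15 + 1 * (30 - 1) = c + 14
    # in steps of 1 Angs, i.e. 30 evenly spaced points per direction.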
    ################################################################
    # Atomic densities
    # as defined in the Koes paper on protein-ligand scoring with
    # convolutional neural networks
    ################################################################

    # compute all the atomic density data
    def map_atomic_densities(self, only_contact=True):
        """Map the atomic densities to the grid.

        Args:
            only_contact(bool, optional): Map only the contact atoms

        Raises:
            ImportError: if pycuda cannot be imported when CUDA is requested
        """
        mode = self.atomic_densities_mode
        logif('-- Map atomic densities on %dx%dx%d grid (mode=%s)' %
              (self.npts[0], self.npts[1], self.npts[2], mode), self.time)

        # prepare the cuda memory
        if self.cuda:  # pragma: no cover

            # try to import pycuda
            try:
                from pycuda import driver, compiler, gpuarray, tools
                import pycuda.autoinit
            except BaseException:
                raise ImportError("Error when importing pyCuda in GridTools")

            # book memory on the gpu
            x_gpu = gpuarray.to_gpu(self.x.astype(np.float32))
            y_gpu = gpuarray.to_gpu(self.y.astype(np.float32))
            z_gpu = gpuarray.to_gpu(self.z.astype(np.float32))
            grid_gpu = gpuarray.zeros(self.npts, np.float32)

        # get the contact atoms
        if only_contact:
            index = self.sqldb.get_contact_atoms(
                cutoff=self.contact_distance,
                chain1=self.chain1,
                chain2=self.chain2)
        else:
            index = {self.chain1: self.sqldb.get('rowID', chainID=self.chain1),
                     self.chain2: self.sqldb.get('rowID', chainID=self.chain2)}

        # loop over all the element types we want
        for elementtype, vdw_rad in self.local_tqdm(
                self.atomic_densities.items()):

            t0 = time()
            xyzA = np.array(self.sqldb.get(
                'x,y,z', rowID=index[self.chain1], element=elementtype))
            xyzB = np.array(self.sqldb.get(
                'x,y,z', rowID=index[self.chain2], element=elementtype))
            tprocess = time() - t0

            t0 = time()
            # if we use CUDA
            if self.cuda:  # pragma: no cover

                # reset the grid
                grid_gpu *= 0

                # get the atomic densities of chain A
                for pos in xyzA:
                    x0, y0, z0 = pos.astype(np.float32)
                    vdw = np.float32(vdw_rad)
                    self.cuda_atomic(vdw, x0, y0, z0,
                                     x_gpu, y_gpu, z_gpu, grid_gpu,
                                     block=tuple(self.gpu_block),
                                     grid=tuple(self.gpu_grid))
                atdensA = grid_gpu.get()

                # reset the grid
                grid_gpu *= 0

                # get the atomic densities of chain B
                for pos in xyzB:
                    x0, y0, z0 = pos.astype(np.float32)
                    vdw = np.float32(vdw_rad)
                    self.cuda_atomic(vdw, x0, y0, z0,
                                     x_gpu, y_gpu, z_gpu, grid_gpu,
                                     block=tuple(self.gpu_block),
                                     grid=tuple(self.gpu_grid))
                atdensB = grid_gpu.get()

            # if we don't use CUDA
            else:

                # init the grids
                atdensA = np.zeros(self.npts)
                atdensB = np.zeros(self.npts)

                # run on the atoms of chain A
                for pos in xyzA:
                    atdensA += self.densgrid(pos, vdw_rad)

                # run on the atoms of chain B
                for pos in xyzB:
                    atdensB += self.densgrid(pos, vdw_rad)

            # create the final grid: A - B
            if mode == 'diff':
                self.atdens[elementtype] = atdensA - atdensB

            # create the final grid: A + B
            elif mode == 'sum':
                self.atdens[elementtype] = atdensA + atdensB

            # create the final grids: A and B
            elif mode == 'ind':
                self.atdens[elementtype + '_chain1'] = atdensA
                self.atdens[elementtype + '_chain2'] = atdensB
            else:
                raise ValueError(f'Atomic density mode {mode} not recognized')

            tgrid = time() - t0

            logif('   Process time %f ms' % (tprocess * 1000), self.time)
            logif('   Grid time %f ms' % (tgrid * 1000), self.time)
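    # Illustration (not part of the original code): atomic_densities is
    # expected to map element symbols to vdw radii, e.g. something along the
    # lines of {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8} (values in Angs;
    # see deeprank.config.atom_vdw_radius_noH for the actual table).
    # In the default 'ind' mode the method then fills self.atdens with one
    # grid per element and per chain, e.g. 'C_chain1' and 'C_chain2'.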
    # compute the atomic densities on the grid
    def densgrid(self, center, vdw_radius):
        """Map an individual atomic density on the grid.

        The formula is equation (1) of the Koes paper
        'Protein-Ligand Scoring with Convolutional Neural Networks'
        (arXiv:1612.02751v1).

        Args:
            center (list(float)): position of the atom
            vdw_radius (float): vdw radius of the atom

        Returns:
            np.array: mapped density
        """
        x0, y0, z0 = center
        dd = np.sqrt((self.xgrid - x0)**2
                     + (self.ygrid - y0)**2
                     + (self.zgrid - z0)**2)
        dgrid = np.zeros(self.npts)

        index_shortd = dd < vdw_radius
        index_longd = (dd >= vdw_radius) & (dd < 1.5 * vdw_radius)

        dgrid[index_shortd] = np.exp(
            -2 * dd[index_shortd]**2 / vdw_radius**2)
        dgrid[index_longd] = 4. / np.e**2 / vdw_radius**2 * dd[index_longd]**2 \
            - 12. / np.e**2 / vdw_radius * dd[index_longd] + 9. / np.e**2

        return dgrid
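    # Illustration (not part of the original code): the piecewise density
    # above is continuous and vanishes at 1.5 * vdw_radius. Writing
    # r = vdw_radius and d for the distance to the atom:
    #   d = 0      ->  exp(0)                            = 1
    #   d = r      ->  exp(-2)                           ~ 0.135   (Gaussian branch)
    #   d = r      ->  (4 - 12 + 9) / e**2 = 1 / e**2    ~ 0.135   (quadratic branch)
    #   d = 1.5 r  ->  (9 - 18 + 9) / e**2               = 0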
    ################################################################
    # Residue or atomic features
    # read the data provided in input
    # and map it on the grid
    ################################################################

    # map a feature on the grid
    def map_features(self, featlist, transform=None):
        """Map individual features to the grid.

        For residue-based features the feature file must be of the format
        chainID residue_name(3-letter) residue_number [values]

        For atom-based features it must be
        chainID residue_name(3-letter) residue_number atom_name [values]

        Args:
            featlist (list(str)): list of features to be mapped
            transform (callable, optional): transformation applied to the
                feature values

        Returns:
            dict: mapped features

        Raises:
            ImportError: if pycuda cannot be imported when CUDA is requested
            ValueError: if a requested feature is not found in the file
        """
        # declare the total dictionary
        dict_data = {}

        # prepare the cuda memory
        if self.cuda:  # pragma: no cover

            # try to import pycuda
            try:
                from pycuda import driver, compiler, gpuarray, tools
                import pycuda.autoinit
            except BaseException:
                raise ImportError("Error when importing pyCuda in GridTools")

            # book memory on the gpu
            x_gpu = gpuarray.to_gpu(self.x.astype(np.float32))
            y_gpu = gpuarray.to_gpu(self.y.astype(np.float32))
            z_gpu = gpuarray.to_gpu(self.z.astype(np.float32))
            grid_gpu = gpuarray.zeros(self.npts, np.float32)

        # loop over all the required features
        for feature_name in featlist:

            logif('-- Map %s on %dx%dx%d grid ' %
                  (feature_name, self.npts[0], self.npts[1], self.npts[2]),
                  self.time)

            # read the data
            featgrp = self.molgrp['features']
            if feature_name in featgrp.keys():
                data = featgrp[feature_name][:]
            else:
                print('Error: Feature not found \n\tPossible features: ' +
                      ' | '.join(featgrp.keys()))
                raise ValueError(
                    'feature %s not found in the file' % (feature_name))

            # detect if we have a xyz format or a byte format
            # define how many elements (ntext) are present
            # before the feature values
            # xyz: 4 (chain x y z)
            # byte - residue: 3 (chain resSeq resName)
            # byte - atomic: 4 (chain resSeq resName name)
            # all the formats are now xyz
            feature_type = 'xyz'
            ntext = 4

            # test if the transform is callable
            # and test it on the first line of the data
            if data.shape[0] != 0:
                data_test = data[0, ntext:]

                # define the length of the output
                if transform is None:
                    nFeat = len(data_test)
                elif callable(transform):
                    nFeat = len(transform(data_test))
                else:
                    print('Error: transform in map_features must be callable')
                    return None
            else:
                nFeat = 1

            # declare the dict that will in fine hold all the data
            if nFeat == 1:
                if self.feature_mode == 'ind':
                    dict_data[feature_name + '_chain1'] = np.zeros(self.npts)
                    dict_data[feature_name + '_chain2'] = np.zeros(self.npts)
                else:
                    dict_data[feature_name] = np.zeros(self.npts)
            else:  # do we need that ?!
                for iF in range(nFeat):
                    if self.feature_mode == 'ind':
                        dict_data[feature_name +
                                  '_chain1_%03d' % iF] = np.zeros(self.npts)
                        dict_data[feature_name +
                                  '_chain2_%03d' % iF] = np.zeros(self.npts)
                    else:
                        dict_data[feature_name +
                                  '_%03d' % iF] = np.zeros(self.npts)

            # skip empty features
            if data.shape[0] == 0:
                continue

            # reset the grid
            if self.cuda:  # pragma: no cover
                grid_gpu *= 0

            # timing
            tprocess = 0
            tgrid = 0

            # map all the features
            for line in self.local_tqdm(data):

                t0 = time()

                # if the feature was written with xyz data
                # i.e. chain x y z values
                if feature_type == 'xyz':

                    chain = [self.chain1, self.chain2][int(line[0])]
                    pos = line[1:ntext]
                    feat_values = np.array(line[ntext:])

                # if the feature was written with bytes
                # i.e. chain resSeq resName (name) values
                # TODO deprecated?
                else:

                    # decode the line
                    line = line.decode('utf-8').split()

                    # get the position of the residue
                    chain, resName, resNum = line[0], line[1], line[2]

                    # get the atom name for atomic data
                    if feature_type == 'atomic':
                        atName = line[3]

                    # get the position
                    # TODO deprecated? the definition of the center position
                    # is different from that in features e.g. BSA.py
                    if feature_type == 'residue':
                        pos = np.mean(np.array(self.sqldb.get(
                            'x,y,z', chainID=chain, resSeq=resNum)), 0)
                        sql_resName = list(set(self.sqldb.get(
                            'resName', chainID=chain, resSeq=resNum)))
                    else:
                        pos = np.array(self.sqldb.get(
                            'x,y,z', chainID=chain, resSeq=resNum,
                            name=atName))[0]
                        sql_resName = list(set(self.sqldb.get(
                            'resName', chainID=chain, resSeq=resNum,
                            name=atName)))

                    # check that the resName corresponds
                    if len(sql_resName) == 0:
                        print('Error: SQL query returned empty list')
                        print('Tip : Make sure the parameter file ')
                        print('Tip : corresponds to the pdb file %s' %
                              (self.sqldb.pdbfile))
                        sys.exit()
                    else:
                        sql_resName = sql_resName[0]

                    if resName != sql_resName:
                        print('Residue Name Error in the Feature file ')
                        print('Feature File: chain %s resNum %s resName %s' %
                              (chain, resNum, resName))
                        print('SQL data    : chain %s resNum %s resName %s' %
                              (chain, resNum, sql_resName))
                        sys.exit()

                    # get the values of the feature(s) for this residue
                    feat_values = np.array(list(map(float, line[ntext:])))

                # postprocess the data
                if callable(transform):
                    feat_values = transform(feat_values)

                # handle the mode
                fname = feature_name
                if self.feature_mode == "diff":
                    coeff = {self.chain1: 1, self.chain2: -1}[chain]
                else:
                    coeff = 1
                if self.feature_mode == "ind":
                    chain_name = {self.chain1: '1', self.chain2: '2'}[chain]
                    fname = feature_name + "_chain" + chain_name

                tprocess += time() - t0

                t0 = time()
                # map this feature(s) on the grid(s)
                if not self.cuda:
                    if nFeat == 1:
                        dict_data[fname] += coeff * \
                            self.featgrid(pos, feat_values)
                    else:
                        for iF in range(nFeat):
                            dict_data[fname + '_%03d' % iF] += coeff * \
                                self.featgrid(pos, feat_values[iF])

                # try to use cuda to speed it up
                else:  # pragma: no cover
                    if nFeat == 1:
                        x0, y0, z0 = pos.astype(np.float32)
                        alpha = np.float32(coeff * feat_values)
                        self.cuda_func(alpha, x0, y0, z0,
                                       x_gpu, y_gpu, z_gpu, grid_gpu,
                                       block=tuple(self.gpu_block),
                                       grid=tuple(self.gpu_grid))
                    else:
                        raise ValueError(
                            'CUDA only possible for single-valued features')

                tgrid += time() - t0

            if self.cuda:  # pragma: no cover
                dict_data[fname] = grid_gpu.get()
                driver.Context.synchronize()

            logif('   Process time %f ms' % (tprocess * 1000), self.time)
            logif('   Grid time %f ms' % (tgrid * 1000), self.time)

        return dict_data
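    # Illustration (not part of the original code): in the current 'xyz'
    # format a feature row is purely numeric, e.g.
    #     [0., 12.3, 4.5, -6.7, 0.25]
    # which is read as: chain index 0 (mapped to chain1), position
    # (12.3, 4.5, -6.7) in Angs, and a single feature value 0.25 that is
    # spread on the grid around that position by featgrid().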
    # map a given feature on the grid
    def featgrid(self, center, value, type_='fast_gaussian'):
        """Map an individual feature (atomic or residue) on the grid.

        Args:
            center (list(float)): position of the feature center
            value (float): value of the feature
            type_ (str, optional): method used for the mapping

        Returns:
            np.array: mapped feature

        Raises:
            ValueError: if the mapping method is not recognized
        """
        # shortcut for the center
        x0, y0, z0 = center
        sigma = np.sqrt(1. / 2)
        beta = 0.5 / (sigma**2)

        # simple Gaussian
        if type_ == 'gaussian':
            dd = np.sqrt((self.xgrid - x0)**2
                         + (self.ygrid - y0)**2
                         + (self.zgrid - z0)**2)
            dd = value * np.exp(-beta * dd)
            return dd

        # fast Gaussian
        elif type_ == 'fast_gaussian':
            cutoff = 5. * beta
            dd = np.sqrt((self.xgrid - x0)**2
                         + (self.ygrid - y0)**2
                         + (self.zgrid - z0)**2)
            dgrid = np.zeros(self.npts)
            dgrid[dd < cutoff] = value * np.exp(-beta * dd[dd < cutoff])
            return dgrid

        # B-spline
        elif type_ == 'bspline':
            spline_order = 4
            spl = bspline((self.xgrid - x0) / self.res[0], spline_order) \
                * bspline((self.ygrid - y0) / self.res[1], spline_order) \
                * bspline((self.zgrid - z0) / self.res[2], spline_order)
            dd = value * spl
            return dd

        # nearest neighbours
        elif type_ == 'nearest':

            # distances
            dx = np.abs(self.x - x0)
            dy = np.abs(self.y - y0)
            dz = np.abs(self.z - z0)

            # indices of the two closest points along each axis
            indx = np.argsort(dx)[:2]
            indy = np.argsort(dy)[:2]
            indz = np.argsort(dz)[:2]

            # weights
            wx = dx[indx]
            wx /= np.sum(wx)
            wy = dy[indy]
            wy /= np.sum(wy)
            wz = dz[indz]
            wz /= np.sum(wz)

            # define the points
            indexes = [indx, indy, indz]
            points = list(itertools.product(*indexes))

            # define the weights
            W = [wx, wy, wz]
            W = list(itertools.product(*W))
            W = [np.sum(iw) for iw in W]

            # put that on the grid
            dgrid = np.zeros(self.npts)
            for w, pt in zip(W, points):
                dgrid[pt[0], pt[1], pt[2]] = w * value
            return dgrid

        # default
        else:
            raise ValueError(f'Option not recognized for the grid: {type_}')
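    # Illustration (not part of the original code): with sigma = sqrt(1/2)
    # the exponent is beta = 0.5 / sigma**2 = 1, so both Gaussian modes
    # evaluate value * exp(-d) at a grid point at distance d from the
    # feature center; 'fast_gaussian' only computes the exponential where
    # d < 5 (cutoff = 5 * beta) and leaves the rest of the grid at zero.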
    ################################################################
    # export the grid points for external calculations of some
    # features, for example the electrostatic potential etc ...
    ################################################################
    def export_grid_points(self):
        """Export the grid points to the hdf5 file."""

        grd = self.hdf5.require_group(self.mol_basename + '/grid_points')
        grd.create_dataset('x', data=self.x)
        grd.create_dataset('y', data=self.y)
        grd.create_dataset('z', data=self.z)

        # add the center or update it when the old value is different
        if 'center' not in grd:
            grd.create_dataset('center', data=self.center_contact)
        elif not all(grd['center'][()] == self.center_contact):
            grd['center'][...] = self.center_contact
    # save the data in the hdf5 file
    def hdf5_grid_data(self, dict_data, data_name):
        """Save the mapped features to the hdf5 file.

        Args:
            dict_data(dict): feature values stored as a dict
            data_name(str): feature name
        """
        # get the group of the feature
        feat_group = self.hdf5.require_group(
            self.mol_basename + '/mapped_features/' + data_name)

        # go through all the feature elements
        for key, value in dict_data.items():

            # remove only the subgroup
            if key in feat_group:
                del feat_group[key]

            # create a new one
            sub_feat_group = feat_group.create_group(key)

            # try a sparse representation
            if self.try_sparse:

                # check if the grid is sparse or not
                t0 = time()
                spg = sparse.FLANgrid()
                spg.from_dense(value, beta=1E-2)
                if self.time:
                    print('   Sparsing time %f ms' % ((time() - t0) * 1000))

                # if we have a sparse matrix
                if spg.sparse:
                    sub_feat_group.attrs['sparse'] = spg.sparse
                    sub_feat_group.attrs['type'] = 'sparse_matrix'
                    sub_feat_group.create_dataset(
                        'index', data=spg.index,
                        compression='gzip', compression_opts=9)
                    sub_feat_group.create_dataset(
                        'value', data=spg.value,
                        compression='gzip', compression_opts=9)

                # otherwise store the dense matrix
                else:
                    sub_feat_group.attrs['sparse'] = spg.sparse
                    sub_feat_group.attrs['type'] = 'dense_matrix'
                    sub_feat_group.create_dataset(
                        'value', data=spg.value,
                        compression='gzip', compression_opts=9)

            else:
                sub_feat_group.attrs['sparse'] = False
                sub_feat_group.attrs['type'] = 'dense_matrix'
                sub_feat_group.create_dataset(
                    'value', data=value,
                    compression='gzip', compression_opts=9)
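    # Illustration (not part of the original code): a minimal sketch of how
    # a grid written by this method could be read back with h5py. The file,
    # molecule and feature names are hypothetical, and the sparse decoding
    # assumes deeprank.tools.sparse.FLANgrid offers a matching constructor
    # and a to_dense() companion; the exact API may differ.
    #
    #   import h5py
    #   with h5py.File('output.hdf5', 'r') as f5:
    #       grp = f5['mol_name/mapped_features/Feature_ind/feat_chain1']
    #       if grp.attrs['sparse']:
    #           spg = sparse.FLANgrid(sparse=True,
    #                                 index=grp['index'][()],
    #                                 value=grp['value'][()],
    #                                 shape=(30, 30, 30))
    #           data = spg.to_dense()
    #       else:
    #           data = grp['value'][()]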
    ##########################################################################
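
# Illustration (not part of the original code): a minimal, hypothetical
# sketch of how GridTools is typically driven. It assumes an HDF5 file
# prepared by deeprank's DataGenerator, i.e. a molecule group containing a
# 'complex' PDB dataset and a 'features' subgroup; the file name, group
# name, chain IDs and feature names below are placeholders. Since the
# mapping is triggered from the constructor, instantiating the class is
# enough to create or update the grids.
#
#   import h5py
#   from deeprank.config import atom_vdw_radius_noH
#
#   with h5py.File('1AK4.hdf5', 'a') as f5:
#       molgrp = f5['1AK4']
#       GridTools(molgrp, chain1='C', chain2='D',
#                 number_of_points=30, resolution=1.,
#                 atomic_densities=atom_vdw_radius_noH,
#                 feature=['coulomb', 'vdwaals'],
#                 prog_bar=True)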