Source code for mindpype.kernels.feature_selection

from ..core import MPEnums
from ..kernel import Kernel
from ..graph import Node, Parameter
from ..containers import Tensor
from .kernel_utils import extract_init_inputs

from sklearn.feature_selection import SelectKBest
import numpy as np


[docs]
class FeatureSelectionKernel(Kernel):
    """
    Performs feature selection using f_classif method from sklearn.feature_selection
    to determine the most relevant features from the data.
    
    .. note:: 
        This kernel utilizes the 
        :class:`SelectKBest <sklearn:sklearn.feature_selection.SelectKBest>` 
        class from the :mod:`sklearn <sklearn:sklearn>` package.

     

    Parameters
    ----------

    graph : Graph
        Graph that the kernel should be added to

    inA : Tensor
        Input data (n_samples, n_features)

    outA : Tensor
        Output data (n_samples, n_selected_features)

    initialization_data : Tensor
        Initialization data

    labels : Tensor
        Initialization data labels (n_samples, )
    """

    def __init__(self, graph, inA, outA, k=10, initialization_data=None, labels=None):
        """
        Construct the kernel and store its data containers

        Parameters
        ----------

        graph : Graph
            Graph that the kernel should be added to

        inA : Tensor
            Input data container

        outA : Tensor
            Output data container

        k : default = 10
            Value passed as ``k`` to SelectKBest when the kernel is initialized

        initialization_data : Tensor, default = None
            Initialization data; only stored when it is not None

        labels : Tensor, default = None
            Initialization data labels; only stored when they are not None
        """
        super().__init__('FeatureSelection', MPEnums.INIT_FROM_DATA, graph)

        # single-input, single-output kernel
        self.inputs, self.outputs = [inA], [outA]

        # the selector itself is only built once initialization data is seen
        self._k = k
        self._initialized = False

        # optional initialization containers are left untouched unless given
        if initialization_data is not None:
            self.init_inputs = [initialization_data]
        if labels is not None:
            self.init_input_labels = labels


    def _initialize(self, init_inputs, init_outputs, labels):
        """
        Initialize feature selection kernel

        Fits a SelectKBest model on the initialization data and labels. When
        an initialization output is provided, the initialization data is also
        passed through the fitted model and written to that output.

        Parameters
        ----------

        init_inputs: list
            Initialization input data containers, only the first is used

        init_outputs: list
            Initialization output data containers, only the first is used;
            its entry may be None, in which case no output is produced

        labels : Tensor
            Initialization data labels (n_samples, )

        Raises
        ------
        TypeError
            If the initialization data or the labels are not a tensor,
            array or circle buffer
        """
        # check that the input init data is in the correct type
        init_in = init_inputs[0]
        accepted_inputs = (MPEnums.TENSOR,
                           MPEnums.ARRAY,
                           MPEnums.CIRCLE_BUFFER)

        for init_obj in (init_in, labels):
            if init_obj.mp_type not in accepted_inputs:
                raise TypeError('Initialization data must be a tensor or array of tensors')

        # extract the initialization data from a potentially nested array of tensors
        X = extract_init_inputs(init_in)
        y = extract_init_inputs(labels)

        # ensure the shapes are valid: collapse every trailing dimension so
        # that each sample becomes a single flat feature vector
        if len(X.shape) > 2:
            n_flat_features = int(np.prod(X.shape[1:]))
            X = np.reshape(X, (X.shape[0], n_flat_features))

        if len(y.shape) == 2:
            y = np.squeeze(y)

        # initialize model
        self._model = SelectKBest(k=self._k)
        self._model.fit(X, y)

        # set the initialization output
        init_out = init_outputs[0]
        if init_out is not None:
            init_tensor = Tensor.create_from_data(self.session, X)

            # adjust output shapes if necessary; the width is read from the
            # fitted selector rather than from k, because k may be 'all' or
            # may not match the number of columns actually retained
            if init_out.virtual:
                n_selected = int(np.count_nonzero(self._model.get_support()))
                init_out.shape = (X.shape[0], n_selected)

            # write to the containers handed to this call so that the object
            # that was checked above is the one that gets filled
            self._process_data([init_tensor], init_outputs)

        self._initialized = True


    def _verify(self):
        """
        Verify the type of the kernel input

        The input must be a tensor, or an array / circle buffer whose first
        element is a tensor.

        Raises
        ------
        TypeError
            If the input, or the first element of a non-tensor input, has an
            unsupported type
        """
        data_in = self.inputs[0]
        container_types = (MPEnums.ARRAY, MPEnums.CIRCLE_BUFFER)

        # a plain tensor needs no further inspection
        if data_in.mp_type == MPEnums.TENSOR:
            return

        # anything that is neither a tensor nor a supported container is rejected
        if data_in.mp_type not in container_types:
            raise TypeError('Input data must be a tensor or array of tensors')

        # containers are accepted only when they hold tensors; only the
        # first element is inspected
        first_element = data_in.get_element(0)
        if first_element.mp_type != MPEnums.TENSOR:
            raise TypeError('Input data must be a tensor or array of tensors')

    def _process_data(self, inputs, outputs):
        """
        Apply the fitted feature selector to the input data

        Parameters
        ----------

        inputs: list of Tensors
            Input data container, list of length 1

        outputs: list of Tensors
            Output data container, list of length 1
        """
        source, target = inputs[0], outputs[0]

        # non-tensor containers are converted to a tensor before use
        if source.mp_type != MPEnums.TENSOR:
            source = source.to_tensor()

        # a 1-D input is given a leading axis so the selector receives 2-D data
        is_single_vector = len(source.shape) == 1
        samples = source.data
        if is_single_vector:
            samples = np.expand_dims(samples, axis=0)

        target.data = self._model.transform(samples)


[docs]
    @classmethod
    def add_to_graph(cls, graph, inA, outA, k=10, init_inputs = None, labels = None):
        """
        Factory method to create a feature selection kernel and add it to a graph as a generic node object

        Parameters
        ----------

        graph : Graph
            Graph that the kernel should be added to

        inA : Tensor
            Input data container, registered as the node input

        outA : Tensor
            Output data container, registered as the node output

        k : default = 10
            Forwarded unchanged to the kernel constructor

        init_inputs : Tensor
            Initialization data, forwarded to the kernel constructor

        labels : Tensor
            Labels corresponding to the initialization data, forwarded to the
            kernel constructor

        Returns
        -------
        node : Node
            Node that was created and added to the graph
        """
        # build the kernel first, then describe how its containers are used
        kernel = cls(graph, inA, outA, k, init_inputs, labels)

        input_param = Parameter(inA, MPEnums.INPUT)
        output_param = Parameter(outA, MPEnums.OUTPUT)

        # wrap the kernel in a node and register it with the graph
        node = Node(graph, kernel, (input_param, output_param))
        graph.add_node(node)

        return node