Source code for mindpype.kernels.feature_selection

from ..core import MPEnums
from ..kernel import Kernel
from ..graph import Node, Parameter
from ..containers import Tensor
from .kernel_utils import extract_init_inputs

from sklearn.feature_selection import SelectKBest
import numpy as np

class FeatureSelectionKernel(Kernel):
    """
    Performs feature selection using the f_classif method from
    sklearn.feature_selection to determine the most relevant features
    from the data.

    .. note:: This kernel utilizes the
        :class:`SelectKBest <sklearn:sklearn.feature_selection.SelectKBest>`
        class from the :mod:`sklearn <sklearn:sklearn>` package.

    Parameters
    ----------
    graph : Graph
        Graph that the kernel should be added to
    inA : Tensor
        Input data (n_samples, n_features)
    outA : Tensor
        Output data (n_samples, n_selected_features)
    k : int or "all", default=10
        Number of top-scoring features to keep; forwarded unchanged to
        SelectKBest
    initialization_data : Tensor
        Initialization data
    labels : Tensor
        Initialization data labels (n_samples, )
    """

    def __init__(self, graph, inA, outA, k=10,
                 initialization_data=None, labels=None):
        """ Init """
        super().__init__('FeatureSelection', MPEnums.INIT_FROM_DATA, graph)
        self.inputs = [inA]
        self.outputs = [outA]

        self._initialized = False
        self._k = k

        # initialization data and labels are optional at construction time
        if initialization_data is not None:
            self.init_inputs = [initialization_data]

        if labels is not None:
            self.init_input_labels = labels

    def _initialize(self, init_inputs, init_outputs, labels):
        """
        Initialize feature selection kernel by fitting the selector to the
        initialization data and, if requested, producing the
        initialization output.

        Parameters
        ----------
        init_inputs : list of Tensor or Array
            Initialization input data container, only the first element
            is used
        init_outputs : list of Tensor
            Initialization output data container, only the first element
            is used; it may be None when no output is requested
        labels : Tensor
            Initialization data labels (n_samples, )

        Raises
        ------
        TypeError
            If the initialization data or labels are not a tensor, array
            or circle buffer
        """
        # check that the input init data is in the correct type
        init_in = init_inputs[0]
        accepted_inputs = (MPEnums.TENSOR,
                           MPEnums.ARRAY,
                           MPEnums.CIRCLE_BUFFER)

        for init_obj in (init_in, labels):
            if init_obj.mp_type not in accepted_inputs:
                raise TypeError('Initialization data must be a tensor or array of tensors')

        # extract the initialization data from a potentially nested
        # array of tensors
        X = extract_init_inputs(init_in)
        y = extract_init_inputs(labels)

        # ensure the shapes are valid: flatten each 3D sample into a
        # single feature vector and drop the singleton label dimension
        if len(X.shape) == 3:
            n_samples, dim1, dim2 = X.shape
            X = np.reshape(X, (n_samples, dim1 * dim2))

        if len(y.shape) == 2:
            y = np.squeeze(y)

        # initialize model
        self._model = SelectKBest(k=self._k)
        self._model.fit(X, y)

        # set the initialization output
        init_out = init_outputs[0]
        if init_out is not None:
            init_tensor = Tensor.create_from_data(self.session, X)

            # adjust output shape if necessary; the number of selected
            # features is read from the fitted selector so the shape
            # stays valid when k is "all" rather than an integer
            if init_out.virtual:
                n_selected = int(np.count_nonzero(self._model.get_support()))
                init_out.shape = (X.shape[0], n_selected)

            self._process_data([init_tensor], init_outputs)

        self._initialized = True

    def _verify(self):
        """
        Verify the kernel input, similar to the verification process of
        the individual classifier kernels

        Raises
        ------
        TypeError
            If the input is not a tensor, array or circle buffer, or if
            the first element of a non-tensor input is not a tensor
        """
        # inputs must be a tensor or array of tensors
        accepted_input_types = (MPEnums.TENSOR,
                                MPEnums.ARRAY,
                                MPEnums.CIRCLE_BUFFER)

        d_in = self.inputs[0]
        if d_in.mp_type not in accepted_input_types:
            raise TypeError('Input data must be a tensor or array of tensors')

        # if input is an array, check that its elements are tensors
        if d_in.mp_type != MPEnums.TENSOR:
            e = d_in.get_element(0)
            if e.mp_type != MPEnums.TENSOR:
                raise TypeError('Input data must be a tensor or array of tensors')

    def _process_data(self, inputs, outputs):
        """
        Process data according to outlined kernel function

        Parameters
        ----------
        inputs : list of Tensors
            Input data container, list of length 1
        outputs : list of Tensors
            Output data container, list of length 1
        """
        inA = inputs[0]
        outA = outputs[0]

        # convert input to tensor if needed
        if inA.mp_type != MPEnums.TENSOR:
            inA = inA.to_tensor()

        # extract and reshape data; a 1D input is treated as one sample
        if len(inA.shape) == 1:
            input_data = np.expand_dims(inA.data, axis=0)
        else:
            input_data = inA.data

        outA.data = self._model.transform(input_data)

    @classmethod
    def add_to_graph(cls, graph, inA, outA, k=10,
                     init_inputs=None, labels=None):
        """
        Factory method to create a feature selection kernel and add it
        to a graph as a generic node object

        Parameters
        ----------
        graph : Graph
            Graph that the kernel should be added to
        inA : Tensor
            Input data (n_samples, n_features)
        outA : Tensor
            Output data (n_samples, n_selected_features)
        k : int or "all", default=10
            Number of top-scoring features to keep; forwarded unchanged
            to SelectKBest
        init_inputs : Tensor
            Initialization data for the graph
        labels : Tensor
            Class labels corresponding to the initialization data

        Returns
        -------
        node : Node
            Node object that contains the kernel and was added to the
            graph
        """
        # create the kernel object
        c = cls(graph, inA, outA, k, init_inputs, labels)

        params = (Parameter(inA, MPEnums.INPUT),
                  Parameter(outA, MPEnums.OUTPUT))

        node = Node(graph, c, params)

        graph.add_node(node)

        return node