OB.DAAC Logo
NASA Logo
Ocean Color Science Software

ocssw V2022
KBestTransformer.py
Go to the documentation of this file.
1 from ._CustomTransformer import _CustomTransformer
2 
3 from sklearn.feature_selection import SelectKBest, mutual_info_regression
4 import numpy as np
5 
6 
8  ''' Select the top K features, based on mutual information with the target variable.
9  When multiple target variables exist, each target variable receives an even share
10  of the features (e.g. 3 targets, K=6 -> K=2 for each target) '''
def __init__(self, n_features, *args, **kwargs):
    ''' Remember the overall feature budget.

    n_features : total number of features to keep; at fit time it is
                 divided evenly between the target columns.
    *args, **kwargs : accepted and ignored.
    '''
    # Only the budget is stored here; the selectors are created during fitting.
    self.n_features = n_features
13 
def _fit(self, X, y, *args, **kwargs):
    ''' Fit one mutual-information SelectKBest selector per target column.

    X : feature matrix, indexed by sample along its first axis.
    y : targets, one column per target variable (last axis). A flat 1-D
        vector is treated as a single target column.
    *args, **kwargs : accepted and ignored.

    Each selector is given k = n_features // (number of targets), and is
    fitted only on the samples whose value for that target is finite.
    The fitted selectors are stored in self.k_transformer.

    Returns self.
    '''
    if np.ndim(y) == 1:
        # Without this, y.shape[-1] would be the number of samples rather than
        # the number of targets, and y[valid, i] would be an invalid index.
        y = np.asarray(y)[:, None]

    n_targets = y.shape[-1]
    # Even share of the feature budget; identical for every target, so compute once.
    per_target_k = self.n_features // n_targets if n_targets else 0

    self.k_transformer = []
    for i in range(n_targets):
        # Non-finite target values (NaN / inf) are excluded for this target only.
        valid = np.isfinite(y[..., i])
        selector = SelectKBest(mutual_info_regression, k=per_target_k)
        selector.fit(X[valid], y[valid, i])
        self.k_transformer.append(selector)
    return self
21 
def _transform(self, X, *args, **kwargs):
    ''' Keep only the feature columns chosen by the fitted selectors.

    X : array whose last axis indexes features.
    *args, **kwargs : accepted and ignored.

    For every selector in self.k_transformer the k highest-scoring feature
    indices are taken; the union of those indices (ascending, duplicates
    removed) is used to index the last axis of X. Because the per-target
    picks may overlap, fewer than n_features columns can be returned.
    '''
    picked = []
    for selector in self.k_transformer:
        ranking = np.argsort(selector.scores_)  # ascending: best scores are last
        # Slice from an explicit start index: `ranking[-k:]` with k == 0 is
        # `ranking[0:]`, which would keep every feature instead of none.
        start = max(len(ranking) - selector.k, 0)
        picked.append(ranking[start:])

    if picked:
        # np.unique already returns the indices sorted in ascending order.
        columns = np.unique(np.concatenate(picked))
    else:
        columns = np.empty(0, dtype=np.intp)
    return X[..., columns]
def __init__(self, n_features, *args, **kwargs)