OB.DAAC Logo
NASA Logo
Ocean Color Science Software

ocssw V2022
__init__.py
Go to the documentation of this file.
1 from sklearn import preprocessing, decomposition
2 
3 from ..utils import using_feature
4 from ..meta import get_sensor_bands
5 
6 from ._CustomTransformer import _CustomTransformer
7 from .AUCTransformer import AUCTransformer
8 from .BaggingColumnTransformer import BaggingColumnTransformer
9 from .ColumnSelectionTransformer import ColumnSelectionTransformer
10 from .DatasetMembershipTransformer import DatasetMembershipTransformer
11 from .ExclusionTransformer import ExclusionTransformer
12 from .IdentityTransformer import IdentityTransformer
13 from .KBestTransformer import KBestTransformer
14 from .LogTransformer import LogTransformer
15 from .NegLogTransformer import NegLogTransformer
16 from .RatioTransformer import RatioTransformer
17 from .TanhTransformer import TanhTransformer
18 
19 
def generate_scalers(args, x_train=None, x_test=None, column_bagging=False):
    '''
    Add scalers to the args object based on the contained parameter settings.

    Each scaler is stored in serialized form as a tuple
    (scaler_class, positional_args, keyword_args); nothing is instantiated
    here except a RatioTransformer used to count the ratio features.

    The following attributes are set on `args`:
        wavelengths : result of get_sensor_bands(args.sensor, args)
        x_scalers   : serialized scalers for the inputs
        y_scalers   : serialized scalers for the outputs

    Scaler lists are applied in order, e.g. MinMaxScaler( LogTransformer(y) ).
    Optional input scalers are each prepended to x_scalers, so with every
    feature enabled the final input order is:
        AUC -> Ratio -> TruncatedSVD -> KBest -> BaggingColumn -> RobustScaler

    Parameters
    ----------
    args : object providing `sensor`, plus `seed` / `use_kbest` when the
        corresponding features are enabled; queried through using_feature.
    x_train, x_test : optional objects exposing `.shape[1]`; only used to
        decide whether there are many non-wavelength columns.
    column_bagging : whether column bagging may be added at all.

    Returns
    -------
    None; `args` is modified in place.
    '''
    def serialize(scaler, scaler_args=None, scaler_kwargs=None):
        # Build fresh containers for every entry: shared mutable defaults would
        # let a change to one serialized scaler's arguments leak into the others
        return (
            scaler,
            [] if scaler_args is None else scaler_args,
            {} if scaler_kwargs is None else scaler_kwargs,
        )

    wavelengths = get_sensor_bands(args.sensor, args)
    setattr(args, 'wavelengths', wavelengths)

    # Note that the scaler list is applied in order, e.g. MinMaxScaler( LogTransformer(y) )
    # (a MinMaxScaler over (-1, 1) for the inputs is left disabled, as in the original)
    args.x_scalers = [
        serialize(preprocessing.RobustScaler),
    ]
    args.y_scalers = [
        serialize(LogTransformer),
        serialize(preprocessing.MinMaxScaler, [(-1, 1)]),
    ]

    # We only want bagging to be applied to the columns if there are a large number
    # of extra features (e.g. ancillary features included)
    many_features = column_bagging and any(
        x is not None and (x.shape[1] - len(wavelengths)) > 15
        for x in [x_train, x_test]
    )

    # Add bagging to the columns (use a random subset of columns, excluding the
    # first <n_wavelengths> columns from the process)
    if column_bagging and using_feature(args, 'bagging') and (using_feature(args, 'ratio') or many_features):
        # Number of ratio features added
        if using_feature(args, 'ratio'):
            n_extra = RatioTransformer(wavelengths, args.sensor).get_n_features()
        else:
            n_extra = 0
        args.x_scalers = [
            serialize(BaggingColumnTransformer, [len(wavelengths)], {'n_extra': n_extra, 'seed': args.seed}),
        ] + args.x_scalers

    # Feature selection via mutual information
    if using_feature(args, 'kbest'):
        args.x_scalers = [
            serialize(KBestTransformer, [args.use_kbest]),
        ] + args.x_scalers

    # Add additional features to the inputs
    if using_feature(args, 'ratio') or using_feature(args, 'all_ratio'):
        kwargs = {}
        if using_feature(args, 'excl_Rrs'):
            kwargs.update({'excl_Rrs': True})
        if using_feature(args, 'all_ratio'):
            kwargs.update({'all_ratio': True})
        # decomposition.PCA and decomposition.FastICA (same n_components) were
        # left disabled in the original in favour of TruncatedSVD
        args.x_scalers = [
            serialize(RatioTransformer, [list(wavelengths), args.sensor], kwargs),
            serialize(decomposition.TruncatedSVD, [], {'n_components': 15}),
        ] + args.x_scalers

    # Normalize input features using AUC
    if using_feature(args, 'auc'):
        args.x_scalers = [
            serialize(AUCTransformer, [list(wavelengths)]),
        ] + args.x_scalers
69 
70 
71 
73  ''' Apply multiple transformers seamlessly '''
74 
def __init__(self, scalers=None):
    '''
    Store the ordered collection of transformers making up the pipeline.

    scalers : list of transformer objects, applied first-to-last when
        transforming and last-to-first when inverting. When omitted (or
        None), the pipeline starts with its own new empty list.
    '''
    # A literal [] default is created once at definition time and would be
    # shared by every instance constructed without arguments
    self.scalers = [] if scalers is None else scalers
77 
def _fit(self, X, *args, **kwargs):
    '''
    Fit every scaler in sequence; each one is fitted on the output
    of the scaler before it. Extra arguments are forwarded unchanged
    to every scaler. Returns the pipeline itself.
    '''
    current = X
    for stage in self.scalers:
        # fit_transform (rather than fit) so the next stage sees transformed data
        current = stage.fit_transform(current, *args, **kwargs)
    return self
82 
def _transform(self, X, *args, **kwargs):
    '''
    Pass X through each scaler's transform, first to last, forwarding
    any extra arguments to every scaler. Returns the final output
    (X itself when there are no scalers).
    '''
    current = X
    for stage in self.scalers:
        current = stage.transform(current, *args, **kwargs)
    return current
87 
def _inverse_transform(self, X, *args, **kwargs):
    '''
    Undo the pipeline: apply each scaler's inverse_transform in the
    opposite order to the forward pass (last scaler first), forwarding
    any extra arguments to every scaler.
    '''
    current = X
    for stage in reversed(self.scalers):
        current = stage.inverse_transform(current, *args, **kwargs)
    return current
92 
def fit_transform(self, X, *args, **kwargs):
    '''
    Fit each scaler and transform the data in a single pass, returning
    the output of the last scaler (X itself when there are no scalers).
    Extra arguments are forwarded unchanged to every scaler.
    '''
    # Done by hand so the data is not transformed a second time after fitting
    current = X
    for stage in self.scalers:
        current = stage.fit_transform(current, *args, **kwargs)
    return current
98 
99 
100 
101 
102 
list(APPEND LIBS ${PGSTK_LIBRARIES}) add_executable(atteph_info_modis atteph_info_modis.c) target_link_libraries(atteph_info_modis $
Definition: CMakeLists.txt:7
def fit_transform(self, X, *args, **kwargs)
Definition: __init__.py:93
def get_sensor_bands(sensor, args=None)
Definition: meta.py:114
def using_feature(args, flag)
Definition: utils.py:158
def __init__(self, scalers=[])
Definition: __init__.py:75
def generate_scalers(args, x_train=None, x_test=None, column_bagging=False)
Definition: __init__.py:20