Due to the lapse in federal government funding, NASA is not updating this website. We sincerely regret this inconvenience.
NASA Logo
Ocean Color Science Software

ocssw V2022
RatioTransformer.py
Go to the documentation of this file.
1 from ._CustomTransformer import _CustomTransformer
2 from .utils import spearmanr, get_unique_features
3 
4 from itertools import combinations
5 from functools import wraps, partial
6 from pathlib import Path
7 
8 import pandas as pd
9 import numpy as np
10 import math
11 
12 import pandas as pd
13 import numpy as np
14 
15 
16 def add_labels(n_bands, label_function):
17  ''' Decorator which adds labels to the RatioTransformer.labels
18  list, using the given label_function to combine lists of
19  wavelengths that are passed to the decorated function. '''
20  def decorator(function):
21  setattr(function, 'n_bands', n_bands)
22 
23  @wraps(function)
24  def wrapper(Rrs, labels, *args, **kwargs):
25  # Cast to a series to handle single values, and ensure they are strings
26  as_str_series = lambda v: pd.Series(v, dtype=str)
27  labels.extend( map(label_function, *map(as_str_series, args)) )
28  return function(Rrs, *args, **kwargs)
29  return wrapper
30  return decorator
31 
32 
33 #=========================================
34 # Below functions represent commonly used
35 # product formulations found in literature
36 #=========================================
37 
38 @add_labels(2, lambda W1, W2: f'{W2}/{W1}')
39 def BR2(Rrs, W1, W2):
40  ''' 2-Band ratio '''
41  return Rrs(W2) / Rrs(W1)
42 
43 
44 @add_labels(3, lambda W1, W2, W3: f'{W3}/{W1}-{W3}/{W2}')
45 def BR3(Rrs, W1, W2, W3):
46  ''' 3-Band ratio '''
47  return Rrs(W3)/Rrs(W1) - Rrs(W3)/Rrs(W2)
48 
49 
50 @add_labels(3, lambda W1, W2, W3: f'{W1}|{W2}|{W3}')
51 def LH(Rrs, W1, W2, W3):
52  ''' Line height '''
53  c = (W3 - W2) / (W3 - W1)
54  return Rrs(W2) - c*Rrs(W1) - (1-c)*Rrs(W3)
55 
56 
57 @add_labels(2, lambda W1, W2: f'{W2}-{W1}/{W2}+{W1}')
58 def ND(Rrs, W1, W2):
59  ''' Normalized difference '''
60  return (Rrs(W2)-Rrs(W1)) / (Rrs(W2)+Rrs(W1))
61 
62 
63 @add_labels(3, lambda W1, W2, W3: f'({W3}+{W1})/2-{W2}')
64 def AVG(Rrs, W1, W2, W3):
65  ''' Average difference '''
66  return (Rrs(W3)+Rrs(W1))/2 - Rrs(W2)
67 
68 
69 @add_labels(2, lambda W1, W2: f'peak|{W1}-{W2}')
70 def PEAK(Rrs, min_wvl, max_wvl, wavelengths=None):
71  ''' Peak location in a range of wavelengths '''
72  wave = np.array(wavelengths)
73  area = (wave[:, None] >= min_wvl) & (wave[:, None] <= max_wvl)
74  idxs = [Rrs( wave[a] ).argmax(axis=-1) for a in area.T]
75  peak = [wave[a][i] - wave[a].min() for a,i in zip(area.T, idxs)]
76  return np.stack(peak, axis=1)
77 
78 #=========================================
79 
80 
81 
83  ''' Add ratio features '''
84  functions = [BR2, BR3, LH, ND, AVG]
85 
86  def __init__(self, wavelengths, sensor, *args, excl_Rrs=False, all_ratio=False, cutoff=0.5, **kwargs):
87  self.wavelengths = list(wavelengths)
88  self.all_ratio = all_ratio
89  self.excl_Rrs = excl_Rrs
90  self.sensor = sensor
91  self.cutoff = cutoff
92 
93 
94  @staticmethod
95  def config_info(*args, **kwargs):
96  transformer = RatioTransformer(*args, **kwargs)
97  return transformer.get_n_features()
98 
99 
100  def get_n_features(self):
101  n_wavelengths = len(self.wavelengths)
102  n_combinations = lambda n, r, f=math.factorial: f(n) // f(r) // f(n-r)
103  function_names = ', '.join([f.__name__ for f in self.functions])
104  function_bands = [f.n_bands for f in self.functions]
105  total_number = sum(map(partial(n_combinations, n_wavelengths), function_bands))
106  return f'[{function_names}] applied to {n_wavelengths} wavelengths yields up to {total_number} features'
107 
108 
109  def _inverse_transform(self, X, *args, **kwargs):
110  if self.excl_Rrs: raise Exception('No inverse when Rrs is excluded from features')
111  return np.array(X)[..., :self.shape]
112 
113 
114  def _fit(self, X, y, *args, **kwargs):
115  from ..benchmarks.utils import has_band, closest_wavelength
116 
117  self.features = []
118  self.labels = []
119  self.shape = X.shape[-1]
120 
121  Rrs_df = pd.DataFrame(X, columns=self.wavelengths)
122  Rrs = lambda cols: Rrs_df[cols].values
123 
124  # Steps:
125  # 1. Enumerate all wavelength combinations for the selected feature functions
126  # 2. Calculate correlations between features and target variables, filtering those below cutoff
127  # 3. Filter remaining combinations by removing those which are too similar to others
128  # 4. Store the final remaining wavelength combinations for use in the _transform function
129  for function in self.functions:
130  wavelengths = list(combinations(self.wavelengths, function.n_bands))
131  values = function(Rrs, [], *map(np.array, zip(*wavelengths)))
132  correlation = [spearmanr(yy[np.isfinite(yy), None].T, values[np.isfinite(yy)].T) for yy in y.T]
133  mask = np.max(correlation, axis=0) > self.cutoff
134  mask[mask] &= get_unique_features(values[:, mask], threshold=0.05)
135 
136  mask_index = pd.MultiIndex.from_tuples(wavelengths)
137  mask_frame = pd.DataFrame(mask, index=mask_index)
138  wavelengths = mask_frame[mask_frame].dropna().index.to_frame()
139  self.features.append( (function, wavelengths.values.T) )
140 
141  # Add any additional manually specified features to the list
142  additional = [
143  (partial(PEAK, wavelengths=self.wavelengths), [
144  (450, 520), # Peak in blue
145  (550, 600), # Peak in green
146  (620, 670), # Peak in red
147  (680, 720), # Peak in nir
148  ])
149  ]
150 
151  # Ensure required wavelengths are available, and use the exact values available
152  for function, wavelengths in additional:
153  valid = lambda wvls: all([has_band(w, self.wavelengths) for w in wvls])
154  select_wvls = closest = np.array(wavelengths)[list(map(valid, wavelengths))]
155 
156  if len(select_wvls):
157  closest = closest_wavelength(select_wvls.flatten(), self.wavelengths)
158  wavelengths = closest.reshape(select_wvls.shape).T
159  self.features.append( (function, wavelengths) )
160 
161  # Run the transformation to add the labels being used
162  self._transform(X, labels=self.labels)
163  print(f'\nFit RatioTransformer with {len(self.labels)} features')
164 
165 
166  def _transform(self, X, *args, labels=None, **kwargs):
167  Rrs_df = pd.DataFrame(X, columns=self.wavelengths)
168  Rrs = lambda cols: Rrs_df[cols].values
169  x_new = [] if self.excl_Rrs else [Rrs_df.values]
170 
171  if labels is None:
172  labels = []
173 
174  if not self.excl_Rrs:
175  labels.extend( list(map(str, self.wavelengths)) )
176 
177  for function, wavelengths in self.features:
178  x_new.append( function(Rrs, labels, *wavelengths) )
179  x_new = np.concatenate(x_new, axis=-1)
180  x_new = np.maximum(-1e8, np.minimum(1e8, x_new))
181 
182  # Sanity checks
183  assert(x_new.shape[-1] == len(labels)), f'Mismatch between features and labels: {x_new.shape[-1]} vs {len(labels)}'
184  assert(len(self.labels)== len(labels)), f'Mismatch between old and new labels: {len(self.labels)} vs {len(labels)}'
185  return x_new
def add_labels(n_bands, label_function)
def has_band(w, waves, tol=5)
Definition: utils.py:30
def get_unique_features(features, threshold=0.025, chunksize=10, _bottom=False)
Definition: utils.py:7
double precision function f(R1)
Definition: tmd.lp.f:1454
def __init__(self, wavelengths, sensor, *args, excl_Rrs=False, all_ratio=False, cutoff=0.5, **kwargs)
list(APPEND LIBS ${NETCDF_LIBRARIES}) find_package(GSL REQUIRED) include_directories($
Definition: CMakeLists.txt:8
void print(std::ostream &stream, const char *format)
Definition: PrintDebug.hpp:38
def closest_wavelength(k, waves, validate=True, tol=5, squeeze=False)
Definition: utils.py:24
def PEAK(Rrs, min_wvl, max_wvl, wavelengths=None)
def spearmanr(x, y)
Definition: utils.py:58