OB.DAAC Logo
NASA Logo
Ocean Color Science Software

ocssw V2022
RatioTransformer.py
Go to the documentation of this file.
1 from ._CustomTransformer import _CustomTransformer
2 from .utils import spearmanr, get_unique_features
3 
4 from itertools import combinations
5 from functools import wraps, partial
6 from pathlib import Path
7 
8 import pandas as pd
9 import numpy as np
10 import math
11 
12 import pandas as pd
13 import numpy as np
14 
15 
16 def add_labels(n_bands, label_function):
17  ''' Decorator which adds labels to the RatioTransformer.labels
18  list, using the given label_function to combine lists of
19  wavelengths that are passed to the decorated function. '''
20  def decorator(function):
21  setattr(function, 'n_bands', n_bands)
22 
23  @wraps(function)
24  def wrapper(Rrs, labels, *args, **kwargs):
25  # Cast to a series to handle single values, and ensure they are strings
26  as_str_series = lambda v: pd.Series(v, dtype=str)
27  labels.extend( map(label_function, *map(as_str_series, args)) )
28  return function(Rrs, *args, **kwargs)
29  return wrapper
30  return decorator
31 
32 
33 #=========================================
34 # Below functions represent commonly used
35 # product formulations found in literature
36 #=========================================
37 
38 @add_labels(2, lambda W1, W2: f'{W2}/{W1}')
39 def BR2(Rrs, W1, W2):
40  ''' 2-Band ratio '''
41  return Rrs(W2) / Rrs(W1)
42 
43 
44 @add_labels(3, lambda W1, W2, W3: f'{W3}/{W1}-{W3}/{W2}')
45 def BR3(Rrs, W1, W2, W3):
46  ''' 3-Band ratio '''
47  return Rrs(W3)/Rrs(W1) - Rrs(W3)/Rrs(W2)
48 
49 
50 @add_labels(3, lambda W1, W2, W3: f'{W1}|{W2}|{W3}')
51 def LH(Rrs, W1, W2, W3):
52  ''' Line height '''
53  c = (W3 - W2) / (W3 - W1)
54  return Rrs(W2) - c*Rrs(W1) - (1-c)*Rrs(W3)
55 
56 
57 @add_labels(2, lambda W1, W2: f'{W2}-{W1}/{W2}+{W1}')
58 def ND(Rrs, W1, W2):
59  ''' Normalized difference '''
60  return (Rrs(W2)-Rrs(W1)) / (Rrs(W2)+Rrs(W1))
61 
62 
63 @add_labels(3, lambda W1, W2, W3: f'({W3}+{W1})/2-{W2}')
64 def AVG(Rrs, W1, W2, W3):
65  ''' Average difference '''
66  return (Rrs(W3)+Rrs(W1))/2 - Rrs(W2)
67 
68 
69 @add_labels(2, lambda W1, W2: f'peak|{W1}-{W2}')
70 def PEAK(Rrs, min_wvl, max_wvl, wavelengths=None):
71  ''' Peak location in a range of wavelengths '''
72  wave = np.array(wavelengths)
73  area = (wave[:, None] >= min_wvl) & (wave[:, None] <= max_wvl)
74  idxs = [Rrs( wave[a] ).argmax(axis=-1) for a in area.T]
75  peak = [wave[a][i] - wave[a].min() for a,i in zip(area.T, idxs)]
76  return np.stack(peak, axis=1)
77 
78 #=========================================
79 
80 
81 
83  ''' Add ratio features '''
84  functions = [BR2, BR3, LH, ND, AVG]
85 
86  def __init__(self, wavelengths, sensor, *args, excl_Rrs=False, all_ratio=False, cutoff=0.5, **kwargs):
87  self.wavelengths = list(wavelengths)
88  self.all_ratio = all_ratio
89  self.excl_Rrs = excl_Rrs
90  self.sensor = sensor
91  self.cutoff = cutoff
92 
93 
94  @staticmethod
95  def config_info(*args, **kwargs):
96  transformer = RatioTransformer(*args, **kwargs)
97  return transformer.get_n_features()
98 
99 
100  def get_n_features(self):
101  n_wavelengths = len(self.wavelengths)
102  n_combinations = lambda n, r, f=math.factorial: f(n) // f(r) // f(n-r)
103  function_names = ', '.join([f.__name__ for f in self.functions])
104  function_bands = [f.n_bands for f in self.functions]
105  total_number = sum(map(partial(n_combinations, n_wavelengths), function_bands))
106  return f'[{function_names}] applied to {n_wavelengths} wavelengths yields up to {total_number} features'
107 
108 
109  def _inverse_transform(self, X, *args, **kwargs):
110  if self.excl_Rrs: raise Exception('No inverse when Rrs is excluded from features')
111  return np.array(X)[..., :self.shape]
112 
113 
114  def _fit(self, X, y, *args, **kwargs):
115  from ..benchmarks.utils import has_band, closest_wavelength
116 
117  self.features = []
118  self.labels = []
119  self.shape = X.shape[-1]
120 
121  Rrs_df = pd.DataFrame(X, columns=self.wavelengths)
122  Rrs = lambda cols: Rrs_df[cols].values
123 
124  # Steps:
125  # 1. Enumerate all wavelength combinations for the selected feature functions
126  # 2. Calculate correlations between features and target variables, filtering those below cutoff
127  # 3. Filter remaining combinations by removing those which are too similar to others
128  # 4. Store the final remaining wavelength combinations for use in the _transform function
129  for function in self.functions:
130  wavelengths = list(combinations(self.wavelengths, function.n_bands))
131  values = function(Rrs, [], *map(np.array, zip(*wavelengths)))
132  correlation = [spearmanr(yy[np.isfinite(yy), None].T, values[np.isfinite(yy)].T) for yy in y.T]
133  mask = np.max(correlation, axis=0) > self.cutoff
134  mask[mask] &= get_unique_features(values[:, mask], threshold=0.05)
135 
136  mask_index = pd.MultiIndex.from_tuples(wavelengths)
137  mask_frame = pd.DataFrame(mask, index=mask_index)
138  wavelengths = mask_frame[mask_frame].dropna().index.to_frame()
139  self.features.append( (function, wavelengths.values.T) )
140 
141  # Add any additional manually specified features to the list
142  additional = [
143  (partial(PEAK, wavelengths=self.wavelengths), [
144  (450, 520), # Peak in blue
145  (550, 600), # Peak in green
146  (620, 670), # Peak in red
147  (680, 720), # Peak in nir
148  ])
149  ]
150 
151  # Ensure required wavelengths are available, and use the exact values available
152  for function, wavelengths in additional:
153  valid = lambda wvls: all([has_band(w, self.wavelengths) for w in wvls])
154  select_wvls = closest = np.array(wavelengths)[list(map(valid, wavelengths))]
155 
156  if len(select_wvls):
157  closest = closest_wavelength(select_wvls.flatten(), self.wavelengths)
158  wavelengths = closest.reshape(select_wvls.shape).T
159  self.features.append( (function, wavelengths) )
160 
161  # Run the transformation to add the labels being used
162  self._transform(X, labels=self.labels)
163  print(f'\nFit RatioTransformer with {len(self.labels)} features')
164 
165 
166  def _transform(self, X, *args, labels=None, **kwargs):
167  Rrs_df = pd.DataFrame(X, columns=self.wavelengths)
168  Rrs = lambda cols: Rrs_df[cols].values
169  x_new = [] if self.excl_Rrs else [Rrs_df.values]
170 
171  if labels is None:
172  labels = []
173 
174  if not self.excl_Rrs:
175  labels.extend( list(map(str, self.wavelengths)) )
176 
177  for function, wavelengths in self.features:
178  x_new.append( function(Rrs, labels, *wavelengths) )
179  x_new = np.concatenate(x_new, axis=-1)
180  x_new = np.maximum(-1e8, np.minimum(1e8, x_new))
181 
182  # Sanity checks
183  assert(x_new.shape[-1] == len(labels)), f'Mismatch between features and labels: {x_new.shape[-1]} vs {len(labels)}'
184  assert(len(self.labels)== len(labels)), f'Mismatch between old and new labels: {len(self.labels)} vs {len(labels)}'
185  return x_new
def add_labels(n_bands, label_function)
def has_band(w, waves, tol=5)
Definition: utils.py:30
list(APPEND LIBS ${PGSTK_LIBRARIES}) add_executable(atteph_info_modis atteph_info_modis.c) target_link_libraries(atteph_info_modis $
Definition: CMakeLists.txt:7
def get_unique_features(features, threshold=0.025, chunksize=10, _bottom=False)
Definition: utils.py:7
double precision function f(R1)
Definition: tmd.lp.f:1454
def __init__(self, wavelengths, sensor, *args, excl_Rrs=False, all_ratio=False, cutoff=0.5, **kwargs)
def closest_wavelength(k, waves, validate=True, tol=5, squeeze=False)
Definition: utils.py:24
def PEAK(Rrs, min_wvl, max_wvl, wavelengths=None)
def spearmanr(x, y)
Definition: utils.py:58