OB.DAAC Logo
NASA Logo
Ocean Color Science Software

ocssw V2022
merge_afrt.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 '''
3 * NAME: merge_afrt.py
4 *
5 * DESCRIPTION: Executable program to merge afrt (radiative
6 * transfer) NetCDF4 files
7 
8 * Created on October 09, 2018
9 
10 * Author: Samuel Anderson
11 * heavily modified on August 05, 2019 by Sean Bailey
12 '''
13 
14 import os
15 import sys
16 import re
17 import time
18 from datetime import datetime as dt
19 import argparse
20 import logging
21 LOG = logging.getLogger('afrt-merge')
22 from netCDF4 import Dataset
23 import numpy as np
24 import collections
25 
26 
29 
30 #sensorInfoFile = os.path.join(os.environ['OCDATAROOT'] , 'common','SensorInfo.json')
31 #with open(sensorInfoFile, 'r') as myfile:
32 # sensorDefs=myfile.read()
33 #
34 
37 
38 sensors = collections.defaultdict(dict)
39 
40 sensors['modisa']['name'] = "MODIS-Aqua"
41 sensors['modisa']['instrument'] = "MODIS"
42 sensors['modisa']['platform'] = "Aqua"
43 sensors['modist']['name'] = "MODIS-Terra"
44 sensors['modist']['instrument'] = "MODIS"
45 sensors['modist']['platform'] = "Terra"
46 sensors['seawifs']['name'] = "SeaWiFS"
47 sensors['seawifs']['instrument'] = "SeaWiFS"
48 sensors['seawifs']['platform'] = "Orb-View 2"
49 sensors['meris']['name'] = "MERIS"
50 sensors['meris']['instrument'] = "MERIS"
51 sensors['meris']['platform'] = "ENVISAT"
52 sensors['octs']['name'] = "OCTS"
53 sensors['octs']['instrument'] = "OCTS"
54 sensors['octs']['platform'] = "ADEOS-I"
55 sensors['czcs']['name'] = "CZCS"
56 sensors['czcs']['instrument'] = "CZCS"
57 sensors['czcs']['platform'] = "NIMBUS-7"
58 sensors['viirsn']['name'] = "VIIRS-SNPP"
59 sensors['viirsn']['instrument'] = "VIIRS"
60 sensors['viirsn']['platform'] = "SNPP"
61 sensors['viirsj1']['name'] = "VIIRS-JPSS1"
62 sensors['viirsj1']['instrument'] = "VIIRS"
63 sensors['viirsj1']['platform'] = "JPSS1"
64 sensors['aviris']['name'] = "AVIRIS"
65 sensors['aviris']['instrument'] = "AVIRIS"
66 sensors['aviris']['platform'] = "ER-2"
67 sensors['hico']['name'] = "HICO"
68 sensors['hico']['instrument'] = "HICO"
69 sensors['hico']['platform'] = "ISS"
70 sensors['goci']['name'] = "GOCI"
71 sensors['goci']['instrument'] = "GOCI"
72 sensors['goci']['platform'] = "COMSAT-2"
73 sensors['misr']['name'] = "MISR"
74 sensors['misr']['instrument'] = "MISR"
75 sensors['misr']['platform'] = "Terra"
76 
77 def sortbymodel(nameRegEx,fname):
78  parsed = re.match(nameRegEx,fname)
79  if parsed:
80  return int(parsed.group(2)) + 0.1*int(parsed.group(3))
81  else:
82  return 0
83 
84 def createMergedNetCDF(ofile,dimensions,variables,sensor):
85 
86  LOG.info("Writing netCDF file: %s" % ofile)
87  # Create the netCDF file and add dimensions
88  if os.path.exists(ofile):
89  LOG.error("%s exists! Bailing out..." % ofile)
90  return None
91 
92  ncfile = Dataset(ofile, 'w',format='NETCDF4')
93  for dim in dimensions:
94  ncfile.createDimension(dimensions[dim]['name'], dimensions[dim]['size'])
95 
96  for var in variables:
97  LOG.debug("var: %s dimensions: %s" %(var,variables[var]['dimensions']))
98  LOG.debug(variables[var]['chunksize'])
99  vardims = []
100  for dim in variables[var]['dimensions']:
101  vardims.append(dimensions[dim]['name'])
102 
103  ncfile.createVariable(var.lower(), variables[var]['dtype'],
104  tuple(vardims), zlib=True,
105  chunksizes=variables[var]['chunksize'])
106 
107  # Set global attributes
108  ncfile.title = "Ahmad-Fraser Radiatitve Transfer Results"
109  ncfile.description = "Ahmad-Fraser Radiatitve Transfer results for " + sensors[sensor]['name']
110  ncfile.date_created = (dt.fromtimestamp(time.time()).strftime('%Y-%m-%dT%H:%M:%SZ')).encode('ascii')
111  ncfile.creator_name = "NASA/GSFC/OBPG"
112  ncfile.creator_email = "data@oceancolor.gsfc.nasa.gov"
113  ncfile.creator_url = "https://oceancolor.gsfc.nasa.gov"
114  ncfile.product_name=os.path.basename(ofile).encode("ascii")
115  ncfile.history = ' '.join(sys.argv).encode("ascii")
116 
117 
118  return ncfile
119 
120 
123 
124 def main():
125 
126  description = '''Merge AFRT NetCDF output files.'''
127  __version__ = "1.0.0"
128 
129  parser = argparse.ArgumentParser(description=description)
130  parser.add_argument('--version', action='version', version='%(prog)s ' + __version__)
131  parser.add_argument('-i','--input-dir',
132  dest="inputDir" ,
133  type=str,
134  required=True,
135  help="The input directory")
136  parser.add_argument('-s','--sensor',
137  dest="sensor",
138  choices=['modisa',
139  'modist',
140  'viirsn',
141  'viirsj1',
142  'seawifs',
143  'meris',
144  'aviris',
145  'octs',
146  'czcs',
147  'misr',
148  'oci',
149  'goci',
150  'hico'],
151  default='modis-aqua', help='Sensor for which to generate the table')
152  # Optional arguments
153  parser.add_argument('-n','--nameregex',
154  dest="nameRegEx",
155  type=str,
156  default= "^.*_WL(\d+)_MD(\d+)_TA(\d+)_WD(\d+)-(\d+).nc$",
157  help="regular expression for parsing filename to get parameter dimensions")
158 
159  parser.add_argument('-o','--output-dir',
160  dest="outputDir" ,
161  type=str,
162  default=os.getcwd(),
163  help="The output directory")
164 
165  parser.add_argument('-v', '--verbose',
166  dest='verbosity',
167  action="count",
168  default=0,
169  help='each occurrence increases verbosity 1 level from ERROR: -v=WARNING -vv=INFO -vvv=DEBUG')
170 
171 
172  # Parse the arguments from the command line
173  args = parser.parse_args()
174 
175  # Set up the logging levels
176  levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
177  logging.basicConfig(level = levels[args.verbosity])
178 
179  # Check for mutually exclusive options
180  LOG.info('Merging radiative transfer tables in %s' % (args.inputDir) )
181 
182  if os.path.isdir(args.outputDir):
183  o_path = args.outputDir
184  elif os.path.isdir(os.path.dirname(args.outputDir)):
185  o_path = args.outputDir
186  os.mkdir(o_path)
187  else:
188  print ("Output path invalid")
189  return
190  if not os.path.isdir(args.inputDir):
191  os.mkdir(args.inputDir)
192 # if args.fname:
193 # with open(args.fname) as fp:
194 # contents = fp.readlines()
195 # for fn in contents:
196 # fn = fn.lstrip()
197 # fn = fn.strip("\n")
198 # command = "wget -N --content-disposition https://oceandata.sci.gsfc.nasa.gov/cgi/gethiddenfile/" + fn
199 # print (command)
200 # subprocess.run(command, shell=True)
201 
202  listing = os.listdir(args.inputDir)
203  listing.sort(key=lambda fname:sortbymodel(args.nameRegEx,fname))
204  wvls = set()
205  models = set()
206  winds = set()
207  taus = set()
208  inputFiles = []
209  for fname in listing:
210  fpath = args.inputDir + "/" + fname
211  if os.path.isdir(fpath):
212  continue
213  parsed = re.match(args.nameRegEx,fname)
214  if parsed:
215  inputFiles.append(fpath)
216  wvls.add(int(parsed.group(1)))
217  models.add(int(parsed.group(2)))
218  taus.add(int(parsed.group(3)))
219  winds.add(int(parsed.group(4)))
220  winds.add(int(parsed.group(5)))
221 
222  variables = collections.defaultdict(dict)
223  dimensions = collections.defaultdict(dict)
224 
225  dimensions['dim_size']['name'] = 'nmodels'
226  dimensions['dim_size']['size'] = max(models)
227  dimensions['dim_wave']['name'] = 'nwavelengths'
228  dimensions['dim_wave']['size'] = max(wvls)
229  dimensions['dim_wind']['name'] = 'nwindspeeds'
230  dimensions['dim_wind']['size'] = max(winds)
231  dimensions['dim_tau']['name'] = 'ntaus'
232  dimensions['dim_tau']['size'] = max(taus)
233 
234  rtFile = Dataset(inputFiles[0],'r')
235  dimensions['dim_phi']['name'] = 'nphi'
236  dimensions['dim_phi']['size'] = rtFile.dimensions['dim_phi'].size
237  dimensions['dim_sza']['name'] = 'nsolz'
238  dimensions['dim_sza']['size'] = rtFile.dimensions['dim_sza'].size
239  dimensions['dim_the']['name'] = 'nsenz'
240  dimensions['dim_the']['size'] = rtFile.dimensions['dim_the'].size
241  dimensions['dim_stokes']['name'] = 'nstokes'
242  dimensions['dim_stokes']['size'] = rtFile.dimensions['dim_stokes'].size
243 
244  for var in rtFile.variables:
245  lcvar = var.lower()
246  if lcvar == 'aot':
247  lcvar = 'tau_aerosol'
248  if var == 'THETA':
249  lcvar = 'sensor_zenith'
250  if var == 'SZA':
251  lcvar = 'solar_zenith'
252  if var == 'PHI':
253  lcvar = 'relative_azimuth'
254  # skip a few...tau_aerosol - it's the same as AOT
255  # sbar, tau_h20 and tau_ozone are currently zero
256  # tau_total...yeah, we can do simple addition...
257 
258  if var in ('TAU_AEROSOL','TAU_H2O','TAU_OZONE','TAU_TOTAL','SBAR'):
259  continue
260  if 'index' not in var:
261  variables[lcvar]['chunksize'] = None
262  if var == 'TAU_RAYLEIGH':
263  variables[lcvar]['dimensions'] = rtFile.variables['WAVELENGTH'].dimensions
264  else:
265  variables[lcvar]['dimensions'] = rtFile.variables[var].dimensions
266  if var in ('WIND_SPEED','SZA','THETA','PHI'):
267  variables[lcvar]['dtype'] = 'u2'
268  else:
269  variables[lcvar]['dtype'] = 'f4'
270  if var in ('BOA_DOWN_DIRECT','SBAR','BOA_DOWN_DIFFUSE','TOA_UP_DIFFUSE','HEM_REFLECTANCE','TRANSMITTANCE'):
271  variables[lcvar]['chunksize'] = (1,1,1,1,dimensions['dim_sza']['size'])
272  if var in ('TOA_RADIANCE','BOA_RADIANCE'):
273  variables[lcvar]['chunksize'] = (1,1,1,1,dimensions['dim_sza']['size'], dimensions['dim_the']['size'],1,1)
274 
275  rtFile.close()
276 
277  rstr = "_WL%(l0)d-%(l1)d_MD%(s0)d-%(s1)d_TA%(t0)d-%(t1)d_WD%(w0)d-%(w1)d" % \
278  {"l0":min(wvls),"l1":max(wvls),"s0":min(models),"s1":max(models),"t0":min(taus),"t1":max(taus),"w0":min(winds),"w1":max(winds)}
279 
280  ofilepath = args.outputDir + "/" + args.sensor + rstr + '.nc'
281  ncFile = createMergedNetCDF(ofilepath,dimensions,variables,args.sensor)
282 
283  if ncFile:
284  fixed = True
285  tauset = set()
286  wvlset = set()
287  rayset = set()
288  fcnt = 0
289  for fpath in inputFiles:
290  fcnt += 1
291  LOG.info("Processing [%d of %d] %s ..." % (fcnt,len(inputFiles),fpath))
292  if fpath != inputFiles[0]:
293  fixed = False
294 
295  rtFile = Dataset(fpath,'r')
296  wvl_idx = np.unique(rtFile.variables['wavelength_index'][:]) -1
297  model_idx = np.unique(rtFile.variables['model_index'][:]) - 1
298  tau_idx = np.unique(rtFile.variables['tau_index'][:]) - 1
299  wind_idx = np.unique(rtFile.variables['wind_index'][:]) - 1
300  # TODO: should the order of the processing change in ODPS from the
301  # current 'windspeed as the last element', the dimension indexing
302  # will need to be modified to accomodate. The current assumption:
303  # 1 model, 1 wavelength, 1 tau, N windsppeds
304  LOG.debug("wvl_idx: %d\tmodel_idx: %d\ttau_idx: %d" % (wvl_idx,model_idx,tau_idx))
305  LOG.debug(wind_idx)
306 
307 
308  for var in rtFile.variables:
309  if var in ('TAU_AEROSOL','TAU_H2O','TAU_OZONE','TAU_TOTAL','SBAR'):
310  continue
311  lcvar = var.lower()
312  # These are fixed for all inputs, so only do the first
313  if var == 'THETA' and fixed:
314  ncFile.variables['sensor_zenith'][:] = rtFile.variables[var][:]
315  if var == 'SZA' and fixed:
316  ncFile.variables['solar_zenith'][:] = rtFile.variables[var][:]
317  if var == 'PHI' and fixed:
318  ncFile.variables['relative_azimuth'][:] = rtFile.variables[var][:]
319  # TODO: since the current ODPS processing runs all windspeeds in a single run,
320  # wind_speed is also fixed. Should the order change, this will need to be
321  # changed as well...
322  if var == 'WIND_SPEED' and fixed:
323  ncFile.variables['wind_speed'][:] = rtFile.variables[var][:]
324 
325  if var == 'WAVELENGTH':
326  wvldata = rtFile.variables[var][:]
327  if wvldata[0] not in wvlset:
328  ncFile.variables[lcvar][wvl_idx] = wvldata[0]
329  wvlset.add(wvldata[0])
330 
331  if var == 'AOT':
332  taudata = rtFile.variables[var][:]
333  if taudata[0] not in tauset:
334  ncFile.variables['tau_aerosol'][tau_idx] = taudata[0]
335  tauset.add(taudata[0])
336 
337  if var in ('TAU_RAYLEIGH'):
338  raydata = rtFile.variables[var][0,:,0,0]
339  if raydata[0] not in rayset:
340  ncFile.variables[lcvar][wvl_idx] = raydata[0]
341  rayset.add(raydata[0])
342 
343  if var in ('BOA_DOWN_DIRECT','SBAR','BOA_DOWN_DIFFUSE','TOA_UP_DIFFUSE','HEM_REFLECTANCE','TRANSMITTANCE'):
344  ncFile.variables[lcvar][model_idx,wvl_idx,tau_idx,:,:] = rtFile.variables[var][:]
345 
346  if var in ('TOA_RADIANCE','BOA_RADIANCE'):
347  ncFile.variables[lcvar][model_idx,wvl_idx,tau_idx,:,:,:,:] = rtFile.variables[var][:]
348 
349  rtFile.close()
350 
351  ncFile.close()
352 
353  LOG.info("Merge complete for " + ofilepath)
354  LOG.info('Exiting...')
355  return 0
356 
357 if __name__=='__main__':
358  sys.exit(main())
def sortbymodel(nameRegEx, fname)
Definition: merge_afrt.py:77
def main()
Main Function #.
Definition: merge_afrt.py:31
def createMergedNetCDF(ofile, dimensions, variables, sensor)
Definition: merge_afrt.py:84