Ocean Color Science Software

ocssw V2022
l1aconvert_seawifs.py

#! /usr/bin/env python3

import numpy as np
from pyhdf.SD import SD as HDF
from pyhdf.SD import SDC
from netCDF4 import Dataset as NC
from datetime import datetime, timedelta, timezone
import argparse
import os

__version__ = "1.1 2024-09-12"


HDF_READ = SDC.READ
NC_WRITE = "w"
NC_FORMAT = "NETCDF4"

# Do not copy these to NetCDF.
# Year, day, and milliseconds will be made into an ISO date.
SKIP_ATTRIBUTES = (
    "Start Time",   # don't need these because of time_coverage_start and end
    "Start Year",
    "Start Day",
    "Start Millisec",
    "End Time",
    "End Year",
    "End Day",
    "End Millisec",
    "Scene Center Time",
    "Node Crossing Time",
    "Processing Time",
    "Latitude Units",
    "Longitude Units",
    "Scene Center Latitude",
    "Scene Center Longitude",
    "Scene Center Solar Zenith",
    "Upper Left Latitude",
    "Upper Left Longitude",
    "Upper Right Latitude",
    "Upper Right Longitude",
    "Lower Left Latitude",
    "Lower Left Longitude",
    "Lower Right Latitude",
    "Lower Right Longitude",
    "Northernmost Latitude",
    "Southernmost Latitude",
    "Westernmost Longitude",
    "Easternmost Longitude",
    "Start Center Latitude",
    "Start Center Longitude",
    "End Center Latitude",
    "End Center Longitude",
    "NORAD Line 1",
    "NORAD Line 2",
    "Pixels per Scan Line",
    "Number of Scan Lines",
    "Scene Center Scan Line",
    "Processing Log",
    "Gain 1 Saturated Pixels",
    "Gain 2 Saturated Pixels",
    "Gain 1 Non-Saturated Pixels",
    "Gain 2 Non-Saturated Pixels",
    "Zero Pixels",
    "Mean Gain 1 Radiance",
    "Mean Gain 2 Radiance",
    "Filled Scan Lines",
    "FF Missing Frames",
    "SDPS Missing Frames",
    "Replacement Flag",
    "Processing Control"
)


# Python has no switch statement, so this dict is used to get the NetCDF type
# equivalent of each HDF4 type.
# The NetCDF API sets a variable's data type based on the NumPy type.
# key = HDF4 type
# value = NetCDF (NumPy) type
GET_NC_TYPES = {
    SDC.UINT8: np.uint8,      # ubyte
    SDC.FLOAT32: np.float32,  # float
    SDC.FLOAT64: np.float64,  # double
    SDC.INT16: np.int16,      # short
    SDC.UINT16: np.uint16,    # ushort
    SDC.UINT32: np.uint32,    # uint
    SDC.INT32: np.int32,      # int
    SDC.CHAR: 'S1'            # string stored as 8-bit char array
}
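
# Illustrative lookup (for reference only): an HDF4 INT16 dataset maps to
# NumPy's int16, which the netCDF4 library stores as a NetCDF short:
#     GET_NC_TYPES.get(SDC.INT16)  # -> np.int16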

# Global variables to make reading/writing easier across functions
hdfFile = None
ncFile = None

# Convert HDF naming to NetCDF naming,
# e.g. "Data Center" becomes "data_center"
def convertToNetcdfName(hdfname: str):
    ncname = hdfname.lower().replace(" ", "_")
    if ncname == "number_of_scan_lines": return "scans"
    elif ncname == "pixels_per_scan_line": return "pixels"
    elif ncname == "number_of_knees": return "knees"
    elif ncname == "number_of_bands": return "bands"
    elif ncname == "number_of_sides": return "mirror_sides"
    elif ncname == "number_of_tilts": return "tilts"
    elif ncname == "number_of_gains": return "gains"
    elif ncname == "start_node": return "startDirection"
    elif ncname == "end_node": return "endDirection"
    elif ncname == "msec": return "scan_time"
    elif ncname.startswith('lac'): return ncname.replace('lac', 'LAC')
    else: return ncname
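
# Illustrative conversions (hypothetical inputs):
#     convertToNetcdfName("Number of Scan Lines")  # -> "scans"
#     convertToNetcdfName("Data Type")             # -> "data_type"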

# Map an HDF dimension to a NetCDF dimension name.
# Unnamed HDF dimensions arrive here as their size (e.g. "2"), so the owning
# dataset (sds) is used to pick an appropriate NetCDF name.
def convertToNetcdfDimension(dim: str, sds: str):
    # HDF dimensions are not all named, so we need to give them NetCDF names
    if (dim == "1"): return None
    elif (dim == "2"):
        if (sds == "tilt_ranges"): return "scan_index"
        if (sds == "sc_id"): return "sc_ids"
    elif (dim == "3"): return "vector_elements"
    elif (dim == "4"):
        if (sds == "eng_qual") or (sds == "s_flags"): return "quality_flags"
        if (sds == "sc_ttag"): return "sc_time_tags"
    elif (dim == "6"): return "scan_track_coefficients"
    elif (dim == "8"): return "navigation_flags"
    elif (dim == "20"): return "tilts"
    elif (dim == "32"): return "inst_discrete_telemetries"
    elif (dim == "40"):
        if (sds == "inst_ana"): return "inst_analog_telemetries"
        if (sds == "sc_ana"): return "sc_analog_telemetries"
        if (sds == "sc_dis"): return "sc_discrete_telemetries"
    elif (dim == "44"): return "inst_telemetries"
    elif (dim == "775"): return "sc_sohs"
    else:
        ncdim = dim.lower().replace(" ", "_")
        if ncdim == "number_of_scan_lines": return "scans"
        elif ncdim == "pixels_per_scan_line": return "pixels"
        elif ncdim == "number_of_knees": return "knees"
        elif ncdim == "number_of_bands": return "bands"
        elif ncdim == "number_of_sides": return "mirror_sides"
        elif ncdim == "number_of_tilts": return "tilts"
        elif ncdim == "number_of_gains": return "gains"
        else: return ncdim
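
# Illustrative mappings (hypothetical inputs):
#     convertToNetcdfDimension("40", "inst_ana")            # -> "inst_analog_telemetries"
#     convertToNetcdfDimension("Number of Scan Lines", "")  # -> "scans"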

# Copy global attributes from the HDF4 file into the new NetCDF file
def copyGlobalAttributes(iFile: str, oFile: str):
    global hdfFile
    global ncFile
    global errorAt

    try:
        print("Copying global attributes")
        globalAttr = hdfFile.attributes()

        for name, val in globalAttr.items():
            errorAt = name

            if (name in SKIP_ATTRIBUTES): continue

            # handle the different data types when copying

            # strings are written as-is
            if (isinstance(val, str)):
                ncFile.setncattr(convertToNetcdfName(name), val)
            # numbers are converted to NumPy types first
            else:
                val = np.float32(val) if isinstance(val, float) else np.int32(val)
                ncFile.setncattr(convertToNetcdfName(name), val)

        # convert node crossing time
        errorAt = "node_crossing_time"
        node_crossing_str = globalAttr.get("Node Crossing Time")
        nct = node_crossing_str[:13]
        fsec = node_crossing_str[13:17]
        node_crossing = datetime.strptime(nct, "%Y%j%H%M%S")
        ncFile.setncattr("node_crossing_time", str(node_crossing.strftime('%Y-%m-%dT%H:%M:%S.') + fsec[:3] + 'Z'))

        # convert scene center time
        errorAt = "scene_center_time"
        scene_center_str = globalAttr.get("Scene Center Time")
        sct = scene_center_str[:13]
        fsec = scene_center_str[13:17]
        scene_center = datetime.strptime(sct, "%Y%j%H%M%S")
        ncFile.setncattr("scene_center_time", str(scene_center.strftime('%Y-%m-%dT%H:%M:%S.') + fsec[:3] + 'Z'))

        # make an ISO date for the start time
        errorAt = "time_coverage_start"
        syear = globalAttr.get("Start Year")
        sday = globalAttr.get("Start Day")
        ssec = float("{:.3f}".format(globalAttr.get("Start Millisec") / 1000))
        fssec = str(ssec).split('.')[1]

        start_of_year = datetime(syear, 1, 1, tzinfo=timezone.utc)
        stime = start_of_year + timedelta(days=(sday - 1), seconds=ssec)

        ncFile.setncattr("time_coverage_start", str(stime.strftime('%Y-%m-%dT%H:%M:%S.') + fssec + 'Z'))

        # make an ISO date for the end time
        errorAt = "time_coverage_end"
        eyear = globalAttr.get("End Year")
        eday = globalAttr.get("End Day") - 1
        esec = float("{:.3f}".format(globalAttr.get("End Millisec") / 1000))
        fesec = str(esec).split('.')[1]

        end_of_year = datetime(eyear, 1, 1, tzinfo=timezone.utc)

        etime = end_of_year + timedelta(days=eday, seconds=esec)

        ncFile.setncattr("time_coverage_end", str(etime.strftime('%Y-%m-%dT%H:%M:%S.') + fesec + 'Z'))

        # add the converter version
        ncFile.setncattr("l1aconvert_seawifs_version", __version__)

        # NetCDF has a history global attribute; apart from the input and
        # output file names, it is static for a conversion
        ncFile.setncattr("history", "{}; python3 l1aconvert_seawifs {} {}".format(globalAttr.get("Processing Control")[:-1], iFile, oFile))

        # add date_created
        errorAt = "date_created"
        create_date = datetime.now(tz=timezone.utc)

        ncFile.setncattr("date_created", str(create_date.strftime('%Y-%m-%dT%H:%M:%SZ')))

        # some static metadata
        ncFile.setncattr("cdm_data_type", "swath")
        ncFile.setncattr("Conventions", "CF-1.8, ACDD-1.3")
        ncFile.setncattr("creator_email", "data@oceancolor.gsfc.nasa.gov")
        ncFile.setncattr("creator_name", "NASA/GSFC/OBPG")
        ncFile.setncattr("creator_url", "https://oceancolor.gsfc.nasa.gov")
        ncFile.setncattr("institution", "NASA Goddard Space Flight Center, Ocean Biology Processing Group")
        ncFile.setncattr("instrument", "SeaWiFS")
        ncFile.setncattr("keywords_vocabulary", "NASA Global Change Master Directory (GCMD) Science Keywords")
        ncFile.setncattr("license", "https://www.earthdata.nasa.gov/engage/open-data-services-and-software/data-and-information-policy")
        ncFile.setncattr("naming_authority", "gov.nasa.gsfc.oceancolor")
        ncFile.setncattr("platform", "Orbview-2")
        ncFile.setncattr("processing_level", "L1A")
        ncFile.setncattr("processing_version", "V2")
        ncFile.setncattr("product_name", "{}".format(os.path.basename(oFile)))
        ncFile.setncattr("project", "Ocean Biology Processing Group")
        ncFile.setncattr("publisher_email", "data@oceancolor.gsfc.nasa.gov")
        ncFile.setncattr("publisher_name", "NASA/GSFC/OB.DAAC")
        ncFile.setncattr("publisher_url", "https://oceancolor.gsfc.nasa.gov")
        ncFile.setncattr("standard_name_vocabulary", "CF Standard Name Table v79")

    except:
        print(f"-E- Error copying global attributes. Was processing <{errorAt}> from HDF4 when the error was caught.")
        exit()
    errorAt = ""  # reset

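# Worked example (hypothetical values): Start Year=1999, Start Day=32 and
# Start Millisec=43200500 yield
# time_coverage_start = "1999-02-01T12:00:00.5Z".
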
# Given a NetCDF variable, assign the attributes in attrDict.
# attrDict is the attribute dictionary from the HDF4 dataset; it is copied
# over to the NetCDF variable.
def assignNcVarAttr(ncVar, attrDict: dict, dataType):
    for attr in attrDict.keys():
        if (attr == "long_name" or attr == "long name"):
            ncVar.long_name = attrDict.get(attr)
        if (attr == "units"):
            if ncVar.name != 'l1a_data':
                ncVar.units = attrDict.get(attr)

        # valid range is valid_min and valid_max in NetCDF
        if (attr == "valid_range" or attr == "valid range"):

            # get the valid range
            validRange = attrDict.get(attr)

            # the valid range can be a string tuple such as "(1,3)";
            # slice the string to get rid of the parentheses and split on the comma.
            # otherwise, it is already a list
            if isinstance(validRange, str):
                validRange = attrDict.get(attr)[1:-2].split(",")

            # Based on the NetCDF type, make sure the values are NumPy types
            npValidMin = None
            npValidMax = None
            if (dataType == np.float32):    # float
                npValidMin = np.float32(validRange[0])
                npValidMax = np.float32(validRange[1])

            elif (dataType == np.float64):  # double
                npValidMin = np.float64(validRange[0])
                npValidMax = np.float64(validRange[1])

            elif (dataType == np.uint16):   # ushort
                npValidMin = np.uint16(validRange[0])
                npValidMax = np.uint16(validRange[1])

            elif (dataType == np.uint32):   # uint
                npValidMin = np.uint32(validRange[0])
                npValidMax = np.uint32(validRange[1])

            elif (dataType == np.int16):    # short
                npValidMin = np.int16(validRange[0])
                npValidMax = np.int16(validRange[1])

            else:                           # int
                npValidMin = np.int32(validRange[0])
                npValidMax = np.int32(validRange[1])

            # assign them to valid_min and valid_max
            ncVar.valid_min = npValidMin
            ncVar.valid_max = npValidMax

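# Illustrative call (hypothetical variable and attribute dict): a valid range
# of [0, 1023] on a short (int16) variable becomes valid_min=0, valid_max=1023:
#     assignNcVarAttr(someNcVar, {"long_name": "level-1A data", "valid_range": [0, 1023]}, np.int16)
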
def getChunking(ncDims):
    global ncFile
    # preferred chunk size for each NetCDF dimension
    sizes = {
        'tilts': 2,
        'scan_index': 1,
        'pixel_index': 1,
        'knees': 5,
        'sc_time_tags': 4,
        'inst_analog_telemetries': 4,
        'scans': 32,
        'inst_discrete_telemetries': 2,
        'gains': 1,
        'quality_flags': 1,
        'mirror_sides': 2,
        'sc_discrete_telemetries': 4,
        'inst_telemetries': 4,
        'navigation_flags': 8,
        'scan_track_coefficients': 6,
        'bands': 8,
        'sc_ids': 2,
        'sc_sohs': 25,
        'sc_analog_telemetries': 4,
        'pixels': 128,
        'vector_elements': 3,
        'matrix_column_elements': 3,
        'matrix_row_elements': 3,
    }
    chunks = []
    for dim in ncDims:
        dimsize = len(ncFile.dimensions[dim])
        # never request a chunk larger than the dimension itself
        if sizes[dim] <= dimsize:
            chunks.append(sizes[dim])
        else:
            chunks.append(1)
    return chunks
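
# Illustrative result (hypothetical granule with 248 scans and 248 pixels):
#     getChunking(('scans', 'pixels'))  # -> [32, 128]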

# Retrieve each HDF4 dataset and gather all the information required to build
# the NetCDF version of it
def copyDatasets():
    global hdfFile
    global ncFile
    global errorAt
    skipSDS = (
        'ntilts',
        'entry_year',
        'entry_day',
        'ref_year',
        'ref_day',
        'ref_minute'
    )
    try:
        print("Copying datasets/variables...")

        datasetNames = hdfFile.datasets().keys()
        for name in datasetNames:
            if name in skipSDS:
                continue
            errorAt = name

            # HDF4: read the data
            currSet = hdfFile.select(name)
            hdfDataType = currSet.info()[3]  # index 3 gives the dataset's data type
            hdfDims = currSet.dimensions().keys()  # get dimension names
            data = currSet.get()  # retrieve the data
            hdfDatasetAttr = currSet.attributes()  # variable attrs like long_name, units, etc.

            # NetCDF: write the data
            # get the NetCDF data type based on the HDF type
            ncDatatype = GET_NC_TYPES.get(hdfDataType)
            # get the dimensions to be set as a tuple
            ncDims = tuple(map(lambda dim: convertToNetcdfDimension(dim, name), hdfDims))

            if name == "tilt_lats" or name == "tilt_lons":
                ncDims = ('tilts', 'scan_index', 'pixel_index')
            if name == "sen_mat":
                ncDims = ('scans', 'matrix_column_elements', 'matrix_row_elements')

            # create the variable and then assign the data
            chunks = getChunking(ncDims)
            newVariable = ncFile.createVariable(convertToNetcdfName(name), ncDatatype, ncDims, chunksizes=chunks, zlib=True)
            newVariable[:] = data  # slice the entire variable and assign it

            # assign the attributes to the NetCDF variable. ncVar is an object and can be
            # passed by reference, so the function takes care of it
            assignNcVarAttr(newVariable, hdfDatasetAttr, ncDatatype)
            if name == 'msec':
                newVariable.setncattr("units", "milliseconds")
                newVariable.setncattr("comment", "milliseconds since start of day, day boundary crossing indicated if value is smaller than previous scan")

    except Exception as e:
        print(f"-E- Error copying datasets/variables. Error occurred with HDF4 dataset named <{errorAt}>")
        print(f"Reason: {e}")
        exit()

# Add a CF-compliant "time" variable (seconds since 1970-01-01), derived from
# the per-scan millisecond-of-day values and time_coverage_start
def addTimeVariable():
    global ncFile

    try:
        print("adding CF-compliant time variable...")

        msecs = ncFile['scan_time'][:]
        timevar = np.zeros(len(msecs), np.float64)
        time_coverage_start = datetime.fromisoformat(ncFile.time_coverage_start[:23])
        day_start = datetime.strptime(time_coverage_start.strftime('%Y%m%d'), '%Y%m%d')
        for i, msec in enumerate(msecs):
            seconds = msec / 1000.
            scantime = day_start + timedelta(seconds=seconds)
            if scantime < time_coverage_start:
                scantime = scantime + timedelta(days=1)
            timevar[i] = scantime.strftime('%s.%f')
        chunksize = [32]
        if len(ncFile.dimensions['scans']) < chunksize[0]:
            chunksize[0] = 1
        timeVariable = ncFile.createVariable('time', np.float64, 'scans', chunksizes=chunksize, zlib=True)
        timeVariable[:] = timevar
        timeVariable.setncattr("long_name", "time")
        timeVariable.setncattr("units", "seconds since 1970-1-1")

    except Exception as e:
        print("-E- Error adding the CF-compliant time variable.")
        print(f"Reason: {e}")
        exit()

# Runs at the end of the program, closing both files.
def closeFiles():
    global hdfFile
    global ncFile

    print("Closing HDF4 File...")
    hdfFile.end()
    print("Closing NetCDF File...")
    ncFile.close()


# Set the NetCDF dimensions by extracting dimension data from the variables.
# The pyhdf lib does not have a way to get only the dims; you need to
# go through the variables to get them.
def setDimensions():
    global hdfFile
    global ncFile
    global errorAt

    errorAt = "Setting Dimensions"
    # create a set so that when parsing all the variables with their dims,
    # dims do not get repeated in the set
    dimSet = set()
    dimSet.add(("scan_index", 2))
    dimSet.add(("pixel_index", 2))
    dimSet.add(("sc_ids", 2))
    dimSet.add(("vector_elements", 3))
    dimSet.add(("matrix_column_elements", 3))
    dimSet.add(("matrix_row_elements", 3))
    dimSet.add(("quality_flags", 4))
    dimSet.add(("sc_time_tags", 4))
    dimSet.add(("scan_track_coefficients", 6))
    dimSet.add(("navigation_flags", 8))
    dimSet.add(("tilts", 20))
    dimSet.add(("inst_discrete_telemetries", 32))
    dimSet.add(("inst_analog_telemetries", 40))
    dimSet.add(("sc_analog_telemetries", 40))
    dimSet.add(("sc_discrete_telemetries", 40))
    dimSet.add(("inst_telemetries", 44))
    dimSet.add(("sc_sohs", 775))
    dimSet.add(("bands", 8))
    dimSet.add(("mirror_sides", 2))
    dimSet.add(("gains", 4))
    dimSet.add(("knees", 5))
    try:
        errorAt = "extract dimension details from HDF file"
        # get key, value pairs of all the HDF datasets
        hdfDatasets = hdfFile.datasets()
        var = "l1a_data"
        errorAt = f"extract {var} from HDF file"
        varInfo = hdfDatasets[var]  # returns something like: (('bands',), (8,), 22, 0)
        dimNames = varInfo[0]  # tuple of dim names for var
        dimVals = varInfo[1]   # tuple of dim values for the dims

        # go through each dim and save it into the set
        for i in range(len(dimNames)):
            name = dimNames[i]
            val = dimVals[i]
            newDim = (convertToNetcdfName(name), val)
            dimSet.add(newDim)

        # after getting all the dims, go through and set them in the NetCDF file
        for dim in dimSet:
            errorAt = f"set {dim} in the NetCDF file"
            name = dim[0]
            val = dim[1]
            ncFile.createDimension(name, val)

    except:
        print(f"-E- Error copying dimensions. Was trying to {errorAt} when the error was caught.")
        exit()

def main():
    print(f"l1aconvert_seawifs {__version__}")

    # get global references to the file variables
    global ncFile
    global hdfFile

    parser = argparse.ArgumentParser(description='''
    Given an HDF4 Scientific Dataset (SD), convert it
    into a NetCDF (NC) file format with the same name.
    If the optional argument [ofile] name is given,
    output using the ofile name.
    ''')

    parser.add_argument("ifile", type=str, help="input HDF4 file")
    parser.add_argument("ofile", nargs='?', type=str, help="optional output file name; default = input + '.nc'")
    parser.add_argument("--doi", "-d", type=str, default=None, help="DOI string for metadata")

    # Parse the command-line arguments;
    # if ofile isn't given, then use the input filename with a .nc extension
    args = parser.parse_args()
    fileName = args.ifile
    oFileName = fileName + ".nc" if args.ofile is None else args.ofile

    print(f"Input File:\t{fileName}")
    print(f"Output File:\t{oFileName}\n")

    # Open the HDF4 file for reading:
    try:
        hdfFile = HDF(fileName, HDF_READ)
        print(f"Opening file:\t{fileName}")
    except:
        print(f"\n-E- Error opening file named: {fileName}.\n Make sure the file type is HDF4.\n")
        exit()

    # if opened successfully, create the NetCDF file to be written into
    ncFile = NC(oFileName, NC_WRITE, NC_FORMAT)
    setDimensions()
    copyGlobalAttributes(fileName, oFileName)
    if args.doi:
        ncFile.setncattr("doi", args.doi)
    copyDatasets()
    addTimeVariable()
    closeFiles()
    print("Conversion of L1A HDF4 to NetCDF successful")


if __name__ == '__main__':
    main()
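
# Example invocations (hypothetical file names and DOI):
#     python3 l1aconvert_seawifs.py S1998100120021.L1A_GAC
#     python3 l1aconvert_seawifs.py S1998100120021.L1A_GAC out.L1A_GAC.nc --doi 10.5067/EXAMPLE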