NASA Logo
Ocean Color Science Software

ocssw V2022
l1aconvert_czcs.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 
3 import argparse
4 import sys
5 from netCDF4 import Dataset as NC
6 from pyhdf.SD import SD as HDF
7 from pyhdf.SD import SDC
8 import numpy as np
9 from datetime import datetime
10 from datetime import datetime, timedelta
11 
__version__ = "1.0 2024-05-20"

# Constants
HDF_MODE = SDC.READ
NC_MODE = "w"
NC_FORMAT = "NETCDF4"


# Python has no switch statement, so this dict maps each HDF4 data type to its
# NetCDF equivalent. The NetCDF API sets a variable's data type from a NumPy type.
# The list is not exhaustive; it only includes the types used in CZCS files.
GET_NC_TYPES = {
    SDC.UINT8: np.uint8,      # ubyte
    SDC.FLOAT32: np.float32,  # float
    SDC.INT16: np.int16,      # short
    SDC.INT32: np.int32,      # int
}


# Global file handles, shared to make reading and writing easier across functions
hdfFile = None
ncFile = None
errorAt = ""  # name of whatever dataset the program was trying to copy/do
42 
43 
44 
def main():
    """Convert a HDF4 Scientific Dataset file into a NetCDF4 file.

    Command-line arguments:
        iFile: path of the HDF4 file to read.
        oFile: optional output path; defaults to ``iFile`` with ".nc" appended.

    Exits with status 1 if the input file cannot be opened or the output
    file cannot be created. Both files are closed even when one of the copy
    steps aborts the program.
    """
    global ncFile
    global hdfFile

    print(f"l1aconvert_czcs {__version__}")

    # Create an ArgumentParser object
    parser = argparse.ArgumentParser(description="Given a HDF4 Scientific Dataset (SD), convert it into NetCDF format with the same name.")

    # Define arguments
    parser.add_argument("iFile", type=str, help="HDF4 file")
    parser.add_argument("oFile", nargs='?', type=str, help="Optional Output file name")

    # Parse the command-line arguments
    # if oFile name is not given, then use the filename with .nc extension
    args = parser.parse_args()
    fileName = args.iFile
    oFileName = fileName + ".nc" if args.oFile is None else args.oFile

    print(f"\nInput file:\t{fileName}")
    print(f"Output file:\t{oFileName}\n")

    # Opening the input file
    try:
        hdfFile = HDF(fileName, HDF_MODE)
    except Exception:
        print(f"\n-E- Error opening file named: {fileName}.\n Make sure the filetype is hdf4.\n")
        sys.exit(1)
    print(f"Opening file:\t{fileName}")

    # On successful open, make a netcdf file to be written into.
    # The format must be passed by keyword: the third positional parameter
    # of the Dataset constructor is `clobber`, not `format`.
    try:
        ncFile = NC(oFileName, NC_MODE, format=NC_FORMAT)
    except Exception as err:
        print(f"\n-E- Error creating output file named: {oFileName}.\n {err}\n")
        hdfFile.end()
        sys.exit(1)

    try:
        copyGlobalAttributes(fileName, oFileName)
        # Dimensions have to exist before any variable that uses them is created.
        copyDimensions()
        copyDatasets()
    finally:
        # The copy steps abort via SystemExit on error; still release both files.
        closeFiles()

    print("Finished!")
84 
85 
86 
87 # Runs at the end of the program, closing both files.
def closeFiles():
    """Close the global HDF4 input file and the global NetCDF output file.

    The NetCDF file is closed even if ending access to the HDF4 file raises,
    so the output handle is always released.
    """
    global hdfFile
    global ncFile

    try:
        print("Closing HDF4 File...")
        hdfFile.end()
    finally:
        print("Closing NetCDF File...")
        ncFile.close()
96 
97 
98 
99 # Convert HDF naming to NetCDF naming
100 # ie: "Data Center" to "data_center"
def convertToNetcdfName(string:str):
    """Translate an HDF4 name into its NetCDF-style name.

    The names "3" and "6" are mapped to descriptive names; any other name is
    lower-cased and has each space replaced by an underscore,
    e.g. "Data Center" becomes "data_center".
    """
    # hdf has 3 and 6 as names, but in NetCDF they get a proper name
    properNames = {
        "3": "vector_elements",
        "6": "calibration_elements",
    }
    renamed = properNames.get(string)
    if renamed is not None:
        return renamed

    lowered = string.lower()
    return "_".join(lowered.split(" "))
109 
110 
111 
112 # Copy global attributes from the HDF4 file into the new NetCDF file
def _isoTimeFromYearDayMsec(year, day, msec):
    """Return the ISO date string for a year, day-of-year and millisecond-of-day."""
    startOfYear = datetime(int(year), 1, 1)
    # milliseconds= avoids the float division that seconds=msec/1000 needs
    moment = startOfYear + timedelta(days=int(day) - 1, milliseconds=float(msec))
    return moment.isoformat()


def copyGlobalAttributes(iFile:str, oFile:str):
    """Copy global attributes from the HDF4 file into the new NetCDF file.

    Attribute names are converted with convertToNetcdfName. String values are
    copied unchanged; numeric values are stored as float32 when they are
    floating point and as int32 otherwise. The start/end year, day and
    millisecond attributes are replaced by the ISO-formatted attributes
    "time_coverage_start" and "time_coverage_end". A "history" attribute and
    the converter version are added as well.

    Args:
        iFile: input file name, recorded in the "history" attribute.
        oFile: output file name, recorded in the "history" attribute.

    On any error a message naming the attribute being processed is printed
    and the program exits with status 1.
    """
    global hdfFile
    global ncFile
    global errorAt

    # Per CF Conventions, put unit descriptions and values in proper places.
    # Items in this list are already in the dimensions or described in the variables.
    # Some items are not needed (ie. Start Year, Day, etc. bc of time_coverage_start)
    IGNORE = [                                 # already in:
        "Pixel per Scan Line",                 # dimensions
        "Number of Scan Lines",                # dimensions
        "Number of Pixel Control Points",      # dimensions
        "Number of Scan Control Points",       # dimensions
        "Start Year",                          # made time_coverage_start and
        "Start Day",                           # time_coverage_end
        "Start Millisec",
        "End Year",
        "End Day",
        "End Millisec",

        # not sure about these because variable "slope" has this information
        # for each scan already.
        "Calibration Slope",
        "Calibration Intercept",
    ]

    try:
        print("Copying global attributes...")

        globalAttr = hdfFile.attributes()

        for name, val in globalAttr.items():
            errorAt = name

            if name in IGNORE:
                continue

            if not isinstance(val, str):
                # Decide by NumPy dtype so multi-valued float attributes are
                # not truncated to integers.
                isFloat = np.asarray(val).dtype.kind == "f"
                val = np.float32(val) if isFloat else np.int32(val)

            ncFile.setncattr(convertToNetcdfName(name), val)

        # after copying all the global attrs, make the isodate times
        errorAt = "time_coverage_start"
        startTime = _isoTimeFromYearDayMsec(
            globalAttr.get("Start Year"),
            globalAttr.get("Start Day"),
            globalAttr.get("Start Millisec"),
        )
        ncFile.setncattr("time_coverage_start", startTime)

        errorAt = "time_coverage_end"
        endTime = _isoTimeFromYearDayMsec(
            globalAttr.get("End Year"),
            globalAttr.get("End Day"),
            globalAttr.get("End Millisec"),
        )
        ncFile.setncattr("time_coverage_end", endTime)

        # netcdf has history global attribute, so add one here:
        # to convert a file, it's static except for the input and output files
        errorAt = "history"
        ncFile.setncattr("history", f"python3 l1aconvert_czcs {iFile} {oFile}")

        # add converter version into the global attributes
        errorAt = "l1aconvert_czcs_version"
        ncFile.setncattr("l1aconvert_czcs_version", __version__)

    except Exception as err:
        print(f"-E- Error copying global attributes. Was processing <{errorAt}> from HDF4 when error was caught.")
        print(f"    Reason: {err}")
        sys.exit(1)
    errorAt = ""  # reset
198 
199 
200 
201 # Open band1 dataset to copy number of scan lines and pixels per scan line
202 # Open cntl_pt_cols for number of pixel control points
203 # other dimensions are constant
def copyDimensions():
    """Create the NetCDF dimensions used by the converted datasets.

    Four dimensions have fixed sizes. The scan-line and pixel counts are read
    from the dimensions of the HDF4 dataset "band1", and the number of pixel
    control points from the dataset "cntl_pt_cols".

    On any error a message naming the dimension being copied is printed and
    the program exits with status 1.
    """
    global hdfFile
    global ncFile
    global errorAt

    try:
        print("Copying dimensions...")

        # these 2 dimensions were not named in HDF4, so giving it a name in NetCDF
        # datasets that uses these 2 are named 6 and 3.
        ncFile.createDimension("calibration_elements", 6)
        ncFile.createDimension("vector_elements", 3)

        # constant dims
        ncFile.createDimension("num_qual", 5)
        ncFile.createDimension("number_of_bands", 6)

        # Copy over number of scan lines and pixel per scan line data
        errorAt = "Number of Scan Lines & Pixel per Scan Line"
        currSet = hdfFile.select("band1")
        try:
            dims = currSet.dimensions()
            ncFile.createDimension("number_of_scan_lines", dims["Number of Scan Lines"])
            ncFile.createDimension("pixels_per_scan_line", dims["Pixels per Scan Line"])
        finally:
            # release the dataset handle even when a lookup above fails
            currSet.endaccess()

        # copy over number of pixel control points
        errorAt = "Number of Pixel Control Points"
        currSet = hdfFile.select("cntl_pt_cols")
        try:
            dims = currSet.dimensions()
            ncFile.createDimension("number_of_pixel_control_points", dims["Number of Pixel Control Points"])
        finally:
            currSet.endaccess()

    except Exception as err:
        print(f"-E- Error copying dimensions. Was trying to copy the dimension(s) <{errorAt}> when error was caught.")
        print(f"    Reason: {err}")
        sys.exit(1)
239 
240 
241 
242 # Given a NetCDF variable, assign the attributes in attrList.
243 # attrList is the attribute list from the HDF4 dataset and it is copying
244 # that over to the NetCDF version
245 #
246 # When assigning valid range, it is in string format. The dataType parameter
247 # helps with the slicing and converting it into the right range
def _parseValidRange(rawRange):
    """Return the (min, max) pair held by a valid-range attribute value.

    A string such as "(0,255)" is parsed by extracting its first two numbers,
    so surrounding parentheses, spaces or trailing padding do not matter.
    A non-string value is assumed to be an indexable (min, max) sequence.
    """
    import re

    if not isinstance(rawRange, str):
        return rawRange[0], rawRange[1]

    numbers = re.findall(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?", rawRange)
    if len(numbers) < 2:
        raise ValueError(f"cannot parse valid range from {rawRange!r}")
    return numbers[0], numbers[1]


def assignNcVarAttr(ncVar, attrDict:dict, dataType):
    """Copy selected HDF4 dataset attributes onto a NetCDF variable.

    "long_name" and "units" are copied as they are. A "valid range" or
    "valid_range" attribute is split into the NetCDF attributes valid_min and
    valid_max, converted to the variable's data type.

    Args:
        ncVar: NetCDF variable that receives the attributes.
        attrDict: attribute dictionary of the HDF4 dataset.
        dataType: NumPy type of the variable; float32, uint8 and int16 are
            used as given, anything else falls back to int32.
    """
    # pick the conversion once instead of repeating it for min and max
    supported = (np.float32, np.uint8, np.int16)
    rangeCast = next((npType for npType in supported if dataType == npType), np.int32)

    for attr, value in attrDict.items():
        if attr == "long_name":
            ncVar.long_name = value
        elif attr == "units":
            ncVar.units = value
        # valid range is valid_min and valid_max in netcdf
        elif attr == "valid range" or attr == "valid_range":
            low, high = _parseValidRange(value)
            ncVar.valid_min = rangeCast(low)
            ncVar.valid_max = rangeCast(high)
276 
277 
278 
279 # Retrieves each HDF4 dataset and gets all the required information to build
280 # the NetCDF version
def copyDatasets():
    """Copy every HDF4 dataset into the NetCDF file as a variable.

    For each dataset the data type is mapped through GET_NC_TYPES, the
    dimension names are converted with convertToNetcdfName, the data is
    written, and the dataset attributes are applied with assignNcVarAttr.

    On any error a message naming the dataset being copied is printed and
    the program exits with status 1.
    """
    global hdfFile
    global ncFile
    global errorAt

    try:
        print("Copying datasets/variables...")

        for name in hdfFile.datasets():
            errorAt = name

            # hdf4 getting data
            currSet = hdfFile.select(name)
            try:
                hdfDataType = currSet.info()[3]          # index 3 gives the datasets data type
                hdfDims = tuple(currSet.dimensions())    # dimension names
                data = currSet.get()                     # retrieves the data
                hdfDatasetAttr = currSet.attributes()    # variable attrs like long_name, units, etc.
            finally:
                # everything needed has been read; release the dataset handle
                currSet.endaccess()

            # netcdf writing data
            ncDatatype = GET_NC_TYPES.get(hdfDataType)
            if ncDatatype is None:
                raise TypeError(f"no NetCDF type mapped for HDF4 data type {hdfDataType}")
            ncDims = tuple(convertToNetcdfName(dim) for dim in hdfDims)
            newVariable = ncFile.createVariable(name, ncDatatype, ncDims)
            newVariable[:] = data

            # netcdf assigning attributes to the variables. NcVar is an object and can be
            # passed by reference, so the function takes care of it
            assignNcVarAttr(newVariable, hdfDatasetAttr, ncDatatype)

    except Exception as err:
        print(f"-E- Error copying datasets/variables. Error occured with HDF4 dataset named <{errorAt}>")
        print(f"    Reason: {err}")
        sys.exit(1)
314 
315 
316 
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
#define NC
Definition: l1_octs.c:42
void print(std::ostream &stream, const char *format)
Definition: PrintDebug.hpp:38
def copyGlobalAttributes(str iFile, str oFile)
def convertToNetcdfName(str string)
def assignNcVarAttr(ncVar, dict attrDict, dataType)
Definition: aerosol.c:136