OB.DAAC Logo
NASA Logo
Ocean Color Science Software

ocssw V2022
get_output_name_utils.py
Go to the documentation of this file.
1 """
2 Utility functions for get_output_name.
3 """
4 
5 import calendar
6 import datetime
7 import os
8 import re
9 import sys
10 import types
11 #from lxml.html.diff import start_tag
12 
14 import mlp.obpg_data_file as obpg_data_file
15 import seadasutils.ProcUtils as ProcUtils
16 import seadasutils.time_utils as time_utils
17 
18 __author__ = 'byang'
19 
20 __version__ = '1.0.6-2021-09-29'
21 
22 DEBUG = False
23 #DEBUG = True
24 
def convert_str_to_int(short_str):
    """
    Convert short_str to an integer and return it.

    When the value cannot be parsed (int() raises ValueError) the program is
    terminated through sys.exit() with an explanatory message.
    """
    try:
        return int(short_str)
    except ValueError:
        sys.exit("Error! Unable to convert {0} to integer.".format(short_str))
35 
def find_extension(format_data_list, search_term):
    """
    Return the extension in format_data_list that search_term selects.

    Each record of format_data_list is indexed at positions 0, 1 and 2, which
    this function treats as (index, format name, extension).  search_term is
    compared, ignoring case, with the index column when it parses as an
    integer and with the format name column otherwise.  If that finds nothing,
    search_term is compared with the extensions themselves.

    search_term may be a string or an int.  Returns the matching extension,
    or None when nothing matches.
    """
    # Normalise once so that a real int (2) behaves like its text form ("2").
    term = str(search_term).lower()
    try:
        int(term)
        column = 0
    except ValueError:
        column = 1

    for record in format_data_list:
        if record[column].lower() == term:
            return record[2]

    # No index/name match: allow the caller to pass the extension itself.
    for record in format_data_list:
        if record[2].lower() == term:
            return record[2]
    return None
59 
def get_base_element(data_files, target_program, clopts):
    """
    Return the leading element of an output name: a platform indicator
    followed by a time stamp.

    The indicator is taken from get_platform_indicator() for the first file.
    With several input files it is replaced by 'CROSS_MODIS.', 'CROSS_VIIRS.'
    or 'CROSS_SENSOR.' at the first file whose own indicator does not contain
    it.  For binning and mapping programs the time part comes from
    get_l3_time(); otherwise it is the first file's start time (its L0 time
    stamp for Level 0 input) written as YYYYMMDD, 'T', and the remainder of
    the stamp.  clopts is accepted but not used here.
    """
    indicator = get_platform_indicator(data_files[0])
    if len(data_files) > 1:
        for data_file in data_files:
            candidate = get_platform_indicator(data_file)
            if indicator in candidate:
                continue
            # The first mismatch decides which cross-platform marker is used.
            if 'MODIS' in candidate and 'MODIS' in indicator:
                indicator = 'CROSS_MODIS.'
            elif 'VIIRS' in candidate and 'VIIRS' in indicator:
                indicator = 'CROSS_VIIRS.'
            else:
                indicator = 'CROSS_SENSOR.'
            break

    if 'bin' in target_program or target_program in ('mapgen', 'l3mapgen'):
        return indicator + get_l3_time(data_files)

    first_file = data_files[0]
    if 'Level 0' in first_file.file_type:
        time_stamp = get_l0_timestamp(first_file.name)
    else:
        time_stamp = first_file.start_time
    start = datetime.datetime.strptime(time_stamp, "%Y%j%H%M%S")
    return "{}{}T{}".format(indicator, start.strftime("%Y%m%d"), time_stamp[7:])
89 
def get_end_doy_year(data_files):
    """
    Return (day of year, year) as integers for the end of the last file in
    data_files.

    The file's end_time is used when it is set (characters 0-3 are the year,
    4-6 the day of year); otherwise the 'End Day' and 'End Year' metadata
    entries are used.  Exits through sys.exit() when neither is available.
    """
    last_file = data_files[-1]
    if last_file.end_time:
        year = convert_str_to_int(last_file.end_time[0:4])
        day = convert_str_to_int(last_file.end_time[4:7])
        return day, year
    if last_file.metadata:
        day = convert_str_to_int(last_file.metadata['End Day'])
        year = convert_str_to_int(last_file.metadata['End Year'])
        return day, year
    sys.exit('Error! Cannot find end time for {0}'.format(last_file.name))
108 
def _get_data_files_info(flf):
    """
    Read the list file flf and return a sorted list of ObpgDataFile objects.

    Each line of flf is stripped and treated as a file path.  Paths that do
    not exist are skipped; for the others the file type, sensor and
    start/end times are obtained from ObpgFileTyper.
    """
    with open(flf, 'rt') as file_list_file:
        candidates = [entry.strip() for entry in file_list_file.readlines()]

    data_file_list = []
    for filename in candidates:
        if not os.path.exists(filename):
            continue
        file_typer = mlp.get_obpg_file_type.ObpgFileTyper(filename)
        file_type, sensor = file_typer.get_file_type()
        stime, etime = file_typer.get_file_times()
        data_file_list.append(
            obpg_data_file.ObpgDataFile(filename, file_type, sensor,
                                        stime, etime))
    # Ordering is whatever ObpgDataFile defines for comparison.
    data_file_list.sort()
    return data_file_list
127 
def get_l0_timestamp(l0_file_name):
    """
    Return the date/time stamp (YYYYDDDHHMMSS text) for a Level 0 file.

    If a companion '<l0_file_name>.const' file exists, the value following
    'starttime=' in it is passed to ProcUtils.date_convert(value, 't', 'j').
    Otherwise the stamp is parsed from the file's base name, which must look
    like 'MOD00.[AP]YYYYDDD.HHMM...' or '[AP]YYYYDDDHHMMSS.L0_xxx'; seconds
    are always reported as '00' in that case.

    Exits through sys.exit() when no time stamp can be determined.
    """
    # Todo: Add check & handling for time stamp in metadata.
    const_path = l0_file_name + '.const'
    if os.path.exists(const_path):
        start_time = None
        with open(const_path) as constructor_file:
            for line in constructor_file:
                if 'starttime=' in line:
                    # Keep everything after the '=' (the original indexed a
                    # single character here, which truncated the value).
                    start_time = line[line.find('=') + 1:].strip()
                    break
        if start_time is None:
            sys.exit("Unable to determine time stamp for input file {0}".
                     format(l0_file_name))
        return ProcUtils.date_convert(start_time, 't', 'j')

    input_basename = os.path.basename(l0_file_name)
    matched_name = re.match(r"MOD00.?.[AP](\d\d\d\d\d\d\d).(\d\d\d\d)",
                            input_basename)
    if matched_name is None:
        matched_name = re.match(
            r"[AP](\d\d\d\d\d\d\d)(\d\d\d\d)\d\d\.L0_.{3}", input_basename)
    if matched_name is None:
        sys.exit("Unable to determine time stamp for input file {0}".
                 format(l0_file_name))
    return matched_name.group(1) + matched_name.group(2) + '00'
153 
def get_days_diff(day1, day2):
    """
    Return the whole number of days obtained by subtracting day2 from day1.
    """
    elapsed = day1 - day2
    return elapsed.days
159 
def get_end_day_year(metadata):
    """
    Return (end day, end year) for a file, read from metadata.

    The day is looked up under 'End Day', then 'Period End Day', and finally
    derived from 'time_coverage_end' (month at [5:7], day at [8:10], year at
    [0:4]) through time_utils.convert_month_day_to_doy().  The year is looked
    up under 'End Year', then 'Period End Year', then the first four
    characters of 'time_coverage_end'.  Exits through sys.exit() when a value
    cannot be determined.
    """
    for day_key in ('End Day', 'Period End Day'):
        if day_key in metadata:
            eday = convert_str_to_int(metadata[day_key])
            break
    else:
        if 'time_coverage_end' not in metadata:
            sys.exit('Error! Cannot determine end day.')
        coverage_end = metadata['time_coverage_end']
        eday = time_utils.convert_month_day_to_doy(
            coverage_end[5:7], coverage_end[8:10], coverage_end[0:4])

    for year_key in ('End Year', 'Period End Year'):
        if year_key in metadata:
            eyear = convert_str_to_int(metadata[year_key])
            break
    else:
        if 'time_coverage_end' not in metadata:
            sys.exit('Error! Cannot determine end year.')
        eyear = convert_str_to_int(metadata['time_coverage_end'][0:4])
    return eday, eyear
187 
def get_extension(program, clopts):
    """
    Return the output file extension (with leading dot) for program.

    Without an 'oformat' entry in clopts the program's default extension is
    returned.  With one, the format is resolved through read_fileformats()
    and find_extension(): an unresolvable format falls back to '.hdf', a
    format the program supports is returned as its extension, and an
    unsupported one terminates the program.

    Exits through sys.exit() for an unsupported oformat or for a program
    that has no known extension.
    """
    extension_dict = {'level 1a': '.hdf',
                      'modis_L1A': '.hdf',
                      'geo': '.hdf',
                      'modis_GEO': '.hdf',
                      'geolocate_hawkeye': '.hdf',
                      'geolocate_viirs': '.hdf',
                      'l1aextract': '.nc',
                      'l1aextract_modis': '.nc',
                      'l1aextract_viirs': '.nc',
                      'l1aextract_seawifs': '.nc',
                      'l1brsgen': '.hdf',
                      'l1mapgen': '.png',
                      'level 1b': '.hdf',
                      'modis_L1B': '.hdf',
                      'calibrate_viirs': '.hdf',
                      'l1bgen': '.hdf',
                      'l2gen': '.nc',
                      'l2extract': '.nc',
                      'l2brsgen': '.hdf',
                      # 'l2mapgen': '.ppm',
                      'l2bin': '.nc',
                      'l3bin': '.nc',
                      'l3mapgen': '.nc',
                      'mapgen': '.png'}
    # Programs that can write more than their default format.  Every other
    # program only accepts its default extension.
    multi_format_dict = {'l1brsgen': {'.hdf', '.bin', '.png', '.ppm'},
                         'l1mapgen': {'.ppm', '.png', '.tiff'},
                         'l2brsgen': {'.hdf', '.png', '.ppm'},
                         # 'l2mapgen': {'ppm', 'png', 'tiff'},
                         'l3mapgen': {'.nc', '.ppm', '.png', '.tiff'},
                         'mapgen': {'.nc', '.ppm', '.png', '.tiff'}}

    if program not in extension_dict:
        # Previously this fell through to an unbound local variable.
        sys.exit('Error! Cannot determine the extension for program {0}.'.
                 format(program))

    if not clopts or 'oformat' not in clopts or clopts['oformat'] is None:
        return extension_dict[program]

    file_formats = read_fileformats()
    found = find_extension(file_formats, clopts['oformat'])
    if not found:
        # Unknown format name/index: keep the historical '.hdf' fallback.
        return '.hdf'

    format_ext = '.' + found
    # A set is used for every program so that membership is an exact match
    # (a plain string would accept substrings such as '.h').
    allowed = multi_format_dict.get(program, {extension_dict[program]})
    if format_ext in allowed:
        return format_ext
    sys.exit('Error! The oformat {0} is not supported by {1}.'.format(
        clopts['oformat'], program))
254 
def get_extra_bits(data_files, target_program, clopts):
    """
    Return the extra name elements that follow the level in an output name.

    For binning programs (name contains 'bin'), 'l3mapgen' and 'mapgen' the
    result is a period marker -- '.DAY' when the start and end dates are the
    same day, '.8D' when they are seven days apart, '.CU' otherwise --
    followed by '.<suite>' when clopts has a suite and, for the mapping
    programs, '.<resolution>' when clopts has a resolution.

    For l2gen the result is '.<suite>' (when given) plus '.sub' when the
    first input file's name contains 'sub'.  Any other program gets ''.

    Exits through sys.exit() when the start or end date cannot be used.
    """
    extra_bits = ''
    has_suite = bool(clopts) and 'suite' in clopts \
        and clopts['suite'] is not None

    if 'bin' in target_program or target_program in ('l3mapgen', 'mapgen'):
        sday, syear = get_start_doy_year(data_files)
        eday, eyear = get_end_doy_year(data_files)
        if not (sday and syear and sday > 0 and syear > 0):
            sys.exit('Error! Cannot process start date data: year = '
                     '{0}, doy = {1}'.format(syear, sday))
        sdate = datetime.datetime.strptime(str(syear) + '-' + str(sday),
                                           '%Y-%j')
        if not (eday and eyear and eday > 0 and eyear > 0):
            sys.exit('Error! Cannot process end date data: year = {0}, '
                     'doy = {1}'.format(eyear, eday))
        edate = datetime.datetime.strptime(str(eyear) + '-' + str(eday),
                                           '%Y-%j')

        days_diff = get_days_diff(edate, sdate)
        if days_diff == 0:
            extra_bits = '.DAY'
        elif days_diff == 7:
            extra_bits = '.8D'
        else:
            extra_bits = '.CU'
        if has_suite:
            extra_bits += '.' + clopts['suite']
        # Both 'l3mapgen' and 'mapgen' contain 'mapgen'.
        if 'mapgen' in target_program and clopts \
                and 'resolution' in clopts \
                and clopts['resolution'] is not None:
            extra_bits += '.' + clopts['resolution']
    elif 'l2gen' in target_program:
        if has_suite:
            extra_bits = '.' + clopts['suite']
        if 'sub' in data_files[0].name:
            extra_bits += '.sub'
    return extra_bits
304 
def get_l3_time(data_files):
    """
    Return the time element of an L3 output name for data_files.

    The start date comes from get_start_doy_year() and the end date from
    get_end_doy_year().  When both fall on the same day the result is the
    start date as YYYYMMDD; otherwise it is the start date immediately
    followed by the end date (YYYYMMDDYYYYMMDD).

    Exits through sys.exit() when the start or end date cannot be used.
    """
    sday, syear = get_start_doy_year(data_files)
    eday, eyear = get_end_doy_year(data_files)
    if not (sday and syear and sday > 0 and syear > 0):
        sys.exit('Error! Cannot process start date data: year = {0}'
                 ', doy = {1}'.format(syear, sday))
    sdate = datetime.datetime.strptime(str(syear) + '-' + str(sday), '%Y-%j')
    if not (eday and eyear and eday > 0 and eyear > 0):
        sys.exit('Error! Cannot process end date data: year = {0}, '
                 'doy = {1}'.format(eyear, eday))
    edate = datetime.datetime.strptime(str(eyear) + '-' + str(eday), '%Y-%j')

    l3_time = '%d%02d%02d' % (syear, sdate.month, sdate.day)
    # The difference must be a day count; the original compared a tuple with
    # 0, so the single-day form could never be selected.
    if get_days_diff(edate, sdate) != 0:
        l3_time += '%d%02d%02d' % (eyear, edate.month, edate.day)
    return l3_time
333 
def get_level(program, data_files):
    """
    Return the level element (with leading dot) for the target program.

    Most programs map directly to a fixed level string.  'l1mapgen' depends
    on the first input file's type: '.L1A_MAP' for Level 1A input and
    '.L1B_MAP' for Level 1B input.

    Exits through sys.exit() when no level can be determined.
    """
    level_dict = {'level 1a': '.L1A',
                  'modis_L1A': '.L1A',
                  'geo': '.GEO',
                  'modis_GEO': '.GEO',
                  'geolocate_hawkeye': '.GEO',
                  'geolocate_viirs': '.GEO',
                  'l1aextract': '.L1A.sub',
                  'l1aextract_modis': '.L1A.sub',
                  'l1aextract_viirs': '.L1A.sub',
                  'l1aextract_seawifs': '.L1A.sub',
                  'l1brsgen': '.L1BRS',
                  'level 1b': '.L1B',
                  'modis_L1B': '.L1B',
                  'calibrate_viirs': '.L1B',
                  'l1bgen': '.L1B',
                  'l2gen': '.L2',
                  'l2extract': '.L2.sub',
                  'l2brsgen': '.L2BRS',
                  # 'l2mapgen': '.L2',
                  'l2bin': '.L3b',
                  'l3bin': '.L3b',
                  'l3mapgen': '.L3m',
                  'mapgen': '.L3m'}
    if program == 'geo' and data_files[0].sensor.find('VIIRS') != -1:
        program = 'geolocate_viirs'

    if program in level_dict:
        return level_dict[program]

    if program == 'l1mapgen':
        file_type = data_files[0].file_type
        if 'Level 1A' in file_type:
            # Leading dot added to match '.L1B_MAP' and every other level.
            return '.L1A_MAP'
        if 'Level 1B' in file_type:
            return '.L1B_MAP'
        sys.exit('Error! Cannot determine the level for l1mapgen input '
                 'of type {0}.'.format(file_type))

    # Previously this fell through to an unbound local variable.
    sys.exit('Error! Cannot determine the level for program {0}.'.format(
        program))
371 
def get_output_name(data_files, target_program, clopts):
    """
    Return the output file name built from the input files, the target
    program and the command line options (e.g. oformat).

    The name is the concatenation of the base element, the level, the extra
    bits and the extension.  A 'mapgen' run on a single Level 1 file is
    named as an 'l1mapgen' run.
    """
    if clopts and not isinstance(clopts, dict):
        # Assuming the clopts passed in is a group of options from optparse.
        clopts = vars(clopts)

    if (target_program == 'mapgen'
            and data_files[0].file_type.find('Level 1') != -1
            and len(data_files) == 1):
        target_program = 'l1mapgen'

    name_parts = [
        get_base_element(data_files, target_program, clopts),
        get_level(target_program, data_files),
        get_extra_bits(data_files, target_program, clopts),
        get_extension(target_program, clopts),
    ]
    return ''.join(name_parts)
385 
def get_platform_indicator(data_file):
    """
    Return the platform/instrument prefix used at the start of output names.

    The result has the form '<PLATFORM>_<SENSOR>.' or
    '<PLATFORM>_<SENSOR>_<DATATYPE>.' (for example 'AQUA_MODIS.' or
    'SEASTAR_SEAWIFS_GAC.').  A file whose name already contains
    'CROSS_SENSOR', 'CROSS_MODIS' or 'CROSS_VIIRS' yields that marker
    followed by a dot.

    data_file must provide 'name' and 'sensor'; its 'metadata' is consulted
    for SeaWiFS and OLCI data types and may be missing or None.

    Exits through sys.exit() when the platform cannot be determined.
    """
    indicator_dict = {'Aquarius': 'SACD',
                      'CZCS': 'NIMBUS7',
                      'GOCI': 'COMS',
                      'HICO': 'ISS',
                      'MERIS': 'ENVISAT',
                      'MOS': 'IRSP3',
                      'HAWKEYE': 'SEAHAWK1',
                      'OCIS': 'PACE',
                      'OCM2': 'OCEANSAT2',
                      'OCTS': 'ADEOS',
                      'OLCI S3A': 'S3A',
                      'OLCI S3B': 'S3B',
                      # 'OLI L8': 'LANDSAT8',
                      # 'OLI L9': 'LANDSAT9',
                      'OSMI': 'KOMSAT1',
                      'SeaWiFS': 'SEASTAR',
                      'SGLI': 'GC1'}

    for cross_tag in ('CROSS_SENSOR', 'CROSS_MODIS', 'CROSS_VIIRS'):
        if cross_tag in data_file.name:
            return cross_tag + '.'

    sensor_name = data_file.sensor
    # Files may carry no metadata; treat that as "no entries" instead of
    # failing on the membership tests below.
    metadata = getattr(data_file, 'metadata', None) or {}
    data_type = ''

    if sensor_name in indicator_dict:
        sensor = sensor_name.upper()
        indicator = indicator_dict[sensor_name]
        if 'OCTS' in sensor:
            data_type = 'GAC'
        elif 'MERIS' in sensor:
            if 'FRS' in data_file.name:
                data_type = 'FRS'
            elif 'RR' in data_file.name:
                data_type = 'RR'
        elif 'SEAWIFS' in sensor:
            if 'GAC' in data_file.name:
                data_type = 'GAC'
            elif 'infile' in metadata and 'GAC' in metadata['infile']:
                data_type = 'GAC'
            else:
                data_type = 'LAC'
        elif 'OLCI' in sensor:
            sensor = 'OLCI'
            if 'data_type' in metadata:
                data_type = metadata['data_type']
            elif 'EFR' in data_file.name:
                data_type = 'EFR'
            elif 'ERR' in data_file.name:
                data_type = 'ERR'
    elif 'MODIS' in sensor_name:
        sensor = 'MODIS'
        if 'Aqua' in sensor_name:
            indicator = 'AQUA'
        elif 'Terra' in sensor_name:
            indicator = 'TERRA'
        else:
            sys.exit('Error! Could not determine platform indicator for '
                     'MODIS file {0}.'.format(data_file.name))
    elif 'VIIRS' in sensor_name:
        sensor = 'VIIRS'
        if 'J1' in sensor_name:
            indicator = 'JPSS1'
        elif 'NPP' in sensor_name:
            indicator = 'SNPP'
        else:
            sys.exit('Error! Could not determine platform indicator for '
                     'VIIRS file {0}.'.format(data_file.name))
    elif 'MSI' in sensor_name:
        sensor = 'MSI'
        if sensor_name == 'MSI S2A':
            indicator = 'S2A'
        elif sensor_name == 'MSI S2B':
            indicator = 'S2B'
        else:
            sys.exit('Error! Could not determine platform indicator for '
                     'MSI file {0}.'.format(data_file.name))
    elif 'OLI' in sensor_name:
        sensor = 'OLI'
        if sensor_name == 'OLI L8':
            indicator = 'LANDSAT8'
        elif sensor_name == 'OLI L9':
            indicator = 'LANDSAT9'
        else:
            # The message used to say "MSI file" (copied from the branch
            # above).
            sys.exit('Error! Could not determine platform indicator for '
                     'OLI file {0}.'.format(data_file.name))
    else:
        sys.exit('Error! Platform indicator, {0}, for {1} is not known.'.
                 format(sensor_name, data_file.name))

    if data_type:
        return indicator + '_' + sensor + '_' + data_type + '.'
    return indicator + '_' + sensor + '.'
500 
def get_start_doy_year(data_files):
    """
    Return (day of year, year) as integers for the start of the first file
    in data_files.

    The file's start_time is used when it is set (characters 0-3 are the
    year, 4-6 the day of year); otherwise the 'Start Day' and 'Start Year'
    metadata entries are used.  Exits through sys.exit() when neither is
    available.
    """
    first_file = data_files[0]
    # Test the attribute that is actually read (the original tested
    # end_time before slicing start_time).
    if first_file.start_time:
        year = convert_str_to_int(first_file.start_time[0:4])
        day = convert_str_to_int(first_file.start_time[4:7])
    elif first_file.metadata:
        day = convert_str_to_int(first_file.metadata['Start Day'])
        year = convert_str_to_int(first_file.metadata['Start Year'])
    else:
        sys.exit('Error! Cannot find start time for {0}'.format(
            first_file.name))
    return day, year
519 
def get_start_day_year(metadata):
    """
    Return (start day, start year) for a file, read from metadata.

    The day is looked up under 'Start Day', then 'Period Start Day', and
    finally derived from 'time_coverage_start' (month at [5:7], day at
    [8:10], year at [0:4]) through time_utils.convert_month_day_to_doy().
    The year is looked up under 'Start Year', then 'Period Start Year', then
    the first four characters of 'time_coverage_start'.  Exits through
    sys.exit() when a value cannot be determined.
    """
    for day_key in ('Start Day', 'Period Start Day'):
        if day_key in metadata:
            sday = convert_str_to_int(metadata[day_key])
            break
    else:
        if 'time_coverage_start' not in metadata:
            sys.exit('Error! Cannot determine start day.')
        coverage_start = metadata['time_coverage_start']
        sday = time_utils.convert_month_day_to_doy(
            coverage_start[5:7], coverage_start[8:10], coverage_start[0:4])

    for year_key in ('Start Year', 'Period Start Year'):
        if year_key in metadata:
            syear = convert_str_to_int(metadata[year_key])
            break
    else:
        if 'time_coverage_start' not in metadata:
            sys.exit('Error! Cannot determine start year.')
        syear = convert_str_to_int(metadata['time_coverage_start'][0:4])
    return sday, syear
547 
548 def get_time_period_extension(start_date_str, end_date_str):
549  """
550  Return the part of the file extension based on the time period within the
551  start and end dates.
552  """
553  first_date = datetime.datetime.strptime(start_date_str, '%Y%j%H%M%S')
554  last_date = datetime.datetime.strptime(end_date_str, '%Y%j%H%M%S')
555  date_diff = last_date - first_date
556  if date_diff.days == 0:
557  time_ext = '.DAY'
558  elif date_diff.days == 7:
559  time_ext = '.8D'
560  elif is_month(first_date, last_date):
561  time_ext = '.MO'
562  elif is_year(first_date, last_date):
563  time_ext = '.YR'
564  else:
565  time_ext = '.CU'
566  return time_ext
567 
def is_month(day1, day2):
    """
    Return True if day1 and day2 are the first and last day of one calendar
    month; False otherwise.

    Both dates must lie in the same year and month (the year comparison
    mirrors is_year(); without it a span such as 2020-01-01 .. 2021-01-31
    was reported as a month).
    """
    if (day1.year, day1.month) != (day2.year, day2.month):
        return False
    last_day_of_month = calendar.monthrange(day1.year, day1.month)[1]
    return day1.day == 1 and day2.day == last_day_of_month
574 
def is_year(day1, day2):
    """
    Return True if day1 is January 1 and day2 is December 31 of the same
    year; False otherwise.
    """
    starts_on_jan_1 = (day1.month, day1.day) == (1, 1)
    ends_on_dec_31 = (day2.month, day2.day) == (12, 31)
    return day1.year == day2.year and starts_on_jan_1 and ends_on_dec_31
581 
583  """
584  Returns a tuple containing the file formats.
585  """
586 
587  format_file_path = os.path.join(os.getenv('OCDATAROOT'), 'common',
588  'file_formats.txt')
589  if os.path.exists(format_file_path):
590  file_formats = []
591  format_file_hndl = open(format_file_path)
592  inp_lines = format_file_hndl.readlines()
593  format_file_hndl.close()
594  for line in inp_lines:
595  cleaned_line = line.strip()
596  if cleaned_line[0] != '#':
597  #format = get_format(cleaned_line)
598  file_format = tuple(cleaned_line.split(':'))
599 
600  file_formats.append(file_format)
601 
602  return file_formats
603  else:
604  err_msg = 'Error! Cannot find file {0}.'.format(format_file_path)
605  sys.exit(err_msg)
list(APPEND LIBS ${PGSTK_LIBRARIES}) add_executable(atteph_info_modis atteph_info_modis.c) target_link_libraries(atteph_info_modis $
Definition: CMakeLists.txt:7
def get_base_element(data_files, target_program, clopts)
def get_level(program, data_files)
def get_extra_bits(data_files, target_program, clopts)
def get_extension(program, clopts)
const char * str
Definition: l1c_msi.cpp:35
def get_output_name(data_files, target_program, clopts)
def get_time_period_extension(start_date_str, end_date_str)
def find_extension(format_data_list, search_term)