Ocean Color Science Software

ocssw V2022
l1cextract.py
#! /usr/bin/env python3

# Extractor for L1C files

import argparse
import calendar
import datetime
import pathlib
import sys
import time

import netCDF4
import numpy as np

from seadasutils.pixlin_utils import pixlin
from seadasutils.setupenv import env
from seadasutils.netcdf_utils import ncsubset_vars

versionStr = "1.3 (2024-09-23)"
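
# Example invocations (hypothetical file names; the options are defined in
# the argument parser under __main__ below). Geographic limits and explicit
# pixel/line ranges are mutually exclusive:
#
#   l1cextract.py PACE_OCI.20240301T120000.L1C.nc subset.L1C.nc \
#       -n 45.0 -s 40.0 -w -80.0 -e -70.0
#   l1cextract.py PACE_OCI.20240301T120000.L1C.nc subset.L1C.nc \
#       --spixl 100 --epixl 200 --sline 50 --eline 400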

class extract:

    def __init__(self, ifile, ofile=None,
                 north=None, south=None, west=None, east=None,
                 spixl=None, epixl=None, sline=None, eline=None,
                 verbose=False):
        # inputs
        self.ifile = pathlib.Path(ifile)
        self.ofile = pathlib.Path(ofile)
        self.north = north
        self.south = south
        self.west = west
        self.east = east
        self.spixl = spixl
        self.epixl = epixl
        self.sline = sline
        self.eline = eline
        self.verbose = verbose

        # defaults
        self.runtime = None
        self.attrs = None

        # unused, but needed by setupenv.py
        self.dirs = {}
        self.ancdir = None
        self.curdir = False
        self.sensor = None
        env(self)  # run setupenv

    def runextract(self, subset):

        srcfile = self.ifile
        if srcfile.exists():
            dstfile = self.ofile
            if self.verbose:
                print('Extracting', srcfile)

            ncsubset_vars(srcfile, dstfile, subset, timestamp=self.runtime, verbose=self.verbose)

            # Read infile as netCDF dataset
            infile = netCDF4.Dataset(srcfile, 'r')
            # Read and write outfile as netCDF4 dataset
            outfile = netCDF4.Dataset(dstfile, 'r+')

            # Update nadir_bin: set it to -1 if the nadir bin falls past the
            # end of the extracted pixel range, otherwise shift it into the
            # extract's pixel coordinates
            try:
                if infile.nadir_bin:
                    nadir_bin = np.dtype('int32').type(infile.nadir_bin)
                    if nadir_bin > self.epixl:
                        outfile.nadir_bin = np.dtype('int32').type(-1)
                    else:
                        outfile.nadir_bin = np.dtype('int32').type(nadir_bin - (self.spixl + 1))
            except AttributeError:
                # input file has no nadir_bin attribute
                pass

            # Add extract_pixel/line_start/stop attributes (1-based), carrying
            # forward any offsets recorded by a previous extraction:
            if 'extract_pixel_start' in infile.ncattrs():
                outfile.extract_pixel_start = np.dtype('int32').type(infile.extract_pixel_start + self.spixl + 1)
                outfile.extract_pixel_stop = np.dtype('int32').type(infile.extract_pixel_stop + self.epixl + 1)
                outfile.extract_line_start = np.dtype('int32').type(infile.extract_line_start + self.sline + 1)
                outfile.extract_line_stop = np.dtype('int32').type(infile.extract_line_stop + self.eline + 1)
            else:
                outfile.extract_pixel_start = np.dtype('int32').type(self.spixl + 1)
                outfile.extract_pixel_stop = np.dtype('int32').type(self.epixl + 1)
                outfile.extract_line_start = np.dtype('int32').type(self.sline + 1)
                outfile.extract_line_stop = np.dtype('int32').type(self.eline + 1)
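
            # Worked example (illustrative): extracting --spixl 101 --epixl 200
            # from a full granule (run() has already converted these to
            # zero-based 100/199) records extract_pixel_start=101 and
            # extract_pixel_stop=200, i.e. the 1-based inclusive window within
            # the parent file.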

            # Modify time_coverage_start/end of the output file
            # (infile and outfile are still open from above)

            # Account for different file formats
            token = 0
            try:
                if 'nadir_view_time' in infile.groups['bin_attributes'].variables:
                    # Number of seconds at infile time_coverage_start/end
                    infile_start_sec = infile.groups['bin_attributes'].variables['nadir_view_time'][0]
                    # catch negative start time error
                    if np.ma.is_masked(infile_start_sec):
                        raise ValueError("iFile contains a negative start time for the variable nadir_view_time.")
                    infile_end_sec = infile.groups['bin_attributes'].variables['nadir_view_time'][infile.dimensions['bins_along_track'].size - 1]
                    # Number of seconds at outfile time_coverage_start/end
                    outfile_start_sec = outfile.groups['bin_attributes'].variables['nadir_view_time'][0]
                    outfile_end_sec = outfile.groups['bin_attributes'].variables['nadir_view_time'][outfile.dimensions['bins_along_track'].size - 1]

                    # Take infile time_coverage_start/end
                    infile_start_time = infile.time_coverage_start
                    infile_end_time = infile.time_coverage_end

                    # Parse year through seconds from infile time coverage start/end
                    start_form = datetime.datetime.strptime(infile_start_time[0:19], '%Y-%m-%dT%H:%M:%S')
                    end_form = datetime.datetime.strptime(infile_end_time[0:19], '%Y-%m-%dT%H:%M:%S')
                    token = 1
            except ValueError:
                print("iFile contains a negative start time for the variable nadir_view_time.")
                sys.exit(1)
            except (AttributeError, KeyError):
                # no bin_attributes group; fall back to geolocation_data/time_nadir
                if 'time_nadir' in infile.groups['geolocation_data'].variables:
                    # Number of seconds at infile time_coverage_start/end
                    infile_start_sec = infile.groups['geolocation_data'].variables['time_nadir'][0]
                    infile_end_sec = infile.groups['geolocation_data'].variables['time_nadir'][infile.dimensions['bins_along_track'].size - 1]
                    # Number of seconds at outfile time_coverage_start/end
                    outfile_start_sec = outfile.groups['geolocation_data'].variables['time_nadir'][0]
                    outfile_end_sec = outfile.groups['geolocation_data'].variables['time_nadir'][outfile.dimensions['bins_along_track'].size - 1]

                    # Take infile time_coverage_start/end
                    infile_start_time = infile.time_coverage_start
                    infile_end_time = infile.time_coverage_end

                    # Parse year through seconds from infile time coverage start/end
                    start_form = datetime.datetime.strptime(infile_start_time[0:19], '%Y-%m-%dT%H:%M:%S')
                    end_form = datetime.datetime.strptime(infile_end_time[0:19], '%Y-%m-%dT%H:%M:%S')
                    token = 1
            except Exception:
                # unrecognized format; leave the time attributes as copied
                pass

            if token == 1:
                # Convert infile time_coverage_start/end to seconds since the
                # epoch; the stamps are UTC, so use calendar.timegm() rather
                # than datetime.timestamp(), which would read the naive
                # datetimes as local time
                diff_sec_start = calendar.timegm(start_form.timetuple())
                diff_sec_end = calendar.timegm(end_form.timetuple())

                # Seconds between infile/outfile time_coverage_start/end
                diff_infile_outfile_start = outfile_start_sec - infile_start_sec
                diff_infile_outfile_end = outfile_end_sec - infile_end_sec

                # Shift the infile epoch seconds by the infile/outfile difference
                outfile_tot_start_sec = diff_sec_start + diff_infile_outfile_start
                outfile_tot_end_sec = diff_sec_end + diff_infile_outfile_end

                # Write the outfile time_coverage_start/end (UTC, ISO 8601)
                outfile.time_coverage_start = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(outfile_tot_start_sec))
                outfile.time_coverage_end = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(outfile_tot_end_sec))
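
            # Worked example (illustrative): if the granule's
            # time_coverage_start is 2024-03-01T12:00:00Z and the subset's
            # first bin is 30 s later than the granule's first bin, the output
            # time_coverage_start becomes 2024-03-01T12:00:30Z; the end time
            # shifts in the same way.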

            # Gring calculation
            outfile.set_auto_mask(False)
            try:
                if 'latitude' in outfile.groups['geolocation_data'].variables:
                    bins_along_track = outfile.dimensions['bins_along_track'].size - 1
                    bins_across_track = outfile.dimensions['bins_across_track'].size - 1

                    latitude = outfile.groups['geolocation_data'].variables['latitude']
                    longitude = outfile.groups['geolocation_data'].variables['longitude']

                    lat_min = latitude[0, 0]
                    lat_max = latitude[bins_along_track, bins_across_track]

                    # Number of points used to trace each side: one per
                    # 1/20th of the latitude span
                    lat_add = float((lat_max - lat_min) / 20)

                    # Row indices (lat_*) and column indices (lon_*) of the
                    # gring points along the left and right edges
                    lat_r = []
                    lon_l = []
                    lon_r = []

                    # sample rows up the right edge; guard against a zero or
                    # negative step for very small (or descending) extracts
                    step = int(bins_along_track / lat_add) if lat_add > 0 else bins_along_track
                    step = max(1, step)
                    for i in range(0, bins_along_track - 1, step):
                        lat_r.append(i)
                        lon_l.append(0)
                        lon_r.append(bins_across_track)

                    # mirror the rows down the left edge
                    lat_l = list(reversed(lat_r))

                    # top edge (right corner, middle, left corner)
                    lon_u = [bins_across_track, bins_across_track / 2, 0]
                    lat_u = [bins_along_track, bins_along_track, bins_along_track]
                    # bottom edge, traced in the opposite direction
                    lon_d = list(reversed(lon_u))
                    lat_d = [0, 0, 0]

                    # assemble the point sequence: up, left, down, right
                    lat_values = np.concatenate([np.asarray(a, dtype=float) for a in (lat_u, lat_l, lat_d, lat_r)])
                    lon_values = np.concatenate([np.asarray(a, dtype=float) for a in (lon_u, lon_l, lon_d, lon_r)])
                    p_seq = [np.dtype('int32').type(i + 1) for i in range(len(lat_values))]

                    # look up the lat/lon at each sampled bin
                    g_lat = []
                    g_lon = []
                    for row, col in zip(lat_values, lon_values):
                        g_lat.append(latitude[int(row), int(col)])
                        g_lon.append(longitude[int(row), int(col)])

                    # Format geospatial_bounds in WKT
                    geospatial_bounds_str = "POLYGON((" + ', '.join(f"{lon} {lat}" for lon, lat in zip(g_lon, g_lat)) + '))'
                    outfile.setncattr('geospatial_bounds', geospatial_bounds_str)

                    # Update geospatial lat/lon min/max
                    outfile.setncattr('geospatial_lat_max', str(max(g_lat)))
                    outfile.setncattr('geospatial_lat_min', str(min(g_lat)))
                    outfile.setncattr('geospatial_lon_max', str(max(g_lon)))
                    outfile.setncattr('geospatial_lon_min', str(min(g_lon)))

                    # Set gring points as global attributes, excluding the
                    # last (repeated) point
                    outfile.setncattr('gringpointlatitude', ', '.join(map(str, g_lat[:-1])))
                    outfile.setncattr('gringpointlongitude', ', '.join(map(str, g_lon[:-1])))
                    outfile.setncattr('gringpointsequence', ', '.join(map(str, p_seq[:-1])))
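
                    # The resulting attributes look like (values illustrative):
                    #   geospatial_bounds  = "POLYGON((-70.2 44.9, -75.0 45.0, ...))"
                    #   gringpointlatitude = "44.9, 45.0, ..."
                    #   gringpointsequence = "1, 2, ..."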

            except Exception:
                # attribute updates are best-effort; leave them as copied on failure
                pass

        return 0

    def getpixlin(self):
        if self.verbose:
            print("north={} south={} west={} east={}".
                  format(self.north, self.south, self.west, self.east))

        # run lonlat2pixline
        pl = pixlin(file=self.ifile,
                    north=self.north, south=self.south,
                    west=self.west, east=self.east,
                    verbose=self.verbose)
        pl.lonlat2pixline(zero=False)  # using 1-based indices
        self.spixl, self.epixl, self.sline, self.eline = \
            (pl.spixl, pl.epixl, pl.sline, pl.eline)
        return pl.status
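
    # For example (illustrative), getpixlin() with north=45 south=40 west=-80
    # east=-70 might set spixl=120, epixl=310, sline=55, eline=720; the actual
    # values depend on the granule's geolocation.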

    def run(self):
        # convert to zero-based index
        self.spixl, self.epixl, self.sline, self.eline = \
            (v - 1 for v in (self.spixl, self.epixl, self.sline, self.eline))

        # extract file
        subset = {'bins_across_track': [self.spixl, self.epixl],
                  'bins_along_track': [self.sline, self.eline]}
        status = self.runextract(subset)

        # return status
        return status

def chk_pixl(args, infile):
    # resolve the -1 sentinels to the full granule extent
    if args.epixl == -1:
        args.epixl = infile.dimensions['bins_across_track'].size
    if args.eline == -1:
        args.eline = infile.dimensions['bins_along_track'].size
    return args.spixl, args.epixl, args.sline, args.eline

def chk_spex_width(args, infile):
    # force a fixed across-track window covering the SPEX swath
    args.spixl = 245
    args.epixl = 273
    if args.eline == -1:
        args.eline = infile.dimensions['bins_along_track'].size
    return args.spixl, args.epixl, args.sline, args.eline
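
# For example (illustrative), --spex_width on a granule with 400 along-track
# bins yields spixl=245, epixl=273, sline=1, eline=400.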

if __name__ == "__main__":
    print("l1cextract", versionStr)

    # parse command line
    parser = argparse.ArgumentParser(
        description='Extract specified area from Level 1C files.',
        epilog='Specify either geographic limits or pixel/line ranges, not both.')
    parser.add_argument('-v', '--verbose', help='print status messages',
                        action='store_true')
    parser.add_argument('ifile',
                        help='Level 1C input file')
    parser.add_argument('ofile',
                        help='output file')

    group1 = parser.add_argument_group('geographic limits')
    group1.add_argument('-n', '--north', type=float, help='northernmost latitude')
    group1.add_argument('-s', '--south', type=float, help='southernmost latitude')
    group1.add_argument('-w', '--west', type=float, help='westernmost longitude')
    group1.add_argument('-e', '--east', type=float, help='easternmost longitude')

    group2 = parser.add_argument_group('pixel/line ranges (1-based)')
    group2.add_argument('--spixl', type=int, help='start pixel', default=1)
    group2.add_argument('--epixl', type=int, help='end pixel', default=-1)
    group2.add_argument('--sline', type=int, help='start line', default=1)
    group2.add_argument('--eline', type=int, help='end line', default=-1)

    group3 = parser.add_argument_group('spex width (overrides any pixel ranges or geographic limits)')
    group3.add_argument('--spex_width', help='spex width', action='store_true', default=None)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()

    # Read infile as netCDF dataset
    infile = netCDF4.Dataset(args.ifile, 'r')

    # resolve pixel/line defaults; --spex_width forces a fixed pixel window
    if args.spex_width is None:
        args.spixl, args.epixl, args.sline, args.eline = chk_pixl(args, infile)
    else:
        args.spixl, args.epixl, args.sline, args.eline = chk_spex_width(args, infile)

    # initialize
    this = extract(ifile=args.ifile,
                   ofile=args.ofile,
                   north=args.north,
                   south=args.south,
                   west=args.west,
                   east=args.east,
                   spixl=args.spixl,
                   epixl=args.epixl,
                   sline=args.sline,
                   eline=args.eline,
                   verbose=args.verbose)

    # input value checks
    goodlatlons = None not in (this.north, this.south, this.west, this.east)
    goodindices = None not in (this.spixl, this.epixl, this.sline, this.eline)
    if goodlatlons and goodindices:
        print("ERROR: Specify either geographic limits or pixel/line ranges, not both.")
        sys.exit(1)
    elif goodlatlons:
        status = this.getpixlin()
        if status not in (0, 110):
            print("No extract; lonlat2pixline status =", status)
            sys.exit(status)

    status = this.run()

    sys.exit(status)