Ocean Color Science Software

ocssw V2022
ProcUtils.py
1 """
2 
3 SeaDAS library for commonly used functions within other python scripts
4 
5 """
6 import hashlib
7 import os
8 import sys
9 import re
10 import subprocess
11 from tarfile import BLOCKSIZE
12 import time
13 from datetime import datetime, timedelta, date
14 import logging
15 import requests
16 from requests.adapters import HTTPAdapter
17 from pathlib import Path
18 
19 from seadasutils.MetaUtils import readMetadata
20 
21 
22 # ------------------ DANGER -------------------
23 #
24 # The next 5 functions:
25 # getSession
26 # isRequestAuthFailure
27 # httpdl
28 # uncompressFile
29 # get_file_time
30 #
31 # exist in two places:
32 # OCSSWROOT/src/manifest/manifest.py
33 # OCSSWROOT/src/scripts/seadasutils/ProcUtils.py
34 #
35 # Make sure changes get into both files.
36 #
37 
38 DEFAULT_CHUNK_SIZE = 131072
39 BLOCKSIZE = 65536
40 
41 # requests session object used to keep connections around
42 obpgSession = None
43 
def getSession(verbose=0, ntries=5):
    global obpgSession

    if not obpgSession:
        # turn on debug statements for requests
        if verbose > 1:
            logging.basicConfig(level=logging.DEBUG)

        obpgSession = requests.Session()
        obpgSession.mount('https://', HTTPAdapter(max_retries=ntries))

        if verbose:
            print("OBPG session started")
    else:
        if verbose > 1:
            print("reusing existing OBPG session")

    return obpgSession

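# Illustrative usage sketch (not part of the original module): getSession()
# lazily creates a single shared requests.Session with retry-enabled HTTPS
# mounting, so repeated downloads reuse one connection pool.
#
#     session = getSession(verbose=1, ntries=3)   # starts the OBPG session
#     same_session = getSession()                 # returns the cached session
#     assert session is same_session
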
# ------------------ DANGER -------------------
# See comment above
def isRequestAuthFailure(req):
    ctype = req.headers.get('Content-Type')
    if ctype and ctype.startswith('text/html'):
        if "<title>Earthdata Login</title>" in req.text:
            return True
    return False

# ------------------ DANGER -------------------
# See comment above
def httpdl(server, request, localpath='.', outputfilename=None, ntries=5,
           uncompress=False, timeout=30., verbose=0, force_download=False,
           chunk_size=DEFAULT_CHUNK_SIZE):

    status = 0
    urlStr = 'https://' + server + request

    global obpgSession
    localpath = Path(localpath)
    getSession(verbose=verbose, ntries=ntries)

    modified_since = None
    headers = {}

    if not force_download:
        if outputfilename:
            ofile = localpath / outputfilename
            modified_since = get_file_time(ofile)
        else:
            rpath = Path(request.rstrip())
            if 'requested_files' in request:
                rpath = Path(request.rstrip().split('?')[0])
            ofile = localpath / rpath.name
            if re.search(r'(?<=\?)(\w+)', ofile.name):
                ofile = Path(ofile.name.split('?')[0])

            modified_since = get_file_time(ofile)

        if modified_since:
            headers = {"If-Modified-Since": modified_since.strftime("%a, %d %b %Y %H:%M:%S GMT")}

    with obpgSession.get(urlStr, stream=True, timeout=timeout, headers=headers) as req:

        if req.status_code != 200:
            status = req.status_code
        elif isRequestAuthFailure(req):
            status = 401
        else:
            if not Path.exists(localpath):
                os.umask(0o02)
                Path.mkdir(localpath, mode=0o2775, parents=True)

            if not outputfilename:
                cd = req.headers.get('Content-Disposition')
                if cd:
                    outputfilename = re.findall("filename=(.+)", cd)[0]
                else:
                    outputfilename = urlStr.split('/')[-1]

            ofile = localpath / outputfilename

            # This is here just in case we didn't get a 304 when we should have...
            download = True
            if 'last-modified' in req.headers:
                remote_lmt = req.headers['last-modified']
                remote_ftime = datetime.strptime(remote_lmt, "%a, %d %b %Y %H:%M:%S GMT").replace(tzinfo=None)
                if modified_since and not force_download:
                    if (remote_ftime - modified_since).total_seconds() < 0:
                        download = False
                        if verbose:
                            print("Skipping download of %s" % outputfilename)

            if download:
                total_length = req.headers.get('content-length')
                length_downloaded = 0
                total_length = int(total_length)
                if verbose > 0:
                    print("Downloading %s (%8.2f MBs)" % (outputfilename, total_length / 1024 / 1024))

                with open(ofile, 'wb') as fd:

                    for chunk in req.iter_content(chunk_size=chunk_size):
                        if chunk:  # filter out keep-alive new chunks
                            length_downloaded += len(chunk)
                            fd.write(chunk)
                            if verbose > 0:
                                percent_done = int(50 * length_downloaded / total_length)
                                sys.stdout.write("\r[%s%s]" % ('=' * percent_done, ' ' * (50 - percent_done)))
                                sys.stdout.flush()

                if uncompress:
                    if ofile.suffix in {'.Z', '.gz', '.bz2'}:
                        if verbose:
                            print("\nUncompressing {}".format(ofile))
                        compressStatus = uncompressFile(ofile)
                        if compressStatus:
                            status = compressStatus
                else:
                    status = 0

                if verbose:
                    print("\n...Done")

    return status

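# Illustrative usage sketch (not part of the original module). httpdl() issues
# an HTTPS GET against `server` + `request`, sends If-Modified-Since when a
# local copy already exists, streams the body in `chunk_size` pieces, and
# returns 0 on success (or the HTTP status code, e.g. 401/404, on failure).
# The host and path below are placeholders, not guaranteed endpoints:
#
#     status = httpdl('oceandata.sci.gsfc.nasa.gov',
#                     '/ob/getfile/some_product.nc',
#                     localpath='/tmp/downloads',
#                     uncompress=True, verbose=1)
#     if status:
#         print("download failed with HTTP status %d" % status)
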
# ------------------ DANGER -------------------
# See comment above
def uncompressFile(compressed_file):
    """
    uncompress file
    compression methods:
        bzip2
        gzip
        UNIX compress
    """

    compProg = {".gz": ["gunzip", "-f"], ".Z": ["gunzip", "-f"], ".bz2": ["bunzip2", "-f"]}
    compressed_file = Path(compressed_file)
    unzip = compProg[compressed_file.suffix]
    # build an argument list so the command runs correctly with shell=False
    cmd = unzip + [str(compressed_file.resolve())]
    p = subprocess.Popen(cmd, shell=False)
    status = os.waitpid(p.pid, 0)[1]
    if status:
        print("Warning! Unable to decompress %s" % compressed_file)
        return status
    else:
        return 0

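# Illustrative usage sketch (not part of the original module). uncompressFile()
# shells out to gunzip/bunzip2, replacing the compressed file in place, and
# returns the child exit status (0 on success); the path is a placeholder:
#
#     if uncompressFile(Path('/tmp/downloads/some_product.nc.gz')):
#         print("decompression failed")
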
# ------------------ DANGER -------------------
# See comment above
def get_file_time(localFile):
    ftime = None
    localFile = Path(localFile)
    if not Path.is_file(localFile):
        while localFile.suffix in {'.Z', '.gz', '.bz2'}:
            localFile = localFile.with_suffix('')

    if Path.is_file(localFile):
        ftime = datetime.fromtimestamp(localFile.stat().st_mtime)

    return ftime

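# Illustrative usage sketch (not part of the original module). get_file_time()
# returns the local file's modification time as a naive datetime (or None if
# the file is absent), stripping .Z/.gz/.bz2 suffixes so an already-uncompressed
# copy still satisfies the If-Modified-Since check in httpdl():
#
#     ftime = get_file_time('/tmp/downloads/some_product.nc.gz')
#     if ftime:
#         print(ftime.strftime("%a, %d %b %Y %H:%M:%S GMT"))
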
def cleanList(filename, parse=None):
    """
    Parses file list from oceandata.sci.gsfc.nasa.gov through html source
    intended for update_luts.py, but may have other uses
    """
    oldfile = Path(filename).resolve()
    newlist = []
    if parse is None:
        parse = re.compile(r"(?<=(\"|\')>)\S+(\.(hdf|h5|dat|txt))")
    if not oldfile.exists():
        print('Error: ' + str(oldfile) + ' does not exist')
        sys.exit(1)
    else:
        of = open(oldfile, 'r')
        for line in of:
            if '<td><a' in line:
                try:
                    newlist.append(parse.search(line).group(0))
                except Exception:
                    pass
        of.close()
        oldfile.unlink()
    return newlist

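# Illustrative usage sketch (not part of the original module). cleanList()
# scrapes file names out of a saved oceandata HTML directory listing and then
# deletes that listing file; the path below is a placeholder:
#
#     names = cleanList('/tmp/downloads/index.html')
#     for name in names:
#         print(name)
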

def date_convert(datetime_i, in_datetype=None, out_datetype=None):
    """
    Convert between datetime object and/or standard string formats

    Inputs:
        datetime_i   datetime object or formatted string
        in_datetype  input format code;
                     must be present if datetime_i is a string
        out_datetype output format code; if absent, return datetime object

        datetype may be one of:
        'd': date-only YYYYMMDD
        'j': Julian    YYYYDDDHHMMSS
        'g': Gregorian YYYYMMDDHHMMSS
        't': TAI       YYYY-MM-DDTHH:MM:SS.uuuuuuZ
        'h': HDF-EOS   YYYY-MM-DD HH:MM:SS.uuuuuu
    """

    # define commonly used date formats
    date_time_format = {
        'd': "%Y%m%d",                 # YYYYMMDD
        'j': "%Y%j%H%M%S",             # Julian    YYYYDDDHHMMSS
        'g': "%Y%m%d%H%M%S",           # Gregorian YYYYMMDDHHMMSS
        't': "%Y-%m-%dT%H:%M:%S.%fZ",  # TAI       YYYY-MM-DDTHH:MM:SS.uuuuuuZ
        'h': "%Y-%m-%d %H:%M:%S.%f",   # HDF-EOS   YYYY-MM-DD HH:MM:SS.uuuuuu
    }
    if in_datetype is None:
        dateobj = datetime_i
    else:
        dateobj = datetime.strptime(datetime_i, date_time_format[in_datetype])

    if out_datetype is None:
        return dateobj
    else:
        return dateobj.strftime(date_time_format[out_datetype])

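# Illustrative usage sketch (not part of the original module). date_convert()
# moves between datetime objects and the coded string formats above:
#
#     dt = date_convert('2022060230000', 'j')           # Julian string -> datetime
#     print(date_convert(dt, out_datetype='g'))         # '20220301230000'
#     print(date_convert('20220301230000', 'g', 'j'))   # '2022060230000'
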

def addsecs(datetime_i, dsec, datetype=None):
    """
    Offset datetime_i by dsec seconds.
    """
    dateobj = date_convert(datetime_i, datetype)
    delta = timedelta(seconds=dsec)
    return date_convert(dateobj + delta, out_datetype=datetype)


def diffsecs(time0, time1, datetype=None):
    """
    Return difference in seconds.
    """
    t0 = date_convert(time0, datetype)
    t1 = date_convert(time1, datetype)
    return (t1 - t0).total_seconds()

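# Illustrative usage sketch (not part of the original module). Both helpers
# accept either datetime objects or coded strings (see date_convert above):
#
#     print(addsecs('20220301230000', 7200, 'g'))                # '20220302010000'
#     print(diffsecs('20220301230000', '20220302010000', 'g'))   # 7200.0
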
def round_minutes(datetime_i, datetype=None, resolution=5, rounding=0):
    """Round to nearest "resolution" minutes, preserving format.

    Parameters
    ----------
    datetime_i : string
        String representation of datetime, in "datetype" format
    datetype : string
        Format of datetime, as strftime or date_convert() code
    resolution : integer, optional
        Number of minutes to round to (default=5)
    rounding : integer, optional
        Rounding "direction", where
            <0 = round down
             0 = round to nearest (default)
            >0 = round up
    """
    dateobj = date_convert(datetime_i, datetype)

    if rounding < 0:    # round down
        new_minute = (dateobj.minute // resolution) * resolution
    elif rounding > 0:  # round up
        new_minute = (dateobj.minute // resolution + 1) * resolution
    else:               # round to nearest value
        new_minute = ((dateobj.minute + resolution / 2.0) // resolution) * resolution

    # truncate to current hour; add new minutes
    dateobj -= timedelta(minutes=dateobj.minute,
                         seconds=dateobj.second,
                         microseconds=dateobj.microsecond)
    dateobj += timedelta(minutes=new_minute)

    return date_convert(dateobj, out_datetype=datetype)

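# Illustrative usage sketch (not part of the original module):
#
#     print(round_minutes('20220301230842', 'g'))                # '20220301231000'
#     print(round_minutes('20220301230842', 'g', rounding=-1))   # '20220301230500'
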

def remove(file_to_delete):
    """
    Delete a file from the system
    A simple wrapper for Path.unlink
    """
    file_to_delete = Path(file_to_delete)
    if Path.exists(file_to_delete):
        Path.unlink(file_to_delete)
        return 0

    return 1


def ctime(the_file):
    """
    returns days since file creation
    """

    today = date.today().toordinal()
    p = Path(the_file)
    utc_create = time.localtime(p.stat().st_ctime)

    return today - date(utc_create.tm_year, utc_create.tm_mon, utc_create.tm_mday).toordinal()


def mtime(the_file):
    """
    returns days since last file modification
    """

    today = date.today().toordinal()
    p = Path(the_file)
    utc_mtime = time.localtime(p.stat().st_mtime)

    return today - date(utc_mtime.tm_year, utc_mtime.tm_mon, utc_mtime.tm_mday).toordinal()

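# Illustrative usage sketch (not part of the original module). Both helpers
# return whole days: today's ordinal date minus the file's ctime/mtime date.
# The path below is a placeholder:
#
#     if mtime('/tmp/downloads/some_product.nc') > 30:
#         print("local copy is more than a month old")
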

def cat(file_to_print):
    """
    Print a file to the standard output.
    """
    with open(file_to_print) as f:
        print(f.read())


def compare_checksum(filepath, checksum):
    hasher = hashlib.sha1()
    with open(filepath, 'rb') as afile:
        buf = afile.read(BLOCKSIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = afile.read(BLOCKSIZE)

    if hasher.hexdigest() == checksum:
        return False
    else:
        return True

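# Illustrative usage sketch (not part of the original module). Note the
# inverted return value: compare_checksum() returns False when the SHA-1
# digest matches and True on a mismatch, so it reads as "checksums differ".
# The path and digest below are placeholders:
#
#     if compare_checksum('/tmp/downloads/some_product.nc',
#                         'da39a3ee5e6b4b0d3255bfef95601890afd80709'):
#         print("checksum mismatch - consider re-downloading")
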
def check_sensor(inp_file):
    """
    Determine the satellite sensor from the file metadata
    if unable to determine the sensor, return 'X'
    """

    senlst = {'Sea-viewing Wide Field-of-view Sensor (SeaWiFS)': 'seawifs',
              'SeaWiFS': 'seawifs',
              'Coastal Zone Color Scanner (CZCS)': 'czcs',
              'Ocean Color and Temperature Scanner (OCTS)': 'octs',
              'Ocean Scanning Multi-Spectral Imager (OSMI)': 'osmi',
              'Ocean Color Monitor OCM-2': 'ocm2',
              'Second-generation Global Imager (SGLI)': 'sgli',
              'GOCI': 'goci', 'Hawkeye': 'hawkeye', 'hico': 'hico',
              'OCIS': 'ocis', 'MERIS': 'meris', 'MOS': 'mos', 'TM': 'tm',
              'Aquarius': 'aquarius', 'VIIRS': 'viirs'}

    fileattr = readMetadata(inp_file)
    if not fileattr:
        # sys.stderr.write('empty fileattr found in ' + inp_file + '\n')
        return 'X'
    if 'ASSOCIATEDPLATFORMSHORTNAME' in fileattr:
        print(fileattr['ASSOCIATEDPLATFORMSHORTNAME'])
        return fileattr['ASSOCIATEDPLATFORMSHORTNAME']
    elif 'Instrument_Short_Name' in fileattr:
        print(senlst[str(fileattr['Instrument_Short_Name'])])
        return senlst[str(fileattr['Instrument_Short_Name'])]
    elif 'Sensor' in fileattr:
        print(senlst[(fileattr['Sensor']).strip()])
        return senlst[(fileattr['Sensor']).strip()]
    elif 'Sensor name' in fileattr:
        print(senlst[(fileattr['Sensor name']).strip()])
        return senlst[(fileattr['Sensor name']).strip()]
    elif 'SENSOR_ID' in fileattr and re.search('(OLI|ETM)', fileattr['SENSOR_ID']):
        if 'SPACECRAFT_ID' in fileattr and re.search('LANDSAT_9', fileattr['SPACECRAFT_ID']):
            print('L9')
            return 'L9'
        else:
            print(fileattr['SENSOR_ID'].strip())
            return fileattr['SENSOR_ID'].strip()
    elif 'PRODUCT' in fileattr and re.search('MER', fileattr['PRODUCT']):
        print(fileattr['PRODUCT'])
        return 'meris'
    elif 'instrument' in fileattr:
        print(fileattr['instrument'])
        if re.search('(OLCI|MSI)', fileattr['instrument']):
            if 'platform' in fileattr:
                return fileattr['platform']
        else:
            return senlst[(fileattr['instrument'])].strip()
    else:
        return 'X'

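# Illustrative usage sketch (not part of the original module). check_sensor()
# reads the file's metadata with readMetadata() and maps whichever platform /
# instrument attribute it finds to a lower-case sensor name ('X' if unknown);
# the input path is a placeholder:
#
#     sensor = check_sensor('/tmp/downloads/some_product.nc')
#     if sensor == 'X':
#         print("could not identify the sensor")
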