Ocean Color Science Software

ocssw V2022
JsonUtils.py
import os
import re
import sys
import time
import json

from seadasutils.ProcUtils import getSession, httpdl

# URL parsing utils:

from urllib.parse import urljoin, urlsplit, urlunsplit

def base_url(url):
    parts = urlsplit(url)
    return urlunsplit((parts.scheme, parts.netloc, parts.path, None, None))


def full_url(url, link):
    """
    Add query to urljoin() results
    ONLY if it's a page
    """
    base = base_url(urljoin(url, link))
    if not is_page(base):
        return base
    else:
        scheme, netloc, path, query, fragment = urlsplit(base)
        query = urlsplit(url).query
        return urlunsplit((scheme, netloc, path, query, None))


def is_page(url):
    """
    Make the dangerous assumption that URLs
    pointing to another web page always end in '/'.
    """
    return base_url(url).endswith('/')

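# Illustrative sketch (not part of the original module): full_url() keeps
# the query string of `url` only when the joined link resolves to a page,
# i.e. a path ending in '/'. The example.com URLs are hypothetical.
def _demo_full_url():
    listing = 'https://example.com/data/?format=json'
    assert full_url(listing, 'sub/') == 'https://example.com/data/sub/?format=json'
    assert full_url(listing, 'granule.nc') == 'https://example.com/data/granule.nc'
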
# General utils:

def retry(func, *args, **kwargs):
    """
    Retry specified function call after a short delay
    """
    ntries = kwargs.get('ntries')
    if ntries:
        delay = int(5 + (30. * (1. / (float(ntries) + 1.))))
        if kwargs.get('verbose'):
            print('Sleeping {}s; {} tries left.'.format(delay, ntries - 1))
        time.sleep(delay)
        kwargs['ntries'] = ntries - 1
    return func(*args, **kwargs)

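# Illustrative sketch (not part of the original module): a callable that
# hands itself back to retry() until 'ntries' runs out. retry() sleeps
# roughly 5-35 s, decrements ntries, and re-invokes the function. The
# _flaky_fetch name and its always-failing body are hypothetical.
def _flaky_fetch(url, ntries=3, verbose=1):
    response = None  # stand-in for a request that failed
    if not ok_status(response) and ntries:
        return retry(_flaky_fetch, url, ntries=ntries, verbose=verbose)
    return response
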
def set_mtime(filepath, mtime):
    """
    Set modification time for specified file.
    Set access time to "now".
    """
    atime = time.time()
    os.utime(filepath, times=(atime, mtime))

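# Illustrative sketch (not part of the original module; the filename is
# hypothetical): create a placeholder file, then stamp it with a fixed
# remote timestamp while its access time becomes "now".
def _demo_set_mtime(filepath='granule.tmp'):
    with open(filepath, 'w'):
        pass
    set_mtime(filepath, 1640995200.0)  # 2022-01-01T00:00:00Z as epoch seconds
    assert os.path.getmtime(filepath) == 1640995200.0
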
# Table/link parsing utils:

def linkdict(rows):
    """
    Each link in list is a dictionary describing a remote file:
    link['href'] = URL pointing to file
    link['mtime'] = timestamp as seconds since the epoch
    link['size'] = size in bytes
    """
    keys = ['href', 'mtime', 'size']
    linklist = []
    for row in rows:
        link = dict(zip(keys, row))
        link['mtime'] = link_mtime(link['mtime'])
        linklist.append(link)
    return linklist

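# Illustrative sketch (not part of the original module): each row in the
# JSON listing is an [href, mtime, size] triple; the values here are made up.
def _demo_linkdict():
    rows = [['granule.nc', '2022-01-01 00:00:00', 1024],
            ['subdir/', '2022-01-02 00:00:00', 0]]
    links = linkdict(rows)
    assert links[0]['href'] == 'granule.nc'
    assert links[0]['size'] == 1024
    assert links[0]['mtime'] < links[1]['mtime']
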
def link_mtime(mtime):
    """
    Format remote file timestamp as seconds since the epoch.
    """
    try:
        urltime = time.strptime(mtime, "%Y-%m-%d %H:%M:%S")
        return time.mktime(urltime)
    except ValueError:
        return sys.maxsize

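# Illustrative sketch (not part of the original module): well-formed
# timestamps parse to epoch seconds; anything unparseable maps to the
# sys.maxsize sentinel, so it always compares as "newer" than a local file.
def _demo_link_mtime():
    good = link_mtime('2022-01-01 00:00:00')
    bad = link_mtime('not a timestamp')
    assert bad == sys.maxsize and good < bad
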
def getlinks_json(content):
    return linkdict(json.loads(content.decode('utf-8'))['rows'])

def needs_download(link, filepath, check_times=False):
    """
    Returns False if filepath is present and size matches remote url;
    True otherwise. Optionally check timestamp as well.
    """
    # only download files
    if is_page(link['href']):
        return False

    # always download missing files
    if not os.path.isfile(filepath):
        return True

    # check file size
    diffsize = os.path.getsize(filepath) != link['size']
    if not check_times:
        return diffsize

    # optionally check timestamp
    else:
        older = os.path.getmtime(filepath) < link['mtime']
        return diffsize or older

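# Illustrative sketch (not part of the original module; the link dicts and
# paths are hypothetical): directory pages are never downloaded, and a
# missing local file always is.
def _demo_needs_download():
    page = {'href': 'https://example.com/data/', 'mtime': 0.0, 'size': 0}
    granule = {'href': 'https://example.com/data/granule.nc',
               'mtime': 0.0, 'size': 1024}
    assert not needs_download(page, '/tmp/data')
    assert needs_download(granule, '/no/such/granule.nc')
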
# HTTPResponse utils:
def is_json(response):
    return response and ('json' in response.headers.get('Content-Type', ''))


def ok_status(response):
    return response and (response.status < 400)


class SessionUtils:

    def __init__(self, timeout=5, max_tries=5, verbose=0, clobber=False):
        self.timeout = timeout
        self.max_tries = max_tries
        self.verbose = verbose
        self.clobber = clobber
        self.status = 0

    def download_file(self, url, filepath):
        try:
            parts = urlsplit(url)
            outputdir = os.path.dirname(filepath)
            status = httpdl(parts.netloc, parts.path, localpath=outputdir,
                            timeout=self.timeout, ntries=self.max_tries,
                            verbose=self.verbose)
            if status:
                self.status = 1
                print('Error downloading {}'.format(filepath))
        except Exception as e:
            self.status = 1
            print('Exception: {}'.format(e))
        return

    def get_links(self, url, regex=''):
        """
        Returns a list of links from a given url.
        Optionally specify regex to filter for acceptable files;
        default is to list only links starting with url.
        """
        linklist = []
        session = getSession(verbose=self.verbose, ntries=self.max_tries)
        with session.get(url, stream=True, timeout=self.timeout) as response:
            if is_json(response):
                linklist = getlinks_json(response.content)
            else:
                return []

        # make relative urls fully-qualified
        for link in linklist:
            link['href'] = full_url(url, link['href'])

        # filter for regex
        if regex != '':
            pattern = re.compile(regex)
            linklist = [link for link in linklist if pattern.search(link['href'])]
        else:  # if no filter, return only links containing url
            linklist = [link for link in linklist if base_url(url) in link['href']]

        return linklist

    def download_allfiles(self, url, dirpath, regex='', check_times=False,
                          clobber=False, dry_run=False):
        """
        Downloads all available files from a remote url into a local dirpath.
        Default is to download only if local file doesn't match remote size;
        set clobber=True to always download.
        """
        downloaded = []
        if dry_run and self.verbose:
            print('Dry run:')
        if not os.path.exists(dirpath) and not dry_run:
            os.makedirs(dirpath)

        all_links = self.get_links(url, regex=regex)
        for link in all_links:
            f = os.path.basename(link['href'])
            filepath = os.path.join(dirpath, f)
            if clobber or needs_download(link, filepath,
                                         check_times=check_times):
                if not dry_run:
                    self.download_file(link['href'], filepath)
                    set_mtime(filepath, link['mtime'])
                downloaded.append(filepath)
                if self.verbose:
                    print('+ ' + f)

        return downloaded

# end of class SessionUtils
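
# Usage sketch for SessionUtils.get_links() (not part of the original
# module): fetch the JSON listing and keep only hrefs matching a regex.
# The 'aquarius' filter is a hypothetical example; the LUT listing URL is
# the same one used in __main__ below.
def _demo_get_links():
    session = SessionUtils(verbose=1)
    url = 'https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/?format=json'
    return session.get_links(url, regex=r'aquarius')
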
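# Usage sketch for SessionUtils.download_allfiles() (not part of the
# original module): with dry_run=True it lists what would be fetched into
# ./luts (a hypothetical local directory) without writing any files.
def _demo_download_allfiles():
    session = SessionUtils(verbose=1)
    url = 'https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/?format=json'
    return session.download_allfiles(url, dirpath='./luts', dry_run=True)
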
if __name__ == '__main__':
    # parameters
    if len(sys.argv) > 1:
        url = sys.argv[1]
    else:
        url = 'https://oceandata.sci.gsfc.nasa.gov/Ancillary/LUTs/?format=json'

    sessionUtil = SessionUtils()
    links = sessionUtil.get_links(url)
    print(links)