OB.DAAC Logo
NASA Logo
Ocean Color Science Software

ocssw V2022
obdaac_download.py
Go to the documentation of this file.
1 #!/usr/bin/env python3
2 #
3 import argparse
4 import os
5 import re
6 import requests
7 import sys
8 import textwrap
9 from seadasutils.ProcUtils import httpdl, compare_checksum
10 from urllib.parse import urlparse
11 from pathlib import Path, PurePath
12 
def retrieveURL(request, localpath='.', uncompress=False, verbose=0,
                force_download=False, appkey=False, checksum=False):
    """Download one file from the OB.DAAC and optionally verify its checksum.

    Parameters:
        request: bare file name, server-relative path, or full URL of the
            file to retrieve.
        localpath: directory the file is downloaded into (str or Path).
        uncompress: passed through to httpdl.
        verbose: verbosity level; any truthy value prints a progress line
            here, and the value is also passed through to httpdl.
        force_download: passed through to httpdl.
        appkey: application key appended to "getfile" requests as the
            ``appkey`` query parameter; a falsy value means no key.
        checksum: when true (and ``uncompress`` is false), compare the
            downloaded file against the checksum served under
            ``/checkdata/`` on the same server.

    Returns:
        The status returned by httpdl, or 1 if the checksum comparison
        reported a mismatch.
    """
    request = request.strip()

    # Use the function's own parameter rather than the script-level ``args``
    # so the function also works when called from other code.
    if verbose:
        print("Retrieving %s" % request)

    server = "oceandata.sci.gsfc.nasa.gov"
    parsedRequest = urlparse(request)
    netpath = parsedRequest.path

    if parsedRequest.netloc:
        # A full URL names its own server.
        server = parsedRequest.netloc
    elif not re.match(".*getfile", netpath):
        # A bare file name is routed through the getfile service.
        netpath = '/ob/getfile/' + netpath

    # The first query parameter is introduced by '?', later ones by '&'.
    joiner = '?'
    if re.match(".*getfile", netpath) and appkey:
        netpath = netpath + joiner + 'appkey=' + appkey
        joiner = '&'

    if parsedRequest.query:
        netpath = netpath + joiner + parsedRequest.query

    status = httpdl(server, netpath, localpath=localpath,
                    uncompress=uncompress, verbose=verbose,
                    force_download=force_download)

    # Only verify when a local copy should exist: a falsy status, or 304
    # (the caller treats 304 as "local copy is current").  Verifying after
    # a failed download would replace the real status with 1.
    downloaded = (not status) or status == 304
    if checksum and not uncompress and downloaded:
        # Use only the final path component.  Joining a Path with an
        # absolute request path (e.g. "/ob/getfile/x.nc") would discard
        # localpath entirely, and a str localpath cannot be joined with "/".
        # NOTE(review): presumably httpdl stores the file under its base name
        # in localpath and /checkdata/ is keyed by that name - confirm.
        filename = PurePath(parsedRequest.path).name
        cksumURL = 'https://' + server + '/checkdata/' + filename
        dnldfile = Path(localpath) / filename
        # compare_checksum is truthy when the checksums differ.
        if compare_checksum(dnldfile, requests.get(cksumURL).text):
            print("The file %s failed checksum test" % parsedRequest.path)
            status = 1

    return status
45 
46 
if __name__ == "__main__":
    # Command-line driver: build the list of requests from a manifest URL,
    # a single file name, or a list file, then retrieve each one.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description='Download files archived at the OB.DAAC',
        epilog=textwrap.dedent('''
Provide one of either filename, --filelist or --http_manifest.

NOTE: For authentication, a valid .netrc file in the user home ($HOME) directory\nor a valid appkey is required.

    Example .netrc:
    machine urs.earthdata.nasa.gov login USERNAME password PASSWD\n

    An appkey can be obtained from:
    https://oceandata.sci.gsfc.nasa.gov/appkey/
'''
        ))
    parser.add_argument('-v', '--verbose', help='print status messages',
                        action='count', default=0)
    parser.add_argument('filename', nargs='?', help='name of the file (or the URL of the file) to retreive')
    parser.add_argument('--filelist',
                        help='file containing list of filenames to retreive, one per line')
    parser.add_argument('--http_manifest',
                        help='URL to http_manifest file for OB.DAAC data order')
    parser.add_argument('--odir',
                        help='full path to desired output directory; \ndefaults to current working directory: %s' % Path.cwd(),
                        default=Path.cwd())
    parser.add_argument('--uncompress', action="store_true",
                        help='uncompress the retrieved files (if compressed)',
                        default=False)
    parser.add_argument('--checksum', action="store_true",
                        help='compare retrieved file checksum; cannot be used with --uncompress',
                        default=False)
    parser.add_argument('--failed', help='filename to contain list of files that failed to be retrieved')
    parser.add_argument('--appkey', help='value of the users application key')
    parser.add_argument('--force', action='store_true',
                        help='force download even if file already exists locally',
                        default=False)
    args = parser.parse_args()

    # Reject incompatible options before any network activity takes place.
    if args.uncompress and args.checksum:
        parser.print_usage()
        sys.exit("--uncompress is incompatible with --checksum")

    filelist = []

    # Source precedence: --http_manifest, then filename, then --filelist.
    if args.http_manifest:
        status = retrieveURL(args.http_manifest, verbose=args.verbose,
                             force_download=True, appkey=args.appkey)
        if status:
            print("There was a problem retrieving %s (received status %d)" % (args.http_manifest, status))
            sys.exit("Bailing out...")
        # retrieveURL was called with its default localpath ('.'), so the
        # manifest is read from the current directory.
        # NOTE(review): the file name is presumably fixed by the server - confirm.
        with open('http_manifest.txt') as flist:
            # Skip blank lines so they do not turn into empty requests.
            filelist.extend(line.rstrip() for line in flist if line.strip())
    elif args.filename:
        filelist.append(args.filename)
    elif args.filelist:
        with open(os.path.expandvars(args.filelist)) as flist:
            filelist.extend(os.path.expandvars(line.rstrip())
                            for line in flist if line.strip())

    if not filelist:
        parser.print_usage()
        sys.exit("Please provide a filename (or list file) to retrieve")

    outpath = Path(os.path.expandvars(args.odir)).expanduser().resolve()

    if args.verbose:
        print("Output directory: %s" % outpath)

    # Optional log of requests that could not be retrieved.
    failed = open(args.failed, 'w') if args.failed else None
    try:
        for request in filelist:
            status = retrieveURL(request, localpath=outpath,
                                 uncompress=args.uncompress,
                                 verbose=args.verbose,
                                 force_download=args.force,
                                 appkey=args.appkey, checksum=args.checksum)
            if not status:
                continue
            if status == 304:
                # 304: the local copy is already current; not a failure.
                if args.verbose:
                    print("%s is not newer than local copy, skipping download" % request)
            else:
                print("There was a problem retrieving %s (received status %d)" % (request, status))
                if failed:
                    failed.write(request + "\n")
    finally:
        # Close the failure log even if a retrieval raised.
        if failed:
            failed.close()
def compare_checksum(filepath, checksum)
Definition: ProcUtils.py:363
def retrieveURL(request, localpath='.', uncompress=False, verbose=0, force_download=False, appkey=False, checksum=False)
def httpdl(server, request, localpath='.', outputfilename=None, ntries=5, uncompress=False, timeout=30., verbose=0, force_download=False, chunk_size=DEFAULT_CHUNK_SIZE)
Definition: manifest.py:75