|
ocssw
V2022
|
Public Member Functions
- def __init__(self, timeout=5, max_tries=5, verbose=False, clobber=False)
- def open_url(self, url, ntries=None, get=False)
- def needs_download(self, url, filepath, check_times=False, response=None)
- def download_file(self, url, filepath)
- def list_pageurls(self, url, regex='')
- def download_allfiles(self, url, dirpath, regex='', check_times=False, response=None, clobber=False, dry_run=False)
- def spider(self, url, level=0, visited=None)
Public Attributes
- timeout
- max_tries
- verbose
- clobber
- session
Detailed Description
Definition at line 137 of file SessionUtils.py.
Constructor & Destructor Documentation
◆ __init__()
Definition at line 139 of file SessionUtils.py.
Member Function Documentation
◆ download_allfiles()
def download_allfiles(self, url, dirpath, regex='', check_times=False, response=None, clobber=False, dry_run=False)
Downloads all available files from a remote url into a local dirpath. Default is to download only if local file doesn't match remote size; set clobber=True to always download.
Definition at line 271 of file SessionUtils.py.
◆ download_file()
def download_file(self, url, filepath)
Definition at line 235 of file SessionUtils.py.
◆ list_pageurls()
def list_pageurls(self, url, regex='')
Returns a sorted, unique set of links from a given url. Optionally specify regex to filter for acceptable files; default is to list only links starting with url.
Definition at line 246 of file SessionUtils.py.
◆ needs_download()
def needs_download(self, url, filepath, check_times=False, response=None)
Returns False if filepath is present and size matches remote url; True otherwise. Optionally check timestamp as well.
Definition at line 205 of file SessionUtils.py.
◆ open_url()
def open_url(self, url, ntries=None, get=False)
Return requests.Session object for specified url. Retries up to self.max_tries times if server is busy. By default, retrieves header only.
Definition at line 146 of file SessionUtils.py.
◆ spider()
def spider(self, url, level=0, visited=None)
Demo crawler
Definition at line 302 of file SessionUtils.py.
Member Data Documentation
◆ clobber
| clobber |
Definition at line 143 of file SessionUtils.py.
◆ max_tries
| max_tries |
Definition at line 141 of file SessionUtils.py.
◆ session
| session |
Definition at line 144 of file SessionUtils.py.
◆ timeout
| timeout |
Definition at line 140 of file SessionUtils.py.
◆ verbose
| verbose |
Definition at line 142 of file SessionUtils.py.
The documentation for this class was generated from the following file:
- /gfs-oceanweb/web/ocssw/ocssw_src/src/scripts/seadasutils/SessionUtils.py


