OB.DAAC Logo
NASA Logo
Ocean Color Science Software

ocssw V2022
netcdf_utils.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 """
3 Module containing utilities to manipulate netCDF4 files.
4 """
5 __author__ = 'gfireman'
6 
7 import sys
8 import time
9 import numpy as np
10 import netCDF4
11 from os.path import basename
12 
def nccopy_var(srcvar, dstgrp, indices=None, verbose=False):
    """Copy a netCDF4 variable, optionally subsetting some dimensions.

    Function to copy a single netCDF4 variable and associated attributes.
    Optionally subset specified dimensions.

    Parameters
    ----------
    srcvar : netCDF4.Variable
        Open variable to be copied
    dstgrp : netCDF4.Group
        Open Group or Dataset destination object to copy stuff to.
        Every dimension used by `srcvar` must already be defined there.
    indices : dict, optional
        Dict of dimname:[indexarr] to subset a dimension.
        Entries naming dimensions that `srcvar` does not use are ignored.
    verbose : boolean, optional
        Print extra info

    Side Effects
    ------------
    Strings are written as H5T_CSET_ASCII, not H5T_CSET_UTF8
    Empty attributes are written as scalar "" instead of NULL
    """

    # create variable with same name, dimnames, storage format.
    # filters() is queried once; it may return None (e.g. for sources
    # without HDF5 filter information), which is treated as "no filters".
    filters = srcvar.filters() or {}
    dstvar = dstgrp.createVariable(srcvar.name,
                                   srcvar.dtype,
                                   srcvar.dimensions,
                                   zlib=filters.get('zlib', False),
                                   shuffle=filters.get('shuffle', False),
                                   complevel=filters.get('complevel', 0))
    # TODO: get and set chunksizes.

    # set variable attributes
    dstvar.setncatts(srcvar.__dict__)

    # Find every axis whose dimension is being subset. Walking the axes
    # (rather than looking up each name once) also handles variables that
    # use the same dimension on more than one axis.
    selected = indices or {}
    subset_axes = [(axis, dimname)
                   for axis, dimname in enumerate(srcvar.dimensions)
                   if dimname in selected]

    # if no dimension changes, copy all
    if not subset_axes:
        if verbose:
            print("\tcopying", srcvar.name)
        dstvar[:] = srcvar[:]

    # otherwise, copy only the subset
    else:
        if verbose:
            print("\tsubsetting", srcvar.name)
        # the whole variable is read into memory, then reduced axis by axis
        data = srcvar[:]
        for axis, dimname in subset_axes:
            data = np.take(data, selected[dimname], axis=axis)
        dstvar[:] = data

    # make sure it's written out
    dstgrp.sync()
72 
73 
def nccopy_grp(srcgrp, dstgrp, indices=None, verbose=False):
    """Copy a netCDF4 group and everything beneath it.

    Attributes, dimensions and variables of `srcgrp` are reproduced in
    `dstgrp`, then each subgroup is created and copied the same way.
    Dimensions named in `indices` are shrunk to the selected entries.

    Parameters
    ----------
    srcgrp : netCDF4.Group
        Open Group or Dataset source object containing stuff to be copied
    dstgrp : netCDF4.Group
        Open Group or Dataset destination object to copy stuff to
    indices : dict, optional
        Dict of dimname:[indexarr] to subset a dimension
    verbose : boolean, optional
        Print extra info
    """

    if verbose:
        print('grp: ', srcgrp.path)

    # group attributes are transferred in one call
    dstgrp.setncatts(srcgrp.__dict__)

    # dimensions: unlimited ones stay unlimited, subset ones take the
    # length of their index array, all others keep their source length
    wanted = indices if indices else {}
    for dimname, dim in srcgrp.dimensions.items():
        if dim.isunlimited():
            newsize = None
        else:
            newsize = len(wanted[dimname]) if dimname in wanted else len(dim)
        dstgrp.createDimension(dimname, newsize)

    # variables
    for srcvar in srcgrp.variables.values():
        if verbose:
            print('var: ', srcgrp.path + '/' + srcvar.name)
        nccopy_var(srcvar, dstgrp, indices=indices, verbose=verbose)

    # subgroups: create the destination group, then recurse into it
    for grpname in srcgrp.groups:
        nccopy_grp(srcgrp.groups[grpname], dstgrp.createGroup(grpname),
                   indices=indices, verbose=verbose)
119 
120 
def nccopy(srcfile, dstfile, verbose=False):
    """Duplicate a netCDF4 file.

    Opens `srcfile` for reading and `dstfile` for writing, then copies the
    root group (and therefore the whole file) with nccopy_grp.
    Intended mostly as a demonstration.

    Parameters
    ----------
    srcfile : str
        Path to source file; must be netCDF4 format.
    dstfile : str
        Path to destination file; directory must exist.
    verbose : boolean, optional
        Print extra info
    """

    # source is opened first and closed last; both close automatically
    with netCDF4.Dataset(srcfile, 'r') as src:
        with netCDF4.Dataset(dstfile, 'w') as dst:
            if verbose:
                print('\nfile:', src.filepath())
            nccopy_grp(src, dst, verbose=verbose)
142 
143 
def ncsubset_vars(srcfile, dstfile, subset, verbose=False, **kwargs):
    """Copy a netCDF4 file, with some dimensions subsetted.

    Function to copy a netCDF4 file to a new file, keeping only an
    inclusive index range along each dimension named in `subset`.
    The 'date_created' and 'history' attributes of the new file are
    updated afterwards.

    Parameters
    ----------
    srcfile : str
        Path to source file; must be netCDF4 format.
    dstfile : str
        Path to destination file; directory must exist.
    subset : dict
        Dict of dimname:[startindex,endindex] to subset a dimension.
        Both indices are inclusive. Values outside the dimension are
        clipped to its valid range. The dict itself is not modified.
    verbose : boolean, optional
        Print extra info
    **kwargs
        Passed through to update_history (timestamp, cmdline).

    Returns
    -------
    None
        If any start index exceeds its end index, a message is printed
        and the function returns before the destination file is opened.

    Side Effects
    ------------
    Strings are written as H5T_CSET_ASCII, not H5T_CSET_UTF8
    Empty attributes are written as scalar "" instead of NULL
    """

    # works only for dimensions defined in root group
    # TODO: allow dimensions specified in subgroups

    # Clipping below replaces entries, so operate on a shallow copy and
    # leave the caller's dict untouched.
    bounds = dict(subset)

    if verbose:
        print('opening', srcfile)
    with netCDF4.Dataset(srcfile, 'r') as src:

        # validate input
        for dimname in bounds:
            if dimname not in src.dimensions:
                print('Warning: dimension "' +
                      dimname + '" does not exist in input file root group.')
            if bounds[dimname][0] > bounds[dimname][1]:
                print('Invalid indices for dimension "' +
                      dimname + '"; exiting.')
                return

        # clip requested ranges to what the source file actually holds
        for dimname, dim in src.dimensions.items():
            if dimname not in bounds:
                continue
            last = len(dim) - 1
            requested = bounds[dimname]
            if any((0 > d or d > last) for d in requested):
                bounds[dimname] = np.clip(requested, a_min=0,
                                          a_max=last).tolist()
                print('Clipping "' + dimname +
                      '" dimension indices to match input file:',
                      requested, '->', bounds[dimname])

        # construct index arrays (end index is inclusive)
        indices = {k: np.arange(v[0], v[1] + 1) for k, v in bounds.items()}

        # copy source file
        if verbose:
            print('opening', dstfile)
        with netCDF4.Dataset(dstfile, 'w') as dst:
            nccopy_grp(src, dst, indices=indices, verbose=verbose)
            update_history(dst, **kwargs)

        # dstfile closes automatically
    # srcfile closes automatically
208 
209 
def update_history(dataset, timestamp=None, cmdline=None):
    """Update 'date_created' and 'history' attributes

    Function to add or update 'date_created' and 'history'
    attributes for specified dataset (usually root).
    The new history entry has the form "<date_created>: <cmdline>" and is
    appended to any existing history, separated by "; ".

    Parameters
    ----------
    dataset : netCDF4.Group
        Open Group or Dataset destination object to update
    timestamp : time.struct_time, optional
        Timestamp to add to history attribute
        Defaults to current time (UTC)
    cmdline : string, optional
        Description to add to history attribute
        Defaults to the running script's basename plus its arguments
    """

    if not timestamp:
        timestamp = time.gmtime()
    fmt = '%Y-%m-%dT%H:%M:%SZ'  # ISO 8601 extended date format
    date_created = time.strftime(fmt, timestamp)

    if not cmdline:
        cmdline = ' '.join([basename(sys.argv[0])] + sys.argv[1:])
    entry = ''.join([date_created, ': ', cmdline])

    # An existing but blank history must not leave a dangling "; " at the
    # start of the new value, so only join when there is real prior text.
    previous = ''
    if 'history' in dataset.ncattrs():
        previous = dataset.history.strip()
    history = '; '.join([previous, entry]) if previous else entry

    dataset.setncattr('date_created', date_created)
    dataset.setncattr('history', history)
def nccopy_var(srcvar, dstgrp, indices=None, verbose=False)
Definition: netcdf_utils.py:13
def nccopy_grp(srcgrp, dstgrp, indices=None, verbose=False)
Definition: netcdf_utils.py:74
#define basename(s)
Definition: l0chunk_modis.c:29
def update_history(dataset, timestamp=None, cmdline=None)
def nccopy(srcfile, dstfile, verbose=False)
def ncsubset_vars(srcfile, dstfile, subset, verbose=False, **kwargs)