NASA Logo
Ocean Color Science Software

ocssw V2022
netcdf_utils.py
Go to the documentation of this file.
1 #! /usr/bin/env python3
2 """
3 Module containing utilities to manipulate netCDF4 files.
4 """
5 __author__ = 'gfireman'
6 
7 import sys
8 import time
9 import numpy as np
10 import netCDF4
11 from os.path import basename
12 
def nccopy_var(srcvar, dstgrp, indices=None, verbose=False):
    """Copy a netCDF4 variable, optionally subsetting some dimensions.

    Creates a variable in ``dstgrp`` with the same name, dtype, dimension
    names and zlib/shuffle/complevel settings as ``srcvar``, copies all of
    its attributes, and then copies its data.  For every dimension of the
    variable that is named in ``indices`` only the listed positions are
    copied.  The destination dimensions must already exist in ``dstgrp``.

    Parameters
    ----------
    srcvar : netCDF4.Variable
        Open variable to be copied
    dstgrp : netCDF4.Group
        Open Group or Dataset destination object to copy stuff to
    indices : dict, optional
        Dict of dimname:[indexarr] to subset a dimension
    verbose : boolean, optional
        Print extra info

    Returns
    -------
    None

    Side Effects
    ------------
    Strings are written as H5T_CSET_ASCII, not H5T_CSET_UTF8
    Empty attributes are written as scalar "" instead of NULL
    """

    # Storage format.  filters() is queried once; it may yield None instead
    # of a dict (netCDF4-python does this for non-HDF5 data models), in
    # which case the defaults below apply.
    filters = srcvar.filters() or {}
    create_kwargs = {
        'zlib': filters.get('zlib', False),
        'shuffle': filters.get('shuffle', False),
        'complevel': filters.get('complevel', 0),
    }

    # Output length of every dimension of this variable that is subsetted.
    subset_len = {}
    if indices:
        subset_len = {dimname: len(indices[dimname])
                      for dimname in srcvar.dimensions
                      if dimname in indices}

    # Reuse the source chunk shape, shrunk where a subsetted dimension is
    # now shorter than the chunk.  Unchunked ('contiguous') variables, and
    # variables for which no chunking information is available (None),
    # are created without explicit chunk sizes.
    chunking = srcvar.chunking()
    if chunking is not None and chunking != 'contiguous':
        create_kwargs['chunksizes'] = [
            min(chunk, subset_len[dimname]) if dimname in subset_len
            else chunk
            for dimname, chunk in zip(srcvar.dimensions, chunking)
        ]

    dstvar = dstgrp.createVariable(srcvar.name,
                                   srcvar.dtype,
                                   srcvar.dimensions,
                                   **create_kwargs)

    # set variable attributes
    # NOTE(review): this happens before any data is written; presumably
    # that ordering matters for attributes such as _FillValue -- confirm.
    dstvar.setncatts(srcvar.__dict__)

    # if no dimension changes, copy all
    if not subset_len:
        if verbose:
            print("\tcopying", srcvar.name)
        dstvar[:] = srcvar[:]

    # otherwise, copy only the subset
    else:
        if verbose:
            print("\tsubsetting", srcvar.name)
        data = srcvar[:]
        # Walk the variable's own axes so that a dimension name appearing
        # more than once is subsetted along each of its axes.
        for axis, dimname in enumerate(srcvar.dimensions):
            if dimname in indices:
                data = np.take(data, indices[dimname], axis=axis)
        dstvar[:] = data

    # make sure it's written out
    dstgrp.sync()
99 
100 
def nccopy_grp(srcgrp, dstgrp, indices=None, verbose=False):
    """Recursively copy a netCDF4 group, optionally subsetting some dimensions.

    Copies the group's attributes, defines its dimensions in ``dstgrp``,
    copies each of its variables with ``nccopy_var`` and then recurses
    into each subgroup.  Dimensions named in ``indices`` are created with
    the length of their index array; unlimited dimensions stay unlimited.

    Parameters
    ----------
    srcgrp : netCDF4.Group
        Open Group or Dataset source object containing stuff to be copied
    dstgrp : netCDF4.Group
        Open Group or Dataset destination object to copy stuff to
    indices : dict, optional
        Dict of dimname:[indexarr] to subset a dimension
    verbose : boolean, optional
        Print extra info

    Returns
    -------
    None
    """

    if verbose:
        print('grp: ', srcgrp.path)

    # copy all group attributes
    dstgrp.setncatts(srcgrp.__dict__)

    # define each dimension
    for dimname, dim in srcgrp.dimensions.items():
        if dim.isunlimited():
            # unlimited takes precedence over any requested subset length
            dimsize = None
        elif indices and dimname in indices:
            dimsize = len(indices[dimname])
        else:
            dimsize = len(dim)
        dstgrp.createDimension(dimname, dimsize)

    # define each variable
    for srcvar in srcgrp.variables.values():
        if verbose:
            # strip the trailing '/' so the root group prints '/name'
            # rather than '//name'
            print('var: ', '/'.join([srcgrp.path.rstrip('/'), srcvar.name]))
        nccopy_var(srcvar, dstgrp, indices=indices, verbose=verbose)

    # define each subgroup
    for grpname, srcsubgrp in srcgrp.groups.items():
        dstsubgrp = dstgrp.createGroup(grpname)
        nccopy_grp(srcsubgrp, dstsubgrp, indices=indices, verbose=verbose)
146 
147 
def nccopy(srcfile, dstfile, verbose=False):
    """Copy a netCDF4 file.

    Function to copy a netCDF4 file to a new file.
    Intended mostly as a demonstration.

    Parameters
    ----------
    srcfile : str
        Path to source file; must be netCDF4 format.
    dstfile : str
        Path to destination file; directory must exist.
    verbose : boolean, optional
        Print extra info

    Returns
    -------
    None
    """

    # both files are closed automatically when the with-block exits
    with netCDF4.Dataset(srcfile, 'r') as src, \
         netCDF4.Dataset(dstfile, 'w') as dst:
        if verbose:
            print('\nfile:', src.filepath())
        nccopy_grp(src, dst, verbose=verbose)
169 
170 
def ncsubset_vars(srcfile, dstfile, subset, verbose=False, **kwargs):
    """Copy a netCDF4 file, with some dimensions subsetted.

    Function to copy a netCDF4 file to a new file, keeping only the
    requested index range of each dimension named in ``subset``.
    The 'date_created' and 'history' attributes of the new file are
    updated afterwards.

    Parameters
    ----------
    srcfile : str
        Path to source file; must be netCDF4 format.
    dstfile : str
        Path to destination file; directory must exist.
    subset : dict
        Dict of dimname:[startindex,endindex] to subset a dimension.
        Both indices are inclusive.  Ranges that fall outside a root-group
        dimension are clipped, and the clipped values are stored back
        into this dict.
    verbose : boolean, optional
        Print extra info
    **kwargs
        Passed through to update_history (timestamp, cmdline).

    Returns
    -------
    None
        Also returns early, without writing dstfile, if any start index
        is greater than its end index.

    Side Effects
    ------------
    Strings are written as H5T_CSET_ASCII, not H5T_CSET_UTF8
    Empty attributes are written as scalar "" instead of NULL
    """

    # works only for dimensions defined in root group
    # TODO: allow dimensions specified in subgroups

    if verbose:
        print('opening', srcfile)
    with netCDF4.Dataset(srcfile, 'r') as src:

        # validate input
        for dimname in subset:
            if dimname not in src.dimensions:
                # only a warning: the range is still used as given
                print('Warning: dimension "' +
                      dimname + '" does not exist in input file root group.')
            if subset[dimname][0] > subset[dimname][1]:
                print('Invalid indices for dimension "' +
                      dimname + '"; exiting.')
                return

        # clip requested ranges to the extent of the root-group dimensions
        for dimname, dim in src.dimensions.items():
            if dimname not in subset:
                continue
            last = len(dim) - 1
            if any((0 > d or d > last) for d in subset[dimname]):
                requested = subset[dimname]
                subset[dimname] = np.clip(requested, a_min=0,
                                          a_max=last).tolist()
                print('Clipping "' + dimname +
                      '" dimension indices to match input file:',
                      requested, '->', subset[dimname])

        # construct index arrays (end index is inclusive, hence the + 1)
        indices = {k: np.arange(subset[k][0],
                                subset[k][1] + 1) for k in subset}

        # copy source file
        if verbose:
            print('opening', dstfile)
        with netCDF4.Dataset(dstfile, 'w') as dst:
            nccopy_grp(src, dst, indices=indices, verbose=verbose)
            update_history(dst, **kwargs)

        # dstfile closes automatically
    # srcfile closes automatically
235 
236 
def update_history(dataset, timestamp=None, cmdline=None):
    """Update 'date_created' and 'history' attributes

    Function to add or update 'date_created' and 'history'
    attributes for specified dataset (usually root).
    The new history entry is "<date_created>: <cmdline>"; it is appended
    to any existing non-empty history, separated by "; ".

    Parameters
    ----------
    dataset : netCDF4.Group
        Open Group or Dataset destination object to update
    timestamp : time.struct_time, optional
        Timestamp to add to history attribute
        Defaults to current time
    cmdline : string, optional
        Description to add to history attribute
        Defaults to the current command line (program basename plus
        arguments)

    Returns
    -------
    None
    """

    if not timestamp:
        timestamp = time.gmtime()
    fmt = '%Y-%m-%dT%H:%M:%SZ'  # ISO 8601 extended date format
    date_created = time.strftime(fmt, timestamp)

    if not cmdline:
        cmdline = ' '.join([basename(sys.argv[0])] + sys.argv[1:])
    entry = ''.join([date_created, ': ', cmdline])

    # Append to an existing history, but do not produce a leading "; "
    # when that attribute exists and is empty or only whitespace.
    previous = ''
    if 'history' in dataset.ncattrs():
        previous = dataset.history.strip()
    history = ''.join([previous, '; ', entry]) if previous else entry

    dataset.setncattr('date_created', date_created)
    dataset.setncattr('history', history)
list(APPEND LIBS ${NETCDF_LIBRARIES}) find_package(GSL REQUIRED) include_directories($
Definition: CMakeLists.txt:8
void print(std::ostream &stream, const char *format)
Definition: PrintDebug.hpp:38
def nccopy_var(srcvar, dstgrp, indices=None, verbose=False)
Definition: netcdf_utils.py:13
def nccopy_grp(srcgrp, dstgrp, indices=None, verbose=False)
#define basename(s)
Definition: l0chunk_modis.c:29
def update_history(dataset, timestamp=None, cmdline=None)
def nccopy(srcfile, dstfile, verbose=False)
def ncsubset_vars(srcfile, dstfile, subset, verbose=False, **kwargs)