# NASA Ocean Color Science Software (ocssw V2022)
# decode.py
1 """Internal decoding routines."""
2 from __future__ import division
3 from collections import namedtuple
4 import math
5 import warnings
6 
7 import numpy as np
8 
9 from .constants import (
10  BITS_PER_BYTE,
11  PRIMARY_HEADER_NUM_BYTES,
12 )
13 
14 __author__ = "Daniel da Silva <mail@danieldasilva.org>"
15 
16 
def _get_packet_total_bytes(primary_header_bytes):
    """Parse the total packet size from the bytes of a packet's primary header.

    Parameters
    ----------
    primary_header_bytes : bytes
        Bytes associated with the packet primary header, of length
        `ccsdspy.constants.PRIMARY_HEADER_NUM_BYTES`.

    Returns
    -------
    num_bytes : int
        Total number of bytes in the packet, including the primary header.

    Raises
    ------
    ValueError
        The number of bytes in the supplied argument is too short. It must be
        of length `ccsdspy.constants.PRIMARY_HEADER_NUM_BYTES`.
    """
    if len(primary_header_bytes) != PRIMARY_HEADER_NUM_BYTES:
        raise ValueError(
            f"Primary header byte sequence must be {PRIMARY_HEADER_NUM_BYTES} bytes long"
        )

    # The packet length field occupies bytes 5 and 6 (1-indexed) of the
    # primary header, stored big-endian. The value held there is the number
    # of bytes in the packet body minus one.
    length_field = (primary_header_bytes[4] << BITS_PER_BYTE) | primary_header_bytes[5]

    # Undo the minus-one convention and account for the header itself.
    return length_field + 1 + PRIMARY_HEADER_NUM_BYTES
55 
56 
def _get_packet_apid(primary_header_bytes):
    """Parse the APID of a packet from the bytes of its primary header.

    Parameters
    ----------
    primary_header_bytes : bytes
        Bytes associated with the packet primary header, of length
        `ccsdspy.constants.PRIMARY_HEADER_NUM_BYTES`.

    Returns
    -------
    int
        The APID, taken from the low 11 bits of the first two header bytes.

    Raises
    ------
    ValueError
        The number of bytes in the supplied argument is too short. It must be
        of length `ccsdspy.constants.PRIMARY_HEADER_NUM_BYTES`.
    """
    if len(primary_header_bytes) != PRIMARY_HEADER_NUM_BYTES:
        raise ValueError(
            f"Primary header byte sequence must be {PRIMARY_HEADER_NUM_BYTES} bytes long"
        )

    # Combine the first two bytes (1-indexed bytes 1 and 2) into a big-endian
    # 16-bit word, then keep only the low 11 bits; the upper bits of the first
    # byte belong to other primary-header flags, not the APID.
    word = (primary_header_bytes[0] << BITS_PER_BYTE) | primary_header_bytes[1]

    return word & 0x07FF
89 
90 
def _decode_fixed_length(file_bytes, fields):
    """Decode a fixed length packet stream of a single APID.

    Parameters
    ----------
    file_bytes : array
        A NumPy array of uint8 type, holding the bytes of the file to decode.
    fields : list of ccsdspy.PacketField
        A list of fields, including the secondary header but excluding the
        primary header.

    Returns
    -------
    dictionary mapping field names to NumPy arrays, stored in the same order as
    the fields array passed.
    """
    # Setup a dictionary mapping a bit offset to each field. It is assumed
    # that the `fields` array contains entries for the secondary header.
    packet_nbytes = _get_packet_total_bytes(file_bytes[:PRIMARY_HEADER_NUM_BYTES])
    body_nbytes = sum(field._bit_length for field in fields) // BITS_PER_BYTE
    # If the field definitions don't cover the whole packet, assume the
    # uncovered bits are a header at the beginning and start counting there.
    counter_start = max(0, (packet_nbytes - body_nbytes) * BITS_PER_BYTE)
    counter = counter_start

    bit_offset = {}  # field name -> bit offset from the start of the packet

    for i, field in enumerate(fields):
        if i == 0 and field._bit_offset is not None:
            # case: using bit_offset to fix the start position
            bit_offset[field._name] = field._bit_offset
            counter = field._bit_offset + field._bit_length
        elif field._bit_offset is None:
            # case: floating start position such that packet def fills to
            # to end of packet. What's missing is assumed to be header at the beginning.
            bit_offset[field._name] = counter
            counter += field._bit_length
        elif field._bit_offset < counter:
            # case: bit_offset specifying to backtrack. This condition
            # seems odd and unlikely. Eg. one or more bits of a packet overlap?
            bit_offset[field._name] = field._bit_offset
            # don't update counter unless the overlap goes past counter
            counter = max(field._bit_offset + field._bit_length, counter)
        elif field._bit_offset >= counter:
            # case: otherwise, bit_offset is ahead of counter and we're skipping
            # definition of 0 or more bits.
            bit_offset[field._name] = field._bit_offset
            counter = field._bit_offset + field._bit_length
        else:
            # Unreachable for integer offsets: the branches above are exhaustive.
            raise RuntimeError(
                f"Unexpected case: could not compare"
                f" bit_offset {field._bit_offset} with "
                f"counter {counter} for field {field._name}"
            )

    # Sanity check the accumulated layout against the packet length declared
    # in the primary header.
    if all(field._bit_offset is None for field in fields):
        assert counter == packet_nbytes * BITS_PER_BYTE, "Field definition != packet length"
    elif counter > packet_nbytes * BITS_PER_BYTE:
        body_bits = sum(field._bit_length for field in fields)
        raise RuntimeError(
            (
                "Packet definition larger than packet length"
                f" by {counter-(packet_nbytes*BITS_PER_BYTE)} bits"
                f" (packet length in file is {packet_nbytes*BITS_PER_BYTE} bits, defined fields are {body_bits} bits)"
            )
        )

    # Setup metadata for each field, consisting of where to look for the field in
    # the file and how to parse it.
    FieldMeta = namedtuple("Meta", ["nbytes_file", "start_byte_file", "nbytes_final", "np_dtype"])
    field_meta = {}

    for field in fields:
        nbytes_file = np.ceil(field._bit_length / BITS_PER_BYTE).astype(int)
        # NumPy has no 3/5/6/7-byte integer dtypes; widen to 4 or 8 bytes.
        nbytes_final = {3: 4, 5: 8, 6: 8, 7: 8}.get(nbytes_file, nbytes_file)
        start_byte_file = bit_offset[field._name] // BITS_PER_BYTE

        # byte_order_symbol is only used to control float types here.
        # - uint and int byte order are handled with byteswap later
        # - fill is independent of byte order (all 1's)
        # - byte order is not applicable to str types
        byte_order_symbol = "<" if field._byte_order == "little" else ">"
        np_dtype = {
            "uint": ">u%d" % nbytes_final,
            "int": ">i%d" % nbytes_final,
            "fill": "S%d" % nbytes_final,
            "float": "%sf%d" % (byte_order_symbol, nbytes_final),
            "str": "S%d" % nbytes_final,
        }[field._data_type]

        field_meta[field] = FieldMeta(nbytes_file, start_byte_file, nbytes_final, np_dtype)

    # Read the file and calculate length of packet and number of packets in the
    # file. Trim extra bytes that may have occurred by a break in the downlink
    # while a packet was being transferred.
    extra_bytes = file_bytes.size % packet_nbytes

    if extra_bytes > 0:
        file_bytes = file_bytes[:-extra_bytes]

    packet_count = file_bytes.size // packet_nbytes

    # Create byte arrays for each field. At the end of this method they are left
    # as the numpy uint8 type.
    field_bytes = {}

    for field in fields:
        meta = field_meta[field]
        arr = np.zeros(packet_count * meta.nbytes_final, "u1")
        # Number of left-side zero-padding bytes so the value right-aligns in
        # the widened output word.
        xbytes = meta.nbytes_final - meta.nbytes_file

        # Gather the field's bytes from every packet at once using strided
        # slicing (stride = packet length).
        for i in range(xbytes, meta.nbytes_final):
            arr[i :: meta.nbytes_final] = file_bytes[
                meta.start_byte_file + i - xbytes :: packet_nbytes
            ]
        field_bytes[field] = arr

    # Switch dtype of byte arrays to the final dtype, and apply masks and shifts
    # to interpret the correct bits.
    field_arrays = {}

    for field in fields:
        meta = field_meta[field]
        arr = field_bytes[field]

        if field._data_type == "int":
            # Signed integers will be treated as unsigned integers in the following
            # block, and then get special treatment later
            arr.dtype = meta.np_dtype.replace("i", "u")
        else:
            # In-place reinterpretation of the raw bytes (no copy).
            arr.dtype = meta.np_dtype

        if field._data_type in ("int", "uint"):
            xbytes = meta.nbytes_final - meta.nbytes_file

            # Bits to strip off the left (high) end of the word: the padding
            # bits plus any leading bits of the first byte that belong to a
            # neighboring field.
            bitmask_left = (
                bit_offset[field._name]
                + BITS_PER_BYTE * xbytes
                - BITS_PER_BYTE * meta.start_byte_file
            )

            # Bits to the right (low end) of the field within the word.
            bitmask_right = BITS_PER_BYTE * meta.nbytes_final - bitmask_left - field._bit_length

            bitmask_left, bitmask_right = np.array([bitmask_left, bitmask_right]).astype(
                meta.np_dtype
            )

            # Build a mask that keeps only the field's bits, then shift the
            # selected bits down to position 0.
            bitmask = np.zeros(arr.shape, arr.dtype)
            bitmask |= (1 << int(BITS_PER_BYTE * meta.nbytes_final - bitmask_left)) - 1
            tmp = np.left_shift([1], bitmask_right)
            bitmask &= np.bitwise_not(tmp[0] - 1).astype(arr.dtype)

            arr &= bitmask
            arr >>= bitmask_right

            if field._byte_order == "little":
                arr.byteswap(inplace=True)

        if field._data_type == "int":
            # Sign-extend: reinterpret as signed, then OR ones into the high
            # bits wherever the field's sign bit is set.
            arr.dtype = meta.np_dtype
            sign_bit = (arr >> (field._bit_length - 1)) & 1

            # Set bits between start_bit and stop_bit to 1
            one = np.zeros_like(arr) + 1
            stop_bit = arr.itemsize * BITS_PER_BYTE
            start_bit = field._bit_length
            # NOTE(review): `one << stop_bit` shifts by the full word width,
            # which is undefined in C — presumably wraps to 0 with NumPy on the
            # target platforms so `(… ) - one` yields all ones; confirm.
            mask = ((one << (start_bit - one)) - one) ^ ((one << stop_bit) - one)
            arr |= sign_bit * mask

        field_arrays[field._name] = arr

    return field_arrays
261 
262 
def _decode_variable_length(file_bytes, fields):
    """Decode a variable length packet stream of a single APID.

    Parameters
    ----------
    file_bytes : array
        A NumPy array of uint8 type, holding the bytes of the file to decode.
    fields : list of ccsdspy.PacketField
        A list of fields, excluding the
        primary header.

    Returns
    -------
    dict
        A dictionary mapping field names to NumPy arrays, stored in the same order as the fields.
    """
    # Get start indices of each packet -------------------------------------
    packet_starts = []
    offset = 0

    # Walk packet-to-packet using the length field in each primary header:
    # bytes 5 and 6 (1-indexed) hold body length minus one, big-endian
    # (256 == 2**BITS_PER_BYTE), and 7 == primary header size (6) + 1.
    while offset < len(file_bytes):
        packet_starts.append(offset)
        offset += file_bytes[offset + 4] * 256 + file_bytes[offset + 5] + 7

    if offset != len(file_bytes):
        missing_bytes = offset - len(file_bytes)
        message = f"File appears truncated - missing {missing_bytes} bytes (or maybe garbage at end)"
        warnings.warn(message)

    npackets = len(packet_starts)

    # Initialize output dictionary of field arrays, their dtypes, and the offsets
    # that can be determined before parsing each packet.
    # ------------------------------------------------------------------------
    field_arrays, numpy_dtypes, bit_offsets = _varlength_intialize_field_arrays(fields, npackets)

    # Loop through packets
    # ----------------------------------------------------------------------------
    for pkt_num, packet_start in enumerate(packet_starts):
        # Total packet size in bytes, recomputed from this packet's header.
        packet_nbytes = file_bytes[packet_start + 4] * 256 + file_bytes[packet_start + 5] + 7
        # Per-packet copy: offsets of fields after an expanding field depend
        # on this packet's length.
        bit_offsets_cur = bit_offsets.copy()
        bit_lengths_cur = {}

        offset_counter = 0
        offset_history = []  # NOTE(review): written but never read — dead state?

        for i, field in enumerate(fields):
            # Determine the bit length for field
            # ----------------------------------
            if field._array_shape == "expand":
                # Expanding field absorbs whatever the fixed fields don't use.
                footer_bits = sum(field._bit_length for fld in fields[i + 1 :])
                bit_length = packet_nbytes * BITS_PER_BYTE - footer_bits - offset_counter
            elif isinstance(field._array_shape, str):
                # Length defined by a previously-decoded field's value.
                bit_length = field_arrays[field._array_shape][pkt_num] * field._bit_length
            else:
                bit_length = field._bit_length

            bit_lengths_cur[field._name] = bit_length

            # Determine bit offset (fall back to the running counter when not
            # pre-computed).
            if field._name not in bit_offsets_cur:
                bit_offsets_cur[field._name] = offset_counter

            offset_history.append(offset_counter)
            offset_counter += bit_length

            # Parse field data
            # ------------------
            field_raw_data = None  # will be array of uint8
            if bit_offsets_cur[field._name] < 0:
                # Footer byte after expanding field: Referenced from end of packet
                start_byte = (
                    packet_start + packet_nbytes + bit_offsets_cur[field._name] // BITS_PER_BYTE
                )
            else:
                # Header byte before expanding field: Referenced from start of packet
                start_byte = packet_start + bit_offsets_cur[field._name] // BITS_PER_BYTE

            if isinstance(field._array_shape, str):
                # Variable-length field: take the raw byte span directly
                # (assumed byte-aligned).
                stop_byte = start_byte + bit_lengths_cur[field._name] // BITS_PER_BYTE
                field_raw_data = file_bytes[start_byte:stop_byte]
            else:
                # Get field_raw_data, which are the bytes of the field as uint8 for this
                # packet
                bit_offset = bit_offsets_cur[field._name]
                # Count of file bytes the field straddles, given its sub-byte
                # starting position.
                nbytes_file = (
                    (bit_offset + field._bit_length - 1) // BITS_PER_BYTE
                    - bit_offset // BITS_PER_BYTE
                    + 1
                )

                # NumPy has no 3/5/6/7-byte dtypes; widen to 4 or 8 bytes and
                # left-pad with zeros.
                nbytes_final = {3: 4, 5: 8, 6: 8, 7: 8}.get(nbytes_file, nbytes_file)
                xbytes = nbytes_final - nbytes_file
                field_raw_data = np.zeros(nbytes_final, "u1")

                # NOTE(review): this loop rebinds `i` from the enclosing
                # enumerate; harmless since enumerate reassigns it, but fragile.
                for i in range(xbytes, nbytes_final):
                    idx = start_byte + i - xbytes
                    field_raw_data[i] = file_bytes[idx]

            # Switch dtype of byte arrays to the final dtype, and apply masks and shifts
            # to interpret the correct bits.
            if field._data_type == "int":
                # Signed integers will be treated as unsigned integers in the following
                # block, and then get special treatment later
                field_raw_data.dtype = numpy_dtypes[field._name].replace("i", "u")
            else:
                field_raw_data.dtype = numpy_dtypes[field._name]

            if field._data_type in ("uint", "int"):
                if not isinstance(field._array_shape, str):
                    # Absolute (file-relative) bit just past the field's last byte.
                    last_byte = start_byte + nbytes_file
                    end_last_parent_byte = last_byte * BITS_PER_BYTE

                    # Normalize negative (end-referenced) offsets to be
                    # relative to the packet start.
                    b = bit_offsets_cur[field._name]
                    if b < 0:
                        b = packet_nbytes * BITS_PER_BYTE + bit_offsets_cur[field._name]

                    last_occupied_bit = packet_start * BITS_PER_BYTE + b + bit_length
                    left_bits_before_shift = b % BITS_PER_BYTE
                    right_shift = end_last_parent_byte - last_occupied_bit

                    assert right_shift >= 0, f"right_shift={right_shift}, {field}"

                    # Mask off leading bits belonging to a neighboring field,
                    # then shift the value down to bit 0.
                    if left_bits_before_shift > 0:
                        mask = int(
                            "1" * ((nbytes_file * BITS_PER_BYTE) - left_bits_before_shift), 2
                        )
                        field_raw_data &= mask

                    if right_shift > 0:
                        field_raw_data >>= right_shift

                if field._byte_order == "little":
                    field_raw_data.byteswap(inplace=True)

            if field._data_type == "int":
                # Sign-extend: reinterpret as signed and, when negative, OR
                # ones into the bits above the field's sign bit.
                field_raw_data.dtype = numpy_dtypes[field._name]
                sign_bit = (field_raw_data >> (field._bit_length - 1)) & 1

                if sign_bit:
                    # Set bits between start_bit and stop_bit to 1
                    one = np.zeros_like(field_raw_data) + 1
                    stop_bit = field_raw_data.itemsize * BITS_PER_BYTE
                    start_bit = field._bit_length
                    # NOTE(review): `one << stop_bit` shifts by the full word
                    # width (C-undefined); presumably wraps to 0 here — confirm.
                    mask = ((one << (start_bit - one)) - one) ^ ((one << stop_bit) - one)

                    field_raw_data |= mask

            # Set the field in the final array
            if isinstance(field._array_shape, str):
                # Jagged output: store the per-packet array in an object slot.
                field_arrays[field._name][pkt_num] = field_raw_data
            else:
                field_arrays[field._name][pkt_num] = field_raw_data[0]

    return field_arrays
419 
420 
def _varlength_intialize_field_arrays(fields, npackets):
    """
    Initialize output dictionary of field arrays, their dtypes, and the offsets
    that can be determined before parsing each packet.

    Expanding fields will be an array of dtype=object (jagged array), which will be
    an array reference at each index. Non-expanding fields are matched to the most
    suitable data type.

    Parameters
    ----------
    fields : list of ccsdspy.PacketField
        A list of fields, including the secondary header but excluding the
        primary header.

    npackets : int
        Number of packets in the file

    Returns
    -------
    field_arrays : dict, str to array
        Dictionary of initialized field arrays, mapping string field name to numpy array
    numpy_dtypes : dict, str to numpy dtype
        Dictionary of datatypes for the final field arrays, mapping string field names
        to numpy data types
    bit_offsets : dict, str to int/None
        Dictionary of bit offsets that can be determined before parsing each packet. Maps
        string field names to integers or None
    """
    # First pass: offsets for fixed-length fields that precede the first
    # variable-length field. These are non-negative, relative to packet start.
    # ----------------------------------------------------------------------
    bit_offsets = {}
    counter = 0

    for field in fields:
        if isinstance(field._array_shape, str):
            # Offsets beyond the first variable-length field depend on each
            # packet's length and cannot be precomputed here.
            break
        if field._bit_offset is None:
            bit_offsets[field._name] = counter
            counter += field._bit_length
        else:
            bit_offsets[field._name] = field._bit_offset
            counter = max(field._bit_offset + field._bit_length, counter)

    # Index of the last variable-length field, if any.
    last_var_idx = None
    for i, field in enumerate(fields):
        if isinstance(field._array_shape, str):
            last_var_idx = i

    if last_var_idx is not None:
        # Fields after the last variable-length field form a fixed footer;
        # give them negative offsets, referenced from the end of the packet.
        counter = 0
        for field in reversed(fields[last_var_idx + 1 :]):
            counter -= field._bit_length
            bit_offsets[field._name] = counter

    # Generate field arrays
    # ---------------------
    field_arrays = {}
    numpy_dtypes = {}
    nbytes_file = {}

    for i, field in enumerate(fields):
        # Number of bytes that the field spans in the file
        if isinstance(field._array_shape, str):
            nbytes_file[field._name] = field._bit_length // BITS_PER_BYTE
        elif i > 0 and isinstance(fields[i - 1]._array_shape, str):
            # If the preceding field is variable length, we can assume this
            # field starts byte-aligned.
            nbytes_file[field._name] = math.ceil(field._bit_length / BITS_PER_BYTE)
        else:
            # The span depends on how the field straddles byte boundaries.
            bit_offset = bit_offsets[field._name]
            nbytes_file[field._name] = (
                (bit_offset + field._bit_length - 1) // BITS_PER_BYTE
                - bit_offset // BITS_PER_BYTE
                + 1
            )

        # NumPy only has 2-byte, 4-byte and 8-byte variants (eg, float16, float32,
        # float64, but not float48). Map them to an nbytes for the output.
        nbytes_final = {3: 4, 5: 8, 6: 8, 7: 8}.get(
            nbytes_file[field._name], nbytes_file[field._name]
        )

        # byte_order_symbol is only used to control float types here.
        # - uint and int byte order are handled with byteswap later
        # - fill is independent of byte order (all 1's)
        # - byte order is not applicable to str types
        byte_order_symbol = "<" if field._byte_order == "little" else ">"
        np_dtype = {
            "uint": ">u%d" % nbytes_final,
            "int": ">i%d" % nbytes_final,
            "fill": "S%d" % nbytes_final,
            "float": "%sf%d" % (byte_order_symbol, nbytes_final),
            "str": "S%d" % nbytes_final,
        }[field._data_type]

        numpy_dtypes[field._name] = np_dtype

        if isinstance(field._array_shape, str):
            # Jagged array: one object slot per packet, filled with a
            # per-packet array during parsing.
            field_arrays[field._name] = np.zeros(npackets, dtype=object)
        else:
            field_arrays[field._name] = np.zeros(npackets, dtype=np_dtype)

    return field_arrays, numpy_dtypes, bit_offsets