From 1b30794990b53132cc08b403386b358b603b95a9 Mon Sep 17 00:00:00 2001 From: paulhamer-noaa <81647525+paulhamer-noaa@users.noreply.github.com> Date: Tue, 15 Oct 2024 16:48:51 -0600 Subject: [PATCH 1/5] Second attempt to getting github upto date --- .../idsse_common/idsse/common/sci/bit_pack.py | 326 ++++++++++++++++++ python/idsse_common/test/sci/test_bit_pack.py | 81 +++++ 2 files changed, 407 insertions(+) create mode 100644 python/idsse_common/idsse/common/sci/bit_pack.py create mode 100644 python/idsse_common/test/sci/test_bit_pack.py diff --git a/python/idsse_common/idsse/common/sci/bit_pack.py b/python/idsse_common/idsse/common/sci/bit_pack.py new file mode 100644 index 0000000..a85095c --- /dev/null +++ b/python/idsse_common/idsse/common/sci/bit_pack.py @@ -0,0 +1,326 @@ +"""Utility module for packing data""" +# ---------------------------------------------------------------------------------- +# Created on Thu Dec 14 2023 +# +# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved. (1) +# Copyright (c) 2023 Colorado State University. All rights reserved. (2) +# +# Contributors: +# Geary Layne (1) +# Paul Hamer (2) +# +# ---------------------------------------------------------------------------------- + +from enum import IntEnum +from typing import NamedTuple +import numpy + +class PackType(IntEnum): + """Enumerated type used to indicate if data is packed into a byte or short (16 bit)""" + BYTE = 8 + SHORT = 16 + + +class PackInfo(NamedTuple): + """Data class used to hold packing info""" + type: PackType + scale: float + offset: float + + +class PackData(NamedTuple): + """Data class used to hold packed data and the corresponding packing info""" + type: PackType + scale: float + offset: float + data: list | numpy.ndarray + +def get_min_max(data: list | numpy.ndarray) -> (float, float): + """Get the minimum and maximum from the numpy array or list. Testing showed (on a mac) + that numpy was faster... + + Args: + data (list | numpy.ndarray): Input data, either in list form + (e.g. list[list[float]] for 2D) or as a numpy array + Raises: + TypeError: If the arguments for the call to the function are invalid + Returns: + (float, float): The minimum, maximum from the supplied argument + """ + if isinstance(data, list): + arr = numpy.array(data).ravel(order='K') + return numpy.min(arr), numpy.max(arr) + data.ravel(order='K') + return numpy.min(data), numpy.max(data) + +def get_pack_info( + min_value: float, + max_value: float, + decimals: int = None, + pack_type: PackType = None +) -> PackInfo: + """Retrieve appropriate packing info based on min/max values and + either decimals of precision or pack type. + + Args: + min_value (float): The minimum value of the data to be packed + max_value (float): The maximum value of the data to be packed. + decimals (int, optional): The number of decimal places of precision being requested. + If the decimals is None, it indicates the PackType should + be used. Defaults to None. + pack_type (PackType, optional): The pack type (byte or short) being requested. + If the pack_type is None, it indicate that decimals + of precision should be used. Defaults to None. + + Raises: + ValueError: If one and only one of decimals aor pack_type aren't provided, or + if not appropriate packing type can be found. + + Returns: + PackInfo: The determined packing info, including packing type (byte or short), and + scale and offset. + """ + if decimals is None and pack_type is None: + raise ValueError('Either decimals or pack_type must be non None') + + if decimals is not None and pack_type is not None: + raise ValueError('Either decimals or pack_type must be non None, but not both') + + if pack_type: + scale = (max_value - min_value) / _max_values[pack_type] + return PackInfo(pack_type, float(scale), float(min_value)) + + scale = _get_scale(decimals) + unique_values = int((max_value-min_value) / scale) + for p_type, max_val in _max_values.items(): + if unique_values <= max_val: + return PackInfo(p_type, float(scale), float(min_value)) + + raise ValueError('Unable to find appropriate packing') + + +def pack_to_list( + data: list | numpy.ndarray, + pack_info: PackInfo | None = None, + decimals: int | None = None, + in_place: bool = True +): + """Preform bit packing of input data, utilizing the the pack_info if provided or + a derived pack_info if not. + + Args: + data (list | numpy.ndarray): Input data, either in list form + (e.g. list[list[float]] for 2D) or as a numpy array + pack_info (PackInfo | None, optional): Pre-determined packing info. Defaults to None. + decimals (int | None, optional): If pack_info is not provided, decimals is used + to derive packing info. Defaults to None. + in_place (bool, optional): A flag to indicate to preform bit packing in place to the + extent possible. Defaults to True. + + Raises: + KeyError: If the input data is not of support type. + + Returns: + PackData: Returns the packed data and meta data (i.e. PackInfo) + """ + if isinstance(data, numpy.ndarray): + return pack_numpy_to_list(data, pack_info, decimals) + + if isinstance(data, list): + return pack_list_to_list(data, pack_info, decimals, in_place) + + raise KeyError(f'Data type ({type(data)}), is not supported') + + +def pack_numpy_to_numpy( + data: numpy.ndarray, + pack_info: PackInfo | None = None, + decimals: int | None = None, + in_place: bool = True +) -> PackData: + """Preform bit packing of input data, utilizing the the pack_info if provided or + a derived pack_info if not. Input and output are numpy arrays. + + Args: + data (numpy.ndarray): Input data. + pack_info (PackInfo | None, optional): Pre-determined packing info. Defaults to None. + decimals (int | None, optional): If pack_info is not provided, decimals is used + to derive packing info. Defaults to None. + in_place (bool, optional): A flag to indicate to preform bit packing in place to the + extent possible. Defaults to True. + + Raises: + ValueError: If input data is not numpy array + + Returns: + PackData: Returns the packed data and meta data (i.e. PackInfo), where data will be + numpy array. + """ + if not isinstance(data, numpy.ndarray): + raise ValueError(f'Data must be a numpy.ndarray but is of type {type(data)}') + + pack_type, scale, offset = _resolve_pack_info(data, pack_info, decimals) + + if in_place: + if not numpy.issubdtype(data.dtype, numpy.floating): + raise ValueError('Can not complete "in_place" bit packing with ' + 'non floating point array') + data -= offset + else: + data = data-offset + + data /= scale + return PackData(pack_type, scale, offset, numpy.trunc(data, out=data)) + + +def pack_list_to_list( + data: list, + pack_info: PackInfo | None = None, + decimals: int | None = None, + in_place: bool = True +) -> PackData: + """Preform bit-packing of input data, utilizing the pack_info if provided or + a derived pack_info if not. Input and output are python list (can be nested lists). + + Args: + data (list): Input data. + pack_info (PackInfo | None, optional): Pre-determined packing info. Defaults to None. + decimals (int | None, optional): If pack_info is not provided, decimals is used + to derive packing info. Defaults to None. + in_place (bool, optional): A flag to indicate to preform bit packing in place to the + extent possible. Defaults to True. + + Raises: + ValueError: If input data is not python list + + Returns: + PackData: Returns the packed data and meta data (i.e. PackInfo), where data will be + python list (possibly nested). + """ + if not isinstance(data, list): + raise ValueError(f'Data must be a python list but is of type {type(data)}') + + pack_type, scale, offset = _resolve_pack_info(data, pack_info, decimals) + if in_place: + return PackData(pack_type, scale, offset, _pack_list_to_list_in_place(data, scale, offset)) + return PackData(pack_type, scale, offset, _pack_list_to_list_copy(data, scale, offset)) + + +def pack_numpy_to_list( + data: numpy.array, + pack_info: PackInfo | None = None, + decimals: int | None = None, +) -> PackData: + """Preform bit packing of input data, utilizing the the pack_info if provided or + a derived pack_info if not. Input is numpy array and output is python list. + + Args: + data (numpy.ndarray): Input data. + pack_info (PackInfo | None, optional): Pre-determined packing info. Defaults to None. + decimals (int | None, optional): If pack_info is not provided, decimals is used + to derive packing info. Defaults to None. + + Raises: + ValueError: If input data is not numpy array + + Returns: + PackData: Returns the packed data and meta data (i.e. PackInfo), where data will be + python list. + """ + if not isinstance(data, numpy.ndarray): + raise ValueError(f'Data must be a numpy.ndarray but is of type {type(data)}') + + pack_type, scale, offset = _resolve_pack_info(data, pack_info, decimals) + return PackData(pack_type, scale, offset, _pack_np_array_to_list(data, scale, offset)) + + +# determine the appropriate pack info, basically if pack info if provided return that, +# else compute it based on decimal, and if nothing else use short packing. This is needed +# because of the overloaded nature of the public pack functions. A caller can provide a +# specific pack_info to use or the decimal precision (which with the actual data can be used to +# create a pack_info), or lastly if not providing a pack_info or decimal then a pack_info using +# short packing would be used. +def _resolve_pack_info( + data, + pack_info: PackInfo | None = None, + decimals: int | None = None +) -> PackInfo: + return (PackInfo(pack_info.type, + float(pack_info.scale), + float(pack_info.offset)) + if pack_info is not None + else get_pack_info(numpy.min(data), + numpy.max(data), + decimals=decimals) + if decimals is not None + else get_pack_info(numpy.min(data), + numpy.max(data), + pack_type=PackType.SHORT) + ) + + +# core packing code specific to in place packing of a list(s) +def _pack_list_to_list_in_place( + data: list, + scale: float, + offset: float +) -> list: + # Convert list into numpy array (it creates a copy) + np_data = numpy.array(data, dtype=float) + data = _pack_np_array_to_list(np_data, scale, offset) + return data + +# core packing code specific to packing a list(s) with forced copying +def _pack_list_to_list_copy( + data: list, + scale: float, + offset: float +) -> list: + np_data = numpy.array(data, dtype=float) + return _pack_np_array_to_list(np_data, scale, offset) + + +# core packing code specific to packing numpy array to a list, in place not possible +def _pack_np_array_to_list( + data: numpy.array, + scale: float, + offset: float +) -> list: + np_data = numpy.copy(data) + np_data -= offset + np_data /= scale + # Return the truncated array and a int list. + return (numpy.trunc(np_data).astype(int)).tolist() + +# core packing code using diplib package (sometimes slower than the original so not used but here +# for an option. +def _diplib_pack(data: numpy.array, + scale: float, + offset: float) -> numpy.array: + dip_data = dip.Image(data) + dip_data -= offset + dip_data /= scale + return np.trunc(dip_data) + + +# private lookup for the max value possible for bit packing type +_max_values = { + PackType.BYTE: 255, + PackType.SHORT: 65535 +} + +_scale_lookup = { + 0: 1., + 1: 0.1, + 2: 0.01, + 3: 0.001, + 4: 0.0001, + 5: 0.00001, + 6: 0.000001 +} + + +# private lookup function of num decimal to actual decimals. Using the lookup remove rounding +# issues with math.pow for the most common decimal values +def _get_scale(decimals): + return _scale_lookup.get(decimals, .1**decimals) diff --git a/python/idsse_common/test/sci/test_bit_pack.py b/python/idsse_common/test/sci/test_bit_pack.py new file mode 100644 index 0000000..f8ec4d4 --- /dev/null +++ b/python/idsse_common/test/sci/test_bit_pack.py @@ -0,0 +1,81 @@ +'''Module for testing the bit pack utils''' +# ---------------------------------------------------------------------------------- +# Created on Fri Dec 15 2023 +# +# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved. (1) +# Copyright (c) 2023 Colorado State University. All rights reserved. (2) +# +# Contributors: +# Geary Layne (1) +# Paul Hamer (2) +# +# ---------------------------------------------------------------------------------- +# pylint: disable=missing-function-docstring + +import numpy +import pytest + +from idsse.common.sci.bit_pack import (get_pack_info, + pack_numpy_to_numpy, + pack_numpy_to_list, + pack_to_list, + PackInfo, + PackType) + + +def test_get_pack_info_with_decimals(): + result = get_pack_info(-1, 1, decimals=2) + expected = PackInfo(PackType.BYTE, 0.01, -1) + assert result == expected + + +def test_get_pack_info_with_pack_type(): + result = get_pack_info(-1, 1, pack_type=PackType.SHORT) + expected = PackInfo(PackType.SHORT, 3.0518043793392844e-05, -1) + assert result.type == expected.type + assert result.scale == pytest.approx(expected.scale) + assert result.offset == expected.offset + + +def test_pack_list_to_list(): + data = [[10, 50, 100, 200, 500], [30, 150, 300, 400, 600]] + result = pack_to_list(data, in_place=False) + expected = [[0, 4443, 9996, 21104, 54427], [2221, 15550, 32212, 43319, 65535]] + numpy.testing.assert_array_equal(result.data, expected) + assert data[0][0] != result.data[0][0] + + +def test_pack_to_list(): + data = numpy.array([[.01, .05, .1, .2, .5], [.03, .15, .3, .4, .6]]) + result = pack_to_list(data, in_place=False) + expected = [[0, 4443, 9996, 21104, 54427], [2221, 15550, 32212, 43319, 65535]] + numpy.testing.assert_array_equal(result.data, expected) + assert data[0][0] != result.data[0][0] + + +def test_pack_numpy(): + data = numpy.array([[-1, -.5, 0, .5, 1], [-1, -.25, 0, .25, 1]]) + result = pack_numpy_to_numpy(data, in_place=False) + expected = numpy.array([[0, 16383, 32767, 49151, 65535], + [0, 24575, 32767, 40959, 65535]]) + numpy.testing.assert_array_equal(result.data, expected) + assert data[0, 0] != result.data[0, 0] + + +def test_pack_numpy_in_place(): + data = numpy.array([[-100., -50, 0, 50, 100], [-100, 0, 100, 200, 300]]) + result = pack_numpy_to_numpy(data, in_place=True) + expected = numpy.array([[0., 8191., 16383., 24575., 32767.], + [0., 16383., 32767., 49151., 65535.]]) + numpy.testing.assert_array_equal(data, result.data, expected) + assert data[0, 0] == result.data[0, 0] + + +def test_pack_numpy_to_list(): + data = numpy.array([[-1, -.5, 0, .5, 1], [-1, -.25, 0, .25, 1]]) + result = pack_numpy_to_list(data, decimals=2) + expected = [[0, 50, 100, 150, 200], + [0, 75, 100, 125, 200]] + assert isinstance(result.data, list) + assert isinstance(result.data[0][0], int) + numpy.testing.assert_array_equal(result.data, expected) From ce928599f64da0682a767b00559a64e35ff88ea3 Mon Sep 17 00:00:00 2001 From: paulhamer-noaa <81647525+paulhamer-noaa@users.noreply.github.com> Date: Tue, 15 Oct 2024 17:03:20 -0600 Subject: [PATCH 2/5] lint fixes --- python/idsse_common/idsse/common/sci/bit_pack.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python/idsse_common/idsse/common/sci/bit_pack.py b/python/idsse_common/idsse/common/sci/bit_pack.py index a85095c..54d7f9c 100644 --- a/python/idsse_common/idsse/common/sci/bit_pack.py +++ b/python/idsse_common/idsse/common/sci/bit_pack.py @@ -294,13 +294,13 @@ def _pack_np_array_to_list( # core packing code using diplib package (sometimes slower than the original so not used but here # for an option. -def _diplib_pack(data: numpy.array, - scale: float, - offset: float) -> numpy.array: - dip_data = dip.Image(data) - dip_data -= offset - dip_data /= scale - return np.trunc(dip_data) +#def _diplib_pack(data: numpy.array, +# scale: float, +# offset: float) -> numpy.array: +# dip_data = dip.Image(data) +# dip_data -= offset +# dip_data /= scale +# return numpy.trunc(dip_data) # private lookup for the max value possible for bit packing type From 3c27ac27e281609dbf354605196ab248951ac858 Mon Sep 17 00:00:00 2001 From: paulhamer-noaa <81647525+paulhamer-noaa@users.noreply.github.com> Date: Wed, 16 Oct 2024 09:57:06 -0600 Subject: [PATCH 3/5] Install of curent btanch required for lint/pytest --- .github/workflows/linter.yml | 10 ++++++++++ .github/workflows/run-tests.yml | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index 8a11ec0..2ceae8a 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -26,6 +26,16 @@ jobs: python -m pip install --upgrade pip pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 netcdf4==1.6.3 h5netcdf==1.1.0 pillow==10.2.0 python-logging-rabbitmq==2.3.0 + - name: Checkout idss-engine-commons + uses: actions/checkout@v2 + with: + repository: NOAA-GSL/idss-engine-commons + path: commons/ + + - name: Install IDSSE python commons + working-directory: commons/python/idsse_common + run: pip install . + - name: Set PYTHONPATH for pylint run: | echo "PYTHONPATH=python/idsse_common/idsse/common" >> $GITHUB_ENV diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index a677616..35cca26 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -28,6 +28,16 @@ jobs: - name: Set PYTHONPATH for pytest run: | echo "PYTHONPATH=python/idsse_common/idsse/common" >> $GITHUB_ENV + + - name: Checkout idss-engine-commons + uses: actions/checkout@v2 + with: + repository: NOAA-GSL/idss-engine-commons + path: commons/ + + - name: Install IDSSE python commons + working-directory: commons/python/idsse_common + run: pip install . - name: Test with pytest working-directory: python/idsse_common From 0f981ae088a4cd0b14b0cd7c22f5418021c97b98 Mon Sep 17 00:00:00 2001 From: paulhamer-noaa <81647525+paulhamer-noaa@users.noreply.github.com> Date: Wed, 16 Oct 2024 12:00:28 -0600 Subject: [PATCH 4/5] Up'd the test coverage to 96% --- python/idsse_common/test/sci/test_bit_pack.py | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/python/idsse_common/test/sci/test_bit_pack.py b/python/idsse_common/test/sci/test_bit_pack.py index f8ec4d4..c7f411c 100644 --- a/python/idsse_common/test/sci/test_bit_pack.py +++ b/python/idsse_common/test/sci/test_bit_pack.py @@ -16,11 +16,20 @@ import pytest from idsse.common.sci.bit_pack import (get_pack_info, + get_min_max, pack_numpy_to_numpy, pack_numpy_to_list, pack_to_list, PackInfo, PackType) +def test_get_min_max(): + l = [-1.0, 0.0, 1.0, 2.0] + expected = (-1.0, 2.0) + res = get_min_max(l) + assert res == expected + a = numpy.array(l) + res = get_min_max(a) + assert res == expected def test_get_pack_info_with_decimals(): @@ -35,23 +44,38 @@ def test_get_pack_info_with_pack_type(): assert result.type == expected.type assert result.scale == pytest.approx(expected.scale) assert result.offset == expected.offset + # Check for exceptions... + with pytest.raises(ValueError): + result = get_pack_info(-1, 1) + with pytest.raises(ValueError): + result = get_pack_info(-1, 1, decimals=2, pack_type=PackType.SHORT) - -def test_pack_list_to_list(): +def test_pack_to_list(): data = [[10, 50, 100, 200, 500], [30, 150, 300, 400, 600]] result = pack_to_list(data, in_place=False) expected = [[0, 4443, 9996, 21104, 54427], [2221, 15550, 32212, 43319, 65535]] numpy.testing.assert_array_equal(result.data, expected) assert data[0][0] != result.data[0][0] + data = numpy.array(data, dtype=float) + result = pack_to_list(data, in_place=False) + expected = [[0, 4443, 9996, 21104, 54427], [2221, 15550, 32212, 43319, 65535]] + numpy.testing.assert_array_equal(result.data, expected) + assert data[0][0] != result.data[0][0] -def test_pack_to_list(): - data = numpy.array([[.01, .05, .1, .2, .5], [.03, .15, .3, .4, .6]]) + with pytest.raises(KeyError): + result = pack_to_list((-1,1)) + +def test_pack_list_to_list(): + data = [[10, 50, 100, 200, 500], [30, 150, 300, 400, 600]] result = pack_to_list(data, in_place=False) expected = [[0, 4443, 9996, 21104, 54427], [2221, 15550, 32212, 43319, 65535]] numpy.testing.assert_array_equal(result.data, expected) assert data[0][0] != result.data[0][0] + result = pack_to_list(data, in_place=True) + with pytest.raises(KeyError): + result = pack_to_list((-1,1)) def test_pack_numpy(): data = numpy.array([[-1, -.5, 0, .5, 1], [-1, -.25, 0, .25, 1]]) @@ -61,6 +85,12 @@ def test_pack_numpy(): numpy.testing.assert_array_equal(result.data, expected) assert data[0, 0] != result.data[0, 0] + with pytest.raises(ValueError): + result = pack_numpy_to_numpy(numpy.array([0,1,2], dtype=int), in_place=True) + + with pytest.raises(ValueError): + result = pack_numpy_to_numpy((-1,1)) + def test_pack_numpy_in_place(): data = numpy.array([[-100., -50, 0, 50, 100], [-100, 0, 100, 200, 300]]) From 1555ce9956d416b1d41b5614eee0ba52bc4f798a Mon Sep 17 00:00:00 2001 From: paulhamer-noaa <81647525+paulhamer-noaa@users.noreply.github.com> Date: Wed, 16 Oct 2024 12:08:48 -0600 Subject: [PATCH 5/5] Linter.... --- python/idsse_common/test/sci/test_bit_pack.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/idsse_common/test/sci/test_bit_pack.py b/python/idsse_common/test/sci/test_bit_pack.py index c7f411c..4b445b5 100644 --- a/python/idsse_common/test/sci/test_bit_pack.py +++ b/python/idsse_common/test/sci/test_bit_pack.py @@ -23,12 +23,12 @@ PackInfo, PackType) def test_get_min_max(): - l = [-1.0, 0.0, 1.0, 2.0] + example = [-1.0, 0.0, 1.0, 2.0] expected = (-1.0, 2.0) - res = get_min_max(l) + res = get_min_max(example) assert res == expected - a = numpy.array(l) - res = get_min_max(a) + example = numpy.array(example) + res = get_min_max(example) assert res == expected