From 48910a000fc590d0272aead20e916cb2a97d1089 Mon Sep 17 00:00:00 2001
From: Kyle A Logue
Date: Mon, 12 Feb 2024 08:46:12 -0800
Subject: [PATCH] Pull request #5: Quantization Helper & pyproject improvement

Merge in CSID/glaucus from feature/toml-and-utils to main

Squashed commit of the following:

commit 631df6e10625bdef6580e0da333d0b43704e34f2
Author: Kyle A Logue
Date:   Thu Feb 8 15:57:03 2024 -0800

    Quantization Helper & pyproject improvement

    * move all configuration into pyproject.toml
    * add function to adapt quantized weights to non-quantized model
    * increment to v1.1.4
---
 README.md                  | 28 ++++++++++++--
 glaucus/__init__.py        |  3 +-
 glaucus/utils.py           | 44 ++++++++++++++++++++++
 pyproject.toml             | 76 ++++++++++++++++++++++++++++++++++++++
 setup.py                   | 38 -------------------
 tests/__init__.py          |  0
 tests/test_autoencoders.py | 24 ++++++------
 tests/test_blocks.py       | 33 ++++++++---------
 tests/test_layers.py       | 21 +++++------
 tests/test_rfloss.py       | 30 +++++++--------
 tests/test_version.py      | 10 ++---
 11 files changed, 204 insertions(+), 103 deletions(-)
 create mode 100644 glaucus/utils.py
 create mode 100644 pyproject.toml
 delete mode 100755 setup.py
 create mode 100644 tests/__init__.py

diff --git a/README.md b/README.md
index 89737ab..c33cc92 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,8 @@
 # Glaucus
 
 The Aerospace Corporation is proud to present our complex-valued encoder,
-decoder, and a new loss function for RF DSP in PyTorch.
+decoder, and a new loss function for radio frequency (RF) digital signal
+processing (DSP) in PyTorch.
 
 ## Video (click to play)
 
@@ -18,8 +19,9 @@ decoder, and a new loss function for RF DSP in PyTorch.
 
 ### Testing
 
-* `coverage run -a --source=glaucus -m pytest --doctest-modules; coverage html`
-* `pytest .`
+* `pytest`
+* `coverage run`
+* `pylint glaucus tests`
 
 ### Use pre-trained model with SigMF data
 
@@ -41,6 +43,7 @@ state_dict = torch.hub.load_state_dict_from_url(
     map_location='cpu')
 model.load_state_dict(state_dict)
 # prepare for prediction
+model.freeze()
 model.eval()
 torch.quantization.convert(model, inplace=True)
 # get samples into NL tensor
@@ -53,6 +56,7 @@ y_encoded_uint8 = torch.int_repr(y_encoded)
 ```
 
 #### Higher-accuracy pre-trained model
+
 ```python
 # define architecture
 import torch
@@ -71,6 +75,24 @@ model.load_state_dict(state_dict)
 # see above for rest
 ```
 
+#### Use pre-trained model & discard quantization layers
+
+```python
+# create model, but skip quantization
+from glaucus.utils import adapt_glaucus_quantized_weights
+model = GlaucusAE(bottleneck_quantize=False, data_format='nl')
+state_dict = torch.hub.load_state_dict_from_url(
+    'https://github.com/the-aerospace-corporation/glaucus/releases/download/v1.1.0/glaucus-512-3275-5517642b.pth',
+    map_location='cpu')
+state_dict = adapt_glaucus_quantized_weights(state_dict)
+# ignore "unexpected_keys" warning
+model.load_state_dict(state_dict, strict=False)
+# prepare for evaluation mode
+model.freeze()
+model.eval()
+# see above for rest
+```
+
 ### Get loss between two RF signals
 
 ```python
diff --git a/glaucus/__init__.py b/glaucus/__init__.py
index 4fe1d3f..4a2a514 100644
--- a/glaucus/__init__.py
+++ b/glaucus/__init__.py
@@ -2,10 +2,11 @@
 # This file is a part of Glaucus
 # SPDX-License-Identifier: LGPL-3.0-or-later
 
-__version__ = '1.1.3'
+__version__ = '1.1.4'
 
 from .rfloss import *
 from .layers import *
 from .gblocks import *
 from .fcblocks import *
 from .autoencoders import *
+from .utils import *
diff --git a/glaucus/utils.py b/glaucus/utils.py
new file mode 100644
index 0000000..7b8051b
--- /dev/null
+++ b/glaucus/utils.py
@@ -0,0 +1,44 @@
+'''utilities'''
+# Copyright 2023 The Aerospace Corporation
+# This file is a part of Glaucus
+# SPDX-License-Identifier: LGPL-3.0-or-later
+
+import copy
+import re
+
+
+def adapt_glaucus_quantized_weights(state_dict: dict) -> dict:
+    """
+    The pretrained Glaucus models have a quantization layer that shifts the
+    decoder list positions, so if we create a model without quantization we
+    have to shift those layers slightly to make the pretrained model work.
+
+    This function decrements the position of the decoder layers in the state
+    dict to allow loading from a pre-trained model that was quantization aware.
+
+    e.g. `fc_decoder._fc.1.weight` becomes `fc_decoder._fc.0.weight`
+
+    There will be extra layers remaining, but we can discard them by loading
+    with `strict=False`. See the README for an example.
+
+    Parameters
+    ----------
+    state_dict : dict
+        Torch state dictionary including quantization layers.
+
+    Returns
+    -------
+    new_state_dict : dict
+        State dictionary without quantization layers.
+    """
+    new_state_dict = copy.deepcopy(state_dict)
+
+    pattern = r"(fc_decoder._fc.)(\d+)(\.\w+)"  # matches decoder keys like `fc_decoder._fc.1.weight`
+
+    for key, value in state_dict.items():
+        match = re.match(pattern, key)
+        if match:
+            extracted_int = int(match.group(2))
+            new_key = f"{match.group(1)}{extracted_int-1}{match.group(3)}"
+            new_state_dict[new_key] = value
+    return new_state_dict
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f046b75
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,76 @@
+[project]
+name = "glaucus"
+description = "Glaucus is a PyTorch complex-valued ML autoencoder & RF estimation Python module."
+keywords = ["dsp", "ml", "autoencoder", "sigint", "rf"]
+classifiers = [
+    "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dynamic = ["version", "readme"]
+authors = [
+    {name = "Kyle Logue", email = "kyle.logue@aero.org"}
+]
+requires-python = ">=3.8"
+dependencies = [
+    "torch",       # basic ML framework
+    "lightning",   # extensions for PyTorch
+    "madgrad",     # our favorite optimizer
+    "hypothesis",  # best unit testing
+    ]
+    [project.urls]
+    repository = "https://github.com/the-aerospace-corporation/glaucus"
+
+[tool.setuptools]
+packages = ["glaucus"]
+    [tool.setuptools.dynamic]
+    version = {attr = "glaucus.__version__"}
+    readme = {file = ["README.md"], content-type = "text/markdown"}
+
+[build-system]
+requires = ["setuptools>=65.0", "setuptools-scm"]
+build-backend = "setuptools.build_meta"
+
+[tool.coverage.run]
+branch = true
+source = ["glaucus", "tests"]
+# -rA captures stdout from all tests and places it after the pytest summary
+command_line = "-m pytest -rA --doctest-modules --junitxml=pytest.xml"
+
+[tool.pytest.ini_options]
+addopts = "--doctest-modules"
+testpaths = ["glaucus", "tests"]
+
+[tool.pylint]
+    [tool.pylint.main]
+    load-plugins = [
+        "pylint.extensions.typing",
+        "pylint.extensions.docparams",
+    ]
+    exit-zero = true
+    [tool.pylint.messages_control]
+    disable = [
+        "logging-not-lazy",
+        "missing-module-docstring",
+        "import-error",
+        "unspecified-encoding",
+    ]
+    max-line-length = 160
+    [tool.pylint.REPORTS]
+    # omit from the similarity reports
+    ignore-comments = "yes"
+    ignore-docstrings = "yes"
+    ignore-imports = "yes"
+    ignore-signatures = "yes"
+    min-similarity-lines = 4
+
+[tool.pytype]
+inputs = ["glaucus", "tests"]
+
+[tool.black]
+line-length = 160
diff --git a/setup.py b/setup.py
deleted file mode 100755
index 5144655..0000000
--- a/setup.py
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/env python3
-'''
-     ________.__
-    /  _____/|  | _____   __ __   ____  __ __  ______
-   /   \  ___|  | \__  \ |  |  \_/ ___\|  |  \/  ___/
-   \    \_\  \  |__/ __ \|  |  /\  \___|  |  /\___ \
-    \______  /____(____  /____/  \___  >____//____  >
-           \/          \/           \/            \/
-'''
-import os
-import re
-from setuptools import setup
-
-with open(os.path.join('glaucus', '__init__.py'), encoding='utf-8') as derp:
-    version = re.search(r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]', derp.read()).group(1)
-
-with open('README.md') as derp:
-    long_description = derp.read()
-
-setup(
-    name='glaucus',
-    version=version,
-    author='Kyle Logue',
-    author_email='kyle.logue@aero.org',
-    url='https://github.com/the-aerospace-corporation/glaucus',
-    license='GNU Lesser General Public License v3 or later (LGPLv3+)',
-    test_suite='tests',
-    packages=['glaucus'],
-    description='Complex-valued encoder, decoder, and loss for RF DSP in PyTorch.',
-    long_description=long_description,
-    long_description_content_type='text/markdown',
-    install_requires=[
-        'torch',  # basic ML framework
-        'lightning',  # extensions for PyTorch
-        'madgrad',  # our favorite optimizer
-        'hypothesis',  # best unit testing
-    ],
-)
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_autoencoders.py b/tests/test_autoencoders.py
index c5cf008..2aba9a0 100644
--- a/tests/test_autoencoders.py
+++ b/tests/test_autoencoders.py
@@ -1,4 +1,4 @@
-'''ensure autoencoders are working'''
+"""ensure autoencoders are working"""
 # Copyright 2023 The Aerospace Corporation
 # This file is a part of Glaucus
 # SPDX-License-Identifier: LGPL-3.0-or-later
@@ -6,18 +6,18 @@
 import unittest
 import torch
 
-from glaucus import GlaucusAE, FullyConnectedAE
+from glaucus import FullyConnectedAE, GlaucusAE
 
 
 class TestAE(unittest.TestCase):
     def test_ae_roundtrip(self):
-        '''the output size should always be the same as the input size'''
+        """the output size should always be the same as the input size"""
         for AE in [GlaucusAE, FullyConnectedAE]:
-            for data_format in ['ncl', 'nl']:
-                for domain in ['time', 'freq']:
+            for data_format in ["ncl", "nl"]:
+                for domain in ["time", "freq"]:
                     # note if we use a diff spatial_size, will need to gen new encoder & decoder blocks
                     spatial_size = 4096
-                    if data_format == 'ncl':
+                    if data_format == "ncl":
                         trash_x = torch.randn(7, 2, spatial_size)
                     else:
                         trash_x = torch.randn(7, spatial_size, dtype=torch.complex64)
@@ -26,14 +26,14 @@ def test_ae_roundtrip(self):
                     self.assertEqual(trash_x.shape, trash_y.shape)
 
     def test_ae_quantization(self):
-        '''If quantization enabled, should use quint8 as latent output'''
+        """If quantization enabled, should use quint8 as latent output"""
         for AE in [FullyConnectedAE, GlaucusAE]:
-            for data_format in ['ncl', 'nl']:
+            for data_format in ["ncl", "nl"]:
                 for is_quantized in [True, False]:
                     target = torch.quint8 if is_quantized else torch.float32
                     # note if we use a diff spatial_size, will need to gen new encoder & decoder blocks
                     spatial_size = 4096
-                    if data_format == 'ncl':
+                    if data_format == "ncl":
                         trash_x = torch.randn(7, 2, spatial_size)
                     else:
                         trash_x = torch.randn(7, spatial_size, dtype=torch.complex64)
@@ -47,13 +47,13 @@
                     self.assertEqual(trash_latent.dtype, target)
 
     def test_ae_backprop(self):
-        '''catch errors during backpropagation'''
-        for data_format in ['ncl', 'nl']:
+        """catch errors during backpropagation"""
+        for data_format in ["ncl", "nl"]:
             for AE in [FullyConnectedAE, GlaucusAE]:
                 for is_quantized in [True, False]:
                     # note if we use a diff spatial_size, will need to gen new encoder & decoder blocks
                     spatial_size = 4096
-                    if data_format == 'ncl':
+                    if data_format == "ncl":
                         trash_x = torch.randn(7, 2, spatial_size)
                     else:
                         trash_x = torch.randn(7, spatial_size, dtype=torch.complex64)
diff --git a/tests/test_blocks.py b/tests/test_blocks.py
index 546f8b5..ac2a023 100644
--- a/tests/test_blocks.py
+++ b/tests/test_blocks.py
@@ -1,4 +1,4 @@
-'''ensure blocks are working'''
+"""ensure blocks are working"""
 # Copyright 2023 The Aerospace Corporation
 # This file is a part of Glaucus
 # SPDX-License-Identifier: LGPL-3.0-or-later
@@ -7,11 +7,12 @@
 import torch
 from hypothesis import settings, given, strategies as st
 
-from glaucus import GlaucusNet, blockgen, FullyConnected, GBlock
+from glaucus import FullyConnected, GBlock, GlaucusNet, blockgen
 
 
 class TestParams(unittest.TestCase):
-    '''autoencoders should operate over all valid params'''
+    """autoencoders should operate over all valid params"""
+
     @settings(deadline=None, max_examples=100)
     @given(
         exponent=st.integers(min_value=2, max_value=14),
@@ -19,13 +20,13 @@
         filters_mid=st.integers(min_value=1, max_value=100)
     )
     def test_io_glaucusnet(self, exponent, steps, filters_mid):
-        '''design works on a variety of spatial sizes'''
+        """design works on a variety of spatial sizes"""
         spatial_dim = 2**exponent
         # for spatial_dim in 2**np.arange(8, 14):
-        encoder_blocks = blockgen(steps=steps, spatial_in=spatial_dim, spatial_out=8, filters_in=2, filters_out=filters_mid, mode='encoder')
-        decoder_blocks = blockgen(steps=steps, spatial_in=8, spatial_out=spatial_dim, filters_in=filters_mid, filters_out=2, mode='decoder')
-        encoder = GlaucusNet(mode='encoder', blocks=encoder_blocks, spatial_dim=spatial_dim)
-        decoder = GlaucusNet(mode='decoder', blocks=decoder_blocks, spatial_dim=spatial_dim)
+        encoder_blocks = blockgen(steps=steps, spatial_in=spatial_dim, spatial_out=8, filters_in=2, filters_out=filters_mid, mode="encoder")
+        decoder_blocks = blockgen(steps=steps, spatial_in=8, spatial_out=spatial_dim, filters_in=filters_mid, filters_out=2, mode="decoder")
+        encoder = GlaucusNet(mode="encoder", blocks=encoder_blocks, spatial_dim=spatial_dim)
+        decoder = GlaucusNet(mode="decoder", blocks=decoder_blocks, spatial_dim=spatial_dim)
         trash_x = torch.randn(3, 2, spatial_dim)
         trash_y = decoder(encoder(trash_x))
         self.assertEqual(trash_x.shape, trash_y.shape)
@@ -53,9 +54,7 @@ def test_io_gblock(self, spatial_exponent, filters_in, filters_out, stride, expa
        squeeze_ratio -= 1
        squeeze_ratio = max(filters_in * expand_ratio, squeeze_ratio)
        blk = GBlock(
-            filters_in=filters_in, filters_out=filters_out,
-            stride=stride, kernel_size=kernel_size,
-            expand_ratio=expand_ratio, squeeze_ratio=squeeze_ratio
+            filters_in=filters_in, filters_out=filters_out, stride=stride, kernel_size=kernel_size, expand_ratio=expand_ratio, squeeze_ratio=squeeze_ratio
         )
         trash_x = torch.randn(2, filters_in, spatial_size)
         trash_y = blk(trash_x)
@@ -71,16 +70,14 @@ def test_io_gblock(self, spatial_exponent, filters_in, filters_out, stride, expa
         exponent_in=st.integers(min_value=2, max_value=14),
         exponent_out=st.integers(min_value=2, max_value=14),
         steps=st.integers(min_value=1, max_value=5),
-        quantize_in=st.booleans(), quantize_out=st.booleans(),
-        use_dropout=st.booleans()
+        quantize_in=st.booleans(),
+        quantize_out=st.booleans(),
+        use_dropout=st.booleans(),
     )
     def test_io_fc(self, exponent_in, exponent_out, steps, quantize_in, quantize_out, use_dropout):
-        '''block should work with a variety of configs'''
+        """block should work with a variety of configs"""
         size_in, size_out = exponent_in**2, exponent_out**2
-        autoencoder = FullyConnected(
-            size_in=size_in, size_out=size_out,
-            steps=steps, quantize_in=quantize_in, quantize_out=quantize_out
-        )
+        autoencoder = FullyConnected(size_in=size_in, size_out=size_out, steps=steps, quantize_in=quantize_in, quantize_out=quantize_out)
         trash_x = torch.randn(3, size_in)
         trash_y = autoencoder(trash_x)
         self.assertEqual(trash_y.shape[1], size_out)
diff --git a/tests/test_layers.py b/tests/test_layers.py
index 74b6539..cb5ddf3 100644
--- a/tests/test_layers.py
+++ b/tests/test_layers.py
@@ -1,11 +1,10 @@
-'''ensure layers are working'''
+"""ensure layers are working"""
 # Copyright 2023 The Aerospace Corporation
 # This file is a part of Glaucus
 # SPDX-License-Identifier: LGPL-3.0-or-later
 
 import unittest
 import numpy as np
-
 import torch
 
 from glaucus import (
@@ -19,12 +18,12 @@ class TestDomainTransforms(unittest.TestCase):
     def test_roundtrip(self):
-        '''
+        """
         Time -> Freq -> Time should yield identical results
         Freq -> Time -> Freq should yield identical results
-        '''
+        """
         batch_size = np.random.randint(0, 64)
-        spatial_size = 2**np.random.randint(8, 16)
+        spatial_size = 2 ** np.random.randint(8, 16)
         original = torch.rand(batch_size, 2, spatial_size)
         layer_f2t = FreqDomain2TimeDomain()
         layer_t2f = TimeDomain2FreqDomain()
@@ -36,11 +35,11 @@ class TestNormalization(unittest.TestCase):
     def test_rms_normalize(self):
-        '''
+        """
         Tests the RMSNormalize layer to ensure the layer is normalizing
         inputs to RMS.
-        '''
+        """
         batch_size = np.random.randint(1, 64)
-        spatial_size = 2**np.random.randint(8, 16)
+        spatial_size = 2 ** np.random.randint(8, 16)
         # generate batches with different means and stdevs
         means = np.geomspace(1e-2, 1e3, 6) * (np.random.randint(0, 2, size=6) * 2 - 1)
         stdevs = np.geomspace(1e-2, 1e8, 4)
@@ -58,10 +57,10 @@ class TestGaussianNoise(unittest.TestCase):
     def test_skip_on_eval(self):
-        '''
+        """
         When self.training == True (before eval) noise will be added
         with this layer. Otherwise it will just return the same input.
-        '''
+        """
         noise_layer = GaussianNoise(spatial_size=64)
         alpha = torch.randn(1, 2, 64)
         omega, _ = noise_layer(alpha)
@@ -71,7 +70,7 @@ def test_skip_on_eval(self):
         self.assertTrue(torch.equal(alpha, omega))
 
     def test_snr_ranges(self):
-        '''lower the SNR, lower the relationship to original signal'''
+        """lower the SNR, lower the relationship to original signal"""
         alpha = torch.randn(1, 2, 64)
         rfloss = RFLoss(weight_spec=0)
         for min_snr_db in np.arange(-10, 15, 5):
diff --git a/tests/test_rfloss.py b/tests/test_rfloss.py
index 4a7ab95..3e7e23d 100644
--- a/tests/test_rfloss.py
+++ b/tests/test_rfloss.py
@@ -1,4 +1,4 @@
-'''ensure RFLoss is working'''
+"""ensure RFLoss is working"""
 # Copyright 2023 The Aerospace Corporation
 # This file is a part of Glaucus
 # SPDX-License-Identifier: LGPL-3.0-or-later
@@ -15,12 +15,12 @@ def _gen_x(self, batch_size=32, spatial_size=4096) -> None:
         self.omega = torch.randn(batch_size, 2, spatial_size)
 
     def test_weights(self):
-        '''
+        """
         make sure returns correct number of values
         zero out weights for keys one by one
         should always return individual loss and the total loss
-        '''
-        weight_keys = ['weight_env', 'weight_fft', 'weight_align', 'weight_spec', 'weight_xcor']
+        """
+        weight_keys = ["weight_env", "weight_fft", "weight_align", "weight_spec", "weight_xcor"]
         self._gen_x()
         for test_key in weight_keys:
             kwargs = {test_key: 1}
@@ -33,37 +33,37 @@
         self.assertEqual(2, len(metrics))
 
     def test_naive_case(self):
-        '''loss between identical signals should be near zero'''
-        for spatial_size in 2**torch.arange(8, 14):
+        """loss between identical signals should be near zero"""
+        for spatial_size in 2 ** torch.arange(8, 14):
            self._gen_x(spatial_size=spatial_size)
            criterion = RFLoss(spatial_size=spatial_size)
            total_loss, _ = criterion(self.alpha, self.alpha)
            self.assertAlmostEqual(total_loss.numpy(), 0, places=5)
 
     def test_spec_loss(self):
-        '''
+        """
         the spec_loss weight is scaled by spatial_size,
         so make sure that's working as intended
-        '''
-        for spatial_size in 2**torch.arange(8, 14):
+        """
+        for spatial_size in 2 ** torch.arange(8, 14):
             self._gen_x(spatial_size=spatial_size)
             criterion = RFLoss(spatial_size=spatial_size)
             # should be around 1 for uncorrelated inputs
             _, metrics = criterion(self.alpha, self.omega)
-            self.assertAlmostEqual(metrics['spec_loss'].numpy(), 0.858, places=1)
+            self.assertAlmostEqual(metrics["spec_loss"].numpy(), 0.858, places=1)
             # should be 0 for equal inputs
             _, metrics = criterion(self.alpha, self.alpha)
-            self.assertAlmostEqual(metrics['spec_loss'].numpy(), 0)
+            self.assertAlmostEqual(metrics["spec_loss"].numpy(), 0)
             # should be 0 for inversely correlated inputs due to absolute value
             _, metrics = criterion(self.alpha, -self.alpha)
-            self.assertAlmostEqual(metrics['spec_loss'].numpy(), 0, places=2)
+            self.assertAlmostEqual(metrics["spec_loss"].numpy(), 0, places=2)
 
     def test_fft_loss(self):
-        for spatial_size in 2**torch.arange(8, 14):
+        for spatial_size in 2 ** torch.arange(8, 14):
             criterion = RFLoss(spatial_size=spatial_size, weight_fft=1)
             # create a pair of slices with different AM tones
             alpha = torch.vstack((torch.sin(torch.arange(spatial_size)), torch.zeros(spatial_size))).unsqueeze(0)
-            omega = torch.vstack((torch.sin(torch.arange(spatial_size) * .1), torch.zeros(spatial_size))).unsqueeze(0)
+            omega = torch.vstack((torch.sin(torch.arange(spatial_size) * 0.1), torch.zeros(spatial_size))).unsqueeze(0)
             # fft_loss should be very high
             _, metrics = criterion(alpha, omega)
-            self.assertGreater(metrics['fft_loss'], 0.95)
+            self.assertGreater(metrics["fft_loss"], 0.95)
diff --git a/tests/test_version.py b/tests/test_version.py
index 9580033..aecd83f 100644
--- a/tests/test_version.py
+++ b/tests/test_version.py
@@ -1,4 +1,4 @@
-'''version should be parsable'''
+"""version should be parsable"""
 # Copyright 2023 The Aerospace Corporation
 # This file is a part of Glaucus
 # SPDX-License-Identifier: LGPL-3.0-or-later
@@ -10,8 +10,8 @@
 
 class TestVersion(unittest.TestCase):
     def test_version(self):
-        '''
+        """
         Ensure the version string is valid.
-        '''
-        major, minor, micro = tuple(int(x) for x in __version__.split('.'))
-        print(f'mod_classify v{major}.{minor}.{micro}')
+        """
+        major, minor, micro = tuple(int(x) for x in __version__.split("."))
+        print(f"glaucus v{major}.{minor}.{micro}")
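
Below is a minimal sketch (not part of the patch) of what the new `adapt_glaucus_quantized_weights` helper does to a checkpoint; the tensor is a stand-in for real pretrained weights, and the surviving original key is why the README example loads with `strict=False`.

```python
import torch

from glaucus.utils import adapt_glaucus_quantized_weights

# stand-in state dict with one quantization-shifted decoder entry
state_dict = {"fc_decoder._fc.1.weight": torch.zeros(3, 3)}
new_state_dict = adapt_glaucus_quantized_weights(state_dict)

# the decoder index is decremented; the old key remains and is dropped later by strict=False
assert "fc_decoder._fc.0.weight" in new_state_dict
assert "fc_decoder._fc.1.weight" in new_state_dict
```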