diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index c4041b2..0000000 --- a/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.nox diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f003d23..48dab51 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,14 +85,22 @@ jobs: max-parallel: 10 matrix: include: - - {"salt-version": "3005.0", "python-version": "3.7", "source": "pypi"} - - {"salt-version": "3005.0", "python-version": "3.8", "source": "pypi"} - - {"salt-version": "3005.0", "python-version": "3.9", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.7", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.8", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.9", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.10", "source": "pypi"} - - {"salt-version": "3006.x", "python-version": "3.10", "source": "onedir"} + - {"salt-version": "3005.0", "python-version": "3.7", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3005.0", "python-version": "3.7", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3005.0", "python-version": "3.8", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3005.0", "python-version": "3.8", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3005.0", "python-version": "3.9", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3005.0", "python-version": "3.9", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.7", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.7", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.8", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.8", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.9", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.9", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.10", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.10", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.x", "python-version": "3.10", "testsuite": "core", "source": "onedir"} + - {"salt-version": "3006.x", "python-version": "3.10", "testsuite": "examples", "source": "onedir"} steps: - uses: actions/checkout@v3 @@ -121,20 +129,20 @@ jobs: env: SALT_REQUIREMENT: salt~=${{ matrix.salt-version }} run: | - nox --force-color -e tests-${{ matrix.source == 'onedir' && 'onedir' || '3' }} --install-only + nox --force-color -e tests${{ matrix.testsuite == 'examples' && '-examples' || '' }}-${{ matrix.source == 'onedir' && 'onedir' || '3' }} --install-only - name: Test env: SALT_REQUIREMENT: salt~=${{ matrix.salt-version }} SKIP_REQUIREMENTS_INSTALL: YES run: | - nox --force-color -e tests-${{ matrix.source == 'onedir' && 'onedir' || '3' }} -- -vv tests/ + nox --force-color -e tests${{ matrix.testsuite == 'examples' && '-examples' || '' }}-${{ matrix.source == 'onedir' && 'onedir' || '3' }} -- -vv - name: Upload Logs if: always() uses: actions/upload-artifact@main with: - name: runtests-${{ runner.os }}-${{ matrix.python-version }}-salt-${{ matrix.salt-version }}-${{ matrix.source }}.log + name: runtests-${{ runner.os }}-${{ matrix.python-version }}-salt-${{ matrix.salt-version }}-${{ matrix.source }}${{ 
matrix.testsuite == 'examples' && '-examples' || '' }}.log path: artifacts/runtests-*.log @@ -150,11 +158,16 @@ jobs: max-parallel: 10 matrix: include: - - {"salt-version": "3005.0", "python-version": "3.8", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.8", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.9", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.10", "source": "pypi"} - - {"salt-version": "3006.x", "python-version": "3.10", "source": "onedir"} + - {"salt-version": "3005.0", "python-version": "3.8", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3005.0", "python-version": "3.8", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.8", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.8", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.9", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.9", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.10", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.10", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.x", "python-version": "3.10", "testsuite": "core", "source": "onedir"} + - {"salt-version": "3006.x", "python-version": "3.10", "testsuite": "examples", "source": "onedir"} steps: - uses: actions/checkout@v3 @@ -195,7 +208,7 @@ jobs: - name: Define test session shell: bash id: define-test-session - run: echo "test-session=tests-${{ matrix.source == 'onedir' && 'onedir' || '3' }}" >> "$GITHUB_OUTPUT" + run: echo "test-session=tests${{ matrix.testsuite == 'examples' && '-examples' || '' }}-${{ matrix.source == 'onedir' && 'onedir' || '3' }}" >> "$GITHUB_OUTPUT" - name: Install Test Requirements shell: bash @@ -230,7 +243,7 @@ jobs: if: always() uses: actions/upload-artifact@main with: - name: runtests-${{ runner.os }}-${{ matrix.python-version }}-salt-${{ matrix.salt-version }}-${{ matrix.source }}.log + name: runtests-${{ runner.os }}-${{ matrix.python-version }}-salt-${{ matrix.salt-version }}-${{ matrix.source }}${{ matrix.testsuite == 'examples' && '-examples' || '' }}.log path: artifacts/runtests-*.log @@ -246,10 +259,14 @@ jobs: max-parallel: 6 matrix: include: - - {"salt-version": "3005.0", "python-version": "3.9", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.9", "source": "pypi"} - - {"salt-version": "3006.0", "python-version": "3.10", "source": "pypi"} - - {"salt-version": "3006.x", "python-version": "3.10", "source": "onedir"} + - {"salt-version": "3005.0", "python-version": "3.9", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3005.0", "python-version": "3.9", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.9", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.9", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.10", "testsuite": "core", "source": "pypi"} + - {"salt-version": "3006.0", "python-version": "3.10", "testsuite": "examples", "source": "pypi"} + - {"salt-version": "3006.x", "python-version": "3.10", "testsuite": "core", "source": "onedir"} + - {"salt-version": "3006.x", "python-version": "3.10", "testsuite": "examples", "source": "onedir"} steps: - uses: actions/checkout@v3 @@ -278,20 +295,20 @@ 
jobs: env: SALT_REQUIREMENT: salt~=${{ matrix.salt-version }} run: | - nox --force-color -e tests-${{ matrix.source == 'onedir' && 'onedir' || '3' }} --install-only + nox --force-color -e tests${{ matrix.testsuite == 'examples' && '-examples' || '' }}-${{ matrix.source == 'onedir' && 'onedir' || '3' }} --install-only - name: Test env: SALT_REQUIREMENT: salt~=${{ matrix.salt-version }} SKIP_REQUIREMENTS_INSTALL: YES run: | - nox --force-color -e tests-${{ matrix.source == 'onedir' && 'onedir' || '3' }} -- -vv tests/ + nox --force-color -e tests${{ matrix.testsuite == 'examples' && '-examples' || '' }}-${{ matrix.source == 'onedir' && 'onedir' || '3' }} -- -vv - name: Upload Logs if: always() uses: actions/upload-artifact@main with: - name: runtests-${{ runner.os }}-${{ matrix.python-version }}-salt-${{ matrix.salt-version }}-${{ matrix.source }}.log + name: runtests-${{ runner.os }}-${{ matrix.python-version }}-salt-${{ matrix.salt-version }}-${{ matrix.source }}${{ matrix.testsuite == 'examples' && '-examples' || '' }}.log path: artifacts/runtests-*.log build: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7ec8893..41e0672 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -69,6 +69,13 @@ repos: - pyyaml==6.0 - jinja2==3.1.2 - packaging==23.0 + - id: tools + alias: examples-requirements + name: Collect examples requirements + files: "^examples/requirements/" + args: + - pre-commit + - examples-requirements # ----- Code Formatting and Analysis ----------------------------------------------------------> - repo: https://github.com/charliermarsh/ruff-pre-commit @@ -100,7 +107,7 @@ repos: hooks: - id: mypy name: Run mypy against the code base - files: ^(src/|tests/).*\.py$ + files: ^(src/|tests/|examples/).*\.py$ args: [] additional_dependencies: - types-attrs diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..f70da0c --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +recursive-exclude examples * diff --git a/README.rst b/README.rst index b088952..edfc5df 100644 --- a/README.rst +++ b/README.rst @@ -86,6 +86,7 @@ For more detailed information, refer to `CONTRIBUTING`_. .. _salt: https://github.com/saltstack/salt .. _engine: https://docs.saltproject.io/en/latest/topics/engines/index.html .. _CONTRIBUTING: https://github.com/saltstack/salt-analytics-framework/blob/main/CONTRIBUTING.md +.. _examples: https://github.com/saltstack/salt-analytics-framework/blob/main/examples .. include-ends-here @@ -94,3 +95,9 @@ Documentation ============= The full documentation can be seen `here `_. + + +Examples +======== + +Some examples of custom pipelines are provided. You can find them at `examples`_. diff --git a/changelog/54.feature.rst b/changelog/54.feature.rst new file mode 100644 index 0000000..3c4d6b8 --- /dev/null +++ b/changelog/54.feature.rst @@ -0,0 +1 @@ +Add a Jupyter notebook processor that allows running parameterized notebooks using `papermill` diff --git a/examples/LICENSE b/examples/LICENSE new file mode 100644 index 0000000..36aa562 --- /dev/null +++ b/examples/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2023 Caleb Beard + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..861a6ce --- /dev/null +++ b/examples/README.md @@ -0,0 +1,29 @@ +# Examples + +Example plugins for quick experimentation with Salt Analytics + +## Usage + +First, you'll need to install this examples extension into your Salt environment, targeting the root of this directory. This can be done in a couple of different ways, including `salt-pip install`, `salt-call --local pip.installed`, or however you prefer to get this extension into your environment. + +Each of these example pipelines has its own set of requirements, but if you would like to install them all at once, you can install [all the requirements](requirements/all.txt) into the same environment this extension was installed into. + +### Initial Configuration + +Each of these pipelines has its own unique configuration. It is recommended that you extract each one into a separate file and place it in the default include directory for your master or minion (by default, `master.d/*.conf` and `minion.d/*.conf`, relative to the directory containing the respective configuration file). + +To start, if the engine is not already enabled in your configuration, you can add an `analytics-engine.conf` file populated like this... + +``` +engines: + - analytics +``` + +This will enable the salt-analytics-framework engine to run alongside your minion or master. + +## Directory Listing + +Each example pipeline's instructions reside in its own directory under [pipelines](pipelines/). + +- [MNIST Network](pipelines/mnist-network/README.md) +- [MNIST Notebook](pipelines/mnist-notebook/README.md) diff --git a/examples/pipelines/mnist-network/README.md b/examples/pipelines/mnist-network/README.md new file mode 100644 index 0000000..4c46a56 --- /dev/null +++ b/examples/pipelines/mnist-network/README.md @@ -0,0 +1,61 @@ +# MNIST Network + +This pipeline will run test data through a saved TensorFlow model trained on the [MNIST digits dataset](https://www.tensorflow.org/datasets/catalog/mnist). + +## Try it! + +### Install the requirements + +If you haven't already, install the [requirements](../../requirements/mnist.txt) into your Python environment. + +### Training the model + +You will need a trained and saved TensorFlow model that can take the MNIST digits as input. An example model can be found [here](mnist.ipynb). You may need to install the Jupyter notebook CLI in order to train this model, or you can port it to a Python script and run that instead, similar to [this one](mnist.py). In either case, you will need to alter the model's save path to suit your needs, or take note of the default. + +If you do not want to train the model yourself, you can copy the [saved model we use for testing purposes](../../tests/pipelines/files/mnist) to the desired destination on your system. + +### Configuration + +Once your saved model is in place, you just need to add this config to either your `analytics-engine.conf` file or into a new file, perhaps `salt-analytics-mnist-network.conf`. Anything within angled brackets (`<>`) will need to be tuned to your system's setup and own preferences.
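+ +As a sketch only, with hypothetical example paths, the collector portion might be filled in like this (the complete template with placeholders follows below): + +``` +analytics: + collectors: + mnist-digits-collector: + interval: 0.1 + plugin: mnist_digits + path: /tmp/mnist-data +```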
+ +``` +analytics: + collectors: + mnist-digits-collector: + interval: 0.1 + plugin: mnist_digits + path: <path to download the MNIST data to> + + processors: + mnist-network-processor: + plugin: mnist_network + model: <path to the saved model> + + forwarders: + mnist-disk-forwarder: + plugin: disk + path: <directory to dump events to> + filename: <name of the dump file> + pretty_print: True + + pipelines: + mnist-network: + collect: mnist-digits-collector + process: mnist-network-processor + forward: mnist-disk-forwarder +``` + +### Output + +Once your master or minion is started, you should see data dumped to `<path>/<filename>` that shows whether the model accurately predicted the correct digit, and a running average of accuracy and loss for your model. diff --git a/examples/pipelines/mnist-network/mnist.ipynb b/examples/pipelines/mnist-network/mnist.ipynb new file mode 100644 index 0000000..14955d5 --- /dev/null +++ b/examples/pipelines/mnist-network/mnist.ipynb @@ -0,0 +1,161 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from tensorflow import keras" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Load the mnist data\n", + "(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Normalize the data\n", + "x_train = x_train / 255\n", + "x_test = x_test / 255" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# Flattening the data\n", + "x_train_flattened = x_train.reshape(len(x_train), 28*28)\n", + "x_test_flattened = x_test.reshape(len(x_test), 28*28)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/5\n", + "1875/1875 [==============================] - 11s 5ms/step - loss: 0.2712 - accuracy: 0.9226\n", + "Epoch 2/5\n", + "1875/1875 [==============================] - 9s 5ms/step - loss: 0.1218 - accuracy: 0.9646\n", + "Epoch 3/5\n", + "1875/1875 [==============================] - 10s 5ms/step - loss: 0.0859 - accuracy: 0.9745\n", + "Epoch 4/5\n", + "1875/1875 [==============================] - 10s 5ms/step - loss: 0.0661 - accuracy: 0.9805\n", + "Epoch 5/5\n", + "1875/1875 [==============================] - 9s 5ms/step - loss: 0.0520 - accuracy: 0.9841\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:absl:Found untraced functions such as _update_step_xla while saving (showing 1 of 1).
These functions will not be directly callable after loading.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: mnist/assets\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:tensorflow:Assets written to: mnist/assets\n" + ] + } + ], + "source": [ + "# Create a simple model with a relu hidden layer and a sigmoid output layer\n", + "model = keras.Sequential([\n", + " keras.layers.Dense(100, input_shape=(784,), activation='relu'),\n", + " keras.layers.Dense(10, activation='sigmoid')\n", + "])\n", + "\n", + "# Set the parameters on the model\n", + "model.compile(\n", + " optimizer='adam',\n", + " loss='sparse_categorical_crossentropy',\n", + " metrics=['accuracy']\n", + ")\n", + "\n", + "# Train\n", + "model.fit(x_train_flattened, y_train, epochs=5)\n", + "\n", + "\n", + "# Save the model so we can reload it later\n", + "model_name = \"mnist\"\n", + "model.save(model_name)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "313/313 [==============================] - 1s 3ms/step - loss: 0.0840 - accuracy: 0.9740\n" + ] + }, + { + "data": { + "text/plain": [ + "[0.08401071280241013, 0.9739999771118164]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Run the model against the test data\n", + "model.evaluate(x_test_flattened, y_test)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aether", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/pipelines/mnist-network/mnist.py b/examples/pipelines/mnist-network/mnist.py new file mode 100644 index 0000000..86e74bc --- /dev/null +++ b/examples/pipelines/mnist-network/mnist.py @@ -0,0 +1,38 @@ +# Copyright 2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0 +# +from tensorflow import keras + +# Load the mnist data +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + +# Normalize the data +x_train = x_train / 255 +x_test = x_test / 255 + +# Flattening the data +x_train_flattened = x_train.reshape(len(x_train), 28 * 28) +x_test_flattened = x_test.reshape(len(x_test), 28 * 28) + +# Create a simple model with a relu hidden layer and a sigmoid output layer +model = keras.Sequential( + [ + keras.layers.Dense(100, input_shape=(784,), activation="relu"), + keras.layers.Dense(10, activation="sigmoid"), + ] +) + +# Set the parameters on the model +model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]) + +# Train +model.fit(x_train_flattened, y_train, epochs=5) + + +# Save the model so we can reload it later +model_name = "mnist" +model.save(model_name) + + +# Run the model against the test data +model.evaluate(x_test_flattened, y_test) diff --git a/examples/pipelines/mnist-notebook/README.md b/examples/pipelines/mnist-notebook/README.md new file mode 100644 index 0000000..a2dbac5 --- /dev/null +++ b/examples/pipelines/mnist-notebook/README.md @@ -0,0 +1,84 @@ +# MNIST Notebook + +This pipeline will run test data through a saved TensorFlow model trained on the [MNIST digits dataset](https://www.tensorflow.org/datasets/catalog/mnist). This is similar to the [MNIST Network](../mnist-network) example, but uses a Jupyter notebook directly. + +## Try it! + +### Install the requirements + +If you haven't already, install the [requirements](../../requirements/mnist-notebook.txt) into your Python environment. + +### Training the model + +You will need a trained and saved TensorFlow model that can take the MNIST digits as input. An example model can be found [here](../mnist-network/mnist.ipynb). You may need to install the Jupyter notebook CLI in order to train this model, or you can port it to a Python script and run that instead, similar to [this one](../mnist-network/mnist.py). In either case, you will need to alter the model's save path to suit your needs, or take note of the default. + +If you do not want to train the model yourself, you can copy the [saved model we use for testing purposes](../../tests/pipelines/files/mnist) to the desired destination on your system. + +### Creating the notebook + +Now, you will need a Jupyter notebook that is able to take in the test data and run predictions on it one-by-one. There is a [simple one that is used for testing purposes](../../tests/pipelines/files/mnist_saf.ipynb) that should suit most systems well enough. + +### Configuration + +Once your saved model and notebook are in place, you just need to add this config to either your `analytics-engine.conf` file or into a new file, perhaps `salt-analytics-mnist-notebook.conf`. Anything within angled brackets (`<>`) will need to be tuned to your system's setup and own preferences.
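+ +As a sketch only, with hypothetical example paths, the notebook processor portion might be filled in like this (the complete template with placeholders follows below): + +``` +analytics: + processors: + jupyter-notebook-processor: + plugin: jupyter_notebook + notebook: /srv/notebooks/mnist_saf.ipynb + output_notebook: /srv/notebooks/mnist_saf.out.ipynb + params: + model_path: /srv/models/mnist +```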
+ +``` +analytics: + collectors: + mnist-digits-collector: + interval: 0.1 + plugin: mnist_digits + path: <path to download the MNIST data to> + + processors: + numpy-save-keys-processor: + plugin: numpy_save_keys + base_path: <directory to save numpy arrays to> + + jupyter-notebook-processor: + plugin: jupyter_notebook + notebook: <path to the input notebook> + output_notebook: <path to write the executed notebook to> + params: + model_path: <path to the saved model> + input_keys: + - x_path + - y_path + output_tag: output + + notebook-output-processor: + plugin: notebook_output + + forwarders: + mnist-notebook-disk-forwarder: + plugin: disk + path: <directory to dump events to> + filename: <name of the dump file> + pretty_print: True + + pipelines: + mnist-notebook: + collect: mnist-digits-collector + process: + - numpy-save-keys-processor + - jupyter-notebook-processor + - notebook-output-processor + forward: mnist-notebook-disk-forwarder +``` + +### Output + +Once your master or minion is started, you should see data dumped to `<path>/<filename>` that shows whether the model accurately predicted the correct digit, and a running average of accuracy and loss for your model. diff --git a/examples/pyproject.toml b/examples/pyproject.toml new file mode 100644 index 0000000..1862f7c --- /dev/null +++ b/examples/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = ["setuptools>=50.3.2", "wheel", "setuptools-declarative-requirements"] +build-backend = "setuptools.build_meta" + +[tool.black] +line-length = 100 diff --git a/examples/requirements/all.txt b/examples/requirements/all.txt new file mode 100644 index 0000000..3acf00e --- /dev/null +++ b/examples/requirements/all.txt @@ -0,0 +1,2 @@ +-r mnist-notebook.txt +-r mnist.txt diff --git a/examples/requirements/mnist-notebook.txt b/examples/requirements/mnist-notebook.txt new file mode 100644 index 0000000..62e992e --- /dev/null +++ b/examples/requirements/mnist-notebook.txt @@ -0,0 +1,3 @@ +papermill +ipykernel +importlib-metadata>=3.4.0,<5.0.0 diff --git a/examples/requirements/mnist.txt b/examples/requirements/mnist.txt new file mode 100644 index 0000000..0c8c3d2 --- /dev/null +++ b/examples/requirements/mnist.txt @@ -0,0 +1,2 @@ +numpy +tensorflow diff --git a/examples/setup.cfg b/examples/setup.cfg new file mode 100644 index 0000000..9c48d80 --- /dev/null +++ b/examples/setup.cfg @@ -0,0 +1,58 @@ +[metadata] +name = salt-analytics.examples +description = Example plugins for quick experimentation with Salt Analytics +long_description = file: README.md +long_description_content_type = text/markdown +author = Caleb Beard +author_email = calebb@vmware.com +keywords = salt-extension salt-analytics-framework +url = https://saltproject.io +project_urls = + Source=https://github.com/saltstack/salt-analytics-framework + Tracker=https://github.com/saltstack/salt-analytics-framework/issues +license = Apache Software License +classifiers = + Programming Language :: Python + Programming Language :: Cython + Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Development Status :: 4 - Beta + Intended Audience :: Developers + License :: OSI Approved :: Apache Software License +platforms = any + +[options] +zip_safe = False +include_package_data = True +package_dir = + =src +packages = find_namespace: +python_requires = >= 3.7 +setup_requires = + wheel + setuptools>=50.3.2 + setuptools-declarative-requirements + +[options.packages.find] +where = src +# exclude = +# tests + +[options.entry_points] +saf.collect = + mnist_digits = saltext.safexamples.collect.mnist_digits +saf.process = + mnist_network =
saltext.safexamples.process.mnist_network + notebook_output = saltext.safexamples.process.notebook_output + numpy_save_keys = saltext.safexamples.process.numpy_save_keys + +[bdist_wheel] +# Use this option if your package is pure-python +universal = 1 + +[sdist] +owner = root +group = root diff --git a/examples/setup.py b/examples/setup.py new file mode 100644 index 0000000..a0ac98f --- /dev/null +++ b/examples/setup.py @@ -0,0 +1,8 @@ +# Copyright 2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +# +# pylint: disable=missing-module-docstring +import setuptools + +if __name__ == "__main__": + setuptools.setup() diff --git a/examples/src/saltext/safexamples/__init__.py b/examples/src/saltext/safexamples/__init__.py new file mode 100644 index 0000000..e65286c --- /dev/null +++ b/examples/src/saltext/safexamples/__init__.py @@ -0,0 +1,32 @@ +# Copyright 2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +# +""" +Define the version. +""" +import contextlib +import pathlib + +PACKAGE_ROOT = pathlib.Path(__file__).resolve().parent +try: + from .version import __version__ +except ImportError: # pragma: no cover + __version__ = "0.0.0.not-installed" + try: + from importlib.metadata import PackageNotFoundError + from importlib.metadata import version + + with contextlib.suppress(PackageNotFoundError): + __version__ = version(__name__) + + except ImportError: + try: + from pkg_resources import DistributionNotFound + from pkg_resources import get_distribution + + with contextlib.suppress(DistributionNotFound): + __version__ = get_distribution(__name__).version + + except ImportError: + # pkg resources isn't even available?! + pass diff --git a/examples/src/saltext/safexamples/collect/__init__.py b/examples/src/saltext/safexamples/collect/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/src/saltext/safexamples/collect/mnist_digits.py b/examples/src/saltext/safexamples/collect/mnist_digits.py new file mode 100644 index 0000000..5dff2a1 --- /dev/null +++ b/examples/src/saltext/safexamples/collect/mnist_digits.py @@ -0,0 +1,54 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +""" +A plugin which downloads (if not already downloaded) and yields the MNIST digits dataset. +""" +from __future__ import annotations + +import asyncio +import logging +import pathlib +import random +from typing import AsyncIterator +from typing import Type + +from tensorflow import keras + +from saf.models import CollectConfigBase +from saf.models import CollectedEvent +from saf.models import PipelineRunContext + +log = logging.getLogger(__name__) + + +class MNISTDigitsConfig(CollectConfigBase): + """ + Configuration schema for the mnist_digits collect plugin. + """ + + path: str + interval: float = 5 + + +def get_config_schema() -> Type[MNISTDigitsConfig]: + """ + Get the mnist_digits plugin configuration schema. + """ + return MNISTDigitsConfig + + +async def collect(*, ctx: PipelineRunContext[MNISTDigitsConfig]) -> AsyncIterator[CollectedEvent]: + """ + Periodically yield a random MNIST test digit and its desired output.
+ """ + file_path = pathlib.Path(ctx.config.path) + log.debug("Downloading the MNIST digits dataset to %s", file_path) + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data(path=file_path) + x_test = x_test / 255 # Normalize + x_test_flattened = x_test.reshape(len(x_test), 28 * 28) # Flatten + + while True: + idx = random.choice(range(len(x_test_flattened))) # noqa: S311 + event = CollectedEvent(data={"x": x_test_flattened[idx], "y": y_test[idx]}) + yield event + await asyncio.sleep(ctx.config.interval) diff --git a/examples/src/saltext/safexamples/process/__init__.py b/examples/src/saltext/safexamples/process/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/src/saltext/safexamples/process/mnist_network.py b/examples/src/saltext/safexamples/process/mnist_network.py new file mode 100644 index 0000000..d101d64 --- /dev/null +++ b/examples/src/saltext/safexamples/process/mnist_network.py @@ -0,0 +1,73 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +""" +Run the salt-analytics-framework port of the MNIST network. +""" +from __future__ import annotations + +import logging +import pathlib +from typing import AsyncIterator +from typing import Type + +import numpy as np +from tensorflow import keras + +from saf.models import CollectedEvent +from saf.models import PipelineRunContext +from saf.models import ProcessConfigBase + +log = logging.getLogger(__name__) + + +class MNISTNetworkConfig(ProcessConfigBase): + """ + Configuration schema for the MNIST network processor plugin. + """ + + model: str + + +def get_config_schema() -> Type[ProcessConfigBase]: + """ + Get the MNIST network processor plugin configuration schema. + """ + return MNISTNetworkConfig + + +async def process( + *, + ctx: PipelineRunContext[MNISTNetworkConfig], + event: CollectedEvent, +) -> AsyncIterator[CollectedEvent]: + """ + Run the MNIST network. + """ + if "mnist_model" not in ctx.cache: + model_path = pathlib.Path(ctx.config.model) + log.debug("Loading the mnist model from %s", model_path) + ctx.cache["mnist_model"] = keras.models.load_model(model_path) + ctx.cache["mnist_model_evaluations"] = [] + else: + log.debug("Did not load the model, already cached it") + + model = ctx.cache["mnist_model"] + x = event.data["x"] + y = event.data["y"] + evaluate = model.evaluate(np.asarray([x]), np.asarray([y])) + log.debug("Evaluate result: %s", evaluate) + ctx.cache["mnist_model_evaluations"].append(evaluate) + avg_accuracy = sum([res[1] for res in ctx.cache["mnist_model_evaluations"]]) / len( + ctx.cache["mnist_model_evaluations"] + ) + avg_loss = sum([res[0] for res in ctx.cache["mnist_model_evaluations"]]) / len( + ctx.cache["mnist_model_evaluations"] + ) + log.debug("Average accuracy: %s, average loss: %s", avg_accuracy, avg_loss) + event.data = { + "evaluation": evaluate, + "accuracy": avg_accuracy, + "loss": avg_loss, + } + + yield event diff --git a/examples/src/saltext/safexamples/process/notebook_output.py b/examples/src/saltext/safexamples/process/notebook_output.py new file mode 100644 index 0000000..b73318a --- /dev/null +++ b/examples/src/saltext/safexamples/process/notebook_output.py @@ -0,0 +1,60 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +""" +Evaluate the output of a Jupyter notebook. 
+""" +from __future__ import annotations + +import logging +from ast import literal_eval +from typing import AsyncIterator +from typing import Type + +from saf.models import CollectedEvent +from saf.models import PipelineRunContext +from saf.models import ProcessConfigBase + +log = logging.getLogger(__name__) + + +class NotebookOutputConfig(ProcessConfigBase): + """ + Configuration schema for the notebook output processor plugin. + """ + + +def get_config_schema() -> Type[ProcessConfigBase]: + """ + Get the notebook output processor plugin configuration schema. + """ + return NotebookOutputConfig + + +async def process( + *, + ctx: PipelineRunContext[NotebookOutputConfig], + event: CollectedEvent, +) -> AsyncIterator[CollectedEvent]: + """ + Process the notebook output and perform some simple averaging. + """ + if "mnist_model_evaluations" not in ctx.cache: + ctx.cache["mnist_model_evaluations"] = [] + + evaluate = literal_eval(event.data["trimmed_outputs"][0]["data"]["text/plain"]) + log.debug("Evaluate result: %s", evaluate) + ctx.cache["mnist_model_evaluations"].append(evaluate) + avg_accuracy = sum([res[1] for res in ctx.cache["mnist_model_evaluations"]]) / len( + ctx.cache["mnist_model_evaluations"] + ) + avg_loss = sum([res[0] for res in ctx.cache["mnist_model_evaluations"]]) / len( + ctx.cache["mnist_model_evaluations"] + ) + log.debug("Average accuracy: %s, average loss: %s", avg_accuracy, avg_loss) + event.data = { + "evaluation": evaluate, + "accuracy": avg_accuracy, + "loss": avg_loss, + } + + yield event diff --git a/examples/src/saltext/safexamples/process/numpy_save_keys.py b/examples/src/saltext/safexamples/process/numpy_save_keys.py new file mode 100644 index 0000000..970159f --- /dev/null +++ b/examples/src/saltext/safexamples/process/numpy_save_keys.py @@ -0,0 +1,58 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +""" +Save the values using numpy of the data dict to paths associated with their keys. +""" +from __future__ import annotations + +import logging +import pathlib # noqa: TCH003 +from typing import AsyncIterator +from typing import Type + +import numpy as np + +from saf.models import CollectedEvent +from saf.models import PipelineRunContext +from saf.models import ProcessConfigBase + +log = logging.getLogger(__name__) + + +class NumpySaveKeysConfig(ProcessConfigBase): + """ + Configuration schema for the numpy save keys processor plugin. + """ + + base_path: pathlib.Path + + +def get_config_schema() -> Type[ProcessConfigBase]: + """ + Get the numpy save keys plugin configuration schema. + """ + return NumpySaveKeysConfig + + +async def process( + *, + ctx: PipelineRunContext[NumpySaveKeysConfig], + event: CollectedEvent, +) -> AsyncIterator[CollectedEvent]: + """ + Save the keys using numpy. + """ + config = ctx.config + if not config.base_path.exists(): + config.base_path.mkdir(parents=True) + new_data = {} + for key, value in event.data.items(): + key_path = config.base_path / f"{key}.npy" + key_path.touch() + with key_path.open("wb"): + np.save(key_path, value, allow_pickle=False) + event_key = f"{key}_path" + new_data[event_key] = str(key_path) + event.data = new_data + + yield event diff --git a/examples/tests/__init__.py b/examples/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/tests/conftest.py b/examples/tests/conftest.py new file mode 100644 index 0000000..e5d9ee7 --- /dev/null +++ b/examples/tests/conftest.py @@ -0,0 +1,63 @@ +# Copyright 2021-2023 VMware, Inc. 
+# SPDX-License-Identifier: Apache-2.0 +# +from __future__ import annotations + +import os +import pathlib +import sys +from typing import Any + +import pytest + +TESTS_DIR = pathlib.Path(__file__).resolve().parent +CODE_DIR = TESTS_DIR.parent.parent + +# Coverage +if "COVERAGE_PROCESS_START" in os.environ: + MAYBE_RUN_COVERAGE = True + COVERAGERC_FILE = os.environ["COVERAGE_PROCESS_START"] +else: + COVERAGERC_FILE = str(CODE_DIR / ".coveragerc") + MAYBE_RUN_COVERAGE = sys.argv[0].endswith("pytest.py") or "_COVERAGE_RCFILE" in os.environ + if MAYBE_RUN_COVERAGE: + # Flag coverage to track subprocesses by pointing it to the right .coveragerc file + os.environ["COVERAGE_PROCESS_START"] = str(COVERAGERC_FILE) + + +# ----- PyTest Tempdir Plugin Hooks --------------------------------------------------------------> +def pytest_tempdir_basename() -> str: + """ + Return the temporary directory basename for the salt test suite. + """ + return "analytics" + + +# <---- PyTest Tempdir Plugin Hooks --------------------------------------------------------------- + + +@pytest.fixture(scope="session") +def salt_factories_config() -> dict[str, Any]: + """ + Return a dictionary with the keyword arguments for FactoriesManager. + """ + if os.environ.get("CI"): + start_timeout = 120 + else: + start_timeout = 60 + if os.environ.get("ONEDIR_TESTRUN", "0") == "1": + code_dir = None + else: + code_dir = str(CODE_DIR) + + kwargs = { + "code_dir": code_dir, + "start_timeout": start_timeout, + "inject_sitecustomize": MAYBE_RUN_COVERAGE, + } + if MAYBE_RUN_COVERAGE: + kwargs["coverage_rc_path"] = str(COVERAGERC_FILE) + else: + kwargs["coverage_rc_path"] = None + kwargs["coverage_db_path"] = os.environ.get("COVERAGE_FILE") + return kwargs diff --git a/examples/tests/pipelines/__init__.py b/examples/tests/pipelines/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/tests/pipelines/conftest.py b/examples/tests/pipelines/conftest.py new file mode 100644 index 0000000..b1c6f90 --- /dev/null +++ b/examples/tests/pipelines/conftest.py @@ -0,0 +1,66 @@ +# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0 +# +from __future__ import annotations + +import pathlib +import shutil +from typing import Iterator + +import pytest +from saltfactories.cli.run import SaltRun +from saltfactories.cli.salt import SaltCli +from saltfactories.daemons.master import SaltMaster +from saltfactories.daemons.minion import SaltMinion +from saltfactories.utils import random_string + + +@pytest.fixture(scope="module", autouse=True) +def minion( + master: SaltMaster, analytics_events_dump_directory, analytics_config_contents +) -> SaltMinion: + default_config = { + "engines": ["analytics"], + } + factory = master.salt_minion_daemon(random_string("minion-"), defaults=default_config) + with pytest.helpers.temp_file( + "analytics", contents=analytics_config_contents, directory=factory.config_dir + ), factory.started(): + yield factory + + +@pytest.fixture(scope="package") +def analytics_events_dump_directory(tmp_path_factory) -> Iterator[pathlib.Path]: + dump_path = tmp_path_factory.mktemp("analytics-events-dump") + try: + yield dump_path + finally: + shutil.rmtree(str(dump_path), ignore_errors=True) + + +@pytest.fixture(autouse=True) +def cleanup_analytics_events_dump_directory( + analytics_events_dump_directory: pathlib.Path, +) -> Iterator[pathlib.Path]: + try: + yield analytics_events_dump_directory + finally: + for path in analytics_events_dump_directory.iterdir(): + path.unlink() + + +@pytest.fixture(scope="package") +def master(salt_factories, analytics_events_dump_directory) -> SaltMaster: + factory = salt_factories.salt_master_daemon(random_string("master-")) + with factory.started(): + yield factory + + +@pytest.fixture() +def salt_run_cli(master: SaltMaster) -> SaltRun: + return master.get_salt_run_cli() + + +@pytest.fixture() +def salt_cli(master: SaltMaster) -> SaltCli: + return master.get_salt_cli() diff --git a/examples/tests/pipelines/files/mnist/fingerprint.pb b/examples/tests/pipelines/files/mnist/fingerprint.pb new file mode 100644 index 0000000..d6ea4e6 Binary files /dev/null and b/examples/tests/pipelines/files/mnist/fingerprint.pb differ diff --git a/examples/tests/pipelines/files/mnist/keras_metadata.pb b/examples/tests/pipelines/files/mnist/keras_metadata.pb new file mode 100644 index 0000000..5c01f6b Binary files /dev/null and b/examples/tests/pipelines/files/mnist/keras_metadata.pb differ diff --git a/examples/tests/pipelines/files/mnist/saved_model.pb b/examples/tests/pipelines/files/mnist/saved_model.pb new file mode 100644 index 0000000..de80082 Binary files /dev/null and b/examples/tests/pipelines/files/mnist/saved_model.pb differ diff --git a/examples/tests/pipelines/files/mnist/variables/variables.data-00000-of-00001 b/examples/tests/pipelines/files/mnist/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000..731b07b Binary files /dev/null and b/examples/tests/pipelines/files/mnist/variables/variables.data-00000-of-00001 differ diff --git a/examples/tests/pipelines/files/mnist/variables/variables.index b/examples/tests/pipelines/files/mnist/variables/variables.index new file mode 100644 index 0000000..41325a4 Binary files /dev/null and b/examples/tests/pipelines/files/mnist/variables/variables.index differ diff --git a/examples/tests/pipelines/files/mnist_saf.ipynb b/examples/tests/pipelines/files/mnist_saf.ipynb new file mode 100644 index 0000000..1b03f8a --- /dev/null +++ b/examples/tests/pipelines/files/mnist_saf.ipynb @@ -0,0 +1,74 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "x_path = None\n", + "y_path = None\n", + "model_path = None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, +
"outputs": [], + "source": [ + "# Normalize the data\n", + "import numpy as np\n", + "from tensorflow import keras\n", + "with open(x_path, \"rb\") as xf:\n", + " x_test = np.load(xf)\n", + "with open(y_path, \"rb\") as yf:\n", + " y_test = np.load(yf)\n", + "x_test = np.asarray([x_test])\n", + "y_test = np.asarray([y_test])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "format": "application/json", + "tags": [ + "output" + ] + }, + "outputs": [], + "source": [ + "# Load the model and evaluate it\n", + "model = keras.models.load_model(model_path)\n", + "model.evaluate(x_test, y_test)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aether", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/tests/pipelines/test_mnist_network.py b/examples/tests/pipelines/test_mnist_network.py new file mode 100644 index 0000000..ad672c5 --- /dev/null +++ b/examples/tests/pipelines/test_mnist_network.py @@ -0,0 +1,77 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +# +from __future__ import annotations + +import json +import logging +import pathlib +import time + +import pytest + +from saf.models import CollectedEvent + +log = logging.getLogger(__name__) + + +pytestmark = [ + pytest.mark.skip_on_windows, +] + + +@pytest.fixture(scope="module") +def analytics_config_contents(analytics_events_dump_directory) -> str: + return """ + collectors: + mnist-digits-collector: + interval: 0.1 + plugin: mnist_digits + path: {} + + processors: + mnist-network-processor: + plugin: mnist_network + model: {} + + forwarders: + disk-forwarder: + plugin: disk + path: {} + filename: mnist-network-dump + pretty_print: False + + pipelines: + my-pipeline: + collect: mnist-digits-collector + process: mnist-network-processor + forward: disk-forwarder + """.format( + analytics_events_dump_directory / "mnist_digits", + pathlib.Path(__file__).resolve().parent / "files" / "mnist", + analytics_events_dump_directory, + ) + + +def test_pipeline(analytics_events_dump_directory: pathlib.Path): + """ + Test output of the MNIST digits network being dumped to disk. + """ + timeout = 300 + dumpfile = analytics_events_dump_directory / "mnist-network-dump" + + while timeout: + time.sleep(1) + timeout -= 1 + if dumpfile.exists() and dumpfile.read_text().strip(): + break + else: + pytest.fail(f"Failed to find dumped events in {analytics_events_dump_directory}") + + contents = [ + CollectedEvent.parse_obj(json.loads(i)) for i in dumpfile.read_text().strip().split("\n") + ] + for event in contents: + assert "evaluation" in event.data + assert "accuracy" in event.data + assert "loss" in event.data diff --git a/examples/tests/pipelines/test_mnist_notebook.py b/examples/tests/pipelines/test_mnist_notebook.py new file mode 100644 index 0000000..9a86cba --- /dev/null +++ b/examples/tests/pipelines/test_mnist_notebook.py @@ -0,0 +1,97 @@ +# Copyright 2021-2023 VMware, Inc. 
diff --git a/examples/tests/pipelines/test_mnist_notebook.py b/examples/tests/pipelines/test_mnist_notebook.py
new file mode 100644
index 0000000..9a86cba
--- /dev/null
+++ b/examples/tests/pipelines/test_mnist_notebook.py
@@ -0,0 +1,97 @@
+# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0
+#
+from __future__ import annotations
+
+import json
+import logging
+import pathlib
+import time
+
+import pytest
+
+from saf.models import CollectedEvent
+
+log = logging.getLogger(__name__)
+
+
+pytestmark = [
+    pytest.mark.skip_on_windows,
+]
+
+
+@pytest.fixture(scope="module")
+def analytics_config_contents(analytics_events_dump_directory) -> str:
+    return """
+    collectors:
+      mnist-digits-collector:
+        interval: 0.1
+        plugin: mnist_digits
+        path: {}
+
+    processors:
+      numpy-save-keys-processor:
+        plugin: numpy_save_keys
+        base_path: {}
+
+      jupyter-notebook-processor:
+        plugin: jupyter_notebook
+        notebook: {}
+        output_notebook: {}
+        params:
+          model_path: {}
+        input_keys:
+          - x_path
+          - y_path
+        output_tag: output
+
+      notebook-output-processor:
+        plugin: notebook_output
+
+    forwarders:
+      disk-forwarder:
+        plugin: disk
+        path: {}
+        filename: mnist-notebook-dump
+        pretty_print: False
+
+    pipelines:
+      my-pipeline:
+        collect: mnist-digits-collector
+        process:
+          - numpy-save-keys-processor
+          - jupyter-notebook-processor
+          - notebook-output-processor
+        forward: disk-forwarder
+    """.format(
+        analytics_events_dump_directory / "mnist_digits",
+        analytics_events_dump_directory,
+        pathlib.Path(__file__).resolve().parent / "files" / "mnist_saf.ipynb",
+        analytics_events_dump_directory / "mnist_saf.out.ipynb",
+        pathlib.Path(__file__).resolve().parent / "files" / "mnist",
+        analytics_events_dump_directory,
+    )
+
+
+def test_pipeline(analytics_events_dump_directory: pathlib.Path):
+    """
+    Test output of the MNIST digits network (inside a Jupyter notebook) being dumped to disk.
+    """
+    timeout = 300
+    dumpfile = analytics_events_dump_directory / "mnist-notebook-dump"
+
+    while timeout:
+        time.sleep(1)
+        timeout -= 1
+        if dumpfile.exists() and dumpfile.read_text().strip():
+            break
+    else:
+        pytest.fail(f"Failed to find dumped events in {analytics_events_dump_directory}")
+
+    contents = [
+        CollectedEvent.parse_obj(json.loads(i)) for i in dumpfile.read_text().strip().split("\n")
+    ]
+    for event in contents:
+        assert "evaluation" in event.data
+        assert "accuracy" in event.data
+        assert "loss" in event.data
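This pipeline chains three processors: numpy_save_keys presumably persists the collected arrays and replaces them with file paths, jupyter_notebook hands those paths (plus model_path) to papermill, and notebook_output extracts the tagged cell's result. The example plugins themselves live under examples/src and are not shown in this patch; purely as a conceptual sketch of the first hop, with names and behavior inferred from the config above:

# Conceptual sketch only: what a numpy_save_keys-style processor plausibly does.
# Inferred from the plugin name and the x_path/y_path input_keys above; the real
# implementation ships in examples/src and is not part of this patch.
import numpy as np


def save_keys(data: dict, base_path: str) -> dict:
    for key in ("x", "y"):
        path = f"{base_path}/{key}.npy"
        np.save(path, data.pop(key))  # persist the array to disk...
        data[f"{key}_path"] = path  # ...and pass a file path to the notebook instead
    return data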
diff --git a/examples/tests/test_plugins_available.py b/examples/tests/test_plugins_available.py
new file mode 100644
index 0000000..5bacfb0
--- /dev/null
+++ b/examples/tests/test_plugins_available.py
@@ -0,0 +1,12 @@
+# Copyright 2021-2023 VMware, Inc.
+# SPDX-License-Identifier: Apache-2.0
+#
+from __future__ import annotations
+
+from saf.plugins import PluginsList
+
+
+def test_example_plugins_available():
+    plugins_list = PluginsList()
+    assert "mnist_digits" in plugins_list.collectors
+    assert "mnist_network" in plugins_list.processors
diff --git a/noxfile.py b/noxfile.py
index ef81fa3..9a7e0a1 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -63,6 +63,8 @@
 COVERAGE_REPORT_PROJECT = ARTIFACTS_DIR.relative_to(REPO_ROOT) / "coverage-project.xml"
 COVERAGE_REPORT_TESTS = ARTIFACTS_DIR.relative_to(REPO_ROOT) / "coverage-tests.xml"
 JUNIT_REPORT = ARTIFACTS_DIR.relative_to(REPO_ROOT) / "junit-report.xml"
+EXAMPLES_EXTENSION_DIR = REPO_ROOT / "examples"
+EXAMPLES_EXTENSION_TESTS_DIR = REPO_ROOT / "tests" / "examples"
 
 
 def _get_session_python_version_info(session):
@@ -96,6 +98,7 @@ def _install_requirements(
     install_source=False,
     install_salt=True,
     install_extras=None,
+    install_examples=True,
     onedir=False,
 ):
     install_extras = install_extras or []
@@ -128,6 +131,17 @@ def _install_requirements(
     if passed_requirements:
         session.install("--progress-bar=off", *passed_requirements, silent=PIP_INSTALL_SILENT)
 
+    if install_examples:
+        session.install(
+            "--progress-bar=off",
+            "-r",
+            f"{EXAMPLES_EXTENSION_DIR / 'requirements' / 'all.txt'}",
+            silent=PIP_INSTALL_SILENT,
+        )
+        session.install(
+            "--progress-bar=off", f"{EXAMPLES_EXTENSION_DIR}", silent=PIP_INSTALL_SILENT
+        )
+
     if install_source:
         pkg = "."
         if install_extras:
@@ -142,8 +156,8 @@
     session.install("--progress-bar=off", pkg, silent=PIP_INSTALL_SILENT)
 
 
-def _tests(session, onedir=False):
-    _install_requirements(session, install_source=True, onedir=onedir)
+def _tests(session, onedir=False, examples=False):
+    _install_requirements(session, install_source=True, onedir=onedir, install_examples=examples)
 
     sitecustomize_dir = session.run("salt-factories", "--coverage", silent=True, log=False)
     python_path_env_var = os.environ.get("PYTHONPATH") or None
@@ -182,8 +196,9 @@
     ]
     if session._runner.global_config.forcecolor:
         args.append("--color=yes")
+    tests_root = pathlib.Path("examples", "tests") if examples else pathlib.Path("tests")
     if not session.posargs:
-        args.append("tests/")
+        args.append(str(tests_root))
     else:
         for arg in session.posargs:
             if arg.startswith("--color") and args[0].startswith("--color"):
@@ -192,22 +207,22 @@
         for arg in session.posargs:
             if arg.startswith("-"):
                 continue
-            if arg.startswith(f"tests{os.sep}"):
+            if arg.startswith(str(tests_root)):
                 break
             try:
-                pathlib.Path(arg).resolve().relative_to(REPO_ROOT / "tests")
+                pathlib.Path(arg).resolve().relative_to(REPO_ROOT / tests_root)
                 break
             except ValueError:
                 continue
         else:
-            args.append("tests/")
+            args.append(str(tests_root))
+
     try:
         session.run("coverage", "run", "-m", "pytest", *args, env=env)
     finally:
         # Always combine and generate the XML coverage report
         with contextlib.suppress(CommandFailed):
             session.run("coverage", "combine")
-
         try:
             # Generate report for salt code coverage
             session.run(
@@ -243,7 +258,7 @@
             )
         except CommandFailed as exc:
             # Tracking code coverage is still not working that well
-            if onedir is False:
+            if onedir is False and examples is False:
                 raise exc from None
 
 
@@ -268,6 +283,27 @@
 def test_onedir(session):
     _tests(session, onedir=True)
 
 
+@nox.session(python=PYTHON_VERSIONS, name="tests-examples")
+def tests_examples(session):
+    _tests(session, onedir=False, examples=True)
+
+
+@nox.session(
+    python=str(ONEDIR_PYTHON_PATH),
+    name="tests-examples-onedir",
+    venv_params=["--system-site-packages"],
+)
+def test_onedir_examples(session):
+    if not ONEDIR_ARTIFACT_PATH.exists():
+        session.error(
+            "The salt onedir artifact, expected to be in '{}', was not found".format(
+                ONEDIR_ARTIFACT_PATH.relative_to(REPO_ROOT)
+            )
+        )
+
+    _tests(session, onedir=True, examples=True)
+
+
 class Tee:
     """
     Python class to mimic linux tee behavior.
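The posargs handling in _tests above hinges on pathlib's relative_to raising ValueError for paths outside the selected tests root. A small worked example of that check (paths are illustrative only):

# Worked example of the path check _tests relies on (illustrative paths only).
import pathlib

REPO_ROOT = pathlib.Path("/repo")
tests_root = pathlib.Path("examples", "tests")

# A posarg already under the tests root passes the check, so it is kept as-is...
(REPO_ROOT / "examples" / "tests" / "pipelines").relative_to(REPO_ROOT / tests_root)

# ...while anything else raises ValueError, and the default tests path is appended.
try:
    (REPO_ROOT / "docs").relative_to(REPO_ROOT / tests_root)
except ValueError:
    print("not under the tests root")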
diff --git a/pyproject.toml b/pyproject.toml
index 104ef31..288de28 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,9 +60,11 @@ src = [
   "src",
   "tests",
   "tools",
+  "examples",
 ]
 extend-exclude = [
   ".nox/**",
+  "examples/pipelines/**",
 ]
 extend-include = [
   "setup.py",
@@ -94,6 +96,10 @@ format = "grouped"
   "D104",  # Missing docstring in public package
   "D107",  # Missing docstring in `__init__`
 ]
+"examples/src/**/*.py" = [
+  "D104",  # Missing docstring in public package
+  "D107",  # Missing docstring in `__init__`
+]
 "src/**/*.pyi" = [
   "D100",  # Missing docstring in public module
 ]
@@ -151,6 +157,23 @@ format = "grouped"
   "TCH002",  # Move third-party import into a type-checking block
   "TCH003",  # Move standard library import `pathlib` into a type-checking block
 ]
+"examples/tests/**/*.py" = [
+  "ANN",      # Ignore missing type annotations in tests
+  "ARG001",   # Unused function argument
+  "D100",     # Missing docstring in public module
+  "D103",     # Missing docstring in public function
+  "D104",     # Missing docstring in public package
+  "DTZ003",   # The use of `datetime.datetime.utcnow()` is not allowed, use `datetime.datetime.now(tz=)` instead
+  "PLR2004",  # Magic value used in comparison, consider replacing 3 with a constant variable
+  "PT001",    # use @pytest.fixture() over @pytest.fixture
+  "PT023",    # use @pytest.mark.<name>() over @pytest.mark.<name>
+  "RET504",   # Unnecessary variable assignment before `return` statement
+  "S101",     # Ignore the use of 'assert ...' in tests
+  "S603",     # `subprocess` call: check for execution of untrusted input
+  "SIM117",   # Use a single `with` statement with multiple contexts instead of nested `with` statements
+  "TCH002",   # Move third-party import into a type-checking block
+  "TCH003",   # Move standard library import `pathlib` into a type-checking block
+]
 "tests/unit/process/test_regex_mask.py" = [
   "S105",  # Possible hardcoded password
 ]
diff --git a/setup.cfg b/setup.cfg
index 83f2183..6275364 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -56,6 +56,7 @@ saf.collect =
 saf.process =
     regex_mask = saf.process.regex_mask
     shannon_mask = saf.process.shannon_mask
+    jupyter_notebook = saf.process.jupyter_notebook
     test = saf.process.test
 saf.forward =
     disk = saf.forward.disk
@@ -161,6 +162,7 @@
 warn_unused_ignores = True
 disallow_any_generics = True
 check_untyped_defs = True
 no_implicit_reexport = True
+explicit_package_bases = True
 
 # for strict mypy: (this is the tricky one :-))
 #disallow_untyped_defs = True
+""" +from __future__ import annotations + +import logging +import pathlib # noqa: TCH003 +from typing import Any +from typing import AsyncIterator +from typing import Dict +from typing import List +from typing import Optional +from typing import Type + +import papermill + +from saf.models import CollectedEvent +from saf.models import PipelineRunContext +from saf.models import ProcessConfigBase + +log = logging.getLogger(__name__) + + +class JupyterNotebookConfig(ProcessConfigBase): + """ + Configuration schema for the jupyter notebook processor plugin. + """ + + notebook: pathlib.Path + output_notebook: Optional[pathlib.Path] + params: Dict[str, Any] = {} + papermill_kwargs: Dict[str, Any] = {} + output_tag: Optional[str] + input_keys: List[str] + + +def get_config_schema() -> Type[ProcessConfigBase]: + """ + Get the jupyter notebook processor plugin configuration schema. + """ + return JupyterNotebookConfig + + +async def process( + *, + ctx: PipelineRunContext[JupyterNotebookConfig], + event: CollectedEvent, +) -> AsyncIterator[CollectedEvent]: + """ + Run the jupyter notebook, doing papermill parameterizing using the event data given. + """ + output = ctx.config.output_notebook or ctx.config.notebook + params = ctx.config.params.copy() + for key in ctx.config.input_keys: + params[key] = event.data[key] + notebook = papermill.execute_notebook( + str(ctx.config.notebook), + str(output), + parameters=params, + **ctx.config.papermill_kwargs, + ) + # Now let's find the cell with the output + # If no output tag is given, we resort to the last cell + cells = notebook.cells + if ctx.config.output_tag: + for cell in cells: + if ctx.config.output_tag in cell.metadata.tags: + notebook_output = cell.outputs + break + else: + notebook_output = cells[-1].outputs + trimmed_outputs = [] + for out in notebook_output: + if out.output_type == "execute_result": + trimmed_outputs.append(out) + event.data = {"trimmed_outputs": trimmed_outputs} + + yield event diff --git a/src/saf/process/regex_mask.pyi b/src/saf/process/regex_mask.pyi deleted file mode 100644 index b6b8b65..0000000 --- a/src/saf/process/regex_mask.pyi +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Any -from typing import overload - -from saf.process.regex_mask import RegexMaskProcessConfig - -@overload -def _regex_process(obj: str, config: RegexMaskProcessConfig) -> str: ... -@overload -def _regex_process(obj: set[Any], config: RegexMaskProcessConfig) -> set[Any]: ... -@overload -def _regex_process(obj: list[Any], config: RegexMaskProcessConfig) -> list[Any]: ... -@overload -def _regex_process(obj: tuple[Any, ...], config: RegexMaskProcessConfig) -> tuple[Any, ...]: ... -@overload -def _regex_process(obj: dict[str, Any], config: RegexMaskProcessConfig) -> dict[str, Any]: ... diff --git a/tools/pre_commit.py b/tools/pre_commit.py index ba53eda..3dee78b 100644 --- a/tools/pre_commit.py +++ b/tools/pre_commit.py @@ -7,6 +7,7 @@ from __future__ import annotations import logging +import pathlib import shutil from ptscripts import Context @@ -18,6 +19,39 @@ cgroup = command_group(name="pre-commit", help="Pre-Commit Related Commands", description=__doc__) +@cgroup.command( + name="examples-requirements", + arguments={ + "files": { + "help": "Files to consider when compiling all.txt", + "nargs": "*", + }, + }, +) +def examples_requirements(ctx: Context, files: list[pathlib.Path]): + """ + Include all individual examples requirements files in `all.txt`. 
+ """ + if files: + examples_requirements_dir = ( + pathlib.Path(__file__).resolve().parent.parent / "examples" / "requirements" + ) + includes = [] + for file in examples_requirements_dir.iterdir(): + if file.name != "all.txt": + includes.append(f"-r {file.name}") + + all_file = examples_requirements_dir / "all.txt" + + with all_file.open("r") as rfh: + original_contents = [line.strip() for line in rfh.readlines()] + + if set(original_contents) != set(includes): + includes.append("") + all_file.write_text("\n".join(includes)) + ctx.error(f"Modified {all_file}") + + @cgroup.command( name="actionlint", arguments={ diff --git a/tools/pre_commit.py~ b/tools/pre_commit.py~ deleted file mode 100644 index a225d12..0000000 --- a/tools/pre_commit.py~ +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2023 VMware, Inc. -# SPDX-License-Identifier: Apache-2.0 -""" -These commands are used by pre-commit. -""" -# pylint: disable=resource-leakage,broad-except,3rd-party-module-not-gated -from __future__ import annotations - -import logging -import shutil - -from ptscripts import command_group -from ptscripts import Context - -log = logging.getLogger(__name__) - -# Define the command group -cgroup = command_group(name="pre-commit", help="Pre-Commit Related Commands", description=__doc__) - - -@cgroup.command( - name="actionlint", - arguments={ - "files": { - "help": "Files to run actionlint against", - "nargs": "*", - }, - "no_color": { - "help": "Disable colors in output", - }, - }, -) -def actionlint(ctx: Context, files: list[str], no_color: bool = False): - """ - Run `actionlint` - """ - actionlint = shutil.which("actionlint") - if not actionlint: - ctx.warn("Could not find the 'actionlint' binary") - ctx.exit(0) - cmdline = [actionlint] - if no_color is False: - cmdline.append("-color") - shellcheck = shutil.which("shellcheck") - if shellcheck: - cmdline.append(f"-shellcheck={shellcheck}") - pyflakes = shutil.which("pyflakes") - if pyflakes: - cmdline.append(f"-pyflakes={pyflakes}") - ret = ctx.run(*cmdline, *files, check=False) - ctx.exit(ret.returncode)