From 5a17b92f045c9c96a75243234b1e75fc2400f775 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Fri, 30 Aug 2024 11:31:35 -0500 Subject: [PATCH 1/5] Run yapf, flake8, isort as part of pre-commit (#1859) Closes #1702 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Anuradha Karuppiah (https://github.com/AnuradhaKaruppiah) URL: https://github.com/nv-morpheus/Morpheus/pull/1859 --- .pre-commit-config.yaml | 20 +++++ ci/scripts/fix_all.sh | 2 +- ci/scripts/python_checks.sh | 60 +------------ morpheus.code-workspace | 4 +- pyproject.toml | 85 +++++++++++++++++-- .../morpheus/_lib/common/__init__.pyi | 6 +- .../morpheus/_lib/cudf_helpers/__init__.pyi | 7 +- .../morpheus/morpheus/_lib/doca/__init__.pyi | 9 +- .../morpheus/_lib/messages/__init__.pyi | 7 +- .../morpheus/_lib/modules/__init__.pyi | 4 +- .../morpheus/_lib/stages/__init__.pyi | 11 ++- python/morpheus/setup.cfg | 70 --------------- .../morpheus_llm/_lib/llm/__init__.pyi | 7 +- 13 files changed, 137 insertions(+), 155 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2682c98fb7..5b0e6c5fca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,6 +19,26 @@ repos: hooks: - id: rapids-dependency-file-generator args: ["--clean"] + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--settings-file=./pyproject.toml"] + files: ^python/ + - repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 + hooks: + - id: flake8 + entry: pflake8 + additional_dependencies: [pyproject-flake8] + args: ["--config=./pyproject.toml"] + files: ^python/ + - repo: https://github.com/google/yapf + rev: v0.40.2 + hooks: + - id: yapf + args: ["--style", "./pyproject.toml"] + files: ^python/ default_language_version: python: python3 diff --git a/ci/scripts/fix_all.sh b/ci/scripts/fix_all.sh index 100f0d62f6..e2004c84b6 100755 --- a/ci/scripts/fix_all.sh +++ b/ci/scripts/fix_all.sh @@ -105,5 +105,5 @@ fi # Run yapf if [[ "${SKIP_YAPF}" == "" ]]; then echo "Running yapf..." - python3 -m yapf -i --style ${PY_CFG} -r ${PY_MODIFIED_FILES[@]} + python3 -m yapf -i --style ${PROJ_TOML} -r ${PY_MODIFIED_FILES[@]} fi diff --git a/ci/scripts/python_checks.sh b/ci/scripts/python_checks.sh index e7b9386e95..84acabefe6 100755 --- a/ci/scripts/python_checks.sh +++ b/ci/scripts/python_checks.sh @@ -26,10 +26,7 @@ LC_ALL=C.UTF-8 LANG=C.UTF-8 # Pre-populate the return values in case they are skipped -ISORT_RETVAL=0 -FLAKE_RETVAL=0 PYLINT_RETVAL=0 -YAPF_RETVAL=0 get_modified_files ${PYTHON_FILE_REGEX} MORPHEUS_MODIFIED_FILES @@ -41,45 +38,16 @@ if [[ -n "${MORPHEUS_MODIFIED_FILES}" ]]; then echo " $f" done - if [[ "${SKIP_ISORT}" == "" ]]; then - ISORT_OUTPUT=`python3 -m isort --settings-file ${PROJ_TOML} --filter-files --check-only ${MORPHEUS_MODIFIED_FILES[@]} 2>&1` - ISORT_RETVAL=$? - fi - if [[ "${SKIP_PYLINT}" == "" ]]; then NUM_PROC=$(get_num_proc) PYLINT_OUTPUT=`pylint -j ${NUM_PROC} ${MORPHEUS_MODIFIED_FILES[@]} 2>&1` PYLINT_RETVAL=$? fi - if [[ "${SKIP_FLAKE}" == "" ]]; then - FLAKE_OUTPUT=`python3 -m flake8 --config ${PY_CFG} ${MORPHEUS_MODIFIED_FILES[@]} 2>&1` - FLAKE_RETVAL=$? - fi - - if [[ "${SKIP_YAPF}" == "" ]]; then - # Run yapf. Will return 1 if there are any diffs - YAPF_OUTPUT=`python3 -m yapf --style ${PY_CFG} --diff ${MORPHEUS_MODIFIED_FILES[@]} 2>&1` - YAPF_RETVAL=$? - fi - else echo "No modified Python files to check" fi -# Output results if failure otherwise show pass -if [[ "${SKIP_ISORT}" != "" ]]; then - echo -e "\n\n>>>> SKIPPED: isort check\n\n" -elif [ "${ISORT_RETVAL}" != "0" ]; then - echo -e "\n\n>>>> FAILED: isort style check; begin output\n\n" - echo -e "${ISORT_OUTPUT}" - echo -e "\n\n>>>> FAILED: isort style check; end output\n\n" \ - "To auto-fix many issues (not all) run:\n" \ - " ./ci/scripts/fix_all.sh\n\n" -else - echo -e "\n\n>>>> PASSED: isort style check\n\n" -fi - if [[ "${SKIP_PYLINT}" != "" ]]; then echo -e "\n\n>>>> SKIPPED: pylint check\n\n" elif [ "${PYLINT_RETVAL}" != "0" ]; then @@ -90,33 +58,7 @@ else echo -e "\n\n>>>> PASSED: pylint style check\n\n" fi -if [[ "${SKIP_FLAKE}" != "" ]]; then - echo -e "\n\n>>>> SKIPPED: flake8 check\n\n" -elif [ "${FLAKE_RETVAL}" != "0" ]; then - echo -e "\n\n>>>> FAILED: flake8 style check; begin output\n\n" - echo -e "${FLAKE_OUTPUT}" - echo -e "\n\n>>>> FAILED: flake8 style check; end output\n\n" \ - "To auto-fix many issues (not all) run:\n" \ - " ./ci/scripts/fix_all.sh\n\n" -else - echo -e "\n\n>>>> PASSED: flake8 style check\n\n" -fi - -if [[ "${SKIP_YAPF}" != "" ]]; then - echo -e "\n\n>>>> SKIPPED: yapf check\n\n" -elif [ "${YAPF_RETVAL}" != "0" ]; then - echo -e "\n\n>>>> FAILED: yapf style check; begin output\n\n" - echo -e "Incorrectly formatted files:" - YAPF_OUTPUT=`echo "${YAPF_OUTPUT}" | sed -nr 's/^\+\+\+ ([^ ]*) *\(reformatted\)$/\1/p'` - echo -e "${YAPF_OUTPUT}" - echo -e "\n\n>>>> FAILED: yapf style check; end output\n\n" \ - "To auto-fix many issues (not all) run:\n" \ - " ./ci/scripts/fix_all.sh\n\n" -else - echo -e "\n\n>>>> PASSED: yapf style check\n\n" -fi - -RETVALS=(${ISORT_RETVAL} ${FLAKE_RETVAL} ${PYLINT_RETVAL} ${YAPF_RETVAL}) +RETVALS=(${PYLINT_RETVAL}) IFS=$'\n' RETVAL=`echo "${RETVALS[*]}" | sort -nr | head -n1` diff --git a/morpheus.code-workspace b/morpheus.code-workspace index 5fb3a58375..0ec937642b 100644 --- a/morpheus.code-workspace +++ b/morpheus.code-workspace @@ -679,7 +679,7 @@ "files.trimFinalNewlines": true, "files.trimTrailingWhitespace": true, "flake8.args": [ - "--style=${workspaceFolder}/python/morpheus/setup.cfg" + "--style=${workspaceFolder}/pyproject.toml" ], "pylint.args": [ "--rcfile=${workspaceFolder}/pyproject.toml", @@ -730,7 +730,7 @@ } ], "yapf.args": [ - "--style=${workspaceFolder}/python/morpheus/setup.cfg" + "--style=${workspaceFolder}/pyproject.toml" ] } } diff --git a/pyproject.toml b/pyproject.toml index a2490e0456..c597d71c03 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,13 +45,6 @@ addopts = "--benchmark-disable" asyncio_mode = "auto" -[tool.yapfignore] -ignore_patterns = [ - "**/*.pyx", - "**/*.pxd", - "**/_version.py", -] - [tool.mypy] # Allow None for argument default values implicit_optional = true @@ -753,3 +746,81 @@ skip= [ "models", "thirdparty" ] + +[tool.flake8] +filename=[ + "*.py", + "*.pyx", + "*.pxd" +] +# TODO: Remove dfencoder https://github.com/nv-morpheus/Morpheus/issues/786 +exclude=[ + "__pycache__", + ".git", + ".tmp/", + "*.egg", + "build/", + "cpp", + "docs", + "models/", + "python/morpheus/morpheus/models/dfencoder/*.py", + "thirdparty" +] +max-line-length=120 +max-doc-length=120 + +extend-ignore=[ + # Ignore missing docstrings __init__ methods as we document those on the class + "D107", + # Don't require quotes to be placed on the same line as a one-line docstring, useful when the docstring is close + # to the line limit + "D200", + # Allow a blank line between a docstring and the code + "D202", + # D204: 1 blank line required after class docstring + "D204", + # D205: 1 blank line required between summary line and description (allow for a paragraph) + "D205", + # D400: First line should end with a period (only works if we're adhering to D205) + "D400", + # D401: First line should be in imperative mood + "D401" +] + +# Cython Rules ignored: +# E999: invalid syntax (works for Python, not Cython) +# E225: Missing whitespace around operators (breaks cython casting syntax like ) +# E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) +# E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) +# W503: line break before binary operator (breaks lines that start with a pointer) +# W504: line break after binary operator (breaks lines that end with a pointer) + +per-file-ignores =[ + # imported but unused + "__init__.py: F401, E402", + # Ignore additional deps needed for examples + "examples/*.py: F821", + # Cython Exclusions + "*.pyx: E999, E225, E226, E227, W503, W504", + "*.pxd: E999, E225, E226, E227, W503, W504", + # Remove some documentation requirements for tests + # D100: Missing docstring in public module + # D101: Missing docstring in public class + # D102: Missing docstring in public method + # D103: Missing docstring in public function + "tests/*.py: D100, D101, D102, D103" +] + +[tool.yapfignore] +ignore_patterns = [ + "**/*.pyx", + "**/*.pxd", + "**/_version.py" +] + +[tool.yapf] +based_on_style="pep8" +column_limit=120 +split_all_top_level_comma_separated_values=true +join_multiple_lines=true +indent_dictionary_value=true diff --git a/python/morpheus/morpheus/_lib/common/__init__.pyi b/python/morpheus/morpheus/_lib/common/__init__.pyi index 7e11e81ccd..f1436eb5e7 100644 --- a/python/morpheus/morpheus/_lib/common/__init__.pyi +++ b/python/morpheus/morpheus/_lib/common/__init__.pyi @@ -5,9 +5,11 @@ :toctree: _generate """ from __future__ import annotations -import morpheus._lib.common -import typing + import os +import typing + +import morpheus._lib.common __all__ = [ "FiberQueue", diff --git a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi index 4ce3dd3269..027b74ecc0 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi +++ b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi @@ -1,8 +1,11 @@ from __future__ import annotations -import morpheus._lib.cudf_helpers + import typing -from cudf.core.dtypes import StructDtype + import cudf +from cudf.core.dtypes import StructDtype + +import morpheus._lib.cudf_helpers __all__ = [ "StructDtype", diff --git a/python/morpheus/morpheus/_lib/doca/__init__.pyi b/python/morpheus/morpheus/_lib/doca/__init__.pyi index 100af5654a..f65ba226dc 100644 --- a/python/morpheus/morpheus/_lib/doca/__init__.pyi +++ b/python/morpheus/morpheus/_lib/doca/__init__.pyi @@ -1,10 +1,13 @@ from __future__ import annotations -import morpheus._lib.doca -import typing + import datetime -import morpheus._lib.messages +import typing + import mrc.core.segment +import morpheus._lib.doca +import morpheus._lib.messages + __all__ = [ "DocaConvertStage", "DocaSourceStage" diff --git a/python/morpheus/morpheus/_lib/messages/__init__.pyi b/python/morpheus/morpheus/_lib/messages/__init__.pyi index f4ef520152..41a7723c44 100644 --- a/python/morpheus/morpheus/_lib/messages/__init__.pyi +++ b/python/morpheus/morpheus/_lib/messages/__init__.pyi @@ -6,12 +6,15 @@ """ from __future__ import annotations -import morpheus._lib.messages + import typing + import cupy -import morpheus._lib.common import mrc.core.node +import morpheus._lib.common +import morpheus._lib.messages + __all__ = [ "ControlMessage", "ControlMessageType", diff --git a/python/morpheus/morpheus/_lib/modules/__init__.pyi b/python/morpheus/morpheus/_lib/modules/__init__.pyi index 0ec21dfaad..ff105f6d3b 100644 --- a/python/morpheus/morpheus/_lib/modules/__init__.pyi +++ b/python/morpheus/morpheus/_lib/modules/__init__.pyi @@ -6,9 +6,11 @@ """ from __future__ import annotations -import morpheus._lib.modules + import typing +import morpheus._lib.modules + __all__ = [ ] diff --git a/python/morpheus/morpheus/_lib/stages/__init__.pyi b/python/morpheus/morpheus/_lib/stages/__init__.pyi index 86a91e9088..ffd9a46c91 100644 --- a/python/morpheus/morpheus/_lib/stages/__init__.pyi +++ b/python/morpheus/morpheus/_lib/stages/__init__.pyi @@ -6,13 +6,16 @@ """ from __future__ import annotations -import morpheus._lib.stages + +import os import typing -from morpheus._lib.common import FilterSource -import morpheus._lib.common + import mrc.core.coro import mrc.core.segment -import os + +import morpheus._lib.common +import morpheus._lib.stages +from morpheus._lib.common import FilterSource __all__ = [ "AddClassificationsControlMessageStage", diff --git a/python/morpheus/setup.cfg b/python/morpheus/setup.cfg index 1c7bcb39fe..6b57906472 100644 --- a/python/morpheus/setup.cfg +++ b/python/morpheus/setup.cfg @@ -23,73 +23,3 @@ versionfile_source = morpheus/_version.py versionfile_build = morpheus/_version.py tag_prefix = v parentdir_prefix = morpheus- - -# ===== flake8 Config ===== -[flake8] -filename = *.py, *.pyx, *.pxd - -# TODO: Remove dfencoder https://github.com/nv-morpheus/Morpheus/issues/786 -exclude = - __pycache__, - .git, - .tmp/, - *.egg, - build/, - cpp, - docs, - models/, - morpheus/models/dfencoder/*.py, - thirdparty -max-line-length = 120 -max-doc-length = 120 - -extend-ignore = - # Ignore missing docstrings __init__ methods as we document those on the class - D107, - # Don't require quotes to be placed on the same line as a one-line docstring, useful when the docstring is close - # to the line limit - D200, - # Allow a blank line between a docstring and the code - D202, - # D204: 1 blank line required after class docstring - D204, - # D205: 1 blank line required between summary line and description (allow for a paragraph) - D205, - # D400: First line should end with a period (only works if we're adhering to D205) - D400, - # D401: First line should be in imperative mood - D401 - -# Cython Rules ignored: -# E999: invalid syntax (works for Python, not Cython) -# E225: Missing whitespace around operators (breaks cython casting syntax like ) -# E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) -# E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) -# W503: line break before binary operator (breaks lines that start with a pointer) -# W504: line break after binary operator (breaks lines that end with a pointer) - -per-file-ignores = - # imported but unused - __init__.py: F401, E402 - # Ignore additional deps needed for examples - examples/*.py: F821 - # Cython Exclusions - *.pyx: E999, E225, E226, E227, W503, W504 - *.pxd: E999, E225, E226, E227, W503, W504 - # Remove some documentation requirements for tests - tests/*.py: - # D100: Missing docstring in public module - D100, - # D101: Missing docstring in public class - D101, - # D102: Missing docstring in public method - D102, - # D103: Missing docstring in public function - D103 - -[yapf] -based_on_style = pep8 -column_limit = 120 -split_all_top_level_comma_separated_values = true -join_multiple_lines = true -indent_dictionary_value = true diff --git a/python/morpheus_llm/morpheus_llm/_lib/llm/__init__.pyi b/python/morpheus_llm/morpheus_llm/_lib/llm/__init__.pyi index 36c202e16c..351790410d 100644 --- a/python/morpheus_llm/morpheus_llm/_lib/llm/__init__.pyi +++ b/python/morpheus_llm/morpheus_llm/_lib/llm/__init__.pyi @@ -6,12 +6,15 @@ """ from __future__ import annotations -import morpheus_llm._lib.llm + import typing -import morpheus._lib.messages + import mrc.core.coro import mrc.core.segment +import morpheus._lib.llm +import morpheus._lib.messages + __all__ = [ "InputMap", "LLMContext", From d36ec8ea44ffb84b5d8f8e6c1eb9e2cb682ea2db Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Fri, 30 Aug 2024 12:18:59 -0500 Subject: [PATCH 2/5] Remove cloudtrail debug log from autoencoder source stage (#1865) Closes #1697 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Anuradha Karuppiah (https://github.com/AnuradhaKaruppiah) URL: https://github.com/nv-morpheus/Morpheus/pull/1865 --- .../morpheus/stages/input/autoencoder_source_stage.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/morpheus/morpheus/stages/input/autoencoder_source_stage.py b/python/morpheus/morpheus/stages/input/autoencoder_source_stage.py index b0529c7164..6675b3eacd 100644 --- a/python/morpheus/morpheus/stages/input/autoencoder_source_stage.py +++ b/python/morpheus/morpheus/stages/input/autoencoder_source_stage.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import os import typing from abc import abstractmethod @@ -30,8 +29,6 @@ from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.directory_watcher import DirectoryWatcher -logger = logging.getLogger(__name__) - class AutoencoderSourceStage(PreallocatorMixin, SingleOutputSource): """ @@ -204,12 +201,6 @@ def batch_user_split(x: typing.List[pd.DataFrame], combined_df.index.name = saved_index_name - logger.debug( - "CloudTrail loading complete. Total rows: %d. Timespan: %s", - len(combined_df), - str(combined_df.loc[combined_df.index[-1], datetime_column_name] - - combined_df.loc[combined_df.index[0], datetime_column_name])) - # Get the users in this DF unique_users = combined_df[userid_column_name].unique() From 583149caaa6bdb02885a80014ab676f0eb0e185d Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Fri, 30 Aug 2024 19:10:16 -0500 Subject: [PATCH 3/5] Prefer `len(os.sched_getaffinity(0))` over `os.cpu_count()` (#1866) Closes #30 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Anuradha Karuppiah (https://github.com/AnuradhaKaruppiah) URL: https://github.com/nv-morpheus/Morpheus/pull/1866 --- docs/source/developer_guide/guides/2_real_world_phishing.md | 4 ++-- .../guides/6_digital_fingerprinting_reference.md | 2 +- examples/abp_pcap_detection/run.py | 2 +- examples/developer_guide/2_1_real_world_phishing/run.py | 2 +- examples/developer_guide/2_2_rabbitmq/read_simple.py | 2 +- examples/developer_guide/2_2_rabbitmq/write_simple.py | 2 +- examples/developer_guide/4_rabbitmq_cpp_stage/README.md | 2 +- .../developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py | 2 +- .../developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py | 2 +- examples/digital_fingerprinting/production/grafana/run.py | 2 +- .../production/morpheus/dfp/utils/config_generator.py | 2 +- .../production/morpheus/dfp_azure_pipeline.py | 2 +- .../production/morpheus/dfp_duo_pipeline.py | 2 +- .../production/morpheus/notebooks/dfp_azure_inference.ipynb | 2 +- .../production/morpheus/notebooks/dfp_azure_training.ipynb | 2 +- .../production/morpheus/notebooks/dfp_duo_inference.ipynb | 2 +- .../production/morpheus/notebooks/dfp_duo_training.ipynb | 2 +- examples/digital_fingerprinting/starter/run_cloudtrail_dfp.py | 2 +- .../visualization/dfp_viz_azure_pipeline.py | 2 +- .../visualization/dfp_viz_duo_pipeline.py | 2 +- examples/doca/run_udp_convert.py | 2 +- examples/gnn_fraud_detection_pipeline/run.py | 2 +- examples/llm/agents/run.py | 4 ++-- examples/llm/completion/run.py | 2 +- examples/llm/rag/run.py | 2 +- examples/llm/vdb_upload/run.py | 2 +- examples/log_parsing/run.py | 2 +- examples/ransomware_detection/run.py | 2 +- examples/sid_visualization/run.py | 2 +- python/morpheus/morpheus/cli/commands.py | 2 +- .../morpheus/stages/input/http_server_source_stage.py | 4 ++-- .../morpheus/morpheus/stages/output/http_server_sink_stage.py | 4 ++-- tests/common/test_http_server.py | 2 +- 33 files changed, 37 insertions(+), 37 deletions(-) diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md index ccdccab74a..4043e863ec 100644 --- a/docs/source/developer_guide/guides/2_real_world_phishing.md +++ b/docs/source/developer_guide/guides/2_real_world_phishing.md @@ -388,7 +388,7 @@ To start, we will need to instantiate and set a few attributes of the `Config` c config = Config() config.mode = PipelineModes.NLP -config.num_threads = os.cpu_count() +config.num_threads = len(os.sched_getaffinity(0)) config.feature_length = model_fea_length with open(labels_file, encoding='UTF-8') as fh: @@ -563,7 +563,7 @@ def run_pipeline(use_stage_function: bool, config.mode = PipelineModes.NLP # Set the thread count to match our cpu count - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) config.feature_length = model_fea_length with open(labels_file, encoding='UTF-8') as fh: diff --git a/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md b/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md index cd9c2c99bd..b3c0213192 100644 --- a/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md +++ b/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md @@ -41,7 +41,7 @@ from morpheus.cli.utils import load_labels_file CppConfig.set_should_use_cpp(False) config = Config() -config.num_threads = os.cpu_count() +config.num_threads = len(os.sched_getaffinity(0)) config.ae = ConfigAutoEncoder() config.ae.feature_columns = load_labels_file(get_package_relative_file("data/columns_ae_azure.txt")) ``` diff --git a/examples/abp_pcap_detection/run.py b/examples/abp_pcap_detection/run.py index bdd7fb7fb8..b1a654bbd9 100644 --- a/examples/abp_pcap_detection/run.py +++ b/examples/abp_pcap_detection/run.py @@ -39,7 +39,7 @@ @click.command() @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use.", ) diff --git a/examples/developer_guide/2_1_real_world_phishing/run.py b/examples/developer_guide/2_1_real_world_phishing/run.py index 42963b58b7..b0907924aa 100755 --- a/examples/developer_guide/2_1_real_world_phishing/run.py +++ b/examples/developer_guide/2_1_real_world_phishing/run.py @@ -98,7 +98,7 @@ def run_pipeline(use_stage_function: bool, config.mode = PipelineModes.NLP # Set the thread count to match our cpu count - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) config.feature_length = model_fea_length with open(labels_file, encoding='UTF-8') as fh: diff --git a/examples/developer_guide/2_2_rabbitmq/read_simple.py b/examples/developer_guide/2_2_rabbitmq/read_simple.py index eb509b1316..2b26d2ba6a 100755 --- a/examples/developer_guide/2_2_rabbitmq/read_simple.py +++ b/examples/developer_guide/2_2_rabbitmq/read_simple.py @@ -38,7 +38,7 @@ def run_pipeline(use_source_function: bool): configure_logging(log_level=logging.DEBUG) config = Config() - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) # Create a linear pipeline object pipeline = LinearPipeline(config) diff --git a/examples/developer_guide/2_2_rabbitmq/write_simple.py b/examples/developer_guide/2_2_rabbitmq/write_simple.py index 5f70f63dd5..78fa2c3d26 100755 --- a/examples/developer_guide/2_2_rabbitmq/write_simple.py +++ b/examples/developer_guide/2_2_rabbitmq/write_simple.py @@ -32,7 +32,7 @@ def run_pipeline(): input_file = os.path.join(root_dir, 'examples/data/email.jsonlines') config = Config() - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) # Create a linear pipeline object pipeline = LinearPipeline(config) diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md index 8c9414ae88..1fba854fde 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md @@ -18,7 +18,7 @@ limitations under the License. # Example RabbitMQ stages This example builds upon the `examples/developer_guide/2_2_rabbitmq` example adding a C++ implementation for the `RabbitMQSourceStage` along with adding package install scripts. -This example adds two flags to the `read_simple.py` script. A `--use_cpp` flag which defaults to `True` and a `--num_threads` flag which defaults to the number of cores on the system as returned by `os.cpu_count()`. +This example adds two flags to the `read_simple.py` script. A `--use_cpp` flag which defaults to `True` and a `--num_threads` flag which defaults to the number of cores on the system as returned by `len(os.sched_getaffinity(0))`. ## Supported Environments | Environment | Supported | Notes | diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py index d9db97274d..b8271bb79a 100755 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/read_simple.py @@ -32,7 +32,7 @@ @click.option('--use_cpp', default=True) @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use", ) diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py b/examples/developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py index 9cac7d7331..b9cdf761e5 100755 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/src/write_simple.py @@ -32,7 +32,7 @@ def run_pipeline(): input_file = os.path.join(root_dir, 'examples/data/email.jsonlines') config = Config() - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) # Create a linear pipeline object pipeline = LinearPipeline(config) diff --git a/examples/digital_fingerprinting/production/grafana/run.py b/examples/digital_fingerprinting/production/grafana/run.py index 1f10cd4f67..f5768eab1c 100644 --- a/examples/digital_fingerprinting/production/grafana/run.py +++ b/examples/digital_fingerprinting/production/grafana/run.py @@ -245,7 +245,7 @@ def run_pipeline(train_users, CppConfig.set_should_use_cpp(False) - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) config.ae = ConfigAutoEncoder() diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py b/examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py index daeb21ac21..ecd2143167 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py @@ -174,7 +174,7 @@ def generate_ae_config(source: str, pipeline_batch_size: int = 0, edge_buffer_size: int = 0, use_cpp: bool = False, - num_threads: int = os.cpu_count()): + num_threads: int = len(os.sched_getaffinity(0))): config = Config() CppConfig.set_should_use_cpp(use_cpp) diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py index e458105a4f..81de60094d 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py @@ -233,7 +233,7 @@ def run_pipeline(train_users, CppConfig.set_should_use_cpp(False) - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) config.ae = ConfigAutoEncoder() diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py index 51828764ab..4ba43aced1 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py @@ -230,7 +230,7 @@ def run_pipeline(train_users, CppConfig.set_should_use_cpp(False) - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) config.ae = ConfigAutoEncoder() diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb index 108b9bfbc8..6dc4d4ff76 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb @@ -219,7 +219,7 @@ "\n", "CppConfig.set_should_use_cpp(False)\n", "\n", - "config.num_threads = os.cpu_count()\n", + "config.num_threads = len(os.sched_getaffinity(0))\n", "\n", "config.ae = ConfigAutoEncoder()\n", "\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb index d9f28305bf..8bc19d88b0 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb @@ -216,7 +216,7 @@ "\n", "CppConfig.set_should_use_cpp(False)\n", "\n", - "config.num_threads = os.cpu_count()\n", + "config.num_threads = len(os.sched_getaffinity(0))\n", "\n", "config.ae = ConfigAutoEncoder()\n", "\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb index b2dedfb6a8..1d4adf907a 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb @@ -218,7 +218,7 @@ "\n", "CppConfig.set_should_use_cpp(False)\n", "\n", - "config.num_threads = os.cpu_count()\n", + "config.num_threads = len(os.sched_getaffinity(0))\n", "\n", "config.ae = ConfigAutoEncoder()\n", "\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb index dead9ef462..c45065ae09 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb @@ -217,7 +217,7 @@ "\n", "CppConfig.set_should_use_cpp(False)\n", "\n", - "config.num_threads = os.cpu_count()\n", + "config.num_threads = len(os.sched_getaffinity(0))\n", "\n", "config.ae = ConfigAutoEncoder()\n", "\n", diff --git a/examples/digital_fingerprinting/starter/run_cloudtrail_dfp.py b/examples/digital_fingerprinting/starter/run_cloudtrail_dfp.py index 1bbc0f4227..835b3e2809 100644 --- a/examples/digital_fingerprinting/starter/run_cloudtrail_dfp.py +++ b/examples/digital_fingerprinting/starter/run_cloudtrail_dfp.py @@ -39,7 +39,7 @@ @click.command() @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use", ) diff --git a/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py b/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py index 966295a21f..f67460a6bd 100644 --- a/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py +++ b/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py @@ -183,7 +183,7 @@ def run_pipeline(train_users, CppConfig.set_should_use_cpp(False) - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) config.ae = ConfigAutoEncoder() diff --git a/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py b/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py index 93bcbefed3..961f82d676 100644 --- a/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py +++ b/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py @@ -186,7 +186,7 @@ def run_pipeline(train_users, CppConfig.set_should_use_cpp(False) - config.num_threads = os.cpu_count() + config.num_threads = len(os.sched_getaffinity(0)) config.ae = ConfigAutoEncoder() diff --git a/examples/doca/run_udp_convert.py b/examples/doca/run_udp_convert.py index 6b78a67e72..52c9b216b7 100644 --- a/examples/doca/run_udp_convert.py +++ b/examples/doca/run_udp_convert.py @@ -43,7 +43,7 @@ ) @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), show_default=True, help="Number of internal pipeline threads to use.", diff --git a/examples/gnn_fraud_detection_pipeline/run.py b/examples/gnn_fraud_detection_pipeline/run.py index ae91845b86..80cd9d5d0b 100644 --- a/examples/gnn_fraud_detection_pipeline/run.py +++ b/examples/gnn_fraud_detection_pipeline/run.py @@ -38,7 +38,7 @@ @click.command() @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use.", ) diff --git a/examples/llm/agents/run.py b/examples/llm/agents/run.py index 6bec05ae43..b643926a2d 100644 --- a/examples/llm/agents/run.py +++ b/examples/llm/agents/run.py @@ -27,7 +27,7 @@ def run(): @run.command(help="Runs a simple finite pipeline with a single execution of a LangChain agent from a fixed input") @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use", ) @@ -67,7 +67,7 @@ def simple(**kwargs): @run.command(help="Runs a pipeline LangChain agents which pulls inputs from a Kafka message bus") @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use", ) diff --git a/examples/llm/completion/run.py b/examples/llm/completion/run.py index 851fe8dbfb..611a5105db 100644 --- a/examples/llm/completion/run.py +++ b/examples/llm/completion/run.py @@ -28,7 +28,7 @@ def run(): @run.command() @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use", ) diff --git a/examples/llm/rag/run.py b/examples/llm/rag/run.py index ace82fea0f..5160d4356a 100644 --- a/examples/llm/rag/run.py +++ b/examples/llm/rag/run.py @@ -27,7 +27,7 @@ def run(): @run.command() @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use", ) diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py index c43ef91ed7..f02ed5dfe0 100644 --- a/examples/llm/vdb_upload/run.py +++ b/examples/llm/vdb_upload/run.py @@ -78,7 +78,7 @@ def run(): ) @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use", ) diff --git a/examples/log_parsing/run.py b/examples/log_parsing/run.py index 7fff20bd27..d4879f4c55 100644 --- a/examples/log_parsing/run.py +++ b/examples/log_parsing/run.py @@ -35,7 +35,7 @@ @click.command() @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use.", ) diff --git a/examples/ransomware_detection/run.py b/examples/ransomware_detection/run.py index 5a80265996..0c06f21f95 100644 --- a/examples/ransomware_detection/run.py +++ b/examples/ransomware_detection/run.py @@ -41,7 +41,7 @@ @click.option('--use_cpp', default=False, help="Enable C++ execution for this pipeline, currently this is unsupported.") @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use.", ) diff --git a/examples/sid_visualization/run.py b/examples/sid_visualization/run.py index 2c2c2dd1a2..4db84fac11 100644 --- a/examples/sid_visualization/run.py +++ b/examples/sid_visualization/run.py @@ -123,7 +123,7 @@ def _generate_frames(self): @click.option('--use_cpp', default=True) @click.option( "--num_threads", - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use.", ) diff --git a/python/morpheus/morpheus/cli/commands.py b/python/morpheus/morpheus/cli/commands.py index 3136e3e0c5..e7df1d3b75 100644 --- a/python/morpheus/morpheus/cli/commands.py +++ b/python/morpheus/morpheus/cli/commands.py @@ -265,7 +265,7 @@ def install(**kwargs): @cli.group(short_help="Run one of the available pipelines", no_args_is_help=True, cls=AliasedGroup) @click.option('--num_threads', - default=os.cpu_count(), + default=len(os.sched_getaffinity(0)), type=click.IntRange(min=1), help="Number of internal pipeline threads to use") @click.option('--pipeline_batch_size', diff --git a/python/morpheus/morpheus/stages/input/http_server_source_stage.py b/python/morpheus/morpheus/stages/input/http_server_source_stage.py index 0c1619c905..d540007b80 100644 --- a/python/morpheus/morpheus/stages/input/http_server_source_stage.py +++ b/python/morpheus/morpheus/stages/input/http_server_source_stage.py @@ -68,7 +68,7 @@ class HttpServerSourceStage(PreallocatorMixin, SingleOutputSource): Maximum number of requests to queue before rejecting requests. If `None` then `config.edge_buffer_size` will be used. num_server_threads : int, default None - Number of threads to use for the HTTP server. If `None` then `os.cpu_count()` will be used. + Number of threads to use for the HTTP server. If `None` then `len(os.sched_getaffinity(0))` will be used. max_payload_size : int, default 10 The maximum size in megabytes of the payload that the server will accept in a single request. request_timeout_secs : int, default 30 @@ -117,7 +117,7 @@ def __init__(self, self._sleep_time = sleep_time self._queue_timeout = queue_timeout self._max_queue_size = max_queue_size or config.edge_buffer_size - self._num_server_threads = num_server_threads or os.cpu_count() + self._num_server_threads = num_server_threads or len(os.sched_getaffinity(0)) self._max_payload_size_bytes = max_payload_size * 1024 * 1024 self._request_timeout_secs = request_timeout_secs self._lines = lines diff --git a/python/morpheus/morpheus/stages/output/http_server_sink_stage.py b/python/morpheus/morpheus/stages/output/http_server_sink_stage.py index aac5a93b99..dcf59d3864 100644 --- a/python/morpheus/morpheus/stages/output/http_server_sink_stage.py +++ b/python/morpheus/morpheus/stages/output/http_server_sink_stage.py @@ -65,7 +65,7 @@ class HttpServerSinkStage(PassThruTypeMixin, SinglePortStage): Maximum number of requests to queue before rejecting requests. If `None` then `config.edge_buffer_size` will be used. Once the queue is full, the incoming edge buffer will begin to fill up. num_server_threads : int, default None - Number of threads to use for the HTTP server. If `None` then `os.cpu_count()` will be used. + Number of threads to use for the HTTP server. If `None` then `len(os.sched_getaffinity(0))` will be used. max_rows_per_response : int, optional Maximum number of rows to include in a single response, by default 10000. overflow_pct: float, optional @@ -103,7 +103,7 @@ def __init__(self, self._port = port self._endpoint = endpoint self._method = method - self._num_server_threads = num_server_threads or os.cpu_count() + self._num_server_threads = num_server_threads or len(os.sched_getaffinity(0)) self._max_rows_per_response = max_rows_per_response self._overflow_pct = overflow_pct self._request_timeout_secs = request_timeout_secs diff --git a/tests/common/test_http_server.py b/tests/common/test_http_server.py index 238ad51e5c..256ab1a5ff 100644 --- a/tests/common/test_http_server.py +++ b/tests/common/test_http_server.py @@ -42,7 +42,7 @@ def make_parse_fn(status: HTTPStatus = HTTPStatus.OK, @pytest.mark.parametrize("method", ["GET", "POST", "PUT"]) @pytest.mark.parametrize("use_callback", [True, False]) @pytest.mark.parametrize("use_context_mgr", [True, False]) -@pytest.mark.parametrize("num_threads", [1, 2, min(8, os.cpu_count())]) +@pytest.mark.parametrize("num_threads", [1, 2, min(8, len(os.sched_getaffinity(0)))]) @pytest.mark.parametrize("status,content_type,content", [(HTTPStatus.OK, MimeTypes.TEXT.value, "OK"), (HTTPStatus.OK, MimeTypes.JSON.value, '{"test": "OK"}'), From 667b51f86d548d6fd5679df0c627f5068a3ad062 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Fri, 6 Sep 2024 14:29:20 -0500 Subject: [PATCH 4/5] ensure columns are strings before concatenation (#1857) Closes #1849 Ensures all columns are string columns prior to attempting string concatenation in `column_info.py`. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1857 --- python/morpheus/morpheus/utils/column_info.py | 4 ++-- tests/test_column_info.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/morpheus/morpheus/utils/column_info.py b/python/morpheus/morpheus/utils/column_info.py index eaef229666..80f9a73db3 100644 --- a/python/morpheus/morpheus/utils/column_info.py +++ b/python/morpheus/morpheus/utils/column_info.py @@ -469,9 +469,9 @@ def _process_column(self, df: pd.DataFrame) -> pd.Series: The processed column as a string Series. """ - first_col = df[self.input_columns[0]] + first_col = df[self.input_columns[0]].astype(str) - return first_col.str.cat(others=df[self.input_columns[1:]], sep=self.sep) + return first_col.str.cat(others=df[self.input_columns[1:]].astype(str), sep=self.sep) @dataclasses.dataclass diff --git a/tests/test_column_info.py b/tests/test_column_info.py index c40e7854ac..f117ca9d9f 100644 --- a/tests/test_column_info.py +++ b/tests/test_column_info.py @@ -149,8 +149,11 @@ def test_string_cat_column(): ], sep=", ") - with pytest.raises(Exception): - string_cat_col_with_int._process_column(df) + actual = string_cat_col_with_int._process_column(df) + + expected = pd.Series(["New York, 10001", "Dallas, 75001", "Austin, 73301"]) + + assert actual.equals(expected) @pytest.mark.use_python From 3a28c5b9b30dd8c6babe92a0d348f5bb38090ffe Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Fri, 6 Sep 2024 14:30:36 -0500 Subject: [PATCH 5/5] Warn when `Config`'s `pipeline_batch_size < model_max_batch_size` (#1858) Closes #420 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1858 --- python/morpheus/morpheus/config.py | 27 +++++++++++++++++++++++++-- tests/test_config.py | 23 +++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/python/morpheus/morpheus/config.py b/python/morpheus/morpheus/config.py index 074d1515aa..15e0416819 100644 --- a/python/morpheus/morpheus/config.py +++ b/python/morpheus/morpheus/config.py @@ -208,10 +208,10 @@ class Config(ConfigBase): mode: PipelineModes = PipelineModes.OTHER + _pipeline_batch_size: int = 256 + _model_max_batch_size: int = 8 feature_length: int = 256 - pipeline_batch_size: int = 256 num_threads: int = 1 - model_max_batch_size: int = 8 edge_buffer_size: int = 128 # Class labels to convert class index to label. @@ -220,6 +220,29 @@ class Config(ConfigBase): ae: ConfigAutoEncoder = dataclasses.field(default=None) fil: ConfigFIL = dataclasses.field(default=None) + @property + def pipeline_batch_size(self): + return self._pipeline_batch_size + + @pipeline_batch_size.setter + def pipeline_batch_size(self, value: int): + self._pipeline_batch_size = value + self._validate_config() + + @property + def model_max_batch_size(self): + return self._model_max_batch_size + + @model_max_batch_size.setter + def model_max_batch_size(self, value: int): + self._model_max_batch_size = value + self._validate_config() + + def _validate_config(self): + if self._pipeline_batch_size < self._model_max_batch_size: + logging.warning("Config has `pipeline_batch_size < model_max_batch_size` which effectively limits " + "`model_max_batch_size`. This may reduce performance.") + def save(self, filename: str): """ Save Config to file. diff --git a/tests/test_config.py b/tests/test_config.py index a1517ad355..9817b5ab09 100755 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -15,9 +15,12 @@ # limitations under the License. import json +import logging import os from unittest import mock +import pytest + import morpheus import morpheus.config from _utils import assert_path_exists @@ -104,3 +107,23 @@ def test_to_string(config): conf_str = config.to_string() assert isinstance(conf_str, str) assert isinstance(json.loads(conf_str), dict) + + +def test_warning_model_batch_size_less_than_pipeline_batch_size(caplog: pytest.LogCaptureFixture): + config = morpheus.config.Config() + config.pipeline_batch_size = 256 + with caplog.at_level(logging.WARNING): + config.model_max_batch_size = 257 + assert len(caplog.records) == 1 + import re + assert re.match(".*pipeline_batch_size < model_max_batch_size.*", caplog.records[0].message) is not None + + +def test_warning_pipeline_batch_size_less_than_model_batch_size(caplog: pytest.LogCaptureFixture): + config = morpheus.config.Config() + config.model_max_batch_size = 8 + with caplog.at_level(logging.WARNING): + config.pipeline_batch_size = 7 + assert len(caplog.records) == 1 + import re + assert re.match(".*pipeline_batch_size < model_max_batch_size.*", caplog.records[0].message) is not None