From 1d02332d6a6fa57a1198565d036ca646b2c6e50e Mon Sep 17 00:00:00 2001 From: Yuchen Zhang <134643420+yczhang-nv@users.noreply.github.com> Date: Wed, 11 Sep 2024 09:07:51 -0700 Subject: [PATCH 1/8] Removing support for `MultiMessage` from stages (#1803) Part of finalizing `MultiMessage` deprecation. Previously the stages support processing both `ControlMessage` and `MultiMessage` - this PR removes the code paths for `MultiMessage` from the following stages (from both C++ & Python implementation): - Pre-process stages: - `deserialize_stage` - `preprocess_base_stage` - `preprocess_ae_stage` - `preprocess_fil_stage` - `preprocess_nlp_stage` - `train_ae_stage` - Post-process stages: - `serialize_stage` - `add_scores_stage_base_stage` - `add_scores_stage` - `add_classification_stage` - `filter_detections_stage` - `generate_viz_frames_stage` - `ml_flow_drift_stage` - `time_series_stage` - `validation_stage` - Inference stages: - `auto_encoder_inference_stage` - `identity_inference_stage` - `inference_stage` - `pytorch_inference_stage` - `triton_inference_stage` - Output stages: - `write_to_vector_db_stage` The related unit tests are also updated to use only `ControlMessage`. Morpheus examples that imports stages/messages directly (rather than creating their own version) are also updated to get CI passed for this PR. Closes #1887 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Yuchen Zhang (https://github.com/yczhang-nv) - Michael Demoret (https://github.com/mdemoret-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) - David Gardner (https://github.com/dagardner-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1803 --- .pre-commit-config.yaml | 2 +- ci/scripts/documentation_checks.sh | 2 +- ci/scripts/download_kafka.py | 2 +- ci/scripts/gitutils.py | 2 +- docs/source/devcontainer.md | 2 +- .../guides/9_control_messages.md | 25 +- .../examples/abp_nvsmi_detection/README.md | 2 +- .../examples/abp_pcap_detection/README.md | 2 +- docs/source/examples/doca/README.md | 2 +- .../gnn_fraud_detection_pipeline/README.md | 2 +- docs/source/examples/llm/agents/README.md | 2 +- docs/source/examples/llm/completion/README.md | 2 +- docs/source/examples/llm/rag/README.md | 2 +- docs/source/examples/llm/vdb_upload/README.md | 2 +- docs/source/examples/log_parsing/README.md | 2 +- .../examples/nlp_si_detection/README.md | 2 +- .../examples/ransomware_detection/README.md | 2 +- .../examples/root_cause_analysis/README.md | 2 +- .../examples/sid_visualization/README.md | 2 +- .../abp_pcap_preprocessing.py | 36 +- examples/llm/agents/common.py | 6 +- examples/llm/completion/pipeline.py | 4 +- examples/llm/rag/standalone_pipeline.py | 4 +- examples/log_parsing/inference.py | 66 +-- examples/log_parsing/postprocessing.py | 15 +- .../stages/create_features.py | 26 +- .../stages/preprocessing.py | 26 +- manifest.yaml | 2 +- models/docker/build_container.sh | 2 +- .../morpheus/_lib/common/__init__.pyi | 264 ++++++--- .../morpheus/morpheus/_lib/common/module.cpp | 2 +- .../morpheus/_lib/cudf_helpers/__init__.pyi | 8 +- .../include/morpheus/messages/control.hpp | 1 + .../morpheus/objects/tensor_object.hpp | 7 +- .../morpheus/stages/add_classification.hpp | 41 +- .../include/morpheus/stages/add_scores.hpp | 39 +- .../morpheus/stages/add_scores_stage_base.hpp | 43 +- .../include/morpheus/stages/deserialize.hpp | 141 ++--- .../morpheus/stages/filter_detections.hpp | 115 ++-- .../stages/inference_client_stage.hpp | 79 +-- .../include/morpheus/stages/preallocate.hpp | 19 +- .../morpheus/stages/preprocess_fil.hpp | 60 +- .../morpheus/stages/preprocess_nlp.hpp | 75 +-- .../include/morpheus/stages/serialize.hpp | 66 +-- .../morpheus/_lib/messages/__init__.pyi | 525 ++++++++++++++---- .../morpheus/_lib/modules/__init__.pyi | 5 +- .../morpheus/_lib/src/io/data_loader.cpp | 1 + .../morpheus/_lib/src/messages/meta.cpp | 2 +- .../_lib/src/stages/add_classification.cpp | 42 +- .../morpheus/_lib/src/stages/add_scores.cpp | 41 +- .../_lib/src/stages/add_scores_stage_base.cpp | 122 +--- .../morpheus/_lib/src/stages/deserialize.cpp | 102 +++- .../_lib/src/stages/filter_detections.cpp | 177 ++---- .../src/stages/inference_client_stage.cpp | 231 +++----- .../_lib/src/stages/preprocess_fil.cpp | 272 +++------ .../_lib/src/stages/preprocess_nlp.cpp | 184 ++---- .../morpheus/_lib/src/stages/serialize.cpp | 89 +-- .../_lib/src/utilities/http_server.cpp | 2 +- .../morpheus/_lib/stages/__init__.pyi | 277 ++++++--- .../morpheus/morpheus/_lib/stages/module.cpp | 245 +++----- .../_lib/tests/io/test_data_loader.cpp | 1 + .../tests/stages/test_add_classification.cpp | 42 +- .../_lib/tests/stages/test_add_scores.cpp | 48 +- .../_lib/tests/stages/test_preprocess_fil.cpp | 28 +- .../_lib/tests/stages/test_preprocess_nlp.cpp | 63 +-- .../stages/test_triton_inference_stage.cpp | 114 ++-- .../filter_detections_controller.py | 74 +-- .../controllers/monitor_controller.py | 42 +- .../controllers/serialize_controller.py | 17 +- python/morpheus/morpheus/messages/__init__.py | 18 +- .../morpheus/messages/message_base.py | 8 - python/morpheus/morpheus/models/__init__.py | 2 +- .../morpheus/modules/general/monitor.py | 2 +- .../modules/output/write_to_vector_db.py | 58 +- .../modules/preprocess/deserialize.py | 64 +-- .../modules/schemas/deserialize_schema.py | 4 +- .../schemas/multi_file_source_schema.py | 2 +- .../modules/schemas/rss_source_schema.py | 2 +- python/morpheus/morpheus/modules/serialize.py | 2 +- python/morpheus/morpheus/pipeline/__init__.py | 2 +- .../morpheus/pipeline/boundary_stage_mixin.py | 2 +- ...sage_stage.py => control_message_stage.py} | 17 +- .../morpheus/pipeline/preallocator_mixin.py | 13 +- .../morpheus/pipeline/stage_decorator.py | 7 +- python/morpheus/morpheus/service/__init__.py | 2 +- .../inference/auto_encoder_inference_stage.py | 73 ++- .../inference/identity_inference_stage.py | 21 +- .../stages/inference/inference_stage.py | 125 ++--- .../inference/pytorch_inference_stage.py | 25 +- .../inference/triton_inference_stage.py | 41 +- .../stages/output/in_memory_sink_stage.py | 2 +- .../stages/output/write_to_vector_db_stage.py | 6 +- .../postprocess/add_classifications_stage.py | 11 +- .../stages/postprocess/add_scores_stage.py | 5 +- .../postprocess/add_scores_stage_base.py | 50 +- .../postprocess/filter_detections_stage.py | 30 +- .../postprocess/generate_viz_frames_stage.py | 29 +- .../stages/postprocess/ml_flow_drift_stage.py | 19 +- .../stages/postprocess/serialize_stage.py | 24 +- .../stages/postprocess/timeseries_stage.py | 64 +-- .../stages/postprocess/validation_stage.py | 5 +- .../stages/preprocess/deserialize_stage.py | 53 +- .../stages/preprocess/preprocess_ae_stage.py | 81 +-- .../preprocess/preprocess_base_stage.py | 33 +- .../stages/preprocess/preprocess_fil_stage.py | 74 +-- .../stages/preprocess/preprocess_nlp_stage.py | 109 +--- .../stages/preprocess/train_ae_stage.py | 33 +- python/morpheus/morpheus/utils/concat_df.py | 21 +- python/morpheus/morpheus/utils/seed.py | 2 +- .../morpheus/morpheus/utils/type_aliases.py | 2 +- .../_lib/include/morpheus_llm/llm/fwd.hpp | 2 +- .../morpheus_llm/_lib/llm/CMakeLists.txt | 2 +- .../morpheus_llm/_lib/src/llm/llm_engine.cpp | 3 +- .../llm/test_llm_task_handler_runner.cpp | 1 + .../_lib/tests/llm/test_utils.cpp | 1 + .../morpheus_llm/llm/nodes/__init__.py | 2 +- .../llm/nodes/llm_generate_node.py | 2 +- .../morpheus_llm/llm/services/__init__.py | 2 +- .../morpheus_llm/llm/services/llm_service.py | 2 +- .../llm/services/utils/__init__.py | 2 +- .../llm/task_handlers/__init__.py | 2 +- .../llm/task_handlers/simple_task_handler.py | 2 +- .../morpheus_llm/stages/llm/__init__.py | 2 +- tests/_utils/inference_worker.py | 4 +- tests/_utils/llm.py | 2 +- tests/_utils/stages/check_pre_alloc.py | 8 +- tests/_utils/stages/conv_msg.py | 38 +- .../test_bench_agents_simple_pipeline.py | 4 +- .../test_bench_completion_pipeline.py | 4 +- .../test_bench_rag_standalone_pipeline.py | 4 +- .../test_abp_pcap_preprocessing.py | 58 +- tests/examples/log_parsing/test_inference.py | 137 ++--- .../log_parsing/test_postprocessing.py | 15 +- .../test_create_features.py | 31 +- .../test_preprocessing.py | 16 +- tests/llm/nodes/conftest.py | 2 +- .../nodes/test_langchain_agent_node_pipe.py | 4 +- .../llm/nodes/test_llm_generate_node_pipe.py | 4 +- .../llm/nodes/test_llm_retriever_node_pipe.py | 6 +- .../nodes/test_prompt_template_node_pipe.py | 4 +- tests/llm/nodes/test_rag_node_pipe.py | 4 +- tests/llm/services/test_llm_service.py | 2 +- tests/llm/services/test_llm_service_pipe.py | 4 +- tests/llm/test_agents_simple_pipe.py | 4 +- tests/llm/test_completion_pipe.py | 7 +- ...test_extractor_simple_task_handler_pipe.py | 4 +- tests/llm/test_rag_standalone_pipe.py | 4 +- tests/pipeline/test_pipeline.py | 8 +- tests/pipeline/test_preallocation_pipe.py | 11 +- tests/pipeline/test_stage_decorator.py | 11 +- tests/stages/test_deserialize_stage_pipe.py | 31 +- tests/stages/test_filter_detections_stage.py | 109 +--- .../stages/test_generate_viz_frames_stage.py | 31 +- tests/stages/test_llm_engine_stage_pipe.py | 4 +- tests/stages/test_ml_flow_drift_stage.py | 21 +- tests/stages/test_preprocess_ae_stage.py | 26 +- tests/stages/test_preprocess_fil_stage.py | 45 +- tests/stages/test_preprocess_nlp_stage.py | 71 --- tests/stages/test_timeseries_stage.py | 13 - tests/stages/test_validation_stage.py | 20 +- tests/test_abp.py | 38 +- tests/test_add_classifications_stage.py | 46 +- tests/test_add_classifications_stage_pipe.py | 35 +- tests/test_add_scores_stage.py | 46 +- tests/test_add_scores_stage_pipe.py | 55 +- tests/test_concat_df.py | 12 +- tests/test_dfp.py | 16 +- tests/test_file_in_out.py | 4 +- ...est_filter_detections_stage_column_pipe.py | 2 +- tests/test_filter_detections_stage_pipe.py | 35 +- tests/test_inference_stage.py | 101 ++-- tests/test_inference_worker.py | 50 +- tests/test_messages.py | 65 --- ...st_milvus_write_to_vector_db_stage_pipe.py | 52 -- tests/test_monitor_stage.py | 7 +- tests/test_multi_message.py | 42 -- tests/test_serialize_stage.py | 16 +- tests/test_sid.py | 19 +- tests/test_sid_kafka.py | 2 +- tests/test_triton_inference_stage.py | 21 +- 180 files changed, 2514 insertions(+), 4043 deletions(-) rename python/morpheus/morpheus/pipeline/{multi_message_stage.py => control_message_stage.py} (75%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5b0e6c5fca..890c82a115 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ci/scripts/documentation_checks.sh b/ci/scripts/documentation_checks.sh index 4db5a35589..0d1860c446 100755 --- a/ci/scripts/documentation_checks.sh +++ b/ci/scripts/documentation_checks.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ci/scripts/download_kafka.py b/ci/scripts/download_kafka.py index 85edfbfa47..2ba0625ac3 100644 --- a/ci/scripts/download_kafka.py +++ b/ci/scripts/download_kafka.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ci/scripts/gitutils.py b/ci/scripts/gitutils.py index d3eb6e384a..ead1d7ea96 100755 --- a/ci/scripts/gitutils.py +++ b/ci/scripts/gitutils.py @@ -202,7 +202,7 @@ def add_files(*files_to_add): @staticmethod def get_file_add_date(file_path): """Return the date a given file was added to git""" - date_str = _git("log", "--follow", "--format=%as", "--", file_path, "|", "tail", "-n 1") + date_str = _run_cmd(f"git log --follow --format=%as -- {file_path} | tail -n 1") return datetime.datetime.strptime(date_str, "%Y-%m-%d") @staticmethod diff --git a/docs/source/devcontainer.md b/docs/source/devcontainer.md index d6e7bd58a3..ae9f3a80da 100644 --- a/docs/source/devcontainer.md +++ b/docs/source/devcontainer.md @@ -1,5 +1,5 @@