diff --git a/docs/design/index.rst b/docs/design/index.rst index b0e2b6cc..0a29f784 100644 --- a/docs/design/index.rst +++ b/docs/design/index.rst @@ -6,5 +6,6 @@ This section contains design documentation for the ros2_medkit project packages. .. toctree:: :maxdepth: 1 + ros2_medkit_fault_manager/index ros2_medkit_gateway/index diff --git a/docs/design/ros2_medkit_fault_manager b/docs/design/ros2_medkit_fault_manager new file mode 120000 index 00000000..6ee4eae4 --- /dev/null +++ b/docs/design/ros2_medkit_fault_manager @@ -0,0 +1 @@ +../../src/ros2_medkit_fault_manager/design \ No newline at end of file diff --git a/src/ros2_medkit_fault_manager/CMakeLists.txt b/src/ros2_medkit_fault_manager/CMakeLists.txt new file mode 100644 index 00000000..1150364c --- /dev/null +++ b/src/ros2_medkit_fault_manager/CMakeLists.txt @@ -0,0 +1,116 @@ +# Copyright 2025 mfaferek93 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.8) +project(ros2_medkit_fault_manager) + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic -Wshadow -Wconversion) +endif() + +# Code coverage option +option(ENABLE_COVERAGE "Enable code coverage reporting" OFF) +if(ENABLE_COVERAGE) + message(STATUS "Code coverage enabled") + add_compile_options(--coverage -O0 -g) + add_link_options(--coverage) +endif() + +find_package(ament_cmake REQUIRED) +find_package(rclcpp REQUIRED) +find_package(ros2_medkit_msgs REQUIRED) + +# Library target (shared between executable and tests) +add_library(fault_manager_lib STATIC + src/fault_manager_node.cpp + src/fault_storage.cpp +) + +target_include_directories(fault_manager_lib PUBLIC + $ + $ +) + +ament_target_dependencies(fault_manager_lib + rclcpp + ros2_medkit_msgs +) + +if(ENABLE_COVERAGE) + target_compile_options(fault_manager_lib PRIVATE --coverage -O0 -g) + target_link_options(fault_manager_lib PRIVATE --coverage) +endif() + +# Executable target +add_executable(fault_manager_node src/main.cpp) +target_link_libraries(fault_manager_node fault_manager_lib) + +# Apply coverage flags to executable +if(ENABLE_COVERAGE) + target_compile_options(fault_manager_node PRIVATE --coverage -O0 -g) + target_link_options(fault_manager_node PRIVATE --coverage) +endif() + +# Install targets +install(TARGETS fault_manager_node + DESTINATION lib/${PROJECT_NAME} +) + +install(DIRECTORY include/ + DESTINATION include +) + +install(DIRECTORY launch + DESTINATION share/${PROJECT_NAME} +) + +# Testing +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + find_package(ament_cmake_gtest REQUIRED) + find_package(launch_testing_ament_cmake REQUIRED) + + set(ament_cmake_clang_format_CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../../.clang-format") + set(ament_cmake_clang_tidy_CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../../.clang-tidy") + list(APPEND AMENT_LINT_AUTO_EXCLUDE ament_cmake_uncrustify ament_cmake_cpplint) + ament_lint_auto_find_test_dependencies() + + # Unit tests + ament_add_gtest(test_fault_manager test/test_fault_manager.cpp) + target_link_libraries(test_fault_manager fault_manager_lib) + ament_target_dependencies(test_fault_manager rclcpp ros2_medkit_msgs) + + # Apply coverage flags to test target + if(ENABLE_COVERAGE) + target_compile_options(test_fault_manager PRIVATE --coverage -O0 -g) + target_link_options(test_fault_manager PRIVATE --coverage) + endif() + + # Integration tests + install(DIRECTORY test + DESTINATION share/${PROJECT_NAME} + ) + + add_launch_test( + test/test_integration.test.py + TARGET test_integration + TIMEOUT 60 + ) +endif() + +ament_package() diff --git a/src/ros2_medkit_fault_manager/README.md b/src/ros2_medkit_fault_manager/README.md new file mode 100644 index 00000000..0775947a --- /dev/null +++ b/src/ros2_medkit_fault_manager/README.md @@ -0,0 +1,66 @@ +# ros2_medkit_fault_manager + +Central fault manager node for the ros2_medkit fault management system. + +## Overview + +The FaultManager node provides a central point for fault aggregation and lifecycle management. +It receives fault reports from multiple sources, aggregates them by `fault_code`, and provides +query and clearing interfaces. + +## Services + +| Service | Type | Description | +|---------|------|-------------| +| `~/report_fault` | `ros2_medkit_msgs/srv/ReportFault` | Report a fault occurrence | +| `~/get_faults` | `ros2_medkit_msgs/srv/GetFaults` | Query faults with filtering | +| `~/clear_fault` | `ros2_medkit_msgs/srv/ClearFault` | Clear/acknowledge a fault | + +## Features + +- **Multi-source aggregation**: Same `fault_code` from different sources creates a single fault +- **Occurrence tracking**: Counts total reports and tracks all reporting sources +- **Severity escalation**: Fault severity is updated if a higher severity is reported +- **Status lifecycle**: PENDING → CONFIRMED → CLEARED (automatic status transitions in Issue #6) + +## Usage + +### Launch + +```bash +ros2 launch ros2_medkit_fault_manager fault_manager.launch.py +``` + +### Manual Testing + +```bash +# Report a fault +ros2 service call /fault_manager/report_fault ros2_medkit_msgs/srv/ReportFault \ + "{fault_code: 'MOTOR_OVERHEAT', severity: 2, description: 'Motor temp exceeded', source_id: '/motor_node'}" + +# Get all faults (including PENDING) +ros2 service call /fault_manager/get_faults ros2_medkit_msgs/srv/GetFaults \ + "{filter_by_severity: false, severity: 0, statuses: ['PENDING', 'CONFIRMED']}" + +# Clear a fault +ros2 service call /fault_manager/clear_fault ros2_medkit_msgs/srv/ClearFault \ + "{fault_code: 'MOTOR_OVERHEAT'}" +``` + +## Building + +```bash +colcon build --packages-select ros2_medkit_fault_manager +source install/setup.bash +``` + +## Testing + +```bash +colcon test --packages-select ros2_medkit_fault_manager +colcon test-result --verbose +``` + +## License + +Apache-2.0 diff --git a/src/ros2_medkit_fault_manager/design/architecture.puml b/src/ros2_medkit_fault_manager/design/architecture.puml new file mode 100644 index 00000000..e91742e7 --- /dev/null +++ b/src/ros2_medkit_fault_manager/design/architecture.puml @@ -0,0 +1,93 @@ +@startuml ros2_medkit_fault_manager_architecture + +skinparam linetype ortho +skinparam classAttributeIconSize 0 + +title ROS 2 Medkit Fault Manager - Class Architecture + +package "ROS 2 Framework" { + class "rclcpp::Node" { + +create_service() + +get_logger() + +now() + } +} + +package "ros2_medkit_msgs" { + class "msg::Fault" { + +fault_code: string + +severity: uint8 + +description: string + +first_occurred: Time + +last_occurred: Time + +occurrence_count: uint32 + +status: string + +reporting_sources: string[] + } + + class "srv::ReportFault" { + +Request: fault_code, severity, description, source_id + +Response: success, message + } + + class "srv::GetFaults" { + +Request: filter_by_severity, severity, statuses + +Response: faults[] + } + + class "srv::ClearFault" { + +Request: fault_code + +Response: success, message + } +} + +package "ros2_medkit_fault_manager" { + + class FaultManagerNode { + + get_storage(): FaultStorage& + } + + abstract class FaultStorage <> { + + {abstract} report_fault(): bool + + {abstract} get_faults(): vector + + {abstract} get_fault(): optional + + {abstract} clear_fault(): bool + + {abstract} size(): size_t + + {abstract} contains(): bool + } + + class InMemoryFaultStorage { + + report_fault(): bool + + get_faults(): vector + + get_fault(): optional + + clear_fault(): bool + + size(): size_t + + contains(): bool + } + + class FaultState <> { + + to_msg(): Fault + } +} + +' Relationships + +' Inheritance +FaultManagerNode -up-|> "rclcpp::Node" : extends +InMemoryFaultStorage -up-|> FaultStorage : implements + +' Composition +FaultManagerNode *-down-> InMemoryFaultStorage : owns + +' InMemoryFaultStorage contains FaultStates +InMemoryFaultStorage o-right-> FaultState : contains many + +' FaultState converts to message +FaultState ..> "msg::Fault" : converts to + +' Node uses service types +FaultManagerNode ..> "srv::ReportFault" : handles +FaultManagerNode ..> "srv::GetFaults" : handles +FaultManagerNode ..> "srv::ClearFault" : handles + +@enduml diff --git a/src/ros2_medkit_fault_manager/design/index.rst b/src/ros2_medkit_fault_manager/design/index.rst new file mode 100644 index 00000000..eceef500 --- /dev/null +++ b/src/ros2_medkit_fault_manager/design/index.rst @@ -0,0 +1,204 @@ +ros2_medkit_fault_manager +========================= + +This section contains design documentation for the ros2_medkit_fault_manager project. + +Architecture +------------ + +The following diagram shows the relationships between the main components of the fault manager. + +.. plantuml:: + :caption: ROS 2 Medkit Fault Manager Class Architecture + + @startuml ros2_medkit_fault_manager_architecture + + skinparam linetype ortho + skinparam classAttributeIconSize 0 + + title ROS 2 Medkit Fault Manager - Class Architecture + + package "ROS 2 Framework" { + class "rclcpp::Node" { + +create_service() + +get_logger() + +now() + } + } + + package "ros2_medkit_msgs" { + class "msg::Fault" { + +fault_code: string + +severity: uint8 + +description: string + +first_occurred: Time + +last_occurred: Time + +occurrence_count: uint32 + +status: string + +reporting_sources: string[] + } + + class "srv::ReportFault" { + +Request: fault_code, severity, description, source_id + +Response: success, message + } + + class "srv::GetFaults" { + +Request: filter_by_severity, severity, statuses + +Response: faults[] + } + + class "srv::ClearFault" { + +Request: fault_code + +Response: success, message + } + } + + package "ros2_medkit_fault_manager" { + + class FaultManagerNode { + + get_storage(): FaultStorage& + } + + abstract class FaultStorage <> { + + {abstract} report_fault(): bool + + {abstract} get_faults(): vector + + {abstract} get_fault(): optional + + {abstract} clear_fault(): bool + + {abstract} size(): size_t + + {abstract} contains(): bool + } + + class InMemoryFaultStorage { + + report_fault(): bool + + get_faults(): vector + + get_fault(): optional + + clear_fault(): bool + + size(): size_t + + contains(): bool + } + + class FaultState <> { + + to_msg(): Fault + } + } + + ' Relationships + + ' Inheritance + FaultManagerNode -up-|> "rclcpp::Node" : extends + InMemoryFaultStorage -up-|> FaultStorage : implements + + ' Composition + FaultManagerNode *-down-> InMemoryFaultStorage : owns + + ' InMemoryFaultStorage contains FaultStates + InMemoryFaultStorage o-right-> FaultState : contains many + + ' FaultState converts to message + FaultState ..> "msg::Fault" : converts to + + ' Node uses service types + FaultManagerNode ..> "srv::ReportFault" : handles + FaultManagerNode ..> "srv::GetFaults" : handles + FaultManagerNode ..> "srv::ClearFault" : handles + + @enduml + +Main Components +--------------- + +1. **FaultManagerNode** - The main ROS 2 node that provides fault management services + - Extends ``rclcpp::Node`` + - Owns a ``FaultStorage`` implementation for fault state persistence + - Provides three ROS 2 services for fault reporting, querying, and clearing + - Validates input parameters (fault_code, severity, source_id) + - Logs fault lifecycle events at appropriate severity levels + +2. **FaultStorage** - Abstract interface for fault storage backends + - Defines the contract for fault storage implementations + - Enables pluggable storage backends (in-memory, persistent, distributed) + - Future implementations can be added in Issue #8: Fault Persistence Options + +3. **InMemoryFaultStorage** - Thread-safe in-memory implementation of FaultStorage + - Uses ``std::map`` keyed by ``fault_code`` for O(log n) lookups + - Protected by ``std::mutex`` for concurrent service request handling + - Aggregates reports from multiple sources into single fault entries + - Implements severity escalation (higher severity overwrites lower) + - Tracks occurrence counts and all reporting sources + +4. **FaultState** - Internal representation of a fault entry + - Maps directly to ``ros2_medkit_msgs::msg::Fault`` via ``to_msg()`` + - Uses ``std::set`` for reporting_sources to ensure uniqueness + - Tracks first and last occurrence timestamps + - Manages fault status lifecycle (PENDING → CONFIRMED → CLEARED) + +Services +-------- + +~/report_fault +~~~~~~~~~~~~~~ + +Reports a new fault or updates an existing one. + +- **Input validation**: fault_code and source_id cannot be empty, severity must be 0-3 +- **Aggregation**: Same fault_code from different sources creates a single fault entry +- **Severity escalation**: Fault severity is updated if a higher severity is reported +- **Returns**: ``success=true`` with message indicating "New fault" or "Fault updated" + +~/get_faults +~~~~~~~~~~~~ + +Queries faults with optional filtering. + +- **Status filter**: Filter by status (PENDING, CONFIRMED, CLEARED); defaults to CONFIRMED +- **Severity filter**: When ``filter_by_severity=true``, returns only faults of specified severity +- **Returns**: List of ``Fault`` messages matching the filter criteria + +~/clear_fault +~~~~~~~~~~~~~ + +Clears (acknowledges) a fault by setting its status to CLEARED. + +- **Input validation**: fault_code cannot be empty +- **Idempotent**: Clearing an already-cleared fault succeeds +- **Returns**: ``success=true`` if fault existed, ``success=false`` if not found + +Design Decisions +---------------- + +Thread Safety +~~~~~~~~~~~~~ + +All ``FaultStorage`` public methods acquire a mutex lock to ensure thread safety +when handling concurrent service requests. This is essential since ROS 2 service +callbacks may execute on different threads. + +Fault Aggregation +~~~~~~~~~~~~~~~~~ + +Multiple reports of the same ``fault_code`` (from same or different sources) are +aggregated into a single fault entry. This provides: + +- **Deduplication**: Prevents fault flooding from repeated reports +- **Source tracking**: Identifies all sources reporting the same fault +- **Occurrence counting**: Tracks how many times a fault was reported + +Severity Escalation +~~~~~~~~~~~~~~~~~~~ + +When a fault is re-reported with a higher severity, the stored severity is updated. +This ensures the fault reflects the worst-case condition. Severity levels are ordered: +``INFO(0) < WARN(1) < ERROR(2) < CRITICAL(3)``. + +Status Lifecycle +~~~~~~~~~~~~~~~~ + +Faults follow a lifecycle: PENDING → CONFIRMED → CLEARED + +- **PENDING**: Initial status when fault is first reported +- **CONFIRMED**: Status after automatic or manual confirmation (Issue #6) +- **CLEARED**: Status after fault is cleared/acknowledged + +Currently, faults start as PENDING and move to CLEARED when explicitly cleared. +Automatic PENDING → CONFIRMED transitions will be implemented in Issue #6. diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp new file mode 100644 index 00000000..390ab632 --- /dev/null +++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp @@ -0,0 +1,63 @@ +// Copyright 2025 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "rclcpp/rclcpp.hpp" +#include "ros2_medkit_fault_manager/fault_storage.hpp" +#include "ros2_medkit_msgs/srv/clear_fault.hpp" +#include "ros2_medkit_msgs/srv/get_faults.hpp" +#include "ros2_medkit_msgs/srv/report_fault.hpp" + +namespace ros2_medkit_fault_manager { + +/// Central fault manager node +/// +/// Provides service interfaces for fault reporting, querying, and clearing. +/// Stores faults in memory and aggregates reports by fault_code. +class FaultManagerNode : public rclcpp::Node { + public: + explicit FaultManagerNode(const rclcpp::NodeOptions & options = rclcpp::NodeOptions()); + + /// Get read-only access to fault storage (for testing) + const FaultStorage & get_storage() const { + return storage_; + } + + private: + /// Handle ReportFault service request + void handle_report_fault(const std::shared_ptr & request, + const std::shared_ptr & response); + + /// Handle GetFaults service request + void handle_get_faults(const std::shared_ptr & request, + const std::shared_ptr & response); + + /// Handle ClearFault service request + void handle_clear_fault(const std::shared_ptr & request, + const std::shared_ptr & response); + + /// Validate severity value + static bool is_valid_severity(uint8_t severity); + + InMemoryFaultStorage storage_; + + rclcpp::Service::SharedPtr report_fault_srv_; + rclcpp::Service::SharedPtr get_faults_srv_; + rclcpp::Service::SharedPtr clear_fault_srv_; +}; + +} // namespace ros2_medkit_fault_manager diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_storage.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_storage.hpp new file mode 100644 index 00000000..9a82d9de --- /dev/null +++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_storage.hpp @@ -0,0 +1,115 @@ +// Copyright 2025 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "rclcpp/rclcpp.hpp" +#include "ros2_medkit_msgs/msg/fault.hpp" + +namespace ros2_medkit_fault_manager { + +/// Internal fault state stored in memory +struct FaultState { + std::string fault_code; + uint8_t severity{0}; + std::string description; + rclcpp::Time first_occurred; + rclcpp::Time last_occurred; + uint32_t occurrence_count{0}; + std::string status; + std::set reporting_sources; + + /// Convert to ROS 2 message + ros2_medkit_msgs::msg::Fault to_msg() const; +}; + +/// Abstract interface for fault storage backends +class FaultStorage { + public: + virtual ~FaultStorage() = default; + + /// Store or update a fault report + /// @param fault_code Global fault identifier + /// @param severity Fault severity level + /// @param description Human-readable description + /// @param source_id Reporting source identifier + /// @param timestamp Current time for tracking + /// @return true if this is a new fault, false if existing fault was updated + virtual bool report_fault(const std::string & fault_code, uint8_t severity, const std::string & description, + const std::string & source_id, const rclcpp::Time & timestamp) = 0; + + /// Get faults matching filter criteria + /// @param filter_by_severity Whether to filter by severity + /// @param severity Severity level to filter (if filter_by_severity is true) + /// @param statuses List of statuses to include (empty = CONFIRMED only) + /// @return Vector of matching faults + virtual std::vector get_faults(bool filter_by_severity, uint8_t severity, + const std::vector & statuses) const = 0; + + /// Get a single fault by fault_code + /// @param fault_code The fault code to look up + /// @return The fault if found, nullopt otherwise + virtual std::optional get_fault(const std::string & fault_code) const = 0; + + /// Clear a fault by fault_code + /// @param fault_code The fault code to clear + /// @return true if fault was found and cleared, false if not found + virtual bool clear_fault(const std::string & fault_code) = 0; + + /// Get total number of stored faults + virtual size_t size() const = 0; + + /// Check if a fault exists + virtual bool contains(const std::string & fault_code) const = 0; + + protected: + FaultStorage() = default; + FaultStorage(const FaultStorage &) = default; + FaultStorage & operator=(const FaultStorage &) = default; + FaultStorage(FaultStorage &&) = default; + FaultStorage & operator=(FaultStorage &&) = default; +}; + +/// Thread-safe in-memory fault storage implementation +class InMemoryFaultStorage : public FaultStorage { + public: + InMemoryFaultStorage() = default; + + bool report_fault(const std::string & fault_code, uint8_t severity, const std::string & description, + const std::string & source_id, const rclcpp::Time & timestamp) override; + + std::vector get_faults(bool filter_by_severity, uint8_t severity, + const std::vector & statuses) const override; + + std::optional get_fault(const std::string & fault_code) const override; + + bool clear_fault(const std::string & fault_code) override; + + size_t size() const override; + + bool contains(const std::string & fault_code) const override; + + private: + mutable std::mutex mutex_; + std::map faults_; +}; + +} // namespace ros2_medkit_fault_manager diff --git a/src/ros2_medkit_fault_manager/launch/fault_manager.launch.py b/src/ros2_medkit_fault_manager/launch/fault_manager.launch.py new file mode 100644 index 00000000..b47ec0f8 --- /dev/null +++ b/src/ros2_medkit_fault_manager/launch/fault_manager.launch.py @@ -0,0 +1,27 @@ +# Copyright 2025 mfaferek93 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from launch import LaunchDescription +from launch_ros.actions import Node + + +def generate_launch_description(): + return LaunchDescription([ + Node( + package='ros2_medkit_fault_manager', + executable='fault_manager_node', + name='fault_manager', + output='screen', + ), + ]) diff --git a/src/ros2_medkit_fault_manager/package.xml b/src/ros2_medkit_fault_manager/package.xml new file mode 100644 index 00000000..c9781b4b --- /dev/null +++ b/src/ros2_medkit_fault_manager/package.xml @@ -0,0 +1,27 @@ + + + + ros2_medkit_fault_manager + 0.1.0 + Central fault manager node for ros2_medkit fault management system + + bburda + Apache-2.0 + + ament_cmake + + rclcpp + ros2_medkit_msgs + + ament_lint_auto + ament_lint_common + ament_cmake_clang_format + ament_cmake_clang_tidy + ament_cmake_gtest + launch_testing_ament_cmake + launch_testing_ros + + + ament_cmake + + diff --git a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp new file mode 100644 index 00000000..23d1bb3e --- /dev/null +++ b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp @@ -0,0 +1,115 @@ +// Copyright 2025 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ros2_medkit_fault_manager/fault_manager_node.hpp" + +namespace ros2_medkit_fault_manager { + +FaultManagerNode::FaultManagerNode(const rclcpp::NodeOptions & options) : Node("fault_manager", options) { + // Create service servers + report_fault_srv_ = create_service( + "~/report_fault", [this](const std::shared_ptr & request, + const std::shared_ptr & response) { + handle_report_fault(request, response); + }); + + get_faults_srv_ = create_service( + "~/get_faults", [this](const std::shared_ptr & request, + const std::shared_ptr & response) { + handle_get_faults(request, response); + }); + + clear_fault_srv_ = create_service( + "~/clear_fault", [this](const std::shared_ptr & request, + const std::shared_ptr & response) { + handle_clear_fault(request, response); + }); + + RCLCPP_INFO(get_logger(), "FaultManager node started"); +} + +void FaultManagerNode::handle_report_fault( + const std::shared_ptr & request, + const std::shared_ptr & response) { + // Validate fault_code + if (request->fault_code.empty()) { + response->success = false; + response->message = "fault_code cannot be empty"; + return; + } + + // Validate severity + if (!is_valid_severity(request->severity)) { + response->success = false; + response->message = "Invalid severity value. Must be 0-3 (INFO, WARN, ERROR, CRITICAL)"; + return; + } + + // Validate source_id + if (request->source_id.empty()) { + response->success = false; + response->message = "source_id cannot be empty"; + return; + } + + // Report the fault + bool is_new = + storage_.report_fault(request->fault_code, request->severity, request->description, request->source_id, now()); + + response->success = true; + if (is_new) { + response->message = "New fault reported: " + request->fault_code; + RCLCPP_INFO(get_logger(), "New fault reported: %s (severity=%d, source=%s)", request->fault_code.c_str(), + request->severity, request->source_id.c_str()); + } else { + response->message = "Fault updated: " + request->fault_code; + RCLCPP_DEBUG(get_logger(), "Fault updated: %s (source=%s)", request->fault_code.c_str(), + request->source_id.c_str()); + } +} + +void FaultManagerNode::handle_get_faults(const std::shared_ptr & request, + const std::shared_ptr & response) { + response->faults = storage_.get_faults(request->filter_by_severity, request->severity, request->statuses); + + RCLCPP_DEBUG(get_logger(), "GetFaults returned %zu faults", response->faults.size()); +} + +void FaultManagerNode::handle_clear_fault( + const std::shared_ptr & request, + const std::shared_ptr & response) { + // Validate fault_code + if (request->fault_code.empty()) { + response->success = false; + response->message = "fault_code cannot be empty"; + return; + } + + bool cleared = storage_.clear_fault(request->fault_code); + + response->success = cleared; + if (cleared) { + response->message = "Fault cleared: " + request->fault_code; + RCLCPP_INFO(get_logger(), "Fault cleared: %s", request->fault_code.c_str()); + } else { + response->message = "Fault not found: " + request->fault_code; + RCLCPP_WARN(get_logger(), "Attempted to clear non-existent fault: %s", request->fault_code.c_str()); + } +} + +bool FaultManagerNode::is_valid_severity(uint8_t severity) { + return severity <= ros2_medkit_msgs::msg::Fault::SEVERITY_CRITICAL; +} + +} // namespace ros2_medkit_fault_manager diff --git a/src/ros2_medkit_fault_manager/src/fault_storage.cpp b/src/ros2_medkit_fault_manager/src/fault_storage.cpp new file mode 100644 index 00000000..8e38af66 --- /dev/null +++ b/src/ros2_medkit_fault_manager/src/fault_storage.cpp @@ -0,0 +1,164 @@ +// Copyright 2025 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ros2_medkit_fault_manager/fault_storage.hpp" + +#include + +namespace ros2_medkit_fault_manager { + +ros2_medkit_msgs::msg::Fault FaultState::to_msg() const { + ros2_medkit_msgs::msg::Fault msg; + msg.fault_code = fault_code; + msg.severity = severity; + msg.description = description; + msg.first_occurred = first_occurred; + msg.last_occurred = last_occurred; + msg.occurrence_count = occurrence_count; + msg.status = status; + + // Convert set to vector + msg.reporting_sources.reserve(reporting_sources.size()); + for (const auto & source : reporting_sources) { + msg.reporting_sources.push_back(source); + } + + return msg; +} + +bool InMemoryFaultStorage::report_fault(const std::string & fault_code, uint8_t severity, + const std::string & description, const std::string & source_id, + const rclcpp::Time & timestamp) { + std::lock_guard lock(mutex_); + + auto it = faults_.find(fault_code); + if (it == faults_.end()) { + // New fault + FaultState state; + state.fault_code = fault_code; + state.severity = severity; + state.description = description; + state.first_occurred = timestamp; + state.last_occurred = timestamp; + state.occurrence_count = 1; + state.status = ros2_medkit_msgs::msg::Fault::STATUS_PENDING; + state.reporting_sources.insert(source_id); + + faults_.emplace(fault_code, std::move(state)); + return true; + } + + // Existing fault - update + auto & state = it->second; + state.last_occurred = timestamp; + + // Increment occurrence_count with saturation + if (state.occurrence_count < std::numeric_limits::max()) { + ++state.occurrence_count; + } + + // Add source if not already present + state.reporting_sources.insert(source_id); + + // Update severity if higher (severity constants are ordered: INFO=0 < WARN=1 < ERROR=2 < CRITICAL=3) + if (severity > state.severity) { + state.severity = severity; + } + + // Update description if provided + if (!description.empty()) { + state.description = description; + } + + return false; +} + +std::vector +InMemoryFaultStorage::get_faults(bool filter_by_severity, uint8_t severity, + const std::vector & statuses) const { + std::lock_guard lock(mutex_); + + // Determine which statuses to include + std::set status_filter; + if (statuses.empty()) { + // Default: CONFIRMED only + status_filter.insert(ros2_medkit_msgs::msg::Fault::STATUS_CONFIRMED); + } else { + for (const auto & s : statuses) { + // Only add valid statuses (invalid ones are silently ignored) + if (s == ros2_medkit_msgs::msg::Fault::STATUS_PENDING || s == ros2_medkit_msgs::msg::Fault::STATUS_CONFIRMED || + s == ros2_medkit_msgs::msg::Fault::STATUS_CLEARED) { + status_filter.insert(s); + } + } + // If all provided statuses were invalid, default to CONFIRMED for consistency + if (status_filter.empty()) { + status_filter.insert(ros2_medkit_msgs::msg::Fault::STATUS_CONFIRMED); + } + } + + std::vector result; + result.reserve(faults_.size()); + + for (const auto & [code, state] : faults_) { + // Filter by status + if (status_filter.find(state.status) == status_filter.end()) { + continue; + } + + // Filter by severity if requested + if (filter_by_severity && state.severity != severity) { + continue; + } + + result.push_back(state.to_msg()); + } + + return result; +} + +std::optional InMemoryFaultStorage::get_fault(const std::string & fault_code) const { + std::lock_guard lock(mutex_); + + auto it = faults_.find(fault_code); + if (it == faults_.end()) { + return std::nullopt; + } + + return it->second.to_msg(); +} + +bool InMemoryFaultStorage::clear_fault(const std::string & fault_code) { + std::lock_guard lock(mutex_); + + auto it = faults_.find(fault_code); + if (it == faults_.end()) { + return false; + } + + it->second.status = ros2_medkit_msgs::msg::Fault::STATUS_CLEARED; + return true; +} + +size_t InMemoryFaultStorage::size() const { + std::lock_guard lock(mutex_); + return faults_.size(); +} + +bool InMemoryFaultStorage::contains(const std::string & fault_code) const { + std::lock_guard lock(mutex_); + return faults_.find(fault_code) != faults_.end(); +} + +} // namespace ros2_medkit_fault_manager diff --git a/src/ros2_medkit_fault_manager/src/main.cpp b/src/ros2_medkit_fault_manager/src/main.cpp new file mode 100644 index 00000000..26cb967a --- /dev/null +++ b/src/ros2_medkit_fault_manager/src/main.cpp @@ -0,0 +1,26 @@ +// Copyright 2025 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "rclcpp/rclcpp.hpp" +#include "ros2_medkit_fault_manager/fault_manager_node.hpp" + +int main(int argc, char * argv[]) { + rclcpp::init(argc, argv); + auto node = std::make_shared(); + rclcpp::spin(node); + rclcpp::shutdown(); + return 0; +} diff --git a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp new file mode 100644 index 00000000..f20d58fb --- /dev/null +++ b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp @@ -0,0 +1,171 @@ +// Copyright 2025 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include + +#include "rclcpp/rclcpp.hpp" +#include "ros2_medkit_fault_manager/fault_manager_node.hpp" +#include "ros2_medkit_fault_manager/fault_storage.hpp" +#include "ros2_medkit_msgs/msg/fault.hpp" + +using ros2_medkit_fault_manager::FaultManagerNode; +using ros2_medkit_fault_manager::InMemoryFaultStorage; +using ros2_medkit_msgs::msg::Fault; + +class FaultStorageTest : public ::testing::Test { + protected: + InMemoryFaultStorage storage_; +}; + +TEST_F(FaultStorageTest, ReportNewFault) { + rclcpp::Clock clock; + auto timestamp = clock.now(); + + bool is_new = storage_.report_fault("MOTOR_OVERHEAT", Fault::SEVERITY_ERROR, "Motor temperature exceeded threshold", + "/powertrain/motor", timestamp); + + EXPECT_TRUE(is_new); + EXPECT_EQ(storage_.size(), 1u); + EXPECT_TRUE(storage_.contains("MOTOR_OVERHEAT")); +} + +TEST_F(FaultStorageTest, ReportExistingFaultUpdates) { + rclcpp::Clock clock; + auto timestamp1 = clock.now(); + auto timestamp2 = clock.now(); + + storage_.report_fault("MOTOR_OVERHEAT", Fault::SEVERITY_WARN, "Initial report", "/powertrain/motor1", timestamp1); + + bool is_new = + storage_.report_fault("MOTOR_OVERHEAT", Fault::SEVERITY_ERROR, "Second report", "/powertrain/motor2", timestamp2); + + EXPECT_FALSE(is_new); + EXPECT_EQ(storage_.size(), 1u); + + auto fault = storage_.get_fault("MOTOR_OVERHEAT"); + ASSERT_TRUE(fault.has_value()); + EXPECT_EQ(fault->occurrence_count, 2u); + EXPECT_EQ(fault->severity, Fault::SEVERITY_ERROR); // Updated to higher severity + EXPECT_EQ(fault->reporting_sources.size(), 2u); +} + +TEST_F(FaultStorageTest, GetFaultsDefaultReturnsConfirmedOnly) { + rclcpp::Clock clock; + auto timestamp = clock.now(); + + // Report a fault (starts as PENDING) + storage_.report_fault("FAULT_1", Fault::SEVERITY_ERROR, "Test", "/node1", timestamp); + + // Default query should return empty (only PENDING exists) + auto faults = storage_.get_faults(false, 0, {}); + EXPECT_EQ(faults.size(), 0u); +} + +TEST_F(FaultStorageTest, GetFaultsWithPendingStatus) { + rclcpp::Clock clock; + auto timestamp = clock.now(); + + storage_.report_fault("FAULT_1", Fault::SEVERITY_ERROR, "Test", "/node1", timestamp); + + // Query with PENDING status + auto faults = storage_.get_faults(false, 0, {"PENDING"}); + EXPECT_EQ(faults.size(), 1u); + EXPECT_EQ(faults[0].fault_code, "FAULT_1"); + EXPECT_EQ(faults[0].status, Fault::STATUS_PENDING); +} + +TEST_F(FaultStorageTest, GetFaultsFilterBySeverity) { + rclcpp::Clock clock; + auto timestamp = clock.now(); + + storage_.report_fault("FAULT_INFO", Fault::SEVERITY_INFO, "Info", "/node1", timestamp); + storage_.report_fault("FAULT_ERROR", Fault::SEVERITY_ERROR, "Error", "/node1", timestamp); + + // Filter by ERROR severity + auto faults = storage_.get_faults(true, Fault::SEVERITY_ERROR, {"PENDING"}); + EXPECT_EQ(faults.size(), 1u); + EXPECT_EQ(faults[0].fault_code, "FAULT_ERROR"); +} + +TEST_F(FaultStorageTest, ClearFault) { + rclcpp::Clock clock; + auto timestamp = clock.now(); + + storage_.report_fault("MOTOR_OVERHEAT", Fault::SEVERITY_ERROR, "Test", "/node1", timestamp); + + bool cleared = storage_.clear_fault("MOTOR_OVERHEAT"); + EXPECT_TRUE(cleared); + + auto fault = storage_.get_fault("MOTOR_OVERHEAT"); + ASSERT_TRUE(fault.has_value()); + EXPECT_EQ(fault->status, Fault::STATUS_CLEARED); +} + +TEST_F(FaultStorageTest, ClearNonExistentFault) { + bool cleared = storage_.clear_fault("NON_EXISTENT"); + EXPECT_FALSE(cleared); +} + +TEST_F(FaultStorageTest, GetClearedFaults) { + rclcpp::Clock clock; + auto timestamp = clock.now(); + + storage_.report_fault("FAULT_1", Fault::SEVERITY_ERROR, "Test", "/node1", timestamp); + storage_.clear_fault("FAULT_1"); + + // Query cleared faults + auto faults = storage_.get_faults(false, 0, {"CLEARED"}); + EXPECT_EQ(faults.size(), 1u); + EXPECT_EQ(faults[0].status, Fault::STATUS_CLEARED); +} + +TEST_F(FaultStorageTest, InvalidStatusDefaultsToConfirmed) { + rclcpp::Clock clock; + auto timestamp = clock.now(); + + storage_.report_fault("FAULT_1", Fault::SEVERITY_ERROR, "Test", "/node1", timestamp); + + // Query with invalid status - defaults to CONFIRMED (fault is PENDING, so no matches) + auto faults = storage_.get_faults(false, 0, {"INVALID_STATUS"}); + EXPECT_EQ(faults.size(), 0u); +} + +// FaultManagerNode tests +class FaultManagerNodeTest : public ::testing::Test { + protected: + void SetUp() override { + node_ = std::make_shared(); + } + + void TearDown() override { + node_.reset(); + } + + std::shared_ptr node_; +}; + +TEST_F(FaultManagerNodeTest, NodeCreation) { + EXPECT_STREQ(node_->get_name(), "fault_manager"); + EXPECT_EQ(node_->get_storage().size(), 0u); +} + +int main(int argc, char ** argv) { + rclcpp::init(argc, argv); + ::testing::InitGoogleTest(&argc, argv); + int result = RUN_ALL_TESTS(); + rclcpp::shutdown(); + return result; +} diff --git a/src/ros2_medkit_fault_manager/test/test_integration.test.py b/src/ros2_medkit_fault_manager/test/test_integration.test.py new file mode 100644 index 00000000..532cab95 --- /dev/null +++ b/src/ros2_medkit_fault_manager/test/test_integration.test.py @@ -0,0 +1,339 @@ +#!/usr/bin/env python3 +# Copyright 2025 mfaferek93 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Integration tests for ros2_medkit_fault_manager services.""" + +import os +import unittest + +from launch import LaunchDescription +import launch_ros.actions +import launch_testing.actions +import launch_testing.markers +import rclpy +from rclpy.node import Node +from ros2_medkit_msgs.msg import Fault +from ros2_medkit_msgs.srv import ClearFault, GetFaults, ReportFault + + +def get_coverage_env(): + """ + Get environment variables for gcov coverage data collection. + + When running with coverage enabled (ENABLE_COVERAGE=ON), subprocess nodes + need GCOV_PREFIX set to write coverage data to the correct build directory. + This allows integration test coverage to be captured alongside unit tests. + + Returns + ------- + dict + Environment variables dict with GCOV_PREFIX and GCOV_PREFIX_STRIP, + or empty dict if coverage path cannot be determined. + + """ + try: + from ament_index_python.packages import get_package_prefix + pkg_prefix = get_package_prefix('ros2_medkit_fault_manager') + # pkg_prefix is like /path/to/workspace/install/ros2_medkit_fault_manager + # workspace is 2 levels up from install/package_name + workspace = os.path.dirname(os.path.dirname(pkg_prefix)) + build_dir = os.path.join(workspace, 'build', 'ros2_medkit_fault_manager') + + if os.path.exists(build_dir): + # GCOV_PREFIX_STRIP removes leading path components from compiled-in paths + # GCOV_PREFIX prepends the new path for .gcda file output + return { + 'GCOV_PREFIX': build_dir, + 'GCOV_PREFIX_STRIP': str(build_dir.count(os.sep)), + } + except Exception: + # Ignore: if coverage environment cannot be determined, + # return empty dict so tests proceed without coverage data. + pass + return {} + + +def generate_test_description(): + """Generate launch description with fault_manager node.""" + fault_manager_node = launch_ros.actions.Node( + package='ros2_medkit_fault_manager', + executable='fault_manager_node', + name='fault_manager', + output='screen', + additional_env=get_coverage_env(), + ) + + return ( + LaunchDescription([ + fault_manager_node, + launch_testing.actions.ReadyToTest(), + ]), + { + 'fault_manager_node': fault_manager_node, + }, + ) + + +class TestFaultManagerIntegration(unittest.TestCase): + """Integration tests for FaultManager services.""" + + @classmethod + def setUpClass(cls): + """Initialize ROS 2 context.""" + rclpy.init() + cls.node = Node('test_fault_manager_client') + + # Create service clients + cls.report_fault_client = cls.node.create_client( + ReportFault, '/fault_manager/report_fault' + ) + cls.get_faults_client = cls.node.create_client( + GetFaults, '/fault_manager/get_faults' + ) + cls.clear_fault_client = cls.node.create_client( + ClearFault, '/fault_manager/clear_fault' + ) + + # Wait for services to be available + assert cls.report_fault_client.wait_for_service(timeout_sec=10.0), \ + 'report_fault service not available' + assert cls.get_faults_client.wait_for_service(timeout_sec=10.0), \ + 'get_faults service not available' + assert cls.clear_fault_client.wait_for_service(timeout_sec=10.0), \ + 'clear_fault service not available' + + @classmethod + def tearDownClass(cls): + """Shutdown ROS 2 context.""" + cls.node.destroy_node() + rclpy.shutdown() + + def _call_service(self, client, request): + """Call a service and wait for response.""" + future = client.call_async(request) + rclpy.spin_until_future_complete(self.node, future, timeout_sec=5.0) + self.assertIsNotNone(future.result(), 'Service call timed out') + return future.result() + + def test_01_report_fault_success(self): + """Test reporting a new fault.""" + request = ReportFault.Request() + request.fault_code = 'TEST_FAULT_001' + request.severity = Fault.SEVERITY_ERROR + request.description = 'Test fault from integration test' + request.source_id = '/test_node' + + response = self._call_service(self.report_fault_client, request) + + self.assertTrue(response.success) + self.assertIn('TEST_FAULT_001', response.message) + print(f'Report fault response: {response.message}') + + def test_02_report_fault_empty_code_fails(self): + """Test that empty fault_code is rejected.""" + request = ReportFault.Request() + request.fault_code = '' + request.severity = Fault.SEVERITY_ERROR + request.description = 'Test fault' + request.source_id = '/test_node' + + response = self._call_service(self.report_fault_client, request) + + self.assertFalse(response.success) + self.assertIn('empty', response.message.lower()) + print(f'Empty code response: {response.message}') + + def test_03_report_fault_empty_source_fails(self): + """Test that empty source_id is rejected.""" + request = ReportFault.Request() + request.fault_code = 'TEST_FAULT_002' + request.severity = Fault.SEVERITY_ERROR + request.description = 'Test fault' + request.source_id = '' + + response = self._call_service(self.report_fault_client, request) + + self.assertFalse(response.success) + self.assertIn('empty', response.message.lower()) + print(f'Empty source response: {response.message}') + + def test_04_report_fault_invalid_severity_fails(self): + """Test that invalid severity is rejected.""" + request = ReportFault.Request() + request.fault_code = 'TEST_FAULT_003' + request.severity = 99 # Invalid severity + request.description = 'Test fault' + request.source_id = '/test_node' + + response = self._call_service(self.report_fault_client, request) + + self.assertFalse(response.success) + self.assertIn('severity', response.message.lower()) + print(f'Invalid severity response: {response.message}') + + def test_05_get_faults_pending(self): + """Test getting faults with PENDING status.""" + # First report a fault + report_request = ReportFault.Request() + report_request.fault_code = 'TEST_FAULT_GET_001' + report_request.severity = Fault.SEVERITY_WARN + report_request.description = 'Fault for get test' + report_request.source_id = '/test_node' + self._call_service(self.report_fault_client, report_request) + + # Query PENDING faults + get_request = GetFaults.Request() + get_request.filter_by_severity = False + get_request.severity = 0 + get_request.statuses = ['PENDING'] + + response = self._call_service(self.get_faults_client, get_request) + + self.assertGreater(len(response.faults), 0) + fault_codes = [f.fault_code for f in response.faults] + self.assertIn('TEST_FAULT_GET_001', fault_codes) + print(f'Get faults returned {len(response.faults)} faults') + + def test_06_get_faults_filter_by_severity(self): + """Test filtering faults by severity.""" + # Report faults with different severities + for i, severity in enumerate([Fault.SEVERITY_INFO, Fault.SEVERITY_ERROR]): + request = ReportFault.Request() + request.fault_code = f'TEST_FAULT_SEV_{i}' + request.severity = severity + request.description = f'Fault with severity {severity}' + request.source_id = '/test_node' + self._call_service(self.report_fault_client, request) + + # Query ERROR severity only + get_request = GetFaults.Request() + get_request.filter_by_severity = True + get_request.severity = Fault.SEVERITY_ERROR + get_request.statuses = ['PENDING'] + + response = self._call_service(self.get_faults_client, get_request) + + for fault in response.faults: + self.assertEqual(fault.severity, Fault.SEVERITY_ERROR) + print(f'Severity filter returned {len(response.faults)} ERROR faults') + + def test_07_clear_fault_success(self): + """Test clearing a fault.""" + # First report a fault + report_request = ReportFault.Request() + report_request.fault_code = 'TEST_FAULT_CLEAR' + report_request.severity = Fault.SEVERITY_ERROR + report_request.description = 'Fault to be cleared' + report_request.source_id = '/test_node' + self._call_service(self.report_fault_client, report_request) + + # Clear the fault + clear_request = ClearFault.Request() + clear_request.fault_code = 'TEST_FAULT_CLEAR' + + response = self._call_service(self.clear_fault_client, clear_request) + + self.assertTrue(response.success) + self.assertIn('cleared', response.message.lower()) + print(f'Clear fault response: {response.message}') + + # Verify fault is now CLEARED + get_request = GetFaults.Request() + get_request.filter_by_severity = False + get_request.severity = 0 + get_request.statuses = ['CLEARED'] + + get_response = self._call_service(self.get_faults_client, get_request) + + cleared_codes = [f.fault_code for f in get_response.faults] + self.assertIn('TEST_FAULT_CLEAR', cleared_codes) + + def test_08_clear_nonexistent_fault_fails(self): + """Test clearing a non-existent fault returns false.""" + request = ClearFault.Request() + request.fault_code = 'NONEXISTENT_FAULT_XYZ' + + response = self._call_service(self.clear_fault_client, request) + + self.assertFalse(response.success) + self.assertIn('not found', response.message.lower()) + print(f'Clear nonexistent response: {response.message}') + + def test_09_clear_fault_empty_code_fails(self): + """Test that clearing with empty fault_code fails.""" + request = ClearFault.Request() + request.fault_code = '' + + response = self._call_service(self.clear_fault_client, request) + + self.assertFalse(response.success) + self.assertIn('empty', response.message.lower()) + print(f'Clear empty code response: {response.message}') + + def test_10_report_updates_existing_fault(self): + """Test that reporting same fault_code updates existing fault.""" + fault_code = 'TEST_FAULT_UPDATE' + + # Report first time + request1 = ReportFault.Request() + request1.fault_code = fault_code + request1.severity = Fault.SEVERITY_WARN + request1.description = 'First report' + request1.source_id = '/node1' + + response1 = self._call_service(self.report_fault_client, request1) + self.assertTrue(response1.success) + self.assertIn('New fault', response1.message) + + # Report second time from different source + request2 = ReportFault.Request() + request2.fault_code = fault_code + request2.severity = Fault.SEVERITY_ERROR # Higher severity + request2.description = 'Second report' + request2.source_id = '/node2' + + response2 = self._call_service(self.report_fault_client, request2) + self.assertTrue(response2.success) + self.assertIn('updated', response2.message.lower()) + + # Verify fault was updated + get_request = GetFaults.Request() + get_request.filter_by_severity = False + get_request.severity = 0 + get_request.statuses = ['PENDING'] + + get_response = self._call_service(self.get_faults_client, get_request) + + updated_fault = None + for f in get_response.faults: + if f.fault_code == fault_code: + updated_fault = f + break + + self.assertIsNotNone(updated_fault) + self.assertEqual(updated_fault.severity, Fault.SEVERITY_ERROR) + self.assertEqual(updated_fault.occurrence_count, 2) + self.assertEqual(len(updated_fault.reporting_sources), 2) + print(f'Updated fault: occurrence_count={updated_fault.occurrence_count}, ' + f'sources={updated_fault.reporting_sources}') + + +@launch_testing.post_shutdown_test() +class TestFaultManagerShutdown(unittest.TestCase): + """Post-shutdown tests.""" + + def test_exit_code(self, proc_info): + """Verify fault_manager exits cleanly.""" + launch_testing.asserts.assertExitCodes(proc_info)