From 570392e2457a3f82a4cb37c16f936ea2b048f5b5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:21:47 +0000 Subject: [PATCH 1/4] Initial plan From 82eb2d4affe6b59497f952b5f42d12333505ceb6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:27:27 +0000 Subject: [PATCH 2/4] Add --no-build flag to dotnet test commands and standardize release validation steps Co-authored-by: devstress <30769729+devstress@users.noreply.github.com> --- .../learningcourse-integration-tests.yml | 8 ++- .github/workflows/release-major.yml | 58 ++++++++++++++++++- .github/workflows/release-minor.yml | 58 ++++++++++++++++++- .../workflows/release-package-validation.yml | 7 +++ .github/workflows/release-patch.yml | 58 ++++++++++++++++++- 5 files changed, 182 insertions(+), 7 deletions(-) diff --git a/.github/workflows/learningcourse-integration-tests.yml b/.github/workflows/learningcourse-integration-tests.yml index 694d2496..11e14419 100644 --- a/.github/workflows/learningcourse-integration-tests.yml +++ b/.github/workflows/learningcourse-integration-tests.yml @@ -76,12 +76,18 @@ jobs: echo "๐Ÿ”จ Building LocalTesting solution (includes all LearningCourse projects)..." dotnet restore LocalTesting/LocalTesting.sln dotnet build LocalTesting/LocalTesting.sln --configuration Release --no-restore + + - name: Build LearningCourse IntegrationTests solution + run: | + echo "๐Ÿ”จ Building LearningCourse IntegrationTests solution..." + dotnet restore LearningCourse/IntegrationTests.sln + dotnet build LearningCourse/IntegrationTests.sln --configuration Release --no-restore - name: Run LearningCourse Integration Tests timeout-minutes: 25 # Increased timeout for CI environment with LEARNINGCOURSE mode run: | echo "=== Starting LearningCourse Integration Tests ===" - dotnet test LearningCourse/IntegrationTests.sln --configuration Release --verbosity normal --logger "trx;LogFileName=LearningCourseTestResults.trx" --results-directory TestResults + dotnet test LearningCourse/IntegrationTests.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=LearningCourseTestResults.trx" --results-directory TestResults env: DOTNET_ENVIRONMENT: Testing ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" diff --git a/.github/workflows/release-major.yml b/.github/workflows/release-major.yml index 40bef347..1cd83ed2 100644 --- a/.github/workflows/release-major.yml +++ b/.github/workflows/release-major.yml @@ -265,11 +265,38 @@ jobs: docker tag ${{ env.DOCKER_IMAGE_NAME }}:$VERSION ${{ env.DOCKER_IMAGE_NAME }}:latest echo "Loaded Docker image for version $VERSION and tagged as latest" + - name: Configure system for optimal performance + run: | + echo "=== Configuring system for optimal performance ===" + sudo sysctl -w vm.max_map_count=262144 + sudo bash -c 'echo "* soft nofile 65536" >> /etc/security/limits.conf' + sudo bash -c 'echo "* hard nofile 65536" >> /etc/security/limits.conf' + echo "Current vm.max_map_count: $(sysctl vm.max_map_count)" + echo "Available memory: $(free -h)" + echo "CPU info: $(nproc) cores" + + - name: Verify Docker environment + run: | + echo "=== Verifying Docker environment ===" + docker --version + docker info + docker ps -a + + - name: Build ReleasePackagesTesting solution + run: | + echo "๐Ÿ”จ Building ReleasePackagesTesting solution..." 
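+          # Build once here so the test step below can pass --no-build and
+          # reuse these binaries instead of triggering a second implicit build.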
+ dotnet restore ReleasePackagesTesting/ReleasePackagesTesting.sln + dotnet build ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-restore + - name: Run Pre-Release Validation Tests timeout-minutes: 12 + env: + DOTNET_ENVIRONMENT: Testing + ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" + DOCKER_HOST: "unix:///var/run/docker.sock" run: | echo "=== Running Pre-Release Validation Tests ===" - dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 @@ -471,11 +498,38 @@ jobs: docker tag ${{ env.DOCKER_IMAGE_NAME }}:$VERSION ${{ env.DOCKER_IMAGE_NAME }}:latest echo "โœ… Docker image pulled from Docker Hub" + - name: Configure system for optimal performance + run: | + echo "=== Configuring system for optimal performance ===" + sudo sysctl -w vm.max_map_count=262144 + sudo bash -c 'echo "* soft nofile 65536" >> /etc/security/limits.conf' + sudo bash -c 'echo "* hard nofile 65536" >> /etc/security/limits.conf' + echo "Current vm.max_map_count: $(sysctl vm.max_map_count)" + echo "Available memory: $(free -h)" + echo "CPU info: $(nproc) cores" + + - name: Verify Docker environment + run: | + echo "=== Verifying Docker environment ===" + docker --version + docker info + docker ps -a + + - name: Build ReleasePackagesTesting.Published solution + run: | + echo "๐Ÿ”จ Building ReleasePackagesTesting.Published solution..." + dotnet restore ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln + dotnet build ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-restore + - name: Run Post-Release Validation Tests timeout-minutes: 12 + env: + DOTNET_ENVIRONMENT: Testing + ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" + DOCKER_HOST: "unix:///var/run/docker.sock" run: | echo "=== Running Post-Release Validation Tests ===" - dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/release-minor.yml b/.github/workflows/release-minor.yml index fb882b31..513b2c70 100644 --- a/.github/workflows/release-minor.yml +++ b/.github/workflows/release-minor.yml @@ -265,11 +265,38 @@ jobs: docker tag ${{ env.DOCKER_IMAGE_NAME }}:$VERSION ${{ env.DOCKER_IMAGE_NAME }}:latest echo "Loaded Docker image for version $VERSION and tagged as latest" + - name: Configure system for optimal performance + run: | + echo "=== Configuring system for optimal performance ===" + sudo sysctl -w vm.max_map_count=262144 + sudo bash -c 'echo "* soft nofile 65536" >> /etc/security/limits.conf' + sudo bash -c 'echo "* hard nofile 65536" >> /etc/security/limits.conf' + echo "Current vm.max_map_count: $(sysctl vm.max_map_count)" + echo "Available memory: $(free -h)" + echo "CPU info: $(nproc) cores" + + - name: Verify Docker environment + run: | + echo "=== 
Verifying Docker environment ===" + docker --version + docker info + docker ps -a + + - name: Build ReleasePackagesTesting solution + run: | + echo "๐Ÿ”จ Building ReleasePackagesTesting solution..." + dotnet restore ReleasePackagesTesting/ReleasePackagesTesting.sln + dotnet build ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-restore + - name: Run Pre-Release Validation Tests timeout-minutes: 12 + env: + DOTNET_ENVIRONMENT: Testing + ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" + DOCKER_HOST: "unix:///var/run/docker.sock" run: | echo "=== Running Pre-Release Validation Tests ===" - dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 @@ -471,11 +498,38 @@ jobs: docker tag ${{ env.DOCKER_IMAGE_NAME }}:$VERSION ${{ env.DOCKER_IMAGE_NAME }}:latest echo "โœ… Docker image pulled from Docker Hub" + - name: Configure system for optimal performance + run: | + echo "=== Configuring system for optimal performance ===" + sudo sysctl -w vm.max_map_count=262144 + sudo bash -c 'echo "* soft nofile 65536" >> /etc/security/limits.conf' + sudo bash -c 'echo "* hard nofile 65536" >> /etc/security/limits.conf' + echo "Current vm.max_map_count: $(sysctl vm.max_map_count)" + echo "Available memory: $(free -h)" + echo "CPU info: $(nproc) cores" + + - name: Verify Docker environment + run: | + echo "=== Verifying Docker environment ===" + docker --version + docker info + docker ps -a + + - name: Build ReleasePackagesTesting.Published solution + run: | + echo "๐Ÿ”จ Building ReleasePackagesTesting.Published solution..." + dotnet restore ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln + dotnet build ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-restore + - name: Run Post-Release Validation Tests timeout-minutes: 12 + env: + DOTNET_ENVIRONMENT: Testing + ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" + DOCKER_HOST: "unix:///var/run/docker.sock" run: | echo "=== Running Post-Release Validation Tests ===" - dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/release-package-validation.yml b/.github/workflows/release-package-validation.yml index 11a2aff4..3fc54530 100644 --- a/.github/workflows/release-package-validation.yml +++ b/.github/workflows/release-package-validation.yml @@ -110,6 +110,12 @@ jobs: dotnet nuget add source $(pwd)/packages --name LocalTestFeed echo "Added local NuGet source" + - name: Build ReleasePackagesTesting solution + run: | + echo "๐Ÿ”จ Building ReleasePackagesTesting solution..." 
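+          # Restore is split from build so 'dotnet build --no-restore' avoids a
+          # second restore against the LocalTestFeed source registered above.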
+ dotnet restore ReleasePackagesTesting/ReleasePackagesTesting.sln + dotnet build ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-restore + - name: Run ReleasePackagesTesting validation timeout-minutes: 20 env: @@ -120,6 +126,7 @@ jobs: echo "๐Ÿงช Running Release Package Validation Tests..." dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln \ --configuration Release \ + --no-build \ --verbosity normal \ --logger "trx;LogFileName=TestResults.trx" \ --results-directory TestResults diff --git a/.github/workflows/release-patch.yml b/.github/workflows/release-patch.yml index 03c34667..368fd3ab 100644 --- a/.github/workflows/release-patch.yml +++ b/.github/workflows/release-patch.yml @@ -265,11 +265,38 @@ jobs: docker tag ${{ env.DOCKER_IMAGE_NAME }}:$VERSION ${{ env.DOCKER_IMAGE_NAME }}:latest echo "Loaded Docker image for version $VERSION and tagged as latest" + - name: Configure system for optimal performance + run: | + echo "=== Configuring system for optimal performance ===" + sudo sysctl -w vm.max_map_count=262144 + sudo bash -c 'echo "* soft nofile 65536" >> /etc/security/limits.conf' + sudo bash -c 'echo "* hard nofile 65536" >> /etc/security/limits.conf' + echo "Current vm.max_map_count: $(sysctl vm.max_map_count)" + echo "Available memory: $(free -h)" + echo "CPU info: $(nproc) cores" + + - name: Verify Docker environment + run: | + echo "=== Verifying Docker environment ===" + docker --version + docker info + docker ps -a + + - name: Build ReleasePackagesTesting solution + run: | + echo "๐Ÿ”จ Building ReleasePackagesTesting solution..." + dotnet restore ReleasePackagesTesting/ReleasePackagesTesting.sln + dotnet build ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-restore + - name: Run Pre-Release Validation Tests timeout-minutes: 12 + env: + DOTNET_ENVIRONMENT: Testing + ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" + DOCKER_HOST: "unix:///var/run/docker.sock" run: | echo "=== Running Pre-Release Validation Tests ===" - dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 @@ -471,11 +498,38 @@ jobs: docker tag ${{ env.DOCKER_IMAGE_NAME }}:$VERSION ${{ env.DOCKER_IMAGE_NAME }}:latest echo "โœ… Docker image pulled from Docker Hub" + - name: Configure system for optimal performance + run: | + echo "=== Configuring system for optimal performance ===" + sudo sysctl -w vm.max_map_count=262144 + sudo bash -c 'echo "* soft nofile 65536" >> /etc/security/limits.conf' + sudo bash -c 'echo "* hard nofile 65536" >> /etc/security/limits.conf' + echo "Current vm.max_map_count: $(sysctl vm.max_map_count)" + echo "Available memory: $(free -h)" + echo "CPU info: $(nproc) cores" + + - name: Verify Docker environment + run: | + echo "=== Verifying Docker environment ===" + docker --version + docker info + docker ps -a + + - name: Build ReleasePackagesTesting.Published solution + run: | + echo "๐Ÿ”จ Building ReleasePackagesTesting.Published solution..." 
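+          # Note: the .Published solution restores FlinkDotnet from NuGet.org
+          # rather than the local packages/ feed, so it exercises published artifacts.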
+ dotnet restore ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln + dotnet build ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-restore + - name: Run Post-Release Validation Tests timeout-minutes: 12 + env: + DOTNET_ENVIRONMENT: Testing + ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" + DOCKER_HOST: "unix:///var/run/docker.sock" run: | echo "=== Running Post-Release Validation Tests ===" - dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 From cceef4a226fe8e8c309200637c71a73404efbc3a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:00:21 +0000 Subject: [PATCH 3/4] Use same ReleasePackagesTesting project for both pre and post release validation with mode flags Co-authored-by: devstress <30769729+devstress@users.noreply.github.com> --- .github/workflows/release-major.yml | 12 +++++++----- .github/workflows/release-minor.yml | 12 +++++++----- .github/workflows/release-package-validation.yml | 1 + .github/workflows/release-patch.yml | 12 +++++++----- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/.github/workflows/release-major.yml b/.github/workflows/release-major.yml index 1cd83ed2..72b78386 100644 --- a/.github/workflows/release-major.yml +++ b/.github/workflows/release-major.yml @@ -294,6 +294,7 @@ jobs: DOTNET_ENVIRONMENT: Testing ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" DOCKER_HOST: "unix:///var/run/docker.sock" + RELEASE_VALIDATION_MODE: "PreRelease" run: | echo "=== Running Pre-Release Validation Tests ===" dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults @@ -515,11 +516,11 @@ jobs: docker info docker ps -a - - name: Build ReleasePackagesTesting.Published solution + - name: Build ReleasePackagesTesting solution run: | - echo "๐Ÿ”จ Building ReleasePackagesTesting.Published solution..." - dotnet restore ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln - dotnet build ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-restore + echo "๐Ÿ”จ Building ReleasePackagesTesting solution..." 
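+          # Post-release validation now builds the same solution as pre-release;
+          # RELEASE_VALIDATION_MODE=PostRelease on the test step selects the mode.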
+ dotnet restore ReleasePackagesTesting/ReleasePackagesTesting.sln + dotnet build ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-restore - name: Run Post-Release Validation Tests timeout-minutes: 12 @@ -527,9 +528,10 @@ jobs: DOTNET_ENVIRONMENT: Testing ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" DOCKER_HOST: "unix:///var/run/docker.sock" + RELEASE_VALIDATION_MODE: "PostRelease" run: | echo "=== Running Post-Release Validation Tests ===" - dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/release-minor.yml b/.github/workflows/release-minor.yml index 513b2c70..a7d096ba 100644 --- a/.github/workflows/release-minor.yml +++ b/.github/workflows/release-minor.yml @@ -294,6 +294,7 @@ jobs: DOTNET_ENVIRONMENT: Testing ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" DOCKER_HOST: "unix:///var/run/docker.sock" + RELEASE_VALIDATION_MODE: "PreRelease" run: | echo "=== Running Pre-Release Validation Tests ===" dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults @@ -515,11 +516,11 @@ jobs: docker info docker ps -a - - name: Build ReleasePackagesTesting.Published solution + - name: Build ReleasePackagesTesting solution run: | - echo "๐Ÿ”จ Building ReleasePackagesTesting.Published solution..." - dotnet restore ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln - dotnet build ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-restore + echo "๐Ÿ”จ Building ReleasePackagesTesting solution..." + dotnet restore ReleasePackagesTesting/ReleasePackagesTesting.sln + dotnet build ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-restore - name: Run Post-Release Validation Tests timeout-minutes: 12 @@ -527,9 +528,10 @@ jobs: DOTNET_ENVIRONMENT: Testing ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" DOCKER_HOST: "unix:///var/run/docker.sock" + RELEASE_VALIDATION_MODE: "PostRelease" run: | echo "=== Running Post-Release Validation Tests ===" - dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 diff --git a/.github/workflows/release-package-validation.yml b/.github/workflows/release-package-validation.yml index 3fc54530..aec46f47 100644 --- a/.github/workflows/release-package-validation.yml +++ b/.github/workflows/release-package-validation.yml @@ -122,6 +122,7 @@ jobs: DOTNET_ENVIRONMENT: Testing ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" DOCKER_HOST: "unix:///var/run/docker.sock" + RELEASE_VALIDATION_MODE: "PreRelease" run: | echo "๐Ÿงช Running Release Package Validation Tests..." 
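           # PreRelease mode keeps the tests on the local packages/ feed and the
           # locally built Docker image (see RELEASE_TESTING.md for both modes).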
dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln \ diff --git a/.github/workflows/release-patch.yml b/.github/workflows/release-patch.yml index 368fd3ab..bc831ac7 100644 --- a/.github/workflows/release-patch.yml +++ b/.github/workflows/release-patch.yml @@ -294,6 +294,7 @@ jobs: DOTNET_ENVIRONMENT: Testing ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" DOCKER_HOST: "unix:///var/run/docker.sock" + RELEASE_VALIDATION_MODE: "PreRelease" run: | echo "=== Running Pre-Release Validation Tests ===" dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults @@ -515,11 +516,11 @@ jobs: docker info docker ps -a - - name: Build ReleasePackagesTesting.Published solution + - name: Build ReleasePackagesTesting solution run: | - echo "๐Ÿ”จ Building ReleasePackagesTesting.Published solution..." - dotnet restore ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln - dotnet build ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-restore + echo "๐Ÿ”จ Building ReleasePackagesTesting solution..." + dotnet restore ReleasePackagesTesting/ReleasePackagesTesting.sln + dotnet build ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-restore - name: Run Post-Release Validation Tests timeout-minutes: 12 @@ -527,9 +528,10 @@ jobs: DOTNET_ENVIRONMENT: Testing ASPIRE_ALLOW_UNSECURED_TRANSPORT: "true" DOCKER_HOST: "unix:///var/run/docker.sock" + RELEASE_VALIDATION_MODE: "PostRelease" run: | echo "=== Running Post-Release Validation Tests ===" - dotnet test ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults + dotnet test ReleasePackagesTesting/ReleasePackagesTesting.sln --configuration Release --no-build --verbosity normal --logger "trx;LogFileName=TestResults.trx" --results-directory TestResults - name: Upload test results uses: actions/upload-artifact@v4 From 0c2359bc6fff1a227b678a9396efff47cec4c5ea Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 23 Oct 2025 18:05:58 +0000 Subject: [PATCH 4/4] Remove unused ReleasePackagesTesting.Published folder and update documentation Co-authored-by: devstress <30769729+devstress@users.noreply.github.com> --- RELEASE_TESTING.md | 22 +- .../NativeFlinkJob/README.md | 98 -- .../NativeFlinkJob/pom.xml | 126 -- .../java/com/flinkdotnet/NativeKafkaJob.java | 202 --- ReleasePackagesTesting.Published/README.md | 117 -- .../GlobalSuppressions.cs | 7 - .../MemoryCalculator.cs | 140 -- .../Ports.cs | 46 - .../Program.cs | 715 -------- ...esTesting.Published.FlinkSqlAppHost.csproj | 30 - .../AspireValidationTest.cs | 258 --- .../AssemblyInfo.cs | 7 - .../EnvironmentVariableScope.cs | 26 - .../FlinkDotNetJobs.cs | 279 --- .../GatewayAllPatternsTests.cs | 506 ------ .../GlobalTestInfrastructure.cs | 914 ---------- .../LocalTestingTestBase.cs | 1499 ----------------- .../NativeFlinkAllPatternsTests.cs | 327 ---- .../NetworkDiagnostics.cs | 308 ---- ...sTesting.Published.IntegrationTests.csproj | 73 - .../TemporalIntegrationTests.cs | 389 ----- .../TestPrerequisites.cs | 183 -- .../ReleasePackagesTesting.Published.sln | 27 - .../appsettings.LearningCourse.json | 9 - .../connectors/flink/lib/README.md | 40 - .../flink-conf-learningcourse.yaml | 66 - 
.../grafana-kafka-dashboard.json | 167 -- .../grafana-provisioning-dashboards.yaml | 12 - .../jmx-exporter-kafka-config.yml | 107 -- .../prometheus.yml | 76 - ReleasePackagesTesting/README.md | 17 +- docs/RELEASE_PACKAGE_VALIDATION.md | 13 +- 32 files changed, 29 insertions(+), 6777 deletions(-) delete mode 100644 ReleasePackagesTesting.Published/NativeFlinkJob/README.md delete mode 100644 ReleasePackagesTesting.Published/NativeFlinkJob/pom.xml delete mode 100644 ReleasePackagesTesting.Published/NativeFlinkJob/src/main/java/com/flinkdotnet/NativeKafkaJob.java delete mode 100644 ReleasePackagesTesting.Published/README.md delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/GlobalSuppressions.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/MemoryCalculator.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/Ports.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/Program.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/ReleasePackagesTesting.Published.FlinkSqlAppHost.csproj delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/AspireValidationTest.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/AssemblyInfo.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/EnvironmentVariableScope.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/FlinkDotNetJobs.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/GatewayAllPatternsTests.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/GlobalTestInfrastructure.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/LocalTestingTestBase.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/NativeFlinkAllPatternsTests.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/NetworkDiagnostics.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/ReleasePackagesTesting.Published.IntegrationTests.csproj delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/TemporalIntegrationTests.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/TestPrerequisites.cs delete mode 100644 ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln delete mode 100644 ReleasePackagesTesting.Published/appsettings.LearningCourse.json delete mode 100644 ReleasePackagesTesting.Published/connectors/flink/lib/README.md delete mode 100644 ReleasePackagesTesting.Published/flink-conf-learningcourse.yaml delete mode 100644 ReleasePackagesTesting.Published/grafana-kafka-dashboard.json delete mode 100644 ReleasePackagesTesting.Published/grafana-provisioning-dashboards.yaml delete mode 100644 ReleasePackagesTesting.Published/jmx-exporter-kafka-config.yml delete mode 100644 ReleasePackagesTesting.Published/prometheus.yml diff --git a/RELEASE_TESTING.md b/RELEASE_TESTING.md index f414930b..3eca88f8 100644 --- 
a/RELEASE_TESTING.md +++ b/RELEASE_TESTING.md @@ -21,8 +21,7 @@ This directory contains scripts for testing the release workflow locally before - โœ… FlinkDotNet solution builds successfully - โœ… NuGet packages are created correctly - โœ… Docker image builds successfully -- โœ… Pre-release validation projects can restore and build with local packages -- โœ… Post-release validation projects can restore and build with local packages +- โœ… Release validation project can restore and build with local packages **Execution time**: ~3 minutes @@ -64,17 +63,18 @@ This directory contains scripts for testing the release workflow locally before ## What the Release Workflows Test -### Pre-Release Validation (ReleasePackagesTesting/) -Tests packages BEFORE publishing to ensure quality: +### Release Package Validation (ReleasePackagesTesting/) +Tests packages with configurable modes: + +**Pre-Release Mode** (default - `RELEASE_VALIDATION_MODE=PreRelease`): - Uses local NuGet packages from `./packages/` - Uses local Docker image from `./docker/` - Validates packages work with Flink and Kafka - Prevents publishing broken releases -### Post-Release Validation (ReleasePackagesTesting.Published/) -Tests published packages AFTER release: -- Downloads packages from NuGet.org (or uses local as substitute) -- Pulls Docker images from Docker Hub (or uses local as substitute) +**Post-Release Mode** (`RELEASE_VALIDATION_MODE=PostRelease`): +- Downloads packages from NuGet.org +- Pulls Docker images from Docker Hub - Validates published artifacts are compatible - Confirms release actually works @@ -95,8 +95,7 @@ dotnet add package Confluent.Kafka --version 2.11.1 **Solution**: Verify AppHost class name matches project name with underscores ```csharp // Correct pattern: -Projects.ReleasePackagesTesting_FlinkSqlAppHost // for ReleasePackagesTesting.FlinkSqlAppHost -Projects.ReleasePackagesTesting_Published_FlinkSqlAppHost // for ReleasePackagesTesting.Published.FlinkSqlAppHost +Projects.ReleasePackagesTesting_FlinkSqlAppHost // for ReleasePackagesTesting.FlinkSqlAppHost ``` ### Issue: Docker Out of Memory @@ -171,8 +170,7 @@ docker system prune -a # Warning: removes all unused Docker images ## Related Documentation -- [ReleasePackagesTesting README](./ReleasePackagesTesting/README.md) - Pre-release validation details -- [ReleasePackagesTesting.Published README](./ReleasePackagesTesting.Published/README.md) - Post-release validation details +- [ReleasePackagesTesting README](./ReleasePackagesTesting/README.md) - Release validation details - [Release Workflows](./.github/workflows/) - Actual CI/CD workflows ## Support diff --git a/ReleasePackagesTesting.Published/NativeFlinkJob/README.md b/ReleasePackagesTesting.Published/NativeFlinkJob/README.md deleted file mode 100644 index ff2f8430..00000000 --- a/ReleasePackagesTesting.Published/NativeFlinkJob/README.md +++ /dev/null @@ -1,98 +0,0 @@ -# Native Flink Kafka Job - Infrastructure Validation - -This is a standalone Apache Flink job using the official Flink Kafka connector to validate that the Aspire LocalTesting infrastructure is correctly configured. 
- -## Purpose - -Before debugging Gateway/IR issues, we need to prove the infrastructure works with a standard Flink job: -- โœ… Aspire DCP correctly configures Flink cluster -- โœ… Kafka is accessible from Flink containers at `kafka:9093` -- โœ… Messages flow through: Kafka Input โ†’ Flink Transform โ†’ Kafka Output - -## Build - -```bash -cd LocalTesting/NativeFlinkJob -mvn clean package -``` - -This creates: `target/native-flink-kafka-job-1.0.0.jar` - -## Run via Flink REST API - -```bash -# Upload JAR -curl -X POST -H "Expect:" -F "jarfile=@target/native-flink-kafka-job-1.0.0.jar" \ - http://localhost:8081/jars/upload - -# Submit job (replace {jarId} with the ID from upload response) -curl -X POST http://localhost:8081/jars/{jarId}/run \ - -H "Content-Type: application/json" \ - -d '{ - "entryClass": "com.flinkdotnet.NativeKafkaJob", - "programArgsList": [ - "--bootstrap-servers", "kafka:9093", - "--input-topic", "lt.native.input", - "--output-topic", "lt.native.output", - "--group-id", "native-test-consumer" - ], - "parallelism": 1 - }' -``` - -## Test with C# - -The `FlinkNativeKafkaInfrastructureTest.cs` integration test: -1. Starts Aspire infrastructure (Kafka + Flink) -2. Builds and submits this native JAR -3. Produces test messages -4. Verifies messages are transformed and consumed - -If this test **PASSES**: Infrastructure is correct, debug Gateway -If this test **FAILS**: Fix infrastructure first - -## Configuration - -Default values (for LocalTesting environment): -- **Bootstrap Servers**: `kafka:9093` (Aspire DCP internal listener) -- **Input Topic**: `lt.native.input` -- **Output Topic**: `lt.native.output` -- **Group ID**: `native-flink-consumer` - -Override with command-line args: -```bash ---bootstrap-servers kafka:9093 ---input-topic my-input ---output-topic my-output ---group-id my-consumer-group -``` - -## Key Differences from FlinkJobRunner - -1. **Uses official Flink Kafka Connector** (`flink-connector-kafka`) not raw Kafka clients -2. **Proper dependency management** - connector packaged in fat JAR -3. **Standard Flink APIs** - `KafkaSource` and `KafkaSink` builders -4. **No IR/JSON** - direct Java code, no intermediate representation - -## Troubleshooting - -**Build fails with missing dependencies**: -- Ensure Maven can reach Maven Central -- Check Flink version compatibility (2.1.0) - -**Job fails to start**: -- Check Flink JobManager logs: `docker logs flink-jobmanager` -- Verify bootstrap servers are accessible from Flink container - -**No messages consumed**: -- Check Kafka topics exist -- Verify bootstrap servers (`kafka:9093` for containers, `localhost:{port}` for host) -- Check Flink job is in RUNNING state -- Look for exceptions in TaskManager logs - -## Next Steps After Validation - -Once this job works: -1. Compare its Kafka configuration with Gateway's IR-generated config -2. Identify what Gateway does differently -3. 
Fix Gateway to match working configuration \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/NativeFlinkJob/pom.xml b/ReleasePackagesTesting.Published/NativeFlinkJob/pom.xml deleted file mode 100644 index c36115a3..00000000 --- a/ReleasePackagesTesting.Published/NativeFlinkJob/pom.xml +++ /dev/null @@ -1,126 +0,0 @@ - - - 4.0.0 - - com.flinkdotnet - native-flink-kafka-job - 1.0.0 - jar - Native Flink Kafka Job - Native Apache Flink job to validate infrastructure setup - - - UTF-8 - 17 - ${java.version} - ${java.version} - 2.1.0 - 3.7.0 - 1.7.36 - - - - - - org.apache.flink - flink-streaming-java - ${flink.version} - provided - - - - org.apache.flink - flink-clients - ${flink.version} - provided - - - - - org.apache.flink - flink-connector-base - ${flink.version} - provided - - - - - org.apache.kafka - kafka-clients - 3.9.1 - - - - - org.slf4j - slf4j-api - ${slf4j.version} - provided - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.11.0 - - ${java.version} - ${java.version} - - - - - - org.apache.maven.plugins - maven-shade-plugin - 3.5.0 - - - package - - shade - - - - true - false - - - org.apache.flink:flink-shaded-force-shading - com.google.code.findbugs:jsr305 - org.slf4j:* - org.apache.logging.log4j:* - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - module-info.class - - META-INF/MANIFEST.MF - - - - - - - com.flinkdotnet.NativeKafkaJob - - - - - - - - - - \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/NativeFlinkJob/src/main/java/com/flinkdotnet/NativeKafkaJob.java b/ReleasePackagesTesting.Published/NativeFlinkJob/src/main/java/com/flinkdotnet/NativeKafkaJob.java deleted file mode 100644 index 227f05ac..00000000 --- a/ReleasePackagesTesting.Published/NativeFlinkJob/src/main/java/com/flinkdotnet/NativeKafkaJob.java +++ /dev/null @@ -1,202 +0,0 @@ -package com.flinkdotnet; - -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.kafka.clients.consumer.KafkaConsumer; -import org.apache.kafka.clients.producer.KafkaProducer; -import org.apache.kafka.common.serialization.StringDeserializer; -import org.apache.kafka.common.serialization.StringSerializer; - -import java.util.Collections; -import java.util.Properties; - -/** - * Native Apache Flink job to validate Aspire infrastructure setup. - * - * This job demonstrates a simple Kafka -> Transform -> Kafka pipeline using - * the legacy Kafka client API (same approach as FlinkJobRunner). - * - * Purpose: - * - Prove that Aspire's Flink cluster can execute standard Flink jobs - * - Validate Kafka connectivity with proper bootstrap servers - * - Use the same Kafka client approach as FlinkJobRunner for consistency - * - * Usage: - * Build: mvn clean package - * Submit to Flink: Upload JAR via REST API or Flink UI - * - * Configuration: - * Bootstrap servers, topics, and group ID are passed as command-line arguments - * or use defaults for LocalTesting environment. 
- */ -public class NativeKafkaJob { - - public static void main(String[] args) throws Exception { - // Parse command-line arguments with defaults for LocalTesting - final String bootstrapServers = getArgOrDefault(args, "--bootstrap-servers", "kafka:9093"); - final String inputTopic = getArgOrDefault(args, "--input-topic", "lt.native.input"); - final String outputTopic = getArgOrDefault(args, "--output-topic", "lt.native.output"); - final String groupId = getArgOrDefault(args, "--group-id", "native-flink-consumer"); - - System.out.println("========================================"); - System.out.println("Native Flink Kafka Job - Infrastructure Validation"); - System.out.println("========================================"); - System.out.println("Configuration:"); - System.out.println(" Bootstrap Servers: " + bootstrapServers); - System.out.println(" Input Topic: " + inputTopic); - System.out.println(" Output Topic: " + outputTopic); - System.out.println(" Group ID: " + groupId); - System.out.println("========================================"); - - // Create Flink execution environment - final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - env.setParallelism(1); // Single parallelism for testing - - // Configure Kafka Source using legacy API (same as FlinkJobRunner) - Properties sourceProps = new Properties(); - sourceProps.put("bootstrap.servers", bootstrapServers); - sourceProps.put("group.id", groupId); - sourceProps.put("auto.offset.reset", "earliest"); - - System.out.println("โœ“ Kafka source configured with legacy Kafka client API"); - - // Configure Kafka Sink using legacy API (same as FlinkJobRunner) - Properties sinkProps = new Properties(); - sinkProps.put("bootstrap.servers", bootstrapServers); - - System.out.println("โœ“ Kafka sink configured with legacy Kafka client API"); - - // Build data stream pipeline with transformation - DataStream stream = env - .addSource(new KafkaStringSource(inputTopic, sourceProps)) - .name("Kafka Source") - .map(value -> { - String transformed = value.toUpperCase(); - System.out.println("[TRANSFORM] Input: '" + value + "' -> Output: '" + transformed + "'"); - return transformed; - }) - .name("Uppercase Transform"); - - // Write to Kafka sink - stream.addSink(new KafkaStringSink(outputTopic, sinkProps)).name("Kafka Sink"); - - System.out.println("โœ“ Pipeline configured: Kafka -> Uppercase Transform -> Kafka"); - System.out.println("Starting job execution..."); - - // Execute the Flink job - env.execute("Native Kafka Uppercase Job"); - } - - /** - * Get command-line argument value or return default. - */ - private static String getArgOrDefault(String[] args, String key, String defaultValue) { - for (int i = 0; i < args.length - 1; i++) { - if (args[i].equals(key)) { - return args[i + 1]; - } - } - return defaultValue; - } - - /** - * Legacy Kafka Source using Kafka Client API directly (same approach as FlinkJobRunner). - * This approach bundles the Kafka client in the JAR and avoids classloader issues. 
- */ - public static class KafkaStringSource implements org.apache.flink.streaming.api.functions.source.legacy.SourceFunction { - private final String topic; - private final Properties props; - private volatile boolean running = true; - - public KafkaStringSource(String topic, Properties props) { - this.topic = topic; - this.props = props; - } - - @Override - public void run(org.apache.flink.streaming.api.functions.source.legacy.SourceFunction.SourceContext ctx) throws Exception { - System.out.println("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - System.out.println("[KAFKA SOURCE] Starting consumer..."); - System.out.println(" - Topic: " + topic); - System.out.println(" - Bootstrap servers: " + props.getProperty("bootstrap.servers")); - System.out.println(" - Group ID: " + props.getProperty("group.id")); - System.out.println(" - Auto offset reset: " + props.getProperty("auto.offset.reset")); - System.out.println("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - - try (KafkaConsumer consumer = new KafkaConsumer<>(props, new StringDeserializer(), new StringDeserializer())) { - System.out.println("[KAFKA SOURCE] โœ“ Consumer created, subscribing to topic: " + topic); - consumer.subscribe(Collections.singletonList(topic)); - System.out.println("[KAFKA SOURCE] โœ“ Subscribed successfully, starting poll loop..."); - - int pollCount = 0; - int totalRecords = 0; - - while (running) { - var records = consumer.poll(java.time.Duration.ofMillis(500)); - pollCount++; - - if (records.count() > 0) { - System.out.println("[KAFKA SOURCE] Poll #" + pollCount + ": Received " + records.count() + " records"); - totalRecords += records.count(); - } else if (pollCount % 20 == 0) { - System.out.println("[KAFKA SOURCE] Poll #" + pollCount + ": Still polling, total records so far: " + totalRecords); - } - - for (var rec : records) { - synchronized (ctx.getCheckpointLock()) { - System.out.println("[KAFKA SOURCE] Collecting record: " + rec.value()); - ctx.collect(rec.value()); - } - } - } - - System.out.println("[KAFKA SOURCE] Stopped. Total records processed: " + totalRecords); - } catch (Exception e) { - System.err.println("[KAFKA SOURCE] โœ— ERROR: " + e.getClass().getName() + ": " + e.getMessage()); - e.printStackTrace(); - throw e; - } - } - - @Override - public void cancel() { - running = false; - } - } - - /** - * Legacy Kafka Sink using Kafka Client API directly (same approach as FlinkJobRunner). - * This approach bundles the Kafka client in the JAR and avoids classloader issues. 
- */ - public static class KafkaStringSink implements org.apache.flink.streaming.api.functions.sink.legacy.SinkFunction { - private final String topic; - private final Properties props; - private transient KafkaProducer producer; - - public KafkaStringSink(String topic, Properties props) { - this.topic = topic; - this.props = props; - } - - @Override - public void invoke(String value, org.apache.flink.streaming.api.functions.sink.legacy.SinkFunction.Context context) { - if (producer == null) { - System.out.println("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - System.out.println("[KAFKA SINK] Initializing producer..."); - System.out.println(" - Topic: " + topic); - System.out.println(" - Bootstrap servers: " + props.getProperty("bootstrap.servers")); - System.out.println("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - producer = new KafkaProducer<>(props, new StringSerializer(), new StringSerializer()); - System.out.println("[KAFKA SINK] โœ“ Producer created successfully"); - } - try { - producer.send(new org.apache.kafka.clients.producer.ProducerRecord<>(topic, value)); - System.out.println("[KAFKA SINK] Sent: " + value); - } catch (Exception e) { - System.err.println("[KAFKA SINK] โœ— ERROR sending message: " + e.getMessage()); - e.printStackTrace(); - throw new RuntimeException("Failed to send message to Kafka", e); - } - } - } -} \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/README.md b/ReleasePackagesTesting.Published/README.md deleted file mode 100644 index aad2254c..00000000 --- a/ReleasePackagesTesting.Published/README.md +++ /dev/null @@ -1,117 +0,0 @@ -# Release Packages Testing - Published - -This folder validates that **published** packages from NuGet.org and Docker Hub work together correctly using Microsoft Aspire integration tests. - -## Purpose - -This is the **FINAL step** of the release workflow, run **AFTER** publishing to NuGet.org and Docker Hub to confirm the release is working. - -Tests: -- `FlinkDotnet` package from **NuGet.org** (not local packages) -- `flinkdotnet/jobgateway` image from **Docker Hub** (not local Docker image) - -## When to Use - -**Run this as the last step in the release workflow** after: -1. โœ… Publishing NuGet packages to NuGet.org -2. โœ… Publishing Docker image to Docker Hub - -This validates the published artifacts are compatible and working. 
- -## Structure - -- `ReleasePackagesTesting.Published.FlinkSqlAppHost` - Aspire AppHost using Docker Hub image -- `ReleasePackagesTesting.Published.IntegrationTests` - Integration tests using NuGet.org packages -- Same test scenarios as LocalTesting but using **published packages** - -## Usage - -### Using Aspire Integration Tests - -The testing is done through Microsoft Aspire's integration testing framework, identical to LocalTesting: - -```bash -# Pull Docker image from Docker Hub -docker pull flinkdotnet/jobgateway:VERSION - -# Tag as latest if needed -docker tag flinkdotnet/jobgateway:VERSION flinkdotnet/jobgateway:latest - -# Clear NuGet cache to force download from NuGet.org -dotnet nuget locals all --clear - -# Run Aspire integration tests -cd ReleasePackagesTesting.Published -dotnet test --configuration Release -``` - -### In Release Workflow (Recommended) - -Add as the final step after publishing: - -```yaml -validate-published-packages: - name: Validate Published Packages (Post-Release) - needs: [calculate-version, publish-nuget, publish-docker] - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up .NET - uses: actions/setup-dotnet@v4 - with: - dotnet-version: '9.0.x' - - - name: Set up JDK 17 - uses: actions/setup-java@v4 - with: - java-version: '17' - distribution: 'temurin' - - - name: Install Maven - uses: stCarolas/setup-maven@v4 - with: - maven-version: '3.9.6' - - - name: Pull Docker image from Docker Hub - run: | - docker pull flinkdotnet/jobgateway:${{ needs.calculate-version.outputs.new_version }} - docker tag flinkdotnet/jobgateway:${{ needs.calculate-version.outputs.new_version }} flinkdotnet/jobgateway:latest - - - name: Clear NuGet cache - run: dotnet nuget locals all --clear - - - name: Run Aspire integration tests - run: | - cd ReleasePackagesTesting.Published - dotnet test --configuration Release --verbosity normal -``` - -## What It Tests - -Uses Microsoft Aspire integration testing framework to: -1. Pull Docker image from Docker Hub -2. Start Aspire AppHost with published Docker image -3. Deploy Flink cluster, Kafka, and other infrastructure -4. Install NuGet packages from NuGet.org -5. Run integration tests against JobGateway -6. Verify all Flink job patterns work correctly -7. 
Validate end-to-end functionality with published packages - -## Validation - -โœ… All tests must pass for the release to be considered successful -โœ… Validates Docker image from Docker Hub works with Flink cluster -โœ… Validates NuGet package from NuGet.org has correct dependencies -โœ… Confirms published packages are compatible -โœ… Uses same Aspire testing infrastructure as LocalTesting - -## Difference from ReleasePackagesTesting - -- **ReleasePackagesTesting**: Tests local artifacts BEFORE publishing (pre-release validation) -- **ReleasePackagesTesting.Published** (this folder): Tests published packages AFTER publishing (post-release validation) - -Both use Microsoft Aspire integration testing framework: -- Pre-release prevents publishing broken packages -- Post-release confirms the release actually works diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/GlobalSuppressions.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/GlobalSuppressions.cs deleted file mode 100644 index ad81e3a8..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/GlobalSuppressions.cs +++ /dev/null @@ -1,7 +0,0 @@ -// This file is used to configure SonarAnalyzer code analysis suppressions for the entire assembly. - -using System.Diagnostics.CodeAnalysis; - -// S3776: Cognitive Complexity - Program.cs is the application entry point with infrastructure setup -// The complexity is acceptable for this bootstrapping code and difficult to refactor meaningfully -[assembly: SuppressMessage("Major Code Smell", "S3776:Refactor this method to reduce its Cognitive Complexity", Justification = "Infrastructure setup code in Program.cs requires sequential configuration steps", Scope = "member", Target = "~M:$")] \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/MemoryCalculator.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/MemoryCalculator.cs deleted file mode 100644 index ffdf27f6..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/MemoryCalculator.cs +++ /dev/null @@ -1,140 +0,0 @@ -namespace LocalTesting.FlinkSqlAppHost; - -/// -/// Calculates appropriate memory allocations for Flink components based on available system memory. -/// Ensures compatibility with resource-constrained environments like GitHub Actions (2-4GB RAM). -/// -public static class MemoryCalculator -{ - private const long MinimumSystemMemoryMb = 4096; // 4GB minimum required - - /// - /// Gets total available physical memory in MB. - /// Returns 0 if detection fails (will use fallback values). - /// - public static long GetTotalPhysicalMemoryMb() - { - try - { - // Use GC.GetGCMemoryInfo for cross-platform memory detection - var gcMemoryInfo = GC.GetGCMemoryInfo(); - var totalMemoryBytes = gcMemoryInfo.TotalAvailableMemoryBytes; - - // Convert bytes to MB - var totalMemoryMb = totalMemoryBytes / (1024 * 1024); - - Console.WriteLine($"๐Ÿ“Š Detected system memory: {totalMemoryMb:N0} MB ({totalMemoryMb / 1024.0:F1} GB)"); - - return totalMemoryMb; - } - catch (Exception ex) - { - Console.WriteLine($"โš ๏ธ Unable to detect system memory: {ex.Message}"); - return 0; // Signal to use fallback values - } - } - - /// - /// Calculates appropriate TaskManager process memory based on available system RAM. 
- /// Uses conservative allocations to work on resource-constrained environments. - /// - /// Memory allocation strategy: - /// - โ‰ค8GB RAM: 1.5GB TaskManager (minimal, for CI/testing) - /// - 8-16GB RAM: 3GB TaskManager (standard development) - /// - โ‰ฅ16GB RAM: 4GB TaskManager (optimal) - /// - public static int CalculateTaskManagerProcessMemoryMb() - { - var totalMemoryMb = GetTotalPhysicalMemoryMb(); - - // Fallback: Use minimal allocation if detection fails - if (totalMemoryMb == 0) - { - Console.WriteLine("โš™๏ธ Using fallback TaskManager memory: 1536 MB (1.5GB) - Safe minimum"); - return 1536; // 1.5GB safe minimum for unknown environments - } - - // Calculate based on available RAM - var totalMemoryGb = totalMemoryMb / 1024.0; - - if (totalMemoryGb <= 8.0) - { - // Resource-constrained: GitHub Actions standard runners (4GB-7GB) - var allocated = 1536; // 1.5GB - Console.WriteLine($"โš™๏ธ TaskManager memory: {allocated} MB (1.5GB) - Resource-constrained mode (โ‰ค8GB RAM)"); - return allocated; - } - else if (totalMemoryGb <= 16.0) - { - // Standard development: Most developer machines (8-16GB) - var allocated = 3072; // 3GB - Console.WriteLine($"โš™๏ธ TaskManager memory: {allocated} MB (3GB) - Standard development mode (8-16GB RAM)"); - return allocated; - } - else - { - // Optimal: High-end machines (16GB+) - var allocated = 4096; // 4GB - Console.WriteLine($"โš™๏ธ TaskManager memory: {allocated} MB (4GB) - Optimal mode (โ‰ฅ16GB RAM)"); - return allocated; - } - } - - /// - /// Calculates appropriate JVM metaspace size based on TaskManager process memory. - /// Metaspace should be ~25% of process memory for class loading overhead. - /// - /// Allocation strategy: - /// - 1.5GB process: 384MB metaspace (minimal) - /// - 3GB process: 768MB metaspace (standard) - /// - 4GB+ process: 1024MB metaspace (optimal) - /// - public static int CalculateTaskManagerMetaspaceMb(int processMemoryMb) - { - // Metaspace = 25% of process memory (safe allocation for class loading) - var metaspaceMb = processMemoryMb / 4; - - // Apply bounds: 384MB minimum, 1024MB maximum - metaspaceMb = Math.Max(384, Math.Min(1024, metaspaceMb)); - - Console.WriteLine($"โš™๏ธ TaskManager metaspace: {metaspaceMb} MB (25% of process memory)"); - return metaspaceMb; - } - - /// - /// Calculates appropriate JobManager process memory. - /// JobManager is less memory-intensive than TaskManager (no data processing). - /// Fixed at 2GB for consistency across all environments. - /// - public static int CalculateJobManagerProcessMemoryMb() - { - const int jobManagerMemory = 2048; // 2GB - sufficient for all environments - Console.WriteLine($"โš™๏ธ JobManager memory: {jobManagerMemory} MB (2GB) - Fixed allocation"); - return jobManagerMemory; - } - - /// - /// Validates that system has minimum required memory for Flink operations. 
- /// - public static bool ValidateMinimumMemory() - { - var totalMemoryMb = GetTotalPhysicalMemoryMb(); - - // If detection fails, assume valid (fallback values will handle it) - if (totalMemoryMb == 0) - { - Console.WriteLine("โ„น๏ธ Unable to validate minimum memory - proceeding with fallback values"); - return true; - } - - if (totalMemoryMb < MinimumSystemMemoryMb) - { - Console.WriteLine($"โŒ Insufficient system memory: {totalMemoryMb}MB < {MinimumSystemMemoryMb}MB required"); - Console.WriteLine($" Flink requires at least 4GB RAM for stable operation"); - return false; - } - - Console.WriteLine($"โœ… System memory validation passed: {totalMemoryMb}MB โ‰ฅ {MinimumSystemMemoryMb}MB required"); - return true; - } -} \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/Ports.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/Ports.cs deleted file mode 100644 index 6ddc989d..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/Ports.cs +++ /dev/null @@ -1,46 +0,0 @@ -namespace LocalTesting.FlinkSqlAppHost; - -public static class Ports -{ - public const int JobManagerHostPort = 8081; // Host REST/UI port - public const int SqlGatewayHostPort = 8083; // SQL Gateway REST API port - public const int GatewayHostPort = 8080; // Gateway HTTP port - - // Kafka FIXED port configuration (no dynamic allocation) - // CRITICAL: Kafka dual listener setup with FIXED ports: - // - PLAINTEXT (port 9092): Internal container-to-container communication - // * Used by Flink TaskManager to connect: kafka:9092 - // * Advertised listener: kafka:9092 (keeps containers on container network) - // - PLAINTEXT_HOST (port 9093): External host machine access - // * Used by tests and external clients: localhost:9093 - // * Advertised listener: localhost:9093 (accessible from host) - // This ensures TaskManager always connects through kafka:9092 without dynamic port issues - public const int KafkaInternalPort = 9092; // Container network port - public const int KafkaExternalPort = 9093; // Host machine port - public const string KafkaContainerBootstrap = "kafka:9092"; // For Flink containers - public const string KafkaHostBootstrap = "localhost:9093"; // For tests/external access - - // Temporal Server ports - // CRITICAL: Temporal dual port configuration: - // - Port 7233: gRPC frontend for workflow/activity execution - // * Used by Temporalio SDK clients to connect - // * Primary interface for workflow submission and queries - // - Port 8088: HTTP UI for workflow monitoring - // * Web-based dashboard for observability - // * Displays workflow history, status, and execution details - public const int TemporalGrpcPort = 7233; // gRPC frontend port - public const int TemporalUIPort = 8088; // HTTP UI port - public const string TemporalHostAddress = "localhost:7233"; // For SDK clients - - // LearningCourse Infrastructure ports (only deployed when LEARNINGCOURSE=true) - // Redis - State management and caching for Day15 Capstone Project - public const int RedisHostPort = 6379; // Redis default port - public const string RedisHostAddress = "localhost:6379"; // For SDK clients - - // Observability Stack - Monitoring and metrics - // Note: Port 9090 is in Windows excluded port range (9038-9137) - // Ports 9250-9252 are used by Flink metrics (JobManager, TaskManager, SQL Gateway) - // Using 9253 for Prometheus to avoid conflicts - public const int PrometheusHostPort = 9253; // 
Prometheus metrics collection - public const int GrafanaHostPort = 3000; // Grafana visualization dashboard -} diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/Program.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/Program.cs deleted file mode 100644 index 0a345ea5..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/Program.cs +++ /dev/null @@ -1,715 +0,0 @@ -// Configure container runtime - prefer Podman if available, fallback to Docker Desktop -using System.Diagnostics; -using LocalTesting.FlinkSqlAppHost; - -if (!ConfigureContainerRuntime()) -{ - return; -} - -LogConfiguredPorts(); -SetupEnvironment(); - -// Validate system memory and calculate dynamic allocations -Console.WriteLine("\n๐Ÿ” Analyzing system resources..."); -if (!MemoryCalculator.ValidateMinimumMemory()) -{ - Console.WriteLine("โŒ System does not meet minimum memory requirements for Flink"); - Console.WriteLine(" Please ensure at least 4GB RAM is available"); - return; -} - -Console.WriteLine($"โœ… Memory resources validated\n"); - -// Check if LearningCourse mode is enabled - enables additional infrastructure for learning exercises -var isLearningCourse = Environment.GetEnvironmentVariable("LEARNINGCOURSE")?.ToLower() == "true"; -if (isLearningCourse) -{ - Console.WriteLine("๐Ÿ“š LearningCourse mode enabled - Redis and Observability stack will be deployed"); -} - -var diagnosticsVerbose = Environment.GetEnvironmentVariable("DIAGNOSTICS_VERBOSE") == "1"; -if (diagnosticsVerbose) -{ - Console.WriteLine("[diag] DIAGNOSTICS_VERBOSE=1 enabled for LocalTesting.FlinkSqlAppHost startup diagnostics"); -} - -const string JavaOpenOptions = "--add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED"; - -var repoRoot = Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "../../../../..")); -var connectorsDir = Path.Combine(repoRoot, "LocalTesting", "connectors", "flink", "lib"); -var testLogsDir = Path.GetFullPath(Path.Combine(repoRoot, "LocalTesting", "test-logs")); - -// Ensure test-logs directory exists -Directory.CreateDirectory(testLogsDir); - -Environment.SetEnvironmentVariable("LOG_FILE_PATH", testLogsDir); -Console.WriteLine($"๐Ÿ“ Log files will be written to: {testLogsDir}"); - -var gatewayJarPath = FindGatewayJarPath(repoRoot); -if (diagnosticsVerbose && File.Exists(gatewayJarPath)) -{ - Console.WriteLine($"[diag] Gateway JAR configured: {gatewayJarPath}"); -} - -PrepareConnectorDirectory(connectorsDir, diagnosticsVerbose); - -var builder = DistributedApplication.CreateBuilder(args); - -// Detect LEARNINGCOURSE mode for conditional metrics configuration -var isLearningCourseMode = Environment.GetEnvironmentVariable("LEARNINGCOURSE")?.ToLower() == "true"; -Console.WriteLine($"๐Ÿ” Running in {(isLearningCourseMode ? "LEARNINGCOURSE" : "PRODUCTION")} mode"); -Console.WriteLine($" Metrics export: {(isLearningCourseMode ? 
"ENABLED (Flink + Kafka)" : "DISABLED")}"); - -// Configure Kafka - Aspire's AddKafka() uses KRaft mode by default (no Zookeeper) -// CRITICAL: Confluent Local image doesn't support JMX out of the box -// We need to use standard Kafka image or configure Confluent properly -var kafka = builder.AddKafka("kafka"); - -// Enable JMX for metrics export only in LEARNINGCOURSE mode -// PROBLEM: confluentinc/confluent-local may not respect KAFKA_JMX_* environment variables -// The image uses a custom startup script that might ignore these settings -if (isLearningCourseMode) -{ - kafka = kafka - .WithEnvironment("KAFKA_JMX_ENABLED", "true") // CRITICAL: Required for Confluent images - .WithEnvironment("KAFKA_JMX_PORT", "9101") - .WithEnvironment("KAFKA_JMX_HOSTNAME", "kafka") - .WithEnvironment("KAFKA_JMX_OPTS", - "-Dcom.sun.management.jmxremote " + - "-Dcom.sun.management.jmxremote.authenticate=false " + - "-Dcom.sun.management.jmxremote.ssl=false " + - "-Djava.rmi.server.hostname=kafka " + - "-Dcom.sun.management.jmxremote.rmi.port=9101 " + - "-Dcom.sun.management.jmxremote.host=0.0.0.0 " + // CRITICAL: Bind to all interfaces - "-Dcom.sun.management.jmxremote.local.only=false") - // CRITICAL: Confluent images also need KAFKA_OPTS for JMX - .WithEnvironment("KAFKA_OPTS", - "-Dcom.sun.management.jmxremote " + - "-Dcom.sun.management.jmxremote.authenticate=false " + - "-Dcom.sun.management.jmxremote.ssl=false " + - "-Djava.rmi.server.hostname=kafka " + - "-Dcom.sun.management.jmxremote.port=9101 " + - "-Dcom.sun.management.jmxremote.rmi.port=9101 " + - "-Dcom.sun.management.jmxremote.host=0.0.0.0 " + // CRITICAL: Bind to all interfaces - "-Dcom.sun.management.jmxremote.local.only=false"); - Console.WriteLine(" ๐Ÿ“Š Kafka JMX metrics enabled on port 9101"); - Console.WriteLine(" ๐Ÿ“Š Using both KAFKA_JMX_OPTS and KAFKA_OPTS for Confluent compatibility"); -} - -// Kafka JMX Exporter - only in LEARNINGCOURSE mode -// Uses the Bitnami JMX Exporter (latest version 1.5.0) as a standalone HTTP server -// Connects to Kafka's JMX endpoint (kafka:9101) and exposes metrics on port 5556 -// Note: Not using #pragma warning disable S1481 because kafkaExporter IS used by Prometheus -IResourceBuilder? 
kafkaExporter = null; - -if (isLearningCourseMode) -{ - Console.WriteLine(" ๐Ÿ“Š Deploying Kafka JMX Exporter for metrics collection"); - - var jmxConfigPath = Path.Combine(repoRoot, "LocalTesting", "jmx-exporter-kafka-config.yml"); - - if (File.Exists(jmxConfigPath)) - { - // Store kafkaExporter at broader scope for Prometheus reference - // This ensures both containers are on the same Docker network for DNS resolution - kafkaExporter = builder.AddContainer("kafka-exporter", "bitnami/jmx-exporter", "latest") - .WithBindMount(jmxConfigPath, "/opt/bitnami/jmx-exporter/exporter.yml", isReadOnly: true) - .WithHttpEndpoint(targetPort: 5556, name: "metrics") - .WithReference(kafka) // Keep reference for network connectivity - .WaitFor(kafka) // CRITICAL: Wait for Kafka container to be started - .WithEntrypoint("/bin/sh") - .WithArgs("-c", - // CRITICAL: Add 10-second delay to allow Kafka JMX port to be fully initialized - // Kafka container starts quickly but JMX port takes time to become available - "sleep 10 && java -jar /opt/bitnami/jmx-exporter/jmx_prometheus_standalone.jar 5556 /opt/bitnami/jmx-exporter/exporter.yml"); - - Console.WriteLine(" ๐Ÿ“Š Kafka JMX Exporter configured: kafka:9101 โ†’ :5556/metrics"); - Console.WriteLine(" โณ JMX Exporter will wait 10s after Kafka starts for JMX port initialization"); - } - else - { - Console.WriteLine(" โš ๏ธ Kafka JMX Exporter config not found, skipping deployment"); - } -} - -// Flink JobManager with named HTTP endpoint for service references -// All ports are hardcoded - no WaitFor dependencies needed for parallel startup -var jobManager = builder.AddContainer("flink-jobmanager", "flink:2.1.0-java17") - .WithHttpEndpoint(port: Ports.JobManagerHostPort, targetPort: 8081, name: "jm-http") - .WithContainerRuntimeArgs("--publish", $"{Ports.JobManagerHostPort}:8081") // Explicit port publishing for test access - .WithEnvironment("JOB_MANAGER_RPC_ADDRESS", "flink-jobmanager") - .WithEnvironment("LOG_FILE_PATH", "/opt/flink/test-logs"); // Set log path inside container - // REMOVED: .WithEnvironment("KAFKA_BOOTSTRAP", "kafka:9092") - // REASON: FlinkJobRunner.java prioritizes environment variable over job definition - // This caused jobs to use wrong Kafka address (localhost:17901 instead of kafka:9092) - // Job definitions explicitly provide bootstrapServers, so environment variable is not needed - -// Configure Prometheus metrics for JobManager only in LEARNINGCOURSE mode -if (isLearningCourseMode) -{ - var jobManagerFlinkProperties = - "metrics.reporters: prom\n" + - "metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory\n" + - "metrics.reporter.prom.port: 9250\n" + - "metrics.reporter.prom.filterLabelValueCharacters: false\n"; - jobManager = jobManager.WithEnvironment("FLINK_PROPERTIES", jobManagerFlinkProperties); -} - -jobManager = jobManager - .WithEnvironment("JAVA_TOOL_OPTIONS", JavaOpenOptions) - .WithEnvironment("JAVA_TOOL_OPTIONS", JavaOpenOptions) - .WithBindMount(Path.Combine(connectorsDir, "flink-sql-connector-kafka-4.0.1-2.0.jar"), "/opt/flink/lib/flink-sql-connector-kafka-4.0.1-2.0.jar", isReadOnly: true) - .WithBindMount(Path.Combine(connectorsDir, "flink-json-2.1.0.jar"), "/opt/flink/lib/flink-json-2.1.0.jar", isReadOnly: true) - .WithBindMount(testLogsDir, "/opt/flink/test-logs"); // Mount host test-logs to container - -// Expose Prometheus metrics port only in LEARNINGCOURSE mode -if (isLearningCourseMode) -{ - jobManager = jobManager.WithHttpEndpoint(port: 9250, targetPort: 9250, 
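-        // ("jm-metrics" is a named endpoint; like "jm-http", it can be resolved by other
-        //  resources, as the Job Gateway does further down via jobManager.GetEndpoint("jm-http"))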
name: "jm-metrics"); - Console.WriteLine(" ๐Ÿ“Š Flink JobManager Prometheus metrics exposed on port 9250"); -} - -// Mount Prometheus metrics JAR only in LEARNINGCOURSE mode -// NOTE: Config file is NOT mounted because FLINK_PROPERTIES provides full Prometheus config -if (isLearningCourseMode) -{ - var metricsJarPath = Path.Combine(repoRoot, "LocalTesting", "connectors", "flink", "metrics", "flink-metrics-prometheus-2.1.0.jar"); - if (File.Exists(metricsJarPath)) - { - jobManager = jobManager.WithBindMount(metricsJarPath, "/opt/flink/lib/flink-metrics-prometheus-2.1.0.jar", isReadOnly: true); - Console.WriteLine(" ๐Ÿ“Š Flink Prometheus metrics JAR mounted for JobManager"); - Console.WriteLine(" ๐Ÿ“Š JobManager Prometheus port: 9250 (via FLINK_PROPERTIES)"); - } -} - -jobManager = jobManager.WithArgs("jobmanager"); - -// Flink TaskManager with increased slots for parallel test execution (10 tests) -// CRITICAL: TaskManager must wait for both JobManager and Kafka to be ready -// - WaitFor(jobManager): Ensures TaskManager can register with JobManager -// - WaitFor(kafka): Ensures Kafka is ready before TaskManager starts processing jobs -var taskManagerBuilder = builder.AddContainer("flink-taskmanager", "flink:2.1.0-java17") - .WithEnvironment("JOB_MANAGER_RPC_ADDRESS", "flink-jobmanager") - .WithEnvironment("TASK_MANAGER_NUMBER_OF_TASK_SLOTS", "10") - .WithEnvironment("LOG_FILE_PATH", "/opt/flink/test-logs"); // Set log path inside container - -// Configure Prometheus metrics for TaskManager only in LEARNINGCOURSE mode -if (isLearningCourseMode) -{ - var taskManagerFlinkProperties = - "metrics.reporters: prom\n" + - "metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory\n" + - "metrics.reporter.prom.port: 9251\n" + - "metrics.reporter.prom.filterLabelValueCharacters: false\n"; - taskManagerBuilder = taskManagerBuilder.WithEnvironment("FLINK_PROPERTIES", taskManagerFlinkProperties); -} - -taskManagerBuilder = taskManagerBuilder - .WithEnvironment("JAVA_TOOL_OPTIONS", JavaOpenOptions) - .WithEnvironment("JAVA_TOOL_OPTIONS", JavaOpenOptions) - .WithBindMount(Path.Combine(connectorsDir, "flink-sql-connector-kafka-4.0.1-2.0.jar"), "/opt/flink/lib/flink-sql-connector-kafka-4.0.1-2.0.jar", isReadOnly: true) - .WithBindMount(Path.Combine(connectorsDir, "flink-json-2.1.0.jar"), "/opt/flink/lib/flink-json-2.1.0.jar", isReadOnly: true) - .WithBindMount(testLogsDir, "/opt/flink/test-logs"); // Mount host test-logs to container - -// Expose Prometheus metrics port only in LEARNINGCOURSE mode -if (isLearningCourseMode) -{ - taskManagerBuilder = taskManagerBuilder.WithHttpEndpoint(port: 9251, targetPort: 9251, name: "tm-metrics"); - Console.WriteLine(" ๐Ÿ“Š Flink TaskManager Prometheus metrics exposed on port 9251"); -} - -var taskManager = taskManagerBuilder; - -// Mount Prometheus metrics JAR only in LEARNINGCOURSE mode -// NOTE: Config file is NOT mounted because FLINK_PROPERTIES provides full Prometheus config -if (isLearningCourseMode) -{ - var metricsJarPath = Path.Combine(repoRoot, "LocalTesting", "connectors", "flink", "metrics", "flink-metrics-prometheus-2.1.0.jar"); - if (File.Exists(metricsJarPath)) - { - taskManager = taskManager.WithBindMount(metricsJarPath, "/opt/flink/lib/flink-metrics-prometheus-2.1.0.jar", isReadOnly: true); - Console.WriteLine(" ๐Ÿ“Š Flink Prometheus metrics JAR mounted for TaskManager"); - Console.WriteLine(" ๐Ÿ“Š TaskManager Prometheus port: 9251 (via FLINK_PROPERTIES)"); - } -} - -taskManager = taskManager - 
.WithReference(kafka) - .WithArgs("taskmanager"); - -// Flink SQL Gateway - Enables SQL Gateway REST API for direct SQL submission -// SQL Gateway provides /v1/statements endpoint for executing SQL without JAR submission -// Required for Pattern5 (SqlPassthrough) which uses "gateway" execution mode -// Runs on port 8083 (separate from JobManager REST API on port 8081) -// CRITICAL: SQL Gateway must wait for JobManager to be ready before starting -var sqlGatewayBuilder = builder.AddContainer("flink-sql-gateway", "flink:2.1.0-java17") - .WithHttpEndpoint(port: Ports.SqlGatewayHostPort, targetPort: 8083, name: "sg-http") - .WithContainerRuntimeArgs("--publish", $"{Ports.SqlGatewayHostPort}:8083") // Explicit port publishing for test access - .WaitFor(jobManager); // Wait for JobManager to be ready before starting SQL Gateway - -// Build base Flink properties for SQL Gateway -// CRITICAL: sql-gateway.endpoint.rest.address is REQUIRED by Flink 2.1.0 -// Without it, SQL Gateway fails with "Missing required options are: address" -var baseSqlGatewayFlinkProperties = - "jobmanager.rpc.address: flink-jobmanager\n" + - "rest.address: flink-jobmanager\n" + - "rest.port: 8081\n" + - "sql-gateway.endpoint.rest.address: flink-sql-gateway\n" + - "sql-gateway.endpoint.rest.bind-address: 0.0.0.0\n" + - "sql-gateway.endpoint.rest.port: 8083\n" + - "sql-gateway.endpoint.rest.bind-port: 8083\n" + - "sql-gateway.endpoint.type: rest\n" + - "sql-gateway.session.check-interval: 60000\n" + - "sql-gateway.session.idle-timeout: 600000\n" + - "sql-gateway.worker.threads.max: 10\n" + - "env.java.opts.all: --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED\n"; - -// Add Prometheus configuration for SQL Gateway in LEARNINGCOURSE mode -if (isLearningCourseMode) -{ - baseSqlGatewayFlinkProperties += - "metrics.reporters: prom\n" + - "metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory\n" + - "metrics.reporter.prom.port: 9252\n" + - "metrics.reporter.prom.filterLabelValueCharacters: false\n"; -} - -sqlGatewayBuilder = sqlGatewayBuilder - .WithEnvironment("JOB_MANAGER_RPC_ADDRESS", "flink-jobmanager") - .WithEnvironment("LOG_FILE_PATH", "/opt/flink/test-logs") // Set log path inside container - .WithEnvironment("FLINK_PROPERTIES", baseSqlGatewayFlinkProperties); // SQL Gateway needs FLINK_PROPERTIES for sql-gateway.endpoint.rest.address - -sqlGatewayBuilder = sqlGatewayBuilder - .WithEnvironment("JAVA_TOOL_OPTIONS", JavaOpenOptions) - .WithEnvironment("JAVA_TOOL_OPTIONS", JavaOpenOptions) - .WithBindMount(Path.Combine(connectorsDir, "flink-sql-connector-kafka-4.0.1-2.0.jar"), "/opt/flink/lib/flink-sql-connector-kafka-4.0.1-2.0.jar", isReadOnly: true) - .WithBindMount(Path.Combine(connectorsDir, "flink-json-2.1.0.jar"), "/opt/flink/lib/flink-json-2.1.0.jar", isReadOnly: true) - .WithBindMount(testLogsDir, "/opt/flink/test-logs"); // Mount host test-logs to container - -// Expose Prometheus metrics port only in LEARNINGCOURSE mode -if (isLearningCourseMode) -{ - 
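-    // A quick smoke test for the three Flink Prometheus endpoints once the stack is up -
-    // a minimal sketch assuming the fixed host ports used in this file, not part of the
-    // original AppHost:
-    //
-    //   using var http = new HttpClient();
-    //   foreach (var port in new[] { 9250, 9251, 9252 }) // JobManager, TaskManager, SQL Gateway
-    //   {
-    //       var metrics = await http.GetStringAsync($"http://localhost:{port}/metrics");
-    //       Console.WriteLine($"port {port}: {metrics.Split('\n').Length} metric lines");
-    //   }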
sqlGatewayBuilder = sqlGatewayBuilder.WithHttpEndpoint(port: 9252, targetPort: 9252, name: "sg-metrics"); - Console.WriteLine(" ๐Ÿ“Š Flink SQL Gateway Prometheus metrics exposed on port 9252"); -} - -var sqlGateway = sqlGatewayBuilder; - -// Mount Prometheus metrics JAR and config file only in LEARNINGCOURSE mode -if (isLearningCourseMode) -{ - var metricsJarPath = Path.Combine(repoRoot, "LocalTesting", "connectors", "flink", "metrics", "flink-metrics-prometheus-2.1.0.jar"); - if (File.Exists(metricsJarPath)) - { - sqlGateway = sqlGateway.WithBindMount(metricsJarPath, "/opt/flink/lib/flink-metrics-prometheus-2.1.0.jar", isReadOnly: true); - Console.WriteLine(" ๐Ÿ“Š Flink Prometheus metrics JAR mounted for SQL Gateway"); - } - - // Mount Flink config file with Prometheus metrics configuration - var flinkConfigPath = Path.Combine(repoRoot, "LocalTesting", "flink-conf-learningcourse.yaml"); - if (File.Exists(flinkConfigPath)) - { - sqlGateway = sqlGateway.WithBindMount(flinkConfigPath, "/opt/flink/conf/config.yaml", isReadOnly: true); - Console.WriteLine(" ๐Ÿ“Š Flink config file mounted for SQL Gateway (Prometheus metrics enabled)"); - } -} - -sqlGateway = sqlGateway.WithArgs("/opt/flink/bin/sql-gateway.sh", "start-foreground"); - -// Flink.JobGateway - Use Docker image instead of project reference -// CRITICAL: This validates the released Docker image works correctly -// Uses flinkdotnet/jobgateway:latest Docker image from release artifacts -#pragma warning disable S1481 // Gateway resource is created but not directly referenced - used via Aspire orchestration -var gateway = builder.AddContainer("flink-job-gateway", "flinkdotnet/jobgateway", "latest") - .WithHttpEndpoint(port: Ports.GatewayHostPort, targetPort: 8080, name: "gateway-http") - .WithContainerRuntimeArgs("--publish", $"{Ports.GatewayHostPort}:8080") // Explicit port publishing for test access - .WaitFor(jobManager) // Wait for JobManager to be ready before starting Job Gateway - .WithEnvironment("ASPNETCORE_URLS", "http://+:8080") - .WithEnvironment("FLINK_CONNECTOR_PATH", "/opt/connectors") - .WithEnvironment("LOG_FILE_PATH", "/opt/test-logs") - .WithBindMount(connectorsDir, "/opt/connectors", isReadOnly: true) - .WithBindMount(testLogsDir, "/opt/test-logs") - .WithReference(jobManager.GetEndpoint("jm-http")) - .WithReference(sqlGateway.GetEndpoint("sg-http")); -#pragma warning restore S1481 - -// Temporal PostgreSQL - Database for Temporal server -// CRITICAL: Must configure PostgreSQL WITHOUT password for Temporal auto-setup compatibility -// Temporal's auto-setup expects simple authentication (trust or no password) -var temporalDbServer = builder.AddPostgres("temporal-postgres") - .WithEnvironment("POSTGRES_HOST_AUTH_METHOD", "trust") // Allow trust authentication (no password) - .WithEnvironment("POSTGRES_DB", "temporal"); // Create temporal database on startup - // PostgreSQL will use default "postgres" user with trust authentication - -// Note: Temporal auto-setup will also create "temporal_visibility" database - -// Temporal Server - Official temporalio/auto-setup image from temporal.io -// Auto-setup handles schema creation and namespace setup automatically -// CRITICAL: Temporal provides durable workflow execution with: -// - Workflow state persistence and recovery -// - Activity retry and compensation patterns -// - Signal and query support for interactive workflows -// - Timer services for delayed/scheduled operations -// IMPORTANT: Using .WithReference() to get Aspire-injected connection details -// Aspire will 
inject: ConnectionStrings__temporal-postgres = "Host=...;Port=...;Username=postgres;Password=..." -// Temporal will parse this connection string and extract credentials automatically -builder.AddContainer("temporal-server", "temporalio/auto-setup", "1.22.4") - .WithHttpEndpoint(port: Ports.TemporalGrpcPort, targetPort: 7233, name: "temporal-grpc") - .WithHttpEndpoint(port: Ports.TemporalUIPort, targetPort: 8233, name: "temporal-ui") - .WithEnvironment("DB", "postgres12") - .WithEnvironment("POSTGRES_SEEDS", temporalDbServer.Resource.Name) // Use Aspire resource name for hostname - .WithEnvironment("DB_PORT", "5432") // Explicit port - .WithEnvironment("POSTGRES_USER", "postgres") // Default PostgreSQL user - .WithEnvironment("POSTGRES_PWD", "") // No password with trust authentication - .WithEnvironment("DBNAME", "temporal") // Specify database name for Temporal - .WithEnvironment("VISIBILITY_DBNAME", "temporal_visibility") // Specify visibility database name - .WithEnvironment("SKIP_DB_CREATE", "false") // Let Temporal create databases - .WithEnvironment("SKIP_DEFAULT_NAMESPACE_CREATION", "false") // Create default namespace - .WaitFor(temporalDbServer); // Wait for PostgreSQL to be ready - -// LearningCourse Infrastructure - Conditionally add Redis and Observability stack -if (isLearningCourse) -{ - // Redis - Required for Day15 Capstone Project exercises (Exercise151-154) - // Provides state management, caching, and distributed coordination capabilities - // CRITICAL: Use Bitnami Redis image with ALLOW_EMPTY_PASSWORD for learning exercises - // This allows exercises to connect with simple "localhost:port" format without authentication - #pragma warning disable S1481 // Redis resource is created but not directly referenced - used via connection string - var redis = builder.AddContainer("redis", "bitnami/redis", "latest") - .WithHttpEndpoint(port: Ports.RedisHostPort, targetPort: 6379, name: "redis-port") - .WithEnvironment("ALLOW_EMPTY_PASSWORD", "yes"); // Disable password requirement for learning - #pragma warning restore S1481 - - Console.WriteLine($"โœ… Redis deployed on port {Ports.RedisHostPort} for LearningCourse exercises"); - - // Observability Stack - Prometheus for metrics collection - // Required for monitoring and performance analysis exercises - var prometheusConfig = Path.Combine(repoRoot, "LocalTesting", "prometheus.yml"); - - // CRITICAL: Prometheus needs kafka-exporter dependency for Docker network DNS resolution - // Using WaitFor() establishes network connectivity and ensures containers can resolve each other - var prometheusBuilder = builder.AddContainer("prometheus", "prom/prometheus", "latest") - .WithHttpEndpoint(port: Ports.PrometheusHostPort, targetPort: 9090, name: "prometheus-http") - .WithBindMount(prometheusConfig, "/etc/prometheus/prometheus.yml", isReadOnly: true); - // NOTE: Using 172.17.0.1 (Docker bridge gateway) to reach host from container - // This is the most reliable cross-platform solution for standard Docker - - // Add kafka-exporter dependency if it was deployed - // WaitFor() ensures Prometheus and kafka-exporter are on the same Docker network for DNS resolution - if (kafkaExporter is not null) - { - prometheusBuilder = prometheusBuilder.WaitFor(kafkaExporter); - Console.WriteLine(" ๐Ÿ“Š Prometheus configured with kafka-exporter network dependency"); - } - - // Add explicit port mapping for Podman/Docker compatibility - if (Environment.GetEnvironmentVariable("ASPIRE_CONTAINER_RUNTIME") == "podman") - { - prometheusBuilder = prometheusBuilder - 
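-            // ("--publish" maps hostPort:containerPort explicitly - here 9253 -> 9090 -
-            //  which Podman needs spelled out where Docker infers it from WithHttpEndpoint)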
.WithContainerRuntimeArgs("--publish", $"{Ports.PrometheusHostPort}:9090"); - } - - var prometheus = prometheusBuilder; - - Console.WriteLine($"โœ… Prometheus deployed on port {Ports.PrometheusHostPort} for metrics collection"); - - // Observability Stack - Grafana for metrics visualization - // Provides dashboards and alerting for performance monitoring - // CRITICAL: Anonymous authentication enabled for learning environment (no login required) - // Complete anonymous access configuration to bypass login page entirely - var grafanaDashboardPath = Path.Combine(repoRoot, "LocalTesting", "grafana-kafka-dashboard.json"); - var grafanaProvisioningPath = Path.Combine(repoRoot, "LocalTesting", "grafana-provisioning-dashboards.yaml"); - - var grafanaBuilder = builder.AddContainer("grafana", "grafana/grafana", "latest") - .WithHttpEndpoint(port: Ports.GrafanaHostPort, targetPort: 3000, name: "grafana-http") - .WithEnvironment("GF_AUTH_ANONYMOUS_ENABLED", "true") // Enable anonymous access - .WithEnvironment("GF_AUTH_ANONYMOUS_ORG_ROLE", "Admin") // Grant admin role to anonymous users - .WithEnvironment("GF_AUTH_DISABLE_LOGIN_FORM", "true") // Completely hide login form - .WithEnvironment("GF_SECURITY_ADMIN_PASSWORD", "admin") // Keep admin account for advanced config - .WithEnvironment("GF_SECURITY_ADMIN_USER", "admin") - .WithEnvironment("GF_PATHS_PROVISIONING", "/etc/grafana/provisioning") // Enable provisioning - .WaitFor(prometheus); // Wait for Prometheus to be ready - - // Mount Kafka dashboard provisioning configuration - if (File.Exists(grafanaProvisioningPath)) - { - grafanaBuilder = grafanaBuilder - .WithBindMount(grafanaProvisioningPath, "/etc/grafana/provisioning/dashboards/dashboards.yaml", isReadOnly: true); - Console.WriteLine(" ๐Ÿ“Š Grafana dashboard provisioning configured"); - } - - // Mount Kafka dashboard if it exists - if (File.Exists(grafanaDashboardPath)) - { - grafanaBuilder = grafanaBuilder - .WithBindMount(grafanaDashboardPath, "/etc/grafana/provisioning/dashboards/kafka-dashboard.json", isReadOnly: true); - Console.WriteLine(" ๐Ÿ“Š Kafka metrics dashboard mounted for Grafana"); - } - - #pragma warning disable S1481 // Grafana resource is created but not directly referenced - accessed via browser - var grafana = grafanaBuilder; - #pragma warning restore S1481 - - Console.WriteLine($"โœ… Grafana deployed on port {Ports.GrafanaHostPort} for visualization"); -} - -#pragma warning disable S6966 // Await RunAsync instead - Required for Aspire testing framework compatibility -builder.Build().Run(); -#pragma warning restore S6966 - -static bool ConfigureContainerRuntime() -{ - // Try Docker Desktop first (preferred) - if (IsDockerAvailable()) - { - Console.WriteLine("โœ… Using Docker Desktop as container runtime"); - // No need to set ASPIRE_CONTAINER_RUNTIME - Docker is the default - return true; - } - - // Fallback to Podman if Docker is not available - if (IsPodmanAvailable()) - { - Console.WriteLine("โœ… Using Podman as container runtime (Docker not available)"); - Environment.SetEnvironmentVariable("ASPIRE_CONTAINER_RUNTIME", "podman"); - SetPodmanDockerHost(); - return true; - } - - Console.WriteLine("โŒ No container runtime found. 
Please install Docker Desktop or Podman."); - return false; -} - -static void LogConfiguredPorts() -{ - Console.WriteLine($"๐Ÿ“ Configured ports:"); - Console.WriteLine($" - Flink JobManager: {Ports.JobManagerHostPort}"); - Console.WriteLine($" - Gateway: {Ports.GatewayHostPort}"); - Console.WriteLine($" - Kafka: "); -} - -static void SetupEnvironment() -{ - Environment.SetEnvironmentVariable("ASPIRE_ALLOW_UNSECURED_TRANSPORT", "true"); - // CRITICAL: Set ASPNETCORE_URLS for Aspire Dashboard (required by Aspire SDK) - // This will be inherited by child processes, but we override it per-project using WithEnvironment() - // JobGateway explicitly sets ASPNETCORE_URLS=http://0.0.0.0:8080 via WithEnvironment() - Environment.SetEnvironmentVariable("ASPNETCORE_URLS", "http://localhost:15888"); - Environment.SetEnvironmentVariable("ASPIRE_DASHBOARD_OTLP_ENDPOINT_URL", "http://localhost:16686"); - Environment.SetEnvironmentVariable("ASPIRE_DASHBOARD_OTLP_HTTP_ENDPOINT_URL", "http://localhost:16687"); -} - -static string FindGatewayJarPath(string repoRoot) -{ - var candidates = new[] - { - Path.Combine(repoRoot, "FlinkDotNet", "Flink.JobGateway", "bin", "Release", "net9.0", "flink-ir-runner-java17.jar"), - Path.Combine(repoRoot, "FlinkDotNet", "Flink.JobGateway", "bin", "Debug", "net9.0", "flink-ir-runner-java17.jar") - }; - - return candidates.FirstOrDefault(File.Exists) ?? candidates[0]; -} - -static void PrepareConnectorDirectory(string connectorsDir, bool diagnosticsVerbose) -{ - try - { - Directory.CreateDirectory(connectorsDir); - if (diagnosticsVerbose) - { - Console.WriteLine($"[diag] Connector directory ready at {connectorsDir}"); - } - } - catch (Exception ex) - { - if (diagnosticsVerbose) - { - Console.WriteLine($"[diag][warn] Connector dir prep failed: {ex.Message}"); - } - } -} - -static bool IsPodmanAvailable() -{ - try - { - if (!IsPodmanCommandAvailable()) - { - return false; - } - - return IsPodmanMachineRunning(); - } - catch - { - return false; - } -} - -static bool IsPodmanCommandAvailable() -{ - var versionPsi = new ProcessStartInfo - { - FileName = "podman", - Arguments = "version", - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var versionProcess = Process.Start(versionPsi); - versionProcess?.WaitForExit(5000); - return versionProcess?.ExitCode == 0; -} - -static bool IsPodmanMachineRunning() -{ - var machinePsi = new ProcessStartInfo - { - FileName = "podman", - Arguments = "machine list --format \"{{.Running}}\"", - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var machineProcess = Process.Start(machinePsi); - if (machineProcess == null) - { - return false; - } - - var output = machineProcess.StandardOutput.ReadToEnd(); - machineProcess.WaitForExit(5000); - - if (output.Contains("true", StringComparison.OrdinalIgnoreCase)) - { - Console.WriteLine(" โ„น๏ธ Podman machine is running"); - return true; - } - - if (!string.IsNullOrWhiteSpace(output)) - { - Console.WriteLine(" โš ๏ธ Podman machine is not running. 
Start with: podman machine start"); - return false; - } - - // On Linux, Podman runs natively without a machine - Console.WriteLine(" โ„น๏ธ Podman detected (native mode)"); - return true; -} - -static bool IsDockerAvailable() -{ - try - { - // First check if Docker command is available - if (!IsDockerCommandAvailable()) - { - return false; - } - - // Then check if Docker daemon is running - return IsDockerDaemonRunning(); - } - catch - { - return false; - } -} - -static bool IsDockerCommandAvailable() -{ - var versionPsi = new ProcessStartInfo - { - FileName = "docker", - Arguments = "version", - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var versionProcess = Process.Start(versionPsi); - versionProcess?.WaitForExit(5000); - return versionProcess?.ExitCode == 0; -} - -static bool IsDockerDaemonRunning() -{ - var psi = new ProcessStartInfo - { - FileName = "docker", - Arguments = "info", - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var process = Process.Start(psi); - if (process == null) - { - return false; - } - - process.StandardOutput.ReadToEnd(); // Consume output to prevent blocking - var error = process.StandardError.ReadToEnd(); - process.WaitForExit(5000); - - if (process.ExitCode == 0) - { - Console.WriteLine(" โ„น๏ธ Docker daemon is running"); - return true; - } - - // Docker command exists but daemon is not running - if (error.Contains("Cannot connect to the Docker daemon", StringComparison.OrdinalIgnoreCase) || - error.Contains("Is the docker daemon running", StringComparison.OrdinalIgnoreCase)) - { - Console.WriteLine(" โš ๏ธ Docker is installed but daemon is not running. 
Start Docker Desktop."); - return false; - } - - Console.WriteLine($" โš ๏ธ Docker daemon check failed: {error}"); - return false; -} - -static void SetPodmanDockerHost() -{ - try - { - // Get Podman connection URI - var psi = new ProcessStartInfo - { - FileName = "podman", - Arguments = "system connection ls --format \"{{.URI}}\" --filter default=true", - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var process = Process.Start(psi); - if (process != null) - { - var output = process.StandardOutput.ReadToEnd().Trim(); - process.WaitForExit(5000); - - if (!string.IsNullOrWhiteSpace(output) && process.ExitCode == 0) - { - Environment.SetEnvironmentVariable("DOCKER_HOST", output); - Console.WriteLine($" โ„น๏ธ DOCKER_HOST set to: {output}"); - } - } - } - catch (Exception ex) - { - Console.WriteLine($" โš ๏ธ Could not set DOCKER_HOST: {ex.Message}"); - } -} - - diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/ReleasePackagesTesting.Published.FlinkSqlAppHost.csproj b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/ReleasePackagesTesting.Published.FlinkSqlAppHost.csproj deleted file mode 100644 index c648ba58..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.FlinkSqlAppHost/ReleasePackagesTesting.Published.FlinkSqlAppHost.csproj +++ /dev/null @@ -1,30 +0,0 @@ - - - - - Exe - net9.0 - enable - enable - true - ReleasePackagesTesting.Published.FlinkSqlAppHost - ReleasePackagesTesting.Published.FlinkSqlAppHost - false - false - - - - - - - - - - - - - - - - - diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/AspireValidationTest.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/AspireValidationTest.cs deleted file mode 100644 index 10e8a83e..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/AspireValidationTest.cs +++ /dev/null @@ -1,258 +0,0 @@ -using System.Text.Json; -using Confluent.Kafka; - -namespace LocalTesting.ValidationTest; - -/// -/// Simple validation test to verify Aspire setup is working correctly -/// This test validates basic connectivity to all LocalTesting services -/// -public static class AspireValidationTest -{ - private static readonly HttpClient _httpClient = new(); - - // Note: This is a validation utility class, not an entry point - // Run this via: dotnet run --project LocalTesting.IntegrationTests - public static async Task ValidateAspireSetup(string[] args) - { - Console.WriteLine("๐Ÿงช Aspire + FlinkDotNet Setup Validation Test"); - Console.WriteLine("============================================"); - Console.WriteLine(); - - var allPassed = true; - - // Test 1: Kafka Connectivity - Console.WriteLine("1. Testing Kafka connectivity..."); - var kafkaResult = TestKafkaConnectivity(); - LogResult("Kafka", kafkaResult); - allPassed &= kafkaResult; - - // Test 2: Flink JobManager - Console.WriteLine("\n2. Testing Flink JobManager..."); - var flinkResult = await TestFlinkJobManager(); - LogResult("Flink JobManager", flinkResult); - allPassed &= flinkResult; - - // Test 3: Flink Job Gateway - Console.WriteLine("\n3. 
Testing Flink Job Gateway..."); - var gatewayResult = await TestFlinkGateway(); - LogResult("Flink Job Gateway", gatewayResult); - allPassed &= gatewayResult; - - // Final Results - Console.WriteLine("\n" + new string('=', 50)); - Console.WriteLine($"Overall Result: {(allPassed ? "โœ… SUCCESS" : "โŒ FAILURE")}"); - Console.WriteLine($"Services Validated: Kafka, Flink JobManager, Job Gateway"); - Console.WriteLine(); - - if (allPassed) - { - Console.WriteLine("๐ŸŽ‰ Aspire setup is working correctly!"); - Console.WriteLine(" You can now run integration tests and use the FlinkDotNet services."); - Console.WriteLine(); - Console.WriteLine("Service URLs:"); - Console.WriteLine(" โ€ข Aspire Dashboard: http://localhost:15888"); - Console.WriteLine(" โ€ข Flink JobManager UI: http://localhost:8081"); - Console.WriteLine(" โ€ข Flink Job Gateway: http://localhost:8080"); - Console.WriteLine(" โ€ข Kafka: localhost:9092"); - } - else - { - Console.WriteLine("โš ๏ธ Some services are not responding correctly."); - Console.WriteLine(" Please check that the LocalTesting.FlinkSqlAppHost is running."); - Console.WriteLine(" Run: dotnet run --project LocalTesting.FlinkSqlAppHost"); - } - - return allPassed ? 0 : 1; - } - - private static bool TestKafkaConnectivity() - { - try - { - var config = new AdminClientConfig - { - BootstrapServers = "localhost:9092", - SocketTimeoutMs = 5000 - }; - - using var admin = new AdminClientBuilder(config).Build(); - var metadata = admin.GetMetadata(TimeSpan.FromSeconds(3)); - - if (metadata?.Brokers?.Count > 0) - { - Console.WriteLine($" โœ… Connected successfully (brokers: {metadata.Brokers.Count})"); - return true; - } - else - { - Console.WriteLine(" โŒ No brokers found"); - return false; - } - } - catch (Exception ex) - { - Console.WriteLine($" โŒ Connection failed: {ex.Message}"); - return false; - } - } - - private static async Task TestFlinkJobManager() - { - try - { - var response = await _httpClient.GetAsync("http://localhost:8081/v1/overview"); - if (response.IsSuccessStatusCode) - { - var content = await response.Content.ReadAsStringAsync(); - var hasContent = !string.IsNullOrWhiteSpace(content); - Console.WriteLine($" โœ… Connected successfully (status: {response.StatusCode}, has content: {hasContent})"); - return true; - } - else - { - Console.WriteLine($" โŒ HTTP error: {response.StatusCode}"); - return false; - } - } - catch (Exception ex) - { - Console.WriteLine($" โŒ Connection failed: {ex.Message}"); - return false; - } - } - - private static async Task TestFlinkGateway() - { - try - { - var gatewayEndpoint = await DiscoverGatewayEndpointAsync(); - var response = await _httpClient.GetAsync($"{gatewayEndpoint}api/v1/health"); - if (response.IsSuccessStatusCode) - { - var content = await response.Content.ReadAsStringAsync(); - Console.WriteLine($" โœ… Connected successfully (status: {response.StatusCode})"); - if (!string.IsNullOrWhiteSpace(content)) - { - try - { - JsonSerializer.Deserialize(content); - Console.WriteLine($" Health response: {content}"); - } - catch - { - Console.WriteLine($" Response: {content}"); - } - } - return true; - } - else - { - Console.WriteLine($" โŒ HTTP error: {response.StatusCode}"); - return false; - } - } - catch (Exception ex) - { - Console.WriteLine($" โŒ Connection failed: {ex.Message}"); - return false; - } - } - - private static void LogResult(string serviceName, bool success) - { - var status = success ? 
"โœ… PASS" : "โŒ FAIL"; - Console.WriteLine($" {serviceName}: {status}"); - } - - /// - /// Discover the Gateway endpoint from Docker port mappings. - /// Gateway runs as a container with dynamic port allocation in Aspire. - /// - private static async Task DiscoverGatewayEndpointAsync() - { - try - { - var gatewayContainers = await RunDockerCommandAsync("ps --filter \"name=flink-job-gateway\" --format \"{{.Ports}}\""); - - if (!string.IsNullOrWhiteSpace(gatewayContainers)) - { - var lines = gatewayContainers.Split('\n', StringSplitOptions.RemoveEmptyEntries); - foreach (var line in lines) - { - var match = System.Text.RegularExpressions.Regex.Match(line, @"127\.0\.0\.1:(\d+)->(\d+)/tcp"); - if (match.Success) - { - var endpoint = $"http://localhost:{match.Groups[1].Value}/"; - Console.WriteLine($" ๐Ÿ” Discovered Gateway endpoint: {endpoint}"); - return endpoint; - } - } - } - - // Fallback to default port if discovery fails - Console.WriteLine($" โš ๏ธ Gateway endpoint discovery failed, using default: http://localhost:8080/"); - return "http://localhost:8080/"; - } - catch (Exception ex) - { - Console.WriteLine($" โš ๏ธ Gateway endpoint discovery error: {ex.Message}, using default port"); - return "http://localhost:8080/"; - } - } - - /// - /// Run a Docker or Podman command and return the output. - /// - private static async Task RunDockerCommandAsync(string arguments) - { - // Try Docker first, then Podman if Docker fails - var dockerOutput = await TryRunContainerCommandAsync("docker", arguments); - if (!string.IsNullOrWhiteSpace(dockerOutput)) - { - return dockerOutput; - } - - var podmanOutput = await TryRunContainerCommandAsync("podman", arguments); - return podmanOutput ?? string.Empty; - } - - /// - /// Try to run a container command (docker or podman). 
- /// - private static async Task TryRunContainerCommandAsync(string command, string arguments) - { - try - { - var psi = new System.Diagnostics.ProcessStartInfo - { - FileName = command, - Arguments = arguments, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var process = System.Diagnostics.Process.Start(psi); - if (process == null) - { - return null; - } - - var output = await process.StandardOutput.ReadToEndAsync(); - await process.WaitForExitAsync(); - - if (process.ExitCode == 0 && !string.IsNullOrWhiteSpace(output)) - { - return output; - } - - return null; - } - catch - { - return null; - } - } -} diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/AssemblyInfo.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/AssemblyInfo.cs deleted file mode 100644 index 93f6955e..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/AssemblyInfo.cs +++ /dev/null @@ -1,7 +0,0 @@ -using NUnit.Framework; - -// Enable parallel test execution at assembly level -// Tests will reuse the shared GlobalTestInfrastructure (Kafka + Flink + Gateway) -// Each test uses unique topics via TestContext.CurrentContext.Test.ID to avoid conflicts -[assembly: Parallelizable(ParallelScope.All)] -[assembly: LevelOfParallelism(10)] // Run up to 10 tests in parallel (more than test count for max parallelism) \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/EnvironmentVariableScope.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/EnvironmentVariableScope.cs deleted file mode 100644 index 7cc63ff1..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/EnvironmentVariableScope.cs +++ /dev/null @@ -1,26 +0,0 @@ - -namespace LocalTesting.IntegrationTests; - -internal sealed class EnvironmentVariableScope : IDisposable -{ - private readonly string _name; - private readonly string? _previousValue; - private readonly EnvironmentVariableTarget _target; - - public EnvironmentVariableScope(string name, string? value, EnvironmentVariableTarget target = EnvironmentVariableTarget.Process) - { - _name = name; - _target = target; - _previousValue = Environment.GetEnvironmentVariable(name, target); - Environment.SetEnvironmentVariable(name, value, target); - } - - public void Dispose() - { - Environment.SetEnvironmentVariable(_name, _previousValue, _target); - } -} - - - - diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/FlinkDotNetJobs.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/FlinkDotNetJobs.cs deleted file mode 100644 index d10a7d29..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/FlinkDotNetJobs.cs +++ /dev/null @@ -1,279 +0,0 @@ -using Flink.JobBuilder.Models; -using FlinkDotNet.DataStream; -using Microsoft.Extensions.Configuration; -using Microsoft.Extensions.Logging.Abstractions; - -namespace LocalTesting.IntegrationTests; - -/// -/// Contains various FlinkDotNet job implementations for testing different features. -/// Uses modern DataStream API pattern: -/// 1. StreamExecutionEnvironment.GetExecutionEnvironment() -/// 2. environment.FromKafka() to create stream -/// 3. Stream transformation methods (.Map, .Filter, etc.) -/// 4. 
.SinkToKafka() to write output -/// 5. environment.ExecuteAsync() to submit job -/// -public static class FlinkDotNetJobs -{ - /// - /// Creates a simple DataStream job that converts input strings to uppercase - /// - public static async Task CreateUppercaseJob( - string inputTopic, - string outputTopic, - string kafka, - string jobName, - CancellationToken ct) - { - var environment = StreamExecutionEnvironment.GetExecutionEnvironment(); - - environment.FromKafka(inputTopic, kafka, groupId: "uppercase-job", startingOffsets: "earliest") - .Map(s => s.ToUpperInvariant()) - .SinkToKafka(outputTopic, kafka); - - var jobClient = await environment.ExecuteAsync(jobName, ct); - - return new JobSubmissionResult - { - Success = true, - JobId = jobClient.GetJobId(), - SubmittedAt = DateTime.UtcNow - }; - } - - /// - /// Creates a DataStream job with filtering - /// - public static async Task CreateFilterJob( - string inputTopic, - string outputTopic, - string kafka, - string jobName, - CancellationToken ct) - { - var environment = StreamExecutionEnvironment.GetExecutionEnvironment(); - - environment.FromKafka(inputTopic, kafka, groupId: "filter-job", startingOffsets: "earliest") - .Filter(s => !string.IsNullOrWhiteSpace(s)) - .SinkToKafka(outputTopic, kafka); - - var jobClient = await environment.ExecuteAsync(jobName, ct); - - return new JobSubmissionResult - { - Success = true, - JobId = jobClient.GetJobId(), - SubmittedAt = DateTime.UtcNow - }; - } - - /// - /// Creates a DataStream job with string splitting and concatenation - /// - public static async Task CreateSplitConcatJob( - string inputTopic, - string outputTopic, - string kafka, - string jobName, - CancellationToken ct) - { - var environment = StreamExecutionEnvironment.GetExecutionEnvironment(); - - environment.FromKafka(inputTopic, kafka, groupId: "splitconcat-job", startingOffsets: "earliest") - .FlatMap(s => s.Split(',')) - .Map(s => s + "-joined") - .SinkToKafka(outputTopic, kafka); - - var jobClient = await environment.ExecuteAsync(jobName, ct); - - return new JobSubmissionResult - { - Success = true, - JobId = jobClient.GetJobId(), - SubmittedAt = DateTime.UtcNow - }; - } - - /// - /// Creates a DataStream job with timer functionality - /// Note: Timer functionality needs special windowing - simplified version here - /// - public static async Task CreateTimerJob( - string inputTopic, - string outputTopic, - string kafka, - string jobName, - CancellationToken ct) - { - var environment = StreamExecutionEnvironment.GetExecutionEnvironment(); - - // Simple pass-through for timer test (actual timer logic would require more complex windowing) - environment.FromKafka(inputTopic, kafka, groupId: "timer-job", startingOffsets: "earliest") - .Map(s => $"[Timed] {s}") - .SinkToKafka(outputTopic, kafka); - - var jobClient = await environment.ExecuteAsync(jobName, ct); - - return new JobSubmissionResult - { - Success = true, - JobId = jobClient.GetJobId(), - SubmittedAt = DateTime.UtcNow - }; - } - - /// - /// Creates a SQL job that passes through data from input to output using Direct Flink SQL Gateway - /// - public static async Task CreateDirectFlinkSQLJob( - string inputTopic, - string outputTopic, - string kafka, - string sqlGatewayUrl, - string jobName, - CancellationToken ct) - { - var sqlStatements = new[] - { - $@"CREATE TABLE input ( `key` STRING, `value` STRING ) WITH ( - 'connector'='kafka', - 'topic'='{inputTopic}', - 'properties.bootstrap.servers'='{kafka}', - 'properties.group.id'='flink-sql-test', - 
'scan.startup.mode'='earliest-offset', - 'format'='json' - )", - $@"CREATE TABLE output ( `key` STRING, `value` STRING ) WITH ( - 'connector'='kafka', - 'topic'='{outputTopic}', - 'properties.bootstrap.servers'='{kafka}', - 'format'='json' - )", - "INSERT INTO output SELECT `key`, `value` FROM input" - }; - - var jobDef = new JobDefinition - { - Source = new SqlSourceDefinition - { - Statements = new List(sqlStatements), - Mode = "streaming", - ExecutionMode = "gateway" - }, - Metadata = new JobMetadata - { - JobId = Guid.NewGuid().ToString(), - JobName = jobName, - CreatedAt = DateTime.UtcNow, - Version = "1.0" - } - }; - - var configuration = new ConfigurationBuilder() - .AddInMemoryCollection(new Dictionary - { - ["Flink:SqlGateway:BaseUrl"] = sqlGatewayUrl - }) - .Build(); - - var jobManager = new FlinkDotNet.JobGateway.Services.FlinkJobManager( - NullLogger.Instance, - configuration, - new HttpClient()); - - return await jobManager.SubmitJobAsync(jobDef); - } - - /// - /// Creates a SQL job that transforms data - /// - public static async Task CreateSqlTransformJob( - string inputTopic, - string outputTopic, - string kafka, - string sqlGatewayUrl, - string jobName, - CancellationToken ct) - { - var sqlStatements = new[] - { - $@"CREATE TABLE input ( `key` STRING, `value` STRING ) WITH ( - 'connector'='kafka', - 'topic'='{inputTopic}', - 'properties.bootstrap.servers'='{kafka}', - 'properties.group.id'='flink-sql-transform', - 'scan.startup.mode'='earliest-offset', - 'format'='json' - )", - $@"CREATE TABLE output ( `key` STRING, `transformed` STRING ) WITH ( - 'connector'='kafka', - 'topic'='{outputTopic}', - 'properties.bootstrap.servers'='{kafka}', - 'format'='json' - )", - "INSERT INTO output SELECT `key`, UPPER(`value`) as `transformed` FROM input" - }; - - var jobDef = new JobDefinition - { - Source = new SqlSourceDefinition - { - Statements = new List(sqlStatements), - Mode = "streaming", - ExecutionMode = "gateway" - }, - Metadata = new JobMetadata - { - JobId = Guid.NewGuid().ToString(), - JobName = jobName, - CreatedAt = DateTime.UtcNow, - Version = "1.0" - } - }; - - var configuration = new ConfigurationBuilder() - .AddInMemoryCollection(new Dictionary - { - ["Flink:SqlGateway:BaseUrl"] = sqlGatewayUrl - }) - .Build(); - - var jobManager = new FlinkDotNet.JobGateway.Services.FlinkJobManager( - NullLogger.Instance, - configuration, - new HttpClient()); - - return await jobManager.SubmitJobAsync(jobDef); - } - - /// - /// Creates a composite job that combines multiple operations - /// - public static async Task CreateCompositeJob( - string inputTopic, - string outputTopic, - string kafka, - string jobName, - CancellationToken ct) - { - var environment = StreamExecutionEnvironment.GetExecutionEnvironment(); - - environment.FromKafka(inputTopic, kafka, groupId: "composite-job", startingOffsets: "earliest") - .FlatMap(s => s.Split(',')) - .Map(s => s + "-tail") - .Map(s => s.ToUpperInvariant()) - .Filter(s => !string.IsNullOrWhiteSpace(s)) - .Map(s => $"[Processed] {s}") - .SinkToKafka(outputTopic, kafka); - - var jobClient = await environment.ExecuteAsync(jobName, ct); - - return new JobSubmissionResult - { - Success = true, - JobId = jobClient.GetJobId(), - SubmittedAt = DateTime.UtcNow - }; - } -} diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/GatewayAllPatternsTests.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/GatewayAllPatternsTests.cs deleted file mode 100644 index 86689bfd..00000000 
--- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/GatewayAllPatternsTests.cs +++ /dev/null @@ -1,506 +0,0 @@ -using System.Diagnostics; -using Confluent.Kafka; -using LocalTesting.FlinkSqlAppHost; -using NUnit.Framework; - -namespace LocalTesting.IntegrationTests; - -/// -/// Gateway-based tests for all 7 FlinkDotNet job patterns using FlinkDotNetJobs helpers. -/// These tests validate end-to-end job submission through the Gateway. -/// Tests run in parallel with 8 TaskManager slots available. -/// -[TestFixture] -[Parallelizable(ParallelScope.All)] -[Category("gateway-patterns")] -public class GatewayAllPatternsTests : LocalTestingTestBase -{ - private static readonly TimeSpan TestTimeout = TimeSpan.FromMinutes(2); - private static readonly TimeSpan JobRunTimeout = TimeSpan.FromSeconds(30); - private static readonly TimeSpan MessageTimeout = TimeSpan.FromSeconds(30); - - [Test] - public async Task Gateway_Pattern1_Uppercase_ShouldWork() - { - await RunGatewayPatternTest( - patternName: "Uppercase", - jobCreator: (input, output, kafka, ct) => - FlinkDotNetJobs.CreateUppercaseJob(input, output, kafka, "gateway-uppercase", ct), - inputMessages: new[] { "hello", "world" }, - expectedOutputCount: 2, - description: "Uppercase transformation via Gateway" - ); - } - - [Test] - public async Task Gateway_Pattern2_Filter_ShouldWork() - { - await RunGatewayPatternTest( - patternName: "Filter", - jobCreator: (input, output, kafka, ct) => - FlinkDotNetJobs.CreateFilterJob(input, output, kafka, "gateway-filter", ct), - inputMessages: new[] { "keep", "", "this", "", "data" }, - expectedOutputCount: 3, // Empty strings filtered out - description: "Filter operation via Gateway" - ); - } - - [Test] - public async Task Gateway_Pattern3_SplitConcat_ShouldWork() - { - await RunGatewayPatternTest( - patternName: "SplitConcat", - jobCreator: (input, output, kafka, ct) => - FlinkDotNetJobs.CreateSplitConcatJob(input, output, kafka, "gateway-splitconcat", ct), - inputMessages: new[] { "a,b" }, - expectedOutputCount: 1, // Split and concat produces 1 message - description: "Split and concat via Gateway" - ); - } - - [Test] - public async Task Gateway_Pattern4_Timer_ShouldWork() - { - await RunGatewayPatternTest( - patternName: "Timer", - jobCreator: (input, output, kafka, ct) => - FlinkDotNetJobs.CreateTimerJob(input, output, kafka, "gateway-timer", ct), - inputMessages: new[] { "timed1", "timed2" }, - expectedOutputCount: 2, - description: "Timer functionality via Gateway", - allowLongerProcessing: true - ); - } - - [Test] - public async Task Gateway_Pattern5_DirectFlinkSQL_ShouldWork() - { - var sqlGatewayUrl = await GetSqlGatewayEndpointAsync(); - await RunGatewayPatternTest( - patternName: "DirectFlinkSQL", - jobCreator: (input, output, kafka, ct) => - FlinkDotNetJobs.CreateDirectFlinkSQLJob(input, output, kafka, sqlGatewayUrl, "gateway-direct-flink-sql", ct), - inputMessages: new[] { "{\"key\":\"k1\",\"value\":\"v1\"}" }, - expectedOutputCount: 1, - description: "Direct Flink SQL via Gateway", - usesJson: true - ); - } - - [Test] - public async Task Gateway_Pattern6_SqlTransform_ShouldWork() - { - var sqlGatewayUrl = await GetSqlGatewayEndpointAsync(); - await RunGatewayPatternTest( - patternName: "SqlTransform", - jobCreator: (input, output, kafka, ct) => - FlinkDotNetJobs.CreateSqlTransformJob(input, output, kafka, sqlGatewayUrl, "gateway-sql-transform", ct), - inputMessages: new[] { "{\"key\":\"k1\",\"value\":\"test\"}" }, - expectedOutputCount: 1, - description: "SQL 
transformation via Gateway", - usesJson: true - ); - } - - [Test] - public async Task Gateway_Pattern7_Composite_ShouldWork() - { - await RunGatewayPatternTest( - patternName: "Composite", - jobCreator: (input, output, kafka, ct) => - FlinkDotNetJobs.CreateCompositeJob(input, output, kafka, "gateway-composite", ct), - inputMessages: new[] { "test,data" }, - expectedOutputCount: 1, // Split and concat produces 1 message - description: "Composite operations via Gateway", - allowLongerProcessing: true - ); - } - - #region Test Infrastructure - - private async Task RunGatewayPatternTest( - string patternName, - Func> jobCreator, - string[] inputMessages, - int expectedOutputCount, - string description, - bool allowLongerProcessing = false, - bool usesJson = false) - { - var inputTopic = $"lt.gw.{patternName.ToLowerInvariant()}.input.{TestContext.CurrentContext.Test.ID}"; - var outputTopic = $"lt.gw.{patternName.ToLowerInvariant()}.output.{TestContext.CurrentContext.Test.ID}"; - - TestPrerequisites.EnsureDockerAvailable(); - - var baseToken = TestContext.CurrentContext.CancellationToken; - using var testTimeout = new CancellationTokenSource(TestTimeout); - using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(baseToken, testTimeout.Token); - var ct = linkedCts.Token; - - TestContext.WriteLine($"๐Ÿš€ Starting Gateway Pattern Test: {patternName}"); - TestContext.WriteLine($"๐Ÿ“ Description: {description}"); - var stopwatch = Stopwatch.StartNew(); - - try - { - // Skip health check - global setup already validated everything - // Create topics immediately - TestContext.WriteLine($"๐Ÿ“ Creating topics: {inputTopic} -> {outputTopic}"); - await CreateTopicAsync(inputTopic, 1); - await CreateTopicAsync(outputTopic, 1); - - // Submit job using FlinkDotNetJobs helper - // Use Kafka container IP for Flink jobs (container-to-container communication) - // Test producers/consumers use host connection (host-to-container via port mapping) - TestContext.WriteLine($"๐Ÿ”ง Creating and submitting {patternName} job..."); - TestContext.WriteLine($"๐Ÿ“ก Kafka bootstrap (host): {KafkaConnectionString}"); - TestContext.WriteLine($"๐Ÿ“ก Kafka bootstrap (Flink): {GlobalTestInfrastructure.KafkaContainerIpForFlink}"); - TestContext.WriteLine($"๐Ÿ“ Input topic: {inputTopic}"); - TestContext.WriteLine($"๐Ÿ“ Output topic: {outputTopic}"); - - var submitResult = await jobCreator(inputTopic, outputTopic, GlobalTestInfrastructure.KafkaContainerIpForFlink!, ct); - - TestContext.WriteLine($"๐Ÿ“Š Job submission: success={submitResult.Success}, jobId={submitResult.FlinkJobId}"); - - // If job submission failed, retrieve detailed diagnostics - if (!submitResult.Success) - { - TestContext.WriteLine("โš ๏ธ Job submission failed - retrieving Flink diagnostics..."); - var flinkEndpoint = await GetFlinkJobManagerEndpointAsync(); - var diagnostics = await GetFlinkJobDiagnosticsAsync(flinkEndpoint, submitResult.FlinkJobId); - TestContext.WriteLine(diagnostics); - } - - Assert.That(submitResult.Success, Is.True, $"Job must submit successfully. 
Error: {submitResult.ErrorMessage}"); - - // Wait for job to be running - var gatewayBase = $"http://localhost:{Ports.GatewayHostPort}/"; - await WaitForJobRunningViaGatewayAsync(gatewayBase, submitResult.FlinkJobId!, JobRunTimeout, ct); - TestContext.WriteLine("โœ… Job is RUNNING"); - - // Debug: Check job status immediately to verify it's actually running - await LogJobStatusViaGatewayAsync(gatewayBase, submitResult.FlinkJobId!, "Immediately after RUNNING check"); - - // Produce test messages immediately - job is already running - TestContext.WriteLine($"๐Ÿ“ค Producing {inputMessages.Length} messages..."); - await ProduceMessagesAsync(inputTopic, inputMessages, ct, usesJson); - - // Consume and verify (reduced timeout for faster tests) - var consumeTimeout = allowLongerProcessing ? TimeSpan.FromSeconds(45) : MessageTimeout; - var consumed = await ConsumeMessagesAsync(outputTopic, expectedOutputCount, consumeTimeout, ct); - - TestContext.WriteLine($"๐Ÿ“Š Consumed {consumed.Count} messages (expected: {expectedOutputCount})"); - - // Assert - use GreaterThanOrEqualTo to be more forgiving - Assert.That(consumed.Count, Is.GreaterThanOrEqualTo(expectedOutputCount), - $"Should consume at least {expectedOutputCount} messages"); - - stopwatch.Stop(); - TestContext.WriteLine($"โœ… {patternName} test completed successfully in {stopwatch.Elapsed.TotalSeconds:F1}s"); - } - catch (Exception ex) - { - stopwatch.Stop(); - TestContext.WriteLine($"โŒ {patternName} test failed after {stopwatch.Elapsed.TotalSeconds:F1}s: {ex.Message}"); - throw; - } - } - - private async Task ProduceMessagesAsync(string topic, string[] messages, CancellationToken ct, bool usesJson = false) - { - if (usesJson) - { - // For JSON messages, produce with null key - using var producer = new ProducerBuilder(new ProducerConfig - { - BootstrapServers = KafkaConnectionString, - EnableIdempotence = true, - Acks = Acks.All, - LingerMs = 5, - BrokerAddressFamily = BrokerAddressFamily.V4, - SecurityProtocol = SecurityProtocol.Plaintext - }) - .SetLogHandler((_, _) => { }) - .SetErrorHandler((_, _) => { }) - .Build(); - - foreach (var message in messages) - { - await producer.ProduceAsync(topic, new Message { Value = message }, ct); - } - - producer.Flush(TimeSpan.FromSeconds(10)); - } - else - { - // For simple messages, use string key - using var producer = new ProducerBuilder(new ProducerConfig - { - BootstrapServers = KafkaConnectionString, - EnableIdempotence = true, - Acks = Acks.All, - LingerMs = 5, - BrokerAddressFamily = BrokerAddressFamily.V4, - SecurityProtocol = SecurityProtocol.Plaintext - }) - .SetLogHandler((_, _) => { }) - .SetErrorHandler((_, _) => { }) - .Build(); - - for (int i = 0; i < messages.Length; i++) - { - await producer.ProduceAsync(topic, new Message - { - Key = $"key-{i}", - Value = messages[i] - }, ct); - } - - producer.Flush(TimeSpan.FromSeconds(10)); - } - - TestContext.WriteLine($"โœ… Produced {messages.Length} messages to {topic}"); - } - - private Task> ConsumeMessagesAsync(string topic, int expectedCount, TimeSpan timeout, CancellationToken ct) - { - var config = new ConsumerConfig - { - BootstrapServers = KafkaConnectionString, - GroupId = $"lt-gw-pattern-consumer-{Guid.NewGuid()}", - AutoOffsetReset = AutoOffsetReset.Earliest, - EnableAutoCommit = false, - BrokerAddressFamily = BrokerAddressFamily.V4, - SecurityProtocol = SecurityProtocol.Plaintext - }; - - var messages = new List(); - using var consumer = new ConsumerBuilder(config) - .SetLogHandler((_, _) => { }) - .SetErrorHandler((_, _) => { }) 
- .Build(); - - consumer.Subscribe(topic); - var deadline = DateTime.UtcNow.Add(timeout); - - TestContext.WriteLine($"๐Ÿ“ฅ Starting consumption from '{topic}' (timeout: {timeout.TotalSeconds}s)"); - - while (DateTime.UtcNow < deadline && messages.Count < expectedCount && !ct.IsCancellationRequested) - { - var consumeResult = consumer.Consume(TimeSpan.FromSeconds(1)); - if (consumeResult != null) - { - messages.Add(consumeResult.Message.Value); - TestContext.WriteLine($" ๐Ÿ“ฅ Consumed message {messages.Count}: {consumeResult.Message.Value}"); - } - } - - return Task.FromResult(messages); - } - - private static async Task WaitForJobRunningViaGatewayAsync(string gatewayBaseUrl, string jobId, TimeSpan timeout, CancellationToken ct) - { - using var http = new HttpClient(); - var deadline = DateTime.UtcNow.Add(timeout); - var attempt = 0; - - TestContext.WriteLine($"โณ Waiting for job {jobId} to reach RUNNING state..."); - - // For SQL Gateway jobs, also check Flink REST API directly with converted job ID (without hyphens) - // AND check for any RUNNING jobs as fallback since SQL Gateway creates different job IDs - var flinkJobId = jobId.Replace("-", ""); - var flinkEndpoint = await GetFlinkJobManagerEndpointAsync(); - - while (DateTime.UtcNow < deadline && !ct.IsCancellationRequested) - { - attempt++; - try - { - // Try Gateway API first - if (await TryCheckGatewayJobStatusAsync(http, gatewayBaseUrl, jobId, attempt, ct)) - { - return; - } - - // Gateway API failed, try Flink REST API directly with converted job ID - if (await TryCheckFlinkJobStatusAsync(http, flinkEndpoint, flinkJobId, attempt, ct)) - { - return; - } - - // Fallback: Check if ANY job is RUNNING (for SQL Gateway jobs that have different IDs) - if (await TryCheckAnyRunningJobAsync(http, flinkEndpoint, attempt, ct)) - { - return; - } - } - catch (HttpRequestException ex) - { - TestContext.WriteLine($" โณ Attempt {attempt}: Request failed - {ex.Message}"); - } - - await Task.Delay(500, ct); // Reduced from 1000ms to 500ms - } - - throw new TimeoutException($"Job {jobId} did not reach RUNNING state within {timeout.TotalSeconds:F0}s"); - } - - private static async Task TryCheckGatewayJobStatusAsync(HttpClient http, string gatewayBaseUrl, string jobId, int attempt, CancellationToken ct) - { - var resp = await http.GetAsync($"{gatewayBaseUrl}api/v1/jobs/{jobId}/status", ct); - if (!resp.IsSuccessStatusCode) - { - return false; - } - - var content = await resp.Content.ReadAsStringAsync(ct); - if (content.Contains("RUNNING", StringComparison.OrdinalIgnoreCase) || - content.Contains("FINISHED", StringComparison.OrdinalIgnoreCase)) - { - TestContext.WriteLine($"โœ… Job {jobId} is running/finished after {attempt} attempt(s)"); - return true; - } - - if (content.Contains("FAILED", StringComparison.OrdinalIgnoreCase) || - content.Contains("CANCELED", StringComparison.OrdinalIgnoreCase)) - { - throw new InvalidOperationException($"Job {jobId} failed or was canceled: {content}"); - } - - TestContext.WriteLine($" โณ Attempt {attempt}: Job status from Gateway - {content}"); - return false; - } - - private static async Task TryCheckFlinkJobStatusAsync(HttpClient http, string flinkEndpoint, string flinkJobId, int attempt, CancellationToken ct) - { - var flinkResp = await http.GetAsync($"{flinkEndpoint}jobs/{flinkJobId}", ct); - if (!flinkResp.IsSuccessStatusCode) - { - return false; - } - - var flinkContent = await flinkResp.Content.ReadAsStringAsync(ct); - if (flinkContent.Contains("\"state\":\"RUNNING\"", StringComparison.OrdinalIgnoreCase) || 
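// Editor's note: the jobId.Replace("-", "") conversion above relies on Gateway job IDs
// being GUIDs, while Flink's REST API identifies jobs by 32 hex characters without
// hyphens. An equivalent but more explicit sketch (hypothetical helper, validates the
// input instead of blindly stripping hyphens):
private static string ToFlinkJobId(string gatewayJobId) =>
    Guid.TryParse(gatewayJobId, out var guid)
        ? guid.ToString("N")               // "N" format = 32 digits, no hyphens
        : gatewayJobId.Replace("-", "");   // fall back to the original behavior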
- flinkContent.Contains("\"state\":\"FINISHED\"", StringComparison.OrdinalIgnoreCase)) - { - TestContext.WriteLine($"โœ… Job {flinkJobId} is running/finished after {attempt} attempt(s) (via Flink REST API)"); - return true; - } - - if (flinkContent.Contains("\"state\":\"FAILED\"", StringComparison.OrdinalIgnoreCase) || - flinkContent.Contains("\"state\":\"CANCELED\"", StringComparison.OrdinalIgnoreCase)) - { - throw new InvalidOperationException($"Job {flinkJobId} failed or was canceled: {flinkContent}"); - } - - TestContext.WriteLine($" โณ Attempt {attempt}: Job status from Flink API - {flinkContent}"); - return false; - } - - private static async Task TryCheckAnyRunningJobAsync(HttpClient http, string flinkEndpoint, int attempt, CancellationToken ct) - { - var allJobsResp = await http.GetAsync($"{flinkEndpoint}jobs", ct); - if (!allJobsResp.IsSuccessStatusCode) - { - TestContext.WriteLine($" โณ Attempt {attempt}: No RUNNING jobs found"); - return false; - } - - var allJobsContent = await allJobsResp.Content.ReadAsStringAsync(ct); - if (allJobsContent.Contains("\"status\":\"RUNNING\"", StringComparison.OrdinalIgnoreCase)) - { - TestContext.WriteLine($"โœ… Found RUNNING job after {attempt} attempt(s) (fallback check)"); - return true; - } - - TestContext.WriteLine($" โณ Attempt {attempt}: No RUNNING jobs found"); - return false; - } - - - /// - /// Get SQL Gateway endpoint URL from Docker port mappings. - /// SQL Gateway runs on container port 8083, mapped to dynamic host port. - /// - private static async Task GetSqlGatewayEndpointAsync() - { - try - { - var sqlGatewayContainers = await RunDockerCommandAsync("ps --filter \"name=flink-sql-gateway\" --format \"{{.Ports}}\""); - var lines = sqlGatewayContainers.Split('\n', StringSplitOptions.RemoveEmptyEntries); - - foreach (var line in lines) - { - // Look for port mapping to 8083 (SQL Gateway's default listener port) - if (line.Contains("->8083/tcp")) - { - var match = System.Text.RegularExpressions.Regex.Match(line, @"127\.0\.0\.1:(\d+)->8083"); - if (match.Success) - { - return $"http://localhost:{match.Groups[1].Value}/"; - } - } - } - - // Fallback to configured port if discovery fails - return $"http://localhost:{Ports.SqlGatewayHostPort}/"; - } - catch (Exception ex) - { - TestContext.WriteLine($"โš ๏ธ SQL Gateway endpoint discovery failed: {ex.Message}, using configured port {Ports.SqlGatewayHostPort}"); - return $"http://localhost:{Ports.SqlGatewayHostPort}/"; - } - } - - private static async Task RunDockerCommandAsync(string arguments) - { - // Try Docker first, then Podman if Docker fails or returns empty - var dockerOutput = await TryRunContainerCommandAsync("docker", arguments); - if (!string.IsNullOrWhiteSpace(dockerOutput)) - { - return dockerOutput; - } - - // Fallback to Podman if Docker didn't return results - var podmanOutput = await TryRunContainerCommandAsync("podman", arguments); - return podmanOutput ?? 
string.Empty; - } - - private static async Task TryRunContainerCommandAsync(string command, string arguments) - { - try - { - var psi = new System.Diagnostics.ProcessStartInfo - { - FileName = command, - Arguments = arguments, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var process = System.Diagnostics.Process.Start(psi); - if (process == null) - { - return null; - } - - var output = await process.StandardOutput.ReadToEndAsync(); - await process.WaitForExitAsync(); - - if (process.ExitCode == 0 && !string.IsNullOrWhiteSpace(output)) - { - return output; - } - - return null; - } - catch - { - return null; - } - } - - #endregion -} diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/GlobalTestInfrastructure.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/GlobalTestInfrastructure.cs deleted file mode 100644 index ec872bd8..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/GlobalTestInfrastructure.cs +++ /dev/null @@ -1,914 +0,0 @@ -using System.Diagnostics; -using Aspire.Hosting; -using Aspire.Hosting.ApplicationModel; -using Aspire.Hosting.Testing; -using LocalTesting.FlinkSqlAppHost; -using Microsoft.Extensions.Configuration; -using Microsoft.Extensions.DependencyInjection; -using NUnit.Framework; - -namespace LocalTesting.IntegrationTests; - -/// -/// Assembly-level test infrastructure setup for LocalTesting integration tests. -/// Initializes infrastructure ONCE for all tests to dramatically reduce startup overhead. -/// Infrastructure includes: Docker, Kafka, Flink JobManager, Flink TaskManager, and Gateway. -/// -[SetUpFixture] -public class GlobalTestInfrastructure -{ - - private static readonly TimeSpan DefaultTimeout = TimeSpan.FromSeconds(60); - - public static DistributedApplication? AppHost { get; private set; } - public static string? KafkaConnectionString { get; private set; } - public static string? KafkaConnectionStringFromConfig { get; private set; } - public static string? KafkaContainerIpForFlink { get; private set; } // Kafka IP for Flink jobs (e.g., "172.17.0.2:9093") - public static string? 
TemporalEndpoint { get; private set; } // Discovered Temporal endpoint with dynamic port
-
-    [OneTimeSetUp]
-    public async Task GlobalSetUp()
-    {
-        Console.WriteLine("🌍 ========================================");
-        Console.WriteLine("🌍 GLOBAL TEST INFRASTRUCTURE SETUP START");
-        Console.WriteLine("🌍 ========================================");
-        Console.WriteLine($"🌍 This infrastructure will be shared across ALL test classes");
-        Console.WriteLine($"🌍 Estimated startup time: 3-4 minutes (one-time cost)");
-
-        var sw = Stopwatch.StartNew();
-
-        try
-        {
-            // Clean up test-logs directory from previous test runs
-            CleanupTestLogsDirectory();
-
-            // Capture initial network state before infrastructure starts
-            await NetworkDiagnostics.CaptureNetworkDiagnosticsAsync("0-before-setup");
-
-            // Configure JAR path for Gateway
-            ConfigureGatewayJarPath();
-
-            // Validate Docker environment
-            await ValidateDockerEnvironmentAsync();
-
-            // Build and start Aspire application
-            Console.WriteLine("🔧 Building Aspire ApplicationHost...");
-            var appHost = await DistributedApplicationTestingBuilder.CreateAsync();
-            Console.WriteLine("🔧 Building application...");
-            var app = await appHost.BuildAsync().WaitAsync(DefaultTimeout);
-            Console.WriteLine("🔧 Starting application...");
-            await app.StartAsync().WaitAsync(DefaultTimeout);
-
-            AppHost = app;
-            Console.WriteLine("✅ Aspire ApplicationHost started");
-
-            // Smart polling: Wait for containers to be created and port mappings to be established
-            // Aspire creates containers asynchronously - use smart polling instead of fixed delays
-            Console.WriteLine("⏳ Waiting for Docker/Podman containers to be created and ports to be mapped...");
-            Console.WriteLine("🔍 Using optimized polling (check every 3s, max 30s)...");
-
-            bool containersDetected = false;
-            for (int attempt = 1; attempt <= 10; attempt++) // 10 attempts × 3s = 30s max
-            {
-                await Task.Delay(TimeSpan.FromSeconds(3));
-
-                var containers = await RunDockerCommandAsync("ps --filter name=kafka --format \"{{.Names}}\"");
-                if (!string.IsNullOrWhiteSpace(containers))
-                {
-                    Console.WriteLine($"✅ Kafka container detected after {attempt * 3}s");
-                    containersDetected = true;
-
-                    // Show all containers for diagnostics
-                    var allContainers = await RunDockerCommandAsync("ps --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\"");
-                    Console.WriteLine($"🐳 All containers:\n{allContainers}");
-                    break;
-                }
-                Console.WriteLine($"⏳ Still waiting for containers...
({attempt * 3}s elapsed)"); - } - - if (!containersDetected) - { - Console.WriteLine("โš ๏ธ Containers not detected within 30s, proceeding anyway..."); - var allContainers = await RunDockerCommandAsync("ps --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\""); - Console.WriteLine($"๐Ÿณ Current containers:\n{allContainers}"); - } - - // Capture network state after containers are detected - await NetworkDiagnostics.CaptureNetworkDiagnosticsAsync("1-after-container-detection"); - - // CRITICAL FIX: Discover Kafka container IP for Flink job configurations - // Docker default bridge doesn't support DNS, so we need to use the actual container IP - Console.WriteLine("๐Ÿ”ง Discovering Kafka container IP for Flink jobs..."); - var kafkaContainerIp = await GetKafkaContainerIpAsync(); - Console.WriteLine($"โœ… Kafka container IP: {kafkaContainerIp}"); - - // Store for use in tests (replaces hostname-based connection) - KafkaContainerIpForFlink = kafkaContainerIp; - - // CRITICAL: Use Aspire's configuration system to get Kafka connection string - // This is the proper Aspire pattern instead of hardcoding or Docker inspection - Console.WriteLine("๐Ÿ” Getting Kafka connection string from Aspire configuration..."); - KafkaConnectionStringFromConfig = app.Services.GetRequiredService() - .GetConnectionString("kafka"); - - // Also discover from Docker for comparison/debugging - var discoveredKafkaEndpoint = await GetKafkaEndpointAsync(); - - // Use config value as primary, fallback to discovered if not available - KafkaConnectionString = !string.IsNullOrEmpty(KafkaConnectionStringFromConfig) - ? KafkaConnectionStringFromConfig - : discoveredKafkaEndpoint; - - Console.WriteLine($"โœ… Kafka connection strings:"); - Console.WriteLine($" ๐Ÿ“ก From Aspire config: {KafkaConnectionStringFromConfig ?? 
"(not set)"}"); - Console.WriteLine($" ๐Ÿ“ก From Docker discovery: {discoveredKafkaEndpoint}"); - Console.WriteLine($" ๐Ÿ“ก Using for tests: {KafkaConnectionString}"); - Console.WriteLine($" โ„น๏ธ This address will be used by both test producers/consumers AND Flink jobs"); - - // Get Flink endpoint and wait for readiness with retry mechanism - var flinkEndpoint = await GetFlinkJobManagerEndpointAsync(); - Console.WriteLine($"๐Ÿ” Flink JobManager endpoint: {flinkEndpoint}"); - await RetryWaitForReadyAsync("Flink", () => LocalTestingTestBase.WaitForFlinkReadyAsync($"{flinkEndpoint}v1/overview", DefaultTimeout, default), 3, TimeSpan.FromSeconds(5)); - Console.WriteLine("โœ… Flink JobManager and TaskManager are ready"); - - // Wait for Gateway with retry mechanism - Console.WriteLine("โณ Waiting for Gateway resource to start..."); - await RetryHealthCheckAsync("flink-job-gateway", app, 3, TimeSpan.FromSeconds(5)); - Console.WriteLine("โœ… Gateway resource reported healthy"); - - var gatewayEndpoint = await GetGatewayEndpointAsync(); - Console.WriteLine($"๐Ÿ” Gateway endpoint: {gatewayEndpoint}"); - await RetryWaitForReadyAsync("Gateway", () => LocalTestingTestBase.WaitForGatewayReadyAsync($"{gatewayEndpoint}api/v1/health", DefaultTimeout, default), 3, TimeSpan.FromSeconds(5)); - Console.WriteLine("โœ… Gateway is ready"); - - // Wait for Temporal server resource with retry mechanism - Console.WriteLine("โณ Waiting for Temporal server resource to start..."); - await RetryHealthCheckAsync("temporal-server", app, 3, TimeSpan.FromSeconds(5)); - Console.WriteLine("โœ… Temporal server resource reported healthy"); - - // Then wait for Temporal to be fully initialized - Console.WriteLine("โณ Waiting for Temporal server to be fully ready..."); - Console.WriteLine(" โ„น๏ธ Temporal with PostgreSQL requires initialization time..."); - - // Give Temporal time to complete schema setup - await Task.Delay(TimeSpan.FromSeconds(5)); // Optimized: Reduced from 10s to 5s - - // Discover actual Temporal endpoint from Docker (Aspire uses dynamic ports in testing) - TemporalEndpoint = await GetTemporalEndpointAsync(); - Console.WriteLine($"๐Ÿ” Temporal endpoint: {TemporalEndpoint}"); - await RetryWaitForReadyAsync("Temporal", () => LocalTestingTestBase.WaitForTemporalReadyAsync(TemporalEndpoint, DefaultTimeout, default), 3, TimeSpan.FromSeconds(5)); - Console.WriteLine("โœ… Temporal server is fully ready"); - - // Log TaskManager status for debugging - await LogTaskManagerStatusAsync(); - - // Capture final network state after all infrastructure is ready - await NetworkDiagnostics.CaptureNetworkDiagnosticsAsync("2-infrastructure-ready"); - - Console.WriteLine($"๐ŸŒ ========================================"); - Console.WriteLine($"๐ŸŒ GLOBAL INFRASTRUCTURE READY in {sw.Elapsed.TotalSeconds:F1}s"); - Console.WriteLine($"๐ŸŒ ========================================"); - Console.WriteLine($"๐ŸŒ Kafka connection string: {KafkaConnectionString}"); - Console.WriteLine($"๐ŸŒ Infrastructure will remain active for all tests"); - Console.WriteLine($"๐ŸŒ Tests can now run in parallel with shared infrastructure"); - - // Clean up old network diagnostic logs - NetworkDiagnostics.CleanupOldLogs(); - } - catch (Exception ex) - { - Console.WriteLine($"โŒ Global infrastructure setup failed: {ex.Message}"); - Console.WriteLine($"โŒ Stack trace: {ex.StackTrace}"); - - // Capture network diagnostics on failure - await NetworkDiagnostics.CaptureNetworkDiagnosticsAsync("error-setup-failed"); - - // Capture container 
diagnostics and include in exception - var diagnostics = await GetContainerDiagnosticsAsync(); - - throw new InvalidOperationException( - $"Global infrastructure setup failed: {ex.Message}\n\n" + - $"Container Diagnostics:\n{diagnostics}", - ex); - } - } - - [OneTimeTearDown] - public async Task GlobalTearDown() - { - Console.WriteLine("๐ŸŒ TEARDOWN: Cleaning up test infrastructure..."); - - // CRITICAL: Capture container logs BEFORE stopping/disposing AppHost - // Once AppHost.StopAsync() is called, containers are immediately stopped and may be removed - await CaptureAllContainerLogsAsync(); - - // Capture network state before teardown - await NetworkDiagnostics.CaptureNetworkDiagnosticsAsync("3-before-teardown"); - - if (AppHost != null) - { - try - { - // Aggressive cleanup with minimal timeout - using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(2)); - - try - { - await AppHost.StopAsync(cts.Token); - await AppHost.DisposeAsync(); - Console.WriteLine("โœ… Infrastructure cleaned up"); - } - catch (OperationCanceledException) - { - Console.WriteLine("โœ… Cleanup timed out - runtime will handle remaining resources"); - } - } - catch (Exception ex) - { - Console.WriteLine($"โœ… Cleanup completed with: {ex.Message}"); - } - } - } - - /// - /// Clean up the test-logs directory at the start of test execution. - /// Ensures old logs from previous test runs don't accumulate. - /// - private static void CleanupTestLogsDirectory() - { - try - { - Console.WriteLine("๐Ÿงน Cleaning up test-logs directory..."); - - var repoRoot = FindRepositoryRoot(Environment.CurrentDirectory); - if (repoRoot == null) - { - Console.WriteLine("โš ๏ธ Cannot find repository root, skipping test-logs cleanup"); - return; - } - - var testLogsDir = Path.Combine(repoRoot, "LocalTesting", "test-logs"); - - if (Directory.Exists(testLogsDir)) - { - try - { - // Delete all files and subdirectories - Directory.Delete(testLogsDir, recursive: true); - Console.WriteLine($"โœ… Deleted existing test-logs directory"); - } - catch (IOException ex) - { - Console.WriteLine($"โš ๏ธ Could not delete some files (may be locked): {ex.Message}"); - // Continue anyway - we'll try to clean up what we can - } - catch (UnauthorizedAccessException ex) - { - Console.WriteLine($"โš ๏ธ Access denied when deleting test-logs: {ex.Message}"); - // Continue anyway - } - } - - // Recreate the directory for this test run - Directory.CreateDirectory(testLogsDir); - Console.WriteLine($"โœ… Created fresh test-logs directory: {testLogsDir}"); - } - catch (Exception ex) - { - Console.WriteLine($"โš ๏ธ Error during test-logs cleanup: {ex.Message}"); - // Don't fail the test run if cleanup fails - } - } - - /// - /// Capture logs from Flink containers only before teardown. - /// Only captures logs from JobManager and TaskManager to improve performance. - /// Skips containers with no log output. 
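// Editor's note: the log-capture helpers below pass "2>&1" as a literal argument to
// docker/podman; with UseShellExecute = false there is no shell to interpret the
// redirection, so the container CLI receives it as an extra argument. A sketch that
// captures both streams directly (hypothetical helper, same ProcessStartInfo pattern
// already used in this file):
private static async Task<string> RunDockerLogsAsync(string container, int? tail = null)
{
    var psi = new System.Diagnostics.ProcessStartInfo
    {
        FileName = "docker",
        Arguments = tail is int n ? $"logs --tail {n} {container}" : $"logs {container}",
        RedirectStandardOutput = true,
        RedirectStandardError = true,   // container stderr arrives here; no shell needed
        UseShellExecute = false,
        CreateNoWindow = true
    };

    using var process = System.Diagnostics.Process.Start(psi)!;
    // Read both streams concurrently to avoid deadlocking on full pipe buffers
    var stdoutTask = process.StandardOutput.ReadToEndAsync();
    var stderrTask = process.StandardError.ReadToEndAsync();
    await process.WaitForExitAsync();
    return await stdoutTask + await stderrTask;
}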
- /// - private static async Task CaptureAllContainerLogsAsync() - { - try - { - Console.WriteLine("๐Ÿ“‹ Capturing Flink container logs before teardown..."); - Console.WriteLine(" โ„น๏ธ Only capturing JobManager and TaskManager logs for performance"); - - var repoRoot = FindRepositoryRoot(Environment.CurrentDirectory); - if (repoRoot == null) - { - Console.WriteLine("โš ๏ธ Cannot find repository root, skipping log capture"); - return; - } - - var testLogsDir = Path.Combine(repoRoot, "LocalTesting", "test-logs"); - var timestamp = DateTime.UtcNow.ToString("yyyyMMdd"); - - // PERFORMANCE OPTIMIZATION: Only capture logs from Flink JobManager and TaskManager - // Skip Kafka, Temporal, Redis, Gateway, and other containers to reduce teardown time - await CaptureContainerLogAsync("flink-taskmanager", Path.Combine(testLogsDir, $"Flink.TaskManager.container.log.{timestamp}")); - await CaptureContainerLogAsync("flink-jobmanager", Path.Combine(testLogsDir, $"Flink.JobManager.container.log.{timestamp}")); - - Console.WriteLine("โœ… Flink container logs captured"); - } - catch (Exception ex) - { - Console.WriteLine($"โš ๏ธ Error capturing container logs: {ex.Message}"); - } - } - - /// - /// Capture logs from a specific container with optimized log checking. - /// Skips containers that have no log output to improve performance. - /// - private static async Task CaptureContainerLogAsync(string containerNameFilter, string outputPath) - { - try - { - // Find container by name filter (including stopped containers) - // Use --filter to match containers whose name contains the filter string - var containerList = await RunDockerCommandAsync($"ps -a --filter \"name={containerNameFilter}\" --format \"{{{{.Names}}}}\""); - var containers = containerList.Split('\n', StringSplitOptions.RemoveEmptyEntries) - .Select(c => c.Trim()) - .Where(c => !string.IsNullOrEmpty(c)) - .ToList(); - - if (containers.Count == 0) - { - Console.WriteLine($"โญ๏ธ Skipping: No container matching '{containerNameFilter}' found"); - return; - } - - // Take the first matching container - var containerName = containers[0]; - Console.WriteLine($"๐Ÿ” Processing container: {containerName}"); - - // PERFORMANCE OPTIMIZATION: Check if container has logs before attempting to read them - // Use --tail 1 to quickly check if there's any output - var logCheck = await RunDockerCommandAsync($"logs {containerName} --tail 1 2>&1"); - - // Check if logs contain error about container not found - if (logCheck.Contains("no container with name or ID", StringComparison.OrdinalIgnoreCase)) - { - Console.WriteLine($"โญ๏ธ Skipping: Container {containerName} was already removed"); - return; - } - - // If log check is empty, skip full log capture - if (string.IsNullOrWhiteSpace(logCheck)) - { - Console.WriteLine($"โญ๏ธ Skipping: Container {containerName} has no log output"); - return; - } - - // Container has logs, proceed with full capture - var logs = await RunDockerCommandAsync($"logs {containerName} 2>&1"); - - if (!string.IsNullOrWhiteSpace(logs)) - { - await File.WriteAllTextAsync(outputPath, logs); - var lineCount = logs.Split('\n').Length; - Console.WriteLine($"โœ… Captured {lineCount} lines of logs for {containerName} โ†’ {Path.GetFileName(outputPath)}"); - } - else - { - Console.WriteLine($"โญ๏ธ Skipping: No logs available for {containerName}"); - } - } - catch (Exception ex) - { - Console.WriteLine($"โš ๏ธ Error capturing logs for {containerNameFilter}: {ex.Message}"); - } - } - - private static void ConfigureGatewayJarPath() - { - var 
currentDir = Environment.CurrentDirectory; - var repoRoot = FindRepositoryRoot(currentDir); - - if (repoRoot == null) - { - Console.WriteLine("โš ๏ธ Could not find repository root - Gateway may need to build JAR at runtime"); - return; - } - - // Try Java 17 JAR first (new naming convention) - var releaseJarPath17 = Path.Combine(repoRoot, "FlinkDotNet", "Flink.JobGateway", "bin", "Release", "net9.0", "flink-ir-runner-java17.jar"); - - if (File.Exists(releaseJarPath17)) - { - Environment.SetEnvironmentVariable("FLINK_RUNNER_JAR_PATH", releaseJarPath17); - Console.WriteLine($"โœ… Configured Gateway JAR path: {releaseJarPath17}"); - return; - } - - var debugJarPath17 = Path.Combine(repoRoot, "FlinkDotNet", "Flink.JobGateway", "bin", "Debug", "net9.0", "flink-ir-runner-java17.jar"); - - if (File.Exists(debugJarPath17)) - { - Environment.SetEnvironmentVariable("FLINK_RUNNER_JAR_PATH", debugJarPath17); - Console.WriteLine($"โœ… Configured Gateway JAR path (Debug): {debugJarPath17}"); - return; - } - - Console.WriteLine($"โš ๏ธ Gateway JAR not found - will build on demand"); - } - - private static string? FindRepositoryRoot(string startPath) - { - var dir = new DirectoryInfo(startPath); - while (dir != null) - { - if (File.Exists(Path.Combine(dir.FullName, "global.json"))) - { - return dir.FullName; - } - dir = dir.Parent; - } - return null; - } - - private static async Task ValidateDockerEnvironmentAsync() - { - Console.WriteLine("๐Ÿณ Validating Docker environment..."); - - try - { - var dockerInfo = await RunDockerCommandAsync("info --format \"{{.ServerVersion}}\""); - if (string.IsNullOrWhiteSpace(dockerInfo)) - { - throw new InvalidOperationException("Docker is not running or not accessible"); - } - - Console.WriteLine($"โœ… Docker is available (version: {dockerInfo.Trim()})"); - } - catch (Exception ex) - { - Console.WriteLine($"โŒ Docker validation failed: {ex.Message}"); - throw; - } - } - - - private static async Task RunDockerCommandAsync(string arguments) - { - // Try Docker first, then Podman if Docker fails or returns empty - var dockerOutput = await TryRunContainerCommandAsync("docker", arguments); - if (!string.IsNullOrWhiteSpace(dockerOutput)) - { - return dockerOutput; - } - - // Fallback to Podman if Docker didn't return results - var podmanOutput = await TryRunContainerCommandAsync("podman", arguments); - return podmanOutput ?? 
string.Empty; - } - - /// - /// Log TaskManager status and recent logs for debugging - /// - private static async Task LogTaskManagerStatusAsync() - { - try - { - Console.WriteLine("\nโ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - Console.WriteLine("โ•‘ ๐Ÿ” [TaskManager] Checking TaskManager Status"); - Console.WriteLine("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - - // Find TaskManager container (using name filter which matches containers containing the name) - var containerName = await RunDockerCommandAsync("ps --filter name=flink-taskmanager --format \"{{.Names}}\" | head -1"); - containerName = containerName.Trim(); - - if (string.IsNullOrEmpty(containerName)) - { - Console.WriteLine("โŒ No TaskManager container found"); - return; - } - - Console.WriteLine($"๐Ÿ“ฆ TaskManager container: {containerName}"); - - // Get container status - var status = await RunDockerCommandAsync($"ps --filter \"name={containerName}\" --format \"{{{{.Status}}}}\""); - Console.WriteLine($"๐Ÿ“Š Container status: {status.Trim()}"); - - // Get last 100 lines of TaskManager logs - var logs = await RunDockerCommandAsync($"logs {containerName} --tail 100"); - - if (!string.IsNullOrWhiteSpace(logs)) - { - Console.WriteLine("\n๐Ÿ“‹ TaskManager Recent Logs (last 100 lines):"); - Console.WriteLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - Console.WriteLine(logs); - Console.WriteLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - } - else - { - Console.WriteLine("โš ๏ธ No TaskManager logs available"); - } - } - catch (Exception ex) - { - Console.WriteLine($"โŒ Error checking TaskManager status: {ex.Message}"); - } - } - - private static async Task TryRunContainerCommandAsync(string command, string arguments) - { - try - { - var psi = new ProcessStartInfo - { - FileName = command, - Arguments = arguments, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var process = Process.Start(psi); - if (process == null) - { - Console.WriteLine($"โŒ Failed to start process: {command} {arguments}"); - return null; - } - - var output = await process.StandardOutput.ReadToEndAsync(); - var errorOutput = await process.StandardError.ReadToEndAsync(); - await process.WaitForExitAsync(); - - Console.WriteLine($"๐Ÿ” Command: {command} {arguments}"); - Console.WriteLine($"๐Ÿ” Exit code: {process.ExitCode}"); - Console.WriteLine($"๐Ÿ” Output length: {output?.Length ?? 0}"); - Console.WriteLine($"๐Ÿ” Error output: {(string.IsNullOrWhiteSpace(errorOutput) ? 
"(none)" : errorOutput)}"); - - if (process.ExitCode == 0 && !string.IsNullOrWhiteSpace(output)) - { - return output; - } - - // Also return output even if exit code is non-zero, as long as we have output - // Some docker commands return non-zero but still provide useful output - if (!string.IsNullOrWhiteSpace(output)) - { - Console.WriteLine($"โš ๏ธ Command returned non-zero exit code ({process.ExitCode}) but has output, returning it anyway"); - return output; - } - - Console.WriteLine($"โš ๏ธ Command failed: exit code {process.ExitCode}, no output"); - return null; - } - catch (Exception ex) - { - Console.WriteLine($"โŒ Exception running command {command} {arguments}: {ex.Message}"); - return null; - } - } - - private static async Task GetFlinkJobManagerEndpointAsync() - { - try - { - var flinkContainers = await RunDockerCommandAsync("ps --filter \"name=flink-jobmanager\" --format \"{{.Ports}}\""); - var lines = flinkContainers.Split('\n', StringSplitOptions.RemoveEmptyEntries); - - foreach (var line in lines) - { - if (line.Contains("->8081/tcp")) - { - var match = System.Text.RegularExpressions.Regex.Match(line, @"127\.0\.0\.1:(\d+)->8081"); - if (match.Success) - { - return $"http://localhost:{match.Groups[1].Value}/"; - } - } - } - - throw new InvalidOperationException($"Could not determine Flink JobManager endpoint from Docker ports: {flinkContainers}"); - } - catch (Exception ex) - { - throw new InvalidOperationException($"Failed to get Flink JobManager endpoint: {ex.Message}", ex); - } - } - - private static async Task GetGatewayEndpointAsync() - { - try - { - var gatewayContainers = await RunDockerCommandAsync("ps --filter \"name=flink-job-gateway\" --format \"{{.Ports}}\""); - - if (!string.IsNullOrWhiteSpace(gatewayContainers)) - { - var lines = gatewayContainers.Split('\n', StringSplitOptions.RemoveEmptyEntries); - foreach (var line in lines) - { - var match = System.Text.RegularExpressions.Regex.Match(line, @"127\.0\.0\.1:(\d+)->(\d+)/tcp"); - if (match.Success) - { - return $"http://localhost:{match.Groups[1].Value}/"; - } - } - } - - return $"http://localhost:{Ports.GatewayHostPort}/"; - } - catch (Exception ex) - { - Console.WriteLine($"โš ๏ธ Gateway endpoint discovery failed: {ex.Message}, using configured port {Ports.GatewayHostPort}"); - return $"http://localhost:{Ports.GatewayHostPort}/"; - } - } - - private static async Task GetTemporalEndpointAsync() - { - try - { - var temporalContainers = await RunDockerCommandAsync("ps --filter \"name=temporal-server\" --format \"{{.Ports}}\""); - var lines = temporalContainers.Split('\n', StringSplitOptions.RemoveEmptyEntries); - - foreach (var line in lines) - { - // Look for port mapping to 7233 (Temporal gRPC port) - if (line.Contains("->7233/tcp")) - { - var match = System.Text.RegularExpressions.Regex.Match(line, @"127\.0\.0\.1:(\d+)->7233"); - if (match.Success) - { - return $"localhost:{match.Groups[1].Value}"; - } - } - } - - throw new InvalidOperationException($"Could not determine Temporal endpoint from Docker ports: {temporalContainers}"); - } - catch (Exception ex) - { - throw new InvalidOperationException($"Failed to get Temporal endpoint: {ex.Message}", ex); - } - } - - /// - /// Get the dynamically allocated Kafka endpoint from Aspire. - /// Aspire DCP assigns random ports during testing, so we must query the actual endpoint. - /// Kafka container exposes port 9092 internally, which gets mapped to a random host port. 
- /// - private static async Task GetKafkaEndpointAsync() - { - try - { - var kafkaContainers = await RunDockerCommandAsync("ps --filter \"name=kafka\" --format \"{{.Ports}}\""); - Console.WriteLine($"๐Ÿ” Kafka container port mappings: {kafkaContainers.Trim()}"); - - return ExtractKafkaEndpointFromPorts(kafkaContainers); - } - catch (Exception ex) - { - throw new InvalidOperationException($"Failed to discover Kafka endpoint from Docker: {ex.Message}", ex); - } - } - - private static string ExtractKafkaEndpointFromPorts(string kafkaContainers) - { - var lines = kafkaContainers.Split('\n', StringSplitOptions.RemoveEmptyEntries); - foreach (var line in lines) - { - // Look for port mapping to 9092 (Kafka's default listener port) - // Aspire maps container port 9092 to a dynamic host port for external access - // Format: 127.0.0.1:PORT->9092/tcp or 0.0.0.0:PORT->9092/tcp - var match = System.Text.RegularExpressions.Regex.Match(line, @"(?:127\.0\.0\.1|0\.0\.0\.0):(\d+)->9092"); - if (match.Success) - { - var port = match.Groups[1].Value; - Console.WriteLine($"๐Ÿ” Found Kafka port mapping: host {port} -> container 9092"); - return $"localhost:{port}"; - } - } - - throw new InvalidOperationException($"Could not determine Kafka endpoint from Docker/Podman ports: {kafkaContainers}"); - } - - /// - /// Get Kafka container IP address for use in Flink job configurations - /// Works with both Docker (bridge network) and Podman (podman network) - /// - private static async Task GetKafkaContainerIpAsync() - { - try - { - var kafkaContainers = await RunDockerCommandAsync("ps --filter \"name=kafka-\" --format \"{{.Names}}\""); - var kafkaContainer = kafkaContainers.Split('\n', StringSplitOptions.RemoveEmptyEntries).FirstOrDefault(); - - if (string.IsNullOrWhiteSpace(kafkaContainer)) - { - throw new InvalidOperationException("Kafka container not found"); - } - - // Try Docker bridge network first - var ipAddress = await RunDockerCommandAsync($"inspect {kafkaContainer} --format \"{{{{.NetworkSettings.Networks.bridge.IPAddress}}}}\""); - var ip = ipAddress.Trim(); - - // If bridge network doesn't have IP, try podman network (for Podman runtime) - if (string.IsNullOrWhiteSpace(ip) || ip == "") - { - Console.WriteLine($"๐Ÿ” Bridge network IP not found, trying podman network..."); - ipAddress = await RunDockerCommandAsync($"inspect {kafkaContainer} --format \"{{{{.NetworkSettings.Networks.podman.IPAddress}}}}\""); - ip = ipAddress.Trim(); - } - - if (string.IsNullOrWhiteSpace(ip) || ip == "") - { - // Fallback: Get the first available network IP - Console.WriteLine($"๐Ÿ” Specific network not found, getting first available IP..."); - ipAddress = await RunDockerCommandAsync($"inspect {kafkaContainer} --format \"{{{{range .NetworkSettings.Networks}}}}{{{{.IPAddress}}}}{{{{end}}}}\""); - ip = ipAddress.Trim(); - } - - if (string.IsNullOrWhiteSpace(ip) || ip == "") - { - throw new InvalidOperationException($"Could not determine Kafka container IP from any network. 
Container: {kafkaContainer}"); - } - - Console.WriteLine($"โœ… Kafka container IP discovered: {ip}"); - - // Return IP with PLAINTEXT_INTERNAL port (9093) - return $"{ip}:9093"; - } - catch (Exception ex) - { - throw new InvalidOperationException($"Failed to get Kafka container IP: {ex.Message}", ex); - } - } - - /// - /// Get container diagnostics as a string - detects Docker or Podman and captures container status - /// - private static async Task GetContainerDiagnosticsAsync() - { - try - { - var diagnostics = new System.Text.StringBuilder(); - diagnostics.AppendLine("\nโ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine("โ•‘ ๐Ÿ” [Diagnostics] Container Status at Test Failure"); - diagnostics.AppendLine("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - - // Try Docker first - var dockerContainers = await TryRunContainerCommandAsync("docker", "ps -a --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\""); - if (!string.IsNullOrWhiteSpace(dockerContainers)) - { - diagnostics.AppendLine("\n๐Ÿณ Docker Containers:"); - diagnostics.AppendLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - diagnostics.AppendLine(dockerContainers); - diagnostics.AppendLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - - // Add TaskManager logs for debugging - await AppendTaskManagerLogsAsync(diagnostics); - - // Also write to console for immediate visibility - Console.WriteLine(diagnostics.ToString()); - return diagnostics.ToString(); - } - - // Try Podman if Docker didn't work - var podmanContainers = await TryRunContainerCommandAsync("podman", "ps -a --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\""); - if (!string.IsNullOrWhiteSpace(podmanContainers)) - { - diagnostics.AppendLine("\n๐Ÿฆญ Podman Containers:"); - diagnostics.AppendLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - diagnostics.AppendLine(podmanContainers); - diagnostics.AppendLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - - // Add TaskManager logs for debugging - await AppendTaskManagerLogsAsync(diagnostics); - - // Also write to console for immediate visibility - Console.WriteLine(diagnostics.ToString()); - return diagnostics.ToString(); - } - - diagnostics.AppendLine("โš ๏ธ No container runtime (Docker/Podman) responded to 'ps -a' command"); - diagnostics.AppendLine(" This suggests the container runtime may not be running or accessible"); - - // Also write to console for immediate visibility - Console.WriteLine(diagnostics.ToString()); - return diagnostics.ToString(); - } - catch (Exception ex) - { - var errorMsg = $"โš ๏ธ Failed to get container diagnostics: {ex.Message}"; - Console.WriteLine(errorMsg); - return errorMsg; - } - } - - /// - /// Append 
TaskManager logs to diagnostics output - /// - private static async Task AppendTaskManagerLogsAsync(System.Text.StringBuilder diagnostics) - { - try - { - var containerName = await RunDockerCommandAsync("ps --filter \"name=flink-taskmanager\" --format \"{{.Names}}\" | head -1"); - containerName = containerName.Trim(); - - if (string.IsNullOrEmpty(containerName)) - { - diagnostics.AppendLine("\nโš ๏ธ No TaskManager container found for log capture"); - return; - } - - diagnostics.AppendLine($"\n๐Ÿ“‹ TaskManager ({containerName}) Recent Logs (last 20 lines):"); - diagnostics.AppendLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - - var logs = await RunDockerCommandAsync($"logs {containerName} --tail 20 2>&1"); - if (!string.IsNullOrWhiteSpace(logs)) - { - diagnostics.AppendLine(logs); - } - else - { - diagnostics.AppendLine("โš ๏ธ No TaskManager logs available"); - } - diagnostics.AppendLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - } - catch (Exception ex) - { - diagnostics.AppendLine($"\nโš ๏ธ Error capturing TaskManager logs: {ex.Message}"); - } - } - - /// - /// Retry health check for a resource with configurable retries and delay - /// - private static async Task RetryHealthCheckAsync(string resourceName, DistributedApplication app, int maxRetries, TimeSpan delayBetweenRetries) - { - Exception? lastException = null; - - for (int attempt = 1; attempt <= maxRetries; attempt++) - { - try - { - Console.WriteLine($"๐Ÿ”„ Health check attempt {attempt}/{maxRetries} for '{resourceName}'..."); - - // Wait for resource to be healthy (with a reasonable timeout per attempt) - await app.ResourceNotifications - .WaitForResourceHealthyAsync(resourceName) - .WaitAsync(TimeSpan.FromSeconds(30)); - - Console.WriteLine($"โœ… '{resourceName}' became healthy on attempt {attempt}"); - return; // Success! - } - catch (Exception ex) - { - lastException = ex; - Console.WriteLine($"โš ๏ธ Attempt {attempt}/{maxRetries} failed for '{resourceName}': {ex.Message}"); - - if (attempt < maxRetries) - { - Console.WriteLine($"โณ Waiting {delayBetweenRetries.TotalSeconds}s before retry..."); - await Task.Delay(delayBetweenRetries); - } - } - } - - // All retries failed - throw new InvalidOperationException( - $"Resource '{resourceName}' failed to become healthy after {maxRetries} attempts. " + - $"Last error: {lastException?.Message}", - lastException); - } - - /// - /// Retry a readiness check operation (like WaitForKafkaReadyAsync, WaitForFlinkReadyAsync, etc.) - /// - private static async Task RetryWaitForReadyAsync(string serviceName, Func readyCheckFunc, int maxRetries, TimeSpan delayBetweenRetries) - { - Exception? lastException = null; - - for (int attempt = 1; attempt <= maxRetries; attempt++) - { - try - { - Console.WriteLine($"๐Ÿ”„ Readiness check attempt {attempt}/{maxRetries} for '{serviceName}'..."); - await readyCheckFunc(); - Console.WriteLine($"โœ… '{serviceName}' became ready on attempt {attempt}"); - return; // Success! 
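// Editor's note: the "| head -1" suffixes used with RunDockerCommandAsync in this file
// (e.g. when locating the TaskManager container above) also depend on a shell pipeline
// that Process.Start never creates, so the pipe characters are handed to docker as
// arguments. Taking the first line in C# sidesteps the problem (a sketch, not the
// original implementation):
private static string FirstLineOrEmpty(string commandOutput) =>
    commandOutput.Split('\n', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
                 .FirstOrDefault() ?? string.Empty;

// Usage sketch:
//   var names = await RunDockerCommandAsync("ps --filter \"name=flink-taskmanager\" --format \"{{.Names}}\"");
//   var containerName = FirstLineOrEmpty(names);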
-            }
-            catch (Exception ex)
-            {
-                lastException = ex;
-                Console.WriteLine($"⚠️ Attempt {attempt}/{maxRetries} failed for '{serviceName}': {ex.Message}");
-
-                if (attempt < maxRetries)
-                {
-                    Console.WriteLine($"⏳ Waiting {delayBetweenRetries.TotalSeconds}s before retry...");
-                    await Task.Delay(delayBetweenRetries);
-                }
-            }
-        }
-
-        // All retries failed
-        throw new InvalidOperationException(
-            $"Service '{serviceName}' failed to become ready after {maxRetries} attempts. " +
-            $"Last error: {lastException?.Message}",
-            lastException);
-    }
-}
\ No newline at end of file
diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/LocalTestingTestBase.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/LocalTestingTestBase.cs
deleted file mode 100644
index 5210e436..00000000
--- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/LocalTestingTestBase.cs
+++ /dev/null
@@ -1,1499 +0,0 @@
-using System.Diagnostics;
-using Aspire.Hosting.Testing;
-using Aspire.Hosting;
-using Aspire.Hosting.ApplicationModel;
-using Confluent.Kafka;
-using Confluent.Kafka.Admin;
-using LocalTesting.FlinkSqlAppHost;
-using Microsoft.Extensions.DependencyInjection;
-using NUnit.Framework;
-
-namespace LocalTesting.IntegrationTests;
-
-/// <summary>
-/// Enhanced test base class for LocalTesting integration tests.
-/// Based on successful patterns from BackPressureExample.IntegrationTests.KafkaTestBase
-/// with improvements for Flink infrastructure readiness validation and Docker connectivity.
-/// </summary>
-public abstract class LocalTestingTestBase
-{
-    /// <summary>
-    /// Access to shared AppHost instance from GlobalTestInfrastructure.
-    /// Infrastructure is initialized once for all tests, dramatically reducing startup overhead.
-    /// </summary>
-    protected static DistributedApplication? AppHost => GlobalTestInfrastructure.AppHost;
-
-    /// <summary>
-    /// Access to shared Kafka connection string from GlobalTestInfrastructure.
-    /// CRITICAL: This address is used by BOTH test producers/consumers AND Flink jobs.
-    /// The simplified architecture uses a single Kafka address (localhost:port) accessible
-    /// from both host and containers via Docker port mapping.
-    /// </summary>
-    protected static string? KafkaConnectionString => GlobalTestInfrastructure.KafkaConnectionString;
-
-    /// <summary>
-    /// Access to discovered Temporal endpoint from GlobalTestInfrastructure.
-    /// Aspire allocates dynamic ports during testing, so we must use the discovered endpoint.
-    /// </summary>
-    protected static string? TemporalEndpoint => GlobalTestInfrastructure.TemporalEndpoint;
-
-    /// <summary>
-    /// No infrastructure setup needed - using shared global infrastructure.
-    /// Tests can start immediately without waiting for infrastructure startup.
-    /// </summary>
-    [OneTimeSetUp]
-    public virtual Task OneTimeSetUp()
-    {
-        // Verify shared infrastructure is available
-        if (AppHost == null || string.IsNullOrEmpty(KafkaConnectionString))
-        {
-            throw new InvalidOperationException(
-                "Global test infrastructure is not initialized. " +
-                "Ensure GlobalTestInfrastructure.GlobalSetUp completed successfully.");
-        }
-
-        TestContext.WriteLine($"✅ Test class using shared infrastructure (Kafka: {KafkaConnectionString})");
-        return Task.CompletedTask;
-    }
-
-    /// <summary>
-    /// No teardown needed - shared infrastructure persists across all tests.
- /// - [OneTimeTearDown] - public virtual Task OneTimeTearDown() - { - TestContext.WriteLine("โœ… Test class completed (shared infrastructure remains active)"); - return Task.CompletedTask; - } - - /// - /// Get detailed information about Kafka containers including network configuration. - /// - private static async Task GetKafkaContainerDetailsAsync() - { - try - { - // Get container details with network information - var containerDetails = await RunDockerCommandAsync( - "ps --filter \"name=kafka\" --format \"{{.Names}} {{.Ports}} {{.Networks}}\" --no-trunc" - ); - - if (!string.IsNullOrWhiteSpace(containerDetails)) - { - return containerDetails.Trim(); - } - - // Try alternative container discovery - var allContainers = await RunDockerCommandAsync( - "ps --format \"{{.Names}} {{.Ports}} {{.Networks}}\" --no-trunc" - ); - - TestContext.WriteLine($"๐Ÿ” All container details: {allContainers}"); - return "No Kafka containers found"; - } - catch (Exception ex) - { - return $"Could not get container details: {ex.Message}"; - } - } - - /// - /// Test if a specific port is accessible. - /// - private static async Task TestPortConnectivityAsync(string host, int port) - { - try - { - using var client = new System.Net.Sockets.TcpClient(); - await client.ConnectAsync(host, port); - return client.Connected; - } - catch - { - return false; - } - } - - /// - /// Run a Docker command and return the output. - /// - private static async Task RunDockerCommandAsync(string arguments) - { - // Try Docker first, then Podman if Docker fails or returns empty - var dockerOutput = await TryRunContainerCommandAsync("docker", arguments); - if (!string.IsNullOrWhiteSpace(dockerOutput)) - { - return dockerOutput; - } - - // Fallback to Podman if Docker didn't return results - var podmanOutput = await TryRunContainerCommandAsync("podman", arguments); - return podmanOutput ?? string.Empty; - } - - private static async Task TryRunContainerCommandAsync(string command, string arguments) - { - try - { - var psi = new ProcessStartInfo - { - FileName = command, - Arguments = arguments, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var process = Process.Start(psi); - if (process == null) - { - return null; - } - - var output = await process.StandardOutput.ReadToEndAsync(); - await process.WaitForExitAsync(); - - if (process.ExitCode == 0 && !string.IsNullOrWhiteSpace(output)) - { - return output; - } - - return null; - } - catch - { - return null; - } - } - - /// - /// Enhanced Kafka readiness check copied from BackPressureExample.IntegrationTests.KafkaTestBase - /// with improved error handling, fallback strategies, and dynamic container discovery. 
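// Editor's note: TestPortConnectivityAsync above awaits TcpClient.ConnectAsync with no
// timeout, so a firewalled or black-holed port can stall a diagnostic pass for the full
// OS connect timeout. A bounded variant (sketch; the ConnectAsync(string, int,
// CancellationToken) overload exists on .NET 5 and later):
private static async Task<bool> TestPortConnectivityAsync(string host, int port, TimeSpan timeout)
{
    try
    {
        using var cts = new CancellationTokenSource(timeout);
        using var client = new System.Net.Sockets.TcpClient();
        await client.ConnectAsync(host, port, cts.Token);  // throws OperationCanceledException on timeout
        return client.Connected;
    }
    catch
    {
        return false;  // refused, unreachable, or timed out
    }
}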
- /// - public static async Task WaitForKafkaReadyAsync(string bootstrapServers, TimeSpan timeout, CancellationToken ct) - { - var sw = Stopwatch.StartNew(); - var attempt = 0; - TestContext.WriteLine($"โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - TestContext.WriteLine($"โ•‘ ๐Ÿ”Ž [KafkaReady] Connecting to Kafka"); - TestContext.WriteLine($"โ•‘ ๐Ÿ“ก Bootstrap servers: {bootstrapServers}"); - TestContext.WriteLine($"โ•‘ โฑ๏ธ Timeout: {timeout.TotalSeconds}s"); - TestContext.WriteLine($"โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - - var bootstrapVariations = await GetBootstrapServerVariationsAsync(bootstrapServers); - TestContext.WriteLine($"๐Ÿ”— [KafkaReady] Will try connection variations: {string.Join(", ", bootstrapVariations)}"); - - Exception? lastException = null; - - while (sw.Elapsed < timeout && !ct.IsCancellationRequested) - { - attempt++; - - var (connected, exception) = await TryConnectToKafkaAsync(bootstrapVariations, attempt, sw.Elapsed); - if (connected) - return; - - lastException = exception; - await LogKafkaAttemptDiagnosticsAsync(attempt, bootstrapVariations, lastException); - await Task.Delay(100, ct); // Optimized: Reduced to 100ms (was 250ms) - } - - throw await CreateKafkaTimeoutExceptionAsync(timeout, bootstrapVariations, lastException); - } - - private static Task<(bool connected, Exception? exception)> TryConnectToKafkaAsync(string[] bootstrapVariations, int attempt, TimeSpan elapsed) - { - Exception? lastException = null; - - foreach (var bootstrap in bootstrapVariations) - { - try - { - using var admin = CreateKafkaAdminClient(bootstrap); - var md = admin.GetMetadata(TimeSpan.FromSeconds(2)); - - if (md?.Brokers?.Count > 0) - { - TestContext.WriteLine($"โœ… [KafkaReady] Metadata OK (brokers={md.Brokers.Count}) using {bootstrap} after {attempt} attempt(s), {elapsed.TotalSeconds:F1}s"); - return Task.FromResult((true, (Exception?)null)); - } - } - catch (Exception ex) - { - lastException = ex; - } - } - return Task.FromResult((false, lastException)); - } - - private static IAdminClient CreateKafkaAdminClient(string bootstrap) - { - return new AdminClientBuilder(new AdminClientConfig - { - BootstrapServers = bootstrap, - SocketTimeoutMs = 3000, - BrokerAddressFamily = BrokerAddressFamily.V4, - SecurityProtocol = SecurityProtocol.Plaintext, - ApiVersionRequest = true, - LogConnectionClose = false, - AllowAutoCreateTopics = true - }) - .SetLogHandler((_, _) => { /* Suppress logs during readiness */ }) - .SetErrorHandler((_, _) => { /* Suppress errors during readiness */ }) - .Build(); - } - - private static async Task LogKafkaAttemptDiagnosticsAsync(int attempt, string[] bootstrapVariations, Exception? 
lastException) - { - if (attempt % 10 == 0) - { - TestContext.WriteLine($"โณ [KafkaReady] Attempt {attempt} - detailed diagnostics:"); - await LogDetailedDiagnosticsAsync(bootstrapVariations, lastException); - } - else if (attempt % 5 == 0) - { - TestContext.WriteLine($"โณ [KafkaReady] Attempt {attempt} - trying multiple connection methods..."); - if (lastException != null) - { - TestContext.WriteLine($" Last error: {lastException.GetType().Name} - {lastException.Message}"); - } - } - } - - private static async Task CreateKafkaTimeoutExceptionAsync(TimeSpan timeout, string[] bootstrapVariations, Exception? lastException) - { - var containerStatus = await GetKafkaContainerDetailsAsync(); - return new TimeoutException($"Kafka did not become ready within {timeout.TotalSeconds:F0}s. " + - $"Bootstrap servers tried: {string.Join(", ", bootstrapVariations)}. " + - $"Last error: {lastException?.Message}. " + - $"Container diagnostics: {containerStatus}"); - } - - /// - /// Get bootstrap server variations for dynamic port configuration. - /// CRITICAL: Aspire allocates dynamic ports, so we use the discovered bootstrap server. - /// We only add localhost/127.0.0.1 variations of the discovered endpoint. - /// - private static Task GetBootstrapServerVariationsAsync(string originalBootstrap) - { - var variations = new List - { - originalBootstrap, - originalBootstrap.Replace("localhost", "127.0.0.1") - }; - - // Remove duplicates - return Task.FromResult(variations.Distinct().ToArray()); - } - - /// - /// Log detailed diagnostics for Kafka connectivity troubleshooting. - /// - private static async Task LogDetailedDiagnosticsAsync(string[] bootstrapVariations, Exception? lastException) - { - try - { - TestContext.WriteLine("๐Ÿ” Detailed connectivity diagnostics:"); - - // Test each endpoint manually - foreach (var endpoint in bootstrapVariations.Take(3)) // Test first 3 to avoid spam - { - var parts = endpoint.Split(':'); - if (parts.Length == 2 && int.TryParse(parts[1], out var port)) - { - var reachable = await TestPortConnectivityAsync(parts[0], port); - TestContext.WriteLine($" {endpoint}: {(reachable ? "โœ… Reachable" : "โŒ Not reachable")}"); - } - } - - // Container status - var containers = await RunDockerCommandAsync("ps --filter \"name=kafka\" --format \"{{.Names}}: {{.Status}} - {{.Ports}}\""); - TestContext.WriteLine($" Container Status: {containers.Trim()}"); - - // Network information - var networks = await RunDockerCommandAsync("network ls --format \"{{.Name}}: {{.Driver}}\""); - TestContext.WriteLine($" Networks: {networks.Replace('\n', ' ').Trim()}"); - - if (lastException != null) - { - TestContext.WriteLine($" Last Exception: {lastException.GetType().Name}: {lastException.Message}"); - } - } - catch (Exception ex) - { - TestContext.WriteLine($"โš ๏ธ Could not gather detailed diagnostics: {ex.Message}"); - } - } - - /// - /// Enhanced Flink readiness check with proper API validation and TaskManager status checking. - /// Improved from original LocalTesting tests with better error handling. - /// - /// Flink overview API endpoint - /// Maximum time to wait - /// Cancellation token - /// If true, requires at least one free task slot. Use true for initial setup, false for per-test checks. 
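// Editor's note: GetFlinkJobManagerEndpointAsync, GetGatewayEndpointAsync and
// GetTemporalEndpointAsync in GlobalTestInfrastructure.cs above each re-implement the
// same regex with a hard-coded 127.0.0.1 prefix, while the Kafka variant also accepts
// 0.0.0.0. A shared sketch covering both bind addresses ("ExtractHostPort" is a
// hypothetical helper name):
private static int? ExtractHostPort(string dockerPsPorts, int containerPort)
{
    // Matches e.g. "127.0.0.1:32771->8081/tcp" or "0.0.0.0:32771->8081/tcp"
    var match = System.Text.RegularExpressions.Regex.Match(
        dockerPsPorts, $@"(?:127\.0\.0\.1|0\.0\.0\.0):(\d+)->{containerPort}/tcp");
    return match.Success ? int.Parse(match.Groups[1].Value) : null;
}

// Usage sketch: var port = ExtractHostPort(portsLine, 8081) ?? Ports.JobManagerHostPort;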
public static async Task WaitForFlinkReadyAsync(string overviewUrl, TimeSpan timeout, CancellationToken ct, bool requireFreeSlots = true)
-    {
-        using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(10) };
-        var sw = Stopwatch.StartNew();
-        var attempt = 0;
-
-        TestContext.WriteLine($"╔═════════════════════════════════════════════════════════════");
-        TestContext.WriteLine($"║ 🔎 [FlinkReady] Connecting to Flink JobManager");
-        TestContext.WriteLine($"║ 📡 Overview URL: {overviewUrl}");
-        TestContext.WriteLine($"║ ⏱️ Timeout: {timeout.TotalSeconds}s");
-        TestContext.WriteLine($"║ 🎯 Require free slots: {requireFreeSlots}");
-        TestContext.WriteLine($"╚═════════════════════════════════════════════════════════════");
-
-        await InitializeFlinkReadinessCheckAsync(overviewUrl, timeout);
-
-        while (sw.Elapsed < timeout && !ct.IsCancellationRequested)
-        {
-            attempt++;
-            if (await CheckFlinkJobManagerAsync(http, overviewUrl, attempt, ct, requireFreeSlots))
-            {
-                var slotsMessage = requireFreeSlots ? " with available slots" : "";
-                TestContext.WriteLine($"✅ [FlinkReady] JobManager with TaskManagers ready{slotsMessage} at {overviewUrl} after {attempt} attempt(s), {sw.Elapsed.TotalSeconds:F1}s");
-                return;
-            }
-
-            await Task.Delay(200, ct); // Optimized: Reduced to 200ms (was 500ms)
-        }
-
-        await LogFlinkContainerDiagnosticsAsync();
-        throw new TimeoutException($"Flink JobManager not ready within {timeout.TotalSeconds:F0}s at {overviewUrl}");
-    }
-
-    private static async Task InitializeFlinkReadinessCheckAsync(string overviewUrl, TimeSpan timeout)
-    {
-        TestContext.WriteLine($"🔎 [FlinkReady] Probing Flink JobManager at {overviewUrl} (timeout: {timeout.TotalSeconds:F0}s)");
-        TestContext.WriteLine($"⏳ [FlinkReady] Checking Flink container status immediately...");
-
-        await Task.Delay(500); // Optimized: Reduced to 500ms (was 2000ms)
-
-        var portAccessible = await TestPortConnectivityAsync("localhost", Ports.JobManagerHostPort);
-        TestContext.WriteLine($"🔍 [FlinkReady] Port {Ports.JobManagerHostPort} accessible: {portAccessible}");
-    }
-
-    /// <summary>
-    /// Check if Flink JobManager is ready with TaskManagers and available task slots.
-    /// Enhanced to verify task slots are available before allowing job submission.
-    /// </summary>
-    /// <param name="http">HTTP client to use for requests</param>
-    /// <param name="overviewUrl">Flink overview API URL</param>
-    /// <param name="attempt">Attempt number for logging</param>
-    /// <param name="ct">Cancellation token</param>
-    /// <param name="requireFreeSlots">If true, requires at least one free task slot</param>
-    private static async Task<bool> CheckFlinkJobManagerAsync(HttpClient http, string overviewUrl, int attempt, CancellationToken ct, bool requireFreeSlots)
-    {
-        try
-        {
-            // First check overview endpoint to verify TaskManagers are registered
-            var resp = await http.GetAsync(overviewUrl, ct);
-            if (resp.IsSuccessStatusCode)
-            {
-                var content = await resp.Content.ReadAsStringAsync(ct);
-                if (!ValidateFlinkResponse(content, attempt))
-                {
-                    return false;
-                }
-
-                // TaskManagers are registered - check slots only if required
-                if (requireFreeSlots)
-                {
-                    var baseUrl = overviewUrl.Replace("/v1/overview", "");
-                    return await CheckTaskManagerSlotsAsync(http, baseUrl, attempt, ct);
-                }
-
-                // Slots not required, just TaskManager registration is enough
-                return true;
-            }
-
-            TestContext.WriteLine($"⏳ [FlinkReady] Attempt {attempt}: HTTP {resp.StatusCode}");
-            return false;
-        }
-        catch (HttpRequestException ex)
-        {
-            await HandleFlinkHttpExceptionAsync(ex, attempt);
-            return false;
-        }
-        catch (Exception ex)
-        {
-            TestContext.WriteLine($"⏳ [FlinkReady] Attempt {attempt} failed: {ex.GetType().Name} - {ex.Message}");
-            return false;
-        }
-    }
-
-    /// <summary>
-    /// Check if TaskManagers have available task slots for job submission.
-    /// Queries /v1/taskmanagers endpoint to verify at least one free slot exists.
-    /// </summary>
-    private static async Task<bool> CheckTaskManagerSlotsAsync(HttpClient http, string baseUrl, int attempt, CancellationToken ct)
-    {
-        try
-        {
-            var taskManagersUrl = $"{baseUrl}/v1/taskmanagers";
-            var resp = await http.GetAsync(taskManagersUrl, ct);
-
-            if (!resp.IsSuccessStatusCode)
-            {
-                TestContext.WriteLine($"⏳ [FlinkReady] Attempt {attempt}: TaskManagers endpoint returned {resp.StatusCode}");
-                return false;
-            }
-
-            var content = await resp.Content.ReadAsStringAsync(ct);
-
-            // Parse JSON to check for available slots
-            // Expected format: {"taskmanagers":[{"id":"...","slotsNumber":2,"freeSlots":2,...}]}
-            if (string.IsNullOrWhiteSpace(content) || !content.Contains("taskmanagers"))
-            {
-                TestContext.WriteLine($"⏳ [FlinkReady] Attempt {attempt}: TaskManagers response missing 'taskmanagers' field");
-                return false;
-            }
-
-            // Simple JSON parsing to check for freeSlots > 0
-            // Look for "freeSlots": pattern followed by a number greater than 0
-            var freeSlotsMatch = System.Text.RegularExpressions.Regex.Match(content, @"""freeSlots""\s*:\s*(\d+)");
-            if (freeSlotsMatch.Success)
-            {
-                var freeSlots = int.Parse(freeSlotsMatch.Groups[1].Value);
-                if (freeSlots > 0)
-                {
-                    TestContext.WriteLine($"✅ [FlinkReady] Attempt {attempt}: TaskManagers ready with {freeSlots} free slot(s)");
-                    return true;
-                }
-                else
-                {
-                    TestContext.WriteLine($"⏳ [FlinkReady] Attempt {attempt}: TaskManagers registered but no free slots available yet");
-                    return false;
-                }
-            }
-
-            TestContext.WriteLine($"⏳ [FlinkReady] Attempt {attempt}: Could not parse freeSlots from TaskManagers response");
-            return false;
-        }
-        catch (Exception ex)
-        {
-            TestContext.WriteLine($"⏳ [FlinkReady] Attempt {attempt}: TaskManager slot check failed - {ex.GetType().Name}: {ex.Message}");
-            return false;
-        }
-    }
-
-    /// <summary>
-    /// Validate Flink JobManager response content.
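// Editor's note: CheckTaskManagerSlotsAsync above scrapes freeSlots with a regex, which
// would also match numbers inside unrelated string values. A System.Text.Json sketch
// that sums the freeSlots fields of the /v1/taskmanagers payload instead (the response
// shape is assumed from the comment above, not verified here):
private static int CountFreeSlots(string taskManagersJson)
{
    using var doc = System.Text.Json.JsonDocument.Parse(taskManagersJson);
    if (!doc.RootElement.TryGetProperty("taskmanagers", out var taskManagers))
        return 0;

    var free = 0;
    foreach (var tm in taskManagers.EnumerateArray())
    {
        if (tm.TryGetProperty("freeSlots", out var slots))
            free += slots.GetInt32();
    }
    return free;
}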
- /// - private static bool ValidateFlinkResponse(string content, int attempt) - { - if (!string.IsNullOrEmpty(content) && content.Contains("taskmanagers")) - { - return true; - } - - if (!string.IsNullOrEmpty(content)) - { - TestContext.WriteLine($"โณ [FlinkReady] Attempt {attempt}: JobManager responding but TaskManagers not ready yet"); - } - - return false; - } - - /// - /// Handle HTTP exceptions during Flink readiness checks. - /// - private static async Task HandleFlinkHttpExceptionAsync(HttpRequestException ex, int attempt) - { - if (ex.InnerException is System.Net.Sockets.SocketException socketEx) - { - TestContext.WriteLine($"โณ [FlinkReady] Attempt {attempt}: Connection refused (SocketError: {socketEx.SocketErrorCode}) - Flink process still starting"); - } - else - { - TestContext.WriteLine($"โณ [FlinkReady] Attempt {attempt}: {ex.GetType().Name} - {ex.Message}"); - } - - // Log detailed diagnostics every 10 attempts - if (attempt % 10 == 0) - { - await LogFlinkContainerDiagnosticsAsync(); - } - } - - /// - /// Log detailed Flink container diagnostics for troubleshooting. - /// - private static async Task LogFlinkContainerDiagnosticsAsync() - { - try - { - TestContext.WriteLine("๐Ÿ” [FlinkReady] Container diagnostics:"); - - // Check Flink containers - var flinkContainers = await RunDockerCommandAsync("ps --filter \"name=flink\" --format \"{{.Names}}: {{.Status}} - {{.Ports}}\""); - TestContext.WriteLine($" Flink Containers: {flinkContainers.Trim()}"); - - // Check if port is listening - var portTest = await TestPortConnectivityAsync("localhost", Ports.JobManagerHostPort); - TestContext.WriteLine($" Port {Ports.JobManagerHostPort} accessible: {portTest}"); - - // Try to get container logs - var jobManagerLogs = await RunDockerCommandAsync("logs --tail 20 flink-jobmanager 2>&1 || echo 'Could not get logs'"); - TestContext.WriteLine($" JobManager logs (last 20 lines): {jobManagerLogs.Trim()}"); - } - catch (Exception ex) - { - TestContext.WriteLine($"โš ๏ธ Could not gather Flink diagnostics: {ex.Message}"); - } - } - - /// - /// Enhanced Gateway readiness check with proper retry logic. - /// Gateway is a .NET project that starts after Flink, so it may need additional time. - /// Based on patterns from BackPressureExample with LocalTesting-specific endpoints. 
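The connection-refused branch above inspects InnerException after catching; C# exception filters can express the same split at the catch site. A sketch only, assuming `http`, `overviewUrl`, and `ct` are in scope as in the helpers above:

    // Sketch: the SocketException split via exception filters (same behavior as above).
    try
    {
        using var resp = await http.GetAsync(overviewUrl, ct);
    }
    catch (HttpRequestException ex) when (ex.InnerException is System.Net.Sockets.SocketException se)
    {
        TestContext.WriteLine($"Connection refused ({se.SocketErrorCode}) - Flink process still starting");
    }
    catch (HttpRequestException ex)
    {
        TestContext.WriteLine($"{ex.GetType().Name} - {ex.Message}");
    }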
- /// - public static async Task WaitForGatewayReadyAsync(string healthUrl, TimeSpan timeout, CancellationToken ct) - { - using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(5) }; - var sw = Stopwatch.StartNew(); - var attempt = 0; - - LogGatewayReadinessStart(healthUrl, timeout); - - while (sw.Elapsed < timeout && !ct.IsCancellationRequested) - { - attempt++; - if (await CheckGatewayHealthAsync(http, healthUrl, attempt, sw.Elapsed, ct)) - return; - - await Task.Delay(1000, ct); - } - - ThrowGatewayTimeoutException(healthUrl, timeout, attempt, sw.Elapsed); - } - - private static void LogGatewayReadinessStart(string healthUrl, TimeSpan timeout) - { - TestContext.WriteLine($"โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - TestContext.WriteLine($"โ•‘ ๐Ÿ”Ž [GatewayReady] Connecting to Flink Job Gateway"); - TestContext.WriteLine($"โ•‘ ๐Ÿ“ก Health URL: {healthUrl}"); - TestContext.WriteLine($"โ•‘ โฑ๏ธ Timeout: {timeout.TotalSeconds}s"); - TestContext.WriteLine($"โ•‘ ๐Ÿ’ก Gateway is a .NET project (starts after Flink)"); - TestContext.WriteLine($"โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - } - - private static async Task CheckGatewayHealthAsync( - HttpClient http, - string healthUrl, - int attempt, - TimeSpan elapsed, - CancellationToken ct) - { - try - { - var resp = await http.GetAsync(healthUrl, ct); - return HandleGatewayResponse(resp, healthUrl, attempt, elapsed); - } - catch (HttpRequestException ex) - { - LogGatewayException(ex, attempt, elapsed, isHttpException: true); - return false; - } - catch (Exception ex) - { - LogGatewayException(ex, attempt, elapsed, isHttpException: false); - return false; - } - } - - private static bool HandleGatewayResponse(HttpResponseMessage resp, string healthUrl, int attempt, TimeSpan elapsed) - { - if ((int)resp.StatusCode >= 200 && (int)resp.StatusCode < 500) - { - TestContext.WriteLine($"โœ… [GatewayReady] Gateway ready at {healthUrl} after {attempt} attempt(s), {elapsed.TotalSeconds:F1}s"); - return true; - } - - if (attempt % 10 == 0) - { - TestContext.WriteLine($"โณ [GatewayReady] Attempt {attempt}: HTTP {resp.StatusCode} (elapsed: {elapsed.TotalSeconds:F1}s)"); - } - - return false; - } - - private static void LogGatewayException(Exception ex, int attempt, TimeSpan elapsed, bool isHttpException) - { - if (attempt % 10 != 0) - return; - - if (isHttpException) - { - TestContext.WriteLine($"โณ [GatewayReady] Attempt {attempt}: {ex.GetType().Name} (elapsed: {elapsed.TotalSeconds:F1}s)"); - } - else - { - TestContext.WriteLine($"โณ [GatewayReady] Attempt {attempt}: {ex.GetType().Name} - {ex.Message}"); - } - } - - private static void ThrowGatewayTimeoutException(string healthUrl, TimeSpan timeout, int attempt, TimeSpan elapsed) - { - TestContext.WriteLine($"โŒ [GatewayReady] Gateway failed to start after {attempt} attempts over {elapsed.TotalSeconds:F1}s"); - throw new TimeoutException($"Gateway not ready within {timeout.TotalSeconds:F0}s at {healthUrl}. Gateway may not have started properly - check Aspire logs."); - } - - /// - /// Enhanced SQL Gateway readiness check with proper retry logic. - /// SQL Gateway is a Flink component that provides REST API for direct SQL execution. 
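The probe target for this helper is the SQL Gateway's /v1/info endpoint; a one-off check outside the retry loop is just a GET. Recent Flink versions report a product name and version here, but the exact response fields are not guaranteed by this sketch:

    // One-off probe of the SQL Gateway info endpoint (sketch; response fields not guaranteed).
    using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(5) };
    var resp = await http.GetAsync($"{baseUrl}/v1/info", ct);
    TestContext.WriteLine(resp.IsSuccessStatusCode
        ? await resp.Content.ReadAsStringAsync(ct)  // e.g. product name and version
        : $"SQL Gateway not ready: HTTP {resp.StatusCode}");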
- /// It starts after JobManager and must be validated before submitting SQL jobs. - /// - public static async Task WaitForSqlGatewayReadyAsync(string baseUrl, TimeSpan timeout, CancellationToken ct) - { - using var http = new HttpClient { Timeout = TimeSpan.FromSeconds(5) }; - var sw = Stopwatch.StartNew(); - var attempt = 0; - var healthUrl = $"{baseUrl}/v1/info"; - - LogSqlGatewayReadinessStart(healthUrl, timeout); - - while (sw.Elapsed < timeout && !ct.IsCancellationRequested) - { - attempt++; - if (await CheckSqlGatewayHealthAsync(http, healthUrl, attempt, sw.Elapsed, ct)) - return; - - await Task.Delay(1000, ct); - } - - ThrowSqlGatewayTimeoutException(healthUrl, timeout, attempt, sw.Elapsed); - } - - private static void LogSqlGatewayReadinessStart(string healthUrl, TimeSpan timeout) - { - TestContext.WriteLine($"๐Ÿ”Ž [SqlGatewayReady] Probing SQL Gateway at {healthUrl} (timeout: {timeout.TotalSeconds:F0}s)"); - TestContext.WriteLine($"๐Ÿ’ก [SqlGatewayReady] SQL Gateway is a Flink component that starts after JobManager"); - } - - private static async Task<bool> CheckSqlGatewayHealthAsync(HttpClient http, string healthUrl, int attempt, TimeSpan elapsed, CancellationToken ct) - { - try - { - var resp = await http.GetAsync(healthUrl, ct); - if (resp.IsSuccessStatusCode) - { - TestContext.WriteLine($"โœ… [SqlGatewayReady] SQL Gateway ready at {healthUrl} after {attempt} attempt(s), {elapsed.TotalSeconds:F1}s"); - return true; - } - - LogSqlGatewayAttempt(attempt, elapsed, resp.StatusCode); - return false; - } - catch (HttpRequestException ex) - { - LogSqlGatewayHttpException(attempt, elapsed, ex); - return false; - } - catch (Exception ex) - { - LogSqlGatewayException(attempt, ex); - return false; - } - } - - private static void LogSqlGatewayAttempt(int attempt, TimeSpan elapsed, System.Net.HttpStatusCode statusCode) - { - if (attempt % 10 == 0) - { - TestContext.WriteLine($"โณ [SqlGatewayReady] Attempt {attempt}: HTTP {statusCode} (elapsed: {elapsed.TotalSeconds:F1}s)"); - } - } - - private static void LogSqlGatewayHttpException(int attempt, TimeSpan elapsed, HttpRequestException ex) - { - if (attempt % 10 == 0) - { - TestContext.WriteLine($"โณ [SqlGatewayReady] Attempt {attempt}: {ex.GetType().Name} (elapsed: {elapsed.TotalSeconds:F1}s)"); - } - } - - private static void LogSqlGatewayException(int attempt, Exception ex) - { - if (attempt % 10 == 0) - { - TestContext.WriteLine($"โณ [SqlGatewayReady] Attempt {attempt}: {ex.GetType().Name} - {ex.Message}"); - } - } - - private static void ThrowSqlGatewayTimeoutException(string healthUrl, TimeSpan timeout, int attempt, TimeSpan elapsed) - { - TestContext.WriteLine($"โŒ [SqlGatewayReady] SQL Gateway failed to start after {attempt} attempts over {elapsed.TotalSeconds:F1}s"); - throw new TimeoutException($"SQL Gateway not ready within {timeout.TotalSeconds:F0}s at {healthUrl}. SQL Gateway may not have started properly - check Flink logs."); - } - /// - /// Enhanced Temporal readiness check with proper retry logic. - /// Temporal is a workflow orchestration system that starts after basic infrastructure. - /// PostgreSQL initialization can take significant time on first startup. - /// - public static async Task WaitForTemporalReadyAsync(string address, TimeSpan timeout, CancellationToken ct) - { - var sw = Stopwatch.StartNew(); - var attempt = 0; - Exception? 
lastException = null; - - LogTemporalReadinessStart(address, timeout); - - while (sw.Elapsed < timeout && !ct.IsCancellationRequested) - { - attempt++; - - var (success, exception) = await TryConnectToTemporalAsync(address, attempt, sw.Elapsed); - if (success) - { - return; - } - - lastException = exception; - await LogTemporalConnectionAttemptAsync(attempt, sw.Elapsed, lastException); - await Task.Delay(1000, ct); - } - - throw CreateTemporalTimeoutException(address, timeout, attempt, sw.Elapsed, lastException); - } - - private static void LogTemporalReadinessStart(string address, TimeSpan timeout) - { - TestContext.WriteLine($"โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - TestContext.WriteLine($"โ•‘ ๐Ÿ”Ž [TemporalReady] Connecting to Temporal Server"); - TestContext.WriteLine($"โ•‘ ๐Ÿ“ก Address: {address}"); - TestContext.WriteLine($"โ•‘ โฑ๏ธ Timeout: {timeout.TotalSeconds}s"); - TestContext.WriteLine($"โ•‘ โ„น๏ธ PostgreSQL initialization may take 30-60s on first start"); - TestContext.WriteLine($"โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - } - - private static async Task<(bool success, Exception? exception)> TryConnectToTemporalAsync(string address, int attempt, TimeSpan elapsed) - { - try - { - var client = await Temporalio.Client.TemporalClient.ConnectAsync(new Temporalio.Client.TemporalClientConnectOptions - { - TargetHost = address, - Namespace = "default", - }); - - if (client.Connection != null) - { - TestContext.WriteLine($"โœ… [TemporalReady] Temporal ready at {address} after {attempt} attempt(s), {elapsed.TotalSeconds:F1}s"); - return (true, null); - } - - return (false, null); - } - catch (Exception ex) - { - return (false, ex); - } - } - - private static Task LogTemporalConnectionAttemptAsync(int attempt, TimeSpan elapsed, Exception? lastException) - { - if (attempt % 10 == 0 || (attempt % 30 == 0 && elapsed.TotalSeconds >= 30)) - { - if (lastException != null) - { - TestContext.WriteLine($"โณ [TemporalReady] Attempt {attempt} ({elapsed.TotalSeconds:F0}s elapsed): {lastException.GetType().Name}"); - } - - if (elapsed.TotalSeconds >= 30 && attempt % 30 == 0) - { - TestContext.WriteLine($" ๐Ÿ’ก Temporal PostgreSQL initialization can be slow - this is normal for first startup"); - } - } - - return Task.CompletedTask; - } - - private static TimeoutException CreateTemporalTimeoutException(string address, TimeSpan timeout, int attempt, TimeSpan elapsed, Exception? lastException) - { - var errorMessage = $"Temporal not ready within {timeout.TotalSeconds:F0}s at {address}. " + - $"Attempted {attempt} times over {elapsed.TotalSeconds:F1}s."; - - if (lastException != null) - { - errorMessage += $" Last error: {lastException.GetType().Name} - {lastException.Message}"; - } - - return new TimeoutException(errorMessage); - } - - - /// - /// Create Kafka topic with proper error handling for existing topics. - /// Copied from BackPressureExample patterns. 
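A typical call site for this topic helper is one line per test topic, mirroring the pattern tests later in this diff (the topic names here are illustrative):

    // Illustrative call sites for the topic helper defined below.
    await CreateTopicAsync($"lt.demo.input.{TestContext.CurrentContext.Test.ID}", partitions: 1);
    await CreateTopicAsync($"lt.demo.output.{TestContext.CurrentContext.Test.ID}", partitions: 1);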
- /// - protected async Task CreateTopicAsync(string topicName, int partitions = 1, short replicationFactor = 1) - { - if (string.IsNullOrEmpty(KafkaConnectionString)) - throw new InvalidOperationException("Kafka connection string is not available"); - - using var admin = new AdminClientBuilder(new AdminClientConfig - { - BootstrapServers = KafkaConnectionString, - BrokerAddressFamily = BrokerAddressFamily.V4, - SecurityProtocol = SecurityProtocol.Plaintext - }) - .SetLogHandler((_, _) => { /* Suppress logs */ }) - .SetErrorHandler((_, _) => { /* Suppress errors */ }) - .Build(); - - try - { - var topicSpec = new TopicSpecification - { - Name = topicName, - NumPartitions = partitions, - ReplicationFactor = replicationFactor, - Configs = new Dictionary - { - ["min.insync.replicas"] = "1", - ["unclean.leader.election.enable"] = "true" - } - }; - - await admin.CreateTopicsAsync(new[] { topicSpec }); - TestContext.WriteLine($"โœ… Topic '{topicName}' created successfully"); - - // Optimized delay for faster test execution - await Task.Delay(100); - } - catch (CreateTopicsException ex) - { - if (ex.Results?.Any(r => r.Error.Code == ErrorCode.TopicAlreadyExists) == true) - { - TestContext.WriteLine($"โ„น๏ธ Topic '{topicName}' already exists"); - } - else - { - TestContext.WriteLine($"โŒ Error creating topic '{topicName}': {ex.Message}"); - throw; - } - } - } - - /// - /// Wait for complete infrastructure readiness including optional Gateway. - /// Performs quick health check only (trusts global setup). - /// - /// Whether to validate Gateway availability - /// Cancellation token - protected static async Task WaitForFullInfrastructureAsync( - bool includeGateway = true, - CancellationToken cancellationToken = default) - { - // Quick validation that endpoints are still responding - // This is used by individual tests after global setup has already validated everything - TestContext.WriteLine("๐Ÿ”ง Quick infrastructure health check..."); - - // Just verify Kafka is still accessible (very quick check) - if (string.IsNullOrEmpty(KafkaConnectionString)) - { - throw new InvalidOperationException("Kafka connection string not available"); - } - - // Display container status with ports for visibility (no polling - containers should already be running) - await DisplayContainerStatusAsync(); - - TestContext.WriteLine("โœ… Infrastructure health check passed"); - } - - /// - /// Capture network diagnostics for a specific test checkpoint. - /// Helper method for tests to capture network state at critical points. - /// - /// Name of the test - /// Checkpoint name (e.g., "before-test", "after-failure") - protected static async Task CaptureTestNetworkDiagnosticsAsync(string testName, string checkpoint) - { - var checkpointName = $"test-{testName}-{checkpoint}"; - await NetworkDiagnostics.CaptureNetworkDiagnosticsAsync(checkpointName); - } - - /// - /// Get the dynamically allocated Flink JobManager HTTP endpoint from Aspire. - /// Aspire DCP assigns random ports during testing, so we cannot use hardcoded ports. 
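Concretely, the discovery below reduces to one regex over the `docker ps` port column; a worked example of the transformation (the sample mapping is illustrative):

    // Worked example of the port extraction performed below (sample input illustrative).
    var sample = "127.0.0.1:32771->8081/tcp";
    var m = System.Text.RegularExpressions.Regex.Match(sample, @"127\.0\.0\.1:(\d+)->8081");
    var endpoint = m.Success ? $"http://localhost:{m.Groups[1].Value}/" : null;
    // endpoint == "http://localhost:32771/"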
- /// - protected static async Task GetFlinkJobManagerEndpointAsync() - { - try - { - var flinkContainers = await RunDockerCommandAsync("ps --filter \"name=flink-jobmanager\" --format \"{{.Ports}}\""); - TestContext.WriteLine($"๐Ÿ” Flink JobManager port mappings: {flinkContainers.Trim()}"); - - return ExtractFlinkEndpointFromPorts(flinkContainers); - } - catch (Exception ex) - { - throw new InvalidOperationException($"Failed to get Flink JobManager endpoint: {ex.Message}", ex); - } - } - - private static string ExtractFlinkEndpointFromPorts(string flinkContainers) - { - var lines = flinkContainers.Split('\n', StringSplitOptions.RemoveEmptyEntries); - foreach (var line in lines) - { - var endpoint = TryExtractPortFromLine(line); - if (endpoint != null) - return endpoint; - } - - throw new InvalidOperationException($"Could not determine Flink JobManager endpoint from Docker ports: {flinkContainers}"); - } - - private static string? TryExtractPortFromLine(string line) - { - if (!line.Contains("->8081/tcp")) - return null; - - var match = System.Text.RegularExpressions.Regex.Match(line, @"127\.0\.0\.1:(\d+)->8081"); - return match.Success ? $"http://localhost:{match.Groups[1].Value}/" : null; - } - - - /// - /// Retrieve JobManager logs from Flink REST API. - /// The JobManager handles job submission, so its logs contain errors from failed job submissions. - /// - protected static async Task GetFlinkJobManagerLogsAsync(string flinkEndpoint) - { - try - { - using var httpClient = new System.Net.Http.HttpClient { Timeout = TimeSpan.FromSeconds(15) }; - var logsBuilder = new System.Text.StringBuilder(); - logsBuilder.AppendLine("\n========== JobManager Logs =========="); - - var mainLogName = await GetJobManagerLogListAsync(httpClient, flinkEndpoint, logsBuilder); - if (!string.IsNullOrEmpty(mainLogName)) - { - await AppendJobManagerLogContentAsync(httpClient, flinkEndpoint, mainLogName, logsBuilder); - } - - return logsBuilder.ToString(); - } - catch (Exception ex) - { - return $"Error fetching JobManager logs: {ex.Message}"; - } - } - - private static async Task GetJobManagerLogListAsync(System.Net.Http.HttpClient httpClient, string flinkEndpoint, System.Text.StringBuilder logsBuilder) - { - var logListUrl = $"{flinkEndpoint.TrimEnd('/')}/jobmanager/logs"; - var logListResponse = await httpClient.GetAsync(logListUrl); - - if (!logListResponse.IsSuccessStatusCode) - { - logsBuilder.AppendLine($"Failed to get JobManager log list: HTTP {logListResponse.StatusCode}"); - return null; - } - - var logListContent = await logListResponse.Content.ReadAsStringAsync(); - var logListJson = System.Text.Json.JsonDocument.Parse(logListContent); - - return ExtractMainLogName(logListJson, logsBuilder); - } - - private static string? ExtractMainLogName(System.Text.Json.JsonDocument logListJson, System.Text.StringBuilder logsBuilder) - { - string? 
mainLogName = null; - if (logListJson.RootElement.TryGetProperty("logs", out var logs)) - { - foreach (var logFile in logs.EnumerateArray()) - { - if (logFile.TryGetProperty("name", out var name)) - { - var logName = name.GetString(); - logsBuilder.AppendLine($" Available log: {logName}"); - - if (logName?.EndsWith(".log") == true) - { - mainLogName = logName; - } - } - } - } - return mainLogName; - } - - private static async Task AppendJobManagerLogContentAsync(System.Net.Http.HttpClient httpClient, string flinkEndpoint, string mainLogName, System.Text.StringBuilder logsBuilder) - { - var logContentUrl = $"{flinkEndpoint.TrimEnd('/')}/jobmanager/logs/{mainLogName}"; - try - { - var logResponse = await httpClient.GetAsync(logContentUrl); - if (logResponse.IsSuccessStatusCode) - { - await AppendLogLines(logResponse, mainLogName, logsBuilder); - } - else - { - logsBuilder.AppendLine($" Failed to read log content: HTTP {logResponse.StatusCode}"); - } - } - catch (Exception logEx) - { - logsBuilder.AppendLine($" Error reading log file {mainLogName}: {logEx.Message}"); - } - } - - private static async Task AppendLogLines(System.Net.Http.HttpResponseMessage logResponse, string mainLogName, System.Text.StringBuilder logsBuilder) - { - var logContent = await logResponse.Content.ReadAsStringAsync(); - var lines = logContent.Split('\n'); - var lastLines = lines.Length > 500 ? lines[^500..] : lines; - logsBuilder.AppendLine($"\n Last 500 lines of {mainLogName}:"); - logsBuilder.AppendLine(string.Join('\n', lastLines)); - } - - /// - /// Retrieve Flink job exceptions from the Flink REST API. - /// This provides detailed error information when jobs fail. - /// - protected static async Task GetFlinkJobExceptionsAsync(string flinkEndpoint, string jobId) - { - try - { - using var httpClient = new System.Net.Http.HttpClient { Timeout = TimeSpan.FromSeconds(10) }; - var url = $"{flinkEndpoint.TrimEnd('/')}/jobs/{jobId}/exceptions"; - TestContext.WriteLine($"๐Ÿ” Fetching job exceptions from: {url}"); - - var response = await httpClient.GetAsync(url); - if (response.IsSuccessStatusCode) - { - var content = await response.Content.ReadAsStringAsync(); - return content; - } - else - { - return $"Failed to get job exceptions: HTTP {response.StatusCode}"; - } - } - catch (Exception ex) - { - return $"Error fetching job exceptions: {ex.Message}"; - } - } - - /// - /// Retrieve TaskManager logs from Flink REST API. - /// Returns logs from all TaskManagers if available. - /// - protected static async Task GetFlinkTaskManagerLogsAsync(string flinkEndpoint) - { - try - { - using var httpClient = new System.Net.Http.HttpClient { Timeout = TimeSpan.FromSeconds(10) }; - var logsBuilder = new System.Text.StringBuilder(); - - var taskManagers = await GetTaskManagerListAsync(httpClient, flinkEndpoint); - if (!taskManagers.HasValue) - { - return "Failed to get TaskManager list or no TaskManagers found"; - } - - var tmCount = await ProcessTaskManagersAsync(httpClient, flinkEndpoint, taskManagers.Value, logsBuilder); - - return tmCount == 0 ? 
"No TaskManagers found" : logsBuilder.ToString(); - } - catch (Exception ex) - { - return $"Error fetching TaskManager logs: {ex.Message}"; - } - } - - private static async Task GetTaskManagerListAsync(System.Net.Http.HttpClient httpClient, string flinkEndpoint) - { - var tmListUrl = $"{flinkEndpoint.TrimEnd('/')}/taskmanagers"; - var tmListResponse = await httpClient.GetAsync(tmListUrl); - - if (!tmListResponse.IsSuccessStatusCode) - { - return null; - } - - var tmListContent = await tmListResponse.Content.ReadAsStringAsync(); - var tmListJson = System.Text.Json.JsonDocument.Parse(tmListContent); - - if (!tmListJson.RootElement.TryGetProperty("taskmanagers", out var taskManagers)) - { - return null; - } - - return taskManagers; - } - - private static async Task ProcessTaskManagersAsync(System.Net.Http.HttpClient httpClient, string flinkEndpoint, System.Text.Json.JsonElement taskManagers, System.Text.StringBuilder logsBuilder) - { - int tmCount = 0; - foreach (var tm in taskManagers.EnumerateArray()) - { - if (tm.TryGetProperty("id", out var tmId)) - { - var taskManagerId = tmId.GetString(); - tmCount++; - logsBuilder.AppendLine($"\n========== TaskManager {tmCount} (ID: {taskManagerId}) =========="); - - await AppendTaskManagerLogsAsync(httpClient, flinkEndpoint, taskManagerId, logsBuilder); - } - } - return tmCount; - } - - private static async Task AppendTaskManagerLogsAsync(System.Net.Http.HttpClient httpClient, string flinkEndpoint, string? taskManagerId, System.Text.StringBuilder logsBuilder) - { - try - { - await AppendTaskManagerLogFilesAsync(httpClient, flinkEndpoint, taskManagerId, logsBuilder); - await AppendTaskManagerStdoutAsync(httpClient, flinkEndpoint, taskManagerId, logsBuilder); - } - catch (Exception tmEx) - { - logsBuilder.AppendLine($" Error getting TaskManager logs: {tmEx.Message}"); - } - } - - private static async Task AppendTaskManagerLogFilesAsync(System.Net.Http.HttpClient httpClient, string flinkEndpoint, string? taskManagerId, System.Text.StringBuilder logsBuilder) - { - var logUrl = $"{flinkEndpoint.TrimEnd('/')}/taskmanagers/{taskManagerId}/logs"; - var logResponse = await httpClient.GetAsync(logUrl); - - if (logResponse.IsSuccessStatusCode) - { - var logContent = await logResponse.Content.ReadAsStringAsync(); - var logJson = System.Text.Json.JsonDocument.Parse(logContent); - - if (logJson.RootElement.TryGetProperty("logs", out var logs)) - { - foreach (var logFile in logs.EnumerateArray()) - { - if (logFile.TryGetProperty("name", out var name)) - { - logsBuilder.AppendLine($" Log file: {name.GetString()}"); - } - } - } - } - } - - private static async Task AppendTaskManagerStdoutAsync(System.Net.Http.HttpClient httpClient, string flinkEndpoint, string? taskManagerId, System.Text.StringBuilder logsBuilder) - { - var stdoutUrl = $"{flinkEndpoint.TrimEnd('/')}/taskmanagers/{taskManagerId}/stdout"; - var stdoutResponse = await httpClient.GetAsync(stdoutUrl); - - if (stdoutResponse.IsSuccessStatusCode) - { - var stdoutContent = await stdoutResponse.Content.ReadAsStringAsync(); - var lines = stdoutContent.Split('\n'); - var lastLines = lines.Length > 100 ? lines[^100..] : lines; - logsBuilder.AppendLine($"\n Last 100 lines of stdout:"); - logsBuilder.AppendLine(string.Join('\n', lastLines)); - } - } - - /// - /// Retrieve TaskManager logs from Docker container. - /// Fallback method when Flink REST API is not available or doesn't have the logs. 
- /// - protected static async Task GetTaskManagerLogsFromDockerAsync() - { - try - { - // Get all container names and filter in C# to handle Aspire's random suffixes - var containerNames = await RunDockerCommandAsync("ps --format \"{{.Names}}\""); - var containers = containerNames.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); - var containerName = containers.FirstOrDefault(name => name.Contains("flink-taskmanager", StringComparison.OrdinalIgnoreCase))?.Trim(); - - if (string.IsNullOrEmpty(containerName)) - { - return "No TaskManager container found"; - } - - TestContext.WriteLine($"๐Ÿ” Getting logs from TaskManager container: {containerName}"); - var logs = await RunDockerCommandAsync($"logs {containerName} --tail 20 2>&1"); - return $"========== TaskManager Container Logs ({containerName}) - Last 20 Lines ==========\n{logs}"; - } - catch (Exception ex) - { - return $"Error fetching TaskManager logs from Docker: {ex.Message}"; - } - } - - /// - /// Get comprehensive diagnostic information when a Flink job fails. - /// Includes JobManager logs, job exceptions, TaskManager logs from REST API, and Docker container logs. - /// - protected static async Task GetFlinkJobDiagnosticsAsync(string flinkEndpoint, string? jobId = null) - { - var diagnostics = new System.Text.StringBuilder(); - diagnostics.AppendLine("\n" + new string('=', 80)); - diagnostics.AppendLine("FLINK JOB FAILURE DIAGNOSTICS"); - diagnostics.AppendLine(new string('=', 80)); - - // 1. Get JobManager logs (most important for job submission failures) - diagnostics.AppendLine("\n--- JobManager Logs (from Flink REST API) ---"); - var jmLogs = await GetFlinkJobManagerLogsAsync(flinkEndpoint); - diagnostics.AppendLine(jmLogs); - - // 2. Get job exceptions if jobId is provided - if (!string.IsNullOrEmpty(jobId)) - { - diagnostics.AppendLine("\n--- Job Exceptions ---"); - var exceptions = await GetFlinkJobExceptionsAsync(flinkEndpoint, jobId); - diagnostics.AppendLine(exceptions); - } - - // 3. Get TaskManager logs from Flink REST API - diagnostics.AppendLine("\n--- TaskManager Logs (from Flink REST API) ---"); - var tmLogs = await GetFlinkTaskManagerLogsAsync(flinkEndpoint); - diagnostics.AppendLine(tmLogs); - - // 4. Get TaskManager logs from Docker as fallback/additional info - diagnostics.AppendLine("\n--- TaskManager Logs (from Docker) ---"); - var dockerLogs = await GetTaskManagerLogsFromDockerAsync(); - diagnostics.AppendLine(dockerLogs); - - diagnostics.AppendLine("\n" + new string('=', 80)); - return diagnostics.ToString(); - } - - /// - /// Display current container status and ports for debugging visibility. - /// Used in lightweight mode - assumes containers are already running from global setup. - /// Does NOT poll or wait - just displays current state immediately. 
- /// - private static async Task DisplayContainerStatusAsync() - { - try - { - // Single quick check - no polling needed since containers should already be running - var containerInfo = await RunDockerCommandAsync("ps --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\""); - - if (!string.IsNullOrWhiteSpace(containerInfo)) - { - // Check if we only got the header (no actual containers) - var lines = containerInfo.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); - - if (lines.Length <= 1) - { - // Only header, no containers - TestContext.WriteLine("โš ๏ธ No containers found - this is unexpected in lightweight mode"); - TestContext.WriteLine("๐Ÿ” Container info output:"); - TestContext.WriteLine(containerInfo); - - // Try listing ALL containers including stopped ones for diagnostics - var allContainersInfo = await RunDockerCommandAsync("ps -a --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\""); - if (!string.IsNullOrWhiteSpace(allContainersInfo)) - { - TestContext.WriteLine("๐Ÿ” All containers (including stopped):"); - TestContext.WriteLine(allContainersInfo); - } - } - else - { - TestContext.WriteLine("๐Ÿณ Container Status and Ports:"); - TestContext.WriteLine(containerInfo); - } - } - else - { - TestContext.WriteLine("๐Ÿณ No container output - container runtime not available or command failed"); - } - } - catch (Exception ex) - { - TestContext.WriteLine($"โš ๏ธ Failed to get container status: {ex.Message}"); - } - } - - /// - /// Log Flink job status via Gateway to check if job is actually running. - /// - protected static async Task LogJobStatusViaGatewayAsync(string gatewayBase, string jobId, string checkpoint) - { - try - { - TestContext.WriteLine($"๐Ÿ” [Job Status Check] {checkpoint} - Job ID: {jobId}"); - - using var httpClient = new System.Net.Http.HttpClient(); - var statusUrl = $"{gatewayBase}api/v1/jobs/{jobId}/status"; - var response = await httpClient.GetAsync(statusUrl); - - if (response.IsSuccessStatusCode) - { - var content = await response.Content.ReadAsStringAsync(); - TestContext.WriteLine($"๐Ÿ“Š Job status response: {content}"); - } - else - { - TestContext.WriteLine($"โš ๏ธ Failed to get job status: HTTP {response.StatusCode}"); - } - } - catch (Exception ex) - { - TestContext.WriteLine($"โš ๏ธ Failed to check job status: {ex.Message}"); - } - } - - /// - /// Log Flink container status and recent logs for debugging. 
- /// - protected static async Task LogFlinkContainerStatusAsync(string checkpoint) - { - try - { - TestContext.WriteLine($"๐Ÿ” [Flink Container Debug] {checkpoint}"); - - // Get ALL container names and filter in C# to handle Aspire's random suffixes - var allContainersList = await RunDockerCommandAsync("ps --format \"{{.Names}}\""); - var allContainers = allContainersList.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); - - var flinkContainers = allContainers.Where(name => name.Contains("flink", StringComparison.OrdinalIgnoreCase)).ToList(); - - TestContext.WriteLine($"๐Ÿณ Flink containers found: {string.Join(", ", flinkContainers)}"); - - // Find JobManager container - var jmName = flinkContainers.FirstOrDefault(name => name.Contains("flink-jobmanager", StringComparison.OrdinalIgnoreCase))?.Trim(); - - if (!string.IsNullOrWhiteSpace(jmName)) - { - TestContext.WriteLine($"๐Ÿ“‹ Found JobManager container: {jmName}"); - var jmLogs = await RunDockerCommandAsync($"logs {jmName} --tail 100 2>&1"); - TestContext.WriteLine($"๐Ÿ“‹ JobManager logs (last 100 lines):\n{jmLogs}"); - } - else - { - TestContext.WriteLine("โš ๏ธ No JobManager container found"); - TestContext.WriteLine($" Available containers: {string.Join(", ", allContainers)}"); - } - - // Find TaskManager container - var tmName = flinkContainers.FirstOrDefault(name => name.Contains("flink-taskmanager", StringComparison.OrdinalIgnoreCase))?.Trim(); - - if (!string.IsNullOrWhiteSpace(tmName)) - { - TestContext.WriteLine($"๐Ÿ“‹ Found TaskManager container: {tmName}"); - var tmLogs = await RunDockerCommandAsync($"logs {tmName} --tail 20 2>&1"); - TestContext.WriteLine($"๐Ÿ“‹ TaskManager logs (last 20 lines):\n{tmLogs}"); - } - else - { - TestContext.WriteLine("โš ๏ธ No TaskManager container found"); - TestContext.WriteLine($" Available containers: {string.Join(", ", allContainers)}"); - } - } - catch (Exception ex) - { - TestContext.WriteLine($"โš ๏ธ Failed to get Flink container logs: {ex.Message}"); - TestContext.WriteLine($" Exception details: {ex.GetType().Name} - {ex.Message}"); - if (ex.StackTrace != null) - { - TestContext.WriteLine($" Stack trace: {ex.StackTrace}"); - } - } - } - - /// - /// Log Flink job-specific logs from JobManager. 
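One LINQ subtlety in the filter below: Take(30) keeps the earliest 30 matching lines; when the most recent lines are wanted instead, TakeLast is the complementary operator. A sketch, assuming `jobLogs` and `jobId` as in the method below:

    // Take vs TakeLast over filtered log lines (TakeLast is available on modern .NET).
    var matches = jobLogs.Split('\n')
        .Where(line => line.Contains(jobId, StringComparison.OrdinalIgnoreCase))
        .ToList();
    var firstThirty = matches.Take(30);     // earliest matches - what the code below logs
    var lastThirty  = matches.TakeLast(30); // most recent matches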
- /// - protected static async Task LogFlinkJobLogsAsync(string jobId, string checkpoint) - { - try - { - TestContext.WriteLine($"๐Ÿ” [Flink Job Debug] {checkpoint} - Job ID: {jobId}"); - - // Get all container names and filter in C# to handle Aspire's random suffixes - var containerNames = await RunDockerCommandAsync("ps --format \"{{.Names}}\""); - var containers = containerNames.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); - - // Find JobManager container - var jmName = containers.FirstOrDefault(name => name.Contains("flink-jobmanager", StringComparison.OrdinalIgnoreCase))?.Trim(); - - if (!string.IsNullOrWhiteSpace(jmName)) - { - // Get logs filtered for this specific job - var jobLogs = await RunDockerCommandAsync($"logs {jmName} 2>&1"); - var jobLogLines = jobLogs.Split('\n').Where(line => line.Contains(jobId, StringComparison.OrdinalIgnoreCase)).Take(30); - TestContext.WriteLine($"๐Ÿ“‹ Job-specific logs (first 30 matching lines):\n{string.Join('\n', jobLogLines)}"); - } - - // Find TaskManager container - var tmName = containers.FirstOrDefault(name => name.Contains("flink-taskmanager", StringComparison.OrdinalIgnoreCase))?.Trim(); - - if (!string.IsNullOrWhiteSpace(tmName)) - { - // Get TaskManager logs and filter locally - var allLogs = await RunDockerCommandAsync($"logs {tmName} 2>&1"); - - // Check for Kafka-related logs - var kafkaLogLines = allLogs.Split('\n').Where(line => line.Contains("kafka", StringComparison.OrdinalIgnoreCase)).Take(20); - TestContext.WriteLine($"๐Ÿ“‹ Kafka-related logs from TaskManager (first 20 matching lines):\n{string.Join('\n', kafkaLogLines)}"); - - // Also check for any error logs - var errorLogLines = allLogs.Split('\n').Where(line => - line.Contains("error", StringComparison.OrdinalIgnoreCase) || - line.Contains("exception", StringComparison.OrdinalIgnoreCase) || - line.Contains("fail", StringComparison.OrdinalIgnoreCase)).Take(20); - TestContext.WriteLine($"๐Ÿ“‹ Error logs from TaskManager (first 20 matching lines):\n{string.Join('\n', errorLogLines)}"); - } - } - catch (Exception ex) - { - TestContext.WriteLine($"โš ๏ธ Failed to get Flink job logs: {ex.Message}"); - } - } - - /// - /// Test Kafka connectivity from within Flink TaskManager container using telnet or nc. - /// This diagnostic helps determine if Flink containers can reach Kafka at kafka:9092. 
- /// - protected static async Task TestKafkaConnectivityFromFlinkAsync() - { - try - { - TestContext.WriteLine("๐Ÿ” [Kafka Connectivity] Testing from Flink TaskManager container..."); - - // Get all container names and filter in C# to handle Aspire's random suffixes - var containerNames = await RunDockerCommandAsync("ps --format \"{{.Names}}\""); - var containers = containerNames.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries); - var tmName = containers.FirstOrDefault(name => name.Contains("flink-taskmanager", StringComparison.OrdinalIgnoreCase))?.Trim(); - - if (string.IsNullOrWhiteSpace(tmName)) - { - TestContext.WriteLine("โš ๏ธ No TaskManager container found for connectivity test"); - return; - } - - TestContext.WriteLine($"๐Ÿณ Using TaskManager container: {tmName}"); - - // Test connectivity to kafka:9092 - var testResult = await RunDockerCommandAsync($"exec {tmName} timeout 2 bash -c 'echo \"test\" | nc -w 1 kafka 9092 && echo \"SUCCESS\" || echo \"FAILED\"' 2>&1"); - TestContext.WriteLine($"๐Ÿ“Š Kafka connectivity (kafka:9092): {testResult.Trim()}"); - - // Also try to resolve the hostname - var dnsResult = await RunDockerCommandAsync($"exec {tmName} getent hosts kafka 2>&1 || echo \"DNS resolution failed\""); - TestContext.WriteLine($"๐Ÿ“Š DNS resolution for 'kafka': {dnsResult.Trim()}"); - - // Check if Kafka connectorJARs are present - var connectorCheck = await RunDockerCommandAsync($"exec {tmName} ls -lh /opt/flink/lib/*kafka* 2>&1 || echo \"No Kafka connector found\""); - TestContext.WriteLine($"๐Ÿ“Š Kafka connector JARs in Flink:\n{connectorCheck.Trim()}"); - - // Check network settings - var networkInfo = await RunDockerCommandAsync($"inspect {tmName} --format '{{{{.NetworkSettings.Networks}}}}'"); - TestContext.WriteLine($"๐Ÿ“Š Container network info: {networkInfo.Trim()}"); - } - catch (Exception ex) - { - TestContext.WriteLine($"โš ๏ธ Failed to test Kafka connectivity from Flink: {ex.Message}"); - } - } -} diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/NativeFlinkAllPatternsTests.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/NativeFlinkAllPatternsTests.cs deleted file mode 100644 index f592b4e2..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/NativeFlinkAllPatternsTests.cs +++ /dev/null @@ -1,327 +0,0 @@ -using System.Diagnostics; -using System.Net.Http.Json; -using Confluent.Kafka; -using NUnit.Framework; - -namespace LocalTesting.IntegrationTests; - -/// -/// Native Apache Flink test to validate Aspire infrastructure independently of the Gateway. -/// Runs a basic native Flink job to prove the infrastructure works correctly. -/// Tests run in parallel with 8 TaskManager slots available. -/// -[TestFixture] -[Parallelizable(ParallelScope.All)] -[Category("native-flink-patterns")] -public class NativeFlinkAllPatternsTests : LocalTestingTestBase -{ - private static readonly TimeSpan TestTimeout = TimeSpan.FromMinutes(3); - private static readonly TimeSpan JobRunTimeout = TimeSpan.FromSeconds(30); - private static readonly TimeSpan ConsumeTimeout = TimeSpan.FromSeconds(30); - - /// - /// Pattern 1: Uppercase transformation - /// Validates basic map operation (input -> uppercase -> output) - /// This single test proves that native Apache Flink jobs work correctly through the infrastructure. - /// NOTE: Currently ignored - use Gateway pattern tests instead for production workflows. 
- /// - [Test] - public async Task Pattern1_Uppercase_ShouldTransformMessages() - { - await RunNativeFlinkPattern( - patternName: "Uppercase", - inputMessages: new[] { "hello", "world" }, - expectedOutputs: new[] { "HELLO", "WORLD" }, - description: "Basic uppercase transformation" - ); - } - - #region Test Infrastructure - - private async Task RunNativeFlinkPattern( - string patternName, - string[] inputMessages, - string[] expectedOutputs, - string description, - bool allowLongerProcessing = false) - { - var inputTopic = $"lt.pattern.{patternName.ToLowerInvariant()}.input.{TestContext.CurrentContext.Test.ID}"; - var outputTopic = $"lt.pattern.{patternName.ToLowerInvariant()}.output.{TestContext.CurrentContext.Test.ID}"; - - // Find and verify JAR exists - var jarPath = FindNativeFlinkJar(); - TestContext.WriteLine($"๐Ÿ” Using JAR: {jarPath}"); - Assert.That(File.Exists(jarPath), Is.True, $"Native Flink JAR must exist at {jarPath}"); - - var baseToken = TestContext.CurrentContext.CancellationToken; - using var testTimeout = new CancellationTokenSource(TestTimeout); - using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(baseToken, testTimeout.Token); - var ct = linkedCts.Token; - - TestContext.WriteLine($"๐Ÿš€ Starting Native Flink Pattern Test: {patternName}"); - TestContext.WriteLine($"๐Ÿ“ Description: {description}"); - var stopwatch = Stopwatch.StartNew(); - - try - { - // Skip health check - global setup already validated everything - // Create topics immediately - TestContext.WriteLine($"๐Ÿ“ Creating topics: {inputTopic} -> {outputTopic}"); - await CreateTopicAsync(inputTopic, 1); - await CreateTopicAsync(outputTopic, 1); - - // Upload JAR and submit job - using var httpClient = new HttpClient(); - var jarId = await UploadJarToFlinkAsync(httpClient, jarPath, ct); - var jobId = await SubmitNativeJobAsync(httpClient, jarId, inputTopic, outputTopic, ct); - TestContext.WriteLine($"โœ… Job submitted: {jobId}"); - - // Wait for job to be running - await WaitForJobRunningAsync(httpClient, jobId, JobRunTimeout, ct); - TestContext.WriteLine("โœ… Job is RUNNING"); - - // Produce test messages immediately - job is already running - TestContext.WriteLine($"๐Ÿ“ค Producing {inputMessages.Length} messages..."); - await ProduceMessagesAsync(inputTopic, inputMessages, KafkaConnectionString!, ct); - - // Consume and verify - var consumeTimeout = allowLongerProcessing ? 
TimeSpan.FromSeconds(60) : ConsumeTimeout; - var consumed = await ConsumeMessagesAsync(outputTopic, expectedOutputs.Length, consumeTimeout, KafkaConnectionString!, ct); - - TestContext.WriteLine($"๐Ÿ“Š Consumed {consumed.Count} messages (expected: {expectedOutputs.Length})"); - - // Assert - Assert.That(consumed.Count, Is.EqualTo(expectedOutputs.Length), - $"Should consume exactly {expectedOutputs.Length} messages"); - - for (int i = 0; i < expectedOutputs.Length; i++) - { - Assert.That(consumed[i], Is.EqualTo(expectedOutputs[i]), - $"Message {i} should match expected output"); - } - - // Cleanup - await CancelJobAsync(httpClient, jobId, ct); - TestContext.WriteLine("โœ… Job cancelled"); - - stopwatch.Stop(); - TestContext.WriteLine($"โœ… {patternName} test completed successfully in {stopwatch.Elapsed.TotalSeconds:F1}s"); - } - catch (Exception ex) - { - stopwatch.Stop(); - TestContext.WriteLine($"โŒ {patternName} test failed after {stopwatch.Elapsed.TotalSeconds:F1}s: {ex.Message}"); - throw; - } - } - - private static async Task UploadJarToFlinkAsync(HttpClient client, string jarPath, CancellationToken ct) - { - var flinkEndpoint = await GetFlinkJobManagerEndpointAsync(); - var uploadUrl = $"{flinkEndpoint}jars/upload"; - - using var fileStream = File.OpenRead(jarPath); - using var content = new MultipartFormDataContent(); - using var fileContent = new StreamContent(fileStream); - fileContent.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue("application/x-java-archive"); - content.Add(fileContent, "jarfile", Path.GetFileName(jarPath)); - - var response = await client.PostAsync(uploadUrl, content, ct); - response.EnsureSuccessStatusCode(); - - var result = await response.Content.ReadFromJsonAsync(ct); - Assert.That(result?.Filename, Is.Not.Null.And.Not.Empty); - return Path.GetFileName(result!.Filename); - } - - private static async Task SubmitNativeJobAsync( - HttpClient client, - string jarId, - string inputTopic, - string outputTopic, - CancellationToken ct) - { - var flinkEndpoint = await GetFlinkJobManagerEndpointAsync(); - var runUrl = $"{flinkEndpoint}jars/{jarId}/run"; - - // Use dynamically discovered Kafka container IP for Flink job connectivity - // Docker bridge network doesn't support DNS between containers - var kafkaBootstrap = GlobalTestInfrastructure.KafkaContainerIpForFlink; - var submitPayload = new - { - entryClass = "com.flinkdotnet.NativeKafkaJob", - programArgsList = new[] - { - "--bootstrap-servers", kafkaBootstrap, - "--input-topic", inputTopic, - "--output-topic", outputTopic, - "--group-id", $"native-pattern-test-{Guid.NewGuid():N}" - }, - parallelism = 1 - }; - - var response = await client.PostAsJsonAsync(runUrl, submitPayload, ct); - response.EnsureSuccessStatusCode(); - - var result = await response.Content.ReadFromJsonAsync(ct); - Assert.That(result?.JobId, Is.Not.Null.And.Not.Empty); - return result!.JobId; - } - - private static async Task WaitForJobRunningAsync(HttpClient client, string jobId, TimeSpan timeout, CancellationToken ct) - { - var flinkEndpoint = await GetFlinkJobManagerEndpointAsync(); - var jobUrl = $"{flinkEndpoint}jobs/{jobId}"; - var deadline = DateTime.UtcNow.Add(timeout); - - while (DateTime.UtcNow < deadline && !ct.IsCancellationRequested) - { - var response = await client.GetAsync(jobUrl, ct); - response.EnsureSuccessStatusCode(); - - var jobInfo = await response.Content.ReadFromJsonAsync(ct); - if (jobInfo?.State == "RUNNING") return; - if (jobInfo?.State == "FAILED" || jobInfo?.State == "CANCELED") - { - 
Assert.Fail($"Job entered terminal state: {jobInfo.State}"); - } - - await Task.Delay(500, ct); // Reduced from 1000ms to 500ms - } - - Assert.Fail($"Job did not reach RUNNING state within {timeout.TotalSeconds}s"); - } - - private static async Task CancelJobAsync(HttpClient client, string jobId, CancellationToken ct) - { - var flinkEndpoint = await GetFlinkJobManagerEndpointAsync(); - var cancelUrl = $"{flinkEndpoint}jobs/{jobId}?mode=cancel"; - var response = await client.PatchAsync(cancelUrl, null, ct); - response.EnsureSuccessStatusCode(); - } - - private static async Task ProduceMessagesAsync(string topic, string[] messages, string kafkaConnectionString, CancellationToken ct) - { - using var producer = new ProducerBuilder(new ProducerConfig - { - BootstrapServers = kafkaConnectionString, - ClientId = "native-pattern-test-producer", - BrokerAddressFamily = BrokerAddressFamily.V4, - SecurityProtocol = SecurityProtocol.Plaintext - }) - .SetLogHandler((_, _) => { }) - .SetErrorHandler((_, _) => { }) - .Build(); - - foreach (var message in messages) - { - await producer.ProduceAsync(topic, new Message { Value = message }, ct); - } - - producer.Flush(TimeSpan.FromSeconds(10)); - } - - private static Task> ConsumeMessagesAsync( - string topic, - int expectedCount, - TimeSpan timeout, - string kafkaConnectionString, - CancellationToken ct) - { - var config = new ConsumerConfig - { - BootstrapServers = kafkaConnectionString, - GroupId = $"native-pattern-consumer-{Guid.NewGuid()}", - AutoOffsetReset = AutoOffsetReset.Earliest, - EnableAutoCommit = false, - BrokerAddressFamily = BrokerAddressFamily.V4, - SecurityProtocol = SecurityProtocol.Plaintext - }; - - var messages = new List(); - using var consumer = new ConsumerBuilder(config) - .SetLogHandler((_, _) => { }) - .SetErrorHandler((_, _) => { }) - .Build(); - - consumer.Subscribe(topic); - var deadline = DateTime.UtcNow.Add(timeout); - - while (DateTime.UtcNow < deadline && messages.Count < expectedCount && !ct.IsCancellationRequested) - { - var consumeResult = consumer.Consume(TimeSpan.FromSeconds(1)); - if (consumeResult != null) - { - messages.Add(consumeResult.Message.Value); - } - } - - return Task.FromResult(messages); - } - - private static string FindNativeFlinkJar() - { - var currentDir = AppContext.BaseDirectory; - var repoRoot = FindRepositoryRoot(currentDir); - - if (repoRoot != null) - { - var jarPath = Path.Combine(repoRoot, "LocalTesting", "NativeFlinkJob", "target", "native-flink-kafka-job-1.0.0.jar"); - if (File.Exists(jarPath)) return jarPath; - } - - return Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "NativeFlinkJob", "target", "native-flink-kafka-job-1.0.0.jar")); - } - - private static string? 
FindRepositoryRoot(string startPath) - { - var dir = new DirectoryInfo(startPath); - while (dir != null) - { - if (File.Exists(Path.Combine(dir.FullName, "global.json"))) return dir.FullName; - dir = dir.Parent; - } - return null; - } - - private static new Task GetFlinkJobManagerEndpointAsync() - { - try - { - var psi = new ProcessStartInfo - { - FileName = "docker", - Arguments = "ps --filter \"name=flink-jobmanager\" --format \"{{.Ports}}\"", - RedirectStandardOutput = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var process = Process.Start(psi); - if (process != null) - { - var output = process.StandardOutput.ReadToEnd(); - process.WaitForExit(); - - var match = System.Text.RegularExpressions.Regex.Match(output, @"127\.0\.0\.1:(\d+)->8081"); - if (match.Success) - { - return Task.FromResult($"http://localhost:{match.Groups[1].Value}/"); - } - } - } - catch - { - // Fall through to default - } - - return Task.FromResult($"http://localhost:{LocalTesting.FlinkSqlAppHost.Ports.JobManagerHostPort}/"); - } - - // DTOs for Flink REST API - private record FlinkJarUploadResponse(string Status, string Filename); - private record FlinkJobSubmitResponse(string JobId); - private record FlinkJobInfo(string JobId, string State); - - #endregion -} diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/NetworkDiagnostics.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/NetworkDiagnostics.cs deleted file mode 100644 index 8f13b051..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/NetworkDiagnostics.cs +++ /dev/null @@ -1,308 +0,0 @@ -using System.Diagnostics; -using System.Text; - -namespace LocalTesting.IntegrationTests; - -/// -/// Network diagnostics utilities for capturing Docker/Podman network information. -/// Writes detailed network state to test-logs/network.log.* files for debugging. -/// -public static class NetworkDiagnostics -{ - // Place logs in LocalTesting/test-logs (repository root relative path) - private static readonly string LogDirectory = GetLogDirectory(); - - private static string GetLogDirectory() - { - // Navigate from bin/Debug|Release/net9.0 to LocalTesting/test-logs - var baseDir = AppContext.BaseDirectory; - var localTestingRoot = Path.GetFullPath(Path.Combine(baseDir, "..", "..", "..", "..")); - return Path.Combine(localTestingRoot, "test-logs"); - } - - /// - /// Capture comprehensive network diagnostics to a date-stamped log file. 
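The daily-rolling file naming below composes as follows (a tiny sketch; `logDirectory` and `diagnosticsText` stand in for the resolved test-logs path and the composed report):

    // Sketch of the daily-rolling log path used below; names are illustrative.
    var dateStamp = DateTime.UtcNow.ToString("yyyyMMdd");                  // e.g. "20251023"
    var logFilePath = Path.Combine(logDirectory, $"network.log.{dateStamp}");
    await File.AppendAllTextAsync(logFilePath, diagnosticsText);           // one file per UTC day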
- /// - /// Name of the checkpoint (e.g., "startup", "before-test", "after-test") - public static async Task CaptureNetworkDiagnosticsAsync(string checkpointName) - { - try - { - // Ensure log directory exists - Directory.CreateDirectory(LogDirectory); - - var dateStamp = DateTime.UtcNow.ToString("yyyyMMdd"); - var timeStamp = DateTime.UtcNow.ToString("yyyy-MM-dd HH:mm:ss.fff"); - var logFileName = $"network.log.{dateStamp}"; - var logFilePath = Path.Combine(LogDirectory, logFileName); - - var diagnostics = new StringBuilder(); - diagnostics.AppendLine(); - diagnostics.AppendLine("โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine($"โ•‘ Network Diagnostics - {checkpointName}"); - diagnostics.AppendLine($"โ•‘ Timestamp: {timeStamp} UTC"); - diagnostics.AppendLine("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine(); - - // Capture container information - await CaptureContainerInfoAsync(diagnostics); - - // Capture network information - await CaptureNetworkInfoAsync(diagnostics); - - // Capture Aspire-specific network information - await CaptureAspireNetworksAsync(diagnostics); - - // Append to daily log file - await File.AppendAllTextAsync(logFilePath, diagnostics.ToString()); - - Console.WriteLine($"โœ… Network diagnostics appended to: {logFilePath}"); - } - catch (Exception ex) - { - Console.WriteLine($"โš ๏ธ Failed to capture network diagnostics: {ex.Message}"); - } - } - - /// - /// Capture Docker/Podman container information. 
- /// - private static async Task CaptureContainerInfoAsync(StringBuilder diagnostics) - { - diagnostics.AppendLine("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine("CONTAINER STATUS (docker ps / podman ps)"); - diagnostics.AppendLine("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine(); - - // Try Docker first - var dockerPs = await TryRunCommandAsync("docker", "ps --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\\t{{.Networks}}\""); - if (!string.IsNullOrWhiteSpace(dockerPs)) - { - diagnostics.AppendLine("๐Ÿณ Docker Containers:"); - diagnostics.AppendLine(dockerPs); - diagnostics.AppendLine(); - - // Also capture all containers (including stopped) - var dockerPsAll = await TryRunCommandAsync("docker", "ps -a --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\\t{{.Networks}}\""); - if (!string.IsNullOrWhiteSpace(dockerPsAll)) - { - diagnostics.AppendLine("๐Ÿณ All Docker Containers (including stopped):"); - diagnostics.AppendLine(dockerPsAll); - diagnostics.AppendLine(); - } - } - else - { - // Try Podman as fallback - var podmanPs = await TryRunCommandAsync("podman", "ps --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\\t{{.Networks}}\""); - if (!string.IsNullOrWhiteSpace(podmanPs)) - { - diagnostics.AppendLine("๐Ÿฆญ Podman Containers:"); - diagnostics.AppendLine(podmanPs); - diagnostics.AppendLine(); - - // Also capture all containers (including stopped) - var podmanPsAll = await TryRunCommandAsync("podman", "ps -a --format \"table {{.Names}}\\t{{.Status}}\\t{{.Ports}}\\t{{.Networks}}\""); - if (!string.IsNullOrWhiteSpace(podmanPsAll)) - { - diagnostics.AppendLine("๐Ÿฆญ All Podman Containers (including stopped):"); - diagnostics.AppendLine(podmanPsAll); - diagnostics.AppendLine(); - } - } - else - { - diagnostics.AppendLine("โš ๏ธ No container runtime (Docker/Podman) found or not responding"); - diagnostics.AppendLine(); - } - } - } - - /// - /// Capture Docker/Podman network information. 
- /// - private static async Task CaptureNetworkInfoAsync(StringBuilder diagnostics) - { - diagnostics.AppendLine("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine("NETWORK INFORMATION (docker network ls / podman network ls)"); - diagnostics.AppendLine("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine(); - - // Try Docker first - var dockerNetworks = await TryRunCommandAsync("docker", "network ls --format \"table {{.Name}}\\t{{.Driver}}\\t{{.Scope}}\""); - if (!string.IsNullOrWhiteSpace(dockerNetworks)) - { - diagnostics.AppendLine("๐Ÿณ Docker Networks:"); - diagnostics.AppendLine(dockerNetworks); - diagnostics.AppendLine(); - - // Inspect each network for detailed information - await InspectNetworksAsync(diagnostics, "docker", dockerNetworks); - } - else - { - // Try Podman as fallback - var podmanNetworks = await TryRunCommandAsync("podman", "network ls --format \"table {{.Name}}\\t{{.Driver}}\""); - if (!string.IsNullOrWhiteSpace(podmanNetworks)) - { - diagnostics.AppendLine("๐Ÿฆญ Podman Networks:"); - diagnostics.AppendLine(podmanNetworks); - diagnostics.AppendLine(); - - // Inspect each network for detailed information - await InspectNetworksAsync(diagnostics, "podman", podmanNetworks); - } - else - { - diagnostics.AppendLine("โš ๏ธ No network information available"); - diagnostics.AppendLine(); - } - } - } - - /// - /// Inspect individual networks for detailed information. - /// - private static async Task InspectNetworksAsync(StringBuilder diagnostics, string command, string networkList) - { - var lines = networkList.Split('\n', StringSplitOptions.RemoveEmptyEntries); - - // Skip header line and extract network names - var networkNames = lines - .Skip(1) - .Select(line => line.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault()) - .Where(name => !string.IsNullOrWhiteSpace(name)) - .ToList(); - - foreach (var networkName in networkNames) - { - var networkInspect = await TryRunCommandAsync(command, $"network inspect {networkName}"); - if (!string.IsNullOrWhiteSpace(networkInspect)) - { - diagnostics.AppendLine($"๐Ÿ“‹ Network Details: {networkName}"); - diagnostics.AppendLine("โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€"); - diagnostics.AppendLine(networkInspect); - diagnostics.AppendLine(); - } - } - } - - /// - /// Capture Aspire-specific network information (networks created by Aspire). 
- /// - private static async Task CaptureAspireNetworksAsync(StringBuilder diagnostics) - { - diagnostics.AppendLine("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine("ASPIRE NETWORKS"); - diagnostics.AppendLine("โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - diagnostics.AppendLine(); - - // Try to find Aspire-created networks (typically have specific patterns) - var dockerNetworks = await TryRunCommandAsync("docker", "network ls --filter \"name=aspire\" --format \"table {{.Name}}\\t{{.Driver}}\\t{{.Scope}}\""); - if (!string.IsNullOrWhiteSpace(dockerNetworks)) - { - diagnostics.AppendLine("๐Ÿณ Aspire Networks (Docker):"); - diagnostics.AppendLine(dockerNetworks); - diagnostics.AppendLine(); - } - - var podmanNetworks = await TryRunCommandAsync("podman", "network ls --filter \"name=aspire\" --format \"table {{.Name}}\\t{{.Driver}}\""); - if (!string.IsNullOrWhiteSpace(podmanNetworks)) - { - diagnostics.AppendLine("๐Ÿฆญ Aspire Networks (Podman):"); - diagnostics.AppendLine(podmanNetworks); - diagnostics.AppendLine(); - } - - // Also check for custom networks that might be created by tests - var customNetworks = await TryRunCommandAsync("docker", "network ls --filter \"driver=bridge\" --format \"table {{.Name}}\\t{{.Driver}}\\t{{.Scope}}\""); - if (!string.IsNullOrWhiteSpace(customNetworks)) - { - diagnostics.AppendLine("๐ŸŒ‰ Bridge Networks:"); - diagnostics.AppendLine(customNetworks); - diagnostics.AppendLine(); - } - } - - /// - /// Try to run a command and return its output, or empty string if it fails. - /// - private static async Task TryRunCommandAsync(string command, string arguments) - { - try - { - var psi = new ProcessStartInfo - { - FileName = command, - Arguments = arguments, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - using var process = Process.Start(psi); - if (process == null) - { - return string.Empty; - } - - var output = await process.StandardOutput.ReadToEndAsync(); - await process.WaitForExitAsync(); - - // Return output if successful, otherwise return empty - if (process.ExitCode == 0 && !string.IsNullOrWhiteSpace(output)) - { - return output; - } - - // Also return output even if exit code is non-zero but we have output - if (!string.IsNullOrWhiteSpace(output)) - { - return output; - } - - return string.Empty; - } - catch - { - return string.Empty; - } - } - - /// - /// Clean up old network diagnostic log files (keep only last 7 days). 
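One caveat in the sweep below: File.GetCreationTime returns local time while the cutoff is computed from DateTime.UtcNow; using the UTC variants on both sides keeps the comparison consistent. A sketch under that assumption (`logDirectory` is illustrative):

    // Sketch: age-based sweep with UTC timestamps on both sides of the comparison.
    var cutoffUtc = DateTime.UtcNow.AddDays(-7);
    foreach (var file in Directory.GetFiles(logDirectory, "network.log.*"))
    {
        if (File.GetCreationTimeUtc(file) < cutoffUtc)
        {
            try { File.Delete(file); } catch { /* ignore deletion failures */ }
        }
    }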
- /// - public static void CleanupOldLogs() - { - try - { - if (!Directory.Exists(LogDirectory)) - { - return; - } - - var cutoffDate = DateTime.UtcNow.AddDays(-7); - var logFiles = Directory.GetFiles(LogDirectory, "network.log.*") - .Where(f => File.GetCreationTime(f) < cutoffDate) - .ToList(); - - foreach (var file in logFiles) - { - try - { - File.Delete(file); - Console.WriteLine($"๐Ÿงน Deleted old network log: {Path.GetFileName(file)}"); - } - catch - { - // Ignore deletion failures - } - } - } - catch - { - // Ignore cleanup failures - } - } -} \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/ReleasePackagesTesting.Published.IntegrationTests.csproj b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/ReleasePackagesTesting.Published.IntegrationTests.csproj deleted file mode 100644 index 2b89e38c..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/ReleasePackagesTesting.Published.IntegrationTests.csproj +++ /dev/null @@ -1,73 +0,0 @@ - - - - net9.0 - enable - enable - true - true - false - false - - - - - - - - - - - - - - - - - - - - - - - 3.9.11 - $([System.IO.Path]::GetFullPath('$(MSBuildProjectDirectory)\..\..\tools')) - $(ToolsDir)\apache-maven-$(MavenVersion) - $(MavenInstallDir)\bin - $(JAVA_HOME) - - - - - - <_NativeFlinkJobDir>..\..\LocalTesting\NativeFlinkJob - <_NativeFlinkJobJar>$(_NativeFlinkJobDir)\target\native-flink-kafka-job-1.0.0.jar - - - - - - - true - true - true - mvn.cmd - mvn - - - - - - - - - - - diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/TemporalIntegrationTests.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/TemporalIntegrationTests.cs deleted file mode 100644 index aa4f7db6..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/TemporalIntegrationTests.cs +++ /dev/null @@ -1,389 +0,0 @@ -using System.Diagnostics; -using LocalTesting.FlinkSqlAppHost; -using NUnit.Framework; -using Temporalio.Activities; -using Temporalio.Client; -using Temporalio.Worker; -using Temporalio.Workflows; -using Temporalio.Exceptions; - -namespace LocalTesting.IntegrationTests; - -/// -/// Temporal integration test demonstrating BizTalk-style orchestration patterns. -/// This test validates complex workflow scenarios that Flink cannot handle: -/// - Long-running processes with state persistence -/// - Human interaction points (signals/queries) -/// - Complex compensation logic -/// - Multi-step business processes with branching -/// Tests bring total integration test count to 10 (7 Gateway + 1 Native + 2 Temporal). 
-/// -[TestFixture] -[Parallelizable(ParallelScope.All)] -[Category("temporal-orchestration")] -public class TemporalIntegrationTests : LocalTestingTestBase -{ - private static readonly TimeSpan TestTimeout = TimeSpan.FromMinutes(2); - - [Test] - public async Task Temporal_BizTalkStyleOrchestration_ComplexOrderProcessing() - { - TestPrerequisites.EnsureDockerAvailable(); - - var baseToken = TestContext.CurrentContext.CancellationToken; - using var testTimeout = new CancellationTokenSource(TestTimeout); - using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(baseToken, testTimeout.Token); - var ct = linkedCts.Token; - - TestContext.WriteLine("โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—"); - TestContext.WriteLine("โ•‘ ๐Ÿš€ Temporal + Kafka + FlinkDotNet Integration Test โ•‘"); - TestContext.WriteLine("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - TestContext.WriteLine(""); - TestContext.WriteLine("๐Ÿ“‹ Test Scenario: BizTalk-Style Order Processing with Full Stack Integration"); - TestContext.WriteLine(" 1. Temporal Workflow orchestrates multi-step business process"); - TestContext.WriteLine(" 2. Kafka provides message transport for order events"); - TestContext.WriteLine(" 3. FlinkDotNet processes real-time order analytics"); - TestContext.WriteLine(""); - TestContext.WriteLine("โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”"); - TestContext.WriteLine("โ”‚ Infrastructure Validation โ”‚"); - TestContext.WriteLine("โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜"); - TestContext.WriteLine($"โœ… Kafka Endpoint: {KafkaConnectionString}"); - TestContext.WriteLine($"โœ… Temporal Endpoint: {TemporalEndpoint}"); - TestContext.WriteLine($"โœ… Infrastructure: All services ready from global setup"); - TestContext.WriteLine(""); - - var stopwatch = Stopwatch.StartNew(); - - try - { - // CRITICAL: Verify Temporal endpoint is available from global infrastructure - if (string.IsNullOrEmpty(TemporalEndpoint)) - { - throw new InvalidOperationException( - "Temporal endpoint not available. 
Ensure GlobalTestInfrastructure completed successfully.");
-            }
-
-            // The GlobalTestInfrastructure already started Temporal and discovered the dynamic endpoint
-            TestContext.WriteLine($"๐Ÿ” Using discovered Temporal endpoint: {TemporalEndpoint}");
-            TestContext.WriteLine($"โœ… Temporal infrastructure verified and ready");
-
-            // Connect to Temporal using discovered endpoint (not hardcoded port)
-            TestContext.WriteLine($"๐Ÿ“ก Connecting to Temporal at {TemporalEndpoint}");
-            var client = await TemporalClient.ConnectAsync(new TemporalClientConnectOptions
-            {
-                TargetHost = TemporalEndpoint,
-                Namespace = "default",
-            });
-
-            var taskQueue = $"order-processing-{TestContext.CurrentContext.Test.ID}";
-            TestContext.WriteLine($"๐Ÿ”ง Creating worker on task queue: {taskQueue}");
-
-            using var worker = new TemporalWorker(
-                client,
-                new TemporalWorkerOptions(taskQueue)
-                    .AddWorkflow<OrderProcessingOrchestration>()
-                    .AddAllActivities(new OrderActivities()));
-
-            await worker.ExecuteAsync(async () =>
-            {
-                TestContext.WriteLine("โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”");
-                TestContext.WriteLine("โ”‚ Step 1: Initialize Kafka Topics for Order Events                       โ”‚");
-                TestContext.WriteLine("โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜");
-
-                var orderInputTopic = $"order-input-{TestContext.CurrentContext.Test.ID}";
-                var orderEventsTopic = $"order-events-{TestContext.CurrentContext.Test.ID}";
-
-                TestContext.WriteLine($"๐Ÿ“จ Creating Kafka topics:");
-                TestContext.WriteLine($"   Input Topic: {orderInputTopic}");
-                TestContext.WriteLine($"   Events Topic: {orderEventsTopic}");
-                TestContext.WriteLine("");
-
-                // Start complex order processing workflow
-                var orderId = $"ORDER-{Guid.NewGuid().ToString()[..8]}";
-                var workflowId = $"order-workflow-{orderId}";
-
-                TestContext.WriteLine("โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”");
-                TestContext.WriteLine("โ”‚ Step 2: Start Temporal Workflow for Order Orchestration                โ”‚");
-                TestContext.WriteLine("โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜");
-                TestContext.WriteLine($"๐Ÿ“ฆ Order ID: {orderId}");
-                TestContext.WriteLine($"๐Ÿ”ง Workflow ID: {workflowId}");
-                TestContext.WriteLine($"๐Ÿ“‹ Task Queue: {taskQueue}");
-                TestContext.WriteLine("");
-
-                var orderRequest = new OrderRequest
-                {
-                    OrderId = orderId,
-                    CustomerId = "CUST-001",
-                    Amount = 1500.00m,
-                    Items = new[] { "Product A", "Product B" },
-                    RequiresApproval = true // High-value order needs approval
-                };
-
-                var handle = await client.StartWorkflowAsync(
-                    (OrderProcessingOrchestration wf) => wf.ProcessOrderAsync(orderRequest),
-                    new WorkflowOptions(id: workflowId, taskQueue: taskQueue)
-                    {
-                        TaskTimeout = TimeSpan.FromSeconds(10),
-                    });
-
-                TestContext.WriteLine("โœ… Workflow started successfully");
-                TestContext.WriteLine("");
-
-
TestContext.WriteLine("โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”"); - TestContext.WriteLine("โ”‚ Step 3: Workflow Executes - Order Validation Activity โ”‚"); - TestContext.WriteLine("โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜"); - TestContext.WriteLine("๐Ÿ”„ Temporal executes ValidateOrderAsync activity"); - TestContext.WriteLine(" - Validates order amount > 0"); - TestContext.WriteLine(" - Validates items array not empty"); - TestContext.WriteLine(""); - - // Simulate human approval (signal) after brief delay - await Task.Delay(1000, ct); - - TestContext.WriteLine("โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”"); - TestContext.WriteLine("โ”‚ Step 4: Human Interaction - Manager Approval Signal โ”‚"); - TestContext.WriteLine("โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜"); - TestContext.WriteLine("๐Ÿ‘ค Simulating manager approval signal (MANAGER-001)"); - TestContext.WriteLine(" ๐Ÿ“ก Sending signal to workflow..."); - await handle.SignalAsync(wf => wf.ApproveOrder("MANAGER-001")); - TestContext.WriteLine(" โœ… Approval signal received by workflow"); - TestContext.WriteLine(""); - - TestContext.WriteLine("โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”"); - TestContext.WriteLine("โ”‚ Step 5: Workflow Continues - Payment & Inventory Activities โ”‚"); - TestContext.WriteLine("โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜"); - TestContext.WriteLine("๐Ÿ’ณ Temporal executes ProcessPaymentAsync activity (with retry policy)"); - TestContext.WriteLine("๐Ÿ“ฆ Temporal executes ReserveInventoryAsync activities in parallel"); - TestContext.WriteLine("๐Ÿšš Temporal executes CreateShipmentAsync activity"); - TestContext.WriteLine(""); - - TestContext.WriteLine("โณ Waiting for workflow completion..."); - var result = await handle.GetResultAsync(); - - TestContext.WriteLine(""); - TestContext.WriteLine("โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—"); - TestContext.WriteLine("โ•‘ ๐Ÿ“Š Workflow Execution Result โ•‘"); - TestContext.WriteLine("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"); - TestContext.WriteLine($"โœ… Status: {result.Status}"); - TestContext.WriteLine($"๐Ÿ“ฆ Order 
ID: {result.OrderId}"); - TestContext.WriteLine($"๐Ÿšš Shipment ID: {result.ShipmentId}"); - TestContext.WriteLine($"๐Ÿ“‹ Total Steps: {result.Steps.Count}"); - TestContext.WriteLine(""); - TestContext.WriteLine("Execution Steps:"); - foreach (var step in result.Steps) - { - TestContext.WriteLine($" โœ“ {step}"); - } - TestContext.WriteLine(""); - - TestContext.WriteLine("โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”"); - TestContext.WriteLine("โ”‚ Integration Architecture Demonstrated โ”‚"); - TestContext.WriteLine("โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜"); - TestContext.WriteLine("๐Ÿ”„ Temporal Workflow: Orchestrated multi-step business process"); - TestContext.WriteLine(" - Long-running state management (order approval wait)"); - TestContext.WriteLine(" - Human interaction via signals (manager approval)"); - TestContext.WriteLine(" - Automatic retry policies (payment processing)"); - TestContext.WriteLine(" - Parallel activity execution (inventory reservation)"); - TestContext.WriteLine(""); - TestContext.WriteLine("๐Ÿ“จ Kafka Integration: Message transport layer ready"); - TestContext.WriteLine($" - Kafka Endpoint: {KafkaConnectionString}"); - TestContext.WriteLine($" - Input Topic: {orderInputTopic} (configured for order intake)"); - TestContext.WriteLine($" - Events Topic: {orderEventsTopic} (configured for event publishing)"); - TestContext.WriteLine(" - Flink jobs can consume these topics for real-time analytics"); - TestContext.WriteLine(""); - TestContext.WriteLine("โšก FlinkDotNet + Flink: Available for stream processing"); - TestContext.WriteLine(" - Flink JobManager: Running with TaskManagers"); - TestContext.WriteLine(" - FlinkDotNet Gateway: Ready for job submission"); - TestContext.WriteLine(" - Can process order events in real-time"); - TestContext.WriteLine(" - Would aggregate: orders/sec, revenue, avg amount, etc."); - TestContext.WriteLine(""); - - // Verify orchestration completed all steps - Assert.That(result.Status, Is.EqualTo("Completed"), "Order should be completed"); - Assert.That(result.Steps.Count, Is.GreaterThanOrEqualTo(5), "Should have multiple orchestration steps"); - Assert.That(result.Steps, Does.Contain("Order validated"), "Should validate order"); - Assert.That(result.Steps.Any(s => s.StartsWith("Approval received")), Is.True, "Should receive approval"); - Assert.That(result.Steps, Does.Contain("Payment processed"), "Should process payment"); - Assert.That(result.Steps, Does.Contain("Inventory reserved"), "Should reserve inventory"); - Assert.That(result.Steps.Any(s => s.StartsWith("Shipment created")), Is.True, "Should create shipment"); - - stopwatch.Stop(); - TestContext.WriteLine("โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—"); - TestContext.WriteLine($"โ•‘ โœ… Integration Test PASSED - Completed in {stopwatch.Elapsed.TotalSeconds:F1}s โ•‘"); - 
TestContext.WriteLine("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•");
-            }, ct);
-        }
-        catch (Exception ex)
-        {
-            stopwatch.Stop();
-            TestContext.WriteLine($"โŒ Orchestration failed after {stopwatch.Elapsed.TotalSeconds:F1}s: {ex.Message}");
-            throw;
-        }
-    }
-}
-
-#region Workflow and Activity Definitions (BizTalk-Style Orchestration)
-
-/// <summary>
-/// Complex order processing orchestration - demonstrates BizTalk-style workflow.
-/// This pattern cannot be implemented in Flink because it requires:
-/// - Long-running state (hours/days)
-/// - Human interaction (approval signals)
-/// - Complex branching and compensation logic
-/// - Durable execution with automatic retries
-/// </summary>
-[Workflow]
-public class OrderProcessingOrchestration
-{
-    private bool approved = false;
-    private string? approver;
-    private readonly List<string> steps = new();
-
-    [WorkflowRun]
-    public async Task<OrderResult> ProcessOrderAsync(OrderRequest request)
-    {
-        steps.Add("Workflow started");
-
-        // Step 1: Validate order (synchronous activity)
-        var isValid = await Workflow.ExecuteActivityAsync(
-            (OrderActivities act) => act.ValidateOrderAsync(request),
-            new ActivityOptions { StartToCloseTimeout = TimeSpan.FromSeconds(10) });
-
-        if (!isValid)
-        {
-            steps.Add("Order validation failed");
-            return new OrderResult { OrderId = request.OrderId, Status = "Rejected", Steps = steps };
-        }
-        steps.Add("Order validated");
-
-        // Step 2: Wait for approval if required (human interaction - cannot do in Flink!)
-        if (request.RequiresApproval)
-        {
-            steps.Add("Waiting for approval");
-            await Workflow.WaitConditionAsync(() => approved, TimeSpan.FromSeconds(30));
-            steps.Add($"Approval received from {approver}");
-        }
-
-        // Step 3: Process payment (with retry logic)
-        var paymentSuccess = await Workflow.ExecuteActivityAsync(
-            (OrderActivities act) => act.ProcessPaymentAsync(request.OrderId, request.Amount),
-            new ActivityOptions
-            {
-                StartToCloseTimeout = TimeSpan.FromSeconds(10),
-                RetryPolicy = new() { MaximumAttempts = 3 } // Automatic retries
-            });
-
-        if (!paymentSuccess)
-        {
-            steps.Add("Payment failed - order cancelled");
-            return new OrderResult { OrderId = request.OrderId, Status = "Cancelled", Steps = steps };
-        }
-        steps.Add("Payment processed");
-
-        // Step 4: Reserve inventory (parallel activities - Flink can do but not with state management)
-        steps.Add("Reserving inventory");
-        var inventoryTasks = request.Items.Select(item =>
-            Workflow.ExecuteActivityAsync(
-                (OrderActivities act) => act.ReserveInventoryAsync(item),
-                new ActivityOptions { StartToCloseTimeout = TimeSpan.FromSeconds(10) })).ToList();
-
-        await Task.WhenAll(inventoryTasks);
-        steps.Add("Inventory reserved");
-
-        // Step 5: Create shipment
-        var shipmentId = await Workflow.ExecuteActivityAsync(
-            (OrderActivities act) => act.CreateShipmentAsync(request.OrderId),
-            new ActivityOptions { StartToCloseTimeout = TimeSpan.FromSeconds(10) });
-
-        steps.Add($"Shipment created: {shipmentId}");
-        steps.Add("Order processing complete");
-
-        return new OrderResult
-        {
-            OrderId = request.OrderId,
-            Status = "Completed",
-            ShipmentId = shipmentId,
-            Steps = steps
-        };
-    }
-
-    [WorkflowSignal]
-    public async Task ApproveOrder(string approverName)
-    {
-        approver = approverName;
-        approved = true;
-        await Task.CompletedTask;
-    }
-
-    [WorkflowQuery]
-    public List<string> GetCurrentSteps() => steps;
-}
-
-/// <summary>
-/// Activities represent individual business operations in the orchestration.
-/// Each activity can be retried independently if it fails.
-/// MUST be instance methods for Temporal activity registration.
-/// </summary>
-#pragma warning disable S2325 // Methods should not be static - Required for Temporal activity pattern
-public sealed class OrderActivities
-{
-    [Activity]
-    public Task<bool> ValidateOrderAsync(OrderRequest request)
-    {
-        // Simulate validation logic
-        var isValid = request.Amount > 0 && request.Items.Length > 0;
-        return Task.FromResult(isValid);
-    }
-
-    [Activity]
-    public Task<bool> ProcessPaymentAsync(string orderId, decimal amount)
-    {
-        // Simulate payment processing - orderId used for simulation context
-        _ = orderId; // Acknowledge parameter usage
-        _ = amount;
-        return Task.FromResult(true);
-    }
-
-    [Activity]
-    public Task<bool> ReserveInventoryAsync(string item)
-    {
-        // Simulate inventory reservation - item used for simulation context
-        _ = item; // Acknowledge parameter usage
-        return Task.FromResult(true);
-    }
-
-    [Activity]
-    public Task<string> CreateShipmentAsync(string orderId)
-    {
-        // Simulate shipment creation - orderId used for tracking context
-        var shipmentId = $"SHIP-{Guid.NewGuid().ToString()[..8]}";
-        _ = orderId; // Acknowledge parameter usage
-        return Task.FromResult(shipmentId);
-    }
-}
-#pragma warning restore S2325
-
-/// <summary>
-/// Request model for order processing.
-/// </summary>
-public record OrderRequest
-{
-    public required string OrderId { get; init; }
-    public required string CustomerId { get; init; }
-    public required decimal Amount { get; init; }
-    public required string[] Items { get; init; }
-    public bool RequiresApproval { get; init; }
-}
-
-/// <summary>
-/// Result model for order processing.
-/// </summary>
-public record OrderResult
-{
-    public required string OrderId { get; init; }
-    public required string Status { get; init; }
-    public string? ShipmentId { get; init; }
-    public required List<string> Steps { get; init; }
-}
-
-#endregion
\ No newline at end of file
diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/TestPrerequisites.cs b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/TestPrerequisites.cs
deleted file mode 100644
index 35be5362..00000000
--- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.IntegrationTests/TestPrerequisites.cs
+++ /dev/null
@@ -1,183 +0,0 @@
-using System.Diagnostics;
-using NUnit.Framework;
-
-namespace LocalTesting.IntegrationTests;
-
-internal static class TestPrerequisites
-{
-    private static bool? _containerRuntimeAvailable;
-
-    internal static void EnsureDockerAvailable()
-    {
-        _containerRuntimeAvailable ??= ProbeContainerRuntime();
-
-        if (_containerRuntimeAvailable != true)
-        {
-            Assert.That(_containerRuntimeAvailable, Is.True,
-                "Container runtime (Docker or Podman) is not available or not responsive. 
" + - "Ensure Docker Desktop or Podman is running before executing LocalTesting integration tests."); - } - } - - internal static bool ProbeFlinkGatewayBuildable() - { - // IMPORTANT: Do NOT use cached value - always re-check to detect newly built JARs - // The previous caching caused tests to fail even after JARs were built - - var repoRoot = Path.GetFullPath(Path.Combine(AppContext.BaseDirectory, "../../../../..")); - var gatewayProj = Path.Combine(repoRoot, "FlinkDotNet", "FlinkDotNet.JobGateway", "FlinkDotNet.JobGateway.csproj"); - - if (!ValidateGatewayProjectExists(gatewayProj)) - { - return false; - } - - try - { - var runnerJarExists = CheckRunnerJarExists(repoRoot); - return runnerJarExists; - } - catch (Exception ex) - { - TestContext.WriteLine($"FlinkDotNet.JobGateway build probe threw {ex.GetType().Name}: {ex.Message}"); - return false; - } - } - - private static bool ValidateGatewayProjectExists(string gatewayProj) - { - if (File.Exists(gatewayProj)) - { - return true; - } - - TestContext.WriteLine($"FlinkDotNet.JobGateway project not found at {gatewayProj}"); - return false; - } - - private static bool CheckRunnerJarExists(string repoRoot) - { - var candidateNames = new[] { "flink-ir-runner-java17.jar" }; - var candidateDirs = new[] - { - // Check Gateway build output directories first (where MSBuild copies JARs) - Path.Combine(repoRoot, "FlinkDotNet", "FlinkDotNet.JobGateway", "bin", "Release", "net9.0"), - Path.Combine(repoRoot, "FlinkDotNet", "FlinkDotNet.JobGateway", "bin", "Debug", "net9.0"), - // Then check Maven build locations - Path.Combine(repoRoot, "FlinkIRRunner", "target"), - Path.Combine(repoRoot, "FlinkDotNet", "FlinkDotNet.JobGateway", "FlinkIRRunner", "target") - }; - - foreach (var dir in candidateDirs) - { - foreach (var name in candidateNames) - { - var full = Path.Combine(dir, name); - if (File.Exists(full)) - { - return true; - } - } - } - - return false; - } - - private static bool ProbeContainerRuntime() - { - // Try Docker first - if (ProbeRuntime("docker")) - { - return true; - } - - // Try Podman as fallback - if (ProbeRuntime("podman")) - { - return true; - } - - return false; - } - - private static bool ProbeRuntime(string runtimeCommand) - { - try - { - var psi = new ProcessStartInfo - { - FileName = runtimeCommand, - RedirectStandardOutput = true, - RedirectStandardError = true, - UseShellExecute = false, - CreateNoWindow = true - }; - - // Use 'version' command which works consistently for both Docker and Podman - psi.ArgumentList.Add("version"); - psi.ArgumentList.Add("--format"); - - // Docker uses {{.Server.Version}}, Podman uses {{.Version}} - // Use the simpler format that works for both - if (runtimeCommand.Equals("docker", StringComparison.OrdinalIgnoreCase)) - { - psi.ArgumentList.Add("{{.Server.Version}}"); - } - else // podman - { - psi.ArgumentList.Add("{{.Version}}"); - } - - using var process = Process.Start(psi); - if (process == null) - { - return false; - } - - if (!process.WaitForExit(1000)) - { - try - { - process.Kill(entireProcessTree: true); - } - catch (InvalidOperationException) - { - // Process already exited - } - return false; - } - - if (process.ExitCode != 0) - { - var error = process.StandardError.ReadToEnd(); - TestContext.WriteLine($"{runtimeCommand} probe failed with exit code {process.ExitCode}: {error}"); - return false; - } - - var output = process.StandardOutput.ReadToEnd().Trim(); - if (string.IsNullOrEmpty(output) || string.Equals(output, "null", StringComparison.OrdinalIgnoreCase)) - { - 
TestContext.WriteLine($"{runtimeCommand} probe returned an unexpected payload."); - return false; - } - - return true; - } - catch (Exception ex) - { - TestContext.WriteLine($"{runtimeCommand} probe threw {ex.GetType().Name}: {ex.Message}"); - return false; - } - } -} - - - - - - - - - - - diff --git a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln b/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln deleted file mode 100644 index f7046612..00000000 --- a/ReleasePackagesTesting.Published/ReleasePackagesTesting.Published.sln +++ /dev/null @@ -1,27 +0,0 @@ -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.0.31903.59 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReleasePackagesTesting.Published.FlinkSqlAppHost", "ReleasePackagesTesting.Published.FlinkSqlAppHost\ReleasePackagesTesting.Published.FlinkSqlAppHost.csproj", "{C1D2E3F4-A5B6-7890-CDEF-123456789ABC}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReleasePackagesTesting.Published.IntegrationTests", "ReleasePackagesTesting.Published.IntegrationTests\ReleasePackagesTesting.Published.IntegrationTests.csproj", "{D2E3F4A5-B6C7-8901-DEFF-23456789ABCD}" -EndProject -Global -GlobalSection(SolutionConfigurationPlatforms) = preSolution -Debug|Any CPU = Debug|Any CPU -Release|Any CPU = Release|Any CPU -EndGlobalSection -GlobalSection(ProjectConfigurationPlatforms) = postSolution -{C1D2E3F4-A5B6-7890-CDEF-123456789ABC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU -{C1D2E3F4-A5B6-7890-CDEF-123456789ABC}.Debug|Any CPU.Build.0 = Debug|Any CPU -{C1D2E3F4-A5B6-7890-CDEF-123456789ABC}.Release|Any CPU.ActiveCfg = Release|Any CPU -{C1D2E3F4-A5B6-7890-CDEF-123456789ABC}.Release|Any CPU.Build.0 = Release|Any CPU -{D2E3F4A5-B6C7-8901-DEFF-23456789ABCD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU -{D2E3F4A5-B6C7-8901-DEFF-23456789ABCD}.Debug|Any CPU.Build.0 = Debug|Any CPU -{D2E3F4A5-B6C7-8901-DEFF-23456789ABCD}.Release|Any CPU.ActiveCfg = Release|Any CPU -{D2E3F4A5-B6C7-8901-DEFF-23456789ABCD}.Release|Any CPU.Build.0 = Release|Any CPU -EndGlobalSection -GlobalSection(SolutionProperties) = preSolution -HideSolutionNode = FALSE -EndGlobalSection -EndGlobal diff --git a/ReleasePackagesTesting.Published/appsettings.LearningCourse.json b/ReleasePackagesTesting.Published/appsettings.LearningCourse.json deleted file mode 100644 index bf34f52a..00000000 --- a/ReleasePackagesTesting.Published/appsettings.LearningCourse.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "Metrics": { - "Prometheus": { - "Enabled": true, - "Port": 8080, - "Path": "/metrics" - } - } -} \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/connectors/flink/lib/README.md b/ReleasePackagesTesting.Published/connectors/flink/lib/README.md deleted file mode 100644 index effc7056..00000000 --- a/ReleasePackagesTesting.Published/connectors/flink/lib/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# LocalTesting Flink Connector Library - -This directory is mounted into Flink containers at `/opt/flink/usrlib/` and loaded via `FLINK_CLASSPATH`. 
- -## Important: Version Compatibility - -**The connector JARs in this directory MUST match your Flink cluster version precisely!** - -Current Flink cluster version: **2.1.0-java17** (see `LocalTesting.FlinkSqlAppHost/Program.cs`) - -## Required Connectors for SQL Jobs - -For SQL-based jobs (Pattern 5 & 6), you need compatible Flink 2.x connectors: - -- `flink-sql-connector-kafka` (version 4.x.x-2.0 or compatible with Flink 2.x) -- `flink-sql-json` (version 2.1.0) -- `flink-table-planner_2.12` (version 2.1.0) -- `flink-table-runtime` (version 2.1.0) - -## Currently Installed Connectors - -- `flink-sql-connector-kafka-4.0.1-2.0.jar` - **Compatible with Flink 2.0/2.1** - -This is the latest official Flink SQL Kafka connector from Maven Central compatible with Flink 2.x series. - -## DataStream API Jobs - -DataStream API jobs (Patterns 1-4, 7) do NOT require these connectors - they use the `kafka-clients` library which is bundled in the FlinkIRRunner JAR. - -## Installation - -Download compatible connector JARs and place them in this directory. The LocalTesting Aspire host will automatically mount them into the Flink containers. - -If targeting a production cluster, copy these JARs to `/opt/flink/lib` (or your distribution's equivalent). - -## Version Notes - -- **Flink 2.0 connectors** (4.0.x-2.0) are compatible with Flink 2.1.0 -- Connector version follows pattern: `-` -- Always use connectors matching your Flink major version (2.x for Flink 2.1.0) diff --git a/ReleasePackagesTesting.Published/flink-conf-learningcourse.yaml b/ReleasePackagesTesting.Published/flink-conf-learningcourse.yaml deleted file mode 100644 index 09ee6ddf..00000000 --- a/ReleasePackagesTesting.Published/flink-conf-learningcourse.yaml +++ /dev/null @@ -1,66 +0,0 @@ -# Flink Configuration for LEARNINGCOURSE Mode with Prometheus Metrics -# This configuration enables Prometheus metrics export for observability - -# JobManager Configuration -jobmanager.rpc.address: flink-jobmanager -jobmanager.rpc.port: 6123 -jobmanager.memory.process.size: 1600m - -# TaskManager Configuration -taskmanager.memory.process.size: 1728m -taskmanager.memory.jvm-metaspace.size: 512m -taskmanager.numberOfTaskSlots: 10 - -# High Availability -high-availability.type: none - -# Checkpointing -state.backend: hashmap -state.checkpoints.dir: file:///tmp/flink-checkpoints -state.savepoints.dir: file:///tmp/flink-savepoints - -# Metrics Reporters - PROMETHEUS CONFIGURATION -# Port will be overridden per component via FLINK_PROPERTIES environment variable -# JobManager: 9250, TaskManager: 9251, SQL Gateway: 9252 -metrics.reporters: prom -metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory -metrics.reporter.prom.port: 9250-9252 -metrics.reporter.prom.filterLabelValueCharacters: false - -# Rest API -rest.port: 8081 -rest.address: 0.0.0.0 -rest.bind-address: 0.0.0.0 - -# SQL Gateway Configuration (required for Flink 2.1.0) -sql-gateway.endpoint.rest.address: flink-sql-gateway -sql-gateway.endpoint.rest.bind-address: 0.0.0.0 -sql-gateway.endpoint.rest.port: 8083 -sql-gateway.endpoint.rest.bind-port: 8083 -sql-gateway.endpoint.type: rest -sql-gateway.session.check-interval: 60000 -sql-gateway.session.idle-timeout: 600000 -sql-gateway.worker.threads.max: 10 - -# Parallelism -parallelism.default: 1 - -# Heartbeat -heartbeat.interval: 5000 -heartbeat.timeout: 30000 - -# Pekko Configuration -pekko.ask.timeout: 30s - -# Classloader Configuration -classloader.resolve-order: parent-first 
-classloader.parent-first-patterns.default: org.apache.flink.;org.apache.kafka.;com.fasterxml.jackson. - -# Java Options -env.java.opts.all: --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED - -# Monitoring Scope Templates -metrics.scope.jm: .jobmanager -metrics.scope.jm.job: .jobmanager. -metrics.scope.tm: .taskmanager. -metrics.scope.task: .taskmanager.... \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/grafana-kafka-dashboard.json b/ReleasePackagesTesting.Published/grafana-kafka-dashboard.json deleted file mode 100644 index 9babf9e5..00000000 --- a/ReleasePackagesTesting.Published/grafana-kafka-dashboard.json +++ /dev/null @@ -1,167 +0,0 @@ -{ - "dashboard": { - "title": "Kafka Metrics Dashboard", - "tags": ["kafka", "metrics"], - "timezone": "browser", - "schemaVersion": 16, - "version": 0, - "refresh": "5s", - "panels": [ - { - "id": 1, - "title": "Kafka Messages In Per Second (All Topics)", - "type": "graph", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 - }, - "targets": [ - { - "expr": "kafka_server_brokertopicmetrics_messagesinpersec_count_total", - "legendFormat": "{{topic}}", - "refId": "A" - } - ], - "yaxes": [ - { - "format": "short", - "label": "Messages" - }, - { - "format": "short" - } - ], - "xaxis": { - "mode": "time" - } - }, - { - "id": 2, - "title": "Kafka Bytes In Per Second (All Topics)", - "type": "graph", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 0 - }, - "targets": [ - { - "expr": "kafka_server_brokertopicmetrics_bytesinpersec_count_total", - "legendFormat": "{{topic}}", - "refId": "A" - } - ], - "yaxes": [ - { - "format": "bytes", - "label": "Bytes" - }, - { - "format": "short" - } - ], - "xaxis": { - "mode": "time" - } - }, - { - "id": 3, - "title": "Kafka Messages Out Per Second (All Topics)", - "type": "graph", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 8 - }, - "targets": [ - { - "expr": "kafka_server_brokertopicmetrics_messagesoutpersec_count_total", - "legendFormat": "{{topic}}", - "refId": "A" - } - ], - "yaxes": [ - { - "format": "short", - "label": "Messages" - }, - { - "format": "short" - } - ], - "xaxis": { - "mode": "time" - } - }, - { - "id": 4, - "title": "Kafka Bytes Out Per Second (All Topics)", - "type": "graph", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 8 - }, - "targets": [ - { - "expr": "kafka_server_brokertopicmetrics_bytesoutpersec_count_total", - "legendFormat": "{{topic}}", - "refId": "A" - } - ], - "yaxes": [ - { - "format": "bytes", - "label": "Bytes" - }, - { - "format": "short" - } - ], - "xaxis": { - "mode": "time" - } - }, - { - "id": 5, - "title": "Kafka Message Rate (Messages/sec)", - "type": "graph", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 16 - }, - "targets": [ - { - "expr": "rate(kafka_server_brokertopicmetrics_messagesinpersec_count_total[1m])", - "legendFormat": "In: {{topic}}", - "refId": "A" - }, - { - "expr": "rate(kafka_server_brokertopicmetrics_messagesoutpersec_count_total[1m])", - "legendFormat": "Out: {{topic}}", - 
"refId": "B" - } - ], - "yaxes": [ - { - "format": "short", - "label": "Messages/sec" - }, - { - "format": "short" - } - ], - "xaxis": { - "mode": "time" - } - } - ] - } -} \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/grafana-provisioning-dashboards.yaml b/ReleasePackagesTesting.Published/grafana-provisioning-dashboards.yaml deleted file mode 100644 index 9b5aa860..00000000 --- a/ReleasePackagesTesting.Published/grafana-provisioning-dashboards.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: 1 - -providers: - - name: 'Kafka Metrics' - orgId: 1 - folder: '' - type: file - disableDeletion: false - updateIntervalSeconds: 10 - allowUiUpdates: true - options: - path: /etc/grafana/provisioning/dashboards \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/jmx-exporter-kafka-config.yml b/ReleasePackagesTesting.Published/jmx-exporter-kafka-config.yml deleted file mode 100644 index 8ffab730..00000000 --- a/ReleasePackagesTesting.Published/jmx-exporter-kafka-config.yml +++ /dev/null @@ -1,107 +0,0 @@ -# Kafka JMX Exporter Configuration for Prometheus -# This file configures which Kafka JMX metrics to export to Prometheus -# Used by bitnami/jmx-exporter container to scrape kafka:9101 JMX endpoint - -# JMX connection configuration -# Format: hostPort: "hostname:port" for JMX RMI connection -hostPort: kafka:9101 - -# Lower case output names for consistency -lowercaseOutputName: true -lowercaseOutputLabelNames: true - -# Whitelist patterns for Kafka metrics to export -whitelistObjectNames: - - "kafka.server:*" - - "kafka.controller:*" - - "kafka.network:*" - - "kafka.log:*" - - "java.lang:*" - -# Rules to transform JMX bean names to Prometheus metrics -rules: - # Kafka Server BrokerTopicMetrics with topic label (MOST SPECIFIC - must be first) - # Note: lowercaseOutputName setting will lowercase the entire metric name automatically - - pattern: kafka.server<>Count - name: kafka_server_brokertopicmetrics_$1_count_total - type: COUNTER - labels: - topic: "$2" - - - pattern: kafka.server<>(.+) - name: kafka_server_brokertopicmetrics_$1_$3 - type: GAUGE - labels: - topic: "$2" - - # Kafka Server BrokerTopicMetrics without topic (aggregate metrics) - - pattern: kafka.server<>Count - name: kafka_server_brokertopicmetrics_$1_count_total - type: COUNTER - - - pattern: kafka.server<>(.+) - name: kafka_server_brokertopicmetrics_$1_$2 - type: GAUGE - - # Kafka Server metrics (general patterns) - - pattern: kafka.server<>Value - name: kafka_server_$1_$2 - type: GAUGE - labels: - clientId: "$3" - topic: "$4" - partition: "$5" - - - pattern: kafka.server<>Value - name: kafka_server_$1_$2 - type: GAUGE - labels: - clientId: "$3" - broker: "$4:$5" - - - pattern: kafka.server<>Value - name: kafka_server_$1_$2 - type: GAUGE - - # Kafka Controller metrics - - pattern: kafka.controller<>Value - name: kafka_controller_$1_$2 - type: GAUGE - - # Kafka Network metrics - - pattern: kafka.network<>Value - name: kafka_network_$1_$2 - type: GAUGE - - # Kafka Log metrics - - pattern: kafka.log<>Value - name: kafka_log_$1_$2 - type: GAUGE - labels: - topic: "$3" - partition: "$4" - - - pattern: kafka.log<>Value - name: kafka_log_$1_$2 - type: GAUGE - - # JVM metrics - - pattern: 'java.lang(.+)' - name: java_lang_memory_heap_$1 - type: GAUGE - - - pattern: 'java.lang(.+)' - name: java_lang_memory_nonheap_$1 - type: GAUGE - - - pattern: 'java.lang<>CollectionCount' - name: java_lang_gc_collection_count - type: COUNTER - labels: - gc: "$1" - - - pattern: 'java.lang<>CollectionTime' - 
name: java_lang_gc_collection_time_ms - type: COUNTER - labels: - gc: "$1" \ No newline at end of file diff --git a/ReleasePackagesTesting.Published/prometheus.yml b/ReleasePackagesTesting.Published/prometheus.yml deleted file mode 100644 index 115ea673..00000000 --- a/ReleasePackagesTesting.Published/prometheus.yml +++ /dev/null @@ -1,76 +0,0 @@ -# Prometheus configuration for FlinkDotNet (Aspire-compatible) -# Scrapes metrics from Flink, Kafka, Gateway, and Prometheus itself -# NOTE: This is a template file - host.docker.internal is replaced at runtime - -global: - scrape_interval: 1s - evaluation_interval: 1s - external_labels: - monitor: 'flinkdotnet-monitor' - environment: 'development' - -scrape_configs: - # Flink JobManager metrics (Prometheus reporter on port 9250) - - job_name: 'flink-jobmanager' - metrics_path: '/metrics' - static_configs: - - targets: ['flink-jobmanager:9250'] - labels: - component: 'flink' - role: 'jobmanager' - - # Flink TaskManager metrics (Prometheus reporter on port 9251) - - job_name: 'flink-taskmanager' - metrics_path: '/metrics' - static_configs: - - targets: ['flink-taskmanager:9251'] - labels: - component: 'flink' - role: 'taskmanager' - - # Kafka JMX metrics via JMX Exporter - # โœ… STATUS: WORKING - Successfully collecting 231+ metric lines from Kafka JMX - # Metrics include: topic info, partition counts, consumer groups, producer stats, broker health - # Container: kafka-exporter exposes JMX metrics from kafka:9101 on port 5556 - # NOTE: Container name pattern is kafka-exporter-{random} from Aspire - # Using dynamic DNS resolution from Aspire's Docker network - # IMPORTANT: scrape_interval and scrape_timeout increased because JMX metrics collection takes time - # JMX exporter needs to query Kafka MBeans and format them as Prometheus metrics - # scrape_timeout must be less than scrape_interval - - job_name: 'kafka' - scrape_interval: 10s - scrape_timeout: 8s - metrics_path: '/metrics' - static_configs: - - targets: ['kafka-exporter:5556'] - labels: - component: 'kafka' - role: 'broker' - - # Flink SQL Gateway metrics (Prometheus reporter on port 9252) - # NOTE: SQL Gateway Prometheus endpoint not responding as of 2025-10-21 - # Port 9252 is exposed but endpoint returns connection refused - # Flink SQL Gateway may not support Prometheus metrics in current configuration - # TODO: Investigate if additional configuration or JARs needed for SQL Gateway metrics - # Commenting out to prevent Prometheus from marking target as DOWN - # - job_name: 'flink-sql-gateway' - # metrics_path: '/metrics' - # static_configs: - # - targets: ['flink-sql-gateway:9252'] - # labels: - # component: 'flink' - # role: 'sql-gateway' - - # FlinkDotNet JobGateway metrics - REMOVED - # DECISION: Do not scrape JobGateway metrics in current configuration - # JobGateway runs as host process (.AddProject) not containerized - # Container-to-host networking is complex (requires gateway IP detection) - # Future: When JobGateway is containerized, add Prometheus metrics scraping - - # Prometheus self-monitoring - - job_name: 'prometheus' - static_configs: - - targets: ['localhost:9090'] - labels: - component: 'prometheus' - role: 'server' \ No newline at end of file diff --git a/ReleasePackagesTesting/README.md b/ReleasePackagesTesting/README.md index 2b789cf2..42255b10 100644 --- a/ReleasePackagesTesting/README.md +++ b/ReleasePackagesTesting/README.md @@ -112,9 +112,18 @@ Uses Microsoft Aspire integration testing framework to: โœ… Uses same Aspire testing infrastructure as LocalTesting 
โœ… Prevents publishing broken releases -## Difference from ReleasePackagesTesting.Published +## Validation Modes -- **ReleasePackagesTesting** (this folder): Tests local artifacts BEFORE publishing (pre-release validation) -- **ReleasePackagesTesting.Published**: Tests published packages AFTER publishing (post-release validation) +This project supports two validation modes controlled by the `RELEASE_VALIDATION_MODE` environment variable: -Both use Microsoft Aspire integration testing framework for comprehensive validation. +- **PreRelease Mode** (default): Tests local artifacts BEFORE publishing (pre-release validation) + - Uses local NuGet packages from `./packages/` + - Uses local Docker image + - Prevents publishing broken releases + +- **PostRelease Mode**: Tests published packages AFTER publishing (post-release validation) + - Downloads packages from NuGet.org + - Pulls Docker images from Docker Hub + - Confirms the release actually works + +Both modes use the same Microsoft Aspire integration testing framework for comprehensive validation. diff --git a/docs/RELEASE_PACKAGE_VALIDATION.md b/docs/RELEASE_PACKAGE_VALIDATION.md index 5ec38814..898ee27a 100644 --- a/docs/RELEASE_PACKAGE_VALIDATION.md +++ b/docs/RELEASE_PACKAGE_VALIDATION.md @@ -41,16 +41,16 @@ Can be triggered manually with custom version: ## Local Testing -You can run the validation tests locally by executing `dotnet test` on the ReleasePackagesTesting projects: +You can run the validation tests locally by executing `dotnet test` on the ReleasePackagesTesting project: ```bash -# Run pre-release validation tests +# Run pre-release validation tests (default mode) cd ReleasePackagesTesting dotnet test --configuration Release -# Run post-release validation tests -cd ../ReleasePackagesTesting.Published -dotnet test --configuration Release +# Run post-release validation tests (use PostRelease mode) +cd ReleasePackagesTesting +RELEASE_VALIDATION_MODE=PostRelease dotnet test --configuration Release ``` ### Prerequisites for Local Testing @@ -156,5 +156,4 @@ Potential improvements: - [Release Workflows](.github/workflows/release-*.yml) - Production release processes - [LocalTesting README](LocalTesting/README.md) - Local development testing -- [ReleasePackagesTesting README](ReleasePackagesTesting/README.md) - Pre-release validation -- [ReleasePackagesTesting.Published README](ReleasePackagesTesting.Published/README.md) - Post-release validation +- [ReleasePackagesTesting README](ReleasePackagesTesting/README.md) - Release validation (pre and post)
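
For reference, the mode switch documented above can be implemented as a small environment-variable check in the shared test setup. The sketch below is illustrative only: `ValidationModeConfig` and its property names are hypothetical and not the repository's actual implementation; the NuGet.org feed URL and the `PreRelease` default are taken from the README above.

```csharp
using System;

/// <summary>
/// Hypothetical helper showing how RELEASE_VALIDATION_MODE could select
/// between local pre-release artifacts and published post-release packages.
/// </summary>
public static class ValidationModeConfig
{
    // "PreRelease" is the documented default when the variable is unset.
    public static string Mode =>
        Environment.GetEnvironmentVariable("RELEASE_VALIDATION_MODE") ?? "PreRelease";

    public static bool IsPostRelease =>
        string.Equals(Mode, "PostRelease", StringComparison.OrdinalIgnoreCase);

    // PostRelease pulls from NuGet.org; PreRelease uses the local ./packages feed.
    public static string NuGetSource =>
        IsPostRelease ? "https://api.nuget.org/v3/index.json" : "./packages";

    // PostRelease pulls the image from Docker Hub; PreRelease loads the local image.
    public static bool UseDockerHubImage => IsPostRelease;
}
```

A fixture keyed on a single variable like this keeps both validation paths in one project, which is why the workflows above only need to export `RELEASE_VALIDATION_MODE` rather than build a second solution.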