diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 1f2883ebe0b..999658c7673 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -130,7 +130,7 @@ jobs: - name: Build package run: uv build NonStructural-States: - name: Full Suite - Non-Structural YAML (States) + name: Full Suite - States (excl NY) & Python runs-on: ubuntu-latest steps: - name: Checkout repo @@ -152,8 +152,14 @@ jobs: run: | echo "Running non-structural YAML tests (states)..." uv run make test-yaml-no-structural-states + - name: Run Python-based tests + env: + PYTHONUNBUFFERED: 1 + run: | + echo "Running Python-based tests..." + uv run make test-other NonStructural-Other: - name: Full Suite - Non-Structural YAML (Other) + name: Full Suite - Baseline (incl NY) & Reform runs-on: ubuntu-latest steps: - name: Checkout repo @@ -175,8 +181,8 @@ jobs: run: | echo "Running non-structural YAML tests (other)..." uv run make test-yaml-no-structural-other - Structural-and-Python: - name: Full Suite - Structural YAML & Python + Structural-Heavy: + name: Full Suite - Structural (States) runs-on: ubuntu-latest steps: - name: Checkout repo @@ -192,20 +198,14 @@ jobs: - name: Turn off default branching shell: bash run: bash ./update_itemization.sh - - name: Run structural YAML tests - env: - PYTHONUNBUFFERED: 1 - run: | - echo "Running structural YAML tests..." - uv run make test-yaml-structural - - name: Run Python-based tests + - name: Run structural YAML tests (states) env: PYTHONUNBUFFERED: 1 run: | - echo "Running Python-based tests..." - uv run make test-other - Structural-Heavy: - name: Full Suite - Structural YAML (Heavy) + echo "Running structural YAML tests (contrib/states)..." + uv run make test-yaml-structural-heavy + Structural: + name: Full Suite - Structural (Other) runs-on: ubuntu-latest steps: - name: Checkout repo @@ -221,9 +221,9 @@ jobs: - name: Turn off default branching shell: bash run: bash ./update_itemization.sh - - name: Run heavy structural YAML tests (states + congress) + - name: Run structural YAML tests (other) env: PYTHONUNBUFFERED: 1 run: | - echo "Running heavy structural YAML tests..." - uv run make test-yaml-structural-heavy + echo "Running structural YAML tests (contrib excl states)..." + uv run make test-yaml-structural diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml index 80ed804720c..3a2ccb4aea7 100644 --- a/.github/workflows/push.yaml +++ b/.github/workflows/push.yaml @@ -5,7 +5,7 @@ on: # Concurrency: queues runs to ensure each merge completes its versioning/publishing concurrency: group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: false + cancel-in-progress: true jobs: Lint: runs-on: ubuntu-latest @@ -43,8 +43,6 @@ jobs: run: uv pip install "yaml-changelog>=0.1.7" --system - name: Build changelog run: make changelog - - name: Update lock file - run: uv lock --upgrade - name: Update changelog uses: EndBug/add-and-commit@v9 with: @@ -55,7 +53,7 @@ jobs: github_token: ${{ secrets.POLICYENGINE_GITHUB }} fetch: false NonStructural-States: - name: Non-Structural YAML Tests (States) + name: States (excl NY) & Python runs-on: ubuntu-latest if: | (github.repository == 'PolicyEngine/policyengine-us') @@ -80,8 +78,14 @@ jobs: run: | echo "Running non-structural YAML tests (states)..." uv run make test-yaml-no-structural-states + - name: Run Python-based tests + env: + PYTHONUNBUFFERED: 1 + run: | + echo "Running Python-based tests..." + uv run make test-other NonStructural-Other: - name: Non-Structural YAML Tests (Other) + name: Baseline (incl NY) & Reform runs-on: ubuntu-latest if: | (github.repository == 'PolicyEngine/policyengine-us') @@ -106,8 +110,8 @@ jobs: run: | echo "Running non-structural YAML tests (other)..." uv run make test-yaml-no-structural-other - Structural-and-Python: - name: Structural YAML & Python Tests + Structural-Heavy: + name: Structural (States + Congress) runs-on: ubuntu-latest if: | (github.repository == 'PolicyEngine/policyengine-us') @@ -126,20 +130,14 @@ jobs: - name: Turn off default branching shell: bash run: bash ./update_itemization.sh - - name: Run structural YAML tests - env: - PYTHONUNBUFFERED: 1 - run: | - echo "Running structural YAML tests..." - uv run make test-yaml-structural - - name: Run Python-based tests + - name: Run heavy structural YAML tests (states + congress) env: PYTHONUNBUFFERED: 1 run: | - echo "Running Python-based tests..." - uv run make test-other - Structural-Heavy: - name: Structural YAML Tests (Heavy) + echo "Running heavy structural YAML tests..." + uv run make test-yaml-structural-heavy + Structural: + name: Structural (Other) runs-on: ubuntu-latest if: | (github.repository == 'PolicyEngine/policyengine-us') @@ -158,18 +156,18 @@ jobs: - name: Turn off default branching shell: bash run: bash ./update_itemization.sh - - name: Run heavy structural YAML tests (states + congress) + - name: Run structural YAML tests env: PYTHONUNBUFFERED: 1 run: | - echo "Running heavy structural YAML tests..." - uv run make test-yaml-structural-heavy + echo "Running structural YAML tests..." + uv run make test-yaml-structural Publish: runs-on: ubuntu-latest if: | (github.repository == 'PolicyEngine/policyengine-us') && (github.event.head_commit.message == 'Update PolicyEngine US') - needs: [NonStructural-States, NonStructural-Other, Structural-and-Python, Structural-Heavy] + needs: [NonStructural-States, NonStructural-Other, Structural, Structural-Heavy] steps: - name: Checkout repo uses: actions/checkout@v4 diff --git a/Makefile b/Makefile index 80e407241d0..9ea85bc28a9 100644 --- a/Makefile +++ b/Makefile @@ -9,13 +9,13 @@ test: coverage run -a --branch -m policyengine_core.scripts.policyengine_command test policyengine_us/tests/policy/ -c policyengine_us coverage xml -i test-yaml-structural: - python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/contrib --exclude states,congress + python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/contrib --exclude states test-yaml-structural-heavy: python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/contrib/states --batches 1 - python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/contrib/congress --batches 1 test-yaml-no-structural-states: - python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/gov/states --batches 1 + python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/gov/states --batches 1 --exclude ny test-yaml-no-structural-other: + python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/gov/states/ny --batches 1 python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline --batches 1 --exclude states python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/household --batches 1 python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/contrib --batches 1 diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb2d..da7736cfe80 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + changed: + - Rebalance CI test jobs by moving NY state tests to NonStructural-Other and CRFB structural tests to Structural-Heavy. diff --git a/policyengine_us/tests/test_batched.py b/policyengine_us/tests/test_batched.py index 6c463637a5d..d5a6b54e14a 100644 --- a/policyengine_us/tests/test_batched.py +++ b/policyengine_us/tests/test_batched.py @@ -37,11 +37,31 @@ def split_into_batches( if exclude is None: exclude = [] - # Special handling for contrib tests - each folder is its own batch + # Special handling for contrib tests - split into 7 batches by memory usage # Only apply to policy/contrib (structural tests), not baseline/contrib if str(base_path).endswith("policy/contrib"): - # Get all subdirectories and sort them alphabetically - # Exclude specified directories + # Define batches by memory usage (measured empirically) + BATCH_1 = ["federal", "harris", "treasury"] # ~9.0 GB + BATCH_2 = ["ctc", "snap_ea", "ubi_center"] # ~8.6 GB + BATCH_3 = ["deductions", "aca", "snap"] # ~8.1 GB + BATCH_4 = [ + "tax_exempt", + "eitc", + "state_dependent_exemptions", + "additional_tax_bracket", + ] # ~8.0 GB + # Batch 5 is the catch-all for unknown/new folders (~7.8 GB + headroom) + BATCH_5_DEFINED = [ + "local", + "dc_single_joint_threshold_ratio.yaml", + "reconciliation", + "dc_kccatc.yaml", + "reported_state_income_tax.yaml", + ] + BATCH_6 = ["crfb"] # ~8.9 GB, always alone + BATCH_7 = ["congress"] # ~6.3 GB + + # Get all subdirectories (excluding states which is in Heavy job) subdirs = sorted( [ item @@ -50,25 +70,100 @@ def split_into_batches( ] ) - # Get root level YAML files and sort them + # Get root level YAML files root_files = sorted(list(base_path.glob("*.yaml"))) - # Create one batch per subdirectory - batches = [] + # Build batches + def get_batch_paths(batch_names, subdirs, root_files): + paths = [] + for name in batch_names: + # Check if it's a directory + for subdir in subdirs: + if subdir.name == name: + paths.append(str(subdir)) + break + # Check if it's a root file + for f in root_files: + if f.name == name: + paths.append(str(f)) + break + return paths + + # Collect known folders/files + all_known = set( + BATCH_1 + + BATCH_2 + + BATCH_3 + + BATCH_4 + + BATCH_5_DEFINED + + BATCH_6 + + BATCH_7 + ) + + # Find unknown folders/files (new additions go to Batch 5) + unknown = [] for subdir in subdirs: - batches.append([str(subdir)]) + if subdir.name not in all_known: + unknown.append(str(subdir)) + for f in root_files: + if f.name not in all_known: + unknown.append(str(f)) + + # Build all batches + batch1 = get_batch_paths(BATCH_1, subdirs, root_files) + batch2 = get_batch_paths(BATCH_2, subdirs, root_files) + batch3 = get_batch_paths(BATCH_3, subdirs, root_files) + batch4 = get_batch_paths(BATCH_4, subdirs, root_files) + batch5 = ( + get_batch_paths(BATCH_5_DEFINED, subdirs, root_files) + unknown + ) + batch6 = get_batch_paths(BATCH_6, subdirs, root_files) + batch7 = get_batch_paths(BATCH_7, subdirs, root_files) - # If there are root files, group them together in their own batch - if root_files: - root_batch = [str(file) for file in root_files] - batches.append(root_batch) + # Return non-empty batches in order + batches = [] + for batch in [batch1, batch2, batch3, batch4, batch5, batch6, batch7]: + if batch: + batches.append(batch) return batches + # Special handling for contrib/states - each subfolder is its own batch + # to allow garbage collection between state tests + # Memory usage per state varies significantly (1.3 GB - 5.2 GB measured) + # Note: contrib/congress runs all together (~6.3 GB total, under 7 GB limit) + if str(base_path).endswith("contrib/states"): + subdirs = sorted( + [item for item in base_path.iterdir() if item.is_dir()] + ) + # Each state folder becomes its own batch + batches = [[str(subdir)] for subdir in subdirs] + + # Also include any root-level YAML files as a separate batch + root_files = sorted(list(base_path.glob("*.yaml"))) + if root_files: + batches.append([str(file) for file in root_files]) + + return batches if batches else [[str(base_path)]] + # Special handling for reform tests - run all together in one batch if "reform" in str(base_path): return [[str(base_path)]] + # Special handling for states directory - support excluding specific states + if str(base_path).endswith("gov/states"): + subdirs = sorted( + [ + item + for item in base_path.iterdir() + if item.is_dir() and item.name not in exclude + ] + ) + # Return all non-excluded state directories as a single batch + if subdirs: + return [[str(subdir) for subdir in subdirs]] + return [] + # Special handling for baseline tests if "baseline" in str(base_path) and str(base_path).endswith("baseline"): states_path = base_path / "gov" / "states"