PolicyEngine · hua7450 · Dec 18, 2025 · Dec 12, 2025 · Dec 13, 2025 · Dec 13, 2025
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -130,7 +130,7 @@ jobs:
       - name: Build package
         run: uv build
   NonStructural-States:
-    name: Full Suite - Non-Structural YAML (States)
+    name: Full Suite - States (excl NY) & Python
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
@@ -152,8 +152,14 @@ jobs:
         run: |
           echo "Running non-structural YAML tests (states)..."
           uv run make test-yaml-no-structural-states
+      - name: Run Python-based tests
+        env:
+          PYTHONUNBUFFERED: 1
+        run: |
+          echo "Running Python-based tests..."
+          uv run make test-other
   NonStructural-Other:
-    name: Full Suite - Non-Structural YAML (Other)
+    name: Full Suite - Baseline (incl NY) & Reform
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
@@ -175,8 +181,8 @@ jobs:
         run: |
           echo "Running non-structural YAML tests (other)..."
           uv run make test-yaml-no-structural-other
-  Structural-and-Python:
-    name: Full Suite - Structural YAML & Python
+  Structural-Heavy:
+    name: Full Suite - Structural (States)
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
@@ -192,20 +198,14 @@ jobs:
       - name: Turn off default branching
         shell: bash
         run: bash ./update_itemization.sh
-      - name: Run structural YAML tests
-        env:
-          PYTHONUNBUFFERED: 1
-        run: |
-          echo "Running structural YAML tests..."
-          uv run make test-yaml-structural
-      - name: Run Python-based tests
+      - name: Run structural YAML tests (states)
         env:
           PYTHONUNBUFFERED: 1
         run: |
-          echo "Running Python-based tests..."
-          uv run make test-other
-  Structural-Heavy:
-    name: Full Suite - Structural YAML (Heavy)
+          echo "Running structural YAML tests (contrib/states)..."
+          uv run make test-yaml-structural-heavy
+  Structural:
+    name: Full Suite - Structural (Other)
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
@@ -221,9 +221,9 @@ jobs:
       - name: Turn off default branching
         shell: bash
         run: bash ./update_itemization.sh
-      - name: Run heavy structural YAML tests (states + congress)
+      - name: Run structural YAML tests (other)
         env:
           PYTHONUNBUFFERED: 1
         run: |
-          echo "Running heavy structural YAML tests..."
-          uv run make test-yaml-structural-heavy
+          echo "Running structural YAML tests (contrib excl states)..."
+          uv run make test-yaml-structural
diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml
@@ -5,7 +5,7 @@ on:
 # Concurrency: queues runs to ensure each merge completes its versioning/publishing
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: false
+  cancel-in-progress: true
 jobs:
   Lint:
     runs-on: ubuntu-latest
@@ -43,8 +43,6 @@ jobs:
         run: uv pip install "yaml-changelog>=0.1.7" --system
       - name: Build changelog
         run: make changelog
-      - name: Update lock file
-        run: uv lock --upgrade
       - name: Update changelog
         uses: EndBug/add-and-commit@v9
         with:
@@ -55,7 +53,7 @@ jobs:
           github_token: ${{ secrets.POLICYENGINE_GITHUB }}
           fetch: false
   NonStructural-States:
-    name: Non-Structural YAML Tests (States)
+    name: States (excl NY) & Python
     runs-on: ubuntu-latest
     if: |
       (github.repository == 'PolicyEngine/policyengine-us')
@@ -80,8 +78,14 @@ jobs:
         run: |
           echo "Running non-structural YAML tests (states)..."
           uv run make test-yaml-no-structural-states
+      - name: Run Python-based tests
+        env:
+          PYTHONUNBUFFERED: 1
+        run: |
+          echo "Running Python-based tests..."
+          uv run make test-other
   NonStructural-Other:
-    name: Non-Structural YAML Tests (Other)
+    name: Baseline (incl NY) & Reform
     runs-on: ubuntu-latest
     if: |
       (github.repository == 'PolicyEngine/policyengine-us')
@@ -106,8 +110,8 @@ jobs:
         run: |
           echo "Running non-structural YAML tests (other)..."
           uv run make test-yaml-no-structural-other
-  Structural-and-Python:
-    name: Structural YAML & Python Tests
+  Structural-Heavy:
+    name: Structural (States)
     runs-on: ubuntu-latest
     if: |
       (github.repository == 'PolicyEngine/policyengine-us')
@@ -126,20 +130,14 @@ jobs:
       - name: Turn off default branching
         shell: bash
         run: bash ./update_itemization.sh
-      - name: Run structural YAML tests
-        env:
-          PYTHONUNBUFFERED: 1
-        run: |
-          echo "Running structural YAML tests..."
-          uv run make test-yaml-structural
-      - name: Run Python-based tests
+      - name: Run structural YAML tests (states)
         env:
           PYTHONUNBUFFERED: 1
         run: |
-          echo "Running Python-based tests..."
-          uv run make test-other
-  Structural-Heavy:
-    name: Structural YAML Tests (Heavy)
+          echo "Running heavy structural YAML tests..."
+          uv run make test-yaml-structural-heavy
+  Structural:
+    name: Structural (Other)
     runs-on: ubuntu-latest
     if: |
       (github.repository == 'PolicyEngine/policyengine-us')
@@ -158,18 +156,18 @@ jobs:
       - name: Turn off default branching
         shell: bash
         run: bash ./update_itemization.sh
-      - name: Run heavy structural YAML tests (states + congress)
+      - name: Run structural YAML tests
         env:
           PYTHONUNBUFFERED: 1
         run: |
-          echo "Running heavy structural YAML tests..."
-          uv run make test-yaml-structural-heavy
+          echo "Running structural YAML tests..."
+          uv run make test-yaml-structural
   Publish:
     runs-on: ubuntu-latest
     if: |
       (github.repository == 'PolicyEngine/policyengine-us')
       && (github.event.head_commit.message == 'Update PolicyEngine US')
-    needs: [NonStructural-States, NonStructural-Other, Structural-and-Python, Structural-Heavy]
+    needs: [NonStructural-States, NonStructural-Other, Structural, Structural-Heavy]
     steps:
       - name: Checkout repo
         uses: actions/checkout@v4

diff --git a/Makefile b/Makefile
@@ -9,13 +9,13 @@ test:
 	coverage run -a --branch -m policyengine_core.scripts.policyengine_command test policyengine_us/tests/policy/ -c policyengine_us
 	coverage xml -i
 test-yaml-structural:
-	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/contrib --exclude states,congress
+	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/contrib --exclude states
 test-yaml-structural-heavy:
 	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/contrib/states --batches 1
-	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/contrib/congress --batches 1
 test-yaml-no-structural-states:
-	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/gov/states --batches 1
+	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/gov/states --batches 1 --exclude ny
 test-yaml-no-structural-other:
+	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/gov/states/ny --batches 1
 	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline --batches 1 --exclude states
 	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/household --batches 1
 	python policyengine_us/tests/test_batched.py policyengine_us/tests/policy/baseline/contrib --batches 1

diff --git a/changelog_entry.yaml b/changelog_entry.yaml
@@ -0,0 +1,4 @@
+- bump: patch
+  changes:
+    changed:
+    - Rebalance CI test jobs by moving NY state tests to NonStructural-Other, Python tests to NonStructural-States, and congress structural tests from Structural-Heavy to Structural.
diff --git a/policyengine_us/tests/test_batched.py b/policyengine_us/tests/test_batched.py
@@ -37,11 +37,31 @@ def split_into_batches(
     if exclude is None:
         exclude = []
 
-    # Special handling for contrib tests - each folder is its own batch
+    # Special handling for contrib tests - split into 7 batches by memory usage
     # Only apply to policy/contrib (structural tests), not baseline/contrib
     if str(base_path).endswith("policy/contrib"):
-        # Get all subdirectories and sort them alphabetically
-        # Exclude specified directories
+        # Define batches by memory usage (measured empirically)
+        BATCH_1 = ["federal", "harris", "treasury"]  # ~9.0 GB
+        BATCH_2 = ["ctc", "snap_ea", "ubi_center"]  # ~8.6 GB
+        BATCH_3 = ["deductions", "aca", "snap"]  # ~8.1 GB
+        BATCH_4 = [
+            "tax_exempt",
+            "eitc",
+            "state_dependent_exemptions",
+            "additional_tax_bracket",
+        ]  # ~8.0 GB
+        # Batch 5 is the catch-all for unknown/new folders (~7.8 GB + headroom)
+        BATCH_5_DEFINED = [
+            "local",
+            "dc_single_joint_threshold_ratio.yaml",
+            "reconciliation",
+            "dc_kccatc.yaml",
+            "reported_state_income_tax.yaml",
+        ]
+        BATCH_6 = ["crfb"]  # ~8.9 GB, always alone
+        BATCH_7 = ["congress"]  # ~6.3 GB
+
+        # Get all subdirectories (excluding states which is in Heavy job)
         subdirs = sorted(
             [
                 item
@@ -50,25 +70,100 @@ def split_into_batches(
             ]
         )
 
-        # Get root level YAML files and sort them
+        # Get root level YAML files
         root_files = sorted(list(base_path.glob("*.yaml")))
 
-        # Create one batch per subdirectory
-        batches = []
+        # Build batches
+        def get_batch_paths(batch_names, subdirs, root_files):
+            paths = []
+            for name in batch_names:
+                # Check if it's a directory
+                for subdir in subdirs:
+                    if subdir.name == name:
+                        paths.append(str(subdir))
+                        break
+                # Check if it's a root file
+                for f in root_files:
+                    if f.name == name:
+                        paths.append(str(f))
+                        break
+            return paths
+
+        # Collect known folders/files
+        all_known = set(
+            BATCH_1
+            + BATCH_2
+            + BATCH_3
+            + BATCH_4
+            + BATCH_5_DEFINED
+            + BATCH_6
+            + BATCH_7
+        )
+
+        # Find unknown folders/files (new additions go to Batch 5)
+        unknown = []
         for subdir in subdirs:
-            batches.append([str(subdir)])
+            if subdir.name not in all_known:
+                unknown.append(str(subdir))
+        for f in root_files:
+            if f.name not in all_known:
+                unknown.append(str(f))
+
+        # Build all batches
+        batch1 = get_batch_paths(BATCH_1, subdirs, root_files)
+        batch2 = get_batch_paths(BATCH_2, subdirs, root_files)
+        batch3 = get_batch_paths(BATCH_3, subdirs, root_files)
+        batch4 = get_batch_paths(BATCH_4, subdirs, root_files)
+        batch5 = (
+            get_batch_paths(BATCH_5_DEFINED, subdirs, root_files) + unknown
+        )
+        batch6 = get_batch_paths(BATCH_6, subdirs, root_files)
+        batch7 = get_batch_paths(BATCH_7, subdirs, root_files)
 
-        # If there are root files, group them together in their own batch
-        if root_files:
-            root_batch = [str(file) for file in root_files]
-            batches.append(root_batch)
+        # Return non-empty batches in order
+        batches = []
+        for batch in [batch1, batch2, batch3, batch4, batch5, batch6, batch7]:
+            if batch:
+                batches.append(batch)
 
         return batches
 
+    # Special handling for contrib/states - each subfolder is its own batch
+    # to allow garbage collection between state tests
+    # Memory usage per state varies significantly (1.3 GB - 5.2 GB measured)
+    # Note: contrib/congress runs all together (~6.3 GB total, under 7 GB limit)
+    if str(base_path).endswith("contrib/states"):
+        subdirs = sorted(
+            [item for item in base_path.iterdir() if item.is_dir()]
+        )
+        # Each state folder becomes its own batch
+        batches = [[str(subdir)] for subdir in subdirs]
+
+        # Also include any root-level YAML files as a separate batch
+        root_files = sorted(list(base_path.glob("*.yaml")))
+        if root_files:
+            batches.append([str(file) for file in root_files])
+
+        return batches if batches else [[str(base_path)]]
+
     # Special handling for reform tests - run all together in one batch
     if "reform" in str(base_path):
         return [[str(base_path)]]
 
+    # Special handling for states directory - support excluding specific states
+    if str(base_path).endswith("gov/states"):
+        subdirs = sorted(
+            [
+                item
+                for item in base_path.iterdir()
+                if item.is_dir() and item.name not in exclude
+            ]
+        )
+        # Return all non-excluded state directories as a single batch
+        if subdirs:
+            return [[str(subdir) for subdir in subdirs]]
+        return []
+
     # Special handling for baseline tests
     if "baseline" in str(base_path) and str(base_path).endswith("baseline"):
         states_path = base_path / "gov" / "states"