Skip to content

Commit

Permalink
NI validation (#779)
Browse files Browse the repository at this point in the history
* Validate NI against UKMOD

* Versioning

* Skip tests online

* Format

* Fix UKMOD skip

* Import pytest

* Fix test
  • Loading branch information
nikhilwoodruff authored Dec 15, 2023
1 parent 4954c75 commit 3dbe3da
Show file tree
Hide file tree
Showing 44 changed files with 610 additions and 249 deletions.
2 changes: 1 addition & 1 deletion .vscode/python.code-snippets
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
" entity = ${4:Person}",
" definition_period = ${5:YEAR}",
" value_type = ${6:float}",
" unit = \"${7:currency-GBP}\"",
" unit = ${7:GBP}",
"",
""
],
Expand Down
4 changes: 4 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- bump: minor
changes:
changed:
- Validated and standardised National Insurance variables.
2 changes: 2 additions & 0 deletions policyengine_uk/data/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
CalibratedSPIEnhancedPooledFRS_2018_20,
CalibratedSPIEnhancedPooledFRS_2019_21,
EnhancedFRS,
UKMOD_FRS_2018,
)

DATASETS = [
Expand All @@ -34,4 +35,5 @@
CalibratedSPIEnhancedPooledFRS_2018_20,
CalibratedSPIEnhancedPooledFRS_2019_21,
EnhancedFRS,
UKMOD_FRS_2018,
]
1 change: 1 addition & 0 deletions policyengine_uk/data/datasets/frs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@
)
from .stacked_frs import StackedFRS, PooledFRS_2018_20, PooledFRS_2019_21
from .enhanced_frs import EnhancedFRS
from .ukmod import UKMOD_FRS_2018
63 changes: 63 additions & 0 deletions policyengine_uk/data/datasets/frs/ukmod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pandas as pd
from policyengine_uk.data.storage import STORAGE_FOLDER
import numpy as np
from policyengine_core.data import Dataset


class UKMOD_FRS_2018(Dataset):
    """Dataset assembled from the UKMOD 2018 input and output files.

    ``generate`` reads two tab-separated files from ``STORAGE_FOLDER``
    (``uk_2018_std.txt`` and ``uk_2018_a4.txt``), joins them on
    ``idperson`` and saves a handful of ID, demographic and income
    variables under the ``"2018"`` period.
    """

    name = "ukmod_frs_2018"
    label = "UKMOD (2018-19 FRS)"
    data_format = Dataset.TIME_PERIOD_ARRAYS
    file_path = STORAGE_FOLDER / "ukmod_frs_2018.h5"
    time_period = "2018"

    def generate(self):
        """Build the variable arrays from the UKMOD files and save them."""
        std_output = pd.read_csv(
            STORAGE_FOLDER / "uk_2018_std.txt", delimiter="\t"
        )
        a4_input = pd.read_csv(
            STORAGE_FOLDER / "uk_2018_a4.txt", delimiter="\t"
        )

        # Keep only the columns that the output file adds on top of the
        # input file, plus the person key needed for the join.
        is_output_only = ~std_output.columns.isin(a4_input.columns)
        output_only = std_output.columns[is_output_only].tolist()
        merged = std_output[output_only + ["idperson"]].merge(
            a4_input,
            on="idperson",
            how="right",  # every row of the input file is retained
        )

        arrays = {}

        # Person-level ID variables come first.
        # NOTE(review): benefit-unit IDs are idorigbenunit * 10 + idorighh
        # while household IDs are idorighh * 100 -- confirm the benefit-unit
        # formula cannot collide across households.
        benunit_of_person = merged.idorigbenunit * 10 + merged.idorighh
        household_of_person = merged.idorighh * 100
        arrays["person_id"] = merged.idperson
        arrays["person_benunit_id"] = benunit_of_person
        arrays["person_household_id"] = household_of_person
        arrays["person_state_id"] = np.ones_like(merged.idperson)

        # Group-level IDs: one entry per distinct group, and one state.
        arrays["benunit_id"] = benunit_of_person.unique()
        arrays["household_id"] = household_of_person.unique()
        arrays["state_id"] = np.array([1])

        # Demographics: dgn == 0 is stored as FEMALE, anything else as MALE.
        arrays["age"] = merged.dag.values
        is_female = merged.dgn == 0
        arrays["gender"] = np.where(is_female, "FEMALE", "MALE").astype("S")

        # (saved variable, UKMOD column) pairs; each amount is scaled by 12.
        # NOTE(review): the factor of 12 presumably annualises monthly UKMOD
        # amounts -- confirm against the UKMOD documentation.
        scaled_columns = (
            ("employment_income", "yem"),
            ("self_employment_income", "yse"),
            ("pension_income", "ypp"),
            ("statutory_sick_pay", "bhlwk"),
            ("statutory_maternity_pay", "bmact_s"),
            ("statutory_paternity_pay", "bpact_s"),
        )
        for target, source in scaled_columns:
            arrays[target] = merged[source].values * 12

        # Wrap every array in a {period: values} mapping before saving.
        self.save_dataset(
            {variable: {"2018": values} for variable, values in arrays.items()}
        )
3 changes: 3 additions & 0 deletions policyengine_uk/data/storage/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from pathlib import Path

STORAGE_FOLDER = Path(__file__).parent
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ values:
2012-01-01: 62
2014-01-01: 63
2016-01-01: 64
2018-01-01: 65
2019-01-01: 65
2020-01-01: 66
metadata:
unit: year
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ metadata:
- https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/882271/Table-a4.pdf
- https://www.gov.uk/government/publications/rates-and-allowances-national-insurance-contributions/rates-and-allowances-national-insurance-contributions
unit: currency-GBP
uprating: gov.benefit_uprating_cpi
values:
2015-04-06: 155
2016-04-06: 155
Expand All @@ -24,3 +23,4 @@ values:
title: The Social Security (Contributions) (Rates, Limits and Thresholds Amendments
and National Insurance Funds Payments) Regulations 2022(6)
value: 175
2024-01-01: 175
66 changes: 66 additions & 0 deletions policyengine_uk/tests/microsimulation/test_against_ukmod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from policyengine_uk import Microsimulation
from policyengine_uk.data.datasets import UKMOD_FRS_2018
from policyengine_uk.data.storage import STORAGE_FOLDER
import pandas as pd
import numpy as np
import pytest

# Set to False to load the UKMOD files from STORAGE_FOLDER, regenerate the
# UKMOD_FRS_2018 dataset and build the simulation that the tests compare
# against the UKMOD output.
SKIP_UKMOD_TESTS = True

if not SKIP_UKMOD_TESTS:
    ukmod_output = pd.read_csv(
        STORAGE_FOLDER / "uk_2018_std.txt", delimiter="\t"
    )
    ukmod_input = pd.read_csv(
        STORAGE_FOLDER / "uk_2018_a4.txt", delimiter="\t"
    )
    # Columns present only in the output file (original order preserved).
    output_columns = ukmod_output.columns[
        ~ukmod_output.columns.isin(ukmod_input.columns)
    ].tolist()
    # Join the output-only columns onto every input row by person ID.
    ukmod = ukmod_output[output_columns + ["idperson"]].merge(
        ukmod_input,
        on="idperson",
        how="right",
    )

    # Regenerate the dataset before simulating over it.
    UKMOD_FRS_2018().generate()
    sim = Microsimulation(dataset="ukmod_frs_2018")


# Skip only while SKIP_UKMOD_TESTS is set: an unconditional skip would keep
# this test disabled even after the flag is turned off and the data loaded.
@pytest.mark.skipif(
    SKIP_UKMOD_TESTS, reason="UKMOD data not publicly shareable"
)
def test_ni_class_1():
    """Compare ``ni_class_1_income`` with UKMOD's ``il_empniearns`` x 12.

    Uses the module-level ``sim`` and ``ukmod`` objects, which are only
    created when ``SKIP_UKMOD_TESTS`` is False.
    """
    # NI Class 1 income matches.
    assert np.allclose(
        sim.calculate("ni_class_1_income").values,
        ukmod.il_empniearns.values * 12,
        atol=1,
    )


# Skip only while SKIP_UKMOD_TESTS is set: an unconditional skip would keep
# this test disabled even after the flag is turned off and the data loaded.
@pytest.mark.skipif(
    SKIP_UKMOD_TESTS, reason="UKMOD data not publicly shareable"
)
def test_ni_class_1_employee():
    """Compare ``ni_class_1_employee`` with UKMOD's ``tscee_s`` x 12.

    Uses the module-level ``sim`` and ``ukmod`` objects, which are only
    created when ``SKIP_UKMOD_TESTS`` is False.
    """
    # NI contributions are off by more because the thresholds change mid-year,
    # and PolicyEngine simulates over the full year while UKMOD simulates one
    # month.
    assert np.allclose(
        sim.calculate("ni_class_1_employee").values,
        ukmod.tscee_s.values * 12,
        atol=50,
    )


# Skip only while SKIP_UKMOD_TESTS is set: an unconditional skip would keep
# this test disabled even after the flag is turned off and the data loaded.
@pytest.mark.skipif(
    SKIP_UKMOD_TESTS, reason="UKMOD data not publicly shareable"
)
def test_ni_self_employed():
    """Require over 99% of ``ni_self_employed`` values within 50 of UKMOD.

    The reference is UKMOD's ``tscse_s`` x 12. Uses the module-level ``sim``
    and ``ukmod`` objects, which are only created when ``SKIP_UKMOD_TESTS``
    is False.
    """
    # NI self-employed contributions don't match entirely for people with both
    # self-employed and employment income. This might be due to a different
    # interpretation of the rules around capped NI contributions (our Class 4
    # maximum uses the legislation as a reference).

    error = np.abs(
        sim.calculate("ni_self_employed").values - ukmod.tscse_s.values * 12
    )

    # A share-based check tolerates the small group of known mismatches.
    assert (error < 50).mean() > 0.99

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
- name: NI Class 1 employee contributions - below PT
period: 2023
input:
ni_class_1_income: 11_000
output:
ni_class_1_employee: 0

- name: NI Class 1 employee contributions - between PT and UEL
period: 2023
absolute_error_margin: 1
input:
ni_class_1_income: 30_000
output:
ni_class_1_employee: (30_000 - 12_570) * 0.12

- name: NI Class 1 employee contributions - above UEL
period: 2023
absolute_error_margin: 1
input:
ni_class_1_income: 70000
output:
ni_class_1_employee: (50_270 - 12_570) * 0.12 + (70_000 - 50_270) * 0.0325
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
- name: NI Class 1 employee additional contributions - income below UEL
period: 2023
absolute_error_margin: 1
input:
ni_class_1_income: 30_000
output:
ni_class_1_employee_additional: 0

- name: NI Class 1 employee additional contributions - income above UEL
period: 2023
absolute_error_margin: 1
input:
ni_class_1_income: 100_000
output:
ni_class_1_employee_additional: (100_000 - 50_270) * 0.0325
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- name: £20k income has NI Class 1 liability
period: 2023
absolute_error_margin: 1
input:
employment_income: 20_000
output:
ni_class_1_employee_primary: (20_000 - 12_570) * 0.12
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
- name: NI Class 1 employer contributions for low income in 2023
period: 2023
absolute_error_margin: 0.01
input:
ni_class_1_income: 8_000 # Annual income below secondary threshold
output:
ni_class_1_employer: 0.00 # Expected employer contributions

- name: NI Class 1 employer contributions for moderate income in 2023
period: 2023
absolute_error_margin: 0.01
input:
ni_class_1_income: 30_000 # Annual income above secondary threshold but below upper limit
output:
ni_class_1_employer: (30_000 - 175 * 52) * 0.138 # Expected employer contributions
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
- name: NI Class 1 income sums income components.
period: 2023
input:
employment_income: 1
statutory_sick_pay: 2
statutory_maternity_pay: 4
statutory_paternity_pay: 8
output:
ni_class_1_income: 15
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
- name: Child isn't liable for NI.
period: 2023
input:
age: 15
output:
ni_liable: false

- name: Working-age adult is liable for NI.
period: 2023
input:
age: 35
output:
ni_liable: true

- name: Retired adult isn't liable for NI.
period: 2023
input:
age: 70
output:
ni_liable: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
- name: NI Class 2 - under LPL.
period: 2023
input:
self_employment_income: 5_000
output:
ni_class_2: 0

- name: NI Class 2 - over LPL.
period: 2023
input:
self_employment_income: 15_000
output:
ni_class_2: 3.15 * 52
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- name: NI Class 4 - under UPL.
period: 2023
absolute_error_margin: 1
input:
self_employment_income: 30_000
output:
ni_class_4: 1628
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- name: NI Class 4 - over UPL has maximum capped.
period: 2023
absolute_error_margin: 1
input:
self_employment_income: 100_000
output:
ni_class_4_main: 3452
Loading

0 comments on commit 3dbe3da

Please sign in to comment.