From 1bd00d99a04c4e9151799111f7f2ee1a82846275 Mon Sep 17 00:00:00 2001 From: Jonah Kagan Date: Mon, 6 Apr 2020 15:54:23 -0700 Subject: [PATCH] Add endpoint for jurisdiction admins to create audit boards Adds an endpoint `POST /election/<election_id>/jurisdiction/<jurisdiction_id>/round/<round_id>/audit-board` that jurisdiction admins can use to create audit boards. When the audit boards are created, the sampled ballots for that round are divvied up as fairly as possible between the audit boards. Also adds a db constraint that audit board names must be unique within a jurisdiction for each round. --- arlo_server/__init__.py | 9 +- arlo_server/audit_boards.py | 121 +++++++++ arlo_server/models.py | 2 + arlo_server/routes.py | 13 +- tests/conftest.py | 12 +- tests/helpers.py | 18 +- tests/routes_tests/test_audit_boards.py | 303 +++++++++++++++++++++++ tests/routes_tests/test_audit_status.py | 12 +- tests/routes_tests/test_jurisdictions.py | 35 +-- tests/routes_tests/test_report.py | 10 +- util/group_by.py | 9 + 11 files changed, 481 insertions(+), 63 deletions(-) create mode 100644 arlo_server/audit_boards.py create mode 100644 tests/routes_tests/test_audit_boards.py create mode 100644 util/group_by.py diff --git a/arlo_server/__init__.py b/arlo_server/__init__.py index 7a6b7b2ff..6fa7221fc 100644 --- a/arlo_server/__init__.py +++ b/arlo_server/__init__.py @@ -41,12 +41,19 @@ # The order of these imports is important as it defines route precedence. # Be careful when re-ordering them. 
-import arlo_server.election_settings + +# Single-jurisdiction flow routes +# (Plus some routes for multi-jurisdiction flow that were created before we had +# separate routes modules) import arlo_server.routes + +# Multi-jurisdiction flow routes +import arlo_server.election_settings import arlo_server.contests import arlo_server.jurisdictions import arlo_server.sample_sizes import arlo_server.rounds +import arlo_server.audit_boards # Error handlers import arlo_server.errors diff --git a/arlo_server/audit_boards.py b/arlo_server/audit_boards.py new file mode 100644 index 000000000..9d7117a32 --- /dev/null +++ b/arlo_server/audit_boards.py @@ -0,0 +1,121 @@ +from flask import jsonify, request +import uuid +from typing import List +from xkcdpass import xkcd_password as xp +from werkzeug.exceptions import Conflict +from sqlalchemy.exc import IntegrityError + +from arlo_server import app, db +from arlo_server.auth import with_jurisdiction_access +from arlo_server.rounds import get_current_round +from arlo_server.models import ( + AuditBoard, + Round, + Election, + Jurisdiction, + SampledBallot, + Batch, +) +from arlo_server.errors import handle_unique_constraint_error +from util.jsonschema import validate, JSONDict +from util.binpacking import BalancedBucketList, Bucket +from util.group_by import group_by + +WORDS = xp.generate_wordlist(wordfile=xp.locate_wordfile()) + +CREATE_AUDIT_BOARD_REQUEST_SCHEMA = { + "type": "object", + "properties": {"name": {"type": "string"},}, + "additionalProperties": False, + "required": ["name"], +} + +# Raises if invalid +def validate_audit_boards( + audit_boards: List[JSONDict], + election: Election, + jurisdiction: Jurisdiction, + round: Round, +): + current_round = get_current_round(election) + if not current_round or round.id != current_round.id: + raise Conflict(f"Round {round.round_num} is not the current round") + + if any(ab for ab in jurisdiction.audit_boards if ab.round_id == round.id): + raise Conflict(f"Audit boards already 
created for round {round.round_num}") + + validate( + audit_boards, {"type": "array", "items": CREATE_AUDIT_BOARD_REQUEST_SCHEMA} + ) + + +def assign_sampled_ballots( + jurisdiction: Jurisdiction, round: Round, audit_boards: List[AuditBoard], +): + # Collect the physical ballots for each batch that were sampled for this + # jurisdiction for this round + sampled_ballots = ( + SampledBallot.query.join(Batch) + .filter_by(jurisdiction_id=jurisdiction.id) + .join(SampledBallot.draws) + .filter_by(round_id=round.id) + .order_by(SampledBallot.batch_id) # group_by prefers a sorted list + .all() + ) + ballots_by_batch = group_by(sampled_ballots, key=lambda sb: sb.batch_id) + + # Divvy up batches of ballots between the audit boards. + # Note: BalancedBucketList doesn't care which buckets have which batches to + # start, so we add all the batches to the first bucket before balancing. + buckets = [Bucket(audit_board.id) for audit_board in audit_boards] + for batch_id, sampled_ballots in ballots_by_batch.items(): + buckets[0].add_batch(batch_id, len(sampled_ballots)) + balanced_buckets = BalancedBucketList(buckets) + + for bucket in balanced_buckets.buckets: + ballots_in_bucket = [ + ballot + for batch_id in bucket.batches + for ballot in ballots_by_batch[batch_id] + ] + for ballot in ballots_in_bucket: + ballot.audit_board_id = bucket.name + db.session.add(ballot) + + db.session.commit() + + +@app.route( + "/election//jurisdiction//round//audit-board", + methods=["POST"], +) +@with_jurisdiction_access +def create_audit_boards(election: Election, jurisdiction: Jurisdiction, round_id: str): + json_audit_boards = request.get_json() + round = Round.query.get_or_404(round_id) + validate_audit_boards(json_audit_boards, election, jurisdiction, round) + + audit_boards = [ + AuditBoard( + id=str(uuid.uuid4()), + name=json_audit_board["name"], + jurisdiction_id=jurisdiction.id, + round_id=round.id, + passphrase=xp.generate_xkcdpassword(WORDS, numwords=4, delimiter="-"), + ) + for 
json_audit_board in json_audit_boards + ] + db.session.add_all(audit_boards) + + try: + db.session.commit() + except IntegrityError as e: + handle_unique_constraint_error( + e, + constraint_name="audit_board_jurisdiction_id_round_id_name_key", + message="Audit board names must be unique", + ) + + assign_sampled_ballots(jurisdiction, round, audit_boards) + + return jsonify(status="ok") diff --git a/arlo_server/models.py b/arlo_server/models.py index 19c6f53b2..96de7b4a6 100644 --- a/arlo_server/models.py +++ b/arlo_server/models.py @@ -253,6 +253,8 @@ class AuditBoard(BaseModel): "SampledBallot", backref="audit_board", passive_deletes=True ) + __table_args__ = (db.UniqueConstraint("jurisdiction_id", "round_id", "name"),) + class Round(BaseModel): id = db.Column(db.String(200), primary_key=True) diff --git a/arlo_server/routes.py b/arlo_server/routes.py index 7232ed12d..66138a874 100644 --- a/arlo_server/routes.py +++ b/arlo_server/routes.py @@ -1,4 +1,4 @@ -import os, datetime, csv, io, json, uuid, re, hmac, urllib.parse, itertools +import os, datetime, csv, io, json, uuid, re, hmac, urllib.parse from flask import jsonify, request, Response, redirect, session from flask_httpauth import HTTPBasicAuth @@ -40,6 +40,7 @@ from util.isoformat import isoformat from util.jsonschema import validate from util.process_file import serialize_file, serialize_file_processing +from util.group_by import group_by AUDIT_BOARD_MEMBER_COUNT = 2 WORDS = xp.generate_wordlist(wordfile=xp.locate_wordfile()) @@ -1074,16 +1075,14 @@ def audit_report(election_id): # First group all the ballot draws by the actual ballot for _, ballot_draws in group_by( all_sampled_ballot_draws, key=lambda b: (b.batch_id, b.ballot_position) - ): - ballot_draws = list(ballot_draws) + ).items(): b = ballot_draws[0] # Then group the draws for this ballot by round ticket_numbers = [] for round_num, round_draws in group_by( ballot_draws, key=lambda b: b.round.round_num - ): - round_draws = list(round_draws) + 
).items(): ticket_numbers_str = ", ".join( sorted(d.ticket_number for d in round_draws) ) @@ -1108,10 +1107,6 @@ def audit_report(election_id): return response -def group_by(xs, key=None): - return itertools.groupby(sorted(xs, key=key), key=key) - - def pretty_affiliation(affiliation): mapping = { "DEM": "Democrat", diff --git a/tests/conftest.py b/tests/conftest.py index cee1e1645..abb2b0148 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -45,15 +45,17 @@ def election_id(client: FlaskClient) -> Generator[str, None, None]: def jurisdiction_ids( client: FlaskClient, election_id: str ) -> Generator[List[str], None, None]: + # We expect the API to order the jurisdictions by name, so we upload them + # out of order. rv = client.put( f"/election/{election_id}/jurisdiction/file", data={ "jurisdictions": ( io.BytesIO( b"Jurisdiction,Admin Email\n" - b"J1,a1@example.com\n" b"J2,a2@example.com\n" - b"J3,a3@example.com" + b"J3,a3@example.com\n" + b"J1,a1@example.com" ), "jurisdictions.csv", ) @@ -61,7 +63,11 @@ def jurisdiction_ids( ) assert_ok(rv) bgcompute_update_election_jurisdictions_file() - jurisdictions = Jurisdiction.query.filter_by(election_id=election_id).all() + jurisdictions = ( + Jurisdiction.query.filter_by(election_id=election_id) + .order_by(Jurisdiction.name) + .all() + ) yield [j.id for j in jurisdictions] diff --git a/tests/helpers.py b/tests/helpers.py index 1070a7075..d46808617 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -71,6 +71,17 @@ def create_org_and_admin( return org.id, aa.id +def create_jurisdiction_admin( + jurisdiction_id: str, user_email: str = DEFAULT_JA_EMAIL +) -> str: + ja = create_user(user_email) + db.session.add(ja) + admin = JurisdictionAdministration(user_id=ja.id, jurisdiction_id=jurisdiction_id) + db.session.add(admin) + db.session.commit() + return str(ja.id) + + def create_jurisdiction_and_admin( election_id: str, jurisdiction_name: str = "Test Jurisdiction", @@ -79,13 +90,10 @@ def 
create_jurisdiction_and_admin( jurisdiction = Jurisdiction( id=str(uuid.uuid4()), election_id=election_id, name=jurisdiction_name ) - ja = create_user(user_email) - db.session.add(ja) - admin = JurisdictionAdministration(user_id=ja.id, jurisdiction_id=jurisdiction.id) db.session.add(jurisdiction) - db.session.add(admin) db.session.commit() - return jurisdiction.id, ja.id + ja_id = create_jurisdiction_admin(jurisdiction.id, user_email) + return jurisdiction.id, ja_id def create_election( diff --git a/tests/routes_tests/test_audit_boards.py b/tests/routes_tests/test_audit_boards.py new file mode 100644 index 000000000..7736d576d --- /dev/null +++ b/tests/routes_tests/test_audit_boards.py @@ -0,0 +1,303 @@ +import pytest, json +from flask.testing import FlaskClient +from typing import List +from datetime import datetime +from collections import defaultdict + +from tests.helpers import ( + post_json, + assert_ok, + create_jurisdiction_admin, + set_logged_in_user, +) +from arlo_server.models import ( + db, + AuditBoard, + Round, + RoundContestResult, + Contest, + SampledBallot, + Batch, +) +from arlo_server.auth import UserType + +JA_EMAIL = "ja@example.com" +SAMPLE_SIZE = 119 # Bravo sample size + + +def assert_ballots_got_assigned_correctly( + jurisdiction_id: str, + round_id: str, + expected_num_audit_boards: int, + expected_num_ballots: int, +): + # We got the right number of audit boards + audit_boards = AuditBoard.query.filter_by( + jurisdiction_id=jurisdiction_id, round_id=round_id + ).all() + assert len(audit_boards) == expected_num_audit_boards + + # We got the right number of sampled ballots + ballots = ( + SampledBallot.query.join(Batch) + .filter_by(jurisdiction_id=jurisdiction_id) + .join(SampledBallot.draws) + .filter_by(round_id=round_id) + .distinct(SampledBallot.batch_id, SampledBallot.ballot_position) + .all() + ) + assert len(ballots) == expected_num_ballots + + # All the ballots got assigned + assert sum(len(ab.sampled_ballots) for ab in 
audit_boards) == expected_num_ballots + + # Every audit board got some ballots + for audit_board in audit_boards: + assert len(audit_board.sampled_ballots) > 0 + + # All the ballots from each batch got assigned to the same audit board + audit_boards_by_batch = defaultdict(set) + for audit_board in audit_boards: + for ballot in audit_board.sampled_ballots: + audit_boards_by_batch[ballot.batch_id].add(audit_board.id) + for batch_id, audit_board_ids in audit_boards_by_batch.items(): + assert ( + len(audit_board_ids) == 1 + ), f"Different audit boards assigned ballots from the same batch" + + +@pytest.fixture +def round_id( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], # pylint: disable=unused-argument + contest_id: str, # pylint: disable=unused-argument + election_settings, # pylint: disable=unused-argument + manifests, # pylint: disable=unused-argument +) -> str: + rv = post_json( + client, + f"/election/{election_id}/round", + {"roundNum": 1, "sampleSize": SAMPLE_SIZE}, + ) + assert_ok(rv) + rv = client.get(f"/election/{election_id}/round",) + rounds = json.loads(rv.data)["rounds"] + yield rounds[0]["id"] + + +@pytest.fixture +def round_2_id( + client: FlaskClient, election_id: str, contest_id: str, round_id: str, +) -> str: + # Fake that the first round got completed by setting Round.ended_at. + # We also need to add RoundContestResults so that the next round sample + # size can get computed. 
+ round = Round.query.get(round_id) + round.ended_at = datetime.utcnow() + contest = Contest.query.get(contest_id) + db.session.add( + RoundContestResult( + round_id=round.id, + contest_id=contest.id, + contest_choice_id=contest.choices[0].id, + result=70, + ) + ) + db.session.add( + RoundContestResult( + round_id=round.id, + contest_id=contest.id, + contest_choice_id=contest.choices[1].id, + result=49, + ) + ) + db.session.commit() + + set_logged_in_user(client, UserType.AUDIT_ADMIN, "aa@example.com") + rv = post_json(client, f"/election/{election_id}/round", {"roundNum": 2},) + assert_ok(rv) + + rv = client.get(f"/election/{election_id}/round",) + rounds = json.loads(rv.data)["rounds"] + yield rounds[1]["id"] + + +@pytest.fixture +def as_jurisdiction_admin(client: FlaskClient, jurisdiction_ids: List[str]): + create_jurisdiction_admin(jurisdiction_ids[0], JA_EMAIL) + set_logged_in_user(client, UserType.JURISDICTION_ADMIN, JA_EMAIL) + + +def test_audit_boards_create_one( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], + round_id: str, + as_jurisdiction_admin, # pylint: disable=unused-argument +): + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/{round_id}/audit-board", + [{"name": "Audit Board #1"}], + ) + assert_ok(rv) + assert_ballots_got_assigned_correctly( + jurisdiction_ids[0], + round_id, + expected_num_audit_boards=1, + expected_num_ballots=75, + ) + + +def test_audit_boards_create_two( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], + round_id: str, + as_jurisdiction_admin, # pylint: disable=unused-argument +): + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/{round_id}/audit-board", + [{"name": "Audit Board #1"}, {"name": "Audit Board #2"}], + ) + assert_ok(rv) + assert_ballots_got_assigned_correctly( + jurisdiction_ids[0], + round_id, + expected_num_audit_boards=2, + expected_num_ballots=75, + ) + + +def 
test_audit_boards_round_2( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], + round_2_id: str, + as_jurisdiction_admin, # pylint: disable=unused-argument +): + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/{round_2_id}/audit-board", + [ + {"name": "Audit Board #1"}, + {"name": "Audit Board #2"}, + {"name": "Audit Board #3"}, + ], + ) + assert_ok(rv) + + assert_ballots_got_assigned_correctly( + jurisdiction_ids[0], + round_2_id, + expected_num_audit_boards=3, + expected_num_ballots=134, + ) + + +def test_audit_boards_missing_field( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], + round_id: str, + as_jurisdiction_admin, # pylint: disable=unused-argument +): + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/{round_id}/audit-board", + [{}, {"name": "Audit Board #2"}], + ) + assert rv.status_code == 400 + assert json.loads(rv.data) == { + "errors": [ + {"errorType": "Bad Request", "message": "'name' is a required property",} + ] + } + + +def test_audit_boards_duplicate_name( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], + round_id: str, + as_jurisdiction_admin, # pylint: disable=unused-argument +): + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/{round_id}/audit-board", + [{"name": "Audit Board #1"}, {"name": "Audit Board #1"}], + ) + assert rv.status_code == 409 + assert json.loads(rv.data) == { + "errors": [ + {"errorType": "Conflict", "message": "Audit board names must be unique",} + ] + } + + +def test_audit_boards_already_created( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], + round_id: str, + as_jurisdiction_admin, # pylint: disable=unused-argument +): + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/{round_id}/audit-board", + [{"name": "Audit Board #1"}], + ) + 
assert_ok(rv) + + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/{round_id}/audit-board", + [{"name": "Audit Board #2"}], + ) + assert rv.status_code == 409 + assert json.loads(rv.data) == { + "errors": [ + { + "errorType": "Conflict", + "message": "Audit boards already created for round 1", + } + ] + } + + +def test_audit_boards_wrong_round( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], + round_id: str, + round_2_id: str, # pylint: disable=unused-argument + as_jurisdiction_admin, # pylint: disable=unused-argument +): + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/{round_id}/audit-board", + [{"name": "Audit Board #1"}], + ) + assert rv.status_code == 409 + assert json.loads(rv.data) == { + "errors": [ + {"errorType": "Conflict", "message": "Round 1 is not the current round",} + ] + } + + +def test_audit_boards_bad_round_id( + client: FlaskClient, + election_id: str, + jurisdiction_ids: List[str], + round_id: str, # pylint: disable=unused-argument + as_jurisdiction_admin, # pylint: disable=unused-argument +): + rv = post_json( + client, + f"/election/{election_id}/jurisdiction/{jurisdiction_ids[0]}/round/not-a-valid-id/audit-board", + [{"name": "Audit Board #1"}], + ) + assert rv.status_code == 404 diff --git a/tests/routes_tests/test_audit_status.py b/tests/routes_tests/test_audit_status.py index 4b619c198..51bb9cd4e 100644 --- a/tests/routes_tests/test_audit_status.py +++ b/tests/routes_tests/test_audit_status.py @@ -62,12 +62,6 @@ def test_audit_status(client, election_id): "jurisdictions": [ { "auditBoards": [ - { - "id": audit_board_id_2, - "members": [], - "name": "audit board #2", - "passphrase": assert_is_passphrase, - }, { "id": audit_board_id_1, "members": [ @@ -77,6 +71,12 @@ def test_audit_status(client, election_id): "name": "Audit Board #1", "passphrase": assert_is_passphrase, }, + { + "id": audit_board_id_2, + "members": [], + 
"name": "audit board #2", + "passphrase": assert_is_passphrase, + }, ], "ballotManifest": { "file": {"name": "manifest.csv", "uploadedAt": assert_is_date}, diff --git a/tests/routes_tests/test_jurisdictions.py b/tests/routes_tests/test_jurisdictions.py index e5e48a369..d049b898c 100644 --- a/tests/routes_tests/test_jurisdictions.py +++ b/tests/routes_tests/test_jurisdictions.py @@ -16,45 +16,12 @@ ) from arlo_server import db from arlo_server.models import ( - Jurisdiction, AuditBoard, SampledBallot, SampledBallotDraw, USState, ) -from bgcompute import ( - bgcompute_update_election_jurisdictions_file, - bgcompute_update_ballot_manifest_file, -) - - -@pytest.fixture() -def jurisdiction_ids(client: FlaskClient, election_id: str) -> List[str]: - # We expect the list endpoint to order the jurisdictions by name, so we - # upload them out of order. - rv = client.put( - f"/election/{election_id}/jurisdiction/file", - data={ - "jurisdictions": ( - io.BytesIO( - b"Jurisdiction,Admin Email\n" - b"J2,a2@example.com\n" - b"J3,a3@example.com\n" - b"J1,a1@example.com" - ), - "jurisdictions.csv", - ) - }, - ) - assert_ok(rv) - bgcompute_update_election_jurisdictions_file() - jurisdictions = ( - Jurisdiction.query.filter_by(election_id=election_id) - .order_by(Jurisdiction.name) - .all() - ) - assert len(jurisdictions) == 3 - yield [j.id for j in jurisdictions] +from bgcompute import bgcompute_update_ballot_manifest_file def test_jurisdictions_list_empty(client: FlaskClient, election_id: str): diff --git a/tests/routes_tests/test_report.py b/tests/routes_tests/test_report.py index 7b7d5a07d..076fabbc5 100644 --- a/tests/routes_tests/test_report.py +++ b/tests/routes_tests/test_report.py @@ -286,16 +286,16 @@ def test_two_round_audit_report(client, election_id): # We'll just test a sampling of lines that should include a good variety of cases assert ballot_lines[:10] == [ - f'"Batch 1, #111",Round 2: 0.034167626,Audited,{candidate_id_1},Comment for ballot 0', + f'"Batch 1, 
#111",Round 2: 0.034167626,Audited,{candidate_id_2},', f'"Batch 1, #122",Round 1: 0.012066605,Audited,{candidate_id_1},', f'"Batch 10, #10",Round 1: 0.010939432,Audited,{candidate_id_1},Comment for ballot 0', f'"Batch 10, #103",Round 2: 0.031357473,Audited,{candidate_id_1},', - '"Batch 10, #151",Round 1: 0.012381762,Audited,Blank vote/no mark,Comment for ballot 3', - '"Batch 10, #175",Round 2: 0.021956866,Audited,Audit board can\'t agree,', + f'"Batch 10, #151",Round 1: 0.012381762,Audited,Blank vote/no mark,Comment for ballot 3', + f'"Batch 10, #175",Round 2: 0.021956866,Audited,{candidate_id_2},Comment for ballot 1017', f'"Batch 10, #200",Round 1: 0.000030407,Audited,{candidate_id_2},', f'"Batch 10, #59",Round 1: 0.002728647,Audited,{candidate_id_1},', - '"Batch 10, #72",Round 1: 0.009650515,Audited,Audit board can\'t agree,', - '"Batch 100, #106",Round 2: 0.015314474,Audited,Blank vote/no mark,Comment for ballot 3', + f'"Batch 10, #72",Round 1: 0.009650515,Audited,Audit board can\'t agree,', + f'"Batch 100, #106",Round 2: 0.015314474,Audited,{candidate_id_2},', ] # Check one of the ballots sampled in both rounds to make sure it formats correctly diff --git a/util/group_by.py b/util/group_by.py new file mode 100644 index 000000000..ab24641aa --- /dev/null +++ b/util/group_by.py @@ -0,0 +1,9 @@ +import itertools + + +def group_by_iter(xs, key=None): + return itertools.groupby(sorted(xs, key=key), key=key) + + +def group_by(xs, key=None): + return {k: list(vs) for k, vs in group_by_iter(xs, key=key)}