Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add migration to age buckets, fixes #2914 #2918

Merged
merged 3 commits into from
Oct 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/manual-migration-helpers.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# CI workflow that runs the manual migration helper test script.
name: Manual migration helpers CI

# Run on pushes to main and on pull requests, but only when this workflow or
# the helper sources change; edits to the helpers' README alone are excluded.
on:
push:
branches: [main]
paths:
- '.github/workflows/manual-migration-helpers.yml'
- 'data-serving/scripts/setup-db/manual-migration-helpers/**'
- '!data-serving/scripts/setup-db/manual-migration-helpers/README.md'
pull_request:
paths:
- '.github/workflows/manual-migration-helpers.yml'
- 'data-serving/scripts/setup-db/manual-migration-helpers/**'
- '!data-serving/scripts/setup-db/manual-migration-helpers/README.md'

jobs:
ci:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
# test.sh is executed from the helpers directory, so its relative paths
# resolve against that directory.
- name: Test with docker
working-directory: data-serving/scripts/setup-db/manual-migration-helpers
run: ./test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Multi-stage image: `python-base` -> `builder-base` -> `development`.
# `python-base` sets up all our shared environment variables
FROM python:3.10-slim as python-base

ENV PYTHONUNBUFFERED=1 \
# prevents python creating .pyc files
PYTHONDONTWRITEBYTECODE=1 \
\
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
\
# https://python-poetry.org/docs/configuration/#using-environment-variables
POETRY_VERSION=1.2.2 \
# make poetry install to this location
POETRY_HOME="/opt/poetry" \
# make poetry create the virtual environment in the project's root
# it gets named `.venv`
POETRY_VIRTUALENVS_IN_PROJECT=true \
# do not ask any interactive question
POETRY_NO_INTERACTION=1 \
\
# this is where our requirements + virtual environment will live
PYSETUP_PATH="/opt/pysetup" \
VENV_PATH="/opt/pysetup/.venv"

# prepend poetry and venv to path
ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"

# `builder-base` stage is used to build deps + create our virtual environment
FROM python-base as builder-base
# curl is needed only to download the poetry installer below
RUN apt-get update \
&& apt-get install --no-install-recommends -y curl

# install poetry - respects $POETRY_VERSION & $POETRY_HOME
RUN curl -sSL https://install.python-poetry.org | python3 -

# copy project requirement files here to ensure they will be cached.
WORKDIR $PYSETUP_PATH
COPY poetry.lock pyproject.toml ./

# install runtime deps - uses $POETRY_VIRTUALENVS_IN_PROJECT internally
# NOTE(review): Poetry 1.2 documents --no-dev as deprecated in favour of
# --without dev -- confirm before bumping POETRY_VERSION.
RUN poetry install --no-dev

# `development` image is used during development / testing
FROM python-base as development

# NOTE(review): this uses `upgrade` where the builder stage uses `install`
# -- confirm that is intended.
RUN apt-get update && apt-get upgrade -y curl

WORKDIR $PYSETUP_PATH

# copy in our built poetry + venv
COPY --from=builder-base $POETRY_HOME $POETRY_HOME
COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH

# quicker install as runtime deps are already installed
# (no --no-dev here, so the dev dependencies are installed as well)
RUN poetry install

# will become mountpoint of our code
WORKDIR /app

# copy the build context into /app
COPY ./ ./

# default command: run the test script copied into /app
CMD ["./test.sh"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
version: "3.7"

services:
  # Builds the test image from Dockerfile-test and runs its default command.
  test:
    build:
      context: .
      dockerfile: Dockerfile-test
    # The tests connect to the host named "mongo", so declare the dependency
    # explicitly and let Compose start the database container first.
    depends_on:
      - mongo
  # MongoDB instance the tests connect to via the service name "mongo".
  mongo:
    image: mongo:5.0
    ports:
      - "27017:27017"

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Poetry project definition for the age bucket migration helper.
[tool.poetry]
name = "age-buckets-transition"
version = "0.1.0"
description = "Manual migration script for age buckets transition"
authors = ["Global.health maintainers <info@global.health>"]
license = "MIT"

# Runtime dependencies imported by the migration script.
[tool.poetry.dependencies]
python = "^3.10"
# NOTE(review): the "srv" extra is presumably needed for mongodb+srv://
# connection strings -- confirm against the deployment's CONN value.
pymongo = {extras = ["srv"], version = "^4.3.2"}
tqdm = "^4.64.1"

# Dependencies needed only to run the test suite.
[tool.poetry.dev-dependencies]
pytest = "^7.1.3"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""
Manual migration script for age bucket transition

This script deploys the data transition from
demographics.ageRange.{start,end} to demographics.ageBuckets. The age
buckets are defined in an ageBuckets collection in the DB that is created
using an automatic migration. While this defines the age buckets, it does
not alter the currently existing information in the database which still
uses ageRange. To manually transition the data, this reads in each case and
uses the ageBuckets collection to figure out the buckets and write them to
the database.
"""

import os
import logging
from typing import Hashable

import pymongo
from tqdm import tqdm

# Database name used when the DB_NAME environment variable is not set.
DEFAULT_DB = "covid19"


def find_age_buckets(
    start: int, end: int, age_buckets: dict[Hashable, tuple[int, int]]
) -> list[Hashable]:
    """Return the keys of the buckets touched by the age range.

    A bucket ``(low, high)`` is selected when it contains ``start``,
    contains ``end``, or lies strictly between ``start`` and ``end``.
    Bucket bounds are inclusive. Keys come back in the iteration order of
    ``age_buckets``.
    """
    matches: list[Hashable] = []
    for key, (low, high) in age_buckets.items():
        if low <= start <= high or low <= end <= high:
            # One endpoint of the range falls inside this bucket.
            matches.append(key)
        elif start < low and high < end:
            # The whole bucket sits strictly inside the range.
            matches.append(key)
    return matches


def migrate_age_buckets(db, collection: str = "cases"):
    """Rewrite ``demographics.ageRange`` as ``demographics.ageBuckets``.

    Every document in ``collection`` that has ``list: True`` and a
    ``demographics.ageRange`` field gets the identifiers of the matching
    buckets stored under ``demographics.ageBuckets``; its ``ageRange``
    field is removed in the same update.

    Args:
        db: Database handle exposing the ``ageBuckets`` collection and the
            target collection (the script passes a pymongo database).
        collection: Name of the collection that holds the cases.

    Raises:
        RuntimeError: If the ``ageBuckets`` collection has no documents.
    """
    age_buckets = {
        record["_id"]: (record["start"], record["end"])
        for record in db.ageBuckets.find()
    }
    # An explicit check instead of ``assert``: asserts are stripped under
    # ``python -O``, and migrating with no buckets would replace every
    # ageRange with an empty list.
    if not age_buckets:
        raise RuntimeError(
            "ageBuckets collection is empty; refusing to migrate age ranges"
        )

    cases = db[collection]
    pending = {"list": True, "demographics.ageRange": {"$exists": True}}
    for case in tqdm(cases.find(pending)):
        age_range = case["demographics"]["ageRange"]
        # update_one is enough here: the updated document is never read back.
        cases.update_one(
            {"_id": case["_id"]},
            {
                "$set": {
                    "demographics.ageBuckets": find_age_buckets(
                        int(age_range["start"]),
                        int(age_range["end"]),
                        age_buckets,
                    )
                },
                "$unset": {"demographics.ageRange": ""},
            },
        )


if __name__ == "__main__":
    # Use the connection string from $CONN when it is set and non-empty;
    # otherwise build the client with no arguments.
    CONN = os.getenv("CONN")
    try:
        client = pymongo.MongoClient(CONN) if CONN else pymongo.MongoClient()
    except Exception as err:
        # Log the failure, then let it propagate so the script still aborts.
        logging.error(err)
        raise

    # $DB_NAME selects the database, falling back to DEFAULT_DB.
    db_name = os.getenv("DB_NAME", DEFAULT_DB)
    db = client[db_name]
    migrate_age_buckets(db)
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Runs the pytest suite through poetry.

set -o errexit -o nounset -o pipefail

# DOCKERIZED=1 enables the tests that are skipped when the variable is unset.
DOCKERIZED=1 poetry run pytest .
echo "Tests and code quality checks passed"
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash
# Builds and runs the docker-compose test stack, then tears it down.

set -eo pipefail

# Work from the script's own directory so the compose file is found.
# "$0" is quoted so that a path containing spaces still resolves.
pushd "$(dirname "$0")"

# Stop and remove the containers, volumes and orphans, then restore the
# caller's working directory. Runs on every exit, including failures.
function cleanup() {
    docker compose -f docker-compose-test.yml stop
    docker compose -f docker-compose-test.yml down -v --remove-orphans
    popd
}

trap cleanup EXIT

# The script's exit status is taken from the `test` service.
docker compose -f docker-compose-test.yml up --build --exit-code-from test
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import os

import pytest
import pymongo

import run

# Reference buckets: a single bucket for age 0, followed by consecutive
# five-year buckets from 1-5 up to 116-120 (bounds inclusive).
AGE_BUCKETS = [{"_id": "0", "start": 0, "end": 0}] + [
    {"_id": f"{low}-{low + 4}", "start": low, "end": low + 4}
    for low in range(1, 117, 5)
]

# (start, end) age ranges used to build the sample cases.
AGES = [(60, 60), (72, 80), (70, 79), (130, 140)]


@pytest.fixture
def db():
    """Provide the ``covid19`` database from the MongoDB host ``mongo``."""
    return pymongo.MongoClient(host="mongo")["covid19"]


@pytest.fixture
def age_buckets(db):
    """Reset the ``ageBuckets`` collection and return it as a mapping.

    The collection is dropped and refilled from ``AGE_BUCKETS``; the return
    value maps each bucket ``_id`` to its ``(start, end)`` tuple as read
    back from the database.
    """
    buckets = db.ageBuckets
    buckets.drop()
    buckets.insert_many(AGE_BUCKETS)

    lookup = {}
    for record in buckets.find():
        lookup[record["_id"]] = (record["start"], record["end"])
    return lookup


@pytest.fixture
def setup_cases(db):
    """Replace the ``cases`` collection with one listed case per AGES entry."""
    documents = []
    for start, end in AGES:
        age_range = {"start": start, "end": end}
        documents.append({"list": True, "demographics": {"ageRange": age_range}})

    db.cases.drop()
    db.cases.insert_many(documents)


@pytest.mark.skipif(
    "DOCKERIZED" not in os.environ,
    reason="Test disabled outside dockerized environment",
)
@pytest.mark.parametrize(
    ("age_limits", "expected"),
    [
        ((60, 60), ["56-60"]),
        ((72, 80), ["71-75", "76-80"]),
        ((70, 79), ["66-70", "71-75", "76-80"]),
        ((130, 140), []),
    ],
)
def test_find_age_buckets(age_buckets, age_limits, expected):
    """Check the bucket identifiers returned for each (start, end) pair."""
    start, end = age_limits
    found = run.find_age_buckets(start, end, age_buckets)
    assert found == expected


@pytest.mark.skipif(
    os.getenv("DOCKERIZED") is None,
    reason="Test disabled outside dockerized environment",
)
def test_migrate_age_buckets(db, age_buckets, setup_cases):
    """Check that the migration swaps ageRange for ageBuckets on every case.

    The ``age_buckets`` fixture is requested only for its side effect of
    populating the ``ageBuckets`` collection, so the test no longer relies
    on another test having filled it first.
    """
    run.migrate_age_buckets(db)
    # no demographics.ageRange should be present
    assert not list(
        db.cases.find({"list": True, "demographics.ageRange": {"$exists": True}})
    )
    assert [case["demographics"]["ageBuckets"] for case in db.cases.find()] == [
        ["56-60"],
        ["71-75", "76-80"],
        ["66-70", "71-75", "76-80"],
        [],
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Runs the test wrapper of each manual migration helper.
set -o errexit -o nounset -o pipefail

echo "==> Testing age bucket transition"
./age-buckets-transition/test_docker.sh
2 changes: 1 addition & 1 deletion data-serving/scripts/setup-db/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"lint": "tsc --noEmit && eslint '*/**/*.{js,ts,tsx}' --quiet --fix",
"import-sample-data": "python3 ./import-sample-data.py",
"migrate": "npm ci && migrate-mongo up",
"delete-all-cases": "mongo $CONN --eval 'db.cases.deleteMany({})'"
"delete-all-cases": "mongosh $CONN --eval 'db.cases.deleteMany({})'"
},
"repository": {
"type": "git",
Expand Down