Commit e52f023
Merge branch 'main' into jeremy/issue-161-radiology
jeremyestein committed Jan 9, 2024
2 parents e9413cd + 22aa62c commit e52f023
Showing 17 changed files with 95 additions and 58 deletions.
53 changes: 50 additions & 3 deletions .github/workflows/main.yml
@@ -86,7 +86,7 @@ jobs:

      - name: Install Python dependencies
        run: |
-         pip install pixl_core/[test]
+         pip install pixl_core/[test]
      - name: Run tests
        working-directory: pixl_core/tests
@@ -143,10 +143,33 @@ jobs:
  ehr-api-tests:
    runs-on: ubuntu-22.04
-   timeout-minutes: 15
+   timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3

+     - uses: docker/setup-buildx-action@v3
+     # pre-build and cache the postgres container as installing python3 takes a while, doesn't push
+     - name: Cache Docker layers
+       uses: actions/cache@v3
+       with:
+         path: /tmp/.buildx-cache
+         key: ${{ runner.os }}-buildx-${{ github.sha }}
+         restore-keys: |
+           ${{ runner.os }}-buildx-
+     - uses: docker/build-push-action@v5
+       with:
+         context: .
+         file: docker/postgres/Dockerfile
+         cache-from: type=local,src=/tmp/.buildx-cache
+         cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
+     - # Temp fix
+       # https://github.com/docker/build-push-action/issues/252
+       # https://github.com/moby/buildkit/issues/1896
+       name: Move cache
+       run: |
+         rm -rf /tmp/.buildx-cache
+         mv /tmp/.buildx-cache-new /tmp/.buildx-cache
      - name: Init Python
        uses: actions/setup-python@v4
        with:
@@ -184,11 +207,35 @@ jobs:
        ./run-tests.sh

  system-test:
    if: '! github.event.pull_request.draft'
    runs-on: ubuntu-22.04
-   timeout-minutes: 15
+   timeout-minutes: 30
    steps:
      - uses: actions/checkout@v3

+     - uses: docker/setup-buildx-action@v3
+     # pre-build and cache the postgres container as installing python3 takes a while, doesn't push
+     - name: Cache Docker layers
+       uses: actions/cache@v3
+       with:
+         path: /tmp/.buildx-cache
+         key: ${{ runner.os }}-buildx-${{ github.sha }}
+         restore-keys: |
+           ${{ runner.os }}-buildx-
+     - uses: docker/build-push-action@v5
+       with:
+         context: .
+         file: docker/postgres/Dockerfile
+         cache-from: type=local,src=/tmp/.buildx-cache
+         cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max
+     - # Temp fix
+       # https://github.com/docker/build-push-action/issues/252
+       # https://github.com/moby/buildkit/issues/1896
+       name: Move cache
+       run: |
+         rm -rf /tmp/.buildx-cache
+         mv /tmp/.buildx-cache-new /tmp/.buildx-cache
      - name: Init Python
        uses: actions/setup-python@v4
        with:
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -29,4 +29,4 @@ repos:
      pass_filenames: false
      entry: mypy
      args: ['--config-file=setup.cfg']
-     additional_dependencies: ['mypy', 'types-PyYAML', 'types-requests', 'types-python-slugify']
+     additional_dependencies: ['mypy', 'types-PyYAML', 'types-requests', 'types-python-slugify', 'types-psycopg2']
14 changes: 9 additions & 5 deletions cli/src/pixl_cli/_io.py
@@ -16,7 +16,7 @@

import json
from datetime import datetime
-from pathlib import Path
+from typing import TYPE_CHECKING

import pandas as pd
from core.omop import ParquetExport
@@ -25,6 +25,9 @@
from pixl_cli._logging import logger
from pixl_cli._utils import string_is_non_empty

+if TYPE_CHECKING:
+    from pathlib import Path


def messages_from_state_file(filepath: Path) -> list[Message]:
"""
@@ -40,9 +43,7 @@ def messages_from_state_file(filepath: Path) -> list[Message]:
msg = f"Invalid file suffix for {filepath}. Expected .state"
raise ValueError(msg)

return [
deserialise(line) for line in Path.open(filepath).readlines() if string_is_non_empty(line)
]
return [deserialise(line) for line in filepath.open().readlines() if string_is_non_empty(line)]


def copy_parquet_return_logfile_fields(parquet_path: Path) -> tuple[str, datetime]:
@@ -124,8 +125,11 @@ def _check_and_parse_parquet(private_dir: Path, public_dir: Path) -> pd.DataFrame:
    accessions = pd.read_parquet(private_dir / "PROCEDURE_OCCURRENCE_LINKS.parquet")
    # study_date is in procedure.procedure_date
    procedure = pd.read_parquet(public_dir / "PROCEDURE_OCCURRENCE.parquet")
+   # TODO: move from hardcoded ng tube and chest x-rays to being configurable  # noqa: FIX002
+   # https://github.com/UCLH-Foundry/PIXL/issues/212
+   imaging_procedures = procedure.loc[procedure.procedure_concept_id.isin([4163872, 42538241])]
    # joining data together
-   people_procedures = people.merge(procedure, on="person_id")
+   people_procedures = people.merge(imaging_procedures, on="person_id")
    return people_procedures.merge(accessions, on="procedure_occurrence_id")


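Aside: the new imaging_procedures step above narrows the OMOP procedures frame to two hardcoded concept IDs before joining. A minimal sketch of that filter-then-merge flow, using invented toy frames (only the concept IDs and column names come from the diff):

import pandas as pd

people = pd.DataFrame({"person_id": [1, 2]})
procedure = pd.DataFrame(
    {
        "person_id": [1, 1, 2],
        "procedure_occurrence_id": [4, 5, 6],
        "procedure_concept_id": [4163872, 42538241, 999999],  # last one: not imaging
    }
)
accessions = pd.DataFrame(
    {"procedure_occurrence_id": [4, 5, 6], "accession_number": ["AA1", "AA5", "ZZ9"]}
)

# keep only the hardcoded imaging concepts (NG tube and chest x-ray, per the TODO)
imaging_procedures = procedure.loc[procedure.procedure_concept_id.isin([4163872, 42538241])]
people_procedures = people.merge(imaging_procedures, on="person_id")
print(people_procedures.merge(accessions, on="procedure_occurrence_id"))  # two rows survive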
2 changes: 1 addition & 1 deletion cli/src/pixl_cli/_utils.py
@@ -18,7 +18,7 @@

def clear_file(filepath: Path) -> None:
    """Clear the contents of a file"""
-   Path.open(filepath, "w").close()
+   filepath.open("w").close()


def string_is_non_empty(string: str) -> bool:
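Aside: this commit repeatedly replaces the unbound-method call Path.open(filepath, ...) with the instance call filepath.open(...). The old spelling only works because the Path object is smuggled in as self, and it breaks if a plain string is ever passed; the instance call is the idiomatic pathlib form. A minimal sketch (the filename is hypothetical):

from pathlib import Path

filepath = Path("example.state")  # hypothetical file

# Old style: Path.open(filepath, "w").close() works only when filepath is
# already a Path, since it is passed as the method's `self`.

# New style: an ordinary method call, best wrapped in a context manager so
# the handle is always closed:
with filepath.open("w") as f:
    f.write("")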
2 changes: 1 addition & 1 deletion cli/tests/conftest.py
@@ -57,7 +57,7 @@ def db_engine(monkeymodule) -> Engine:
    :returns Engine: Engine for use in other setup fixtures
    """
    # SQLite doesn't support schemas, so remove pixl schema from engine options
-   execution_options = {"schema_translate_map": {"pixl": None}}
+   execution_options = {"schema_translate_map": {"pipeline": None}}
    engine = create_engine(
        "sqlite:///:memory:",
        execution_options=execution_options,
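Aside: the fixture relies on SQLAlchemy's schema_translate_map execution option so that models bound to the renamed "pipeline" schema can run against schema-less SQLite in tests. A self-contained sketch, assuming SQLAlchemy 2.x and a toy model rather than the real PIXL one:

from sqlalchemy import MetaData, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


class Base(DeclarativeBase):
    metadata = MetaData(schema="pipeline")


class Extract(Base):
    __tablename__ = "extract"
    extract_id: Mapped[int] = mapped_column(primary_key=True)


# Every reference to the "pipeline" schema is rewritten to "no schema",
# which SQLite can handle; on postgres the map is simply not needed.
engine = create_engine(
    "sqlite:///:memory:",
    execution_options={"schema_translate_map": {"pipeline": None}},
)
Base.metadata.create_all(engine)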
Binary file modified cli/tests/resources/omop/public/PROCEDURE_OCCURRENCE.parquet
34 changes: 9 additions & 25 deletions cli/tests/test_messages_from_parquet.py
@@ -26,9 +26,9 @@

def test_messages_from_parquet(resources: Path) -> None:
    """
-   Given a valid OMOP ES extract directory that has had the logfile parsed
+   Given a valid OMOP ES extract with 4 procedures, two of which are x-rays,
    When the messages are generated from the directory and the output of logfile parsing
-   Then the messages should match expected values
+   Then two messages should be generated
    """
    # Arrange
    omop_parquet_dir = resources / "omop"
@@ -39,35 +39,19 @@ def test_messages_from_parquet(resources: Path) -> None:
    assert all(isinstance(msg, Message) for msg in messages)

    expected_messages = [
-       Message(
-           mrn="12345678",
-           accession_number="12345678",
-           study_date=datetime.date.fromisoformat("2021-07-01"),
-           procedure_occurrence_id=1,
-           project_name="test-extract-uclh-omop-cdm",
-           omop_es_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"),
-       ),
-       Message(
-           mrn="12345678",
-           accession_number="ABC1234567",
-           study_date=datetime.date.fromisoformat("2021-07-01"),
-           procedure_occurrence_id=2,
-           project_name="test-extract-uclh-omop-cdm",
-           omop_es_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"),
-       ),
        Message(
            mrn="987654321",
-           accession_number="ABC1234560",
-           study_date=datetime.date.fromisoformat("2020-05-01"),
-           procedure_occurrence_id=3,
+           accession_number="AA12345601",
+           study_date=datetime.date.fromisoformat("2020-05-23"),
+           procedure_occurrence_id=4,
            project_name="test-extract-uclh-omop-cdm",
            omop_es_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"),
        ),
        Message(
-           mrn="5020765",
-           accession_number="MIG0234560",
-           study_date=datetime.date.fromisoformat("2015-05-01"),
-           procedure_occurrence_id=4,
+           mrn="987654321",
+           accession_number="AA12345605",
+           study_date=datetime.date.fromisoformat("2020-05-23"),
+           procedure_occurrence_id=5,
            project_name="test-extract-uclh-omop-cdm",
            omop_es_timestamp=datetime.datetime.fromisoformat("2023-12-07T14:08:58"),
        ),
16 changes: 6 additions & 10 deletions docker/postgres/Dockerfile
@@ -11,28 +11,24 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-FROM postgres:14-bullseye
+FROM postgres:16-bookworm

# OS setup
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update && \
-   apt-get install --yes --no-install-recommends procps ca-certificates locales build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev libsqlite3-dev wget libbz2-dev && \
+   apt-get install --yes --no-install-recommends procps ca-certificates locales python3 python3-pip python3.11-venv && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
RUN sed -i '/en_GB.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
-RUN wget https://www.python.org/ftp/python/3.10.0/Python-3.10.0.tgz && \
-   tar -xvf Python-3.10.0.tgz && \
-   cd Python-3.10.0 && \
-   ./configure --enable-optimizations && \
-   make -j 2 && \
-   make altinstall

COPY --chmod=0755 ./postgres/postgres.conf /etc/postgresql/postgresql.conf

COPY --chmod=0777 ./postgres/pixl-db_init.sh /docker-entrypoint-initdb.d/pixl-db_init.sh

-COPY ./postgres/create_pixl_tbls.py /pixl/create_pixl_tbls.py
+COPY --chmod=0777 ./postgres/create_pixl_tbls.py /pixl/create_pixl_tbls.py

COPY ./pixl_core/ /pixl/pixl_core

-RUN python3.10 -m pip install /pixl/pixl_core/
+RUN python3 -m venv /pixl/venv \
+   && cd /pixl/venv/bin \
+   && ./pip install /pixl/pixl_core/
4 changes: 2 additions & 2 deletions orthanc/orthanc-anon/plugin/pixl.py
@@ -237,15 +237,15 @@ def AnonymiseCallback(dataset):
orthanc.LogWarning("Removed overlays")

# Apply anonymisation.
with Path.open("/etc/orthanc/tag-operations.yaml") as file:
with Path("/etc/orthanc/tag-operations.yaml").open() as file:
# Load tag operations scheme from YAML.
tags = yaml.safe_load(file)
# Apply scheme to instance
dataset = pixl_dcmd.apply_tag_scheme(dataset, tags)
# Apply whitelist
dataset = pixl_dcmd.enforce_whitelist(dataset, tags)

# Write anoymised instance to disk.
# Write anonymised instance to disk.
return orthanc.ReceivedInstanceAction.MODIFY, pixl_dcmd.write_dataset_to_bytes(dataset)


2 changes: 1 addition & 1 deletion pixl_core/src/core/database.py
@@ -24,7 +24,7 @@
class Base(DeclarativeBase):
    """sqlalchemy base class"""

-   metadata = MetaData(schema="pixl")
+   metadata = MetaData(schema="pipeline")


class Extract(Base):
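Aside: with MetaData(schema="pipeline"), every table on this Base compiles to a schema-qualified name, which is why the rename from "pixl" has to be mirrored in conftest.py and create_pixl_tbls.py within the same commit. A tiny sketch of the qualified name (toy table, not the real model):

from sqlalchemy import Column, Integer, MetaData, Table

metadata = MetaData(schema="pipeline")
extract = Table("extract", metadata, Column("extract_id", Integer, primary_key=True))
print(extract.fullname)  # -> pipeline.extract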
4 changes: 2 additions & 2 deletions pixl_core/src/core/patient_queue/subscriber.py
@@ -16,7 +16,6 @@

import asyncio
import logging
-from pathlib import Path
from typing import TYPE_CHECKING, Any

import aio_pika
@@ -27,6 +26,7 @@

if TYPE_CHECKING:
    from collections.abc import Awaitable, Callable
+   from pathlib import Path

    from typing_extensions import Self

@@ -112,7 +112,7 @@ def consume_all(self, file_path: Path, timeout_in_seconds: int = 5) -> int:
        def callback(method: Any, properties: Any, body: Any) -> None:  # noqa: ARG001
            """Consume to file."""
            try:
-               with Path.open(file_path, "a") as csv_file:
+               with file_path.open("a") as csv_file:
                    print(str(body.decode()), file=csv_file)
            except:  # noqa: E722
                logger.exception("Failed to consume")
7 changes: 5 additions & 2 deletions pixl_ehr/src/pixl_ehr/_queries.py
@@ -13,14 +13,17 @@
# limitations under the License.
from __future__ import annotations

-from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path


class SQLQuery:
    def __init__(self, filepath: Path, context: dict) -> None:
        self.values: list[str] = []
        self._filepath = filepath
-       self._lines = Path.open(filepath).readlines()
+       self._lines = filepath.open().readlines()
self._replace_placeholders_and_populate_values(context)

def __str__(self) -> str:
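Aside: _io.py, subscriber.py, and this file all move the Path import under typing.TYPE_CHECKING. With from __future__ import annotations, annotations are stored as strings and never evaluated at runtime, so an import needed only for type hints can be skipped outside of type checking. A minimal sketch of the pattern:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from pathlib import Path  # only imported by the type checker


def read_lines(filepath: Path) -> list[str]:
    # `Path` is never touched at runtime; only the annotation mentions it
    return filepath.open().readlines()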
2 changes: 1 addition & 1 deletion postgres/create_pixl_tbls.py
@@ -29,7 +29,7 @@
engine = create_engine(url, echo=True, echo_pool="debug")

with engine.connect() as connection:
-   connection.execute(CreateSchema("pixl", if_not_exists=True))
+   connection.execute(CreateSchema("pipeline", if_not_exists=True))
    connection.commit()

Base.metadata.create_all(engine)
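Aside: if_not_exists=True makes the schema creation idempotent, since SQLAlchemy emits CREATE SCHEMA IF NOT EXISTS and re-running the init script stays safe; the parameter needs SQLAlchemy 2.0+. A minimal sketch with a hypothetical connection URL (the real one is assembled from POSTGRES_* env vars):

from sqlalchemy import create_engine
from sqlalchemy.schema import CreateSchema

engine = create_engine("postgresql+psycopg2://user:password@localhost:5432/pixl")  # hypothetical
with engine.connect() as connection:
    # emits: CREATE SCHEMA IF NOT EXISTS pipeline
    connection.execute(CreateSchema("pipeline", if_not_exists=True))
    connection.commit()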
4 changes: 3 additions & 1 deletion postgres/pixl-db_init.sh
@@ -23,4 +23,6 @@ ehr_create_command="CREATE SCHEMA emap_data AUTHORIZATION ${POSTGRES_USER}
"
psql -U "${POSTGRES_USER}" --dbname "${POSTGRES_DB}" -c "$ehr_create_command"

python3.10 /pixl/create_pixl_tbls.py
source /pixl/venv/bin/activate

python3 /pixl/create_pixl_tbls.py
5 changes: 3 additions & 2 deletions scripts/filter_cohort_for_those_present_in_raw.py
@@ -17,6 +17,7 @@
import os
import sys
from json import JSONDecodeError
+from pathlib import Path
from typing import Any

import requests
@@ -91,8 +92,8 @@ def _deserialise(response: requests.Response) -> Any:
present_accession_numbers = [orthanc.accession_number(s) for s in orthanc.studies]
print(f"Found {len(present_accession_numbers)} total studies")

-with Path.open(filename) as file:
-    with Path.open(f"{filename.rstrip('.csv')}_filtered.csv", "w") as new_file:
+with Path(filename).open() as file:
+    with Path(f"{filename.rstrip('.csv')}_filtered.csv").open("w") as new_file:
        for line in file:
            if any(a in line for a in present_accession_numbers):
                continue
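Aside: the untouched context line still builds the output name with filename.rstrip('.csv'). str.rstrip treats its argument as a set of characters rather than a suffix, so names ending in any of . c s v get over-stripped; str.removesuffix (Python 3.9+) does exact suffix removal. A quick illustration:

print("data.csv".rstrip(".csv"))              # data        (happens to look right)
print("accessions.csv".rstrip(".csv"))        # accession   (trailing "s" eaten too)
print("accessions.csv".removesuffix(".csv"))  # accessions  (exact suffix removal)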
2 changes: 1 addition & 1 deletion test/scripts/check_entry_in_pixl_anon.sh
@@ -17,4 +17,4 @@ set -euxo pipefail
_sql_command="select * from emap_data.ehr_anon"
_result=$(docker exec system-test-postgres-1 /bin/bash -c \
    "PGPASSWORD=pixl_db_password psql -U pixl_db_username -d pixl -c \"$_sql_command\"")
-echo "$_result" | grep -q "6 row"
+echo "$_result" | grep -q "2 row"
