Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPL-740 new fit to pick control rules #170

Merged
merged 20 commits into from
Dec 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,15 @@ in the below relevant section.
The implementation of the current version can be found in [FilteredPositiveIdentifier](./crawler/filtered_positive_identifier.py),
with the implementation of previous versions (if any) in the git history.

#### Version 1 `v1` - **Current Version**
#### Version 0 `v0`

A sample is filtered positive if:

* it has a positive RESULT

This is the pre-"fit-to-pick" implementation, without any extra filtering on top of the RESULT=Positive requirement.

#### Version 1 `v1`

A sample is filtered positive if:

Expand All @@ -115,6 +123,17 @@ A sample is filtered positive if:
More information on this version can be found on [this](https://ssg-confluence.internal.sanger.ac.uk/display/PSDPUB/UAT+6th+October+2020)
Confluence page.

#### Version 2 `v2` - **Current Version**

A sample is filtered positive if:

* it has a 'Positive' RESULT
* it is not a control (ROOT_SAMPLE_ID does not start with 'CBIQA_', 'QC0', or 'ZZA000')
* all of CH1_CQ, CH2_CQ and CH3_CQ are `None`, or one of these is less than or equal to 30

More information on this version can be found on [this](https://ssg-confluence.internal.sanger.ac.uk/display/PSDPUB/Fit+to+pick+-+v2)
Confluence page.

#### Propagating Filtered Positive version changes to MongoDB, MLWH and DART

On changing the positive filtering version/definition, all unpicked samples stored in MongoDB, MLWH and DART need
Expand Down
3 changes: 2 additions & 1 deletion crawler/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@

# positive Result value
POSITIVE_RESULT_VALUE = "Positive"
LIMIT_OF_DETECTION_RESULT_VALUE = "limit of detection"

# allowed Result field values
ALLOWED_RESULT_VALUES = (POSITIVE_RESULT_VALUE, "Negative", "limit of detection", "Void")
ALLOWED_RESULT_VALUES = (POSITIVE_RESULT_VALUE, "Negative", LIMIT_OF_DETECTION_RESULT_VALUE, "Void")

# allowed CT channel CHn-Target field values (or can be null)
ALLOWED_CH_TARGET_VALUES = ("ORF1ab", "N gene", "S gene", "MS2")
Expand Down
11 changes: 5 additions & 6 deletions crawler/file_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
run_mysql_executemany_query,
set_dart_well_properties,
)
from crawler.filtered_positive_identifier import FilteredPositiveIdentifier
from crawler.filtered_positive_identifier import current_filtered_positive_identifier
from crawler.helpers.general_helpers import (
current_time,
get_dart_well_index,
Expand Down Expand Up @@ -254,7 +254,7 @@ class CentreFile:
FIELD_CH4_CQ,
}

filtered_positive_identifier = FilteredPositiveIdentifier()
filtered_positive_identifier = current_filtered_positive_identifier()

def __init__(self, file_name, centre):
"""Initialiser for the class representing the file
Expand Down Expand Up @@ -1005,10 +1005,9 @@ def parse_and_format_row(self, row, line_number, seen_rows) -> Any:
modified_row[FIELD_UPDATED_AT] = import_timestamp

# filtered-positive calculations
if modified_row[FIELD_RESULT] == POSITIVE_RESULT_VALUE:
modified_row[FIELD_FILTERED_POSITIVE] = self.filtered_positive_identifier.is_positive(modified_row)
modified_row[FIELD_FILTERED_POSITIVE_VERSION] = self.filtered_positive_identifier.current_version()
modified_row[FIELD_FILTERED_POSITIVE_TIMESTAMP] = import_timestamp
modified_row[FIELD_FILTERED_POSITIVE] = self.filtered_positive_identifier.is_positive(modified_row)
modified_row[FIELD_FILTERED_POSITIVE_VERSION] = self.filtered_positive_identifier.version
modified_row[FIELD_FILTERED_POSITIVE_TIMESTAMP] = import_timestamp

# add lh sample uuid
modified_row[FIELD_LH_SAMPLE_UUID] = str(uuid.uuid4())
Expand Down
90 changes: 60 additions & 30 deletions crawler/filtered_positive_identifier.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from abc import ABC
import decimal
import re

Expand All @@ -14,23 +15,20 @@
from crawler.types import Sample


class FilteredPositiveIdentifier:
# record/reference all versions and definitions here
versions = [
"v1", # initial implementation, as per GPL-669
]
result_regex = re.compile(f"^{POSITIVE_RESULT_VALUE}", re.IGNORECASE)
root_sample_id_control_regex = re.compile("^CBIQA_")
ct_value_limit = decimal.Decimal(30)
d128_context = create_decimal128_context()
# record/reference all versions and definitions here
FILTERED_POSITIVE_VERSION_0 = "v0" # pre-filtered_positive definitions
FILTERED_POSITIVE_VERSION_1 = "v1" # initial implementation, as per GPL-669
FILTERED_POSITIVE_VERSION_2 = "v2" # updated as per GPL-699 and GPL-740

def current_version(self) -> str:
"""Returns the current version of the identifier.

Returns:
{str} - the version number
"""
return self.versions[-1]
class FilteredPositiveIdentifier(ABC):
def __init__(self):
self.version = None
self.ct_value_limit = decimal.Decimal(30)
Chris-Friend marked this conversation as resolved.
Show resolved Hide resolved
self.d128_context = create_decimal128_context()
self.result_regex = re.compile(f"^{POSITIVE_RESULT_VALUE}", re.IGNORECASE)
self.root_sample_id_control_regex = None
self.evaluate_ct_values = False

def is_positive(self, sample: Sample) -> bool:
"""Determines whether a sample is a filtered positive.
Expand All @@ -44,23 +42,55 @@ def is_positive(self, sample: Sample) -> bool:
if self.result_regex.match(sample[FIELD_RESULT]) is None:
return False

if self.root_sample_id_control_regex.match(sample[FIELD_ROOT_SAMPLE_ID]) is not None:
if (
self.root_sample_id_control_regex
and self.root_sample_id_control_regex.match(sample[FIELD_ROOT_SAMPLE_ID]) is not None
):
return False

ch1_cq = sample.get(FIELD_CH1_CQ)
ch2_cq = sample.get(FIELD_CH2_CQ)
ch3_cq = sample.get(FIELD_CH3_CQ)
if self.evaluate_ct_values:
ch1_cq = sample.get(FIELD_CH1_CQ)
ch2_cq = sample.get(FIELD_CH2_CQ)
ch3_cq = sample.get(FIELD_CH3_CQ)

if ch1_cq is None and ch2_cq is None and ch3_cq is None:
return True

if ch1_cq is None and ch2_cq is None and ch3_cq is None:
with decimal.localcontext(self.d128_context):
# type check before attempting to convert to decimal
if ch1_cq is not None and ch1_cq.to_decimal() <= self.ct_value_limit:
return True
elif ch2_cq is not None and ch2_cq.to_decimal() <= self.ct_value_limit:
return True
elif ch3_cq is not None and ch3_cq.to_decimal() <= self.ct_value_limit:
return True
else:
return False
else:
return True

with decimal.localcontext(self.d128_context):
# type check before attempting to convert to decimal
if ch1_cq is not None and ch1_cq.to_decimal() <= self.ct_value_limit:
return True
elif ch2_cq is not None and ch2_cq.to_decimal() <= self.ct_value_limit:
return True
elif ch3_cq is not None and ch3_cq.to_decimal() <= self.ct_value_limit:
return True
else:
return False

def current_filtered_positive_identifier() -> FilteredPositiveIdentifier:
return FilteredPositiveIdentifierV2()


class FilteredPositiveIdentifierV0(FilteredPositiveIdentifier):
def __init__(self):
super(FilteredPositiveIdentifierV0, self).__init__()
self.version = FILTERED_POSITIVE_VERSION_0


class FilteredPositiveIdentifierV1(FilteredPositiveIdentifier):
def __init__(self):
super(FilteredPositiveIdentifierV1, self).__init__()
self.version = FILTERED_POSITIVE_VERSION_1
self.root_sample_id_control_regex = re.compile("^CBIQA_")
self.evaluate_ct_values = True


class FilteredPositiveIdentifierV2(FilteredPositiveIdentifier):
def __init__(self):
super(FilteredPositiveIdentifierV2, self).__init__()
self.version = FILTERED_POSITIVE_VERSION_2
self.root_sample_id_control_regex = re.compile("^(?:CBIQA_|QC0|ZZA000)")
self.evaluate_ct_values = True
6 changes: 3 additions & 3 deletions migrations/helpers/dart_samples_update_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
get_mongo_db,
run_mysql_executemany_query,
)
from crawler.filtered_positive_identifier import FilteredPositiveIdentifier
from crawler.filtered_positive_identifier import current_filtered_positive_identifier
from crawler.helpers.general_helpers import map_mongo_doc_to_sql_columns
from crawler.sql_queries import SQL_MLWH_MULTIPLE_INSERT
from crawler.types import Sample, SourcePlate
Expand Down Expand Up @@ -121,8 +121,8 @@ def migrate_all_dbs(config, s_start_datetime: str = "", s_end_datetime: str = ""
logger.info(f"{len(samples)} samples between these timestamps and not cherry-picked")

# 3. add the relevant filtered positive fields if not present
filtered_positive_identifier = FilteredPositiveIdentifier()
version = filtered_positive_identifier.current_version()
filtered_positive_identifier = current_filtered_positive_identifier()
version = filtered_positive_identifier.version
update_timestamp = datetime.now()

"""
Expand Down
2 changes: 1 addition & 1 deletion migrations/helpers/update_filtered_positives_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def update_filtered_positive_fields(
"""
logger.debug("Updating filtered positive fields")

version = filtered_positive_identifier.current_version()
version = filtered_positive_identifier.version

# Expect all samples to be passed into here to have a positive result
for sample in samples:
Expand Down
6 changes: 3 additions & 3 deletions migrations/update_filtered_positives.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging.config
from datetime import datetime

from crawler.filtered_positive_identifier import FilteredPositiveIdentifier
from crawler.filtered_positive_identifier import current_filtered_positive_identifier
from crawler.helpers.general_helpers import get_config

from migrations.helpers.update_filtered_positives_helper import (
Expand Down Expand Up @@ -47,8 +47,8 @@ def run(settings_module: str = "") -> None:
positive_pending_samples = positive_result_samples_from_mongo(config, pending_plate_barcodes)
if num_positive_pending_samples := len(positive_pending_samples):
logger.info(f"{num_positive_pending_samples} positive samples in pending plates found in Mongo")
filtered_positive_identifier = FilteredPositiveIdentifier()
version = filtered_positive_identifier.current_version()
filtered_positive_identifier = current_filtered_positive_identifier()
version = filtered_positive_identifier.version
update_timestamp = datetime.now()
logger.info("Updating filtered positives...")
update_filtered_positive_fields(
Expand Down
17 changes: 5 additions & 12 deletions tests/migrations/helpers/test_update_filtered_positives_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from unittest.mock import patch
from unittest.mock import MagicMock, patch

import pytest
from crawler.constants import (
Expand Down Expand Up @@ -56,14 +56,6 @@ def mock_mongo_collection():
yield mock_collection


@pytest.fixture
def mock_positive_identifier():
with patch("migrations.helpers.update_filtered_positives_helper.FilteredPositiveIdentifier") as mock_identifier:
mock_identifier.is_positive.return_value = True
mock_identifier.current_version.return_value = "v2.3"
yield mock_identifier


# ----- test pending_plate_barcodes_from_dart method -----


Expand Down Expand Up @@ -137,12 +129,13 @@ def test_positive_result_samples_from_mongo_returns_expected_samples(config, tes
# ----- test update_filtered_positive_fields method -----


def test_update_filtered_positive_fields_assigns_expected_filtered_positive_fields(
mock_positive_identifier,
):
def test_update_filtered_positive_fields_assigns_expected_filtered_positive_fields():
samples = [{}, {}]
timestamp = datetime.now()
version = "v2.3"
mock_positive_identifier = MagicMock()
mock_positive_identifier.is_positive.return_value = True
mock_positive_identifier.version = version

update_filtered_positive_fields(mock_positive_identifier, samples, version, timestamp)
for sample in samples:
Expand Down
5 changes: 4 additions & 1 deletion tests/test_file_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
def centre_file_with_mocked_filtered_postitive_identifier(config, file_name):
centre = Centre(config, config.CENTRES[0])
centre_file = CentreFile(file_name, centre)
centre_file.filtered_positive_identifier.current_version = MagicMock(return_value="v2.3")
centre_file.filtered_positive_identifier.version = "v2.3"
centre_file.filtered_positive_identifier.is_positive = MagicMock(return_value=True)
return centre_file

Expand Down Expand Up @@ -561,6 +561,9 @@ def test_parse_and_format_file_rows_to_add_file_details(config):
"updated_at": timestamp,
"Result": "Negative",
"Lab ID": None,
"filtered_positive": True,
"filtered_positive_version": "v2.3",
"filtered_positive_timestamp": timestamp,
"lh_sample_uuid": str(test_uuid),
},
]
Expand Down
Loading