Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DIG-1247: Check that submitter_specimen_id refers to a valid specimen #26

Merged
merged 7 commits
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ def flat_list_val(data_values):
# Convert various responses to boolean
def boolean(data_values):
cell = single_val(data_values)
if cell is None or cell.lower() == "nan":
if cell is None or cell.lower().strip() == "nan":
return None
if cell.lower() == "no" or cell.lower == "false":
if cell.lower().strip() == "no" or cell.lower().strip() == "false":
return False
return True

Expand Down
16 changes: 8 additions & 8 deletions moh_template.csv
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.margin_types_not_inv
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.margin_types_not_assessed.INDEX, {indexed_on(SPECIMENS_SHEET.margin_types_not_assessed)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.lymphovascular_invasion, {single_val(SPECIMENS_SHEET.lymphovascular_invasion)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.perineural_invasion, {single_val(SPECIMENS_SHEET.perineural_invasion)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.tumour_length, {single_val(SPECIMENS_SHEET.tumour_length)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.tumour_width, {single_val(SPECIMENS_SHEET.tumour_width)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.greatest_dimension_tumour, {single_val(SPECIMENS_SHEET.greatest_dimension_tumour)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.tumour_length, {integer(SPECIMENS_SHEET.tumour_length)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.tumour_width, {integer(SPECIMENS_SHEET.tumour_width)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.greatest_dimension_tumour, {integer(SPECIMENS_SHEET.greatest_dimension_tumour)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.submitter_specimen_id, {single_val(SPECIMENS_SHEET.submitter_specimen_id)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX, {indexed_on(SAMPLE_REGISTRATIONS_SHEET.submitter_specimen_id)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX.submitter_sample_id, {single_val(SAMPLE_REGISTRATIONS_SHEET.submitter_sample_id)}
Expand Down Expand Up @@ -90,8 +90,8 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.drug_r
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.prescribed_cumulative_drug_dose, {integer(CHEMOTHERAPIES_SHEET.prescribed_cumulative_drug_dose)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.actual_cumulative_drug_dose, {integer(CHEMOTHERAPIES_SHEET.actual_cumulative_drug_dose)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX, {indexed_on(HORMONE_THERAPIES_SHEET.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {integer(HORMONE_THERAPIES_SHEET.hormone_drug_dose_units)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {integer(HORMONE_THERAPIES_SHEET.drug_reference_database)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {single_val(HORMONE_THERAPIES_SHEET.hormone_drug_dose_units)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {single_val(HORMONE_THERAPIES_SHEET.drug_reference_database)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_name, {single_val(HORMONE_THERAPIES_SHEET.drug_name)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_identifier, {single_val(HORMONE_THERAPIES_SHEET.drug_reference_identifier)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.prescribed_cumulative_drug_dose, {integer(HORMONE_THERAPIES_SHEET.prescribed_cumulative_drug_dose)}
Expand All @@ -107,9 +107,9 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.actua
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_modality, {single_val(TREATMENTS_SHEET.radiation_therapy_modality)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_type, {single_val(TREATMENTS_SHEET.radiation_therapy_type)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.anatomical_site_irradiated, {single_val(TREATMENTS_SHEET.anatomical_site_irradiated)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_fractions, {single_val(TREATMENTS_SHEET.radiation_therapy_fractions)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_dosage, {single_val(TREATMENTS_SHEET.radiation_therapy_dosage)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_boost, {single_val(TREATMENTS_SHEET.radiation_boost)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_fractions, {integer(TREATMENTS_SHEET.radiation_therapy_fractions)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_dosage, {integer(TREATMENTS_SHEET.radiation_therapy_dosage)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_boost, {boolean(TREATMENTS_SHEET.radiation_boost)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.reference_radiation_treatment_id, {single_val(TREATMENTS_SHEET.reference_radiation_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.surgery_type, {single_val(TREATMENTS_SHEET.surgery_type)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.surgery_site, {single_val(TREATMENTS_SHEET.surgery_site)}
Expand Down
80 changes: 43 additions & 37 deletions mohschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __str__(self):
def warn(message):
message = ">".join(STACK_LOCATION) + ": " + message
VALIDATION_MESSAGES.append(f"{message}")
raise MoHValidationError(message)
#raise MoHValidationError(message)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be commented out?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think it would be better to collate the warnings together and report them at the end, rather than error out when you reach any of them.



def fail(message):
Expand Down Expand Up @@ -359,13 +359,15 @@ def validate_primary_diagnosis(self, map_json):
if f not in map_json:
fail(f"{f} required for primary_diagnosis")

specimen_ids = []
is_tumour = False
# should either have a clinical staging system specified
# OR have a specimen with a pathological staging system specified
if "clinical_tumour_staging_system" in map_json:
is_tumour = True
elif "specimens" in map_json:
if "specimens" in map_json:
for specimen in map_json["specimens"]:
specimen_ids.append(specimen["submitter_specimen_id"])
if "pathological_tumour_staging_system" in specimen:
is_tumour = True

Expand All @@ -384,7 +386,7 @@ def validate_primary_diagnosis(self, map_json):
self.validate_specimen(specimen, "clinical_tumour_staging_system" in map_json)
case "treatments":
for treatment in map_json["treatments"]:
self.validate_treatment(treatment)
self.validate_treatment(treatment, specimen_ids)
case "biomarkers":
for biomarker in map_json["biomarkers"]:
self.validate_biomarker(biomarker, "submitter_primary_diagnosis_id", map_json["submitter_primary_diagnosis_id"])
Expand Down Expand Up @@ -502,7 +504,7 @@ def validate_followup(self, map_json):
STACK_LOCATION.pop()


def validate_treatment(self, map_json):
def validate_treatment(self, map_json, specimen_ids):
STACK_LOCATION.append(map_json['submitter_treatment_id'])
print(f"Validating schema for treatment {STACK_LOCATION[-1]}...")

Expand All @@ -524,37 +526,38 @@ def validate_treatment(self, map_json):
for prop in map_json:
match prop:
case "treatment_type":
match map_json["treatment_type"]:
case "Chemotherapy":
if "chemotherapies" not in map_json:
warn("treatment type Chemotherapy should have one or more chemotherapies submitted")
else:
for x in map_json["chemotherapies"]:
self.validate_chemotherapy(x)
case "Hormonal therapy":
if "hormone_therapies" not in map_json:
warn("treatment type Hormonal therapy should have one or more hormone_therapies submitted")
else:
for x in map_json["hormone_therapies"]:
self.validate_hormone_therapy(x)
case "Immunotherapy":
if "immunotherapies" not in map_json:
warn("treatment type Immunotherapy should have one or more immunotherapies submitted")
else:
for x in map_json["immunotherapies"]:
self.validate_immunotherapy(x)
case "Radiation therapy":
if "radiation" not in map_json:
warn("treatment type Radiation therapy should have one or more radiation submitted")
else:
for x in map_json["radiation"]:
self.validate_radiation(x)
case "Surgery":
if "surgery" not in map_json:
warn("treatment type Surgery should have one or more surgery submitted")
else:
for x in map_json["surgery"]:
self.validate_surgery(x)
for type in map_json["treatment_type"]:
match type:
case "Chemotherapy":
if "chemotherapies" not in map_json:
warn("treatment type Chemotherapy should have one or more chemotherapies submitted")
else:
for x in map_json["chemotherapies"]:
self.validate_chemotherapy(x)
case "Hormonal therapy":
if "hormone_therapies" not in map_json:
warn("treatment type Hormonal therapy should have one or more hormone_therapies submitted")
else:
for x in map_json["hormone_therapies"]:
self.validate_hormone_therapy(x)
case "Immunotherapy":
if "immunotherapies" not in map_json:
warn("treatment type Immunotherapy should have one or more immunotherapies submitted")
else:
for x in map_json["immunotherapies"]:
self.validate_immunotherapy(x)
case "Radiation therapy":
if "radiation" not in map_json:
warn("treatment type Radiation therapy should have one or more radiation submitted")
else:
for x in map_json["radiation"]:
self.validate_radiation(x)
case "Surgery":
if "surgery" not in map_json:
warn("treatment type Surgery should have one or more surgery submitted")
else:
for x in map_json["surgery"]:
self.validate_surgery(x, specimen_ids)
case "followups":
for followup in map_json["followups"]:
self.validate_followup(followup)
Expand Down Expand Up @@ -660,11 +663,10 @@ def validate_radiation(self, map_json):
STACK_LOCATION.pop()


def validate_surgery(self, map_json):
def validate_surgery(self, map_json, specimen_ids):
STACK_LOCATION.append(f"surgery {STACK_LOCATION[-1]}")
print(f"Validating schema for {STACK_LOCATION[-1]}...")


required_fields = [
"surgery_type"
]
Expand All @@ -677,6 +679,10 @@ def validate_surgery(self, map_json):
warn("surgery_site required if submitter_specimen_id not submitted")
if "surgery_location" not in map_json:
warn("surgery_location required if submitter_specimen_id not submitted")
else:
if map_json["submitter_specimen_id"] not in specimen_ids:
warn(f"submitter_specimen_id {map_json['submitter_specimen_id']} does not correspond to one of the available specimen_ids {specimen_ids}")

STACK_LOCATION.pop()


Expand Down
1 change: 1 addition & 0 deletions test_data/raw_data/Surgery.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
submitter_donor_id,submitter_specimen_id,submitter_treatment_id,surgery_type,surgery_site,surgery_location,tumour_length,tumour_width,greatest_dimension_tumour,tumour_focality,residual_tumour_classification,margin_types_involved,margin_types_not_involved,lymphovascular_invasion,margin_types_not_assessed,perineural_invasion
DONOR_2,SPECIMEN_4,TR_7,Axillary lymph nodes sampling,C14,Primary,9,7,5,Unifocal,R2,Distal margin|Circumferential resection margin,,Absent,Unknown,Absent
DONOR_6,SPECIMEN_43,TR_9,Axillary lymph nodes sampling,C14,Primary,9,7,5,Unifocal,R2,Distal margin|Circumferential resection margin,,Absent,Unknown,Absent
1 change: 1 addition & 0 deletions test_data/raw_data/Treatment.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ TR_5,DONOR_5,PD_5,Radiation therapy,Yes,,2021-09,2022-09,Induction,Preventive,7,
TR_6,DONOR_6,PD_6,Stem cell transplant,No,,2021-07,2022-07,Locally advanced,Diagnostic,5,2,Physician Assessed Response Criteria,Immune partial response (iPR),Treatment stopped due to lack of efficacy (disease progression)
TR_7,DONOR_2,PD_2_1,Surgery,Yes,,2021-02,2022-02,Maintenance,Diagnostic,3,1,Blazer score,Progressive disease,Treatment stopped due to acute toxicity
TR_8,DONOR_2,PD_2_1,Immunotherapy,No,,2021-03,2022-03,Locally advanced,Forensic,2,2,AML Response Criteria,Immune confirmed progressive disease (iCPD),Other
TR_9,DONOR_6,PD_6,Surgery,No,,2021-02,2022-02,Maintenance,Diagnostic,,,Blazer score,Progressive disease,
19 changes: 11 additions & 8 deletions test_data/test2moh.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
DONOR.INDEX, {indexed_on(Donor.submitter_donor_id)}
DONOR.INDEX.submitter_donor_id, {single_val(Donor.submitter_donor_id)}
DONOR.INDEX.program_id, {single_val(Donor.program_id)}
DONOR.INDEX.is_deceased, {boolean(Donor.is_deceased)}
DONOR.INDEX.cause_of_death, {single_val(Donor.cause_of_death)}
DONOR.INDEX.date_of_birth, {single_date(Donor.date_of_birth)}
DONOR.INDEX.date_of_death, {single_date(Donor.date_of_death)}
DONOR.INDEX.gender, {single_val(Donor.gender)}
DONOR.INDEX.sex_at_birth, {single_val(Donor.sex_at_birth)}
Expand Down Expand Up @@ -74,8 +77,8 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.prescr
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.actual_cumulative_drug_dose, {integer(Chemotherapy.actual_cumulative_drug_dose)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX, {indexed_on(HormoneTherapy.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.submitter_treatment_id, {single_val(HormoneTherapy.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {integer(HormoneTherapy.hormone_drug_dose_units)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {integer(HormoneTherapy.drug_reference_database)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {single_val(HormoneTherapy.hormone_drug_dose_units)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {single_val(HormoneTherapy.drug_reference_database)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_name, {single_val(HormoneTherapy.drug_name)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_identifier, {single_val(HormoneTherapy.drug_reference_identifier)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.prescribed_cumulative_drug_dose, {integer(HormoneTherapy.prescribed_cumulative_drug_dose)}
Expand All @@ -94,9 +97,9 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.submitter_t
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_modality, {single_val(Radiation.radiation_therapy_modality)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_type, {single_val(Radiation.radiation_therapy_type)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.anatomical_site_irradiated, {single_val(Radiation.anatomical_site_irradiated)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_fractions, {single_val(Radiation.radiation_therapy_fractions)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_dosage, {single_val(Radiation.radiation_therapy_dosage)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_boost, {single_val(Radiation.radiation_boost)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_fractions, {integer(Radiation.radiation_therapy_fractions)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_dosage, {integer(Radiation.radiation_therapy_dosage)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_boost, {boolean(Radiation.radiation_boost)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.reference_radiation_treatment_id, {single_val(Radiation.reference_radiation_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX, {indexed_on(Surgery.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.submitter_treatment_id, {single_val(Surgery.submitter_treatment_id)}
Expand All @@ -110,9 +113,9 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.margin_types_
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.margin_types_not_assessed, {pipe_delim(Surgery.margin_types_not_assessed)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.lymphovascular_invasion, {single_val(Surgery.lymphovascular_invasion)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.perineural_invasion, {single_val(Surgery.perineural_invasion)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.tumour_length, {single_val(Surgery.tumour_length)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.tumour_width, {single_val(Surgery.tumour_width)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.greatest_dimension_tumour, {single_val(Surgery.greatest_dimension_tumour)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.tumour_length, {integer(Surgery.tumour_length)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.tumour_width, {integer(Surgery.tumour_width)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.greatest_dimension_tumour, {integer(Surgery.greatest_dimension_tumour)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.submitter_specimen_id, {single_val(Surgery.submitter_specimen_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX, {indexed_on(Followup.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.submitter_treatment_id, {single_val(Followup.submitter_treatment_id)}
Expand Down
Loading