Skip to content

Commit

Permalink
Merge pull request #26 from CanDIG/daisieh/mapping-fixes
Browse files Browse the repository at this point in the history
DIG-1247: Check that submitter_specimen_id refers to a valid specimen
  • Loading branch information
daisieh authored Aug 17, 2023
2 parents 098f754 + 2552629 commit 52a8232
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 58 deletions.
4 changes: 2 additions & 2 deletions mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ def flat_list_val(data_values):
# Convert various responses to boolean
def boolean(data_values):
    """Convert a single mapped cell to True, False, or None.

    The cell is obtained with ``single_val(data_values)``. ``None`` and the
    text "nan" map to None; "no" and "false" map to False; every other
    value maps to True. Matching ignores case and surrounding whitespace.
    """
    cell = single_val(data_values)
    if cell is None:
        return None
    # Normalize once so every comparison sees the same text; calling
    # .lower() (not referencing the bare method) is what makes "false" match.
    normalized = cell.lower().strip()
    if normalized == "nan":
        return None
    if normalized in ("no", "false"):
        return False
    return True

Expand Down
16 changes: 8 additions & 8 deletions moh_template.csv
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.margin_types_not_inv
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.margin_types_not_assessed.INDEX, {indexed_on(SPECIMENS_SHEET.margin_types_not_assessed)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.lymphovascular_invasion, {single_val(SPECIMENS_SHEET.lymphovascular_invasion)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.perineural_invasion, {single_val(SPECIMENS_SHEET.perineural_invasion)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.tumour_length, {single_val(SPECIMENS_SHEET.tumour_length)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.tumour_width, {single_val(SPECIMENS_SHEET.tumour_width)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.greatest_dimension_tumour, {single_val(SPECIMENS_SHEET.greatest_dimension_tumour)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.tumour_length, {integer(SPECIMENS_SHEET.tumour_length)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.tumour_width, {integer(SPECIMENS_SHEET.tumour_width)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.greatest_dimension_tumour, {integer(SPECIMENS_SHEET.greatest_dimension_tumour)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.surgery.submitter_specimen_id, {single_val(SPECIMENS_SHEET.submitter_specimen_id)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX, {indexed_on(SAMPLE_REGISTRATIONS_SHEET.submitter_specimen_id)}
DONOR.INDEX.primary_diagnoses.INDEX.specimens.INDEX.sample_registrations.INDEX.submitter_sample_id, {single_val(SAMPLE_REGISTRATIONS_SHEET.submitter_sample_id)}
Expand Down Expand Up @@ -90,8 +90,8 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.drug_r
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.prescribed_cumulative_drug_dose, {integer(CHEMOTHERAPIES_SHEET.prescribed_cumulative_drug_dose)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.actual_cumulative_drug_dose, {integer(CHEMOTHERAPIES_SHEET.actual_cumulative_drug_dose)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX, {indexed_on(HORMONE_THERAPIES_SHEET.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {integer(HORMONE_THERAPIES_SHEET.hormone_drug_dose_units)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {integer(HORMONE_THERAPIES_SHEET.drug_reference_database)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {single_val(HORMONE_THERAPIES_SHEET.hormone_drug_dose_units)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {single_val(HORMONE_THERAPIES_SHEET.drug_reference_database)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_name, {single_val(HORMONE_THERAPIES_SHEET.drug_name)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_identifier, {single_val(HORMONE_THERAPIES_SHEET.drug_reference_identifier)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.prescribed_cumulative_drug_dose, {integer(HORMONE_THERAPIES_SHEET.prescribed_cumulative_drug_dose)}
Expand All @@ -107,9 +107,9 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.immunotherapies.INDEX.actua
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_modality, {single_val(TREATMENTS_SHEET.radiation_therapy_modality)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_type, {single_val(TREATMENTS_SHEET.radiation_therapy_type)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.anatomical_site_irradiated, {single_val(TREATMENTS_SHEET.anatomical_site_irradiated)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_fractions, {single_val(TREATMENTS_SHEET.radiation_therapy_fractions)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_dosage, {single_val(TREATMENTS_SHEET.radiation_therapy_dosage)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_boost, {single_val(TREATMENTS_SHEET.radiation_boost)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_fractions, {integer(TREATMENTS_SHEET.radiation_therapy_fractions)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_therapy_dosage, {integer(TREATMENTS_SHEET.radiation_therapy_dosage)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.radiation_boost, {boolean(TREATMENTS_SHEET.radiation_boost)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.reference_radiation_treatment_id, {single_val(TREATMENTS_SHEET.reference_radiation_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.surgery_type, {single_val(TREATMENTS_SHEET.surgery_type)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.surgery_site, {single_val(TREATMENTS_SHEET.surgery_site)}
Expand Down
80 changes: 43 additions & 37 deletions mohschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __str__(self):
def warn(message):
    """Record a validation warning prefixed with the current stack location.

    The entries of STACK_LOCATION are joined with ">" and prepended to
    *message*; the result is appended to VALIDATION_MESSAGES.
    """
    message = ">".join(STACK_LOCATION) + ": " + message
    # Warnings are collected rather than raised: raising MoHValidationError
    # here is disabled, presumably so later checks can still run.
    VALIDATION_MESSAGES.append(message)


def fail(message):
Expand Down Expand Up @@ -359,13 +359,15 @@ def validate_primary_diagnosis(self, map_json):
if f not in map_json:
fail(f"{f} required for primary_diagnosis")

specimen_ids = []
is_tumour = False
# should either have a clinical staging system specified
# OR have a specimen with a pathological staging system specified
if "clinical_tumour_staging_system" in map_json:
is_tumour = True
elif "specimens" in map_json:
if "specimens" in map_json:
for specimen in map_json["specimens"]:
specimen_ids.append(specimen["submitter_specimen_id"])
if "pathological_tumour_staging_system" in specimen:
is_tumour = True

Expand All @@ -384,7 +386,7 @@ def validate_primary_diagnosis(self, map_json):
self.validate_specimen(specimen, "clinical_tumour_staging_system" in map_json)
case "treatments":
for treatment in map_json["treatments"]:
self.validate_treatment(treatment)
self.validate_treatment(treatment, specimen_ids)
case "biomarkers":
for biomarker in map_json["biomarkers"]:
self.validate_biomarker(biomarker, "submitter_primary_diagnosis_id", map_json["submitter_primary_diagnosis_id"])
Expand Down Expand Up @@ -502,7 +504,7 @@ def validate_followup(self, map_json):
STACK_LOCATION.pop()


def validate_treatment(self, map_json):
def validate_treatment(self, map_json, specimen_ids):
STACK_LOCATION.append(map_json['submitter_treatment_id'])
print(f"Validating schema for treatment {STACK_LOCATION[-1]}...")

Expand All @@ -524,37 +526,38 @@ def validate_treatment(self, map_json):
for prop in map_json:
match prop:
case "treatment_type":
match map_json["treatment_type"]:
case "Chemotherapy":
if "chemotherapies" not in map_json:
warn("treatment type Chemotherapy should have one or more chemotherapies submitted")
else:
for x in map_json["chemotherapies"]:
self.validate_chemotherapy(x)
case "Hormonal therapy":
if "hormone_therapies" not in map_json:
warn("treatment type Hormonal therapy should have one or more hormone_therapies submitted")
else:
for x in map_json["hormone_therapies"]:
self.validate_hormone_therapy(x)
case "Immunotherapy":
if "immunotherapies" not in map_json:
warn("treatment type Immunotherapy should have one or more immunotherapies submitted")
else:
for x in map_json["immunotherapies"]:
self.validate_immunotherapy(x)
case "Radiation therapy":
if "radiation" not in map_json:
warn("treatment type Radiation therapy should have one or more radiation submitted")
else:
for x in map_json["radiation"]:
self.validate_radiation(x)
case "Surgery":
if "surgery" not in map_json:
warn("treatment type Surgery should have one or more surgery submitted")
else:
for x in map_json["surgery"]:
self.validate_surgery(x)
for type in map_json["treatment_type"]:
match type:
case "Chemotherapy":
if "chemotherapies" not in map_json:
warn("treatment type Chemotherapy should have one or more chemotherapies submitted")
else:
for x in map_json["chemotherapies"]:
self.validate_chemotherapy(x)
case "Hormonal therapy":
if "hormone_therapies" not in map_json:
warn("treatment type Hormonal therapy should have one or more hormone_therapies submitted")
else:
for x in map_json["hormone_therapies"]:
self.validate_hormone_therapy(x)
case "Immunotherapy":
if "immunotherapies" not in map_json:
warn("treatment type Immunotherapy should have one or more immunotherapies submitted")
else:
for x in map_json["immunotherapies"]:
self.validate_immunotherapy(x)
case "Radiation therapy":
if "radiation" not in map_json:
warn("treatment type Radiation therapy should have one or more radiation submitted")
else:
for x in map_json["radiation"]:
self.validate_radiation(x)
case "Surgery":
if "surgery" not in map_json:
warn("treatment type Surgery should have one or more surgery submitted")
else:
for x in map_json["surgery"]:
self.validate_surgery(x, specimen_ids)
case "followups":
for followup in map_json["followups"]:
self.validate_followup(followup)
Expand Down Expand Up @@ -660,11 +663,10 @@ def validate_radiation(self, map_json):
STACK_LOCATION.pop()


def validate_surgery(self, map_json):
def validate_surgery(self, map_json, specimen_ids):
STACK_LOCATION.append(f"surgery {STACK_LOCATION[-1]}")
print(f"Validating schema for {STACK_LOCATION[-1]}...")


required_fields = [
"surgery_type"
]
Expand All @@ -677,6 +679,10 @@ def validate_surgery(self, map_json):
warn("surgery_site required if submitter_specimen_id not submitted")
if "surgery_location" not in map_json:
warn("surgery_location required if submitter_specimen_id not submitted")
else:
if map_json["submitter_specimen_id"] not in specimen_ids:
warn(f"submitter_specimen_id {map_json['submitter_specimen_id']} does not correspond to one of the available specimen_ids {specimen_ids}")

STACK_LOCATION.pop()


Expand Down
1 change: 1 addition & 0 deletions test_data/raw_data/Surgery.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
submitter_donor_id,submitter_specimen_id,submitter_treatment_id,surgery_type,surgery_site,surgery_location,tumour_length,tumour_width,greatest_dimension_tumour,tumour_focality,residual_tumour_classification,margin_types_involved,margin_types_not_involved,lymphovascular_invasion,margin_types_not_assessed,perineural_invasion
DONOR_2,SPECIMEN_4,TR_7,Axillary lymph nodes sampling,C14,Primary,9,7,5,Unifocal,R2,Distal margin|Circumferential resection margin,,Absent,Unknown,Absent
DONOR_6,SPECIMEN_43,TR_9,Axillary lymph nodes sampling,C14,Primary,9,7,5,Unifocal,R2,Distal margin|Circumferential resection margin,,Absent,Unknown,Absent
1 change: 1 addition & 0 deletions test_data/raw_data/Treatment.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ TR_5,DONOR_5,PD_5,Radiation therapy,Yes,,2021-09,2022-09,Induction,Preventive,7,
TR_6,DONOR_6,PD_6,Stem cell transplant,No,,2021-07,2022-07,Locally advanced,Diagnostic,5,2,Physician Assessed Response Criteria,Immune partial response (iPR),Treatment stopped due to lack of efficacy (disease progression)
TR_7,DONOR_2,PD_2_1,Surgery,Yes,,2021-02,2022-02,Maintenance,Diagnostic,3,1,Blazer score,Progressive disease,Treatment stopped due to acute toxicity
TR_8,DONOR_2,PD_2_1,Immunotherapy,No,,2021-03,2022-03,Locally advanced,Forensic,2,2,AML Response Criteria,Immune confirmed progressive disease (iCPD),Other
TR_9,DONOR_6,PD_6,Surgery,No,,2021-02,2022-02,Maintenance,Diagnostic,,,Blazer score,Progressive disease,
19 changes: 11 additions & 8 deletions test_data/test2moh.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
DONOR.INDEX, {indexed_on(Donor.submitter_donor_id)}
DONOR.INDEX.submitter_donor_id, {single_val(Donor.submitter_donor_id)}
DONOR.INDEX.program_id, {single_val(Donor.program_id)}
DONOR.INDEX.is_deceased, {boolean(Donor.is_deceased)}
DONOR.INDEX.cause_of_death, {single_val(Donor.cause_of_death)}
DONOR.INDEX.date_of_birth, {single_date(Donor.date_of_birth)}
DONOR.INDEX.date_of_death, {single_date(Donor.date_of_death)}
DONOR.INDEX.gender, {single_val(Donor.gender)}
DONOR.INDEX.sex_at_birth, {single_val(Donor.sex_at_birth)}
Expand Down Expand Up @@ -74,8 +77,8 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.prescr
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.chemotherapies.INDEX.actual_cumulative_drug_dose, {integer(Chemotherapy.actual_cumulative_drug_dose)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX, {indexed_on(HormoneTherapy.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.submitter_treatment_id, {single_val(HormoneTherapy.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {integer(HormoneTherapy.hormone_drug_dose_units)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {integer(HormoneTherapy.drug_reference_database)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.hormone_drug_dose_units, {single_val(HormoneTherapy.hormone_drug_dose_units)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_database, {single_val(HormoneTherapy.drug_reference_database)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_name, {single_val(HormoneTherapy.drug_name)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.drug_reference_identifier, {single_val(HormoneTherapy.drug_reference_identifier)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.hormone_therapies.INDEX.prescribed_cumulative_drug_dose, {integer(HormoneTherapy.prescribed_cumulative_drug_dose)}
Expand All @@ -94,9 +97,9 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.submitter_t
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_modality, {single_val(Radiation.radiation_therapy_modality)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_type, {single_val(Radiation.radiation_therapy_type)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.anatomical_site_irradiated, {single_val(Radiation.anatomical_site_irradiated)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_fractions, {single_val(Radiation.radiation_therapy_fractions)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_dosage, {single_val(Radiation.radiation_therapy_dosage)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_boost, {single_val(Radiation.radiation_boost)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_fractions, {integer(Radiation.radiation_therapy_fractions)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_therapy_dosage, {integer(Radiation.radiation_therapy_dosage)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.radiation_boost, {boolean(Radiation.radiation_boost)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.radiation.INDEX.reference_radiation_treatment_id, {single_val(Radiation.reference_radiation_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX, {indexed_on(Surgery.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.submitter_treatment_id, {single_val(Surgery.submitter_treatment_id)}
Expand All @@ -110,9 +113,9 @@ DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.margin_types_
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.margin_types_not_assessed, {pipe_delim(Surgery.margin_types_not_assessed)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.lymphovascular_invasion, {single_val(Surgery.lymphovascular_invasion)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.perineural_invasion, {single_val(Surgery.perineural_invasion)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.tumour_length, {single_val(Surgery.tumour_length)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.tumour_width, {single_val(Surgery.tumour_width)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.greatest_dimension_tumour, {single_val(Surgery.greatest_dimension_tumour)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.tumour_length, {integer(Surgery.tumour_length)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.tumour_width, {integer(Surgery.tumour_width)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.greatest_dimension_tumour, {integer(Surgery.greatest_dimension_tumour)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.surgery.INDEX.submitter_specimen_id, {single_val(Surgery.submitter_specimen_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX, {indexed_on(Followup.submitter_treatment_id)}
DONOR.INDEX.primary_diagnoses.INDEX.treatments.INDEX.followups.INDEX.submitter_treatment_id, {single_val(Followup.submitter_treatment_id)}
Expand Down
Loading

0 comments on commit 52a8232

Please sign in to comment.