From e2a339b5d324a8f5c2080b5f71472674fd647cf1 Mon Sep 17 00:00:00 2001 From: Peter van Heusden Date: Tue, 12 Oct 2021 22:17:59 +0200 Subject: [PATCH 1/5] Add parser for Mykrobe (https://github.com/Mykrobe-tools/mykrobe) --- hAMRonization/MykrobeIO.py | 118 ++++++++++++++++++++++++++++++ hAMRonization/__init__.py | 8 +- hAMRonization/hAMRonizedResult.py | 6 +- 3 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 hAMRonization/MykrobeIO.py diff --git a/hAMRonization/MykrobeIO.py b/hAMRonization/MykrobeIO.py new file mode 100644 index 0000000..dc1bb9b --- /dev/null +++ b/hAMRonization/MykrobeIO.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +import json +import re +from .Interfaces import hAMRonizedResultIterator + +required_metadata = [] + + +class MykrobeIterator(hAMRonizedResultIterator): + reference_genomes = { + 'sonnei': 'NC_016822.1', # Shigella sonnei + 'staph': 'BX571856.1', # Staphylococcus aureus + 'tb': 'NC_000962.3', # Mycobacterium tuberculosis, + 'typhi': 'AL513382.1' # Salmonella typhi + } + + aa_symbols = { + 'A': 'Ala', + 'C': 'Cys', + 'D': 'Asp', + 'E': 'Glu', + 'F': 'Phe', + 'G': 'Gly', + 'H': 'His', + 'I': 'Ile', + 'K': 'Lys', + 'L': 'Leu', + 'M': 'Met', + 'N': 'Asn', + 'P': 'Pro', + 'Q': 'Gln', + 'R': 'Arg', + 'S': 'Ser', + 'T': 'Thr', + 'V': 'Val', + 'W': 'Trp', + 'Y': 'Tyr' + } + + def __init__(self, source, metadata): + metadata['analysis_software_name'] = 'Mykrobe' + self.metadata = metadata + + self.field_mapping = { + 'filename': 'input_file_name', + 'gene_symbol': 'gene_symbol', + 'gene_name': 'gene_name', + 'drug': 'drug_class', + 'type': 'genetic_variation_type', + 'frequency': 'variant_frequency', + 'db_name': 'reference_database_id', + 'db_version': 'reference_database_version', + 'software_name': 'analysis_software_name', + 'mykrobe_version': 'analysis_software_version', + 'reference_accession': 'reference_accession', + 'nucleotide_mutation': 'nucleotide_mutation', + 'protein_mutation': 'protein_mutation', + 'nucleotide_mutation_interpretation': 'nucleotide_mutation_interpretation', + 'protein_mutation_interpretation': 'protein_mutation_interpretation' + } + + super().__init__(source, self.field_mapping, self.metadata) + + + def parse(self, handle): + variant_info_re = re.compile(r'(?P[^_]+)_(?P(?P[A-Z])(?P\d+)(?P[A-Z]))-(?P(?P[ACTG]{1,3})(?P\d+)(?P[ACTG]{1,3}))') + panel_name_re = re.compile(r'.*mykrobe/data/(?P.*)/') + data = json.load(handle) + + sample_names = list(data.keys()) + assert len(sample_names) == 1, "can only parse output with a single sample currently, found {}".format(len(sample_names)) + sample_name = sample_names[0] + panel_name_match = panel_name_re.match(data[sample_name]['probe_sets'][0]) + assert panel_name_match is not None, "can't match panel name from {}".format(data[sample_name]['probe_sets'][0]) + panel_name = panel_name_match.group('panel') + if panel_name not in self.reference_genomes: + raise ValueError('Unknown panel {}'.format(panel_name)) + reference_accession = self.reference_genomes[panel_name] + mykrobe_version = data[sample_name]['version']['mykrobe-predictor'] + mykrobe_atlas_version = data[sample_name]['version']['mykrobe-atlas'] + db_name = ';'.join([ re.sub(r'.*mykrobe/data/(.*)', r'\1', probe_set) for probe_set in data[sample_name]['probe_sets'] ]) + + for drug_name in data[sample_name]['susceptibility']: + drug = data[sample_name]['susceptibility'][drug_name] + if drug['predict'] == 'S': + continue + for variant in drug['called_by']: + #katG_S315T-GCT2155167GGT + variant_match = variant_info_re.match(variant) + assert variant_match is not None, "variant_info_re failed to match {}".format(variant) + gene_symbol = variant_match.group('gene_symbol') + frequency = drug['called_by'][variant]['info']['coverage']['alternate']['percent_coverage'] / 100 + if len(variant_match.group('codon_from')) == 1: + # this not a protein change + variant_type = 'nucleotide_variant' # TODO: should we have 'rrna_change' ?? + protein_mutation = None, + else: + variant_type = 'protein_variant' + protein_mutation = 'p.' + self.aa_symbols[variant_match.group('aa_from')] + variant_match.group('aa_pos') + self.aa_symbols[variant_match.group('aa_to')] + result = { + 'filename': handle.name, + 'gene_symbol': gene_symbol, + 'gene_name': gene_symbol, + 'drug': drug_name, + 'type': variant_type, + 'frequency': frequency, + 'software_name': 'mykrobe', + 'mykrobe_version': mykrobe_version, + 'db_name': db_name, + 'db_version': mykrobe_atlas_version, + 'reference_accession': reference_accession, + 'nucleotide_mutation': variant_match.group('codon_change'), # TODO: make this work using lookup table of gene positions + 'protein_mutation': protein_mutation, + 'nucleotide_mutation_interpretation': None, + 'protein_mutation_interpretation': None + } + yield self.hAMRonize(result, self.metadata) \ No newline at end of file diff --git a/hAMRonization/__init__.py b/hAMRonization/__init__.py index f8a3fc6..fb85019 100644 --- a/hAMRonization/__init__.py +++ b/hAMRonization/__init__.py @@ -18,6 +18,7 @@ from hAMRonization import AmrPlusPlusIO from hAMRonization import ResFamsIO from hAMRonization import TBProfilerIO +from hAMRonization import MykrobeIO _FormatToIterator = { "abricate": AbricateIO.AbricateIterator, @@ -35,7 +36,8 @@ "csstar": CSStarIO.CSStarIterator, "amrplusplus": AmrPlusPlusIO.AmrPlusPlusIterator, "resfams": ResFamsIO.ResFamsIterator, - "tbprofiler": TBProfilerIO.TBProfilerIterator + "tbprofiler": TBProfilerIO.TBProfilerIterator, + "mykrobe": MykrobeIO.MykrobeIterator, } _ReportFileToUse = { @@ -54,7 +56,8 @@ "csstar": "OUTPUT.tsv", "amrplusplus": "gene.tsv", "resfams": "resfams.tblout", - "tbprofiler": "OUTPUT.results.json" + "tbprofiler": "OUTPUT.results.json", + "mykrobe": "OUTPUT.json" } @@ -75,6 +78,7 @@ "resfams": ResFamsIO.required_metadata, "groot": GrootIO.required_metadata, "tbprofiler": TBProfilerIO.required_metadata, + "mykrobe": MykrobeIO.required_metadata } diff --git a/hAMRonization/hAMRonizedResult.py b/hAMRonization/hAMRonizedResult.py index 7d2005c..47199cd 100644 --- a/hAMRonization/hAMRonizedResult.py +++ b/hAMRonization/hAMRonizedResult.py @@ -23,7 +23,11 @@ class hAMRonizedResult(): # variant specific optional fields variant_frequency: float = None genetic_variation_type: str = None - + nucleotide_mutation: str = None + nucleotide_mutation_interpretation: str = None + protein_mutation: str = None + protein_mutation_interpretation: str = None + # optional fields sequence_identity: float = None input_sequence_id: str = None From 6860bdab5da5709c2a625de6e5636e11781ff98f Mon Sep 17 00:00:00 2001 From: Peter van Heusden Date: Fri, 15 Oct 2021 07:25:35 +0200 Subject: [PATCH 2/5] Add test, change output to use coverage/depth --- hAMRonization/MykrobeIO.py | 22 ++++++++++--------- test/test_sanity.py | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/hAMRonization/MykrobeIO.py b/hAMRonization/MykrobeIO.py index dc1bb9b..f1ffc03 100644 --- a/hAMRonization/MykrobeIO.py +++ b/hAMRonization/MykrobeIO.py @@ -43,12 +43,11 @@ def __init__(self, source, metadata): self.metadata = metadata self.field_mapping = { - 'filename': 'input_file_name', + 'filename': 'input_file_name', 'gene_symbol': 'gene_symbol', 'gene_name': 'gene_name', 'drug': 'drug_class', 'type': 'genetic_variation_type', - 'frequency': 'variant_frequency', 'db_name': 'reference_database_id', 'db_version': 'reference_database_version', 'software_name': 'analysis_software_name', @@ -57,12 +56,13 @@ def __init__(self, source, metadata): 'nucleotide_mutation': 'nucleotide_mutation', 'protein_mutation': 'protein_mutation', 'nucleotide_mutation_interpretation': 'nucleotide_mutation_interpretation', - 'protein_mutation_interpretation': 'protein_mutation_interpretation' + 'protein_mutation_interpretation': 'protein_mutation_interpretation', + 'coverage_percentage': 'coverage_percentage', + 'median_coverage_depth': 'coverage_depth' } super().__init__(source, self.field_mapping, self.metadata) - def parse(self, handle): variant_info_re = re.compile(r'(?P[^_]+)_(?P(?P[A-Z])(?P\d+)(?P[A-Z]))-(?P(?P[ACTG]{1,3})(?P\d+)(?P[ACTG]{1,3}))') panel_name_re = re.compile(r'.*mykrobe/data/(?P.*)/') @@ -79,18 +79,19 @@ def parse(self, handle): reference_accession = self.reference_genomes[panel_name] mykrobe_version = data[sample_name]['version']['mykrobe-predictor'] mykrobe_atlas_version = data[sample_name]['version']['mykrobe-atlas'] - db_name = ';'.join([ re.sub(r'.*mykrobe/data/(.*)', r'\1', probe_set) for probe_set in data[sample_name]['probe_sets'] ]) + db_name = ';'.join([re.sub(r'.*mykrobe/data/(.*)', r'\1', probe_set) for probe_set in data[sample_name]['probe_sets']]) for drug_name in data[sample_name]['susceptibility']: drug = data[sample_name]['susceptibility'][drug_name] if drug['predict'] == 'S': continue for variant in drug['called_by']: - #katG_S315T-GCT2155167GGT variant_match = variant_info_re.match(variant) assert variant_match is not None, "variant_info_re failed to match {}".format(variant) gene_symbol = variant_match.group('gene_symbol') - frequency = drug['called_by'][variant]['info']['coverage']['alternate']['percent_coverage'] / 100 + coverage_percentage = drug['called_by'][variant]['info']['coverage']['alternate']['percent_coverage'] + median_coverage_depth = drug['called_by'][variant]['info']['coverage']['alternate']['median_depth'] + if len(variant_match.group('codon_from')) == 1: # this not a protein change variant_type = 'nucleotide_variant' # TODO: should we have 'rrna_change' ?? @@ -104,7 +105,6 @@ def parse(self, handle): 'gene_name': gene_symbol, 'drug': drug_name, 'type': variant_type, - 'frequency': frequency, 'software_name': 'mykrobe', 'mykrobe_version': mykrobe_version, 'db_name': db_name, @@ -113,6 +113,8 @@ def parse(self, handle): 'nucleotide_mutation': variant_match.group('codon_change'), # TODO: make this work using lookup table of gene positions 'protein_mutation': protein_mutation, 'nucleotide_mutation_interpretation': None, - 'protein_mutation_interpretation': None + 'protein_mutation_interpretation': None, + 'coverage_percentage': coverage_percentage, + 'median_coverage_depth': median_coverage_depth } - yield self.hAMRonize(result, self.metadata) \ No newline at end of file + yield self.hAMRonize(result, self.metadata) diff --git a/test/test_sanity.py b/test/test_sanity.py index 8ed121a..7f77342 100644 --- a/test/test_sanity.py +++ b/test/test_sanity.py @@ -609,3 +609,47 @@ def test_tbprofiler(): assert result.reference_protein_stop is None assert result.reference_gene_start is None assert result.reference_gene_stop is None + +def test_mykrobe(): + metadata = {} + parsed_report = hAMRonization.parse("dummy/mykrobe/mykrobe.json", metadata, "mykrobe") + + for result in parsed_report: + # assert mandatory fields + assert result.input_file_name == 'mykrobe.json' + assert result.gene_symbol == 'rpoB' + assert result.gene_name == 'rpoB' + assert result.reference_database_id == 'tb/tb-species-170421.fasta.gz;tb/tb-hunt-probe-set-jan-03-2019.fasta.gz;tb/tb.lineage.20200930.probes.fa.gz' + assert result.reference_database_version == 'v0.10.0' + assert result.reference_accession == 'NC_000962.3' + assert result.analysis_software_name == 'mykrobe' + assert result.analysis_software_version == 'v0.10.0' + assert result.genetic_variation_type == 'protein_variant' + + # optional fields - present in dummy dataset + assert result.drug_class == 'Rifampicin' # TODO: this is not following the spec as this is not an ARO term, what to do? + assert result.coverage_percentage == 100 + assert result.coverage_depth == 60 + assert result.protein_mutation == "p.Ser450Leu" + # assert result.frequency == "" # TODO: this is not working yet + # assert result.nucleotide_mutation == "" # TODO: this is not working yet + + # missing data in report + assert result.sequence_identity is None + assert result.reference_gene_length is None + assert result.input_gene_length is None + assert result.input_sequence_id is None + assert result.input_gene_start is None + assert result.input_gene_stop is None + assert result.strand_orientation is None + assert result.antimicrobial_agent is None + assert result.reference_protein_length is None + assert result.coverage_ratio is None + assert result.input_protein_length is None + assert result.resistance_mechanism is None + assert result.input_protein_start is None + assert result.input_protein_stop is None + assert result.reference_protein_start is None + assert result.reference_protein_stop is None + assert result.reference_gene_start is None + assert result.reference_gene_stop is None From 73f67c00eb9f8c1fa158364a3b17b4274f25f2a0 Mon Sep 17 00:00:00 2001 From: Peter van Heusden Date: Fri, 15 Oct 2021 07:27:07 +0200 Subject: [PATCH 3/5] Add test data for mykrobe --- test/dummy/mykrobe/mykrobe.json | 269 ++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 test/dummy/mykrobe/mykrobe.json diff --git a/test/dummy/mykrobe/mykrobe.json b/test/dummy/mykrobe/mykrobe.json new file mode 100644 index 0000000..3b7e0cc --- /dev/null +++ b/test/dummy/mykrobe/mykrobe.json @@ -0,0 +1,269 @@ +{ + "SRR6916544": { + "susceptibility": { + "Rifampicin": { + "predict": "r", + "called_by": { + "rpoB_S450L-TCG761154TTG": { + "variant": null, + "genotype": [ + 0, + 1 + ], + "genotype_likelihoods": [ + -3594.140082249598, + -136.57739012799902, + -2127.6812715680494 + ], + "info": { + "coverage": { + "reference": { + "percent_coverage": 100.0, + "median_depth": 40, + "min_non_zero_depth": 32, + "kmer_count": 796, + "klen": 21 + }, + "alternate": { + "percent_coverage": 100.0, + "median_depth": 60, + "min_non_zero_depth": 58, + "kmer_count": 1150, + "klen": 20 + } + }, + "expected_depths": [ + 124 + ], + "contamination_depths": [], + "filter": [], + "conf": 3458 + }, + "_cls": "Call.VariantCall" + } + } + } + }, + "phylogenetics": { + "phylo_group": { + "Mycobacterium_tuberculosis_complex": { + "percent_coverage": 99.681, + "median_depth": 124 + } + }, + "sub_complex": { + "Unknown": { + "percent_coverage": -1, + "median_depth": -1 + } + }, + "species": { + "Mycobacterium_tuberculosis": { + "percent_coverage": 98.804, + "median_depth": 121 + } + }, + "lineage": { + "lineage": [ + "lineage2.2.10", + "lineage3" + ], + "calls_summary": { + "lineage2.2.10": { + "good_nodes": 3, + "tree_depth": 3, + "genotypes": { + "lineage2": 0.5, + "lineage2.2": 0.5, + "lineage2.2.10": 0.5 + } + }, + "lineage3": { + "good_nodes": 1, + "tree_depth": 1, + "genotypes": { + "lineage3": 0.5 + } + } + }, + "calls": { + "lineage2.2.10": { + "lineage2": { + "G497491A": { + "variant": "ref-G497491A?var_name=G497491A&num_alts=1&ref=NC_000962.3&enum=0&gene=NA&mut=G497491A", + "genotype": [ + 0, + 1 + ], + "genotype_likelihoods": [ + -4852.294042193726, + -68.00214340670391, + -4045.7527933854217 + ], + "info": { + "coverage": { + "reference": { + "percent_coverage": 100.0, + "median_depth": 76, + "min_non_zero_depth": 70, + "kmer_count": 1498, + "klen": 21 + }, + "alternate": { + "percent_coverage": 100.0, + "median_depth": 85, + "min_non_zero_depth": 82, + "kmer_count": 1695, + "klen": 21 + } + }, + "expected_depths": [ + 124 + ], + "contamination_depths": [], + "filter": [], + "conf": 4784 + }, + "_cls": "Call.VariantCall" + } + }, + "lineage2.2": { + "G2505085A": { + "variant": "ref-G2505085A?var_name=G2505085A&num_alts=2&ref=NC_000962.3&enum=0&gene=NA&mut=G2505085A", + "genotype": [ + 0, + 1 + ], + "genotype_likelihoods": [ + -3967.5854789566783, + -20.859193826341652, + -3054.5971617777313 + ], + "info": { + "coverage": { + "reference": { + "percent_coverage": 100.0, + "median_depth": 58, + "min_non_zero_depth": 56, + "kmer_count": 1160, + "klen": 21 + }, + "alternate": { + "percent_coverage": 100.0, + "median_depth": 69, + "min_non_zero_depth": 64, + "kmer_count": 1383, + "klen": 21 + } + }, + "expected_depths": [ + 124 + ], + "contamination_depths": [], + "filter": [], + "conf": 3947 + }, + "_cls": "Call.VariantCall" + } + }, + "lineage2.2.10": { + "G1364706A": { + "variant": "ref-G1364706A?var_name=G1364706A&num_alts=2&ref=NC_000962.3&enum=0&gene=NA&mut=G1364706A", + "genotype": [ + 0, + 1 + ], + "genotype_likelihoods": [ + -2360.5217287767423, + -145.4041778818164, + -3052.427673185898 + ], + "info": { + "coverage": { + "reference": { + "percent_coverage": 100.0, + "median_depth": 51, + "min_non_zero_depth": 47, + "kmer_count": 1017, + "klen": 21 + }, + "alternate": { + "percent_coverage": 100.0, + "median_depth": 42, + "min_non_zero_depth": 42, + "kmer_count": 848, + "klen": 21 + } + }, + "expected_depths": [ + 124 + ], + "contamination_depths": [], + "filter": [], + "conf": 2215 + }, + "_cls": "Call.VariantCall" + } + } + }, + "lineage3": { + "lineage3": { + "C3273107A": { + "variant": "ref-C3273107A?var_name=C3273107A&num_alts=1&ref=NC_000962.3&enum=0&gene=NA&mut=C3273107A", + "genotype": [ + 0, + 1 + ], + "genotype_likelihoods": [ + -3093.863597972382, + -19.52878016336399, + -3761.204834296007 + ], + "info": { + "coverage": { + "reference": { + "percent_coverage": 100.0, + "median_depth": 66, + "min_non_zero_depth": 64, + "kmer_count": 1323, + "klen": 21 + }, + "alternate": { + "percent_coverage": 100.0, + "median_depth": 58, + "min_non_zero_depth": 54, + "kmer_count": 1160, + "klen": 21 + } + }, + "expected_depths": [ + 124 + ], + "contamination_depths": [], + "filter": [], + "conf": 3074 + }, + "_cls": "Call.VariantCall" + } + } + } + } + } + }, + "kmer": 21, + "probe_sets": [ + "/home/pvh/miniconda3/envs/mykrobe/lib/python3.9/site-packages/mykrobe/data/tb/tb-species-170421.fasta.gz", + "/home/pvh/miniconda3/envs/mykrobe/lib/python3.9/site-packages/mykrobe/data/tb/tb-hunt-probe-set-jan-03-2019.fasta.gz", + "/home/pvh/miniconda3/envs/mykrobe/lib/python3.9/site-packages/mykrobe/data/tb/tb.lineage.20200930.probes.fa.gz" + ], + "files": [ + "../SRR6916544_1.fastq.gz", + "../SRR6916544_2.fastq.gz" + ], + "version": { + "mykrobe-predictor": "v0.10.0", + "mykrobe-atlas": "v0.10.0" + }, + "genotype_model": "kmer_count" + } +} \ No newline at end of file From 31942045685fc66adcd519abd6e37086d6f97244 Mon Sep 17 00:00:00 2001 From: Peter van Heusden Date: Fri, 15 Oct 2021 07:30:34 +0200 Subject: [PATCH 4/5] Fix merge conflict --- hAMRonization/hAMRonizedResult.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hAMRonization/hAMRonizedResult.py b/hAMRonization/hAMRonizedResult.py index 88509c0..170cf5c 100644 --- a/hAMRonization/hAMRonizedResult.py +++ b/hAMRonization/hAMRonizedResult.py @@ -23,10 +23,6 @@ class hAMRonizedResult(): # variant specific optional fields genetic_variation_type: str = None # To be made mandatory? variant_frequency: float = None -<<<<<<< HEAD - genetic_variation_type: str = None -======= ->>>>>>> 92356c4049ced11cf3bdda965a207b4749c87836 nucleotide_mutation: str = None nucleotide_mutation_interpretation: str = None protein_mutation: str = None From cb76523eeb202850c0fcc7e35b0db1f4e8fc1f5f Mon Sep 17 00:00:00 2001 From: Peter van Heusden Date: Fri, 15 Oct 2021 07:36:51 +0200 Subject: [PATCH 5/5] Compute variant frequency --- hAMRonization/MykrobeIO.py | 8 ++++++-- test/test_sanity.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/hAMRonization/MykrobeIO.py b/hAMRonization/MykrobeIO.py index f1ffc03..12e5e40 100644 --- a/hAMRonization/MykrobeIO.py +++ b/hAMRonization/MykrobeIO.py @@ -58,7 +58,8 @@ def __init__(self, source, metadata): 'nucleotide_mutation_interpretation': 'nucleotide_mutation_interpretation', 'protein_mutation_interpretation': 'protein_mutation_interpretation', 'coverage_percentage': 'coverage_percentage', - 'median_coverage_depth': 'coverage_depth' + 'median_coverage_depth': 'coverage_depth', + 'frequency': 'variant_frequency' } super().__init__(source, self.field_mapping, self.metadata) @@ -91,6 +92,8 @@ def parse(self, handle): gene_symbol = variant_match.group('gene_symbol') coverage_percentage = drug['called_by'][variant]['info']['coverage']['alternate']['percent_coverage'] median_coverage_depth = drug['called_by'][variant]['info']['coverage']['alternate']['median_depth'] + ref_median_coverage_depth = drug['called_by'][variant]['info']['coverage']['reference']['median_depth'] + frequency = median_coverage_depth / (median_coverage_depth + ref_median_coverage_depth) if len(variant_match.group('codon_from')) == 1: # this not a protein change @@ -115,6 +118,7 @@ def parse(self, handle): 'nucleotide_mutation_interpretation': None, 'protein_mutation_interpretation': None, 'coverage_percentage': coverage_percentage, - 'median_coverage_depth': median_coverage_depth + 'median_coverage_depth': median_coverage_depth, + 'frequency': frequency } yield self.hAMRonize(result, self.metadata) diff --git a/test/test_sanity.py b/test/test_sanity.py index 0f03675..c8738e7 100644 --- a/test/test_sanity.py +++ b/test/test_sanity.py @@ -637,7 +637,7 @@ def test_mykrobe(): assert result.coverage_percentage == 100 assert result.coverage_depth == 60 assert result.protein_mutation == "p.Ser450Leu" - # assert result.frequency == "" # TODO: this is not working yet + assert result.variant_frequency == 0.6 # TODO: confirm with tool authors that this is correct calculation # assert result.nucleotide_mutation == "" # TODO: this is not working yet # missing data in report