diff --git a/CSVConvert.py b/CSVConvert.py
index 81e5931..e30c1c7 100644
--- a/CSVConvert.py
+++ b/CSVConvert.py
@@ -31,14 +31,14 @@ def parse_args():
     return args
 
 
-# Combine dataframes from multiple sheets, delete any duplicate patients by merging data
 def process_data(raw_csv_dfs, identifier):
-    # for each dataframe, merge all occurrences for an identifier into a single row with arrayed values
+    """Take a set of raw dataframes with a common identifier and merge them into the internal JSON data structure."""
    final_merged = {}
     cols_index = {}
     individuals = []
 
     for page in raw_csv_dfs.keys():
+        print(f"Processing sheet {page}...")
         df = raw_csv_dfs[page].dropna(axis='index', how='all')\
             .dropna(axis='columns', how='all')\
             .applymap(str)\
@@ -99,9 +99,9 @@ def process_data(raw_csv_dfs, identifier):
 
 
 def map_row_to_mcodepacket(identifier, indexed_data, node):
-    # walk through the provided node of the mcodepacket and fill in the details
+    """Given a particular individual's data and a node in the schema, return the node with mapped data."""
     if "str" in str(type(node)) and node != "":
-        return translate_mapping(identifier, indexed_data, node)
+        return eval_mapping(identifier, indexed_data, node)
     elif "list" in str(type(node)):
         new_node = []
         for item in node:
@@ -121,6 +121,7 @@ def map_row_to_mcodepacket(identifier, indexed_data, node):
 
 
 def translate_mapping(identifier, indexed_data, mapping):
+    """Given the identifier field, the data, and a particular mapping, figure out what the method and the mapped values are."""
     func_match = re.match(r".*\{(.+?)\((.+)\)\}.*", mapping)
     if func_match is not None:  # it's a function, prep the dictionary and exec it
         items = func_match.group(2).split(";")
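# translate_mapping parses template values of the form {method(SHEET.field)}.
# A minimal sketch of how the regex above splits such a value; the mapping
# string and method name here are hypothetical examples:
import re

mapping = "{single_val(BIRTH.DOB)}"
func_match = re.match(r".*\{(.+?)\((.+)\)\}.*", mapping)
if func_match is not None:
    print(func_match.group(1))  # 'single_val': the mapping method to call
    print(func_match.group(2))  # 'BIRTH.DOB': semicolon-separated sheet fields to collect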
@@ -146,16 +147,27 @@ def translate_mapping(identifier, indexed_data, mapping):
                     new_dict[item][sheet] = indexed_data["data"][sheet][identifier][item]
                 else:
                     new_dict[item][sheet] = []
+        return func_match.group(1), new_dict
+    return None, None
+
+
+def eval_mapping(identifier, indexed_data, node):
+    """Given the identifier field, the data, and a particular schema node, evaluate the mapping and return the final JSON for the node in the schema."""
+    method, mapping = translate_mapping(identifier, indexed_data, node)
+    if method is not None:
+        if "mappings" not in mappings.MODULES:
+            mappings.MODULES["mappings"] = importlib.import_module("mappings")
+        module = mappings.MODULES["mappings"]
         # is the function something in a dynamically-loaded module?
-        subfunc_match = re.match(r"(.+)\.(.+)", func_match.group(1))
+        subfunc_match = re.match(r"(.+)\.(.+)", method)
         if subfunc_match is not None:
             module = mappings.MODULES[subfunc_match.group(1)]
-            return eval(f'module.{subfunc_match.group(2)}({new_dict})')
-        return eval(f'mappings.{func_match.group(1)}({new_dict})')
+            method = subfunc_match.group(2)
+        return eval(f'module.{method}({mapping})')
 
 
-# Ingest either an excel file or a directory of csvs
 def ingest_raw_data(input_path, indexed):
+    """Ingest the csvs or xlsx and create dataframes for processing."""
     raw_csv_dfs = {}
     output_file = "mCodePacket"
     # input can either be an excel file or a directory of csvs
@@ -181,15 +193,15 @@ def ingest_raw_data(input_path, indexed):
     return raw_csv_dfs, output_file
 
 
-# Create a template for mcodepacket, for use with the --template flag
 def generate_mapping_template(node, node_name="", node_names=None):
+    """Create a template for mcodepacket, for use with the --template flag."""
     if node_names is None:
         node_names = []
     if node_name != "":
         # check to see if the last node_name is a header for this node_name:
         if len(node_names) > 0:
             x = node_names.pop()
-            x_match = re.match(r"\"(.+?)\**\",.*", x)
+            x_match = re.match(r"(.+?)\**,.*", x)
             if x_match is not None:
                 if x_match.group(1) in node_name:
                     node_names.append(f"##{x}")
@@ -198,9 +210,9 @@ def generate_mapping_template(node, node_name="", node_names=None):
             else:
                 node_names.append(x)
     if "description" in node:
-        node_names.append(f"\"{node_name}\",\"##{node['description']}\"")
+        node_names.append(f"{node_name},\"##{node['description']}\"")
     else:
-        node_names.append(f"\"{node_name}\",")
+        node_names.append(f"{node_name},")
     if "type" in node:
         if node["type"] == "string":
             return "string", node_names
@@ -222,9 +234,9 @@ def generate_mapping_template(node, node_name="", node_names=None):
                 or node["$id"] == "katsu:mcode:complex_ontology"):
             # add a + to the name of the node to denote that this needs to be looked up in an ontology
             name = node_names.pop()
-            name_match = re.match(r"\"(.+?)\"(.+)", name)
+            name_match = re.match(r"(.+?),(.+)", name)
             if name_match is not None:
-                name = f"\"{name_match.group(1)}+\"{name_match.group(2)}"
+                name = f"{name_match.group(1)}+,{name_match.group(2)}"
             node_names.append(name)
             return node["$id"], node_names
     if "properties" in node:
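# Under the new unquoted template format above, the field name is everything
# before the first comma, and generate_mapping_template appends '+' to mark a
# field that needs an ontology lookup. A minimal sketch, assuming a
# hypothetical template line:
import re

name = "cancer_condition.code,\"##an ontology term\""
name_match = re.match(r"(.+?),(.+)", name)
if name_match is not None:
    name = f"{name_match.group(1)}+,{name_match.group(2)}"
print(name)  # cancer_condition.code+,"##an ontology term"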
@@ -242,23 +254,31 @@
     return None, node_names
 
 
-# Given a mapping csv file, create a scaffold mapping.
+def process_mapping(line, test=False):
+    """Given a csv mapping line, process it into its component pieces."""
+    line_match = re.match(r"(.+?),(.*$)", line.replace("\"", ""))
+    if line_match is not None:
+        element = line_match.group(1)
+        value = ""
+        if test:
+            value = "test"
+        if line_match.group(2) != "" and not line_match.group(2).startswith("##"):
+            value = line_match.group(2).replace(",", ";")
+        elems = element.replace("*", "").replace("+", "").split(".")
+        return value, elems
+    return line, None
+
+
 def create_mapping_scaffold(lines, test=False):
+    """Given lines from a mapping csv file, create a scaffold mapping."""
     props = {}
     for line in lines:
         if line.startswith("#"):
             continue
         if re.match(r"^\s*$", line):
             continue
-        line_match = re.match(r"(.+?),(.*$)", line.replace("\"", ""))
-        if line_match is not None:
-            element = line_match.group(1)
-            value = ""
-            if test:
-                value = "test"
-            if line_match.group(2) != "" and not line_match.group(2).startswith("##"):
-                value = line_match.group(2).replace(",", ";")
-            elems = element.replace("*", "").replace("+", "").split(".")
+        value, elems = process_mapping(line, test)
+        if elems is not None:
             x = elems.pop(0)
             if x not in props:
                 props[x] = []
@@ -295,6 +315,7 @@ def create_mapping_scaffold(lines, test=False):
 
 
 def load_manifest(mapping):
+    """Given a manifest file's path, return the data inside it."""
     identifier = None
     schema = "mcode"
     mapping_scaffold = None
@@ -313,9 +334,16 @@ def load_manifest(mapping):
         mapping_path = os.path.join(manifest_dir, manifest["mapping"])
         if os.path.isabs(manifest["mapping"]):
             mapping_path = manifest["mapping"]
+        mapping = []
         with open(mapping_path, 'r') as f:
             lines = f.readlines()
-            mapping_scaffold = create_mapping_scaffold(lines)
+            for line in lines:
+                if line.startswith("#"):
+                    continue
+                if re.match(r"^\s*$", line):
+                    continue
+                mapping.append(line)
+            mapping_scaffold = create_mapping_scaffold(mapping)
     if "functions" in manifest:
         for mod in manifest["functions"]:
             try:
@@ -337,6 +365,7 @@ def load_manifest(mapping):
         "identifier": identifier,
         "schema": schema,
         "scaffold": mapping_scaffold,
+        "mapping": mapping,
         "indexed": indexed
     }
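# process_mapping, factored out above, splits one csv mapping line into the
# schema element (everything before the first comma, with '*' and '+' markers
# stripped and broken at the dots) and its mapped value. A minimal sketch,
# assuming a hypothetical mapping line:
import re

line = "subject.date_of_birth,{single_val(BIRTH.DOB)}"
line_match = re.match(r"(.+?),(.*$)", line.replace("\"", ""))
if line_match is not None:
    value = line_match.group(2).replace(",", ";")  # commas inside values become semicolons
    elems = line_match.group(1).replace("*", "").replace("+", "").split(".")
    print(value)  # {single_val(BIRTH.DOB)}
    print(elems)  # ['subject', 'date_of_birth']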
diff --git a/README.md b/README.md
index 6cc5997..2ffab80 100644
--- a/README.md
+++ b/README.md
@@ -111,14 +111,15 @@ Prerequisites:
 You'll need to set up a free [account](https://bioportal.bioontology.org/account) at NCBI Bioportal to obtain an API key.
 
 ## Running from command line
-`$ python CSVConvert.py [-h] [--input INPUT] [--template TEMPLATE] [--mapping|manifest MAPPING]`
+```
+$ python CSVConvert.py [-h] [--input INPUT] [--template TEMPLATE] [--mapping|manifest MAPPING]
 
 --input: path to dataset to be converted to mCODE data model
 
 --template: If provided, generate a mapping template at the specified file (only needed if you are creating a new template sheet)
 
 --mapping or --manifest: Path to a manifest file describing the mapping
-
+```
 ## Testing
 Continuous Integration is implemented through Pytest and Travis CI which runs when git pushes occur.
 Build results can be found at [this repository's Travis build page](https://travis-ci.com/github/CanDIG/medidata_mCode_ETL)
@@ -129,6 +130,20 @@ To run tests manually, enter from command line `$ pytest`
 
 ## Creating a dummy json file for testing
 You can use a template file (created as described above with `--template`) alone to create a dummy ingest file without actual data.
 
-`python create_test_mapping.py` creates a file at `mcode_template_testmap.json` that is filled in (without using mapping functions) with placeholder or dummy values. You can specify the placeholder value with the argument `--placeholder`.
+`python create_test_mapping.py` creates JSON that is filled in (without using mapping functions) with placeholder or dummy values. You can specify the placeholder value with the argument `--placeholder`. If no template file is specified with `--template`, the current MCODE_SCHEMA of katsu is used and the JSON is written to stdout. Otherwise, the file is saved to `