IGVF-DACC · ottojolanki · Feb 12, 2025 · Feb 7, 2025 · Feb 7, 2025 · Feb 10, 2025
diff --git a/cdk/README.md b/cdk/README.md
@@ -67,7 +67,7 @@ Log in at [SSO login portal](https://igvf-dacc.awsapps.com/start/#), choose `Acc
 Open a terminal window and run aws sso configuration command:
 
 ```bash
-$ aws sso configure
+$ aws configure sso
 ```
 
 Choose `igvf-dev` account, enter the `SSO start URL` and `SSO region`, choose `PowerUserAccess` role. The `CLI Profile name` will default to `PowerUserAccess-xyz`, you might want to enter something more easy to remember, such as `igvf-dev-sso`.

diff --git a/src/igvfd/__init__.py b/src/igvfd/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '65.0.0'
+__version__ = '66.0.0'
 
 
 import igvfd.schema_formats  # needed to import before snovault to add FormatCheckers

diff --git a/src/igvfd/audit/analysis_set.py b/src/igvfd/audit/analysis_set.py
@@ -175,7 +175,7 @@ def audit_analysis_set_multiplexed_samples(value, system):
         {
             "audit_description": "Analysis sets with multiplexed samples are expected to specify a demultiplexed sample.",
             "audit_category": "missing demultiplexed sample",
-            "audit_level": "WARNING"
+            "audit_level": "NOT_COMPLIANT"
         },
         {
             "audit_description": "Analysis sets are only expected to specify a demultiplexed sample if it has samples and they are all multiplexed.",

diff --git a/src/igvfd/audit/file.py b/src/igvfd/audit/file.py
@@ -80,7 +80,7 @@ def audit_external_identifiers(value, system):
         {
             "audit_description": "Externally hosted files are expected to have identifiers from external resources in dbxrefs.",
             "audit_category": "missing dbxrefs",
-            "audit_level": "WARNING"
+            "audit_level": "NOT_COMPLIANT"
         }
     ]
     '''
@@ -102,7 +102,7 @@ def audit_external_reference_files(value, system):
         {
             "audit_description": "Reference files uploaded from external resources are expected to have external identifiers in dbxrefs.",
             "audit_category": "missing dbxrefs",
-            "audit_level": "WARNING"
+            "audit_level": "NOT_COMPLIANT"
         }
     ]
     '''

diff --git a/src/igvfd/audit/file_set.py b/src/igvfd/audit/file_set.py
@@ -32,25 +32,36 @@ def single_cell_check(system, value, object_type):
         assay_term = value.get('assay_term')
         return assay_term in single_cell_assay_terms
     elif object_type == 'Auxiliary set':
-        assay_terms = []
         measurement_sets = value.get('measurement_sets')
         for measurement_set in measurement_sets:
             measurement_set_obj = system.get('request').embed(measurement_set, '@@object?skip_calculated=true')
             assay_term = measurement_set_obj.get('assay_term')
-            assay_terms.append(assay_term)
-        return any(assay in single_cell_assay_terms for assay in assay_terms)
+            if assay_term in single_cell_assay_terms:
+                return True
+        return False
     elif object_type == 'Construct library set':
-        assay_terms = []
         samples = value.get('applied_to_samples')
         for sample in samples:
-            sample_obj = system.get('request').embed(sample)
+            sample_obj = system.get('request').embed(
+                sample, '@@object_with_select_calculated_properties?field=file_sets')
             file_sets = sample_obj.get('file_sets')
             for file_set in file_sets:
                 if file_set.startswith('/measurement-sets/'):
                     measurement_set_obj = system.get('request').embed(measurement_set, '@@object?skip_calculated=true')
                     assay_term = measurement_set_obj.get('assay_term')
-                    assay_terms.append(assay_term)
-        return any(assay in single_cell_assay_terms for assay in assay_terms)
+                    if assay_term in single_cell_assay_terms:
+                        return True
+        return False
+    elif object_type == 'Analysis set':
+        for input_file_set in value.get('input_file_sets', []):
+            if input_file_set.startswith('/measurement-sets/'):
+                measurement_set_obj = system.get('request').embed(input_file_set, '@@object?skip_calculated=true')
+                assay_term = measurement_set_obj.get('assay_term')
+                if assay_term in single_cell_assay_terms:
+                    return True
+        return False
+    else:
+        return False
 
 
 @audit_checker('FileSet', frame='object')
@@ -60,13 +71,13 @@ def audit_no_files(value, system):
         {
             "audit_description": "File sets are expected to have files.",
             "audit_category": "missing files",
-            "audit_level": "WARNING"
+            "audit_level": "NOT_COMPLIANT"
         }
     ]
     '''
     object_type = space_in_words(value['@type'][0]).capitalize()
     audit_message_missing_files = get_audit_message(audit_no_files, index=0)
-    if not (value.get('files', '')):
+    if not (value.get('files', '')) and object_type != 'Construct library set':
         detail = (
             f'{object_type} {audit_link(path_to_text(value["@id"]), value["@id"])} '
             f'has no `files`.'
@@ -79,14 +90,18 @@ def audit_missing_seqspec(value, system):
     '''
     [
         {
-            "audit_description": "Sequence files in a file set are expected to link to a sequence specification file.",
+            "audit_description": "Sequence files in a file set associated with bulk data are expected to link to a sequence specification file.",
             "audit_category": "missing sequence specification",
             "audit_level": "INTERNAL_ACTION"
+        },
+        {
+            "audit_description": "Sequence files in a file set associated with single cell data are expected to link to a sequence specification file.",
+            "audit_category": "missing sequence specification",
+            "audit_level": "NOT_COMPLIANT"
         }
     ]
     '''
     object_type = space_in_words(value['@type'][0]).capitalize()
-    audit_message = get_audit_message(audit_missing_seqspec)
     if 'files' in value:
         no_seqspec = []
         for file in value['files']:
@@ -97,6 +112,10 @@ def audit_missing_seqspec(value, system):
         if no_seqspec:
             no_seqspec = ', '.join([audit_link(path_to_text(file_no_seqspec), file_no_seqspec)
                                    for file_no_seqspec in no_seqspec])
+            if single_cell_check(system, value, object_type):
+                audit_message = get_audit_message(audit_missing_seqspec, index=1)
+            else:
+                audit_message = get_audit_message(audit_missing_seqspec, index=0)
             detail = (
                 f'{object_type} {audit_link(path_to_text(value["@id"]), value["@id"])} has sequence file(s): '
                 f'{no_seqspec} which do not have any `seqspecs`.'
@@ -287,15 +306,19 @@ def audit_inconsistent_sequencing_kit(value, system):
             "audit_level": "ERROR"
         },
         {
-            "audit_description": "Sequence files should specify a sequencing kit.",
+            "audit_description": "Sequence files in a file set associated with bulk data should specify a sequencing kit.",
             "audit_category": "missing sequencing kit",
             "audit_level": "INTERNAL_ACTION"
+        },
+        {
+            "audit_description": "Sequence files in a file set associated with single cell data should specify a sequencing kit.",
+            "audit_category": "missing sequencing kit",
+            "audit_level": "NOT_COMPLIANT"
         }
     ]
     '''
     object_type = space_in_words(value['@type'][0]).capitalize()
     audit_message_inconsistent_kit = get_audit_message(audit_inconsistent_sequencing_kit, index=0)
-    audit_message_missing_kit = get_audit_message(audit_inconsistent_sequencing_kit, index=1)
     if 'files' in value:
         file_info = {}
         for file in value['files']:
@@ -327,6 +350,10 @@ def audit_inconsistent_sequencing_kit(value, system):
                     yield AuditFailure(audit_message_inconsistent_kit.get('audit_category', ''), f'{detail} {audit_message_inconsistent_kit.get("audit_description", "")}', level=audit_message_inconsistent_kit.get('audit_level', ''))
 
     if missing_kit:
+        if single_cell_check(system, value, object_type):
+            audit_message_missing_kit = get_audit_message(audit_inconsistent_sequencing_kit, index=2)
+        else:
+            audit_message_missing_kit = get_audit_message(audit_inconsistent_sequencing_kit, index=1)
         detail = (
             f'{object_type} {audit_link(path_to_text(value["@id"]), value["@id"])} has sequence '
             f'file(s) {", ".join([audit_link(path_to_text(f), f) for f in missing_kit])} '
@@ -423,7 +450,7 @@ def audit_input_for(value, system):
         {
             "audit_description": "Raw data sets with files are expected to be associated with at least one analysis set.",
             "audit_category": "missing analysis",
-            "audit_level": "NOT_COMPLIANT"
+            "audit_level": "WARNING"
         }
     ]
     '''
@@ -484,7 +511,7 @@ def audit_MPRA_read_names(value, system):
         {
             "audit_description": "MPRA measurement set and auxiliary set sequence files are expected to specify a read name.",
             "audit_category": "missing read names",
-            "audit_level": "WARNING"
+            "audit_level": "NOT_COMPLIANT"
         },
         {
             "audit_description": "MPRA measurement set and auxiliary set sequence files are only expected to specify read names: Barcode forward, UMI, or Barcode reverse.",
@@ -588,3 +615,38 @@ def audit_single_cell_read_names(value, system):
                     f'pipeline.'
                 )
                 yield AuditFailure(audit_message_unexpected_read_names.get('audit_category', ''), f'{detail} {audit_message_unexpected_read_names.get("audit_description", "")}', level=audit_message_unexpected_read_names.get('audit_level', ''))
+
+
+@audit_checker('FileSet', frame='object')
+def audit_control_for_control_type(value, system):
+    '''
+    [
+        {
+            "audit_description": "File sets that are controls for other file sets are expected to define a control type.",
+            "audit_category": "missing control type",
+            "audit_level": "NOT_COMPLIANT"
+        },
+        {
+            "audit_description": "File sets that specify a control type are expected to be a control for other file sets.",
+            "audit_category": "missing control for",
+            "audit_level": "NOT_COMPLIANT"
+        }
+    ]
+    '''
+    object_type = space_in_words(value['@type'][0]).capitalize()
+    audit_message_missing_control_type = get_audit_message(audit_control_for_control_type, index=0)
+    audit_message_missing_control_for = get_audit_message(audit_control_for_control_type, index=1)
+    if value.get('control_for', '') and not (value.get('control_type', '')):
+        detail = (
+            f'{object_type} {audit_link(path_to_text(value["@id"]), value["@id"])} '
+            f'has no `control_type`.'
+        )
+        yield AuditFailure(audit_message_missing_control_type.get('audit_category', ''), f'{detail} {audit_message_missing_control_type.get("audit_description", "")}', level=audit_message_missing_control_type.get('audit_level', ''))
+    elif value.get('control_type', '') and not (value.get('control_for', '')):
+        detail = (
+            f'{object_type} {audit_link(path_to_text(value["@id"]), value["@id"])} '
+            f'has no `control_for`. The `control_file_sets` should be patched on '
+            f'the file sets it serves as a control for and this property, `control_for`, '
+            f'will be reverse calculated.'
+        )
+        yield AuditFailure(audit_message_missing_control_for.get('audit_category', ''), f'{detail} {audit_message_missing_control_for.get("audit_description", "")}', level=audit_message_missing_control_for.get('audit_level', ''))
diff --git a/src/igvfd/audit/in_vitro_system.py b/src/igvfd/audit/in_vitro_system.py
@@ -41,7 +41,7 @@ def audit_cell_fate_change_treatments_purpose(value, system):
         {
             "audit_description": "Cell fate change treatments are expected to have a purpose associated with cell fate change.",
             "audit_category": "inconsistent treatment purpose",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''

diff --git a/src/igvfd/audit/matrix_file.py b/src/igvfd/audit/matrix_file.py
@@ -16,7 +16,7 @@ def audit_matrix_file_dimensions(value, system):
         {
             "audit_description": "Matrix files, with the exception of .hic, .cool, and .mcool files, are expected to have different values for each dimension.",
             "audit_category": "inconsistent dimensions",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''

diff --git a/src/igvfd/audit/measurement_set.py b/src/igvfd/audit/measurement_set.py
@@ -167,7 +167,7 @@ def audit_preferred_assay_title(value, system):
         {
             "audit_description": "Measurement sets are expected to specify an appropriate preferred assay title for its respective assay term.",
             "audit_category": "inconsistent preferred assay title",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''
@@ -266,7 +266,7 @@ def audit_missing_auxiliary_set_link(value, system):
         {
             "audit_description": "Measurement sets are expected to link to auxiliary sets if they share the same sample.",
             "audit_category": "missing auxiliary set",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''
@@ -297,7 +297,7 @@ def audit_targeted_genes(value, system):
         {
             "audit_description": "Only ChIP-seq and CRISPR flow cytometry assays are expected to specify targeted gene(s).",
             "audit_category": "unexpected targeted genes",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''
@@ -490,12 +490,12 @@ def audit_onlist(value, system):
         {
             "audit_description": "Measurement sets to be processed via the single cell uniform pipeline are expected to have onlist files and onlist methods indicated.",
             "audit_category": "missing barcode onlist",
-            "audit_level": "WARNING"
+            "audit_level": "NOT_COMPLIANT"
         },
         {
             "audit_description": "Measurement sets not intended for the single cell uniform pipeline are expected not to have onlist files or onlist methods.",
             "audit_category": "unexpected barcode onlist",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''
@@ -536,12 +536,12 @@ def audit_inconsistent_onlist_info(value, system):
         {
             "audit_description": "Measurement sets with 2 or more barcode onlist files are expected to have an onlist method of either product or multi.",
             "audit_category": "inconsistent barcode onlist",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         },
         {
             "audit_description": "Measurement sets with only 1 barcode onlist files are expected to have an onlist method of no combination.",
             "audit_category": "inconsistent barcode onlist",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''
@@ -566,7 +566,7 @@ def audit_unexpected_onlist_content(value, system):
         {
             "audit_description": "Onlist files are expected to be tabular files with barcode onlist as the content type.",
             "audit_category": "unexpected onlist files",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''

diff --git a/src/igvfd/audit/model_set.py b/src/igvfd/audit/model_set.py
@@ -16,7 +16,7 @@ def audit_external_input_data_content_type(value, system):
         {
             "audit_description": "Tabular files linked as `external_input_data` must be of type external source data.",
             "audit_category": "inconsistent external input data",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''

diff --git a/src/igvfd/audit/sample.py b/src/igvfd/audit/sample.py
@@ -169,7 +169,7 @@ def audit_construct_library_sets_types(value, system):
         {
             "audit_description": "Samples are expected to link to a construct library sets with the same file set type.",
             "audit_category": "inconsistent construct library sets",
-            "audit_level": "WARNING"
+            "audit_level": "ERROR"
         }
     ]
     '''
@@ -215,3 +215,24 @@ def audit_parent_sample_with_singular_child(value, system):
                 f'in `{child_sample_type}`.'
             )
             yield AuditFailure(audit_message.get('audit_category', ''), f'{detail} {audit_message.get("audit_description", "")}', level=audit_message.get('audit_level', ''))
+
+
+@audit_checker('Sample', frame='object')
+def audit_missing_nucleic_acid_delivery(value, system):
+    '''
+    [
+        {
+            "audit_description": "Samples linked to construct library sets are expected to specify nucleic acid delivery method.",
+            "audit_category": "missing nucleic acid delivery",
+            "audit_level": "WARNING"
+        }
+    ]
+    '''
+    object_type = space_in_words(value['@type'][0]).capitalize()
+    audit_message = get_audit_message(audit_missing_nucleic_acid_delivery)
+    if 'construct_library_sets' in value and 'nucleic_acid_delivery' not in value:
+        detail = (
+            f'{object_type} {audit_link(path_to_text(value["@id"]), value["@id"])} '
+            f'has `construct_library_sets` but is missing `nucleic_acid_delivery`.'
+        )
+        yield AuditFailure(audit_message.get('audit_category', ''), f'{detail} {audit_message.get("audit_description", "")}', level=audit_message.get('audit_level', ''))
diff --git a/src/igvfd/loadxl.py b/src/igvfd/loadxl.py
@@ -64,7 +64,12 @@
     'tabular_file',
     'index_file',
     'genome_browser_annotation_file',
-    'image_file'
+    'image_file',
+    'mpra_quality_metric',
+    'perturb_seq_quality_metric',
+    'single_cell_atac_seq_quality_metric',
+    'single_cell_rna_seq_quality_metric',
+    'starr_seq_quality_metric'
 ]
 
 IS_ATTACHMENT = [

diff --git a/src/igvfd/mappings/alignment_file.json b/src/igvfd/mappings/alignment_file.json
@@ -1,6 +1,6 @@
 {
-    "hash": "19e93a3331de4afde65b629878761208",
-    "index_name": "alignment_file_19e93a33",
+    "hash": "ec6a9f459d2d7882011e2dbca2eb1505",
+    "index_name": "alignment_file_ec6a9f45",
     "item_type": "alignment_file",
     "mapping": {
         "dynamic_templates": [
@@ -1719,6 +1719,9 @@
                     "notes": {
                         "type": "text"
                     },
+                    "quality_metrics": {
+                        "type": "keyword"
+                    },
                     "read_count": {
                         "store": true,
                         "type": "long"