feat: Enable list processing for factor values (#95)

bihealth · Jan 19, 2024 · 5518814 · 5518814
1 parent 90a5b70
commit 5518814
Show file tree

Hide file tree

Showing 6 changed files with 128 additions and 18 deletions.
diff --git a/altamisa/isatab/parse_assay_study.py b/altamisa/isatab/parse_assay_study.py
@@ -385,7 +385,7 @@ def build(self, line: List[str]) -> models.Material:
             models.Comment(hdr.label, line[hdr.col_no]) for hdr in self.comment_headers
         )
         factor_values = tuple(
-            self._build_complex(hdr, line, models.build_factor_value)
+            self._build_complex(hdr, line, models.build_factor_value, allow_list=True)
             for hdr in self.factor_value_headers
         )
         material_type = self._build_freetext_or_term_ref(self.material_type_header, line)

diff --git a/altamisa/isatab/validate_assay_study.py b/altamisa/isatab/validate_assay_study.py
@@ -95,7 +95,7 @@ def has_content(value):
             [any(has_content(v) for v in char.value) for char in material.characteristics]
         )
         any_comm = any([comm.value for comm in material.comments])
-        any_fact = any([fact.value for fact in material.factor_values])
+        any_fact = any([any(has_content(v) for v in fact.value) for fact in material.factor_values])
         if not material.name and any(
             (
                 any_char,

diff --git a/docs/index.rst b/docs/index.rst
@@ -53,7 +53,7 @@ Special Extensions
 In addition to the original ISA-Tab format specifications, AltamISA supports
  the following special modifications to improve specific use cases:
 
-- **List of values** in ``Characterics`` or ``Parameter Value`` fields by using
+- **List of values** in ``Characteristics``, ``Parameter Value``, or ``Factor Value`` fields by using
   semicolon-separators (";"). Note, for ontology terms the same number of
   splits is expected in the associated field ``Term Source REF`` and
   ``Term Accession Number``.

diff --git a/tests/__snapshots__/test_parse_study.ambr b/tests/__snapshots__/test_parse_study.ambr
@@ -0,0 +1,86 @@
+# serializer version: 1
+# name: test_study_reader_minimal_study
+  list([
+    '''
+      Investigation with only one study contains metadata:
+      	ID:	i_minimal
+      	Title:	Minimal Investigation
+      	Path:	i_minimal.txt
+      	Submission Date:	
+      	Public Release Date:	None
+      	Prefer recording metadata in the study section.
+    ''',
+    '''
+      Assay without platform:
+      Path:	a_minimal.txt
+      Measurement Type:	exome sequencing assay
+      Technology Type:	nucleotide sequencing
+      Technology Platform:	
+    ''',
+  ])
+# ---
+# name: test_study_reader_minimal_study_iostring
+  list([
+    '''
+      Investigation with only one study contains metadata:
+      	ID:	i_minimal
+      	Title:	Minimal Investigation
+      	Path:	<no file>
+      	Submission Date:	
+      	Public Release Date:	None
+      	Prefer recording metadata in the study section.
+    ''',
+    '''
+      Assay without platform:
+      Path:	a_minimal.txt
+      Measurement Type:	exome sequencing assay
+      Technology Type:	nucleotide sequencing
+      Technology Platform:	
+    ''',
+  ])
+# ---
+# name: test_study_reader_minimal_study_iostring2
+  list([
+    '''
+      Investigation with only one study contains metadata:
+      	ID:	i_minimal
+      	Title:	Minimal Investigation
+      	Path:	<no file>
+      	Submission Date:	
+      	Public Release Date:	None
+      	Prefer recording metadata in the study section.
+    ''',
+    '''
+      Assay without platform:
+      Path:	a_minimal.txt
+      Measurement Type:	exome sequencing assay
+      Technology Type:	nucleotide sequencing
+      Technology Platform:	
+    ''',
+  ])
+# ---
+# name: test_study_reader_small_study
+  list([
+    '''
+      Investigation with only one study contains metadata:
+      	ID:	i_small
+      	Title:	Small Investigation
+      	Path:	i_small.txt
+      	Submission Date:	
+      	Public Release Date:	None
+      	Prefer recording metadata in the study section.
+    ''',
+    '''
+      Assay without platform:
+      Path:	a_small.txt
+      Measurement Type:	exome sequencing assay
+      Technology Type:	nucleotide sequencing
+      Technology Platform:	
+    ''',
+  ])
+# ---
+# name: test_study_reader_small_study.1
+  list([
+    "Found annotated material/file without name: Material(type='Sample Name', unique_name='S1-Empty Sample Name-13-5', name='', extract_label=None, characteristics=(Characteristics(name='status', value=['1'], unit=None),), comments=(), factor_values=(FactorValue(name='treatment', value=[''], unit=None),), material_type=None, headers=['Sample Name', 'Characteristics[status]', 'Factor Value[treatment]'])",
+  ])
+# ---
diff --git a/tests/data/i_small/s_small.txt b/tests/data/i_small/s_small.txt
@@ -3,4 +3,5 @@ Source Name	Characteristics[organism]	Term Source REF	Term Accession Number	Char
 0815	Mus musculus	NCBITAXON	http://purl.bioontology.org/ontology/NCBITAXON/10090	90	day	UO	http://purl.obolibrary.org/obo/UO_0000033	sample collection	scalpel	John Doe	2018-02-02	0815-N1	0	yes
 0815	Mus musculus	NCBITAXON	http://purl.bioontology.org/ontology/NCBITAXON/10090	90	day	UO	http://purl.obolibrary.org/obo/UO_0000033	sample collection	scalpel type A;scalpel type B	John Doe	2018-02-02	0815-T1	2	
 0816	Mus musculus				day	UO	http://purl.obolibrary.org/obo/UO_0000033	sample collection	scalpel	John Doe	2018-02-02	0816-T1	1	yes
-0817				150	day	UO	http://purl.obolibrary.org/obo/UO_0000033	sample collection	scalpel	John Doe	2018-02-02			
+0817				150	day	UO	http://purl.obolibrary.org/obo/UO_0000033	sample collection	scalpel	John Doe	2018-02-02		1	
+0818				150	day	UO	http://purl.obolibrary.org/obo/UO_0000033	sample collection	scalpel	John Doe	2018-02-02			
diff --git a/tests/test_parse_study.py b/tests/test_parse_study.py
@@ -7,6 +7,7 @@
 import os
 
 import pytest
+from syrupy.assertion import SnapshotAssertion
 
 from altamisa.constants import table_headers
 from altamisa.exceptions import IsaWarning
@@ -69,7 +70,9 @@ def test_study_row_reader_minimal_study(minimal_investigation_file, minimal_stud
     assert expected == first_row[2]
 
 
-def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_file):
+def test_study_reader_minimal_study(
+    minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
+):
     """Use ``StudyReader`` to read in minimal study file.
 
     Using the ``StudyReader`` instead of the ``StudyRowReader`` gives us
@@ -81,7 +84,7 @@ def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_fi
         InvestigationValidator(investigation).validate()
 
     # Check warnings
-    assert 2 == len(record)
+    assert snapshot == [str(r.message) for r in record]
 
     # Create new row reader and check read headers
     reader = StudyReader.from_stream("S1", minimal_study_file)
@@ -155,7 +158,7 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
     rows = list(row_reader.read())
 
     # Check results
-    assert 5 == len(rows)
+    assert 6 == len(rows)
     first_row = rows[0]
     second_row = rows[1]
     third_row = rows[2]
@@ -318,15 +321,17 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
     assert expected == third_row[2]
 
 
-def test_study_reader_small_study(small_investigation_file, small_study_file):
+def test_study_reader_small_study(
+    small_investigation_file, small_study_file, snapshot: SnapshotAssertion
+):
     """Use ``StudyReader`` to read in small study file."""
     # Load investigation (tested elsewhere)
     with pytest.warns(IsaWarning) as record:
         investigation = InvestigationReader.from_stream(small_investigation_file).read()
         InvestigationValidator(investigation).validate()
 
     # Check warnings
-    assert 2 == len(record)
+    assert snapshot == [str(r.message) for r in record]
 
     # Create new row reader and check read headers
     reader = StudyReader.from_stream("S1", small_study_file)
@@ -337,14 +342,14 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
     with pytest.warns(IsaWarning) as record:
         StudyValidator(investigation, investigation.studies[0], study).validate()
     # Check warnings
-    assert 1 == len(record)
+    assert snapshot == [str(r.message) for r in record]
 
     # Check results
     assert os.path.normpath(str(study.file)).endswith(os.path.normpath("data/i_small/s_small.txt"))
     assert 13 == len(study.header)
-    assert 9 == len(study.materials)
-    assert 5 == len(study.processes)
-    assert 10 == len(study.arcs)
+    assert 11 == len(study.materials)
+    assert 6 == len(study.processes)
+    assert 12 == len(study.arcs)
 
     headers_source = [
         table_headers.SOURCE_NAME,
@@ -476,13 +481,25 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
         "S1-Empty Sample Name-13-5",
         "",
         None,
-        (models.Characteristics("status", [""], None),),
+        (models.Characteristics("status", ["1"], None),),
         (),
         (models.FactorValue("treatment", [""], None),),
         None,
         headers_sample,
     )
     assert expected == study.materials["S1-Empty Sample Name-13-5"]
+    expected = models.Material(
+        "Sample Name",
+        "S1-Empty Sample Name-13-6",
+        "",
+        None,
+        (models.Characteristics("status", [""], None),),
+        (),
+        (models.FactorValue("treatment", [""], None),),
+        None,
+        headers_sample,
+    )
+    assert expected == study.materials["S1-Empty Sample Name-13-6"]
 
     expected = models.Process(
         "sample collection",
@@ -541,19 +558,23 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
         models.Arc("S1-sample collection-9-4", "S1-sample-0816-T1"),
         models.Arc("S1-source-0817", "S1-sample collection-9-5"),
         models.Arc("S1-sample collection-9-5", "S1-Empty Sample Name-13-5"),
+        models.Arc("S1-source-0818", "S1-sample collection-9-6"),
+        models.Arc("S1-sample collection-9-6", "S1-Empty Sample Name-13-6"),
     )
     assert expected == study.arcs
 
 
-def test_study_reader_minimal_study_iostring(minimal_investigation_file, minimal_study_file):
+def test_study_reader_minimal_study_iostring(
+    minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
+):
     # Load investigation (tested elsewhere)
     stringio = io.StringIO(minimal_investigation_file.read())
     investigation = InvestigationReader.from_stream(stringio).read()
     with pytest.warns(IsaWarning) as record:
         InvestigationValidator(investigation).validate()
 
     # Check warnings
-    assert 2 == len(record)
+    assert snapshot == [str(r.message) for r in record]
 
     # Create new study reader and read from StringIO with original filename indicated
     stringio = io.StringIO(minimal_study_file.read())
@@ -574,15 +595,17 @@ def test_study_reader_minimal_study_iostring(minimal_investigation_file, minimal
     assert 2 == len(study.arcs)
 
 
-def test_study_reader_minimal_study_iostring2(minimal_investigation_file, minimal_study_file):
+def test_study_reader_minimal_study_iostring2(
+    minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
+):
     # Load investigation (tested elsewhere)
     stringio = io.StringIO(minimal_investigation_file.read())
     investigation = InvestigationReader.from_stream(stringio).read()
     with pytest.warns(IsaWarning) as record:
         InvestigationValidator(investigation).validate()
 
     # Check warnings
-    assert 2 == len(record)
+    assert snapshot == [str(r.message) for r in record]
 
     # Create new study reader and read from StringIO with no filename indicated
     stringio = io.StringIO(minimal_study_file.read())