Skip to content

Commit

Permalink
feat: Enable list processing for factor values (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
sellth authored Jan 19, 2024
1 parent 90a5b70 commit 5518814
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 18 deletions.
2 changes: 1 addition & 1 deletion altamisa/isatab/parse_assay_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def build(self, line: List[str]) -> models.Material:
models.Comment(hdr.label, line[hdr.col_no]) for hdr in self.comment_headers
)
factor_values = tuple(
self._build_complex(hdr, line, models.build_factor_value)
self._build_complex(hdr, line, models.build_factor_value, allow_list=True)
for hdr in self.factor_value_headers
)
material_type = self._build_freetext_or_term_ref(self.material_type_header, line)
Expand Down
2 changes: 1 addition & 1 deletion altamisa/isatab/validate_assay_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def has_content(value):
[any(has_content(v) for v in char.value) for char in material.characteristics]
)
any_comm = any([comm.value for comm in material.comments])
any_fact = any([fact.value for fact in material.factor_values])
any_fact = any([any(has_content(v) for v in fact.value) for fact in material.factor_values])
if not material.name and any(
(
any_char,
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Special Extensions
In addition to the original ISA-Tab format specifications, AltamISA supports
the following special modifications to improve specific use cases:

- **List of values** in ``Characteristics`` or ``Parameter Value`` fields by using
- **List of values** in ``Characteristics``, ``Parameter Value``, or ``Factor Value`` fields by using
semicolon separators (";"). Note that for ontology terms, the same number of
splits is expected in the associated fields ``Term Source REF`` and
``Term Accession Number``.
Expand Down
86 changes: 86 additions & 0 deletions tests/__snapshots__/test_parse_study.ambr
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# serializer version: 1
# name: test_study_reader_minimal_study
list([
'''
Investigation with only one study contains metadata:
ID: i_minimal
Title: Minimal Investigation
Path: i_minimal.txt
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_minimal.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
])
# ---
# name: test_study_reader_minimal_study_iostring
list([
'''
Investigation with only one study contains metadata:
ID: i_minimal
Title: Minimal Investigation
Path: <no file>
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_minimal.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
])
# ---
# name: test_study_reader_minimal_study_iostring2
list([
'''
Investigation with only one study contains metadata:
ID: i_minimal
Title: Minimal Investigation
Path: <no file>
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_minimal.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
])
# ---
# name: test_study_reader_small_study
list([
'''
Investigation with only one study contains metadata:
ID: i_small
Title: Small Investigation
Path: i_small.txt
Submission Date:
Public Release Date: None
Prefer recording metadata in the study section.
''',
'''
Assay without platform:
Path: a_small.txt
Measurement Type: exome sequencing assay
Technology Type: nucleotide sequencing
Technology Platform:
''',
])
# ---
# name: test_study_reader_small_study.1
list([
"Found annotated material/file without name: Material(type='Sample Name', unique_name='S1-Empty Sample Name-13-5', name='', extract_label=None, characteristics=(Characteristics(name='status', value=['1'], unit=None),), comments=(), factor_values=(FactorValue(name='treatment', value=[''], unit=None),), material_type=None, headers=['Sample Name', 'Characteristics[status]', 'Factor Value[treatment]'])",
])
# ---
3 changes: 2 additions & 1 deletion tests/data/i_small/s_small.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ Source Name Characteristics[organism] Term Source REF Term Accession Number Char
0815 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0815-N1 0 yes
0815 Mus musculus NCBITAXON http://purl.bioontology.org/ontology/NCBITAXON/10090 90 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel type A;scalpel type B John Doe 2018-02-02 0815-T1 2
0816 Mus musculus day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 0816-T1 1 yes
0817 150 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02
0817 150 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02 1
0818 150 day UO http://purl.obolibrary.org/obo/UO_0000033 sample collection scalpel John Doe 2018-02-02
51 changes: 37 additions & 14 deletions tests/test_parse_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os

import pytest
from syrupy.assertion import SnapshotAssertion

from altamisa.constants import table_headers
from altamisa.exceptions import IsaWarning
Expand Down Expand Up @@ -69,7 +70,9 @@ def test_study_row_reader_minimal_study(minimal_investigation_file, minimal_stud
assert expected == first_row[2]


def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_file):
def test_study_reader_minimal_study(
minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
):
"""Use ``StudyReader`` to read in minimal study file.
Using the ``StudyReader`` instead of the ``StudyRowReader`` gives us
Expand All @@ -81,7 +84,7 @@ def test_study_reader_minimal_study(minimal_investigation_file, minimal_study_fi
InvestigationValidator(investigation).validate()

# Check warnings
assert 2 == len(record)
assert snapshot == [str(r.message) for r in record]

# Create new row reader and check read headers
reader = StudyReader.from_stream("S1", minimal_study_file)
Expand Down Expand Up @@ -155,7 +158,7 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
rows = list(row_reader.read())

# Check results
assert 5 == len(rows)
assert 6 == len(rows)
first_row = rows[0]
second_row = rows[1]
third_row = rows[2]
Expand Down Expand Up @@ -318,15 +321,17 @@ def test_study_row_reader_small_study(small_investigation_file, small_study_file
assert expected == third_row[2]


def test_study_reader_small_study(small_investigation_file, small_study_file):
def test_study_reader_small_study(
small_investigation_file, small_study_file, snapshot: SnapshotAssertion
):
"""Use ``StudyReader`` to read in small study file."""
# Load investigation (tested elsewhere)
with pytest.warns(IsaWarning) as record:
investigation = InvestigationReader.from_stream(small_investigation_file).read()
InvestigationValidator(investigation).validate()

# Check warnings
assert 2 == len(record)
assert snapshot == [str(r.message) for r in record]

# Create new row reader and check read headers
reader = StudyReader.from_stream("S1", small_study_file)
Expand All @@ -337,14 +342,14 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
with pytest.warns(IsaWarning) as record:
StudyValidator(investigation, investigation.studies[0], study).validate()
# Check warnings
assert 1 == len(record)
assert snapshot == [str(r.message) for r in record]

# Check results
assert os.path.normpath(str(study.file)).endswith(os.path.normpath("data/i_small/s_small.txt"))
assert 13 == len(study.header)
assert 9 == len(study.materials)
assert 5 == len(study.processes)
assert 10 == len(study.arcs)
assert 11 == len(study.materials)
assert 6 == len(study.processes)
assert 12 == len(study.arcs)

headers_source = [
table_headers.SOURCE_NAME,
Expand Down Expand Up @@ -476,13 +481,25 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
"S1-Empty Sample Name-13-5",
"",
None,
(models.Characteristics("status", [""], None),),
(models.Characteristics("status", ["1"], None),),
(),
(models.FactorValue("treatment", [""], None),),
None,
headers_sample,
)
assert expected == study.materials["S1-Empty Sample Name-13-5"]
expected = models.Material(
"Sample Name",
"S1-Empty Sample Name-13-6",
"",
None,
(models.Characteristics("status", [""], None),),
(),
(models.FactorValue("treatment", [""], None),),
None,
headers_sample,
)
assert expected == study.materials["S1-Empty Sample Name-13-6"]

expected = models.Process(
"sample collection",
Expand Down Expand Up @@ -541,19 +558,23 @@ def test_study_reader_small_study(small_investigation_file, small_study_file):
models.Arc("S1-sample collection-9-4", "S1-sample-0816-T1"),
models.Arc("S1-source-0817", "S1-sample collection-9-5"),
models.Arc("S1-sample collection-9-5", "S1-Empty Sample Name-13-5"),
models.Arc("S1-source-0818", "S1-sample collection-9-6"),
models.Arc("S1-sample collection-9-6", "S1-Empty Sample Name-13-6"),
)
assert expected == study.arcs


def test_study_reader_minimal_study_iostring(minimal_investigation_file, minimal_study_file):
def test_study_reader_minimal_study_iostring(
minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
):
# Load investigation (tested elsewhere)
stringio = io.StringIO(minimal_investigation_file.read())
investigation = InvestigationReader.from_stream(stringio).read()
with pytest.warns(IsaWarning) as record:
InvestigationValidator(investigation).validate()

# Check warnings
assert 2 == len(record)
assert snapshot == [str(r.message) for r in record]

# Create new study reader and read from StringIO with original filename indicated
stringio = io.StringIO(minimal_study_file.read())
Expand All @@ -574,15 +595,17 @@ def test_study_reader_minimal_study_iostring(minimal_investigation_file, minimal
assert 2 == len(study.arcs)


def test_study_reader_minimal_study_iostring2(minimal_investigation_file, minimal_study_file):
def test_study_reader_minimal_study_iostring2(
minimal_investigation_file, minimal_study_file, snapshot: SnapshotAssertion
):
# Load investigation (tested elsewhere)
stringio = io.StringIO(minimal_investigation_file.read())
investigation = InvestigationReader.from_stream(stringio).read()
with pytest.warns(IsaWarning) as record:
InvestigationValidator(investigation).validate()

# Check warnings
assert 2 == len(record)
assert snapshot == [str(r.message) for r in record]

# Create new study reader and read from StringIO with no filename indicated
stringio = io.StringIO(minimal_study_file.read())
Expand Down

0 comments on commit 5518814

Please sign in to comment.