Skip to content

Commit

Permalink
Merge pull request #565 from ISA-tools/prs-patch-2
Browse files Browse the repository at this point in the history
adjusting bh23 workbook, and model class
  • Loading branch information
proccaserra authored Nov 5, 2024
2 parents 11913a7 + 5a69357 commit d6c182c
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 223 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
" ### Loading the ISA-API"
"### Loading the ISA-API"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"from isatools.model import (\n",
Expand Down Expand Up @@ -53,9 +51,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"investigation = Investigation()\n",
Expand Down Expand Up @@ -223,9 +219,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"study.sources = [Source(name=\"GSM255770\"), Source(name=\"GSM255771\"),Source(name=\"GSM255772\"),Source(name=\"GSM255773\")]\n",
Expand Down Expand Up @@ -268,9 +262,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# Starting by declaring the 2 types of assays used in BII-S-3 as coded with ISAcreator tool\n",
Expand Down Expand Up @@ -308,9 +300,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"\n",
Expand Down Expand Up @@ -435,9 +425,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"study.assays.append(assay)\n",
Expand All @@ -447,9 +435,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# assay_g = investigation.studies[0].assays[1]\n",
Expand Down Expand Up @@ -506,9 +492,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"from isatools.isatab import dump\n",
Expand All @@ -527,9 +511,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"from isatools.isatab import load\n",
Expand Down Expand Up @@ -581,9 +563,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"# import hashlib\n",
Expand All @@ -594,9 +574,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"import filecmp\n",
Expand All @@ -607,9 +585,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"filecmp.cmp('./output/BII-S-3-synth/s_BII-S-3-synthesis.txt', './notebook-output/BII-S-3-roundtrip/s_BII-S-3-synthesis.txt')"
Expand All @@ -618,9 +594,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"filecmp.cmp('./output/BII-S-3-synth/a_gilbert-assay-Gx.txt', './notebook-output/BII-S-3-roundtrip/a_gilbert-assay-Gx.txt')"
Expand All @@ -629,9 +603,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [],
"source": [
"filecmp.cmp('./output/BII-S-3-synth/a_gilbert-assay-Tx.txt', './notebook-output/BII-S-3-roundtrip/a_gilbert-assay-Tx.txt')"
Expand Down
3 changes: 3 additions & 0 deletions isatools/isatab/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
write_study_table_files,
write_assay_table_files,
write_value_columns,
flatten,

dump_tables_to_dataframes
)
from isatools.isatab.load import (
Expand All @@ -18,3 +20,4 @@
from isatools.isatab.defaults import default_config_dir
from isatools.isatab.utils import IsaTabDataFrame, TransposedTabParser
from isatools.isatab.validate import validate, batch_validate

2 changes: 1 addition & 1 deletion isatools/isatab/dump/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from isatools.isatab.dump.core import dump, dumps, dump_tables_to_dataframes
from isatools.isatab.dump.write import write_study_table_files, write_assay_table_files, write_value_columns
from isatools.isatab.dump.write import write_study_table_files, write_assay_table_files, write_value_columns, flatten
43 changes: 30 additions & 13 deletions isatools/isatab/dump/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,26 @@
)


def flatten(current_list) -> list:
    """Flatten a list of lists into a single flat list.

    :param current_list: a list whose elements are themselves iterables
        (e.g. a list of lists); neither the outer list nor any element
        may be None.
    :return: a new list containing every item of every sublist, in order.
    :raises ValueError: if ``current_list`` is None or contains a None element.
    """
    if current_list is None:
        raise ValueError("Cannot flatten None; expected a list of lists")
    flattened_list = []
    for sublist in current_list:
        if sublist is None:
            raise ValueError("Cannot flatten a list containing None elements")
        # extend performs the inner iteration in C instead of a manual loop
        flattened_list.extend(sublist)
    return flattened_list


def write_study_table_files(inv_obj, output_dir):
"""Writes out study table files according to pattern defined by
Expand All @@ -49,10 +69,6 @@ def write_study_table_files(inv_obj, output_dir):
break
protrefcount = 0
protnames = dict()

def flatten(current_list):
return [item for sublist in current_list for item in sublist]

columns = []

# start_nodes, end_nodes = _get_start_end_nodes(s_graph)
Expand Down Expand Up @@ -135,9 +151,11 @@ def flatten(current_list):
protocol_in_path_count += 1
df_dict[olabel][-1] = node.executes_protocol.name
for pv in node.parameter_values:
pvlabel = "{0}.Parameter Value[{1}]".format(
olabel, pv.category.parameter_name.term)
write_value_columns(df_dict, pvlabel, pv)
if pv.category:
pvlabel = "{0}.Parameter Value[{1}]".format(olabel, pv.category.parameter_name.term)
write_value_columns(df_dict, pvlabel, pv)
else:
raise(ValueError, "Protocol Value has no valid parameter_name")
if node.date is not None:
df_dict[olabel + ".Date"][-1] = node.date
if node.performer is not None:
Expand Down Expand Up @@ -255,10 +273,6 @@ def write_assay_table_files(inv_obj, output_dir, write_factor_values=False):
break
protrefcount = 0
protnames = dict()

def flatten(current_list):
return [item for sublist in current_list for item in sublist]

columns = []

paths, indexes = _build_paths_and_indexes(assay_obj.process_sequence)
Expand Down Expand Up @@ -393,8 +407,11 @@ def pbar(x):
if node.performer is not None:
df_dict[olabel + ".Performer"][-1] = node.performer
for pv in node.parameter_values:
pvlabel = "{0}.Parameter Value[{1}]".format(olabel, pv.category.parameter_name.term)
write_value_columns(df_dict, pvlabel, pv)
if pv.category:
pvlabel = "{0}.Parameter Value[{1}]".format(olabel, pv.category.parameter_name.term)
write_value_columns(df_dict, pvlabel, pv)
else:
raise(ValueError, "Protocol Value has no valid parameter_name")
for co in node.comments:
colabel = "{0}.Comment[{1}]".format(olabel, co.name)
df_dict[colabel][-1] = co.value
Expand Down
9 changes: 7 additions & 2 deletions isatools/isatab/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,15 @@ def get_pv_columns(label, pv):
"""
columns = None
try:
columns = ["{0}.Parameter Value[{1}]".format(label, pv.category.parameter_name.term)]
if pv.category is not None:
columns = ["{0}.Parameter Value[{1}]".format(label, pv.category.parameter_name.term)]
print(columns)
else:
raise ValueError
except AttributeError:
log.fatal(label, pv)
columns.extend(get_value_columns(columns[0], pv))
if columns is not None:
columns.extend(get_value_columns(columns[0], pv))
return columns


Expand Down
2 changes: 1 addition & 1 deletion isatools/model/parameter_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self, category=None, value=None, unit=None, comments=None):

# Shouldn't this be in the setter to avoid manually setting a non-numerical value when a unit is supplied ?
if not isinstance(value, Number) and unit:
raise ValueError("ParameterValue value mus be quantitative (i.e. numeric) if a unit is supplied")
raise ValueError("ParameterValue value must be quantitative (i.e. numeric) if a unit is supplied")
self.value = value
self.unit = unit

Expand Down
13 changes: 5 additions & 8 deletions isatools/model/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import itertools

import networkx as nx
import os

from isatools.model.datafile import DataFile
from isatools.model.process import Process
Expand Down Expand Up @@ -60,7 +58,6 @@ def _expand_path(path, identifiers_to_objects, dead_end_outputs):
path_modified = False
for i, identifier in enumerate(path):
node = identifiers_to_objects[identifier]

# If the node is a process at beginning of the path, add a path for each of its inputs.
if i == 0 and isinstance(node, Process):
identifier_list = [input_.sequence_identifier for input_ in node.inputs]
Expand Down Expand Up @@ -90,7 +87,7 @@ def _expand_path(path, identifiers_to_objects, dead_end_outputs):
output_sequence_identifiers = {output.sequence_identifier for output in node.outputs}
input_sequence_identifiers = {input_.sequence_identifier for input_ in
identifiers_to_objects[path[i + 1]].inputs}
identifier_intersection = output_sequence_identifiers.intersection(input_sequence_identifiers)
identifier_intersection = list(output_sequence_identifiers.intersection(input_sequence_identifiers))

combinations = _compute_combinations(identifier_intersection, identifiers_to_objects)
for combo in combinations:
Expand Down Expand Up @@ -206,6 +203,7 @@ def _build_paths_and_indexes(process_sequence=None):

return paths, identifiers_to_objects


def _build_assay_graph(process_sequence=None):
""":obj:`networkx.DiGraph` Returns a directed graph object based on a
given ISA process sequence."""
Expand Down Expand Up @@ -289,7 +287,7 @@ def batch_create_assays(*args, n=1):
from a prototype sequence (currently works only as flat end-to-end
processes of Material->Process->Material->...)
:param *args: An argument list representing the process sequence prototype
# :param *args: An argument list representing the process sequence prototype
:param n: Number of process sequences to create in the batch
:returns: List of process sequences replicating the prototype sequence
Expand Down Expand Up @@ -376,13 +374,13 @@ def batch_create_assays(*args, n=1):
if isinstance(material_a, list):
process.inputs = material_a
else:
process.inputs.append(material_a)
process.inputs = [material_a] # /.append(material_a)
if isinstance(material_b, list):
process.outputs = material_b
for material in material_b:
material.derives_from = [material_a]
else:
process.outputs.append(material_b)
process.outputs = [material_b] # .append(material_b)
material_b.derives_from = [material_a]
process_sequence.append(process)
material_a = material_b
Expand All @@ -402,4 +400,3 @@ def _deep_copy(isa_object):
if isinstance(isa_object, ProcessSequenceNode):
new_obj.assign_identifier()
return new_obj

2 changes: 1 addition & 1 deletion isatools/net/mw2isa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def getblock(container, start_marker, end_marker):


def get_archived_file(mw_study_id):
"""A method of download Metabolomics Workbench archived data from their anonymous FTP site input: a valid Metabolomics
""" A method of download Metabolomics Workbench archived data from their anonymous FTP site input: a valid Metabolomics
Workbench study accession number that should follow this pattern ^ST\d+[6]
:param mw_study_id -> str
:return: success -> boolean
Expand Down
Loading

0 comments on commit d6c182c

Please sign in to comment.