146 separate classification module from standard dependencies (#149)
* Update build-and-test.yml

* specify mac version of io-gcs

* Update pyproject.toml

* implement option to run processing without classification

* simplify extract_particles. better error reporting in classify

* update docs

* Update __init__.py

* Update classify.py

* better project metadata

* Create CITATION.cff

* change silicon to arm64
emlynjdavies authored Dec 6, 2023
1 parent 37d0d9e commit e7aaa1b
Showing 10 changed files with 142 additions and 45 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build-and-test.yml
@@ -5,7 +5,7 @@ on: [pull_request]
jobs:
Windows_pip:
runs-on: windows-latest
timeout-minutes: 60
timeout-minutes: 10

steps:
- name: Check out code
@@ -17,13 +17,13 @@ jobs:
python-version: '3.10'

- name: Install pyopia
run: pip install ./
run: pip install -e ".[classification]"

- name: Test
run: python -m pytest -v
Ubuntu_poetry:
runs-on: ubuntu-latest
timeout-minutes: 60
timeout-minutes: 10

steps:
- uses: actions/checkout@v3
@@ -34,12 +34,12 @@
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Install dependencies
run: poetry install
run: poetry install --extras "classification"
- name: Run the automated tests
run: poetry run pytest -v
MacOS_poetry:
runs-on: macos-latest
timeout-minutes: 60
timeout-minutes: 10

steps:
- uses: actions/checkout@v3
@@ -50,6 +50,6 @@
- name: Install poetry
uses: abatilo/actions-poetry@v2
- name: Install dependencies
run: poetry install
run: poetry install --extras "classification"
- name: Run the automated tests
run: poetry run pytest -v
10 changes: 3 additions & 7 deletions README.md
@@ -12,18 +12,14 @@ A Python Ocean Particle Image Analysis toolbox

- Under development. See/register issues [here](https://github.com/SINTEF/pyopia/issues)

- Refining a structure for standard [processing pipelines](https://pyopia.readthedocs.io/en/latest/pyopia.pipeline.html)

- Building a system for metadata and [output files](https://pyopia.readthedocs.io/en/latest/pyopia.io.html)

----

# Installing for users

Users are expected to be familiar with Python, and have [Python](https://github.com/conda-forge/miniforge/#download), [pip](https://pypi.org/project/pip/) and [git](https://github.com/git-guides/install-git) installed. You can then install Pyopia like this:
Users are expected to be familiar with Python, and have [Python](https://github.com/conda-forge/miniforge/#download) and [pip](https://pypi.org/project/pip/) installed. You can then install PyOPIA like this:

```
pip install git+https://github.com/SINTEF/pyopia.git@main
pip install pyopia
```

We would usually recommend installing within a virtual python environment, which you can read more about [here](https://jni.github.io/using-python-for-science/intro-to-environments.html).
@@ -57,7 +53,7 @@ We welcome additions and improvements to the code! We request that you follow a
4. All pull requests must be reviewed by a person. The benefits of code review are many, but we like to emphasise that code reviews help spread awareness of code changes. Please note that code reviews should be a pleasant experience, so be pleasant, polite and remember that there is a human being with good intentions on the other side of the screen.
5. All contributions are linted with flake8. We recommend that you run flake8 on your code while developing to fix any issues as you go. We recommend using autopep8 to autoformat your Python code (but please check the code behaviour is not affected by autoformatting before pushing). This makes flake8 happy, and makes it easier for us all to maintain a consistent and readable code base.

## Installing for developers
## Installing from source for developers

Install [Python](https://github.com/conda-forge/miniforge/#download).

16 changes: 14 additions & 2 deletions docs/intro.md
@@ -36,14 +36,26 @@ Full documentation for the code is [here](api)
Installing
==================================

Users are expected to be familiar with Python, and have [Python](https://github.com/conda-forge/miniforge/#download), [pip](https://pypi.org/project/pip/) and [git](https://github.com/git-guides/install-git) installed. You can then install PyOPIA like this:
Users are expected to be familiar with Python, and have [Python](https://github.com/conda-forge/miniforge/#download) and [pip](https://pypi.org/project/pip/) installed. You can then install PyOPIA like this:

```
pip install git+https://github.com/SINTEF/PyOPIA.git@main
pip install pyopia
```

We would usually recommend installing within a virtual python environment, which you can read more about [here](https://jni.github.io/using-python-for-science/intro-to-environments.html).

If you want to use PyOPIA's Classification module, you also need to install the extra classification dependencies, like this:

```
pip install pyopia[classification]
```

or (for Apple silicon)

```
pip install pyopia[classification-arm64]
```
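
After installing either extra, a minimal sketch like the one below can confirm the classification dependencies are importable; it mirrors the import guard used in `pyopia.classify`, and the printed messages are just illustrative:

```python
# Minimal check that the classification extras installed correctly.
# Mirrors the import guard used in pyopia.classify.Classify.load_model.
try:
    from tensorflow import keras  # noqa: F401
    print('Classification dependencies available')
except ImportError:
    print('Missing tensorflow: pip install pyopia[classification] '
          '(or pyopia[classification-arm64] on Apple silicon)')
```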

Links to libraries PyOPIA uses
==================================

4 changes: 2 additions & 2 deletions docs/notebooks/stats.ipynb
@@ -158,7 +158,7 @@
"\n",
"The position of each particle within the original raw image are given by the bounding box at location (minr, minc, maxr, maxc) - with r and c being rows and columns, respectively.\n",
"\n",
"Classification probabilities are given by columns with 'probability_*'."
"Classification probabilities are given by columns with 'probability_*'. Note: If `[steps.classifier]`is not defined in the config, the classification will be skipped and no probabilities reported. To use PyOPIA's Classification module requires the extra dependencies (pip install pyopia[classification] or pip install pyopia[classification-arm64])"
]
},
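
The column convention described in that cell can be illustrated with a stand-in table; the class names below are hypothetical, and real column names depend on the labels of the trained model:

```python
import pandas as pd

# Stand-in stats table showing the 'probability_*' column convention;
# the class names here are hypothetical examples only.
stats = pd.DataFrame({
    'major_axis_length': [12.0, 30.5],
    'probability_copepod': [0.9, 0.1],
    'probability_diatom_chain': [0.1, 0.9],
})
prob_cols = [c for c in stats.columns if c.startswith('probability_')]
print(stats[prob_cols])
```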
{
@@ -426,7 +426,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.10.12"
},
"vscode": {
"interpreter": {
38 changes: 38 additions & 0 deletions pyopia/CITATION.cff
@@ -0,0 +1,38 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: Davies
given-names: Emlyn
- family-names: Nimmo-Smith
given-names: Alex
- family-names: Nepstad
given-names: Raymond
- family-names: Nordam
given-names: Tor
- family-names: Brönner
given-names: Ute
- family-names: Steinvika
given-names: Andreas
- family-names: Giering
given-names: Sari
- family-names: Masoudi
given-names: Mojtaba
- family-names: Liu
given-names: Zonghua
- family-names: Hélaouët
given-names: Pierre
- family-names: Cursons
given-names: Kairan
- family-names: Rau
given-names: Matthew
- family-names: Song
given-names: Yixuan
- family-names: Mostaani
given-names: Arsalan
- family-names: Barstein
given-names: Karoline
- family-names: Buscombe
given-names: Daniel
title: "PyOPIA: A Python Ocean Particle Image Analysis toolbox"
url: "https://pyopia.readthedocs.io"
repository-code: "https://github.com/sintef/pyopia"
2 changes: 1 addition & 1 deletion pyopia/__init__.py
@@ -1 +1 @@
__version__ = '1.0.0'
__version__ = '1.1.0'
22 changes: 16 additions & 6 deletions pyopia/classify.py
@@ -40,6 +40,16 @@ class Classify():
pipeline_class = 'pyopia.classify.Classify'
model_path = 'keras_model.h5' # path to trained nn model
If `[steps.classifier]` is not defined, classification will be skipped and no probabilities will be reported.
If you want to use an example trained model for SilCam data
(no guarantee of accuracy for other applications), you can get it using `exampledata`
within the notebooks folder (https://github.com/SINTEF/pyopia/blob/main/notebooks/exampledata.py):
.. code-block:: python
model_path = exampledata.get_example_model()
'''
def __init__(self, model_path=None):
self.model_path = model_path
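
For direct use outside a full pipeline, a minimal sketch (assuming the classification extras are installed and a trained model file exists at the given path) looks like this:

```python
# Minimal sketch: construct and load the classifier outside a pipeline.
# Assumes the classification extras are installed and 'keras_model.h5' exists.
from pyopia.classify import Classify

cl = Classify(model_path='keras_model.h5')
cl.load_model()  # raises ImportError with install hints if tensorflow is missing
```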
@@ -62,12 +72,12 @@ def load_model(self):
try:
from tensorflow import keras
except ImportError:
info_str = 'WARNING: Could not import Keras, Classify will not work'
info_str += ' until you install tensorflow (pip install tensorflow-cpu)'
print(info_str)
self.model = lambda x: None
self.class_labels = []
return
info_str = 'ERROR: Could not import Keras. Classify will not work'
info_str += ' until you install tensorflow.\n'
info_str += 'Use: pip install pyopia[classification]\n'
info_str += ' or: pip install pyopia[classification-arm64]'
info_str += ' for tensorflow-macos (silicon chips)'
raise ImportError(info_str)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
keras.backend.clear_session()
1 change: 1 addition & 0 deletions pyopia/pipeline.py
@@ -65,6 +65,7 @@ def __init__(self, settings,
self.initial_steps = initial_steps
print('Initialising pipeline')
self.data = Data()
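# classifier entry defaults to None so the pipeline can run without a [steps.classifier] step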
self.data['cl'] = None
self.data['settings'] = settings

self.pass_general_settings()
63 changes: 45 additions & 18 deletions pyopia/process.py
@@ -192,6 +192,29 @@ def write_segmented_images(imbw, imc, settings, timestamp):
imsave(fname, imc)


def put_roi_in_h5(export_outputpath, HDF5File, roi, filename, i):
'''Adds ROIs to an open HDF5 file if export_outputpath is not None.
For use within {func}`pyopia.process.extract_particles`
Parameters
----------
export_outputpath : str
HDF5File : h5 file object
roi : uint8
ROI image of the particle
filename : str
export filename to which the particle number tag is appended
i : int
particle number
Returns
-------
str
filename
'''
filename = filename + '-PN' + str(i)
if export_outputpath is not None:
HDF5File.create_dataset('PN' + str(i), data=roi)
return filename


def extract_particles(imc, timestamp, Classification, region_properties,
export_outputpath=None, min_length=0, propnames=['major_axis_length', 'minor_axis_length',
'equivalent_diameter']):
@@ -213,11 +236,12 @@
'''
filenames = ['not_exported'] * len(region_properties)

# pre-allocation
predictions = np.zeros((len(region_properties),
len(Classification.class_labels)),
dtype='float64')
predictions *= np.nan
if Classification is not None:
# pre-allocation
predictions = np.zeros((len(region_properties),
len(Classification.class_labels)),
dtype='float64')
predictions *= np.nan

# obtain the original image filename from the timestamp
filename = timestamp.strftime('D%Y%m%dT%H%M%S.%f')
@@ -239,6 +263,8 @@
meta.attrs['Raw image name'] = filename
# @todo include more useful information in this meta data, e.g. possibly raw image location and background
# stack file list.
else:
HDF5File = None

# pre-allocate some things
data = np.zeros((len(region_properties), len(propnames)), dtype=np.float64)
@@ -258,15 +284,13 @@
# extract the region of interest from the corrected colour image
roi = extract_roi(imc, bboxes[i, :].astype(int))

# add the roi to the HDF5 file
filenames[int(i)] = filename + '-PN' + str(i)
if export_outputpath is not None:
HDF5File.create_dataset('PN' + str(i), data=roi)
# @todo also include particle stats here too.
if Classification is not None:
# run a prediction on what type of particle this might be
prediction = Classification.proc_predict(roi.astype(np.uint8))
predictions[int(i), :] = prediction[0]

# run a prediction on what type of particle this might be
prediction = Classification.proc_predict(roi.astype(np.uint8))
predictions[int(i), :] = prediction[0]
# add the roi to the HDF5 file
filenames[int(i)] = put_roi_in_h5(export_outputpath, HDF5File, roi, filename, i)

if export_outputpath is not None:
# close the HDF5 file
@@ -283,9 +307,10 @@

print('EXTRACTING {0} IMAGES from {1}'.format(nb_extractable_part, len(stats['major_axis_length'])))

# add classification predictions to the particle statistics data
for n, c in enumerate(Classification.class_labels):
stats['probability_' + c] = predictions[:, n]
if Classification is not None:
# add classification predictions to the particle statistics data
for n, c in enumerate(Classification.class_labels):
stats['probability_' + c] = predictions[:, n]

# add the filenames of the HDF5 file and particle number tag to the
# particle statistics data
@@ -356,7 +381,8 @@ def segment(img, threshold=0.98, minimum_area=12, fill_holes=True):
return imbw


def statextract(imbw, timestamp, imc, Classification,
def statextract(imbw, timestamp, imc,
Classification=None,
max_coverage=30,
max_particles=5000,
export_outputpath=None,
@@ -510,7 +536,8 @@ def __init__(self,

def __call__(self, data):
print('statextract')
stats, saturation = statextract(data['imbw'], data['timestamp'], data['imc'], data['cl'],
stats, saturation = statextract(data['imbw'], data['timestamp'], data['imc'],
Classification=data['cl'],
max_coverage=self.max_coverage,
max_particles=self.max_particles,
export_outputpath=self.export_outputpath,
19 changes: 16 additions & 3 deletions pyproject.toml
@@ -2,8 +2,14 @@
name = "PyOPIA"
version = "0"
description = "A Python Ocean Particle Image Analysis toolbox."
authors = ["Emlyn Davies <emlyn.davies@sintef.no>"]
authors = [
"Emlyn Davies <emlyn.davies@sintef.no>",
"Alex Nimmo Smith@plymouth.ac.uk <alex.nimmo.smith@plymouth.ac.uk>"
]
readme = "README.md"
repository = "https://github.com/sintef/pyopia"
documentation = "https://pyopia.readthedocs.io"
keywords = ["Ocean", "Particles", "Imaging", "Measurement", "Size distribution"]
packages = [{include = "pyopia"}]

[tool.poetry.dependencies]
@@ -35,9 +41,16 @@ xarray = "^2023.8.0"
typer = {extras = ["all"], version = "^0.9.0"}
pandas = {version = "^2.1.1", extras = ["computation"]}
h5py = "^3.9.0"
tensorflow-io-gcs-filesystem = "0.31.0"
tensorflow-cpu = "2.11.0"
poetry-version-plugin = "^0.2.0"
tensorflow-macos = {version = "2.11.0", optional = true, markers = "sys_platform == 'darwin' and platform_machine == 'arm64'"}
tensorflow-cpu = {version = "2.11.0", optional = true}
tensorflow-io-gcs-filesystem = [
{version = ">=0.31.0", optional=true}
]

[tool.poetry.extras]
classification-arm64 = ["tensorflow-io-gcs-filesystem", "tensorflow-macos"]
classification = ["tensorflow-io-gcs-filesystem", "tensorflow-cpu"]

[tool.poetry-version-plugin]
source = "init"
