From eed0b1a8d2002f4f8675b378d1a44d02a982593b Mon Sep 17 00:00:00 2001
From: tkchafin
Date: Thu, 11 Apr 2024 20:36:36 +0100
Subject: [PATCH] fix some warnings

---
 src/resistnet/CFPT.py               | 11 +++----
 src/resistnet/model_optimisation.py | 27 ++++++++-------
 src/resistnet/samc_network.py       |  6 ++--
 src/resistnet/utils.py              | 51 +++++++++++++++++++++++++++++
 4 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/src/resistnet/CFPT.py b/src/resistnet/CFPT.py
index 987391ce..74c76469 100644
--- a/src/resistnet/CFPT.py
+++ b/src/resistnet/CFPT.py
@@ -1,19 +1,18 @@
 import sys
+import warnings
 import numpy as np
-from scipy.sparse import diags, eye, csr_matrix
+from scipy.sparse import diags, eye, csr_matrix, SparseEfficiencyWarning
 from scipy.sparse.linalg import spsolve, lgmres, cg
+from scipy.sparse import SparseEfficiencyWarning
-# 1. Scale dest row off-diagonals by absorption
-# 2. fill diagonals
-# 3. Extract Qj and qj
-# 4. Negate Qj and +1 diagonals
+warnings.simplefilter('ignore', SparseEfficiencyWarning)
 
 
 def CFPT(Q, R, edge_site_indices):
     N = len(edge_site_indices)
     cfpt_matrix = np.zeros((N, N))
 
     for i, dest in enumerate(edge_site_indices):
-        Q_temp = Q.copy().tolil()  # LIL format for easier row manipulation
+        Q_temp = Q.copy().tolil()
         absorption_factor = R[dest]
 
         # Get indices and data for the dest row
diff --git a/src/resistnet/model_optimisation.py b/src/resistnet/model_optimisation.py
index fb38dca6..b7c69be2 100644
--- a/src/resistnet/model_optimisation.py
+++ b/src/resistnet/model_optimisation.py
@@ -1,6 +1,6 @@
 import traceback
 import random
-import sys
+import math
 import pandas as pd
 import numpy as np
 from queue import Empty
@@ -214,7 +214,6 @@ def run_ga(self, maxgens=1, fitmetric="aic", burnin=0, deltaB=None,
 
         cxpb, indpb = self.cxpb, self.indpb
         fails, current_best = 0, None
 
-        # Run for maxgens generations
         for g in range(1, maxgens + 1):
             if self.verbose:
@@ -287,19 +286,19 @@
             )
 
             length = len(self.population)
-            if length > 0 and all(isinstance(
-                fit, (int, float)) for fit in fits
-            ):
-                mean = sum(fits) / length
-                sum2 = sum(x * x for x in fits)
-                variance = sum2 / length - mean**2
-
-                # Check for negative variance due to floating-point
-                # arithmetic issues
-                std = (abs(variance) ** 0.5) if variance >= 0 else 0
+            if length > 0:
+                if any(math.isinf(fit) for fit in fits):
+                    # Set stats to NaN if any values are inf or -inf
+                    mean = variance = std = float('nan')
+                else:
+                    mean = sum(fits) / length
+                    sum2 = sum(x * x for x in fits)
+                    variance = sum2 / length - mean**2
+
+                    std = (math.sqrt(variance)
+                           if variance >= 0 else float('nan'))
             else:
-                mean = float('nan')
-                std = float('nan')
+                mean = variance = std = float('nan')
 
             if self.verbose:
                 print(" Worst %s" % worst)
diff --git a/src/resistnet/samc_network.py b/src/resistnet/samc_network.py
index 05239b4f..9893c59d 100644
--- a/src/resistnet/samc_network.py
+++ b/src/resistnet/samc_network.py
@@ -468,13 +468,11 @@ def evaluate(self, individual):
         else:
             # complete Q matrix
             # minmax scale 0-1
-            multi.data = utils.minmax(multi.data)
+            multi.data = utils.minmax_nonzero(multi.data)
 
             # inverse to get transition rates
             # avoid divide-by-zero by setting zero to smallest non-zero element
-            non_zero_min = np.min(multi.data[np.nonzero(multi.data)])
-            multi.data[multi.data == 0] = non_zero_min
-            multi.data = utils.minmax(1 / multi.data)
+            multi.data = utils.minmax_nonzero(1 / multi.data)
 
             # compute cfpt matrix
             cfpt, res = rd.conditionalFirstPassTime(
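The statistics guard added to model_optimisation.py above can be exercised on its own; a minimal sketch, where the `fits` values are made up and stand in for the population fitnesses used inside run_ga:

    import math

    fits = [412.7, 398.2, float("inf")]  # hypothetical fitness values
    length = len(fits)
    if length > 0:
        if any(math.isinf(fit) for fit in fits):
            # any +/-inf fitness collapses the summary stats to NaN
            mean = variance = std = float("nan")
        else:
            mean = sum(fits) / length
            sum2 = sum(x * x for x in fits)
            variance = sum2 / length - mean ** 2
            std = math.sqrt(variance) if variance >= 0 else float("nan")
    else:
        mean = variance = std = float("nan")

    print(mean, variance, std)  # -> nan nan nan, because one fitness is inf
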
diff --git a/src/resistnet/utils.py b/src/resistnet/utils.py
index 96c6b830..dad3e323 100644
--- a/src/resistnet/utils.py
+++ b/src/resistnet/utils.py
@@ -64,6 +64,37 @@ def graph_to_dag_converging(K, origin):
     return D
 
 
+def minmax_lil(lil_matrix):
+    """
+    Perform min-max scaling on the non-zero values of a lil_matrix, scaling
+    all values to be between 0 and 1. This function modifies the matrix in
+    place.
+
+    Args:
+        lil_matrix (scipy.sparse.lil_matrix): The input sparse matrix
+
+    Returns:
+        None: The matrix is modified in place.
+    """
+    # Flatten all data to find global min and max
+    all_data = np.hstack(lil_matrix.data)
+
+    if all_data.size == 0:
+        return  # No data to scale, exit the function
+
+    X_min = all_data.min()
+    X_max = all_data.max()
+
+    # Avoid division by zero if all values are the same
+    if X_min == X_max:
+        return
+
+    # Scale each non-zero value in lil_matrix.data
+    for row_data in lil_matrix.data:
+        for i in range(len(row_data)):
+            row_data[i] = (row_data[i] - X_min) / (X_max - X_min)
+
+
 def minmax(X):
     """
     Perform min-max scaling on a NumPy array, scaling all values to be between 0 and 1.
@@ -80,6 +111,26 @@
     return X_scaled
 
 
+def minmax_nonzero(X):
+    """
+    Perform min-max scaling on a NumPy array, scaling all values to be between
+    0 and 1, then replace any exact zeros with the smallest non-zero scaled
+    value so that the result contains no zeros.
+
+    Args:
+        X (numpy.ndarray): The input array to be scaled.
+
+    Returns:
+        numpy.ndarray: The scaled array, with zeros replaced as described.
+    """
+    X_min = X.min()
+    X_max = X.max()
+    X_scaled = (X - X_min) / (X_max - X_min)
+    smallest_nonzero = np.min(X_scaled[X_scaled > 0])
+    X_scaled[X_scaled == 0] = smallest_nonzero
+    return X_scaled
+
+
 def masked_minmax(X, mask):
     """
     Perform min-max scaling on selected elements of a NumPy array. Selection
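A small usage sketch of the new minmax_nonzero helper as it is used in samc_network.evaluate above; the array values are made up, and the import assumes the patched resistnet package is importable:

    import numpy as np

    from resistnet.utils import minmax_nonzero

    weights = np.array([2.0, 3.0, 5.0, 9.0])  # hypothetical edge weights
    scaled = minmax_nonzero(weights)
    # min-max gives [0, 1/7, 3/7, 1]; the leading 0 is replaced by 1/7,
    # so the reciprocal below cannot divide by zero
    rates = minmax_nonzero(1.0 / scaled)
    print(scaled, rates)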