From eed0b1a8d2002f4f8675b378d1a44d02a982593b Mon Sep 17 00:00:00 2001
From: tkchafin
Date: Thu, 11 Apr 2024 20:36:36 +0100
Subject: [PATCH] fix some warnings

---
 src/resistnet/CFPT.py               | 11 +++----
 src/resistnet/model_optimisation.py | 27 ++++++++-------
 src/resistnet/samc_network.py       |  6 ++--
 src/resistnet/utils.py              | 51 +++++++++++++++++++++++++++++
 4 files changed, 71 insertions(+), 24 deletions(-)

diff --git a/src/resistnet/CFPT.py b/src/resistnet/CFPT.py
index 987391ce..74c76469 100644
--- a/src/resistnet/CFPT.py
+++ b/src/resistnet/CFPT.py
@@ -1,19 +1,18 @@
 import sys
+import warnings
 import numpy as np
-from scipy.sparse import diags, eye, csr_matrix
+from scipy.sparse import diags, eye, csr_matrix, SparseEfficiencyWarning
 from scipy.sparse.linalg import spsolve, lgmres, cg
+from scipy.sparse import SparseEfficiencyWarning
-# 1. Scale dest row off-diagonals by absorption
-# 2. fill diagonals
-# 3. Extract Qj and qj
-# 4. Negate Qj and +1 diagonals
+warnings.simplefilter('ignore', SparseEfficiencyWarning)
 
 
 def CFPT(Q, R, edge_site_indices):
     N = len(edge_site_indices)
     cfpt_matrix = np.zeros((N, N))
 
     for i, dest in enumerate(edge_site_indices):
-        Q_temp = Q.copy().tolil()  # LIL format for easier row manipulation
+        Q_temp = Q.copy().tolil()
         absorption_factor = R[dest]
 
         # Get indices and data for the dest row
diff --git a/src/resistnet/model_optimisation.py b/src/resistnet/model_optimisation.py
index fb38dca6..b7c69be2 100644
--- a/src/resistnet/model_optimisation.py
+++ b/src/resistnet/model_optimisation.py
@@ -1,6 +1,6 @@
 import traceback
 import random
-import sys
+import math
 import pandas as pd
 import numpy as np
 from queue import Empty
@@ -214,7 +214,6 @@ def run_ga(self, maxgens=1, fitmetric="aic", burnin=0, deltaB=None,
 
         cxpb, indpb = self.cxpb, self.indpb
         fails, current_best = 0, None
 
-        # Run for maxgens generations
         for g in range(1, maxgens + 1):
             if self.verbose:
@@ -287,19 +286,19 @@
             )
 
             length = len(self.population)
-            if length > 0 and all(isinstance(
-                fit, (int, float)) for fit in fits
-            ):
-                mean = sum(fits) / length
-                sum2 = sum(x * x for x in fits)
-                variance = sum2 / length - mean**2
-
-                # Check for negative variance due to floating-point
-                # arithmetic issues
-                std = (abs(variance) ** 0.5) if variance >= 0 else 0
+            if length > 0:
+                if any(math.isinf(fit) for fit in fits):
+                    # Set stats to NaN if any values are inf or -inf
+                    mean = variance = std = float('nan')
+                else:
+                    mean = sum(fits) / length
+                    sum2 = sum(x * x for x in fits)
+                    variance = sum2 / length - mean**2
+
+                    std = (math.sqrt(variance)
+                           if variance >= 0 else float('nan'))
             else:
-                mean = float('nan')
-                std = float('nan')
+                mean = variance = std = float('nan')
 
             if self.verbose:
                 print(" Worst %s" % worst)
diff --git a/src/resistnet/samc_network.py b/src/resistnet/samc_network.py
index 05239b4f..9893c59d 100644
--- a/src/resistnet/samc_network.py
+++ b/src/resistnet/samc_network.py
@@ -468,13 +468,11 @@ def evaluate(self, individual):
         else:
             # complete Q matrix
             # minmax scale 0-1
-            multi.data = utils.minmax(multi.data)
+            multi.data = utils.minmax_nonzero(multi.data)
 
             # inverse to get transition rates
             # avoid divide-by-zero by setting zero to smallest non-zero element
-            non_zero_min = np.min(multi.data[np.nonzero(multi.data)])
-            multi.data[multi.data == 0] = non_zero_min
-            multi.data = utils.minmax(1 / multi.data)
+            multi.data = utils.minmax_nonzero(1 / multi.data)
 
             # compute cfpt matrix
             cfpt, res = rd.conditionalFirstPassTime(
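The statistics guard added to model_optimisation.py above can be exercised on its own; a minimal sketch, where the `fits` values are made up and stand in for the population fitnesses used inside run_ga:

    import math

    fits = [412.7, 398.2, float("inf")]  # hypothetical fitness values
    length = len(fits)
    if length > 0:
        if any(math.isinf(fit) for fit in fits):
            # any +/-inf fitness collapses the summary stats to NaN
            mean = variance = std = float("nan")
        else:
            mean = sum(fits) / length
            sum2 = sum(x * x for x in fits)
            variance = sum2 / length - mean ** 2
            std = math.sqrt(variance) if variance >= 0 else float("nan")
    else:
        mean = variance = std = float("nan")

    print(mean, variance, std)  # -> nan nan nan, because one fitness is inf
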
diff --git a/src/resistnet/utils.py b/src/resistnet/utils.py
index 96c6b830..dad3e323 100644
--- a/src/resistnet/utils.py
+++ b/src/resistnet/utils.py
@@ -64,6 +64,37 @@ def graph_to_dag_converging(K, origin):
     return D
 
 
+def minmax_lil(lil_matrix):
+    """
+    Perform min-max scaling on the non-zero values of a lil_matrix, scaling
+    all values to be between 0 and 1. This function modifies the matrix in
+    place.
+
+    Args:
+        lil_matrix (scipy.sparse.lil_matrix): The input sparse matrix
+
+    Returns:
+        None: The matrix is modified in place.
+    """
+    # Flatten all data to find global min and max
+    all_data = np.hstack(lil_matrix.data)
+
+    if all_data.size == 0:
+        return  # No data to scale, exit the function
+
+    X_min = all_data.min()
+    X_max = all_data.max()
+
+    # Avoid division by zero if all values are the same
+    if X_min == X_max:
+        return
+
+    # Scale each non-zero value in lil_matrix.data
+    for row_data in lil_matrix.data:
+        for i in range(len(row_data)):
+            row_data[i] = (row_data[i] - X_min) / (X_max - X_min)
+
+
 def minmax(X):
     """
     Perform min-max scaling on a NumPy array, scaling all values to be between 0 and 1.
@@ -80,6 +111,26 @@
     return X_scaled
 
 
+def minmax_nonzero(X):
+    """
+    Perform min-max scaling on a NumPy array, scaling all values to be between
+    0 and 1, then replace any exact zeros with the smallest non-zero scaled
+    value so that the result contains no zeros.
+
+    Args:
+        X (numpy.ndarray): The input array to be scaled.
+
+    Returns:
+        numpy.ndarray: The scaled array, with zeros replaced as described.
+    """
+    X_min = X.min()
+    X_max = X.max()
+    X_scaled = (X - X_min) / (X_max - X_min)
+    smallest_nonzero = np.min(X_scaled[X_scaled > 0])
+    X_scaled[X_scaled == 0] = smallest_nonzero
+    return X_scaled
+
+
 def masked_minmax(X, mask):
     """
     Perform min-max scaling on selected elements of a NumPy array. Selection
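A small usage sketch of the new minmax_nonzero helper as it is used in samc_network.evaluate above; the array values are made up, and the import assumes the patched resistnet package is importable:

    import numpy as np

    from resistnet.utils import minmax_nonzero

    weights = np.array([2.0, 3.0, 5.0, 9.0])  # hypothetical edge weights
    scaled = minmax_nonzero(weights)
    # min-max gives [0, 1/7, 3/7, 1]; the leading 0 is replaced by 1/7,
    # so the reciprocal below cannot divide by zero
    rates = minmax_nonzero(1.0 / scaled)
    print(scaled, rates)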