From 8e1986ad84402e79ecd97f41d527567a56ec80e5 Mon Sep 17 00:00:00 2001
From: vcantarella
Date: Wed, 26 Jun 2024 16:43:48 +0200
Subject: [PATCH] Fix specsim function and minor typos in the code.

---
 examples/ammer/gaussian_kernel_regression.py |  95 ++++++++++++++
 examples/ammer/top_botm.csv                  |   7 +
 .../{test_heinz.py => heinz_highenergy.py}   |   0
 src/hyvr/tools.py                            | 123 ++++++++++--------
 src/hyvr/utils.py                            |   6 +-
 tests/test_surface.py                        |  79 ++++++-----
 6 files changed, 226 insertions(+), 84 deletions(-)
 create mode 100644 examples/ammer/gaussian_kernel_regression.py
 create mode 100644 examples/ammer/top_botm.csv
 rename examples/heinz_highenergy/{test_heinz.py => heinz_highenergy.py} (100%)

diff --git a/examples/ammer/gaussian_kernel_regression.py b/examples/ammer/gaussian_kernel_regression.py
new file mode 100644
index 0000000..bbf901b
--- /dev/null
+++ b/examples/ammer/gaussian_kernel_regression.py
@@ -0,0 +1,95 @@
+import george
+import numpy as np
+from george.kernels import ExpSquaredKernel
+import scipy.optimize
+
+
+var = 2**2
+corr_lengths = np.array([100**2, 200**2])
+
+kernel = var * ExpSquaredKernel(corr_lengths, ndim=2)
+
+kernel.get_parameter_names()
+kernel.get_parameter_vector()
+np.exp(kernel.get_parameter_vector())
+
+top_botm = np.loadtxt("top_botm.csv", delimiter=",", skiprows=1, usecols=(1, 2, 3, 4))
+top = top_botm[:, 2]
+bottom = top_botm[:, 3]
+x_y = top_botm[:, 0:2]
+
+gp_top = george.GP(kernel, mean=np.mean(top), fit_mean=True,
+                   white_noise=np.log(0.5**2), fit_white_noise=True)
+gp_top.compute(x_y)
+print(gp_top.log_likelihood(top))
+print(gp_top.grad_log_likelihood(top))
+
+def nll_top(p):
+    gp_top.set_parameter_vector(p)
+    ll = gp_top.log_likelihood(top, quiet=True)
+    return -ll if np.isfinite(ll) else 1e25
+
+def grad_nll_top(p):
+    gp_top.set_parameter_vector(p)
+    return -gp_top.grad_log_likelihood(top, quiet=True)
+
+gp_top.compute(x_y)
+
+# Print the initial ln-likelihood.
+print(gp_top.log_likelihood(top))
+
+# Run the optimization routine.
+p0 = gp_top.get_parameter_vector()
+results = scipy.optimize.minimize(nll_top, p0, jac=grad_nll_top, method="L-BFGS-B")
+
+# Update the kernel and print the final log-likelihood.
+gp_top.set_parameter_vector(results.x)
+print(gp_top.log_likelihood(top))
+
+gp_top.get_parameter_names()
+params = gp_top.get_parameter_vector()
+params = np.concatenate([np.array([params[0]]), np.exp(params[1:])])
+params
+
+kernelb = var * ExpSquaredKernel(corr_lengths, ndim=2)
+
+kernelb.get_parameter_names()
+kernelb.get_parameter_vector()
+
+
+gp_bottom = george.GP(kernelb, mean=np.mean(bottom), fit_mean=False,
+                      white_noise=np.log(0.5**2), fit_white_noise=False)
+gp_bottom.compute(x_y)
+print(gp_bottom.log_likelihood(bottom))
+print(gp_bottom.grad_log_likelihood(bottom))
+
+def nll_bottom(p):
+    gp_bottom.set_parameter_vector(p)
+    ll = gp_bottom.log_likelihood(bottom, quiet=True)
+    return -ll if np.isfinite(ll) else 1e25
+
+def grad_nll_bottom(p):
+    gp_bottom.set_parameter_vector(p)
+    return -gp_bottom.grad_log_likelihood(bottom, quiet=True)
+
+gp_bottom.compute(x_y)
+
+# Print the initial ln-likelihood.
+print(gp_bottom.log_likelihood(bottom))
+
+# Run the optimization routine.
+p0 = gp_bottom.get_parameter_vector()
+results = scipy.optimize.minimize(nll_bottom, p0, jac=grad_nll_bottom, method="L-BFGS-B", tol=1e-6)
+
+# Update the kernel and print the final log-likelihood.
+gp_bottom.set_parameter_vector(results.x) +print(gp_bottom.log_likelihood(bottom)) + +gp_bottom.get_parameter_names() +paramsb = gp_bottom.get_parameter_vector() +kern_pars = np.exp(paramsb) +corr_lb = np.sqrt(kern_pars) +corr_lb +paramsb = np.concatenate([np.array([paramsb[0]]), np.exp(paramsb[1:])]) +paramsb + diff --git a/examples/ammer/top_botm.csv b/examples/ammer/top_botm.csv new file mode 100644 index 0000000..8bd0107 --- /dev/null +++ b/examples/ammer/top_botm.csv @@ -0,0 +1,7 @@ +well,x,y,top_depth_tufa,bottom_depth_tufa +A5,3498882.77,5375943.75,1.41661,7.095870000000001 +A3,3499465.69,5375551.362,1.51276,8.60863 +A4,3498999.87,5375576.76,2.19863,5.948480000000001 +A6,3498066.91,5375946.45,1.58968,3.0768 +A9,3499559.9,5375903.5,2.37811,6.9548499999999995 +A7,3500458.48,5375818.8,2.53195,3.2114100000000003 diff --git a/examples/heinz_highenergy/test_heinz.py b/examples/heinz_highenergy/heinz_highenergy.py similarity index 100% rename from examples/heinz_highenergy/test_heinz.py rename to examples/heinz_highenergy/heinz_highenergy.py diff --git a/src/hyvr/tools.py b/src/hyvr/tools.py index 09a87f6..cddfa5a 100644 --- a/src/hyvr/tools.py +++ b/src/hyvr/tools.py @@ -4,7 +4,17 @@ from .utils import ferguson_theta_ode from scipy.spatial.distance import pdist, squareform -def ferguson_curve(h, k, eps_factor, flow_angle, s_max, xstart, ystart, extra_noise=0.0): + +def ferguson_curve( + h: np.float64, + k: np.float64, + eps_factor: np.float64, + flow_angle: np.float64, + s_max: np.float64, + xstart: np.float64, + ystart: np.float64, + extra_noise=0.0, +): """ Simulate extruded parabola centrelines using the Ferguson (1976) disturbed meander model Implementation of AR2 autoregressive model @@ -35,7 +45,9 @@ def ferguson_curve(h, k, eps_factor, flow_angle, s_max, xstart, ystart, extra_no """ # Parameters # Calculate curve directions - theta, s, xp, yp = ferguson_theta_ode(s_max, eps_factor, k, h, 0., extra_noise)#np.pi / 2) + theta, s, xp, yp, tau = ferguson_theta_ode( + s_max, eps_factor, k, h, 0.0, extra_noise + ) # np.pi / 2) # Interpolate curve direction over interval of interest # s_interp, th_interp = curve_interp(s, theta, 10) @@ -59,9 +71,13 @@ def ferguson_curve(h, k, eps_factor, flow_angle, s_max, xstart, ystart, extra_no outputs[:, 4] = s # Rotate meanders into mean flow direction - rot_angle = flow_angle #-np.mean(theta) - rotMatrix = np.array([[np.cos(rot_angle), -np.sin(rot_angle)], - [np.sin(rot_angle), np.cos(rot_angle)]]) + rot_angle = flow_angle # -np.mean(theta) + rotMatrix = np.array( + [ + [np.cos(rot_angle), -np.sin(rot_angle)], + [np.sin(rot_angle), np.cos(rot_angle)], + ] + ) roro = np.dot(rotMatrix, outputs[:, 0:2].transpose()) outputs[:, 2:4] = np.dot(rotMatrix, outputs[:, 2:4].transpose()).transpose() @@ -74,11 +90,16 @@ def ferguson_curve(h, k, eps_factor, flow_angle, s_max, xstart, ystart, extra_no return outputs[:, 0], outputs[:, 1], outputs[:, 2], outputs[:, 3], outputs[:, 4] -def contact_surface(x:np.array,y:np.array, - mean:np.float64,var:np.float64, - corl:np.array, - mask=None): - ''' + +def contact_surface( + x: np.array, + y: np.array, + mean: np.float64, + var: np.float64, + corl: np.array, + mask=None, +): + """ Creates gaussian random contact surface with mean value and variance input with the spectral method from Dietrich & Newsam (1993). 
Input: @@ -90,18 +111,20 @@ def contact_surface(x:np.array,y:np.array, mask: mask array (same dimensions as x and y) Returns: Z: output np.array with same dimensions as x and y and with Z values corrensponding to the surface - ''' + """ return None + def surface_gauss_regression( - x:np.array, - y:np.array, - mean:np.float64, - variance:np.float64, - corl:np.array, - dataset:np.array, - error:np.array): - ''' + x: np.array, + y: np.array, + mean: np.float64, + variance: np.float64, + corl: np.array, + dataset: np.array, + error: np.array, +): + """ Performs surface gaussian regression on input x,y data with given dataset (x,y,z) and error Based on the algorithm from Rasmussen & Williams (2006) Gaussian Processes for Machine Learning Input: @@ -109,39 +132,38 @@ def surface_gauss_regression( x,y: 2D grid of x and y points Returns: Z: output np.array with same dimensions as x and y and with Z values corrensponding to the surface - ''' + """ # Calculating distances: shape = x.shape x = np.expand_dims(x.ravel(), axis=1) - y = np.expand_dims(y.ravel(),axis=1) + y = np.expand_dims(y.ravel(), axis=1) # distance of the training points: - x_t = np.expand_dims(dataset[:,0].ravel(),axis=1) - y_t = np.expand_dims(dataset[:,1].ravel(),axis=1) - z_t = dataset[:,2].ravel() - x_dist_t = pdist(x_t, 'euclidean') - y_dist_t = pdist(y_t, 'euclidean') - - - #covariance matrix: + x_t = np.expand_dims(dataset[:, 0].ravel(), axis=1) + y_t = np.expand_dims(dataset[:, 1].ravel(), axis=1) + z_t = dataset[:, 2].ravel() + x_dist_t = pdist(x_t, "euclidean") + y_dist_t = pdist(y_t, "euclidean") + + # covariance matrix: @numba.njit() - def kernel_2d(sigma_f, M, x_dist, y_dist)->np.float64: + def kernel_2d(sigma_f, M, x_dist, y_dist) -> np.float64: x = np.expand_dims(np.array((x_dist, y_dist)), axis=1) x = x.astype(np.float64) - cov = sigma_f*np.exp(-(1/2) * x.T @ M @ x) - return cov[0,0] - - M = np.eye(2)*np.array([1/corl[0]**2,1/corl[1]**2]) + cov = sigma_f * np.exp(-(1 / 2) * x.T @ M @ x) + return cov[0, 0] + + M = np.eye(2) * np.array([1 / corl[0] ** 2, 1 / corl[1] ** 2]) @numba.njit() - def cov_matrix(sigma_f,x_dist,y_dist,M): + def cov_matrix(sigma_f, x_dist, y_dist, M): cov = np.zeros(x_dist.shape[0]) for i in range(x_dist.shape[0]): - cov[i] = kernel_2d(sigma_f,M,x_dist[i],y_dist[i]) + cov[i] = kernel_2d(sigma_f, M, x_dist[i], y_dist[i]) return cov - - cov = cov_matrix(variance,x_dist_t,y_dist_t,M) + + cov = cov_matrix(variance, x_dist_t, y_dist_t, M) cov = squareform(cov) cov = cov + np.eye(cov.shape[0]) error = np.diag(error) @@ -150,25 +172,22 @@ def cov_matrix(sigma_f,x_dist,y_dist,M): L, lower = scipy.linalg.cho_factor(cov, lower=True) L_y = scipy.linalg.cho_solve((L, lower), z_t) - #covariance between test and training points: + # covariance between test and training points: @numba.njit() - def distance(x,y,x_t,y_t): - x_dist = np.zeros((x.shape[0],x_t.shape[0])) - y_dist = np.zeros((x.shape[0],x_t.shape[0])) + def distance(x, y, x_t, y_t): + x_dist = np.zeros((x.shape[0], x_t.shape[0])) + y_dist = np.zeros((x.shape[0], x_t.shape[0])) for i in range(x.shape[0]): - x_dist[i,:] = np.ravel((x[i,0]-x_t)**2) - y_dist[i,:] = np.ravel((y[i,0]-y_t)**2) - return x_dist,y_dist - - x_dist,y_dist = distance(x,y,x_t,y_t) + x_dist[i, :] = np.ravel((x[i, 0] - x_t) ** 2) + y_dist[i, :] = np.ravel((y[i, 0] - y_t) ** 2) + return x_dist, y_dist + + x_dist, y_dist = distance(x, y, x_t, y_t) cov_shape = x_dist.shape - cov_test = cov_matrix(variance,x_dist.ravel(),y_dist.ravel(),M) + cov_test = cov_matrix(variance, 
x_dist.ravel(), y_dist.ravel(), M) cov_test = cov_test.reshape(cov_shape) # Mean prediction mean = cov_test.T @ L_y + mean # Variance prediction # to be implemented v = scipy.linalg.solve_triangular(L, cov_test, lower=True) - return np.reshape(mean, shape) - - - + return np.reshape(mean, shape) \ No newline at end of file diff --git a/src/hyvr/utils.py b/src/hyvr/utils.py index 7bc4e1b..78a3b26 100644 --- a/src/hyvr/utils.py +++ b/src/hyvr/utils.py @@ -279,7 +279,7 @@ def min_distance(x, y, P): return d_array, glob_min_idx -def ferguson_theta_ode(s_max, eps_factor, k, h, omega, err:float): +def ferguson_theta_ode(s_max: np.float64, eps_factor: np.float64, k:np.float64, h:np.float64, omega:np.float64, err:np.float64): """ Implementation of (Ferguson, 1976, Eq.9). The equation is formulated as an initial value problem and integrated with scipy function for integration (solve_ivp) @@ -515,7 +515,7 @@ def specsim( two_dim = len(corl) < 3 # boolean weather calculations should be done in two or 3D if two_dim: Y = np.empty(x.shape) - h_square = (x_calc / corl[0]) ** 2 + (y_calc / corl[1]) ** 2 + h_square = 0.5*(x_calc / corl[0]) ** 2 + 0.5*(y_calc / corl[1]) ** 2 else: if mask is None: z_calc = z @@ -523,7 +523,7 @@ def specsim( z_calc = np.where(mask, z, z_calc) Y = np.empty(z.shape) h_square = ( - (x_calc / corl[0]) ** 2 + (y_calc / corl[1]) ** 2 + (z_calc / corl[2]) ** 2 + 0.5 * (x_calc / corl[0]) ** 2 + 0.5* (y_calc / corl[1]) ** 2 + 0.5 * (z_calc / corl[2]) ** 2 ) ntot = h_square.size # Covariance matrix of variables diff --git a/tests/test_surface.py b/tests/test_surface.py index 45a78ae..3cbdcf5 100644 --- a/tests/test_surface.py +++ b/tests/test_surface.py @@ -1,50 +1,71 @@ import numpy as np from src.hyvr.tools import surface_gauss_regression, ferguson_curve +from src.hyvr.utils import specsim + # Test data def test_curve(): - h=0.1, - k=np.pi / 100, - eps_factor=1, + h = 0.1 + k = np.pi / 200 + eps_factor = (np.pi / 1.5) ** 2 channel_curve_1 = ferguson_curve( - h=0.1, - k=np.pi / 200, - eps_factor=(np.pi / 1.5) ** 2, - flow_angle=0.0, - s_max=400, - xstart=0, - ystart=25, + h=h, + k=k, + eps_factor=eps_factor, + flow_angle=0.0, + s_max=400, + xstart=0, + ystart=25, ) x_t, y_t, vx, vy, s = channel_curve_1 +def test_specsim(): + x = np.linspace(0, 100, 1000) + y = np.linspace(0, 100, 1000) + x, y = np.meshgrid(x, y) + mean = 2.5 + variance = 0.05**2 + Z = specsim( + x, + y, + mean=mean, + var=variance, + corl=np.array([0.01, 0.01]), + mask=None, + covmod="gaussian", + ) + assert Z.shape == x.shape + assert np.allclose(Z.mean(), mean, atol=1e-3) + assert np.allclose(Z.var(), variance, atol=1e-3) + + def test_gauss_surface_regression(): - - x = np.linspace(0,100, 1000) - y = np.linspace(0,100, 1000) + x = np.linspace(0, 100, 1000) + y = np.linspace(0, 100, 1000) x, y = np.meshgrid(x, y) # training data: - h=0.4 - k=np.pi / 200 - eps_factor=(np.pi / 1.5) ** 2 + h = 0.4 + k = np.pi / 200 + eps_factor = (np.pi / 1.5) ** 2 x_t, y_t, vx, vy, s = ferguson_curve( - h=h, - k=k, - eps_factor=eps_factor, - flow_angle=0.0, - s_max=400, - xstart=0, - ystart=25, - extra_noise=1e-4, + h=h, + k=k, + eps_factor=eps_factor, + flow_angle=0.0, + s_max=400, + xstart=0, + ystart=25, + extra_noise=1e-4, ) z_t = np.sin(s) dataset = np.array([x_t, y_t, z_t]).T error = np.repeat(0.3, dataset.shape[0]) corl = np.array([10, 10]) - mean = 0. - variance = 1. 
- #Z = surface_gauss_regression(x, y, mean, variance, corl, dataset, error) - #assert Z.shape == x.shape - #assert Z.shape == y.shape + mean = 0.0 + variance = 1.0 + # Z = surface_gauss_regression(x, y, mean, variance, corl, dataset, error) + # assert Z.shape == x.shape + # assert Z.shape == y.shape assert x_t.shape == y_t.shape assert x_t.shape == vx.shape
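Usage note: the new test_specsim test above doubles as a call-pattern reference for the revised specsim. The short sketch below repeats that pattern outside pytest; the hyvr.utils import path, grid resolution, and correlation lengths are illustrative assumptions and are not part of this patch.

import numpy as np
from hyvr.utils import specsim  # assumed installed package path; the test imports src.hyvr.utils

# Build a regular grid and draw one Gaussian random surface realization.
x = np.linspace(0, 100, 200)
y = np.linspace(0, 100, 200)
x, y = np.meshgrid(x, y)

Z = specsim(
    x,
    y,
    mean=2.5,                     # target mean of the surface
    var=0.05**2,                  # target variance of the field
    corl=np.array([10.0, 10.0]),  # correlation lengths in x and y (illustrative values)
    mask=None,
    covmod="gaussian",
)

print(Z.shape, Z.mean(), Z.var())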