From 8e1986ad84402e79ecd97f41d527567a56ec80e5 Mon Sep 17 00:00:00 2001
From: vcantarella
Date: Wed, 26 Jun 2024 16:43:48 +0200
Subject: [PATCH] Fix specsim function and minor typos in the code.

---
 examples/ammer/gaussian_kernel_regression.py |  95 ++++++++++++++
 examples/ammer/top_botm.csv                  |   7 +
 .../{test_heinz.py => heinz_highenergy.py}   |   0
 src/hyvr/tools.py                            | 123 ++++++++++--------
 src/hyvr/utils.py                            |   6 +-
 tests/test_surface.py                        |  79 ++++++-----
 6 files changed, 226 insertions(+), 84 deletions(-)
 create mode 100644 examples/ammer/gaussian_kernel_regression.py
 create mode 100644 examples/ammer/top_botm.csv
 rename examples/heinz_highenergy/{test_heinz.py => heinz_highenergy.py} (100%)

diff --git a/examples/ammer/gaussian_kernel_regression.py b/examples/ammer/gaussian_kernel_regression.py
new file mode 100644
index 0000000..bbf901b
--- /dev/null
+++ b/examples/ammer/gaussian_kernel_regression.py
@@ -0,0 +1,95 @@
+import george
+import numpy as np
+from george.kernels import ExpSquaredKernel
+import scipy.optimize
+
+
+var = 2**2
+corr_lengths = np.array([100**2, 200**2])
+
+kernel = var * ExpSquaredKernel(corr_lengths, ndim=2)
+
+kernel.get_parameter_names()
+kernel.get_parameter_vector()
+np.exp(kernel.get_parameter_vector())
+
+top_botm = np.loadtxt("top_botm.csv", delimiter=",", skiprows=1, usecols=(1, 2, 3, 4))
+top = top_botm[:, 2]
+bottom = top_botm[:, 3]
+x_y = top_botm[:, 0:2]
+
+gp_top = george.GP(kernel, mean=np.mean(top), fit_mean=True,
+                   white_noise=np.log(0.5**2), fit_white_noise=True)
+gp_top.compute(x_y)
+print(gp_top.log_likelihood(top))
+print(gp_top.grad_log_likelihood(top))
+
+def nll_top(p):
+    gp_top.set_parameter_vector(p)
+    ll = gp_top.log_likelihood(top, quiet=True)
+    return -ll if np.isfinite(ll) else 1e25
+
+def grad_nll_top(p):
+    gp_top.set_parameter_vector(p)
+    return -gp_top.grad_log_likelihood(top, quiet=True)
+
+gp_top.compute(x_y)
+
+# Print the initial ln-likelihood.
+print(gp_top.log_likelihood(top))
+
+# Run the optimization routine.
+p0 = gp_top.get_parameter_vector()
+results = scipy.optimize.minimize(nll_top, p0, jac=grad_nll_top, method="L-BFGS-B")
+
+# Update the kernel and print the final log-likelihood.
+gp_top.set_parameter_vector(results.x)
+print(gp_top.log_likelihood(top))
+
+gp_top.get_parameter_names()
+params = gp_top.get_parameter_vector()
+params = np.concatenate([np.array([params[0]]), np.exp(params[1:])])
+params
+
+kernelb = var * ExpSquaredKernel(corr_lengths, ndim=2)
+
+kernelb.get_parameter_names()
+kernelb.get_parameter_vector()
+
+
+gp_bottom = george.GP(kernelb, mean=np.mean(bottom), fit_mean=False,
+                      white_noise=np.log(0.5**2), fit_white_noise=False)
+gp_bottom.compute(x_y)
+print(gp_bottom.log_likelihood(bottom))
+print(gp_bottom.grad_log_likelihood(bottom))
+
+def nll_bottom(p):
+    gp_bottom.set_parameter_vector(p)
+    ll = gp_bottom.log_likelihood(bottom, quiet=True)
+    return -ll if np.isfinite(ll) else 1e25
+
+def grad_nll_bottom(p):
+    gp_bottom.set_parameter_vector(p)
+    return -gp_bottom.grad_log_likelihood(bottom, quiet=True)
+
+gp_bottom.compute(x_y)
+
+# Print the initial ln-likelihood.
+print(gp_bottom.log_likelihood(bottom))
+
+# Run the optimization routine.
+p0 = gp_bottom.get_parameter_vector()
+results = scipy.optimize.minimize(nll_bottom, p0, jac=grad_nll_bottom, method="L-BFGS-B", tol=1e-6)
+
+# Update the kernel and print the final log-likelihood.
+gp_bottom.set_parameter_vector(results.x) +print(gp_bottom.log_likelihood(bottom)) + +gp_bottom.get_parameter_names() +paramsb = gp_bottom.get_parameter_vector() +kern_pars = np.exp(paramsb) +corr_lb = np.sqrt(kern_pars) +corr_lb +paramsb = np.concatenate([np.array([paramsb[0]]), np.exp(paramsb[1:])]) +paramsb + diff --git a/examples/ammer/top_botm.csv b/examples/ammer/top_botm.csv new file mode 100644 index 0000000..8bd0107 --- /dev/null +++ b/examples/ammer/top_botm.csv @@ -0,0 +1,7 @@ +well,x,y,top_depth_tufa,bottom_depth_tufa +A5,3498882.77,5375943.75,1.41661,7.095870000000001 +A3,3499465.69,5375551.362,1.51276,8.60863 +A4,3498999.87,5375576.76,2.19863,5.948480000000001 +A6,3498066.91,5375946.45,1.58968,3.0768 +A9,3499559.9,5375903.5,2.37811,6.9548499999999995 +A7,3500458.48,5375818.8,2.53195,3.2114100000000003 diff --git a/examples/heinz_highenergy/test_heinz.py b/examples/heinz_highenergy/heinz_highenergy.py similarity index 100% rename from examples/heinz_highenergy/test_heinz.py rename to examples/heinz_highenergy/heinz_highenergy.py diff --git a/src/hyvr/tools.py b/src/hyvr/tools.py index 09a87f6..cddfa5a 100644 --- a/src/hyvr/tools.py +++ b/src/hyvr/tools.py @@ -4,7 +4,17 @@ from .utils import ferguson_theta_ode from scipy.spatial.distance import pdist, squareform -def ferguson_curve(h, k, eps_factor, flow_angle, s_max, xstart, ystart, extra_noise=0.0): + +def ferguson_curve( + h: np.float64, + k: np.float64, + eps_factor: np.float64, + flow_angle: np.float64, + s_max: np.float64, + xstart: np.float64, + ystart: np.float64, + extra_noise=0.0, +): """ Simulate extruded parabola centrelines using the Ferguson (1976) disturbed meander model Implementation of AR2 autoregressive model @@ -35,7 +45,9 @@ def ferguson_curve(h, k, eps_factor, flow_angle, s_max, xstart, ystart, extra_no """ # Parameters # Calculate curve directions - theta, s, xp, yp = ferguson_theta_ode(s_max, eps_factor, k, h, 0., extra_noise)#np.pi / 2) + theta, s, xp, yp, tau = ferguson_theta_ode( + s_max, eps_factor, k, h, 0.0, extra_noise + ) # np.pi / 2) # Interpolate curve direction over interval of interest # s_interp, th_interp = curve_interp(s, theta, 10) @@ -59,9 +71,13 @@ def ferguson_curve(h, k, eps_factor, flow_angle, s_max, xstart, ystart, extra_no outputs[:, 4] = s # Rotate meanders into mean flow direction - rot_angle = flow_angle #-np.mean(theta) - rotMatrix = np.array([[np.cos(rot_angle), -np.sin(rot_angle)], - [np.sin(rot_angle), np.cos(rot_angle)]]) + rot_angle = flow_angle # -np.mean(theta) + rotMatrix = np.array( + [ + [np.cos(rot_angle), -np.sin(rot_angle)], + [np.sin(rot_angle), np.cos(rot_angle)], + ] + ) roro = np.dot(rotMatrix, outputs[:, 0:2].transpose()) outputs[:, 2:4] = np.dot(rotMatrix, outputs[:, 2:4].transpose()).transpose() @@ -74,11 +90,16 @@ def ferguson_curve(h, k, eps_factor, flow_angle, s_max, xstart, ystart, extra_no return outputs[:, 0], outputs[:, 1], outputs[:, 2], outputs[:, 3], outputs[:, 4] -def contact_surface(x:np.array,y:np.array, - mean:np.float64,var:np.float64, - corl:np.array, - mask=None): - ''' + +def contact_surface( + x: np.array, + y: np.array, + mean: np.float64, + var: np.float64, + corl: np.array, + mask=None, +): + """ Creates gaussian random contact surface with mean value and variance input with the spectral method from Dietrich & Newsam (1993). 
Input: @@ -90,18 +111,20 @@ def contact_surface(x:np.array,y:np.array, mask: mask array (same dimensions as x and y) Returns: Z: output np.array with same dimensions as x and y and with Z values corrensponding to the surface - ''' + """ return None + def surface_gauss_regression( - x:np.array, - y:np.array, - mean:np.float64, - variance:np.float64, - corl:np.array, - dataset:np.array, - error:np.array): - ''' + x: np.array, + y: np.array, + mean: np.float64, + variance: np.float64, + corl: np.array, + dataset: np.array, + error: np.array, +): + """ Performs surface gaussian regression on input x,y data with given dataset (x,y,z) and error Based on the algorithm from Rasmussen & Williams (2006) Gaussian Processes for Machine Learning Input: @@ -109,39 +132,38 @@ def surface_gauss_regression( x,y: 2D grid of x and y points Returns: Z: output np.array with same dimensions as x and y and with Z values corrensponding to the surface - ''' + """ # Calculating distances: shape = x.shape x = np.expand_dims(x.ravel(), axis=1) - y = np.expand_dims(y.ravel(),axis=1) + y = np.expand_dims(y.ravel(), axis=1) # distance of the training points: - x_t = np.expand_dims(dataset[:,0].ravel(),axis=1) - y_t = np.expand_dims(dataset[:,1].ravel(),axis=1) - z_t = dataset[:,2].ravel() - x_dist_t = pdist(x_t, 'euclidean') - y_dist_t = pdist(y_t, 'euclidean') - - - #covariance matrix: + x_t = np.expand_dims(dataset[:, 0].ravel(), axis=1) + y_t = np.expand_dims(dataset[:, 1].ravel(), axis=1) + z_t = dataset[:, 2].ravel() + x_dist_t = pdist(x_t, "euclidean") + y_dist_t = pdist(y_t, "euclidean") + + # covariance matrix: @numba.njit() - def kernel_2d(sigma_f, M, x_dist, y_dist)->np.float64: + def kernel_2d(sigma_f, M, x_dist, y_dist) -> np.float64: x = np.expand_dims(np.array((x_dist, y_dist)), axis=1) x = x.astype(np.float64) - cov = sigma_f*np.exp(-(1/2) * x.T @ M @ x) - return cov[0,0] - - M = np.eye(2)*np.array([1/corl[0]**2,1/corl[1]**2]) + cov = sigma_f * np.exp(-(1 / 2) * x.T @ M @ x) + return cov[0, 0] + + M = np.eye(2) * np.array([1 / corl[0] ** 2, 1 / corl[1] ** 2]) @numba.njit() - def cov_matrix(sigma_f,x_dist,y_dist,M): + def cov_matrix(sigma_f, x_dist, y_dist, M): cov = np.zeros(x_dist.shape[0]) for i in range(x_dist.shape[0]): - cov[i] = kernel_2d(sigma_f,M,x_dist[i],y_dist[i]) + cov[i] = kernel_2d(sigma_f, M, x_dist[i], y_dist[i]) return cov - - cov = cov_matrix(variance,x_dist_t,y_dist_t,M) + + cov = cov_matrix(variance, x_dist_t, y_dist_t, M) cov = squareform(cov) cov = cov + np.eye(cov.shape[0]) error = np.diag(error) @@ -150,25 +172,22 @@ def cov_matrix(sigma_f,x_dist,y_dist,M): L, lower = scipy.linalg.cho_factor(cov, lower=True) L_y = scipy.linalg.cho_solve((L, lower), z_t) - #covariance between test and training points: + # covariance between test and training points: @numba.njit() - def distance(x,y,x_t,y_t): - x_dist = np.zeros((x.shape[0],x_t.shape[0])) - y_dist = np.zeros((x.shape[0],x_t.shape[0])) + def distance(x, y, x_t, y_t): + x_dist = np.zeros((x.shape[0], x_t.shape[0])) + y_dist = np.zeros((x.shape[0], x_t.shape[0])) for i in range(x.shape[0]): - x_dist[i,:] = np.ravel((x[i,0]-x_t)**2) - y_dist[i,:] = np.ravel((y[i,0]-y_t)**2) - return x_dist,y_dist - - x_dist,y_dist = distance(x,y,x_t,y_t) + x_dist[i, :] = np.ravel((x[i, 0] - x_t) ** 2) + y_dist[i, :] = np.ravel((y[i, 0] - y_t) ** 2) + return x_dist, y_dist + + x_dist, y_dist = distance(x, y, x_t, y_t) cov_shape = x_dist.shape - cov_test = cov_matrix(variance,x_dist.ravel(),y_dist.ravel(),M) + cov_test = cov_matrix(variance, 
x_dist.ravel(), y_dist.ravel(), M) cov_test = cov_test.reshape(cov_shape) # Mean prediction mean = cov_test.T @ L_y + mean # Variance prediction # to be implemented v = scipy.linalg.solve_triangular(L, cov_test, lower=True) - return np.reshape(mean, shape) - - - + return np.reshape(mean, shape) \ No newline at end of file diff --git a/src/hyvr/utils.py b/src/hyvr/utils.py index 7bc4e1b..78a3b26 100644 --- a/src/hyvr/utils.py +++ b/src/hyvr/utils.py @@ -279,7 +279,7 @@ def min_distance(x, y, P): return d_array, glob_min_idx -def ferguson_theta_ode(s_max, eps_factor, k, h, omega, err:float): +def ferguson_theta_ode(s_max: np.float64, eps_factor: np.float64, k:np.float64, h:np.float64, omega:np.float64, err:np.float64): """ Implementation of (Ferguson, 1976, Eq.9). The equation is formulated as an initial value problem and integrated with scipy function for integration (solve_ivp) @@ -515,7 +515,7 @@ def specsim( two_dim = len(corl) < 3 # boolean weather calculations should be done in two or 3D if two_dim: Y = np.empty(x.shape) - h_square = (x_calc / corl[0]) ** 2 + (y_calc / corl[1]) ** 2 + h_square = 0.5*(x_calc / corl[0]) ** 2 + 0.5*(y_calc / corl[1]) ** 2 else: if mask is None: z_calc = z @@ -523,7 +523,7 @@ def specsim( z_calc = np.where(mask, z, z_calc) Y = np.empty(z.shape) h_square = ( - (x_calc / corl[0]) ** 2 + (y_calc / corl[1]) ** 2 + (z_calc / corl[2]) ** 2 + 0.5 * (x_calc / corl[0]) ** 2 + 0.5* (y_calc / corl[1]) ** 2 + 0.5 * (z_calc / corl[2]) ** 2 ) ntot = h_square.size # Covariance matrix of variables diff --git a/tests/test_surface.py b/tests/test_surface.py index 45a78ae..3cbdcf5 100644 --- a/tests/test_surface.py +++ b/tests/test_surface.py @@ -1,50 +1,71 @@ import numpy as np from src.hyvr.tools import surface_gauss_regression, ferguson_curve +from src.hyvr.utils import specsim + # Test data def test_curve(): - h=0.1, - k=np.pi / 100, - eps_factor=1, + h = 0.1 + k = np.pi / 200 + eps_factor = (np.pi / 1.5) ** 2 channel_curve_1 = ferguson_curve( - h=0.1, - k=np.pi / 200, - eps_factor=(np.pi / 1.5) ** 2, - flow_angle=0.0, - s_max=400, - xstart=0, - ystart=25, + h=h, + k=k, + eps_factor=eps_factor, + flow_angle=0.0, + s_max=400, + xstart=0, + ystart=25, ) x_t, y_t, vx, vy, s = channel_curve_1 +def test_specsim(): + x = np.linspace(0, 100, 1000) + y = np.linspace(0, 100, 1000) + x, y = np.meshgrid(x, y) + mean = 2.5 + variance = 0.05**2 + Z = specsim( + x, + y, + mean=mean, + var=variance, + corl=np.array([0.01, 0.01]), + mask=None, + covmod="gaussian", + ) + assert Z.shape == x.shape + assert np.allclose(Z.mean(), mean, atol=1e-3) + assert np.allclose(Z.var(), variance, atol=1e-3) + + def test_gauss_surface_regression(): - - x = np.linspace(0,100, 1000) - y = np.linspace(0,100, 1000) + x = np.linspace(0, 100, 1000) + y = np.linspace(0, 100, 1000) x, y = np.meshgrid(x, y) # training data: - h=0.4 - k=np.pi / 200 - eps_factor=(np.pi / 1.5) ** 2 + h = 0.4 + k = np.pi / 200 + eps_factor = (np.pi / 1.5) ** 2 x_t, y_t, vx, vy, s = ferguson_curve( - h=h, - k=k, - eps_factor=eps_factor, - flow_angle=0.0, - s_max=400, - xstart=0, - ystart=25, - extra_noise=1e-4, + h=h, + k=k, + eps_factor=eps_factor, + flow_angle=0.0, + s_max=400, + xstart=0, + ystart=25, + extra_noise=1e-4, ) z_t = np.sin(s) dataset = np.array([x_t, y_t, z_t]).T error = np.repeat(0.3, dataset.shape[0]) corl = np.array([10, 10]) - mean = 0. - variance = 1. 
- #Z = surface_gauss_regression(x, y, mean, variance, corl, dataset, error) - #assert Z.shape == x.shape - #assert Z.shape == y.shape + mean = 0.0 + variance = 1.0 + # Z = surface_gauss_regression(x, y, mean, variance, corl, dataset, error) + # assert Z.shape == x.shape + # assert Z.shape == y.shape assert x_t.shape == y_t.shape assert x_t.shape == vx.shape
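Usage note: the new test_specsim test above doubles as a call-pattern reference for the revised specsim. The short sketch below repeats that pattern outside pytest; the hyvr.utils import path, grid resolution, and correlation lengths are illustrative assumptions and are not part of this patch.

import numpy as np
from hyvr.utils import specsim  # assumed installed package path; the test imports src.hyvr.utils

# Build a regular grid and draw one Gaussian random surface realization.
x = np.linspace(0, 100, 200)
y = np.linspace(0, 100, 200)
x, y = np.meshgrid(x, y)

Z = specsim(
    x,
    y,
    mean=2.5,                     # target mean of the surface
    var=0.05**2,                  # target variance of the field
    corl=np.array([10.0, 10.0]),  # correlation lengths in x and y (illustrative values)
    mask=None,
    covmod="gaussian",
)

print(Z.shape, Z.mean(), Z.var())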