Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

The accuracy of g-PINNs is worse than PINNs when solving the inverse problem #5

Open
Letmesleepp opened this issue Mar 1, 2023 · 1 comment

Comments

@Letmesleepp
Copy link

Thanks for this excellent python library.
I tried to solve one chromatography process inverse problem using DeepXDE. The estimated parameters are close to the true value, but the convergence didn't achieve.
Then, I also used g-PINNs to solve the same problem. However, g-PINNs failed to predict the unknown parameters.
Could you give me some hints or suggestions?
Here are the codes of PINNs and g-PINNs:
PINNs

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import solve_bvp
import sys
import re

import deepxde as dde
from deepxde.backend import tf
import math
import pandas as pd

L = 0.1  # Column length(m)
d = 0.0212  # Column diameter(m)
A = np.pi*d*d/4  # Column cross sectional area (m2)
e = 0.62  # porosity
v = 30.0/1000.0/1000/60.0/A/e  # Velocity (m/s)
f = 12  # Feed concentration (-)
te = 280  # final time (s)
t_inj = 50  # first injected time for component A and B (s)


# Scale the problem
scale_x = 100
scale_t = 1/28
scale_y = 1
L_scaled = L * scale_x
v_scaled = v * scale_x / scale_t
t_scaled = te * scale_t
t_inj_scaled = t_inj * scale_t


# Datasets
data1 = pd.read_csv(r"C:\Users\10716\OneDrive\桌面\DeepXDE\SMB\Inverse_A_50s_FC12.csv")
data2 = pd.read_csv(r"C:\Users\10716\OneDrive\桌面\DeepXDE\SMB\Inverse_B_50s_FC12.csv")

np.save(r"C:\Users\10716\OneDrive\桌面\DeepXDE (5)\SMB\Inverse_A_50s_FC12", data1)
np.save(r"C:\Users\10716\OneDrive\桌面\DeepXDE (5)\SMB\Inverse_B_50s_FC12", data2)

traindata1 = np.load(r"C:\Users\10716\OneDrive\桌面\DeepXDE (5)\SMB\Inverse_A_50s_FC12.npy")
traindata2 = np.load(r"C:\Users\10716\OneDrive\桌面\DeepXDE (5)\SMB\Inverse_B_50s_FC12.npy")


xx1 = traindata1[1:, 2:3]
tt1 = traindata1[1:, 0:1]
Ca = traindata1[1:, 1:2]
xx2 = traindata2[1:, 2:3]
tt2 = traindata2[1:, 0:1]
Cb = traindata2[1:, 1:2]

observe_x1, observe_Ca = np.hstack((scale_x * xx1, scale_t * tt1)), Ca
observe_x2, observe_Cb = np.hstack((scale_x * xx2, scale_t * tt2)), Cb

observe_y1 = dde.icbc.PointSetBC(observe_x1, Ca, component=0)
observe_y2 = dde.icbc.PointSetBC(observe_x2, Cb, component=1)

observe_x = np.vstack((observe_x1, observe_x2))


# Non-uniform distribution for the left boundary condition
z = np.linspace(0, (t_inj / 280) * L_scaled, num=200)
t = 0
Z, T = np.meshgrid(z, t)
Training_Points = np.vstack((Z.flatten(), T.flatten())).T

Extra_Points = np.vstack((observe_x, Training_Points))


# Constraint the search range of unknown parameters
ka_scaled_ = dde.Variable(0.0)  # Mass transfer coefficient of A (1/s)
kb_scaled_ = dde.Variable(0.0)  # Mass transfer coefficient of B (1/s)

Ha_ = dde.Variable(0.0)  # Henry constant of A (-)
Hb_ = dde.Variable(0.0)  # Henry constant of B (-)

ba_ = dde.Variable(0.0)  # equilibrium coefficient
bb_ = dde.Variable(0.0)  # equilibrium coefficient

ka_scaled = 10 * tf.tanh(ka_scaled_) + (0.5 / scale_t)
kb_scaled = 10 * tf.tanh(kb_scaled_) + (0.5 / scale_t)

Ha = 10 * tf.tanh(Ha_) + 10
Hb = 10 * tf.tanh(Hb_) + 10

ba = 0.5 * tf.tanh(ba_) + 0.5
bb = 0.5 * tf.tanh(bb_) + 0.5


geom = dde.geometry.Interval(0, L_scaled)
timedomain = dde.geometry.TimeDomain(0, t_scaled)
geomtime = dde.geometry.GeometryXTime(geom, timedomain)


# PINNs
def pde(x, y):
    y_star = scale_y * y
    ca, cb, qa, qb = y_star[:, 0:1], y_star[:, 1:2], y_star[:, 2:3], y_star[:, 3:4]

    ca_x = dde.grad.jacobian(y_star, x, i=0, j=0)
    ca_t = dde.grad.jacobian(y_star, x, i=0, j=1)

    cb_x = dde.grad.jacobian(y_star, x, i=1, j=0)
    cb_t = dde.grad.jacobian(y_star, x, i=1, j=1)

    qa_t = dde.grad.jacobian(y_star, x, i=2, j=1)

    qb_t = dde.grad.jacobian(y_star, x, i=3, j=1)

    massbalance_liquid_a = (
            ca_t + (1 - e) / e * qa_t + v_scaled * ca_x
    )
    massbalance_liquid_b = (
            cb_t + (1 - e) / e * qb_t + v_scaled * cb_x
    )
    massbalance_solid_a = (
            ka_scaled * ((Ha * ca) / (1 + ba * ca + bb * cb) - qa) - qa_t
    )
    massbalance_solid_b = (
            kb_scaled * ((Hb * cb) / (1 + ba * ca + bb * cb) - qb) - qb_t
    )
    return [massbalance_liquid_a, massbalance_liquid_b, massbalance_solid_a, massbalance_solid_b]


def feed_concentration(x):
    # x, t = np.split(x, 2, axis=1)
    return np.piecewise(x[:, 1:], [x[:, 1:] <= t_inj_scaled, x[:, 1:] > t_inj_scaled], [lambda x: f, lambda x: 0])


def boundary_beg(x, on_boundary):
    return on_boundary and np.isclose(x[0], 0)


bc_beg_ca = dde.DirichletBC(
    geomtime, feed_concentration, boundary_beg, component=0
)
bc_beg_cb = dde.DirichletBC(
    geomtime, feed_concentration, boundary_beg, component=1
)

initial_condition_ca = dde.IC(
    geomtime, lambda x: 0, lambda _, on_initial: on_initial, component=0
)
initial_condition_cb = dde.IC(
    geomtime, lambda x: 0, lambda _, on_initial: on_initial, component=1
)
initial_condition_qa = dde.IC(
    geomtime, lambda x: 0, lambda _, on_initial: on_initial, component=2
)
initial_condition_qb = dde.IC(
    geomtime, lambda x: 0, lambda _, on_initial: on_initial, component=3
)


data = dde.data.TimePDE(
    geomtime,
    pde,
    [initial_condition_ca,
     initial_condition_cb,
     initial_condition_qa,
     initial_condition_qb,
     bc_beg_ca,
     bc_beg_cb,
     observe_y1,
     observe_y2],
    num_domain=1500,
    num_initial=300,
    num_boundary=900,
    anchors=Extra_Points,
)
layer_size = [2] + [50] * 4 + [4]
activation = "tanh"
initializer = "Glorot uniform"
net = dde.maps.FNN(layer_size, activation, initializer)
# net.apply_output_transform(lambda x, y: y * scale_y)

gPINNmodel = dde.Model(data, net)
resampler = dde.callbacks.PDEResidualResampler(period=100)
gPINNmodel.compile("adam", lr=0.0001, external_trainable_variables=[ka_scaled, kb_scaled, Ha, Hb, ba, bb], loss_weights=[60, 150, 1e-2, 1e-2, 1, 1, 1, 1, 1, 1, 1, 1])
variable = dde.callbacks.VariableValue([ka_scaled * scale_t, kb_scaled * scale_t, Ha, Hb, ba, bb], period=1000, filename="variables.dat")
losshistory, train_state = gPINNmodel.train(epochs=200000, callbacks=[variable, resampler], disregard_previous_best=True)


# plots
"""Get the domain: x = L_scaled and t from 0 to t_scaled"""
X_nn = L_scaled * np.ones((100, 1))
T_nn = np.linspace(0, t_scaled, 100).reshape(100, 1)
X_pred = np.append(X_nn, T_nn, axis=1)

y_pred = gPINNmodel.predict(X_pred)
ca_pred, cb_pred, qa_pred, qb_pred = y_pred[:, 0:1], y_pred[:, 1:2], y_pred[:, 2:3], y_pred[:, 3:]
plt.figure()
plt.plot(T_nn / scale_t, ca_pred, color='blue', linewidth=3., label='Concentration A')
plt.plot(T_nn / scale_t, cb_pred, color='red', linewidth=3., label='Concentration B')
# plt.plot(X_pred, qa_pred)
# plt.plot(X_pred, qb_pred)

plt.legend()
plt.grid(True)
plt.xlabel('t')
plt.ylabel('Concentration')

plt.show()

dde.saveplot(losshistory, train_state, issave=True, isplot=True)

g-PINNs

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import solve_bvp
import sys
import re

import deepxde as dde
from deepxde.backend import tf
import math
import pandas as pd

L = 0.1  # Column length(m)
d = 0.0212  # Column diameter(m)
A = np.pi*d*d/4  # Column cross sectional area (m2)
e = 0.62  # porosity
v = 30.0/1000.0/1000/60.0/A/e  # Velocity (m/s)
f = 12  # Feed concentration (-)
te = 280  # final time (s)
t_inj = 50  # first injected time for component A and B (s)


# Scale the problem
scale_x = 100
scale_t = 1/28
scale_y = 1
L_scaled = L * scale_x
v_scaled = v * scale_x / scale_t
t_scaled = te * scale_t
t_inj_scaled = t_inj * scale_t


# Datasets
data1 = pd.read_csv(r"C:\Users\10716\OneDrive\桌面\DeepXDE\SMB\Inverse_A_50s_FC12.csv")
data2 = pd.read_csv(r"C:\Users\10716\OneDrive\桌面\DeepXDE\SMB\Inverse_B_50s_FC12.csv")

np.save(r"C:\Users\10716\OneDrive\桌面\DeepXDE (5)\SMB\Inverse_A_50s_FC12", data1)
np.save(r"C:\Users\10716\OneDrive\桌面\DeepXDE (5)\SMB\Inverse_B_50s_FC12", data2)

traindata1 = np.load(r"C:\Users\10716\OneDrive\桌面\DeepXDE (5)\SMB\Inverse_A_50s_FC12.npy")
traindata2 = np.load(r"C:\Users\10716\OneDrive\桌面\DeepXDE (5)\SMB\Inverse_B_50s_FC12.npy")


xx1 = traindata1[1:, 2:3]
tt1 = traindata1[1:, 0:1]
Ca = traindata1[1:, 1:2]
xx2 = traindata2[1:, 2:3]
tt2 = traindata2[1:, 0:1]
Cb = traindata2[1:, 1:2]

observe_x1, observe_Ca = np.hstack((scale_x * xx1, scale_t * tt1)), Ca
observe_x2, observe_Cb = np.hstack((scale_x * xx2, scale_t * tt2)), Cb

observe_y1 = dde.icbc.PointSetBC(observe_x1, Ca, component=0)
observe_y2 = dde.icbc.PointSetBC(observe_x2, Cb, component=1)

observe_x = np.vstack((observe_x1, observe_x2))


# Non-uniform distribution for the left boundary condition
z = np.linspace(0, (t_inj / 280) * L_scaled, num=200)
t = 0
Z, T = np.meshgrid(z, t)
Training_Points = np.vstack((Z.flatten(), T.flatten())).T

Extra_Points = np.vstack((observe_x, Training_Points))


# Constraint the search range of unknown parameters
ka_scaled_ = dde.Variable(0.0)  # Mass transfer coefficient of A (1/s)
kb_scaled_ = dde.Variable(0.0)  # Mass transfer coefficient of B (1/s)

Ha_ = dde.Variable(0.0)  # Henry constant of A (-)
Hb_ = dde.Variable(0.0)  # Henry constant of B (-)

ba_ = dde.Variable(0.0)  # equilibrium coefficient
bb_ = dde.Variable(0.0)  # equilibrium coefficient

ka_scaled = 4 * tf.tanh(ka_scaled_) + (0.3 / scale_t)
kb_scaled = 4 * tf.tanh(kb_scaled_) + (0.3 / scale_t)

Ha = 3 * tf.tanh(Ha_) + 5
Hb = 3 * tf.tanh(Hb_) + 5

ba = 0.2 * tf.tanh(ba_) + 0.3
bb = 0.2 * tf.tanh(bb_) + 0.3

geom = dde.geometry.Interval(0, L_scaled)
timedomain = dde.geometry.TimeDomain(0, t_scaled)
geomtime = dde.geometry.GeometryXTime(geom, timedomain)


# gPINNs
def pde(x, y):
    y_star = scale_y * y
    ca, cb, qa, qb = y_star[:, 0:1], y_star[:, 1:2], y_star[:, 2:3], y_star[:, 3:4]

    ca_x = dde.grad.jacobian(y_star, x, i=0, j=0)
    ca_t = dde.grad.jacobian(y_star, x, i=0, j=1)
    ca_xx = dde.grad.hessian(y_star, x, i=0, j=0, component=0)
    ca_xt = dde.grad.hessian(y_star, x, i=0, j=1, component=0)
    ca_tt = dde.grad.hessian(y_star, x, i=1, j=1, component=0)

    cb_x = dde.grad.jacobian(y_star, x, i=1, j=0)
    cb_t = dde.grad.jacobian(y_star, x, i=1, j=1)
    cb_xx = dde.grad.hessian(y_star, x, i=0, j=0, component=1)
    cb_xt = dde.grad.hessian(y_star, x, i=0, j=1, component=1)
    cb_tt = dde.grad.hessian(y_star, x, i=1, j=1, component=1)

    qa_x = dde.grad.jacobian(y_star, x, i=2, j=0)
    qa_t = dde.grad.jacobian(y_star, x, i=2, j=1)
    qa_xt = dde.grad.hessian(y_star, x, i=0, j=1, component=2)
    qa_tt = dde.grad.hessian(y_star, x, i=1, j=1, component=2)

    qb_x = dde.grad.jacobian(y_star, x, i=3, j=0)
    qb_t = dde.grad.jacobian(y_star, x, i=3, j=1)
    qb_xt = dde.grad.hessian(y_star, x, i=0, j=1, component=3)
    qb_tt = dde.grad.hessian(y_star, x, i=1, j=1, component=3)

    massbalance_liquid_a = (
            ca_t + (1 - e) / e * qa_t + v_scaled * ca_x
    )
    massbalance_liquid_b = (
            cb_t + (1 - e) / e * qb_t + v_scaled * cb_x
    )
    massbalance_solid_a = (
            ka_scaled * ((Ha * ca) / (1 + ba * ca + bb * cb) - qa) - qa_t
    )
    massbalance_solid_b = (
            kb_scaled * ((Hb * cb) / (1 + ba * ca + bb * cb) - qb) - qb_t
    )
    additional_loss_term_1 = (
            ca_tt + (1 - e) / e * qa_tt + v_scaled * ca_xt
    )
    additional_loss_term_2 = (
            ca_xt + (1 - e) / e * qa_xt + v_scaled * ca_xx
    )
    additional_loss_term_3 = (
            cb_tt + (1 - e) / e * qb_tt + v_scaled * cb_xt
    )
    additional_loss_term_4 = (
            cb_xt + (1 - e) / e * qb_xt + v_scaled * cb_xx
    )
    additional_loss_term_5 = (
            ka_scaled * ((Ha * (ca_t * (1 + ba * ca + bb * cb) - ca * (ba * ca_t + bb * cb_t))) / (1 + ba * ca + bb * cb)**2 - qa_t) - qa_tt
    )
    additional_loss_term_6 = (
            ka_scaled * ((Ha * (ca_x * (1 + ba * ca + bb * cb) - ca * (ba * ca_x + bb * cb_x))) / (1 + ba * ca + bb * cb)**2 - qa_x) - qa_xt
    )
    additional_loss_term_7 = (
            kb_scaled * ((Hb * (cb_t * (1 + ba * ca + bb * cb) - cb * (ba * ca_t + bb * cb_t))) / (1 + ba * ca + bb * cb)**2 - qb_t) - qb_tt
    )
    additional_loss_term_8 = (
            kb_scaled * ((Hb * (cb_x * (1 + ba * ca + bb * cb) - cb * (ba * ca_x + bb * cb_x))) / (1 + ba * ca + bb * cb)**2 - qb_x) - qb_xt
    )
    return [massbalance_liquid_a, massbalance_liquid_b, massbalance_solid_a, massbalance_solid_b,
            additional_loss_term_1, additional_loss_term_2, additional_loss_term_3, additional_loss_term_4,
            additional_loss_term_5, additional_loss_term_6, additional_loss_term_7, additional_loss_term_8]


def feed_concentration(x):
    # x, t = np.split(x, 2, axis=1)
    return np.piecewise(x[:, 1:], [x[:, 1:] <= t_inj_scaled, x[:, 1:] > t_inj_scaled], [lambda x: f, lambda x: 0])


def boundary_beg(x, on_boundary):
    return on_boundary and np.isclose(x[0], 0)


bc_beg_ca = dde.DirichletBC(
    geomtime, feed_concentration, boundary_beg, component=0
)
bc_beg_cb = dde.DirichletBC(
    geomtime, feed_concentration, boundary_beg, component=1
)

initial_condition_ca = dde.IC(
    geomtime, lambda x: 0, lambda _, on_initial: on_initial, component=0
)
initial_condition_cb = dde.IC(
    geomtime, lambda x: 0, lambda _, on_initial: on_initial, component=1
)
initial_condition_qa = dde.IC(
    geomtime, lambda x: 0, lambda _, on_initial: on_initial, component=2
)
initial_condition_qb = dde.IC(
    geomtime, lambda x: 0, lambda _, on_initial: on_initial, component=3
)


data = dde.data.TimePDE(
    geomtime,
    pde,
    [initial_condition_ca,
     initial_condition_cb,
     initial_condition_qa,
     initial_condition_qb,
     bc_beg_ca,
     bc_beg_cb,
     observe_y1,
     observe_y2],
    num_domain=1000,
    num_initial=300,
    num_boundary=500,
    anchors=Extra_Points,
)
layer_size = 2, [20, 20, 20, 20], [20, 20, 20, 20], [20, 20, 20, 20], 4
activation = "tanh"
initializer = "Glorot uniform"
net = dde.maps.PFNN(layer_size, activation, initializer)
# net.apply_output_transform(lambda x, y: y * scale_y)

gPINNmodel = dde.Model(data, net)
resampler = dde.callbacks.PDEResidualResampler(period=100)
gPINNmodel.compile("adam", lr=0.0001, external_trainable_variables=[ka_scaled, kb_scaled, Ha, Hb, ba, bb], loss_weights=[60, 150, 1e-2, 1e-2, 1e-2, 1e-2, 1e-2, 1e-2, 1e-2, 1e-2, 1e-2, 1e-2, 1, 1, 1, 1, 1, 1, 1, 1])
variable = dde.callbacks.VariableValue([ka_scaled * scale_t, kb_scaled * scale_t, Ha, Hb, ba, bb], period=1000, filename="variables.dat")
losshistory, train_state = gPINNmodel.train(epochs=100000, callbacks=[variable, resampler], disregard_previous_best=True)


# plots
"""Get the domain: x = L_scaled and t from 0 to t_scaled"""
X_nn = L_scaled * np.ones((100, 1))
T_nn = np.linspace(0, t_scaled, 100).reshape(100, 1)
X_pred = np.append(X_nn, T_nn, axis=1)

y_pred = gPINNmodel.predict(X_pred)
ca_pred, cb_pred, qa_pred, qb_pred = y_pred[:, 0:1], y_pred[:, 1:2], y_pred[:, 2:3], y_pred[:, 3:]
plt.figure()
plt.plot(T_nn / scale_t, ca_pred, color='blue', linewidth=3., label='Concentration A')
plt.plot(T_nn / scale_t, cb_pred, color='red', linewidth=3., label='Concentration B')
# plt.plot(X_pred, qa_pred)
# plt.plot(X_pred, qb_pred)

plt.legend()
plt.grid(True)
plt.xlabel('t')
plt.ylabel('Concentration')

plt.show()

dde.saveplot(losshistory, train_state, issave=True, isplot=True)

@lululxvi
Copy link
Member

lululxvi commented Mar 8, 2023

You may tune the hyperparameters such as the loss weights.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants