From a7509755fffa38a55617b7a977f0308c336e54d1 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Fri, 25 Nov 2022 16:09:42 +0800
Subject: [PATCH 1/6] Update numpy_backend.py

---
 pygmtools/numpy_backend.py | 528 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 527 insertions(+), 1 deletion(-)

diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py
index d5f7d5df..427d1511 100644
--- a/pygmtools/numpy_backend.py
+++ b/pygmtools/numpy_backend.py
@@ -328,6 +328,466 @@ def _check_and_init_gm(K, n1, n2, n1max, n2max, x0):
     return batch_num, n1, n2, n1max, n2max, n1n2, v0
 
+############################################
+#      Multi-Graph Matching Solvers        #
+############################################
+def cao_solver(K, X, num_graph, num_node, max_iter, lambda_init, lambda_step, lambda_max, iter_boost):
+
+    m, n = num_graph, num_node
+    param_lambda = lambda_init
+
+    def _comp_aff_score(x, k):
+        return np.expand_dims(np.expand_dims(pygmtools.utils.compute_affinity_score(x, k, backend='numpy'),axis=-1),axis=-1)
+
+    for iter in range(max_iter):
+        if iter >= iter_boost:
+            param_lambda = np.min([param_lambda * lambda_step, lambda_max])
+        # pair_con = get_batch_pc_opt(X)
+        pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m)
+        pair_aff = pair_aff - np.eye(m) * pair_aff
+        norm = np.max(pair_aff)
+        for i in range(m):
+            for j in range(m):
+                if i >= j:
+                    continue
+                aff_ori = _comp_aff_score(X[i, j], K[i, j]) / norm
+                con_ori = _get_single_pc_opt(X, i, j)
+                # con_ori = torch.sqrt(pair_con[i, j])
+                if iter < iter_boost:
+                    score_ori = aff_ori
+                else:
+                    score_ori = aff_ori * (1 - param_lambda) + con_ori * param_lambda
+                X_upt = X[i, j]
+                for k in range(m):
+                    X_combo = np.matmul(X[i, k], X[k, j])
+                    aff_combo = _comp_aff_score(X_combo, K[i, j]) / norm
+                    con_combo = _get_single_pc_opt(X, i, j, X_combo)
+                    # con_combo = torch.sqrt(pair_con[i, k] * pair_con[k, j])
+                    if iter < iter_boost:
+                        score_combo = aff_combo
+                    else:
+                        score_combo = aff_combo * (1 - param_lambda) + con_combo * param_lambda
+                    if score_combo > score_ori:
+                        X_upt = X_combo
+                X[i, j] = X_upt
+                X[j, i] = X_upt.swapaxes(0,1)
+    return X
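+
+# Illustrative usage sketch for cao_solver (a sketch only, assuming a toy
+# problem with m graphs of equal size n and identity pairwise matchings as the
+# initialization; the public pygm.cao wrapper normally builds K from graph
+# data and X from a two-graph QAP solver). `_cao_solver_usage_sketch` is a
+# hypothetical helper, not part of the pygmtools API.
+def _cao_solver_usage_sketch(m=3, n=4):
+    K = np.random.rand(m, m, n * n, n * n)  # random affinity, for shape demo only
+    X = np.tile(np.eye(n), (m, m, 1, 1))    # identity pairwise matchings
+    return cao_solver(K, X, num_graph=m, num_node=n, max_iter=6,
+                      lambda_init=0.3, lambda_step=1.1, lambda_max=1.0, iter_boost=2)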
+
+def cao_fast_solver(K, X, num_graph, num_node, max_iter, lambda_init, lambda_step, lambda_max, iter_boost):
+    r"""
+    Numpy implementation of CAO solver in fast config (mode="pc")
+
+    :param K: affinity matrix, (m, m, n*n, n*n)
+    :param X: initial matching, (m, m, n, n)
+    :param num_graph: number of graphs, int
+    :param num_node: number of nodes, int
+    :return: X, (m, m, n, n)
+    """
+    m, n = num_graph, num_node
+    param_lambda = lambda_init
+
+    def _comp_aff_score(x, k):
+        return np.expand_dims(np.expand_dims(pygmtools.utils.compute_affinity_score(x, k, backend='numpy'),axis=-1),axis=-1)
+
+    mask1 = np.arange(m).reshape(m, 1).repeat(m,axis=1)
+    mask2 = np.arange(m).reshape(1, m).repeat(m,axis=0)
+    mask = (mask1 < mask2).astype(float)
+    X_mask = mask.reshape(m, m, 1, 1)
+
+    for iter in range(max_iter):
+        if iter >= iter_boost:
+            param_lambda = np.min([param_lambda * lambda_step, lambda_max])
+
+        pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m)
+        pair_aff = pair_aff - np.eye(m) * pair_aff
+        norm = np.max(pair_aff)
+
+        X1 = X.reshape(m, 1, m, n, n)
+        X1 = np.tile(X1,(1, m, 1, 1, 1)).reshape(-1, n, n)  # X1[i,j,k] = X[i,k]
+        X2 = X.reshape(1, m, m, n, n)
+        X2 = np.tile(X2,(m, 1, 1, 1, 1)).swapaxes(1, 2).reshape(-1, n, n)  # X2[i,j,k] = X[k,j]
+        X_combo = np.matmul(X1, X2).reshape(m, m, m, n, n)  # X_combo[i,j,k] = X[i, k] * X[k, j]
+
+        aff_ori = (_comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m)
+        pair_con = _get_batch_pc_opt(X)
+        con_ori = np.sqrt(pair_con)
+
+        K_repeat = np.repeat(K.reshape(m, m, 1, n * n, n * n),m,axis=2).reshape(-1, n * n, n * n)
+        aff_combo = (_comp_aff_score(X_combo.reshape(-1, n, n), K_repeat) / norm).reshape(m, m, m)
+        con1 = pair_con.reshape(m, 1, m)
+        con1 = np.tile(con1,(1, m, 1))  # con1[i,j,k] = pair_con[i,k]
+        con2 = pair_con.reshape(1, m, m)
+        con2 = np.tile(con2,(m, 1, 1)).swapaxes(1,2)  # con2[i,j,k] = pair_con[j,k]
+        con_combo = np.sqrt(con1 * con2)
+
+        if iter < iter_boost:
+            score_ori = aff_ori
+            score_combo = aff_combo
+        else:
+            score_ori = aff_ori * (1 - param_lambda) + con_ori * param_lambda
+            score_combo = aff_combo * (1 - param_lambda) + con_combo * param_lambda
+
+        idx = np.argmax(score_combo,axis=-1)
+        score_combo = np.max(score_combo, axis=-1)
+
+        assert np.all(score_combo >= score_ori), np.min(score_combo - score_ori)
+        X_upt = X_combo[mask1, mask2, idx, :, :]
+        X = X_upt * X_mask + X_upt.swapaxes(0,1).swapaxes(2,3) * X_mask.swapaxes(0,1) + X * (1 - X_mask - X_mask.swapaxes(0, 1))
+        assert np.all(X.swapaxes(0,1).swapaxes(2,3) == X)
+    return X
+
+def mgm_floyd_solver(K, X, num_graph, num_node, param_lambda):
+    m, n = num_graph, num_node
+
+    def _comp_aff_score(x, k):
+        return np.expand_dims(np.expand_dims(pygmtools.utils.compute_affinity_score(x, k, backend='numpy'),axis=-1),axis=-1)
+
+    for k in range(m):
+        pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m)
+        pair_aff = pair_aff - np.eye(m) * pair_aff
+        norm = np.max(pair_aff)
+
+        # print("iter:{} aff:{:.4f} con:{:.4f}".format(
+        #     k, torch.mean(pair_aff).item(), torch.mean(get_batch_pc_opt(X)).item()
+        # ))
+
+        for i in range(m):
+            for j in range(m):
+                if i >= j:
+                    continue
+                score_ori = _comp_aff_score(X[i, j], K[i, j]) / norm
+                X_combo = np.matmul(X[i, k], X[k, j])
+                score_combo = _comp_aff_score(X_combo, K[i, j]) / norm
+
+                if score_combo > score_ori:
+                    X[i, j] = X_combo
+                    X[j, i] = X_combo.swapaxes(0, 1)
+
+    for k in range(m):
+        pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m)
+        pair_aff = pair_aff - np.eye(m) * pair_aff
+        norm = np.max(pair_aff)
+
+        pair_con = _get_batch_pc_opt(X)
+        for i in range(m):
+            for j in range(m):
+                if i >= j:
+                    continue
+                aff_ori = _comp_aff_score(X[i, j], K[i, j]) / norm
+                con_ori = _get_single_pc_opt(X, i, j)
+                # con_ori = torch.sqrt(pair_con[i, j])
+                score_ori = aff_ori * (1 - param_lambda) + con_ori * param_lambda
+
+                X_combo = np.matmul(X[i, k], X[k, j])
+                aff_combo = _comp_aff_score(X_combo, K[i, j]) / norm
+                con_combo = _get_single_pc_opt(X, i, j, X_combo)
+                # con_combo = torch.sqrt(pair_con[i, k] * pair_con[k, j])
+                score_combo = aff_combo * (1 - param_lambda) + con_combo * param_lambda
+
+                if score_combo > score_ori:
+                    X[i, j] = X_combo
+                    X[j, i] = X_combo.swapaxes(0,1)
+    return X
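+
+# The Floyd-style update above composes two matchings, X[i, j] <- X[i, k] @ X[k, j].
+# A self-contained check (an illustrative sketch, not used by the solver) that
+# composing two permutation matrices yields another permutation matrix, so the
+# update keeps X a valid matching:
+def _composition_is_permutation_sketch(n=5):
+    P = np.eye(n)[np.random.permutation(n)]
+    Q = np.eye(n)[np.random.permutation(n)]
+    R = np.matmul(P, Q)
+    return bool(np.all(R.sum(axis=0) == 1) and np.all(R.sum(axis=1) == 1))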
+
+def mgm_floyd_fast_solver(K, X, num_graph, num_node, param_lambda):
+    m, n = num_graph, num_node
+
+    def _comp_aff_score(x, k):
+        return np.expand_dims(np.expand_dims(pygmtools.utils.compute_affinity_score(x, k, backend='numpy'),axis=-1),axis=-1)
+
+    mask1 = np.arange(m).reshape(m, 1).repeat(m,axis=1)
+    mask2 = np.arange(m).reshape(1, m).repeat(m,axis=0)
+    mask = (mask1 < mask2).astype(float)
+    X_mask = mask.reshape(m, m, 1, 1)
+
+    for k in range(m):
+        pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m)
+        pair_aff = pair_aff - np.eye(m) * pair_aff
+        norm = np.max(pair_aff)
+
+        # print("iter:{} aff:{:.4f} con:{:.4f}".format(
+        #     k, torch.mean(pair_aff).item(), torch.mean(get_batch_pc_opt(X)).item()
+        # ))
+
+        X1 = X[:, k].reshape(m, 1, n, n)
+        X1 = np.tile(X1,(1, m, 1, 1)).reshape(-1, n, n)  # X1[i, j] = X[i, k]
+        X2 = X[k, :].reshape(1, m, n, n)
+        X2 = np.tile(X2,(m, 1, 1, 1)).reshape(-1, n, n)  # X2[i, j] = X[k, j]
+        X_combo = np.matmul(X1, X2).reshape(m, m, n, n)
+
+        aff_ori = (_comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m)
+        aff_combo = (_comp_aff_score(X_combo.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m)
+
+        score_ori = aff_ori
+        score_combo = aff_combo
+
+        upt = (score_ori < score_combo).astype(float)
+        upt = (upt * mask).reshape(m, m, 1, 1)
+        X = X * (1.0 - upt) + X_combo * upt
+        X = X * X_mask + X.swapaxes(0,1).swapaxes(2, 3) * (1 - X_mask)
+
+    for k in range(m):
+        pair_aff = _comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)).reshape(m, m)
+        pair_aff = pair_aff - np.eye(m) * pair_aff
+        norm = np.max(pair_aff)
+
+        pair_con = _get_batch_pc_opt(X)
+
+        X1 = X[:, k].reshape(m, 1, n, n)
+        X1 = np.tile(X1,(1, m, 1, 1)).reshape(-1, n, n)  # X1[i, j] = X[i, k]
+        X2 = X[k, :].reshape(1, m, n, n)
+        X2 = np.tile(X2,(m, 1, 1, 1)).reshape(-1, n, n)  # X2[i, j] = X[k, j]
+        X_combo = np.matmul(X1, X2).reshape(m, m, n, n)
+
+        aff_ori = (_comp_aff_score(X.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m)
+        aff_combo = (_comp_aff_score(X_combo.reshape(-1, n, n), K.reshape(-1, n * n, n * n)) / norm).reshape(m, m)
+
+        con_ori = np.sqrt(pair_con)
+        con1 = pair_con[:, k].reshape(m, 1).repeat(m,axis=1)
+        con2 = pair_con[k, :].reshape(1, m).repeat(m,axis=0)
+        con_combo = np.sqrt(con1 * con2)
+
+        score_ori = aff_ori * (1 - param_lambda) + con_ori * param_lambda
+        score_combo = aff_combo * (1 - param_lambda) + con_combo * param_lambda
+
+        upt = (score_ori < score_combo).astype(float)
+        upt = (upt * mask).reshape(m, m, 1, 1)
+        X = X * (1.0 - upt) + X_combo * upt
+        X = X * X_mask + X.swapaxes(0,1).swapaxes(2, 3) * (1 - X_mask)
+    return X
+
+def _get_single_pc_opt(X, i, j, Xij=None):
+    """
+    CAO/Floyd helper function (compute consistency)
+    :param X: (m, m, n, n) all the matching results
+    :param i: index
+    :param j: index
+    :return: the consistency of X_ij
+    """
+    m, _, n, _ = X.shape
+    if Xij is None:
+        Xij = X[i, j]
+    X1 = X[i, :].reshape(-1, n, n)
+    X2 = X[:, j].reshape(-1, n, n)
+    X_combo = np.matmul(X1, X2)
+    pair_con = 1 - np.sum(np.abs(Xij - X_combo)) / (2 * n * m)
+    return pair_con
+
+def _get_batch_pc_opt(X):
+    """
+    CAO/Floyd-fast helper function (compute consistency in batch)
+    :param X: (m, m, n, n) all the matching results
+    :return: (m, m) the consistency of X
+    """
+    m = X.shape[0]
+    n = X.shape[2]
+    X1 = X.reshape(m, 1, m, n, n)
+    X1 = np.tile(X1,(1, m, 1, 1, 1)).reshape(-1, n, n)  # X1[i, j, k] = X[i, k]
+    X2 = X.reshape(1, m, m, n, n)
+    X2 = np.tile(X2,(m, 1, 1, 1, 1)).swapaxes(1,2).reshape(-1, n, n)  # X2[i, j, k] = X[k, j]
+    X_combo = np.matmul(X1, X2).reshape(m, m, m, n, n)
+    X_ori = X.reshape(m, m, 1, n, n)
+    X_ori = np.tile(X_ori,(1, 1, m, 1, 1))
+    pair_con = 1 - np.sum(np.abs(X_combo - X_ori), axis=(2, 3, 4)) / (2 * n * m)
+    return pair_con
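+
+# Sanity-check sketch for the two consistency helpers above (illustrative
+# only, not part of the solvers): matchings of the form X[i, j] = P_i^T P_j
+# are perfectly cycle-consistent, so both helpers should report consistency 1.
+def _pc_opt_sanity_sketch(m=4, n=5):
+    Ps = [np.eye(n)[np.random.permutation(n)] for _ in range(m)]
+    X = np.stack([np.stack([np.matmul(Pi.T, Pj) for Pj in Ps]) for Pi in Ps])
+    assert np.isclose(_get_single_pc_opt(X, 0, 1), 1.0)
+    assert np.allclose(_get_batch_pc_opt(X), 1.0)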
+
+def gamgm(
+        A, W, ns, n_univ, U0,
+        init_tau, min_tau, sk_gamma,
+        sk_iter, max_iter, quad_weight,
+        converge_thresh, outlier_thresh, bb_smooth,
+        verbose,
+        cluster_M=None, projector='sinkhorn', hung_iter=True  # these arguments are reserved for clustering
+):
+    """
+    Numpy implementation of Graduated Assignment for Multi-Graph Matching (with compatibility for 2GM and clustering)
+    """
+    num_graphs = A.shape[0]
+    if ns is None:
+        ns = np.full((num_graphs,), A.shape[1], dtype='i4')
+    n_indices = np.cumsum(ns, axis=0)
+
+    # build a super adjacency matrix A
+    supA = np.zeros((n_indices[-1], n_indices[-1]))
+    for i in range(num_graphs):
+        start_n = n_indices[i] - ns[i]
+        end_n = n_indices[i]
+        supA[start_n:end_n, start_n:end_n] = A[i, :ns[i], :ns[i]]
+
+    # handle the type of n_univ
+    if type(n_univ) is np.ndarray:
+        n_univ = n_univ.item()
+
+    # randomly init U
+    if U0 is None:
+        U0 = np.full((n_indices[-1], n_univ), 1 / n_univ)
+        U0 += np.random.rand(n_indices[-1], n_univ) / 1000
+
+    # init cluster_M if not given
+    if cluster_M is None:
+        cluster_M = np.ones((num_graphs, num_graphs))
+
+    # reshape W into supW
+    supW = np.zeros((n_indices[-1], n_indices[-1]))
+    for i, j in itertools.product(range(num_graphs), repeat=2):
+        start_x = n_indices[i] - ns[i]
+        end_x = n_indices[i]
+        start_y = n_indices[j] - ns[j]
+        end_y = n_indices[j]
+        supW[start_x:end_x, start_y:end_y] = W[i, j, :ns[i], :ns[j]]
+
+    U = gamgm_real(
+        supA, supW, ns, n_indices, n_univ, num_graphs, U0,
+        init_tau, min_tau, sk_gamma,
+        sk_iter, max_iter, quad_weight,
+        converge_thresh, outlier_thresh,
+        verbose,
+        cluster_M, projector, hung_iter
+    )
+
+    result = pygmtools.utils.MultiMatchingResult(True, 'numpy')
+
+    for i in range(num_graphs):
+        start_n = n_indices[i] - ns[i]
+        end_n = n_indices[i]
+        result[i] = U[start_n:end_n]
+
+    return result
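+
+# Layout sketch for the "super" matrices built in gamgm above (an illustrative
+# helper, not part of the solver): supA is block-diagonal in the per-graph
+# adjacency matrices and supW tiles the pairwise node affinities, with graph i
+# occupying rows/columns [n_indices[i] - ns[i], n_indices[i]).
+def _super_block_slices_sketch(ns):
+    ends = np.cumsum(ns)
+    return [(int(e - n), int(e)) for n, e in zip(ns, ends)]
+# e.g. _super_block_slices_sketch(np.array([2, 3])) == [(0, 2), (2, 5)]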
+
+def gamgm_real(
+        supA, supW, ns, n_indices, n_univ, num_graphs, U0,
+        init_tau, min_tau, sk_gamma,
+        sk_iter, max_iter, quad_weight,
+        converge_thresh, outlier_thresh,
+        verbose,
+        cluster_M, projector, hung_iter  # these arguments are reserved for clustering
+    ):
+    """
+    The real forward function of GAMGM
+    """
+    U = U0
+    sinkhorn_tau = init_tau
+    iter_flag = True
+
+    while iter_flag:
+        for i in range(max_iter):
+            # compact matrix form update of V
+            UUt = np.matmul(U, U.T)
+            lastUUt = UUt
+            cluster_weight = np.repeat(cluster_M, ns.astype('i4'), axis=0)
+            cluster_weight = np.repeat(cluster_weight, ns.astype('i4'), axis=1)
+            quad = np.matmul(np.matmul(np.matmul(supA, UUt * cluster_weight), supA), U) * quad_weight * 2
+            unary = np.matmul(supW * cluster_weight, U)
+            if verbose:
+                if projector == 'sinkhorn':
+                    print_str = f'tau={sinkhorn_tau:.3e}'
+                else:
+                    print_str = 'hungarian'
+                print(print_str + f' #iter={i}/{max_iter} '
+                      f'quad score: {(quad * U).sum():.3e}, unary score: {(unary * U).sum():.3e}')
+            V = (quad + unary) / num_graphs
+
+            U_list = []
+            if projector == 'hungarian':
+                n_start = 0
+                for n_end in n_indices:
+                    U_list.append(pygmtools.hungarian(V[n_start:n_end, :n_univ], backend='numpy'))
+                    n_start = n_end
+            elif projector == 'sinkhorn':
+                if np.all(ns == ns[0]):
+                    if ns[0] <= n_univ:
+                        U_list.append(
+                            sinkhorn(
+                                V.reshape(num_graphs, -1, n_univ),
+                                max_iter=sk_iter, tau=sinkhorn_tau, batched_operation=True, dummy_row=True
+                            ).reshape(-1, n_univ))
+                    else:
+                        U_list.append(
+                            sinkhorn(
+                                V.reshape(num_graphs, -1, n_univ).swapaxes(1, 2),
+                                max_iter=sk_iter, tau=sinkhorn_tau, batched_operation=True, dummy_row=True
+                            ).swapaxes(1, 2).reshape(-1, n_univ))
+                else:
+                    V_list = []
+                    n1 = []
+                    n_start = 0
+                    for n_end in n_indices:
+                        V_list.append(V[n_start:n_end, :n_univ])
+                        n1.append(n_end - n_start)
+                        n_start = n_end
+                    V_batch = build_batch(V_list)
+                    n1 = np.ndarray(n1)
+                    U = sinkhorn(V_batch, n1,
+                                 max_iter=sk_iter, tau=sinkhorn_tau, batched_operation=True, dummy_row=True)
+                    n_start = 0
+                    for idx, n_end in enumerate(n_indices):
+                        U_list.append(U[idx, :n_end - n_start, :])
+                        n_start = n_end
+            else:
+                raise NameError('Unknown projector name: {}'.format(projector))
+
+            U = np.concatenate(U_list, axis=0)
+            if num_graphs == 2:
+                U[:ns[0], :] = np.eye(ns[0], n_univ)
+
+            # calculate gap to discrete
+            if projector == 'sinkhorn' and verbose:
+                U_list_hung = []
+                n_start = 0
+                for n_end in n_indices:
+                    U_list_hung.append(pygmtools.hungarian(V[n_start:n_end, :n_univ], backend='numpy'))
+                    n_start = n_end
+                U_hung = np.concatenate(U_list_hung, axis=0)
+                diff = np.linalg.norm(np.matmul(U, U.T) - lastUUt)
+                print(f'tau={sinkhorn_tau:.3e} #iter={i}/{max_iter} '
+                      f'gap to discrete: {np.mean(np.abs(U - U_hung)):.3e}, iter diff: {diff:.3e}')
+
+            if projector == 'hungarian' and outlier_thresh > 0:
+                U_hung = U
+                UUt = np.matmul(U_hung, U_hung.T)
+                cluster_weight = np.repeat(cluster_M, ns.astype('i4'), axis=0)
+                cluster_weight = np.repeat(cluster_weight, ns.astype('i4'), axis=1)
+                quad = np.linalg.multi_dot([supA, UUt * cluster_weight, supA, U_hung]) * quad_weight * 2
+                unary = np.matmul(supW * cluster_weight, U_hung)
+                max_vals = (unary + quad).max(axis=1)
+                U = U * (unary + quad > outlier_thresh)
+                if verbose:
+                    print(f'hungarian #iter={i}/{max_iter} '
+                          f'unary+quad score thresh={outlier_thresh:.3f}, #>thresh={np.sum(max_vals > outlier_thresh)}/{max_vals.shape[0]}'
+                          f' min:{max_vals.min():.4f}, mean:{max_vals.mean():.4f}, median:{np.median(max_vals):.4f}, max:{max_vals.max():.4f}')
+
+            if np.linalg.norm(np.matmul(U, U.T) - lastUUt) < converge_thresh:
+                break
+
+        if verbose: print('-' * 20)
+
+        if i == max_iter - 1:  # not converged
+            if hung_iter:
+                pass
+            else:
+                U_list = [pygmtools.hungarian(_, backend='numpy') for _ in U_list]
+                U = np.concatenate(U_list, axis=0)
+                break
+
+        # projection control
+        if projector == 'hungarian':
+            break
+        elif sinkhorn_tau > min_tau:
+            sinkhorn_tau *= sk_gamma
+        else:
+            if hung_iter:
+                projector = 'hungarian'
+            else:
+                U_list = [pygmtools.hungarian(_, backend='numpy') for _ in U_list]
+                U = np.concatenate(U_list, axis=0)
+                break
+
+    return U
+
+############################################
+#         Neural Network Solvers           #
+############################################
+
 
 #############################################
 #              Utils Functions              #
 #############################################
 
@@ -337,7 +797,7 @@ def inner_prod_aff_fn(feat1, feat2):
     """
     numpy implementation of inner product affinity function
     """
-    return np.matmul(feat1, feat2.transpose((0, 2, 1)))
+    return np.matmul(feat1, feat2.swapaxes(1,2))
 
 
 def gaussian_aff_fn(feat1, feat2, sigma):
@@ -390,6 +850,15 @@ def dense_to_sparse(dense_adj):
     edge_weight = build_batch([dense_adj[b][(conn[b, :, 0], conn[b, :, 1])] for b in range(batch_size)])
     return conn, np.expand_dims(edge_weight, axis=-1), nedges
 
+def compute_affinity_score(X, K):
+    """
+    Numpy implementation of computing affinity score
+    """
+    b, n, _ = X.shape
+    vx = X.swapaxes(1,2).reshape(b, -1, 1)  # (b, n*n, 1)
+    vxt = vx.swapaxes(1, 2)  # (b, 1, n*n)
+    affinity = np.squeeze(np.squeeze(np.matmul(np.matmul(vxt, K), vx),axis=-1),axis=-1)
+    return affinity
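+
+# Illustrative check of the identity computed above (a sketch, not used by the
+# library): the affinity score equals vec(X)^T K vec(X), where vec() is
+# column-major vectorization -- hence the swapaxes before the reshape.
+def _affinity_identity_sketch(b=2, n=3):
+    X = np.random.rand(b, n, n)
+    K = np.random.rand(b, n * n, n * n)
+    vx = X.swapaxes(1, 2).reshape(b, -1)
+    ref = np.einsum('bi,bij,bj->b', vx, K, vx)
+    assert np.allclose(compute_affinity_score(X, K), ref)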
 
 def to_numpy(input):
     """
     identity function
@@ -404,7 +873,64 @@ def from_numpy(input, device):
     """
     return input
 
+def generate_isomorphic_graphs(node_num, graph_num, node_feat_dim=0):
+    """
+    Numpy implementation of generate_isomorphic_graphs
+    """
+    X_gt = np.zeros((graph_num, node_num, node_num))
+    X_gt[0, np.arange(0, node_num, dtype='i4'), np.arange(0, node_num, dtype='i4')] = 1
+    for i in range(graph_num):
+        if i > 0:
+            X_gt[i, np.arange(0, node_num, dtype='i4'), np.random.permutation(node_num)] = 1
+    joint_X = X_gt.reshape(graph_num * node_num, node_num)
+    X_gt = np.matmul(joint_X, joint_X.T)
+    X_gt = X_gt.reshape(graph_num, node_num, graph_num, node_num).transpose(0, 2, 1, 3)
+    A0 = np.random.rand(node_num, node_num)
+    A0[np.arange(node_num),np.arange(node_num)] = 0
+    As = [A0]
+    for i in range(graph_num):
+        if i > 0:
+            As.append(np.matmul(np.matmul(X_gt[i, 0], A0), X_gt[0, i]))
+    if node_feat_dim > 0:
+        F0 = np.random.rand(node_num, node_feat_dim)
+        Fs = [F0]
+        for i in range(graph_num):
+            if i > 0:
+                Fs.append(np.matmul(X_gt[i, 0], F0))
+        return np.stack(As,axis=0), X_gt, np.stack(Fs,axis=0)
+    else:
+        return np.stack(As,axis=0), X_gt
+
+"""
+def permutation_loss(pred_dsmat:np.ndarray, gt_perm: np.ndarray, n1: np.ndarray, n2:np.ndarray) -> np.ndarray:
+
+    #Numpy implementation of permutation_loss
+
+    batch_num = pred_dsmat.shape[0]
+    pred_dsmat = pred_dsmat.to(dtype='f')
+
+    if not np.all((pred_dsmat >= 0) * (pred_dsmat <= 1)):
+        raise ValueError("pred_dsmat contains invalid numerical entries.")
+    if not np.all((gt_perm >= 0) * (gt_perm <= 1)):
+        raise ValueError("gt_perm contains invalid numerical entries.")
+
+    if n1 is None:
+        n1 = np.array([pred_dsmat.shape[1] for _ in range(batch_num)])
+    if n2 is None:
+        n2 = np.array([pred_dsmat.shape[2] for _ in range(batch_num)])
+
+    loss = np.array(0.)
+    n_sum = np.zeros_like(loss)
+    for b in range(batch_num):
+        batch_slice = [b, slice(n1[b]), slice(n2[b])]
+        loss += array.nn.functional.binary_cross_entropy(
+            pred_dsmat[batch_slice],
+            gt_perm[batch_slice],
+            reduction='sum')
+        n_sum += n1[b].to(n_sum.dtype).to(pred_dsmat.device)
+
+    return loss / n_sum
+"""
 
 def _aff_mat_from_node_edge_aff(node_aff: np.ndarray, edge_aff: np.ndarray, connectivity1: np.ndarray, connectivity2: np.ndarray,
                                 n1, n2, ne1, ne2):
     """

From 82800a1d05b2e1d28411a7a8d7880def712761c4 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Fri, 25 Nov 2022 16:11:06 +0800
Subject: [PATCH 2/6] Update test_multi_graph_solvers.py

---
 tests/test_multi_graph_solvers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_multi_graph_solvers.py b/tests/test_multi_graph_solvers.py
index bd8cbb5f..65d6b91a 100644
--- a/tests/test_multi_graph_solvers.py
+++ b/tests/test_multi_graph_solvers.py
@@ -145,7 +145,7 @@ def test_cao():
         'qap_solver': [functools.partial(pygm.ipfp, n1max=num_nodes, n2max=num_nodes), None],
         'edge_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.), pygm.utils.inner_prod_aff_fn],
         'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn]
-    }, ['pytorch', 'paddle', 'jittor'])
+    }, ['pytorch', 'numpy', 'paddle', 'jittor'])
     for i in range(max_retries - 1):
         error_flag = False
         try:
@@ -170,7 +170,7 @@ def test_mgm_floyd():
         'qap_solver': [functools.partial(pygm.ipfp, n1max=num_nodes, n2max=num_nodes), None],
         'edge_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.), pygm.utils.inner_prod_aff_fn],
         'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn]
-    }, ['pytorch', 'paddle', 'jittor'])
+    }, ['pytorch', 'numpy', 'paddle', 'jittor'])
     for i in range(max_retries - 1):
         error_flag = False
         try:
@@ -194,7 +194,7 @@ def test_gamgm():
         'sk_min_tau': [0.1, 0.05],
         'param_lambda': [0.1, 0.5],
         'node_aff_fn': [functools.partial(pygm.utils.gaussian_aff_fn, sigma=.1), pygm.utils.inner_prod_aff_fn]
-    }, ['pytorch', 'paddle', 'jittor']
+    }, ['pytorch', 'numpy', 'paddle', 'jittor']
     )
     for i in range(max_retries - 1):
         error_flag = False

From 6a0d9b45061cd7ee58822785ace76911e60b22b5 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Fri, 25 Nov 2022 16:28:30 +0800
Subject: [PATCH 3/6] Update numpy_backend.py

---
 pygmtools/numpy_backend.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py
index 427d1511..a34d7053 100644
--- a/pygmtools/numpy_backend.py
+++ b/pygmtools/numpy_backend.py
@@ -1,8 +1,10 @@
+import itertools
+import functools
 import scipy.special
 import scipy.optimize
 import numpy as np
 from multiprocessing import Pool
-
+import pygmtools.utils
 
 #############################################
 #    Linear Assignment Problem Solvers      #

From 1afb7ac97b2c952dcefb4a5522618542016181a3 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Mon, 28 Nov 2022 12:52:50 +0800
Subject: [PATCH 4/6] Update numpy_backend.py

---
 pygmtools/numpy_backend.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py
index a34d7053..3ee47e33 100644
--- a/pygmtools/numpy_backend.py
+++ b/pygmtools/numpy_backend.py
@@ -718,7 +718,6 @@ def gamgm_real(
                         n1.append(n_end - n_start)
                         n_start = n_end
                     V_batch = build_batch(V_list)
-                    n1 = np.ndarray(n1)
                     U = sinkhorn(V_batch, n1,
                                  max_iter=sk_iter, tau=sinkhorn_tau, batched_operation=True, dummy_row=True)
                     n_start = 0

From 55aec4052a4167c74b1ccbc132d268ef28ff5d09 Mon Sep 17 00:00:00 2001
From: heatingma <115260102+heatingma@users.noreply.github.com>
Date: Mon, 28 Nov 2022 12:55:18 +0800
Subject: [PATCH 5/6] Update multi_graph_solvers.py

---
 pygmtools/multi_graph_solvers.py | 144 +++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)

diff --git a/pygmtools/multi_graph_solvers.py b/pygmtools/multi_graph_solvers.py
index f8979683..63935b2c 100644
--- a/pygmtools/multi_graph_solvers.py
+++ b/pygmtools/multi_graph_solvers.py
@@ -56,6 +56,53 @@ def cao(K, x0=None, qap_solver=None,
     Multi-graph matching methods process all graphs at once and do not support the additional batch dimension. Please
     note that this behavior is different from two-graph matching solvers in :mod:`~pygmtools.classic_solvers`.
 
+    .. dropdown:: Numpy Example
+
+        ::
+
+            >>> import numpy as np
+            >>> import pygmtools as pygm
+            >>> pygm.BACKEND = 'numpy'
+            >>> np.random.seed(1)
+
+            # Generate 10 isomorphic graphs
+            >>> graph_num = 10
+            >>> As, X_gt = pygm.utils.generate_isomorphic_graphs(node_num=4, graph_num=10)
+            >>> As_1, As_2 = [], []
+            >>> for i in range(graph_num):
+            ...     for j in range(graph_num):
+            ...         As_1.append(As[i])
+            ...         As_2.append(As[j])
+            >>> As_1 = np.stack(As_1, axis=0)
+            >>> As_2 = np.stack(As_2, axis=0)
+
+            # Build affinity matrix
+            >>> conn1, edge1, ne1 = pygm.utils.dense_to_sparse(As_1)
+            >>> conn2, edge2, ne2 = pygm.utils.dense_to_sparse(As_2)
+            >>> import functools
+            >>> gaussian_aff = functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.)  # set affinity function
+            >>> K = pygm.utils.build_aff_mat(None, edge1, conn1, None, edge2, conn2, None, None, None, None, edge_aff_fn=gaussian_aff)
+            >>> K = K.reshape(graph_num, graph_num, 4*4, 4*4)
+            >>> K.shape
+            (10, 10, 16, 16)
+
+            # Solve the multi-matching problem
+            >>> X = pygm.cao(K)
+            >>> (X * X_gt).sum() / X_gt.sum()
+            1.0
+
+            # Use the IPFP solver for two-graph matching
+            >>> ipfp_func = functools.partial(pygm.ipfp, n1max=4, n2max=4)
+            >>> X = pygm.cao(K, qap_solver=ipfp_func)
+            >>> (X * X_gt).sum() / X_gt.sum()
+            1.0
+
+            # Run the faster version of the CAO algorithm
+            >>> X = pygm.cao(K, mode='fast')
+            >>> (X * X_gt).sum() / X_gt.sum()
+            1.0
+
     .. dropdown:: Pytorch Example
 
         ::
 
@@ -290,6 +337,53 @@ def mgm_floyd(K, x0=None, qap_solver=None,
     :param backend: (default: ``pygmtools.BACKEND`` variable) the backend for computation.
     :return: :math:`(m\times m \times n \times n)` the multi-graph matching result
 
+    .. dropdown:: Numpy Example
+
+        ::
+
+            >>> import numpy as np
+            >>> import pygmtools as pygm
+            >>> pygm.BACKEND = 'numpy'
+            >>> np.random.seed(1)
+
+            # Generate 10 isomorphic graphs
+            >>> graph_num = 10
+            >>> As, X_gt = pygm.utils.generate_isomorphic_graphs(node_num=4, graph_num=10)
+            >>> As_1, As_2 = [], []
+            >>> for i in range(graph_num):
+            ...     for j in range(graph_num):
+            ...         As_1.append(As[i])
+            ...         As_2.append(As[j])
+            >>> As_1 = np.stack(As_1, axis=0)
+            >>> As_2 = np.stack(As_2, axis=0)
+
+            # Build affinity matrix
+            >>> conn1, edge1, ne1 = pygm.utils.dense_to_sparse(As_1)
+            >>> conn2, edge2, ne2 = pygm.utils.dense_to_sparse(As_2)
+            >>> import functools
+            >>> gaussian_aff = functools.partial(pygm.utils.gaussian_aff_fn, sigma=1.)  # set affinity function
+            >>> K = pygm.utils.build_aff_mat(None, edge1, conn1, None, edge2, conn2, None, None, None, None, edge_aff_fn=gaussian_aff)
+            >>> K = K.reshape(graph_num, graph_num, 4*4, 4*4)
+            >>> K.shape
+            (10, 10, 16, 16)
+
+            # Solve the multi-matching problem
+            >>> X = pygm.mgm_floyd(K)
+            >>> (X * X_gt).sum() / X_gt.sum()
+            1.0
+
+            # Use the IPFP solver for two-graph matching
+            >>> ipfp_func = functools.partial(pygm.ipfp, n1max=4, n2max=4)
+            >>> X = pygm.mgm_floyd(K, qap_solver=ipfp_func)
+            >>> (X * X_gt).sum() / X_gt.sum()
+            1.0
+
+            # Run the faster version of the Floyd algorithm
+            >>> X = pygm.mgm_floyd(K, mode='fast')
+            >>> (X * X_gt).sum() / X_gt.sum()
+            1.0
+
     .. dropdown:: Pytorch Example
 
         ::
 
@@ -558,6 +652,56 @@ def gamgm(A, W,
     Setting ``verbose=True`` may help you tune the parameters.
 
+    .. dropdown:: Numpy Example
+
+        ::
+
+            >>> import numpy as np
+            >>> import pygmtools as pygm
+            >>> import itertools
+            >>> import time
+            >>> pygm.BACKEND = 'numpy'
+            >>> np.random.seed(1)
+
+            # Generate 10 isomorphic graphs
+            >>> graph_num = 10
+            >>> As, X_gt, Fs = pygm.utils.generate_isomorphic_graphs(node_num=4, graph_num=10, node_feat_dim=20)
+
+            # Compute node-wise similarity by inner-product and Sinkhorn
+            >>> W = np.matmul(np.expand_dims(Fs,axis=1), np.expand_dims(Fs.swapaxes(1, 2),axis=0))
+            >>> W = pygm.sinkhorn(W.reshape(graph_num ** 2, 4, 4)).reshape(graph_num, graph_num, 4, 4)
+
+            # Solve the multi-matching problem
+            >>> X = pygm.gamgm(As, W)
+            >>> matched = 0
+            >>> for i, j in itertools.product(range(graph_num), repeat=2):
+            ...     matched += (X[i,j] * X_gt[i,j]).sum()
+            >>> acc = matched / X_gt.sum()
+            >>> acc
+            1.0
+
+            # This function supports graphs with different nodes (also known as partial matching)
+            # In the following we ignore the last node from the last 5 graphs
+            >>> ns = np.array([4, 4, 4, 4, 4, 3, 3, 3, 3, 3], dtype='i4')
+            >>> for i in range(graph_num):
+            ...     As[i, ns[i]:, :] = 0
+            ...     As[i, :, ns[i]:] = 0
+            >>> for i, j in itertools.product(range(graph_num), repeat=2):
+            ...     X_gt[i, j, ns[i]:, :] = 0
+            ...     X_gt[i, j, :, ns[j]:] = 0
+            ...     W[i, j, ns[i]:, :] = 0
+            ...     W[i, j, :, ns[j]:] = 0
+
+            # Partial matching is challenging and the following parameters are carefully tuned
+            >>> X = pygm.gamgm(As, W, ns, n_univ=4, sk_init_tau=.1, sk_min_tau=0.01, param_lambda=0.3)
+
+            # Check the partial matching result
+            >>> matched = 0
+            >>> for i, j in itertools.product(range(graph_num), repeat=2):
+            ...     matched += (X[i,j] * X_gt[i, j, :ns[i], :ns[j]]).sum()
+            >>> matched / X_gt.sum()
+            1.0
+
     .. dropdown:: Pytorch Example
 
         ::
 

From 21248f19e868e1fd7d2e45126eac3492f309cfa7 Mon Sep 17 00:00:00 2001
From: Runzhong Wang <18309862+rogerwwww@users.noreply.github.com>
Date: Mon, 28 Nov 2022 13:06:26 +0800
Subject: [PATCH 6/6] fix double blank lines between functions

---
 pygmtools/numpy_backend.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/pygmtools/numpy_backend.py b/pygmtools/numpy_backend.py
index 3ee47e33..e48d0a19 100644
--- a/pygmtools/numpy_backend.py
+++ b/pygmtools/numpy_backend.py
@@ -375,6 +375,7 @@ def _comp_aff_score(x, k):
         X[j, i] = X_upt.swapaxes(0,1)
     return X
 
+
 def cao_fast_solver(K, X, num_graph, num_node, max_iter, lambda_init, lambda_step, lambda_max, iter_boost):
     r"""
     Numpy implementation of CAO solver in fast config (mode="pc")
@@ -438,6 +439,7 @@ def _comp_aff_score(x, k):
     assert np.all(X.swapaxes(0,1).swapaxes(2,3) == X)
     return X
 
+
 def mgm_floyd_solver(K, X, num_graph, num_node, param_lambda):
     m, n = num_graph, num_node
 
@@ -491,6 +493,7 @@ def _comp_aff_score(x, k):
         X[j, i] = X_combo.swapaxes(0,1)
     return X
 
+
 def mgm_floyd_fast_solver(K, X, num_graph, num_node, param_lambda):
     m, n = num_graph, num_node
 
@@ -558,6 +561,7 @@ def _comp_aff_score(x, k):
         X = X * X_mask + X.swapaxes(0,1).swapaxes(2, 3) * (1 - X_mask)
     return X
 
+
 def _get_single_pc_opt(X, i, j, Xij=None):
     """
     CAO/Floyd helper function (compute consistency)
@@ -575,6 +579,7 @@ def _get_single_pc_opt(X, i, j, Xij=None):
     pair_con = 1 - np.sum(np.abs(Xij - X_combo)) / (2 * n * m)
     return pair_con
 
+
 def _get_batch_pc_opt(X):
     """
     CAO/Floyd-fast helper function (compute consistency in batch)
@@ -656,6 +661,7 @@ def gamgm(
 
     return result
 
+
 def gamgm_real(
         supA, supW, ns, n_indices, n_univ, num_graphs, U0,
         init_tau, min_tau, sk_gamma,
@@ -785,10 +791,12 @@ def gamgm_real(
 
     return U
 
+
 ############################################
 #         Neural Network Solvers           #
 ############################################
 
+
 #############################################
 #              Utils Functions              #
 #############################################
@@ -851,6 +859,7 @@ def dense_to_sparse(dense_adj):
     edge_weight = build_batch([dense_adj[b][(conn[b, :, 0], conn[b, :, 1])] for b in range(batch_size)])
     return conn, np.expand_dims(edge_weight, axis=-1), nedges
 
+
 def compute_affinity_score(X, K):
     """
     Numpy implementation of computing affinity score
@@ -861,6 +870,7 @@ def compute_affinity_score(X, K):
     affinity = np.squeeze(np.squeeze(np.matmul(np.matmul(vxt, K), vx),axis=-1),axis=-1)
     return affinity
 
+
 def to_numpy(input):
     """
     identity function
@@ -874,6 +884,7 @@ def
 from_numpy(input, device):
     """
     return input
 
+
 def generate_isomorphic_graphs(node_num, graph_num, node_feat_dim=0):
     """
     Numpy implementation of generate_isomorphic_graphs
@@ -901,6 +912,8 @@ def generate_isomorphic_graphs(node_num, graph_num, node_feat_dim=0):
         return np.stack(As,axis=0), X_gt, np.stack(Fs,axis=0)
     else:
         return np.stack(As,axis=0), X_gt
+
+
 """
 def permutation_loss(pred_dsmat:np.ndarray, gt_perm: np.ndarray, n1: np.ndarray, n2:np.ndarray) -> np.ndarray:
 
@@ -932,6 +945,8 @@ def permutation_loss(pred_dsmat:np.ndarray, gt_perm: np.ndarray, n1: np.ndarray,
 
     return loss / n_sum
 """
+
+
 def _aff_mat_from_node_edge_aff(node_aff: np.ndarray, edge_aff: np.ndarray, connectivity1: np.ndarray, connectivity2: np.ndarray,
                                 n1, n2, ne1, ne2):
     """
@@ -1020,6 +1035,7 @@ def _transpose(input: np.ndarray, dim1, dim2):
     """
     return np.swapaxes(input, dim1, dim2)
 
+
 def _mm(input1: np.ndarray, input2: np.ndarray):
     """
     numpy implementation of _mm