From caf534bdb93c02679cebf795fe075e1cf79e685e Mon Sep 17 00:00:00 2001
From: Bell <35297086+doabell@users.noreply.github.com>
Date: Fri, 5 Apr 2024 15:54:41 -0400
Subject: [PATCH] feat: hypergraph random edge shuffle (#531)

* feat: random edge shuffles

* refactor random_edge_shuffle as Hypergraph method

* docs: add missing hypergraph methods

* tests: verify edge dict dual
---
 .../core/xgi.core.hypergraph.Hypergraph.rst   |  3 +
 tests/core/test_hypergraph.py                 | 64 +++++++++++++++
 xgi/core/hypergraph.py                        | 81 +++++++++++++++++++
 3 files changed, 148 insertions(+)

diff --git a/docs/source/api/core/xgi.core.hypergraph.Hypergraph.rst b/docs/source/api/core/xgi.core.hypergraph.Hypergraph.rst
index d1afde383..8006542b8 100644
--- a/docs/source/api/core/xgi.core.hypergraph.Hypergraph.rst
+++ b/docs/source/api/core/xgi.core.hypergraph.Hypergraph.rst
@@ -40,8 +40,11 @@
       ~Hypergraph.remove_node_from_edge
       ~Hypergraph.clear
       ~Hypergraph.clear_edges
+      ~Hypergraph.merge_duplicate_edges
       ~Hypergraph.cleanup
       ~Hypergraph.freeze
+      ~Hypergraph.double_edge_swap
+      ~Hypergraph.random_edge_shuffle
 
    .. rubric:: Methods that return other hypergraphs
 
diff --git a/tests/core/test_hypergraph.py b/tests/core/test_hypergraph.py
index b3a330c58..2437aeca2 100644
--- a/tests/core/test_hypergraph.py
+++ b/tests/core/test_hypergraph.py
@@ -478,6 +478,70 @@ def test_double_edge_swap(edgelist1):
         H.double_edge_swap(6, 7, 2, 3)
 
 
+def test_random_edge_shuffle(edgelist4):
+    # trivial hypergraph
+    H0 = xgi.trivial_hypergraph()
+    with pytest.raises(ValueError):
+        H0.random_edge_shuffle()
+
+    # hypergraph with only two edges
+    S = xgi.Hypergraph([[0, 1, 2, 3], [2, 3, 5, 6, 8]])
+    H = S.copy()
+    H.random_edge_shuffle()
+
+    # the intersection of the two edges is preserved
+    assert {2, 3}.issubset(H._edge[0])
+    assert {2, 3}.issubset(H._edge[1])
+
+    # edge sizes are preserved
+    assert len(H._edge[0]) == len(S._edge[0])
+    assert len(H._edge[1]) == len(S._edge[1])
+
+    # verify dual of edge dict is nodes dict
+    assert xgi.utilities.dual_dict(H._edge) == H._node
+
+    # hypergraph with more than two edges
+    S = xgi.Hypergraph(edgelist4)
+    H = S.copy()
+
+    # specify edges to shuffle
+    H.random_edge_shuffle(e_id1=0, e_id2=1)
+
+    # not shuffled edges are preserved
+    assert H._edge[2] == S._edge[2]
+
+    # the intersection of the two edges is preserved
+    assert {2, 3}.issubset(H._edge[0])
+    assert {2, 3}.issubset(H._edge[1])
+
+    # edge sizes are preserved
+    for edge_id in H._edge:
+        assert len(H._edge[edge_id]) == len(S._edge[edge_id])
+
+    # verify dual of edge dict is nodes dict
+    assert xgi.utilities.dual_dict(H._edge) == H._node
+
+    # random hypergraph
+    S = xgi.random_hypergraph(50, [0.1, 0.01, 0.001], seed=1)
+    H = S.copy()
+    H.random_edge_shuffle()
+
+    # number of nodes and edges are preserved
+    assert H.num_nodes == S.num_nodes
+    assert H.num_edges == S.num_edges
+
+    # all edge sizes are preserved
+    for edge_id in H._edge:
+        assert len(H._edge[edge_id]) == len(S._edge[edge_id])
+
+    # all node degrees are preserved
+    for node_id in H._node:
+        assert len(H._node[node_id]) == len(S._node[node_id])
+
+    # verify dual of edge dict is nodes dict
+    assert xgi.utilities.dual_dict(H._edge) == H._node
+
+
 def test_duplicate_edges(edgelist1):
     H = xgi.Hypergraph(edgelist1)
     assert list(H.edges.duplicates()) == []
diff --git a/xgi/core/hypergraph.py b/xgi/core/hypergraph.py
index 0b605400a..14693eab0 100644
--- a/xgi/core/hypergraph.py
+++ b/xgi/core/hypergraph.py
@@ -1,4 +1,5 @@
 """Base class for undirected hypergraphs."""
+import random
 from collections import defaultdict
 from collections.abc import Hashable, Iterable
 from copy import copy, deepcopy
@@ -958,6 +959,86 @@ def double_edge_swap(self, n_id1, n_id2, e_id1, e_id2):
         self._edge[e_id1] = temp_members1
         self._edge[e_id2] = temp_members2
 
+    def random_edge_shuffle(self, e_id1=None, e_id2=None):
+        """Randomly redistributes nodes between two hyperedges.
+
+        The process is as follows:
+
+        1. randomly select two hyperedges
+        2. place all their nodes into a single bucket
+        3. randomly redistribute the nodes between those two hyperedges
+
+        Parameters
+        ----------
+        e_id1 : edge ID, optional
+            ID of first edge to shuffle.
+        e_id2 : edge ID, optional
+            ID of second edge to shuffle.
+
+        Notes
+        -----
+        After shuffling, the sizes of the two hyperedges are unchanged.
+        Edge IDs and attributes are also unchanged.
+        If the same node appears in both hyperedges, then this is still true after reshuffling.
+        If either `e_id1` or `e_id2` is not provided, both are ignored and two distinct random edges are selected.
+
+        References
+        ----------
+        Philip S. Chodrow, 2020.
+        "Configuration models of random hypergraphs."
+        Journal of Complex Networks, 8(3).
+        https://doi.org/10.1093/comnet/cnaa018
+
+        Examples
+        --------
+        >>> import xgi
+        >>> random.seed(42)
+        >>> H = xgi.Hypergraph([[1, 2, 3], [3, 4], [4, 5]])
+        >>> H.random_edge_shuffle()
+        >>> H.edges.members()
+        [{2, 4, 5}, {3, 4}, {1, 3}]
+
+        """
+        if len(self._edge) < 2:
+            raise ValueError("Hypergraph must have at least two edges.")
+
+        # select two distinct random edges unless both IDs were given
+        if e_id1 is None or e_id2 is None:
+            e_id1, e_id2 = random.sample(list(self._edge), 2)
+
+        # extract edges (sets of nodes); the stored sets are mutated in place below
+        e1 = self._edge[e_id1]
+        e2 = self._edge[e_id2]
+
+        # nodes in both edges should not be shuffled
+        nodes_both = e1 & e2
+        e1 -= nodes_both
+        e2 -= nodes_both
+
+        # put all nodes in a single bucket
+        nodes = e1 | e2
+
+        # randomly redistribute nodes between the two edges (edge sizes are kept)
+        e1_new = set(random.sample(list(nodes), len(e1)))
+        e2_new = nodes - e1_new
+
+        # update the node memberships of nodes that moved to the other edge
+        for n_id in e1_new & e2:
+            self._node[n_id].remove(e_id2)
+            self._node[n_id].add(e_id1)
+
+        for n_id in e2_new & e1:
+            self._node[n_id].remove(e_id1)
+            self._node[n_id].add(e_id2)
+
+        # add nodes in both edges back
+        e1_new |= nodes_both
+        e2_new |= nodes_both
+
+        # store the reshuffled member sets under the original edge IDs
+        self._edge[e_id1] = e1_new
+        self._edge[e_id2] = e2_new
+
     def add_node_to_edge(self, edge, node):
         """Add one node to an existing edge.
 