Merge pull request #102 from pygod-team/dmgd

Add DMGD
pygod-team · Feb 1, 2024 · 23a63cb · 23a63cb
2 parents a5ead23 + a9a98b2
commit 23a63cb
Show file tree

Hide file tree

Showing 33 changed files with 691 additions and 116 deletions.
diff --git a/.github/workflows/testing-cron.yml b/.github/workflows/testing-cron.yml
@@ -28,7 +28,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install torch --index-url https://download.pytorch.org/whl/cpu
+        pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
         pip install torch_geometric
         pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
         pip install pytest

diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
@@ -33,7 +33,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install torch --index-url https://download.pytorch.org/whl/cpu
+        pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
         pip install torch_geometric
         pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
         pip install pytest

diff --git a/README.rst b/README.rst
@@ -184,11 +184,12 @@ DONE                2020   MLP+AE       Yes          [#Bandyopadhyay2020Outlier]
 AdONE               2020   MLP+AE       Yes          [#Bandyopadhyay2020Outlier]_
 AnomalyDAE          2020   GNN+AE       Yes          [#Fan2020AnomalyDAE]_
 GAAN                2020   GAN          Yes          [#Chen2020Generative]_
+DMGD                2020   GNN+AE       Yes          [#Bandyopadhyay2020Integrating]_
 OCGNN               2021   GNN          Yes          [#Wang2021One]_
 CoLA                2021   GNN+AE+SSL   Yes          [#Liu2021Anomaly]_
 GUIDE               2021   GNN+AE       Yes          [#Yuan2021Higher]_
 CONAD               2022   GNN+AE+SSL   Yes          [#Xu2022Contrastive]_
-GADNR               2023   GNN+AE       Yes          [#Roy2023Gadnr]_
+GADNR               2024   GNN+AE       Yes          [#Roy2024Gadnr]_
 ==================  =====  ===========  ===========  ========================================
 
 
@@ -248,6 +249,8 @@ Reference
 
 .. [#Chen2020Generative] Chen, Z., Liu, B., Wang, M., Dai, P., Lv, J. and Bo, L., 2020, October. Generative adversarial attributed network anomaly detection. In Proceedings of the 29th ACM International Conference on Information & Knowledge Management (CIKM).
 
+.. [#Bandyopadhyay2020Integrating] Bandyopadhyay, S., Vishal Vivek, S. and Murty, M.N., 2020. Integrating network embedding and community outlier detection via multiclass graph description. Frontiers in Artificial Intelligence and Applications (FAIA).
+
 .. [#Wang2021One] Wang, X., Jin, B., Du, Y., Cui, P., Tan, Y. and Yang, Y., 2021. One-class graph neural networks for anomaly detection in attributed networks. Neural computing and applications.
 
 .. [#Liu2021Anomaly] Liu, Y., Li, Z., Pan, S., Gong, C., Zhou, C. and Karypis, G., 2021. Anomaly detection on attributed networks via contrastive self-supervised learning. IEEE transactions on neural networks and learning systems (TNNLS).
@@ -256,4 +259,4 @@ Reference
 
 .. [#Xu2022Contrastive] Xu, Z., Huang, X., Zhao, Y., Dong, Y., and Li, J., 2022. Contrastive Attributed Network Anomaly Detection with Data Augmentation. In Proceedings of the 26th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD).
 
-.. [#Roy2023Gadnr] Roy, A., Shu, J., Li, J., Yang, C., Elshocht, O., Smeets, J. and Li, P., 2023. GAD-NR: Graph Anomaly Detection via Neighborhood Reconstruction. In Proceedings of the 17th ACM International Conference on Web Search and Data Mining (WSDM).
+.. [#Roy2024Gadnr] Roy, A., Shu, J., Li, J., Yang, C., Elshocht, O., Smeets, J. and Li, P., 2024. GAD-NR: Graph Anomaly Detection via Neighborhood Reconstruction. In Proceedings of the 17th ACM International Conference on Web Search and Data Mining (WSDM).
diff --git a/docs/examples/2_convert.py b/docs/examples/2_convert.py
@@ -27,7 +27,7 @@
 # -----------------
 # Initialize and train a detector in PyGOD. Here, we use
 # ``pygod.detector.DOMINANT`` as an example. For faster demonstration,
-# we set `epoch` to 3.
+# we set ``epoch`` to 3.
 
 
 from pygod.detector import DOMINANT
@@ -39,7 +39,7 @@
 # Obtaining Node Score
 # --------------------
 # After training, we obtain raw outlier scores for each node with
-# `predict`. The shape of `node_score` is ``(N, )``.
+# ``predict``. The shape of ``node_score`` is ``(N, )``.
 
 
 node_score = detector.predict(data, return_pred=False, return_score=True)
@@ -49,7 +49,7 @@
 # Converting Score to Edge Level
 # ------------------------------
 # To detect outlier edges, we convert the outlier scores on node level
-# to edge level. The shape of `edge_score` is ``(E, )``.
+# to edge level. The shape of ``edge_score`` is ``(E, )``.
 
 
 from pygod.utils import to_edge_score
@@ -61,10 +61,17 @@
 # Converting Score to Graph Level
 # -------------------------------
 # To detect outlier graphs, we convert the outlier scores on node level
-# to graph level. `graph_score` is a scalar for a `Data` object.
+# to graph level for each graph. ``graph_score`` is a scalar for each
+# ``Data`` object. Here, we give an example of scoring a list of graphs.
 
 
 from pygod.utils import to_graph_score
 
-graph_score = to_graph_score(node_score)
-print(graph_score)
+data_list = [data, data, data]
+graph_scores = []
+for data in data_list:
+    node_score = detector.predict(data, return_pred=False, return_score=True)
+    graph_score = to_graph_score(node_score)
+    graph_scores.append(graph_score.item())
+
+print(graph_scores)
diff --git a/docs/index.rst b/docs/index.rst
@@ -100,11 +100,12 @@ DONE                2020   MLP+AE       Yes          :class:`pygod.detector.DONE
 AdONE               2020   MLP+AE       Yes          :class:`pygod.detector.AdONE`
 AnomalyDAE          2020   GNN+AE       Yes          :class:`pygod.detector.AnomalyDAE`
 GAAN                2020   GAN          Yes          :class:`pygod.detector.GAAN`
+DMGD                2020   GNN+AE       Yes          :class:`pygod.detector.DMGD`
 OCGNN               2021   GNN          Yes          :class:`pygod.detector.OCGNN`
 CoLA                2021   GNN+AE+SSL   Yes          :class:`pygod.detector.CoLA`
 GUIDE               2021   GNN+AE       Yes          :class:`pygod.detector.GUIDE`
 CONAD               2022   GNN+AE+SSL   Yes          :class:`pygod.detector.CONAD`
-GADNR               2023   GNN+AE       Yes          :class:`pygod.detector.GADNR`
+GADNR               2024   GNN+AE       Yes          :class:`pygod.detector.GADNR`
 ==================  =====  ===========  ===========  ==============================================
 
 

diff --git a/docs/pygod.detector.rst b/docs/pygod.detector.rst
@@ -11,6 +11,7 @@ pygod.detector
     ~pygod.detector.AnomalyDAE
     ~pygod.detector.CoLA
     ~pygod.detector.CONAD
+    ~pygod.detector.DMGD
     ~pygod.detector.DOMINANT
     ~pygod.detector.DONE
     ~pygod.detector.GAAN

diff --git a/docs/pygod.nn.rst b/docs/pygod.nn.rst
@@ -9,6 +9,7 @@ pygod.nn
     ~pygod.nn.AdONEBase
     ~pygod.nn.AnomalyDAEBase
     ~pygod.nn.CoLABase
+    ~pygod.nn.DMGDBase
     ~pygod.nn.DOMINANTBase
     ~pygod.nn.DONEBase
     ~pygod.nn.GAANBase

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -1,12 +1,12 @@
 # dependencies required for documentation
 furo
-https://download.pytorch.org/whl/cpu/torch-2.0.0%2Bcpu-cp38-cp38-linux_x86_64.whl
+https://download.pytorch.org/whl/cpu/torch-2.1.0%2Bcpu-cp38-cp38-linux_x86_64.whl
 torch_geometric
-https://data.pyg.org/whl/torch-2.0.0%2Bcpu/pyg_lib-0.2.0%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
-https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_cluster-1.6.1%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
-https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_scatter-2.1.1%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
-https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_sparse-0.6.17%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
-https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_spline_conv-1.2.2%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
+https://data.pyg.org/whl/torch-2.1.0%2Bcpu/pyg_lib-0.2.0%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
+https://data.pyg.org/whl/torch-2.1.0%2Bcpu/torch_cluster-1.6.1%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
+https://data.pyg.org/whl/torch-2.1.0%2Bcpu/torch_scatter-2.1.1%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
+https://data.pyg.org/whl/torch-2.1.0%2Bcpu/torch_sparse-0.6.17%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
+https://data.pyg.org/whl/torch-2.1.0%2Bcpu/torch_spline_conv-1.2.2%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
 setuptools
 sphinxcontrib-bibtex
 matplotlib

diff --git a/docs/zreferences.bib b/docs/zreferences.bib
@@ -173,9 +173,19 @@ @article{kipf2016variational
   year={2016}
 }
 
-@inproceedings{roy2023gadnr,
+@inproceedings{roy2024gadnr,
   title  = {GAD-NR : Graph Anomaly Detection via Neighborhood Reconstruction},
   author = {Roy, Amit and Shu, Juan and Li, Jia and Yang, Carl and Elshocht, Olivier and Smeets, Jeroen and Li, Pan},
   booktitle={Proceedings of the 17th ACM International Conference on Web Search and Data Mining},
   year   = {2024}
 }
+
+@article{bandyopadhyay2020integrating,
+  title={Integrating network embedding and community outlier detection via multiclass graph description},
+  author={Bandyopadhyay, S and Vishal Vivek, S and Murty, MN},
+  journal={Frontiers in Artificial Intelligence and Applications},
+  volume={325},
+  pages={976--983},
+  year={2020},
+  publisher={IOS Press BV}
+}
diff --git a/pygod/detector/__init__.py b/pygod/detector/__init__.py
@@ -6,6 +6,7 @@
 from .anomalydae import AnomalyDAE
 from .cola import CoLA
 from .conad import CONAD
+from .dmgd import DMGD
 from .dominant import DOMINANT
 from .done import DONE
 from .gaan import GAAN
@@ -19,6 +20,6 @@
 
 __all__ = [
     "Detector", "DeepDetector", "AdONE", "ANOMALOUS", "AnomalyDAE", "CoLA",
-    "CONAD", "DOMINANT", "DONE", "GAAN", "GADNR", "GAE", "GUIDE", "OCGNN", "ONE",
-    "Radar", "SCAN"
+    "CONAD", "DMGD", "DOMINANT", "DONE", "GAAN", "GADNR", "GAE", "GUIDE",
+    "OCGNN", "ONE", "Radar", "SCAN"
 ]
diff --git a/pygod/detector/adone.py b/pygod/detector/adone.py
@@ -89,7 +89,7 @@ class AdONE(DeepDetector):
         fitted.
     threshold_ : float
         The threshold is based on ``contamination``. It is the
-        :math:`N`*``contamination`` most abnormal samples in
+        :math:`N \\times` ``contamination`` most abnormal samples in
         ``decision_score_``. The threshold is calculated for generating
         binary outlier labels.
     label_ : torch.Tensor
@@ -148,6 +148,7 @@ def __init__(self,
                                     batch_size=batch_size,
                                     num_neigh=num_neigh,
                                     verbose=verbose,
+                                    gan=True,
                                     save_emb=save_emb,
                                     compile_model=compile_model,
                                     **kwargs)
@@ -197,23 +198,31 @@ def forward_model(self, data):
         s = data.s.to(self.device)
         edge_index = data.edge_index.to(self.device)
 
-        x_, s_, h_a, h_s, dna, dns, dis_a, dis_s = self.model(x, s, edge_index)
-        loss, oa, os, oc = self.model.loss_func(x[:batch_size],
-                                                x_[:batch_size],
-                                                s[:batch_size],
-                                                s_[:batch_size],
-                                                h_a[:batch_size],
-                                                h_s[:batch_size],
-                                                dna[:batch_size],
-                                                dns[:batch_size],
-                                                dis_a[:batch_size],
-                                                dis_s[:batch_size])
+        x_, s_, h_a, h_s, dna, dns = self.model(x, s, edge_index)
+
+        loss_d = self.model.loss_func_d(h_a[:batch_size].detach(),
+                                        h_s[:batch_size].detach())
+
+        self.opt_in.zero_grad()
+        loss_d.backward()
+        self.opt_in.step()
+
+        self.epoch_loss_in += loss_d.item() * batch_size
+
+        loss_g, oa, os, oc = self.model.loss_func_g(x[:batch_size],
+                                                    x_[:batch_size],
+                                                    s[:batch_size],
+                                                    s_[:batch_size],
+                                                    h_a[:batch_size],
+                                                    h_s[:batch_size],
+                                                    dna[:batch_size],
+                                                    dns[:batch_size])
 
         self.attribute_score_[node_idx[:batch_size]] = oa.detach().cpu()
         self.structural_score_[node_idx[:batch_size]] = os.detach().cpu()
         self.combined_score_[node_idx[:batch_size]] = oc.detach().cpu()
 
-        return loss, ((oa + os + oc) / 3).detach().cpu()
+        return loss_g, ((oa + os + oc) / 3).detach().cpu()
 
     def decision_function(self, data, label=None):
         if data is not None:

diff --git a/pygod/detector/anomalydae.py b/pygod/detector/anomalydae.py
@@ -89,7 +89,7 @@ class AnomalyDAE(DeepDetector):
         fitted.
     threshold_ : float
         The threshold is based on ``contamination``. It is the
-        :math:`N`*``contamination`` most abnormal samples in
+        :math:`N \\times` ``contamination`` most abnormal samples in
         ``decision_score_``. The threshold is calculated for generating
         binary outlier labels.
     label_ : torch.Tensor

diff --git a/pygod/detector/base.py b/pygod/detector/base.py
@@ -41,7 +41,7 @@ class Detector(ABC):
 
     threshold_ : float
         The threshold is based on ``contamination``. It is the
-        :math:`N`*``contamination`` most abnormal samples in
+        :math:`N \\times` ``contamination`` most abnormal samples in
         ``decision_score_``. The threshold is calculated for generating
         binary outlier labels.
 
@@ -354,7 +354,7 @@ class DeepDetector(Detector, ABC):
         fitted.
     threshold_ : float
         The threshold is based on ``contamination``. It is the
-        :math:`N`*``contamination`` most abnormal samples in
+        :math:`N \\times` ``contamination`` most abnormal samples in
         ``decision_score_``. The threshold is calculated for generating
         binary outlier labels.
     label_ : torch.Tensor
@@ -428,8 +428,8 @@ def __init__(self,
 
     def fit(self, data, label=None):
 
-        self.num_nodes, self.in_dim = data.x.shape
         self.process_graph(data)
+        self.num_nodes, self.in_dim = data.x.shape
         if self.batch_size == 0:
             self.batch_size = data.x.shape[0]
         loader = NeighborLoader(data,
@@ -444,10 +444,10 @@ def fit(self, data, label=None):
                                          lr=self.lr,
                                          weight_decay=self.weight_decay)
         else:
-            self.opt_g = torch.optim.Adam(self.model.generator.parameters(),
-                                          lr=self.lr,
-                                          weight_decay=self.weight_decay)
-            optimizer = torch.optim.Adam(self.model.discriminator.parameters(),
+            self.opt_in = torch.optim.Adam(self.model.inner.parameters(),
+                                           lr=self.lr,
+                                           weight_decay=self.weight_decay)
+            optimizer = torch.optim.Adam(self.model.outer.parameters(),
                                          lr=self.lr,
                                          weight_decay=self.weight_decay)
 
@@ -457,15 +457,15 @@ def fit(self, data, label=None):
             start_time = time.time()
             epoch_loss = 0
             if self.gan:
-                self.epoch_loss_g = 0
+                self.epoch_loss_in = 0
             for sampled_data in loader:
                 batch_size = sampled_data.batch_size
                 node_idx = sampled_data.n_id
 
                 loss, score = self.forward_model(sampled_data)
                 epoch_loss += loss.item() * batch_size
                 if self.save_emb:
-                    if type(self.emb) == tuple:
+                    if type(self.emb) is tuple:
                         self.emb[0][node_idx[:batch_size]] = \
                             self.model.emb[0][:batch_size].cpu()
                         self.emb[1][node_idx[:batch_size]] = \
@@ -481,7 +481,7 @@ def fit(self, data, label=None):
 
             loss_value = epoch_loss / data.x.shape[0]
             if self.gan:
-                loss_value = (self.epoch_loss_g / data.x.shape[0], loss_value)
+                loss_value = (self.epoch_loss_in / data.x.shape[0], loss_value)
             logger(epoch=epoch,
                    loss=loss_value,
                    score=self.decision_score_,
@@ -509,6 +509,7 @@ def decision_function(self, data, label=None):
             else:
                 self.emb = torch.zeros(data.x.shape[0], self.hid_dim)
         start_time = time.time()
+        test_loss = 0
         for sampled_data in loader:
             loss, score = self.forward_model(sampled_data)
             batch_size = sampled_data.batch_size
@@ -523,9 +524,14 @@ def decision_function(self, data, label=None):
                     self.emb[node_idx[:batch_size]] = \
                         self.model.emb[:batch_size].cpu()
 
+            test_loss = loss.item() * batch_size
             outlier_score[node_idx[:batch_size]] = score
 
-        logger(loss=loss.item() / data.x.shape[0],
+        loss_value = test_loss / data.x.shape[0]
+        if self.gan:
+            loss_value = (self.epoch_loss_in / data.x.shape[0], loss_value)
+
+        logger(loss=loss_value,
                score=outlier_score,
                target=label,
                time=time.time() - start_time,
@@ -610,15 +616,15 @@ def predict(self,
                                                    prob_method,
                                                    return_conf)
         if return_emb:
-            if type(output) == tuple:
+            if type(output) is tuple:
                 output += (self.emb,)
             else:
                 output = (output, self.emb)
 
         return output
 
     @abstractmethod
-    def init_model(self):
+    def init_model(self, **kwargs):
         """
         Initialize the neural network detector.
 

diff --git a/pygod/detector/cola.py b/pygod/detector/cola.py
@@ -73,7 +73,7 @@ class CoLA(DeepDetector):
         fitted.
     threshold_ : float
         The threshold is based on ``contamination``. It is the
-        :math:`N`*``contamination`` most abnormal samples in
+        :math:`N \\times` ``contamination`` most abnormal samples in
         ``decision_score_``. The threshold is calculated for generating
         binary outlier labels.
     label_ : torch.Tensor

diff --git a/pygod/detector/conad.py b/pygod/detector/conad.py
@@ -98,7 +98,7 @@ class CONAD(DeepDetector):
         fitted.
     threshold_ : float
         The threshold is based on ``contamination``. It is the
-        :math:`N`*``contamination`` most abnormal samples in
+        :math:`N \\times` ``contamination`` most abnormal samples in
         ``decision_score_``. The threshold is calculated for generating
         binary outlier labels.
     label_ : torch.Tensor