Skip to content

Commit

Permalink
Merge pull request #102 from pygod-team/dmgd
Browse files Browse the repository at this point in the history
Add DMGD
  • Loading branch information
kayzliu authored Feb 1, 2024
2 parents a5ead23 + a9a98b2 commit 23a63cb
Show file tree
Hide file tree
Showing 33 changed files with 691 additions and 116 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testing-cron.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install torch --index-url https://download.pytorch.org/whl/cpu
pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
pip install torch_geometric
pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
pip install pytest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install torch --index-url https://download.pytorch.org/whl/cpu
pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cpu
pip install torch_geometric
pip install torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
pip install pytest
Expand Down
7 changes: 5 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -184,11 +184,12 @@ DONE 2020 MLP+AE Yes [#Bandyopadhyay2020Outlier]
AdONE 2020 MLP+AE Yes [#Bandyopadhyay2020Outlier]_
AnomalyDAE 2020 GNN+AE Yes [#Fan2020AnomalyDAE]_
GAAN 2020 GAN Yes [#Chen2020Generative]_
DMGD 2020 GNN+AE Yes [#Bandyopadhyay2020Integrating]_
OCGNN 2021 GNN Yes [#Wang2021One]_
CoLA 2021 GNN+AE+SSL Yes [#Liu2021Anomaly]_
GUIDE 2021 GNN+AE Yes [#Yuan2021Higher]_
CONAD 2022 GNN+AE+SSL Yes [#Xu2022Contrastive]_
GADNR 2023 GNN+AE Yes [#Roy2023Gadnr]_
GADNR 2024 GNN+AE Yes [#Roy2024Gadnr]_
================== ===== =========== =========== ========================================


Expand Down Expand Up @@ -248,6 +249,8 @@ Reference
.. [#Chen2020Generative] Chen, Z., Liu, B., Wang, M., Dai, P., Lv, J. and Bo, L., 2020, October. Generative adversarial attributed network anomaly detection. In Proceedings of the 29th ACM International Conference on Information & Knowledge Management (CIKM).
.. [#Bandyopadhyay2020Integrating] Bandyopadhyay, S., Vishal Vivek, S. and Murty, M.N., 2020. Integrating network embedding and community outlier detection via multiclass graph description. Frontiers in Artificial Intelligence and Applications (FAIA).
.. [#Wang2021One] Wang, X., Jin, B., Du, Y., Cui, P., Tan, Y. and Yang, Y., 2021. One-class graph neural networks for anomaly detection in attributed networks. Neural computing and applications.
.. [#Liu2021Anomaly] Liu, Y., Li, Z., Pan, S., Gong, C., Zhou, C. and Karypis, G., 2021. Anomaly detection on attributed networks via contrastive self-supervised learning. IEEE transactions on neural networks and learning systems (TNNLS).
Expand All @@ -256,4 +259,4 @@ Reference
.. [#Xu2022Contrastive] Xu, Z., Huang, X., Zhao, Y., Dong, Y., and Li, J., 2022. Contrastive Attributed Network Anomaly Detection with Data Augmentation. In Proceedings of the 26th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD).
.. [#Roy2023Gadnr] Roy, A., Shu, J., Li, J., Yang, C., Elshocht, O., Smeets, J. and Li, P., 2023. GAD-NR: Graph Anomaly Detection via Neighborhood Reconstruction. In Proceedings of the 17th ACM International Conference on Web Search and Data Mining (WSDM).
.. [#Roy2024Gadnr] Roy, A., Shu, J., Li, J., Yang, C., Elshocht, O., Smeets, J. and Li, P., 2024. GAD-NR: Graph Anomaly Detection via Neighborhood Reconstruction. In Proceedings of the 17th ACM International Conference on Web Search and Data Mining (WSDM).
19 changes: 13 additions & 6 deletions docs/examples/2_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
# -----------------
# Initialize and train a detector in PyGOD. Here, we use
# ``pygod.detector.DOMINANT`` as an example. For faster demonstration,
# we set `epoch` to 3.
# we set ``epoch`` to 3.


from pygod.detector import DOMINANT
Expand All @@ -39,7 +39,7 @@
# Obtaining Node Score
# --------------------
# After training, we obtain raw outlier scores for each node with
# `predict`. The shape of `node_score` is ``(N, )``.
# ``predict``. The shape of ``node_score`` is ``(N, )``.


node_score = detector.predict(data, return_pred=False, return_score=True)
Expand All @@ -49,7 +49,7 @@
# Converting Score to Edge Level
# ------------------------------
# To detect outlier edges, we convert the outlier scores on node level
# to edge level. The shape of `edge_score` is ``(E, )``.
# to edge level. The shape of ``edge_score`` is ``(E, )``.


from pygod.utils import to_edge_score
Expand All @@ -61,10 +61,17 @@
# Converting Score to Graph Level
# -------------------------------
# To detect outlier graphs, we convert the outlier scores on node level
# to graph level. `graph_score` is a scalar for a `Data` object.
# to graph level for each graph. ``graph_score`` is a scalar for each
# ``Data`` object. Here, we give an example for scoring a list of graphs.


from pygod.utils import to_graph_score

graph_score = to_graph_score(node_score)
print(graph_score)
data_list = [data, data, data]
graph_scores = []
for data in data_list:
node_score = detector.predict(data, return_pred=False, return_score=True)
graph_score = to_graph_score(node_score)
graph_scores.append(graph_score.item())

print(graph_scores)
3 changes: 2 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,12 @@ DONE 2020 MLP+AE Yes :class:`pygod.detector.DONE
AdONE 2020 MLP+AE Yes :class:`pygod.detector.AdONE`
AnomalyDAE 2020 GNN+AE Yes :class:`pygod.detector.AnomalyDAE`
GAAN 2020 GAN Yes :class:`pygod.detector.GAAN`
DMGD 2020 GNN+AE Yes :class:`pygod.detector.DMGD`
OCGNN 2021 GNN Yes :class:`pygod.detector.OCGNN`
CoLA 2021 GNN+AE+SSL Yes :class:`pygod.detector.CoLA`
GUIDE 2021 GNN+AE Yes :class:`pygod.detector.GUIDE`
CONAD 2022 GNN+AE+SSL Yes :class:`pygod.detector.CONAD`
GADNR 2023 GNN+AE Yes :class:`pygod.detector.GADNR`
GADNR 2024 GNN+AE Yes :class:`pygod.detector.GADNR`
================== ===== =========== =========== ==============================================


Expand Down
1 change: 1 addition & 0 deletions docs/pygod.detector.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pygod.detector
~pygod.detector.AnomalyDAE
~pygod.detector.CoLA
~pygod.detector.CONAD
~pygod.detector.DMGD
~pygod.detector.DOMINANT
~pygod.detector.DONE
~pygod.detector.GAAN
Expand Down
1 change: 1 addition & 0 deletions docs/pygod.nn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pygod.nn
~pygod.nn.AdONEBase
~pygod.nn.AnomalyDAEBase
~pygod.nn.CoLABase
~pygod.nn.DMGDBase
~pygod.nn.DOMINANTBase
~pygod.nn.DONEBase
~pygod.nn.GAANBase
Expand Down
12 changes: 6 additions & 6 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# dependencies required for documentation
furo
https://download.pytorch.org/whl/cpu/torch-2.0.0%2Bcpu-cp38-cp38-linux_x86_64.whl
https://download.pytorch.org/whl/cpu/torch-2.1.0%2Bcpu-cp38-cp38-linux_x86_64.whl
torch_geometric
https://data.pyg.org/whl/torch-2.0.0%2Bcpu/pyg_lib-0.2.0%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_cluster-1.6.1%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_scatter-2.1.1%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_sparse-0.6.17%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_spline_conv-1.2.2%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.1.0%2Bcpu/pyg_lib-0.2.0%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.1.0%2Bcpu/torch_cluster-1.6.1%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.1.0%2Bcpu/torch_scatter-2.1.1%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.1.0%2Bcpu/torch_sparse-0.6.17%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
https://data.pyg.org/whl/torch-2.1.0%2Bcpu/torch_spline_conv-1.2.2%2Bpt20cpu-cp38-cp38-linux_x86_64.whl
setuptools
sphinxcontrib-bibtex
matplotlib
Expand Down
12 changes: 11 additions & 1 deletion docs/zreferences.bib
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,19 @@ @article{kipf2016variational
year={2016}
}

@inproceedings{roy2023gadnr,
@inproceedings{roy2024gadnr,
title = {GAD-NR: Graph Anomaly Detection via Neighborhood Reconstruction},
author = {Roy, Amit and Shu, Juan and Li, Jia and Yang, Carl and Elshocht, Olivier and Smeets, Jeroen and Li, Pan},
booktitle={Proceedings of the 17th ACM International Conference on Web Search and Data Mining},
year = {2024}
}

@article{bandyopadhyay2020integrating,
title={Integrating network embedding and community outlier detection via multiclass graph description},
author={Bandyopadhyay, S and Vishal Vivek, S and Murty, MN},
journal={Frontiers in Artificial Intelligence and Applications},
volume={325},
pages={976--983},
year={2020},
publisher={IOS Press BV}
}
5 changes: 3 additions & 2 deletions pygod/detector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .anomalydae import AnomalyDAE
from .cola import CoLA
from .conad import CONAD
from .dmgd import DMGD
from .dominant import DOMINANT
from .done import DONE
from .gaan import GAAN
Expand All @@ -19,6 +20,6 @@

__all__ = [
"Detector", "DeepDetector", "AdONE", "ANOMALOUS", "AnomalyDAE", "CoLA",
"CONAD", "DOMINANT", "DONE", "GAAN", "GADNR", "GAE", "GUIDE", "OCGNN", "ONE",
"Radar", "SCAN"
"CONAD", "DMGD", "DOMINANT", "DONE", "GAAN", "GADNR", "GAE", "GUIDE",
"OCGNN", "ONE", "Radar", "SCAN"
]
35 changes: 22 additions & 13 deletions pygod/detector/adone.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class AdONE(DeepDetector):
fitted.
threshold_ : float
The threshold is based on ``contamination``. It is the
:math:`N`*``contamination`` most abnormal samples in
:math:`N \\times` ``contamination`` most abnormal samples in
``decision_score_``. The threshold is calculated for generating
binary outlier labels.
label_ : torch.Tensor
Expand Down Expand Up @@ -148,6 +148,7 @@ def __init__(self,
batch_size=batch_size,
num_neigh=num_neigh,
verbose=verbose,
gan=True,
save_emb=save_emb,
compile_model=compile_model,
**kwargs)
Expand Down Expand Up @@ -197,23 +198,31 @@ def forward_model(self, data):
s = data.s.to(self.device)
edge_index = data.edge_index.to(self.device)

x_, s_, h_a, h_s, dna, dns, dis_a, dis_s = self.model(x, s, edge_index)
loss, oa, os, oc = self.model.loss_func(x[:batch_size],
x_[:batch_size],
s[:batch_size],
s_[:batch_size],
h_a[:batch_size],
h_s[:batch_size],
dna[:batch_size],
dns[:batch_size],
dis_a[:batch_size],
dis_s[:batch_size])
x_, s_, h_a, h_s, dna, dns = self.model(x, s, edge_index)

loss_d = self.model.loss_func_d(h_a[:batch_size].detach(),
h_s[:batch_size].detach())

self.opt_in.zero_grad()
loss_d.backward()
self.opt_in.step()

self.epoch_loss_in += loss_d.item() * batch_size

loss_g, oa, os, oc = self.model.loss_func_g(x[:batch_size],
x_[:batch_size],
s[:batch_size],
s_[:batch_size],
h_a[:batch_size],
h_s[:batch_size],
dna[:batch_size],
dns[:batch_size])

self.attribute_score_[node_idx[:batch_size]] = oa.detach().cpu()
self.structural_score_[node_idx[:batch_size]] = os.detach().cpu()
self.combined_score_[node_idx[:batch_size]] = oc.detach().cpu()

return loss, ((oa + os + oc) / 3).detach().cpu()
return loss_g, ((oa + os + oc) / 3).detach().cpu()

def decision_function(self, data, label=None):
if data is not None:
Expand Down
2 changes: 1 addition & 1 deletion pygod/detector/anomalydae.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class AnomalyDAE(DeepDetector):
fitted.
threshold_ : float
The threshold is based on ``contamination``. It is the
:math:`N`*``contamination`` most abnormal samples in
:math:`N \\times` ``contamination`` most abnormal samples in
``decision_score_``. The threshold is calculated for generating
binary outlier labels.
label_ : torch.Tensor
Expand Down
32 changes: 19 additions & 13 deletions pygod/detector/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class Detector(ABC):
threshold_ : float
The threshold is based on ``contamination``. It is the
:math:`N`*``contamination`` most abnormal samples in
:math:`N \\times` ``contamination`` most abnormal samples in
``decision_score_``. The threshold is calculated for generating
binary outlier labels.
Expand Down Expand Up @@ -354,7 +354,7 @@ class DeepDetector(Detector, ABC):
fitted.
threshold_ : float
The threshold is based on ``contamination``. It is the
:math:`N`*``contamination`` most abnormal samples in
:math:`N \\times` ``contamination`` most abnormal samples in
``decision_score_``. The threshold is calculated for generating
binary outlier labels.
label_ : torch.Tensor
Expand Down Expand Up @@ -428,8 +428,8 @@ def __init__(self,

def fit(self, data, label=None):

self.num_nodes, self.in_dim = data.x.shape
self.process_graph(data)
self.num_nodes, self.in_dim = data.x.shape
if self.batch_size == 0:
self.batch_size = data.x.shape[0]
loader = NeighborLoader(data,
Expand All @@ -444,10 +444,10 @@ def fit(self, data, label=None):
lr=self.lr,
weight_decay=self.weight_decay)
else:
self.opt_g = torch.optim.Adam(self.model.generator.parameters(),
lr=self.lr,
weight_decay=self.weight_decay)
optimizer = torch.optim.Adam(self.model.discriminator.parameters(),
self.opt_in = torch.optim.Adam(self.model.inner.parameters(),
lr=self.lr,
weight_decay=self.weight_decay)
optimizer = torch.optim.Adam(self.model.outer.parameters(),
lr=self.lr,
weight_decay=self.weight_decay)

Expand All @@ -457,15 +457,15 @@ def fit(self, data, label=None):
start_time = time.time()
epoch_loss = 0
if self.gan:
self.epoch_loss_g = 0
self.epoch_loss_in = 0
for sampled_data in loader:
batch_size = sampled_data.batch_size
node_idx = sampled_data.n_id

loss, score = self.forward_model(sampled_data)
epoch_loss += loss.item() * batch_size
if self.save_emb:
if type(self.emb) == tuple:
if type(self.emb) is tuple:
self.emb[0][node_idx[:batch_size]] = \
self.model.emb[0][:batch_size].cpu()
self.emb[1][node_idx[:batch_size]] = \
Expand All @@ -481,7 +481,7 @@ def fit(self, data, label=None):

loss_value = epoch_loss / data.x.shape[0]
if self.gan:
loss_value = (self.epoch_loss_g / data.x.shape[0], loss_value)
loss_value = (self.epoch_loss_in / data.x.shape[0], loss_value)
logger(epoch=epoch,
loss=loss_value,
score=self.decision_score_,
Expand Down Expand Up @@ -509,6 +509,7 @@ def decision_function(self, data, label=None):
else:
self.emb = torch.zeros(data.x.shape[0], self.hid_dim)
start_time = time.time()
test_loss = 0
for sampled_data in loader:
loss, score = self.forward_model(sampled_data)
batch_size = sampled_data.batch_size
Expand All @@ -523,9 +524,14 @@ def decision_function(self, data, label=None):
self.emb[node_idx[:batch_size]] = \
self.model.emb[:batch_size].cpu()

test_loss = loss.item() * batch_size
outlier_score[node_idx[:batch_size]] = score

logger(loss=loss.item() / data.x.shape[0],
loss_value = test_loss / data.x.shape[0]
if self.gan:
loss_value = (self.epoch_loss_in / data.x.shape[0], loss_value)

logger(loss=loss_value,
score=outlier_score,
target=label,
time=time.time() - start_time,
Expand Down Expand Up @@ -610,15 +616,15 @@ def predict(self,
prob_method,
return_conf)
if return_emb:
if type(output) == tuple:
if type(output) is tuple:
output += (self.emb,)
else:
output = (output, self.emb)

return output

@abstractmethod
def init_model(self):
def init_model(self, **kwargs):
"""
Initialize the neural network detector.
Expand Down
2 changes: 1 addition & 1 deletion pygod/detector/cola.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class CoLA(DeepDetector):
fitted.
threshold_ : float
The threshold is based on ``contamination``. It is the
:math:`N`*``contamination`` most abnormal samples in
:math:`N \\times` ``contamination`` most abnormal samples in
``decision_score_``. The threshold is calculated for generating
binary outlier labels.
label_ : torch.Tensor
Expand Down
2 changes: 1 addition & 1 deletion pygod/detector/conad.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class CONAD(DeepDetector):
fitted.
threshold_ : float
The threshold is based on ``contamination``. It is the
:math:`N`*``contamination`` most abnormal samples in
:math:`N \\times` ``contamination`` most abnormal samples in
``decision_score_``. The threshold is calculated for generating
binary outlier labels.
label_ : torch.Tensor
Expand Down
Loading

0 comments on commit 23a63cb

Please sign in to comment.