Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Users/hc/gnntest #20

Open
wants to merge 51 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
6dd79a8
Fix softmax test
nicelhc13 Mar 26, 2024
1311d4e
Fix sample edge test
nicelhc13 Mar 26, 2024
b9aa4e3
Fix accuracy test
nicelhc13 Mar 26, 2024
5ce91cc
Fix fb test
nicelhc13 Mar 26, 2024
f036f17
Refine cmake
nicelhc13 Mar 26, 2024
203f047
Add input graphs; no lfs
nicelhc13 Mar 26, 2024
6d3dd81
Specify input graph path
nicelhc13 Mar 26, 2024
d0942f0
Apply clang-format
nicelhc13 Mar 26, 2024
663212d
Transitioning input graphs to Git LFS
nicelhc13 Mar 28, 2024
65d2596
Some fix
nicelhc13 Mar 28, 2024
62c0d1c
Add ctest for gnn tests
nicelhc13 Mar 28, 2024
4fa70ba
make precommits
nicelhc13 Mar 28, 2024
79f3ac1
Fix inputs/cmake
nicelhc13 Mar 28, 2024
ea78739
fix
nicelhc13 Mar 28, 2024
f40b66b
revert input cmake
nicelhc13 Mar 28, 2024
3e901ab
Remove input graphs from repo
nicelhc13 Mar 28, 2024
8d9458c
remove lfs incorrect git attributes
nicelhc13 Mar 28, 2024
2ab7a50
delete inputs/cmake
nicelhc13 Mar 28, 2024
6da8785
fix input cmakeList
nicelhc13 Mar 28, 2024
8c6823f
track tester and cora through lfs
nicelhc13 Mar 28, 2024
d907a03
try gitlfs again
nicelhc13 Mar 28, 2024
acd7e90
Add lfs pointers
nicelhc13 Mar 28, 2024
f1368eb
retry; delete
nicelhc13 Mar 28, 2024
9e439d4
reupload input graphs to lfs storage
nicelhc13 Mar 28, 2024
45c99dd
Fix lfs file tracking
nicelhc13 Mar 28, 2024
89e8a0b
Apply clang-format-15 instead of 14
nicelhc13 Mar 28, 2024
eb6315a
precommit with clang-format18
nicelhc13 Mar 28, 2024
297d2c8
Fix gcn sampling test
nicelhc13 Apr 1, 2024
5d02f49
Apply clang-format-16
nicelhc13 Apr 1, 2024
a8ad353
Apply clang-format14 and fix libgnn test input paths
nicelhc13 Apr 1, 2024
0599283
Fix test cmake file
nicelhc13 Apr 1, 2024
dbd0476
Reapply clang..
nicelhc13 Apr 1, 2024
0bd9cc1
Fix input test directory
nicelhc13 Apr 2, 2024
5e7327e
test
nicelhc13 Apr 2, 2024
65ac12f
retest
nicelhc13 Apr 2, 2024
28e93db
applyclang 16.0.6
nicelhc13 Apr 2, 2024
490cd13
revert slash
nicelhc13 Apr 2, 2024
de657e3
Remove dummy files
nicelhc13 Apr 2, 2024
3a6c00a
reduce the number of hosts testing
nicelhc13 Apr 2, 2024
9a4edd5
Test regarding file read
nicelhc13 Apr 2, 2024
bb78699
change ci/cd config
nicelhc13 Apr 2, 2024
72f0f44
revert paths
nicelhc13 Apr 2, 2024
2588b62
revert exclude config
nicelhc13 Apr 2, 2024
b17c83b
workflow
nicelhc13 Apr 2, 2024
e2e62f9
remove debug tests
nicelhc13 Apr 2, 2024
d992e80
clang-format
nicelhc13 Apr 2, 2024
1c0e88e
reduce sanitizer test to narrow problem down
nicelhc13 Apr 2, 2024
a802901
revert some paths back
nicelhc13 Apr 2, 2024
e6fde34
Add dummy prints to narrow cusp errors down
nicelhc13 Apr 2, 2024
ebbd2de
check what's going on in CI/CD
nicelhc13 Apr 3, 2024
bf6eec7
debug
nicelhc13 Apr 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
inputs/cora filter=lfs diff=lfs merge=lfs -text
inputs/tester filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora_edgelist.txt filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora-labels.txt filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora.sgr filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora.csgr filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora-dims.txt filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora_full.npz filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora_labels.txt filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora-val_mask.txt filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora.el filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora.features filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora.ft filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora-train_mask.txt filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora-feat.bin filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora-feats.bin filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora.gr filter=lfs diff=lfs merge=lfs -text
inputs/cora/cora-test_mask.txt filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-test-feats.bin filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-test-labels.bin filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-test-labels-dims.txt filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-test_mask.txt filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-dims.txt filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester.el filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-feats.bin filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-mlabels.txt filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-train_mask.txt filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester.csgr filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-labels.txt filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-test-mapping.bin filter=lfs diff=lfs merge=lfs -text
inputs/tester/tester-val_mask.txt filter=lfs diff=lfs merge=lfs -text
14 changes: 7 additions & 7 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,11 @@ jobs:
shell: bash -l {0}
strategy:
matrix:
build-type: ['Release', 'Debug']
sanitizer-type: ['nosan', 'san']
exclude:
- build-type: 'Debug'
sanitizer-type: 'san'
build-type: ['Release']
sanitizer-type: ['nosan']
# exclude:
# - build-type: 'Debug'
# sanitizer-type: 'san'
needs: docker-create-ubuntu-2204

steps:
Expand Down Expand Up @@ -105,9 +105,9 @@ jobs:
make docker

- name: Run Tests
timeout-minutes: 5
timeout-minutes: 15
run: |
CONTAINER_CMD="make run-tests" \
CONTAINER_CMD="bash -lc 'source /opt/intel/oneapi/setvars.sh && make run-tests'" \
IMAGE_NAME="${{ env.IMAGE_NAME }}" \
VERSION="${{ env.IMAGE_VERSION }}" \
make docker
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,12 @@ run-tests:
@ctest --test-dir build -R wmd --verbose
@ctest --test-dir build -R large-vec --verbose
@ctest --test-dir build -R compile-lscsr --verbose
@ctest --test-dir build/libgnn/test --verbose

# this command is slow since hooks are not stored in the container image
# this is mostly for CI use
docker-pre-commit:
@docker --context ${CONTAINER_CONTEXT} run --rm \
-v ${SRC_DIR}/:${CONTAINER_SRC_DIR} --privileged \
--workdir=${CONTAINER_WORKDIR} -t \
${IMAGE_NAME}:${VERSION} bash -lc "git config --global --add safe.directory /pando-galois && make hooks && make pre-commit"
${IMAGE_NAME}:${VERSION} bash -lc "git config --global --add safe.directory /galois && make hooks && make pre-commit"
3 changes: 3 additions & 0 deletions inputs/cora/cora-dims.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora-feat.bin
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora-feats.bin
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora-labels.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora-test_mask.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora-train_mask.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora-val_mask.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora.csgr
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora.el
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora.features
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora.ft
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora.gr
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora.sgr
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora_edgelist.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora_full.npz
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/cora/cora_labels.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-dims.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-feats.bin
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-labels.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-mlabels.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-test-feats.bin
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-test-labels-dims.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-test-labels.bin
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-test-mapping.bin
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-test_mask.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-train_mask.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester-val_mask.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester.csgr
Git LFS file not shown
3 changes: 3 additions & 0 deletions inputs/tester/tester.el
Git LFS file not shown
32 changes: 32 additions & 0 deletions libcusp/include/galois/graphs/NewGeneric.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "shad/ShadGraphConverter.h"

#include <optional>
#include <filesystem>
#include <sstream>

#define CUSP_PT_TIMER 0
Expand Down Expand Up @@ -236,6 +237,8 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
"GraphPartitioningTime", GRNAME);
Tgraph_construct.start();

std::cout << "CuSP side file path:" << localGraphFileName << "\n" << std::flush;

if (readFromFile) {
galois::gDebug("[", base_DistGraph::id,
"] Reading local graph from file ", localGraphFileName);
Expand All @@ -245,6 +248,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
}

galois::graphs::OfflineGraph* offlineGraph{nullptr};
std::cout << "offline graph starts..\n" << std::flush;

std::string host_prefix =
std::string("[") +
Expand All @@ -255,6 +259,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
galois::graphs::BufferedGraph<EdgeTy> bufGraph;
bufGraph.resetReadCounters();

std::cout << "offline graph starts.. 1\n" << std::flush;
std::vector<unsigned> dummy;
// not actually getting masters, but getting assigned readers for nodes
if (masterBlockFile == "") {
Expand Down Expand Up @@ -302,7 +307,18 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
md, base_DistGraph::numGlobalNodes, base_DistGraph::numGlobalEdges,
shadConverter.getOutIndexBuffer(), dummy, nodeWeight, edgeWeight);
} else {
std::cout << filename << " is given to offline graph\n";
if (FILE* fp = fopen(filename.c_str(), "r")) {
std::cout << "succeeded to read the input file at NewGeneric.h\n" << std::flush;
fclose(fp);
} else {
std::cout << "failed to read the input file at NewGeneric.h\n" << std::flush;
}

std::filesystem::path p{filename.c_str()};
std::cout << "File size:" << std::filesystem::file_size(p) << "\n" << std::flush;
offlineGraph = new galois::graphs::OfflineGraph(filename);
std::cout << "offline graph reading failed\n" << std::flush;
base_DistGraph::numGlobalNodes = offlineGraph->size();
base_DistGraph::numGlobalEdges = offlineGraph->sizeEdges();
base_DistGraph::computeMasters(md, *offlineGraph, dummy, nodeWeight,
Expand All @@ -316,14 +332,17 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
base_DistGraph::readersFromFile(*offlineGraph, masterBlockFile);
}

std::cout << "offline graph starts.. 2\n" << std::flush;
graphPartitioner = std::make_unique<Partitioner>(
host, _numHosts, base_DistGraph::numGlobalNodes,
base_DistGraph::numGlobalEdges);
// TODO abstract this away somehow
graphPartitioner->saveGIDToHost(base_DistGraph::gid2host);

std::cout << "offline graph starts.. 3\n" << std::flush;
// get training nodes and split evenly among hosts
std::vector<uint32_t> trainPoints = this->getGNNBreakpoints(filename);
std::cout << "offline graph starts.. 4\n" << std::flush;
// TODO(hc)
if (!trainPoints.empty()) {
std::vector<unsigned> testDistribution =
Expand Down Expand Up @@ -359,6 +378,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
galois::gWarn("partitioning policy used doesn't use trainpoints");
}
}
std::cout << "offline graph starts.. 5\n" << std::flush;

// signifies how many outgoing edges a particular host should expect from
// this host
Expand All @@ -377,6 +397,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
hasIncomingEdge.resize(base_DistGraph::numHosts);
}

std::cout << "offline graph starts.. 6\n" << std::flush;
// phase 0

galois::gDebug("[", base_DistGraph::id, "] Starting graph reading.");
Expand All @@ -401,6 +422,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
host_prefix);
}

std::cout << "offline graph starts.. 7\n" << std::flush;
graphReadTimer.stop();
galois::gDebug("[", base_DistGraph::id, "] Reading graph complete.");

Expand All @@ -414,6 +436,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
galois::gDebug("[", base_DistGraph::id, "] Master assignment complete.");
}

std::cout << "offline graph starts.. 8\n" << std::flush;
galois::StatTimer inspectionTimer("EdgeInspection", GRNAME);
inspectionTimer.start();
bufGraph.resetReadCounters();
Expand All @@ -439,6 +462,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
edgeCutInspection(bufGraph, inspectionTimer, edgeOffset,
prefixSumOfEdges);
}
std::cout << "offline graph starts.. 9\n" << std::flush;
// inspection timer is stopped in edgeInspection function

// flip partitioners that have a master assignment phase to stage 2
Expand All @@ -448,6 +472,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
graphPartitioner->enterStage2();
}

std::cout << "offline graph starts.. 10\n" << std::flush;
// get memory back from inspection metadata
numOutgoingEdges.clear();
hasIncomingEdge.clear();
Expand All @@ -463,6 +488,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
base_DistGraph::numEdges);
base_DistGraph::graph.constructNodes();

std::cout << "offline graph starts.. 11\n" << std::flush;
// edge end fixing
auto& base_graph = base_DistGraph::graph;
galois::do_all(
Expand All @@ -477,10 +503,12 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
freeVector(prefixSumOfEdges); // should no longer use this variable
galois::CondStatTimer<MORE_DIST_STATS> TfillMirrors("FillMirrors", GRNAME);

std::cout << "offline graph starts.. 12\n" << std::flush;
TfillMirrors.start();
fillMirrors();
TfillMirrors.stop();

std::cout << "offline graph starts.. 13\n" << std::flush;
if (_edgeStateRounds > 1) {
// reset edge load since we need exact same answers again
resetEdgeLoad();
Expand All @@ -495,6 +523,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
bufGraph.resetAndFree();
}

std::cout << "offline graph starts.. 14\n" << std::flush;
// Finalization

// TODO this is a hack; fix it somehow
Expand All @@ -517,6 +546,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
galois::CondStatTimer<MORE_DIST_STATS> Tthread_ranges("ThreadRangesTime",
GRNAME);

std::cout << "offline graph starts.. 15\n" << std::flush;
Tthread_ranges.start();
base_DistGraph::determineThreadRanges();
Tthread_ranges.stop();
Expand All @@ -528,6 +558,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
Tgraph_construct.stop();
galois::gDebug("[", base_DistGraph::id, "] Graph construction complete.");

std::cout << "offline graph starts.. 16\n" << std::flush;
if (useWMD) {
// Different from the gr format file that has been used by Galois
// and does not contain node data in the file,
Expand All @@ -537,6 +568,7 @@ class NewDistGraphGeneric : public DistGraph<NodeTy, EdgeTy> {
assignNodeDataFromSHADProp(&shadConverter);
}

std::cout << "offline graph starts.. 17\n" << std::flush;
// report state rounds
if (base_DistGraph::id == 0) {
galois::runtime::reportStat_Single(GRNAME, "CuSPStateRounds",
Expand Down
11 changes: 4 additions & 7 deletions libcusp/test/shad-dist-graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,10 @@ int main() {
sumGlobalNodes += graph->numMasters();
sumGlobalEdges += graph->sizeEdges();

uint64_t reducedSumGlobalNodes = sumGlobalNodes.reduce();
uint64_t reducedSumGlobalEdges = sumGlobalEdges.reduce();

assert(reducedSumGlobalNodes == numNodes);
assert(reducedSumGlobalNodes == graph->globalSize());
assert(reducedSumGlobalEdges == numEdges);
assert(reducedSumGlobalEdges == graph->globalSizeEdges());
assert(sumGlobalNodes.reduce() == numNodes);
assert(sumGlobalNodes.reduce() == graph->globalSize());
assert(sumGlobalEdges.reduce() == numEdges);
assert(sumGlobalEdges.reduce() == graph->globalSizeEdges());

std::cout << "Num. nodes/edges tests has been passed\n";

Expand Down
Loading
Loading