From 12eff4c7892599154dcd2a145e206dd02c9ff5a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Wed, 14 Jun 2023 13:48:38 +0800 Subject: [PATCH] add graphx bench results --- .../java/grape-graphx/performance.md | 74 +++++++++++++++++++ analytical_engine/java/performance.md | 5 ++ 2 files changed, 79 insertions(+) create mode 100644 analytical_engine/java/grape-graphx/performance.md diff --git a/analytical_engine/java/grape-graphx/performance.md b/analytical_engine/java/grape-graphx/performance.md new file mode 100644 index 000000000000..654d3cd87867 --- /dev/null +++ b/analytical_engine/java/grape-graphx/performance.md @@ -0,0 +1,74 @@ +# Performance + +We test GraphScope for GraphX in end-to-end scenarios to measure the performance improvement of graph computing on Spark GraphX. This includes: +- Graph loading: loading graphs from the file system into memory in the form of a graph +- RDD Op: transforming the graph using RDD-defined operators +- Pregel computing: running graph algorithms based on GraphX Pregel, such as SSSP, PageRank, and CC + +## Settings: + +| dataset | num of vertices | num of edges | avg degree | +|:--------------: |:---------------: |:-------------: |:-----------: | +| datagen-9_0-fb | 12,857,672 | 1,049,527,226 | 81.6 | +| com-friendster | 65,608,366 | 1,806,067,135 | 27.5 | + +The following tests are run on a 4-node cluster, each node with 48 cores, 96 cpu. + + +## End-to-End time + +By using ORC-format files as input, the time for graph loading and converting it to ```RDD[(Long, Long)]``` is the same for GraphScope and GraphX. 
+ +### On com-friendster +#### 256 partitions + +| Algorithm | GS Graph Loading | GraphX Graph Loading | GS Query Time | GraphX Query Time | GS E2E Time| GraphX E2E Time | Performance Gain Query | Performance Gain E2E | +|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------: | +| PageRank | 108s | 106s | 152s | 1129s | 260s | 1235s | 7.4x | 4.8x | +| SSSP | 108s | 106s | 31s | 164s | 139s | 270s | 5.3x | 1.9x | +| CC | 108s | 106s | 58s | 228s | 166s | 334s | 3.9x | 2x | + + +#### 320 partitions + +| Algorithm | GS Graph Loading | GraphX Graph Loading | GS Query Time | GraphX Query Time | GS E2E Time| GraphX E2E Time | Performance Gain Query | Performance Gain E2E | +|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------: | +| PageRank | 100s | 100s | 158s | 1089s | 268s | 1189s | 6.5x | 4.4x | +| SSSP | 100s | 100s | 31s | 156s | 131s | 256s | 5x | 2x | +| CC | 100s | 100s | 62s | 219s | 162s | 319s | 2.8x | 2x | + +#### 384 partitions +| Algorithm | GS Graph Loading | GraphX Graph Loading | GS Query Time | GraphX Query Time | GS E2E Time| GraphX E2E Time | Performance Gain Query | Performance Gain E2E | +|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------: | +| PageRank | 99s | 98s | 154s | 1028s | 253s | 1126s | 6.7x | 4.5x | +| SSSP | 99s | 98s | 33s | 163s | 132s | 261s | 5x | 2x | +| CC | 99s | 98s | 60s | 223s | 159s | 321s | 2.8x | 2x | + + + +### On Datagen-9_0-fb + +#### 256 partitions + +| Algorithm | GS Graph Loading | GraphX Graph Loading | GS Query Time | GraphX Query Time | GS E2E Time| GraphX E2E Time | Performance Gain Query | Performance Gain E2E | 
+|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------: | +| PageRank | 70s | 84s | 90s | 430s | 160s | 514s | 4.8x | 3.2x | +| SSSP | 70s | 84s | 14s | 45s | 84s | 129s | 3x | 1.5x | +| CC | 70s | 84s | 36s | 74s | 106s | 158s | 2x | 1.5x | + + +#### 320 partitions + +| Algorithm | GS Graph Loading | GraphX Graph Loading | GS Query Time | GraphX Query Time | GS E2E Time| GraphX E2E Time | Performance Gain Query | Performance Gain E2E | +|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------: | +| PageRank | 68s | 76s | 87s | 406s | 155s | 482s | 4.7x | 3.1x | +| SSSP | 68s | 76s | 13s | 40s | 81s | 116s | 3x | 1.4x | +| CC | 68s | 76s | 30s | 53s | 98s | 129s | 1.8x | 1.3x | + +#### 384 partitions + +| Algorithm | GS Graph Loading | GraphX Graph Loading | GS Query Time | GraphX Query Time | GS E2E Time| GraphX E2E Time | Performance Gain Query | Performance Gain E2E | +|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------:|:--------------: | +| PageRank | 68s | 73s | 82s | 395s | 150s | 468s | 4.8x | 3x | +| SSSP | 68s | 73s | 13s | 40s | 81s | 113s | 3x | 1.4x | +| CC | 68s | 73s | 30s | 50s | 98s | 143s | 1.7x | 1.4x | diff --git a/analytical_engine/java/performance.md b/analytical_engine/java/performance.md index d757febd648b..0e3597672cf9 100644 --- a/analytical_engine/java/performance.md +++ b/analytical_engine/java/performance.md @@ -136,3 +136,8 @@ pr_delta set to 0.85, running for 50 rounds. 
| C++ time | 24.15 | 12.46 | 6.59 | 3.59 | 2.11 | 1.56 | 1.53 | | Java time | 80.77 | 40.94 | 20.87 | 14.55 | 8.14 | 5.13 | 5.15 | | Java(+LLVM4JNI) time | 49.80 | 24.15 | 10.54 | 6.63 | 3.83 | 2.95 | 3.42 | + + +## GraphScope-GraphX Integration + +We also evaluate the performance of `grape-graphx`, the integration of GraphScope on Spark GraphX. See [grape-graphx performance](grape-graphx/performance.md). \ No newline at end of file