cherry pick fix data structure for nebula datasource (#33)

vesoft-inc · Jan 6, 2022
commit 0dc354c (1 parent: 92940e6)

Showing 2 changed files with 6 additions and 4 deletions.
diff --git a/nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/config/Configs.scala b/nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/config/Configs.scala
@@ -197,6 +197,8 @@ case class NebulaReadConfigEntry(address: String = "",
                                  space: String = "",
                                  labels: List[String] = List(),
                                  weightCols: List[String] = List()) {
+  assert(weightCols.isEmpty || labels.size == weightCols.size,
+         "weightCols must be empty or has the same amount values with labels")
   override def toString: String = {
     s"NebulaReadConfigEntry: " +
       s"{address: $address, space: $space, labels: ${labels.mkString(",")}, " +

diff --git a/nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/reader/DataReader.scala b/nebula-algorithm/src/main/scala/com/vesoft/nebula/algorithm/reader/DataReader.scala
@@ -48,11 +48,11 @@ class NebulaReader(spark: SparkSession, configs: Configs, partitionNum: String)
         .withReturnCols(returnCols.toList)
         .withPartitionNum(partition)
         .build()
-      if (dataset == null) {
-        dataset = spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF()
-      } else {
-        dataset = dataset.union(spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF())
+      var df = spark.read.nebula(config, nebulaReadEdgeConfig).loadEdgesToDF()
+      if (weights.nonEmpty) {
+        df = df.select("_srcId", "_dstId", weights(i))
       }
+      dataset = if (dataset == null) df else dataset.union(df)
     }
     dataset
   }