From 1846a4c6bfd397b81985528cece5ea8a9bd435a5 Mon Sep 17 00:00:00 2001 From: acezen Date: Tue, 21 May 2024 03:33:23 +0000 Subject: [PATCH] use custom schema for edge --- .../graphar/example/LdbcSample2GraphAr.scala | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/example/LdbcSample2GraphAr.scala b/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/example/LdbcSample2GraphAr.scala index 40c52da33..314cb397c 100644 --- a/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/example/LdbcSample2GraphAr.scala +++ b/maven-projects/spark/graphar/src/main/scala/org/apache/graphar/example/LdbcSample2GraphAr.scala @@ -22,6 +22,12 @@ package org.apache.graphar.example import org.apache.graphar.graph.GraphWriter import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.types.{ + IntegerType, + StringType, + StructField, + StructType +} object LdbcSample2GraphAr { @@ -86,10 +92,18 @@ object LdbcSample2GraphAr { writer.PutVertexData("Person", person_df) // read edges with type "Person"->"Knows"->"Person" from given path as a DataFrame + // FIXME(@acezen): the schema should be inferred from the data + val schema = StructType( + Array( + StructField("src", IntegerType, true), + StructField("dst", IntegerType, true), + StructField("creationDate", StringType, true) + ) + ) val produced_edge_df = spark.read .option("delimiter", "|") .option("header", "true") - .option("inferSchema", "true") + .schema(schema) .format("csv") .load(personKnowsPersonInputPath) // put into writer, source vertex label is "Person", edge label is "Knows"