From 83f74a6d2bc33dbe83bd12d8982bb32858281d09 Mon Sep 17 00:00:00 2001
From: jasinliu <939282975@qq.com>
Date: Fri, 2 Aug 2024 23:43:03 +0800
Subject: [PATCH] feat(spark): init graphar cli
---
maven-projects/spark/graphar-cli/README.md | 37 +++
maven-projects/spark/graphar-cli/import.json | 102 ++++++++
maven-projects/spark/graphar-cli/pom.xml | 234 ++++++++++++++++++
.../src/main/resources/log4j.properties | 43 ++++
.../scala/org/apache/graphar/cli/Main.scala | 62 +++++
.../apache/graphar/cli/commands/Check.scala | 110 ++++++++
.../apache/graphar/cli/commands/Import.scala | 160 ++++++++++++
.../apache/graphar/cli/commands/Show.scala | 65 +++++
.../org/apache/graphar/cli/util/Config.scala | 75 ++++++
.../org/apache/graphar/cli/util/Utils.scala | 37 +++
maven-projects/spark/pom.xml | 6 +
11 files changed, 931 insertions(+)
create mode 100644 maven-projects/spark/graphar-cli/README.md
create mode 100644 maven-projects/spark/graphar-cli/import.json
create mode 100644 maven-projects/spark/graphar-cli/pom.xml
create mode 100644 maven-projects/spark/graphar-cli/src/main/resources/log4j.properties
create mode 100644 maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/Main.scala
create mode 100644 maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Check.scala
create mode 100644 maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Import.scala
create mode 100644 maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Show.scala
create mode 100644 maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/util/Config.scala
create mode 100644 maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/util/Utils.scala
diff --git a/maven-projects/spark/graphar-cli/README.md b/maven-projects/spark/graphar-cli/README.md
new file mode 100644
index 000000000..73dbb1a0d
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/README.md
@@ -0,0 +1,37 @@
+# GraphAr Cli Tool
+
+This is a project that depends on the GraphAr spark library.
+
+## Building
+
+To build this project, we need to use `Maven` to enable the
+`datasource` and `graphar-cli` profiles in the `spark`
+directory (that is, the parent directory of the current path).
+
+```bash
+ $ git clone https://github.com/apache/incubator-graphar.git
+ $ cd incubator-graphar
+ $ cd mavens-projects/spark
+```
+
+
+Build the package:
+
+```bash
+ $ mvn clean install -DskipTests -P datasources-32,graphar-cli
+```
+
+## Running
+
+The build produces a shaded Jar that can be run using the `spark-submit` command:
+
+```bash
+ $ cd graphar-cli/target
+ $ spark-submit --class org.apache.graphar.cli.Main graphar-cli-0.12.0-SNAPSHOT-shaded.jar
+```
+
+For a shorter command-line invocation, add an alias to your shell like this:
+
+```
+alias graphar="spark-submit --class org.apache.graphar.cli.Main /path/to/graphar-cli-0.12.0-SNAPSHOT-shaded.jar"
+```
diff --git a/maven-projects/spark/graphar-cli/import.json b/maven-projects/spark/graphar-cli/import.json
new file mode 100644
index 000000000..3a1b296f2
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/import.json
@@ -0,0 +1,102 @@
+{
+ "graphar": {
+ "path": "/tmp/graphar/movie",
+ "name": "MovieGraph",
+ "vertexChunkSize": 100,
+ "edgeChunkSize": 1024,
+ "fileType": "parquet"
+ },
+ "sourceType": "csv",
+ "neo4j": {
+ "url": "bolt://localhost:7687",
+ "username": "neo4j",
+ "password": "neo4j"
+ },
+ "schema": {
+ "vertices": [
+ {
+ "label": "Person",
+ "primary": "name",
+ "properties": [
+ {
+ "name": "name",
+ "type": "string"
+ },
+ {
+ "name": "born",
+ "type": "int",
+ "nullable": true
+ }
+ ],
+ "propertyGroups": [
+ [
+ "name",
+ "born"
+ ]
+ ],
+ "source": {
+ "path": "/tmp/graphar/movie/Person.csv",
+ "delimiter": ",",
+ "columns": [
+ "name",
+ "born"
+ ]
+ }
+ },
+ {
+ "label": "Movie",
+ "primary": "title",
+ "properties": [
+ {
+ "name": "title",
+ "type": "string"
+ },
+ {
+ "name": "tagline",
+ "type": "string",
+ "nullable": true
+ }
+ ],
+ "propertyGroups": [
+ [
+ "title",
+ "tagline"
+ ]
+ ],
+ "source": {
+ "path": "/tmp/graphar/movie/Movie.csv",
+ "delimiter": ","
+ }
+ }
+ ],
+ "edges": [
+ {
+ "label": "PRODUCED",
+ "adjListType": "",
+ "srcLabel": "Person",
+ "srcProp": "name",
+ "dstLabel": "Movie",
+ "dstProp": "title",
+ "source": {
+ "path": "/tmp/graphar/movie/Produced.csv",
+ "delimiter": ","
+ }
+ },
+ {
+ "label": "REVIEWED",
+ "srcLabel": "Person",
+ "srcProp": "name",
+ "dstLabel": "Movie",
+ "dstProp": "title",
+ "properties": [
+ "rating",
+ "summary"
+ ],
+ "source": {
+ "path": "/tmp/graphar/movie/Reviewed.csv",
+ "delimiter": ","
+ }
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/maven-projects/spark/graphar-cli/pom.xml b/maven-projects/spark/graphar-cli/pom.xml
new file mode 100644
index 000000000..36b18228a
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/pom.xml
@@ -0,0 +1,234 @@
+
+
+
+
+ 4.0.0
+
+
+ org.apache.graphar
+ spark
+ ${graphar.version}
+ ../pom.xml
+
+
+ graphar-cli
+ ${graphar.version}
+ jar
+
+
+ 1.7.30
+
+
+
+
+ info.picocli
+ picocli
+ 4.7.6
+
+
+ org.slf4j
+ slf4j-api
+ ${slf4j.version}
+
+
+ org.slf4j
+ slf4j-log4j12
+ ${slf4j.version}
+ ${deps.scope}
+
+
+ log4j
+ log4j
+ 1.2.17
+ ${deps.scope}
+
+
+ org.apache.graphar
+ graphar-commons
+ ${graphar.version}
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+
+
+
+
+ org.scala-tools
+ maven-scala-plugin
+ 2.15.2
+
+ ${scala.version}
+
+ -target:jvm-1.8
+
+
+ -Xss4096K
+
+
+
+
+ scala-compile
+
+ compile
+
+
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+
+
+
+
+ scala-test-compile
+
+ testCompile
+
+
+
+
+
+ org.scalatest
+ scalatest-maven-plugin
+ 2.0.0
+
+
+ test
+
+ test
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-shade-plugin
+ 2.1
+
+
+ package
+
+ shade
+
+
+ false
+ true
+
+
+
+ *:*
+
+
+
+
+ *:*
+
+ META-INF/*.SF
+ META-INF/*.DSA
+ META-INF/*.RSA
+ **/log4j.properties
+
+
+
+
+
+ reference.conf
+
+
+
+
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+ 4.8.0
+
+
+
+ compile
+ testCompile
+
+
+
+
+
+ -Xms64m
+ -Xmx1024m
+
+
+ -Ywarn-unused
+
+
+
+ org.scalameta
+ semanticdb-scalac_2.12.10
+ 4.3.24
+
+
+
+
+
+ io.github.evis
+ scalafix-maven-plugin_2.13
+ 0.1.8_0.11.0
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+
+
+ attach-sources
+
+ jar
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+
+
+ attach-javadocs
+
+ jar
+
+
+
+
+
+ maven-site-plugin
+ 3.7.1
+
+
+
+
\ No newline at end of file
diff --git a/maven-projects/spark/graphar-cli/src/main/resources/log4j.properties b/maven-projects/spark/graphar-cli/src/main/resources/log4j.properties
new file mode 100644
index 000000000..3eccfe10d
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/src/main/resources/log4j.properties
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+# debug log4j configuration
+#log4j.debug=true
+
+# by default, log anything but cli console to component logger
+log4j.rootLogger = WARN, component
+
+# Set the appender named console to be a ConsoleAppender
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+
+# CLI console output
+log4j.logger.org.apache.graphar.cli=INFO, console
+log4j.additivity.org.apache.graphar.cli=false
+
+# Define the layout for console appender
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%m%n
+
+# Change to turn on component logging
+log4j.appender.component=org.apache.log4j.varia.NullAppender
+
+# Define the layout for component appender
+log4j.appender.component.layout=org.apache.log4j.PatternLayout
+log4j.appender.component.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p :: %m [%C]%n
+
diff --git a/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/Main.scala b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/Main.scala
new file mode 100644
index 000000000..5cf6560f5
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/Main.scala
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.cli
+
+import org.apache.commons.logging.LogFactory
+import org.apache.graphar.cli.commands.{Check, Import, Show}
+import org.apache.log4j.PropertyConfigurator
+import org.slf4j.{Logger, LoggerFactory}
+import picocli.CommandLine
+import picocli.CommandLine.Command
+
+import java.util.concurrent.Callable
+
+/**
+ * Main class for the GraphAr Cli Tool.
+ */
+@Command(
+ name = "main",
+ subcommands = Array(classOf[Show], classOf[Check], classOf[Import]),
+ mixinStandardHelpOptions = true,
+ description = Array("GraphAr Cli Tool")
+)
+class Main extends Callable[Int] {
+ val console: Logger = LoggerFactory.getLogger(classOf[Main])
+ def call: Int = {
+ console.info(
+ "GraphAr Cli Tool. Use -h or --help to see available commands."
+ )
+ 1
+ }
+}
+
+object Main {
+ def main(args: Array[String]): Unit = {
+ PropertyConfigurator.configure(
+ classOf[Main].getResource("/log4j.properties")
+ )
+ LogFactory.getFactory.setAttribute(
+ "org.apache.commons.logging.Log",
+ "org.apache.commons.logging.impl.Log4JLogger"
+ )
+ val exitCode = new CommandLine(new Main()).execute(args: _*)
+ System.exit(exitCode)
+ }
+}
diff --git a/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Check.scala b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Check.scala
new file mode 100644
index 000000000..096f9f148
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Check.scala
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.cli.commands
+
+import org.apache.graphar.GraphInfo
+import org.apache.graphar.cli.Main
+import org.apache.spark.sql.SparkSession
+import org.slf4j.Logger
+import picocli.CommandLine.{Command, Option, ParentCommand}
+
+import java.util.concurrent.Callable
+
+/**
+ * Check the graphar meta information.
+ */
+@Command(
+ name = "show",
+ mixinStandardHelpOptions = true,
+ description = Array("Check the graphar meta information")
+)
+class Check extends Callable[Int] {
+ @ParentCommand
+ private var parent: Main = _
+
+ @Option(
+ names = Array("-p", "--path"),
+ description = Array(
+ "The path of the YAML file."
+ ),
+ required = true
+ )
+ private var path: String = _
+
+// TODO(ljj): Support check all the meta information and files.
+// @Option(
+// names = Array("-a", "--all"),
+// description = Array(
+// "Check all the meta information and files."
+// )
+// )
+// private var checkALL: Boolean = false
+
+ def call(): Int = {
+ val console: Logger = parent.console
+ val spark = SparkSession.builder
+ .appName("GraphArCli")
+ .master("local[*]")
+ .getOrCreate()
+
+ try {
+ val graphInfo = GraphInfo.loadGraphInfo(path, spark)
+ val vertexInfos = graphInfo.getVertexInfos()
+ for ((vertexLabel, vertexInfo) <- vertexInfos) {
+ if (!vertexInfo.isValidated()) {
+ console.error(s"VertexInfo ${vertexLabel} is not validated.")
+ return 0
+ }
+ console.info(s"VertexInfo ${vertexLabel} is validated.")
+ }
+ val vertexLabels = vertexInfos.keys.toList
+ val edgeInfos = graphInfo.getEdgeInfos()
+ for ((concatKeys, edgeInfo) <- edgeInfos) {
+ if (!edgeInfo.isValidated()) {
+ console.error(s"EdgeInfo ${concatKeys} is not validated.")
+ return 0
+ }
+ console.info(s"EdgeInfo ${concatKeys} is validated.")
+ val srcVertexLabel = edgeInfo.src_label
+ if (!vertexLabels.contains(srcVertexLabel)) {
+ console.error(
+ s"The src_label ${srcVertexLabel} of the edge ${concatKeys} does not exist."
+ )
+ return 0
+ }
+ val dstVertexLabel = edgeInfo.dst_label
+ if (!vertexLabels.contains(dstVertexLabel)) {
+ console.error(
+ s"The dst_label ${dstVertexLabel} of the edge ${concatKeys} does not exist."
+ )
+ return 0
+ }
+ }
+ console.info("All vertexInfos and edgeInfos are validated.")
+ spark.close()
+ 1
+ } catch {
+ case e: Exception =>
+ console.error(s"Check failed: ${e.getMessage}")
+ 0
+ }
+
+ }
+}
diff --git a/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Import.scala b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Import.scala
new file mode 100644
index 000000000..25a43c779
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Import.scala
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.cli.commands
+
+import org.apache.graphar.cli.Main
+import org.apache.graphar.cli.util.Config
+import org.apache.graphar.cli.util.Utils.parseStringToSparkType
+import org.apache.graphar.graph.GraphWriter
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.types.{StructField, StructType}
+import org.json4s.{DefaultFormats, JValue}
+import org.json4s.jackson.JsonMethods.parse
+import org.slf4j.Logger
+import picocli.CommandLine.{Command, Option, ParentCommand}
+
+import java.util.concurrent.Callable
+import scala.io.Source
+
+/**
+ * Read the configuration file and import data.
+ */
+@Command(
+ name = "import",
+ mixinStandardHelpOptions = true,
+ description = Array("Show the GraphAr information.")
+)
+class Import extends Callable[Int] {
+ @ParentCommand
+ private var parent: Main = _
+ @Option(
+ names = Array("-c", "--config"),
+ description = Array(
+ "The path of the import config file."
+ ),
+ required = true
+ )
+ private var confPath: String = _
+
+ def call(): Int = {
+ val console: Logger = parent.console
+ val source = Source.fromFile(confPath)
+ val confString =
+ try source.mkString
+ finally source.close()
+ val confJson: JValue = parse(confString)
+
+// TODO: validate the config file
+ implicit val formats: DefaultFormats.type = DefaultFormats
+ val config = (confJson \ "graphar").extract[Config]
+ console.info("Config read successfully.")
+
+ config.sourceType match {
+ case "csv" =>
+ val spark = SparkSession.builder
+ .appName("GraphAr Cli")
+ .master("local[*]")
+ .getOrCreate()
+ val writer: GraphWriter = new GraphWriter()
+ val schema = config.schema
+
+ for (vertex <- schema.vertices) {
+ val source = vertex.source
+ val dfSchema = StructType(
+ vertex.properties.map(property =>
+ StructField(
+ property.name,
+ parseStringToSparkType(property.`type`),
+ nullable = property.nullable.getOrElse(true)
+ )
+ )
+ )
+ val vertex_df = spark.read
+ .option("header", "true")
+ .schema(dfSchema)
+ .option("delimiter", source.delimiter)
+ .csv(source.path.get)
+
+ writer.PutVertexData(
+ vertex.label,
+ vertex_df,
+ primaryKey = vertex.primary
+ )
+ }
+
+ for (edge <- schema.edges) {
+ val source = edge.source
+ val srcStructField = StructField(
+ edge.srcProp,
+ parseStringToSparkType(
+ schema.vertices
+ .find(_.label == edge.srcLabel)
+ .get
+ .properties
+ .find(_.name == edge.srcProp)
+ .get
+ .`type`
+ )
+ )
+ val dstStructField = StructField(
+ edge.dstProp,
+ parseStringToSparkType(
+ schema.vertices
+ .find(_.label == edge.dstLabel)
+ .get
+ .properties
+ .find(_.name == edge.dstProp)
+ .get
+ .`type`
+ )
+ )
+ val dfSchema = StructType(
+ srcStructField +:
+ dstStructField +:
+ edge.properties.map(property =>
+ StructField(
+ property.name,
+ parseStringToSparkType(property.`type`),
+ nullable = property.nullable.getOrElse(true)
+ )
+ )
+ )
+ val edge_df = spark.read
+ .option("header", "true")
+ .schema(dfSchema)
+ .option("delimiter", source.delimiter)
+ .csv(source.path.get)
+
+ writer.PutEdgeData(
+ (edge.srcLabel, edge.label, edge.dstLabel),
+ edge_df
+ )
+ }
+
+ }
+
+ val spark = SparkSession.builder
+ .appName("GraphAr Cli")
+ .master("local[*]")
+ .getOrCreate()
+ spark.close()
+ 1
+ }
+}
diff --git a/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Show.scala b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Show.scala
new file mode 100644
index 000000000..f4865efd5
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/commands/Show.scala
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.cli.commands
+
+import org.apache.graphar.GraphInfo
+import org.apache.graphar.cli.Main
+import org.apache.spark.sql.SparkSession
+import org.slf4j.Logger
+import picocli.CommandLine.{Command, Option, ParentCommand}
+
+import java.util.concurrent.Callable
+
+/**
+ * Show the graphar information.
+ */
+@Command(
+ name = "show",
+ mixinStandardHelpOptions = true,
+ description = Array("Show the GraphAr information.")
+)
+class Show extends Callable[Int] {
+ @ParentCommand
+ private var parent: Main = _
+
+ @Option(
+ names = Array("-p", "--path"),
+ description = Array(
+ "The path of the YAML file."
+ ),
+ required = true
+ )
+ private var path: String = _
+
+// TODO(ljj): Support show more information and more options.
+
+ def call(): Int = {
+ val console: Logger = parent.console
+ val spark = SparkSession
+ .builder()
+ .enableHiveSupport()
+ .master("local[*]")
+ .getOrCreate()
+ val graphInfo = GraphInfo.loadGraphInfo(path, spark)
+ console.info(graphInfo.dump())
+ spark.close()
+ 1
+ }
+}
diff --git a/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/util/Config.scala b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/util/Config.scala
new file mode 100644
index 000000000..d6558bc3a
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/util/Config.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.cli.util
+
+case class GraphAr(
+ path: String,
+ name: String,
+ vertexChunkSize: Int,
+ edgeChunkSize: Int,
+ fileType: String
+)
+
+case class Neo4j(url: String, username: String, password: String)
+
+case class Property(
+ name: String,
+ `type`: String,
+ nullable: Option[Boolean] = None
+)
+
+case class Source(
+ `type`: String,
+ path: Option[String] = None,
+ url: Option[String] = None,
+ delimiter: String = ","
+)
+
+case class Vertex(
+ label: String,
+ primary: String,
+ properties: List[Property],
+ propertyGroups: Option[List[List[String]]] = None,
+ source: Source
+)
+
+case class Edge(
+ label: String,
+ adjListType: String = "ordered_by_source",
+ srcLabel: String,
+ srcProp: String,
+ dstLabel: String,
+ dstProp: String,
+ properties: List[Property],
+ propertyGroups: Option[List[List[String]]] = None,
+ source: Source
+)
+
+case class Schema(
+ vertices: List[Vertex],
+ edges: List[Edge]
+)
+
+case class Config(
+ graphar: GraphAr,
+ sourceType: String,
+ schema: Schema,
+ neo4j: Option[Neo4j]
+)
diff --git a/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/util/Utils.scala b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/util/Utils.scala
new file mode 100644
index 000000000..2a3175dd2
--- /dev/null
+++ b/maven-projects/spark/graphar-cli/src/main/scala/org/apache/graphar/cli/util/Utils.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.graphar.cli.util
+
+import org.apache.spark.sql.types._
+
+object Utils {
+ def parseStringToSparkType(dataType: String): DataType = {
+ dataType match {
+ case "bool" => BooleanType
+ case "int32" => IntegerType
+ case "int64" => LongType
+ case "float" => FloatType
+ case "double" => DoubleType
+ case "string" => StringType
+ case _ =>
+ throw new IllegalArgumentException("Unknown data type: " + dataType)
+ }
+ }
+}
diff --git a/maven-projects/spark/pom.xml b/maven-projects/spark/pom.xml
index 455fb1754..54fa50465 100644
--- a/maven-projects/spark/pom.xml
+++ b/maven-projects/spark/pom.xml
@@ -77,6 +77,12 @@
datasources-33
+
+ graphar-cli
+
+ graphar-cli
+
+