support general jdbc datasource #96

Merged 1 commit on Sep 11, 2022
support general jdbc datasource
Nicole00 committed Sep 8, 2022

This commit was created on GitHub.com and signed with GitHub's verified signature (the key has since expired).
commit 0641da84f55d19780d171f466f342578d0fd6859
@@ -573,6 +573,7 @@ object Configs {
case "CLICKHOUSE" => SourceCategory.CLICKHOUSE
case "POSTGRESQL" => SourceCategory.POSTGRESQL
case "ORACLE" => SourceCategory.ORACLE
case "JDBC" => SourceCategory.JDBC
case _              => throw new IllegalArgumentException(s"${category} not supported")
}
}
@@ -682,6 +683,46 @@ object Configs {
config.getString("table"),
getOrElse(config, "sentence", null)
)
case SourceCategory.JDBC =>
val partitionColumn =
if (config.hasPath("partitionColumn"))
Some(config.getString("partitionColumn"))
else None

val lowerBound =
if (config.hasPath("lowerBound"))
Some(config.getLong("lowerBound"))
else None

val upperBound =
if (config.hasPath("upperBound"))
Some(config.getLong("upperBound"))
else None

val numPartitions =
if (config.hasPath("numPartitions"))
Some(config.getLong("numPartitions"))
else None

val fetchSize =
if (config.hasPath("fetchSize"))
Some(config.getLong("fetchSize"))
else None

JdbcConfigEntry(
SourceCategory.JDBC,
config.getString("url"),
config.getString("driver"),
config.getString("user"),
config.getString("password"),
config.getString("table"),
partitionColumn,
lowerBound,
upperBound,
numPartitions,
fetchSize,
getOrElse(config, "sentence", null)
)
case SourceCategory.KAFKA =>
val intervalSeconds =
if (config.hasPath("interval.seconds")) config.getInt("interval.seconds")
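For reference, a source block that exercises every key parsed above could look like the sketch below, built with Typesafe Config (the `config` API this file reads from). All connection values are hypothetical; url, driver, user, password, and table are read unconditionally, while the partition/fetch keys and sentence are optional:

    import com.typesafe.config.ConfigFactory

    // Hypothetical JDBC source block; keys mirror the hasPath/getString/getLong calls above.
    val jdbcSource = ConfigFactory.parseString(
      """
        |url: "jdbc:mysql://127.0.0.1:3306/test"
        |driver: "com.mysql.cj.jdbc.Driver"
        |user: "root"
        |password: "nebula"
        |table: "person"
        |partitionColumn: "id"
        |lowerBound: 0
        |upperBound: 1000000
        |numPartitions: 10
        |fetchSize: 1000
        |sentence: "select id, name from person"
      """.stripMargin)

    assert(jdbcSource.getLong("numPartitions") == 10L)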
@@ -28,6 +28,7 @@ object SourceCategory extends Enumeration {
val CLICKHOUSE = Value("CLICKHOUSE")
val POSTGRESQL = Value("POSTGRESQL")
val ORACLE = Value("ORACLE")
val JDBC = Value("JDBC")

val SOCKET = Value("SOCKET")
val KAFKA = Value("KAFKA")
@@ -312,3 +313,34 @@ case class OracleConfigEntry(override val category: SourceCategory.Value,
s"Oracle source {url:$url, driver:$driver, user:$user, passwd:$passwd, table:$table, sentence:$sentence}"
}
}

/**
* JdbcConfigEntry
*
* @param url JDBC database url of the form `jdbc:subprotocol:subname`.
* @param table Name of the table in the external database.
* @param partitionColumn the name of a column of integral type that will be used for partitioning.
* @param lowerBound the minimum value of `partitionColumn` used to decide partition stride.
* @param upperBound the maximum value of `partitionColumn` used to decide partition stride.
* @param numPartitions the number of partitions. This, along with `lowerBound` (inclusive) and
*                      `upperBound` (exclusive), forms partition strides for generated WHERE
*                      clause expressions used to split the column `partitionColumn` evenly. When
*                      the input is less than 1, the number is set to 1.
* @param fetchSize the JDBC fetch size, which determines how many rows to fetch per round trip.
*/
case class JdbcConfigEntry(override val category: SourceCategory.Value,
url: String,
driver: String,
user: String,
passwd: String,
table: String,
partitionColumn: Option[String] = None,
lowerBound: Option[Long] = None,
upperBound: Option[Long] = None,
numPartitions: Option[Long] = None,
fetchSize: Option[Long] = None,
override val sentence: String)
extends ServerDataSourceConfigEntry {
override def toString: String = {
s"Jdbc source {url:$url, driver:$driver, user:$user, passwd:$passwd, table:$table, sentence:$sentence}"
}
}
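To make the stride arithmetic concrete: Spark splits the range [lowerBound, upperBound) on partitionColumn into numPartitions parallel queries, one per stride. A sketch with hypothetical values:

    // partitionColumn = "id", lowerBound = 0, upperBound = 1000, numPartitions = 4
    // produce one query per stride, approximately:
    //   partition 0: WHERE id < 250 OR id IS NULL
    //   partition 1: WHERE id >= 250 AND id < 500
    //   partition 2: WHERE id >= 500 AND id < 750
    //   partition 3: WHERE id >= 750
    val stride = (1000L - 0L) / 4 // 250 ids per stride

Note the bounds only shape the strides: rows whose id falls outside [0, 1000) are still read, into the first or last partition.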
@@ -6,8 +6,8 @@
package com.vesoft.nebula.exchange

import org.apache.spark.sql.{DataFrame, SparkSession}

import java.io.File

import com.vesoft.exchange.Argument
import com.vesoft.exchange.common.{CheckPointHandler, ErrorHandler}
import com.vesoft.exchange.common.config.{
@@ -18,6 +18,7 @@ import com.vesoft.exchange.common.config.{
HBaseSourceConfigEntry,
HiveSourceConfigEntry,
JanusGraphSourceConfigEntry,
JdbcConfigEntry,
KafkaSourceConfigEntry,
MaxComputeConfigEntry,
MySQLSourceConfigEntry,
@@ -35,14 +36,15 @@ import com.vesoft.nebula.exchange.reader.{
HiveReader,
JSONReader,
JanusGraphReader,
JdbcReader,
KafkaReader,
MaxcomputeReader,
MySQLReader,
Neo4JReader,
OracleReader,
ORCReader,
PostgreSQLReader,
OracleReader,
ParquetReader,
PostgreSQLReader,
PulsarReader
}
import com.vesoft.exchange.common.processor.ReloadProcessor
@@ -321,6 +323,11 @@ object Exchange {
val reader = new OracleReader(session, oracleConfig)
Some(reader.read())
}
case SourceCategory.JDBC => {
val jdbcConfig = config.asInstanceOf[JdbcConfigEntry]
val reader = new JdbcReader(session, jdbcConfig)
Some(reader.read())
}
case _ => {
LOG.error(s"Data source ${config.category} not supported")
None
@@ -11,6 +11,7 @@ import com.vesoft.exchange.common.config.{
HBaseSourceConfigEntry,
HiveSourceConfigEntry,
JanusGraphSourceConfigEntry,
JdbcConfigEntry,
MaxComputeConfigEntry,
MySQLSourceConfigEntry,
Neo4JSourceConfigEntry,
@@ -317,3 +318,47 @@ class OracleReader(override val session: SparkSession, oracleConfig: OracleConfigEntry)
df
}
}

/**
* Jdbc reader
*/
class JdbcReader(override val session: SparkSession, jdbcConfig: JdbcConfigEntry)
extends ServerBaseReader(session, jdbcConfig.sentence) {
Class.forName(jdbcConfig.driver)
override def read(): DataFrame = {
var dfReader = session.read
.format("jdbc")
.option("url", jdbcConfig.url)
.option("dbtable", jdbcConfig.table)
.option("user", jdbcConfig.user)
.option("password", jdbcConfig.passwd)
.option("driver", jdbcConfig.driver)

if (jdbcConfig.partitionColumn.isDefined) {
dfReader.option("partitionColumn", jdbcConfig.partitionColumn.get)
}
if (jdbcConfig.numPartitions.isDefined) {
dfReader.option("numPartitions", jdbcConfig.numPartitions.get)
}
if (jdbcConfig.lowerBound.isDefined) {
dfReader.option("lowerBound", jdbcConfig.lowerBound.get)
}
if (jdbcConfig.upperBound.isDefined) {
dfReader.option("upperBound", jdbcConfig.upperBound.get)
}
if (jdbcConfig.fetchSize.isDefined) {
dfReader.option("fetchsize", jdbcConfig.fetchSize.get)
}

var df = dfReader.load()

if (jdbcConfig.sentence != null) {
val tableName = if (jdbcConfig.table.contains(".")) {
jdbcConfig.table.split("\\.")(1)
} else jdbcConfig.table
df.createOrReplaceTempView(tableName)
df = session.sql(sentence)
}
df
}
}
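Putting the pieces together, a caller constructs the entry and invokes read(). A usage sketch follows, with hypothetical connection values and an existing SparkSession named spark; because sentence is non-null here, read() registers the loaded table as a temp view (named person, the part after the dot in test.person) and returns the SQL result instead of the raw table:

    val entry = JdbcConfigEntry(
      SourceCategory.JDBC,
      "jdbc:mysql://127.0.0.1:3306/test", // hypothetical connection info
      "com.mysql.cj.jdbc.Driver",
      "root",
      "nebula",
      "test.person",
      partitionColumn = Some("id"),
      lowerBound = Some(0L),
      upperBound = Some(1000000L),
      numPartitions = Some(10L),
      fetchSize = Some(1000L),
      sentence = "select id, name from person"
    )
    val df = new JdbcReader(spark, entry).read()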
@@ -6,8 +6,8 @@
package com.vesoft.nebula.exchange

import org.apache.spark.sql.{DataFrame, SparkSession}

import java.io.File

import com.vesoft.exchange.Argument
import com.vesoft.exchange.common.{CheckPointHandler, ErrorHandler}
import com.vesoft.exchange.common.config.{
@@ -18,6 +18,7 @@ import com.vesoft.exchange.common.config.{
HBaseSourceConfigEntry,
HiveSourceConfigEntry,
JanusGraphSourceConfigEntry,
JdbcConfigEntry,
KafkaSourceConfigEntry,
MaxComputeConfigEntry,
MySQLSourceConfigEntry,
@@ -35,14 +36,15 @@ import com.vesoft.nebula.exchange.reader.{
HiveReader,
JSONReader,
JanusGraphReader,
JdbcReader,
KafkaReader,
MaxcomputeReader,
MySQLReader,
Neo4JReader,
OracleReader,
ORCReader,
PostgreSQLReader,
OracleReader,
ParquetReader,
PostgreSQLReader,
PulsarReader
}
import com.vesoft.exchange.common.processor.ReloadProcessor
@@ -321,6 +323,11 @@ object Exchange {
val reader = new OracleReader(session, oracleConfig)
Some(reader.read())
}
case SourceCategory.JDBC => {
val jdbcConfig = config.asInstanceOf[JdbcConfigEntry]
val reader = new JdbcReader(session, jdbcConfig)
Some(reader.read())
}
case _ => {
LOG.error(s"Data source ${config.category} not supported")
None
@@ -11,6 +11,7 @@ import com.vesoft.exchange.common.config.{
HBaseSourceConfigEntry,
HiveSourceConfigEntry,
JanusGraphSourceConfigEntry,
JdbcConfigEntry,
MaxComputeConfigEntry,
MySQLSourceConfigEntry,
Neo4JSourceConfigEntry,
@@ -375,3 +376,47 @@ class OracleReader(override val session: SparkSession, oracleConfig: OracleConfigEntry)
df
}
}

/**
* Jdbc reader
*/
class JdbcReader(override val session: SparkSession, jdbcConfig: JdbcConfigEntry)
extends ServerBaseReader(session, jdbcConfig.sentence) {
Class.forName(jdbcConfig.driver)
override def read(): DataFrame = {
var dfReader = session.read
.format("jdbc")
.option("url", jdbcConfig.url)
.option("dbtable", jdbcConfig.table)
.option("user", jdbcConfig.user)
.option("password", jdbcConfig.passwd)
.option("driver", jdbcConfig.driver)

if (jdbcConfig.partitionColumn.isDefined) {
dfReader.option("partitionColumn", jdbcConfig.partitionColumn.get)
}
if (jdbcConfig.numPartitions.isDefined) {
dfReader.option("numPartitions", jdbcConfig.numPartitions.get)
}
if (jdbcConfig.lowerBound.isDefined) {
dfReader.option("lowerBound", jdbcConfig.lowerBound.get)
}
if (jdbcConfig.upperBound.isDefined) {
dfReader.option("upperBound", jdbcConfig.upperBound.get)
}
if (jdbcConfig.fetchSize.isDefined) {
dfReader.option("fetchsize", jdbcConfig.fetchSize.get)
}

var df = dfReader.load()

if (jdbcConfig.sentence != null) {
val tableName = if (jdbcConfig.table.contains(".")) {
jdbcConfig.table.split("\\.")(1)
} else jdbcConfig.table
df.createOrReplaceTempView(tableName)
df = session.sql(sentence)
}
df
}
}
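For comparison, with every option set the reader above reduces to the standard Spark JDBC read below (a sketch with hypothetical values; note fetchsize is deliberately lower-case, matching the option key Spark expects):

    val df = spark.read
      .format("jdbc")
      .option("url", "jdbc:mysql://127.0.0.1:3306/test")
      .option("dbtable", "person")
      .option("user", "root")
      .option("password", "nebula")
      .option("driver", "com.mysql.cj.jdbc.Driver")
      .option("partitionColumn", "id")
      .option("lowerBound", 0L)
      .option("upperBound", 1000000L)
      .option("numPartitions", 10L)
      .option("fetchsize", 1000L)
      .load()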
@@ -7,6 +7,7 @@ package com.vesoft.nebula.exchange

import org.apache.spark.sql.{DataFrame, SparkSession}
import java.io.File

import com.vesoft.exchange.Argument
import com.vesoft.exchange.common.{CheckPointHandler, ErrorHandler}
import com.vesoft.exchange.common.config.{
@@ -17,6 +18,7 @@ import com.vesoft.exchange.common.config.{
HBaseSourceConfigEntry,
HiveSourceConfigEntry,
JanusGraphSourceConfigEntry,
JdbcConfigEntry,
KafkaSourceConfigEntry,
MaxComputeConfigEntry,
MySQLSourceConfigEntry,
@@ -34,12 +36,13 @@ import com.vesoft.nebula.exchange.reader.{
HiveReader,
JSONReader,
JanusGraphReader,
JdbcReader,
KafkaReader,
MaxcomputeReader,
MySQLReader,
Neo4JReader,
OracleReader,
ORCReader,
OracleReader,
ParquetReader,
PostgreSQLReader,
PulsarReader
@@ -320,6 +323,11 @@ object Exchange {
val reader = new OracleReader(session, oracleConfig)
Some(reader.read())
}
case SourceCategory.JDBC => {
val jdbcConfig = config.asInstanceOf[JdbcConfigEntry]
val reader = new JdbcReader(session, jdbcConfig)
Some(reader.read())
}
case _ => {
LOG.error(s"Data source ${config.category} not supported")
None