
Add JDBC datasource #1361

Closed
wants to merge 16 commits
3 changes: 2 additions & 1 deletion docs/category.json
@@ -33,7 +33,8 @@
"user/ppl/functions/string.rst",
"user/ppl/functions/condition.rst",
"user/ppl/functions/relevance.rst",
"user/ppl/functions/expressions.rst"
"user/ppl/functions/expressions.rst",
"user/ppl/admin/jdbc.rst"
],
"sql_cli": [
"user/dql/expressions.rst",
80 changes: 80 additions & 0 deletions docs/user/ppl/admin/jdbc.rst
@@ -0,0 +1,80 @@
.. highlight:: sh

==============
JDBC Connector
==============

.. rubric:: Table of contents

.. contents::
:local:
:depth: 1


Introduction
============

This page covers the JDBC connector properties for dataSource configuration and the nuances associated with the JDBC connector.


JDBC Connector Properties in DataSource Configuration
=====================================================
The JDBC connector supports the following properties.

* ``url`` [Required]. The JDBC URL of the database endpoint to connect to.
* ``driver`` [Required]. The JDBC driver class used for the connection. Only ``org.apache.hive.jdbc.HiveDriver`` is supported.
* ``username`` [Optional]. The username for basic authentication.
* ``password`` [Optional]. The password for basic authentication.

Example dataSource configurations
=================================

No Auth ::

[{
"name" : "myspark",
"connector": "jdbc",
"properties" : {
"url" : "jdbc:hive2://localhost:10000/default",
"driver" : "org.apache.hive.jdbc.HiveDriver"
}
}]

Basic Auth ::

[{
"name" : "myspark",
"connector": "jdbc",
"properties" : {
"url" : "jdbc:hive2://localhost:10000/default",
"driver" : "org.apache.hive.jdbc.HiveDriver",
"username" : "username",
"password" : "password"
}
}]
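
These configurations are loaded into the OpenSearch keystore under the ``plugins.query.federation.datasources.config`` setting (the setting name is taken from this change's doctest setup; the file path below is illustrative)::

    ./bin/opensearch-keystore add-file plugins.query.federation.datasources.config /path/to/datasources.json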

PPL support for the JDBC connector
==================================

JDBC Table Function
-------------------
The JDBC datasource can execute SQL directly against the target database. The SQL statement must be supported by the target database.

Example::

os> source = myspark.jdbc('SHOW DATABASES');
fetched rows / total rows = 1/1
+-------------+
| namespace |
|-------------|
| default |
+-------------+

Limitations
===========

* PPL commands other than ``source`` are not supported. For example, if a user runs ``source = myspark.jdbc('SHOW DATABASES') | fields namespace``, the query engine will throw an exception (see the workaround sketch below).
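
A workaround is to apply filters and projections inside the SQL statement passed to the table function; a hypothetical sketch (``default.people`` and its columns are illustrative, not part of this change)::

    source = myspark.jdbc('SELECT name FROM default.people WHERE age > 30')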
2 changes: 2 additions & 0 deletions docs/user/ppl/index.rst
@@ -38,6 +38,8 @@ The query start with search command and then flowing a set of command delimited

- `Prometheus Connector <admin/prometheus_connector.rst>`_

- `JDBC Connector <admin/jdbc.rst>`_

* **Commands**

- `Syntax <cmd/syntax.rst>`_
92 changes: 58 additions & 34 deletions doctest/build.gradle
@@ -32,6 +32,54 @@ task bootstrap(type: Exec, dependsOn: ['cloneSqlCli']) {

}

String SPARK_VERSION = "spark-3.3.2"
String SPARK_BINARY = "${SPARK_VERSION}-bin-hadoop3";

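// Download the Spark 3.3.2 distribution (cached under bin/ via `overwrite false`) and start a local Spark master on port 7077.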
task startMaster(type: SpawnProcessTask) {
doFirst {
download.run {
src "https://dlcdn.apache.org/spark/${SPARK_VERSION}/${SPARK_BINARY}.tgz"
dest new File("$projectDir/bin", "${SPARK_BINARY}.tgz")
overwrite false
}
copy {
from tarTree("$projectDir/bin/${SPARK_BINARY}.tgz")
into "$projectDir/bin"
}
}
command "$projectDir/bin/${SPARK_BINARY}/bin/spark-class org.apache.spark.deploy.master.Master -h localhost -p 7077 --webui-port 8080"
Collaborator:
Is it cross-platform?

Collaborator:
To piggy-back on this, is it possible to run Spark in Docker?

ready 'started'
pidLockFileName '.spark-master.pid.lock'
}

task startWorker(type: SpawnProcessTask, dependsOn: startMaster) {
command "$projectDir/bin/${SPARK_BINARY}/bin/spark-class org.apache.spark.deploy.worker.Worker spark://localhost:7077"
ready 'started'
pidLockFileName '.spark-worker.pid.lock'
}

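// The Thrift server exposes the HiveServer2 JDBC endpoint (jdbc:hive2://localhost:10000) that the doctest datasource connects to.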
task startThrift(type: SpawnProcessTask, dependsOn: startWorker) {
command "$projectDir/bin/${SPARK_BINARY}/bin/spark-submit --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 spark://localhost:7077"
ready 'started'
pidLockFileName '.spark-thriftserver.pid.lock'
}

task stopMaster(type: KillProcessTask) {
pidLockFileName '.spark-master.pid.lock'
}

task stopWorker(type: KillProcessTask, dependsOn: stopMaster) {
pidLockFileName '.spark-worker.pid.lock'
}

task stopThrift(type: KillProcessTask, dependsOn: stopWorker) {
pidLockFileName '.spark-thriftserver.pid.lock'
doLast {
file("$projectDir/bin/${SPARK_BINARY}").deleteDir()
file("$projectDir/bin/${SPARK_BINARY}.tgz").delete()
}
}
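
// With the dependsOn chains above, a single Gradle invocation starts or stops the whole
// Spark stack; a sketch (assuming this module's project path is ':doctest'):
//   ./gradlew :doctest:startThrift   // runs startMaster -> startWorker -> startThrift
//   ./gradlew :doctest:stopThrift    // runs stopMaster -> stopWorker -> stopThrift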

task startPrometheus(type: SpawnProcessTask) {
doFirst {
download.run {
@@ -99,51 +147,27 @@ task stopPrometheus() {
}
}
}


clean.doLast {
file("$projectDir/bin/${SPARK_BINARY}").deleteDir()
file("$projectDir/bin/${SPARK_BINARY}.tgz").delete()
}

if(getOSFamilyType() != "windows") {
stopPrometheus.mustRunAfter startPrometheus
startOpenSearch.dependsOn startPrometheus
stopOpenSearch.finalizedBy stopPrometheus
stopThrift.mustRunAfter startThrift
startOpenSearch.dependsOn(startPrometheus, startThrift)
stopOpenSearch.finalizedBy(stopPrometheus, stopThrift)
}
doctest.dependsOn startOpenSearch
doctest.finalizedBy stopOpenSearch
check.dependsOn doctest
clean.dependsOn(cleanBootstrap)

// 2.0.0-alpha1-SNAPSHOT -> 2.0.0.0-alpha1-SNAPSHOT
String opensearch_no_snapshot = opensearch_version.replace('-SNAPSHOT', '')
String[] version_tokens = opensearch_no_snapshot.tokenize('-')
String opensearch_build = version_tokens[0] + '.0'
if (version_tokens.length > 1) {
opensearch_build += '-' + version_tokens[1]
}
String mlCommonsRemoteFile = 'https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/' + opensearch_no_snapshot + '/latest/linux/x64/tar/builds/opensearch/plugins/opensearch-ml-' + opensearch_build + '.zip'
String mlCommonsPlugin = 'opensearch-ml'

testClusters {
docTestCluster {
keystore 'plugins.query.federation.datasources.config', new File("$projectDir/datasource", 'datasources.json')
// Disable loading of `ML-commons` plugin, because it might be unavailable (not released yet).
/*
plugin(provider(new Callable<RegularFile>(){
@Override
RegularFile call() throws Exception {
return new RegularFile() {
@Override
File getAsFile() {
File dir = new File('./doctest/' + mlCommonsPlugin)
if (!dir.exists()) {
dir.mkdirs()
}
File f = new File(dir, mlCommonsPlugin + '-' + opensearch_build + '.zip')
if (!f.exists()) {
new URL(mlCommonsRemoteFile).withInputStream{ ins -> f.withOutputStream{ it << ins } }
}
return fileTree(mlCommonsPlugin).getSingleFile()
}
}
}
}))
*/
plugin ':opensearch-sql-plugin'
testDistribution = 'integ_test'
}
10 changes: 9 additions & 1 deletion doctest/datasource/datasources.json
@@ -5,5 +5,13 @@
"properties" : {
"prometheus.uri" : "http://localhost:9090"
}
},
{
"name" : "myspark",
"connector": "jdbc",
"properties" : {
"url" : "jdbc:hive2://localhost:10000/default",
"driver": "org.apache.hive.jdbc.HiveDriver"
}
}
]
3 changes: 2 additions & 1 deletion integ-test/build.gradle
@@ -65,7 +65,7 @@ configurations.all {
resolutionStrategy.force 'junit:junit:4.13.2'
resolutionStrategy.force "commons-logging:commons-logging:1.2"
// enforce 1.1.3, https://www.whitesourcesoftware.com/vulnerability-database/WS-2019-0379
resolutionStrategy.force 'commons-codec:commons-codec:1.13'
resolutionStrategy.force 'commons-codec:commons-codec:1.15'
resolutionStrategy.force 'com.google.guava:guava:31.0.1-jre'
resolutionStrategy.force "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
resolutionStrategy.force "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${versions.jackson}"
@@ -78,6 +78,7 @@ configurations.all {
resolutionStrategy.force "org.apache.httpcomponents:httpcore:4.4.13"
resolutionStrategy.force "joda-time:joda-time:2.10.12"
resolutionStrategy.force "org.slf4j:slf4j-api:1.7.36"
resolutionStrategy.force "org.apache.httpcomponents:httpclient:4.5.13"
}

configurations {
118 changes: 118 additions & 0 deletions jdbc/build.gradle
@@ -0,0 +1,118 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

plugins {
id 'java-library'
id "io.freefair.lombok"
id 'jacoco'
id 'info.solidsoft.pitest' version '1.9.0'
}

dependencies {
implementation project(':core')
implementation project(':common')
runtimeOnly('org.apache.hive:hive-jdbc:3.1.3') {
exclude group: "org.apache.hadoop", module: "hadoop-common"
exclude group: 'org.apache.zookeeper'
exclude group: "org.apache.hive", module: "hive-metastore"
exclude group: "org.apache.hive", module: "hive-shims"
exclude group: 'org.apache.hive', module: 'hive-llap-server'
exclude group: 'org.apache.hive', module: 'hive-upgrade-acid'

// exclude for resolving version conflict
exclude group: 'org.apache.httpcomponents', module: 'httpcore'
exclude group: 'org.apache.httpcomponents', module: 'httpclient'

// exclude for include with transitive = false
exclude group: "org.apache.hive", module: "hive-common"
exclude group: "org.apache.hive", module: "hive-service"
exclude group: "org.apache.hive", module: "hive-serde"
exclude group: 'org.apache.hive', module: 'hive-service-rpc'

// exclude because of CVE-2019-0205
exclude group: 'org.apache.thrift', module: 'libthrift'
}
runtimeOnly('org.apache.httpcomponents:httpcore:4.4.12')
runtimeOnly('org.apache.httpcomponents:httpclient:4.5.13') {
exclude group: 'commons-codec', module: 'commons-codec'
}
runtimeOnly('org.apache.hive:hive-service:3.1.3') {
transitive = false
}
runtimeOnly('org.apache.hive:hive-serde:3.1.3') {
transitive = false
}
runtimeOnly('org.apache.hive:hive-common:3.1.3') {
transitive = false
}
runtimeOnly('org.apache.hive:hive-service-rpc:3.1.3') {
transitive = false
}
runtimeOnly('commons-codec:commons-codec:1.13') {
transitive = false
}
runtimeOnly('org.apache.thrift:libthrift:0.18.1')
runtimeOnly 'commons-lang:commons-lang:2.6'

testImplementation('org.junit.jupiter:junit-jupiter:5.6.2')
testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: '2.1'
testImplementation group: 'org.mockito', name: 'mockito-core', version: '3.12.4'
testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.12.4'
}

pitest {
targetClasses = ['org.opensearch.sql.*']
pitestVersion = '1.9.0'
threads = 4
outputFormats = ['HTML', 'XML']
timestampedReports = false
junit5PluginVersion = '1.0.0'
}

test {
useJUnitPlatform()
testLogging {
events "skipped", "failed"
exceptionFormat "full"
}
}

jacocoTestReport {
reports {
html.enabled true
xml.enabled true
}
afterEvaluate {
classDirectories.setFrom(files(classDirectories.files.collect {
fileTree(dir: it)
}))
}
}
test.finalizedBy(project.tasks.jacocoTestReport)

jacocoTestCoverageVerification {
violationRules {
rule {
element = 'CLASS'
excludes = [
]
limit {
counter = 'LINE'
minimum = 1.0
}
limit {
counter = 'BRANCH'
minimum = 1.0
}
}
}
afterEvaluate {
classDirectories.setFrom(files(classDirectories.files.collect {
fileTree(dir: it)
}))
}
}
check.dependsOn jacocoTestCoverageVerification
jacocoTestCoverageVerification.dependsOn jacocoTestReport