
Add JDBC datasource #1361

Closed
wants to merge 16 commits
3 changes: 2 additions & 1 deletion docs/category.json
@@ -33,7 +33,8 @@
"user/ppl/functions/string.rst",
"user/ppl/functions/condition.rst",
"user/ppl/functions/relevance.rst",
"user/ppl/functions/expressions.rst"
"user/ppl/functions/expressions.rst",
"user/ppl/admin/jdbc.rst"
],
"sql_cli": [
"user/dql/expressions.rst",
80 changes: 80 additions & 0 deletions docs/user/ppl/admin/jdbc.rst
@@ -0,0 +1,80 @@
.. highlight:: sh

==============
JDBC Connector
==============

.. rubric:: Table of contents

.. contents::
:local:
:depth: 1


Introduction
============

This page covers the JDBC connector properties for dataSource configuration and the nuances associated with the JDBC connector.


JDBC Connector Properties in DataSource Configuration
=====================================================
The JDBC connector supports the following properties.

* ``url`` [Required]. The JDBC URL of the database endpoint to connect to.
* ``driver`` [Required]. The JDBC driver class used for the connection. Only ``org.apache.hive.jdbc.HiveDriver`` is supported.
* ``username`` [Optional]. The username for basic authentication.
* ``password`` [Optional]. The password for basic authentication.

Example dataSource configurations
=================================

No Auth ::

[{
"name" : "myspark",
"connector": "jdbc",
"properties" : {
"url" : "jdbc:hive2://localhost:10000/default",
"driver" : "org.apache.hive.jdbc.HiveDriver"
}
}]

Basic Auth ::

[{
"name" : "myspark",
"connector": "jdbc",
"properties" : {
"url" : "jdbc:hive2://localhost:10000/default",
"driver" : "org.apache.hive.jdbc.HiveDriver",
"username" : "username",
"password" : "password"
}
}]
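
These configurations are loaded into the OpenSearch keystore under the ``plugins.query.federation.datasources.config`` setting (the setting name is taken from this change's doctest setup; the file path below is illustrative)::

    ./bin/opensearch-keystore add-file plugins.query.federation.datasources.config /path/to/datasources.json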

PPL support for the JDBC connector
==================================

JDBC Table Function
-------------------
The JDBC datasource can execute SQL directly against the target database. The SQL statement must be supported by the target database.

Example::

os> source = myspark.jdbc('SHOW DATABASES');
fetched rows / total rows = 1/1
+-------------+
| namespace |
|-------------|
| default |
+-------------+

Limitations
===========

* PPL commands other than ``source`` are not supported. For example, if a user runs ``source = myspark.jdbc('SHOW DATABASES') | fields namespace``, the query engine will throw an exception (see the workaround sketch below).
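
A workaround is to apply filters and projections inside the SQL statement passed to the table function; a hypothetical sketch (``default.people`` and its columns are illustrative, not part of this change)::

    source = myspark.jdbc('SELECT name FROM default.people WHERE age > 30')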
2 changes: 2 additions & 0 deletions docs/user/ppl/index.rst
@@ -38,6 +38,8 @@ The query start with search command and then flowing a set of command delimited

- `Prometheus Connector <admin/prometheus_connector.rst>`_

- `JDBC Connector <admin/jdbc.rst>`_

* **Commands**

- `Syntax <cmd/syntax.rst>`_
92 changes: 58 additions & 34 deletions doctest/build.gradle
@@ -32,6 +32,54 @@ task bootstrap(type: Exec, dependsOn: ['cloneSqlCli']) {

}

String SPARK_VERSION = "spark-3.3.2"
String SPARK_BINARY = "${SPARK_VERSION}-bin-hadoop3";

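// Download the Spark 3.3.2 distribution (cached under bin/ via `overwrite false`) and start a local Spark master on port 7077.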
task startMaster(type: SpawnProcessTask) {
doFirst {
download.run {
src "https://dlcdn.apache.org/spark/${SPARK_VERSION}/${SPARK_BINARY}.tgz"
dest new File("$projectDir/bin", "${SPARK_BINARY}.tgz")
overwrite false
}
copy {
from tarTree("$projectDir/bin/${SPARK_BINARY}.tgz")
into "$projectDir/bin"
}
}
command "$projectDir/bin/${SPARK_BINARY}/bin/spark-class org.apache.spark.deploy.master.Master -h localhost -p 7077 --webui-port 8080"
Collaborator:
Is it cross-platform?

Collaborator:
To piggy-back on this, is it possible to run Spark in Docker?

ready 'started'
pidLockFileName '.spark-master.pid.lock'
}

task startWorker(type: SpawnProcessTask, dependsOn: startMaster) {
command "$projectDir/bin/${SPARK_BINARY}/bin/spark-class org.apache.spark.deploy.worker.Worker spark://localhost:7077"
ready 'started'
pidLockFileName '.spark-worker.pid.lock'
}

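// The Thrift server exposes the HiveServer2 JDBC endpoint (jdbc:hive2://localhost:10000) that the doctest datasource connects to.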
task startThrift(type: SpawnProcessTask, dependsOn: startWorker) {
command "$projectDir/bin/${SPARK_BINARY}/bin/spark-submit --class org.apache.spark.sql.hive.thriftserver.HiveThriftServer2 spark://localhost:7077"
ready 'started'
pidLockFileName '.spark-thriftserver.pid.lock'
}

task stopMaster(type: KillProcessTask) {
pidLockFileName '.spark-master.pid.lock'
}

task stopWorker(type: KillProcessTask, dependsOn: stopMaster) {
pidLockFileName '.spark-worker.pid.lock'
}

task stopThrift(type: KillProcessTask, dependsOn: stopWorker) {
pidLockFileName '.spark-thriftserver.pid.lock'
doLast {
file("$projectDir/bin/${SPARK_BINARY}").deleteDir()
file("$projectDir/bin/${SPARK_BINARY}.tgz").delete()
}
}
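
// With the dependsOn chains above, a single Gradle invocation starts or stops the whole
// Spark stack; a sketch (assuming this module's project path is ':doctest'):
//   ./gradlew :doctest:startThrift   // runs startMaster -> startWorker -> startThrift
//   ./gradlew :doctest:stopThrift    // runs stopMaster -> stopWorker -> stopThrift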

task startPrometheus(type: SpawnProcessTask) {
doFirst {
download.run {
@@ -99,51 +147,27 @@ task stopPrometheus() {
}
}
}


clean.doLast {
file("$projectDir/bin/${SPARK_BINARY}").deleteDir()
file("$projectDir/bin/${SPARK_BINARY}.tgz").delete()
}

if(getOSFamilyType() != "windows") {
stopPrometheus.mustRunAfter startPrometheus
startOpenSearch.dependsOn startPrometheus
stopOpenSearch.finalizedBy stopPrometheus
stopThrift.mustRunAfter startThrift
startOpenSearch.dependsOn(startPrometheus, startThrift)
stopOpenSearch.finalizedBy(stopPrometheus, stopThrift)
}
doctest.dependsOn startOpenSearch
doctest.finalizedBy stopOpenSearch
check.dependsOn doctest
clean.dependsOn(cleanBootstrap)

// 2.0.0-alpha1-SNAPSHOT -> 2.0.0.0-alpha1-SNAPSHOT
String opensearch_no_snapshot = opensearch_version.replace('-SNAPSHOT', '')
String[] version_tokens = opensearch_no_snapshot.tokenize('-')
String opensearch_build = version_tokens[0] + '.0'
if (version_tokens.length > 1) {
opensearch_build += '-' + version_tokens[1]
}
String mlCommonsRemoteFile = 'https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/' + opensearch_no_snapshot + '/latest/linux/x64/tar/builds/opensearch/plugins/opensearch-ml-' + opensearch_build + '.zip'
String mlCommonsPlugin = 'opensearch-ml'

testClusters {
docTestCluster {
keystore 'plugins.query.federation.datasources.config', new File("$projectDir/datasource", 'datasources.json')
// Disable loading of `ML-commons` plugin, because it might be unavailable (not released yet).
/*
plugin(provider(new Callable<RegularFile>(){
@Override
RegularFile call() throws Exception {
return new RegularFile() {
@Override
File getAsFile() {
File dir = new File('./doctest/' + mlCommonsPlugin)
if (!dir.exists()) {
dir.mkdirs()
}
File f = new File(dir, mlCommonsPlugin + '-' + opensearch_build + '.zip')
if (!f.exists()) {
new URL(mlCommonsRemoteFile).withInputStream{ ins -> f.withOutputStream{ it << ins } }
}
return fileTree(mlCommonsPlugin).getSingleFile()
}
}
}
}))
*/
plugin ':opensearch-sql-plugin'
testDistribution = 'integ_test'
}
10 changes: 9 additions & 1 deletion doctest/datasource/datasources.json
@@ -5,5 +5,13 @@
"properties" : {
"prometheus.uri" : "http://localhost:9090"
}
},
{
"name" : "myspark",
"connector": "jdbc",
"properties" : {
"url" : "jdbc:hive2://localhost:10000/default",
"driver": "org.apache.hive.jdbc.HiveDriver"
}
}
]
3 changes: 2 additions & 1 deletion integ-test/build.gradle
@@ -65,7 +65,7 @@ configurations.all {
resolutionStrategy.force 'junit:junit:4.13.2'
resolutionStrategy.force "commons-logging:commons-logging:1.2"
// enforce 1.1.3, https://www.whitesourcesoftware.com/vulnerability-database/WS-2019-0379
resolutionStrategy.force 'commons-codec:commons-codec:1.13'
resolutionStrategy.force 'commons-codec:commons-codec:1.15'
resolutionStrategy.force 'com.google.guava:guava:31.0.1-jre'
resolutionStrategy.force "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
resolutionStrategy.force "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:${versions.jackson}"
@@ -78,6 +78,7 @@ configurations.all {
resolutionStrategy.force "org.apache.httpcomponents:httpcore:4.4.13"
resolutionStrategy.force "joda-time:joda-time:2.10.12"
resolutionStrategy.force "org.slf4j:slf4j-api:1.7.36"
resolutionStrategy.force "org.apache.httpcomponents:httpclient:4.5.13"
}

configurations {
118 changes: 118 additions & 0 deletions jdbc/build.gradle
@@ -0,0 +1,118 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

plugins {
id 'java-library'
id "io.freefair.lombok"
id 'jacoco'
id 'info.solidsoft.pitest' version '1.9.0'
}

dependencies {
implementation project(':core')
implementation project(':common')
runtimeOnly('org.apache.hive:hive-jdbc:3.1.3') {
exclude group: "org.apache.hadoop", module: "hadoop-common"
exclude group: 'org.apache.zookeeper'
exclude group: "org.apache.hive", module: "hive-metastore"
exclude group: "org.apache.hive", module: "hive-shims"
exclude group: 'org.apache.hive', module: 'hive-llap-server'
exclude group: 'org.apache.hive', module: 'hive-upgrade-acid'

// exclude for resolving version conflict
exclude group: 'org.apache.httpcomponents', module: 'httpcore'
exclude group: 'org.apache.httpcomponents', module: 'httpclient'

// exclude for include with transitive = false
exclude group: "org.apache.hive", module: "hive-common"
exclude group: "org.apache.hive", module: "hive-service"
exclude group: "org.apache.hive", module: "hive-serde"
exclude group: 'org.apache.hive', module: 'hive-service-rpc'

// exclude because of CVE-2019-0205
exclude group: 'org.apache.thrift', module: 'libthrift'
}
runtimeOnly('org.apache.httpcomponents:httpcore:4.4.12')
runtimeOnly('org.apache.httpcomponents:httpclient:4.5.13') {
exclude group: 'commons-codec', module: 'commons-codec'
}
runtimeOnly('org.apache.hive:hive-service:3.1.3') {
transitive = false
}
runtimeOnly('org.apache.hive:hive-serde:3.1.3') {
transitive = false
}
runtimeOnly('org.apache.hive:hive-common:3.1.3') {
transitive = false
}
runtimeOnly('org.apache.hive:hive-service-rpc:3.1.3') {
transitive = false
}
runtimeOnly('commons-codec:commons-codec:1.13') {
transitive = false
}
runtimeOnly('org.apache.thrift:libthrift:0.18.1')
runtimeOnly 'commons-lang:commons-lang:2.6'

testImplementation('org.junit.jupiter:junit-jupiter:5.6.2')
testImplementation group: 'org.hamcrest', name: 'hamcrest-library', version: '2.1'
testImplementation group: 'org.mockito', name: 'mockito-core', version: '3.12.4'
testImplementation group: 'org.mockito', name: 'mockito-junit-jupiter', version: '3.12.4'
}

pitest {
targetClasses = ['org.opensearch.sql.*']
pitestVersion = '1.9.0'
threads = 4
outputFormats = ['HTML', 'XML']
timestampedReports = false
junit5PluginVersion = '1.0.0'
}

test {
useJUnitPlatform()
testLogging {
events "skipped", "failed"
exceptionFormat "full"
}
}

jacocoTestReport {
reports {
html.enabled true
xml.enabled true
}
afterEvaluate {
classDirectories.setFrom(files(classDirectories.files.collect {
fileTree(dir: it)
}))
}
}
test.finalizedBy(project.tasks.jacocoTestReport)

jacocoTestCoverageVerification {
violationRules {
rule {
element = 'CLASS'
excludes = [
]
limit {
counter = 'LINE'
minimum = 1.0
}
limit {
counter = 'BRANCH'
minimum = 1.0
}
}
}
afterEvaluate {
classDirectories.setFrom(files(classDirectories.files.collect {
fileTree(dir: it)
}))
}
}
check.dependsOn jacocoTestCoverageVerification
jacocoTestCoverageVerification.dependsOn jacocoTestReport