🎉 Databricks destination: rename to Databricks delta lake (#13630)
* Rename databricks connector

* Rename connector in the seed

* Update changelog with pr id

* auto-bump connector version

Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
tuliren and octavia-squidington-iii authored Jun 9, 2022
1 parent cd4f445 commit bb46941
Showing 7 changed files with 55 additions and 26 deletions.
@@ -70,10 +70,10 @@
dockerImageTag: 0.1.6
documentationUrl: https://docs.airbyte.io/integrations/destinations/clickhouse
releaseStage: alpha
- name: Databricks
- name: Databricks Delta Lake
destinationDefinitionId: 072d5540-f236-4294-ba7c-ade8fd918496
dockerRepository: airbyte/destination-databricks
dockerImageTag: 0.2.0
dockerImageTag: 0.2.1
documentationUrl: https://docs.airbyte.io/integrations/destinations/databricks
icon: databricks.svg
releaseStage: alpha
18 changes: 16 additions & 2 deletions airbyte-config/init/src/main/resources/seed/destination_specs.yaml
@@ -996,12 +996,12 @@
- "overwrite"
- "append"
- "append_dedup"
- dockerImage: "airbyte/destination-databricks:0.2.0"
- dockerImage: "airbyte/destination-databricks:0.2.1"
spec:
documentationUrl: "https://docs.airbyte.io/integrations/destinations/databricks"
connectionSpecification:
$schema: "http://json-schema.org/draft-07/schema#"
title: "Databricks Destination Spec"
title: "Databricks Delta Lake Destination Spec"
type: "object"
required:
- "accept_terms"
@@ -1017,25 +1017,29 @@
description: "You must agree to the Databricks JDBC Driver <a href=\"https://databricks.com/jdbc-odbc-driver-license\"\
>Terms & Conditions</a> to use this connector."
default: false
order: 1
databricks_server_hostname:
title: "Server Hostname"
type: "string"
description: "Databricks Cluster Server Hostname."
examples:
- "abc-12345678-wxyz.cloud.databricks.com"
order: 2
databricks_http_path:
title: "HTTP Path"
type: "string"
description: "Databricks Cluster HTTP Path."
examples:
- "sql/protocolvx/o/1234567489/0000-1111111-abcd90"
order: 3
databricks_port:
title: "Port"
type: "string"
description: "Databricks Cluster Port."
default: "443"
examples:
- "443"
order: 4
databricks_personal_access_token:
title: "Access Token"
type: "string"
@@ -1044,6 +1048,7 @@
examples:
- "dapi0123456789abcdefghij0123456789AB"
airbyte_secret: true
order: 5
database_schema:
title: "Database Schema"
type: "string"
@@ -1053,6 +1058,7 @@
default: "public"
examples:
- "public"
order: 6
data_source:
title: "Data Source"
type: "object"
@@ -1072,20 +1078,23 @@
enum:
- "S3"
default: "S3"
order: 1
s3_bucket_name:
title: "S3 Bucket Name"
type: "string"
description: "The name of the S3 bucket to use for intermittent staging\
\ of the data."
examples:
- "airbyte.staging"
order: 2
s3_bucket_path:
title: "S3 Bucket Path"
type: "string"
description: "The directory under the S3 bucket where data will be\
\ written."
examples:
- "data_sync/test"
order: 3
s3_bucket_region:
title: "S3 Bucket Region"
type: "string"
@@ -1119,6 +1128,7 @@
- "me-south-1"
- "us-gov-east-1"
- "us-gov-west-1"
order: 4
s3_access_key_id:
type: "string"
description: "The Access Key Id granting allow one to access the above\
@@ -1128,18 +1138,22 @@
examples:
- "A012345678910EXAMPLE"
airbyte_secret: true
order: 5
s3_secret_access_key:
title: "S3 Secret Access Key"
type: "string"
description: "The corresponding secret to the above access key id."
examples:
- "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"
airbyte_secret: true
order: 6
order: 7
purge_staging_data:
title: "Purge Staging Files and Tables"
type: "boolean"
description: "Default to 'true'. Switch it to 'false' for debugging purpose."
default: true
order: 8
supportsIncremental: true
supportsNormalization: false
supportsDBT: false
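
For orientation, a destination configuration that satisfies this spec could look roughly like the sketch below; every value is illustrative (most are copied from the spec's own examples), and the bucket region is only assumed to be a member of the collapsed region enum above.

# Hypothetical Databricks Delta Lake destination config (illustrative values only)
accept_terms: true
databricks_server_hostname: "abc-12345678-wxyz.cloud.databricks.com"  # from the spec's examples
databricks_http_path: "sql/protocolvx/o/1234567489/0000-1111111-abcd90"
databricks_port: "443"  # a string, not a number, per the spec
databricks_personal_access_token: "dapi0123456789abcdefghij0123456789AB"  # airbyte_secret
database_schema: "public"
data_source:
  data_source_type: "S3"
  s3_bucket_name: "airbyte.staging"
  s3_bucket_path: "data_sync/test"
  s3_bucket_region: "us-east-1"  # assumed to be in the (collapsed) region enum
  s3_access_key_id: "A012345678910EXAMPLE"  # airbyte_secret
  s3_secret_access_key: "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"  # airbyte_secret
purge_staging_data: true
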
@@ -1,6 +1,6 @@
# Databricks Destination Connector Bootstrap
# Databricks Delta Lake Destination Connector Bootstrap

The Databricks Connector enables a developer to sync data into a Databricks cluster. It does so in two steps:
The Databricks Delta Lake Connector enables a developer to sync data into a Databricks cluster. It does so in two steps:

1. Persist source data in S3 staging files in the Parquet format.
2. Create delta table based on the Parquet staging files.
@@ -16,5 +16,5 @@ ENV APPLICATION destination-databricks

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=0.2.0
LABEL io.airbyte.version=0.2.1
LABEL io.airbyte.name=airbyte/destination-databricks
@@ -1,4 +1,4 @@
# Destination Databricks
# Destination Databricks Delta Lake

This is the repository for the Databricks destination connector in Java.
For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/databricks).
@@ -6,7 +6,7 @@
"supported_destination_sync_modes": ["overwrite", "append"],
"connectionSpecification": {
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Databricks Destination Spec",
"title": "Databricks Delta Lake Destination Spec",
"type": "object",
"required": [
"accept_terms",
@@ -21,40 +21,46 @@
"title": "Agree to the Databricks JDBC Driver Terms & Conditions",
"type": "boolean",
"description": "You must agree to the Databricks JDBC Driver <a href=\"https://databricks.com/jdbc-odbc-driver-license\">Terms & Conditions</a> to use this connector.",
"default": false
"default": false,
"order": 1
},
"databricks_server_hostname": {
"title": "Server Hostname",
"type": "string",
"description": "Databricks Cluster Server Hostname.",
"examples": ["abc-12345678-wxyz.cloud.databricks.com"]
"examples": ["abc-12345678-wxyz.cloud.databricks.com"],
"order": 2
},
"databricks_http_path": {
"title": "HTTP Path",
"type": "string",
"description": "Databricks Cluster HTTP Path.",
"examples": ["sql/protocolvx/o/1234567489/0000-1111111-abcd90"]
"examples": ["sql/protocolvx/o/1234567489/0000-1111111-abcd90"],
"order": 3
},
"databricks_port": {
"title": "Port",
"type": "string",
"description": "Databricks Cluster Port.",
"default": "443",
"examples": ["443"]
"examples": ["443"],
"order": 4
},
"databricks_personal_access_token": {
"title": "Access Token",
"type": "string",
"description": "Databricks Personal Access Token for making authenticated requests.",
"examples": ["dapi0123456789abcdefghij0123456789AB"],
"airbyte_secret": true
"airbyte_secret": true,
"order": 5
},
"database_schema": {
"title": "Database Schema",
"type": "string",
"description": "The default schema tables are written to if the source does not specify a namespace. Unless specifically configured, the usual value for this field is \"public\".",
"default": "public",
"examples": ["public"]
"examples": ["public"],
"order": 6
},
"data_source": {
"title": "Data Source",
@@ -75,19 +81,22 @@
"data_source_type": {
"type": "string",
"enum": ["S3"],
"default": "S3"
"default": "S3",
"order": 1
},
"s3_bucket_name": {
"title": "S3 Bucket Name",
"type": "string",
"description": "The name of the S3 bucket to use for intermittent staging of the data.",
"examples": ["airbyte.staging"]
"examples": ["airbyte.staging"],
"order": 2
},
"s3_bucket_path": {
"title": "S3 Bucket Path",
"type": "string",
"description": "The directory under the S3 bucket where data will be written.",
"examples": ["data_sync/test"]
"examples": ["data_sync/test"],
"order": 3
},
"s3_bucket_region": {
"title": "S3 Bucket Region",
@@ -121,31 +130,36 @@
"me-south-1",
"us-gov-east-1",
"us-gov-west-1"
]
],
"order": 4
},
"s3_access_key_id": {
"type": "string",
"description": "The Access Key Id granting allow one to access the above S3 staging bucket. Airbyte requires Read and Write permissions to the given bucket.",
"title": "S3 Access Key ID",
"examples": ["A012345678910EXAMPLE"],
"airbyte_secret": true
"airbyte_secret": true,
"order": 5
},
"s3_secret_access_key": {
"title": "S3 Secret Access Key",
"type": "string",
"description": "The corresponding secret to the above access key id.",
"examples": ["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"],
"airbyte_secret": true
"airbyte_secret": true,
"order": 6
}
}
}
]
],
"order": 7
},
"purge_staging_data": {
"title": "Purge Staging Files and Tables",
"type": "boolean",
"description": "Default to 'true'. Switch it to 'false' for debugging purpose.",
"default": true
"default": true,
"order": 8
}
}
}
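
For reference, the supported_destination_sync_modes declared near the top of this spec ("overwrite" and "append") are what each stream's destination_sync_mode in a configured catalog has to match. A minimal, hypothetical configured-catalog entry (not part of this commit; the stream name and schema are invented) might look like:

# Hypothetical configured catalog entry targeting this destination (illustrative only)
streams:
  - stream:
      name: "users"  # invented stream name
      json_schema:
        type: "object"
        properties:
          id: {type: "integer"}
          email: {type: "string"}
      supported_sync_modes: ["full_refresh", "incremental"]
    sync_mode: "incremental"
    destination_sync_mode: "append"  # must be one of the spec's supported_destination_sync_modes
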
7 changes: 4 additions & 3 deletions docs/integrations/destinations/databricks.md
@@ -1,8 +1,8 @@
# Databricks
# Databricks Delta Lake

## Overview

This destination syncs data to Databricks cluster. Each stream is written to its own table.
This destination syncs data to Databricks Delta Lake. Each stream is written to its own delta table.

This connector requires a JDBC driver to connect to Databricks cluster. By using the driver and the connector, you must agree to the [JDBC ODBC driver license](https://databricks.com/jdbc-odbc-driver-license). This means that you can only use this connector to connector third party applications to Apache Spark SQL within a Databricks offering using the ODBC and/or JDBC protocols.

@@ -19,7 +19,7 @@ Currently, this connector requires 30+MB of memory for each stream. When syncing

## Data Source

Databricks supports various cloud storage as the [data source](https://docs.databricks.com/data/data-sources/index.html). Currently, only Amazon S3 is supported.
Databricks Delta Lake supports various cloud storage as the [data source](https://docs.databricks.com/data/data-sources/index.html). Currently, only Amazon S3 is supported by this connector.

## Configuration

@@ -103,6 +103,7 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A

| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.2.1 | 2022-06-08 | [\#13630](https://github.com/airbytehq/airbyte/pull/13630) | Rename to "Databricks Delta Lake" and add field orders in the spec. |
| 0.2.0 | 2022-05-15 | [\#12861](https://github.com/airbytehq/airbyte/pull/12861) | Use new public Databricks JDBC driver, and open source the connector. |
| 0.1.5 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. |
| 0.1.4 | 2022-02-14 | [\#10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option |