From 9d280ec792e7c969e25e7158b02b8593131ed265 Mon Sep 17 00:00:00 2001
From: Josh Rosen
Date: Tue, 18 Aug 2015 13:02:20 -0700
Subject: [PATCH] Update build to test using official Amazon JDBC driver.

---
 README.md                                    | 17 +++++++++--------
 build.sbt                                    | 10 +++++++---
 .../spark/redshift/Parameters.scala          |  4 ++--
 .../spark/redshift/ParametersSuite.scala     |  8 ++++----
 .../spark/redshift/RedshiftSourceSuite.scala | 19 +++++++++++--------
 5 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index f14f919f..126d83e9 100644
--- a/README.md
+++ b/README.md
@@ -28,8 +28,9 @@ Further, as Redshift is an AWS product, some AWS libraries will be required. Thi
 your deployment environment will include `hadoop-aws`, or other things necessary to access S3, credentials, etc.
 Check the dependencies with "provided" scope in build.sbt if you're at all unclear.
 
-You're also going to need a JDBC driver that is compatible with Redshift. The one used for testing can be
-found in build.sbt, however Amazon recommend that you use [their driver](http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html).
+You're also going to need a JDBC driver that is compatible with Redshift. Amazon recommend that you
+use [their driver](http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html),
+although this library has also been successfully tested using the Postgres JDBC driver.
 
 ## Usage
 
@@ -49,7 +50,7 @@ val sqlContext = new SQLContext(sc)
 // Get some data from a Redshift table
 val df: DataFrame = sqlContext.read
     .format("com.databricks.spark.redshift")
-    .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass")
+    .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
     .option("dbtable" -> "my_table")
     .option("tempdir" -> "s3://path/for/temp/data")
     .load()
@@ -59,7 +60,7 @@ val df: DataFrame = sqlContext.read
 
 df.write
   .format("com.databricks.spark.redshift")
-  .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass")
+  .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
   .option("dbtable" -> "my_table_copy")
   .option("tempdir" -> "s3://path/for/temp/data")
   .mode("error")
@@ -77,7 +78,7 @@ sql_context = SQLContext(sc)
 # Read data from a table
 df = sql_context.read \
     .format("com.databricks.spark.redshift") \
-    .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass") \
+    .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass") \
     .option("dbtable" -> "my_table") \
     .option("tempdir" -> "s3://path/for/temp/data") \
     .load()
@@ -85,7 +86,7 @@ df = sql_context.read \
 # Write back to a table
 df.write \
   .format("com.databricks.spark.redshift")
-  .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass") \
+  .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass") \
   .option("dbtable" -> "my_table_copy") \
   .option("tempdir" -> "s3://path/for/temp/data") \
   .mode("error")
@@ -99,7 +100,7 @@ CREATE TABLE my_table
 USING com.databricks.spark.redshift
 OPTIONS (dbtable 'my_table',
          tempdir 's3://my_bucket/tmp',
-         url 'jdbc:postgresql://host:port/db?user=username&password=pass');
+         url 'jdbc:redshift://host:port/db?user=username&password=pass');
 ```
 
 ### Scala helper functions
@@ -204,7 +205,7 @@ and use that as a temp location for this data.
 jdbcdriver
 No
-org.postgresql.Driver
+com.amazon.redshift.jdbc4.Driver
 The class name of the JDBC driver to load before JDBC operations. Must be on classpath.
diff --git a/build.sbt b/build.sbt
index 918627ec..72aa96c8 100644
--- a/build.sbt
+++ b/build.sbt
@@ -33,9 +33,13 @@ libraryDependencies += "com.databricks" %% "spark-avro" % "1.0.0"
 libraryDependencies += "org.apache.avro" % "avro-mapred" % "1.7.6" % "provided" exclude("org.mortbay.jetty", "servlet-api")
 
 // A Redshift-compatible JDBC driver must be present on the classpath for spark-redshift to work.
-// For testing, we using a Postgres driver, but it is recommended that the Amazon driver is used
-// in production. See http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html
-libraryDependencies += "postgresql" % "postgresql" % "8.3-606.jdbc4" % "provided"
+// The Amazon driver is recommended for production use; it can be obtained from
+// http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html
+
+// A Redshift-compatible JDBC driver must be present on the classpath for spark-redshift to work.
+// For testing, we use an Amazon driver, which is available from
+// http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html
+libraryDependencies += "com.amazon.redshift" % "jdbc4" % "1.1.7.1007" % "test" from "https://s3.amazonaws.com/redshift-downloads/drivers/RedshiftJDBC4-1.1.7.1007.jar"
 
 libraryDependencies += "com.google.guava" % "guava" % "14.0.1" % Test
diff --git a/src/main/scala/com/databricks/spark/redshift/Parameters.scala b/src/main/scala/com/databricks/spark/redshift/Parameters.scala
index 10d5a7e1..e8626e56 100644
--- a/src/main/scala/com/databricks/spark/redshift/Parameters.scala
+++ b/src/main/scala/com/databricks/spark/redshift/Parameters.scala
@@ -34,7 +34,7 @@ private [redshift] object Parameters extends Logging {
     // * sortkeyspec has no default, but is optional
     // * distkey has no default, but is optional unless using diststyle KEY
 
-    "jdbcdriver" -> "org.postgresql.Driver",
+    "jdbcdriver" -> "com.amazon.redshift.jdbc4.Driver",
     "overwrite" -> "false",
     "diststyle" -> "EVEN",
     "usestagingtable" -> "true",
@@ -101,7 +101,7 @@ private [redshift] object Parameters extends Logging {
 
   /**
    * The JDBC driver class name. This is used to make sure the driver is registered before connecting over
-   * JDBC. Default is "org.postgresql.Driver"
+   * JDBC. Default is "com.amazon.redshift.jdbc4.Driver"
    */
   def jdbcDriver = parameters("jdbcdriver")
 
diff --git a/src/test/scala/com/databricks/spark/redshift/ParametersSuite.scala b/src/test/scala/com/databricks/spark/redshift/ParametersSuite.scala
index edea5bbb..57c990c5 100644
--- a/src/test/scala/com/databricks/spark/redshift/ParametersSuite.scala
+++ b/src/test/scala/com/databricks/spark/redshift/ParametersSuite.scala
@@ -28,7 +28,7 @@ class ParametersSuite extends FunSuite with Matchers {
       Map(
         "tempdir" -> "s3://foo/bar",
         "dbtable" -> "test_table",
-        "url" -> "jdbc:postgresql://foo/bar")
+        "url" -> "jdbc:redshift://foo/bar")
 
     val mergedParams = Parameters.mergeParameters(params)
 
@@ -47,7 +47,7 @@ class ParametersSuite extends FunSuite with Matchers {
       Map(
         "tempdir" -> "s3://foo/bar",
         "dbtable" -> "test_table",
-        "url" -> "jdbc:postgresql://foo/bar")
+        "url" -> "jdbc:redshift://foo/bar")
 
     val mergedParams1 = Parameters.mergeParameters(params)
     val mergedParams2 = Parameters.mergeParameters(params)
@@ -63,8 +63,8 @@
       }
     }
 
-    checkMerge(Map("dbtable" -> "test_table", "url" -> "jdbc:postgresql://foo/bar"))
-    checkMerge(Map("tempdir" -> "s3://foo/bar", "url" -> "jdbc:postgresql://foo/bar"))
+    checkMerge(Map("dbtable" -> "test_table", "url" -> "jdbc:redshift://foo/bar"))
+    checkMerge(Map("tempdir" -> "s3://foo/bar", "url" -> "jdbc:redshift://foo/bar"))
     checkMerge(Map("dbtable" -> "test_table", "tempdir" -> "s3://foo/bar"))
   }
 }
diff --git a/src/test/scala/com/databricks/spark/redshift/RedshiftSourceSuite.scala b/src/test/scala/com/databricks/spark/redshift/RedshiftSourceSuite.scala
index 0cb2566f..cf79b019 100644
--- a/src/test/scala/com/databricks/spark/redshift/RedshiftSourceSuite.scala
+++ b/src/test/scala/com/databricks/spark/redshift/RedshiftSourceSuite.scala
@@ -151,7 +151,8 @@ class RedshiftSourceSuite
 
   test("DefaultSource can load Redshift UNLOAD output to a DataFrame") {
 
-    val params = Map("url" -> "jdbc:postgresql://foo/bar",
+    val params = Map(
+      "url" -> "jdbc:redshift://foo/bar",
       "tempdir" -> "tmp",
       "dbtable" -> "test_table",
       "aws_access_key_id" -> "test1",
@@ -173,7 +174,8 @@ class RedshiftSourceSuite
 
   test("DefaultSource supports simple column filtering") {
 
-    val params = Map("url" -> "jdbc:postgresql://foo/bar",
+    val params = Map(
+      "url" -> "jdbc:redshift://foo/bar",
       "tempdir" -> "tmp",
       "dbtable" -> "test_table",
       "aws_access_key_id" -> "test1",
@@ -201,7 +203,8 @@ class RedshiftSourceSuite
 
   test("DefaultSource supports user schema, pruned and filtered scans") {
 
-    val params = Map("url" -> "jdbc:postgresql://foo/bar",
+    val params = Map(
+      "url" -> "jdbc:redshift://foo/bar",
       "tempdir" -> "tmp",
       "dbtable" -> "test_table",
       "aws_access_key_id" -> "test1",
@@ -235,7 +238,7 @@ class RedshiftSourceSuite
 
     val testSqlContext = new SQLContext(sc)
 
-    val jdbcUrl = "jdbc:postgresql://foo/bar"
+    val jdbcUrl = "jdbc:redshift://foo/bar"
     val params = Map("url" -> jdbcUrl,
       "tempdir" -> tempDir,
@@ -284,7 +287,7 @@ class RedshiftSourceSuite
 
   test("Failed copies are handled gracefully when using a staging table") {
     val testSqlContext = new SQLContext(sc)
 
-    val jdbcUrl = "jdbc:postgresql://foo/bar"
+    val jdbcUrl = "jdbc:redshift://foo/bar"
     val params = Map("url" -> jdbcUrl,
       "tempdir" -> tempDir,
@@ -368,7 +371,7 @@ class RedshiftSourceSuite
 
   test("Append SaveMode doesn't destroy existing data") {
     val testSqlContext = new SQLContext(sc)
 
-    val jdbcUrl = "jdbc:postgresql://foo/bar"
+    val jdbcUrl = "jdbc:redshift://foo/bar"
     val params = Map("url" -> jdbcUrl,
       "tempdir" -> tempDir,
@@ -410,7 +413,7 @@ class RedshiftSourceSuite
 
   test("Respect SaveMode.ErrorIfExists when table exists") {
     val testSqlContext = new SQLContext(sc)
 
-    val jdbcUrl = "jdbc:postgresql://foo/bar"
+    val jdbcUrl = "jdbc:redshift://foo/bar"
     val params = Map("url" -> jdbcUrl,
       "tempdir" -> tempDir,
@@ -438,7 +441,7 @@ class RedshiftSourceSuite
 
   test("Do nothing when table exists if SaveMode = Ignore") {
     val testSqlContext = new SQLContext(sc)
 
-    val jdbcUrl = "jdbc:postgresql://foo/bar"
+    val jdbcUrl = "jdbc:redshift://foo/bar"
     val params = Map("url" -> jdbcUrl,
       "tempdir" -> tempDir,