diff --git a/README.md b/README.md
index f14f919f..126d83e9 100644
--- a/README.md
+++ b/README.md
@@ -28,8 +28,9 @@ Further, as Redshift is an AWS product, some AWS libraries will be required. Thi
your deployment environment will include `hadoop-aws`, or other things necessary to access S3, credentials,
etc. Check the dependencies with "provided" scope in build.sbt if you're at all unclear.
-You're also going to need a JDBC driver that is compatible with Redshift. The one used for testing can be
-found in build.sbt, however Amazon recommend that you use [their driver](http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html).
+You're also going to need a JDBC driver that is compatible with Redshift. Amazon recommends that you
+use [their driver](http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html),
+although this library has also been tested successfully with the Postgres JDBC driver.
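+
+For example, one way to make the Amazon driver available to an sbt-built application is sketched
+below; the version and download URL mirror the ones this project's build.sbt uses for tests, and
+the "provided" scope assumes your deployment environment supplies the driver at runtime:
+
+```scala
+// One possible coordinate and URL; adjust the version, URL and scope to match your environment.
+libraryDependencies += "com.amazon.redshift" % "jdbc4" % "1.1.7.1007" % "provided" from "https://s3.amazonaws.com/redshift-downloads/drivers/RedshiftJDBC4-1.1.7.1007.jar"
+```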
## Usage
@@ -49,7 +50,7 @@ val sqlContext = new SQLContext(sc)
// Get some data from a Redshift table
val df: DataFrame = sqlContext.read
.format("com.databricks.spark.redshift")
- .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass")
+ .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
.option("dbtable" -> "my_table")
.option("tempdir" -> "s3://path/for/temp/data")
.load()
@@ -59,7 +60,7 @@ val df: DataFrame = sqlContext.read
df.write
.format("com.databricks.spark.redshift")
- .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass")
+ .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass")
.option("dbtable" -> "my_table_copy")
.option("tempdir" -> "s3://path/for/temp/data")
.mode("error")
@@ -77,7 +78,7 @@ sql_context = SQLContext(sc)
# Read data from a table
df = sql_context.read \
.format("com.databricks.spark.redshift") \
- .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass") \
+ .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass") \
.option("dbtable" -> "my_table") \
.option("tempdir" -> "s3://path/for/temp/data") \
.load()
@@ -85,7 +86,7 @@ df = sql_context.read \
# Write back to a table
df.write \
.format("com.databricks.spark.redshift")
- .option("url", "jdbc:postgresql://redshifthost:5439/database?user=username&password=pass") \
+ .option("url", "jdbc:redshift://redshifthost:5439/database?user=username&password=pass") \
.option("dbtable" -> "my_table_copy") \
.option("tempdir" -> "s3://path/for/temp/data") \
.mode("error")
@@ -99,7 +100,7 @@ CREATE TABLE my_table
USING com.databricks.spark.redshift
OPTIONS (dbtable 'my_table',
tempdir 's3://my_bucket/tmp',
- url 'jdbc:postgresql://host:port/db?user=username&password=pass');
+ url 'jdbc:redshift://host:port/db?user=username&password=pass');
```
### Scala helper functions
@@ -204,7 +205,7 @@ and use that as a temp location for this data.
<td><tt>jdbcdriver</tt></td>
<td>No</td>
- <td><tt>org.postgresql.Driver</tt></td>
+ <td><tt>com.amazon.redshift.jdbc4.Driver</tt></td>
<td>The class name of the JDBC driver to load before JDBC operations. Must be on classpath.</td>
diff --git a/build.sbt b/build.sbt
index 918627ec..72aa96c8 100644
--- a/build.sbt
+++ b/build.sbt
@@ -33,9 +33,13 @@ libraryDependencies += "com.databricks" %% "spark-avro" % "1.0.0"
libraryDependencies += "org.apache.avro" % "avro-mapred" % "1.7.6" % "provided" exclude("org.mortbay.jetty", "servlet-api")
// A Redshift-compatible JDBC driver must be present on the classpath for spark-redshift to work.
-// For testing, we using a Postgres driver, but it is recommended that the Amazon driver is used
-// in production. See http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html
-libraryDependencies += "postgresql" % "postgresql" % "8.3-606.jdbc4" % "provided"
+// The Amazon driver is recommended for production use; see
+// http://docs.aws.amazon.com/redshift/latest/mgmt/configure-jdbc-connection.html
+// For testing, the same driver is downloaded directly from Amazon (see the URL below).
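+// (sbt's "from" clause fetches the jar from the given URL rather than from a resolver.)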
+libraryDependencies += "com.amazon.redshift" % "jdbc4" % "1.1.7.1007" % "test" from "https://s3.amazonaws.com/redshift-downloads/drivers/RedshiftJDBC4-1.1.7.1007.jar"
libraryDependencies += "com.google.guava" % "guava" % "14.0.1" % Test
diff --git a/src/main/scala/com/databricks/spark/redshift/Parameters.scala b/src/main/scala/com/databricks/spark/redshift/Parameters.scala
index a2121a4a..0477f972 100644
--- a/src/main/scala/com/databricks/spark/redshift/Parameters.scala
+++ b/src/main/scala/com/databricks/spark/redshift/Parameters.scala
@@ -35,7 +35,7 @@ private[redshift] object Parameters extends Logging {
// * sortkeyspec has no default, but is optional
// * distkey has no default, but is optional unless using diststyle KEY
- "jdbcdriver" -> "org.postgresql.Driver",
+ "jdbcdriver" -> "com.amazon.redshift.jdbc4.Driver",
"overwrite" -> "false",
"diststyle" -> "EVEN",
"usestagingtable" -> "true",
@@ -100,7 +100,7 @@ private[redshift] object Parameters extends Logging {
/**
* The JDBC driver class name. This is used to make sure the driver is registered before
- * connecting over JDBC. Default is "org.postgresql.Driver"
+ * connecting over JDBC. Default is "com.amazon.redshift.jdbc4.Driver"
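+ * (this can be overridden by supplying a "jdbcdriver" option, for example "org.postgresql.Driver").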
*/
def jdbcDriver: String = parameters("jdbcdriver")
diff --git a/src/test/scala/com/databricks/spark/redshift/ParametersSuite.scala b/src/test/scala/com/databricks/spark/redshift/ParametersSuite.scala
index c55f8363..307e4f83 100644
--- a/src/test/scala/com/databricks/spark/redshift/ParametersSuite.scala
+++ b/src/test/scala/com/databricks/spark/redshift/ParametersSuite.scala
@@ -27,7 +27,7 @@ class ParametersSuite extends FunSuite with Matchers {
val params = Map(
"tempdir" -> "s3://foo/bar",
"dbtable" -> "test_table",
- "url" -> "jdbc:postgresql://foo/bar")
+ "url" -> "jdbc:redshift://foo/bar")
val mergedParams = Parameters.mergeParameters(params)
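+// A sketch of an extra check, assuming the object returned by mergeParameters exposes the driver
+// class via jdbcDriver as defined in Parameters.scala: the new default should surface here.
+mergedParams.jdbcDriver shouldBe "com.amazon.redshift.jdbc4.Driver"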
@@ -45,7 +45,7 @@ class ParametersSuite extends FunSuite with Matchers {
val params = Map(
"tempdir" -> "s3://foo/bar",
"dbtable" -> "test_table",
- "url" -> "jdbc:postgresql://foo/bar")
+ "url" -> "jdbc:redshift://foo/bar")
val mergedParams1 = Parameters.mergeParameters(params)
val mergedParams2 = Parameters.mergeParameters(params)
@@ -61,8 +61,8 @@ class ParametersSuite extends FunSuite with Matchers {
}
}
- checkMerge(Map("dbtable" -> "test_table", "url" -> "jdbc:postgresql://foo/bar"))
- checkMerge(Map("tempdir" -> "s3://foo/bar", "url" -> "jdbc:postgresql://foo/bar"))
+ checkMerge(Map("dbtable" -> "test_table", "url" -> "jdbc:redshift://foo/bar"))
+ checkMerge(Map("tempdir" -> "s3://foo/bar", "url" -> "jdbc:redshift://foo/bar"))
checkMerge(Map("dbtable" -> "test_table", "tempdir" -> "s3://foo/bar"))
}
}