diff --git a/README.md b/README.md
index 4a007ace..d5229e60 100644
--- a/README.md
+++ b/README.md
@@ -119,26 +119,11 @@
 OPTIONS (dbtable 'my_table',
          url 'jdbc:redshift://host:port/db?user=username&password=pass');
 ```
 
-### Scala helper functions
-
-The com.databricks.spark.redshift package has some shortcuts if you're working directly
-from a Scala application and don't want to use the Data Sources API:
-
-```scala
-import com.databricks.spark.redshift._
-
-val sqlContext = new SQLContext(sc)
-
-val dataFrame = sqlContext.redshiftTable( ... )
-dataFrame.saveAsRedshiftTable( ... )
-```
-
 ### Hadoop InputFormat
 
 The library contains a Hadoop input format for Redshift tables unloaded with the ESCAPE
 option, which you may make direct use of as follows:
 
-Usage in Spark Core:
 ```scala
 import com.databricks.spark.redshift.RedshiftInputFormat
 
@@ -149,17 +134,6 @@ val records = sc.newAPIHadoopFile(
   classOf[Array[String]])
 ```
 
-Usage in Spark SQL:
-```scala
-import com.databricks.spark.redshift._
-
-// Call redshiftFile() that returns a SchemaRDD with all string columns.
-val records: DataFrame = sqlContext.redshiftFile(path, Seq("name", "age"))
-
-// Call redshiftFile() with the table schema.
-val records: DataFrame = sqlContext.redshiftFile(path, "name varchar(10) age integer")
-```
-
 ## Parameters
 
 The parameter map or OPTIONS provided in Spark SQL supports the following settings.
@@ -236,14 +210,6 @@ and use that as a temp location for this data.
    <td>com.amazon.redshift.jdbc4.Driver</td>
    <td>The class name of the JDBC driver to load before JDBC operations. Must be on classpath.</td>
  </tr>
-  <tr>
-    <td>overwrite</td>
-    <td>No</td>
-    <td>false</td>
-    <td>
-If true, drop any existing data before writing new content. Only applies when using the Scala `saveAsRedshiftTable` function
-directly, as `SaveMode` will be preferred when using the Data Source API. See also usestagingtable
-  </td></tr>
  <tr>
    <td>diststyle</td>
    <td>No</td>
diff --git a/src/main/scala/com/databricks/spark/redshift/Parameters.scala b/src/main/scala/com/databricks/spark/redshift/Parameters.scala
index 9b04fc5d..9f5caa7e 100644
--- a/src/main/scala/com/databricks/spark/redshift/Parameters.scala
+++ b/src/main/scala/com/databricks/spark/redshift/Parameters.scala
@@ -123,6 +123,7 @@ private[redshift] object Parameters extends Logging {
    *
    * Defaults to false.
    */
+  @deprecated("Use SaveMode instead", "0.5.0")
   def overwrite: Boolean = parameters("overwrite").toBoolean
 
   /**
diff --git a/src/main/scala/com/databricks/spark/redshift/SchemaParser.scala b/src/main/scala/com/databricks/spark/redshift/SchemaParser.scala
index b69d67d3..d58c6f22 100644
--- a/src/main/scala/com/databricks/spark/redshift/SchemaParser.scala
+++ b/src/main/scala/com/databricks/spark/redshift/SchemaParser.scala
@@ -22,7 +22,11 @@ import org.apache.spark.sql.types._
 
 /**
  * A simple parser for Redshift table schemas.
+ *
+ * Note: the only method which uses this class has been deprecated, so this class should be
+ * removed in `spark-redshift` 0.6. We will not accept patches to extend this parser.
  */
+@deprecated("Do not use SchemaParser directly", "0.5.0")
 private[redshift] object SchemaParser extends JavaTokenParsers {
   // redshift data types: http://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html
   private val SMALLINT: Parser[DataType] = ("smallint" | "int2") ^^^ ShortType
diff --git a/src/main/scala/com/databricks/spark/redshift/package.scala b/src/main/scala/com/databricks/spark/redshift/package.scala
index 129cfb8b..272464be 100644
--- a/src/main/scala/com/databricks/spark/redshift/package.scala
+++ b/src/main/scala/com/databricks/spark/redshift/package.scala
@@ -17,10 +17,9 @@
 
 package com.databricks.spark
 
-import com.databricks.spark.redshift.DefaultJDBCWrapper
-import org.apache.spark.sql.functions._
+import org.apache.spark.sql.functions.col
 import org.apache.spark.sql.types.{StringType, StructField, StructType}
-import org.apache.spark.sql.{DataFrame, Row, SQLContext}
+import org.apache.spark.sql.{DataFrame, DataFrameReader, DataFrameWriter, Row, SQLContext}
 
 package object redshift {
 
@@ -47,6 +46,7 @@
   /**
    * Reads a table unload from Redshift with its schema in format "name0 type0 name1 type1 ...".
    */
+  @deprecated("Use data sources API or perform string -> data type casts yourself", "0.5.0")
   def redshiftFile(path: String, schema: String): DataFrame = {
     val structType = SchemaParser.parseSchema(schema)
     val casts = structType.fields.map { field =>
@@ -59,6 +59,7 @@
   /**
    * Read a Redshift table into a DataFrame, using S3 for data transfer and JDBC
    * to control Redshift and resolve the schema
    */
+  @deprecated("Use sqlContext.read()", "0.5.0")
   def redshiftTable(parameters: Map[String, String]): DataFrame = {
     val params = Parameters.mergeParameters(parameters)
     sqlContext.baseRelationToDataFrame(
@@ -69,11 +70,13 @@
   /**
    * Add write functionality to DataFrame
    */
+  @deprecated("Use DataFrame.write()", "0.5.0")
   implicit class RedshiftDataFrame(dataFrame: DataFrame) {
 
     /**
      * Load the DataFrame into a Redshift database table
      */
+    @deprecated("Use DataFrame.write()", "0.5.0")
     def saveAsRedshiftTable(parameters: Map[String, String]): Unit = {
       val params = Parameters.mergeParameters(parameters)
       DefaultRedshiftWriter.saveToRedshift(dataFrame.sqlContext, dataFrame, params)
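
For anyone migrating off the removed helpers: the deprecation messages above all point at the standard Spark Data Sources API. Below is a minimal sketch of that path, not part of the patch itself; it assumes the connector resolves via its package name `com.databricks.spark.redshift` (matching the README's `OPTIONS` example), that `sc` is an existing `SparkContext`, and that the `tempdir` S3 path is a placeholder.

```scala
import org.apache.spark.sql.{SQLContext, SaveMode}

val sqlContext = new SQLContext(sc)

// Replaces the deprecated sqlContext.redshiftTable(...) helper.
val df = sqlContext.read
  .format("com.databricks.spark.redshift")    // assumed format name
  .option("url", "jdbc:redshift://host:port/db?user=username&password=pass")
  .option("dbtable", "my_table")
  .option("tempdir", "s3n://bucket/temp-dir") // placeholder staging location
  .load()

// Replaces the deprecated dataFrame.saveAsRedshiftTable(...) helper. SaveMode.Overwrite
// subsumes the 'overwrite' parameter whose README table row this patch deletes.
df.write
  .format("com.databricks.spark.redshift")
  .option("url", "jdbc:redshift://host:port/db?user=username&password=pass")
  .option("dbtable", "my_table_copy")
  .option("tempdir", "s3n://bucket/temp-dir")
  .mode(SaveMode.Overwrite)
  .save()
```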
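
Likewise, the message on `redshiftFile(path, schema)` suggests performing the string-to-type casts yourself. A short sketch of what that looks like, mirroring the `col(...).cast(...)` pattern the deprecated helper applies internally; `records` is a hypothetical all-string DataFrame (columns `name` and `age`) such as one produced from `RedshiftInputFormat` output:

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.IntegerType

// Cast selected string columns to the types you need, keeping the rest as-is.
def castRecords(records: DataFrame): DataFrame =
  records.select(
    col("name"),                            // already the desired string type
    col("age").cast(IntegerType).as("age")) // explicit cast replaces the schema parser
```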