typelevel · OlivierBlanvillain · Nov 21, 2017 · Nov 16, 2017 · Nov 17, 2017 · Nov 17, 2017
diff --git a/dataset/src/main/scala/frameless/TypedDataset.scala b/dataset/src/main/scala/frameless/TypedDataset.scala
@@ -9,6 +9,7 @@ import org.apache.spark.sql.catalyst.plans.{Inner, LeftOuter}
 import org.apache.spark.sql._
 import shapeless._
 import shapeless.ops.hlist.{Prepend, ToTraversable, Tupler}
+import shapeless.ops.record.{Remover, Values}
 
 /** [[TypedDataset]] is a safer interface for working with `Dataset`.
   *
@@ -605,6 +606,51 @@ class TypedDataset[T] protected[frameless](val dataset: Dataset[T])(implicit val
     }
   }
 
+  /**
+    * Drops a single column, yielding a [[TypedDataset]] whose rows are
+    * tuples made of the remaining columns.
+    * A TypedDataset with only one column cannot have that column dropped.
+    *
+    * {{{
+    *   val d: TypedDataset[Foo(a: String, b: Int...)] = ???
+    *   val result: TypedDataset[(Int, ...)] = d.drop('a)
+    * }}}
+    * @param column the column to remove, given as a Symbol
+    * @param genOfT evidence that T has the labelled record form TRep
+    * @param remover evidence that column is a field of TRep, leaving Removed
+    * @param values evidence extracting the field values of Removed
+    * @param tupler evidence turning those values into the tuple type Out
+    * @param encoder encoder for the resulting tuple type
+    * @tparam Out tuple type of the returned rows
+    * @tparam TRep shapeless' record representation of T
+    * @tparam Removed TRep with the dropped field taken out
+    * @tparam ValuesFromRemoved values of Removed as an HList
+    * @tparam V value type of the dropped column
+    * @return a TypedDataset of Out built from the columns that were kept
+    */
+  def drop[
+    Out,
+    TRep <: HList,
+    Removed <: HList,
+    ValuesFromRemoved <: HList,
+    V
+  ](
+    column: Witness.Lt[Symbol]
+  )(implicit
+    genOfT: LabelledGeneric.Aux[T, TRep],
+    remover: Remover.Aux[TRep, column.T, (V, Removed)],
+    values: Values.Aux[Removed, ValuesFromRemoved],
+    tupler: Tupler.Aux[ValuesFromRemoved, Out],
+    encoder: TypedEncoder[Out]
+  ): TypedDataset[Out] = {
+    // genOfT, remover, values and tupler are never called: they only fix Out at compile time.
+    val columnName = column.value.name
+    val remaining = dataset.toDF().drop(columnName)
+    val retyped = remaining.as[Out](TypedExpressionEncoder[Out])
+
+    TypedDataset.create[Out](retyped)
+  }
+
   /** Prepends a new column to the Dataset.
     *
     * {{{

diff --git a/dataset/src/test/scala/frameless/DropTest.scala b/dataset/src/test/scala/frameless/DropTest.scala
@@ -0,0 +1,69 @@
+package frameless
+
+import org.scalacheck.Prop
+import org.scalacheck.Prop._
+
+class DropTest extends TypedDatasetSuite {
+  test("drop five columns") {
+    // Starts from five columns and drops one at a time until a single column is left.
+    def prop[A: TypedEncoder](value: A): Prop = {
+      val five = TypedDataset.create(List(X5(value, value, value, value, value)))
+      val four = five.drop('a) // first column
+      val three = four.drop('_4) // last column of the 4-tuple
+      val two = three.drop('_2) // middle column of the 3-tuple
+      val one = two.drop('_2) // second column of the pair
+      Tuple1(value) ?= one.collect().run().head
+    }
+
+    check(prop[Int] _)
+    check(prop[Long] _)
+    check(prop[String] _)
+    check(prop[SQLDate] _)
+    check(prop[Option[X1[Boolean]]] _)
+  }
+
+  test("drop first column") {
+    def prop[A: TypedEncoder](value: A): Prop = {
+      // Dropping 'a from a three-column dataset must leave a pair.
+      val source = TypedDataset.create(List(X3(value, value, value)))
+      val remaining = source.drop('a)
+      (value, value) ?= remaining.collect().run().head
+    }
+
+    check(prop[Int] _)
+    check(prop[Long] _)
+    check(prop[String] _)
+    check(prop[SQLDate] _)
+    check(prop[Option[X1[Boolean]]] _)
+  }
+
+  test("drop middle column") {
+    def prop[A: TypedEncoder](value: A): Prop = {
+      // Dropping 'b from a three-column dataset must leave a pair.
+      val source = TypedDataset.create(List(X3(value, value, value)))
+      val remaining = source.drop('b)
+      (value, value) ?= remaining.collect().run().head
+    }
+
+    check(prop[Int] _)
+    check(prop[Long] _)
+    check(prop[String] _)
+    check(prop[SQLDate] _)
+    check(prop[Option[X1[Boolean]]] _)
+  }
+
+  test("drop last column") {
+    def prop[A: TypedEncoder](value: A): Prop = {
+      // Dropping 'c from a three-column dataset must leave a pair.
+      val source = TypedDataset.create(List(X3(value, value, value)))
+      val remaining = source.drop('c)
+      (value, value) ?= remaining.collect().run().head
+    }
+
+    check(prop[Int] _)
+    check(prop[Long] _)
+    check(prop[String] _)
+    check(prop[SQLDate] _)
+    check(prop[Option[X1[Boolean]]] _)
+  }
+}