@@ -5,7 +5,6 @@ import java.util.Date
 import java.time.{ Duration, Instant, LocalDate, Period }
 import java.sql.Timestamp
 import scala.reflect.ClassTag
-import FramelessInternals.UserDefinedType
 import org.apache.spark.sql.catalyst.expressions.{
   Expression,
   UnsafeArrayData,
@@ -18,7 +17,6 @@ import org.apache.spark.sql.catalyst.util.{
 }
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
-
 import shapeless._
 import shapeless.ops.hlist.IsHCons
 import com.sparkutils.shim.expressions.{
@@ -34,6 +32,8 @@ import org.apache.spark.sql.shim.{
   Invoke5 => Invoke
 }

+import scala.collection.immutable.{ ListSet, TreeSet }
+
 abstract class TypedEncoder[T](
     implicit
     val classTag: ClassTag[T])
@@ -509,10 +509,70 @@ object TypedEncoder {
     override def toString: String = s"arrayEncoder($jvmRepr)"
   }

-  implicit def collectionEncoder[C[X] <: Seq[X], T](
+  /**
+   * Per #804 - when MapObjects is used in interpreted mode the type returned is Seq, not the derived type used in compilation
+   *
+   * This type class offers extensible conversion for more specific types. By default Seq, List and Vector for Seq's and Set, TreeSet and ListSet are supported.
+   *
+   * @tparam C
+   */
+  trait CollectionConversion[F[_], C[_], Y] extends Serializable {
+    def convert(c: F[Y]): C[Y]
+  }
+
+  object CollectionConversion {
+
+    implicit def seqToSeq[Y] = new CollectionConversion[Seq, Seq, Y] {
+      override def convert(c: Seq[Y]): Seq[Y] = c
+    }
+
+    implicit def seqToVector[Y] = new CollectionConversion[Seq, Vector, Y] {
+      override def convert(c: Seq[Y]): Vector[Y] = c.toVector
+    }
+
+    implicit def seqToList[Y] = new CollectionConversion[Seq, List, Y] {
+      override def convert(c: Seq[Y]): List[Y] = c.toList
+    }
+
+    implicit def setToSet[Y] = new CollectionConversion[Set, Set, Y] {
+      override def convert(c: Set[Y]): Set[Y] = c
+    }
+
+    implicit def setToTreeSet[Y](
+        implicit
+        ordering: Ordering[Y]
+      ) = new CollectionConversion[Set, TreeSet, Y] {
+
+      override def convert(c: Set[Y]): TreeSet[Y] =
+        TreeSet.newBuilder.++=(c).result()
+    }
+
+    implicit def setToListSet[Y] = new CollectionConversion[Set, ListSet, Y] {
+
+      override def convert(c: Set[Y]): ListSet[Y] =
+        ListSet.newBuilder.++=(c).result()
+    }
+  }
+
+  implicit def seqEncoder[C[X] <: Seq[X], T](
+      implicit
+      i0: Lazy[RecordFieldEncoder[T]],
+      i1: ClassTag[C[T]],
+      i2: CollectionConversion[Seq, C, T]
+    ) = collectionEncoder[Seq, C, T]
+
+  implicit def setEncoder[C[X] <: Set[X], T](
       implicit
       i0: Lazy[RecordFieldEncoder[T]],
-      i1: ClassTag[C[T]]
+      i1: ClassTag[C[T]],
+      i2: CollectionConversion[Set, C, T]
+    ) = collectionEncoder[Set, C, T]
+
+  def collectionEncoder[O[_], C[X], T](
+      implicit
+      i0: Lazy[RecordFieldEncoder[T]],
+      i1: ClassTag[C[T]],
+      i2: CollectionConversion[O, C, T]
     ): TypedEncoder[C[T]] = new TypedEncoder[C[T]] {
     private lazy val encodeT = i0.value.encoder

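Aside (not part of the commit): the hunk above presents CollectionConversion as an extensible type class, so a caller should be able to supply an instance for a collection the defaults don't cover. A minimal sketch under that assumption; Queue support is hypothetical and only illustrates the extension point, and the TypedEncoder.CollectionConversion path assumes the nesting shown above.

import scala.collection.immutable.Queue

// Hypothetical extra instance, mirroring seqToList / seqToVector above:
// convert the Seq produced by interpreted-mode MapObjects into a Queue.
implicit def seqToQueue[Y]: TypedEncoder.CollectionConversion[Seq, Queue, Y] =
  new TypedEncoder.CollectionConversion[Seq, Queue, Y] {
    override def convert(c: Seq[Y]): Queue[Y] = Queue(c: _*)
  }

// With this in implicit scope, seqEncoder should resolve for Queue because
// Queue[X] <: Seq[X] and a CollectionConversion[Seq, Queue, T] is available.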
@@ -529,38 +589,31 @@ object TypedEncoder {
       if (ScalaReflection.isNativeType(enc.jvmRepr)) {
         NewInstance(classOf[GenericArrayData], path :: Nil, catalystRepr)
       } else {
-        MapObjects(enc.toCatalyst, path, enc.jvmRepr, encodeT.nullable)
+        // converts to Seq, both Set and Seq handling must convert to Seq first
+        MapObjects(
+          enc.toCatalyst,
+          SeqCaster(path),
+          enc.jvmRepr,
+          encodeT.nullable
+        )
       }
     }

     def fromCatalyst(path: Expression): Expression =
-      MapObjects(
-        i0.value.fromCatalyst,
-        path,
-        encodeT.catalystRepr,
-        encodeT.nullable,
-        Some(i1.runtimeClass) // This will cause MapObjects to build a collection of type C[_] directly
-      )
+      CollectionCaster[O, C, T](
+        MapObjects(
+          i0.value.fromCatalyst,
+          path,
+          encodeT.catalystRepr,
+          encodeT.nullable,
+          Some(i1.runtimeClass) // This will cause MapObjects to build a collection of type C[_] directly when compiling
+        ),
+        implicitly[CollectionConversion[O, C, T]]
+      ) // This will convert Seq to the appropriate C[_] when eval'ing.

     override def toString: String = s"collectionEncoder($jvmRepr)"
   }

-  /**
-   * @param i1 implicit lazy `RecordFieldEncoder[T]` to encode individual elements of the set.
-   * @param i2 implicit `ClassTag[Set[T]]` to provide runtime information about the set type.
-   * @tparam T the element type of the set.
-   * @return a `TypedEncoder` instance for `Set[T]`.
-   */
-  implicit def setEncoder[T](
-      implicit
-      i1: shapeless.Lazy[RecordFieldEncoder[T]],
-      i2: ClassTag[Set[T]]
-    ): TypedEncoder[Set[T]] = {
-    implicit val inj: Injection[Set[T], Seq[T]] = Injection(_.toSeq, _.toSet)
-
-    TypedEncoder.usingInjection
-  }
-
   /**
    * @tparam A the key type
    * @tparam B the value type
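For illustration only (not in the diff): with these changes, encoders for specific collection types resolve through seqEncoder or setEncoder plus the matching CollectionConversion instance, roughly as below. The frameless package and the availability of RecordFieldEncoder[Int] are assumptions based on the surrounding file.

import scala.collection.immutable.{ ListSet, TreeSet }
import frameless.TypedEncoder // assumed package of this file

// Seq-like collections: seqEncoder + CollectionConversion[Seq, Vector, Int]
val vectorEnc = implicitly[TypedEncoder[Vector[Int]]]

// Set-like collections: setEncoder + CollectionConversion[Set, TreeSet, Int]
// (setToTreeSet additionally needs an Ordering[Int], available implicitly)
val treeSetEnc = implicitly[TypedEncoder[TreeSet[Int]]]
val listSetEnc = implicitly[TypedEncoder[ListSet[Int]]]

// In both cases fromCatalyst wraps MapObjects in CollectionCaster, so the
// interpreted path (which always yields a Seq) is converted to the target type.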