1
1
package frameless
2
2
3
3
import java .math .BigInteger
4
-
5
4
import java .util .Date
6
-
7
- import java .time .{ Duration , Instant , Period , LocalDate }
8
-
5
+ import java .time .{Duration , Instant , LocalDate , Period }
9
6
import java .sql .Timestamp
10
-
11
7
import scala .reflect .ClassTag
12
-
13
8
import org .apache .spark .sql .FramelessInternals
14
9
import org .apache .spark .sql .FramelessInternals .UserDefinedType
15
- import org .apache .spark .sql .{ reflection => ScalaReflection }
10
+ import org .apache .spark .sql .{reflection => ScalaReflection }
16
11
import org .apache .spark .sql .catalyst .expressions ._
17
12
import org .apache .spark .sql .catalyst .expressions .objects ._
18
- import org .apache .spark .sql .catalyst .util .{
19
- ArrayBasedMapData ,
20
- DateTimeUtils ,
21
- GenericArrayData
22
- }
13
+ import org .apache .spark .sql .catalyst .util .{ArrayBasedMapData , DateTimeUtils , GenericArrayData }
23
14
import org .apache .spark .sql .types ._
24
15
import org .apache .spark .unsafe .types .UTF8String
25
-
26
16
import shapeless ._
27
17
import shapeless .ops .hlist .IsHCons
28
18
19
+ import scala .collection .generic .CanBuildFrom
20
+ import scala .collection .immutable .TreeSet
21
+
29
22
abstract class TypedEncoder [T ](
30
23
implicit
31
24
val classTag : ClassTag [T ])
@@ -501,27 +494,57 @@ object TypedEncoder {
501
494
override def toString : String = s " arrayEncoder( $jvmRepr) "
502
495
}
503
496
504
- trait SeqConversion [C [_]] extends Serializable {
505
- def convertSeq [Y ](c : Seq [Y ]): C [Y ]
497
+ /**
498
+ * Per #804 - when MapObjects is evaluated in interpreted mode it returns a Seq, not the derived collection type that is built when the expression is compiled.
499
+ *
500
+ * This type class offers extensible conversion to more specific collection types. By default Seq, List and Vector (from Seq) and Set and TreeSet (from Set) are supported.
501
+ *
502
+ * @tparam C the target collection type that `convert` produces from an `F[Y]`
503
+ */
504
+ trait CollectionConversion [F [_], C [_], Y ] extends Serializable {
505
+ def convert (c : F [Y ]): C [Y ]
506
506
}
507
507
508
- object SeqConversion {
509
- implicit val seqToSeq = new SeqConversion [Seq ] {
510
- override def convertSeq [Y ](c : Seq [Y ]): Seq [Y ] = c
508
+ object CollectionConversion {
509
+ implicit def seqToSeq [Y ](implicit cbf : CanBuildFrom [Nothing , Y , Seq [Y ]]) = new CollectionConversion [Seq , Seq , Y ] {
510
+ override def convert (c : Seq [Y ]): Seq [Y ] = c
511
+ }
512
+ implicit def seqToVector [Y ](implicit cbf : CanBuildFrom [Nothing , Y , Vector [Y ]]) = new CollectionConversion [Seq , Vector , Y ] {
513
+ override def convert (c : Seq [Y ]): Vector [Y ] = c.toVector
514
+ }
515
+ implicit def seqToList [Y ](implicit cbf : CanBuildFrom [Nothing , Y , List [Y ]]) = new CollectionConversion [Seq , List , Y ] {
516
+ override def convert (c : Seq [Y ]): List [Y ] = c.toList
511
517
}
512
- implicit val seqToVector = new SeqConversion [ Vector ] {
513
- override def convertSeq [ Y ] (c : Seq [Y ]): Vector [Y ] = c.toVector
518
+ implicit def setToSet [ Y ]( implicit cbf : CanBuildFrom [ Nothing , Y , Set [ Y ]]) = new CollectionConversion [ Set , Set , Y ] {
519
+ override def convert (c : Set [Y ]): Set [Y ] = c
514
520
}
515
- implicit val seqToList = new SeqConversion [ List ] {
516
- override def convertSeq [ Y ] (c : Seq [Y ]): List [Y ] = c.toList
521
+ implicit def setToTreeSet [ Y ]( implicit cbf : CanBuildFrom [ Nothing , Y , TreeSet [ Y ]]) = new CollectionConversion [ Set , TreeSet , Y ] {
522
+ override def convert (c : Set [Y ]): TreeSet [Y ] = c.to[ TreeSet ]
517
523
}
518
524
}
519
525
520
- implicit def collectionEncoder [C [X ] <: Seq [X ], T ](
526
+ implicit def seqEncoder [C [X ] <: Seq [X ], T ](
527
+ implicit
528
+ i0 : Lazy [RecordFieldEncoder [T ]],
529
+ i1 : ClassTag [C [T ]],
530
+ i2 : CollectionConversion [Seq , C , T ],
531
+ i3 : CanBuildFrom [Nothing , T , C [T ]]
532
+ ) = collectionEncoder[Seq , C , T ]
533
+
534
+ implicit def setEncoder [C [X ] <: Set [X ], T ](
535
+ implicit
536
+ i0 : Lazy [RecordFieldEncoder [T ]],
537
+ i1 : ClassTag [C [T ]],
538
+ i2 : CollectionConversion [Set , C , T ],
539
+ i3 : CanBuildFrom [Nothing , T , C [T ]]
540
+ ) = collectionEncoder[Set , C , T ]
541
+
542
+ def collectionEncoder [O [_], C [X ], T ](
521
543
implicit
522
544
i0 : Lazy [RecordFieldEncoder [T ]],
523
545
i1 : ClassTag [C [T ]],
524
- i2 : SeqConversion [C ]
546
+ i2 : CollectionConversion [O , C , T ],
547
+ i3 : CanBuildFrom [Nothing , T , C [T ]]
525
548
): TypedEncoder [C [T ]] = new TypedEncoder [C [T ]] {
526
549
private lazy val encodeT = i0.value.encoder
527
550
@@ -538,20 +561,20 @@ object TypedEncoder {
538
561
if (ScalaReflection .isNativeType(enc.jvmRepr)) {
539
562
NewInstance (classOf [GenericArrayData ], path :: Nil , catalystRepr)
540
563
} else {
541
- MapObjects (enc.toCatalyst, path, enc.jvmRepr, encodeT.nullable)
564
+ // Convert the input to a Seq first: both the Set and the Seq encoders must go through a Seq before MapObjects is applied.
565
+ MapObjects (enc.toCatalyst, SeqCaster (path), enc.jvmRepr, encodeT.nullable)
542
566
}
543
567
}
544
568
545
569
def fromCatalyst (path : Expression ): Expression =
546
- CollectionCaster (
570
+ CollectionCaster [ O , C , T ] (
547
571
MapObjects (
548
572
i0.value.fromCatalyst,
549
573
path,
550
574
encodeT.catalystRepr,
551
575
encodeT.nullable,
552
- Some (i1.runtimeClass) // This will cause MapObjects to build a collection of type C[_] directly
553
- )
554
- , implicitly[SeqConversion [C ]])
576
+ Some (i1.runtimeClass) // This will cause MapObjects to build a collection of type C[_] directly when compiling
577
+ ), implicitly[CollectionConversion [O ,C ,T ]]) // This will convert Seq to the appropriate C[_] when eval'ing.
555
578
556
579
override def toString : String = s " collectionEncoder( $jvmRepr) "
557
580
}
@@ -561,16 +584,18 @@ object TypedEncoder {
561
584
* @param i2 implicit `ClassTag[Set[T]]` to provide runtime information about the set type.
562
585
* @tparam T the element type of the set.
563
586
* @return a `TypedEncoder` instance for `Set[T]`.
564
- */
565
- implicit def setEncoder [T ](
587
+
588
+ implicit def setEncoder[C[X] <: Seq[X], T](
566
589
implicit
567
590
i1: shapeless.Lazy[RecordFieldEncoder[T]],
568
- i2 : ClassTag [Set [T ]]
591
+ i2: ClassTag[Set[T]],
592
+ i3: CollectionConversion[Set, C, T],
593
+ i4: CanBuildFrom[Nothing, T, C[T]]
569
594
): TypedEncoder[Set[T]] = {
570
595
implicit val inj: Injection[Set[T], Seq[T]] = Injection(_.toSeq, _.toSet)
571
596
572
597
TypedEncoder.usingInjection
573
- }
598
+ }*/
574
599
575
600
/**
576
601
* @tparam A the key type
0 commit comments