Skip to content
This repository has been archived by the owner on Feb 14, 2023. It is now read-only.

Ce/recursive #1

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@ object ProtoSQL {
StructType(cmp.javaDescriptor.getFields.asScala.map(structFieldFor))
}

private def toRowData(fd: FieldDescriptor, obj: Any) = fd.getJavaType match {
private def toRowData(msg: Any, fd: FieldDescriptor, obj: Any) = fd.getJavaType match {
case JavaType.BYTE_STRING => obj.asInstanceOf[ByteString].toByteArray
case JavaType.ENUM => obj.asInstanceOf[EnumValueDescriptor].getName
case JavaType.MESSAGE => messageToRow(obj.asInstanceOf[T forSome { type T <: GeneratedMessage with Message[T] }])
case JavaType.MESSAGE => {
if(obj.isInstanceOf[msg.type]) {
null
} else {
messageToRow(obj.asInstanceOf[T forSome { type T <: GeneratedMessage with Message[T] }])
}
}
case _ => obj
}

Expand All @@ -41,9 +47,10 @@ object ProtoSQL {
val obj = msg.getField(fd)
if (obj != null) {
if (fd.isRepeated) {
obj.asInstanceOf[Traversable[Any]].map(toRowData(fd, _))
obj.asInstanceOf[Traversable[Any]].map(toRowData(msg, fd, _))
} else {
toRowData(fd, obj)
// Pass `msg` to this function, if `obj` and `msg` are of the same type, return null?
toRowData(msg, fd, obj)
}
} else null
}: _*)
Expand All @@ -63,7 +70,10 @@ object ProtoSQL {
case ENUM => StringType
case MESSAGE =>
import collection.JavaConverters._
StructType(fd.getMessageType.getFields.asScala.map(structFieldFor))
StructType(fd.getMessageType.getFields.asScala.map( childFd =>
recursiveStructFieldFor(childFd, fd)
)
)
}
}

Expand All @@ -75,4 +85,22 @@ object ProtoSQL {
nullable = !fd.isRequired && !fd.isRepeated
)
}

def recursiveDataTypeFor(parentFd: FieldDescriptor, fd: FieldDescriptor) = {
import org.apache.spark.sql.types._
if (parentFd.getFullName() == fd.getFullName()) {
StringType
} else {
dataTypeFor(fd)
}
}

def recursiveStructFieldFor(fd: FieldDescriptor, parentFd: FieldDescriptor): StructField = {
val dataType = recursiveDataTypeFor(fd, parentFd)
StructField(
fd.getName,
if (fd.isRepeated) ArrayType(dataType, containsNull = false) else dataType,
nullable = !fd.isRequired && !fd.isRepeated
)
}
}