
Release 0.5.11.
Signed-off-by: Simeon H.K. Fitch <fitch@astraea.io>
metasim committed Mar 27, 2018
1 parent 478783e commit 9b04956
Showing 23 changed files with 2,648 additions and 149 deletions.
444 changes: 444 additions & 0 deletions bench/archive/jmh-results-20171130120731.json

Large diffs are not rendered by default.

652 changes: 652 additions & 0 deletions bench/archive/jmh-results-20171130135353.json

Large diffs are not rendered by default.

452 changes: 452 additions & 0 deletions bench/archive/jmh-results-20171130145409.json

Large diffs are not rendered by default.

452 changes: 452 additions & 0 deletions bench/archive/jmh-results-20171201123901.json

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions bench/build.sbt
@@ -1,9 +1,17 @@
enablePlugins(BenchmarkPlugin)

libraryDependencies ++= Seq(
spark("core"),
spark("sql"),
geotrellis("spark"),
geotrellis("raster")
)

jmhIterations := Some(5)
jmhTimeUnit := None
javaOptions in Jmh := Seq("-Xmx4g")

// To enable profiling:
// jmhExtraOptions := Some("-prof jmh.extras.JFR")
// jmhExtraOptions := Some("-prof gc")

@@ -0,0 +1,73 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
* Copyright 2017 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
*/

package astraea.spark.rasterframes.bench

import java.util.concurrent.TimeUnit

import geotrellis.raster.Tile
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.gt.InternalRowTile
import org.apache.spark.sql.gt.types.TileUDT
import org.openjdk.jmh.annotations._

@BenchmarkMode(Array(Mode.AverageTime))
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
/**
* @author sfitch
* @since 9/29/17
*/
class TileCellScanBench extends SparkEnv {

@Param(Array("uint8", "int32", "float32", "float64"))
var cellTypeName: String = _

@Param(Array("512"))
var tileSize: Int = _

@transient
var tileRow: InternalRow = _

@Setup(Level.Trial)
def setupData(): Unit = {
tileRow = TileUDT.serialize(randomTile(tileSize, tileSize, cellTypeName))
}

@Benchmark
def deserializeRead(): Double = {
val tile = TileUDT.deserialize(tileRow)
val (cols, rows) = tile.dimensions
tile.getDouble(cols - 1, rows - 1) +
tile.getDouble(cols/2, rows/2) +
tile.getDouble(0, 0)
}

@Benchmark
def internalRowRead(): Double = {
val tile = new InternalRowTile(tileRow)
val cols = tile.cols
val rows = tile.rows
tile.getDouble(cols - 1, rows - 1) +
tile.getDouble(cols/2, rows/2) +
tile.getDouble(0, 0)
}
}

@@ -0,0 +1,66 @@
/*
* This software is licensed under the Apache 2 license, quoted below.
*
* Copyright 2017 Astraea, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* [http://www.apache.org/licenses/LICENSE-2.0]
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
*/
package astraea.spark.rasterframes.bench

import java.util.concurrent.TimeUnit

import astraea.spark.rasterframes._
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.openjdk.jmh.annotations._

/**
*
* @author sfitch
* @since 10/4/17
*/
@BenchmarkMode(Array(Mode.AverageTime))
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
class TileExplodeBench extends SparkEnv {
import spark.implicits._

@Param(Array("uint8", "uint16ud255", "float32", "float64"))
var cellTypeName: String = _

@Param(Array("256"))
var tileSize: Int = _

@Param(Array("100"))
var numTiles: Int = _

@transient
var tiles: DataFrame = _

@Setup(Level.Trial)
def setupData(): Unit = {
tiles = Seq.fill(numTiles)(randomTile(tileSize, tileSize, cellTypeName))
.toDF("tile").repartition(10)
}

@Benchmark
def arrayExplode() = {
tiles.select(posexplode(tileToArray[Double]($"tile"))).count()
}

@Benchmark
def tileExplode() = {
tiles.select(explodeTiles($"tile")).count()
}
}
17 changes: 10 additions & 7 deletions project/ProjectPlugin.scala
@@ -26,10 +26,7 @@ object ProjectPlugin extends AutoPlugin {
"spark" -> "2.1.0"
)

private def geotrellis(module: String) =
"org.locationtech.geotrellis" %% s"geotrellis-$module" % versions("geotrellis")
private def spark(module: String) =
"org.apache.spark" %% s"spark-$module" % versions("spark")
import autoImport._

override def projectSettings = Seq(
organization := "io.astraea",
@@ -79,6 +76,12 @@
)

object autoImport {

def geotrellis(module: String) =
"org.locationtech.geotrellis" %% s"geotrellis-$module" % versions("geotrellis")
def spark(module: String) =
"org.apache.spark" %% s"spark-$module" % versions("spark")

def releaseSettings: Seq[Def.Setting[_]] = {
val buildSite: (State) ⇒ State = releaseStepTask(makeSite)
val publishSite: (State) ⇒ State = releaseStepTask(ghpagesPushSite)
@@ -124,9 +127,9 @@
apiURL := Some(url("http://rasterframes.io/latest/api")),
autoAPIMappings := false,
paradoxProperties in Paradox ++= Map(
"github.base_url" -> "https://github.com/s22s/raster-frames"//,
//"scaladoc.org.apache.spark.sql.gt" -> "http://rasterframes.io/latest" //,
//"scaladoc.geotrellis.base_url" -> "https://geotrellis.github.io/scaladocs/latest"
"github.base_url" -> "https://github.com/s22s/raster-frames",
"scaladoc.org.apache.spark.sql.gt" -> "http://rasterframes.io/latest",
"scaladoc.geotrellis.base_url" -> "https://geotrellis.github.io/scaladocs/latest"
),
sourceDirectory in Paradox := tutTargetDirectory.value,
sourceDirectory in Paradox in paradoxTheme := sourceDirectory.value / "main" / "paradox" / "_template",
2 changes: 1 addition & 1 deletion project/build.properties
@@ -1 +1 @@
sbt.version=1.0.2
sbt.version=1.0.4
2 changes: 1 addition & 1 deletion project/plugins.sbt
@@ -10,7 +10,7 @@ addSbtPlugin("de.heikoseeberger" % "sbt-header" % "3.0.2")

addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.10")

addSbtPlugin("org.tpolecat" % "tut-plugin" % "0.6.0")
addSbtPlugin("org.tpolecat" % "tut-plugin" % "0.6.2")

addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "1.3.1")

@@ -20,12 +20,13 @@
package astraea.spark.rasterframes.expressions

import astraea.spark.rasterframes
import geotrellis.raster.{NODATA, Tile}
import geotrellis.raster._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.expressions.{Expression, Generator}
import org.apache.spark.sql.gt.types.TileUDT
import org.apache.spark.sql.catalyst.expressions.{Expression, Generator, GenericInternalRow}
import org.apache.spark.sql.gt.InternalRowTile
import org.apache.spark.sql.types._
import spire.syntax.cfor.cfor

/**
* Catalyst expression for converting a tile column into a pixel column, with each tile pixel occupying a separate row.
@@ -49,35 +50,42 @@ private[rasterframes] case class ExplodeTileExpression(
.map(n ⇒ StructField(n, DoubleType, false)))
}

private def keep(): Boolean = {
if (sampleFraction >= 1.0) true
else scala.util.Random.nextDouble() <= sampleFraction
}
private def sample[T](things: Seq[T]) = scala.util.Random.shuffle(things)
.take(math.ceil(things.length * sampleFraction).toInt)

override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
// Do we need to worry about deserializing all the tiles in a row like this?
val tiles = for (child ← children)
yield TileUDT.deserialize(child.eval(input).asInstanceOf[InternalRow])

val tiles = Array.ofDim[Tile](children.length)
cfor(0)(_ < tiles.length, _ + 1) { index =>
val row = children(index).eval(input).asInstanceOf[InternalRow]
tiles(index) = if(row != null) InternalRowTile(row) else null
}
val dims = tiles.filter(_ != null).map(_.dimensions)
if(dims.isEmpty) Seq.empty[InternalRow]
else {
require(
dims.distinct.size == 1,
dims.distinct.length == 1,
"Multi-column explode requires equally sized tiles. Found " + dims
)

def safeGet(tile: Tile, col: Int, row: Int): Double =
if (tile == null) NODATA else tile.getDouble(col, row)

val numOutCols = tiles.length + 2
val (cols, rows) = dims.head

for {
row ← 0 until rows
col ← 0 until cols
if keep()
contents = Seq[Any](col, row) ++ tiles.map(safeGet(_, col, row))
} yield InternalRow(contents: _*)
val retval = Array.ofDim[InternalRow](cols * rows)
cfor(0)(_ < rows, _ + 1) { row =>
cfor(0)(_ < cols, _ + 1) { col =>
val rowIndex = row * cols + col
val outCols = Array.ofDim[Any](numOutCols)
outCols(0) = col
outCols(1) = row
cfor(0)(_ < tiles.length, _ + 1) { index =>
val tile = tiles(index)
outCols(index + 2) = if(tile == null) doubleNODATA else tile.getDouble(col, row)
}
retval(rowIndex) = new GenericInternalRow(outCols)
}
}
if(sampleFraction < 1.0) sample(retval)
else retval
}
}
}
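As a usage sketch (not part of this commit), here is how the rewritten expression is typically driven from the DataFrame API via explodeTiles, assuming a SparkSession named spark with RasterFrames initialized, as the benchmarks' SparkEnv base class provides; the IntArrayTile input and the resulting column names are illustrative only:

import astraea.spark.rasterframes._
import geotrellis.raster.{IntArrayTile, Tile}
import spark.implicits._

// A single 4x4 constant tile in a one-column DataFrame.
val df = Seq[Tile](IntArrayTile.fill(1, 4, 4)).toDF("tile")

// explodeTiles yields one row per pixel: the pixel's (column, row) index
// plus one Double-valued cell column per input tile column.
df.select(explodeTiles($"tile")).show(5)

Compared to the previous for-comprehension, the cfor loops above fill a preallocated array and defer sampling to a single shuffle-and-take pass, avoiding a per-pixel random draw.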
@@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.gt.InternalRowTile
import org.apache.spark.sql.gt.types.TileUDT
import org.apache.spark.sql.types._

@@ -34,6 +35,7 @@ import org.apache.spark.sql.types._
* @since 10/10/17
*/
package object expressions {
import InternalRowTile.C._
private def row(input: Any) = input.asInstanceOf[InternalRow]

protected trait RequiresTile { self: UnaryExpression ⇒
@@ -51,10 +53,10 @@ package object expressions {
def dataType: DataType = StringType

override protected def nullSafeEval(input: Any): Any =
row(input).getUTF8String(TileUDT.C.CELL_TYPE)
row(input).getUTF8String(CELL_TYPE)

protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
defineCodeGen(ctx, ev, c ⇒ s"$c.getUTF8String(${TileUDT.C.CELL_TYPE});")
defineCodeGen(ctx, ev, c ⇒ s"$c.getUTF8String($CELL_TYPE);")
}

/** Extract a Tile's dimensions */
@@ -66,8 +68,8 @@

override protected def nullSafeEval(input: Any): Any = {
val r = row(input)
val cols = r.getShort(TileUDT.C.COLS)
val rows = r.getShort(TileUDT.C.ROWS)
val cols = r.getShort(COLS)
val rows = r.getShort(ROWS)
InternalRow(cols, rows)
}

@@ -76,15 +78,11 @@
val rows = ctx.freshName("rows")
nullSafeCodeGen(ctx, ev, eval ⇒
s"""
final short $cols = $eval.getShort(${TileUDT.C.COLS});
final short $rows = $eval.getShort(${TileUDT.C.ROWS});
final short $cols = $eval.getShort($COLS);
final short $rows = $eval.getShort($ROWS);
${ev.value} = new GenericInternalRow(new Object[] { $cols, $rows });
//${ctx.setColumn(ev.value, ShortType, 0, cols)};
//${ctx.setColumn(ev.value, ShortType, 1, rows)};
"""
)
}

//defineCodeGen(ctx, ev, c ⇒ s"$c.getUTF8String(${TileUDT.C.CELL_TYPE})")
}
}
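Both expressions rely on the same trick: read a single field out of the tile's serialized InternalRow instead of deserializing the whole Tile. A rough sketch of that access pattern, with an assumed column ordinal (the real constants live in InternalRowTile.C):

import org.apache.spark.sql.catalyst.InternalRow

// Assumed ordinal of the cell-type field; illustrative only.
val CELL_TYPE = 0

// Pulls the cell type name straight out of the serialized tile row,
// avoiding a full TileUDT.deserialize round trip.
def cellTypeName(tileRow: InternalRow): String =
  tileRow.getUTF8String(CELL_TYPE).toString

This is the same motivation behind the InternalRowTile benchmark above: metadata and cell reads can skip full deserialization.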
@@ -16,9 +16,7 @@

package astraea.spark.rasterframes.functions

import astraea.spark.rasterframes.expressions.{CellType, ExplodeTileExpression}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.gt.types

/**
@@ -37,9 +37,10 @@ package object functions {
else if (o2 == null) o1
else op(o1, o2)
}

@inline
private[rasterframes] def safeEval[P, R](f: P ⇒ R): P ⇒ R =
(p) ⇒ if (p == null) null.asInstanceOf[R] else f(p)
@inline
private[rasterframes] def safeEval[P1, P2, R](f: (P1, P2) ⇒ R): (P1, P2) ⇒ R =
(p1, p2) ⇒ if (p1 == null || p2 == null) null.asInstanceOf[R] else f(p1, p2)

@@ -209,7 +210,7 @@ package object functions {
private[rasterframes] val localDivide: (Tile, Tile) ⇒ Tile = safeEval(Divide.apply)

/** Render tile as ASCII string. */
private[rasterframes] val renderAscii: (Tile) ⇒ String = safeEval(_.asciiDraw)
private[rasterframes] val renderAscii: (Tile) ⇒ String = safeEval(_.renderAscii())

/** Constructor for constant tiles */
private[rasterframes] val makeConstantTile: (Number, Int, Int, String) ⇒ Tile = (value, cols, rows, cellTypeName) ⇒ {
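As an aside on the safeEval helpers that gained @inline above, a small sketch of the null-propagation they provide; safeTrim and its inputs are illustrative only:

// Same shape as the single-argument safeEval above: a null input
// short-circuits to null rather than invoking the wrapped function.
def safeEval[P, R](f: P ⇒ R): P ⇒ R =
  (p: P) ⇒ if (p == null) null.asInstanceOf[R] else f(p)

val safeTrim: String ⇒ String = safeEval((s: String) ⇒ s.trim)
safeTrim(null)       // null, no NullPointerException
safeTrim(" 0.5.11 ") // "0.5.11"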