-
Notifications
You must be signed in to change notification settings - Fork 346
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
GH-345: Parameterize CMS to CMS[K] and decouple counting/querying from heavy hitters
- Loading branch information
Showing
8 changed files
with
1,608 additions
and
365 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
[Caliper](https://code.google.com/p/caliper/)-based Benchmarks for Algebird data structures. | ||
|
||
# Usage | ||
|
||
Run the following commands from the top-level Algebird directory: | ||
|
||
$ ./sbt # <<< enter sbt REPL | ||
> project algebird-caliper | ||
|
||
Now you can run the following commands from within the sbt REPL: | ||
|
||
# List available benchmarks | ||
> show cappi::benchmarks | ||
|
||
# Run a particular benchmark | ||
> cappi::benchmarkOnly com.twitter.algebird.caliper.HLLBenchmark | ||
|
||
# Debug a particular benchmark (shows e.g. number of repetitions that will be run) | ||
> cappi::benchmarkOnly --debug com.twitter.algebird.caliper.HLLBenchmark | ||
|
||
# Run all benchmarks (apparently this is broken, see https://github.com/softprops/cappi/issues/1) | ||
> cappi::benchmarks | ||
|
||
You can find further details in the documentation of [cappi](https://github.com/softprops/cappi), the sbt plugin | ||
we use to run the caliper benchmarks. | ||
|
||
Example output for [CMSBenchmark](src/test/scala/com/twitter/algebird/caliper/CMSBenchmark.scala): | ||
|
||
> cappi::benchmarkOnly com.twitter.algebird.caliper.CMSBenchmark | ||
[info] Running com.google.caliper.Runner com.twitter.algebird.caliper.CMSBenchmark | ||
[info] 0% Scenario{vm=java, trial=0, benchmark=PlusOfFirstHundredIntegersWithLongCms, delta=0.0000001, eps=0.1, heavyHittersPct=0.2, maxBits=2048, operations=100} 292576.31 ns; σ=1271.12 ns @ 3 trials | ||
[info] 17% Scenario{vm=java, trial=0, benchmark=PlusOfFirstHundredIntegersWithBigIntCms, delta=0.0000001, eps=0.1, heavyHittersPct=0.2, maxBits=2048, operations=100} 830195.29 ns; σ=7349.10 ns @ 3 trials | ||
[info] 33% Scenario{vm=java, trial=0, benchmark=PlusOfRandom2048BitNumbersWithBigIntCms, delta=0.0000001, eps=0.1, heavyHittersPct=0.2, maxBits=2048, operations=100} 3362751.81 ns; σ=104683.16 ns @ 10 trials | ||
[info] 50% Scenario{vm=java, trial=0, benchmark=PlusOfFirstHundredIntegersWithLongCms, delta=0.0000001, eps=0.005, heavyHittersPct=0.2, maxBits=2048, operations=100} 384133.61 ns; σ=41211.47 ns @ 10 trials | ||
[info] 67% Scenario{vm=java, trial=0, benchmark=PlusOfFirstHundredIntegersWithBigIntCms, delta=0.0000001, eps=0.005, heavyHittersPct=0.2, maxBits=2048, operations=100} 1018308.55 ns; σ=43285.12 ns @ 10 trials | ||
[info] 83% Scenario{vm=java, trial=0, benchmark=PlusOfRandom2048BitNumbersWithBigIntCms, delta=0.0000001, eps=0.005, heavyHittersPct=0.2, maxBits=2048, operations=100} 3610991.09 ns; σ=195033.95 ns @ 10 trials | ||
[info] | ||
[info] benchmark eps us linear runtime | ||
[info] PlusOfFirstHundredIntegersWithLongCms 0.1 293 == | ||
[info] PlusOfFirstHundredIntegersWithLongCms 0.005 384 === | ||
[info] PlusOfFirstHundredIntegersWithBigIntCms 0.1 830 ====== | ||
[info] PlusOfFirstHundredIntegersWithBigIntCms 0.005 1018 ======== | ||
[info] PlusOfRandom2048BitNumbersWithBigIntCms 0.1 3363 =========================== | ||
[info] PlusOfRandom2048BitNumbersWithBigIntCms 0.005 3611 ============================== | ||
[info] | ||
[info] vm: java | ||
[info] trial: 0 | ||
[info] delta: 0.0000001 | ||
[info] heavyHittersPct: 0.2 | ||
[info] maxBits: 2048 | ||
[info] operations: 100 | ||
[success] Total time: 74 s, completed Oct 12, 2014 2:36:04 PM |
86 changes: 86 additions & 0 deletions
86
algebird-caliper/src/test/scala/com/twitter/algebird/caliper/CMSBenchmark.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
package com.twitter.algebird.caliper | ||
|
||
import com.google.caliper.{ Param, SimpleBenchmark } | ||
import com.twitter.algebird.{ TopPctCMS, TopCMS, CMSHasherImplicits, TopPctCMSMonoid } | ||
|
||
/**
 * Benchmarks the Count-Min sketch implementation in Algebird.
 *
 * We benchmark different `K` types as well as different input data streams.
 *
 * Fields annotated with `@Param` are benchmark parameters: the values listed in the annotation are the ones
 * the benchmark is run with, and the initializers below are only placeholders.
 *
 * Every `time*` method follows the same shape: it repeats, `reps` times, a fold that starts from the monoid's
 * `zero` and merges in `operations` single-item sketches via `++`, then returns the number of repetitions run.
 *
 * NOTE(review): the sketch produced by each fold is discarded. Confirm that the JIT cannot eliminate the work
 * as dead code; if it can, fold some property of the result into the returned value.
 */
// Once we can convince cappi (https://github.com/softprops/cappi) -- the sbt plugin we use to run
// caliper benchmarks -- to work with the latest caliper 1.0-beta-1, we would:
// - Let `CMSBenchmark` extend `Benchmark` (instead of `SimpleBenchmark`)
// - Annotate `timePlus` with `@MacroBenchmark`.
class CMSBenchmark extends SimpleBenchmark {

  // Passed as the `eps` argument of `TopPctCMS.monoid`.
  @Param(Array("0.1", "0.005"))
  val eps: Double = 0.0

  // Passed as the `delta` argument of `TopPctCMS.monoid`.
  @Param(Array("0.0000001" /* 1E-7 */ ))
  val delta: Double = 0.0

  // Passed as the heavy hitters percentage argument of `TopPctCMS.monoid`.
  @Param(Array("0.2"))
  val heavyHittersPct: Double = 0.0

  @Param(Array("100"))
  val operations: Int = 0 // Number of operations per benchmark repetition (cf. `reps`)

  // Bit length of the random numbers drawn in `timePlusOfRandom2048BitNumbersWithBigIntCms`.
  @Param(Array("2048"))
  val maxBits: Int = 0

  // Initialized in `setUp`, i.e. after the `@Param` fields above carry their real values.
  var random: scala.util.Random = _
  var cmsLongMonoid: TopPctCMSMonoid[Long] = _
  var cmsBigIntMonoid: TopPctCMSMonoid[BigInt] = _

  /**
   * Creates the Long- and BigInt-keyed CMS monoids from the current benchmark parameters and (re)initializes
   * the random number generator used by the random-input benchmark.
   */
  override def setUp(): Unit = {
    // Required import of implicit values (e.g. for BigInt- or Long-backed CMS instances)
    import CMSHasherImplicits._

    // Both monoids share the same seed so that they are configured identically apart from `K`.
    val cmsSeed = 1
    cmsLongMonoid = TopPctCMS.monoid[Long](eps, delta, cmsSeed, heavyHittersPct)
    cmsBigIntMonoid = TopPctCMS.monoid[BigInt](eps, delta, cmsSeed, heavyHittersPct)

    // A fixed seed makes the random input stream identical across runs and across scenarios (e.g. the
    // different `eps` values), so their timings are measured on the same data.
    val randomSeed = 1L
    random = new scala.util.Random(randomSeed)
  }

  /**
   * Case A (K=Long): We count the first `operations` integers, i.e. [1, 100] with the default parameters.
   *
   * @param reps number of repetitions to run
   * @return the number of repetitions that were run
   */
  def timePlusOfFirstHundredIntegersWithLongCms(reps: Int): Int = {
    var rep = 0
    while (rep < reps) {
      (1 to operations).view.foldLeft(cmsLongMonoid.zero) { (cms, i) => cms ++ cmsLongMonoid.create(i) }
      rep += 1
    }
    rep
  }

  /**
   * Case B.1 (K=BigInt): We count the first `operations` integers, i.e. [1, 100] with the default parameters.
   *
   * @param reps number of repetitions to run
   * @return the number of repetitions that were run
   */
  def timePlusOfFirstHundredIntegersWithBigIntCms(reps: Int): Int = {
    var rep = 0
    while (rep < reps) {
      (1 to operations).view.foldLeft(cmsBigIntMonoid.zero) { (cms, i) => cms ++ cmsBigIntMonoid.create(i) }
      rep += 1
    }
    rep
  }

  /**
   * Case B.2 (K=BigInt): We draw numbers randomly from a 2^maxBits address space.
   *
   * @param reps number of repetitions to run
   * @return the number of repetitions that were run
   */
  def timePlusOfRandom2048BitNumbersWithBigIntCms(reps: Int): Int = {
    var rep = 0
    while (rep < reps) {
      // The range only drives the number of operations; the counted item is a fresh random number.
      (1 to operations).view.foldLeft(cmsBigIntMonoid.zero) { (cms, _) =>
        val n = scala.math.BigInt(maxBits, random)
        cms ++ cmsBigIntMonoid.create(n)
      }
      rep += 1
    }
    rep
  }

}
Oops, something went wrong.