Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cats.kernel.Hash port for Scala CHAMP HashSet #4185

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
81b9e15
Initial port of Scala CHAMP HashSet
DavidGregory084 Apr 12, 2022
9894c03
Implement === and equals for HashSet.Nodes
DavidGregory084 Apr 13, 2022
9efb68b
Remove usage of String#repeat which does not exist on Java 8
DavidGregory084 Apr 13, 2022
2eecf83
Update Scaladoc and remove outdated parameter references
DavidGregory084 Apr 13, 2022
99f6c4a
Add hash collision vector content size check to equals and ===
DavidGregory084 Apr 13, 2022
5dffde2
Add union operation and a benchmark suite
DavidGregory084 Apr 14, 2022
ca5328c
Avoid using distinct on mutable list buffer
DavidGregory084 Apr 14, 2022
85ecfd9
Fix some usages of universal equality
DavidGregory084 Apr 14, 2022
157d427
Fix formatting in Scala 2.13+ benchmark
DavidGregory084 Apr 14, 2022
8df826a
Update access modifiers and make many methods final in HashSet
DavidGregory084 Apr 19, 2022
df99d00
Extend IterableOnce in 2.13+ HashSet for improved interop
DavidGregory084 Apr 19, 2022
6775210
Fix issue with sub-node propagation
DavidGregory084 Apr 21, 2022
cfba20e
Remove sizeHint - it serves no purpose now that size is cached in bit…
DavidGregory084 Apr 21, 2022
4e31494
Revert sizeHint removal as I was wrong - it prevents erroneous subnod…
DavidGregory084 Apr 21, 2022
6b8b0b7
Narrow conditions for subnode escalation
DavidGregory084 Apr 21, 2022
fcca6a6
Use byteswap hashing in HashSet to improve input hash
DavidGregory084 Apr 21, 2022
da97224
Add copyright notice from Scala standard library to meet license obli…
DavidGregory084 Apr 25, 2022
4a33f4d
Override concrete UnorderedFoldable operations for HashSet
DavidGregory084 May 3, 2022
31dc59c
Apply review suggestions from #4193 to this PR as well
DavidGregory084 May 17, 2022
ea58125
Add diff, filter, filterNot operations
DavidGregory084 Jun 16, 2022
d604c58
Merge branch 'main' of github.com:typelevel/cats into eq-based-set
DavidGregory084 Jun 16, 2022
388c547
Merge branch 'main' of github.com:typelevel/cats into eq-based-set
DavidGregory084 Jun 21, 2022
edf0d00
Implement intersect operation on HashSet
DavidGregory084 Jun 21, 2022
43017a3
Add missing intersect case to HashSet Arbitrary instance
DavidGregory084 Jun 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 206 additions & 0 deletions bench/src/main/scala-2.12/cats/bench/HashSetBench.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/*
* Copyright (c) 2015 Typelevel
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
* the Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

package cats.bench

import cats.data.HashSet
import java.util.concurrent.TimeUnit
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole
import scala.collection.immutable.{HashSet => SHashSet}

/**
 * JMH benchmarks that run the same operations against `cats.data.HashSet` and against the
 * Scala standard library's immutable `HashSet` (imported here as `SHashSet`), using `Long`
 * elements.
 *
 * Results are reported as average time in nanoseconds, and every benchmark is repeated for each
 * value of the `size` parameter. Benchmarks come in pairs: `hashSetXxx` exercises the cats set
 * and `scalaSetXxx` performs the matching operation on the standard library set.
 */
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Benchmark)
class HashSetBench {

  /** Number of elements used to populate the fixture sets; injected by JMH from the list below. */
  @Param(Array("0", "1", "2", "3", "4", "7", "8", "15", "16", "17", "39", "282", "4096", "131070", "7312102"))
  var size: Int = _

  // Fixtures, (re)built by `init()`. Each "other" set is built from the same range as its
  // counterpart, so the binary operations below combine two sets with identical contents.
  var hashSet: HashSet[Long] = _
  var otherHashSet: HashSet[Long] = _
  var scalaSet: SHashSet[Long] = _
  var otherScalaSet: SHashSet[Long] = _

  /** Predicate shared by the filter / filterNot benchmarks. */
  var pred: Long => Boolean = _

  /** Builds a cats `HashSet` from the range `1L to n`. */
  def hashSetOfSize(n: Int) = HashSet.fromSeq(1L to n.toLong)

  /** Builds a standard library `HashSet` from the range `1L to n` via its builder. */
  def scalaSetOfSize(n: Int) = (SHashSet.newBuilder[Long] ++= (1L to n.toLong)).result()

  /** Populates all fixtures for the current `size`; run at JMH's `Level.Trial` granularity. */
  @Setup(Level.Trial)
  def init(): Unit = {
    hashSet = hashSetOfSize(size)
    otherHashSet = hashSetOfSize(size)
    scalaSet = scalaSetOfSize(size)
    otherScalaSet = scalaSetOfSize(size)
    // Accepts even values only.
    pred = (value: Long) => value % 2 == 0
  }

  /** Construction of a cats set holding `size` elements. */
  @Benchmark
  def hashSetFromSeq(bh: Blackhole): Unit = {
    val built = hashSetOfSize(size)
    bh.consume(built)
  }

  /** Construction of a standard library set holding `size` elements. */
  @Benchmark
  def scalaSetFromSeq(bh: Blackhole): Unit = {
    val built = scalaSetOfSize(size)
    bh.consume(built)
  }

  /**
   * 1000 successive `add` calls starting from the fixture set. The added values are
   * 0, -1, ..., -999, none of which lie in the `1..size` range used to populate the fixture.
   * `@OperationsPerInvocation(1000)` makes JMH report the score per single operation.
   */
  @Benchmark
  @OperationsPerInvocation(1000)
  def hashSetAdd(bh: Blackhole): Unit = {
    var grown = hashSet
    var k = 0L
    while (k < 1000L) {
      grown = grown.add(-k)
      k += 1L
    }
    bh.consume(grown)
  }

  /** Standard library counterpart of `hashSetAdd`: adds 0, -1, ..., -999 one at a time. */
  @Benchmark
  @OperationsPerInvocation(1000)
  def scalaSetAdd(bh: Blackhole): Unit = {
    var grown = scalaSet
    var k = 0L
    while (k < 1000L) {
      grown = grown + (-k)
      k += 1L
    }
    bh.consume(grown)
  }

  /**
   * 1000 successive `remove` calls starting from the fixture set, for the values 0..999.
   * The fixture is populated from `1..size`, so 0 (and, for small sizes, the larger values)
   * falls outside the populated range.
   */
  @Benchmark
  @OperationsPerInvocation(1000)
  def hashSetRemove(bh: Blackhole): Unit = {
    var shrunk = hashSet
    var k = 0L
    while (k < 1000L) {
      shrunk = shrunk.remove(k)
      k += 1L
    }
    bh.consume(shrunk)
  }

  /** Standard library counterpart of `hashSetRemove`: removes 0..999 one at a time. */
  @Benchmark
  @OperationsPerInvocation(1000)
  def scalaSetRemove(bh: Blackhole): Unit = {
    var shrunk = scalaSet
    var k = 0L
    while (k < 1000L) {
      shrunk = shrunk - k
      k += 1L
    }
    bh.consume(shrunk)
  }

  /** 1000 membership probes against the cats set, for the values 0..999. */
  @Benchmark
  @OperationsPerInvocation(1000)
  def hashSetContains(bh: Blackhole): Unit = {
    var probe = 0L
    while (probe < 1000L) {
      bh.consume(hashSet.contains(probe))
      probe += 1L
    }
  }

  /** 1000 membership probes against the standard library set, for the values 0..999. */
  @Benchmark
  @OperationsPerInvocation(1000)
  def scalaSetContains(bh: Blackhole): Unit = {
    var probe = 0L
    while (probe < 1000L) {
      bh.consume(scalaSet.contains(probe))
      probe += 1L
    }
  }

  /** Full traversal of the cats set via `foreach`, handing every element to the Blackhole. */
  @Benchmark
  def hashSetForeach(bh: Blackhole): Unit = {
    hashSet.foreach(bh.consume)
  }

  /** Full traversal of the standard library set via `foreach`. */
  @Benchmark
  def scalaSetForeach(bh: Blackhole): Unit = {
    scalaSet.foreach(bh.consume)
  }

  /** Full traversal of the cats set by draining its iterator. */
  @Benchmark
  def hashSetIterator(bh: Blackhole): Unit = {
    val elems = hashSet.iterator
    while (elems.hasNext) {
      bh.consume(elems.next())
    }
  }

  /** Full traversal of the standard library set by draining its iterator. */
  @Benchmark
  def scalaSetIterator(bh: Blackhole): Unit = {
    val elems = scalaSet.iterator
    while (elems.hasNext) {
      bh.consume(elems.next())
    }
  }

  /** Union of the two cats fixture sets. */
  @Benchmark
  def hashSetUnion(bh: Blackhole): Unit = {
    val merged = hashSet.union(otherHashSet)
    bh.consume(merged)
  }

  /** Union (`|`) of the two standard library fixture sets. */
  @Benchmark
  def scalaSetUnion(bh: Blackhole): Unit = {
    val merged = scalaSet | otherScalaSet
    bh.consume(merged)
  }

  /** Difference of the two cats fixture sets. */
  @Benchmark
  def hashSetDiff(bh: Blackhole): Unit = {
    val remaining = hashSet.diff(otherHashSet)
    bh.consume(remaining)
  }

  /** Difference (`--`) of the two standard library fixture sets. */
  @Benchmark
  def scalaSetDiff(bh: Blackhole): Unit = {
    val remaining = scalaSet -- otherScalaSet
    bh.consume(remaining)
  }

  /** Intersection of the two cats fixture sets. */
  @Benchmark
  def hashSetIntersect(bh: Blackhole): Unit = {
    val common = hashSet.intersect(otherHashSet)
    bh.consume(common)
  }

  /** Intersection (`&`) of the two standard library fixture sets. */
  @Benchmark
  def scalaSetIntersect(bh: Blackhole): Unit = {
    val common = scalaSet & otherScalaSet
    bh.consume(common)
  }

  /** `filter` on the cats set using the shared predicate. */
  @Benchmark
  def hashSetFilter(bh: Blackhole): Unit = {
    val kept = hashSet.filter(pred)
    bh.consume(kept)
  }

  /** `filter` on the standard library set using the shared predicate. */
  @Benchmark
  def scalaSetFilter(bh: Blackhole): Unit = {
    val kept = scalaSet.filter(pred)
    bh.consume(kept)
  }

  /** `filterNot` on the cats set using the shared predicate. */
  @Benchmark
  def hashSetFilterNot(bh: Blackhole): Unit = {
    val kept = hashSet.filterNot(pred)
    bh.consume(kept)
  }

  /** `filterNot` on the standard library set using the shared predicate. */
  @Benchmark
  def scalaSetFilterNot(bh: Blackhole): Unit = {
    val kept = scalaSet.filterNot(pred)
    bh.consume(kept)
  }

  /** Comparison of the two cats fixture sets with universal equality (`==`). */
  @Benchmark
  def hashSetUniversalEquals(bh: Blackhole): Unit = {
    val same = hashSet == otherHashSet
    bh.consume(same)
  }

  /**
   * Comparison of the two cats fixture sets with the set's `===` operator.
   * NOTE(review): presumably the `Eq`-based comparison, judging by the benchmark name - confirm
   * against `cats.data.HashSet`.
   */
  @Benchmark
  def hashSetEqEquals(bh: Blackhole): Unit = {
    val same = hashSet === otherHashSet
    bh.consume(same)
  }

  /** Comparison of the two standard library fixture sets with universal equality (`==`). */
  @Benchmark
  def scalaSetUniversalEquals(bh: Blackhole): Unit = {
    val same = scalaSet == otherScalaSet
    bh.consume(same)
  }
}
Loading