Skip to content

Commit

Permalink
Make various TDigest implementations Serializable (so that it can be …
Browse files Browse the repository at this point in the history
…used with Spark).
  • Loading branch information
rayortigas committed Jun 2, 2015
1 parent 422561e commit f4ed7af
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 9 deletions.
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@
<version>0.0.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
<scope>test</scope>
</dependency>
</dependencies>

<dependencyManagement>
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/com/tdunning/math/stats/AVLGroupTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package com.tdunning.math.stats;

import java.io.Serializable;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -26,7 +27,7 @@
/**
* A tree of t-digest centroids.
*/
final class AVLGroupTree extends AbstractCollection<Centroid> {
final class AVLGroupTree extends AbstractCollection<Centroid> implements Serializable {

/* For insertions into the tree */
private double centroid;
Expand Down
5 changes: 3 additions & 2 deletions src/main/java/com/tdunning/math/stats/ArrayDigest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package com.tdunning.math.stats;

import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.AbstractCollection;
import java.util.ArrayList;
Expand Down Expand Up @@ -755,7 +756,7 @@ protected Index computeNext() {
public final static int VERBOSE_ARRAY_DIGEST = 3;
public final static int SMALL_ARRAY_DIGEST = 4;

class Index {
class Index implements Serializable {
final int page, subPage;

private Index(int page, int subPage) {
Expand All @@ -772,7 +773,7 @@ int count() {
}
}

private static class Page {
private static class Page implements Serializable {
private final boolean recordAllData;
private final int pageSize;

Expand Down
3 changes: 2 additions & 1 deletion src/main/java/com/tdunning/math/stats/Centroid.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@

package com.tdunning.math.stats;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

/**
* A single centroid which represents a number of data points.
*/
public class Centroid implements Comparable<Centroid> {
public class Centroid implements Comparable<Centroid>, Serializable {
private static final AtomicInteger uniqueCount = new AtomicInteger(1);

private double centroid = 0;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/com/tdunning/math/stats/GroupTree.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package com.tdunning.math.stats;

import java.io.Serializable;
import java.util.AbstractCollection;
import java.util.ArrayDeque;
import java.util.Deque;
Expand All @@ -27,7 +28,7 @@
* A tree containing TDigest.Centroid. This adds to the normal NavigableSet the
* ability to sum up the size of elements to the left of a particular group.
*/
public class GroupTree extends AbstractCollection<Centroid> {
public class GroupTree extends AbstractCollection<Centroid> implements Serializable {
private long count;
private int size;
private int depth;
Expand Down
7 changes: 4 additions & 3 deletions src/main/java/com/tdunning/math/stats/IntAVLTree.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.tdunning.math.stats;

import java.io.Serializable;
import java.util.Arrays;


Expand All @@ -9,7 +10,7 @@
* want to add data to the nodes, typically by using arrays and node
* identifiers as indices.
*/
abstract class IntAVLTree {
abstract class IntAVLTree implements Serializable {

/**
* We use <tt>0</tt> instead of <tt>-1</tt> so that left(NIL) works without
Expand Down Expand Up @@ -507,7 +508,7 @@ void checkBalance(int node) {
/**
* A stack of int values.
*/
private static class IntStack {
private static class IntStack implements Serializable {

private int[] stack;
private int size;
Expand Down Expand Up @@ -535,7 +536,7 @@ void push(int v) {

}

private static class NodeAllocator {
private static class NodeAllocator implements Serializable {

private int nextNode;
private final IntStack releasedNodes;
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/com/tdunning/math/stats/TDigest.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package com.tdunning.math.stats;

import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.Collection;

Expand All @@ -39,7 +40,7 @@
* <p/>
* g) easy to adapt for use with map-reduce
*/
public abstract class TDigest {
public abstract class TDigest implements Serializable {
/**
* Creates an ArrayDigest with default page size.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.tdunning.math.stats;

import org.apache.commons.lang3.SerializationUtils;
import org.junit.*;

import java.io.ByteArrayOutputStream;

import static org.junit.Assert.assertNotNull;

/**
* Verifies that the various TDigest implementations can be serialized.
*
* Serializability is important, for example, if we want to use t-digests with Spark.
*/
public class TDigestSerializationTest {

@Test
public void testTreeDigest() {
assertSerializesAndDeserializes(new TreeDigest(100));
}

@Test
public void testMergingDigest() {
assertSerializesAndDeserializes(new MergingDigest(100));
}

@Test
public void testAVLTreeDigest() {
assertSerializesAndDeserializes(new AVLTreeDigest(100));
}

@Test
public void testArrayDigest() {
assertSerializesAndDeserializes(new ArrayDigest(32, 100));
}

protected void assertSerializesAndDeserializes(TDigest tdigest) {
assertNotNull(SerializationUtils.deserialize(SerializationUtils.serialize(tdigest)));

tdigest.add(1);
assertNotNull(SerializationUtils.deserialize(SerializationUtils.serialize(tdigest)));
}
}

0 comments on commit f4ed7af

Please sign in to comment.