Skip to content

Commit

Permalink
long hash functions issue zinggAI#539
Browse files Browse the repository at this point in the history
  • Loading branch information
vikasgupta78 committed Mar 20, 2023
1 parent cb992e9 commit 6218fc0
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 0 deletions.
16 changes: 16 additions & 0 deletions common/core/src/main/java/zingg/common/core/hash/IdentityLong.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package zingg.common.core.hash;


/**
 * Hash function over {@code Long} fields that hands back its input as-is.
 * It registers itself under the name {@code identityLong}.
 */
public class IdentityLong extends BaseHash<Long,Long> {

    private static final long serialVersionUID = 1L;

    /**
     * Creates the hash function and sets its name to {@code identityLong}.
     */
    public IdentityLong() {
        super();
        this.setName("identityLong");
    }

    /**
     * Returns the given value without modification.
     *
     * @param field the value to hash; may be {@code null}
     * @return the very same reference that was passed in, {@code null} included
     */
    public Long call(Long field) {
        final Long unchanged = field;
        return unchanged;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package zingg.common.core.hash;

/**
 * Hash function that reports whether a {@code Long} field is strictly negative.
 * It registers itself under the name {@code lessThanZeroLong}.
 */
public class LessThanZeroLong extends BaseHash<Long,Boolean> {

    private static final long serialVersionUID = 1L;

    /**
     * Creates the hash function and sets its name to {@code lessThanZeroLong}.
     */
    public LessThanZeroLong() {
        super();
        this.setName("lessThanZeroLong");
    }

    /**
     * Tests the sign of the given value.
     *
     * @param field the value to inspect; may be {@code null}
     * @return {@code true} when {@code field} is non-null and below zero,
     *         {@code false} otherwise (a {@code null} input yields {@code false})
     */
    public Boolean call(Long field) {
        // A missing value is treated as "not negative".
        if (field == null) {
            return false;
        }
        return field < 0;
    }

}
33 changes: 33 additions & 0 deletions common/core/src/main/java/zingg/common/core/hash/RangeLong.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package zingg.common.core.hash;

/**
 * Hash function that flags whether a {@code Long} field lies in the half-open
 * interval {@code [lowerLimit, upperLimit)}.
 * Its name is built as {@code rangeBetween<lower>And<upper>Long}.
 */
public class RangeLong extends BaseHash<Long,Long> {

    private static final long serialVersionUID = 1L;

    private long lowerLimit;
    private long upperLimit;

    /**
     * Creates a range hash for the given bounds.
     *
     * @param lower inclusive lower bound
     * @param upper exclusive upper bound
     */
    public RangeLong(long lower, long upper) {
        final String hashName = "rangeBetween" + lower + "And" + upper + "Long";
        setName(hashName);
        this.lowerLimit = lower;
        this.upperLimit = upper;
    }

    /**
     * Checks the value against the configured interval.
     *
     * @param field the value to test; may be {@code null}
     * @return {@code 1} when {@code field} is non-null and
     *         {@code lowerLimit <= field < upperLimit}, {@code 0} otherwise
     */
    public Long call(Long field) {
        // A missing value never counts as inside the range.
        if (field == null) {
            return 0L;
        }
        final long value = field;
        final boolean inside = lowerLimit <= value && value < upperLimit;
        return inside ? 1L : 0L;
    }

    /**
     * @return the inclusive lower bound of the interval
     */
    public long getLowerLimit() {
        return this.lowerLimit;
    }

    /**
     * @return the exclusive upper bound of the interval
     */
    public long getUpperLimit() {
        return this.upperLimit;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package zingg.common.core.hash;

/**
 * Hash function that removes the trailing decimal digits of a {@code Long}
 * by integer-dividing it by a power of ten.
 * Its name is built as {@code trimLast<count>DigitsLong}.
 */
public class TrimLastDigitsLong extends BaseHash<Long,Long> {

    private static final long serialVersionUID = 1L;

    /** 10^18 is the largest power of ten that still fits in a {@code long}. */
    private static final int MAX_DIGITS = 18;

    /**
     * Kept unchanged for any package-level code that may read it. {@link #call(Long)}
     * no longer indexes into this table, because it only covered counts 0..5 and
     * holds {@code int} values.
     */
    static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};

    private final int numDigits;

    /** 10 raised to {@link #numDigits}, computed once at construction. */
    private final long divisor;

    /**
     * Creates a trimming hash that drops the last {@code count} decimal digits.
     *
     * @param count number of trailing digits to drop, from 0 to 18 inclusive
     * @throws IllegalArgumentException if {@code count} is negative or greater than 18
     */
    public TrimLastDigitsLong(int count) {
        // Fail at construction instead of with an array-index error on the first call.
        if (count < 0 || count > MAX_DIGITS) {
            throw new IllegalArgumentException(
                    "count must be between 0 and " + MAX_DIGITS + " but was " + count);
        }
        setName("trimLast" + count + "DigitsLong");
        this.numDigits = count;

        long power = 1L;
        for (int i = 0; i < count; i++) {
            power *= 10L;
        }
        this.divisor = power;
    }

    /**
     * Drops the configured number of trailing digits from the value.
     *
     * @param field the value to trim; may be {@code null}
     * @return {@code field} divided by 10^numDigits using integer division
     *         (which rounds toward zero, so negative values move toward zero),
     *         or {@code null} when {@code field} is {@code null}
     */
    public Long call(Long field) {
        if (field == null) {
            return null;
        }
        return field / divisor;
    }

    /**
     * @return the number of trailing digits this hash removes
     */
    public int getNumDigits() {
        return numDigits;
    }

}
27 changes: 27 additions & 0 deletions common/core/src/main/resources/hashFunctions.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
{
"name":"identityInteger"
},
{
"name":"identityLong"
},
{
"name":"truncateDoubleTo1Places"
},
Expand All @@ -54,6 +57,9 @@
{
"name":"lessThanZeroInt"
},
{
"name":"lessThanZeroLong"
},
{
"name":"trimLast1DigitsDbl"
},
Expand All @@ -72,6 +78,15 @@
{
"name":"trimLast3DigitsInt"
},
{
"name":"trimLast1DigitsLong"
},
{
"name":"trimLast2DigitsLong"
},
{
"name":"trimLast3DigitsLong"
},
{
"name":"rangeBetween0And10Dbl"
},
Expand All @@ -96,6 +111,18 @@
{
"name":"rangeBetween1000And10000Int"
},
{
"name":"rangeBetween0And10Long"
},
{
"name":"rangeBetween10And100Long"
},
{
"name":"rangeBetween100And1000Long"
},
{
"name":"rangeBetween1000And10000Long"
},
{
"name":"round"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public SparkHashFunctionRegistry() {
init(new SparkFirst3CharsBox());

init(new SparkIdentityInteger());

init(new SparkIdentityLong());

init(new SparkTruncateDouble(1));
init(new SparkTruncateDouble(2));
Expand All @@ -49,6 +51,8 @@ public SparkHashFunctionRegistry() {

init(new SparkLessThanZeroInt());

init(new SparkLessThanZeroLong());

init(new SparkTrimLastDigitsDbl(1));
init(new SparkTrimLastDigitsDbl(2));
init(new SparkTrimLastDigitsDbl(3));
Expand All @@ -57,6 +61,10 @@ public SparkHashFunctionRegistry() {
init(new SparkTrimLastDigitsInt(2));
init(new SparkTrimLastDigitsInt(3));

init(new SparkTrimLastDigitsLong(1));
init(new SparkTrimLastDigitsLong(2));
init(new SparkTrimLastDigitsLong(3));

init(new SparkRangeDbl(0,10));
init(new SparkRangeDbl(10,100));
init(new SparkRangeDbl(100,1000));
Expand All @@ -67,6 +75,11 @@ public SparkHashFunctionRegistry() {
init(new SparkRangeInt(100,1000));
init(new SparkRangeInt(1000,10000));

init(new SparkRangeLong(0,10));
init(new SparkRangeLong(10,100));
init(new SparkRangeLong(100,1000));
init(new SparkRangeLong(1000,10000));

init(new SparkRound());

}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package zingg.spark.core.hash;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.types.DataTypes;

import zingg.common.core.hash.IdentityLong;

/**
 * Spark hash function backed by {@link IdentityLong}.
 * Both the data type and the return type are set to {@code DataTypes.LongType}.
 */
public class SparkIdentityLong extends SparkHashFunction<Long, Long> {

    private static final long serialVersionUID = 1L;

    public static final Log LOG = LogFactory.getLog(SparkIdentityLong.class);

    /**
     * Installs an {@link IdentityLong} as the base hash and sets the
     * data and return types to long.
     */
    public SparkIdentityLong() {
        super();
        final IdentityLong delegate = new IdentityLong();
        this.setBaseHash(delegate);
        this.setDataType(DataTypes.LongType);
        this.setReturnType(DataTypes.LongType);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package zingg.spark.core.hash;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.types.DataTypes;

import zingg.common.core.hash.LessThanZeroLong;

/**
 * Spark hash function backed by {@link LessThanZeroLong}.
 * The data type is set to {@code DataTypes.LongType} and the return type to
 * {@code DataTypes.BooleanType}.
 */
public class SparkLessThanZeroLong extends SparkHashFunction<Long, Boolean> {

    private static final long serialVersionUID = 1L;

    public static final Log LOG = LogFactory.getLog(SparkLessThanZeroLong.class);

    /**
     * Installs a {@link LessThanZeroLong} as the base hash and sets the
     * data type to long and the return type to boolean.
     */
    public SparkLessThanZeroLong() {
        super();
        final LessThanZeroLong delegate = new LessThanZeroLong();
        this.setBaseHash(delegate);
        this.setDataType(DataTypes.LongType);
        this.setReturnType(DataTypes.BooleanType);
    }

}
20 changes: 20 additions & 0 deletions spark/core/src/main/java/zingg/spark/core/hash/SparkRangeLong.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package zingg.spark.core.hash;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.types.DataTypes;

import zingg.common.core.hash.RangeLong;

/**
 * Spark hash function backed by {@link RangeLong}.
 * Both the data type and the return type are set to {@code DataTypes.LongType}.
 */
public class SparkRangeLong extends SparkHashFunction<Long, Long> {

    private static final long serialVersionUID = 1L;

    public static final Log LOG = LogFactory.getLog(SparkRangeLong.class);

    /**
     * Installs a {@link RangeLong} built from the given bounds as the base hash
     * and sets the data and return types to long.
     *
     * @param lower lower bound passed through to {@link RangeLong}
     * @param upper upper bound passed through to {@link RangeLong}
     */
    public SparkRangeLong(long lower, long upper) {
        super();
        final RangeLong delegate = new RangeLong(lower, upper);
        this.setBaseHash(delegate);
        this.setDataType(DataTypes.LongType);
        this.setReturnType(DataTypes.LongType);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package zingg.spark.core.hash;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.types.DataTypes;

import zingg.common.core.hash.TrimLastDigitsLong;

/**
 * Spark hash function backed by {@link TrimLastDigitsLong}.
 * Both the data type and the return type are set to {@code DataTypes.LongType}.
 */
public class SparkTrimLastDigitsLong extends SparkHashFunction<Long, Long> {

    private static final long serialVersionUID = 1L;

    public static final Log LOG = LogFactory.getLog(SparkTrimLastDigitsLong.class);

    /**
     * Installs a {@link TrimLastDigitsLong} for the given digit count as the
     * base hash and sets the data and return types to long.
     *
     * @param count number of trailing digits, passed through to {@link TrimLastDigitsLong}
     */
    public SparkTrimLastDigitsLong(int count) {
        super();
        final TrimLastDigitsLong delegate = new TrimLastDigitsLong(count);
        this.setBaseHash(delegate);
        this.setDataType(DataTypes.LongType);
        this.setReturnType(DataTypes.LongType);
    }

}

0 comments on commit 6218fc0

Please sign in to comment.