Skip to content

Commit

Permalink
Merge pull request #537 from zinggAI/main
Browse files Browse the repository at this point in the history
fixing float, long similarity and hash issue
  • Loading branch information
sonalgoyal authored Mar 25, 2023
2 parents 7b4da5e + de141d3 commit e94d762
Show file tree
Hide file tree
Showing 23 changed files with 582 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package zingg.common.core.feature;

import zingg.common.client.FieldDefinition;
import zingg.common.client.MatchType;
import zingg.common.core.similarity.function.FloatSimilarityFunction;


/**
 * Feature describing a field whose values are {@link Float}s.
 *
 * <p>{@link #init(FieldDefinition)} stores the field definition and, when the field's
 * match types include {@link MatchType#FUZZY}, registers a
 * {@link FloatSimilarityFunction} for it.
 */
public class FloatFeature extends BaseFeature<Float> {

    private static final long serialVersionUID = 1L;

    /** Creates the feature without any configuration; that happens in {@code init}. */
    public FloatFeature() {
        super();
    }

    /**
     * Binds this feature to a field definition and wires up its similarity function.
     *
     * @param newParam definition of the field this feature represents
     */
    public void init(FieldDefinition newParam) {
        setFieldDefinition(newParam);
        // Only fuzzy-matched fields get a similarity function.
        if (!newParam.getMatchType().contains(MatchType.FUZZY)) {
            return;
        }
        addSimFunction(new FloatSimilarityFunction());
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package zingg.common.core.feature;

import zingg.common.client.FieldDefinition;
import zingg.common.client.MatchType;
import zingg.common.core.similarity.function.LongSimilarityFunction;
/**
 * Feature describing a field whose values are {@link Long}s.
 *
 * <p>{@link #init(FieldDefinition)} stores the field definition and, when the field's
 * match types include {@link MatchType#FUZZY}, registers a
 * {@link LongSimilarityFunction} for it.
 */
public class LongFeature extends BaseFeature<Long> {

    private static final long serialVersionUID = 1L;

    /** Creates the feature without any configuration; that happens in {@code init}. */
    public LongFeature() {
        super();
    }

    /**
     * Binds this feature to a field definition and wires up its similarity function.
     *
     * @param newParam definition of the field this feature represents
     */
    public void init(FieldDefinition newParam) {
        setFieldDefinition(newParam);
        // Only fuzzy-matched fields get a similarity function.
        if (!newParam.getMatchType().contains(MatchType.FUZZY)) {
            return;
        }
        addSimFunction(new LongSimilarityFunction());
    }

}
16 changes: 16 additions & 0 deletions common/core/src/main/java/zingg/common/core/hash/IdentityLong.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package zingg.common.core.hash;


/**
 * Hash function over {@link Long} values that returns its input unchanged.
 * It is registered under the name {@code identityLong}.
 */
public class IdentityLong extends BaseHash<Long,Long> {

    private static final long serialVersionUID = 1L;

    /** Creates the hash function and sets its name to {@code identityLong}. */
    public IdentityLong() {
        super();
        setName("identityLong");
    }

    /**
     * Returns the given value as-is.
     *
     * @param field value to hash; may be {@code null}
     * @return the same reference that was passed in, including {@code null}
     */
    public Long call(final Long field) {
        return field;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package zingg.common.core.hash;

/**
 * Hash function that maps a {@link Float} to whether it is strictly negative.
 * It is registered under the name {@code lessThanZeroFloat}.
 */
public class LessThanZeroFloat extends BaseHash<Float,Boolean> {

    private static final long serialVersionUID = 1L;

    /** Creates the hash function and sets its name to {@code lessThanZeroFloat}. */
    public LessThanZeroFloat() {
        super();
        setName("lessThanZeroFloat");
    }

    /**
     * Tests the sign of the given value.
     *
     * @param field value to test; may be {@code null}
     * @return {@code true} only when {@code field} is non-null and below zero;
     *         {@code false} for {@code null}, zero, positive values and NaN
     */
    public Boolean call(Float field) {
        // A null field short-circuits to false before the comparison unboxes it.
        return field != null && field < 0;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package zingg.common.core.hash;

/**
 * Hash function that maps a {@link Long} to whether it is strictly negative.
 * It is registered under the name {@code lessThanZeroLong}.
 */
public class LessThanZeroLong extends BaseHash<Long,Boolean> {

    private static final long serialVersionUID = 1L;

    /** Creates the hash function and sets its name to {@code lessThanZeroLong}. */
    public LessThanZeroLong() {
        super();
        setName("lessThanZeroLong");
    }

    /**
     * Tests the sign of the given value.
     *
     * @param field value to test; may be {@code null}
     * @return {@code true} only when {@code field} is non-null and below zero;
     *         {@code false} otherwise
     */
    public Boolean call(Long field) {
        // A null field short-circuits to false before the comparison unboxes it.
        return field != null && field < 0;
    }

}
33 changes: 33 additions & 0 deletions common/core/src/main/java/zingg/common/core/hash/RangeFloat.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package zingg.common.core.hash;

/**
 * Hash function that flags whether a {@link Float} lies in the half-open interval
 * {@code [lowerLimit, upperLimit)}.
 *
 * <p>The name is derived from the limits: {@code rangeBetween<lower>And<upper>Float}.
 */
public class RangeFloat extends BaseHash<Float,Integer> {

    private static final long serialVersionUID = 1L;

    /** Inclusive lower bound of the interval. */
    private int lowerLimit;
    /** Exclusive upper bound of the interval. */
    private int upperLimit;

    /**
     * Creates a range hash for the given limits.
     *
     * @param lower inclusive lower bound
     * @param upper exclusive upper bound
     */
    public RangeFloat(int lower, int upper) {
        setName("rangeBetween" + lower + "And" + upper + "Float");
        lowerLimit = lower;
        upperLimit = upper;
    }

    /**
     * Checks whether the value falls inside the configured interval.
     *
     * @param field value to test; may be {@code null}
     * @return {@code 1} when {@code lowerLimit <= field < upperLimit}, otherwise {@code 0}
     *         (including for {@code null} and NaN)
     */
    public Integer call(Float field) {
        if (field == null) {
            return 0;
        }
        boolean inside = field >= lowerLimit && field < upperLimit;
        return inside ? 1 : 0;
    }

    /** @return the inclusive lower bound */
    public int getLowerLimit() {
        return lowerLimit;
    }

    /** @return the exclusive upper bound */
    public int getUpperLimit() {
        return upperLimit;
    }

}
33 changes: 33 additions & 0 deletions common/core/src/main/java/zingg/common/core/hash/RangeLong.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package zingg.common.core.hash;

/**
 * Hash function that flags whether a {@link Long} lies in the half-open interval
 * {@code [lowerLimit, upperLimit)}.
 *
 * <p>The name is derived from the limits: {@code rangeBetween<lower>And<upper>Long}.
 */
public class RangeLong extends BaseHash<Long,Long> {

    private static final long serialVersionUID = 1L;

    /** Inclusive lower bound of the interval. */
    private long lowerLimit;
    /** Exclusive upper bound of the interval. */
    private long upperLimit;

    /**
     * Creates a range hash for the given limits.
     *
     * @param lower inclusive lower bound
     * @param upper exclusive upper bound
     */
    public RangeLong(long lower, long upper) {
        setName("rangeBetween" + lower + "And" + upper + "Long");
        lowerLimit = lower;
        upperLimit = upper;
    }

    /**
     * Checks whether the value falls inside the configured interval.
     *
     * @param field value to test; may be {@code null}
     * @return {@code 1} when {@code lowerLimit <= field < upperLimit}, otherwise {@code 0}
     *         (including for {@code null})
     */
    public Long call(Long field) {
        if (field == null) {
            return 0L;
        }
        boolean inside = field >= lowerLimit && field < upperLimit;
        return inside ? 1L : 0L;
    }

    /** @return the inclusive lower bound */
    public long getLowerLimit() {
        return lowerLimit;
    }

    /** @return the exclusive upper bound */
    public long getUpperLimit() {
        return upperLimit;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package zingg.common.core.hash;

/**
 * Hash function that trims trailing digits from the integer part of a {@link Float}.
 *
 * <p>The value is divided by {@code 10^numDigits} and rounded down with
 * {@link Math#floor(double)}, i.e. towards negative infinity. The name is
 * {@code trimLast<count>DigitsFloat}.
 */
public class TrimLastDigitsFloat extends BaseHash<Float,Float> {

    private static final long serialVersionUID = 1L;

    /** Number of trailing digits to drop; always a valid index into {@link #POWERS_OF_10}. */
    private int numDigits;

    /** Divisors indexed by digit count; its length bounds the supported counts. */
    static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};

    /**
     * Creates a hash that trims the given number of digits.
     *
     * @param count number of trailing digits to drop, from 0 to
     *              {@code POWERS_OF_10.length - 1} inclusive
     * @throws IllegalArgumentException if {@code count} is outside that range
     */
    public TrimLastDigitsFloat(int count) {
        // Fail fast here: otherwise a bad count only surfaces later, inside call(),
        // as an ArrayIndexOutOfBoundsException with no hint about the cause.
        if (count < 0 || count >= POWERS_OF_10.length) {
            throw new IllegalArgumentException(
                    "count must be between 0 and " + (POWERS_OF_10.length - 1)
                    + " but was " + count);
        }
        setName("trimLast" + count + "DigitsFloat");
        this.numDigits = count;
    }

    /**
     * Trims the configured number of digits from the value.
     *
     * @param field value to hash; may be {@code null}
     * @return {@code floor(field / 10^numDigits)} as a float, or {@code null} when
     *         {@code field} is {@code null}
     */
    public Float call(Float field) {
        if (field == null) {
            return null;
        }
        return (float) Math.floor(field / POWERS_OF_10[numDigits]);
    }

    /** @return the number of trailing digits this hash drops */
    public int getNumDigits() {
        return numDigits;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package zingg.common.core.hash;

/**
 * Hash function that trims trailing decimal digits from a {@link Long}.
 *
 * <p>The value is divided by {@code 10^numDigits} using integer division, which
 * rounds towards zero. The name is {@code trimLast<count>DigitsLong}.
 */
public class TrimLastDigitsLong extends BaseHash<Long,Long> {

    private static final long serialVersionUID = 1L;

    /** Number of trailing digits to drop; always a valid index into {@link #POWERS_OF_10}. */
    private int numDigits;

    /** Divisors indexed by digit count; its length bounds the supported counts. */
    static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};

    /**
     * Creates a hash that trims the given number of digits.
     *
     * @param count number of trailing digits to drop, from 0 to
     *              {@code POWERS_OF_10.length - 1} inclusive
     * @throws IllegalArgumentException if {@code count} is outside that range
     */
    public TrimLastDigitsLong(int count) {
        // Fail fast here: otherwise a bad count only surfaces later, inside call(),
        // as an ArrayIndexOutOfBoundsException with no hint about the cause.
        if (count < 0 || count >= POWERS_OF_10.length) {
            throw new IllegalArgumentException(
                    "count must be between 0 and " + (POWERS_OF_10.length - 1)
                    + " but was " + count);
        }
        setName("trimLast" + count + "DigitsLong");
        this.numDigits = count;
    }

    /**
     * Trims the configured number of digits from the value.
     *
     * @param field value to hash; may be {@code null}
     * @return {@code field / 10^numDigits} (truncated towards zero), or {@code null}
     *         when {@code field} is {@code null}
     */
    public Long call(Long field) {
        if (field == null) {
            return null;
        }
        return field / POWERS_OF_10[numDigits];
    }

    /** @return the number of trailing digits this hash drops */
    public int getNumDigits() {
        return numDigits;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package zingg.common.core.hash;

/**
 * Hash function that cuts a {@link Float} down to a fixed number of decimal places.
 *
 * <p>The value is scaled up by {@code 10^numDecimalPlaces}, rounded down with
 * {@link Math#floor(double)} (towards negative infinity) and scaled back. The name
 * is {@code truncateFloatTo<n>Places}.
 */
public class TruncateFloat extends BaseHash<Float,Float> {

    private static final long serialVersionUID = 1L;

    /** Decimal places to keep; always a valid index into {@link #POWERS_OF_10}. */
    private int numDecimalPlaces;

    /** Scale factors indexed by decimal-place count; its length bounds the supported counts. */
    static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000};

    /**
     * Creates a hash that keeps the given number of decimal places.
     *
     * @param numDecimalPlaces decimal places to keep, from 0 to
     *                         {@code POWERS_OF_10.length - 1} inclusive
     * @throws IllegalArgumentException if {@code numDecimalPlaces} is outside that range
     */
    public TruncateFloat(int numDecimalPlaces) {
        // Fail fast here: otherwise a bad value only surfaces later, inside call(),
        // as an ArrayIndexOutOfBoundsException with no hint about the cause.
        if (numDecimalPlaces < 0 || numDecimalPlaces >= POWERS_OF_10.length) {
            throw new IllegalArgumentException(
                    "numDecimalPlaces must be between 0 and " + (POWERS_OF_10.length - 1)
                    + " but was " + numDecimalPlaces);
        }
        setName("truncateFloatTo" + numDecimalPlaces + "Places");
        this.numDecimalPlaces = numDecimalPlaces;
    }

    /**
     * Reduces the value to the configured number of decimal places.
     *
     * @param field value to hash; may be {@code null}
     * @return {@code floor(field * 10^n) / 10^n} as a float, or {@code null} when
     *         {@code field} is {@code null}
     */
    public Float call(Float field) {
        if (field == null) {
            return null;
        }
        int scale = POWERS_OF_10[numDecimalPlaces];
        return (float) (Math.floor(field * scale) / scale);
    }

    /** @return the number of decimal places this hash keeps */
    public int getNumDecimalPlaces() {
        return numDecimalPlaces;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package zingg.common.core.similarity.function;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Similarity between two {@link Float} values, where closer values score higher.
 *
 * <p>The score is {@code 1 - |first - second| / (1 + |first| + |second|)}, so equal
 * values score exactly 1. A {@code null} or NaN on either side also scores 1.
 */
public class FloatSimilarityFunction extends SimFunction<Float> {

    private static final long serialVersionUID = 1L;

    public static final Log LOG = LogFactory
            .getLog(FloatSimilarityFunction.class);

    /** Creates the function under the name {@code FloatSimilarityFunction}. */
    public FloatSimilarityFunction() {
        super("FloatSimilarityFunction");
    }

    /**
     * Scores how close two floats are.
     *
     * @param first  first value; may be {@code null} or NaN
     * @param second second value; may be {@code null} or NaN
     * @return 1 when either value is {@code null} or NaN, otherwise
     *         {@code 1 - |first - second| / (1 + |first| + |second|)}
     */
    @Override
    public Double call(Float first, Float second) {
        if (first == null || first.isNaN()) return 1d;
        if (second == null || second.isNaN()) return 1d;
        // Normalise by the magnitudes rather than the raw values: with negative inputs
        // "1 + first + second" can be zero or negative, which produced NaN, infinite or
        // out-of-range scores. This denominator is always at least 1, and it is the
        // same as before whenever both inputs are non-negative.
        double magnitude = 1.0 + Math.abs(first) + Math.abs(second);
        // We want similarity, hence we subtract from 1 so that closer values score higher.
        double score = 1 - Math.abs(first - second) / magnitude;
        if (LOG.isDebugEnabled()) {
            LOG.debug(" FloatSim bw " + first + " and second " + second + " is "
                    + score);
        }
        return score;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package zingg.common.core.similarity.function;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Relative difference between two {@link Long} values.
 *
 * <p>The score is {@code 2 * |first - second| / (|first| + |second|)}: 0 for equal
 * values, growing towards 2 as the values diverge. It is also 0 when either value is
 * {@code null} or when both are zero.
 *
 * <p>NOTE(review): despite the class name, a lower score means "more similar" here,
 * which is the opposite orientation to {@code 1 - ratio}. This is kept as-is;
 * confirm it is what downstream consumers expect.
 */
public class LongSimilarityFunction extends SimFunction<Long> {

    private static final long serialVersionUID = 1L;

    public static final Log LOG = LogFactory
            .getLog(LongSimilarityFunction.class);

    /** Creates the function under the name {@code LongSimilarityFunction}. */
    public LongSimilarityFunction() {
        super("LongSimilarityFunction");
    }

    /**
     * Scores the relative difference of two longs.
     *
     * @param first  first value; may be {@code null}
     * @param second second value; may be {@code null}
     * @return 0 when either value is {@code null} or both are zero, otherwise
     *         {@code 2 * |first - second| / (|first| + |second|)}
     */
    @Override
    public Double call(Long first, Long second) {
        double score = 0;
        if (first != null && second != null) {
            // Work in double so that neither the difference nor the sum can overflow long.
            double a = first;
            double b = second;
            // Normalise by the magnitudes: the raw sum goes negative for negative inputs
            // (yielding negative scores) and cancels to zero for opposite values such as
            // 5 and -5, which then scored the same as identical values.
            double magnitude = Math.abs(a) + Math.abs(b);
            if (magnitude != 0) {
                score = 2.0 * Math.abs(a - b) / magnitude;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(" LongSim bw " + first + " and second " + second + " is "
                        + score);
            }
        }
        return score;
    }
}
Loading

0 comments on commit e94d762

Please sign in to comment.