-
Notifications
You must be signed in to change notification settings - Fork 122
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #537 from zinggAI/main
fixing float, long similarity and hash issue
- Loading branch information
Showing
23 changed files
with
582 additions
and
5 deletions.
There are no files selected for viewing
23 changes: 23 additions & 0 deletions
23
common/core/src/main/java/zingg/common/core/feature/FloatFeature.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package zingg.common.core.feature; | ||
|
||
import zingg.common.client.FieldDefinition; | ||
import zingg.common.client.MatchType; | ||
import zingg.common.core.similarity.function.FloatSimilarityFunction; | ||
|
||
|
||
public class FloatFeature extends BaseFeature<Float> { | ||
|
||
private static final long serialVersionUID = 1L; | ||
|
||
public FloatFeature() { | ||
|
||
} | ||
|
||
public void init(FieldDefinition newParam) { | ||
setFieldDefinition(newParam); | ||
if (newParam.getMatchType().contains(MatchType.FUZZY)) { | ||
addSimFunction(new FloatSimilarityFunction()); | ||
} | ||
} | ||
|
||
} |
21 changes: 21 additions & 0 deletions
21
common/core/src/main/java/zingg/common/core/feature/LongFeature.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package zingg.common.core.feature; | ||
|
||
import zingg.common.client.FieldDefinition; | ||
import zingg.common.client.MatchType; | ||
import zingg.common.core.similarity.function.LongSimilarityFunction; | ||
public class LongFeature extends BaseFeature<Long> { | ||
|
||
private static final long serialVersionUID = 1L; | ||
|
||
public LongFeature() { | ||
|
||
} | ||
|
||
public void init(FieldDefinition newParam) { | ||
setFieldDefinition(newParam); | ||
if (newParam.getMatchType().contains(MatchType.FUZZY)) { | ||
addSimFunction(new LongSimilarityFunction()); | ||
} | ||
} | ||
|
||
} |
16 changes: 16 additions & 0 deletions
16
common/core/src/main/java/zingg/common/core/hash/IdentityLong.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
package zingg.common.core.hash; | ||
|
||
|
||
public class IdentityLong extends BaseHash<Long,Long>{ | ||
|
||
private static final long serialVersionUID = 1L; | ||
|
||
public IdentityLong() { | ||
setName("identityLong"); | ||
} | ||
|
||
public Long call(Long field) { | ||
return field; | ||
} | ||
|
||
} |
21 changes: 21 additions & 0 deletions
21
common/core/src/main/java/zingg/common/core/hash/LessThanZeroFloat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package zingg.common.core.hash; | ||
|
||
public class LessThanZeroFloat extends BaseHash<Float,Boolean>{ | ||
|
||
private static final long serialVersionUID = 1L; | ||
|
||
|
||
public LessThanZeroFloat() { | ||
setName("lessThanZeroFloat"); | ||
} | ||
|
||
|
||
public Boolean call(Float field) { | ||
Boolean r = false; | ||
if (field != null) { | ||
r = field < 0 ? true : false; | ||
} | ||
return r; | ||
} | ||
|
||
} |
18 changes: 18 additions & 0 deletions
18
common/core/src/main/java/zingg/common/core/hash/LessThanZeroLong.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package zingg.common.core.hash; | ||
|
||
public class LessThanZeroLong extends BaseHash<Long,Boolean>{ | ||
private static final long serialVersionUID = 1L; | ||
|
||
public LessThanZeroLong() { | ||
setName("lessThanZeroLong"); | ||
} | ||
|
||
public Boolean call(Long field) { | ||
Boolean r = false; | ||
if (field != null) { | ||
r = field < 0 ? true : false; | ||
} | ||
return r; | ||
} | ||
|
||
} |
33 changes: 33 additions & 0 deletions
33
common/core/src/main/java/zingg/common/core/hash/RangeFloat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package zingg.common.core.hash; | ||
|
||
public class RangeFloat extends BaseHash<Float,Integer>{ | ||
private static final long serialVersionUID = 1L; | ||
private int lowerLimit; | ||
private int upperLimit; | ||
|
||
public RangeFloat(int lower, int upper) { | ||
setName("rangeBetween" + lower + "And" + upper + "Float"); | ||
this.lowerLimit = lower; | ||
this.upperLimit = upper; | ||
} | ||
|
||
|
||
public Integer call(Float field) { | ||
int withinRange = 0; | ||
if (field != null && field >= lowerLimit && field < upperLimit) { | ||
withinRange = 1; | ||
} | ||
return withinRange; | ||
} | ||
|
||
|
||
public int getLowerLimit() { | ||
return lowerLimit; | ||
} | ||
|
||
|
||
public int getUpperLimit() { | ||
return upperLimit; | ||
} | ||
|
||
} |
33 changes: 33 additions & 0 deletions
33
common/core/src/main/java/zingg/common/core/hash/RangeLong.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package zingg.common.core.hash; | ||
|
||
public class RangeLong extends BaseHash<Long,Long>{ | ||
private static final long serialVersionUID = 1L; | ||
private long lowerLimit; | ||
private long upperLimit; | ||
|
||
public RangeLong(long lower, long upper) { | ||
setName("rangeBetween" + lower + "And" + upper + "Long"); | ||
this.lowerLimit = lower; | ||
this.upperLimit = upper; | ||
} | ||
|
||
|
||
public Long call(Long field) { | ||
long withinRange = 0; | ||
if (field != null && field >= lowerLimit && field < upperLimit) { | ||
withinRange = 1; | ||
} | ||
return withinRange; | ||
} | ||
|
||
|
||
public long getLowerLimit() { | ||
return lowerLimit; | ||
} | ||
|
||
|
||
public long getUpperLimit() { | ||
return upperLimit; | ||
} | ||
|
||
} |
32 changes: 32 additions & 0 deletions
32
common/core/src/main/java/zingg/common/core/hash/TrimLastDigitsFloat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package zingg.common.core.hash; | ||
|
||
/** | ||
* Base class for hash functions related to trimming of floats | ||
* | ||
*/ | ||
public class TrimLastDigitsFloat extends BaseHash<Float,Float>{ | ||
private static final long serialVersionUID = 1L; | ||
private int numDigits; | ||
static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000}; | ||
public TrimLastDigitsFloat(int count) { | ||
setName("trimLast" + count + "DigitsFloat"); | ||
this.numDigits = count; | ||
} | ||
|
||
|
||
public Float call(Float field) { | ||
Float r = null; | ||
if (field == null) { | ||
r = field; | ||
} else { | ||
r = (float)(Math.floor(field / POWERS_OF_10[numDigits])); | ||
} | ||
return r; | ||
} | ||
|
||
|
||
public int getNumDigits() { | ||
return numDigits; | ||
} | ||
|
||
} |
30 changes: 30 additions & 0 deletions
30
common/core/src/main/java/zingg/common/core/hash/TrimLastDigitsLong.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package zingg.common.core.hash; | ||
|
||
/** | ||
* Base class for hash functions related to trimming of longs | ||
* | ||
*/ | ||
public class TrimLastDigitsLong extends BaseHash<Long,Long>{ | ||
private static final long serialVersionUID = 1L; | ||
private int numDigits; | ||
static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000}; | ||
public TrimLastDigitsLong(int count) { | ||
setName("trimLast" + count + "DigitsLong"); | ||
this.numDigits = count; | ||
} | ||
|
||
public Long call(Long field) { | ||
Long r = null; | ||
if (field == null) { | ||
r = field; | ||
} else { | ||
r = field / POWERS_OF_10[numDigits]; | ||
} | ||
return r; | ||
} | ||
|
||
public int getNumDigits() { | ||
return numDigits; | ||
} | ||
|
||
} |
33 changes: 33 additions & 0 deletions
33
common/core/src/main/java/zingg/common/core/hash/TruncateFloat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package zingg.common.core.hash; | ||
|
||
/** | ||
* Base class for hash functions related to truncating of floats | ||
* | ||
* | ||
*/ | ||
public class TruncateFloat extends BaseHash<Float,Float>{ | ||
private static final long serialVersionUID = 1L; | ||
private int numDecimalPlaces; | ||
static final int[] POWERS_OF_10 = {1, 10, 100, 1000, 10000, 100000}; | ||
public TruncateFloat(int numDecimalPlaces) { | ||
setName("truncateFloatTo" + numDecimalPlaces + "Places"); | ||
this.numDecimalPlaces = numDecimalPlaces; | ||
} | ||
|
||
|
||
public Float call(Float field) { | ||
Float r = null; | ||
if (field == null) { | ||
r = field; | ||
} else { | ||
r = (float)(Math.floor(field * POWERS_OF_10[numDecimalPlaces]) / POWERS_OF_10[numDecimalPlaces]); | ||
} | ||
return r; | ||
} | ||
|
||
|
||
public int getNumDecimalPlaces() { | ||
return numDecimalPlaces; | ||
} | ||
|
||
} |
26 changes: 26 additions & 0 deletions
26
common/core/src/main/java/zingg/common/core/similarity/function/FloatSimilarityFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package zingg.common.core.similarity.function; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
public class FloatSimilarityFunction extends SimFunction<Float> { | ||
private static final long serialVersionUID = 1L; | ||
public static final Log LOG = LogFactory | ||
.getLog(FloatSimilarityFunction.class); | ||
|
||
public FloatSimilarityFunction() { | ||
super("FloatSimilarityFunction"); | ||
} | ||
|
||
@Override | ||
public Double call(Float first, Float second) { | ||
if (first == null || first.isNaN()) return 1d; | ||
if (second == null || second.isNaN()) return 1d; | ||
//we want similarity, hence we subtract from 1 so that closer values have higher score | ||
double score = 1 - (Math.abs(first-second))/(1.0+first + second); | ||
LOG.debug(" DoubleSim bw " + first + " and second " + second + " is " | ||
+ score); | ||
return score; | ||
} | ||
|
||
} |
25 changes: 25 additions & 0 deletions
25
common/core/src/main/java/zingg/common/core/similarity/function/LongSimilarityFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package zingg.common.core.similarity.function; | ||
|
||
import org.apache.commons.logging.Log; | ||
import org.apache.commons.logging.LogFactory; | ||
|
||
public class LongSimilarityFunction extends SimFunction<Long> { | ||
private static final long serialVersionUID = 1L; | ||
public static final Log LOG = LogFactory | ||
.getLog(LongSimilarityFunction.class); | ||
|
||
public LongSimilarityFunction() { | ||
super("LongSimilarityFunction"); | ||
} | ||
|
||
@Override | ||
public Double call(Long first, Long second) { | ||
double score = 0; | ||
if (first != null && second != null) { | ||
if (first+second != 0) score = 2.0*Math.abs(first - second)/(first + second); | ||
LOG.debug(" LongSim bw " + first + " and second " + second + " is " | ||
+ score); | ||
} | ||
return score; | ||
} | ||
} |
Oops, something went wrong.