Skip to content

Commit

Permalink
fix: remove not essential dependencies (#33)
Browse files Browse the repository at this point in the history
* chore: add owasp dependency-check plugin

* fix: add replacement classes for guava and commons-lang

* fix: remove guava and commons-lang dependencies

* fix: remove commons-lang3 dependency

* fix: remove commons-text dependency

* fix: fix URIs should not be hardcoded (java:S1075)

* refactor: test and algos

* fix: remove useless validation after codice generation

* test: cover missing instructions

* chore: gitignore

* fix: remove commons-csv dependency

* refact: fix technical debt

---------

Co-authored-by: Alessio D'Innocenti <a.dinnocenti@jdk.it>
  • Loading branch information
kamaladafrica and kamaladafrica authored Sep 18, 2023
1 parent 5419f86 commit d4361fa
Show file tree
Hide file tree
Showing 54 changed files with 1,936 additions and 327 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
.project
.settings/
.factorypath
.envrc

target/

Expand Down Expand Up @@ -32,4 +33,4 @@ hs_err_pid*
/.apt_generated_tests/

.idea/
*.iml
*.iml
39 changes: 16 additions & 23 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.7</version>
</plugin>
<plugin>
<groupId>org.owasp</groupId>
<artifactId>dependency-check-maven</artifactId>
<version>8.4.0</version>
</plugin>
</plugins>
</pluginManagement>
<plugins>
Expand Down Expand Up @@ -174,10 +179,18 @@
<autoReleaseAfterClose>true</autoReleaseAfterClose>
</configuration>
</plugin>


<plugin>
<groupId>org.owasp</groupId>
<artifactId>dependency-check-maven</artifactId>
<executions>
<execution>
<goals>
<goal>check</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>

</build>

<dependencies>
Expand All @@ -187,26 +200,6 @@
<version>1.18.10</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.10.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>32.1.1-jre</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down
31 changes: 14 additions & 17 deletions src/main/java/it/kamaladafrica/codicefiscale/CodiceFiscale.java
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
package it.kamaladafrica.codicefiscale;

import static org.apache.commons.lang3.Validate.inclusiveBetween;
import static org.apache.commons.lang3.Validate.matchesPattern;

import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;

import org.apache.commons.lang3.Validate;

import it.kamaladafrica.codicefiscale.city.CityByBelfiore;
import it.kamaladafrica.codicefiscale.city.CityProvider;
import it.kamaladafrica.codicefiscale.internal.BelfiorePart;
Expand Down Expand Up @@ -96,8 +91,10 @@ public boolean isEqual(CodiceFiscale other, boolean ignoreOmocode) {

public CodiceFiscale toOmocodeLevel(int level) {
if ((level & OMOCODE_LEVEL_MASK) != 0) {
inclusiveBetween(0, OMOCODE_LEVEL_MASK, level, "invalid omocode level: 0 <= %s <= %s", level,
OMOCODE_LEVEL_MASK);
if (level < 0 || level > OMOCODE_LEVEL_MASK) {
throw new IllegalArgumentException(
String.format("invalid omocode level: 0 <= %s <= %s", level, OMOCODE_LEVEL_MASK));
}
}
DatePart datePart = getDate().toOmocodeLevel(level & OMOCODE_LEVEL_DATE_MASK);
BelfiorePart belfiorePart = getBelfiore().toOmocodeLevel(level & OMOCODE_LEVEL_BELFIORE_MASK);
Expand Down Expand Up @@ -158,13 +155,7 @@ public static CodiceFiscale of(String value, CityByBelfiore provider) {
final int level = (date.getOmocodeLevel().getLevel() << OMOCODE_LEVEL_DATE_OFFSET)
| (belfiore.getOmocodeLevel().getLevel() << OMOCODE_LEVEL_BELFIORE_OFFSET);

final CodiceFiscale result = new CodiceFiscale(person, lastname, firstname, date, belfiore, level);

Validate.isTrue(Objects.equals(result.getValue(), value), "expected %s, but found %s", value,
result.getValue());

return result;

return new CodiceFiscale(person, lastname, firstname, date, belfiore, level);
}

public static boolean isCompatible(String code, Person person) {
Expand All @@ -181,11 +172,17 @@ public static boolean isFormatValid(String value) {
}

/**
 * Checks that the given string is a well-formed codice fiscale and returns it
 * unchanged.
 *
 * <p>
 * Two checks are performed: the whole string must match
 * {@code VALIDATION_PATTERN}, and the character at {@code CONTROL_PART_INDEX}
 * must equal the control character computed from the preceding part of the
 * string.
 * </p>
 *
 * @param value the string to validate
 * @return the same {@code value} when both checks pass
 * @throws IllegalArgumentException if the string does not match the pattern or
 *                                  the control character is not the expected one
 */
public static String validate(String value) {
matchesPattern(value, VALIDATION_PATTERN);
if (!value.matches(VALIDATION_PATTERN)) {
throw new IllegalArgumentException(
String.format("The string %s does not match the pattern %s", value, VALIDATION_PATTERN));
}
// The control character is derived from everything that precedes it in the code.
final ControlPart control = ControlPart.of(value.substring(LASTNAME_PART_INDEX, CONTROL_PART_INDEX));
final char currentControl = value.charAt(CONTROL_PART_INDEX);
Validate.isTrue(control.isEqual(currentControl), "invalid control char: expected %s, but found %s",
control.getValue(), currentControl);

if (!control.isEqual(currentControl)) {
throw new IllegalArgumentException(String.format("invalid control char: expected %s, but found %s",
control.getValue(), currentControl));
}
return value;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package it.kamaladafrica.codicefiscale.city;

import java.util.function.Supplier;
import java.util.stream.Stream;

import it.kamaladafrica.codicefiscale.City;

/**
 * A {@link Supplier} that provides a {@link Stream} of {@link City} elements.
 *
 * <p>
 * The interface declares no method of its own; its single abstract method is
 * the inherited {@link Supplier#get()}, so it can be implemented with a lambda
 * or a method reference.
 * </p>
 */
@FunctionalInterface
public interface CityStreamSupplier extends Supplier<Stream<City>> {

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package it.kamaladafrica.codicefiscale.city.algo;

import java.util.Objects;

/**
 * A similarity algorithm that returns {@code 1.0} only if left and right
 * match, {@code 0.0} otherwise.
 *
 * <p>
 * By default the comparison ignores case, so "AAA" and "aaa" are equal.
 * Case-insensitive comparison follows the same per-character rule as
 * {@link String#equalsIgnoreCase(String)}.
 * </p>
 */
public class ExactMatchAlgoritm implements ScoreAlgoritm<Double> {

    /** Score returned when the two sequences match. */
    public static final double MATCH_SCORE = 1.0;

    /** Score returned when the two sequences do not match. */
    public static final double NO_MATCH_SCORE = 0.0;

    private final boolean ignoreCase;

    /**
     * Creates an algorithm with explicit case handling.
     *
     * @param ignoreCase {@code true} to compare ignoring case, {@code false} for
     *                   an exact, case-sensitive comparison
     */
    public ExactMatchAlgoritm(boolean ignoreCase) {
        this.ignoreCase = ignoreCase;
    }

    /**
     * Creates a case-insensitive algorithm.
     */
    public ExactMatchAlgoritm() {
        this(true);
    }

    /**
     * Computes the Exact Match Similarity between two character sequences.
     *
     * <pre>
     * sim.apply(null, null)   = IllegalArgumentException
     * sim.apply("foo", null)  = IllegalArgumentException
     * sim.apply(null, "foo")  = IllegalArgumentException
     * sim.apply("", "")       = 1.0
     * sim.apply("foo", "foo") = 1.0
     * sim.apply("foo", "foo ") = 0.0
     * sim.apply("", "a")      = 0.0
     * sim.apply("frog", "fog") = 0.0
     * sim.apply("fly", "ant") = 0.0
     * sim.apply("fly", "FLY") = 1.0 if ignoreCase is true, 0.0 otherwise
     * sim.apply("fly", "fLy") = 1.0 if ignoreCase is true, 0.0 otherwise
     * </pre>
     *
     * @param left  the first CharSequence, must not be null
     * @param right the second CharSequence, must not be null
     * @return result similarity
     * @throws IllegalArgumentException if either CharSequence input is {@code null}
     */
    @Override
    public Double apply(final CharSequence left, final CharSequence right) {
        if (left == null || right == null) {
            throw new IllegalArgumentException("CharSequences must not be null");
        }
        return toScore(areEquals(left, right, ignoreCase));
    }

    /**
     * Compares two non-null sequences character by character.
     *
     * @param left       the first sequence
     * @param right      the second sequence
     * @param ignoreCase whether characters differing only by case are equal
     * @return {@code true} if the sequences have the same length and every pair
     *         of characters at the same index is equal
     */
    private static boolean areEquals(final CharSequence left, final CharSequence right, boolean ignoreCase) {
        // Fast path: same instance, or same-type sequences with equal content.
        if (Objects.equals(left, right)) {
            return true;
        }

        final int length = left.length();
        if (length != right.length()) {
            return false;
        }

        for (int i = 0; i < length; i++) {
            final char lc = left.charAt(i);
            final char rc = right.charAt(i);
            if (lc == rc) {
                continue;
            }
            if (!ignoreCase || !sameIgnoringCase(lc, rc)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether two characters are equal once case is folded.
     *
     * <p>
     * Case is always folded, not only when exactly one character is lowercase:
     * pairs such as an uppercase and a titlecase letter are both "not
     * lowercase" yet still differ only by case. The upper-then-lower folding
     * mirrors {@link String#regionMatches(boolean, int, String, int, int)}.
     * </p>
     */
    private static boolean sameIgnoringCase(final char a, final char b) {
        final char upperA = Character.toUpperCase(a);
        final char upperB = Character.toUpperCase(b);
        if (upperA == upperB) {
            return true;
        }
        // Some alphabets do not round-trip through upper case, so also compare
        // the lower-case forms before giving up.
        return Character.toLowerCase(upperA) == Character.toLowerCase(upperB);
    }

    /** Maps a boolean match result to its score constant. */
    private static double toScore(boolean match) {
        return match ? MATCH_SCORE : NO_MATCH_SCORE;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package it.kamaladafrica.codicefiscale.city.algo;

import java.util.Arrays;

import it.kamaladafrica.codicefiscale.utils.StringUtils;

/**
 * Scores how similar two character sequences are using the Jaro-Winkler
 * measure.
 *
 * <p>
 * The Jaro part combines the share of matched characters in each sequence with
 * the share of matched characters that are not transposed. The Winkler part
 * raises the score of sequences that start with the same characters.
 * </p>
 *
 * <p>
 * The algorithm is described at
 * <a href="http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance">
 * http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance</a>.
 * </p>
 *
 * <p>
 * This code has been adapted from Apache Commons Lang 3.3.
 * </p>
 *
 * @since 1.7
 */
public class JaroWinklerAlgoritm implements ScoreAlgoritm<Double> {

    /**
     * Collects the three counters the Jaro-Winkler formula is built from.
     *
     * @param first  the first string to be matched
     * @param second the second string to be matched
     * @return a three element array holding, in order: the number of matches,
     *         the number of half transpositions, and the length of the common
     *         prefix (at most 4)
     */
    protected static int[] matches(final CharSequence first, final CharSequence second) {
        final boolean firstIsLonger = first.length() > second.length();
        final CharSequence longer = firstIsLonger ? first : second;
        final CharSequence shorter = firstIsLonger ? second : first;
        final int longLength = longer.length();
        final int shortLength = shorter.length();

        // A character only matches a counterpart that lies within this distance.
        final int window = Math.max(longLength / 2 - 1, 0);

        final int[] matchedAt = new int[shortLength];
        Arrays.fill(matchedAt, -1);
        final boolean[] taken = new boolean[longLength];
        int matchCount = 0;
        for (int s = 0; s < shortLength; s++) {
            final char wanted = shorter.charAt(s);
            final int end = Math.min(s + window + 1, longLength);
            // Scan the window for the first free position holding the same character.
            int l = Math.max(s - window, 0);
            while (l < end && (taken[l] || wanted != longer.charAt(l))) {
                l++;
            }
            if (l < end) {
                matchedAt[s] = l;
                taken[l] = true;
                matchCount++;
            }
        }

        // Matched characters of each sequence, kept in their own original order.
        final char[] shortMatched = new char[matchCount];
        int write = 0;
        for (int s = 0; s < shortLength; s++) {
            if (matchedAt[s] >= 0) {
                shortMatched[write++] = shorter.charAt(s);
            }
        }
        final char[] longMatched = new char[matchCount];
        write = 0;
        for (int l = 0; l < longLength; l++) {
            if (taken[l]) {
                longMatched[write++] = longer.charAt(l);
            }
        }

        int halfTranspositions = 0;
        for (int k = 0; k < matchCount; k++) {
            if (shortMatched[k] != longMatched[k]) {
                halfTranspositions++;
            }
        }

        final int prefixLimit = Math.min(4, shortLength);
        int prefix = 0;
        while (prefix < prefixLimit && first.charAt(prefix) == second.charAt(prefix)) {
            prefix++;
        }

        return new int[] { matchCount, halfTranspositions, prefix };
    }

    /**
     * Computes the Jaro Winkler Similarity between two character sequences.
     *
     * <pre>
     * sim.apply(null, null)          = IllegalArgumentException
     * sim.apply("foo", null)         = IllegalArgumentException
     * sim.apply(null, "foo")         = IllegalArgumentException
     * sim.apply("", "")              = 1.0
     * sim.apply("foo", "foo")        = 1.0
     * sim.apply("foo", "foo ")       = 0.94
     * sim.apply("foo", "foo  ")      = 0.91
     * sim.apply("foo", " foo ")      = 0.87
     * sim.apply("foo", "  foo")      = 0.51
     * sim.apply("", "a")             = 0.0
     * sim.apply("aaapppp", "")       = 0.0
     * sim.apply("frog", "fog")       = 0.93
     * sim.apply("fly", "ant")        = 0.0
     * sim.apply("elephant", "hippo") = 0.44
     * sim.apply("hippo", "elephant") = 0.44
     * sim.apply("hippo", "zzzzzzzz") = 0.0
     * sim.apply("hello", "hallo")    = 0.88
     * sim.apply("ABC Corporation", "ABC Corp") = 0.91
     * sim.apply("D N H Enterprises Inc", "D &amp; H Enterprises, Inc.") = 0.95
     * sim.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") = 0.92
     * sim.apply("PENNSYLVANIA", "PENNCISYLVNIA") = 0.88
     * </pre>
     *
     * @param left  the first CharSequence, must not be null
     * @param right the second CharSequence, must not be null
     * @return result similarity
     * @throws IllegalArgumentException if either CharSequence input is {@code null}
     */
    @Override
    public Double apply(final CharSequence left, final CharSequence right) {
        if (left == null || right == null) {
            throw new IllegalArgumentException("CharSequences must not be null");
        }
        if (StringUtils.equals(left, right)) {
            return 1d;
        }

        final int[] counters = matches(left, right);
        final double matchCount = counters[0];
        if (matchCount == 0) {
            return 0d;
        }

        final double transpositions = (double) counters[1] / 2;
        final double jaro = (matchCount / left.length() + matchCount / right.length()
                + (matchCount - transpositions) / matchCount) / 3;
        // The prefix bonus is only granted to sequences that are already fairly similar.
        if (jaro < 0.7d) {
            return jaro;
        }
        final double prefixScaling = 0.1;
        return jaro + prefixScaling * counters[2] * (1d - jaro);
    }

}
Loading

0 comments on commit d4361fa

Please sign in to comment.