From caa21a8f1b6a8930d262578eca68b34db89df734 Mon Sep 17 00:00:00 2001 From: Ben Waters Date: Mon, 26 Jun 2023 17:31:09 -0400 Subject: [PATCH 1/2] CODEC-308: change NYSIIS encoding to not remove the first character if it's an A or S --- .../java/org/apache/commons/codec/language/Nysiis.java | 9 ++++++++- .../org/apache/commons/codec/language/NysiisTest.java | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/apache/commons/codec/language/Nysiis.java b/src/main/java/org/apache/commons/codec/language/Nysiis.java index 36b4241249..57c5f9feeb 100644 --- a/src/main/java/org/apache/commons/codec/language/Nysiis.java +++ b/src/main/java/org/apache/commons/codec/language/Nysiis.java @@ -275,7 +275,8 @@ public String nysiis(String str) { // First character of key = first character of name. final StringBuilder key = new StringBuilder(str.length()); - key.append(str.charAt(0)); + final char firstChar = str.charAt(0); + key.append(firstChar); // Transcode remaining characters, incrementing by one character each time final char[] chars = str.toCharArray(); @@ -314,6 +315,12 @@ public String nysiis(String str) { if (lastChar == 'A') { key.deleteCharAt(key.length() - 1); } + + if(key.length()==0){ + // We've removed the first character of the string. 
Likely because it was an S or A + // We should return at least the first character + key.append(firstChar); + } } final String string = key.toString(); diff --git a/src/test/java/org/apache/commons/codec/language/NysiisTest.java b/src/test/java/org/apache/commons/codec/language/NysiisTest.java index d13525a8e0..0ae3972438 100644 --- a/src/test/java/org/apache/commons/codec/language/NysiisTest.java +++ b/src/test/java/org/apache/commons/codec/language/NysiisTest.java @@ -140,7 +140,8 @@ public void testDropBy() throws EncoderException { new String[] { "JILES", "JAL" }, // violates 6: if the last two characters are AY, remove A new String[] { "CARRAWAY", "CARY" }, // Original: CARAY - new String[] { "YAMADA", "YANAD" }); + new String[] { "YAMADA", "YANAD" }, + new String[] { "ASH", "A"}); } @Test From 4c2792997de0e761d756c5dbd32fb5a82841cc66 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Mon, 26 Jun 2023 18:18:43 -0400 Subject: [PATCH 2/2] Fix formatting --- src/main/java/org/apache/commons/codec/language/Nysiis.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/codec/language/Nysiis.java b/src/main/java/org/apache/commons/codec/language/Nysiis.java index 57c5f9feeb..9ad16f9497 100644 --- a/src/main/java/org/apache/commons/codec/language/Nysiis.java +++ b/src/main/java/org/apache/commons/codec/language/Nysiis.java @@ -316,7 +316,7 @@ public String nysiis(String str) { key.deleteCharAt(key.length() - 1); } - if(key.length()==0){ + if (key.length() == 0) { // We've removed the first character of the string. Likely because it was an S or A // We should return at least the first character key.append(firstChar);