From 4cb81261b42eab3cce01400902356b0a4c97e57a Mon Sep 17 00:00:00 2001
From: Ed Shryane
Date: Fri, 12 Mar 2021 15:16:29 +0100
Subject: [PATCH] Separate email address and leading/trailing space matching into separate patterns. Added tests.

---
Notes (review commentary; text between the "---" line and the first
"diff --git" line is not part of the change):

  PunycodeConversion.java
  - EMAIL_ADDRESS_PATTERN used to capture: attribute key, whitespace (\s+),
    the rest of the line (.*), and the newline/end. It now captures:
    attribute key, the value up to the first '#' or newline ([^#\n]*),
    whatever remains on the line (.*), and the newline/end.
  - WHITESPACE_PATTERN is new. It splits the captured value into leading
    whitespace, a lazily matched middle part, and trailing whitespace.
  - convert() now passes only that middle part to toAscii(). When the result
    differs, the matched line is rebuilt as
    key ':' leading + converted + trailing + remainder + newline, replaced in
    the input, and convert() is called again on the resulting string.
  - toAscii() and the use of CONVERTER are outside the hunks shown here.

  PunycodeConversionTest.java
  - Reorders the two static hamcrest imports.
  - Adds convert_email_trailing_spaces,
    convert_email_trailing_spaces_with_comment,
    convert_email_many_trailing_spaces,
    convert_email_many_trailing_spaces_with_comment and
    convert_email_name_and_address_and_many_spaces_and_comment.

  NOTE(review): line breaks, indentation and blank context lines in this copy
  were restored by hand from a whitespace-collapsed text; runs of spaces inside
  string literals and any angle-bracketed text (e.g. the "From:" address) may
  have been lost. Verify against commit 4cb81261b4 before applying.
  NOTE(review): WHITESPACE_PATTERN pairs a lazy (.*?) with a greedy (\s*)$;
  on a value containing a long interior run of whitespace this looks like it
  can backtrack quadratically -- TODO confirm this is acceptable for the
  inputs convert() receives.
  NOTE(review): convert() restarts matching from the beginning of the text
  after every converted line (recursive call) -- presumably fine for small
  objects; confirm for inputs with many addresses.

 .../db/whois/common/PunycodeConversion.java   | 34 +++++---
 .../whois/common/PunycodeConversionTest.java  | 87 ++++++++++++++++++-
 2 files changed, 107 insertions(+), 14 deletions(-)

diff --git a/whois-rpsl/src/main/java/net/ripe/db/whois/common/PunycodeConversion.java b/whois-rpsl/src/main/java/net/ripe/db/whois/common/PunycodeConversion.java
index dc3a50900a..ee3d39a3e5 100644
--- a/whois-rpsl/src/main/java/net/ripe/db/whois/common/PunycodeConversion.java
+++ b/whois-rpsl/src/main/java/net/ripe/db/whois/common/PunycodeConversion.java
@@ -13,25 +13,33 @@
 public class PunycodeConversion {
 
     private static final IDNEmailAddressConverter CONVERTER = new IDNEmailAddressConverter();
-    private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("(?m)^(abuse-mailbox|e-mail|irt-nfy|mnt-nfy|notify|ref-nfy|upd-to)(?:\\:)(\\s+)(.*)(\\n|$)");
+    private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("(?m)^(abuse-mailbox|e-mail|irt-nfy|mnt-nfy|notify|ref-nfy|upd-to)(?:\\:)([^#\\n]*)(.*)(\\n|$)");
+    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("^(\\s*)(.*?)(\\s*)$");
 
     private PunycodeConversion() {
         // do not instantiate
     }
 
     public static String convert(final String value) {
-        final Matcher matcher = EMAIL_ADDRESS_PATTERN.matcher(value);
-        while (matcher.find()) {
-            final String key = matcher.group(1);
-            final String space = matcher.group(2);
-            final String address = matcher.group(3);
-            final String newline = matcher.group(4);
-
-            final String convertedAddress = toAscii(address);
-            if (!convertedAddress.equals(address)) {
-                final String originalMatch = matcher.group(0);
-                final String convertedMatch = String.format("%s:%s%s%s", key, space, convertedAddress, newline);
-                return convert(value.replace(originalMatch, convertedMatch));
+        final Matcher emailMatcher = EMAIL_ADDRESS_PATTERN.matcher(value);
+        while (emailMatcher.find()) {
+            final String attrKey = emailMatcher.group(1);
+            final String attrValue = emailMatcher.group(2);
+            final String attrComment = emailMatcher.group(3);
+            final String newline = emailMatcher.group(4);
+
+            final Matcher whitespaceMatcher = WHITESPACE_PATTERN.matcher(attrValue);
+            if (whitespaceMatcher.find()) {
+                final String leadingSpaces = whitespaceMatcher.group(1);
+                final String address = whitespaceMatcher.group(2);
+                final String trailingSpaces = whitespaceMatcher.group(3);
+
+                final String convertedAddress = toAscii(address);
+                if (!convertedAddress.equals(address)) {
+                    final String originalMatch = emailMatcher.group(0);
+                    final String convertedMatch = String.format("%s:%s%s%s%s%s", attrKey, leadingSpaces, convertedAddress, trailingSpaces, attrComment, newline);
+                    return convert(value.replace(originalMatch, convertedMatch));
+                }
             }
         }
 
diff --git a/whois-rpsl/src/test/java/net/ripe/db/whois/common/PunycodeConversionTest.java b/whois-rpsl/src/test/java/net/ripe/db/whois/common/PunycodeConversionTest.java
index 98a724cdcb..d8144e4479 100644
--- a/whois-rpsl/src/test/java/net/ripe/db/whois/common/PunycodeConversionTest.java
+++ b/whois-rpsl/src/test/java/net/ripe/db/whois/common/PunycodeConversionTest.java
@@ -2,8 +2,8 @@
 
 import org.junit.Test;
 
-import static org.hamcrest.Matchers.is;
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.is;
 
 public class PunycodeConversionTest {
 
@@ -24,6 +24,74 @@ public void convert_email() {
                 "source: TEST\n"));
     }
 
+    @Test
+    public void convert_email_trailing_spaces() {
+        final String value =
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail: no-reply@zürich.example \n" +
+                "source: TEST\n";
+
+        final String result = PunycodeConversion.convert(value);
+
+        assertThat(result, is(
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail: no-reply@xn--zrich-kva.example \n" +
+                "source: TEST\n"));
+    }
+
+    @Test
+    public void convert_email_trailing_spaces_with_comment() {
+        final String value =
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail: no-reply@zürich.example # TODO \n" +
+                "source: TEST\n";
+
+        final String result = PunycodeConversion.convert(value);
+
+        assertThat(result, is(
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail: no-reply@xn--zrich-kva.example # TODO \n" +
+                "source: TEST\n"));
+    }
+
+    @Test
+    public void convert_email_many_trailing_spaces() {
+        final String value =
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail: no-reply@zürich.example" + " ".repeat(100) + "\n" +
+                "source: TEST\n";
+
+        final String result = PunycodeConversion.convert(value);
+
+        assertThat(result, is(
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail: no-reply@xn--zrich-kva.example" + " ".repeat(100) + "\n" +
+                "source: TEST\n"));
+    }
+
+    @Test
+    public void convert_email_many_trailing_spaces_with_comment() {
+        final String value =
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail: no-reply@zürich.example" + " ".repeat(100)+ "# TODO \n" +
+                "source: TEST\n";
+
+        final String result = PunycodeConversion.convert(value);
+
+        assertThat(result, is(
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail: no-reply@xn--zrich-kva.example" + " ".repeat(100)+ "# TODO \n" +
+                "source: TEST\n"));
+    }
+
     @Test
     public void convert_email_local_part_only() {
         final String value =
@@ -92,6 +160,23 @@ public void convert_email_name_and_address() {
                 "source: TEST\n"));
     }
 
+    @Test
+    public void convert_email_name_and_address_and_many_spaces_and_comment() {
+        final String value =
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail:" + " ".repeat(100) + "Example Test Usër " + " ".repeat(100) + "# comment \n" +
+                "source: TEST\n";
+
+        final String result = PunycodeConversion.convert(value);
+
+        assertThat(result, is(
+                "role: Test Role\n" +
+                "nic-hdl: TR1-TEST\n" +
+                "e-mail:" + " ".repeat(100) + "Example Test Usër " + " ".repeat(100) + "# comment \n" +
+                "source: TEST\n"));
+    }
+
     @Test
     public void convert_email_domain_only_not_local_part() {
         final String value =