zonemaster · mattias-p · Dec 21, 2023 · Dec 21, 2023 · Dec 21, 2023
diff --git a/lib/Zonemaster/Engine/Normalization.pm b/lib/Zonemaster/Engine/Normalization.pm
@@ -35,7 +35,7 @@ Zonemaster::Engine::Normalization - utility functions for names normalization
 
 
 our @EXPORT      = qw[ normalize_name ];
-our @EXPORT_OK   = qw[ normalize_name normalize_label ];
+our @EXPORT_OK   = qw[ normalize_name normalize_label trim_space ];
 
 Readonly my $ASCII => qr/^[[:ascii:]]+$/;
 Readonly my $VALID_ASCII => qr(^[A-Za-z0-9/_-]+$);
@@ -130,12 +130,28 @@ sub normalize_label {
     return \@messages, $alabel;
 }
 
+=item trim_space($str)
+
+Trim leading and trailing whitespace from C<$str>.
+
+Implements the space trimming part of the L<normalization document|https://github.com/zonemaster/zonemaster/blob/master/docs/specifications/tests/RequirementsAndNormalizationOfDomainNames.md>.
+
+Returns the trimmed copy as a string; the caller's value is not modified.
+
+=cut
+
+sub trim_space {
+    my ( $str ) = @_;
+    # /g applies both alternatives (leading and trailing); /r returns the copy.
+    return $str =~ s/^${WHITE_SPACES_RE}+|${WHITE_SPACES_RE}+$//gr;
+}
+
 =item normalize_name($name)
 
 Normalize a domain name.
 
-
-The normalization process is detailed in the L<normalization document|https://github.com/zonemaster/zonemaster/blob/master/docs/specifications/tests/RequirementsAndNormalizationOfDomainNames.md>.
+Implements the normalization process, except for the space trimming part (see
+C<trim_space>), described in the L<normalization document|https://github.com/zonemaster/zonemaster/blob/master/docs/specifications/tests/RequirementsAndNormalizationOfDomainNames.md>.
 
 Returns a tuple C<($errors: ArrayRef[Zonemaster::Engine::Normalization::Error], $name: String)>.
 
@@ -148,9 +164,6 @@ sub normalize_name {
     my ( $uname ) = @_;
     my @messages;
 
-    $uname =~ s/^${$WHITE_SPACES_RE}+//;
-    $uname =~ s/${WHITE_SPACES_RE}+$//;
-
     if ( length($uname) == 0 ) {
         push @messages, Zonemaster::Engine::Normalization::Error->new(EMPTY_DOMAIN_NAME => {});
         return \@messages, undef;

diff --git a/t/normalization.t b/t/normalization.t
@@ -1,9 +1,12 @@
-use Test::More;
-use Test::Exception;
-
+#!perl
+use strict;
 use utf8;
+use warnings;
+use Test::More;
 
-BEGIN { use_ok( 'Zonemaster::Engine::Normalization' ); }
+use Test::Differences;
+use Test::Exception;
+use Zonemaster::Engine::Normalization qw( normalize_name trim_space );
 
 sub char_to_hex_esc {
     my ($char) = @_;
@@ -30,9 +33,6 @@ subtest 'Valid domains' => sub {
         '。' => '.', # Ideographic full stop
         '｡' => '.',  # Halfwidth ideographic full stop
 
-        # Trailing and leading white spaces
-        " \x{205F} example.com.  \x{0009}" => 'example.com',
-
         # Mixed dots with trailing dot
         'example。com.' => 'example.com',
         'example｡com．' => 'example.com',
@@ -83,60 +83,92 @@ subtest 'Valid domains' => sub {
         "aḍ\x{0307}a" => 'xn--aa-rub587y',
     );
 
-    while (($domain, $expected_output) = each (%input_domains)) {
+    for my $domain ( sort keys %input_domains ) {
+        my $expected_output = $input_domains{$domain};
         my $safe_domain = to_hex_esc($domain);
         subtest "Domain: '$safe_domain'" => sub {
-            my $errors, $final_domain;
+            my ( $errors, $final_domain );
             lives_ok(sub {
                 ($errors, $final_domain) = normalize_name($domain);
             }, 'correct domain should live');
-            is(scalar @{$errors}, 0, 'No error returned') or diag(@{$errors});
-            is($final_domain, $expected_output, 'Match expected domain') or diag($final_domain);
+
+            my $actual   = { domain => $final_domain,    errors => $errors };
+            my $expected = { domain => $expected_output, errors => [] };
+
+            eq_or_diff $actual, $expected;
         }
     }
 };
 
 subtest 'Bad domains' => sub {
     my %input_domains = (
         # Empty labels
-        '.。．' => 'INITIAL_DOT',
-        'example。.com.' => 'REPEATED_DOTS',
-        'example。com.｡' => 'REPEATED_DOTS',
-        '．.example｡com' => 'INITIAL_DOT',
+        '.。．'           => ['INITIAL_DOT'],
+        'example。.com.' => ['REPEATED_DOTS'],
+        'example。com.｡' => ['REPEATED_DOTS'],
+        '．.example｡com' => ['INITIAL_DOT'],
 
         # Bad ascii
-        'bad:%;!$.example.com.' => 'INVALID_ASCII',
+        'bad:%;!$.example.com.'            => ['INVALID_ASCII'],
+        " \x{205F} example.com.  \x{0009}" => ['INVALID_ASCII'],    # normalize_name() does not trim; see trim_space()
+        '    '                             => ['INVALID_ASCII'],    # whitespace-only input is not reported as empty
 
         # Label to long
-        "a" x 64 . ".example.com" => 'LABEL_TOO_LONG',
+        "a" x 64 . ".example.com" => ['LABEL_TOO_LONG'],
         # Length too long after idn conversion (libidn fails)
-        'チョコレート' x 8 . 'a' . '.example.com' => 'INVALID_U_LABEL',
+        'チョコレート' x 8 . 'a' . '.example.com' => ['INVALID_U_LABEL'],
         # Emoji in names are invalid as per IDNA2008
-        '❤️．example．com' => 'INVALID_U_LABEL',
+        '❤️．example．com' => ['INVALID_U_LABEL'],
 
         # Domain to long
         # this is 254 characters
-        ("a" x 15 . ".") x 15 . "bc" . ".example.com" => 'DOMAIN_NAME_TOO_LONG',
+        ( "a" x 15 . "." ) x 15 . "bc" . ".example.com" => ['DOMAIN_NAME_TOO_LONG'],
 
         # Empty domain
-        '' => 'EMPTY_DOMAIN_NAME',
-        '    ' => 'EMPTY_DOMAIN_NAME',
+        '' => ['EMPTY_DOMAIN_NAME'],
 
         # Ambiguous downcasing
-        'İ.example.com' => 'AMBIGUOUS_DOWNCASING',
+        'İ.example.com' => ['AMBIGUOUS_DOWNCASING'],
     );
 
-    while (($domain, $error) = each (%input_domains)) {
-        my $safe_domain = to_hex_esc($domain);
-        subtest "Domain: '$safe_domain' ($error)" => sub {
-            my $output, $messages, $domain;
+    for my $domain ( sort keys %input_domains ) {    # sorted keys give a stable subtest order
+        my $expected_errors = $input_domains{$domain};
+        my $safe_domain = to_hex_esc( $domain );
+        subtest "Domain: '$safe_domain'" => sub {
+            my ( $errors, $final_domain );
             lives_ok(sub {
                 ($errors, $final_domain) = normalize_name($domain);
             }, 'incorrect domain should live');
 
-            is($final_domain, undef, 'No domain returned') or diag($final_domain);
-            is($errors->[0]->tag, $error, 'Correct error is returned') or diag($errors[0]);
-            note(to_hex_esc($errors->[0]))
+            my $actual = {
+                domain => $final_domain,
+                errors => [ map { $_->tag } @$errors ]    # compare error tags only, not the error objects
+            };
+            my $expected = {
+                domain => undef,
+                errors => $expected_errors
+            };
+
+            eq_or_diff $actual, $expected;
+        }
+    }
+};
+
+subtest 'Trimming space' => sub {
+    # Maps each input string to the result expected from trim_space().
+    my %cases = (
+        "example."                     => 'example.',
+        "exam   ."                     => 'exam   .',    # inner spaces are kept
+        " \x{205F} example.  \x{0009}" => 'example.',
+        "    "                         => '',            # whitespace only
+        ""                             => '',            # empty input
+    );
+    for my $str ( sort keys %cases ) {
+        my $expected = $cases{$str};
+        my $safe_str = to_hex_esc($str);
+        subtest "Domain: '$safe_str'" => sub {
+            my $actual = trim_space( $str );
+            is $actual, $expected, 'Match expected string';
         }
     }
 };