Skip to content
This repository has been archived by the owner on Jan 31, 2020. It is now read-only.

Validate Punycoded TLDs #67

Merged
merged 4 commits into from
May 12, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/Hostname.php
Original file line number Diff line number Diff line change
Expand Up @@ -1772,8 +1772,18 @@ public function isValid($value)
// id-prefix: alpha / digit
// ldh: alpha / digit / dash

$this->tld = $matches[1];
// Decode Punycode TLD to IDN
if (strpos($this->tld, 'xn--') === 0) {
$this->tld = $this->decodePunycode(substr($this->tld, 4));
Copy link
Member

@Maks3w Maks3w Apr 26, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Setting a multibyte value in this property breaks code that is not multibyte-aware, e.g. `if (!in_array(strtolower($this->tld), $this->validTlds)`

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The TLD attribute must remain ASCII to keep backward compatibility.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the current behaviour is to keep the UTF-8 TLD. For $hostnameValidator->isValid('тест.рф') the property $tld is рф. Tested with latest master.

Hostname::$validIdns uses UTF-8 TLDs as keys and Hostname::$validTlds holds the UTF-8 TLDs.

if ($this->tld === false) {
return false;
}
} else {
$this->tld = strtoupper($this->tld);
}

// Match TLD against known list
$this->tld = strtoupper($matches[1]);
if ($this->getTldCheck()) {
if (!in_array(strtolower($this->tld), $this->validTlds)
&& !in_array($this->tld, $this->validTlds)) {
Expand Down
61 changes: 46 additions & 15 deletions test/HostnameTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -486,24 +486,55 @@ public function testIDNIL()
}
}

public function testAdditionalUTF8TLDs()
/**
* Ensures that the validator follows expected behavior for UTF-8 and Punycoded (ACE) TLDs
*
* @dataProvider validTLDHostnames
*/
public function testValidTLDHostnames($value)
{
    // Validate through $this->validator only; no separate local validator
    // instance is needed. The failure message carries the hostname and the
    // validator's own messages so a rejected data set is easy to diagnose.
    $this->assertTrue(
        $this->validator->isValid($value),
        sprintf(
            '%s failed validation: %s',
            $value,
            implode("\n", $this->validator->getMessages())
        )
    );
}

// Check UTF-8 TLD matching
$valuesExpected = [
[true, ['test123.онлайн', 'тест.рф', 'туршилтын.мон']],
[false, ['சோதனை3.இலங்கை', 'رات.мон']]
/**
 * Data provider for testValidTLDHostnames().
 *
 * @return array Data sets keyed by a human-readable description; each data
 *               set holds a single hostname string.
 */
public function validTLDHostnames()
{
    // The coding-standard check is suspended around the table so the long
    // descriptive keys can stay on one line each.
    // @codingStandardsIgnoreStart
    return [
        'ASCII label + UTF-8 TLD' => ['test123.онлайн'],
        'ASCII label + Punycoded TLD' => ['test123.xn--80asehdb'],
        'UTF-8 label + UTF-8 TLD (cyrillic)' => ['тест.рф'],
        'Punycoded label + Punycoded TLD (cyrillic)' => ['xn--e1aybc.xn--p1ai'],
    ];
    // @codingStandardsIgnoreEnd
}

/**
* Ensures that the validator follows expected behavior for invalid UTF-8 and Punycoded (ACE) TLDs
*
* @dataProvider invalidTLDHostnames
*/
public function testInalidTLDHostnames($value)
{
    // NOTE(review): the method name is misspelled ("Inalid"); it is kept
    // unchanged here so the test identifier stays stable.
    //
    // Name the offending hostname in the failure output; without a message a
    // failing data set only reports that the assertion was not false.
    $this->assertFalse(
        $this->validator->isValid($value),
        sprintf('%s passed validation but was expected to be rejected', $value)
    );
}

/**
 * Data provider for testInalidTLDHostnames().
 *
 * @return array Data sets keyed by a human-readable description; each data
 *               set holds a single hostname string that the test asserts
 *               is not valid.
 */
public function invalidTLDHostnames()
{
    $datasets = [];

    // @codingStandardsIgnoreStart
    // Each case appears twice: once in UTF-8 form, once Punycoded.
    $datasets['Invalid mix of UTF-8 and ASCII in label'] = ['சோதனை3.இலங்கை'];
    $datasets['Invalid mix of UTF-8 and ASCII in label (Punycoded)'] = ['xn--3-owe4au9mpa.xn--xkc2al3hye2a'];
    $datasets['Invalid use of non-cyrillic characters with cyrillic TLD'] = ['رات.мон'];
    $datasets['Invalid use of non-cyrillic characters with cyrillic TLD (Punycoded)'] = ['xn--mgbgt.xn--l1acc'];
    // @codingStandardsIgnoreEnd

    return $datasets;
}

public function testIDNIT()
Expand Down