Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IDNA support #119

Merged
merged 12 commits into from
Jan 18, 2016
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "url"
version = "0.5.1"
version = "0.5.2"
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

description = "URL library for Rust, based on the WHATWG URL Standard"
Expand Down Expand Up @@ -35,4 +35,6 @@ optional = true
[dependencies]
uuid = "0.1.17"
rustc-serialize = "0.3"
unicode-bidi = "0.2.3"
unicode-normalization = "0.1.1"
matches = "0.1"
8,190 changes: 8,190 additions & 0 deletions IdnaMappingTable.txt

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions make_idna_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2013-2014 Valentin Gosu.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.


# Run as: python make_idna_table.py > src/idna_table.rs
# (the script reads IdnaMappingTable.txt from the current directory)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is idna_table.txt provided by Unicode? Where can it be found?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add this URL in a code comment.

# You can get the latest idna table from
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt

# Fixed preamble of the generated Rust module: license header, imports, and
# the opening of the TABLE slice that the entries printed later fill in.
RUST_PREAMBLE = '''\
// Copyright 2013-2014 Valentin Gosu.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Generated by make_idna_table.py

use idna::Mapping::*;
use idna::Range;

pub static TABLE: &'static [Range] = &[
'''
print(RUST_PREAMBLE)

# Mapping data; looked up relative to the current working directory.
txt = open("IdnaMappingTable.txt")

def char(s):
    """Return a Rust escape sequence for the code point written in hex as `s`.

    `s` is a hexadecimal code point such as '0041' or '1F600'. The result
    (for example a backslash followed by 'u{41}') is valid inside both Rust
    char literals ('...') and Rust string literals ("...").

    Emitting an ASCII-only escape instead of the raw character means that
    quotes, backslashes, NUL and control characters need no special-casing,
    and the output no longer depends on Python 2's `unichr` (which also
    rejects code points above U+FFFF on narrow builds) or on the encoding
    of stdout.

    Raises ValueError if `s` is not a valid hexadecimal number.
    """
    return '\\u{%x}' % int(s, 16)

# Translate each data line of the mapping table into one Rust `Range` entry.
for line in txt:
    # Drop the trailing comment, if any.
    line, _, _ = line.partition('#')
    # Nothing left once the comment is removed: not a data line.
    if not line.strip():
        continue
    fields = [field.strip() for field in line.split(';')]
    if fields[0] == 'D800..DFFF':
        continue  # Surrogates don't occur in Rust strings.
    # A single code point has no '..'; treat it as a one-element range.
    first, _, last = fields[0].partition('..')
    if not last:
        last = first
    # Status name to CamelCase, e.g. 'disallowed_STD3_valid' -> 'DisallowedStd3Valid'.
    mapping = fields[1].replace('_', ' ').title().replace(' ', '')
    if len(fields) > 2:
        if fields[2]:
            # split() instead of split(' '): tolerate repeated whitespace
            # between code points rather than passing '' to char().
            mapping += '("%s")' % ''.join(char(c) for c in fields[2].split())
        elif mapping == "Deviation":
            # A deviation row with an empty third field still needs a payload.
            mapping += '("")'
    print(" Range { from: '%s', to: '%s', mapping: %s }," % (char(first), char(last), mapping))

# The table has been read completely; release the file handle.
txt.close()

print("];")
14 changes: 8 additions & 6 deletions src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::fmt::{self, Formatter};
use std::net::{Ipv4Addr, Ipv6Addr};
use parser::{ParseResult, ParseError};
use percent_encoding::{from_hex, percent_decode};
use idna;


/// The host name of an URL.
Expand All @@ -34,8 +35,6 @@ impl Host {
///
/// Returns `Err` for an empty host, an invalid IPv6 address,
/// or an invalid non-ASCII domain.
///
/// FIXME: Add IDNA support for non-ASCII domains.
pub fn parse(input: &str) -> ParseResult<Host> {
if input.len() == 0 {
return Err(ParseError::EmptyHost)
Expand All @@ -48,10 +47,13 @@ impl Host {
}
let decoded = percent_decode(input.as_bytes());
let domain = String::from_utf8_lossy(&decoded);
// TODO: Remove this check and use IDNA "domain to ASCII"
if !domain.is_ascii() {
return Err(ParseError::NonAsciiDomainsNotSupportedYet)
} else if domain.find(&[

let domain = match idna::domain_to_ascii(&domain) {
Ok(s) => s,
Err(_) => return Err(ParseError::InvalidDomainCharacter)
};

if domain.find(&[
'\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']'
][..]).is_some() {
return Err(ParseError::InvalidDomainCharacter)
Expand Down
Loading