Skip to content

Commit

Permalink
Merge pull request #119 from valenting/idna_new
Browse files Browse the repository at this point in the history
IDNA support
  • Loading branch information
SimonSapin committed Jan 18, 2016
2 parents 9d88f17 + eb3846d commit d6e1ac5
Show file tree
Hide file tree
Showing 11 changed files with 21,977 additions and 11 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,6 @@ optional = true
[dependencies]
uuid = "0.1.17"
rustc-serialize = "0.3"
unicode-bidi = "0.2.3"
unicode-normalization = "0.1.1"
matches = "0.1"
8,190 changes: 8,190 additions & 0 deletions IdnaMappingTable.txt

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions make_idna_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright 2013-2014 Valentin Gosu.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.


# Run as: python make_idna_table.py idna_table.txt > src/idna_table.rs
# You can get the latest idna table from
# http://www.unicode.org/Public/idna/latest/IdnaMappingTable.txt

import sys

# Open the mapping table before emitting anything, so that a missing input
# file does not leave a truncated header in the redirected output.
# The path is taken from the first command-line argument; without one,
# IdnaMappingTable.txt in the current directory is read.
txt = open(sys.argv[1] if len(sys.argv) > 1 else "IdnaMappingTable.txt")

# Fixed preamble of the generated Rust module: licence header, imports and
# the opening of the TABLE slice. The entries and the closing "];" are
# printed by the code further down.
print('''\
// Copyright 2013-2014 Valentin Gosu.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Generated by make_idna_table.py
use idna::Mapping::*;
use idna::Range;
pub static TABLE: &'static [Range] = &[
''')

def char(s):
    """Return the code point named by hex string `s`, escaped for Rust source.

    `s` is a hexadecimal code point such as "00DF". The result can be pasted
    between either '...' (char literal) or "..." (string literal) in the
    generated Rust code: backslash, both quote characters and NUL are
    backslash-escaped, every other character is emitted as-is.

    On Python 2 the result is a UTF-8 encoded byte string, so it can be
    printed to a redirected stdout; on Python 3 it is a text string.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3
    escaped = (to_char(int(s, 16))
               # Backslash is escaped first so the backslashes added by the
               # replacements below are not doubled.
               .replace('\\', '\\\\')
               .replace('"', '\\"')
               # Needed because the result is also used inside '...'.
               .replace("'", "\\'")
               .replace('\0', '\\0'))
    if str is bytes:
        # Python 2: hand back bytes, matching the other str literals that
        # get formatted and printed alongside it.
        return escaped.encode('utf8')
    return escaped

# Translate every data line of the table into one `Range { ... }` entry.
# As parsed below, a data line has the shape
#     FIRST[..LAST] ; status [; mapped code points]   # comment
try:
    for line in txt:
        # remove comments
        line, _, _ = line.partition('#')
        # skip empty lines
        if not line.strip():
            continue
        fields = line.split(';')
        if fields[0].strip() == 'D800..DFFF':
            continue  # Surrogates don't occur in Rust strings.
        first, _, last = fields[0].strip().partition('..')
        if not last:
            # A single code point is a range whose ends coincide.
            last = first
        # Turn the snake_case status into a CamelCase name,
        # e.g. "disallowed_STD3_valid" -> "DisallowedStd3Valid".
        mapping = fields[1].strip().replace('_', ' ').title().replace(' ', '')
        if len(fields) > 2:
            # split() without an argument tolerates repeated spaces and
            # yields an empty list for a blank field.
            code_points = fields[2].split()
            if code_points:
                mapping += '("%s")' % ''.join(char(c) for c in code_points)
            elif mapping == "Deviation":
                # A deviation with a blank mapping field still gets a
                # (empty) string argument.
                mapping += '("")'
        print(" Range { from: '%s', to: '%s', mapping: %s }," % (char(first), char(last), mapping))
finally:
    # Release the input file whether or not parsing succeeded.
    txt.close()

print("];")
14 changes: 8 additions & 6 deletions src/host.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::fmt::{self, Formatter};
use std::net::{Ipv4Addr, Ipv6Addr};
use parser::{ParseResult, ParseError};
use percent_encoding::{from_hex, percent_decode};
use idna;


/// The host name of an URL.
Expand All @@ -34,8 +35,6 @@ impl Host {
///
/// Returns `Err` for an empty host, an invalid IPv6 address,
/// or an invalid non-ASCII domain.
///
/// FIXME: Add IDNA support for non-ASCII domains.
pub fn parse(input: &str) -> ParseResult<Host> {
if input.len() == 0 {
return Err(ParseError::EmptyHost)
Expand All @@ -48,10 +47,13 @@ impl Host {
}
let decoded = percent_decode(input.as_bytes());
let domain = String::from_utf8_lossy(&decoded);
// TODO: Remove this check and use IDNA "domain to ASCII"
if !domain.is_ascii() {
return Err(ParseError::NonAsciiDomainsNotSupportedYet)
} else if domain.find(&[

let domain = match idna::domain_to_ascii(&domain) {
Ok(s) => s,
Err(_) => return Err(ParseError::InvalidDomainCharacter)
};

if domain.find(&[
'\0', '\t', '\n', '\r', ' ', '#', '%', '/', ':', '?', '@', '[', '\\', ']'
][..]).is_some() {
return Err(ParseError::InvalidDomainCharacter)
Expand Down
Loading

0 comments on commit d6e1ac5

Please sign in to comment.