From 180d9a92d92541aa415c918a2265bd6b33d39655 Mon Sep 17 00:00:00 2001 From: Hugo Duncan Date: Sat, 7 Feb 2015 21:35:28 -0500 Subject: [PATCH] feat(headers): add enum for Charset Make Charset more strongly typed. --- src/header/common/accept_charset.rs | 11 +- src/header/mod.rs | 2 +- src/header/shared/charset.rs | 150 ++++++++++++++++++++++++++++ src/header/shared/mod.rs | 2 + 4 files changed, 158 insertions(+), 7 deletions(-) create mode 100644 src/header/shared/charset.rs diff --git a/src/header/common/accept_charset.rs b/src/header/common/accept_charset.rs index 75b4fb4ad4..8ea24a0246 100644 --- a/src/header/common/accept_charset.rs +++ b/src/header/common/accept_charset.rs @@ -1,6 +1,4 @@ -use header::{self, QualityItem}; - -pub type Charset = String; +use header::{Charset, QualityItem}; /// The `Accept-Charset` header /// @@ -16,11 +14,12 @@ impl_list_header!(AcceptCharset, #[test] fn test_parse_header() { + use header::{self, q}; let a: AcceptCharset = header::Header::parse_header( - [b"iso-8859-5, unicode-1-1;q=0.8".to_vec()].as_slice()).unwrap(); + [b"iso-8859-5, iso-8859-6;q=0.8".to_vec()].as_slice()).unwrap(); let b = AcceptCharset(vec![ - QualityItem{item: "iso-8859-5".to_string(), quality: 1.0}, - QualityItem{item: "unicode-1-1".to_string(), quality: 0.8}, + QualityItem { item: Charset::Iso_8859_5, quality: q(1.0) }, + QualityItem { item: Charset::Iso_8859_6, quality: q(0.8) }, ]); assert_eq!(format!("{}", a), format!("{}", b)); assert_eq!(a, b); diff --git a/src/header/mod.rs b/src/header/mod.rs index 9b8338cae9..a9a7519077 100644 --- a/src/header/mod.rs +++ b/src/header/mod.rs @@ -22,7 +22,7 @@ use unicase::UniCase; use self::cell::OptCell; use {http, HttpResult, HttpError}; -pub use self::shared::{Encoding, EntityTag, Quality, QualityItem, qitem, q}; +pub use self::shared::{Charset, Encoding, EntityTag, Quality, QualityItem, qitem, q}; pub use self::common::*; mod cell; diff --git a/src/header/shared/charset.rs b/src/header/shared/charset.rs new 
// New file introduced by this patch: src/header/shared/charset.rs
// (file mode 100644, index 0000000000..bfe663a5de, previously /dev/null).

use std::fmt::{self, Display};
use std::str::FromStr;
use std::ascii::AsciiExt;

use self::Charset::*;

/// A Mime charset.
///
/// Parsing is ASCII case-insensitive, and the string representation is
/// normalised to upper case: every known variant is written with its
/// upper-cased registered name, and unknown names are upper-cased before
/// being stored in `Ext`.
///
/// See http://www.iana.org/assignments/character-sets/character-sets.xhtml
#[derive(Clone,Debug,PartialEq)]
// Variant names mirror the registered charset names (`ISO-8859-1` ->
// `Iso_8859_1`), which needs underscores inside the identifiers.
#[allow(non_camel_case_types)]
pub enum Charset{
    /// US ASCII
    Us_Ascii,
    /// ISO-8859-1
    Iso_8859_1,
    /// ISO-8859-2
    Iso_8859_2,
    /// ISO-8859-3
    Iso_8859_3,
    /// ISO-8859-4
    Iso_8859_4,
    /// ISO-8859-5
    Iso_8859_5,
    /// ISO-8859-6
    Iso_8859_6,
    /// ISO-8859-7
    Iso_8859_7,
    /// ISO-8859-8
    Iso_8859_8,
    /// ISO-8859-9
    Iso_8859_9,
    /// ISO-8859-10
    Iso_8859_10,
    /// Shift_JIS
    Shift_Jis,
    /// EUC-JP
    Euc_Jp,
    /// ISO-2022-KR
    Iso_2022_Kr,
    /// EUC-KR
    Euc_Kr,
    /// ISO-2022-JP
    Iso_2022_Jp,
    /// ISO-2022-JP-2
    Iso_2022_Jp_2,
    /// ISO-8859-6-E
    Iso_8859_6_E,
    /// ISO-8859-6-I
    Iso_8859_6_I,
    /// ISO-8859-8-E
    Iso_8859_8_E,
    /// ISO-8859-8-I
    Iso_8859_8_I,
    /// GB2312
    Gb2312,
    /// Big5
    Big5,
    /// KOI8-R
    Koi8_R,
    /// An arbitrary charset specified as a string
    Ext(String)
}

impl Charset {
    /// Returns the upper-case name used when the charset is displayed.
    ///
    /// For `Ext` the stored string is returned unchanged.
    fn name(&self) -> &str {
        match *self {
            Us_Ascii => "US-ASCII",
            Iso_8859_1 => "ISO-8859-1",
            Iso_8859_2 => "ISO-8859-2",
            Iso_8859_3 => "ISO-8859-3",
            Iso_8859_4 => "ISO-8859-4",
            Iso_8859_5 => "ISO-8859-5",
            Iso_8859_6 => "ISO-8859-6",
            Iso_8859_7 => "ISO-8859-7",
            Iso_8859_8 => "ISO-8859-8",
            Iso_8859_9 => "ISO-8859-9",
            Iso_8859_10 => "ISO-8859-10",
            // The registered name uses an underscore, not a hyphen.
            Shift_Jis => "SHIFT_JIS",
            Euc_Jp => "EUC-JP",
            Iso_2022_Kr => "ISO-2022-KR",
            Euc_Kr => "EUC-KR",
            Iso_2022_Jp => "ISO-2022-JP",
            Iso_2022_Jp_2 => "ISO-2022-JP-2",
            Iso_8859_6_E => "ISO-8859-6-E",
            Iso_8859_6_I => "ISO-8859-6-I",
            Iso_8859_8_E => "ISO-8859-8-E",
            Iso_8859_8_I => "ISO-8859-8-I",
            Gb2312 => "GB2312",
            Big5 => "BIG5",
            Koi8_R => "KOI8-R",
            Ext(ref s) => &s[..]
        }
    }
}

impl Display for Charset {
    /// Writes the charset's name (see `Charset::name`).
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(fmt, "{}", self.name())
    }
}

impl FromStr for Charset {
    type Err = ();

    /// Parses a charset name, ignoring ASCII case.
    ///
    /// This never fails: a name that matches no known variant is returned
    /// as `Ext` holding the upper-cased input.
    fn from_str(s: &str) -> Result<Charset, ()> {
        // The input is upper-cased first, so every pattern below must be
        // written in upper case or it can never match.
        Ok(match &s.to_ascii_uppercase()[..] {
            "US-ASCII" => Us_Ascii,
            "ISO-8859-1" => Iso_8859_1,
            "ISO-8859-2" => Iso_8859_2,
            "ISO-8859-3" => Iso_8859_3,
            "ISO-8859-4" => Iso_8859_4,
            "ISO-8859-5" => Iso_8859_5,
            "ISO-8859-6" => Iso_8859_6,
            "ISO-8859-7" => Iso_8859_7,
            "ISO-8859-8" => Iso_8859_8,
            "ISO-8859-9" => Iso_8859_9,
            "ISO-8859-10" => Iso_8859_10,
            // "SHIFT-JIS" is the spelling earlier revisions of `Display`
            // produced; it is still accepted on input.
            "SHIFT_JIS" | "SHIFT-JIS" => Shift_Jis,
            "EUC-JP" => Euc_Jp,
            "ISO-2022-KR" => Iso_2022_Kr,
            "EUC-KR" => Euc_Kr,
            "ISO-2022-JP" => Iso_2022_Jp,
            "ISO-2022-JP-2" => Iso_2022_Jp_2,
            "ISO-8859-6-E" => Iso_8859_6_E,
            "ISO-8859-6-I" => Iso_8859_6_I,
            "ISO-8859-8-E" => Iso_8859_8_E,
            "ISO-8859-8-I" => Iso_8859_8_I,
            "GB2312" => Gb2312,
            "BIG5" => Big5,
            "KOI8-R" => Koi8_R,
            other => Ext(other.to_string())
        })
    }
}

#[test]
fn test_parse() {
    assert_eq!(Us_Ascii, "us-ascii".parse::<Charset>().unwrap());
    assert_eq!(Us_Ascii, "US-Ascii".parse::<Charset>().unwrap());
    assert_eq!(Us_Ascii, "US-ASCII".parse::<Charset>().unwrap());
    assert_eq!(Shift_Jis, "Shift_JIS".parse::<Charset>().unwrap());
    assert_eq!(Shift_Jis, "shift-jis".parse::<Charset>().unwrap());
    assert_eq!(Big5, "Big5".parse::<Charset>().unwrap());
    assert_eq!(Ext("ABCD".to_string()), "abcd".parse::<Charset>().unwrap());
}

#[test]
fn test_display() {
    assert_eq!("US-ASCII", format!("{}", Us_Ascii));
    assert_eq!("SHIFT_JIS", format!("{}", Shift_Jis));
    assert_eq!("BIG5", format!("{}", Big5));
    assert_eq!("ABCD", format!("{}", Ext("ABCD".to_string())));
}

// The same patch also registers this module in src/header/shared/mod.rs
// (index 83f1c1c525..8af5f065ef, hunk @@ -1,7 +1,9 @@):
//
//     +pub use self::charset::Charset;
//      pub use self::encoding::Encoding;
//      pub use self::entity::EntityTag;
//      pub use self::quality_item::{Quality, QualityItem, qitem, q};
//
//     +mod charset;
//      mod encoding;
//      mod entity;
//      mod quality_item;