Skip to content

Commit

Permalink
Merge #48
Browse files Browse the repository at this point in the history
48: Char property macro 2.0 r=behnam

Replaces #41. See #41 for earlier discussion.

An example will show better than I can tell:

```rust
char_property! {
    /// Represents the Unicode character
    /// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property,
    /// also known as the *bidirectional character type*.
    ///
    /// * <http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types>
    /// * <http://www.unicode.org/reports/tr44/#Bidi_Class_Values>
    pub enum BidiClass {
        /// Any strong left-to-right character
        ///
        /// ***General Scope***
        ///
        /// LRM, most alphabetic, syllabic, Han ideographs,
        /// non-European or non-Arabic digits, ...
        LeftToRight {
            abbr => L,
            long => Left_To_Right,
            display => "Left-to-Right",
        }

        /// Any strong right-to-left (non-Arabic-type) character
        ///
        /// ***General Scope***
        ///
        /// RLM, Hebrew alphabet, and related punctuation
        RightToLeft {
            abbr => R,
            long => Right_To_Left,
            display => "Right-to-Left",
        }

        /// Any strong right-to-left (Arabic-type) character
        ///
        /// ***General Scope***
        ///
        /// ALM, Arabic, Thaana, and Syriac alphabets,
        /// most punctuation specific to those scripts, ...
        ArabicLetter {
            abbr => AL,
            long => Arabic_Letter,
            display => "Right-to-Left Arabic",
        }
    }

    /// Abbreviated name bindings for the `BidiClass` property
    pub mod abbr_names for abbr;
    /// Name bindings for the `BidiClass` property as they appear in Unicode documentation
    pub mod long_names for long;
}
```

expands to:

```rust
/// Represents the Unicode character
/// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property,
/// also known as the *bidirectional character type*.
///
/// * <http://www.unicode.org/reports/tr9/#Bidirectional_Character_Types>
/// * <http://www.unicode.org/reports/tr44/#Bidi_Class_Values>
#[allow(bad_style)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum BidiClass {
    /// Any strong left-to-right character
    LeftToRight,
    /// Any strong right-to-left (non-Arabic-type) character
    RightToLeft,
    /// Any strong right-to-left (Arabic-type) character
    ArabicLetter,
}
/// Abbreviated name bindings for the `BidiClass` property
#[allow(bad_style)]
pub mod abbr_names {
    pub use super::BidiClass::LeftToRight as L;
    pub use super::BidiClass::RightToLeft as R;
    pub use super::BidiClass::ArabicLetter as AL;
}
/// Name bindings for the `BidiClass` property as they appear in Unicode documentation
#[allow(bad_style)]
pub mod long_names {
    pub use super::BidiClass::LeftToRight as Left_To_Right;
    pub use super::BidiClass::RightToLeft as Right_To_Left;
    pub use super::BidiClass::ArabicLetter as Arabic_Letter;
}
#[allow(bad_style)]
#[allow(unreachable_patterns)]
impl ::std::str::FromStr for BidiClass {
    type Err = ();
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "LeftToRight" => Ok(BidiClass::LeftToRight),
            "RightToLeft" => Ok(BidiClass::RightToLeft),
            "ArabicLetter" => Ok(BidiClass::ArabicLetter),
            "L" => Ok(BidiClass::LeftToRight),
            "R" => Ok(BidiClass::RightToLeft),
            "AL" => Ok(BidiClass::ArabicLetter),
            "Left_To_Right" => Ok(BidiClass::LeftToRight),
            "Right_To_Left" => Ok(BidiClass::RightToLeft),
            "Arabic_Letter" => Ok(BidiClass::ArabicLetter),
            _ => Err(()),
        }
    }
}
#[allow(bad_style)]
#[allow(unreachable_patterns)]
impl ::std::fmt::Display for BidiClass {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        match *self {
            BidiClass::LeftToRight => write!(f, "{}", "Left-to-Right"),
            BidiClass::RightToLeft => write!(f, "{}", "Right-to-Left"),
            BidiClass::ArabicLetter => write!(f, "{}", "Right-to-Left Arabic"),
            BidiClass::LeftToRight => write!(f, "{}", "Left_To_Right".replace('_', " ")),
            BidiClass::RightToLeft => write!(f, "{}", "Right_To_Left".replace('_', " ")),
            BidiClass::ArabicLetter => write!(f, "{}", "Arabic_Letter".replace('_', " ")),
            _ => {
                write!(
                    f,
                    "{}",
                    match *self {
                        BidiClass::LeftToRight => "L",
                        BidiClass::RightToLeft => "R",
                        BidiClass::ArabicLetter => "AL",
                        BidiClass::LeftToRight => "LeftToRight",
                        BidiClass::RightToLeft => "RightToLeft",
                        BidiClass::ArabicLetter => "ArabicLetter",
                    }
                )
            }
        }
    }
}
#[allow(bad_style)]
impl ::char_property::EnumeratedCharProperty for BidiClass {
    fn abbr_name(&self) -> &'static str {
        match *self {
            BidiClass::LeftToRight => "L",
            BidiClass::RightToLeft => "R",
            BidiClass::ArabicLetter => "AL",
        }
    }
    fn all_values() -> &'static [BidiClass] {
        const VALUES: &[BidiClass] = &[
            BidiClass::LeftToRight,
            BidiClass::RightToLeft,
            BidiClass::ArabicLetter,
        ];
        VALUES
    }
}
```

All three of the `abbr`, `long`, and `display` properties of the enum are optional, and have sane fallbacks: `abbr_name` and `long_name` return `None` if unspecified, and `fmt::Display` will check, in order, for `display`, `long_name`, `abbr_name`, and the variant name until it finds one to use (stringified, of course).

`FromStr` is defined, matching against any of the provided `abbr`, `long`, and variant name.

<hr />

Important notes:

- <strike>The current format uses associated consts, so it works on beta but won't work on stable until 1.20 is stable.</strike>
  - Consts have a slightly different meaning than `pub use` -- `pub use` aliases the type where `const` is a new object and if used in pattern matching is a `==` call and not a pattern match.
  - For this reason I'm actually slightly leaning towards using `pub use` even once associated consts land; they're compartmentalized (so `use Property::*` doesn't pull in 3x as many symbols as there are variants). After using the const based aliasing for a little bit, I'm inclined to like the current solution of `unic::ucd::bidi::BidiClass::*` + `unic::ucd::bidi::bidi_class::abbr_names::*`. These really should be a `pub use` and not a `const`.
  - Note that I still think `const` are the way to go for cases like `Canonical_Combining_Class`, though.
- <strike>The current syntax could easily be adapted to use modules instead of associated consts, but was written with the associated consts so we could get a feel of how it would look with them.</strike>
- The zero-or-more meta match before an enum variant conflicts with the ident match before 1.20. See rust-lang/rust#42913, rust-lang/rust#24189
- The only tests of the macro are rather thin and could be expanded.
- It's a macro, so the response when you stick stuff not matching the expected pattern is cryptic at best.
- The `CharProperty` trait is pretty much the lowest common denominator. It's a starting point, and we can iterate from there.
- How and where do we want to make `CharProperty` an externally visible trait? Currently having it in scope is the only way to access `abbr_name` and `long_name`.
- <strike>Earlier discussion suggested putting these into `unic::utils::char_property`. Moving it would be simple, but for now it's living in the root of `unic-utils`</strike>
- <strike>The crate `unic-utils` is currently in the workspace by virtue of being a dependency of `unic`, but is not in any way visible to a crate depending on `unic`.</strike>
- <strike>Documentation doesn't exist.</strike>
  • Loading branch information
bors[bot] committed Aug 10, 2017
2 parents 3c8ecc6 + 6b60633 commit b5771e4
Show file tree
Hide file tree
Showing 3 changed files with 397 additions and 0 deletions.
1 change: 1 addition & 0 deletions unic/utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ pub const PKG_DESCRIPTION: &'static str = env!("CARGO_PKG_DESCRIPTION");

pub mod char_property;
pub mod codepoints;
mod macros;
pub mod tables;


Expand Down
340 changes: 340 additions & 0 deletions unic/utils/src/macros.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,340 @@
/// Declares an enumerated character property: the enum itself, modules of name aliases,
/// and the standard trait implementations that go with it.
///
/// # Syntax (Enumerated Property)
///
/// ```
/// # #[macro_use] extern crate unic_utils;
/// # fn main() {}
/// char_property! {
///     /// Zero or more attributes
///     pub enum PropertyName {
///         /// Exactly one attribute
///         RustName {
///             abbr => AbbrName,
///             long => Long_Name,
///             display => "&'static str that is a nicer presentation of the name",
///         }
///
///         /// All annotations on the variant are optional*
///         Variant2 {
///             abbr => V2, // *abbr is required for Enumerated Properties
///         }
///     }
///
///     /// Zero or more attributes
///     pub mod abbr_names for abbr;
///
///     /// Zero or more attributes
///     pub mod long_names for long;
/// }
///
/// // You must impl (Partial/Complete)CharProperty manually.
/// # impl unic_utils::char_property::PartialCharProperty for PropertyName {
/// #     fn of(_: char) -> Option<Self> { None }
/// # }
/// ```
///
/// Every `key => value` entry inside a variant's braces must be followed by a comma,
/// including the last one.
///
/// # Effect
///
/// - Declares the enum, deriving `Copy`, `Clone`, `Debug`, `Eq`, `PartialEq` and `Hash`
/// - Implements `EnumeratedCharProperty`: `abbr_name` returns the variant's `abbr` name and
///   `all_values` returns every variant in declaration order
/// - Implements `FromStr` (with `Err = ()`) accepting any of the rust, abbr, or long names
/// - Implements `Display` using the given `display` string, falling back when not provided on
///   the long name (with `_` replaced by a space), the abbr name, and the rust name,
///   in that order
/// - Populates the module `abbr_names` with `pub use` bindings of variants to their abbr names
/// - Populates the module `long_names` with `pub use` bindings of variants to their long names
/// - Maintains all documentation comments and other `#[attributes]` as would be expected
///   (with some caveats, listed below)
///
/// # Limitations
///
/// Due to [rust-lang/rust/#24189](https://github.com/rust-lang/rust/issues/24189), (fixed in
/// [rust-lang/rust/#42913](https://github.com/rust-lang/rust/pull/42913), landing in 1.20),
/// exactly one attribute line must be used on each variant. On 1.20 or higher, one or more may
/// be used, and the restriction can be relaxed back to the intended zero or more by replacing
/// `$(#[$variant_meta:meta])+` with `$(#[$variant_meta:meta])*`, and
/// `$(#[$variant_meta])+` with `$(#[$variant_meta])*`, and
/// `$(#[$ident_meta:meta])+` with `$(#[$ident_meta:meta])*` and
/// `$(#[$ident_meta])+` with `$(#[$ident_meta])*`, and
/// `$(#[$rest_meta:meta])+` with `$(#[$rest_meta:meta])*`, and
/// `$(#[$rest_meta])+` with `$(#[$rest_meta])*`, and
/// `$(#[$queue_meta:meta])+` with `$(#[$queue_meta:meta])*`, and
/// `$(#[$queue_meta])+` with `$(#[$queue_meta])*`
// TODO: Once adopting 1.20, fix the macro to work with zero attributes on variants (see above)
#[macro_export]
macro_rules! char_property {
    (
        $(#[$enum_meta:meta])*
        pub enum $enum_name:ident {
            $(
                $(#[$variant_meta:meta])+
                $variant:ident $variant_tt:tt
            )*
        }

        $(#[$abbr_mod_meta:meta])*
        pub mod $abbr_mod:ident for abbr;

        $(#[$long_mod_meta:meta])*
        pub mod $long_mod:ident for long;
    ) => {
        // Hand everything to the internal muncher: the accumulators start out empty and
        // all variants (attributes, name, and `{ ... }` body) wait in the queue.
        __char_property_internal! {
            $(#[$enum_meta])* pub enum $enum_name
            $(#[$abbr_mod_meta])* pub mod $abbr_mod
            $(#[$long_mod_meta])* pub mod $long_mod

            variant [ ]
            abbr [ ]
            long [ ]
            display [ ]

            buffer [ ]
            queue [ $( $(#[$variant_meta])+ $variant $variant_tt )* ]
        }
    };
}

/// Implementation detail of `char_property!`. Not part of the public API; it is exported
/// only because `char_property!` expands to an invocation of it.
///
/// The macro is an incremental token muncher. Its input always has the same shape:
///
/// - the enum header and the two alias-module headers, carried through unchanged;
/// - four accumulators: `variant` (attributes + variant name), and `abbr`, `long`,
///   `display` (variant name + the value given for that key);
/// - `buffer`: the variant currently being processed, as `Name { key => value, ... }`;
/// - `queue`: the variants that have not been processed yet.
///
/// Each step moves one piece of information from `buffer`/`queue` into an accumulator and
/// recurses. When both `buffer` and `queue` are empty the final rule emits the items.
#[doc(hidden)]
#[macro_export]
macro_rules! __char_property_internal {
    // == Queue => Buffer == //
    // The buffer is empty: pop the first queued variant, record its attributes and name in
    // `variant`, and put its name and `{ ... }` body in the buffer.
    (
        $(#[$name_meta:meta])* pub enum $name:ident
        $(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
        $(#[$long_names_meta:meta])* pub mod $long_names:ident

        variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
        abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
        long [ $( $long_variant:ident $long:ident ; )* ]
        display [ $( $display_variant:ident $display:expr ; )* ]

        buffer [ ]
        queue [
            $(#[$ident_meta:meta])+ $ident:ident $ident_tt:tt
            $( $(#[$rest_meta:meta])+ $rest:ident $rest_tt:tt )*
        ]
    ) => {
        __char_property_internal! {
            $(#[$name_meta])* pub enum $name
            $(#[$abbr_names_meta])* pub mod $abbr_names
            $(#[$long_names_meta])* pub mod $long_names

            variant [
                $( $(#[$variant_meta])+ $variant ; )*
                $(#[$ident_meta])+ $ident ;
            ]
            abbr [ $( $abbr_variant $abbr ; )* ]
            long [ $( $long_variant $long ; )* ]
            display [ $( $display_variant $display ; )* ]

            buffer [ $ident $ident_tt ]
            queue [ $( $(#[$rest_meta])+ $rest $rest_tt )* ]
        }
    };

    // == Buffer -- Abbr Name == //
    // The buffered variant starts with `abbr => X,`: record it and keep the rest buffered.
    (
        $(#[$name_meta:meta])* pub enum $name:ident
        $(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
        $(#[$long_names_meta:meta])* pub mod $long_names:ident

        variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
        abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
        long [ $( $long_variant:ident $long:ident ; )* ]
        display [ $( $display_variant:ident $display:expr ; )* ]

        buffer [ $ident:ident {
            abbr => $ident_abbr:ident ,
            $( $rest:tt )*
        } ]
        queue [ $( $(#[$queue_meta:meta])+ $queue:ident $queue_tt:tt )* ]
    ) => {
        __char_property_internal! {
            $(#[$name_meta])* pub enum $name
            $(#[$abbr_names_meta])* pub mod $abbr_names
            $(#[$long_names_meta])* pub mod $long_names

            variant [ $( $(#[$variant_meta])+ $variant ; )* ]
            abbr [
                $( $abbr_variant $abbr ; )*
                $ident $ident_abbr ;
            ]
            long [ $( $long_variant $long ; )* ]
            display [ $( $display_variant $display ; )* ]

            buffer [ $ident { $( $rest )* } ]
            queue [ $( $(#[$queue_meta])+ $queue $queue_tt )* ]
        }
    };

    // == Buffer -- Long Name == //
    // The buffered variant starts with `long => X,`: record it and keep the rest buffered.
    (
        $(#[$name_meta:meta])* pub enum $name:ident
        $(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
        $(#[$long_names_meta:meta])* pub mod $long_names:ident

        variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
        abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
        long [ $( $long_variant:ident $long:ident ; )* ]
        display [ $( $display_variant:ident $display:expr ; )* ]

        buffer [ $ident:ident {
            long => $ident_long:ident ,
            $( $rest:tt )*
        } ]
        queue [ $( $(#[$queue_meta:meta])+ $queue:ident $queue_tt:tt )* ]
    ) => {
        __char_property_internal! {
            $(#[$name_meta])* pub enum $name
            $(#[$abbr_names_meta])* pub mod $abbr_names
            $(#[$long_names_meta])* pub mod $long_names

            variant [ $( $(#[$variant_meta])+ $variant ; )* ]
            abbr [ $( $abbr_variant $abbr ; )* ]
            long [
                $( $long_variant $long ; )*
                $ident $ident_long ;
            ]
            display [ $( $display_variant $display ; )* ]

            buffer [ $ident { $( $rest )* } ]
            queue [ $( $(#[$queue_meta])+ $queue $queue_tt )* ]
        }
    };

    // == Buffer -- Display == //
    // The buffered variant starts with `display => expr,`: record it and keep the rest
    // buffered.
    (
        $(#[$name_meta:meta])* pub enum $name:ident
        $(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
        $(#[$long_names_meta:meta])* pub mod $long_names:ident

        variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
        abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
        long [ $( $long_variant:ident $long:ident ; )* ]
        display [ $( $display_variant:ident $display:expr ; )* ]

        buffer [ $ident:ident {
            display => $ident_display:expr ,
            $( $rest:tt )*
        } ]
        queue [ $( $(#[$queue_meta:meta])+ $queue:ident $queue_tt:tt )* ]
    ) => {
        __char_property_internal! {
            $(#[$name_meta])* pub enum $name
            $(#[$abbr_names_meta])* pub mod $abbr_names
            $(#[$long_names_meta])* pub mod $long_names

            variant [ $( $(#[$variant_meta])+ $variant ; )* ]
            abbr [ $( $abbr_variant $abbr ; )* ]
            long [ $( $long_variant $long ; )* ]
            display [
                $( $display_variant $display ; )*
                $ident $ident_display ;
            ]

            buffer [ $ident { $( $rest )* } ]
            queue [ $( $(#[$queue_meta])+ $queue $queue_tt )* ]
        }
    };

    // == Buffer -- Empty == //
    // Every entry of the buffered variant has been consumed: clear the buffer so the next
    // queued variant (if any) can be processed.
    (
        $(#[$name_meta:meta])* pub enum $name:ident
        $(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
        $(#[$long_names_meta:meta])* pub mod $long_names:ident

        variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
        abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
        long [ $( $long_variant:ident $long:ident ; )* ]
        display [ $( $display_variant:ident $display:expr ; )* ]

        buffer [ $ident:ident {} ]
        queue [ $( $(#[$queue_meta:meta])+ $queue:ident $queue_tt:tt )* ]
    ) => {
        __char_property_internal! {
            $(#[$name_meta])* pub enum $name
            $(#[$abbr_names_meta])* pub mod $abbr_names
            $(#[$long_names_meta])* pub mod $long_names

            variant [ $( $(#[$variant_meta])+ $variant ; )* ]
            abbr [ $( $abbr_variant $abbr ; )* ]
            long [ $( $long_variant $long ; )* ]
            display [ $( $display_variant $display ; )* ]

            buffer [ ]
            queue [ $( $(#[$queue_meta])+ $queue $queue_tt )* ]
        }
    };

    // == Final formatting == //
    // Nothing left to process: emit the enum, the alias modules, and the trait impls.
    (
        $(#[$name_meta:meta])* pub enum $name:ident
        $(#[$abbr_names_meta:meta])* pub mod $abbr_names:ident
        $(#[$long_names_meta:meta])* pub mod $long_names:ident

        variant [ $( $(#[$variant_meta:meta])+ $variant:ident ; )* ]
        abbr [ $( $abbr_variant:ident $abbr:ident ; )* ]
        long [ $( $long_variant:ident $long:ident ; )* ]
        display [ $( $display_variant:ident $display:expr ; )* ]

        buffer [ ]
        queue [ ]
    ) => {
        $(#[$name_meta])*
        #[allow(bad_style)]
        #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
        pub enum $name {
            $( $(#[$variant_meta])+ $variant, )*
        }

        // Abbr and long names follow Unicode spelling (e.g. `Left_To_Right`), hence the
        // `bad_style` allowances on the generated items.
        $(#[$abbr_names_meta])*
        #[allow(bad_style)]
        pub mod $abbr_names {
            $( pub use super::$name::$abbr_variant as $abbr; )*
        }

        $(#[$long_names_meta])*
        #[allow(bad_style)]
        pub mod $long_names {
            $( pub use super::$name::$long_variant as $long; )*
        }

        // The same string may be listed more than once (e.g. a rust name equal to its abbr
        // name); the first arm wins and the duplicates are unreachable.
        #[allow(bad_style)]
        #[allow(unreachable_patterns)]
        impl ::std::str::FromStr for $name {
            type Err = ();
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                match s {
                    $( stringify!($variant) => Ok($name::$variant), )*
                    $( stringify!($abbr) => Ok($name::$abbr_variant), )*
                    $( stringify!($long) => Ok($name::$long_variant), )*
                    _ => Err(()),
                }
            }
        }

        // Arms are ordered by preference: explicit display string, then long name, then
        // abbr name, then rust name. A variant usually appears in several groups, so the
        // later arms for it are unreachable by design.
        #[allow(bad_style)]
        #[allow(unreachable_patterns)]
        impl ::std::fmt::Display for $name {
            fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
                match *self {
                    $( $name::$display_variant => write!(f, "{}", $display), )*
                    $( $name::$long_variant => write!(f, "{}", stringify!($long).replace('_', " ")), )*
                    _ => write!(f, "{}", match *self {
                        $( $name::$abbr_variant => stringify!($abbr), )*
                        $( $name::$variant => stringify!($variant), )*
                    })
                }
            }
        }

        #[allow(bad_style)]
        impl $crate::char_property::EnumeratedCharProperty for $name {
            fn abbr_name(&self) -> &'static str {
                match *self {
                    $( $name::$abbr_variant => stringify!($abbr), )*
                    // No catch all variant
                    // Abbr name is required on Enumerated properties
                }
            }

            fn all_values() -> &'static [$name] {
                // Zero-or-more, matching the `variant [ ... ]` matcher above; a one-or-more
                // repetition here would reject a property declared with no variants.
                const VALUES: &[$name] = &[
                    $( $name::$variant, )*
                ];
                VALUES
            }
        }
    };
}
Loading

0 comments on commit b5771e4

Please sign in to comment.