From 043aca3e2c7dc7214ee3f671978b53b1b1197bf6 Mon Sep 17 00:00:00 2001 From: kwantam Date: Tue, 7 Apr 2015 18:13:39 -0400 Subject: [PATCH] add impl of UAX#29 word bounds algorithm in libunicode This patch does the following: 1. Adds three new structs in libunicode/str.rs: a. UnicodeWords: a filter on the UWordBounds iterator that yields only the "words" of a string as defined in Section 4 of Unicode Standard Annex #29 (UAX#29), http://unicode.org/reports/tr29/#Word_Boundaries b. UWordBounds: an iterator that segments a string on its word boundaries as defined in UAX#29. Note that this *only* segments the string, and does *not* drop whitespace and other non-word pieces of the text (that's what UnicodeWords does). Note that UWordBounds has both a forward and backward iterator that have total running time (that is, to segment the entire string) linear in the size of the string. It should be noted that with pathological inputs the reverse iterator could be about 2x less efficient than the forward iterator, but on reasonable inputs their costs are similar. c. UWordBoundIndices: the above iterator, but returning tuples of (offset, &str). 2. Adds three new functions in the `UnicodeStr` trait: a. words_unicode(): returns a UnicodeWords iterator. b. split_words_uax29(): returns a UWordBounds iterator. c. split_words_uax29_indices(): returns a UWordBoundIndices iterator. 3. Updates the `src/etc/unicode.py` script to generate tables necessary for running the UWordBounds iterators. 4. Adds a new script, `src/etc/unicode_gen_breaktests.py`, which processes the grapheme and word break tests published by the Unicode consortium into a format for inclusion in libcollectionstest. 5. Adds new impls in libcollections's `str` corresponding to the `UnicodeStr` functions of (2). Note that this new functionality is gated with `feature(unicode)`. 6. Adds tests in libcollectionstest to exercise this new functionality. 
In addition, updates the test data for the graphemes test to correspond to the output from the script of (4). (Note that at the moment this change is primarily cosmetic.) This patch does not settle the question raised by @huonw in #15628; rather, it introduces a new function alongside `words()` that follows UAX#29. In addition, it does not address the concerns that @SimonSapin raises in https://github.com/rust-lang/rfcs/pull/1054 since it leaves `words()` alone. --- src/etc/unicode.py | 57 +- src/etc/unicode_gen_breaktests.py | 197 ++++ src/libcollections/str.rs | 64 ++ src/libcollectionstest/str.rs | 1640 +++++++++++++++++++++++------ src/libunicode/lib.rs | 1 + src/libunicode/tables.rs | 523 ++++++++- src/libunicode/u_str.rs | 489 ++++++++- 7 files changed, 2597 insertions(+), 374 deletions(-) create mode 100755 src/etc/unicode_gen_breaktests.py diff --git a/src/etc/unicode.py b/src/etc/unicode.py index a8a60cc0eb9a3..407294d180fd7 100755 --- a/src/etc/unicode.py +++ b/src/etc/unicode.py @@ -15,6 +15,7 @@ # - DerivedNormalizationProps.txt # - EastAsianWidth.txt # - auxiliary/GraphemeBreakProperty.txt +# - auxiliary/WordBreakProperty.txt # - PropList.txt # - ReadMe.txt # - Scripts.txt @@ -290,11 +291,13 @@ def emit_bsearch_range_table(f): """) def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True, - pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1]))): - pub_string = "" + pfun=lambda x: "(%s,%s)" % (escape_char(x[0]), escape_char(x[1])), is_const=True): + pub_string = "const" + if not is_const: + pub_string = "let" if is_pub: - pub_string = "pub " - f.write(" %sconst %s: %s = &[\n" % (pub_string, name, t_type)) + pub_string = "pub " + pub_string + f.write(" %s %s: %s = &[\n" % (pub_string, name, t_type)) data = "" first = True for dat in t_data: @@ -375,21 +378,25 @@ def emit_conversions_module(f, lowerupper, upperlower): sorted(lowerupper.iteritems(), key=operator.itemgetter(0)), is_pub=False) f.write("}\n\n") -def 
emit_grapheme_module(f, grapheme_table, grapheme_cats): - f.write("""pub mod grapheme { +def emit_break_module(f, break_table, break_cats, name): + Name = name.capitalize() + f.write("""pub mod %s { use core::slice::SliceExt; - pub use self::GraphemeCat::*; + pub use self::%sCat::*; use core::result::Result::{Ok, Err}; #[allow(non_camel_case_types)] - #[derive(Clone, Copy)] - pub enum GraphemeCat { -""") - for cat in grapheme_cats + ["Any"]: - f.write(" GC_" + cat + ",\n") + #[derive(Clone, Copy, PartialEq, Eq)] + pub enum %sCat { +""" % (name, Name, Name)) + + break_cats.append("Any") + break_cats.sort() + for cat in break_cats: + f.write((" %sC_" % Name[0]) + cat + ",\n") f.write(""" } - fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> GraphemeCat { + fn bsearch_range_value_table(c: char, r: &'static [(char, char, %sCat)]) -> %sCat { use core::cmp::Ordering::{Equal, Less, Greater}; match r.binary_search_by(|&(lo, hi, _)| { if lo <= c && c <= hi { Equal } @@ -400,19 +407,19 @@ def emit_grapheme_module(f, grapheme_table, grapheme_cats): let (_, _, cat) = r[idx]; cat } - Err(_) => GC_Any + Err(_) => %sC_Any } } - pub fn grapheme_category(c: char) -> GraphemeCat { - bsearch_range_value_table(c, grapheme_cat_table) + pub fn %s_category(c: char) -> %sCat { + bsearch_range_value_table(c, %s_cat_table) } -""") +""" % (Name, Name, Name[0], name, Name, name)) - emit_table(f, "grapheme_cat_table", grapheme_table, "&'static [(char, char, GraphemeCat)]", - pfun=lambda x: "(%s,%s,GC_%s)" % (escape_char(x[0]), escape_char(x[1]), x[2]), - is_pub=False) + emit_table(f, "%s_cat_table" % name, break_table, "&'static [(char, char, %sCat)]" % Name, + pfun=lambda x: "(%s,%s,%sC_%s)" % (escape_char(x[0]), escape_char(x[1]), Name[0], x[2]), + is_pub=False, is_const=True) f.write("}\n") def emit_charwidth_module(f, width_table): @@ -690,4 +697,12 @@ def optimize_width_table(wtable): for cat in grapheme_cats: grapheme_table.extend([(x, y, cat) for (x, y) 
in grapheme_cats[cat]]) grapheme_table.sort(key=lambda w: w[0]) - emit_grapheme_module(rf, grapheme_table, grapheme_cats.keys()) + emit_break_module(rf, grapheme_table, grapheme_cats.keys(), "grapheme") + rf.write("\n") + + word_cats = load_properties("auxiliary/WordBreakProperty.txt", []) + word_table = [] + for cat in word_cats: + word_table.extend([(x, y, cat) for (x, y) in word_cats[cat]]) + word_table.sort(key=lambda w: w[0]) + emit_break_module(rf, word_table, word_cats.keys(), "word") diff --git a/src/etc/unicode_gen_breaktests.py b/src/etc/unicode_gen_breaktests.py new file mode 100755 index 0000000000000..3f586740cdb2c --- /dev/null +++ b/src/etc/unicode_gen_breaktests.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python +# -*- coding: utf-8 +# +# Copyright 2015 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. + +# This script uses the following Unicode tables: +# - GraphemeBreakTest.txt +# - WordBreakTest.txt +# +# Since this should not require frequent updates, we just store this +# out-of-line and check the unicode.rs file into git. 
+ +import unicode, re, os, fileinput + +def load_test_data(f, optsplit=[]): + outls = [] + testRe1 = re.compile("^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0.2\].*?([÷×].*)\s+÷\s+\[0.3\]\s*$") + + unicode.fetch(f) + data = [] + for line in fileinput.input(os.path.basename(f)): + # lines that include a test start with the ÷ character + if len(line) < 2 or line[0:2] != '÷': + continue + + m = testRe1.match(line) + if not m: + print "error: no match on line where test was expected: %s" % line + continue + + # process the characters in this test case + chars = process_split_string(m.group(1)) + # skip test case if it contains invalid characters (viz., surrogates) + if not chars: + continue + + # now process test cases + (chars, info) = process_split_info(m.group(2), chars, optsplit) + + # make sure that we have break info for each break! + assert len(chars) - 1 == len(info) + + outls.append((chars, info)) + + return outls + +def process_split_info(s, c, o): + outcs = [] + outis = [] + workcs = c.pop(0) + + # are we on a × or a ÷? 
+ isX = False + if s[0:2] == '×': + isX = True + + # find each instance of '(÷|×) [x.y] ' + while s: + # find the currently considered rule number + sInd = s.index('[') + 1 + eInd = s.index(']') + + # if it's '× [a.b]' where 'a.b' is in o, then + # we consider it a split even though it's not + # marked as one + # if it's ÷ then it's always a split + if not isX or s[sInd:eInd] in o: + outis.append(s[sInd:eInd]) + outcs.append(workcs) + workcs = c.pop(0) + else: + workcs.extend(c.pop(0)) + + idx = 1 + while idx < len(s): + if s[idx:idx+2] == '×': + isX = True + break + if s[idx:idx+2] == '÷': + isX = False + break + idx += 1 + s = s[idx:] + + outcs.append(workcs) + return (outcs, outis) + +def process_split_string(s): + outls = [] + workls = [] + + inls = s.split() + + for i in inls: + if i == '÷' or i == '×': + outls.append(workls) + workls = [] + continue + + ival = int(i,16) + + if unicode.is_surrogate(ival): + return [] + + workls.append(ival) + + if workls: + outls.append(workls) + + return outls + +def showfun(x): + outstr = '("' + for c in x[0]: + outstr += "\\u{%x}" % c + outstr += '",&[' + xfirst = True + for xx in x[1:]: + if not xfirst: + outstr += '],&[' + xfirst = False + sfirst = True + for sp in xx: + if not sfirst: + outstr += ',' + sfirst = False + outstr += '"' + for c in sp: + outstr += "\\u{%x}" % c + outstr += '"' + outstr += '])' + return outstr + +def create_grapheme_data(): + # rules 9.1 and 9.2 are for extended graphemes only + optsplits = ['9.1','9.2'] + d = load_test_data("auxiliary/GraphemeBreakTest.txt", optsplits) + + test_same = [] + test_diff = [] + + for (c, i) in d: + allchars = [cn for s in c for cn in s] + extgraphs = [] + extwork = [] + + extwork.extend(c[0]) + for n in range(0,len(i)): + if i[n] in optsplits: + extwork.extend(c[n+1]) + else: + extgraphs.append(extwork) + extwork = [] + extwork.extend(c[n+1]) + + # these are the extended grapheme clusters + extgraphs.append(extwork) + + if extgraphs == c: + 
test_same.append((allchars, c)) + else: + test_diff.append((allchars, extgraphs, c)) + + stype = "&[(&str, &[&str])]" + dtype = "&[(&str, &[&str], &[&str])]" + with open("graph_tests.rs", "w") as rf: + rf.write(" // official Unicode test data\n") + rf.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt\n") + unicode.emit_table(rf, "test_same", test_same, stype, False, showfun, False) + unicode.emit_table(rf, "test_diff", test_diff, dtype, False, showfun, False) + +def create_words_data(): + d = load_test_data("auxiliary/WordBreakTest.txt") + + test = [] + + for (c, i) in d: + allchars = [cn for s in c for cn in s] + test.append((allchars, c)) + + wtype = "&[(&str, &[&str])]" + with open("word_tests.rs", "w") as rf: + rf.write(" // official Unicode test data\n") + rf.write(" // http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt\n") + unicode.emit_table(rf, "test_word", test, wtype, False, showfun, False) + +if __name__ == "__main__": + create_grapheme_data() + create_words_data() diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 98f2933effc2c..206fffc160305 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -79,6 +79,7 @@ pub use core::str::{MatchIndices, RMatchIndices}; pub use core::str::{from_utf8, Chars, CharIndices, Bytes}; pub use core::str::{from_utf8_unchecked, ParseBoolError}; pub use unicode::str::{Words, Graphemes, GraphemeIndices}; +pub use unicode::str::{UnicodeWords, UWordBounds, UWordBoundIndices}; pub use core::str::pattern; /* @@ -1736,6 +1737,30 @@ impl str { UnicodeStr::words(&self[..]) } + /// An iterator over the words of `self`, separated on + /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries). + /// + /// In this function, "words" are just those substrings which, after splitting on + /// UAX#29 word boundaries, contain any alphanumeric characters. 
That is, the + /// substring must contain at least one character with the + /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic) + /// property, or with + /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values). + /// + /// # Example + /// # #![feature(unicode, core)] + /// let uws = "The quick (\"brown\") fox can't jump 32.3 feet, right?"; + /// let uw1 = uws.words_unicode().collect::<Vec<&str>>(); + /// let b: &[_] = &["The", "quick", "brown", "fox", "can't", "jump", "32.3", "feet", "right"]; + /// + /// assert_eq!(&uw1[..], b); + /// ``` + #[unstable(feature = "unicode", + reason = "questions remain regarding the naming of words() and words_unicode()")] + pub fn words_unicode(&self) -> UnicodeWords { + UnicodeStr::words_unicode(&self[..]) + } + /// Returns a string's displayed width in columns. /// /// Control characters have zero width. @@ -1819,4 +1844,43 @@ impl str { s.extend(self[..].chars().flat_map(|c| c.to_uppercase())); return s; } + + /// Returns an iterator over substrings of `self` separated on + /// [UAX#29 word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries). + /// + /// The concatenation of the substrings returned by this function is just the original string. + /// + /// # Example + /// + /// ``` + /// # #![feature(unicode, core)] + /// let swu1 = "The quick (\"brown\")  fox".split_words_uax29().collect::<Vec<&str>>(); + /// let b: &[_] = &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", " ", "fox"]; + /// + /// assert_eq!(&swu1[..], b); + /// ``` + #[unstable(feature = "unicode", + reason = "this functionality may only be provided by libunicode")] + pub fn split_words_uax29(&self) -> UWordBounds { + UnicodeStr::split_words_uax29(&self[..]) + } + + /// Returns an iterator over substrings of `self`, split on UAX#29 word boundaries, + /// and their offsets. See `split_words_uax29()` for more information. 
+ /// + /// # Example + /// + /// ``` + /// # #![feature(unicode, core)] + /// let swi1 = "Brr, it's 29.3°F!".split_words_uax29_indices().collect::<Vec<(usize, &str)>>(); + /// let b: &[_] = &[(0, "Brr"), (3, ","), (4, " "), (5, "it's"), (9, " "), (10, "29.3"), + /// (14, "°"), (16, "F"), (17, "!")]; + /// + /// assert_eq!(&swi1[..], b); + /// ``` + #[unstable(feature = "unicode", + reason = "this functionality may only be provided by libunicode")] + pub fn split_words_uax29_indices(&self) -> UWordBoundIndices { + UnicodeStr::split_words_uax29_indices(&self[..]) + } } diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs index 15f15900e7830..66d0de34ead2f 100644 --- a/src/libcollectionstest/str.rs +++ b/src/libcollectionstest/str.rs @@ -944,6 +944,13 @@ fn test_words() { assert_eq!(words, ["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"]) } +#[test] +fn test_words_unicode() { + let data = "\n \t(In reality, 'Märy' häd ~100,000 of \n\t'em)"; + let words_rev: Vec<&str> = data.words_unicode().rev().collect(); + assert_eq!(words_rev, ["em", "of", "100,000", "häd", "Märy", "reality", "In"]) +} + #[test] fn test_nfd_chars() { macro_rules! 
t { @@ -1038,344 +1045,219 @@ fn test_graphemes() { use std::iter::order; // official Unicode test data - // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt - let test_same: [(_, &[_]); 325] = [ - ("\u{20}\u{20}", &["\u{20}", "\u{20}"]), - ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]), - ("\u{20}\u{D}", &["\u{20}", "\u{D}"]), - ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]), - ("\u{20}\u{A}", &["\u{20}", "\u{A}"]), - ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]), - ("\u{20}\u{1}", &["\u{20}", "\u{1}"]), - ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]), - ("\u{20}\u{300}", &["\u{20}\u{300}"]), - ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]), - ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]), - ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]), - ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]), - ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]), - ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]), - ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]), - ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]), - ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]), - ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]), - ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]), - ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]), - ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]), - ("\u{20}\u{378}", &["\u{20}", "\u{378}"]), - ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]), - ("\u{D}\u{20}", &["\u{D}", "\u{20}"]), - ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]), - ("\u{D}\u{D}", &["\u{D}", "\u{D}"]), - ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]), - ("\u{D}\u{A}", &["\u{D}\u{A}"]), - ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]), - ("\u{D}\u{1}", &["\u{D}", "\u{1}"]), - ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]), - ("\u{D}\u{300}", &["\u{D}", "\u{300}"]), - ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]), - ("\u{D}\u{903}", &["\u{D}", "\u{903}"]), - 
("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]), - ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]), - ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]), - ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]), - ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]), - ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]), - ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]), - ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]), - ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]), - ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]), - ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]), - ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]), - ("\u{D}\u{378}", &["\u{D}", "\u{378}"]), - ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]), - ("\u{A}\u{20}", &["\u{A}", "\u{20}"]), - ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]), - ("\u{A}\u{D}", &["\u{A}", "\u{D}"]), - ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]), - ("\u{A}\u{A}", &["\u{A}", "\u{A}"]), - ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]), - ("\u{A}\u{1}", &["\u{A}", "\u{1}"]), - ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]), - ("\u{A}\u{300}", &["\u{A}", "\u{300}"]), - ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]), - ("\u{A}\u{903}", &["\u{A}", "\u{903}"]), - ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]), - ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]), - ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]), - ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]), - ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]), - ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]), - ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]), - ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]), - ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]), - ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]), - ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]), - ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]), - ("\u{A}\u{378}", &["\u{A}", "\u{378}"]), - 
("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]), - ("\u{1}\u{20}", &["\u{1}", "\u{20}"]), - ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]), - ("\u{1}\u{D}", &["\u{1}", "\u{D}"]), - ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]), - ("\u{1}\u{A}", &["\u{1}", "\u{A}"]), - ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]), - ("\u{1}\u{1}", &["\u{1}", "\u{1}"]), - ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]), - ("\u{1}\u{300}", &["\u{1}", "\u{300}"]), - ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]), - ("\u{1}\u{903}", &["\u{1}", "\u{903}"]), - ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]), - ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]), - ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]), - ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]), - ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]), - ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]), - ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]), - ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]), - ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]), - ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]), - ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]), - ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]), - ("\u{1}\u{378}", &["\u{1}", "\u{378}"]), - ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]), - ("\u{300}\u{20}", &["\u{300}", "\u{20}"]), - ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]), - ("\u{300}\u{D}", &["\u{300}", "\u{D}"]), - ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]), - ("\u{300}\u{A}", &["\u{300}", "\u{A}"]), - ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]), - ("\u{300}\u{1}", &["\u{300}", "\u{1}"]), - ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]), - ("\u{300}\u{300}", &["\u{300}\u{300}"]), - ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]), - ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]), - ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]), - ("\u{300}\u{1160}", &["\u{300}", 
"\u{1160}"]), - ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]), - ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]), - ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]), - ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]), - ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]), - ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]), - ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]), - ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]), - ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]), - ("\u{300}\u{378}", &["\u{300}", "\u{378}"]), - ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]), - ("\u{903}\u{20}", &["\u{903}", "\u{20}"]), - ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]), - ("\u{903}\u{D}", &["\u{903}", "\u{D}"]), - ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]), - ("\u{903}\u{A}", &["\u{903}", "\u{A}"]), - ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]), - ("\u{903}\u{1}", &["\u{903}", "\u{1}"]), - ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]), - ("\u{903}\u{300}", &["\u{903}\u{300}"]), - ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]), - ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]), - ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]), - ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]), - ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]), - ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]), - ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]), - ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]), - ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]), - ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]), - ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]), - ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]), - ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]), - ("\u{903}\u{378}", &["\u{903}", "\u{378}"]), - ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]), - ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]), - ("\u{1100}\u{308}\u{20}", 
&["\u{1100}\u{308}", "\u{20}"]), - ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]), - ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]), - ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]), - ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]), - ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]), - ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]), - ("\u{1100}\u{300}", &["\u{1100}\u{300}"]), - ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]), - ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]), - ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]), - ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]), - ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]), - ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]), - ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]), - ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]), - ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]), - ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]), - ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]), - ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]), - ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]), - ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]), - ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]), - ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]), - ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]), - ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]), - ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]), - ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]), - ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]), - ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]), - ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]), - ("\u{1160}\u{300}", &["\u{1160}\u{300}"]), - ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]), - ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]), - ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]), - ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]), - ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]), - 
("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]), - ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]), - ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]), - ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]), - ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]), - ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]), - ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]), - ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]), - ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]), - ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]), - ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]), - ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]), - ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]), - ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]), - ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]), - ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]), - ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]), - ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]), - ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]), - ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]), - ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]), - ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]), - ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]), - ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]), - ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]), - ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]), - ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]), - ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]), - ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]), - ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]), - ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]), - ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]), - ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]), - ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]), - ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]), - ("\u{AC00}\u{308}\u{20}", 
&["\u{AC00}\u{308}", "\u{20}"]), - ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]), - ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]), - ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]), - ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]), - ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]), - ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]), - ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]), - ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]), - ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]), - ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]), - ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]), - ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]), - ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]), - ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]), - ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]), - ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]), - ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]), - ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]), - ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]), - ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]), - ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]), - ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]), - ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]), - ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]), - ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]), - ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]), - ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]), - ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]), - ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]), - ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]), - ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]), - ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]), - ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]), - ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]), - ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]), - ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", 
"\u{1160}"]), - ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]), - ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]), - ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]), - ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]), - ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]), - ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]), - ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]), - ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]), - ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]), - ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]), - ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]), - ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]), - ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]), - ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]), - ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]), - ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]), - ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]), - ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]), - ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]), - ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]), - ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]), - ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]), - ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]), - ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]), - ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]), - ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]), - ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]), - ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]), - ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]), - ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]), - ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]), - ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]), - ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]), - ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]), - ("\u{378}\u{20}", 
&["\u{378}", "\u{20}"]), - ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]), - ("\u{378}\u{D}", &["\u{378}", "\u{D}"]), - ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]), - ("\u{378}\u{A}", &["\u{378}", "\u{A}"]), - ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]), - ("\u{378}\u{1}", &["\u{378}", "\u{1}"]), - ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]), - ("\u{378}\u{300}", &["\u{378}\u{300}"]), - ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]), - ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]), - ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]), - ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]), - ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]), - ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]), - ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]), - ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]), - ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]), - ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]), - ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]), - ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]), - ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]), - ("\u{378}\u{378}", &["\u{378}", "\u{378}"]), - ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]), - ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]), - ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]), - ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]), - ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}", - &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]), - ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}", - &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]), - ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]), - ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}", - "\u{1F1E7}\u{1F1E8}"]), - ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}", - &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]), - ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]), - ("\u{646}\u{200D}\u{20}", 
&["\u{646}\u{200D}", "\u{20}"]), + // http://www.unicode.org/Public/UNIDATA/auxiliary/GraphemeBreakTest.txt + let test_same: &[(&str, &[&str])] = &[ + ("\u{20}\u{20}", &["\u{20}", "\u{20}"]), ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", + "\u{20}"]), ("\u{20}\u{d}", &["\u{20}", "\u{d}"]), ("\u{20}\u{308}\u{d}", &["\u{20}\u{308}", + "\u{d}"]), ("\u{20}\u{a}", &["\u{20}", "\u{a}"]), ("\u{20}\u{308}\u{a}", &["\u{20}\u{308}", + "\u{a}"]), ("\u{20}\u{1}", &["\u{20}", "\u{1}"]), ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", + "\u{1}"]), ("\u{20}\u{300}", &["\u{20}\u{300}"]), ("\u{20}\u{308}\u{300}", + &["\u{20}\u{308}\u{300}"]), ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]), + ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]), ("\u{20}\u{1160}", &["\u{20}", + "\u{1160}"]), ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]), ("\u{20}\u{11a8}", + &["\u{20}", "\u{11a8}"]), ("\u{20}\u{308}\u{11a8}", &["\u{20}\u{308}", "\u{11a8}"]), + ("\u{20}\u{ac00}", &["\u{20}", "\u{ac00}"]), ("\u{20}\u{308}\u{ac00}", &["\u{20}\u{308}", + "\u{ac00}"]), ("\u{20}\u{ac01}", &["\u{20}", "\u{ac01}"]), ("\u{20}\u{308}\u{ac01}", + &["\u{20}\u{308}", "\u{ac01}"]), ("\u{20}\u{1f1e6}", &["\u{20}", "\u{1f1e6}"]), + ("\u{20}\u{308}\u{1f1e6}", &["\u{20}\u{308}", "\u{1f1e6}"]), ("\u{20}\u{378}", &["\u{20}", + "\u{378}"]), ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]), ("\u{d}\u{20}", + &["\u{d}", "\u{20}"]), ("\u{d}\u{308}\u{20}", &["\u{d}", "\u{308}", "\u{20}"]), + ("\u{d}\u{d}", &["\u{d}", "\u{d}"]), ("\u{d}\u{308}\u{d}", &["\u{d}", "\u{308}", "\u{d}"]), + ("\u{d}\u{a}", &["\u{d}\u{a}"]), ("\u{d}\u{308}\u{a}", &["\u{d}", "\u{308}", "\u{a}"]), + ("\u{d}\u{1}", &["\u{d}", "\u{1}"]), ("\u{d}\u{308}\u{1}", &["\u{d}", "\u{308}", "\u{1}"]), + ("\u{d}\u{300}", &["\u{d}", "\u{300}"]), ("\u{d}\u{308}\u{300}", &["\u{d}", + "\u{308}\u{300}"]), ("\u{d}\u{903}", &["\u{d}", "\u{903}"]), ("\u{d}\u{1100}", &["\u{d}", + "\u{1100}"]), ("\u{d}\u{308}\u{1100}", &["\u{d}", "\u{308}", "\u{1100}"]), 
("\u{d}\u{1160}", + &["\u{d}", "\u{1160}"]), ("\u{d}\u{308}\u{1160}", &["\u{d}", "\u{308}", "\u{1160}"]), + ("\u{d}\u{11a8}", &["\u{d}", "\u{11a8}"]), ("\u{d}\u{308}\u{11a8}", &["\u{d}", "\u{308}", + "\u{11a8}"]), ("\u{d}\u{ac00}", &["\u{d}", "\u{ac00}"]), ("\u{d}\u{308}\u{ac00}", &["\u{d}", + "\u{308}", "\u{ac00}"]), ("\u{d}\u{ac01}", &["\u{d}", "\u{ac01}"]), ("\u{d}\u{308}\u{ac01}", + &["\u{d}", "\u{308}", "\u{ac01}"]), ("\u{d}\u{1f1e6}", &["\u{d}", "\u{1f1e6}"]), + ("\u{d}\u{308}\u{1f1e6}", &["\u{d}", "\u{308}", "\u{1f1e6}"]), ("\u{d}\u{378}", &["\u{d}", + "\u{378}"]), ("\u{d}\u{308}\u{378}", &["\u{d}", "\u{308}", "\u{378}"]), ("\u{a}\u{20}", + &["\u{a}", "\u{20}"]), ("\u{a}\u{308}\u{20}", &["\u{a}", "\u{308}", "\u{20}"]), + ("\u{a}\u{d}", &["\u{a}", "\u{d}"]), ("\u{a}\u{308}\u{d}", &["\u{a}", "\u{308}", "\u{d}"]), + ("\u{a}\u{a}", &["\u{a}", "\u{a}"]), ("\u{a}\u{308}\u{a}", &["\u{a}", "\u{308}", "\u{a}"]), + ("\u{a}\u{1}", &["\u{a}", "\u{1}"]), ("\u{a}\u{308}\u{1}", &["\u{a}", "\u{308}", "\u{1}"]), + ("\u{a}\u{300}", &["\u{a}", "\u{300}"]), ("\u{a}\u{308}\u{300}", &["\u{a}", + "\u{308}\u{300}"]), ("\u{a}\u{903}", &["\u{a}", "\u{903}"]), ("\u{a}\u{1100}", &["\u{a}", + "\u{1100}"]), ("\u{a}\u{308}\u{1100}", &["\u{a}", "\u{308}", "\u{1100}"]), ("\u{a}\u{1160}", + &["\u{a}", "\u{1160}"]), ("\u{a}\u{308}\u{1160}", &["\u{a}", "\u{308}", "\u{1160}"]), + ("\u{a}\u{11a8}", &["\u{a}", "\u{11a8}"]), ("\u{a}\u{308}\u{11a8}", &["\u{a}", "\u{308}", + "\u{11a8}"]), ("\u{a}\u{ac00}", &["\u{a}", "\u{ac00}"]), ("\u{a}\u{308}\u{ac00}", &["\u{a}", + "\u{308}", "\u{ac00}"]), ("\u{a}\u{ac01}", &["\u{a}", "\u{ac01}"]), ("\u{a}\u{308}\u{ac01}", + &["\u{a}", "\u{308}", "\u{ac01}"]), ("\u{a}\u{1f1e6}", &["\u{a}", "\u{1f1e6}"]), + ("\u{a}\u{308}\u{1f1e6}", &["\u{a}", "\u{308}", "\u{1f1e6}"]), ("\u{a}\u{378}", &["\u{a}", + "\u{378}"]), ("\u{a}\u{308}\u{378}", &["\u{a}", "\u{308}", "\u{378}"]), ("\u{1}\u{20}", + &["\u{1}", "\u{20}"]), ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", 
"\u{20}"]), + ("\u{1}\u{d}", &["\u{1}", "\u{d}"]), ("\u{1}\u{308}\u{d}", &["\u{1}", "\u{308}", "\u{d}"]), + ("\u{1}\u{a}", &["\u{1}", "\u{a}"]), ("\u{1}\u{308}\u{a}", &["\u{1}", "\u{308}", "\u{a}"]), + ("\u{1}\u{1}", &["\u{1}", "\u{1}"]), ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]), + ("\u{1}\u{300}", &["\u{1}", "\u{300}"]), ("\u{1}\u{308}\u{300}", &["\u{1}", + "\u{308}\u{300}"]), ("\u{1}\u{903}", &["\u{1}", "\u{903}"]), ("\u{1}\u{1100}", &["\u{1}", + "\u{1100}"]), ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]), ("\u{1}\u{1160}", + &["\u{1}", "\u{1160}"]), ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]), + ("\u{1}\u{11a8}", &["\u{1}", "\u{11a8}"]), ("\u{1}\u{308}\u{11a8}", &["\u{1}", "\u{308}", + "\u{11a8}"]), ("\u{1}\u{ac00}", &["\u{1}", "\u{ac00}"]), ("\u{1}\u{308}\u{ac00}", &["\u{1}", + "\u{308}", "\u{ac00}"]), ("\u{1}\u{ac01}", &["\u{1}", "\u{ac01}"]), ("\u{1}\u{308}\u{ac01}", + &["\u{1}", "\u{308}", "\u{ac01}"]), ("\u{1}\u{1f1e6}", &["\u{1}", "\u{1f1e6}"]), + ("\u{1}\u{308}\u{1f1e6}", &["\u{1}", "\u{308}", "\u{1f1e6}"]), ("\u{1}\u{378}", &["\u{1}", + "\u{378}"]), ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]), ("\u{300}\u{20}", + &["\u{300}", "\u{20}"]), ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]), + ("\u{300}\u{d}", &["\u{300}", "\u{d}"]), ("\u{300}\u{308}\u{d}", &["\u{300}\u{308}", + "\u{d}"]), ("\u{300}\u{a}", &["\u{300}", "\u{a}"]), ("\u{300}\u{308}\u{a}", + &["\u{300}\u{308}", "\u{a}"]), ("\u{300}\u{1}", &["\u{300}", "\u{1}"]), + ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]), ("\u{300}\u{300}", + &["\u{300}\u{300}"]), ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]), + ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]), ("\u{300}\u{308}\u{1100}", + &["\u{300}\u{308}", "\u{1100}"]), ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]), + ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]), ("\u{300}\u{11a8}", + &["\u{300}", "\u{11a8}"]), ("\u{300}\u{308}\u{11a8}", &["\u{300}\u{308}", 
"\u{11a8}"]), + ("\u{300}\u{ac00}", &["\u{300}", "\u{ac00}"]), ("\u{300}\u{308}\u{ac00}", + &["\u{300}\u{308}", "\u{ac00}"]), ("\u{300}\u{ac01}", &["\u{300}", "\u{ac01}"]), + ("\u{300}\u{308}\u{ac01}", &["\u{300}\u{308}", "\u{ac01}"]), ("\u{300}\u{1f1e6}", + &["\u{300}", "\u{1f1e6}"]), ("\u{300}\u{308}\u{1f1e6}", &["\u{300}\u{308}", "\u{1f1e6}"]), + ("\u{300}\u{378}", &["\u{300}", "\u{378}"]), ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", + "\u{378}"]), ("\u{903}\u{20}", &["\u{903}", "\u{20}"]), ("\u{903}\u{308}\u{20}", + &["\u{903}\u{308}", "\u{20}"]), ("\u{903}\u{d}", &["\u{903}", "\u{d}"]), + ("\u{903}\u{308}\u{d}", &["\u{903}\u{308}", "\u{d}"]), ("\u{903}\u{a}", &["\u{903}", + "\u{a}"]), ("\u{903}\u{308}\u{a}", &["\u{903}\u{308}", "\u{a}"]), ("\u{903}\u{1}", + &["\u{903}", "\u{1}"]), ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]), + ("\u{903}\u{300}", &["\u{903}\u{300}"]), ("\u{903}\u{308}\u{300}", + &["\u{903}\u{308}\u{300}"]), ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]), + ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]), ("\u{903}\u{1160}", + &["\u{903}", "\u{1160}"]), ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]), + ("\u{903}\u{11a8}", &["\u{903}", "\u{11a8}"]), ("\u{903}\u{308}\u{11a8}", + &["\u{903}\u{308}", "\u{11a8}"]), ("\u{903}\u{ac00}", &["\u{903}", "\u{ac00}"]), + ("\u{903}\u{308}\u{ac00}", &["\u{903}\u{308}", "\u{ac00}"]), ("\u{903}\u{ac01}", + &["\u{903}", "\u{ac01}"]), ("\u{903}\u{308}\u{ac01}", &["\u{903}\u{308}", "\u{ac01}"]), + ("\u{903}\u{1f1e6}", &["\u{903}", "\u{1f1e6}"]), ("\u{903}\u{308}\u{1f1e6}", + &["\u{903}\u{308}", "\u{1f1e6}"]), ("\u{903}\u{378}", &["\u{903}", "\u{378}"]), + ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]), ("\u{1100}\u{20}", &["\u{1100}", + "\u{20}"]), ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]), ("\u{1100}\u{d}", + &["\u{1100}", "\u{d}"]), ("\u{1100}\u{308}\u{d}", &["\u{1100}\u{308}", "\u{d}"]), + ("\u{1100}\u{a}", &["\u{1100}", "\u{a}"]), 
("\u{1100}\u{308}\u{a}", &["\u{1100}\u{308}", + "\u{a}"]), ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]), ("\u{1100}\u{308}\u{1}", + &["\u{1100}\u{308}", "\u{1}"]), ("\u{1100}\u{300}", &["\u{1100}\u{300}"]), + ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]), ("\u{1100}\u{1100}", + &["\u{1100}\u{1100}"]), ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]), + ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]), ("\u{1100}\u{308}\u{1160}", + &["\u{1100}\u{308}", "\u{1160}"]), ("\u{1100}\u{11a8}", &["\u{1100}", "\u{11a8}"]), + ("\u{1100}\u{308}\u{11a8}", &["\u{1100}\u{308}", "\u{11a8}"]), ("\u{1100}\u{ac00}", + &["\u{1100}\u{ac00}"]), ("\u{1100}\u{308}\u{ac00}", &["\u{1100}\u{308}", "\u{ac00}"]), + ("\u{1100}\u{ac01}", &["\u{1100}\u{ac01}"]), ("\u{1100}\u{308}\u{ac01}", + &["\u{1100}\u{308}", "\u{ac01}"]), ("\u{1100}\u{1f1e6}", &["\u{1100}", "\u{1f1e6}"]), + ("\u{1100}\u{308}\u{1f1e6}", &["\u{1100}\u{308}", "\u{1f1e6}"]), ("\u{1100}\u{378}", + &["\u{1100}", "\u{378}"]), ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]), + ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]), ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", + "\u{20}"]), ("\u{1160}\u{d}", &["\u{1160}", "\u{d}"]), ("\u{1160}\u{308}\u{d}", + &["\u{1160}\u{308}", "\u{d}"]), ("\u{1160}\u{a}", &["\u{1160}", "\u{a}"]), + ("\u{1160}\u{308}\u{a}", &["\u{1160}\u{308}", "\u{a}"]), ("\u{1160}\u{1}", &["\u{1160}", + "\u{1}"]), ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]), ("\u{1160}\u{300}", + &["\u{1160}\u{300}"]), ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]), + ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]), ("\u{1160}\u{308}\u{1100}", + &["\u{1160}\u{308}", "\u{1100}"]), ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]), + ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]), ("\u{1160}\u{11a8}", + &["\u{1160}\u{11a8}"]), ("\u{1160}\u{308}\u{11a8}", &["\u{1160}\u{308}", "\u{11a8}"]), + ("\u{1160}\u{ac00}", &["\u{1160}", "\u{ac00}"]), ("\u{1160}\u{308}\u{ac00}", + 
&["\u{1160}\u{308}", "\u{ac00}"]), ("\u{1160}\u{ac01}", &["\u{1160}", "\u{ac01}"]), + ("\u{1160}\u{308}\u{ac01}", &["\u{1160}\u{308}", "\u{ac01}"]), ("\u{1160}\u{1f1e6}", + &["\u{1160}", "\u{1f1e6}"]), ("\u{1160}\u{308}\u{1f1e6}", &["\u{1160}\u{308}", + "\u{1f1e6}"]), ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]), ("\u{1160}\u{308}\u{378}", + &["\u{1160}\u{308}", "\u{378}"]), ("\u{11a8}\u{20}", &["\u{11a8}", "\u{20}"]), + ("\u{11a8}\u{308}\u{20}", &["\u{11a8}\u{308}", "\u{20}"]), ("\u{11a8}\u{d}", &["\u{11a8}", + "\u{d}"]), ("\u{11a8}\u{308}\u{d}", &["\u{11a8}\u{308}", "\u{d}"]), ("\u{11a8}\u{a}", + &["\u{11a8}", "\u{a}"]), ("\u{11a8}\u{308}\u{a}", &["\u{11a8}\u{308}", "\u{a}"]), + ("\u{11a8}\u{1}", &["\u{11a8}", "\u{1}"]), ("\u{11a8}\u{308}\u{1}", &["\u{11a8}\u{308}", + "\u{1}"]), ("\u{11a8}\u{300}", &["\u{11a8}\u{300}"]), ("\u{11a8}\u{308}\u{300}", + &["\u{11a8}\u{308}\u{300}"]), ("\u{11a8}\u{1100}", &["\u{11a8}", "\u{1100}"]), + ("\u{11a8}\u{308}\u{1100}", &["\u{11a8}\u{308}", "\u{1100}"]), ("\u{11a8}\u{1160}", + &["\u{11a8}", "\u{1160}"]), ("\u{11a8}\u{308}\u{1160}", &["\u{11a8}\u{308}", "\u{1160}"]), + ("\u{11a8}\u{11a8}", &["\u{11a8}\u{11a8}"]), ("\u{11a8}\u{308}\u{11a8}", + &["\u{11a8}\u{308}", "\u{11a8}"]), ("\u{11a8}\u{ac00}", &["\u{11a8}", "\u{ac00}"]), + ("\u{11a8}\u{308}\u{ac00}", &["\u{11a8}\u{308}", "\u{ac00}"]), ("\u{11a8}\u{ac01}", + &["\u{11a8}", "\u{ac01}"]), ("\u{11a8}\u{308}\u{ac01}", &["\u{11a8}\u{308}", "\u{ac01}"]), + ("\u{11a8}\u{1f1e6}", &["\u{11a8}", "\u{1f1e6}"]), ("\u{11a8}\u{308}\u{1f1e6}", + &["\u{11a8}\u{308}", "\u{1f1e6}"]), ("\u{11a8}\u{378}", &["\u{11a8}", "\u{378}"]), + ("\u{11a8}\u{308}\u{378}", &["\u{11a8}\u{308}", "\u{378}"]), ("\u{ac00}\u{20}", + &["\u{ac00}", "\u{20}"]), ("\u{ac00}\u{308}\u{20}", &["\u{ac00}\u{308}", "\u{20}"]), + ("\u{ac00}\u{d}", &["\u{ac00}", "\u{d}"]), ("\u{ac00}\u{308}\u{d}", &["\u{ac00}\u{308}", + "\u{d}"]), ("\u{ac00}\u{a}", &["\u{ac00}", "\u{a}"]), ("\u{ac00}\u{308}\u{a}", + &["\u{ac00}\u{308}", 
"\u{a}"]), ("\u{ac00}\u{1}", &["\u{ac00}", "\u{1}"]), + ("\u{ac00}\u{308}\u{1}", &["\u{ac00}\u{308}", "\u{1}"]), ("\u{ac00}\u{300}", + &["\u{ac00}\u{300}"]), ("\u{ac00}\u{308}\u{300}", &["\u{ac00}\u{308}\u{300}"]), + ("\u{ac00}\u{1100}", &["\u{ac00}", "\u{1100}"]), ("\u{ac00}\u{308}\u{1100}", + &["\u{ac00}\u{308}", "\u{1100}"]), ("\u{ac00}\u{1160}", &["\u{ac00}\u{1160}"]), + ("\u{ac00}\u{308}\u{1160}", &["\u{ac00}\u{308}", "\u{1160}"]), ("\u{ac00}\u{11a8}", + &["\u{ac00}\u{11a8}"]), ("\u{ac00}\u{308}\u{11a8}", &["\u{ac00}\u{308}", "\u{11a8}"]), + ("\u{ac00}\u{ac00}", &["\u{ac00}", "\u{ac00}"]), ("\u{ac00}\u{308}\u{ac00}", + &["\u{ac00}\u{308}", "\u{ac00}"]), ("\u{ac00}\u{ac01}", &["\u{ac00}", "\u{ac01}"]), + ("\u{ac00}\u{308}\u{ac01}", &["\u{ac00}\u{308}", "\u{ac01}"]), ("\u{ac00}\u{1f1e6}", + &["\u{ac00}", "\u{1f1e6}"]), ("\u{ac00}\u{308}\u{1f1e6}", &["\u{ac00}\u{308}", + "\u{1f1e6}"]), ("\u{ac00}\u{378}", &["\u{ac00}", "\u{378}"]), ("\u{ac00}\u{308}\u{378}", + &["\u{ac00}\u{308}", "\u{378}"]), ("\u{ac01}\u{20}", &["\u{ac01}", "\u{20}"]), + ("\u{ac01}\u{308}\u{20}", &["\u{ac01}\u{308}", "\u{20}"]), ("\u{ac01}\u{d}", &["\u{ac01}", + "\u{d}"]), ("\u{ac01}\u{308}\u{d}", &["\u{ac01}\u{308}", "\u{d}"]), ("\u{ac01}\u{a}", + &["\u{ac01}", "\u{a}"]), ("\u{ac01}\u{308}\u{a}", &["\u{ac01}\u{308}", "\u{a}"]), + ("\u{ac01}\u{1}", &["\u{ac01}", "\u{1}"]), ("\u{ac01}\u{308}\u{1}", &["\u{ac01}\u{308}", + "\u{1}"]), ("\u{ac01}\u{300}", &["\u{ac01}\u{300}"]), ("\u{ac01}\u{308}\u{300}", + &["\u{ac01}\u{308}\u{300}"]), ("\u{ac01}\u{1100}", &["\u{ac01}", "\u{1100}"]), + ("\u{ac01}\u{308}\u{1100}", &["\u{ac01}\u{308}", "\u{1100}"]), ("\u{ac01}\u{1160}", + &["\u{ac01}", "\u{1160}"]), ("\u{ac01}\u{308}\u{1160}", &["\u{ac01}\u{308}", "\u{1160}"]), + ("\u{ac01}\u{11a8}", &["\u{ac01}\u{11a8}"]), ("\u{ac01}\u{308}\u{11a8}", + &["\u{ac01}\u{308}", "\u{11a8}"]), ("\u{ac01}\u{ac00}", &["\u{ac01}", "\u{ac00}"]), + ("\u{ac01}\u{308}\u{ac00}", &["\u{ac01}\u{308}", "\u{ac00}"]), 
("\u{ac01}\u{ac01}", + &["\u{ac01}", "\u{ac01}"]), ("\u{ac01}\u{308}\u{ac01}", &["\u{ac01}\u{308}", "\u{ac01}"]), + ("\u{ac01}\u{1f1e6}", &["\u{ac01}", "\u{1f1e6}"]), ("\u{ac01}\u{308}\u{1f1e6}", + &["\u{ac01}\u{308}", "\u{1f1e6}"]), ("\u{ac01}\u{378}", &["\u{ac01}", "\u{378}"]), + ("\u{ac01}\u{308}\u{378}", &["\u{ac01}\u{308}", "\u{378}"]), ("\u{1f1e6}\u{20}", + &["\u{1f1e6}", "\u{20}"]), ("\u{1f1e6}\u{308}\u{20}", &["\u{1f1e6}\u{308}", "\u{20}"]), + ("\u{1f1e6}\u{d}", &["\u{1f1e6}", "\u{d}"]), ("\u{1f1e6}\u{308}\u{d}", &["\u{1f1e6}\u{308}", + "\u{d}"]), ("\u{1f1e6}\u{a}", &["\u{1f1e6}", "\u{a}"]), ("\u{1f1e6}\u{308}\u{a}", + &["\u{1f1e6}\u{308}", "\u{a}"]), ("\u{1f1e6}\u{1}", &["\u{1f1e6}", "\u{1}"]), + ("\u{1f1e6}\u{308}\u{1}", &["\u{1f1e6}\u{308}", "\u{1}"]), ("\u{1f1e6}\u{300}", + &["\u{1f1e6}\u{300}"]), ("\u{1f1e6}\u{308}\u{300}", &["\u{1f1e6}\u{308}\u{300}"]), + ("\u{1f1e6}\u{1100}", &["\u{1f1e6}", "\u{1100}"]), ("\u{1f1e6}\u{308}\u{1100}", + &["\u{1f1e6}\u{308}", "\u{1100}"]), ("\u{1f1e6}\u{1160}", &["\u{1f1e6}", "\u{1160}"]), + ("\u{1f1e6}\u{308}\u{1160}", &["\u{1f1e6}\u{308}", "\u{1160}"]), ("\u{1f1e6}\u{11a8}", + &["\u{1f1e6}", "\u{11a8}"]), ("\u{1f1e6}\u{308}\u{11a8}", &["\u{1f1e6}\u{308}", + "\u{11a8}"]), ("\u{1f1e6}\u{ac00}", &["\u{1f1e6}", "\u{ac00}"]), + ("\u{1f1e6}\u{308}\u{ac00}", &["\u{1f1e6}\u{308}", "\u{ac00}"]), ("\u{1f1e6}\u{ac01}", + &["\u{1f1e6}", "\u{ac01}"]), ("\u{1f1e6}\u{308}\u{ac01}", &["\u{1f1e6}\u{308}", + "\u{ac01}"]), ("\u{1f1e6}\u{1f1e6}", &["\u{1f1e6}\u{1f1e6}"]), ("\u{1f1e6}\u{308}\u{1f1e6}", + &["\u{1f1e6}\u{308}", "\u{1f1e6}"]), ("\u{1f1e6}\u{378}", &["\u{1f1e6}", "\u{378}"]), + ("\u{1f1e6}\u{308}\u{378}", &["\u{1f1e6}\u{308}", "\u{378}"]), ("\u{378}\u{20}", + &["\u{378}", "\u{20}"]), ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]), + ("\u{378}\u{d}", &["\u{378}", "\u{d}"]), ("\u{378}\u{308}\u{d}", &["\u{378}\u{308}", + "\u{d}"]), ("\u{378}\u{a}", &["\u{378}", "\u{a}"]), ("\u{378}\u{308}\u{a}", + &["\u{378}\u{308}", 
"\u{a}"]), ("\u{378}\u{1}", &["\u{378}", "\u{1}"]), + ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]), ("\u{378}\u{300}", + &["\u{378}\u{300}"]), ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]), + ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]), ("\u{378}\u{308}\u{1100}", + &["\u{378}\u{308}", "\u{1100}"]), ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]), + ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]), ("\u{378}\u{11a8}", + &["\u{378}", "\u{11a8}"]), ("\u{378}\u{308}\u{11a8}", &["\u{378}\u{308}", "\u{11a8}"]), + ("\u{378}\u{ac00}", &["\u{378}", "\u{ac00}"]), ("\u{378}\u{308}\u{ac00}", + &["\u{378}\u{308}", "\u{ac00}"]), ("\u{378}\u{ac01}", &["\u{378}", "\u{ac01}"]), + ("\u{378}\u{308}\u{ac01}", &["\u{378}\u{308}", "\u{ac01}"]), ("\u{378}\u{1f1e6}", + &["\u{378}", "\u{1f1e6}"]), ("\u{378}\u{308}\u{1f1e6}", &["\u{378}\u{308}", "\u{1f1e6}"]), + ("\u{378}\u{378}", &["\u{378}", "\u{378}"]), ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", + "\u{378}"]), ("\u{61}\u{1f1e6}\u{62}", &["\u{61}", "\u{1f1e6}", "\u{62}"]), + ("\u{1f1f7}\u{1f1fa}", &["\u{1f1f7}\u{1f1fa}"]), ("\u{1f1f7}\u{1f1fa}\u{1f1f8}", + &["\u{1f1f7}\u{1f1fa}\u{1f1f8}"]), ("\u{1f1f7}\u{1f1fa}\u{1f1f8}\u{1f1ea}", + &["\u{1f1f7}\u{1f1fa}\u{1f1f8}\u{1f1ea}"]), ("\u{1f1f7}\u{1f1fa}\u{200b}\u{1f1f8}\u{1f1ea}", + &["\u{1f1f7}\u{1f1fa}", "\u{200b}", "\u{1f1f8}\u{1f1ea}"]), ("\u{1f1e6}\u{1f1e7}\u{1f1e8}", + &["\u{1f1e6}\u{1f1e7}\u{1f1e8}"]), ("\u{1f1e6}\u{200d}\u{1f1e7}\u{1f1e8}", + &["\u{1f1e6}\u{200d}", "\u{1f1e7}\u{1f1e8}"]), ("\u{1f1e6}\u{1f1e7}\u{200d}\u{1f1e8}", + &["\u{1f1e6}\u{1f1e7}\u{200d}", "\u{1f1e8}"]), ("\u{20}\u{200d}\u{646}", &["\u{20}\u{200d}", + "\u{646}"]), ("\u{646}\u{200d}\u{20}", &["\u{646}\u{200d}", "\u{20}"]) ]; - let test_diff: [(_, &[_], &[_]); 23] = [ + let test_diff: &[(&str, &[&str], &[&str])] = &[ ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}", - &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}", 
- &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}", - &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}", + &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{d}\u{308}\u{903}", + &["\u{d}", "\u{308}\u{903}"], &["\u{d}", "\u{308}", "\u{903}"]), ("\u{a}\u{308}\u{903}", + &["\u{a}", "\u{308}\u{903}"], &["\u{a}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}", &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}", &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}", &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}", @@ -1384,17 +1266,17 @@ fn test_graphemes() { &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}", &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}", &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}", - &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}", - &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}", - &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}", - &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}", - &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}", - &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}", - &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}", - &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}", - &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}", + &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11a8}\u{903}", + &["\u{11a8}\u{903}"], &["\u{11a8}", "\u{903}"]), ("\u{11a8}\u{308}\u{903}", + &["\u{11a8}\u{308}\u{903}"], &["\u{11a8}\u{308}", "\u{903}"]), ("\u{ac00}\u{903}", + &["\u{ac00}\u{903}"], &["\u{ac00}", 
"\u{903}"]), ("\u{ac00}\u{308}\u{903}", + &["\u{ac00}\u{308}\u{903}"], &["\u{ac00}\u{308}", "\u{903}"]), ("\u{ac01}\u{903}", + &["\u{ac01}\u{903}"], &["\u{ac01}", "\u{903}"]), ("\u{ac01}\u{308}\u{903}", + &["\u{ac01}\u{308}\u{903}"], &["\u{ac01}\u{308}", "\u{903}"]), ("\u{1f1e6}\u{903}", + &["\u{1f1e6}\u{903}"], &["\u{1f1e6}", "\u{903}"]), ("\u{1f1e6}\u{308}\u{903}", + &["\u{1f1e6}\u{308}\u{903}"], &["\u{1f1e6}\u{308}", "\u{903}"]), ("\u{378}\u{903}", &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}", - &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]), + &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]) ]; for &(s, g) in &test_same[..] { @@ -1407,7 +1289,7 @@ fn test_graphemes() { assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().cloned())); } - for &(s, gt, gf) in &test_diff { + for &(s, gt, gf) in &test_diff[..] { // test forward iterator assert!(order::equals(s.graphemes(true), gt.iter().cloned())); assert!(order::equals(s.graphemes(false), gf.iter().cloned())); @@ -1443,6 +1325,1072 @@ fn test_graphemes() { assert_eq!(gr, b); } +#[test] +fn test_split_words_uax29() { + use std::iter::order; + + // official Unicode test data + // http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt + let test_word: &[(&str, &[&str])] = &[ + ("\u{1}\u{1}", &["\u{1}", "\u{1}"]), ("\u{1}\u{308}\u{1}", &["\u{1}\u{308}", "\u{1}"]), + ("\u{1}\u{d}", &["\u{1}", "\u{d}"]), ("\u{1}\u{308}\u{d}", &["\u{1}\u{308}", "\u{d}"]), + ("\u{1}\u{a}", &["\u{1}", "\u{a}"]), ("\u{1}\u{308}\u{a}", &["\u{1}\u{308}", "\u{a}"]), + ("\u{1}\u{b}", &["\u{1}", "\u{b}"]), ("\u{1}\u{308}\u{b}", &["\u{1}\u{308}", "\u{b}"]), + ("\u{1}\u{3031}", &["\u{1}", "\u{3031}"]), ("\u{1}\u{308}\u{3031}", &["\u{1}\u{308}", + "\u{3031}"]), ("\u{1}\u{41}", &["\u{1}", "\u{41}"]), ("\u{1}\u{308}\u{41}", + &["\u{1}\u{308}", "\u{41}"]), ("\u{1}\u{3a}", &["\u{1}", "\u{3a}"]), ("\u{1}\u{308}\u{3a}", + &["\u{1}\u{308}", "\u{3a}"]), ("\u{1}\u{2c}", &["\u{1}", 
"\u{2c}"]), ("\u{1}\u{308}\u{2c}", + &["\u{1}\u{308}", "\u{2c}"]), ("\u{1}\u{2e}", &["\u{1}", "\u{2e}"]), ("\u{1}\u{308}\u{2e}", + &["\u{1}\u{308}", "\u{2e}"]), ("\u{1}\u{30}", &["\u{1}", "\u{30}"]), ("\u{1}\u{308}\u{30}", + &["\u{1}\u{308}", "\u{30}"]), ("\u{1}\u{5f}", &["\u{1}", "\u{5f}"]), ("\u{1}\u{308}\u{5f}", + &["\u{1}\u{308}", "\u{5f}"]), ("\u{1}\u{1f1e6}", &["\u{1}", "\u{1f1e6}"]), + ("\u{1}\u{308}\u{1f1e6}", &["\u{1}\u{308}", "\u{1f1e6}"]), ("\u{1}\u{5d0}", &["\u{1}", + "\u{5d0}"]), ("\u{1}\u{308}\u{5d0}", &["\u{1}\u{308}", "\u{5d0}"]), ("\u{1}\u{22}", + &["\u{1}", "\u{22}"]), ("\u{1}\u{308}\u{22}", &["\u{1}\u{308}", "\u{22}"]), ("\u{1}\u{27}", + &["\u{1}", "\u{27}"]), ("\u{1}\u{308}\u{27}", &["\u{1}\u{308}", "\u{27}"]), ("\u{1}\u{ad}", + &["\u{1}\u{ad}"]), ("\u{1}\u{308}\u{ad}", &["\u{1}\u{308}\u{ad}"]), ("\u{1}\u{300}", + &["\u{1}\u{300}"]), ("\u{1}\u{308}\u{300}", &["\u{1}\u{308}\u{300}"]), + ("\u{1}\u{61}\u{2060}", &["\u{1}", "\u{61}\u{2060}"]), ("\u{1}\u{308}\u{61}\u{2060}", + &["\u{1}\u{308}", "\u{61}\u{2060}"]), ("\u{1}\u{61}\u{3a}", &["\u{1}", "\u{61}", "\u{3a}"]), + ("\u{1}\u{308}\u{61}\u{3a}", &["\u{1}\u{308}", "\u{61}", "\u{3a}"]), ("\u{1}\u{61}\u{27}", + &["\u{1}", "\u{61}", "\u{27}"]), ("\u{1}\u{308}\u{61}\u{27}", &["\u{1}\u{308}", "\u{61}", + "\u{27}"]), ("\u{1}\u{61}\u{27}\u{2060}", &["\u{1}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{1}\u{308}\u{61}\u{27}\u{2060}", &["\u{1}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{1}\u{61}\u{2c}", &["\u{1}", "\u{61}", "\u{2c}"]), ("\u{1}\u{308}\u{61}\u{2c}", + &["\u{1}\u{308}", "\u{61}", "\u{2c}"]), ("\u{1}\u{31}\u{3a}", &["\u{1}", "\u{31}", + "\u{3a}"]), ("\u{1}\u{308}\u{31}\u{3a}", &["\u{1}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{1}\u{31}\u{27}", &["\u{1}", "\u{31}", "\u{27}"]), ("\u{1}\u{308}\u{31}\u{27}", + &["\u{1}\u{308}", "\u{31}", "\u{27}"]), ("\u{1}\u{31}\u{2c}", &["\u{1}", "\u{31}", + "\u{2c}"]), ("\u{1}\u{308}\u{31}\u{2c}", &["\u{1}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{1}\u{31}\u{2e}\u{2060}", 
&["\u{1}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{1}\u{308}\u{31}\u{2e}\u{2060}", &["\u{1}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{d}\u{1}", &["\u{d}", "\u{1}"]), ("\u{d}\u{308}\u{1}", &["\u{d}", "\u{308}", "\u{1}"]), + ("\u{d}\u{d}", &["\u{d}", "\u{d}"]), ("\u{d}\u{308}\u{d}", &["\u{d}", "\u{308}", "\u{d}"]), + ("\u{d}\u{a}", &["\u{d}\u{a}"]), ("\u{d}\u{308}\u{a}", &["\u{d}", "\u{308}", "\u{a}"]), + ("\u{d}\u{b}", &["\u{d}", "\u{b}"]), ("\u{d}\u{308}\u{b}", &["\u{d}", "\u{308}", "\u{b}"]), + ("\u{d}\u{3031}", &["\u{d}", "\u{3031}"]), ("\u{d}\u{308}\u{3031}", &["\u{d}", "\u{308}", + "\u{3031}"]), ("\u{d}\u{41}", &["\u{d}", "\u{41}"]), ("\u{d}\u{308}\u{41}", &["\u{d}", + "\u{308}", "\u{41}"]), ("\u{d}\u{3a}", &["\u{d}", "\u{3a}"]), ("\u{d}\u{308}\u{3a}", + &["\u{d}", "\u{308}", "\u{3a}"]), ("\u{d}\u{2c}", &["\u{d}", "\u{2c}"]), + ("\u{d}\u{308}\u{2c}", &["\u{d}", "\u{308}", "\u{2c}"]), ("\u{d}\u{2e}", &["\u{d}", + "\u{2e}"]), ("\u{d}\u{308}\u{2e}", &["\u{d}", "\u{308}", "\u{2e}"]), ("\u{d}\u{30}", + &["\u{d}", "\u{30}"]), ("\u{d}\u{308}\u{30}", &["\u{d}", "\u{308}", "\u{30}"]), + ("\u{d}\u{5f}", &["\u{d}", "\u{5f}"]), ("\u{d}\u{308}\u{5f}", &["\u{d}", "\u{308}", + "\u{5f}"]), ("\u{d}\u{1f1e6}", &["\u{d}", "\u{1f1e6}"]), ("\u{d}\u{308}\u{1f1e6}", + &["\u{d}", "\u{308}", "\u{1f1e6}"]), ("\u{d}\u{5d0}", &["\u{d}", "\u{5d0}"]), + ("\u{d}\u{308}\u{5d0}", &["\u{d}", "\u{308}", "\u{5d0}"]), ("\u{d}\u{22}", &["\u{d}", + "\u{22}"]), ("\u{d}\u{308}\u{22}", &["\u{d}", "\u{308}", "\u{22}"]), ("\u{d}\u{27}", + &["\u{d}", "\u{27}"]), ("\u{d}\u{308}\u{27}", &["\u{d}", "\u{308}", "\u{27}"]), + ("\u{d}\u{ad}", &["\u{d}", "\u{ad}"]), ("\u{d}\u{308}\u{ad}", &["\u{d}", "\u{308}\u{ad}"]), + ("\u{d}\u{300}", &["\u{d}", "\u{300}"]), ("\u{d}\u{308}\u{300}", &["\u{d}", + "\u{308}\u{300}"]), ("\u{d}\u{61}\u{2060}", &["\u{d}", "\u{61}\u{2060}"]), + ("\u{d}\u{308}\u{61}\u{2060}", &["\u{d}", "\u{308}", "\u{61}\u{2060}"]), + ("\u{d}\u{61}\u{3a}", &["\u{d}", "\u{61}", "\u{3a}"]), 
("\u{d}\u{308}\u{61}\u{3a}", + &["\u{d}", "\u{308}", "\u{61}", "\u{3a}"]), ("\u{d}\u{61}\u{27}", &["\u{d}", "\u{61}", + "\u{27}"]), ("\u{d}\u{308}\u{61}\u{27}", &["\u{d}", "\u{308}", "\u{61}", "\u{27}"]), + ("\u{d}\u{61}\u{27}\u{2060}", &["\u{d}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{d}\u{308}\u{61}\u{27}\u{2060}", &["\u{d}", "\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{d}\u{61}\u{2c}", &["\u{d}", "\u{61}", "\u{2c}"]), ("\u{d}\u{308}\u{61}\u{2c}", + &["\u{d}", "\u{308}", "\u{61}", "\u{2c}"]), ("\u{d}\u{31}\u{3a}", &["\u{d}", "\u{31}", + "\u{3a}"]), ("\u{d}\u{308}\u{31}\u{3a}", &["\u{d}", "\u{308}", "\u{31}", "\u{3a}"]), + ("\u{d}\u{31}\u{27}", &["\u{d}", "\u{31}", "\u{27}"]), ("\u{d}\u{308}\u{31}\u{27}", + &["\u{d}", "\u{308}", "\u{31}", "\u{27}"]), ("\u{d}\u{31}\u{2c}", &["\u{d}", "\u{31}", + "\u{2c}"]), ("\u{d}\u{308}\u{31}\u{2c}", &["\u{d}", "\u{308}", "\u{31}", "\u{2c}"]), + ("\u{d}\u{31}\u{2e}\u{2060}", &["\u{d}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{d}\u{308}\u{31}\u{2e}\u{2060}", &["\u{d}", "\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{a}\u{1}", &["\u{a}", "\u{1}"]), ("\u{a}\u{308}\u{1}", &["\u{a}", "\u{308}", "\u{1}"]), + ("\u{a}\u{d}", &["\u{a}", "\u{d}"]), ("\u{a}\u{308}\u{d}", &["\u{a}", "\u{308}", "\u{d}"]), + ("\u{a}\u{a}", &["\u{a}", "\u{a}"]), ("\u{a}\u{308}\u{a}", &["\u{a}", "\u{308}", "\u{a}"]), + ("\u{a}\u{b}", &["\u{a}", "\u{b}"]), ("\u{a}\u{308}\u{b}", &["\u{a}", "\u{308}", "\u{b}"]), + ("\u{a}\u{3031}", &["\u{a}", "\u{3031}"]), ("\u{a}\u{308}\u{3031}", &["\u{a}", "\u{308}", + "\u{3031}"]), ("\u{a}\u{41}", &["\u{a}", "\u{41}"]), ("\u{a}\u{308}\u{41}", &["\u{a}", + "\u{308}", "\u{41}"]), ("\u{a}\u{3a}", &["\u{a}", "\u{3a}"]), ("\u{a}\u{308}\u{3a}", + &["\u{a}", "\u{308}", "\u{3a}"]), ("\u{a}\u{2c}", &["\u{a}", "\u{2c}"]), + ("\u{a}\u{308}\u{2c}", &["\u{a}", "\u{308}", "\u{2c}"]), ("\u{a}\u{2e}", &["\u{a}", + "\u{2e}"]), ("\u{a}\u{308}\u{2e}", &["\u{a}", "\u{308}", "\u{2e}"]), ("\u{a}\u{30}", + &["\u{a}", "\u{30}"]), ("\u{a}\u{308}\u{30}", 
&["\u{a}", "\u{308}", "\u{30}"]), + ("\u{a}\u{5f}", &["\u{a}", "\u{5f}"]), ("\u{a}\u{308}\u{5f}", &["\u{a}", "\u{308}", + "\u{5f}"]), ("\u{a}\u{1f1e6}", &["\u{a}", "\u{1f1e6}"]), ("\u{a}\u{308}\u{1f1e6}", + &["\u{a}", "\u{308}", "\u{1f1e6}"]), ("\u{a}\u{5d0}", &["\u{a}", "\u{5d0}"]), + ("\u{a}\u{308}\u{5d0}", &["\u{a}", "\u{308}", "\u{5d0}"]), ("\u{a}\u{22}", &["\u{a}", + "\u{22}"]), ("\u{a}\u{308}\u{22}", &["\u{a}", "\u{308}", "\u{22}"]), ("\u{a}\u{27}", + &["\u{a}", "\u{27}"]), ("\u{a}\u{308}\u{27}", &["\u{a}", "\u{308}", "\u{27}"]), + ("\u{a}\u{ad}", &["\u{a}", "\u{ad}"]), ("\u{a}\u{308}\u{ad}", &["\u{a}", "\u{308}\u{ad}"]), + ("\u{a}\u{300}", &["\u{a}", "\u{300}"]), ("\u{a}\u{308}\u{300}", &["\u{a}", + "\u{308}\u{300}"]), ("\u{a}\u{61}\u{2060}", &["\u{a}", "\u{61}\u{2060}"]), + ("\u{a}\u{308}\u{61}\u{2060}", &["\u{a}", "\u{308}", "\u{61}\u{2060}"]), + ("\u{a}\u{61}\u{3a}", &["\u{a}", "\u{61}", "\u{3a}"]), ("\u{a}\u{308}\u{61}\u{3a}", + &["\u{a}", "\u{308}", "\u{61}", "\u{3a}"]), ("\u{a}\u{61}\u{27}", &["\u{a}", "\u{61}", + "\u{27}"]), ("\u{a}\u{308}\u{61}\u{27}", &["\u{a}", "\u{308}", "\u{61}", "\u{27}"]), + ("\u{a}\u{61}\u{27}\u{2060}", &["\u{a}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{a}\u{308}\u{61}\u{27}\u{2060}", &["\u{a}", "\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{a}\u{61}\u{2c}", &["\u{a}", "\u{61}", "\u{2c}"]), ("\u{a}\u{308}\u{61}\u{2c}", + &["\u{a}", "\u{308}", "\u{61}", "\u{2c}"]), ("\u{a}\u{31}\u{3a}", &["\u{a}", "\u{31}", + "\u{3a}"]), ("\u{a}\u{308}\u{31}\u{3a}", &["\u{a}", "\u{308}", "\u{31}", "\u{3a}"]), + ("\u{a}\u{31}\u{27}", &["\u{a}", "\u{31}", "\u{27}"]), ("\u{a}\u{308}\u{31}\u{27}", + &["\u{a}", "\u{308}", "\u{31}", "\u{27}"]), ("\u{a}\u{31}\u{2c}", &["\u{a}", "\u{31}", + "\u{2c}"]), ("\u{a}\u{308}\u{31}\u{2c}", &["\u{a}", "\u{308}", "\u{31}", "\u{2c}"]), + ("\u{a}\u{31}\u{2e}\u{2060}", &["\u{a}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{a}\u{308}\u{31}\u{2e}\u{2060}", &["\u{a}", "\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{b}\u{1}", 
&["\u{b}", "\u{1}"]), ("\u{b}\u{308}\u{1}", &["\u{b}", "\u{308}", "\u{1}"]), + ("\u{b}\u{d}", &["\u{b}", "\u{d}"]), ("\u{b}\u{308}\u{d}", &["\u{b}", "\u{308}", "\u{d}"]), + ("\u{b}\u{a}", &["\u{b}", "\u{a}"]), ("\u{b}\u{308}\u{a}", &["\u{b}", "\u{308}", "\u{a}"]), + ("\u{b}\u{b}", &["\u{b}", "\u{b}"]), ("\u{b}\u{308}\u{b}", &["\u{b}", "\u{308}", "\u{b}"]), + ("\u{b}\u{3031}", &["\u{b}", "\u{3031}"]), ("\u{b}\u{308}\u{3031}", &["\u{b}", "\u{308}", + "\u{3031}"]), ("\u{b}\u{41}", &["\u{b}", "\u{41}"]), ("\u{b}\u{308}\u{41}", &["\u{b}", + "\u{308}", "\u{41}"]), ("\u{b}\u{3a}", &["\u{b}", "\u{3a}"]), ("\u{b}\u{308}\u{3a}", + &["\u{b}", "\u{308}", "\u{3a}"]), ("\u{b}\u{2c}", &["\u{b}", "\u{2c}"]), + ("\u{b}\u{308}\u{2c}", &["\u{b}", "\u{308}", "\u{2c}"]), ("\u{b}\u{2e}", &["\u{b}", + "\u{2e}"]), ("\u{b}\u{308}\u{2e}", &["\u{b}", "\u{308}", "\u{2e}"]), ("\u{b}\u{30}", + &["\u{b}", "\u{30}"]), ("\u{b}\u{308}\u{30}", &["\u{b}", "\u{308}", "\u{30}"]), + ("\u{b}\u{5f}", &["\u{b}", "\u{5f}"]), ("\u{b}\u{308}\u{5f}", &["\u{b}", "\u{308}", + "\u{5f}"]), ("\u{b}\u{1f1e6}", &["\u{b}", "\u{1f1e6}"]), ("\u{b}\u{308}\u{1f1e6}", + &["\u{b}", "\u{308}", "\u{1f1e6}"]), ("\u{b}\u{5d0}", &["\u{b}", "\u{5d0}"]), + ("\u{b}\u{308}\u{5d0}", &["\u{b}", "\u{308}", "\u{5d0}"]), ("\u{b}\u{22}", &["\u{b}", + "\u{22}"]), ("\u{b}\u{308}\u{22}", &["\u{b}", "\u{308}", "\u{22}"]), ("\u{b}\u{27}", + &["\u{b}", "\u{27}"]), ("\u{b}\u{308}\u{27}", &["\u{b}", "\u{308}", "\u{27}"]), + ("\u{b}\u{ad}", &["\u{b}", "\u{ad}"]), ("\u{b}\u{308}\u{ad}", &["\u{b}", "\u{308}\u{ad}"]), + ("\u{b}\u{300}", &["\u{b}", "\u{300}"]), ("\u{b}\u{308}\u{300}", &["\u{b}", + "\u{308}\u{300}"]), ("\u{b}\u{61}\u{2060}", &["\u{b}", "\u{61}\u{2060}"]), + ("\u{b}\u{308}\u{61}\u{2060}", &["\u{b}", "\u{308}", "\u{61}\u{2060}"]), + ("\u{b}\u{61}\u{3a}", &["\u{b}", "\u{61}", "\u{3a}"]), ("\u{b}\u{308}\u{61}\u{3a}", + &["\u{b}", "\u{308}", "\u{61}", "\u{3a}"]), ("\u{b}\u{61}\u{27}", &["\u{b}", "\u{61}", + "\u{27}"]), 
("\u{b}\u{308}\u{61}\u{27}", &["\u{b}", "\u{308}", "\u{61}", "\u{27}"]), + ("\u{b}\u{61}\u{27}\u{2060}", &["\u{b}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{b}\u{308}\u{61}\u{27}\u{2060}", &["\u{b}", "\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{b}\u{61}\u{2c}", &["\u{b}", "\u{61}", "\u{2c}"]), ("\u{b}\u{308}\u{61}\u{2c}", + &["\u{b}", "\u{308}", "\u{61}", "\u{2c}"]), ("\u{b}\u{31}\u{3a}", &["\u{b}", "\u{31}", + "\u{3a}"]), ("\u{b}\u{308}\u{31}\u{3a}", &["\u{b}", "\u{308}", "\u{31}", "\u{3a}"]), + ("\u{b}\u{31}\u{27}", &["\u{b}", "\u{31}", "\u{27}"]), ("\u{b}\u{308}\u{31}\u{27}", + &["\u{b}", "\u{308}", "\u{31}", "\u{27}"]), ("\u{b}\u{31}\u{2c}", &["\u{b}", "\u{31}", + "\u{2c}"]), ("\u{b}\u{308}\u{31}\u{2c}", &["\u{b}", "\u{308}", "\u{31}", "\u{2c}"]), + ("\u{b}\u{31}\u{2e}\u{2060}", &["\u{b}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{b}\u{308}\u{31}\u{2e}\u{2060}", &["\u{b}", "\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{3031}\u{1}", &["\u{3031}", "\u{1}"]), ("\u{3031}\u{308}\u{1}", &["\u{3031}\u{308}", + "\u{1}"]), ("\u{3031}\u{d}", &["\u{3031}", "\u{d}"]), ("\u{3031}\u{308}\u{d}", + &["\u{3031}\u{308}", "\u{d}"]), ("\u{3031}\u{a}", &["\u{3031}", "\u{a}"]), + ("\u{3031}\u{308}\u{a}", &["\u{3031}\u{308}", "\u{a}"]), ("\u{3031}\u{b}", &["\u{3031}", + "\u{b}"]), ("\u{3031}\u{308}\u{b}", &["\u{3031}\u{308}", "\u{b}"]), ("\u{3031}\u{3031}", + &["\u{3031}\u{3031}"]), ("\u{3031}\u{308}\u{3031}", &["\u{3031}\u{308}\u{3031}"]), + ("\u{3031}\u{41}", &["\u{3031}", "\u{41}"]), ("\u{3031}\u{308}\u{41}", &["\u{3031}\u{308}", + "\u{41}"]), ("\u{3031}\u{3a}", &["\u{3031}", "\u{3a}"]), ("\u{3031}\u{308}\u{3a}", + &["\u{3031}\u{308}", "\u{3a}"]), ("\u{3031}\u{2c}", &["\u{3031}", "\u{2c}"]), + ("\u{3031}\u{308}\u{2c}", &["\u{3031}\u{308}", "\u{2c}"]), ("\u{3031}\u{2e}", &["\u{3031}", + "\u{2e}"]), ("\u{3031}\u{308}\u{2e}", &["\u{3031}\u{308}", "\u{2e}"]), ("\u{3031}\u{30}", + &["\u{3031}", "\u{30}"]), ("\u{3031}\u{308}\u{30}", &["\u{3031}\u{308}", "\u{30}"]), + ("\u{3031}\u{5f}", 
&["\u{3031}\u{5f}"]), ("\u{3031}\u{308}\u{5f}", + &["\u{3031}\u{308}\u{5f}"]), ("\u{3031}\u{1f1e6}", &["\u{3031}", "\u{1f1e6}"]), + ("\u{3031}\u{308}\u{1f1e6}", &["\u{3031}\u{308}", "\u{1f1e6}"]), ("\u{3031}\u{5d0}", + &["\u{3031}", "\u{5d0}"]), ("\u{3031}\u{308}\u{5d0}", &["\u{3031}\u{308}", "\u{5d0}"]), + ("\u{3031}\u{22}", &["\u{3031}", "\u{22}"]), ("\u{3031}\u{308}\u{22}", &["\u{3031}\u{308}", + "\u{22}"]), ("\u{3031}\u{27}", &["\u{3031}", "\u{27}"]), ("\u{3031}\u{308}\u{27}", + &["\u{3031}\u{308}", "\u{27}"]), ("\u{3031}\u{ad}", &["\u{3031}\u{ad}"]), + ("\u{3031}\u{308}\u{ad}", &["\u{3031}\u{308}\u{ad}"]), ("\u{3031}\u{300}", + &["\u{3031}\u{300}"]), ("\u{3031}\u{308}\u{300}", &["\u{3031}\u{308}\u{300}"]), + ("\u{3031}\u{61}\u{2060}", &["\u{3031}", "\u{61}\u{2060}"]), + ("\u{3031}\u{308}\u{61}\u{2060}", &["\u{3031}\u{308}", "\u{61}\u{2060}"]), + ("\u{3031}\u{61}\u{3a}", &["\u{3031}", "\u{61}", "\u{3a}"]), ("\u{3031}\u{308}\u{61}\u{3a}", + &["\u{3031}\u{308}", "\u{61}", "\u{3a}"]), ("\u{3031}\u{61}\u{27}", &["\u{3031}", "\u{61}", + "\u{27}"]), ("\u{3031}\u{308}\u{61}\u{27}", &["\u{3031}\u{308}", "\u{61}", "\u{27}"]), + ("\u{3031}\u{61}\u{27}\u{2060}", &["\u{3031}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{3031}\u{308}\u{61}\u{27}\u{2060}", &["\u{3031}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{3031}\u{61}\u{2c}", &["\u{3031}", "\u{61}", "\u{2c}"]), ("\u{3031}\u{308}\u{61}\u{2c}", + &["\u{3031}\u{308}", "\u{61}", "\u{2c}"]), ("\u{3031}\u{31}\u{3a}", &["\u{3031}", "\u{31}", + "\u{3a}"]), ("\u{3031}\u{308}\u{31}\u{3a}", &["\u{3031}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{3031}\u{31}\u{27}", &["\u{3031}", "\u{31}", "\u{27}"]), ("\u{3031}\u{308}\u{31}\u{27}", + &["\u{3031}\u{308}", "\u{31}", "\u{27}"]), ("\u{3031}\u{31}\u{2c}", &["\u{3031}", "\u{31}", + "\u{2c}"]), ("\u{3031}\u{308}\u{31}\u{2c}", &["\u{3031}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{3031}\u{31}\u{2e}\u{2060}", &["\u{3031}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{3031}\u{308}\u{31}\u{2e}\u{2060}", 
&["\u{3031}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{41}\u{1}", &["\u{41}", "\u{1}"]), ("\u{41}\u{308}\u{1}", &["\u{41}\u{308}", "\u{1}"]), + ("\u{41}\u{d}", &["\u{41}", "\u{d}"]), ("\u{41}\u{308}\u{d}", &["\u{41}\u{308}", "\u{d}"]), + ("\u{41}\u{a}", &["\u{41}", "\u{a}"]), ("\u{41}\u{308}\u{a}", &["\u{41}\u{308}", "\u{a}"]), + ("\u{41}\u{b}", &["\u{41}", "\u{b}"]), ("\u{41}\u{308}\u{b}", &["\u{41}\u{308}", "\u{b}"]), + ("\u{41}\u{3031}", &["\u{41}", "\u{3031}"]), ("\u{41}\u{308}\u{3031}", &["\u{41}\u{308}", + "\u{3031}"]), ("\u{41}\u{41}", &["\u{41}\u{41}"]), ("\u{41}\u{308}\u{41}", + &["\u{41}\u{308}\u{41}"]), ("\u{41}\u{3a}", &["\u{41}", "\u{3a}"]), ("\u{41}\u{308}\u{3a}", + &["\u{41}\u{308}", "\u{3a}"]), ("\u{41}\u{2c}", &["\u{41}", "\u{2c}"]), + ("\u{41}\u{308}\u{2c}", &["\u{41}\u{308}", "\u{2c}"]), ("\u{41}\u{2e}", &["\u{41}", + "\u{2e}"]), ("\u{41}\u{308}\u{2e}", &["\u{41}\u{308}", "\u{2e}"]), ("\u{41}\u{30}", + &["\u{41}\u{30}"]), ("\u{41}\u{308}\u{30}", &["\u{41}\u{308}\u{30}"]), ("\u{41}\u{5f}", + &["\u{41}\u{5f}"]), ("\u{41}\u{308}\u{5f}", &["\u{41}\u{308}\u{5f}"]), ("\u{41}\u{1f1e6}", + &["\u{41}", "\u{1f1e6}"]), ("\u{41}\u{308}\u{1f1e6}", &["\u{41}\u{308}", "\u{1f1e6}"]), + ("\u{41}\u{5d0}", &["\u{41}\u{5d0}"]), ("\u{41}\u{308}\u{5d0}", &["\u{41}\u{308}\u{5d0}"]), + ("\u{41}\u{22}", &["\u{41}", "\u{22}"]), ("\u{41}\u{308}\u{22}", &["\u{41}\u{308}", + "\u{22}"]), ("\u{41}\u{27}", &["\u{41}", "\u{27}"]), ("\u{41}\u{308}\u{27}", + &["\u{41}\u{308}", "\u{27}"]), ("\u{41}\u{ad}", &["\u{41}\u{ad}"]), ("\u{41}\u{308}\u{ad}", + &["\u{41}\u{308}\u{ad}"]), ("\u{41}\u{300}", &["\u{41}\u{300}"]), ("\u{41}\u{308}\u{300}", + &["\u{41}\u{308}\u{300}"]), ("\u{41}\u{61}\u{2060}", &["\u{41}\u{61}\u{2060}"]), + ("\u{41}\u{308}\u{61}\u{2060}", &["\u{41}\u{308}\u{61}\u{2060}"]), ("\u{41}\u{61}\u{3a}", + &["\u{41}\u{61}", "\u{3a}"]), ("\u{41}\u{308}\u{61}\u{3a}", &["\u{41}\u{308}\u{61}", + "\u{3a}"]), ("\u{41}\u{61}\u{27}", &["\u{41}\u{61}", "\u{27}"]), + 
("\u{41}\u{308}\u{61}\u{27}", &["\u{41}\u{308}\u{61}", "\u{27}"]), + ("\u{41}\u{61}\u{27}\u{2060}", &["\u{41}\u{61}", "\u{27}\u{2060}"]), + ("\u{41}\u{308}\u{61}\u{27}\u{2060}", &["\u{41}\u{308}\u{61}", "\u{27}\u{2060}"]), + ("\u{41}\u{61}\u{2c}", &["\u{41}\u{61}", "\u{2c}"]), ("\u{41}\u{308}\u{61}\u{2c}", + &["\u{41}\u{308}\u{61}", "\u{2c}"]), ("\u{41}\u{31}\u{3a}", &["\u{41}\u{31}", "\u{3a}"]), + ("\u{41}\u{308}\u{31}\u{3a}", &["\u{41}\u{308}\u{31}", "\u{3a}"]), ("\u{41}\u{31}\u{27}", + &["\u{41}\u{31}", "\u{27}"]), ("\u{41}\u{308}\u{31}\u{27}", &["\u{41}\u{308}\u{31}", + "\u{27}"]), ("\u{41}\u{31}\u{2c}", &["\u{41}\u{31}", "\u{2c}"]), + ("\u{41}\u{308}\u{31}\u{2c}", &["\u{41}\u{308}\u{31}", "\u{2c}"]), + ("\u{41}\u{31}\u{2e}\u{2060}", &["\u{41}\u{31}", "\u{2e}\u{2060}"]), + ("\u{41}\u{308}\u{31}\u{2e}\u{2060}", &["\u{41}\u{308}\u{31}", "\u{2e}\u{2060}"]), + ("\u{3a}\u{1}", &["\u{3a}", "\u{1}"]), ("\u{3a}\u{308}\u{1}", &["\u{3a}\u{308}", "\u{1}"]), + ("\u{3a}\u{d}", &["\u{3a}", "\u{d}"]), ("\u{3a}\u{308}\u{d}", &["\u{3a}\u{308}", "\u{d}"]), + ("\u{3a}\u{a}", &["\u{3a}", "\u{a}"]), ("\u{3a}\u{308}\u{a}", &["\u{3a}\u{308}", "\u{a}"]), + ("\u{3a}\u{b}", &["\u{3a}", "\u{b}"]), ("\u{3a}\u{308}\u{b}", &["\u{3a}\u{308}", "\u{b}"]), + ("\u{3a}\u{3031}", &["\u{3a}", "\u{3031}"]), ("\u{3a}\u{308}\u{3031}", &["\u{3a}\u{308}", + "\u{3031}"]), ("\u{3a}\u{41}", &["\u{3a}", "\u{41}"]), ("\u{3a}\u{308}\u{41}", + &["\u{3a}\u{308}", "\u{41}"]), ("\u{3a}\u{3a}", &["\u{3a}", "\u{3a}"]), + ("\u{3a}\u{308}\u{3a}", &["\u{3a}\u{308}", "\u{3a}"]), ("\u{3a}\u{2c}", &["\u{3a}", + "\u{2c}"]), ("\u{3a}\u{308}\u{2c}", &["\u{3a}\u{308}", "\u{2c}"]), ("\u{3a}\u{2e}", + &["\u{3a}", "\u{2e}"]), ("\u{3a}\u{308}\u{2e}", &["\u{3a}\u{308}", "\u{2e}"]), + ("\u{3a}\u{30}", &["\u{3a}", "\u{30}"]), ("\u{3a}\u{308}\u{30}", &["\u{3a}\u{308}", + "\u{30}"]), ("\u{3a}\u{5f}", &["\u{3a}", "\u{5f}"]), ("\u{3a}\u{308}\u{5f}", + &["\u{3a}\u{308}", "\u{5f}"]), ("\u{3a}\u{1f1e6}", &["\u{3a}", "\u{1f1e6}"]), + 
("\u{3a}\u{308}\u{1f1e6}", &["\u{3a}\u{308}", "\u{1f1e6}"]), ("\u{3a}\u{5d0}", &["\u{3a}", + "\u{5d0}"]), ("\u{3a}\u{308}\u{5d0}", &["\u{3a}\u{308}", "\u{5d0}"]), ("\u{3a}\u{22}", + &["\u{3a}", "\u{22}"]), ("\u{3a}\u{308}\u{22}", &["\u{3a}\u{308}", "\u{22}"]), + ("\u{3a}\u{27}", &["\u{3a}", "\u{27}"]), ("\u{3a}\u{308}\u{27}", &["\u{3a}\u{308}", + "\u{27}"]), ("\u{3a}\u{ad}", &["\u{3a}\u{ad}"]), ("\u{3a}\u{308}\u{ad}", + &["\u{3a}\u{308}\u{ad}"]), ("\u{3a}\u{300}", &["\u{3a}\u{300}"]), ("\u{3a}\u{308}\u{300}", + &["\u{3a}\u{308}\u{300}"]), ("\u{3a}\u{61}\u{2060}", &["\u{3a}", "\u{61}\u{2060}"]), + ("\u{3a}\u{308}\u{61}\u{2060}", &["\u{3a}\u{308}", "\u{61}\u{2060}"]), + ("\u{3a}\u{61}\u{3a}", &["\u{3a}", "\u{61}", "\u{3a}"]), ("\u{3a}\u{308}\u{61}\u{3a}", + &["\u{3a}\u{308}", "\u{61}", "\u{3a}"]), ("\u{3a}\u{61}\u{27}", &["\u{3a}", "\u{61}", + "\u{27}"]), ("\u{3a}\u{308}\u{61}\u{27}", &["\u{3a}\u{308}", "\u{61}", "\u{27}"]), + ("\u{3a}\u{61}\u{27}\u{2060}", &["\u{3a}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{3a}\u{308}\u{61}\u{27}\u{2060}", &["\u{3a}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{3a}\u{61}\u{2c}", &["\u{3a}", "\u{61}", "\u{2c}"]), ("\u{3a}\u{308}\u{61}\u{2c}", + &["\u{3a}\u{308}", "\u{61}", "\u{2c}"]), ("\u{3a}\u{31}\u{3a}", &["\u{3a}", "\u{31}", + "\u{3a}"]), ("\u{3a}\u{308}\u{31}\u{3a}", &["\u{3a}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{3a}\u{31}\u{27}", &["\u{3a}", "\u{31}", "\u{27}"]), ("\u{3a}\u{308}\u{31}\u{27}", + &["\u{3a}\u{308}", "\u{31}", "\u{27}"]), ("\u{3a}\u{31}\u{2c}", &["\u{3a}", "\u{31}", + "\u{2c}"]), ("\u{3a}\u{308}\u{31}\u{2c}", &["\u{3a}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{3a}\u{31}\u{2e}\u{2060}", &["\u{3a}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{3a}\u{308}\u{31}\u{2e}\u{2060}", &["\u{3a}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{2c}\u{1}", &["\u{2c}", "\u{1}"]), ("\u{2c}\u{308}\u{1}", &["\u{2c}\u{308}", "\u{1}"]), + ("\u{2c}\u{d}", &["\u{2c}", "\u{d}"]), ("\u{2c}\u{308}\u{d}", &["\u{2c}\u{308}", "\u{d}"]), + ("\u{2c}\u{a}", 
&["\u{2c}", "\u{a}"]), ("\u{2c}\u{308}\u{a}", &["\u{2c}\u{308}", "\u{a}"]), + ("\u{2c}\u{b}", &["\u{2c}", "\u{b}"]), ("\u{2c}\u{308}\u{b}", &["\u{2c}\u{308}", "\u{b}"]), + ("\u{2c}\u{3031}", &["\u{2c}", "\u{3031}"]), ("\u{2c}\u{308}\u{3031}", &["\u{2c}\u{308}", + "\u{3031}"]), ("\u{2c}\u{41}", &["\u{2c}", "\u{41}"]), ("\u{2c}\u{308}\u{41}", + &["\u{2c}\u{308}", "\u{41}"]), ("\u{2c}\u{3a}", &["\u{2c}", "\u{3a}"]), + ("\u{2c}\u{308}\u{3a}", &["\u{2c}\u{308}", "\u{3a}"]), ("\u{2c}\u{2c}", &["\u{2c}", + "\u{2c}"]), ("\u{2c}\u{308}\u{2c}", &["\u{2c}\u{308}", "\u{2c}"]), ("\u{2c}\u{2e}", + &["\u{2c}", "\u{2e}"]), ("\u{2c}\u{308}\u{2e}", &["\u{2c}\u{308}", "\u{2e}"]), + ("\u{2c}\u{30}", &["\u{2c}", "\u{30}"]), ("\u{2c}\u{308}\u{30}", &["\u{2c}\u{308}", + "\u{30}"]), ("\u{2c}\u{5f}", &["\u{2c}", "\u{5f}"]), ("\u{2c}\u{308}\u{5f}", + &["\u{2c}\u{308}", "\u{5f}"]), ("\u{2c}\u{1f1e6}", &["\u{2c}", "\u{1f1e6}"]), + ("\u{2c}\u{308}\u{1f1e6}", &["\u{2c}\u{308}", "\u{1f1e6}"]), ("\u{2c}\u{5d0}", &["\u{2c}", + "\u{5d0}"]), ("\u{2c}\u{308}\u{5d0}", &["\u{2c}\u{308}", "\u{5d0}"]), ("\u{2c}\u{22}", + &["\u{2c}", "\u{22}"]), ("\u{2c}\u{308}\u{22}", &["\u{2c}\u{308}", "\u{22}"]), + ("\u{2c}\u{27}", &["\u{2c}", "\u{27}"]), ("\u{2c}\u{308}\u{27}", &["\u{2c}\u{308}", + "\u{27}"]), ("\u{2c}\u{ad}", &["\u{2c}\u{ad}"]), ("\u{2c}\u{308}\u{ad}", + &["\u{2c}\u{308}\u{ad}"]), ("\u{2c}\u{300}", &["\u{2c}\u{300}"]), ("\u{2c}\u{308}\u{300}", + &["\u{2c}\u{308}\u{300}"]), ("\u{2c}\u{61}\u{2060}", &["\u{2c}", "\u{61}\u{2060}"]), + ("\u{2c}\u{308}\u{61}\u{2060}", &["\u{2c}\u{308}", "\u{61}\u{2060}"]), + ("\u{2c}\u{61}\u{3a}", &["\u{2c}", "\u{61}", "\u{3a}"]), ("\u{2c}\u{308}\u{61}\u{3a}", + &["\u{2c}\u{308}", "\u{61}", "\u{3a}"]), ("\u{2c}\u{61}\u{27}", &["\u{2c}", "\u{61}", + "\u{27}"]), ("\u{2c}\u{308}\u{61}\u{27}", &["\u{2c}\u{308}", "\u{61}", "\u{27}"]), + ("\u{2c}\u{61}\u{27}\u{2060}", &["\u{2c}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{2c}\u{308}\u{61}\u{27}\u{2060}", &["\u{2c}\u{308}", "\u{61}", 
"\u{27}\u{2060}"]), + ("\u{2c}\u{61}\u{2c}", &["\u{2c}", "\u{61}", "\u{2c}"]), ("\u{2c}\u{308}\u{61}\u{2c}", + &["\u{2c}\u{308}", "\u{61}", "\u{2c}"]), ("\u{2c}\u{31}\u{3a}", &["\u{2c}", "\u{31}", + "\u{3a}"]), ("\u{2c}\u{308}\u{31}\u{3a}", &["\u{2c}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{2c}\u{31}\u{27}", &["\u{2c}", "\u{31}", "\u{27}"]), ("\u{2c}\u{308}\u{31}\u{27}", + &["\u{2c}\u{308}", "\u{31}", "\u{27}"]), ("\u{2c}\u{31}\u{2c}", &["\u{2c}", "\u{31}", + "\u{2c}"]), ("\u{2c}\u{308}\u{31}\u{2c}", &["\u{2c}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{2c}\u{31}\u{2e}\u{2060}", &["\u{2c}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{2c}\u{308}\u{31}\u{2e}\u{2060}", &["\u{2c}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{2e}\u{1}", &["\u{2e}", "\u{1}"]), ("\u{2e}\u{308}\u{1}", &["\u{2e}\u{308}", "\u{1}"]), + ("\u{2e}\u{d}", &["\u{2e}", "\u{d}"]), ("\u{2e}\u{308}\u{d}", &["\u{2e}\u{308}", "\u{d}"]), + ("\u{2e}\u{a}", &["\u{2e}", "\u{a}"]), ("\u{2e}\u{308}\u{a}", &["\u{2e}\u{308}", "\u{a}"]), + ("\u{2e}\u{b}", &["\u{2e}", "\u{b}"]), ("\u{2e}\u{308}\u{b}", &["\u{2e}\u{308}", "\u{b}"]), + ("\u{2e}\u{3031}", &["\u{2e}", "\u{3031}"]), ("\u{2e}\u{308}\u{3031}", &["\u{2e}\u{308}", + "\u{3031}"]), ("\u{2e}\u{41}", &["\u{2e}", "\u{41}"]), ("\u{2e}\u{308}\u{41}", + &["\u{2e}\u{308}", "\u{41}"]), ("\u{2e}\u{3a}", &["\u{2e}", "\u{3a}"]), + ("\u{2e}\u{308}\u{3a}", &["\u{2e}\u{308}", "\u{3a}"]), ("\u{2e}\u{2c}", &["\u{2e}", + "\u{2c}"]), ("\u{2e}\u{308}\u{2c}", &["\u{2e}\u{308}", "\u{2c}"]), ("\u{2e}\u{2e}", + &["\u{2e}", "\u{2e}"]), ("\u{2e}\u{308}\u{2e}", &["\u{2e}\u{308}", "\u{2e}"]), + ("\u{2e}\u{30}", &["\u{2e}", "\u{30}"]), ("\u{2e}\u{308}\u{30}", &["\u{2e}\u{308}", + "\u{30}"]), ("\u{2e}\u{5f}", &["\u{2e}", "\u{5f}"]), ("\u{2e}\u{308}\u{5f}", + &["\u{2e}\u{308}", "\u{5f}"]), ("\u{2e}\u{1f1e6}", &["\u{2e}", "\u{1f1e6}"]), + ("\u{2e}\u{308}\u{1f1e6}", &["\u{2e}\u{308}", "\u{1f1e6}"]), ("\u{2e}\u{5d0}", &["\u{2e}", + "\u{5d0}"]), ("\u{2e}\u{308}\u{5d0}", &["\u{2e}\u{308}", "\u{5d0}"]), 
("\u{2e}\u{22}", + &["\u{2e}", "\u{22}"]), ("\u{2e}\u{308}\u{22}", &["\u{2e}\u{308}", "\u{22}"]), + ("\u{2e}\u{27}", &["\u{2e}", "\u{27}"]), ("\u{2e}\u{308}\u{27}", &["\u{2e}\u{308}", + "\u{27}"]), ("\u{2e}\u{ad}", &["\u{2e}\u{ad}"]), ("\u{2e}\u{308}\u{ad}", + &["\u{2e}\u{308}\u{ad}"]), ("\u{2e}\u{300}", &["\u{2e}\u{300}"]), ("\u{2e}\u{308}\u{300}", + &["\u{2e}\u{308}\u{300}"]), ("\u{2e}\u{61}\u{2060}", &["\u{2e}", "\u{61}\u{2060}"]), + ("\u{2e}\u{308}\u{61}\u{2060}", &["\u{2e}\u{308}", "\u{61}\u{2060}"]), + ("\u{2e}\u{61}\u{3a}", &["\u{2e}", "\u{61}", "\u{3a}"]), ("\u{2e}\u{308}\u{61}\u{3a}", + &["\u{2e}\u{308}", "\u{61}", "\u{3a}"]), ("\u{2e}\u{61}\u{27}", &["\u{2e}", "\u{61}", + "\u{27}"]), ("\u{2e}\u{308}\u{61}\u{27}", &["\u{2e}\u{308}", "\u{61}", "\u{27}"]), + ("\u{2e}\u{61}\u{27}\u{2060}", &["\u{2e}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{2e}\u{308}\u{61}\u{27}\u{2060}", &["\u{2e}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{2e}\u{61}\u{2c}", &["\u{2e}", "\u{61}", "\u{2c}"]), ("\u{2e}\u{308}\u{61}\u{2c}", + &["\u{2e}\u{308}", "\u{61}", "\u{2c}"]), ("\u{2e}\u{31}\u{3a}", &["\u{2e}", "\u{31}", + "\u{3a}"]), ("\u{2e}\u{308}\u{31}\u{3a}", &["\u{2e}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{2e}\u{31}\u{27}", &["\u{2e}", "\u{31}", "\u{27}"]), ("\u{2e}\u{308}\u{31}\u{27}", + &["\u{2e}\u{308}", "\u{31}", "\u{27}"]), ("\u{2e}\u{31}\u{2c}", &["\u{2e}", "\u{31}", + "\u{2c}"]), ("\u{2e}\u{308}\u{31}\u{2c}", &["\u{2e}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{2e}\u{31}\u{2e}\u{2060}", &["\u{2e}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{2e}\u{308}\u{31}\u{2e}\u{2060}", &["\u{2e}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{30}\u{1}", &["\u{30}", "\u{1}"]), ("\u{30}\u{308}\u{1}", &["\u{30}\u{308}", "\u{1}"]), + ("\u{30}\u{d}", &["\u{30}", "\u{d}"]), ("\u{30}\u{308}\u{d}", &["\u{30}\u{308}", "\u{d}"]), + ("\u{30}\u{a}", &["\u{30}", "\u{a}"]), ("\u{30}\u{308}\u{a}", &["\u{30}\u{308}", "\u{a}"]), + ("\u{30}\u{b}", &["\u{30}", "\u{b}"]), ("\u{30}\u{308}\u{b}", &["\u{30}\u{308}", 
"\u{b}"]), + ("\u{30}\u{3031}", &["\u{30}", "\u{3031}"]), ("\u{30}\u{308}\u{3031}", &["\u{30}\u{308}", + "\u{3031}"]), ("\u{30}\u{41}", &["\u{30}\u{41}"]), ("\u{30}\u{308}\u{41}", + &["\u{30}\u{308}\u{41}"]), ("\u{30}\u{3a}", &["\u{30}", "\u{3a}"]), ("\u{30}\u{308}\u{3a}", + &["\u{30}\u{308}", "\u{3a}"]), ("\u{30}\u{2c}", &["\u{30}", "\u{2c}"]), + ("\u{30}\u{308}\u{2c}", &["\u{30}\u{308}", "\u{2c}"]), ("\u{30}\u{2e}", &["\u{30}", + "\u{2e}"]), ("\u{30}\u{308}\u{2e}", &["\u{30}\u{308}", "\u{2e}"]), ("\u{30}\u{30}", + &["\u{30}\u{30}"]), ("\u{30}\u{308}\u{30}", &["\u{30}\u{308}\u{30}"]), ("\u{30}\u{5f}", + &["\u{30}\u{5f}"]), ("\u{30}\u{308}\u{5f}", &["\u{30}\u{308}\u{5f}"]), ("\u{30}\u{1f1e6}", + &["\u{30}", "\u{1f1e6}"]), ("\u{30}\u{308}\u{1f1e6}", &["\u{30}\u{308}", "\u{1f1e6}"]), + ("\u{30}\u{5d0}", &["\u{30}\u{5d0}"]), ("\u{30}\u{308}\u{5d0}", &["\u{30}\u{308}\u{5d0}"]), + ("\u{30}\u{22}", &["\u{30}", "\u{22}"]), ("\u{30}\u{308}\u{22}", &["\u{30}\u{308}", + "\u{22}"]), ("\u{30}\u{27}", &["\u{30}", "\u{27}"]), ("\u{30}\u{308}\u{27}", + &["\u{30}\u{308}", "\u{27}"]), ("\u{30}\u{ad}", &["\u{30}\u{ad}"]), ("\u{30}\u{308}\u{ad}", + &["\u{30}\u{308}\u{ad}"]), ("\u{30}\u{300}", &["\u{30}\u{300}"]), ("\u{30}\u{308}\u{300}", + &["\u{30}\u{308}\u{300}"]), ("\u{30}\u{61}\u{2060}", &["\u{30}\u{61}\u{2060}"]), + ("\u{30}\u{308}\u{61}\u{2060}", &["\u{30}\u{308}\u{61}\u{2060}"]), ("\u{30}\u{61}\u{3a}", + &["\u{30}\u{61}", "\u{3a}"]), ("\u{30}\u{308}\u{61}\u{3a}", &["\u{30}\u{308}\u{61}", + "\u{3a}"]), ("\u{30}\u{61}\u{27}", &["\u{30}\u{61}", "\u{27}"]), + ("\u{30}\u{308}\u{61}\u{27}", &["\u{30}\u{308}\u{61}", "\u{27}"]), + ("\u{30}\u{61}\u{27}\u{2060}", &["\u{30}\u{61}", "\u{27}\u{2060}"]), + ("\u{30}\u{308}\u{61}\u{27}\u{2060}", &["\u{30}\u{308}\u{61}", "\u{27}\u{2060}"]), + ("\u{30}\u{61}\u{2c}", &["\u{30}\u{61}", "\u{2c}"]), ("\u{30}\u{308}\u{61}\u{2c}", + &["\u{30}\u{308}\u{61}", "\u{2c}"]), ("\u{30}\u{31}\u{3a}", &["\u{30}\u{31}", "\u{3a}"]), + 
("\u{30}\u{308}\u{31}\u{3a}", &["\u{30}\u{308}\u{31}", "\u{3a}"]), ("\u{30}\u{31}\u{27}", + &["\u{30}\u{31}", "\u{27}"]), ("\u{30}\u{308}\u{31}\u{27}", &["\u{30}\u{308}\u{31}", + "\u{27}"]), ("\u{30}\u{31}\u{2c}", &["\u{30}\u{31}", "\u{2c}"]), + ("\u{30}\u{308}\u{31}\u{2c}", &["\u{30}\u{308}\u{31}", "\u{2c}"]), + ("\u{30}\u{31}\u{2e}\u{2060}", &["\u{30}\u{31}", "\u{2e}\u{2060}"]), + ("\u{30}\u{308}\u{31}\u{2e}\u{2060}", &["\u{30}\u{308}\u{31}", "\u{2e}\u{2060}"]), + ("\u{5f}\u{1}", &["\u{5f}", "\u{1}"]), ("\u{5f}\u{308}\u{1}", &["\u{5f}\u{308}", "\u{1}"]), + ("\u{5f}\u{d}", &["\u{5f}", "\u{d}"]), ("\u{5f}\u{308}\u{d}", &["\u{5f}\u{308}", "\u{d}"]), + ("\u{5f}\u{a}", &["\u{5f}", "\u{a}"]), ("\u{5f}\u{308}\u{a}", &["\u{5f}\u{308}", "\u{a}"]), + ("\u{5f}\u{b}", &["\u{5f}", "\u{b}"]), ("\u{5f}\u{308}\u{b}", &["\u{5f}\u{308}", "\u{b}"]), + ("\u{5f}\u{3031}", &["\u{5f}\u{3031}"]), ("\u{5f}\u{308}\u{3031}", + &["\u{5f}\u{308}\u{3031}"]), ("\u{5f}\u{41}", &["\u{5f}\u{41}"]), ("\u{5f}\u{308}\u{41}", + &["\u{5f}\u{308}\u{41}"]), ("\u{5f}\u{3a}", &["\u{5f}", "\u{3a}"]), ("\u{5f}\u{308}\u{3a}", + &["\u{5f}\u{308}", "\u{3a}"]), ("\u{5f}\u{2c}", &["\u{5f}", "\u{2c}"]), + ("\u{5f}\u{308}\u{2c}", &["\u{5f}\u{308}", "\u{2c}"]), ("\u{5f}\u{2e}", &["\u{5f}", + "\u{2e}"]), ("\u{5f}\u{308}\u{2e}", &["\u{5f}\u{308}", "\u{2e}"]), ("\u{5f}\u{30}", + &["\u{5f}\u{30}"]), ("\u{5f}\u{308}\u{30}", &["\u{5f}\u{308}\u{30}"]), ("\u{5f}\u{5f}", + &["\u{5f}\u{5f}"]), ("\u{5f}\u{308}\u{5f}", &["\u{5f}\u{308}\u{5f}"]), ("\u{5f}\u{1f1e6}", + &["\u{5f}", "\u{1f1e6}"]), ("\u{5f}\u{308}\u{1f1e6}", &["\u{5f}\u{308}", "\u{1f1e6}"]), + ("\u{5f}\u{5d0}", &["\u{5f}\u{5d0}"]), ("\u{5f}\u{308}\u{5d0}", &["\u{5f}\u{308}\u{5d0}"]), + ("\u{5f}\u{22}", &["\u{5f}", "\u{22}"]), ("\u{5f}\u{308}\u{22}", &["\u{5f}\u{308}", + "\u{22}"]), ("\u{5f}\u{27}", &["\u{5f}", "\u{27}"]), ("\u{5f}\u{308}\u{27}", + &["\u{5f}\u{308}", "\u{27}"]), ("\u{5f}\u{ad}", &["\u{5f}\u{ad}"]), ("\u{5f}\u{308}\u{ad}", + 
&["\u{5f}\u{308}\u{ad}"]), ("\u{5f}\u{300}", &["\u{5f}\u{300}"]), ("\u{5f}\u{308}\u{300}", + &["\u{5f}\u{308}\u{300}"]), ("\u{5f}\u{61}\u{2060}", &["\u{5f}\u{61}\u{2060}"]), + ("\u{5f}\u{308}\u{61}\u{2060}", &["\u{5f}\u{308}\u{61}\u{2060}"]), ("\u{5f}\u{61}\u{3a}", + &["\u{5f}\u{61}", "\u{3a}"]), ("\u{5f}\u{308}\u{61}\u{3a}", &["\u{5f}\u{308}\u{61}", + "\u{3a}"]), ("\u{5f}\u{61}\u{27}", &["\u{5f}\u{61}", "\u{27}"]), + ("\u{5f}\u{308}\u{61}\u{27}", &["\u{5f}\u{308}\u{61}", "\u{27}"]), + ("\u{5f}\u{61}\u{27}\u{2060}", &["\u{5f}\u{61}", "\u{27}\u{2060}"]), + ("\u{5f}\u{308}\u{61}\u{27}\u{2060}", &["\u{5f}\u{308}\u{61}", "\u{27}\u{2060}"]), + ("\u{5f}\u{61}\u{2c}", &["\u{5f}\u{61}", "\u{2c}"]), ("\u{5f}\u{308}\u{61}\u{2c}", + &["\u{5f}\u{308}\u{61}", "\u{2c}"]), ("\u{5f}\u{31}\u{3a}", &["\u{5f}\u{31}", "\u{3a}"]), + ("\u{5f}\u{308}\u{31}\u{3a}", &["\u{5f}\u{308}\u{31}", "\u{3a}"]), ("\u{5f}\u{31}\u{27}", + &["\u{5f}\u{31}", "\u{27}"]), ("\u{5f}\u{308}\u{31}\u{27}", &["\u{5f}\u{308}\u{31}", + "\u{27}"]), ("\u{5f}\u{31}\u{2c}", &["\u{5f}\u{31}", "\u{2c}"]), + ("\u{5f}\u{308}\u{31}\u{2c}", &["\u{5f}\u{308}\u{31}", "\u{2c}"]), + ("\u{5f}\u{31}\u{2e}\u{2060}", &["\u{5f}\u{31}", "\u{2e}\u{2060}"]), + ("\u{5f}\u{308}\u{31}\u{2e}\u{2060}", &["\u{5f}\u{308}\u{31}", "\u{2e}\u{2060}"]), + ("\u{1f1e6}\u{1}", &["\u{1f1e6}", "\u{1}"]), ("\u{1f1e6}\u{308}\u{1}", &["\u{1f1e6}\u{308}", + "\u{1}"]), ("\u{1f1e6}\u{d}", &["\u{1f1e6}", "\u{d}"]), ("\u{1f1e6}\u{308}\u{d}", + &["\u{1f1e6}\u{308}", "\u{d}"]), ("\u{1f1e6}\u{a}", &["\u{1f1e6}", "\u{a}"]), + ("\u{1f1e6}\u{308}\u{a}", &["\u{1f1e6}\u{308}", "\u{a}"]), ("\u{1f1e6}\u{b}", &["\u{1f1e6}", + "\u{b}"]), ("\u{1f1e6}\u{308}\u{b}", &["\u{1f1e6}\u{308}", "\u{b}"]), ("\u{1f1e6}\u{3031}", + &["\u{1f1e6}", "\u{3031}"]), ("\u{1f1e6}\u{308}\u{3031}", &["\u{1f1e6}\u{308}", + "\u{3031}"]), ("\u{1f1e6}\u{41}", &["\u{1f1e6}", "\u{41}"]), ("\u{1f1e6}\u{308}\u{41}", + &["\u{1f1e6}\u{308}", "\u{41}"]), ("\u{1f1e6}\u{3a}", &["\u{1f1e6}", "\u{3a}"]), + 
("\u{1f1e6}\u{308}\u{3a}", &["\u{1f1e6}\u{308}", "\u{3a}"]), ("\u{1f1e6}\u{2c}", + &["\u{1f1e6}", "\u{2c}"]), ("\u{1f1e6}\u{308}\u{2c}", &["\u{1f1e6}\u{308}", "\u{2c}"]), + ("\u{1f1e6}\u{2e}", &["\u{1f1e6}", "\u{2e}"]), ("\u{1f1e6}\u{308}\u{2e}", + &["\u{1f1e6}\u{308}", "\u{2e}"]), ("\u{1f1e6}\u{30}", &["\u{1f1e6}", "\u{30}"]), + ("\u{1f1e6}\u{308}\u{30}", &["\u{1f1e6}\u{308}", "\u{30}"]), ("\u{1f1e6}\u{5f}", + &["\u{1f1e6}", "\u{5f}"]), ("\u{1f1e6}\u{308}\u{5f}", &["\u{1f1e6}\u{308}", "\u{5f}"]), + ("\u{1f1e6}\u{1f1e6}", &["\u{1f1e6}\u{1f1e6}"]), ("\u{1f1e6}\u{308}\u{1f1e6}", + &["\u{1f1e6}\u{308}\u{1f1e6}"]), ("\u{1f1e6}\u{5d0}", &["\u{1f1e6}", "\u{5d0}"]), + ("\u{1f1e6}\u{308}\u{5d0}", &["\u{1f1e6}\u{308}", "\u{5d0}"]), ("\u{1f1e6}\u{22}", + &["\u{1f1e6}", "\u{22}"]), ("\u{1f1e6}\u{308}\u{22}", &["\u{1f1e6}\u{308}", "\u{22}"]), + ("\u{1f1e6}\u{27}", &["\u{1f1e6}", "\u{27}"]), ("\u{1f1e6}\u{308}\u{27}", + &["\u{1f1e6}\u{308}", "\u{27}"]), ("\u{1f1e6}\u{ad}", &["\u{1f1e6}\u{ad}"]), + ("\u{1f1e6}\u{308}\u{ad}", &["\u{1f1e6}\u{308}\u{ad}"]), ("\u{1f1e6}\u{300}", + &["\u{1f1e6}\u{300}"]), ("\u{1f1e6}\u{308}\u{300}", &["\u{1f1e6}\u{308}\u{300}"]), + ("\u{1f1e6}\u{61}\u{2060}", &["\u{1f1e6}", "\u{61}\u{2060}"]), + ("\u{1f1e6}\u{308}\u{61}\u{2060}", &["\u{1f1e6}\u{308}", "\u{61}\u{2060}"]), + ("\u{1f1e6}\u{61}\u{3a}", &["\u{1f1e6}", "\u{61}", "\u{3a}"]), + ("\u{1f1e6}\u{308}\u{61}\u{3a}", &["\u{1f1e6}\u{308}", "\u{61}", "\u{3a}"]), + ("\u{1f1e6}\u{61}\u{27}", &["\u{1f1e6}", "\u{61}", "\u{27}"]), + ("\u{1f1e6}\u{308}\u{61}\u{27}", &["\u{1f1e6}\u{308}", "\u{61}", "\u{27}"]), + ("\u{1f1e6}\u{61}\u{27}\u{2060}", &["\u{1f1e6}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{1f1e6}\u{308}\u{61}\u{27}\u{2060}", &["\u{1f1e6}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{1f1e6}\u{61}\u{2c}", &["\u{1f1e6}", "\u{61}", "\u{2c}"]), + ("\u{1f1e6}\u{308}\u{61}\u{2c}", &["\u{1f1e6}\u{308}", "\u{61}", "\u{2c}"]), + ("\u{1f1e6}\u{31}\u{3a}", &["\u{1f1e6}", "\u{31}", "\u{3a}"]), + 
("\u{1f1e6}\u{308}\u{31}\u{3a}", &["\u{1f1e6}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{1f1e6}\u{31}\u{27}", &["\u{1f1e6}", "\u{31}", "\u{27}"]), + ("\u{1f1e6}\u{308}\u{31}\u{27}", &["\u{1f1e6}\u{308}", "\u{31}", "\u{27}"]), + ("\u{1f1e6}\u{31}\u{2c}", &["\u{1f1e6}", "\u{31}", "\u{2c}"]), + ("\u{1f1e6}\u{308}\u{31}\u{2c}", &["\u{1f1e6}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{1f1e6}\u{31}\u{2e}\u{2060}", &["\u{1f1e6}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{1f1e6}\u{308}\u{31}\u{2e}\u{2060}", &["\u{1f1e6}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{5d0}\u{1}", &["\u{5d0}", "\u{1}"]), ("\u{5d0}\u{308}\u{1}", &["\u{5d0}\u{308}", + "\u{1}"]), ("\u{5d0}\u{d}", &["\u{5d0}", "\u{d}"]), ("\u{5d0}\u{308}\u{d}", + &["\u{5d0}\u{308}", "\u{d}"]), ("\u{5d0}\u{a}", &["\u{5d0}", "\u{a}"]), + ("\u{5d0}\u{308}\u{a}", &["\u{5d0}\u{308}", "\u{a}"]), ("\u{5d0}\u{b}", &["\u{5d0}", + "\u{b}"]), ("\u{5d0}\u{308}\u{b}", &["\u{5d0}\u{308}", "\u{b}"]), ("\u{5d0}\u{3031}", + &["\u{5d0}", "\u{3031}"]), ("\u{5d0}\u{308}\u{3031}", &["\u{5d0}\u{308}", "\u{3031}"]), + ("\u{5d0}\u{41}", &["\u{5d0}\u{41}"]), ("\u{5d0}\u{308}\u{41}", &["\u{5d0}\u{308}\u{41}"]), + ("\u{5d0}\u{3a}", &["\u{5d0}", "\u{3a}"]), ("\u{5d0}\u{308}\u{3a}", &["\u{5d0}\u{308}", + "\u{3a}"]), ("\u{5d0}\u{2c}", &["\u{5d0}", "\u{2c}"]), ("\u{5d0}\u{308}\u{2c}", + &["\u{5d0}\u{308}", "\u{2c}"]), ("\u{5d0}\u{2e}", &["\u{5d0}", "\u{2e}"]), + ("\u{5d0}\u{308}\u{2e}", &["\u{5d0}\u{308}", "\u{2e}"]), ("\u{5d0}\u{30}", + &["\u{5d0}\u{30}"]), ("\u{5d0}\u{308}\u{30}", &["\u{5d0}\u{308}\u{30}"]), ("\u{5d0}\u{5f}", + &["\u{5d0}\u{5f}"]), ("\u{5d0}\u{308}\u{5f}", &["\u{5d0}\u{308}\u{5f}"]), + ("\u{5d0}\u{1f1e6}", &["\u{5d0}", "\u{1f1e6}"]), ("\u{5d0}\u{308}\u{1f1e6}", + &["\u{5d0}\u{308}", "\u{1f1e6}"]), ("\u{5d0}\u{5d0}", &["\u{5d0}\u{5d0}"]), + ("\u{5d0}\u{308}\u{5d0}", &["\u{5d0}\u{308}\u{5d0}"]), ("\u{5d0}\u{22}", &["\u{5d0}", + "\u{22}"]), ("\u{5d0}\u{308}\u{22}", &["\u{5d0}\u{308}", "\u{22}"]), ("\u{5d0}\u{27}", + &["\u{5d0}\u{27}"]), 
("\u{5d0}\u{308}\u{27}", &["\u{5d0}\u{308}\u{27}"]), ("\u{5d0}\u{ad}", + &["\u{5d0}\u{ad}"]), ("\u{5d0}\u{308}\u{ad}", &["\u{5d0}\u{308}\u{ad}"]), ("\u{5d0}\u{300}", + &["\u{5d0}\u{300}"]), ("\u{5d0}\u{308}\u{300}", &["\u{5d0}\u{308}\u{300}"]), + ("\u{5d0}\u{61}\u{2060}", &["\u{5d0}\u{61}\u{2060}"]), ("\u{5d0}\u{308}\u{61}\u{2060}", + &["\u{5d0}\u{308}\u{61}\u{2060}"]), ("\u{5d0}\u{61}\u{3a}", &["\u{5d0}\u{61}", "\u{3a}"]), + ("\u{5d0}\u{308}\u{61}\u{3a}", &["\u{5d0}\u{308}\u{61}", "\u{3a}"]), ("\u{5d0}\u{61}\u{27}", + &["\u{5d0}\u{61}", "\u{27}"]), ("\u{5d0}\u{308}\u{61}\u{27}", &["\u{5d0}\u{308}\u{61}", + "\u{27}"]), ("\u{5d0}\u{61}\u{27}\u{2060}", &["\u{5d0}\u{61}", "\u{27}\u{2060}"]), + ("\u{5d0}\u{308}\u{61}\u{27}\u{2060}", &["\u{5d0}\u{308}\u{61}", "\u{27}\u{2060}"]), + ("\u{5d0}\u{61}\u{2c}", &["\u{5d0}\u{61}", "\u{2c}"]), ("\u{5d0}\u{308}\u{61}\u{2c}", + &["\u{5d0}\u{308}\u{61}", "\u{2c}"]), ("\u{5d0}\u{31}\u{3a}", &["\u{5d0}\u{31}", "\u{3a}"]), + ("\u{5d0}\u{308}\u{31}\u{3a}", &["\u{5d0}\u{308}\u{31}", "\u{3a}"]), ("\u{5d0}\u{31}\u{27}", + &["\u{5d0}\u{31}", "\u{27}"]), ("\u{5d0}\u{308}\u{31}\u{27}", &["\u{5d0}\u{308}\u{31}", + "\u{27}"]), ("\u{5d0}\u{31}\u{2c}", &["\u{5d0}\u{31}", "\u{2c}"]), + ("\u{5d0}\u{308}\u{31}\u{2c}", &["\u{5d0}\u{308}\u{31}", "\u{2c}"]), + ("\u{5d0}\u{31}\u{2e}\u{2060}", &["\u{5d0}\u{31}", "\u{2e}\u{2060}"]), + ("\u{5d0}\u{308}\u{31}\u{2e}\u{2060}", &["\u{5d0}\u{308}\u{31}", "\u{2e}\u{2060}"]), + ("\u{22}\u{1}", &["\u{22}", "\u{1}"]), ("\u{22}\u{308}\u{1}", &["\u{22}\u{308}", "\u{1}"]), + ("\u{22}\u{d}", &["\u{22}", "\u{d}"]), ("\u{22}\u{308}\u{d}", &["\u{22}\u{308}", "\u{d}"]), + ("\u{22}\u{a}", &["\u{22}", "\u{a}"]), ("\u{22}\u{308}\u{a}", &["\u{22}\u{308}", "\u{a}"]), + ("\u{22}\u{b}", &["\u{22}", "\u{b}"]), ("\u{22}\u{308}\u{b}", &["\u{22}\u{308}", "\u{b}"]), + ("\u{22}\u{3031}", &["\u{22}", "\u{3031}"]), ("\u{22}\u{308}\u{3031}", &["\u{22}\u{308}", + "\u{3031}"]), ("\u{22}\u{41}", &["\u{22}", "\u{41}"]), 
("\u{22}\u{308}\u{41}", + &["\u{22}\u{308}", "\u{41}"]), ("\u{22}\u{3a}", &["\u{22}", "\u{3a}"]), + ("\u{22}\u{308}\u{3a}", &["\u{22}\u{308}", "\u{3a}"]), ("\u{22}\u{2c}", &["\u{22}", + "\u{2c}"]), ("\u{22}\u{308}\u{2c}", &["\u{22}\u{308}", "\u{2c}"]), ("\u{22}\u{2e}", + &["\u{22}", "\u{2e}"]), ("\u{22}\u{308}\u{2e}", &["\u{22}\u{308}", "\u{2e}"]), + ("\u{22}\u{30}", &["\u{22}", "\u{30}"]), ("\u{22}\u{308}\u{30}", &["\u{22}\u{308}", + "\u{30}"]), ("\u{22}\u{5f}", &["\u{22}", "\u{5f}"]), ("\u{22}\u{308}\u{5f}", + &["\u{22}\u{308}", "\u{5f}"]), ("\u{22}\u{1f1e6}", &["\u{22}", "\u{1f1e6}"]), + ("\u{22}\u{308}\u{1f1e6}", &["\u{22}\u{308}", "\u{1f1e6}"]), ("\u{22}\u{5d0}", &["\u{22}", + "\u{5d0}"]), ("\u{22}\u{308}\u{5d0}", &["\u{22}\u{308}", "\u{5d0}"]), ("\u{22}\u{22}", + &["\u{22}", "\u{22}"]), ("\u{22}\u{308}\u{22}", &["\u{22}\u{308}", "\u{22}"]), + ("\u{22}\u{27}", &["\u{22}", "\u{27}"]), ("\u{22}\u{308}\u{27}", &["\u{22}\u{308}", + "\u{27}"]), ("\u{22}\u{ad}", &["\u{22}\u{ad}"]), ("\u{22}\u{308}\u{ad}", + &["\u{22}\u{308}\u{ad}"]), ("\u{22}\u{300}", &["\u{22}\u{300}"]), ("\u{22}\u{308}\u{300}", + &["\u{22}\u{308}\u{300}"]), ("\u{22}\u{61}\u{2060}", &["\u{22}", "\u{61}\u{2060}"]), + ("\u{22}\u{308}\u{61}\u{2060}", &["\u{22}\u{308}", "\u{61}\u{2060}"]), + ("\u{22}\u{61}\u{3a}", &["\u{22}", "\u{61}", "\u{3a}"]), ("\u{22}\u{308}\u{61}\u{3a}", + &["\u{22}\u{308}", "\u{61}", "\u{3a}"]), ("\u{22}\u{61}\u{27}", &["\u{22}", "\u{61}", + "\u{27}"]), ("\u{22}\u{308}\u{61}\u{27}", &["\u{22}\u{308}", "\u{61}", "\u{27}"]), + ("\u{22}\u{61}\u{27}\u{2060}", &["\u{22}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{22}\u{308}\u{61}\u{27}\u{2060}", &["\u{22}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{22}\u{61}\u{2c}", &["\u{22}", "\u{61}", "\u{2c}"]), ("\u{22}\u{308}\u{61}\u{2c}", + &["\u{22}\u{308}", "\u{61}", "\u{2c}"]), ("\u{22}\u{31}\u{3a}", &["\u{22}", "\u{31}", + "\u{3a}"]), ("\u{22}\u{308}\u{31}\u{3a}", &["\u{22}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{22}\u{31}\u{27}", &["\u{22}", 
"\u{31}", "\u{27}"]), ("\u{22}\u{308}\u{31}\u{27}", + &["\u{22}\u{308}", "\u{31}", "\u{27}"]), ("\u{22}\u{31}\u{2c}", &["\u{22}", "\u{31}", + "\u{2c}"]), ("\u{22}\u{308}\u{31}\u{2c}", &["\u{22}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{22}\u{31}\u{2e}\u{2060}", &["\u{22}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{22}\u{308}\u{31}\u{2e}\u{2060}", &["\u{22}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{27}\u{1}", &["\u{27}", "\u{1}"]), ("\u{27}\u{308}\u{1}", &["\u{27}\u{308}", "\u{1}"]), + ("\u{27}\u{d}", &["\u{27}", "\u{d}"]), ("\u{27}\u{308}\u{d}", &["\u{27}\u{308}", "\u{d}"]), + ("\u{27}\u{a}", &["\u{27}", "\u{a}"]), ("\u{27}\u{308}\u{a}", &["\u{27}\u{308}", "\u{a}"]), + ("\u{27}\u{b}", &["\u{27}", "\u{b}"]), ("\u{27}\u{308}\u{b}", &["\u{27}\u{308}", "\u{b}"]), + ("\u{27}\u{3031}", &["\u{27}", "\u{3031}"]), ("\u{27}\u{308}\u{3031}", &["\u{27}\u{308}", + "\u{3031}"]), ("\u{27}\u{41}", &["\u{27}", "\u{41}"]), ("\u{27}\u{308}\u{41}", + &["\u{27}\u{308}", "\u{41}"]), ("\u{27}\u{3a}", &["\u{27}", "\u{3a}"]), + ("\u{27}\u{308}\u{3a}", &["\u{27}\u{308}", "\u{3a}"]), ("\u{27}\u{2c}", &["\u{27}", + "\u{2c}"]), ("\u{27}\u{308}\u{2c}", &["\u{27}\u{308}", "\u{2c}"]), ("\u{27}\u{2e}", + &["\u{27}", "\u{2e}"]), ("\u{27}\u{308}\u{2e}", &["\u{27}\u{308}", "\u{2e}"]), + ("\u{27}\u{30}", &["\u{27}", "\u{30}"]), ("\u{27}\u{308}\u{30}", &["\u{27}\u{308}", + "\u{30}"]), ("\u{27}\u{5f}", &["\u{27}", "\u{5f}"]), ("\u{27}\u{308}\u{5f}", + &["\u{27}\u{308}", "\u{5f}"]), ("\u{27}\u{1f1e6}", &["\u{27}", "\u{1f1e6}"]), + ("\u{27}\u{308}\u{1f1e6}", &["\u{27}\u{308}", "\u{1f1e6}"]), ("\u{27}\u{5d0}", &["\u{27}", + "\u{5d0}"]), ("\u{27}\u{308}\u{5d0}", &["\u{27}\u{308}", "\u{5d0}"]), ("\u{27}\u{22}", + &["\u{27}", "\u{22}"]), ("\u{27}\u{308}\u{22}", &["\u{27}\u{308}", "\u{22}"]), + ("\u{27}\u{27}", &["\u{27}", "\u{27}"]), ("\u{27}\u{308}\u{27}", &["\u{27}\u{308}", + "\u{27}"]), ("\u{27}\u{ad}", &["\u{27}\u{ad}"]), ("\u{27}\u{308}\u{ad}", + &["\u{27}\u{308}\u{ad}"]), ("\u{27}\u{300}", 
&["\u{27}\u{300}"]), ("\u{27}\u{308}\u{300}", + &["\u{27}\u{308}\u{300}"]), ("\u{27}\u{61}\u{2060}", &["\u{27}", "\u{61}\u{2060}"]), + ("\u{27}\u{308}\u{61}\u{2060}", &["\u{27}\u{308}", "\u{61}\u{2060}"]), + ("\u{27}\u{61}\u{3a}", &["\u{27}", "\u{61}", "\u{3a}"]), ("\u{27}\u{308}\u{61}\u{3a}", + &["\u{27}\u{308}", "\u{61}", "\u{3a}"]), ("\u{27}\u{61}\u{27}", &["\u{27}", "\u{61}", + "\u{27}"]), ("\u{27}\u{308}\u{61}\u{27}", &["\u{27}\u{308}", "\u{61}", "\u{27}"]), + ("\u{27}\u{61}\u{27}\u{2060}", &["\u{27}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{27}\u{308}\u{61}\u{27}\u{2060}", &["\u{27}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{27}\u{61}\u{2c}", &["\u{27}", "\u{61}", "\u{2c}"]), ("\u{27}\u{308}\u{61}\u{2c}", + &["\u{27}\u{308}", "\u{61}", "\u{2c}"]), ("\u{27}\u{31}\u{3a}", &["\u{27}", "\u{31}", + "\u{3a}"]), ("\u{27}\u{308}\u{31}\u{3a}", &["\u{27}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{27}\u{31}\u{27}", &["\u{27}", "\u{31}", "\u{27}"]), ("\u{27}\u{308}\u{31}\u{27}", + &["\u{27}\u{308}", "\u{31}", "\u{27}"]), ("\u{27}\u{31}\u{2c}", &["\u{27}", "\u{31}", + "\u{2c}"]), ("\u{27}\u{308}\u{31}\u{2c}", &["\u{27}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{27}\u{31}\u{2e}\u{2060}", &["\u{27}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{27}\u{308}\u{31}\u{2e}\u{2060}", &["\u{27}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{ad}\u{1}", &["\u{ad}", "\u{1}"]), ("\u{ad}\u{308}\u{1}", &["\u{ad}\u{308}", "\u{1}"]), + ("\u{ad}\u{d}", &["\u{ad}", "\u{d}"]), ("\u{ad}\u{308}\u{d}", &["\u{ad}\u{308}", "\u{d}"]), + ("\u{ad}\u{a}", &["\u{ad}", "\u{a}"]), ("\u{ad}\u{308}\u{a}", &["\u{ad}\u{308}", "\u{a}"]), + ("\u{ad}\u{b}", &["\u{ad}", "\u{b}"]), ("\u{ad}\u{308}\u{b}", &["\u{ad}\u{308}", "\u{b}"]), + ("\u{ad}\u{3031}", &["\u{ad}", "\u{3031}"]), ("\u{ad}\u{308}\u{3031}", &["\u{ad}\u{308}", + "\u{3031}"]), ("\u{ad}\u{41}", &["\u{ad}", "\u{41}"]), ("\u{ad}\u{308}\u{41}", + &["\u{ad}\u{308}", "\u{41}"]), ("\u{ad}\u{3a}", &["\u{ad}", "\u{3a}"]), + ("\u{ad}\u{308}\u{3a}", &["\u{ad}\u{308}", "\u{3a}"]), 
("\u{ad}\u{2c}", &["\u{ad}", + "\u{2c}"]), ("\u{ad}\u{308}\u{2c}", &["\u{ad}\u{308}", "\u{2c}"]), ("\u{ad}\u{2e}", + &["\u{ad}", "\u{2e}"]), ("\u{ad}\u{308}\u{2e}", &["\u{ad}\u{308}", "\u{2e}"]), + ("\u{ad}\u{30}", &["\u{ad}", "\u{30}"]), ("\u{ad}\u{308}\u{30}", &["\u{ad}\u{308}", + "\u{30}"]), ("\u{ad}\u{5f}", &["\u{ad}", "\u{5f}"]), ("\u{ad}\u{308}\u{5f}", + &["\u{ad}\u{308}", "\u{5f}"]), ("\u{ad}\u{1f1e6}", &["\u{ad}", "\u{1f1e6}"]), + ("\u{ad}\u{308}\u{1f1e6}", &["\u{ad}\u{308}", "\u{1f1e6}"]), ("\u{ad}\u{5d0}", &["\u{ad}", + "\u{5d0}"]), ("\u{ad}\u{308}\u{5d0}", &["\u{ad}\u{308}", "\u{5d0}"]), ("\u{ad}\u{22}", + &["\u{ad}", "\u{22}"]), ("\u{ad}\u{308}\u{22}", &["\u{ad}\u{308}", "\u{22}"]), + ("\u{ad}\u{27}", &["\u{ad}", "\u{27}"]), ("\u{ad}\u{308}\u{27}", &["\u{ad}\u{308}", + "\u{27}"]), ("\u{ad}\u{ad}", &["\u{ad}\u{ad}"]), ("\u{ad}\u{308}\u{ad}", + &["\u{ad}\u{308}\u{ad}"]), ("\u{ad}\u{300}", &["\u{ad}\u{300}"]), ("\u{ad}\u{308}\u{300}", + &["\u{ad}\u{308}\u{300}"]), ("\u{ad}\u{61}\u{2060}", &["\u{ad}", "\u{61}\u{2060}"]), + ("\u{ad}\u{308}\u{61}\u{2060}", &["\u{ad}\u{308}", "\u{61}\u{2060}"]), + ("\u{ad}\u{61}\u{3a}", &["\u{ad}", "\u{61}", "\u{3a}"]), ("\u{ad}\u{308}\u{61}\u{3a}", + &["\u{ad}\u{308}", "\u{61}", "\u{3a}"]), ("\u{ad}\u{61}\u{27}", &["\u{ad}", "\u{61}", + "\u{27}"]), ("\u{ad}\u{308}\u{61}\u{27}", &["\u{ad}\u{308}", "\u{61}", "\u{27}"]), + ("\u{ad}\u{61}\u{27}\u{2060}", &["\u{ad}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{ad}\u{308}\u{61}\u{27}\u{2060}", &["\u{ad}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{ad}\u{61}\u{2c}", &["\u{ad}", "\u{61}", "\u{2c}"]), ("\u{ad}\u{308}\u{61}\u{2c}", + &["\u{ad}\u{308}", "\u{61}", "\u{2c}"]), ("\u{ad}\u{31}\u{3a}", &["\u{ad}", "\u{31}", + "\u{3a}"]), ("\u{ad}\u{308}\u{31}\u{3a}", &["\u{ad}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{ad}\u{31}\u{27}", &["\u{ad}", "\u{31}", "\u{27}"]), ("\u{ad}\u{308}\u{31}\u{27}", + &["\u{ad}\u{308}", "\u{31}", "\u{27}"]), ("\u{ad}\u{31}\u{2c}", &["\u{ad}", "\u{31}", + "\u{2c}"]), 
("\u{ad}\u{308}\u{31}\u{2c}", &["\u{ad}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{ad}\u{31}\u{2e}\u{2060}", &["\u{ad}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{ad}\u{308}\u{31}\u{2e}\u{2060}", &["\u{ad}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{300}\u{1}", &["\u{300}", "\u{1}"]), ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", + "\u{1}"]), ("\u{300}\u{d}", &["\u{300}", "\u{d}"]), ("\u{300}\u{308}\u{d}", + &["\u{300}\u{308}", "\u{d}"]), ("\u{300}\u{a}", &["\u{300}", "\u{a}"]), + ("\u{300}\u{308}\u{a}", &["\u{300}\u{308}", "\u{a}"]), ("\u{300}\u{b}", &["\u{300}", + "\u{b}"]), ("\u{300}\u{308}\u{b}", &["\u{300}\u{308}", "\u{b}"]), ("\u{300}\u{3031}", + &["\u{300}", "\u{3031}"]), ("\u{300}\u{308}\u{3031}", &["\u{300}\u{308}", "\u{3031}"]), + ("\u{300}\u{41}", &["\u{300}", "\u{41}"]), ("\u{300}\u{308}\u{41}", &["\u{300}\u{308}", + "\u{41}"]), ("\u{300}\u{3a}", &["\u{300}", "\u{3a}"]), ("\u{300}\u{308}\u{3a}", + &["\u{300}\u{308}", "\u{3a}"]), ("\u{300}\u{2c}", &["\u{300}", "\u{2c}"]), + ("\u{300}\u{308}\u{2c}", &["\u{300}\u{308}", "\u{2c}"]), ("\u{300}\u{2e}", &["\u{300}", + "\u{2e}"]), ("\u{300}\u{308}\u{2e}", &["\u{300}\u{308}", "\u{2e}"]), ("\u{300}\u{30}", + &["\u{300}", "\u{30}"]), ("\u{300}\u{308}\u{30}", &["\u{300}\u{308}", "\u{30}"]), + ("\u{300}\u{5f}", &["\u{300}", "\u{5f}"]), ("\u{300}\u{308}\u{5f}", &["\u{300}\u{308}", + "\u{5f}"]), ("\u{300}\u{1f1e6}", &["\u{300}", "\u{1f1e6}"]), ("\u{300}\u{308}\u{1f1e6}", + &["\u{300}\u{308}", "\u{1f1e6}"]), ("\u{300}\u{5d0}", &["\u{300}", "\u{5d0}"]), + ("\u{300}\u{308}\u{5d0}", &["\u{300}\u{308}", "\u{5d0}"]), ("\u{300}\u{22}", &["\u{300}", + "\u{22}"]), ("\u{300}\u{308}\u{22}", &["\u{300}\u{308}", "\u{22}"]), ("\u{300}\u{27}", + &["\u{300}", "\u{27}"]), ("\u{300}\u{308}\u{27}", &["\u{300}\u{308}", "\u{27}"]), + ("\u{300}\u{ad}", &["\u{300}\u{ad}"]), ("\u{300}\u{308}\u{ad}", &["\u{300}\u{308}\u{ad}"]), + ("\u{300}\u{300}", &["\u{300}\u{300}"]), ("\u{300}\u{308}\u{300}", + &["\u{300}\u{308}\u{300}"]), ("\u{300}\u{61}\u{2060}", 
&["\u{300}", "\u{61}\u{2060}"]), + ("\u{300}\u{308}\u{61}\u{2060}", &["\u{300}\u{308}", "\u{61}\u{2060}"]), + ("\u{300}\u{61}\u{3a}", &["\u{300}", "\u{61}", "\u{3a}"]), ("\u{300}\u{308}\u{61}\u{3a}", + &["\u{300}\u{308}", "\u{61}", "\u{3a}"]), ("\u{300}\u{61}\u{27}", &["\u{300}", "\u{61}", + "\u{27}"]), ("\u{300}\u{308}\u{61}\u{27}", &["\u{300}\u{308}", "\u{61}", "\u{27}"]), + ("\u{300}\u{61}\u{27}\u{2060}", &["\u{300}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{300}\u{308}\u{61}\u{27}\u{2060}", &["\u{300}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{300}\u{61}\u{2c}", &["\u{300}", "\u{61}", "\u{2c}"]), ("\u{300}\u{308}\u{61}\u{2c}", + &["\u{300}\u{308}", "\u{61}", "\u{2c}"]), ("\u{300}\u{31}\u{3a}", &["\u{300}", "\u{31}", + "\u{3a}"]), ("\u{300}\u{308}\u{31}\u{3a}", &["\u{300}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{300}\u{31}\u{27}", &["\u{300}", "\u{31}", "\u{27}"]), ("\u{300}\u{308}\u{31}\u{27}", + &["\u{300}\u{308}", "\u{31}", "\u{27}"]), ("\u{300}\u{31}\u{2c}", &["\u{300}", "\u{31}", + "\u{2c}"]), ("\u{300}\u{308}\u{31}\u{2c}", &["\u{300}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{300}\u{31}\u{2e}\u{2060}", &["\u{300}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{300}\u{308}\u{31}\u{2e}\u{2060}", &["\u{300}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{61}\u{2060}\u{1}", &["\u{61}\u{2060}", "\u{1}"]), ("\u{61}\u{2060}\u{308}\u{1}", + &["\u{61}\u{2060}\u{308}", "\u{1}"]), ("\u{61}\u{2060}\u{d}", &["\u{61}\u{2060}", "\u{d}"]), + ("\u{61}\u{2060}\u{308}\u{d}", &["\u{61}\u{2060}\u{308}", "\u{d}"]), ("\u{61}\u{2060}\u{a}", + &["\u{61}\u{2060}", "\u{a}"]), ("\u{61}\u{2060}\u{308}\u{a}", &["\u{61}\u{2060}\u{308}", + "\u{a}"]), ("\u{61}\u{2060}\u{b}", &["\u{61}\u{2060}", "\u{b}"]), + ("\u{61}\u{2060}\u{308}\u{b}", &["\u{61}\u{2060}\u{308}", "\u{b}"]), + ("\u{61}\u{2060}\u{3031}", &["\u{61}\u{2060}", "\u{3031}"]), + ("\u{61}\u{2060}\u{308}\u{3031}", &["\u{61}\u{2060}\u{308}", "\u{3031}"]), + ("\u{61}\u{2060}\u{41}", &["\u{61}\u{2060}\u{41}"]), ("\u{61}\u{2060}\u{308}\u{41}", + 
&["\u{61}\u{2060}\u{308}\u{41}"]), ("\u{61}\u{2060}\u{3a}", &["\u{61}\u{2060}", "\u{3a}"]), + ("\u{61}\u{2060}\u{308}\u{3a}", &["\u{61}\u{2060}\u{308}", "\u{3a}"]), + ("\u{61}\u{2060}\u{2c}", &["\u{61}\u{2060}", "\u{2c}"]), ("\u{61}\u{2060}\u{308}\u{2c}", + &["\u{61}\u{2060}\u{308}", "\u{2c}"]), ("\u{61}\u{2060}\u{2e}", &["\u{61}\u{2060}", + "\u{2e}"]), ("\u{61}\u{2060}\u{308}\u{2e}", &["\u{61}\u{2060}\u{308}", "\u{2e}"]), + ("\u{61}\u{2060}\u{30}", &["\u{61}\u{2060}\u{30}"]), ("\u{61}\u{2060}\u{308}\u{30}", + &["\u{61}\u{2060}\u{308}\u{30}"]), ("\u{61}\u{2060}\u{5f}", &["\u{61}\u{2060}\u{5f}"]), + ("\u{61}\u{2060}\u{308}\u{5f}", &["\u{61}\u{2060}\u{308}\u{5f}"]), + ("\u{61}\u{2060}\u{1f1e6}", &["\u{61}\u{2060}", "\u{1f1e6}"]), + ("\u{61}\u{2060}\u{308}\u{1f1e6}", &["\u{61}\u{2060}\u{308}", "\u{1f1e6}"]), + ("\u{61}\u{2060}\u{5d0}", &["\u{61}\u{2060}\u{5d0}"]), ("\u{61}\u{2060}\u{308}\u{5d0}", + &["\u{61}\u{2060}\u{308}\u{5d0}"]), ("\u{61}\u{2060}\u{22}", &["\u{61}\u{2060}", "\u{22}"]), + ("\u{61}\u{2060}\u{308}\u{22}", &["\u{61}\u{2060}\u{308}", "\u{22}"]), + ("\u{61}\u{2060}\u{27}", &["\u{61}\u{2060}", "\u{27}"]), ("\u{61}\u{2060}\u{308}\u{27}", + &["\u{61}\u{2060}\u{308}", "\u{27}"]), ("\u{61}\u{2060}\u{ad}", &["\u{61}\u{2060}\u{ad}"]), + ("\u{61}\u{2060}\u{308}\u{ad}", &["\u{61}\u{2060}\u{308}\u{ad}"]), ("\u{61}\u{2060}\u{300}", + &["\u{61}\u{2060}\u{300}"]), ("\u{61}\u{2060}\u{308}\u{300}", + &["\u{61}\u{2060}\u{308}\u{300}"]), ("\u{61}\u{2060}\u{61}\u{2060}", + &["\u{61}\u{2060}\u{61}\u{2060}"]), ("\u{61}\u{2060}\u{308}\u{61}\u{2060}", + &["\u{61}\u{2060}\u{308}\u{61}\u{2060}"]), ("\u{61}\u{2060}\u{61}\u{3a}", + &["\u{61}\u{2060}\u{61}", "\u{3a}"]), ("\u{61}\u{2060}\u{308}\u{61}\u{3a}", + &["\u{61}\u{2060}\u{308}\u{61}", "\u{3a}"]), ("\u{61}\u{2060}\u{61}\u{27}", + &["\u{61}\u{2060}\u{61}", "\u{27}"]), ("\u{61}\u{2060}\u{308}\u{61}\u{27}", + &["\u{61}\u{2060}\u{308}\u{61}", "\u{27}"]), ("\u{61}\u{2060}\u{61}\u{27}\u{2060}", + &["\u{61}\u{2060}\u{61}", 
"\u{27}\u{2060}"]), ("\u{61}\u{2060}\u{308}\u{61}\u{27}\u{2060}", + &["\u{61}\u{2060}\u{308}\u{61}", "\u{27}\u{2060}"]), ("\u{61}\u{2060}\u{61}\u{2c}", + &["\u{61}\u{2060}\u{61}", "\u{2c}"]), ("\u{61}\u{2060}\u{308}\u{61}\u{2c}", + &["\u{61}\u{2060}\u{308}\u{61}", "\u{2c}"]), ("\u{61}\u{2060}\u{31}\u{3a}", + &["\u{61}\u{2060}\u{31}", "\u{3a}"]), ("\u{61}\u{2060}\u{308}\u{31}\u{3a}", + &["\u{61}\u{2060}\u{308}\u{31}", "\u{3a}"]), ("\u{61}\u{2060}\u{31}\u{27}", + &["\u{61}\u{2060}\u{31}", "\u{27}"]), ("\u{61}\u{2060}\u{308}\u{31}\u{27}", + &["\u{61}\u{2060}\u{308}\u{31}", "\u{27}"]), ("\u{61}\u{2060}\u{31}\u{2c}", + &["\u{61}\u{2060}\u{31}", "\u{2c}"]), ("\u{61}\u{2060}\u{308}\u{31}\u{2c}", + &["\u{61}\u{2060}\u{308}\u{31}", "\u{2c}"]), ("\u{61}\u{2060}\u{31}\u{2e}\u{2060}", + &["\u{61}\u{2060}\u{31}", "\u{2e}\u{2060}"]), ("\u{61}\u{2060}\u{308}\u{31}\u{2e}\u{2060}", + &["\u{61}\u{2060}\u{308}\u{31}", "\u{2e}\u{2060}"]), ("\u{61}\u{3a}\u{1}", &["\u{61}", + "\u{3a}", "\u{1}"]), ("\u{61}\u{3a}\u{308}\u{1}", &["\u{61}", "\u{3a}\u{308}", "\u{1}"]), + ("\u{61}\u{3a}\u{d}", &["\u{61}", "\u{3a}", "\u{d}"]), ("\u{61}\u{3a}\u{308}\u{d}", + &["\u{61}", "\u{3a}\u{308}", "\u{d}"]), ("\u{61}\u{3a}\u{a}", &["\u{61}", "\u{3a}", + "\u{a}"]), ("\u{61}\u{3a}\u{308}\u{a}", &["\u{61}", "\u{3a}\u{308}", "\u{a}"]), + ("\u{61}\u{3a}\u{b}", &["\u{61}", "\u{3a}", "\u{b}"]), ("\u{61}\u{3a}\u{308}\u{b}", + &["\u{61}", "\u{3a}\u{308}", "\u{b}"]), ("\u{61}\u{3a}\u{3031}", &["\u{61}", "\u{3a}", + "\u{3031}"]), ("\u{61}\u{3a}\u{308}\u{3031}", &["\u{61}", "\u{3a}\u{308}", "\u{3031}"]), + ("\u{61}\u{3a}\u{41}", &["\u{61}\u{3a}\u{41}"]), ("\u{61}\u{3a}\u{308}\u{41}", + &["\u{61}\u{3a}\u{308}\u{41}"]), ("\u{61}\u{3a}\u{3a}", &["\u{61}", "\u{3a}", "\u{3a}"]), + ("\u{61}\u{3a}\u{308}\u{3a}", &["\u{61}", "\u{3a}\u{308}", "\u{3a}"]), + ("\u{61}\u{3a}\u{2c}", &["\u{61}", "\u{3a}", "\u{2c}"]), ("\u{61}\u{3a}\u{308}\u{2c}", + &["\u{61}", "\u{3a}\u{308}", "\u{2c}"]), ("\u{61}\u{3a}\u{2e}", &["\u{61}", 
"\u{3a}", + "\u{2e}"]), ("\u{61}\u{3a}\u{308}\u{2e}", &["\u{61}", "\u{3a}\u{308}", "\u{2e}"]), + ("\u{61}\u{3a}\u{30}", &["\u{61}", "\u{3a}", "\u{30}"]), ("\u{61}\u{3a}\u{308}\u{30}", + &["\u{61}", "\u{3a}\u{308}", "\u{30}"]), ("\u{61}\u{3a}\u{5f}", &["\u{61}", "\u{3a}", + "\u{5f}"]), ("\u{61}\u{3a}\u{308}\u{5f}", &["\u{61}", "\u{3a}\u{308}", "\u{5f}"]), + ("\u{61}\u{3a}\u{1f1e6}", &["\u{61}", "\u{3a}", "\u{1f1e6}"]), + ("\u{61}\u{3a}\u{308}\u{1f1e6}", &["\u{61}", "\u{3a}\u{308}", "\u{1f1e6}"]), + ("\u{61}\u{3a}\u{5d0}", &["\u{61}\u{3a}\u{5d0}"]), ("\u{61}\u{3a}\u{308}\u{5d0}", + &["\u{61}\u{3a}\u{308}\u{5d0}"]), ("\u{61}\u{3a}\u{22}", &["\u{61}", "\u{3a}", "\u{22}"]), + ("\u{61}\u{3a}\u{308}\u{22}", &["\u{61}", "\u{3a}\u{308}", "\u{22}"]), + ("\u{61}\u{3a}\u{27}", &["\u{61}", "\u{3a}", "\u{27}"]), ("\u{61}\u{3a}\u{308}\u{27}", + &["\u{61}", "\u{3a}\u{308}", "\u{27}"]), ("\u{61}\u{3a}\u{ad}", &["\u{61}", + "\u{3a}\u{ad}"]), ("\u{61}\u{3a}\u{308}\u{ad}", &["\u{61}", "\u{3a}\u{308}\u{ad}"]), + ("\u{61}\u{3a}\u{300}", &["\u{61}", "\u{3a}\u{300}"]), ("\u{61}\u{3a}\u{308}\u{300}", + &["\u{61}", "\u{3a}\u{308}\u{300}"]), ("\u{61}\u{3a}\u{61}\u{2060}", + &["\u{61}\u{3a}\u{61}\u{2060}"]), ("\u{61}\u{3a}\u{308}\u{61}\u{2060}", + &["\u{61}\u{3a}\u{308}\u{61}\u{2060}"]), ("\u{61}\u{3a}\u{61}\u{3a}", + &["\u{61}\u{3a}\u{61}", "\u{3a}"]), ("\u{61}\u{3a}\u{308}\u{61}\u{3a}", + &["\u{61}\u{3a}\u{308}\u{61}", "\u{3a}"]), ("\u{61}\u{3a}\u{61}\u{27}", + &["\u{61}\u{3a}\u{61}", "\u{27}"]), ("\u{61}\u{3a}\u{308}\u{61}\u{27}", + &["\u{61}\u{3a}\u{308}\u{61}", "\u{27}"]), ("\u{61}\u{3a}\u{61}\u{27}\u{2060}", + &["\u{61}\u{3a}\u{61}", "\u{27}\u{2060}"]), ("\u{61}\u{3a}\u{308}\u{61}\u{27}\u{2060}", + &["\u{61}\u{3a}\u{308}\u{61}", "\u{27}\u{2060}"]), ("\u{61}\u{3a}\u{61}\u{2c}", + &["\u{61}\u{3a}\u{61}", "\u{2c}"]), ("\u{61}\u{3a}\u{308}\u{61}\u{2c}", + &["\u{61}\u{3a}\u{308}\u{61}", "\u{2c}"]), ("\u{61}\u{3a}\u{31}\u{3a}", &["\u{61}", + "\u{3a}", "\u{31}", "\u{3a}"]), 
("\u{61}\u{3a}\u{308}\u{31}\u{3a}", &["\u{61}", + "\u{3a}\u{308}", "\u{31}", "\u{3a}"]), ("\u{61}\u{3a}\u{31}\u{27}", &["\u{61}", "\u{3a}", + "\u{31}", "\u{27}"]), ("\u{61}\u{3a}\u{308}\u{31}\u{27}", &["\u{61}", "\u{3a}\u{308}", + "\u{31}", "\u{27}"]), ("\u{61}\u{3a}\u{31}\u{2c}", &["\u{61}", "\u{3a}", "\u{31}", + "\u{2c}"]), ("\u{61}\u{3a}\u{308}\u{31}\u{2c}", &["\u{61}", "\u{3a}\u{308}", "\u{31}", + "\u{2c}"]), ("\u{61}\u{3a}\u{31}\u{2e}\u{2060}", &["\u{61}", "\u{3a}", "\u{31}", + "\u{2e}\u{2060}"]), ("\u{61}\u{3a}\u{308}\u{31}\u{2e}\u{2060}", &["\u{61}", "\u{3a}\u{308}", + "\u{31}", "\u{2e}\u{2060}"]), ("\u{61}\u{27}\u{1}", &["\u{61}", "\u{27}", "\u{1}"]), + ("\u{61}\u{27}\u{308}\u{1}", &["\u{61}", "\u{27}\u{308}", "\u{1}"]), ("\u{61}\u{27}\u{d}", + &["\u{61}", "\u{27}", "\u{d}"]), ("\u{61}\u{27}\u{308}\u{d}", &["\u{61}", "\u{27}\u{308}", + "\u{d}"]), ("\u{61}\u{27}\u{a}", &["\u{61}", "\u{27}", "\u{a}"]), + ("\u{61}\u{27}\u{308}\u{a}", &["\u{61}", "\u{27}\u{308}", "\u{a}"]), ("\u{61}\u{27}\u{b}", + &["\u{61}", "\u{27}", "\u{b}"]), ("\u{61}\u{27}\u{308}\u{b}", &["\u{61}", "\u{27}\u{308}", + "\u{b}"]), ("\u{61}\u{27}\u{3031}", &["\u{61}", "\u{27}", "\u{3031}"]), + ("\u{61}\u{27}\u{308}\u{3031}", &["\u{61}", "\u{27}\u{308}", "\u{3031}"]), + ("\u{61}\u{27}\u{41}", &["\u{61}\u{27}\u{41}"]), ("\u{61}\u{27}\u{308}\u{41}", + &["\u{61}\u{27}\u{308}\u{41}"]), ("\u{61}\u{27}\u{3a}", &["\u{61}", "\u{27}", "\u{3a}"]), + ("\u{61}\u{27}\u{308}\u{3a}", &["\u{61}", "\u{27}\u{308}", "\u{3a}"]), + ("\u{61}\u{27}\u{2c}", &["\u{61}", "\u{27}", "\u{2c}"]), ("\u{61}\u{27}\u{308}\u{2c}", + &["\u{61}", "\u{27}\u{308}", "\u{2c}"]), ("\u{61}\u{27}\u{2e}", &["\u{61}", "\u{27}", + "\u{2e}"]), ("\u{61}\u{27}\u{308}\u{2e}", &["\u{61}", "\u{27}\u{308}", "\u{2e}"]), + ("\u{61}\u{27}\u{30}", &["\u{61}", "\u{27}", "\u{30}"]), ("\u{61}\u{27}\u{308}\u{30}", + &["\u{61}", "\u{27}\u{308}", "\u{30}"]), ("\u{61}\u{27}\u{5f}", &["\u{61}", "\u{27}", + "\u{5f}"]), ("\u{61}\u{27}\u{308}\u{5f}", &["\u{61}", 
"\u{27}\u{308}", "\u{5f}"]), + ("\u{61}\u{27}\u{1f1e6}", &["\u{61}", "\u{27}", "\u{1f1e6}"]), + ("\u{61}\u{27}\u{308}\u{1f1e6}", &["\u{61}", "\u{27}\u{308}", "\u{1f1e6}"]), + ("\u{61}\u{27}\u{5d0}", &["\u{61}\u{27}\u{5d0}"]), ("\u{61}\u{27}\u{308}\u{5d0}", + &["\u{61}\u{27}\u{308}\u{5d0}"]), ("\u{61}\u{27}\u{22}", &["\u{61}", "\u{27}", "\u{22}"]), + ("\u{61}\u{27}\u{308}\u{22}", &["\u{61}", "\u{27}\u{308}", "\u{22}"]), + ("\u{61}\u{27}\u{27}", &["\u{61}", "\u{27}", "\u{27}"]), ("\u{61}\u{27}\u{308}\u{27}", + &["\u{61}", "\u{27}\u{308}", "\u{27}"]), ("\u{61}\u{27}\u{ad}", &["\u{61}", + "\u{27}\u{ad}"]), ("\u{61}\u{27}\u{308}\u{ad}", &["\u{61}", "\u{27}\u{308}\u{ad}"]), + ("\u{61}\u{27}\u{300}", &["\u{61}", "\u{27}\u{300}"]), ("\u{61}\u{27}\u{308}\u{300}", + &["\u{61}", "\u{27}\u{308}\u{300}"]), ("\u{61}\u{27}\u{61}\u{2060}", + &["\u{61}\u{27}\u{61}\u{2060}"]), ("\u{61}\u{27}\u{308}\u{61}\u{2060}", + &["\u{61}\u{27}\u{308}\u{61}\u{2060}"]), ("\u{61}\u{27}\u{61}\u{3a}", + &["\u{61}\u{27}\u{61}", "\u{3a}"]), ("\u{61}\u{27}\u{308}\u{61}\u{3a}", + &["\u{61}\u{27}\u{308}\u{61}", "\u{3a}"]), ("\u{61}\u{27}\u{61}\u{27}", + &["\u{61}\u{27}\u{61}", "\u{27}"]), ("\u{61}\u{27}\u{308}\u{61}\u{27}", + &["\u{61}\u{27}\u{308}\u{61}", "\u{27}"]), ("\u{61}\u{27}\u{61}\u{27}\u{2060}", + &["\u{61}\u{27}\u{61}", "\u{27}\u{2060}"]), ("\u{61}\u{27}\u{308}\u{61}\u{27}\u{2060}", + &["\u{61}\u{27}\u{308}\u{61}", "\u{27}\u{2060}"]), ("\u{61}\u{27}\u{61}\u{2c}", + &["\u{61}\u{27}\u{61}", "\u{2c}"]), ("\u{61}\u{27}\u{308}\u{61}\u{2c}", + &["\u{61}\u{27}\u{308}\u{61}", "\u{2c}"]), ("\u{61}\u{27}\u{31}\u{3a}", &["\u{61}", + "\u{27}", "\u{31}", "\u{3a}"]), ("\u{61}\u{27}\u{308}\u{31}\u{3a}", &["\u{61}", + "\u{27}\u{308}", "\u{31}", "\u{3a}"]), ("\u{61}\u{27}\u{31}\u{27}", &["\u{61}", "\u{27}", + "\u{31}", "\u{27}"]), ("\u{61}\u{27}\u{308}\u{31}\u{27}", &["\u{61}", "\u{27}\u{308}", + "\u{31}", "\u{27}"]), ("\u{61}\u{27}\u{31}\u{2c}", &["\u{61}", "\u{27}", "\u{31}", + "\u{2c}"]), 
("\u{61}\u{27}\u{308}\u{31}\u{2c}", &["\u{61}", "\u{27}\u{308}", "\u{31}", + "\u{2c}"]), ("\u{61}\u{27}\u{31}\u{2e}\u{2060}", &["\u{61}", "\u{27}", "\u{31}", + "\u{2e}\u{2060}"]), ("\u{61}\u{27}\u{308}\u{31}\u{2e}\u{2060}", &["\u{61}", "\u{27}\u{308}", + "\u{31}", "\u{2e}\u{2060}"]), ("\u{61}\u{27}\u{2060}\u{1}", &["\u{61}", "\u{27}\u{2060}", + "\u{1}"]), ("\u{61}\u{27}\u{2060}\u{308}\u{1}", &["\u{61}", "\u{27}\u{2060}\u{308}", + "\u{1}"]), ("\u{61}\u{27}\u{2060}\u{d}", &["\u{61}", "\u{27}\u{2060}", "\u{d}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{d}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{d}"]), + ("\u{61}\u{27}\u{2060}\u{a}", &["\u{61}", "\u{27}\u{2060}", "\u{a}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{a}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{a}"]), + ("\u{61}\u{27}\u{2060}\u{b}", &["\u{61}", "\u{27}\u{2060}", "\u{b}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{b}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{b}"]), + ("\u{61}\u{27}\u{2060}\u{3031}", &["\u{61}", "\u{27}\u{2060}", "\u{3031}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{3031}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{3031}"]), + ("\u{61}\u{27}\u{2060}\u{41}", &["\u{61}\u{27}\u{2060}\u{41}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{41}", &["\u{61}\u{27}\u{2060}\u{308}\u{41}"]), + ("\u{61}\u{27}\u{2060}\u{3a}", &["\u{61}", "\u{27}\u{2060}", "\u{3a}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{3a}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{3a}"]), + ("\u{61}\u{27}\u{2060}\u{2c}", &["\u{61}", "\u{27}\u{2060}", "\u{2c}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{2c}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{2c}"]), + ("\u{61}\u{27}\u{2060}\u{2e}", &["\u{61}", "\u{27}\u{2060}", "\u{2e}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{2e}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{2e}"]), + ("\u{61}\u{27}\u{2060}\u{30}", &["\u{61}", "\u{27}\u{2060}", "\u{30}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{30}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{30}"]), + ("\u{61}\u{27}\u{2060}\u{5f}", &["\u{61}", "\u{27}\u{2060}", "\u{5f}"]), + 
("\u{61}\u{27}\u{2060}\u{308}\u{5f}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{5f}"]), + ("\u{61}\u{27}\u{2060}\u{1f1e6}", &["\u{61}", "\u{27}\u{2060}", "\u{1f1e6}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{1f1e6}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{1f1e6}"]), + ("\u{61}\u{27}\u{2060}\u{5d0}", &["\u{61}\u{27}\u{2060}\u{5d0}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{5d0}", &["\u{61}\u{27}\u{2060}\u{308}\u{5d0}"]), + ("\u{61}\u{27}\u{2060}\u{22}", &["\u{61}", "\u{27}\u{2060}", "\u{22}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{22}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{22}"]), + ("\u{61}\u{27}\u{2060}\u{27}", &["\u{61}", "\u{27}\u{2060}", "\u{27}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{27}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{27}"]), + ("\u{61}\u{27}\u{2060}\u{ad}", &["\u{61}", "\u{27}\u{2060}\u{ad}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{ad}", &["\u{61}", "\u{27}\u{2060}\u{308}\u{ad}"]), + ("\u{61}\u{27}\u{2060}\u{300}", &["\u{61}", "\u{27}\u{2060}\u{300}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{300}", &["\u{61}", "\u{27}\u{2060}\u{308}\u{300}"]), + ("\u{61}\u{27}\u{2060}\u{61}\u{2060}", &["\u{61}\u{27}\u{2060}\u{61}\u{2060}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{61}\u{2060}", + &["\u{61}\u{27}\u{2060}\u{308}\u{61}\u{2060}"]), ("\u{61}\u{27}\u{2060}\u{61}\u{3a}", + &["\u{61}\u{27}\u{2060}\u{61}", "\u{3a}"]), ("\u{61}\u{27}\u{2060}\u{308}\u{61}\u{3a}", + &["\u{61}\u{27}\u{2060}\u{308}\u{61}", "\u{3a}"]), ("\u{61}\u{27}\u{2060}\u{61}\u{27}", + &["\u{61}\u{27}\u{2060}\u{61}", "\u{27}"]), ("\u{61}\u{27}\u{2060}\u{308}\u{61}\u{27}", + &["\u{61}\u{27}\u{2060}\u{308}\u{61}", "\u{27}"]), + ("\u{61}\u{27}\u{2060}\u{61}\u{27}\u{2060}", &["\u{61}\u{27}\u{2060}\u{61}", + "\u{27}\u{2060}"]), ("\u{61}\u{27}\u{2060}\u{308}\u{61}\u{27}\u{2060}", + &["\u{61}\u{27}\u{2060}\u{308}\u{61}", "\u{27}\u{2060}"]), + ("\u{61}\u{27}\u{2060}\u{61}\u{2c}", &["\u{61}\u{27}\u{2060}\u{61}", "\u{2c}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{61}\u{2c}", &["\u{61}\u{27}\u{2060}\u{308}\u{61}", + "\u{2c}"]), 
("\u{61}\u{27}\u{2060}\u{31}\u{3a}", &["\u{61}", "\u{27}\u{2060}", "\u{31}", + "\u{3a}"]), ("\u{61}\u{27}\u{2060}\u{308}\u{31}\u{3a}", &["\u{61}", "\u{27}\u{2060}\u{308}", + "\u{31}", "\u{3a}"]), ("\u{61}\u{27}\u{2060}\u{31}\u{27}", &["\u{61}", "\u{27}\u{2060}", + "\u{31}", "\u{27}"]), ("\u{61}\u{27}\u{2060}\u{308}\u{31}\u{27}", &["\u{61}", + "\u{27}\u{2060}\u{308}", "\u{31}", "\u{27}"]), ("\u{61}\u{27}\u{2060}\u{31}\u{2c}", + &["\u{61}", "\u{27}\u{2060}", "\u{31}", "\u{2c}"]), + ("\u{61}\u{27}\u{2060}\u{308}\u{31}\u{2c}", &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{31}", + "\u{2c}"]), ("\u{61}\u{27}\u{2060}\u{31}\u{2e}\u{2060}", &["\u{61}", "\u{27}\u{2060}", + "\u{31}", "\u{2e}\u{2060}"]), ("\u{61}\u{27}\u{2060}\u{308}\u{31}\u{2e}\u{2060}", + &["\u{61}", "\u{27}\u{2060}\u{308}", "\u{31}", "\u{2e}\u{2060}"]), ("\u{61}\u{2c}\u{1}", + &["\u{61}", "\u{2c}", "\u{1}"]), ("\u{61}\u{2c}\u{308}\u{1}", &["\u{61}", "\u{2c}\u{308}", + "\u{1}"]), ("\u{61}\u{2c}\u{d}", &["\u{61}", "\u{2c}", "\u{d}"]), + ("\u{61}\u{2c}\u{308}\u{d}", &["\u{61}", "\u{2c}\u{308}", "\u{d}"]), ("\u{61}\u{2c}\u{a}", + &["\u{61}", "\u{2c}", "\u{a}"]), ("\u{61}\u{2c}\u{308}\u{a}", &["\u{61}", "\u{2c}\u{308}", + "\u{a}"]), ("\u{61}\u{2c}\u{b}", &["\u{61}", "\u{2c}", "\u{b}"]), + ("\u{61}\u{2c}\u{308}\u{b}", &["\u{61}", "\u{2c}\u{308}", "\u{b}"]), + ("\u{61}\u{2c}\u{3031}", &["\u{61}", "\u{2c}", "\u{3031}"]), ("\u{61}\u{2c}\u{308}\u{3031}", + &["\u{61}", "\u{2c}\u{308}", "\u{3031}"]), ("\u{61}\u{2c}\u{41}", &["\u{61}", "\u{2c}", + "\u{41}"]), ("\u{61}\u{2c}\u{308}\u{41}", &["\u{61}", "\u{2c}\u{308}", "\u{41}"]), + ("\u{61}\u{2c}\u{3a}", &["\u{61}", "\u{2c}", "\u{3a}"]), ("\u{61}\u{2c}\u{308}\u{3a}", + &["\u{61}", "\u{2c}\u{308}", "\u{3a}"]), ("\u{61}\u{2c}\u{2c}", &["\u{61}", "\u{2c}", + "\u{2c}"]), ("\u{61}\u{2c}\u{308}\u{2c}", &["\u{61}", "\u{2c}\u{308}", "\u{2c}"]), + ("\u{61}\u{2c}\u{2e}", &["\u{61}", "\u{2c}", "\u{2e}"]), ("\u{61}\u{2c}\u{308}\u{2e}", + &["\u{61}", "\u{2c}\u{308}", "\u{2e}"]), 
("\u{61}\u{2c}\u{30}", &["\u{61}", "\u{2c}", + "\u{30}"]), ("\u{61}\u{2c}\u{308}\u{30}", &["\u{61}", "\u{2c}\u{308}", "\u{30}"]), + ("\u{61}\u{2c}\u{5f}", &["\u{61}", "\u{2c}", "\u{5f}"]), ("\u{61}\u{2c}\u{308}\u{5f}", + &["\u{61}", "\u{2c}\u{308}", "\u{5f}"]), ("\u{61}\u{2c}\u{1f1e6}", &["\u{61}", "\u{2c}", + "\u{1f1e6}"]), ("\u{61}\u{2c}\u{308}\u{1f1e6}", &["\u{61}", "\u{2c}\u{308}", "\u{1f1e6}"]), + ("\u{61}\u{2c}\u{5d0}", &["\u{61}", "\u{2c}", "\u{5d0}"]), ("\u{61}\u{2c}\u{308}\u{5d0}", + &["\u{61}", "\u{2c}\u{308}", "\u{5d0}"]), ("\u{61}\u{2c}\u{22}", &["\u{61}", "\u{2c}", + "\u{22}"]), ("\u{61}\u{2c}\u{308}\u{22}", &["\u{61}", "\u{2c}\u{308}", "\u{22}"]), + ("\u{61}\u{2c}\u{27}", &["\u{61}", "\u{2c}", "\u{27}"]), ("\u{61}\u{2c}\u{308}\u{27}", + &["\u{61}", "\u{2c}\u{308}", "\u{27}"]), ("\u{61}\u{2c}\u{ad}", &["\u{61}", + "\u{2c}\u{ad}"]), ("\u{61}\u{2c}\u{308}\u{ad}", &["\u{61}", "\u{2c}\u{308}\u{ad}"]), + ("\u{61}\u{2c}\u{300}", &["\u{61}", "\u{2c}\u{300}"]), ("\u{61}\u{2c}\u{308}\u{300}", + &["\u{61}", "\u{2c}\u{308}\u{300}"]), ("\u{61}\u{2c}\u{61}\u{2060}", &["\u{61}", "\u{2c}", + "\u{61}\u{2060}"]), ("\u{61}\u{2c}\u{308}\u{61}\u{2060}", &["\u{61}", "\u{2c}\u{308}", + "\u{61}\u{2060}"]), ("\u{61}\u{2c}\u{61}\u{3a}", &["\u{61}", "\u{2c}", "\u{61}", "\u{3a}"]), + ("\u{61}\u{2c}\u{308}\u{61}\u{3a}", &["\u{61}", "\u{2c}\u{308}", "\u{61}", "\u{3a}"]), + ("\u{61}\u{2c}\u{61}\u{27}", &["\u{61}", "\u{2c}", "\u{61}", "\u{27}"]), + ("\u{61}\u{2c}\u{308}\u{61}\u{27}", &["\u{61}", "\u{2c}\u{308}", "\u{61}", "\u{27}"]), + ("\u{61}\u{2c}\u{61}\u{27}\u{2060}", &["\u{61}", "\u{2c}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{61}\u{2c}\u{308}\u{61}\u{27}\u{2060}", &["\u{61}", "\u{2c}\u{308}", "\u{61}", + "\u{27}\u{2060}"]), ("\u{61}\u{2c}\u{61}\u{2c}", &["\u{61}", "\u{2c}", "\u{61}", "\u{2c}"]), + ("\u{61}\u{2c}\u{308}\u{61}\u{2c}", &["\u{61}", "\u{2c}\u{308}", "\u{61}", "\u{2c}"]), + ("\u{61}\u{2c}\u{31}\u{3a}", &["\u{61}", "\u{2c}", "\u{31}", "\u{3a}"]), + 
("\u{61}\u{2c}\u{308}\u{31}\u{3a}", &["\u{61}", "\u{2c}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{61}\u{2c}\u{31}\u{27}", &["\u{61}", "\u{2c}", "\u{31}", "\u{27}"]), + ("\u{61}\u{2c}\u{308}\u{31}\u{27}", &["\u{61}", "\u{2c}\u{308}", "\u{31}", "\u{27}"]), + ("\u{61}\u{2c}\u{31}\u{2c}", &["\u{61}", "\u{2c}", "\u{31}", "\u{2c}"]), + ("\u{61}\u{2c}\u{308}\u{31}\u{2c}", &["\u{61}", "\u{2c}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{61}\u{2c}\u{31}\u{2e}\u{2060}", &["\u{61}", "\u{2c}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{61}\u{2c}\u{308}\u{31}\u{2e}\u{2060}", &["\u{61}", "\u{2c}\u{308}", "\u{31}", + "\u{2e}\u{2060}"]), ("\u{31}\u{3a}\u{1}", &["\u{31}", "\u{3a}", "\u{1}"]), + ("\u{31}\u{3a}\u{308}\u{1}", &["\u{31}", "\u{3a}\u{308}", "\u{1}"]), ("\u{31}\u{3a}\u{d}", + &["\u{31}", "\u{3a}", "\u{d}"]), ("\u{31}\u{3a}\u{308}\u{d}", &["\u{31}", "\u{3a}\u{308}", + "\u{d}"]), ("\u{31}\u{3a}\u{a}", &["\u{31}", "\u{3a}", "\u{a}"]), + ("\u{31}\u{3a}\u{308}\u{a}", &["\u{31}", "\u{3a}\u{308}", "\u{a}"]), ("\u{31}\u{3a}\u{b}", + &["\u{31}", "\u{3a}", "\u{b}"]), ("\u{31}\u{3a}\u{308}\u{b}", &["\u{31}", "\u{3a}\u{308}", + "\u{b}"]), ("\u{31}\u{3a}\u{3031}", &["\u{31}", "\u{3a}", "\u{3031}"]), + ("\u{31}\u{3a}\u{308}\u{3031}", &["\u{31}", "\u{3a}\u{308}", "\u{3031}"]), + ("\u{31}\u{3a}\u{41}", &["\u{31}", "\u{3a}", "\u{41}"]), ("\u{31}\u{3a}\u{308}\u{41}", + &["\u{31}", "\u{3a}\u{308}", "\u{41}"]), ("\u{31}\u{3a}\u{3a}", &["\u{31}", "\u{3a}", + "\u{3a}"]), ("\u{31}\u{3a}\u{308}\u{3a}", &["\u{31}", "\u{3a}\u{308}", "\u{3a}"]), + ("\u{31}\u{3a}\u{2c}", &["\u{31}", "\u{3a}", "\u{2c}"]), ("\u{31}\u{3a}\u{308}\u{2c}", + &["\u{31}", "\u{3a}\u{308}", "\u{2c}"]), ("\u{31}\u{3a}\u{2e}", &["\u{31}", "\u{3a}", + "\u{2e}"]), ("\u{31}\u{3a}\u{308}\u{2e}", &["\u{31}", "\u{3a}\u{308}", "\u{2e}"]), + ("\u{31}\u{3a}\u{30}", &["\u{31}", "\u{3a}", "\u{30}"]), ("\u{31}\u{3a}\u{308}\u{30}", + &["\u{31}", "\u{3a}\u{308}", "\u{30}"]), ("\u{31}\u{3a}\u{5f}", &["\u{31}", "\u{3a}", + "\u{5f}"]), 
("\u{31}\u{3a}\u{308}\u{5f}", &["\u{31}", "\u{3a}\u{308}", "\u{5f}"]), + ("\u{31}\u{3a}\u{1f1e6}", &["\u{31}", "\u{3a}", "\u{1f1e6}"]), + ("\u{31}\u{3a}\u{308}\u{1f1e6}", &["\u{31}", "\u{3a}\u{308}", "\u{1f1e6}"]), + ("\u{31}\u{3a}\u{5d0}", &["\u{31}", "\u{3a}", "\u{5d0}"]), ("\u{31}\u{3a}\u{308}\u{5d0}", + &["\u{31}", "\u{3a}\u{308}", "\u{5d0}"]), ("\u{31}\u{3a}\u{22}", &["\u{31}", "\u{3a}", + "\u{22}"]), ("\u{31}\u{3a}\u{308}\u{22}", &["\u{31}", "\u{3a}\u{308}", "\u{22}"]), + ("\u{31}\u{3a}\u{27}", &["\u{31}", "\u{3a}", "\u{27}"]), ("\u{31}\u{3a}\u{308}\u{27}", + &["\u{31}", "\u{3a}\u{308}", "\u{27}"]), ("\u{31}\u{3a}\u{ad}", &["\u{31}", + "\u{3a}\u{ad}"]), ("\u{31}\u{3a}\u{308}\u{ad}", &["\u{31}", "\u{3a}\u{308}\u{ad}"]), + ("\u{31}\u{3a}\u{300}", &["\u{31}", "\u{3a}\u{300}"]), ("\u{31}\u{3a}\u{308}\u{300}", + &["\u{31}", "\u{3a}\u{308}\u{300}"]), ("\u{31}\u{3a}\u{61}\u{2060}", &["\u{31}", "\u{3a}", + "\u{61}\u{2060}"]), ("\u{31}\u{3a}\u{308}\u{61}\u{2060}", &["\u{31}", "\u{3a}\u{308}", + "\u{61}\u{2060}"]), ("\u{31}\u{3a}\u{61}\u{3a}", &["\u{31}", "\u{3a}", "\u{61}", "\u{3a}"]), + ("\u{31}\u{3a}\u{308}\u{61}\u{3a}", &["\u{31}", "\u{3a}\u{308}", "\u{61}", "\u{3a}"]), + ("\u{31}\u{3a}\u{61}\u{27}", &["\u{31}", "\u{3a}", "\u{61}", "\u{27}"]), + ("\u{31}\u{3a}\u{308}\u{61}\u{27}", &["\u{31}", "\u{3a}\u{308}", "\u{61}", "\u{27}"]), + ("\u{31}\u{3a}\u{61}\u{27}\u{2060}", &["\u{31}", "\u{3a}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{31}\u{3a}\u{308}\u{61}\u{27}\u{2060}", &["\u{31}", "\u{3a}\u{308}", "\u{61}", + "\u{27}\u{2060}"]), ("\u{31}\u{3a}\u{61}\u{2c}", &["\u{31}", "\u{3a}", "\u{61}", "\u{2c}"]), + ("\u{31}\u{3a}\u{308}\u{61}\u{2c}", &["\u{31}", "\u{3a}\u{308}", "\u{61}", "\u{2c}"]), + ("\u{31}\u{3a}\u{31}\u{3a}", &["\u{31}", "\u{3a}", "\u{31}", "\u{3a}"]), + ("\u{31}\u{3a}\u{308}\u{31}\u{3a}", &["\u{31}", "\u{3a}\u{308}", "\u{31}", "\u{3a}"]), + ("\u{31}\u{3a}\u{31}\u{27}", &["\u{31}", "\u{3a}", "\u{31}", "\u{27}"]), + ("\u{31}\u{3a}\u{308}\u{31}\u{27}", 
&["\u{31}", "\u{3a}\u{308}", "\u{31}", "\u{27}"]), + ("\u{31}\u{3a}\u{31}\u{2c}", &["\u{31}", "\u{3a}", "\u{31}", "\u{2c}"]), + ("\u{31}\u{3a}\u{308}\u{31}\u{2c}", &["\u{31}", "\u{3a}\u{308}", "\u{31}", "\u{2c}"]), + ("\u{31}\u{3a}\u{31}\u{2e}\u{2060}", &["\u{31}", "\u{3a}", "\u{31}", "\u{2e}\u{2060}"]), + ("\u{31}\u{3a}\u{308}\u{31}\u{2e}\u{2060}", &["\u{31}", "\u{3a}\u{308}", "\u{31}", + "\u{2e}\u{2060}"]), ("\u{31}\u{27}\u{1}", &["\u{31}", "\u{27}", "\u{1}"]), + ("\u{31}\u{27}\u{308}\u{1}", &["\u{31}", "\u{27}\u{308}", "\u{1}"]), ("\u{31}\u{27}\u{d}", + &["\u{31}", "\u{27}", "\u{d}"]), ("\u{31}\u{27}\u{308}\u{d}", &["\u{31}", "\u{27}\u{308}", + "\u{d}"]), ("\u{31}\u{27}\u{a}", &["\u{31}", "\u{27}", "\u{a}"]), + ("\u{31}\u{27}\u{308}\u{a}", &["\u{31}", "\u{27}\u{308}", "\u{a}"]), ("\u{31}\u{27}\u{b}", + &["\u{31}", "\u{27}", "\u{b}"]), ("\u{31}\u{27}\u{308}\u{b}", &["\u{31}", "\u{27}\u{308}", + "\u{b}"]), ("\u{31}\u{27}\u{3031}", &["\u{31}", "\u{27}", "\u{3031}"]), + ("\u{31}\u{27}\u{308}\u{3031}", &["\u{31}", "\u{27}\u{308}", "\u{3031}"]), + ("\u{31}\u{27}\u{41}", &["\u{31}", "\u{27}", "\u{41}"]), ("\u{31}\u{27}\u{308}\u{41}", + &["\u{31}", "\u{27}\u{308}", "\u{41}"]), ("\u{31}\u{27}\u{3a}", &["\u{31}", "\u{27}", + "\u{3a}"]), ("\u{31}\u{27}\u{308}\u{3a}", &["\u{31}", "\u{27}\u{308}", "\u{3a}"]), + ("\u{31}\u{27}\u{2c}", &["\u{31}", "\u{27}", "\u{2c}"]), ("\u{31}\u{27}\u{308}\u{2c}", + &["\u{31}", "\u{27}\u{308}", "\u{2c}"]), ("\u{31}\u{27}\u{2e}", &["\u{31}", "\u{27}", + "\u{2e}"]), ("\u{31}\u{27}\u{308}\u{2e}", &["\u{31}", "\u{27}\u{308}", "\u{2e}"]), + ("\u{31}\u{27}\u{30}", &["\u{31}\u{27}\u{30}"]), ("\u{31}\u{27}\u{308}\u{30}", + &["\u{31}\u{27}\u{308}\u{30}"]), ("\u{31}\u{27}\u{5f}", &["\u{31}", "\u{27}", "\u{5f}"]), + ("\u{31}\u{27}\u{308}\u{5f}", &["\u{31}", "\u{27}\u{308}", "\u{5f}"]), + ("\u{31}\u{27}\u{1f1e6}", &["\u{31}", "\u{27}", "\u{1f1e6}"]), + ("\u{31}\u{27}\u{308}\u{1f1e6}", &["\u{31}", "\u{27}\u{308}", "\u{1f1e6}"]), + ("\u{31}\u{27}\u{5d0}", 
&["\u{31}", "\u{27}", "\u{5d0}"]), ("\u{31}\u{27}\u{308}\u{5d0}", + &["\u{31}", "\u{27}\u{308}", "\u{5d0}"]), ("\u{31}\u{27}\u{22}", &["\u{31}", "\u{27}", + "\u{22}"]), ("\u{31}\u{27}\u{308}\u{22}", &["\u{31}", "\u{27}\u{308}", "\u{22}"]), + ("\u{31}\u{27}\u{27}", &["\u{31}", "\u{27}", "\u{27}"]), ("\u{31}\u{27}\u{308}\u{27}", + &["\u{31}", "\u{27}\u{308}", "\u{27}"]), ("\u{31}\u{27}\u{ad}", &["\u{31}", + "\u{27}\u{ad}"]), ("\u{31}\u{27}\u{308}\u{ad}", &["\u{31}", "\u{27}\u{308}\u{ad}"]), + ("\u{31}\u{27}\u{300}", &["\u{31}", "\u{27}\u{300}"]), ("\u{31}\u{27}\u{308}\u{300}", + &["\u{31}", "\u{27}\u{308}\u{300}"]), ("\u{31}\u{27}\u{61}\u{2060}", &["\u{31}", "\u{27}", + "\u{61}\u{2060}"]), ("\u{31}\u{27}\u{308}\u{61}\u{2060}", &["\u{31}", "\u{27}\u{308}", + "\u{61}\u{2060}"]), ("\u{31}\u{27}\u{61}\u{3a}", &["\u{31}", "\u{27}", "\u{61}", "\u{3a}"]), + ("\u{31}\u{27}\u{308}\u{61}\u{3a}", &["\u{31}", "\u{27}\u{308}", "\u{61}", "\u{3a}"]), + ("\u{31}\u{27}\u{61}\u{27}", &["\u{31}", "\u{27}", "\u{61}", "\u{27}"]), + ("\u{31}\u{27}\u{308}\u{61}\u{27}", &["\u{31}", "\u{27}\u{308}", "\u{61}", "\u{27}"]), + ("\u{31}\u{27}\u{61}\u{27}\u{2060}", &["\u{31}", "\u{27}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{31}\u{27}\u{308}\u{61}\u{27}\u{2060}", &["\u{31}", "\u{27}\u{308}", "\u{61}", + "\u{27}\u{2060}"]), ("\u{31}\u{27}\u{61}\u{2c}", &["\u{31}", "\u{27}", "\u{61}", "\u{2c}"]), + ("\u{31}\u{27}\u{308}\u{61}\u{2c}", &["\u{31}", "\u{27}\u{308}", "\u{61}", "\u{2c}"]), + ("\u{31}\u{27}\u{31}\u{3a}", &["\u{31}\u{27}\u{31}", "\u{3a}"]), + ("\u{31}\u{27}\u{308}\u{31}\u{3a}", &["\u{31}\u{27}\u{308}\u{31}", "\u{3a}"]), + ("\u{31}\u{27}\u{31}\u{27}", &["\u{31}\u{27}\u{31}", "\u{27}"]), + ("\u{31}\u{27}\u{308}\u{31}\u{27}", &["\u{31}\u{27}\u{308}\u{31}", "\u{27}"]), + ("\u{31}\u{27}\u{31}\u{2c}", &["\u{31}\u{27}\u{31}", "\u{2c}"]), + ("\u{31}\u{27}\u{308}\u{31}\u{2c}", &["\u{31}\u{27}\u{308}\u{31}", "\u{2c}"]), + ("\u{31}\u{27}\u{31}\u{2e}\u{2060}", &["\u{31}\u{27}\u{31}", "\u{2e}\u{2060}"]), 
+ ("\u{31}\u{27}\u{308}\u{31}\u{2e}\u{2060}", &["\u{31}\u{27}\u{308}\u{31}", + "\u{2e}\u{2060}"]), ("\u{31}\u{2c}\u{1}", &["\u{31}", "\u{2c}", "\u{1}"]), + ("\u{31}\u{2c}\u{308}\u{1}", &["\u{31}", "\u{2c}\u{308}", "\u{1}"]), ("\u{31}\u{2c}\u{d}", + &["\u{31}", "\u{2c}", "\u{d}"]), ("\u{31}\u{2c}\u{308}\u{d}", &["\u{31}", "\u{2c}\u{308}", + "\u{d}"]), ("\u{31}\u{2c}\u{a}", &["\u{31}", "\u{2c}", "\u{a}"]), + ("\u{31}\u{2c}\u{308}\u{a}", &["\u{31}", "\u{2c}\u{308}", "\u{a}"]), ("\u{31}\u{2c}\u{b}", + &["\u{31}", "\u{2c}", "\u{b}"]), ("\u{31}\u{2c}\u{308}\u{b}", &["\u{31}", "\u{2c}\u{308}", + "\u{b}"]), ("\u{31}\u{2c}\u{3031}", &["\u{31}", "\u{2c}", "\u{3031}"]), + ("\u{31}\u{2c}\u{308}\u{3031}", &["\u{31}", "\u{2c}\u{308}", "\u{3031}"]), + ("\u{31}\u{2c}\u{41}", &["\u{31}", "\u{2c}", "\u{41}"]), ("\u{31}\u{2c}\u{308}\u{41}", + &["\u{31}", "\u{2c}\u{308}", "\u{41}"]), ("\u{31}\u{2c}\u{3a}", &["\u{31}", "\u{2c}", + "\u{3a}"]), ("\u{31}\u{2c}\u{308}\u{3a}", &["\u{31}", "\u{2c}\u{308}", "\u{3a}"]), + ("\u{31}\u{2c}\u{2c}", &["\u{31}", "\u{2c}", "\u{2c}"]), ("\u{31}\u{2c}\u{308}\u{2c}", + &["\u{31}", "\u{2c}\u{308}", "\u{2c}"]), ("\u{31}\u{2c}\u{2e}", &["\u{31}", "\u{2c}", + "\u{2e}"]), ("\u{31}\u{2c}\u{308}\u{2e}", &["\u{31}", "\u{2c}\u{308}", "\u{2e}"]), + ("\u{31}\u{2c}\u{30}", &["\u{31}\u{2c}\u{30}"]), ("\u{31}\u{2c}\u{308}\u{30}", + &["\u{31}\u{2c}\u{308}\u{30}"]), ("\u{31}\u{2c}\u{5f}", &["\u{31}", "\u{2c}", "\u{5f}"]), + ("\u{31}\u{2c}\u{308}\u{5f}", &["\u{31}", "\u{2c}\u{308}", "\u{5f}"]), + ("\u{31}\u{2c}\u{1f1e6}", &["\u{31}", "\u{2c}", "\u{1f1e6}"]), + ("\u{31}\u{2c}\u{308}\u{1f1e6}", &["\u{31}", "\u{2c}\u{308}", "\u{1f1e6}"]), + ("\u{31}\u{2c}\u{5d0}", &["\u{31}", "\u{2c}", "\u{5d0}"]), ("\u{31}\u{2c}\u{308}\u{5d0}", + &["\u{31}", "\u{2c}\u{308}", "\u{5d0}"]), ("\u{31}\u{2c}\u{22}", &["\u{31}", "\u{2c}", + "\u{22}"]), ("\u{31}\u{2c}\u{308}\u{22}", &["\u{31}", "\u{2c}\u{308}", "\u{22}"]), + ("\u{31}\u{2c}\u{27}", &["\u{31}", "\u{2c}", "\u{27}"]), 
("\u{31}\u{2c}\u{308}\u{27}", + &["\u{31}", "\u{2c}\u{308}", "\u{27}"]), ("\u{31}\u{2c}\u{ad}", &["\u{31}", + "\u{2c}\u{ad}"]), ("\u{31}\u{2c}\u{308}\u{ad}", &["\u{31}", "\u{2c}\u{308}\u{ad}"]), + ("\u{31}\u{2c}\u{300}", &["\u{31}", "\u{2c}\u{300}"]), ("\u{31}\u{2c}\u{308}\u{300}", + &["\u{31}", "\u{2c}\u{308}\u{300}"]), ("\u{31}\u{2c}\u{61}\u{2060}", &["\u{31}", "\u{2c}", + "\u{61}\u{2060}"]), ("\u{31}\u{2c}\u{308}\u{61}\u{2060}", &["\u{31}", "\u{2c}\u{308}", + "\u{61}\u{2060}"]), ("\u{31}\u{2c}\u{61}\u{3a}", &["\u{31}", "\u{2c}", "\u{61}", "\u{3a}"]), + ("\u{31}\u{2c}\u{308}\u{61}\u{3a}", &["\u{31}", "\u{2c}\u{308}", "\u{61}", "\u{3a}"]), + ("\u{31}\u{2c}\u{61}\u{27}", &["\u{31}", "\u{2c}", "\u{61}", "\u{27}"]), + ("\u{31}\u{2c}\u{308}\u{61}\u{27}", &["\u{31}", "\u{2c}\u{308}", "\u{61}", "\u{27}"]), + ("\u{31}\u{2c}\u{61}\u{27}\u{2060}", &["\u{31}", "\u{2c}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{31}\u{2c}\u{308}\u{61}\u{27}\u{2060}", &["\u{31}", "\u{2c}\u{308}", "\u{61}", + "\u{27}\u{2060}"]), ("\u{31}\u{2c}\u{61}\u{2c}", &["\u{31}", "\u{2c}", "\u{61}", "\u{2c}"]), + ("\u{31}\u{2c}\u{308}\u{61}\u{2c}", &["\u{31}", "\u{2c}\u{308}", "\u{61}", "\u{2c}"]), + ("\u{31}\u{2c}\u{31}\u{3a}", &["\u{31}\u{2c}\u{31}", "\u{3a}"]), + ("\u{31}\u{2c}\u{308}\u{31}\u{3a}", &["\u{31}\u{2c}\u{308}\u{31}", "\u{3a}"]), + ("\u{31}\u{2c}\u{31}\u{27}", &["\u{31}\u{2c}\u{31}", "\u{27}"]), + ("\u{31}\u{2c}\u{308}\u{31}\u{27}", &["\u{31}\u{2c}\u{308}\u{31}", "\u{27}"]), + ("\u{31}\u{2c}\u{31}\u{2c}", &["\u{31}\u{2c}\u{31}", "\u{2c}"]), + ("\u{31}\u{2c}\u{308}\u{31}\u{2c}", &["\u{31}\u{2c}\u{308}\u{31}", "\u{2c}"]), + ("\u{31}\u{2c}\u{31}\u{2e}\u{2060}", &["\u{31}\u{2c}\u{31}", "\u{2e}\u{2060}"]), + ("\u{31}\u{2c}\u{308}\u{31}\u{2e}\u{2060}", &["\u{31}\u{2c}\u{308}\u{31}", + "\u{2e}\u{2060}"]), ("\u{31}\u{2e}\u{2060}\u{1}", &["\u{31}", "\u{2e}\u{2060}", "\u{1}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{1}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{1}"]), + ("\u{31}\u{2e}\u{2060}\u{d}", 
&["\u{31}", "\u{2e}\u{2060}", "\u{d}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{d}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{d}"]), + ("\u{31}\u{2e}\u{2060}\u{a}", &["\u{31}", "\u{2e}\u{2060}", "\u{a}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{a}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{a}"]), + ("\u{31}\u{2e}\u{2060}\u{b}", &["\u{31}", "\u{2e}\u{2060}", "\u{b}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{b}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{b}"]), + ("\u{31}\u{2e}\u{2060}\u{3031}", &["\u{31}", "\u{2e}\u{2060}", "\u{3031}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{3031}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{3031}"]), + ("\u{31}\u{2e}\u{2060}\u{41}", &["\u{31}", "\u{2e}\u{2060}", "\u{41}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{41}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{41}"]), + ("\u{31}\u{2e}\u{2060}\u{3a}", &["\u{31}", "\u{2e}\u{2060}", "\u{3a}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{3a}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{3a}"]), + ("\u{31}\u{2e}\u{2060}\u{2c}", &["\u{31}", "\u{2e}\u{2060}", "\u{2c}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{2c}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{2c}"]), + ("\u{31}\u{2e}\u{2060}\u{2e}", &["\u{31}", "\u{2e}\u{2060}", "\u{2e}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{2e}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{2e}"]), + ("\u{31}\u{2e}\u{2060}\u{30}", &["\u{31}\u{2e}\u{2060}\u{30}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{30}", &["\u{31}\u{2e}\u{2060}\u{308}\u{30}"]), + ("\u{31}\u{2e}\u{2060}\u{5f}", &["\u{31}", "\u{2e}\u{2060}", "\u{5f}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{5f}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{5f}"]), + ("\u{31}\u{2e}\u{2060}\u{1f1e6}", &["\u{31}", "\u{2e}\u{2060}", "\u{1f1e6}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{1f1e6}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{1f1e6}"]), + ("\u{31}\u{2e}\u{2060}\u{5d0}", &["\u{31}", "\u{2e}\u{2060}", "\u{5d0}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{5d0}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{5d0}"]), + ("\u{31}\u{2e}\u{2060}\u{22}", &["\u{31}", "\u{2e}\u{2060}", 
"\u{22}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{22}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{22}"]), + ("\u{31}\u{2e}\u{2060}\u{27}", &["\u{31}", "\u{2e}\u{2060}", "\u{27}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{27}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{27}"]), + ("\u{31}\u{2e}\u{2060}\u{ad}", &["\u{31}", "\u{2e}\u{2060}\u{ad}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{ad}", &["\u{31}", "\u{2e}\u{2060}\u{308}\u{ad}"]), + ("\u{31}\u{2e}\u{2060}\u{300}", &["\u{31}", "\u{2e}\u{2060}\u{300}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{300}", &["\u{31}", "\u{2e}\u{2060}\u{308}\u{300}"]), + ("\u{31}\u{2e}\u{2060}\u{61}\u{2060}", &["\u{31}", "\u{2e}\u{2060}", "\u{61}\u{2060}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{2060}", &["\u{31}", "\u{2e}\u{2060}\u{308}", + "\u{61}\u{2060}"]), ("\u{31}\u{2e}\u{2060}\u{61}\u{3a}", &["\u{31}", "\u{2e}\u{2060}", + "\u{61}", "\u{3a}"]), ("\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{3a}", &["\u{31}", + "\u{2e}\u{2060}\u{308}", "\u{61}", "\u{3a}"]), ("\u{31}\u{2e}\u{2060}\u{61}\u{27}", + &["\u{31}", "\u{2e}\u{2060}", "\u{61}", "\u{27}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{27}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{61}", + "\u{27}"]), ("\u{31}\u{2e}\u{2060}\u{61}\u{27}\u{2060}", &["\u{31}", "\u{2e}\u{2060}", + "\u{61}", "\u{27}\u{2060}"]), ("\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{27}\u{2060}", + &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{61}", "\u{27}\u{2060}"]), + ("\u{31}\u{2e}\u{2060}\u{61}\u{2c}", &["\u{31}", "\u{2e}\u{2060}", "\u{61}", "\u{2c}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{61}\u{2c}", &["\u{31}", "\u{2e}\u{2060}\u{308}", "\u{61}", + "\u{2c}"]), ("\u{31}\u{2e}\u{2060}\u{31}\u{3a}", &["\u{31}\u{2e}\u{2060}\u{31}", "\u{3a}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{31}\u{3a}", &["\u{31}\u{2e}\u{2060}\u{308}\u{31}", + "\u{3a}"]), ("\u{31}\u{2e}\u{2060}\u{31}\u{27}", &["\u{31}\u{2e}\u{2060}\u{31}", "\u{27}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{31}\u{27}", &["\u{31}\u{2e}\u{2060}\u{308}\u{31}", + "\u{27}"]), ("\u{31}\u{2e}\u{2060}\u{31}\u{2c}", 
&["\u{31}\u{2e}\u{2060}\u{31}", "\u{2c}"]), + ("\u{31}\u{2e}\u{2060}\u{308}\u{31}\u{2c}", &["\u{31}\u{2e}\u{2060}\u{308}\u{31}", + "\u{2c}"]), ("\u{31}\u{2e}\u{2060}\u{31}\u{2e}\u{2060}", &["\u{31}\u{2e}\u{2060}\u{31}", + "\u{2e}\u{2060}"]), ("\u{31}\u{2e}\u{2060}\u{308}\u{31}\u{2e}\u{2060}", + &["\u{31}\u{2e}\u{2060}\u{308}\u{31}", "\u{2e}\u{2060}"]), + ("\u{63}\u{61}\u{6e}\u{27}\u{74}", &["\u{63}\u{61}\u{6e}\u{27}\u{74}"]), + ("\u{63}\u{61}\u{6e}\u{2019}\u{74}", &["\u{63}\u{61}\u{6e}\u{2019}\u{74}"]), + ("\u{61}\u{62}\u{ad}\u{62}\u{79}", &["\u{61}\u{62}\u{ad}\u{62}\u{79}"]), + ("\u{61}\u{24}\u{2d}\u{33}\u{34}\u{2c}\u{35}\u{36}\u{37}\u{2e}\u{31}\u{34}\u{25}\u{62}", + &["\u{61}", "\u{24}", "\u{2d}", "\u{33}\u{34}\u{2c}\u{35}\u{36}\u{37}\u{2e}\u{31}\u{34}", + "\u{25}", "\u{62}"]), ("\u{33}\u{61}", &["\u{33}\u{61}"]), + ("\u{2060}\u{63}\u{2060}\u{61}\u{2060}\u{6e}\u{2060}\u{27}\u{2060}\u{74}\u{2060}\u{2060}", + &["\u{2060}", + "\u{63}\u{2060}\u{61}\u{2060}\u{6e}\u{2060}\u{27}\u{2060}\u{74}\u{2060}\u{2060}"]), + ("\u{2060}\u{63}\u{2060}\u{61}\u{2060}\u{6e}\u{2060}\u{2019}\u{2060}\u{74}\u{2060}\u{2060}", + &["\u{2060}", + "\u{63}\u{2060}\u{61}\u{2060}\u{6e}\u{2060}\u{2019}\u{2060}\u{74}\u{2060}\u{2060}"]), + ("\u{2060}\u{61}\u{2060}\u{62}\u{2060}\u{ad}\u{2060}\u{62}\u{2060}\u{79}\u{2060}\u{2060}", + &["\u{2060}", + "\u{61}\u{2060}\u{62}\u{2060}\u{ad}\u{2060}\u{62}\u{2060}\u{79}\u{2060}\u{2060}"]), + ("\u{2060}\u{61}\u{2060}\u{24}\u{2060}\u{2d}\u{2060}\u{33}\u{2060}\u{34}\u{2060}\u{2c}\ + \u{2060}\u{35}\u{2060}\u{36}\u{2060}\u{37}\u{2060}\u{2e}\u{2060}\u{31}\u{2060}\u{34}\ + \u{2060}\u{25}\u{2060}\u{62}\u{2060}\u{2060}", &["\u{2060}", "\u{61}\u{2060}", + "\u{24}\u{2060}", "\u{2d}\u{2060}", "\u{33}\u{2060}\u{34}\u{2060}\u{2c}\u{2060}\u{35}\ + \u{2060}\u{36}\u{2060}\u{37}\u{2060}\u{2e}\u{2060}\u{31}\u{2060}\u{34}\u{2060}", + "\u{25}\u{2060}", "\u{62}\u{2060}\u{2060}"]), + ("\u{2060}\u{33}\u{2060}\u{61}\u{2060}\u{2060}", &["\u{2060}", +
"\u{33}\u{2060}\u{61}\u{2060}\u{2060}"]), ("\u{61}\u{1f1e6}\u{62}", &["\u{61}", "\u{1f1e6}", + "\u{62}"]), ("\u{1f1f7}\u{1f1fa}", &["\u{1f1f7}\u{1f1fa}"]), ("\u{1f1f7}\u{1f1fa}\u{1f1f8}", + &["\u{1f1f7}\u{1f1fa}\u{1f1f8}"]), ("\u{1f1f7}\u{1f1fa}\u{1f1f8}\u{1f1ea}", + &["\u{1f1f7}\u{1f1fa}\u{1f1f8}\u{1f1ea}"]), ("\u{1f1f7}\u{1f1fa}\u{200b}\u{1f1f8}\u{1f1ea}", + &["\u{1f1f7}\u{1f1fa}", "\u{200b}", "\u{1f1f8}\u{1f1ea}"]), ("\u{1f1e6}\u{1f1e7}\u{1f1e8}", + &["\u{1f1e6}\u{1f1e7}\u{1f1e8}"]), ("\u{1f1e6}\u{200d}\u{1f1e7}\u{1f1e8}", + &["\u{1f1e6}\u{200d}\u{1f1e7}\u{1f1e8}"]), ("\u{1f1e6}\u{1f1e7}\u{200d}\u{1f1e8}", + &["\u{1f1e6}\u{1f1e7}\u{200d}\u{1f1e8}"]), ("\u{20}\u{200d}\u{646}", &["\u{20}\u{200d}", + "\u{646}"]), ("\u{646}\u{200d}\u{20}", &["\u{646}\u{200d}", "\u{20}"]) + ]; + + for &(s, w) in &test_word[..] { + // test forward iterator + assert!(order::equals(s.split_words_uax29(), w.iter().cloned())); + + // test reverse iterator + assert!(order::equals(s.split_words_uax29().rev(), w.iter().rev().cloned())); + + // generate offsets from word string lengths + let mut indices = vec![0]; + indices.append(&mut w.iter().cloned().map(|s| s.len()) + .scan(0, |t, n| { *t += n; Some(*t) }).collect::<Vec<_>>()); + indices.pop(); + let indices = indices; + + // test forward indices iterator + assert!(s.split_words_uax29_indices() + .zip(indices.iter()) + .all(|((l,_),m)| l == *m)); + + // test backward indices iterator + assert!(s.split_words_uax29_indices().rev() + .zip(indices.iter().rev()) + .all(|((l,_),m)| l == *m)); + } +} + #[test] fn test_splitator() { fn t(s: &str, sep: &str, u: &[&str]) { diff --git a/src/libunicode/lib.rs b/src/libunicode/lib.rs index 6879fa7b3ba68..87204cca76cd5 100644 --- a/src/libunicode/lib.rs +++ b/src/libunicode/lib.rs @@ -49,6 +49,7 @@ pub mod char; pub mod str { pub use u_str::{UnicodeStr, Words, Graphemes, GraphemeIndices}; + pub use u_str::{UnicodeWords, UWordBounds, UWordBoundIndices}; pub use u_str::{utf8_char_width, is_utf16, Utf16Items,
Utf16Item}; pub use u_str::{utf16_items, Utf16Encoder}; } diff --git a/src/libunicode/tables.rs b/src/libunicode/tables.rs index ad6e81d53abcc..7e6338bc9da2b 100644 --- a/src/libunicode/tables.rs +++ b/src/libunicode/tables.rs @@ -7796,18 +7796,18 @@ pub mod grapheme { use core::result::Result::{Ok, Err}; #[allow(non_camel_case_types)] - #[derive(Clone, Copy)] + #[derive(Clone, Copy, PartialEq, Eq)] pub enum GraphemeCat { + GC_Any, GC_Control, GC_Extend, - GC_LVT, - GC_V, GC_L, - GC_Regional_Indicator, GC_LV, - GC_T, + GC_LVT, + GC_Regional_Indicator, GC_SpacingMark, - GC_Any, + GC_T, + GC_V, } fn bsearch_range_value_table(c: char, r: &'static [(char, char, GraphemeCat)]) -> GraphemeCat { @@ -8326,3 +8326,514 @@ pub mod grapheme { ]; } + +pub mod word { + use core::slice::SliceExt; + pub use self::WordCat::*; + use core::result::Result::{Ok, Err}; + + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, PartialEq, Eq)] + pub enum WordCat { + WC_ALetter, + WC_Any, + WC_CR, + WC_Double_Quote, + WC_Extend, + WC_ExtendNumLet, + WC_Format, + WC_Hebrew_Letter, + WC_Katakana, + WC_LF, + WC_MidLetter, + WC_MidNum, + WC_MidNumLet, + WC_Newline, + WC_Numeric, + WC_Regional_Indicator, + WC_Single_Quote, + } + + fn bsearch_range_value_table(c: char, r: &'static [(char, char, WordCat)]) -> WordCat { + use core::cmp::Ordering::{Equal, Less, Greater}; + match r.binary_search_by(|&(lo, hi, _)| { + if lo <= c && c <= hi { Equal } + else if hi < c { Less } + else { Greater } + }) { + Ok(idx) => { + let (_, _, cat) = r[idx]; + cat + } + Err(_) => WC_Any + } + } + + pub fn word_category(c: char) -> WordCat { + bsearch_range_value_table(c, word_cat_table) + } + + const word_cat_table: &'static [(char, char, WordCat)] = &[ + ('\u{a}', '\u{a}', WC_LF), ('\u{b}', '\u{c}', WC_Newline), ('\u{d}', '\u{d}', WC_CR), + ('\u{22}', '\u{22}', WC_Double_Quote), ('\u{27}', '\u{27}', WC_Single_Quote), ('\u{2c}', + '\u{2c}', WC_MidNum), ('\u{2e}', '\u{2e}', WC_MidNumLet), ('\u{30}', '\u{39}', 
WC_Numeric), + ('\u{3a}', '\u{3a}', WC_MidLetter), ('\u{3b}', '\u{3b}', WC_MidNum), ('\u{41}', '\u{5a}', + WC_ALetter), ('\u{5f}', '\u{5f}', WC_ExtendNumLet), ('\u{61}', '\u{7a}', WC_ALetter), + ('\u{85}', '\u{85}', WC_Newline), ('\u{aa}', '\u{aa}', WC_ALetter), ('\u{ad}', '\u{ad}', + WC_Format), ('\u{b5}', '\u{b5}', WC_ALetter), ('\u{b7}', '\u{b7}', WC_MidLetter), ('\u{ba}', + '\u{ba}', WC_ALetter), ('\u{c0}', '\u{d6}', WC_ALetter), ('\u{d8}', '\u{f6}', WC_ALetter), + ('\u{f8}', '\u{1ba}', WC_ALetter), ('\u{1bb}', '\u{1bb}', WC_ALetter), ('\u{1bc}', + '\u{1bf}', WC_ALetter), ('\u{1c0}', '\u{1c3}', WC_ALetter), ('\u{1c4}', '\u{293}', + WC_ALetter), ('\u{294}', '\u{294}', WC_ALetter), ('\u{295}', '\u{2af}', WC_ALetter), + ('\u{2b0}', '\u{2c1}', WC_ALetter), ('\u{2c6}', '\u{2d1}', WC_ALetter), ('\u{2d7}', + '\u{2d7}', WC_MidLetter), ('\u{2e0}', '\u{2e4}', WC_ALetter), ('\u{2ec}', '\u{2ec}', + WC_ALetter), ('\u{2ee}', '\u{2ee}', WC_ALetter), ('\u{300}', '\u{36f}', WC_Extend), + ('\u{370}', '\u{373}', WC_ALetter), ('\u{374}', '\u{374}', WC_ALetter), ('\u{376}', + '\u{377}', WC_ALetter), ('\u{37a}', '\u{37a}', WC_ALetter), ('\u{37b}', '\u{37d}', + WC_ALetter), ('\u{37e}', '\u{37e}', WC_MidNum), ('\u{37f}', '\u{37f}', WC_ALetter), + ('\u{386}', '\u{386}', WC_ALetter), ('\u{387}', '\u{387}', WC_MidLetter), ('\u{388}', + '\u{38a}', WC_ALetter), ('\u{38c}', '\u{38c}', WC_ALetter), ('\u{38e}', '\u{3a1}', + WC_ALetter), ('\u{3a3}', '\u{3f5}', WC_ALetter), ('\u{3f7}', '\u{481}', WC_ALetter), + ('\u{483}', '\u{487}', WC_Extend), ('\u{488}', '\u{489}', WC_Extend), ('\u{48a}', '\u{52f}', + WC_ALetter), ('\u{531}', '\u{556}', WC_ALetter), ('\u{559}', '\u{559}', WC_ALetter), + ('\u{561}', '\u{587}', WC_ALetter), ('\u{589}', '\u{589}', WC_MidNum), ('\u{591}', + '\u{5bd}', WC_Extend), ('\u{5bf}', '\u{5bf}', WC_Extend), ('\u{5c1}', '\u{5c2}', WC_Extend), + ('\u{5c4}', '\u{5c5}', WC_Extend), ('\u{5c7}', '\u{5c7}', WC_Extend), ('\u{5d0}', '\u{5ea}', + WC_Hebrew_Letter), ('\u{5f0}', 
'\u{5f2}', WC_Hebrew_Letter), ('\u{5f3}', '\u{5f3}', + WC_ALetter), ('\u{5f4}', '\u{5f4}', WC_MidLetter), ('\u{600}', '\u{605}', WC_Format), + ('\u{60c}', '\u{60d}', WC_MidNum), ('\u{610}', '\u{61a}', WC_Extend), ('\u{61c}', '\u{61c}', + WC_Format), ('\u{620}', '\u{63f}', WC_ALetter), ('\u{640}', '\u{640}', WC_ALetter), + ('\u{641}', '\u{64a}', WC_ALetter), ('\u{64b}', '\u{65f}', WC_Extend), ('\u{660}', + '\u{669}', WC_Numeric), ('\u{66b}', '\u{66b}', WC_Numeric), ('\u{66c}', '\u{66c}', + WC_MidNum), ('\u{66e}', '\u{66f}', WC_ALetter), ('\u{670}', '\u{670}', WC_Extend), + ('\u{671}', '\u{6d3}', WC_ALetter), ('\u{6d5}', '\u{6d5}', WC_ALetter), ('\u{6d6}', + '\u{6dc}', WC_Extend), ('\u{6dd}', '\u{6dd}', WC_Format), ('\u{6df}', '\u{6e4}', WC_Extend), + ('\u{6e5}', '\u{6e6}', WC_ALetter), ('\u{6e7}', '\u{6e8}', WC_Extend), ('\u{6ea}', + '\u{6ed}', WC_Extend), ('\u{6ee}', '\u{6ef}', WC_ALetter), ('\u{6f0}', '\u{6f9}', + WC_Numeric), ('\u{6fa}', '\u{6fc}', WC_ALetter), ('\u{6ff}', '\u{6ff}', WC_ALetter), + ('\u{70f}', '\u{70f}', WC_Format), ('\u{710}', '\u{710}', WC_ALetter), ('\u{711}', + '\u{711}', WC_Extend), ('\u{712}', '\u{72f}', WC_ALetter), ('\u{730}', '\u{74a}', + WC_Extend), ('\u{74d}', '\u{7a5}', WC_ALetter), ('\u{7a6}', '\u{7b0}', WC_Extend), + ('\u{7b1}', '\u{7b1}', WC_ALetter), ('\u{7c0}', '\u{7c9}', WC_Numeric), ('\u{7ca}', + '\u{7ea}', WC_ALetter), ('\u{7eb}', '\u{7f3}', WC_Extend), ('\u{7f4}', '\u{7f5}', + WC_ALetter), ('\u{7f8}', '\u{7f8}', WC_MidNum), ('\u{7fa}', '\u{7fa}', WC_ALetter), + ('\u{800}', '\u{815}', WC_ALetter), ('\u{816}', '\u{819}', WC_Extend), ('\u{81a}', + '\u{81a}', WC_ALetter), ('\u{81b}', '\u{823}', WC_Extend), ('\u{824}', '\u{824}', + WC_ALetter), ('\u{825}', '\u{827}', WC_Extend), ('\u{828}', '\u{828}', WC_ALetter), + ('\u{829}', '\u{82d}', WC_Extend), ('\u{840}', '\u{858}', WC_ALetter), ('\u{859}', + '\u{85b}', WC_Extend), ('\u{8a0}', '\u{8b2}', WC_ALetter), ('\u{8e4}', '\u{902}', + WC_Extend), ('\u{903}', '\u{903}', WC_Extend), 
('\u{904}', '\u{939}', WC_ALetter), + ('\u{93a}', '\u{93a}', WC_Extend), ('\u{93b}', '\u{93b}', WC_Extend), ('\u{93c}', '\u{93c}', + WC_Extend), ('\u{93d}', '\u{93d}', WC_ALetter), ('\u{93e}', '\u{940}', WC_Extend), + ('\u{941}', '\u{948}', WC_Extend), ('\u{949}', '\u{94c}', WC_Extend), ('\u{94d}', '\u{94d}', + WC_Extend), ('\u{94e}', '\u{94f}', WC_Extend), ('\u{950}', '\u{950}', WC_ALetter), + ('\u{951}', '\u{957}', WC_Extend), ('\u{958}', '\u{961}', WC_ALetter), ('\u{962}', + '\u{963}', WC_Extend), ('\u{966}', '\u{96f}', WC_Numeric), ('\u{971}', '\u{971}', + WC_ALetter), ('\u{972}', '\u{980}', WC_ALetter), ('\u{981}', '\u{981}', WC_Extend), + ('\u{982}', '\u{983}', WC_Extend), ('\u{985}', '\u{98c}', WC_ALetter), ('\u{98f}', + '\u{990}', WC_ALetter), ('\u{993}', '\u{9a8}', WC_ALetter), ('\u{9aa}', '\u{9b0}', + WC_ALetter), ('\u{9b2}', '\u{9b2}', WC_ALetter), ('\u{9b6}', '\u{9b9}', WC_ALetter), + ('\u{9bc}', '\u{9bc}', WC_Extend), ('\u{9bd}', '\u{9bd}', WC_ALetter), ('\u{9be}', + '\u{9c0}', WC_Extend), ('\u{9c1}', '\u{9c4}', WC_Extend), ('\u{9c7}', '\u{9c8}', WC_Extend), + ('\u{9cb}', '\u{9cc}', WC_Extend), ('\u{9cd}', '\u{9cd}', WC_Extend), ('\u{9ce}', '\u{9ce}', + WC_ALetter), ('\u{9d7}', '\u{9d7}', WC_Extend), ('\u{9dc}', '\u{9dd}', WC_ALetter), + ('\u{9df}', '\u{9e1}', WC_ALetter), ('\u{9e2}', '\u{9e3}', WC_Extend), ('\u{9e6}', + '\u{9ef}', WC_Numeric), ('\u{9f0}', '\u{9f1}', WC_ALetter), ('\u{a01}', '\u{a02}', + WC_Extend), ('\u{a03}', '\u{a03}', WC_Extend), ('\u{a05}', '\u{a0a}', WC_ALetter), + ('\u{a0f}', '\u{a10}', WC_ALetter), ('\u{a13}', '\u{a28}', WC_ALetter), ('\u{a2a}', + '\u{a30}', WC_ALetter), ('\u{a32}', '\u{a33}', WC_ALetter), ('\u{a35}', '\u{a36}', + WC_ALetter), ('\u{a38}', '\u{a39}', WC_ALetter), ('\u{a3c}', '\u{a3c}', WC_Extend), + ('\u{a3e}', '\u{a40}', WC_Extend), ('\u{a41}', '\u{a42}', WC_Extend), ('\u{a47}', '\u{a48}', + WC_Extend), ('\u{a4b}', '\u{a4d}', WC_Extend), ('\u{a51}', '\u{a51}', WC_Extend), + ('\u{a59}', '\u{a5c}', WC_ALetter), 
('\u{a5e}', '\u{a5e}', WC_ALetter), ('\u{a66}', + '\u{a6f}', WC_Numeric), ('\u{a70}', '\u{a71}', WC_Extend), ('\u{a72}', '\u{a74}', + WC_ALetter), ('\u{a75}', '\u{a75}', WC_Extend), ('\u{a81}', '\u{a82}', WC_Extend), + ('\u{a83}', '\u{a83}', WC_Extend), ('\u{a85}', '\u{a8d}', WC_ALetter), ('\u{a8f}', + '\u{a91}', WC_ALetter), ('\u{a93}', '\u{aa8}', WC_ALetter), ('\u{aaa}', '\u{ab0}', + WC_ALetter), ('\u{ab2}', '\u{ab3}', WC_ALetter), ('\u{ab5}', '\u{ab9}', WC_ALetter), + ('\u{abc}', '\u{abc}', WC_Extend), ('\u{abd}', '\u{abd}', WC_ALetter), ('\u{abe}', + '\u{ac0}', WC_Extend), ('\u{ac1}', '\u{ac5}', WC_Extend), ('\u{ac7}', '\u{ac8}', WC_Extend), + ('\u{ac9}', '\u{ac9}', WC_Extend), ('\u{acb}', '\u{acc}', WC_Extend), ('\u{acd}', '\u{acd}', + WC_Extend), ('\u{ad0}', '\u{ad0}', WC_ALetter), ('\u{ae0}', '\u{ae1}', WC_ALetter), + ('\u{ae2}', '\u{ae3}', WC_Extend), ('\u{ae6}', '\u{aef}', WC_Numeric), ('\u{b01}', + '\u{b01}', WC_Extend), ('\u{b02}', '\u{b03}', WC_Extend), ('\u{b05}', '\u{b0c}', + WC_ALetter), ('\u{b0f}', '\u{b10}', WC_ALetter), ('\u{b13}', '\u{b28}', WC_ALetter), + ('\u{b2a}', '\u{b30}', WC_ALetter), ('\u{b32}', '\u{b33}', WC_ALetter), ('\u{b35}', + '\u{b39}', WC_ALetter), ('\u{b3c}', '\u{b3c}', WC_Extend), ('\u{b3d}', '\u{b3d}', + WC_ALetter), ('\u{b3e}', '\u{b3e}', WC_Extend), ('\u{b3f}', '\u{b3f}', WC_Extend), + ('\u{b40}', '\u{b40}', WC_Extend), ('\u{b41}', '\u{b44}', WC_Extend), ('\u{b47}', '\u{b48}', + WC_Extend), ('\u{b4b}', '\u{b4c}', WC_Extend), ('\u{b4d}', '\u{b4d}', WC_Extend), + ('\u{b56}', '\u{b56}', WC_Extend), ('\u{b57}', '\u{b57}', WC_Extend), ('\u{b5c}', '\u{b5d}', + WC_ALetter), ('\u{b5f}', '\u{b61}', WC_ALetter), ('\u{b62}', '\u{b63}', WC_Extend), + ('\u{b66}', '\u{b6f}', WC_Numeric), ('\u{b71}', '\u{b71}', WC_ALetter), ('\u{b82}', + '\u{b82}', WC_Extend), ('\u{b83}', '\u{b83}', WC_ALetter), ('\u{b85}', '\u{b8a}', + WC_ALetter), ('\u{b8e}', '\u{b90}', WC_ALetter), ('\u{b92}', '\u{b95}', WC_ALetter), + ('\u{b99}', '\u{b9a}', WC_ALetter), 
('\u{b9c}', '\u{b9c}', WC_ALetter), ('\u{b9e}', + '\u{b9f}', WC_ALetter), ('\u{ba3}', '\u{ba4}', WC_ALetter), ('\u{ba8}', '\u{baa}', + WC_ALetter), ('\u{bae}', '\u{bb9}', WC_ALetter), ('\u{bbe}', '\u{bbf}', WC_Extend), + ('\u{bc0}', '\u{bc0}', WC_Extend), ('\u{bc1}', '\u{bc2}', WC_Extend), ('\u{bc6}', '\u{bc8}', + WC_Extend), ('\u{bca}', '\u{bcc}', WC_Extend), ('\u{bcd}', '\u{bcd}', WC_Extend), + ('\u{bd0}', '\u{bd0}', WC_ALetter), ('\u{bd7}', '\u{bd7}', WC_Extend), ('\u{be6}', + '\u{bef}', WC_Numeric), ('\u{c00}', '\u{c00}', WC_Extend), ('\u{c01}', '\u{c03}', + WC_Extend), ('\u{c05}', '\u{c0c}', WC_ALetter), ('\u{c0e}', '\u{c10}', WC_ALetter), + ('\u{c12}', '\u{c28}', WC_ALetter), ('\u{c2a}', '\u{c39}', WC_ALetter), ('\u{c3d}', + '\u{c3d}', WC_ALetter), ('\u{c3e}', '\u{c40}', WC_Extend), ('\u{c41}', '\u{c44}', + WC_Extend), ('\u{c46}', '\u{c48}', WC_Extend), ('\u{c4a}', '\u{c4d}', WC_Extend), + ('\u{c55}', '\u{c56}', WC_Extend), ('\u{c58}', '\u{c59}', WC_ALetter), ('\u{c60}', + '\u{c61}', WC_ALetter), ('\u{c62}', '\u{c63}', WC_Extend), ('\u{c66}', '\u{c6f}', + WC_Numeric), ('\u{c81}', '\u{c81}', WC_Extend), ('\u{c82}', '\u{c83}', WC_Extend), + ('\u{c85}', '\u{c8c}', WC_ALetter), ('\u{c8e}', '\u{c90}', WC_ALetter), ('\u{c92}', + '\u{ca8}', WC_ALetter), ('\u{caa}', '\u{cb3}', WC_ALetter), ('\u{cb5}', '\u{cb9}', + WC_ALetter), ('\u{cbc}', '\u{cbc}', WC_Extend), ('\u{cbd}', '\u{cbd}', WC_ALetter), + ('\u{cbe}', '\u{cbe}', WC_Extend), ('\u{cbf}', '\u{cbf}', WC_Extend), ('\u{cc0}', '\u{cc4}', + WC_Extend), ('\u{cc6}', '\u{cc6}', WC_Extend), ('\u{cc7}', '\u{cc8}', WC_Extend), + ('\u{cca}', '\u{ccb}', WC_Extend), ('\u{ccc}', '\u{ccd}', WC_Extend), ('\u{cd5}', '\u{cd6}', + WC_Extend), ('\u{cde}', '\u{cde}', WC_ALetter), ('\u{ce0}', '\u{ce1}', WC_ALetter), + ('\u{ce2}', '\u{ce3}', WC_Extend), ('\u{ce6}', '\u{cef}', WC_Numeric), ('\u{cf1}', + '\u{cf2}', WC_ALetter), ('\u{d01}', '\u{d01}', WC_Extend), ('\u{d02}', '\u{d03}', + WC_Extend), ('\u{d05}', '\u{d0c}', WC_ALetter), 
('\u{d0e}', '\u{d10}', WC_ALetter), + ('\u{d12}', '\u{d3a}', WC_ALetter), ('\u{d3d}', '\u{d3d}', WC_ALetter), ('\u{d3e}', + '\u{d40}', WC_Extend), ('\u{d41}', '\u{d44}', WC_Extend), ('\u{d46}', '\u{d48}', WC_Extend), + ('\u{d4a}', '\u{d4c}', WC_Extend), ('\u{d4d}', '\u{d4d}', WC_Extend), ('\u{d4e}', '\u{d4e}', + WC_ALetter), ('\u{d57}', '\u{d57}', WC_Extend), ('\u{d60}', '\u{d61}', WC_ALetter), + ('\u{d62}', '\u{d63}', WC_Extend), ('\u{d66}', '\u{d6f}', WC_Numeric), ('\u{d7a}', + '\u{d7f}', WC_ALetter), ('\u{d82}', '\u{d83}', WC_Extend), ('\u{d85}', '\u{d96}', + WC_ALetter), ('\u{d9a}', '\u{db1}', WC_ALetter), ('\u{db3}', '\u{dbb}', WC_ALetter), + ('\u{dbd}', '\u{dbd}', WC_ALetter), ('\u{dc0}', '\u{dc6}', WC_ALetter), ('\u{dca}', + '\u{dca}', WC_Extend), ('\u{dcf}', '\u{dd1}', WC_Extend), ('\u{dd2}', '\u{dd4}', WC_Extend), + ('\u{dd6}', '\u{dd6}', WC_Extend), ('\u{dd8}', '\u{ddf}', WC_Extend), ('\u{de6}', '\u{def}', + WC_Numeric), ('\u{df2}', '\u{df3}', WC_Extend), ('\u{e31}', '\u{e31}', WC_Extend), + ('\u{e34}', '\u{e3a}', WC_Extend), ('\u{e47}', '\u{e4e}', WC_Extend), ('\u{e50}', '\u{e59}', + WC_Numeric), ('\u{eb1}', '\u{eb1}', WC_Extend), ('\u{eb4}', '\u{eb9}', WC_Extend), + ('\u{ebb}', '\u{ebc}', WC_Extend), ('\u{ec8}', '\u{ecd}', WC_Extend), ('\u{ed0}', '\u{ed9}', + WC_Numeric), ('\u{f00}', '\u{f00}', WC_ALetter), ('\u{f18}', '\u{f19}', WC_Extend), + ('\u{f20}', '\u{f29}', WC_Numeric), ('\u{f35}', '\u{f35}', WC_Extend), ('\u{f37}', + '\u{f37}', WC_Extend), ('\u{f39}', '\u{f39}', WC_Extend), ('\u{f3e}', '\u{f3f}', WC_Extend), + ('\u{f40}', '\u{f47}', WC_ALetter), ('\u{f49}', '\u{f6c}', WC_ALetter), ('\u{f71}', + '\u{f7e}', WC_Extend), ('\u{f7f}', '\u{f7f}', WC_Extend), ('\u{f80}', '\u{f84}', WC_Extend), + ('\u{f86}', '\u{f87}', WC_Extend), ('\u{f88}', '\u{f8c}', WC_ALetter), ('\u{f8d}', + '\u{f97}', WC_Extend), ('\u{f99}', '\u{fbc}', WC_Extend), ('\u{fc6}', '\u{fc6}', WC_Extend), + ('\u{102b}', '\u{102c}', WC_Extend), ('\u{102d}', '\u{1030}', WC_Extend), 
('\u{1031}', + '\u{1031}', WC_Extend), ('\u{1032}', '\u{1037}', WC_Extend), ('\u{1038}', '\u{1038}', + WC_Extend), ('\u{1039}', '\u{103a}', WC_Extend), ('\u{103b}', '\u{103c}', WC_Extend), + ('\u{103d}', '\u{103e}', WC_Extend), ('\u{1040}', '\u{1049}', WC_Numeric), ('\u{1056}', + '\u{1057}', WC_Extend), ('\u{1058}', '\u{1059}', WC_Extend), ('\u{105e}', '\u{1060}', + WC_Extend), ('\u{1062}', '\u{1064}', WC_Extend), ('\u{1067}', '\u{106d}', WC_Extend), + ('\u{1071}', '\u{1074}', WC_Extend), ('\u{1082}', '\u{1082}', WC_Extend), ('\u{1083}', + '\u{1084}', WC_Extend), ('\u{1085}', '\u{1086}', WC_Extend), ('\u{1087}', '\u{108c}', + WC_Extend), ('\u{108d}', '\u{108d}', WC_Extend), ('\u{108f}', '\u{108f}', WC_Extend), + ('\u{1090}', '\u{1099}', WC_Numeric), ('\u{109a}', '\u{109c}', WC_Extend), ('\u{109d}', + '\u{109d}', WC_Extend), ('\u{10a0}', '\u{10c5}', WC_ALetter), ('\u{10c7}', '\u{10c7}', + WC_ALetter), ('\u{10cd}', '\u{10cd}', WC_ALetter), ('\u{10d0}', '\u{10fa}', WC_ALetter), + ('\u{10fc}', '\u{10fc}', WC_ALetter), ('\u{10fd}', '\u{1248}', WC_ALetter), ('\u{124a}', + '\u{124d}', WC_ALetter), ('\u{1250}', '\u{1256}', WC_ALetter), ('\u{1258}', '\u{1258}', + WC_ALetter), ('\u{125a}', '\u{125d}', WC_ALetter), ('\u{1260}', '\u{1288}', WC_ALetter), + ('\u{128a}', '\u{128d}', WC_ALetter), ('\u{1290}', '\u{12b0}', WC_ALetter), ('\u{12b2}', + '\u{12b5}', WC_ALetter), ('\u{12b8}', '\u{12be}', WC_ALetter), ('\u{12c0}', '\u{12c0}', + WC_ALetter), ('\u{12c2}', '\u{12c5}', WC_ALetter), ('\u{12c8}', '\u{12d6}', WC_ALetter), + ('\u{12d8}', '\u{1310}', WC_ALetter), ('\u{1312}', '\u{1315}', WC_ALetter), ('\u{1318}', + '\u{135a}', WC_ALetter), ('\u{135d}', '\u{135f}', WC_Extend), ('\u{1380}', '\u{138f}', + WC_ALetter), ('\u{13a0}', '\u{13f4}', WC_ALetter), ('\u{1401}', '\u{166c}', WC_ALetter), + ('\u{166f}', '\u{167f}', WC_ALetter), ('\u{1681}', '\u{169a}', WC_ALetter), ('\u{16a0}', + '\u{16ea}', WC_ALetter), ('\u{16ee}', '\u{16f0}', WC_ALetter), ('\u{16f1}', '\u{16f8}', + 
WC_ALetter), ('\u{1700}', '\u{170c}', WC_ALetter), ('\u{170e}', '\u{1711}', WC_ALetter), + ('\u{1712}', '\u{1714}', WC_Extend), ('\u{1720}', '\u{1731}', WC_ALetter), ('\u{1732}', + '\u{1734}', WC_Extend), ('\u{1740}', '\u{1751}', WC_ALetter), ('\u{1752}', '\u{1753}', + WC_Extend), ('\u{1760}', '\u{176c}', WC_ALetter), ('\u{176e}', '\u{1770}', WC_ALetter), + ('\u{1772}', '\u{1773}', WC_Extend), ('\u{17b4}', '\u{17b5}', WC_Extend), ('\u{17b6}', + '\u{17b6}', WC_Extend), ('\u{17b7}', '\u{17bd}', WC_Extend), ('\u{17be}', '\u{17c5}', + WC_Extend), ('\u{17c6}', '\u{17c6}', WC_Extend), ('\u{17c7}', '\u{17c8}', WC_Extend), + ('\u{17c9}', '\u{17d3}', WC_Extend), ('\u{17dd}', '\u{17dd}', WC_Extend), ('\u{17e0}', + '\u{17e9}', WC_Numeric), ('\u{180b}', '\u{180d}', WC_Extend), ('\u{180e}', '\u{180e}', + WC_Format), ('\u{1810}', '\u{1819}', WC_Numeric), ('\u{1820}', '\u{1842}', WC_ALetter), + ('\u{1843}', '\u{1843}', WC_ALetter), ('\u{1844}', '\u{1877}', WC_ALetter), ('\u{1880}', + '\u{18a8}', WC_ALetter), ('\u{18a9}', '\u{18a9}', WC_Extend), ('\u{18aa}', '\u{18aa}', + WC_ALetter), ('\u{18b0}', '\u{18f5}', WC_ALetter), ('\u{1900}', '\u{191e}', WC_ALetter), + ('\u{1920}', '\u{1922}', WC_Extend), ('\u{1923}', '\u{1926}', WC_Extend), ('\u{1927}', + '\u{1928}', WC_Extend), ('\u{1929}', '\u{192b}', WC_Extend), ('\u{1930}', '\u{1931}', + WC_Extend), ('\u{1932}', '\u{1932}', WC_Extend), ('\u{1933}', '\u{1938}', WC_Extend), + ('\u{1939}', '\u{193b}', WC_Extend), ('\u{1946}', '\u{194f}', WC_Numeric), ('\u{19b0}', + '\u{19c0}', WC_Extend), ('\u{19c8}', '\u{19c9}', WC_Extend), ('\u{19d0}', '\u{19d9}', + WC_Numeric), ('\u{1a00}', '\u{1a16}', WC_ALetter), ('\u{1a17}', '\u{1a18}', WC_Extend), + ('\u{1a19}', '\u{1a1a}', WC_Extend), ('\u{1a1b}', '\u{1a1b}', WC_Extend), ('\u{1a55}', + '\u{1a55}', WC_Extend), ('\u{1a56}', '\u{1a56}', WC_Extend), ('\u{1a57}', '\u{1a57}', + WC_Extend), ('\u{1a58}', '\u{1a5e}', WC_Extend), ('\u{1a60}', '\u{1a60}', WC_Extend), + ('\u{1a61}', '\u{1a61}', WC_Extend), 
('\u{1a62}', '\u{1a62}', WC_Extend), ('\u{1a63}', + '\u{1a64}', WC_Extend), ('\u{1a65}', '\u{1a6c}', WC_Extend), ('\u{1a6d}', '\u{1a72}', + WC_Extend), ('\u{1a73}', '\u{1a7c}', WC_Extend), ('\u{1a7f}', '\u{1a7f}', WC_Extend), + ('\u{1a80}', '\u{1a89}', WC_Numeric), ('\u{1a90}', '\u{1a99}', WC_Numeric), ('\u{1ab0}', + '\u{1abd}', WC_Extend), ('\u{1abe}', '\u{1abe}', WC_Extend), ('\u{1b00}', '\u{1b03}', + WC_Extend), ('\u{1b04}', '\u{1b04}', WC_Extend), ('\u{1b05}', '\u{1b33}', WC_ALetter), + ('\u{1b34}', '\u{1b34}', WC_Extend), ('\u{1b35}', '\u{1b35}', WC_Extend), ('\u{1b36}', + '\u{1b3a}', WC_Extend), ('\u{1b3b}', '\u{1b3b}', WC_Extend), ('\u{1b3c}', '\u{1b3c}', + WC_Extend), ('\u{1b3d}', '\u{1b41}', WC_Extend), ('\u{1b42}', '\u{1b42}', WC_Extend), + ('\u{1b43}', '\u{1b44}', WC_Extend), ('\u{1b45}', '\u{1b4b}', WC_ALetter), ('\u{1b50}', + '\u{1b59}', WC_Numeric), ('\u{1b6b}', '\u{1b73}', WC_Extend), ('\u{1b80}', '\u{1b81}', + WC_Extend), ('\u{1b82}', '\u{1b82}', WC_Extend), ('\u{1b83}', '\u{1ba0}', WC_ALetter), + ('\u{1ba1}', '\u{1ba1}', WC_Extend), ('\u{1ba2}', '\u{1ba5}', WC_Extend), ('\u{1ba6}', + '\u{1ba7}', WC_Extend), ('\u{1ba8}', '\u{1ba9}', WC_Extend), ('\u{1baa}', '\u{1baa}', + WC_Extend), ('\u{1bab}', '\u{1bad}', WC_Extend), ('\u{1bae}', '\u{1baf}', WC_ALetter), + ('\u{1bb0}', '\u{1bb9}', WC_Numeric), ('\u{1bba}', '\u{1be5}', WC_ALetter), ('\u{1be6}', + '\u{1be6}', WC_Extend), ('\u{1be7}', '\u{1be7}', WC_Extend), ('\u{1be8}', '\u{1be9}', + WC_Extend), ('\u{1bea}', '\u{1bec}', WC_Extend), ('\u{1bed}', '\u{1bed}', WC_Extend), + ('\u{1bee}', '\u{1bee}', WC_Extend), ('\u{1bef}', '\u{1bf1}', WC_Extend), ('\u{1bf2}', + '\u{1bf3}', WC_Extend), ('\u{1c00}', '\u{1c23}', WC_ALetter), ('\u{1c24}', '\u{1c2b}', + WC_Extend), ('\u{1c2c}', '\u{1c33}', WC_Extend), ('\u{1c34}', '\u{1c35}', WC_Extend), + ('\u{1c36}', '\u{1c37}', WC_Extend), ('\u{1c40}', '\u{1c49}', WC_Numeric), ('\u{1c4d}', + '\u{1c4f}', WC_ALetter), ('\u{1c50}', '\u{1c59}', WC_Numeric), ('\u{1c5a}', 
'\u{1c77}', + WC_ALetter), ('\u{1c78}', '\u{1c7d}', WC_ALetter), ('\u{1cd0}', '\u{1cd2}', WC_Extend), + ('\u{1cd4}', '\u{1ce0}', WC_Extend), ('\u{1ce1}', '\u{1ce1}', WC_Extend), ('\u{1ce2}', + '\u{1ce8}', WC_Extend), ('\u{1ce9}', '\u{1cec}', WC_ALetter), ('\u{1ced}', '\u{1ced}', + WC_Extend), ('\u{1cee}', '\u{1cf1}', WC_ALetter), ('\u{1cf2}', '\u{1cf3}', WC_Extend), + ('\u{1cf4}', '\u{1cf4}', WC_Extend), ('\u{1cf5}', '\u{1cf6}', WC_ALetter), ('\u{1cf8}', + '\u{1cf9}', WC_Extend), ('\u{1d00}', '\u{1d2b}', WC_ALetter), ('\u{1d2c}', '\u{1d6a}', + WC_ALetter), ('\u{1d6b}', '\u{1d77}', WC_ALetter), ('\u{1d78}', '\u{1d78}', WC_ALetter), + ('\u{1d79}', '\u{1d9a}', WC_ALetter), ('\u{1d9b}', '\u{1dbf}', WC_ALetter), ('\u{1dc0}', + '\u{1df5}', WC_Extend), ('\u{1dfc}', '\u{1dff}', WC_Extend), ('\u{1e00}', '\u{1f15}', + WC_ALetter), ('\u{1f18}', '\u{1f1d}', WC_ALetter), ('\u{1f20}', '\u{1f45}', WC_ALetter), + ('\u{1f48}', '\u{1f4d}', WC_ALetter), ('\u{1f50}', '\u{1f57}', WC_ALetter), ('\u{1f59}', + '\u{1f59}', WC_ALetter), ('\u{1f5b}', '\u{1f5b}', WC_ALetter), ('\u{1f5d}', '\u{1f5d}', + WC_ALetter), ('\u{1f5f}', '\u{1f7d}', WC_ALetter), ('\u{1f80}', '\u{1fb4}', WC_ALetter), + ('\u{1fb6}', '\u{1fbc}', WC_ALetter), ('\u{1fbe}', '\u{1fbe}', WC_ALetter), ('\u{1fc2}', + '\u{1fc4}', WC_ALetter), ('\u{1fc6}', '\u{1fcc}', WC_ALetter), ('\u{1fd0}', '\u{1fd3}', + WC_ALetter), ('\u{1fd6}', '\u{1fdb}', WC_ALetter), ('\u{1fe0}', '\u{1fec}', WC_ALetter), + ('\u{1ff2}', '\u{1ff4}', WC_ALetter), ('\u{1ff6}', '\u{1ffc}', WC_ALetter), ('\u{200c}', + '\u{200d}', WC_Extend), ('\u{200e}', '\u{200f}', WC_Format), ('\u{2018}', '\u{2018}', + WC_MidNumLet), ('\u{2019}', '\u{2019}', WC_MidNumLet), ('\u{2024}', '\u{2024}', + WC_MidNumLet), ('\u{2027}', '\u{2027}', WC_MidLetter), ('\u{2028}', '\u{2028}', WC_Newline), + ('\u{2029}', '\u{2029}', WC_Newline), ('\u{202a}', '\u{202e}', WC_Format), ('\u{203f}', + '\u{2040}', WC_ExtendNumLet), ('\u{2044}', '\u{2044}', WC_MidNum), ('\u{2054}', '\u{2054}', + 
WC_ExtendNumLet), ('\u{2060}', '\u{2064}', WC_Format), ('\u{2066}', '\u{206f}', WC_Format), + ('\u{2071}', '\u{2071}', WC_ALetter), ('\u{207f}', '\u{207f}', WC_ALetter), ('\u{2090}', + '\u{209c}', WC_ALetter), ('\u{20d0}', '\u{20dc}', WC_Extend), ('\u{20dd}', '\u{20e0}', + WC_Extend), ('\u{20e1}', '\u{20e1}', WC_Extend), ('\u{20e2}', '\u{20e4}', WC_Extend), + ('\u{20e5}', '\u{20f0}', WC_Extend), ('\u{2102}', '\u{2102}', WC_ALetter), ('\u{2107}', + '\u{2107}', WC_ALetter), ('\u{210a}', '\u{2113}', WC_ALetter), ('\u{2115}', '\u{2115}', + WC_ALetter), ('\u{2119}', '\u{211d}', WC_ALetter), ('\u{2124}', '\u{2124}', WC_ALetter), + ('\u{2126}', '\u{2126}', WC_ALetter), ('\u{2128}', '\u{2128}', WC_ALetter), ('\u{212a}', + '\u{212d}', WC_ALetter), ('\u{212f}', '\u{2134}', WC_ALetter), ('\u{2135}', '\u{2138}', + WC_ALetter), ('\u{2139}', '\u{2139}', WC_ALetter), ('\u{213c}', '\u{213f}', WC_ALetter), + ('\u{2145}', '\u{2149}', WC_ALetter), ('\u{214e}', '\u{214e}', WC_ALetter), ('\u{2160}', + '\u{2182}', WC_ALetter), ('\u{2183}', '\u{2184}', WC_ALetter), ('\u{2185}', '\u{2188}', + WC_ALetter), ('\u{24b6}', '\u{24e9}', WC_ALetter), ('\u{2c00}', '\u{2c2e}', WC_ALetter), + ('\u{2c30}', '\u{2c5e}', WC_ALetter), ('\u{2c60}', '\u{2c7b}', WC_ALetter), ('\u{2c7c}', + '\u{2c7d}', WC_ALetter), ('\u{2c7e}', '\u{2ce4}', WC_ALetter), ('\u{2ceb}', '\u{2cee}', + WC_ALetter), ('\u{2cef}', '\u{2cf1}', WC_Extend), ('\u{2cf2}', '\u{2cf3}', WC_ALetter), + ('\u{2d00}', '\u{2d25}', WC_ALetter), ('\u{2d27}', '\u{2d27}', WC_ALetter), ('\u{2d2d}', + '\u{2d2d}', WC_ALetter), ('\u{2d30}', '\u{2d67}', WC_ALetter), ('\u{2d6f}', '\u{2d6f}', + WC_ALetter), ('\u{2d7f}', '\u{2d7f}', WC_Extend), ('\u{2d80}', '\u{2d96}', WC_ALetter), + ('\u{2da0}', '\u{2da6}', WC_ALetter), ('\u{2da8}', '\u{2dae}', WC_ALetter), ('\u{2db0}', + '\u{2db6}', WC_ALetter), ('\u{2db8}', '\u{2dbe}', WC_ALetter), ('\u{2dc0}', '\u{2dc6}', + WC_ALetter), ('\u{2dc8}', '\u{2dce}', WC_ALetter), ('\u{2dd0}', '\u{2dd6}', WC_ALetter), + 
('\u{2dd8}', '\u{2dde}', WC_ALetter), ('\u{2de0}', '\u{2dff}', WC_Extend), ('\u{2e2f}', + '\u{2e2f}', WC_ALetter), ('\u{3005}', '\u{3005}', WC_ALetter), ('\u{302a}', '\u{302d}', + WC_Extend), ('\u{302e}', '\u{302f}', WC_Extend), ('\u{3031}', '\u{3035}', WC_Katakana), + ('\u{303b}', '\u{303b}', WC_ALetter), ('\u{303c}', '\u{303c}', WC_ALetter), ('\u{3099}', + '\u{309a}', WC_Extend), ('\u{309b}', '\u{309c}', WC_Katakana), ('\u{30a0}', '\u{30a0}', + WC_Katakana), ('\u{30a1}', '\u{30fa}', WC_Katakana), ('\u{30fc}', '\u{30fe}', WC_Katakana), + ('\u{30ff}', '\u{30ff}', WC_Katakana), ('\u{3105}', '\u{312d}', WC_ALetter), ('\u{3131}', + '\u{318e}', WC_ALetter), ('\u{31a0}', '\u{31ba}', WC_ALetter), ('\u{31f0}', '\u{31ff}', + WC_Katakana), ('\u{32d0}', '\u{32fe}', WC_Katakana), ('\u{3300}', '\u{3357}', WC_Katakana), + ('\u{a000}', '\u{a014}', WC_ALetter), ('\u{a015}', '\u{a015}', WC_ALetter), ('\u{a016}', + '\u{a48c}', WC_ALetter), ('\u{a4d0}', '\u{a4f7}', WC_ALetter), ('\u{a4f8}', '\u{a4fd}', + WC_ALetter), ('\u{a500}', '\u{a60b}', WC_ALetter), ('\u{a60c}', '\u{a60c}', WC_ALetter), + ('\u{a610}', '\u{a61f}', WC_ALetter), ('\u{a620}', '\u{a629}', WC_Numeric), ('\u{a62a}', + '\u{a62b}', WC_ALetter), ('\u{a640}', '\u{a66d}', WC_ALetter), ('\u{a66e}', '\u{a66e}', + WC_ALetter), ('\u{a66f}', '\u{a66f}', WC_Extend), ('\u{a670}', '\u{a672}', WC_Extend), + ('\u{a674}', '\u{a67d}', WC_Extend), ('\u{a67f}', '\u{a67f}', WC_ALetter), ('\u{a680}', + '\u{a69b}', WC_ALetter), ('\u{a69c}', '\u{a69d}', WC_ALetter), ('\u{a69f}', '\u{a69f}', + WC_Extend), ('\u{a6a0}', '\u{a6e5}', WC_ALetter), ('\u{a6e6}', '\u{a6ef}', WC_ALetter), + ('\u{a6f0}', '\u{a6f1}', WC_Extend), ('\u{a717}', '\u{a71f}', WC_ALetter), ('\u{a722}', + '\u{a76f}', WC_ALetter), ('\u{a770}', '\u{a770}', WC_ALetter), ('\u{a771}', '\u{a787}', + WC_ALetter), ('\u{a788}', '\u{a788}', WC_ALetter), ('\u{a78b}', '\u{a78e}', WC_ALetter), + ('\u{a790}', '\u{a7ad}', WC_ALetter), ('\u{a7b0}', '\u{a7b1}', WC_ALetter), ('\u{a7f7}', + 
'\u{a7f7}', WC_ALetter), ('\u{a7f8}', '\u{a7f9}', WC_ALetter), ('\u{a7fa}', '\u{a7fa}', + WC_ALetter), ('\u{a7fb}', '\u{a801}', WC_ALetter), ('\u{a802}', '\u{a802}', WC_Extend), + ('\u{a803}', '\u{a805}', WC_ALetter), ('\u{a806}', '\u{a806}', WC_Extend), ('\u{a807}', + '\u{a80a}', WC_ALetter), ('\u{a80b}', '\u{a80b}', WC_Extend), ('\u{a80c}', '\u{a822}', + WC_ALetter), ('\u{a823}', '\u{a824}', WC_Extend), ('\u{a825}', '\u{a826}', WC_Extend), + ('\u{a827}', '\u{a827}', WC_Extend), ('\u{a840}', '\u{a873}', WC_ALetter), ('\u{a880}', + '\u{a881}', WC_Extend), ('\u{a882}', '\u{a8b3}', WC_ALetter), ('\u{a8b4}', '\u{a8c3}', + WC_Extend), ('\u{a8c4}', '\u{a8c4}', WC_Extend), ('\u{a8d0}', '\u{a8d9}', WC_Numeric), + ('\u{a8e0}', '\u{a8f1}', WC_Extend), ('\u{a8f2}', '\u{a8f7}', WC_ALetter), ('\u{a8fb}', + '\u{a8fb}', WC_ALetter), ('\u{a900}', '\u{a909}', WC_Numeric), ('\u{a90a}', '\u{a925}', + WC_ALetter), ('\u{a926}', '\u{a92d}', WC_Extend), ('\u{a930}', '\u{a946}', WC_ALetter), + ('\u{a947}', '\u{a951}', WC_Extend), ('\u{a952}', '\u{a953}', WC_Extend), ('\u{a960}', + '\u{a97c}', WC_ALetter), ('\u{a980}', '\u{a982}', WC_Extend), ('\u{a983}', '\u{a983}', + WC_Extend), ('\u{a984}', '\u{a9b2}', WC_ALetter), ('\u{a9b3}', '\u{a9b3}', WC_Extend), + ('\u{a9b4}', '\u{a9b5}', WC_Extend), ('\u{a9b6}', '\u{a9b9}', WC_Extend), ('\u{a9ba}', + '\u{a9bb}', WC_Extend), ('\u{a9bc}', '\u{a9bc}', WC_Extend), ('\u{a9bd}', '\u{a9c0}', + WC_Extend), ('\u{a9cf}', '\u{a9cf}', WC_ALetter), ('\u{a9d0}', '\u{a9d9}', WC_Numeric), + ('\u{a9e5}', '\u{a9e5}', WC_Extend), ('\u{a9f0}', '\u{a9f9}', WC_Numeric), ('\u{aa00}', + '\u{aa28}', WC_ALetter), ('\u{aa29}', '\u{aa2e}', WC_Extend), ('\u{aa2f}', '\u{aa30}', + WC_Extend), ('\u{aa31}', '\u{aa32}', WC_Extend), ('\u{aa33}', '\u{aa34}', WC_Extend), + ('\u{aa35}', '\u{aa36}', WC_Extend), ('\u{aa40}', '\u{aa42}', WC_ALetter), ('\u{aa43}', + '\u{aa43}', WC_Extend), ('\u{aa44}', '\u{aa4b}', WC_ALetter), ('\u{aa4c}', '\u{aa4c}', + WC_Extend), ('\u{aa4d}', 
'\u{aa4d}', WC_Extend), ('\u{aa50}', '\u{aa59}', WC_Numeric), + ('\u{aa7b}', '\u{aa7b}', WC_Extend), ('\u{aa7c}', '\u{aa7c}', WC_Extend), ('\u{aa7d}', + '\u{aa7d}', WC_Extend), ('\u{aab0}', '\u{aab0}', WC_Extend), ('\u{aab2}', '\u{aab4}', + WC_Extend), ('\u{aab7}', '\u{aab8}', WC_Extend), ('\u{aabe}', '\u{aabf}', WC_Extend), + ('\u{aac1}', '\u{aac1}', WC_Extend), ('\u{aae0}', '\u{aaea}', WC_ALetter), ('\u{aaeb}', + '\u{aaeb}', WC_Extend), ('\u{aaec}', '\u{aaed}', WC_Extend), ('\u{aaee}', '\u{aaef}', + WC_Extend), ('\u{aaf2}', '\u{aaf2}', WC_ALetter), ('\u{aaf3}', '\u{aaf4}', WC_ALetter), + ('\u{aaf5}', '\u{aaf5}', WC_Extend), ('\u{aaf6}', '\u{aaf6}', WC_Extend), ('\u{ab01}', + '\u{ab06}', WC_ALetter), ('\u{ab09}', '\u{ab0e}', WC_ALetter), ('\u{ab11}', '\u{ab16}', + WC_ALetter), ('\u{ab20}', '\u{ab26}', WC_ALetter), ('\u{ab28}', '\u{ab2e}', WC_ALetter), + ('\u{ab30}', '\u{ab5a}', WC_ALetter), ('\u{ab5c}', '\u{ab5f}', WC_ALetter), ('\u{ab64}', + '\u{ab65}', WC_ALetter), ('\u{abc0}', '\u{abe2}', WC_ALetter), ('\u{abe3}', '\u{abe4}', + WC_Extend), ('\u{abe5}', '\u{abe5}', WC_Extend), ('\u{abe6}', '\u{abe7}', WC_Extend), + ('\u{abe8}', '\u{abe8}', WC_Extend), ('\u{abe9}', '\u{abea}', WC_Extend), ('\u{abec}', + '\u{abec}', WC_Extend), ('\u{abed}', '\u{abed}', WC_Extend), ('\u{abf0}', '\u{abf9}', + WC_Numeric), ('\u{ac00}', '\u{d7a3}', WC_ALetter), ('\u{d7b0}', '\u{d7c6}', WC_ALetter), + ('\u{d7cb}', '\u{d7fb}', WC_ALetter), ('\u{fb00}', '\u{fb06}', WC_ALetter), ('\u{fb13}', + '\u{fb17}', WC_ALetter), ('\u{fb1d}', '\u{fb1d}', WC_Hebrew_Letter), ('\u{fb1e}', + '\u{fb1e}', WC_Extend), ('\u{fb1f}', '\u{fb28}', WC_Hebrew_Letter), ('\u{fb2a}', '\u{fb36}', + WC_Hebrew_Letter), ('\u{fb38}', '\u{fb3c}', WC_Hebrew_Letter), ('\u{fb3e}', '\u{fb3e}', + WC_Hebrew_Letter), ('\u{fb40}', '\u{fb41}', WC_Hebrew_Letter), ('\u{fb43}', '\u{fb44}', + WC_Hebrew_Letter), ('\u{fb46}', '\u{fb4f}', WC_Hebrew_Letter), ('\u{fb50}', '\u{fbb1}', + WC_ALetter), ('\u{fbd3}', '\u{fd3d}', WC_ALetter), 
('\u{fd50}', '\u{fd8f}', WC_ALetter), + ('\u{fd92}', '\u{fdc7}', WC_ALetter), ('\u{fdf0}', '\u{fdfb}', WC_ALetter), ('\u{fe00}', + '\u{fe0f}', WC_Extend), ('\u{fe10}', '\u{fe10}', WC_MidNum), ('\u{fe13}', '\u{fe13}', + WC_MidLetter), ('\u{fe14}', '\u{fe14}', WC_MidNum), ('\u{fe20}', '\u{fe2d}', WC_Extend), + ('\u{fe33}', '\u{fe34}', WC_ExtendNumLet), ('\u{fe4d}', '\u{fe4f}', WC_ExtendNumLet), + ('\u{fe50}', '\u{fe50}', WC_MidNum), ('\u{fe52}', '\u{fe52}', WC_MidNumLet), ('\u{fe54}', + '\u{fe54}', WC_MidNum), ('\u{fe55}', '\u{fe55}', WC_MidLetter), ('\u{fe70}', '\u{fe74}', + WC_ALetter), ('\u{fe76}', '\u{fefc}', WC_ALetter), ('\u{feff}', '\u{feff}', WC_Format), + ('\u{ff07}', '\u{ff07}', WC_MidNumLet), ('\u{ff0c}', '\u{ff0c}', WC_MidNum), ('\u{ff0e}', + '\u{ff0e}', WC_MidNumLet), ('\u{ff1a}', '\u{ff1a}', WC_MidLetter), ('\u{ff1b}', '\u{ff1b}', + WC_MidNum), ('\u{ff21}', '\u{ff3a}', WC_ALetter), ('\u{ff3f}', '\u{ff3f}', WC_ExtendNumLet), + ('\u{ff41}', '\u{ff5a}', WC_ALetter), ('\u{ff66}', '\u{ff6f}', WC_Katakana), ('\u{ff70}', + '\u{ff70}', WC_Katakana), ('\u{ff71}', '\u{ff9d}', WC_Katakana), ('\u{ff9e}', '\u{ff9f}', + WC_Extend), ('\u{ffa0}', '\u{ffbe}', WC_ALetter), ('\u{ffc2}', '\u{ffc7}', WC_ALetter), + ('\u{ffca}', '\u{ffcf}', WC_ALetter), ('\u{ffd2}', '\u{ffd7}', WC_ALetter), ('\u{ffda}', + '\u{ffdc}', WC_ALetter), ('\u{fff9}', '\u{fffb}', WC_Format), ('\u{10000}', '\u{1000b}', + WC_ALetter), ('\u{1000d}', '\u{10026}', WC_ALetter), ('\u{10028}', '\u{1003a}', WC_ALetter), + ('\u{1003c}', '\u{1003d}', WC_ALetter), ('\u{1003f}', '\u{1004d}', WC_ALetter), + ('\u{10050}', '\u{1005d}', WC_ALetter), ('\u{10080}', '\u{100fa}', WC_ALetter), + ('\u{10140}', '\u{10174}', WC_ALetter), ('\u{101fd}', '\u{101fd}', WC_Extend), ('\u{10280}', + '\u{1029c}', WC_ALetter), ('\u{102a0}', '\u{102d0}', WC_ALetter), ('\u{102e0}', '\u{102e0}', + WC_Extend), ('\u{10300}', '\u{1031f}', WC_ALetter), ('\u{10330}', '\u{10340}', WC_ALetter), + ('\u{10341}', '\u{10341}', WC_ALetter), 
('\u{10342}', '\u{10349}', WC_ALetter), + ('\u{1034a}', '\u{1034a}', WC_ALetter), ('\u{10350}', '\u{10375}', WC_ALetter), + ('\u{10376}', '\u{1037a}', WC_Extend), ('\u{10380}', '\u{1039d}', WC_ALetter), ('\u{103a0}', + '\u{103c3}', WC_ALetter), ('\u{103c8}', '\u{103cf}', WC_ALetter), ('\u{103d1}', '\u{103d5}', + WC_ALetter), ('\u{10400}', '\u{1044f}', WC_ALetter), ('\u{10450}', '\u{1049d}', WC_ALetter), + ('\u{104a0}', '\u{104a9}', WC_Numeric), ('\u{10500}', '\u{10527}', WC_ALetter), + ('\u{10530}', '\u{10563}', WC_ALetter), ('\u{10600}', '\u{10736}', WC_ALetter), + ('\u{10740}', '\u{10755}', WC_ALetter), ('\u{10760}', '\u{10767}', WC_ALetter), + ('\u{10800}', '\u{10805}', WC_ALetter), ('\u{10808}', '\u{10808}', WC_ALetter), + ('\u{1080a}', '\u{10835}', WC_ALetter), ('\u{10837}', '\u{10838}', WC_ALetter), + ('\u{1083c}', '\u{1083c}', WC_ALetter), ('\u{1083f}', '\u{10855}', WC_ALetter), + ('\u{10860}', '\u{10876}', WC_ALetter), ('\u{10880}', '\u{1089e}', WC_ALetter), + ('\u{10900}', '\u{10915}', WC_ALetter), ('\u{10920}', '\u{10939}', WC_ALetter), + ('\u{10980}', '\u{109b7}', WC_ALetter), ('\u{109be}', '\u{109bf}', WC_ALetter), + ('\u{10a00}', '\u{10a00}', WC_ALetter), ('\u{10a01}', '\u{10a03}', WC_Extend), ('\u{10a05}', + '\u{10a06}', WC_Extend), ('\u{10a0c}', '\u{10a0f}', WC_Extend), ('\u{10a10}', '\u{10a13}', + WC_ALetter), ('\u{10a15}', '\u{10a17}', WC_ALetter), ('\u{10a19}', '\u{10a33}', WC_ALetter), + ('\u{10a38}', '\u{10a3a}', WC_Extend), ('\u{10a3f}', '\u{10a3f}', WC_Extend), ('\u{10a60}', + '\u{10a7c}', WC_ALetter), ('\u{10a80}', '\u{10a9c}', WC_ALetter), ('\u{10ac0}', '\u{10ac7}', + WC_ALetter), ('\u{10ac9}', '\u{10ae4}', WC_ALetter), ('\u{10ae5}', '\u{10ae6}', WC_Extend), + ('\u{10b00}', '\u{10b35}', WC_ALetter), ('\u{10b40}', '\u{10b55}', WC_ALetter), + ('\u{10b60}', '\u{10b72}', WC_ALetter), ('\u{10b80}', '\u{10b91}', WC_ALetter), + ('\u{10c00}', '\u{10c48}', WC_ALetter), ('\u{11000}', '\u{11000}', WC_Extend), ('\u{11001}', + '\u{11001}', WC_Extend), 
('\u{11002}', '\u{11002}', WC_Extend), ('\u{11003}', '\u{11037}', + WC_ALetter), ('\u{11038}', '\u{11046}', WC_Extend), ('\u{11066}', '\u{1106f}', WC_Numeric), + ('\u{1107f}', '\u{11081}', WC_Extend), ('\u{11082}', '\u{11082}', WC_Extend), ('\u{11083}', + '\u{110af}', WC_ALetter), ('\u{110b0}', '\u{110b2}', WC_Extend), ('\u{110b3}', '\u{110b6}', + WC_Extend), ('\u{110b7}', '\u{110b8}', WC_Extend), ('\u{110b9}', '\u{110ba}', WC_Extend), + ('\u{110bd}', '\u{110bd}', WC_Format), ('\u{110d0}', '\u{110e8}', WC_ALetter), ('\u{110f0}', + '\u{110f9}', WC_Numeric), ('\u{11100}', '\u{11102}', WC_Extend), ('\u{11103}', '\u{11126}', + WC_ALetter), ('\u{11127}', '\u{1112b}', WC_Extend), ('\u{1112c}', '\u{1112c}', WC_Extend), + ('\u{1112d}', '\u{11134}', WC_Extend), ('\u{11136}', '\u{1113f}', WC_Numeric), ('\u{11150}', + '\u{11172}', WC_ALetter), ('\u{11173}', '\u{11173}', WC_Extend), ('\u{11176}', '\u{11176}', + WC_ALetter), ('\u{11180}', '\u{11181}', WC_Extend), ('\u{11182}', '\u{11182}', WC_Extend), + ('\u{11183}', '\u{111b2}', WC_ALetter), ('\u{111b3}', '\u{111b5}', WC_Extend), ('\u{111b6}', + '\u{111be}', WC_Extend), ('\u{111bf}', '\u{111c0}', WC_Extend), ('\u{111c1}', '\u{111c4}', + WC_ALetter), ('\u{111d0}', '\u{111d9}', WC_Numeric), ('\u{111da}', '\u{111da}', WC_ALetter), + ('\u{11200}', '\u{11211}', WC_ALetter), ('\u{11213}', '\u{1122b}', WC_ALetter), + ('\u{1122c}', '\u{1122e}', WC_Extend), ('\u{1122f}', '\u{11231}', WC_Extend), ('\u{11232}', + '\u{11233}', WC_Extend), ('\u{11234}', '\u{11234}', WC_Extend), ('\u{11235}', '\u{11235}', + WC_Extend), ('\u{11236}', '\u{11237}', WC_Extend), ('\u{112b0}', '\u{112de}', WC_ALetter), + ('\u{112df}', '\u{112df}', WC_Extend), ('\u{112e0}', '\u{112e2}', WC_Extend), ('\u{112e3}', + '\u{112ea}', WC_Extend), ('\u{112f0}', '\u{112f9}', WC_Numeric), ('\u{11301}', '\u{11301}', + WC_Extend), ('\u{11302}', '\u{11303}', WC_Extend), ('\u{11305}', '\u{1130c}', WC_ALetter), + ('\u{1130f}', '\u{11310}', WC_ALetter), ('\u{11313}', '\u{11328}', 
WC_ALetter), + ('\u{1132a}', '\u{11330}', WC_ALetter), ('\u{11332}', '\u{11333}', WC_ALetter), + ('\u{11335}', '\u{11339}', WC_ALetter), ('\u{1133c}', '\u{1133c}', WC_Extend), ('\u{1133d}', + '\u{1133d}', WC_ALetter), ('\u{1133e}', '\u{1133f}', WC_Extend), ('\u{11340}', '\u{11340}', + WC_Extend), ('\u{11341}', '\u{11344}', WC_Extend), ('\u{11347}', '\u{11348}', WC_Extend), + ('\u{1134b}', '\u{1134d}', WC_Extend), ('\u{11357}', '\u{11357}', WC_Extend), ('\u{1135d}', + '\u{11361}', WC_ALetter), ('\u{11362}', '\u{11363}', WC_Extend), ('\u{11366}', '\u{1136c}', + WC_Extend), ('\u{11370}', '\u{11374}', WC_Extend), ('\u{11480}', '\u{114af}', WC_ALetter), + ('\u{114b0}', '\u{114b2}', WC_Extend), ('\u{114b3}', '\u{114b8}', WC_Extend), ('\u{114b9}', + '\u{114b9}', WC_Extend), ('\u{114ba}', '\u{114ba}', WC_Extend), ('\u{114bb}', '\u{114be}', + WC_Extend), ('\u{114bf}', '\u{114c0}', WC_Extend), ('\u{114c1}', '\u{114c1}', WC_Extend), + ('\u{114c2}', '\u{114c3}', WC_Extend), ('\u{114c4}', '\u{114c5}', WC_ALetter), ('\u{114c7}', + '\u{114c7}', WC_ALetter), ('\u{114d0}', '\u{114d9}', WC_Numeric), ('\u{11580}', '\u{115ae}', + WC_ALetter), ('\u{115af}', '\u{115b1}', WC_Extend), ('\u{115b2}', '\u{115b5}', WC_Extend), + ('\u{115b8}', '\u{115bb}', WC_Extend), ('\u{115bc}', '\u{115bd}', WC_Extend), ('\u{115be}', + '\u{115be}', WC_Extend), ('\u{115bf}', '\u{115c0}', WC_Extend), ('\u{11600}', '\u{1162f}', + WC_ALetter), ('\u{11630}', '\u{11632}', WC_Extend), ('\u{11633}', '\u{1163a}', WC_Extend), + ('\u{1163b}', '\u{1163c}', WC_Extend), ('\u{1163d}', '\u{1163d}', WC_Extend), ('\u{1163e}', + '\u{1163e}', WC_Extend), ('\u{1163f}', '\u{11640}', WC_Extend), ('\u{11644}', '\u{11644}', + WC_ALetter), ('\u{11650}', '\u{11659}', WC_Numeric), ('\u{11680}', '\u{116aa}', WC_ALetter), + ('\u{116ab}', '\u{116ab}', WC_Extend), ('\u{116ac}', '\u{116ac}', WC_Extend), ('\u{116ad}', + '\u{116ad}', WC_Extend), ('\u{116ae}', '\u{116af}', WC_Extend), ('\u{116b0}', '\u{116b5}', + WC_Extend), ('\u{116b6}', 
'\u{116b6}', WC_Extend), ('\u{116b7}', '\u{116b7}', WC_Extend), + ('\u{116c0}', '\u{116c9}', WC_Numeric), ('\u{118a0}', '\u{118df}', WC_ALetter), + ('\u{118e0}', '\u{118e9}', WC_Numeric), ('\u{118ff}', '\u{118ff}', WC_ALetter), + ('\u{11ac0}', '\u{11af8}', WC_ALetter), ('\u{12000}', '\u{12398}', WC_ALetter), + ('\u{12400}', '\u{1246e}', WC_ALetter), ('\u{13000}', '\u{1342e}', WC_ALetter), + ('\u{16800}', '\u{16a38}', WC_ALetter), ('\u{16a40}', '\u{16a5e}', WC_ALetter), + ('\u{16a60}', '\u{16a69}', WC_Numeric), ('\u{16ad0}', '\u{16aed}', WC_ALetter), + ('\u{16af0}', '\u{16af4}', WC_Extend), ('\u{16b00}', '\u{16b2f}', WC_ALetter), ('\u{16b30}', + '\u{16b36}', WC_Extend), ('\u{16b40}', '\u{16b43}', WC_ALetter), ('\u{16b50}', '\u{16b59}', + WC_Numeric), ('\u{16b63}', '\u{16b77}', WC_ALetter), ('\u{16b7d}', '\u{16b8f}', WC_ALetter), + ('\u{16f00}', '\u{16f44}', WC_ALetter), ('\u{16f50}', '\u{16f50}', WC_ALetter), + ('\u{16f51}', '\u{16f7e}', WC_Extend), ('\u{16f8f}', '\u{16f92}', WC_Extend), ('\u{16f93}', + '\u{16f9f}', WC_ALetter), ('\u{1b000}', '\u{1b000}', WC_Katakana), ('\u{1bc00}', + '\u{1bc6a}', WC_ALetter), ('\u{1bc70}', '\u{1bc7c}', WC_ALetter), ('\u{1bc80}', '\u{1bc88}', + WC_ALetter), ('\u{1bc90}', '\u{1bc99}', WC_ALetter), ('\u{1bc9d}', '\u{1bc9e}', WC_Extend), + ('\u{1bca0}', '\u{1bca3}', WC_Format), ('\u{1d165}', '\u{1d166}', WC_Extend), ('\u{1d167}', + '\u{1d169}', WC_Extend), ('\u{1d16d}', '\u{1d172}', WC_Extend), ('\u{1d173}', '\u{1d17a}', + WC_Format), ('\u{1d17b}', '\u{1d182}', WC_Extend), ('\u{1d185}', '\u{1d18b}', WC_Extend), + ('\u{1d1aa}', '\u{1d1ad}', WC_Extend), ('\u{1d242}', '\u{1d244}', WC_Extend), ('\u{1d400}', + '\u{1d454}', WC_ALetter), ('\u{1d456}', '\u{1d49c}', WC_ALetter), ('\u{1d49e}', '\u{1d49f}', + WC_ALetter), ('\u{1d4a2}', '\u{1d4a2}', WC_ALetter), ('\u{1d4a5}', '\u{1d4a6}', WC_ALetter), + ('\u{1d4a9}', '\u{1d4ac}', WC_ALetter), ('\u{1d4ae}', '\u{1d4b9}', WC_ALetter), + ('\u{1d4bb}', '\u{1d4bb}', WC_ALetter), ('\u{1d4bd}', 
'\u{1d4c3}', WC_ALetter), + ('\u{1d4c5}', '\u{1d505}', WC_ALetter), ('\u{1d507}', '\u{1d50a}', WC_ALetter), + ('\u{1d50d}', '\u{1d514}', WC_ALetter), ('\u{1d516}', '\u{1d51c}', WC_ALetter), + ('\u{1d51e}', '\u{1d539}', WC_ALetter), ('\u{1d53b}', '\u{1d53e}', WC_ALetter), + ('\u{1d540}', '\u{1d544}', WC_ALetter), ('\u{1d546}', '\u{1d546}', WC_ALetter), + ('\u{1d54a}', '\u{1d550}', WC_ALetter), ('\u{1d552}', '\u{1d6a5}', WC_ALetter), + ('\u{1d6a8}', '\u{1d6c0}', WC_ALetter), ('\u{1d6c2}', '\u{1d6da}', WC_ALetter), + ('\u{1d6dc}', '\u{1d6fa}', WC_ALetter), ('\u{1d6fc}', '\u{1d714}', WC_ALetter), + ('\u{1d716}', '\u{1d734}', WC_ALetter), ('\u{1d736}', '\u{1d74e}', WC_ALetter), + ('\u{1d750}', '\u{1d76e}', WC_ALetter), ('\u{1d770}', '\u{1d788}', WC_ALetter), + ('\u{1d78a}', '\u{1d7a8}', WC_ALetter), ('\u{1d7aa}', '\u{1d7c2}', WC_ALetter), + ('\u{1d7c4}', '\u{1d7cb}', WC_ALetter), ('\u{1d7ce}', '\u{1d7ff}', WC_Numeric), + ('\u{1e800}', '\u{1e8c4}', WC_ALetter), ('\u{1e8d0}', '\u{1e8d6}', WC_Extend), ('\u{1ee00}', + '\u{1ee03}', WC_ALetter), ('\u{1ee05}', '\u{1ee1f}', WC_ALetter), ('\u{1ee21}', '\u{1ee22}', + WC_ALetter), ('\u{1ee24}', '\u{1ee24}', WC_ALetter), ('\u{1ee27}', '\u{1ee27}', WC_ALetter), + ('\u{1ee29}', '\u{1ee32}', WC_ALetter), ('\u{1ee34}', '\u{1ee37}', WC_ALetter), + ('\u{1ee39}', '\u{1ee39}', WC_ALetter), ('\u{1ee3b}', '\u{1ee3b}', WC_ALetter), + ('\u{1ee42}', '\u{1ee42}', WC_ALetter), ('\u{1ee47}', '\u{1ee47}', WC_ALetter), + ('\u{1ee49}', '\u{1ee49}', WC_ALetter), ('\u{1ee4b}', '\u{1ee4b}', WC_ALetter), + ('\u{1ee4d}', '\u{1ee4f}', WC_ALetter), ('\u{1ee51}', '\u{1ee52}', WC_ALetter), + ('\u{1ee54}', '\u{1ee54}', WC_ALetter), ('\u{1ee57}', '\u{1ee57}', WC_ALetter), + ('\u{1ee59}', '\u{1ee59}', WC_ALetter), ('\u{1ee5b}', '\u{1ee5b}', WC_ALetter), + ('\u{1ee5d}', '\u{1ee5d}', WC_ALetter), ('\u{1ee5f}', '\u{1ee5f}', WC_ALetter), + ('\u{1ee61}', '\u{1ee62}', WC_ALetter), ('\u{1ee64}', '\u{1ee64}', WC_ALetter), + ('\u{1ee67}', '\u{1ee6a}', WC_ALetter), 
('\u{1ee6c}', '\u{1ee72}', WC_ALetter), + ('\u{1ee74}', '\u{1ee77}', WC_ALetter), ('\u{1ee79}', '\u{1ee7c}', WC_ALetter), + ('\u{1ee7e}', '\u{1ee7e}', WC_ALetter), ('\u{1ee80}', '\u{1ee89}', WC_ALetter), + ('\u{1ee8b}', '\u{1ee9b}', WC_ALetter), ('\u{1eea1}', '\u{1eea3}', WC_ALetter), + ('\u{1eea5}', '\u{1eea9}', WC_ALetter), ('\u{1eeab}', '\u{1eebb}', WC_ALetter), + ('\u{1f130}', '\u{1f149}', WC_ALetter), ('\u{1f150}', '\u{1f169}', WC_ALetter), + ('\u{1f170}', '\u{1f189}', WC_ALetter), ('\u{1f1e6}', '\u{1f1ff}', WC_Regional_Indicator), + ('\u{e0001}', '\u{e0001}', WC_Format), ('\u{e0020}', '\u{e007f}', WC_Format), ('\u{e0100}', + '\u{e01ef}', WC_Extend) + ]; + +} diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs index 6852cfe11eb72..928ac91b75703 100644 --- a/src/libunicode/u_str.rs +++ b/src/libunicode/u_str.rs @@ -15,7 +15,6 @@ //! This module provides functionality to `str` that requires the Unicode methods provided by the //! unicode parts of the CharExt trait. -use self::GraphemeState::*; use core::prelude::*; use core::char; @@ -26,6 +25,7 @@ use core::slice; use core::str::Split; use tables::grapheme::GraphemeCat; +use tables::word::WordCat; /// An iterator over the words of a string, separated by a sequence of whitespace #[stable(feature = "rust1", since = "1.0.0")] @@ -39,12 +39,15 @@ pub trait UnicodeStr { fn graphemes<'a>(&'a self, is_extended: bool) -> Graphemes<'a>; fn grapheme_indices<'a>(&'a self, is_extended: bool) -> GraphemeIndices<'a>; fn words<'a>(&'a self) -> Words<'a>; + fn words_unicode<'a>(&'a self) -> UnicodeWords<'a>; fn is_whitespace(&self) -> bool; fn is_alphanumeric(&self) -> bool; fn width(&self, is_cjk: bool) -> usize; fn trim<'a>(&'a self) -> &'a str; fn trim_left<'a>(&'a self) -> &'a str; fn trim_right<'a>(&'a self) -> &'a str; + fn split_words_uax29<'a>(&'a self) -> UWordBounds<'a>; + fn split_words_uax29_indices<'a>(&'a self) -> UWordBoundIndices<'a>; } impl UnicodeStr for str { @@ -69,6 +72,14 @@ impl UnicodeStr for 
str { Words { inner: self.split(is_whitespace).filter(is_not_empty) } } + #[inline] + fn words_unicode(&self) -> UnicodeWords { + fn has_alphanumeric(s: &&str) -> bool { s.chars().any(|c| c.is_alphanumeric()) } + let has_alphanumeric: fn(&&str) -> bool = has_alphanumeric; // coerce to fn pointer + + UnicodeWords { inner: self.split_words_uax29().filter(has_alphanumeric) } + } + #[inline] fn is_whitespace(&self) -> bool { self.chars().all(|c| c.is_whitespace()) } @@ -94,6 +105,16 @@ impl UnicodeStr for str { fn trim_right(&self) -> &str { self.trim_right_matches(|c: char| c.is_whitespace()) } + + #[inline] + fn split_words_uax29(&self) -> UWordBounds { + UWordBounds { string: self, cat: None, catb: None } + } + + #[inline] + fn split_words_uax29_indices(&self) -> UWordBoundIndices { + UWordBoundIndices { start_offset: self.as_ptr() as usize, iter: self.split_words_uax29() } + } } /// External iterator for grapheme clusters and byte offsets. @@ -156,6 +177,7 @@ impl<'a> Iterator for Graphemes<'a> { #[inline] fn next(&mut self) -> Option<&'a str> { + use self::GraphemeState::*; use tables::grapheme as gr; if self.string.len() == 0 { return None; @@ -256,6 +278,7 @@ impl<'a> Iterator for Graphemes<'a> { impl<'a> DoubleEndedIterator for Graphemes<'a> { #[inline] fn next_back(&mut self) -> Option<&'a str> { + use self::GraphemeState::*; use tables::grapheme as gr; if self.string.len() == 0 { return None; @@ -554,3 +577,467 @@ impl<'a> Iterator for Words<'a> { impl<'a> DoubleEndedIterator for Words<'a> { fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() } } + +/// An iterator over the substrings of a string which, after splitting the string on +/// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries), +/// contain any characters with the +/// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic) +/// property, or with +/// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values). 
+pub struct UnicodeWords<'a> { + inner: Filter, fn(&&str) -> bool>, +} + +impl<'a> Iterator for UnicodeWords<'a> { + type Item = &'a str; + + #[inline] + fn next(&mut self) -> Option<&'a str> { self.inner.next() } +} +impl<'a> DoubleEndedIterator for UnicodeWords<'a> { + #[inline] + fn next_back(&mut self) -> Option<&'a str> { self.inner.next_back() } +} + +/// External iterator for a string's +/// [word boundaries](http://www.unicode.org/reports/tr29/#Word_Boundaries). +#[derive(Clone)] +pub struct UWordBounds<'a> { + string: &'a str, + cat: Option, + catb: Option, +} + +/// External iterator for word boundaries and byte offsets. +#[derive(Clone)] +pub struct UWordBoundIndices<'a> { + start_offset: usize, + iter: UWordBounds<'a>, +} + +impl<'a> Iterator for UWordBoundIndices<'a> { + type Item = (usize, &'a str); + + #[inline] + fn next(&mut self) -> Option<(usize, &'a str)> { + self.iter.next().map(|s| (s.as_ptr() as usize - self.start_offset, s)) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +impl<'a> DoubleEndedIterator for UWordBoundIndices<'a> { + #[inline] + fn next_back(&mut self) -> Option<(usize, &'a str)> { + self.iter.next_back().map(|s| (s.as_ptr() as usize - self.start_offset, s)) + } +} + +// state machine for word boundary rules +#[derive(Clone,Copy,PartialEq,Eq)] +enum UWordBoundsState { + Start, + Letter, + HLetter, + Numeric, + Katakana, + ExtendNumLet, + Regional, + FormatExtend(FormatExtendType), +} + +// subtypes for FormatExtend state in UWordBoundsState +#[derive(Clone,Copy,PartialEq,Eq)] +enum FormatExtendType { + AcceptAny, + AcceptNone, + RequireLetter, + RequireHLetter, + AcceptQLetter, + RequireNumeric, +} + +impl<'a> Iterator for UWordBounds<'a> { + type Item = &'a str; + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let slen = self.string.len(); + (cmp::min(slen, 1), Some(slen)) + } + + #[inline] + fn next(&mut self) -> Option<&'a str> { + use self::UWordBoundsState::*; + use 
self::FormatExtendType::*; + use tables::word as wd; + if self.string.len() == 0 { + return None; + } + + let mut take_curr = true; + let mut take_cat = true; + let mut idx = 0; + let mut saveidx = 0; + let mut state = Start; + let mut cat = wd::WC_Any; + let mut savecat = wd::WC_Any; + for (curr, ch) in self.string.char_indices() { + idx = curr; + + // if there's a category cached, grab it + cat = match self.cat { + None => wd::word_category(ch), + _ => self.cat.take().unwrap() + }; + take_cat = true; + + // handle rule WB4 + // just skip all format and extend chars + // note that Start is a special case: if there's a bunch of Format | Extend + // characters at the beginning of a block of text, dump them out as one unit. + // + // (This is not obvious from the wording of UAX#29, but if you look at the + // test cases http://www.unicode.org/Public/UNIDATA/auxiliary/WordBreakTest.txt + // then the "correct" interpretation of WB4 becomes apparent.) + if state != Start && (cat == wd::WC_Extend || cat == wd::WC_Format) { + continue; + } + + state = match state { + Start if cat == wd::WC_CR => { + idx += match self.get_next_cat(idx) { + Some(ncat) if ncat == wd::WC_LF => 1, // rule WB3 + _ => 0 + }; + break; // rule WB3a + }, + Start => match cat { + wd::WC_ALetter => Letter, // rule WB5, WB6, WB9, WB13a + wd::WC_Hebrew_Letter => HLetter, // rule WB5, WB6, WB7a, WB7b, WB9, WB13a + wd::WC_Numeric => Numeric, // rule WB8, WB10, WB12, WB13a + wd::WC_Katakana => Katakana, // rule WB13, WB13a + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a, WB13b + wd::WC_Regional_Indicator => Regional, // rule WB13c + wd::WC_LF | wd::WC_Newline => break, // rule WB3a + _ => { + if let Some(ncat) = self.get_next_cat(idx) { // rule WB4 + if ncat == wd::WC_Format || ncat == wd::WC_Extend { + state = FormatExtend(AcceptNone); + self.cat = Some(ncat); + continue; + } + } + break; // rule WB14 + } + }, + Letter | HLetter => match cat { + wd::WC_ALetter => Letter, // rule WB5 + 
wd::WC_Hebrew_Letter => HLetter, // rule WB5 + wd::WC_Numeric => Numeric, // rule WB9 + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a + wd::WC_Double_Quote if state == HLetter => { + savecat = cat; + saveidx = idx; + FormatExtend(RequireHLetter) // rule WB7b + }, + wd::WC_Single_Quote if state == HLetter => { + FormatExtend(AcceptQLetter) // rule WB7a + }, + wd::WC_MidLetter | wd::WC_MidNumLet | wd::WC_Single_Quote => { + savecat = cat; + saveidx = idx; + FormatExtend(RequireLetter) // rule WB6 + }, + _ => { + take_curr = false; + break; + } + }, + Numeric => match cat { + wd::WC_Numeric => Numeric, // rule WB8 + wd::WC_ALetter => Letter, // rule WB10 + wd::WC_Hebrew_Letter => HLetter, // rule WB10 + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a + wd::WC_MidNum | wd::WC_MidNumLet | wd::WC_Single_Quote => { + savecat = cat; + saveidx = idx; + FormatExtend(RequireNumeric) // rule WB12 + }, + _ => { + take_curr = false; + break; + } + }, + Katakana => match cat { + wd::WC_Katakana => Katakana, // rule WB13 + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a + _ => { + take_curr = false; + break; + } + }, + ExtendNumLet => match cat { + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a + wd::WC_ALetter => Letter, // rule WB13b + wd::WC_Hebrew_Letter => HLetter, // rule WB13b + wd::WC_Numeric => Numeric, // rule WB13b + wd::WC_Katakana => Katakana, // rule WB13b + _ => { + take_curr = false; + break; + } + }, + Regional => match cat { + wd::WC_Regional_Indicator => Regional, // rule WB13c + _ => { + take_curr = false; + break; + } + }, + FormatExtend(t) => match t { // handle FormatExtends depending on what type + RequireNumeric if cat == wd::WC_Numeric => Numeric, // rule WB11 + RequireLetter | AcceptQLetter if cat == wd::WC_ALetter => Letter, // rule WB7 + RequireLetter | AcceptQLetter if cat == wd::WC_Hebrew_Letter => HLetter, // WB7a + RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7b + AcceptNone | AcceptQLetter => { + take_curr = 
false; // emit all the Format|Extend characters + take_cat = false; + break; + }, + _ => break // rewind (in if statement below) + } + } + } + + if let FormatExtend(t) = state { + // we were looking for something and didn't find it; we have to back up + if t == RequireLetter || t == RequireHLetter || t == RequireNumeric { + idx = saveidx; + cat = savecat; + take_curr = false; + } + } + + self.cat = if take_curr { + idx = idx + self.string.char_at(idx).len_utf8(); + None + } else if take_cat { + Some(cat) + } else { + None + }; + + let retstr = &self.string[..idx]; + self.string = &self.string[idx..]; + Some(retstr) + } +} + +impl<'a> DoubleEndedIterator for UWordBounds<'a> { + #[inline] + fn next_back(&mut self) -> Option<&'a str> { + use self::UWordBoundsState::*; + use self::FormatExtendType::*; + use tables::word as wd; + if self.string.len() == 0 { + return None; + } + + let mut take_curr = true; + let mut take_cat = true; + let mut idx = self.string.len(); + idx -= self.string.char_at_reverse(idx).len_utf8(); + let mut previdx = idx; + let mut saveidx = idx; + let mut state = Start; + let mut savestate = Start; + let mut cat = wd::WC_Any; + for (curr, ch) in self.string.char_indices().rev() { + previdx = idx; + idx = curr; + + // if there's a category cached, grab it + cat = match self.catb { + None => wd::word_category(ch), + _ => self.catb.take().unwrap() + }; + take_cat = true; + + // backward iterator over word boundaries. Mostly the same as the forward + // iterator, with two weirdnesses: + // (1) If we encounter a single quote in the Start state, we have to check for a + // Hebrew Letter immediately before it. + // (2) Format and Extend char handling takes some gymnastics. 
+ + if cat == wd::WC_Extend || cat == wd::WC_Format { + if match state { + FormatExtend(_) | Start => false, + _ => true + } { + saveidx = previdx; + savestate = state; + state = FormatExtend(AcceptNone); + } + + if state != Start { + continue; + } + } else if state == FormatExtend(AcceptNone) { + // finished a scan of some Format|Extend chars, restore previous state + state = savestate; + previdx = saveidx; + take_cat = false; + } + + state = match state { + Start | FormatExtend(AcceptAny) => match cat { + wd::WC_ALetter => Letter, // rule WB5, WB7, WB10, WB13b + wd::WC_Hebrew_Letter => HLetter, // rule WB5, WB7, WB7c, WB10, WB13b + wd::WC_Numeric => Numeric, // rule WB8, WB9, WB11, WB13b + wd::WC_Katakana => Katakana, // rule WB13, WB13b + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a + wd::WC_Regional_Indicator => Regional, // rule WB13c + wd::WC_Extend | wd::WC_Format => FormatExtend(AcceptAny), // rule WB4 + wd::WC_Single_Quote => { + saveidx = idx; + FormatExtend(AcceptQLetter) // rule WB7a + }, + wd::WC_CR | wd::WC_LF | wd::WC_Newline => { + if state == Start { + if cat == wd::WC_LF { + idx -= match self.get_prev_cat(idx) { + Some(pcat) if pcat == wd::WC_CR => 1, // rule WB3 + _ => 0 + }; + } + } else { + take_curr = false; + } + break; // rule WB3a + }, + _ => break // rule WB14 + }, + Letter | HLetter => match cat { + wd::WC_ALetter => Letter, // rule WB5 + wd::WC_Hebrew_Letter => HLetter, // rule WB5 + wd::WC_Numeric => Numeric, // rule WB10 + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13b + wd::WC_Double_Quote if state == HLetter => { + saveidx = previdx; + FormatExtend(RequireHLetter) // rule WB7c + }, + wd::WC_MidLetter | wd::WC_MidNumLet | wd::WC_Single_Quote => { + saveidx = previdx; + FormatExtend(RequireLetter) // rule WB7 + }, + _ => { + take_curr = false; + break; + } + }, + Numeric => match cat { + wd::WC_Numeric => Numeric, // rule WB8 + wd::WC_ALetter => Letter, // rule WB9 + wd::WC_Hebrew_Letter => HLetter, // rule WB9 + 
wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13b + wd::WC_MidNum | wd::WC_MidNumLet | wd::WC_Single_Quote => { + saveidx = previdx; + FormatExtend(RequireNumeric) // rule WB11 + }, + _ => { + take_curr = false; + break; + } + }, + Katakana => match cat { + wd::WC_Katakana => Katakana, // rule WB13 + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13b + _ => { + take_curr = false; + break; + } + }, + ExtendNumLet => match cat { + wd::WC_ExtendNumLet => ExtendNumLet, // rule WB13a + wd::WC_ALetter => Letter, // rule WB13a + wd::WC_Hebrew_Letter => HLetter, // rule WB13a + wd::WC_Numeric => Numeric, // rule WB13a + wd::WC_Katakana => Katakana, // rule WB13a + _ => { + take_curr = false; + break; + } + }, + Regional => match cat { + wd::WC_Regional_Indicator => Regional, // rule WB13c + _ => { + take_curr = false; + break; + } + }, + FormatExtend(t) => match t { + RequireNumeric if cat == wd::WC_Numeric => Numeric, // rule WB12 + RequireLetter if cat == wd::WC_ALetter => Letter, // rule WB6 + RequireLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB6 + AcceptQLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7a + RequireHLetter if cat == wd::WC_Hebrew_Letter => HLetter, // rule WB7b + _ => break // backtrack will happens + } + } + } + + if let FormatExtend(t) = state { + // if we required something but didn't find it, backtrack + if t == RequireLetter || t == RequireHLetter || + t == RequireNumeric || t == AcceptNone || t == AcceptQLetter { + previdx = saveidx; + take_cat = false; + take_curr = false; + } + } + + self.catb = if take_curr { + None + } else { + idx = previdx; + if take_cat { + Some(cat) + } else { + None + } + }; + + let retstr = &self.string[idx..]; + self.string = &self.string[..idx]; + Some(retstr) + } +} + +impl<'a> UWordBounds<'a> { + #[inline] + fn get_next_cat(&self, idx: usize) -> Option { + use tables::word as wd; + let nidx = idx + self.string.char_at(idx).len_utf8(); + if nidx < self.string.len() { + let nch = 
self.string.char_at(nidx); + Some(wd::word_category(nch)) + } else { + None + } + } + + #[inline] + fn get_prev_cat(&self, idx: usize) -> Option { + use tables::word as wd; + if idx > 0 { + let nch = self.string.char_at_reverse(idx); + Some(wd::word_category(nch)) + } else { + None + } + } +}