From af59f343dc7fe88a94a79c721fb3225a275a095d Mon Sep 17 00:00:00 2001 From: Titus Date: Tue, 24 Aug 2021 16:40:38 +0200 Subject: [PATCH] =?UTF-8?q?Fix=20to=20match=20GitHub=E2=80=99s=20algorithm?= =?UTF-8?q?=20on=20unicode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I reverse engineered GitHub’s slugging algorithm. Somewhat based on #25 and #35. To do that, I created two scripts: * `generate-fixtures.mjs`, which generates a markdown file, in part from manual fixtures and in part from the Unicode General Categories, creates a gist, crawls the gist, removes it, and saves fixtures annotated with the expected result from GitHub * `generate-regex.mjs`, which generates the regex that GitHub uses for characters to ignore. The regex is about 2.5kb minzipped. This increases the file size of this project a bit. But matching GitHub is worth it in my opinion. I also investigated regex `\p{}` classes in `/u` regexes. They work mostly fine, with two caveats: a) they don’t work everywhere, so using them would require a major release, b) GitHub does not implement the same Unicode version as browsers. I tested with Unicode 13 and 14, and they include characters that GitHub handles differently. In the end, GitHub’s algorithm is mostly fine: strip non-alphanumerics, allow `-`, and turn ` ` (space) into `-`. Finally, I removed the trim functionality, because it is not implemented by GitHub. To assert this, make a heading like so in a readme: `# &#x20;`. This is a space encoded as a character reference, meaning that the markdown does not see it as the whitespace between the `#` and the content. In fact, this makes it the content. And GitHub creates a slug of `-` for it. Closes GH-22. Closes GH-25. Closes GH-35. Closes GH-38. 
Co-authored-by: Dan Flettre Co-authored-by: Jack Bates --- index.js | 10 +- package.json | 17 +- regex.js | 3 + script/generate-fixtures.mjs | 145 ++++++++++++ script/generate-regex.mjs | 62 ++++++ test/1-basic-usage.md | 5 - test/2-camel-case.md | 5 - test/3-prototype.md | 7 - test/4-matching-slugs-basic.md | 9 - test/5-matching-slugs-again.md | 5 - test/6-characters.md | 17 -- test/7-duplicates.md | 5 - test/8-non-ascii.md | 23 -- test/9-emoji.md | 7 - test/fixtures.json | 396 +++++++++++++++++++++++++++++++++ test/index.js | 195 +--------------- test/test-static.js | 6 +- 17 files changed, 627 insertions(+), 290 deletions(-) create mode 100644 regex.js create mode 100644 script/generate-fixtures.mjs create mode 100644 script/generate-regex.mjs delete mode 100644 test/1-basic-usage.md delete mode 100644 test/2-camel-case.md delete mode 100644 test/3-prototype.md delete mode 100644 test/4-matching-slugs-basic.md delete mode 100644 test/5-matching-slugs-again.md delete mode 100644 test/6-characters.md delete mode 100644 test/7-duplicates.md delete mode 100644 test/8-non-ascii.md delete mode 100644 test/9-emoji.md create mode 100644 test/fixtures.json diff --git a/index.js b/index.js index 8649178..c1f2d01 100644 --- a/index.js +++ b/index.js @@ -1,10 +1,8 @@ -const emoji = require('emoji-regex') +const regex = require('./regex.js') module.exports = BananaSlug const own = Object.hasOwnProperty -const whitespace = /\s/g -const specials = /[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,./:;<=>?@[\]^`{|}~’]/g function BananaSlug () { const self = this @@ -46,11 +44,7 @@ BananaSlug.prototype.reset = function () { function slugger (string, maintainCase) { if (typeof string !== 'string') return '' if (!maintainCase) string = string.toLowerCase() - - return string.trim() - .replace(specials, '') - .replace(emoji(), '') - .replace(whitespace, '-') + return string.replace(regex, '').replace(/ /g, '-') } BananaSlug.slug = slugger diff --git a/package.json b/package.json index 
e9417a6..c1e41bf 100644 --- a/package.json +++ b/package.json @@ -11,16 +11,23 @@ "url": "https://github.com/Flet/github-slugger/issues" }, "files": [ - "index.js" + "index.js", + "regex.js" ], - "dependencies": { - "emoji-regex": ">=6.0.0 <=6.1.1" - }, "devDependencies": { + "@octokit/rest": "^18.0.0", + "@unicode/unicode-12.1.0": "^1.0.0", + "hast-util-select": "^5.0.0", + "mdast-util-gfm": "^1.0.0", + "mdast-util-to-markdown": "^1.0.0", + "node-fetch": "^2.0.0", "nyc": "^15.0.0", + "regenerate": "^1.0.0", + "rehype-parse": "^8.0.0", "standard": "*", "tap-spec": "^5.0.0", - "tape": "^4.0.0" + "tape": "^4.0.0", + "unified": "^10.0.0" }, "homepage": "https://github.com/Flet/github-slugger", "keywords": [ diff --git a/regex.js b/regex.js new file mode 100644 index 0000000..a2c1962 --- /dev/null +++ b/regex.js @@ -0,0 +1,3 @@ +// This module is generated by `script/`. +/* eslint-disable no-control-regex, no-misleading-character-class, no-useless-escape */ +module.exports = /[\0-\x1F!-,\.\/:-@\[-\^`\{-\xA9\xAB-\xB4\xB6-\xB9\xBB-\xBF\xD7\xF7\u02C2-\u02C5\u02D2-\u02DF\u02E5-\u02EB\u02ED\u02EF-\u02FF\u0375\u0378\u0379\u037E\u0380-\u0385\u0387\u038B\u038D\u03A2\u03F6\u0482\u0530\u0557\u0558\u055A-\u055F\u0589-\u0590\u05BE\u05C0\u05C3\u05C6\u05C8-\u05CF\u05EB-\u05EE\u05F3-\u060F\u061B-\u061F\u066A-\u066D\u06D4\u06DD\u06DE\u06E9\u06FD\u06FE\u0700-\u070F\u074B\u074C\u07B2-\u07BF\u07F6-\u07F9\u07FB\u07FC\u07FE\u07FF\u082E-\u083F\u085C-\u085F\u086B-\u089F\u08B5\u08BE-\u08D2\u08E2\u0964\u0965\u0970\u0984\u098D\u098E\u0991\u0992\u09A9\u09B1\u09B3-\u09B5\u09BA\u09BB\u09C5\u09C6\u09C9\u09CA\u09CF-\u09D6\u09D8-\u09DB\u09DE\u09E4\u09E5\u09F2-\u09FB\u09FD\u09FF\u0A00\u0A04\u0A0B-\u0A0E\u0A11\u0A12\u0A29\u0A31\u0A34\u0A37\u0A3A\u0A3B\u0A3D\u0A43-\u0A46\u0A49\u0A4A\u0A4E-\u0A50\u0A52-\u0A58\u0A5D\u0A5F-\u0A65\u0A76-\u0A80\u0A84\u0A8E\u0A92\u0AA9\u0AB1\u0AB4\u0ABA\u0ABB\u0AC6\u0ACA\u0ACE\u0ACF\u0AD1-\u0ADF\u0AE4\u0AE5\u0AF0-\u0AF8\u0B00\u0B04\u0B0D\u0B0E\u0B11\u0B12\u0B29\u0B31\u0B34\u0B
3A\u0B3B\u0B45\u0B46\u0B49\u0B4A\u0B4E-\u0B55\u0B58-\u0B5B\u0B5E\u0B64\u0B65\u0B70\u0B72-\u0B81\u0B84\u0B8B-\u0B8D\u0B91\u0B96-\u0B98\u0B9B\u0B9D\u0BA0-\u0BA2\u0BA5-\u0BA7\u0BAB-\u0BAD\u0BBA-\u0BBD\u0BC3-\u0BC5\u0BC9\u0BCE\u0BCF\u0BD1-\u0BD6\u0BD8-\u0BE5\u0BF0-\u0BFF\u0C0D\u0C11\u0C29\u0C3A-\u0C3C\u0C45\u0C49\u0C4E-\u0C54\u0C57\u0C5B-\u0C5F\u0C64\u0C65\u0C70-\u0C7F\u0C84\u0C8D\u0C91\u0CA9\u0CB4\u0CBA\u0CBB\u0CC5\u0CC9\u0CCE-\u0CD4\u0CD7-\u0CDD\u0CDF\u0CE4\u0CE5\u0CF0\u0CF3-\u0CFF\u0D04\u0D0D\u0D11\u0D45\u0D49\u0D4F-\u0D53\u0D58-\u0D5E\u0D64\u0D65\u0D70-\u0D79\u0D80\u0D81\u0D84\u0D97-\u0D99\u0DB2\u0DBC\u0DBE\u0DBF\u0DC7-\u0DC9\u0DCB-\u0DCE\u0DD5\u0DD7\u0DE0-\u0DE5\u0DF0\u0DF1\u0DF4-\u0E00\u0E3B-\u0E3F\u0E4F\u0E5A-\u0E80\u0E83\u0E85\u0E8B\u0EA4\u0EA6\u0EBE\u0EBF\u0EC5\u0EC7\u0ECE\u0ECF\u0EDA\u0EDB\u0EE0-\u0EFF\u0F01-\u0F17\u0F1A-\u0F1F\u0F2A-\u0F34\u0F36\u0F38\u0F3A-\u0F3D\u0F48\u0F6D-\u0F70\u0F85\u0F98\u0FBD-\u0FC5\u0FC7-\u0FFF\u104A-\u104F\u109E\u109F\u10C6\u10C8-\u10CC\u10CE\u10CF\u10FB\u1249\u124E\u124F\u1257\u1259\u125E\u125F\u1289\u128E\u128F\u12B1\u12B6\u12B7\u12BF\u12C1\u12C6\u12C7\u12D7\u1311\u1316\u1317\u135B\u135C\u1360-\u137F\u1390-\u139F\u13F6\u13F7\u13FE-\u1400\u166D\u166E\u1680\u169B-\u169F\u16EB-\u16ED\u16F9-\u16FF\u170D\u1715-\u171F\u1735-\u173F\u1754-\u175F\u176D\u1771\u1774-\u177F\u17D4-\u17D6\u17D8-\u17DB\u17DE\u17DF\u17EA-\u180A\u180E\u180F\u181A-\u181F\u1879-\u187F\u18AB-\u18AF\u18F6-\u18FF\u191F\u192C-\u192F\u193C-\u1945\u196E\u196F\u1975-\u197F\u19AC-\u19AF\u19CA-\u19CF\u19DA-\u19FF\u1A1C-\u1A1F\u1A5F\u1A7D\u1A7E\u1A8A-\u1A8F\u1A9A-\u1AA6\u1AA8-\u1AAF\u1ABF-\u1AFF\u1B4C-\u1B4F\u1B5A-\u1B6A\u1B74-\u1B7F\u1BF4-\u1BFF\u1C38-\u1C3F\u1C4A-\u1C4C\u1C7E\u1C7F\u1C89-\u1C8F\u1CBB\u1CBC\u1CC0-\u1CCF\u1CD3\u1CFB-\u1CFF\u1DFA\u1F16\u1F17\u1F1E\u1F1F\u1F46\u1F47\u1F4E\u1F4F\u1F58\u1F5A\u1F5C\u1F5E\u1F7E\u1F7F\u1FB5\u1FBD\u1FBF-\u1FC1\u1FC5\u1FCD-\u1FCF\u1FD4\u1FD5\u1FDC-\u1FDF\u1FED-\u1FF1\u1FF5\u1FFD-\u203E\u2041-\u2053\u2055-\u2070\u2072-\u207E\u2080-\u20
8F\u209D-\u20CF\u20F1-\u2101\u2103-\u2106\u2108\u2109\u2114\u2116-\u2118\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u2140-\u2144\u214A-\u214D\u214F-\u215F\u2189-\u24B5\u24EA-\u2BFF\u2C2F\u2C5F\u2CE5-\u2CEA\u2CF4-\u2CFF\u2D26\u2D28-\u2D2C\u2D2E\u2D2F\u2D68-\u2D6E\u2D70-\u2D7E\u2D97-\u2D9F\u2DA7\u2DAF\u2DB7\u2DBF\u2DC7\u2DCF\u2DD7\u2DDF\u2E00-\u2E2E\u2E30-\u3004\u3008-\u3020\u3030\u3036\u3037\u303D-\u3040\u3097\u3098\u309B\u309C\u30A0\u30FB\u3100-\u3104\u3130\u318F-\u319F\u31BB-\u31EF\u3200-\u33FF\u4DB6-\u4DFF\u9FF0-\u9FFF\uA48D-\uA4CF\uA4FE\uA4FF\uA60D-\uA60F\uA62C-\uA63F\uA673\uA67E\uA6F2-\uA716\uA720\uA721\uA789\uA78A\uA7C0\uA7C1\uA7C7-\uA7F6\uA828-\uA83F\uA874-\uA87F\uA8C6-\uA8CF\uA8DA-\uA8DF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA954-\uA95F\uA97D-\uA97F\uA9C1-\uA9CE\uA9DA-\uA9DF\uA9FF\uAA37-\uAA3F\uAA4E\uAA4F\uAA5A-\uAA5F\uAA77-\uAA79\uAAC3-\uAADA\uAADE\uAADF\uAAF0\uAAF1\uAAF7-\uAB00\uAB07\uAB08\uAB0F\uAB10\uAB17-\uAB1F\uAB27\uAB2F\uAB5B\uAB68-\uAB6F\uABEB\uABEE\uABEF\uABFA-\uABFF\uD7A4-\uD7AF\uD7C7-\uD7CA\uD7FC-\uD7FF\uE000-\uF8FF\uFA6E\uFA6F\uFADA-\uFAFF\uFB07-\uFB12\uFB18-\uFB1C\uFB29\uFB37\uFB3D\uFB3F\uFB42\uFB45\uFBB2-\uFBD2\uFD3E-\uFD4F\uFD90\uFD91\uFDC8-\uFDEF\uFDFC-\uFDFF\uFE10-\uFE1F\uFE30-\uFE32\uFE35-\uFE4C\uFE50-\uFE6F\uFE75\uFEFD-\uFF0F\uFF1A-\uFF20\uFF3B-\uFF3E\uFF40\uFF5B-\uFF65\uFFBF-\uFFC1\uFFC8\uFFC9\uFFD0\uFFD1\uFFD8\uFFD9\uFFDD-\uFFFF]|\uD800[\uDC0C\uDC27\uDC3B\uDC3E\uDC4E\uDC4F\uDC5E-\uDC7F\uDCFB-\uDD3F\uDD75-\uDDFC\uDDFE-\uDE7F\uDE9D-\uDE9F\uDED1-\uDEDF\uDEE1-\uDEFF\uDF20-\uDF2C\uDF4B-\uDF4F\uDF7B-\uDF7F\uDF9E\uDF9F\uDFC4-\uDFC7\uDFD0\uDFD6-\uDFFF]|\uD801[\uDC9E\uDC9F\uDCAA-\uDCAF\uDCD4-\uDCD7\uDCFC-\uDCFF\uDD28-\uDD2F\uDD64-\uDDFF\uDF37-\uDF3F\uDF56-\uDF5F\uDF68-\uDFFF]|\uD802[\uDC06\uDC07\uDC09\uDC36\uDC39-\uDC3B\uDC3D\uDC3E\uDC56-\uDC5F\uDC77-\uDC7F\uDC9F-\uDCDF\uDCF3\uDCF6-\uDCFF\uDD16-\uDD1F\uDD3A-\uDD7F\uDDB8-\uDDBD\uDDC0-\uDDFF\uDE04\uDE07-\uDE0B\uDE14\uDE18\uDE36\uDE37\uDE3B-\uDE3E\uDE40-\uDE5F\uDE7D-\uDE7F\uDE9D-\uDEBF\uDEC8\uDEE7-\
uDEFF\uDF36-\uDF3F\uDF56-\uDF5F\uDF73-\uDF7F\uDF92-\uDFFF]|\uD803[\uDC49-\uDC7F\uDCB3-\uDCBF\uDCF3-\uDCFF\uDD28-\uDD2F\uDD3A-\uDEFF\uDF1D-\uDF26\uDF28-\uDF2F\uDF51-\uDFDF\uDFF7-\uDFFF]|\uD804[\uDC47-\uDC65\uDC70-\uDC7E\uDCBB-\uDCCF\uDCE9-\uDCEF\uDCFA-\uDCFF\uDD35\uDD40-\uDD43\uDD47-\uDD4F\uDD74\uDD75\uDD77-\uDD7F\uDDC5-\uDDC8\uDDCD-\uDDCF\uDDDB\uDDDD-\uDDFF\uDE12\uDE38-\uDE3D\uDE3F-\uDE7F\uDE87\uDE89\uDE8E\uDE9E\uDEA9-\uDEAF\uDEEB-\uDEEF\uDEFA-\uDEFF\uDF04\uDF0D\uDF0E\uDF11\uDF12\uDF29\uDF31\uDF34\uDF3A\uDF45\uDF46\uDF49\uDF4A\uDF4E\uDF4F\uDF51-\uDF56\uDF58-\uDF5C\uDF64\uDF65\uDF6D-\uDF6F\uDF75-\uDFFF]|\uD805[\uDC4B-\uDC4F\uDC5A-\uDC5D\uDC60-\uDC7F\uDCC6\uDCC8-\uDCCF\uDCDA-\uDD7F\uDDB6\uDDB7\uDDC1-\uDDD7\uDDDE-\uDDFF\uDE41-\uDE43\uDE45-\uDE4F\uDE5A-\uDE7F\uDEB9-\uDEBF\uDECA-\uDEFF\uDF1B\uDF1C\uDF2C-\uDF2F\uDF3A-\uDFFF]|\uD806[\uDC3B-\uDC9F\uDCEA-\uDCFE\uDD00-\uDD9F\uDDA8\uDDA9\uDDD8\uDDD9\uDDE2\uDDE5-\uDDFF\uDE3F-\uDE46\uDE48-\uDE4F\uDE9A-\uDE9C\uDE9E-\uDEBF\uDEF9-\uDFFF]|\uD807[\uDC09\uDC37\uDC41-\uDC4F\uDC5A-\uDC71\uDC90\uDC91\uDCA8\uDCB7-\uDCFF\uDD07\uDD0A\uDD37-\uDD39\uDD3B\uDD3E\uDD48-\uDD4F\uDD5A-\uDD5F\uDD66\uDD69\uDD8F\uDD92\uDD99-\uDD9F\uDDAA-\uDEDF\uDEF7-\uDFFF]|\uD808[\uDF9A-\uDFFF]|\uD809[\uDC6F-\uDC7F\uDD44-\uDFFF]|[\uD80A\uD80B\uD80E-\uD810\uD812-\uD819\uD823-\uD82B\uD82D\uD82E\uD830-\uD833\uD837\uD839\uD83D-\uD83F\uD87B-\uD87D\uD87F-\uDB3F\uDB41-\uDBFF][\uDC00-\uDFFF]|\uD80D[\uDC2F-\uDFFF]|\uD811[\uDE47-\uDFFF]|\uD81A[\uDE39-\uDE3F\uDE5F\uDE6A-\uDECF\uDEEE\uDEEF\uDEF5-\uDEFF\uDF37-\uDF3F\uDF44-\uDF4F\uDF5A-\uDF62\uDF78-\uDF7C\uDF90-\uDFFF]|\uD81B[\uDC00-\uDE3F\uDE80-\uDEFF\uDF4B-\uDF4E\uDF88-\uDF8E\uDFA0-\uDFDF\uDFE2\uDFE4-\uDFFF]|\uD821[\uDFF8-\uDFFF]|\uD822[\uDEF3-\uDFFF]|\uD82C[\uDD1F-\uDD4F\uDD53-\uDD63\uDD68-\uDD6F\uDEFC-\uDFFF]|\uD82F[\uDC6B-\uDC6F\uDC7D-\uDC7F\uDC89-\uDC8F\uDC9A-\uDC9C\uDC9F-\uDFFF]|\uD834[\uDC00-\uDD64\uDD6A-\uDD6C\uDD73-\uDD7A\uDD83\uDD84\uDD8C-\uDDA9\uDDAE-\uDE41\uDE45-\uDFFF]|\uD835[\uDC55\uDC9D\uDCA0\uDCA1\uDCA3\uDCA4\uDCA
7\uDCA8\uDCAD\uDCBA\uDCBC\uDCC4\uDD06\uDD0B\uDD0C\uDD15\uDD1D\uDD3A\uDD3F\uDD45\uDD47-\uDD49\uDD51\uDEA6\uDEA7\uDEC1\uDEDB\uDEFB\uDF15\uDF35\uDF4F\uDF6F\uDF89\uDFA9\uDFC3\uDFCC\uDFCD]|\uD836[\uDC00-\uDDFF\uDE37-\uDE3A\uDE6D-\uDE74\uDE76-\uDE83\uDE85-\uDE9A\uDEA0\uDEB0-\uDFFF]|\uD838[\uDC07\uDC19\uDC1A\uDC22\uDC25\uDC2B-\uDCFF\uDD2D-\uDD2F\uDD3E\uDD3F\uDD4A-\uDD4D\uDD4F-\uDEBF\uDEFA-\uDFFF]|\uD83A[\uDCC5-\uDCCF\uDCD7-\uDCFF\uDD4C-\uDD4F\uDD5A-\uDFFF]|\uD83B[\uDC00-\uDDFF\uDE04\uDE20\uDE23\uDE25\uDE26\uDE28\uDE33\uDE38\uDE3A\uDE3C-\uDE41\uDE43-\uDE46\uDE48\uDE4A\uDE4C\uDE50\uDE53\uDE55\uDE56\uDE58\uDE5A\uDE5C\uDE5E\uDE60\uDE63\uDE65\uDE66\uDE6B\uDE73\uDE78\uDE7D\uDE7F\uDE8A\uDE9C-\uDEA0\uDEA4\uDEAA\uDEBC-\uDFFF]|\uD83C[\uDC00-\uDD2F\uDD4A-\uDD4F\uDD6A-\uDD6F\uDD8A-\uDFFF]|\uD869[\uDED7-\uDEFF]|\uD86D[\uDF35-\uDF3F]|\uD86E[\uDC1E\uDC1F]|\uD873[\uDEA2-\uDEAF]|\uD87A[\uDFE1-\uDFFF]|\uD87E[\uDE1E-\uDFFF]|\uDB40[\uDC00-\uDCFF\uDDF0-\uDFFF]/g diff --git a/script/generate-fixtures.mjs b/script/generate-fixtures.mjs new file mode 100644 index 0000000..67b6082 --- /dev/null +++ b/script/generate-fixtures.mjs @@ -0,0 +1,145 @@ +import { promises as fs } from 'node:fs' +import { Octokit } from '@octokit/rest' +import fetch from 'node-fetch' +import { unified } from 'unified' +import rehypeParse from 'rehype-parse' +import { select, selectAll } from 'hast-util-select' +import { toMarkdown } from 'mdast-util-to-markdown' +import { gfmToMarkdown } from 'mdast-util-gfm' + +// Note: the GH token needs `gists` access! +const ghToken = process.env.GH_TOKEN || process.env.GITHUB_TOKEN + +if (!ghToken) { + throw new Error('Missing GitHub token: expected `GH_TOKEN` in env') +} + +const octo = new Octokit({ auth: 'token ' + ghToken }) +const categoryBase = new URL('../node_modules/@unicode/unicode-12.1.0/General_Category/', import.meta.url) + +// Take up to N samples from each category. 
+const samples = 400 + +const otherTests = [ + { name: 'Basic usage', input: 'alpha' }, + { name: 'Basic usage (again)', input: 'alpha' }, + { name: 'Camelcase', input: 'bravoCharlieDelta' }, + { name: 'Prototypal injection: proto', input: '__proto__' }, + { name: 'Prototypal injection: proto (again)', input: '__proto__' }, + { name: 'Prototypal injection: has own', input: 'hasOwnProperty' }, + { name: 'Repetition (1)', input: 'echo' }, + { name: 'Repetition (2)', input: 'echo' }, + { name: 'Repetition (3)', input: 'echo 1' }, + { name: 'Repetition (4)', input: 'echo-1' }, + { name: 'Repetition (5)', input: 'echo' }, + { name: 'More repetition (1)', input: 'foxtrot-1' }, + { name: 'More repetition (2)', input: 'foxtrot' }, + { name: 'More repetition (3)', input: 'foxtrot' }, + { name: 'Characters: dash', input: 'heading with a - dash' }, + { name: 'Characters: underscore', input: 'heading with an _ underscore' }, + { name: 'Characters: dot', input: 'heading with a period.txt' }, + { name: 'Characters: dots, parents, brackets', input: 'exchange.bind_headers(exchange, routing [, bindCallback])' }, + { name: 'Characters: space', input: ' ', markdownOverwrite: '# ' }, + { name: 'Characters: initial space', input: ' a', markdownOverwrite: '# a' }, + { name: 'Characters: final space', input: 'a ', markdownOverwrite: '# a ' }, + { name: 'Characters: initial and final spaces', input: ' a ', markdownOverwrite: '# a ' }, + { name: 'Characters: initial and final dashes', input: '-a-' }, + { name: 'Characters: apostrophe', input: 'apostrophe’s should be trimmed' }, + { name: 'Some more duplicates (1)', input: 'golf' }, + { name: 'Some more duplicates (2)', input: 'golf' }, + { name: 'Some more duplicates (3)', input: 'golf' }, + { name: 'Non-ascii: ♥', input: 'I ♥ unicode' }, + { name: 'Non-ascii: -', input: 'dash-dash' }, + { name: 'Non-ascii: –', input: 'en–dash' }, + { name: 'Non-ascii: –', input: 'em–dash' }, + { name: 'Non-ascii: 😄', input: '😄 unicode emoji' }, + { name: 
'Non-ascii: 😄-😄', input: '😄-😄 unicode emoji' }, + { name: 'Non-ascii: 😄_😄', input: '😄_😄 unicode emoji' }, + { name: 'Non-ascii: 😄', input: '😄 - an emoji' }, + { name: 'Non-ascii: :smile:', input: ':smile: - a gemoji' }, + { name: 'Non-ascii: Cyrillic (1)', input: 'Привет' }, + { name: 'Non-ascii: Cyrillic (2)', input: 'Профили пользователей' }, + { name: 'Non-ascii: Cyrillic + Han', input: 'Привет non-latin 你好' }, + { name: 'Gemoji (1)', input: ':ok: No underscore' }, + { name: 'Gemoji (2)', input: ':ok_hand: Single' }, + { name: 'Gemoji (3)', input: ':ok_hand::hatched_chick: Two in a row with no spaces' }, + { name: 'Gemoji (4)', input: ':ok_hand: :hatched_chick: Two in a row' } +] + +main() + +async function main () { + const files = await fs.readdir(categoryBase) + const tests = [...otherTests] + let index = -1 + + // Create a test case with a bunch of examples. + while (++index < files.length) { + const name = files[index] + + if (name === 'index.js') continue + + // These result in Git(Hub) thinking it’s a binary file. + if (name === 'Control' || name === 'Surrogate') continue + + // This prevents GH from rendering markdown to HTML. + if (name === 'Other') continue + + const fp = `./${name}/code-points.js` + const { default: codePoints } = await import(new URL(fp, categoryBase)) + const subs = [] + + let n = -1 + + while (++n < samples) { + subs.push(codePoints[Math.floor(codePoints.length / samples * n)]) + } + + subs.push(codePoints[codePoints.length - 1]) + + tests.push({ name, input: 'a' + [...new Set(subs)].map(d => String.fromCodePoint(d)).join(' ') + 'b' }) + } + + // Create a Gist. 
+ const filename = 'readme.md' + const gistResult = await octo.gists.create({ + files: { + [filename]: { + content: tests.map(d => { + return d.markdownOverwrite || toMarkdown({ type: 'heading', depth: 1, children: [{ type: 'text', value: d.input }] }, { extensions: [gfmToMarkdown()] }) + }).join('\n\n') + } + } + }) + + const file = gistResult.data.files[filename] + + if (!file.language) { + throw new Error('The generated markdown was seen as binary data instead of text by GitHub. This is likely because there are weird characters (such as control characters or lone surrogates) in it') + } + + // Fetch the rendered page. + const response = await fetch(gistResult.data.html_url, { + headers: { Authorization: 'token ' + ghToken } + }) + + const doc = await response.text() + + // Remove the Gist. + await octo.gists.delete({ gist_id: gistResult.data.id }) + + const tree = unified().use(rehypeParse).parse(doc) + const markdownBody = select('.markdown-body', tree) + + if (!markdownBody) { + throw new Error('The generated markdown could not be rendered by GitHub as HTML. This is likely because there are weird characters in it') + } + + const anchors = selectAll('h1 .anchor', markdownBody) + + anchors.forEach((node, i) => { + tests[i].expected = node.properties.href.slice(1) + }) + + await fs.writeFile(new URL('../test/fixtures.json', import.meta.url), JSON.stringify(tests, null, 2) + '\n') +} diff --git a/script/generate-regex.mjs b/script/generate-regex.mjs new file mode 100644 index 0000000..fac8f42 --- /dev/null +++ b/script/generate-regex.mjs @@ -0,0 +1,62 @@ +import { promises as fs } from 'node:fs' +import regenerate from 'regenerate' +import alphabetics from '@unicode/unicode-12.1.0/Binary_Property/Alphabetic/code-points.js' + +const categoryBase = new URL('../node_modules/@unicode/unicode-12.1.0/General_Category/', import.meta.url) + +// Unicode General Categories to remove. 
+const ranges = [ + // Some numbers: + 'Other_Number', + + // Some punctuation: + 'Close_Punctuation', + 'Final_Punctuation', + 'Initial_Punctuation', + 'Open_Punctuation', + 'Other_Punctuation', + // All except a normal `-` (dash) + 'Dash_Punctuation', + + // All: + 'Symbol', + 'Control', + 'Private_Use', + 'Format', + 'Unassigned', + + // All except a normal ` ` (space) + 'Separator' +] + +main() + +async function main () { + const generator = regenerate() + + let index = -1 + + // Add code points to strip. + while (++index < ranges.length) { + const name = ranges[index] + const fp = `./${name}/code-points.js` + const { default: codePoints } = await import(new URL(fp, categoryBase)) + + generator.add(codePoints) + } + + generator + // Some overlap between letters and Other Symbol. + .remove(alphabetics) + // Spaces are turned to `-` + .remove(' ') + // Dash is kept. + .remove('-') + + await fs.writeFile('regex.js', [ + '// This module is generated by `script/`.', + '/* eslint-disable no-control-regex, no-misleading-character-class, no-useless-escape */', + 'module.exports = ' + generator.toRegExp() + 'g', + '' + ].join('\n')) +} diff --git a/test/1-basic-usage.md b/test/1-basic-usage.md deleted file mode 100644 index 8419c49..0000000 --- a/test/1-basic-usage.md +++ /dev/null @@ -1,5 +0,0 @@ -# foo - -# foo bar - -# foo diff --git a/test/2-camel-case.md b/test/2-camel-case.md deleted file mode 100644 index 9931fc6..0000000 --- a/test/2-camel-case.md +++ /dev/null @@ -1,5 +0,0 @@ -# foo - -# fooCamelCase - -# fooCamelCase diff --git a/test/3-prototype.md b/test/3-prototype.md deleted file mode 100644 index cefddf3..0000000 --- a/test/3-prototype.md +++ /dev/null @@ -1,7 +0,0 @@ -# `__proto__` - -# `__proto__` - -# hasOwnProperty - -# foo diff --git a/test/4-matching-slugs-basic.md b/test/4-matching-slugs-basic.md deleted file mode 100644 index ac7560d..0000000 --- a/test/4-matching-slugs-basic.md +++ /dev/null @@ -1,9 +0,0 @@ -# foo - -# foo - -# foo 1 - -# foo-1 - 
-# foo diff --git a/test/5-matching-slugs-again.md b/test/5-matching-slugs-again.md deleted file mode 100644 index 3742215..0000000 --- a/test/5-matching-slugs-again.md +++ /dev/null @@ -1,5 +0,0 @@ -# foo-1 - -# foo - -# foo diff --git a/test/6-characters.md b/test/6-characters.md deleted file mode 100644 index 61d826c..0000000 --- a/test/6-characters.md +++ /dev/null @@ -1,17 +0,0 @@ -# heading with a - dash - -# heading with an _ underscore - -# heading with a period.txt - -# exchange.bind_headers(exchange, routing [, bindCallback]) - -# - -# - -# initial space - -# final space - -# heading with apostrophe’s diff --git a/test/7-duplicates.md b/test/7-duplicates.md deleted file mode 100644 index 1854107..0000000 --- a/test/7-duplicates.md +++ /dev/null @@ -1,5 +0,0 @@ -# duplicates - -# duplicates - -# duplicates diff --git a/test/8-non-ascii.md b/test/8-non-ascii.md deleted file mode 100644 index bed2661..0000000 --- a/test/8-non-ascii.md +++ /dev/null @@ -1,23 +0,0 @@ -# I ♥ unicode - -# Dash-dash - -# en–dash! 
- -# em–dash - -# 😄 unicode emoji - -# 😄-😄 unicode emoji - -# 😄_😄 unicode emoji - -# 😄 - an emoji - -# :smile: - a gemoji - -# Привет - -# Профили пользователей - -# Привет non-latin 你好 diff --git a/test/9-emoji.md b/test/9-emoji.md deleted file mode 100644 index fd4d2e7..0000000 --- a/test/9-emoji.md +++ /dev/null @@ -1,7 +0,0 @@ -# :ok: No underscore - -# :ok_hand: Single - -# :ok_hand::hatched_chick: Two in a row with no spaces - -# :ok_hand: :hatched_chick: Two in a row diff --git a/test/fixtures.json b/test/fixtures.json new file mode 100644 index 0000000..0c5c75f --- /dev/null +++ b/test/fixtures.json @@ -0,0 +1,396 @@ +[ + { + "name": "Basic usage", + "input": "alpha", + "expected": "alpha" + }, + { + "name": "Basic usage (again)", + "input": "alpha", + "expected": "alpha-1" + }, + { + "name": "Camelcase", + "input": "bravoCharlieDelta", + "expected": "bravocharliedelta" + }, + { + "name": "Prototypal injection: proto", + "input": "__proto__", + "expected": "__proto__" + }, + { + "name": "Prototypal injection: proto (again)", + "input": "__proto__", + "expected": "__proto__-1" + }, + { + "name": "Prototypal injection: has own", + "input": "hasOwnProperty", + "expected": "hasownproperty" + }, + { + "name": "Repetition (1)", + "input": "echo", + "expected": "echo" + }, + { + "name": "Repetition (2)", + "input": "echo", + "expected": "echo-1" + }, + { + "name": "Repetition (3)", + "input": "echo 1", + "expected": "echo-1-1" + }, + { + "name": "Repetition (4)", + "input": "echo-1", + "expected": "echo-1-2" + }, + { + "name": "Repetition (5)", + "input": "echo", + "expected": "echo-2" + }, + { + "name": "More repetition (1)", + "input": "foxtrot-1", + "expected": "foxtrot-1" + }, + { + "name": "More repetition (2)", + "input": "foxtrot", + "expected": "foxtrot" + }, + { + "name": "More repetition (3)", + "input": "foxtrot", + "expected": "foxtrot-2" + }, + { + "name": "Characters: dash", + "input": "heading with a - dash", + "expected": "heading-with-a---dash" + 
}, + { + "name": "Characters: underscore", + "input": "heading with an _ underscore", + "expected": "heading-with-an-_-underscore" + }, + { + "name": "Characters: dot", + "input": "heading with a period.txt", + "expected": "heading-with-a-periodtxt" + }, + { + "name": "Characters: dots, parents, brackets", + "input": "exchange.bind_headers(exchange, routing [, bindCallback])", + "expected": "exchangebind_headersexchange-routing--bindcallback" + }, + { + "name": "Characters: space", + "input": " ", + "markdownOverwrite": "# ", + "expected": "-" + }, + { + "name": "Characters: initial space", + "input": " a", + "markdownOverwrite": "# a", + "expected": "-a" + }, + { + "name": "Characters: final space", + "input": "a ", + "markdownOverwrite": "# a ", + "expected": "a-" + }, + { + "name": "Characters: initial and final spaces", + "input": " a ", + "markdownOverwrite": "# a ", + "expected": "-a-" + }, + { + "name": "Characters: initial and final dashes", + "input": "-a-", + "expected": "-a--1" + }, + { + "name": "Characters: apostrophe", + "input": "apostrophe’s should be trimmed", + "expected": "apostrophes-should-be-trimmed" + }, + { + "name": "Some more duplicates (1)", + "input": "golf", + "expected": "golf" + }, + { + "name": "Some more duplicates (2)", + "input": "golf", + "expected": "golf-1" + }, + { + "name": "Some more duplicates (3)", + "input": "golf", + "expected": "golf-2" + }, + { + "name": "Non-ascii: ♥", + "input": "I ♥ unicode", + "expected": "i--unicode" + }, + { + "name": "Non-ascii: -", + "input": "dash-dash", + "expected": "dash-dash" + }, + { + "name": "Non-ascii: –", + "input": "en–dash", + "expected": "endash" + }, + { + "name": "Non-ascii: –", + "input": "em–dash", + "expected": "emdash" + }, + { + "name": "Non-ascii: 😄", + "input": "😄 unicode emoji", + "expected": "-unicode-emoji" + }, + { + "name": "Non-ascii: 😄-😄", + "input": "😄-😄 unicode emoji", + "expected": "--unicode-emoji" + }, + { + "name": "Non-ascii: 😄_😄", + "input": "😄_😄 unicode 
emoji", + "expected": "_-unicode-emoji" + }, + { + "name": "Non-ascii: 😄", + "input": "😄 - an emoji", + "expected": "---an-emoji" + }, + { + "name": "Non-ascii: :smile:", + "input": ":smile: - a gemoji", + "expected": "smile---a-gemoji" + }, + { + "name": "Non-ascii: Cyrillic (1)", + "input": "Привет", + "expected": "привет" + }, + { + "name": "Non-ascii: Cyrillic (2)", + "input": "Профили пользователей", + "expected": "профили-пользователей" + }, + { + "name": "Non-ascii: Cyrillic + Han", + "input": "Привет non-latin 你好", + "expected": "привет-non-latin-你好" + }, + { + "name": "Gemoji (1)", + "input": ":ok: No underscore", + "expected": "ok-no-underscore" + }, + { + "name": "Gemoji (2)", + "input": ":ok_hand: Single", + "expected": "ok_hand-single" + }, + { + "name": "Gemoji (3)", + "input": ":ok_hand::hatched_chick: Two in a row with no spaces", + "expected": "ok_handhatched_chick-two-in-a-row-with-no-spaces" + }, + { + "name": "Gemoji (4)", + "input": ":ok_hand: :hatched_chick: Two in a row", + "expected": "ok_hand-hatched_chick-two-in-a-row" + }, + { + "name": "Cased_Letter", + "input": "aA J T d n x Æ Ð Û å ï ú Ą Ď ė ġ ī ĵ Ŀ ʼn œ ŝ ŧ ű Ż ƅ Ə Ƙ Ƣ Ƭ ƶ Dž Ǐ Ǚ ǣ ǭ Ƿ ȁ ȋ ȕ ȟ Ȩ Ȳ ȼ Ɇ ɐ ɚ ɤ ɮ ɸ ʂ ʌ ʗ ʡ ʪ Ͷ Ό Η Ρ ά ζ π ϊ ϔ Ϟ Ϩ ϲ ϼ І А К Ф Ю и т ь і Ѡ Ѫ Ѵ Ѿ ҏ ҙ ң ҭ ҷ Ӂ Ӌ ӕ ӟ ө ӳ ӽ ԇ Ԑ Ԛ Ԥ Ԯ Թ Ճ Ս ՠ ժ մ վ ֈ Ⴉ Ⴒ Ⴜ Ⴧ ი ტ წ ჶ Ꭲ Ꭼ Ꮆ Ꮐ Ꮚ Ꮤ Ꮮ Ꮷ Ᏹ ᏽ Ა Ლ Ფ Ხ Ჸ ᴄ ᴎ ᴘ ᴢ ᵫ ᵴ ᵿ ᶉ ᶓ Ḃ Ḍ Ḗ Ḡ Ḫ Ḵ Ḿ Ṉ Ṓ ṛ ṥ ṯ ṹ ẃ ẍ ẗ ạ ẫ ẵ ế ỉ ồ ờ Ủ Ự Ỻ ἄ Ἆ Ἒ ἦ ἰ Ἲ ὄ ὒ ὠ Ὢ έ ώ ᾉ ᾓ ᾝ ᾧ ᾱ ᾼ Ή Ὶ Ῠ Ὸ ℍ ℚ ℭ ℾ Ⰰ Ⰺ Ⱄ Ⱎ Ⱘ ⰳ ⰽ ⱇ ⱑ ⱛ ⱦ Ɐ ⱹ ⲅ ⲏ ⲙ ⲣ ⲭ ⲷ ⳁ ⳋ ⳕ ⳟ Ⳳ ⴇ ⴑ ⴛ ⴥ ꙇ ꙑ ꙛ ꙥ ꚁ ꚋ ꚕ ꜥ ꜯ Ꜹ Ꝃ Ꝍ Ꝗ Ꝡ Ꝫ ꝵ ꝿ ꞌ ꞗ ꞡ Ɜ ꞵ ꞿ ꬲ ꬼ ꭆ ꭐ ꭚ ꭱ ꭻ ꮅ ꮏ ꮙ ꮣ ꮭ ꮷ ff ﬖ I S c m w 𐐆 𐐐 𐐚 𐐤 𐐮 𐐸 𐑁 𐑋 𐒵 𐒿 𐓉 𐓓 𐓡 𐓫 𐓵 𐲃 𐲍 𐲗 𐲡 𐲫 𐳁 𐳋 𐳕 𐳟 𐳩 𑢠 𑢪 𑢴 𑢾 𑣈 𑣒 𑣜 𖹆 𖹏 𖹙 𖹣 𖹭 𖹷 𝐁 𝐋 𝐕 𝐟 𝐩 𝐳 𝐽 𝑇 𝑐 𝑛 𝑥 𝑯 𝑹 𝒃 𝒍 𝒗 𝒦 𝒳 𝒿 𝓊 𝓔 𝓞 𝓧 𝓱 𝓻 𝔅 𝔒 𝔞 𝔨 𝔲 𝔽 𝕌 𝕗 𝕡 𝕫 𝕴 𝕾 𝖈 𝖒 𝖜 𝖦 𝖰 𝖺 𝗄 𝗎 𝗘 𝗢 𝗬 𝗵 𝗿 𝘉 𝘓 𝘝 𝘧 𝘱 𝘻 𝙅 𝙏 𝙙 𝙣 𝙭 𝙷 𝚀 𝚊 𝚔 𝚞 𝚪 𝚴 𝚾 𝛉 𝛓 𝛞 𝛨 𝛲 𝛽 𝜆 𝜐 𝜛 𝜥 𝜯 𝜺 𝝄 𝝎 𝝙 𝝣 𝝭 𝝸 𝞂 𝞌 𝞖 𝞠 𝞫 𝞵 𝞿 𝟊 𞤈 𞤒 𞤜 𞤦 𞤰 𞤺 𞥃b", + "expected": 
"aa-j-t-d-n-x-æ-ð-û-å-ï-ú-ą-ď-ė-ġ-ī-ĵ-ŀ-ʼn-œ-ŝ-ŧ-ű-ż-ƅ-ə-ƙ-ƣ-ƭ-ƶ-dž-ǐ-ǚ-ǣ-ǭ-ƿ-ȁ-ȋ-ȕ-ȟ-ȩ-ȳ-ȼ-ɇ-ɐ-ɚ-ɤ-ɮ-ɸ-ʂ-ʌ-ʗ-ʡ-ʪ-ͷ-ό-η-ρ-ά-ζ-π-ϊ-ϔ-ϟ-ϩ-ϲ-ϼ-і-а-к-ф-ю-и-т-ь-і-ѡ-ѫ-ѵ-ѿ-ҏ-ҙ-ң-ҭ-ҷ-ӂ-ӌ-ӕ-ӟ-ө-ӳ-ӽ-ԇ-ԑ-ԛ-ԥ-ԯ-թ-ճ-ս-ՠ-ժ-մ-վ-ֈ-ⴉ-ⴒ-ⴜ-ⴧ-ი-ტ-წ-ჶ-ꭲ-ꭼ-ꮆ-ꮐ-ꮚ-ꮤ-ꮮ-ꮷ-ᏹ-ᏽ-ა-ლ-ფ-ხ-ჸ-ᴄ-ᴎ-ᴘ-ᴢ-ᵫ-ᵴ-ᵿ-ᶉ-ᶓ-ḃ-ḍ-ḗ-ḡ-ḫ-ḵ-ḿ-ṉ-ṓ-ṛ-ṥ-ṯ-ṹ-ẃ-ẍ-ẗ-ạ-ẫ-ẵ-ế-ỉ-ồ-ờ-ủ-ự-ỻ-ἄ-ἆ-ἒ-ἦ-ἰ-ἲ-ὄ-ὒ-ὠ-ὢ-έ-ώ-ᾁ-ᾓ-ᾕ-ᾧ-ᾱ-ᾳ-ή-ὶ-ῠ-ὸ-ℍ-ℚ-ℭ-ℾ-ⰰ-ⰺ-ⱄ-ⱎ-ⱘ-ⰳ-ⰽ-ⱇ-ⱑ-ⱛ-ⱦ-ɐ-ⱹ-ⲅ-ⲏ-ⲙ-ⲣ-ⲭ-ⲷ-ⳁ-ⳋ-ⳕ-ⳟ-ⳳ-ⴇ-ⴑ-ⴛ-ⴥ-ꙇ-ꙑ-ꙛ-ꙥ-ꚁ-ꚋ-ꚕ-ꜥ-ꜯ-ꜹ-ꝃ-ꝍ-ꝗ-ꝡ-ꝫ-ꝵ-ꝿ-ꞌ-ꞗ-ꞡ-ɜ-ꞵ-ꞿ-ꬲ-ꬼ-ꭆ-ꭐ-ꭚ-ꭱ-ꭻ-ꮅ-ꮏ-ꮙ-ꮣ-ꮭ-ꮷ-ff-ﬖ-i-s-c-m-w-𐐮-𐐸-𐑂-𐑌-𐐮-𐐸-𐑁-𐑋-𐓝-𐓧-𐓱-𐓻-𐓡-𐓫-𐓵-𐳃-𐳍-𐳗-𐳡-𐳫-𐳁-𐳋-𐳕-𐳟-𐳩-𑣀-𑣊-𑣔-𑣞-𑣈-𑣒-𑣜-𖹦-𖹯-𖹹-𖹣-𖹭-𖹷-𝐁-𝐋-𝐕-𝐟-𝐩-𝐳-𝐽-𝑇-𝑐-𝑛-𝑥-𝑯-𝑹-𝒃-𝒍-𝒗-𝒦-𝒳-𝒿-𝓊-𝓔-𝓞-𝓧-𝓱-𝓻-𝔅-𝔒-𝔞-𝔨-𝔲-𝔽-𝕌-𝕗-𝕡-𝕫-𝕴-𝕾-𝖈-𝖒-𝖜-𝖦-𝖰-𝖺-𝗄-𝗎-𝗘-𝗢-𝗬-𝗵-𝗿-𝘉-𝘓-𝘝-𝘧-𝘱-𝘻-𝙅-𝙏-𝙙-𝙣-𝙭-𝙷-𝚀-𝚊-𝚔-𝚞-𝚪-𝚴-𝚾-𝛉-𝛓-𝛞-𝛨-𝛲-𝛽-𝜆-𝜐-𝜛-𝜥-𝜯-𝜺-𝝄-𝝎-𝝙-𝝣-𝝭-𝝸-𝞂-𝞌-𝞖-𝞠-𝞫-𝞵-𝞿-𝟊-𞤪-𞤴-𞤾-𞤦-𞤰-𞤺-𞥃b" + }, + { + "name": "Close_Punctuation", + "input": "a) ] } ༻ ༽ ᚜ ⁆ ⁾ ₎ ⌉ ⌋ 〉 ❩ ❫ ❭ ❯ ❱ ❳ ❵ ⟆ ⟧ ⟩ ⟫ ⟭ ⟯ ⦄ ⦆ ⦈ ⦊ ⦌ ⦎ ⦐ ⦒ ⦔ ⦖ ⦘ ⧙ ⧛ ⧽ ⸣ ⸥ ⸧ ⸩ 〉 》 」 』 】 〕 〗 〙 〛 〞 〟 ﴾ ︘ ︶ ︸ ︺ ︼ ︾ ﹀ ﹂ ﹄ ﹈ ﹚ ﹜ ﹞ ) ] } ⦆ 」b", + "expected": "a------------------------------------------------------------------------b" + }, + { + "name": "Connector_Punctuation", + "input": "a_ ‿ ⁀ ⁔ ︳ ︴ ﹍ ﹎ ﹏ _b", + "expected": "a_-‿-⁀-⁔-︳-︴-﹍-﹎-﹏-_b" + }, + { + "name": "Currency_Symbol", + "input": "a$ ¢ £ ¤ ¥ ֏ ؋ ߾ ߿ ৲ ৳ ৻ ૱ ௹ ฿ ៛ ₠ ₡ ₢ ₣ ₤ ₥ ₦ ₧ ₨ ₩ ₪ ₫ € ₭ ₮ ₯ ₰ ₱ ₲ ₳ ₴ ₵ ₶ ₷ ₸ ₹ ₺ ₻ ₼ ₽ ₾ ₿ ꠸ ﷼ ﹩ $ ¢ £ ¥ ₩ 𑿝 𑿞 𑿟 𑿠 𞋿 𞲰b", + "expected": "a-------------------------------------------------------------b" + }, + { + "name": "Dash_Punctuation", + "input": "a- ֊ ־ ᐀ ᠆ ‐ ‑ ‒ – — ― ⸗ ⸚ ⸺ ⸻ ⹀ 〜 〰 ゠ ︱ ︲ ﹘ ﹣ -b", + "expected": "a------------------------b" + }, + { + "name": "Decimal_Number", + "input": "a0 1 3 4 6 7 9 ١ ٢ ٤ ٥ ٧ ٨ ۰ ۲ ۳ ۵ ۶ ۸ ۹ ߁ ߃ ߄ ߆ ߇ ߉ ० २ ४ ५ ७ ८ ০ ১ ৩ ৫ ৬ ৮ ৯ ੧ ੩ ੪ ੬ ੭ ੯ ૦ ૨ ૪ ૫ ૭ ૮ ୦ ୧ ୩ ୫ ୬ ୮ ୯ ௧ ௨ ௪ ௬ ௭ ௯ ౦ ౨ ౩ ౫ ౭ ౮ ೦ ೧ ೩ ೪ ೬ ೮ ೯ ൧ ൨ ൪ ൬ ൭ ൯ ෦ ෨ ෩ ෫ ෭ ෮ ๐ ๑ ๓ ๔ ๖ ๘ ๙ ໑ ໒ ໔ ໕ ໗ ໙ ༠ ༢ ༣ ༥ ༦ ༨ ၀ ၁ ၃ ၄ ၆ ၇ ၉ ႑ ႒ ႔ ႕ ႗ ႙ ០ ២ ៣ ៥ ៦ ៨ ᠐ ᠑ ᠓ ᠔ ᠖ ᠗ ᠙ ᥇ ᥈ ᥊ ᥋ ᥍ ᥎ ᧐ ᧒ ᧓ ᧕ ᧖ ᧘ ᧙ ᪁ ᪃ ᪄ ᪆ ᪇ ᪉ ᪐ ᪒ ᪔ ᪕ ᪗ ᪘ ᭐ ᭒ ᭓ ᭕ 
᭖ ᭘ ᭙ ᮱ ᮳ ᮴ ᮶ ᮷ ᮹ ᱀ ᱂ ᱄ ᱅ ᱇ ᱈ ᱐ ᱑ ᱓ ᱕ ᱖ ᱘ ᱙ ꘡ ꘢ ꘤ ꘦ ꘧ ꘩ ꣐ ꣒ ꣓ ꣕ ꣗ ꣘ ꤀ ꤁ ꤃ ꤅ ꤆ ꤈ ꤉ ꧑ ꧒ ꧔ ꧖ ꧗ ꧙ ꧰ ꧲ ꧳ ꧵ ꧷ ꧸ ꩐ ꩑ ꩓ ꩔ ꩖ ꩘ ꩙ ꯱ ꯲ ꯴ ꯵ ꯷ ꯹ 0 2 3 5 6 8 𐒠 𐒡 𐒣 𐒤 𐒦 𐒨 𐒩 𐴱 𐴲 𐴴 𐴵 𐴷 𐴹 𑁦 𑁨 𑁩 𑁫 𑁬 𑁮 𑃰 𑃱 𑃳 𑃴 𑃶 𑃷 𑃹 𑄷 𑄸 𑄺 𑄻 𑄽 𑄾 𑇐 𑇒 𑇓 𑇕 𑇖 𑇘 𑇙 𑋱 𑋳 𑋴 𑋶 𑋷 𑋹 𑑑 𑑒 𑑔 𑑕 𑑗 𑑘 𑓐 𑓒 𑓓 𑓕 𑓖 𑓘 𑓙 𑙑 𑙓 𑙔 𑙖 𑙗 𑙙 𑛀 𑛂 𑛄 𑛅 𑛇 𑛈 𑜰 𑜱 𑜳 𑜵 𑜶 𑜸 𑜹 𑣡 𑣢 𑣤 𑣦 𑣧 𑣩 𑱐 𑱒 𑱔 𑱕 𑱗 𑱘 𑵐 𑵑 𑵓 𑵕 𑵖 𑵘 𑵙 𑶡 𑶢 𑶤 𑶦 𑶧 𑶩 𖩠 𖩢 𖩣 𖩥 𖩧 𖩨 𖭐 𖭑 𖭓 𖭔 𖭖 𖭘 𖭙 𝟏 𝟐 𝟒 𝟓 𝟕 𝟗 𝟘 𝟚 𝟛 𝟝 𝟟 𝟠 𝟢 𝟣 𝟥 𝟦 𝟨 𝟪 𝟫 𝟭 𝟮 𝟰 𝟱 𝟳 𝟵 𝟶 𝟸 𝟹 𝟻 𝟼 𝟾 𞅀 𞅁 𞅃 𞅄 𞅆 𞅇 𞅉 𞋱 𞋲 𞋴 𞋵 𞋷 𞋸 𞥐 𞥒 𞥓 𞥕 𞥖 𞥘 𞥙b", + "expected": "a0-1-3-4-6-7-9-١-٢-٤-٥-٧-٨-۰-۲-۳-۵-۶-۸-۹-߁-߃-߄-߆-߇-߉-०-२-४-५-७-८-০-১-৩-৫-৬-৮-৯-੧-੩-੪-੬-੭-੯-૦-૨-૪-૫-૭-૮-୦-୧-୩-୫-୬-୮-୯-௧-௨-௪-௬-௭-௯-౦-౨-౩-౫-౭-౮-೦-೧-೩-೪-೬-೮-೯-൧-൨-൪-൬-൭-൯-෦-෨-෩-෫-෭-෮-๐-๑-๓-๔-๖-๘-๙-໑-໒-໔-໕-໗-໙-༠-༢-༣-༥-༦-༨-၀-၁-၃-၄-၆-၇-၉-႑-႒-႔-႕-႗-႙-០-២-៣-៥-៦-៨-᠐-᠑-᠓-᠔-᠖-᠗-᠙-᥇-᥈-᥊-᥋-᥍-᥎-᧐-᧒-᧓-᧕-᧖-᧘-᧙-᪁-᪃-᪄-᪆-᪇-᪉-᪐-᪒-᪔-᪕-᪗-᪘-᭐-᭒-᭓-᭕-᭖-᭘-᭙-᮱-᮳-᮴-᮶-᮷-᮹-᱀-᱂-᱄-᱅-᱇-᱈-᱐-᱑-᱓-᱕-᱖-᱘-᱙-꘡-꘢-꘤-꘦-꘧-꘩-꣐-꣒-꣓-꣕-꣗-꣘-꤀-꤁-꤃-꤅-꤆-꤈-꤉-꧑-꧒-꧔-꧖-꧗-꧙-꧰-꧲-꧳-꧵-꧷-꧸-꩐-꩑-꩓-꩔-꩖-꩘-꩙-꯱-꯲-꯴-꯵-꯷-꯹-0-2-3-5-6-8-𐒠-𐒡-𐒣-𐒤-𐒦-𐒨-𐒩-𐴱-𐴲-𐴴-𐴵-𐴷-𐴹-𑁦-𑁨-𑁩-𑁫-𑁬-𑁮-𑃰-𑃱-𑃳-𑃴-𑃶-𑃷-𑃹-𑄷-𑄸-𑄺-𑄻-𑄽-𑄾-𑇐-𑇒-𑇓-𑇕-𑇖-𑇘-𑇙-𑋱-𑋳-𑋴-𑋶-𑋷-𑋹-𑑑-𑑒-𑑔-𑑕-𑑗-𑑘-𑓐-𑓒-𑓓-𑓕-𑓖-𑓘-𑓙-𑙑-𑙓-𑙔-𑙖-𑙗-𑙙-𑛀-𑛂-𑛄-𑛅-𑛇-𑛈-𑜰-𑜱-𑜳-𑜵-𑜶-𑜸-𑜹-𑣡-𑣢-𑣤-𑣦-𑣧-𑣩-𑱐-𑱒-𑱔-𑱕-𑱗-𑱘-𑵐-𑵑-𑵓-𑵕-𑵖-𑵘-𑵙-𑶡-𑶢-𑶤-𑶦-𑶧-𑶩-𖩠-𖩢-𖩣-𖩥-𖩧-𖩨-𖭐-𖭑-𖭓-𖭔-𖭖-𖭘-𖭙-𝟏-𝟐-𝟒-𝟓-𝟕-𝟗-𝟘-𝟚-𝟛-𝟝-𝟟-𝟠-𝟢-𝟣-𝟥-𝟦-𝟨-𝟪-𝟫-𝟭-𝟮-𝟰-𝟱-𝟳-𝟵-𝟶-𝟸-𝟹-𝟻-𝟼-𝟾-𞅀-𞅁-𞅃-𞅄-𞅆-𞅇-𞅉-𞋱-𞋲-𞋴-𞋵-𞋷-𞋸-𞥐-𞥒-𞥓-𞥕-𞥖-𞥘-𞥙b" + }, + { + "name": "Enclosing_Mark", + "input": "a҈ ҉ ᪾ ⃝ ⃞ ⃟ ⃠ ⃢ ⃣ ⃤ ꙰ ꙱ ꙲b", + "expected": "a҈-҉-᪾-⃝-⃞-⃟-⃠-⃢-⃣-⃤-꙰-꙱-꙲b" + }, + { + "name": "Final_Punctuation", + "input": "a» ’ ” › ⸃ ⸅ ⸊ ⸍ ⸝ ⸡b", + "expected": "a---------b" + }, + { + "name": "Format", + "input": "a­ ؀ ؁ ؂ ؃ ؄ ؅ ؜ ۝ ܏ ࣢ ᠎ ​ ‌ ‍ ‎ ‏ ‪ ‫ ‬ ‭ ‮ ⁠ ⁡ ⁢ ⁣ ⁤ ⁦ ⁧ ⁨ ⁩           𑂽 𑃍 𓐰 𓐱 𓐲 𓐳 𓐴 𓐵 𓐶 𓐷 𓐸 𛲠 𛲡 𛲢 𛲣 𝅳 𝅴 𝅵 𝅶 𝅷 𝅸 𝅹 𝅺 󠀁 󠀠 󠀡 󠀢 󠀣 󠀤 󠀥 󠀦 󠀧 󠀨 󠀩 󠀪 󠀫 󠀬 󠀭 󠀮 󠀯 󠀰 󠀱 󠀲 󠀳 󠀴 󠀵 󠀶 󠀷 󠀸 󠀹 󠀺 󠀻 󠀼 󠀽 󠀾 󠀿 󠁀 󠁁 󠁂 󠁃 󠁄 󠁅 󠁆 󠁇 󠁈 󠁉 󠁊 󠁋 󠁌 󠁍 󠁎 󠁏 󠁐 󠁑 󠁒 󠁓 󠁔 󠁕 󠁖 󠁗 󠁘 󠁙 󠁚 󠁛 󠁜 󠁝 󠁞 󠁟 󠁠 󠁡 󠁢 󠁣 󠁤 󠁥 󠁦 󠁧 󠁨 󠁩 󠁪 󠁫 󠁬 󠁭 󠁮 󠁯 󠁰 󠁱 󠁲 󠁳 󠁴 󠁵 󠁶 󠁷 󠁸 󠁹 󠁺 󠁻 󠁼 󠁽 󠁾 󠁿b", + "expected": 
"a----------------------------------------------------------------------------------------------------------------------------------------------------------------b" + }, + { + "name": "Initial_Punctuation", + "input": "a« ‘ ‛ “ ‟ ‹ ⸂ ⸄ ⸉ ⸌ ⸜ ⸠b", + "expected": "a-----------b" + }, + { + "name": "Letter", + "input": "aA Dž Ψ ӫ ۊ ऴ ధ ໜ ᅣ ኧ ᐧ ᕡ ᚣ ᢘ ᬒ ᴶ Ằ ℇ ⴢ ル 㑲 㖬 㛦 㠠 㥚 㪔 㯎 㴈 㹃 㽽 䂷 䇱 䌫 䑥 䖟 䛙 䠓 䥍 䪈 䯂 䳼 亀 侺 僴 刮 卨 咢 嗝 圗 塑 妋 嫅 寿 崹 平 徭 惨 戢 捜 撖 旐 朊 桄 楾 檸 毲 洭 湧 澡 烛 爕 獏 璉 痃 盽 砸 祲 窬 篦 素 繚 羔 胎 興 荃 葽 薷 蛱 蠫 襥 誟 诙 贓 蹍 辈 郂 釼 錶 鑰 閪 雤 頞 饘 骓 鯍 鴇 鹁 齻 ꃅ ꇿ ꌹ ꑳ ꗳ ꞕ ꦦ ꮘ 곯 긩 꽣 낝 뇗 댒 둌 떆 뛀 럺 뤴 멮 뮨 볢 븜 뽗 삑 쇋 쌅 쐿 앹 욳 쟭 줧 쩢 쮜 쳖 츐 콊 킄 톾 틸 퐲 항 횧 ퟱ 隷 ﮩ ﴄ ﻯ 𐂲 𐏃 𐕓 𐜪 𐨞 𐲊 𑃝 𑍝 𑠝 𑰧 𒂧 𒇡 𒌜 𒔼 𓄲 𓉬 𓎦 𔒱 𔗫 𖣞 𖨘 𖽁 𗄟 𗉙 𗎓 𗓍 𗘇 𗝁 𗡻 𗦵 𗫯 𗰪 𗵤 𗺞 𗿘 𘄒 𘉌 𘎆 𘓀 𘗺 𘜵 𘡷 𘦱 𘫫 𛅼 𛊶 𝑪 𝖻 𝛹 𞠏 𞹛 𠃬 𠈦 𠍠 𠒚 𠗔 𠜎 𠡈 𠦃 𠪽 𠯷 𠴱 𠹫 𠾥 𡃟 𡈙 𡍓 𡒍 𡗈 𡜂 𡠼 𡥶 𡪰 𡯪 𡴤 𡹞 𡾘 𢃓 𢈍 𢍇 𢒁 𢖻 𢛵 𢠯 𢥩 𢪣 𢯞 𢴘 𢹒 𢾌 𣃆 𣈀 𣌺 𣑴 𣖮 𣛨 𣠣 𣥝 𣪗 𣯑 𣴋 𣹅 𣽿 𤂹 𤇳 𤌮 𤑨 𤖢 𤛜 𤠖 𤥐 𤪊 𤯄 𤳾 𤸹 𤽳 𥂭 𥇧 𥌡 𥑛 𥖕 𥛏 𥠉 𥥃 𥩾 𥮸 𥳲 𥸬 𥽦 𦂠 𦇚 𦌔 𦑎 𦖉 𦛃 𦟽 𦤷 𦩱 𦮫 𦳥 𦸟 𦽙 𧂔 𧇎 𧌈 𧑂 𧕼 𧚶 𧟰 𧤪 𧩤 𧮟 𧳙 𧸓 𧽍 𨂇 𨇁 𨋻 𨐵 𨕯 𨚩 𨟤 𨤞 𨩘 𨮒 𨳌 𨸆 𨽀 𩁺 𩆴 𩋯 𩐩 𩕣 𩚝 𩟗 𩤑 𩩋 𩮅 𩲿 𩷺 𩼴 𪁮 𪆨 𪋢 𪐜 𪕖 𪚐 𪟳 𪤭 𪩨 𪮢 𪳜 𪸖 𪽐 𫂊 𫇄 𫋾 𫐸 𫕳 𫚭 𫟲 𫤮 𫩨 𫮢 𫳜 𫸖 𫽐 𬂋 𬇅 𬋿 𬐹 𬕳 𬚭 𬟧 𬤡 𬩛 𬮕 𬳐 𬸊 𬽒 𭂌 𭇆 𭌀 𭐺 𭕴 𭚮 𭟩 𭤣 𭩝 𭮗 𭳑 𭸋 𭽅 𮁿 𮆹 𮋴 𮐮 𮕨 𮚢 𮟜 𮤖 𮩐 𮮊 𣑭 𪘀b", + "expected": "aa-dž-ψ-ӫ-ۊ-ऴ-ధ-ໜ-ᅣ-ኧ-ᐧ-ᕡ-ᚣ-ᢘ-ᬒ-ᴶ-ằ-ℇ-ⴢ-ル-㑲-㖬-㛦-㠠-㥚-㪔-㯎-㴈-㹃-㽽-䂷-䇱-䌫-䑥-䖟-䛙-䠓-䥍-䪈-䯂-䳼-亀-侺-僴-刮-卨-咢-嗝-圗-塑-妋-嫅-寿-崹-平-徭-惨-戢-捜-撖-旐-朊-桄-楾-檸-毲-洭-湧-澡-烛-爕-獏-璉-痃-盽-砸-祲-窬-篦-素-繚-羔-胎-興-荃-葽-薷-蛱-蠫-襥-誟-诙-贓-蹍-辈-郂-釼-錶-鑰-閪-雤-頞-饘-骓-鯍-鴇-鹁-齻-ꃅ-ꇿ-ꌹ-ꑳ-ꗳ-ꞕ-ꦦ-ꮘ-곯-긩-꽣-낝-뇗-댒-둌-떆-뛀-럺-뤴-멮-뮨-볢-븜-뽗-삑-쇋-쌅-쐿-앹-욳-쟭-줧-쩢-쮜-쳖-츐-콊-킄-톾-틸-퐲-항-횧-ퟱ-隷-ﮩ-ﴄ-ﻯ-𐂲-𐏃-𐕓-𐜪-𐨞-𐳊-𑃝-𑍝-𑠝-𑰧-𒂧-𒇡-𒌜-𒔼-𓄲-𓉬-𓎦-𔒱-𔗫-𖣞-𖨘-𖽁-𗄟-𗉙-𗎓-𗓍-𗘇-𗝁-𗡻-𗦵-𗫯-𗰪-𗵤-𗺞-𗿘-𘄒-𘉌-𘎆-𘓀-𘗺-𘜵-𘡷-𘦱-𘫫-𛅼-𛊶-𝑪-𝖻-𝛹-𞠏-𞹛-𠃬-𠈦-𠍠-𠒚-𠗔-𠜎-𠡈-𠦃-𠪽-𠯷-𠴱-𠹫-𠾥-𡃟-𡈙-𡍓-𡒍-𡗈-𡜂-𡠼-𡥶-𡪰-𡯪-𡴤-𡹞-𡾘-𢃓-𢈍-𢍇-𢒁-𢖻-𢛵-𢠯-𢥩-𢪣-𢯞-𢴘-𢹒-𢾌-𣃆-𣈀-𣌺-𣑴-𣖮-𣛨-𣠣-𣥝-𣪗-𣯑-𣴋-𣹅-𣽿-𤂹-𤇳-𤌮-𤑨-𤖢-𤛜-𤠖-𤥐-𤪊-𤯄-𤳾-𤸹-𤽳-𥂭-𥇧-𥌡-𥑛-𥖕-𥛏-𥠉-𥥃-𥩾-𥮸-𥳲-𥸬-𥽦-𦂠-𦇚-𦌔-𦑎-𦖉-𦛃-𦟽-𦤷-𦩱-𦮫-𦳥-𦸟-𦽙-𧂔-𧇎-𧌈-𧑂-𧕼-𧚶-𧟰-𧤪-𧩤-𧮟-𧳙-𧸓-𧽍-𨂇-𨇁-𨋻-𨐵-𨕯-𨚩-𨟤-𨤞-𨩘-𨮒-𨳌-𨸆-𨽀-𩁺-𩆴-𩋯-𩐩-𩕣-𩚝-𩟗-𩤑-𩩋-𩮅-𩲿-𩷺-𩼴-𪁮-𪆨-𪋢-𪐜-𪕖-𪚐-𪟳-𪤭-𪩨-𪮢-𪳜-𪸖-𪽐-𫂊-𫇄-𫋾-𫐸-𫕳-𫚭-𫟲-𫤮-𫩨-𫮢-𫳜-𫸖-𫽐-𬂋-𬇅-𬋿-𬐹-𬕳-𬚭-𬟧-𬤡-𬩛-𬮕-𬳐-𬸊-𬽒-𭂌-𭇆-𭌀-𭐺-𭕴-𭚮-𭟩-𭤣-𭩝-𭮗-𭳑-𭸋-𭽅-𮁿-𮆹-𮋴-𮐮-𮕨-𮚢-𮟜-𮤖-𮩐-𮮊-𣑭-𪘀b" + }, + { + "name": "Letter_Number", + "input": "aᛮ ᛯ 
ᛰ Ⅰ Ⅱ Ⅲ Ⅳ Ⅴ Ⅵ Ⅶ Ⅷ Ⅸ Ⅹ Ⅺ Ⅻ Ⅼ Ⅽ Ⅾ Ⅿ ⅰ ⅱ ⅲ ⅳ ⅴ ⅵ ⅶ ⅷ ⅸ ⅹ ⅺ ⅻ ⅼ ⅽ ⅾ ⅿ ↀ ↁ ↂ ↅ ↆ ↇ ↈ 〇 〡 〢 〣 〤 〥 〦 〧 〨 〩 〸 〹 〺 ꛦ ꛧ ꛨ ꛩ ꛪ ꛫ ꛬ ꛭ ꛮ ꛯ 𐅀 𐅁 𐅂 𐅃 𐅄 𐅅 𐅆 𐅇 𐅈 𐅉 𐅊 𐅋 𐅌 𐅍 𐅎 𐅏 𐅐 𐅑 𐅒 𐅓 𐅔 𐅕 𐅖 𐅗 𐅘 𐅙 𐅚 𐅛 𐅜 𐅝 𐅞 𐅟 𐅠 𐅡 𐅢 𐅣 𐅤 𐅥 𐅦 𐅧 𐅨 𐅩 𐅪 𐅫 𐅬 𐅭 𐅮 𐅯 𐅰 𐅱 𐅲 𐅳 𐅴 𐍁 𐍊 𐏑 𐏒 𐏓 𐏔 𐏕 𒐀 𒐁 𒐂 𒐃 𒐄 𒐅 𒐆 𒐇 𒐈 𒐉 𒐊 𒐋 𒐌 𒐍 𒐎 𒐏 𒐐 𒐑 𒐒 𒐓 𒐔 𒐕 𒐖 𒐗 𒐘 𒐙 𒐚 𒐛 𒐜 𒐝 𒐞 𒐟 𒐠 𒐡 𒐢 𒐣 𒐤 𒐥 𒐦 𒐧 𒐨 𒐩 𒐪 𒐫 𒐬 𒐭 𒐮 𒐯 𒐰 𒐱 𒐲 𒐳 𒐴 𒐵 𒐶 𒐷 𒐸 𒐹 𒐺 𒐻 𒐼 𒐽 𒐾 𒐿 𒑀 𒑁 𒑂 𒑃 𒑄 𒑅 𒑆 𒑇 𒑈 𒑉 𒑊 𒑋 𒑌 𒑍 𒑎 𒑏 𒑐 𒑑 𒑒 𒑓 𒑔 𒑕 𒑖 𒑗 𒑘 𒑙 𒑚 𒑛 𒑜 𒑝 𒑞 𒑟 𒑠 𒑡 𒑢 𒑣 𒑤 𒑥 𒑦 𒑧 𒑨 𒑩 𒑪 𒑫 𒑬 𒑭 𒑮b", + "expected": "aᛮ-ᛯ-ᛰ-ⅰ-ⅱ-ⅲ-ⅳ-ⅴ-ⅵ-ⅶ-ⅷ-ⅸ-ⅹ-ⅺ-ⅻ-ⅼ-ⅽ-ⅾ-ⅿ-ⅰ-ⅱ-ⅲ-ⅳ-ⅴ-ⅵ-ⅶ-ⅷ-ⅸ-ⅹ-ⅺ-ⅻ-ⅼ-ⅽ-ⅾ-ⅿ-ↀ-ↁ-ↂ-ↅ-ↆ-ↇ-ↈ-〇-〡-〢-〣-〤-〥-〦-〧-〨-〩-〸-〹-〺-ꛦ-ꛧ-ꛨ-ꛩ-ꛪ-ꛫ-ꛬ-ꛭ-ꛮ-ꛯ-𐅀-𐅁-𐅂-𐅃-𐅄-𐅅-𐅆-𐅇-𐅈-𐅉-𐅊-𐅋-𐅌-𐅍-𐅎-𐅏-𐅐-𐅑-𐅒-𐅓-𐅔-𐅕-𐅖-𐅗-𐅘-𐅙-𐅚-𐅛-𐅜-𐅝-𐅞-𐅟-𐅠-𐅡-𐅢-𐅣-𐅤-𐅥-𐅦-𐅧-𐅨-𐅩-𐅪-𐅫-𐅬-𐅭-𐅮-𐅯-𐅰-𐅱-𐅲-𐅳-𐅴-𐍁-𐍊-𐏑-𐏒-𐏓-𐏔-𐏕-𒐀-𒐁-𒐂-𒐃-𒐄-𒐅-𒐆-𒐇-𒐈-𒐉-𒐊-𒐋-𒐌-𒐍-𒐎-𒐏-𒐐-𒐑-𒐒-𒐓-𒐔-𒐕-𒐖-𒐗-𒐘-𒐙-𒐚-𒐛-𒐜-𒐝-𒐞-𒐟-𒐠-𒐡-𒐢-𒐣-𒐤-𒐥-𒐦-𒐧-𒐨-𒐩-𒐪-𒐫-𒐬-𒐭-𒐮-𒐯-𒐰-𒐱-𒐲-𒐳-𒐴-𒐵-𒐶-𒐷-𒐸-𒐹-𒐺-𒐻-𒐼-𒐽-𒐾-𒐿-𒑀-𒑁-𒑂-𒑃-𒑄-𒑅-𒑆-𒑇-𒑈-𒑉-𒑊-𒑋-𒑌-𒑍-𒑎-𒑏-𒑐-𒑑-𒑒-𒑓-𒑔-𒑕-𒑖-𒑗-𒑘-𒑙-𒑚-𒑛-𒑜-𒑝-𒑞-𒑟-𒑠-𒑡-𒑢-𒑣-𒑤-𒑥-𒑦-𒑧-𒑨-𒑩-𒑪-𒑫-𒑬-𒑭-𒑮b" + }, + { + "name": "Line_Separator", + "input": "a
b", + "expected": "ab" + }, + { + "name": "Lowercase_Letter", + "input": "aa f k q v µ ä é ï ô ú ā ċ ĕ ġ ī ķ ŀ ʼn ŕ ş ũ ŵ ſ ƍ ƛ ƨ ƶ ƿ ǐ ǜ ǥ ǰ ǽ ȇ ȓ ȝ ȧ ȳ ȸ ɇ ɐ ɕ ɛ ɠ ɥ ɫ ɰ ɶ ɻ ʀ ʆ ʋ ʑ ʗ ʜ ʢ ʧ ʬ ͷ ά β η μ ς χ ό ϖ ϟ ϫ ϲ ϼ е к п х ъ ѐ ѕ њ ѡ ѫ ѵ ҁ ғ ҟ ҩ ҳ ҿ ӊ ӓ ӟ ө ӵ ӿ ԉ ԕ ԟ ԩ բ է խ ղ շ ս ւ ֈ ე კ ჟ ფ ჩ ჯ ჴ ჺ ᏹ ᲀ ᲆ ᴂ ᴇ ᴍ ᴒ ᴘ ᴝ ᴢ ᴨ ᵬ ᵱ ᵷ ᵽ ᶃ ᶈ ᶍ ᶓ ᶘ ḅ ḑ ḛ ḧ ḱ ḻ ṇ ṑ ṛ ṧ ṱ ṽ ẇ ẑ ẙ ẟ ẫ ẵ ế ị ổ ở ừ ỵ ἀ ἅ ἒ ἢ ἧ ἴ ὂ ὑ ὗ ὤ ά ί ὼ ᾃ ᾑ ᾖ ᾤ ᾱ ᾷ ῇ ῖ ΰ ῳ ℎ ℼ ⅉ ⰲ ⰸ ⰽ ⱂ ⱈ ⱍ ⱓ ⱘ ⱝ ⱪ ⱶ ⱻ ⲋ ⲕ ⲡ ⲫ ⲵ ⳁ ⳋ ⳗ ⳡ ⳳ ⴅ ⴊ ⴏ ⴕ ⴚ ⴠ ⴥ ꙅ ꙑ ꙛ ꙥ ꚃ ꚍ ꚙ ꜩ ꜱ ꜽ ꝇ ꝑ ꝝ ꝧ ꝲ ꝷ ꞁ ꞑ ꞙ ꞣ ꞷ ꟃ ꬴ ꬹ ꬾ ꭄ ꭉ ꭎ ꭔ ꭙ ꭤ ꭱ ꭶ ꭼ ꮁ ꮆ ꮌ ꮑ ꮗ ꮜ ꮡ ꮧ ꮬ ꮲ ꮷ ꮼ fl ﬓ a g l r w 𐐩 𐐯 𐐴 𐐹 𐐿 𐑄 𐑊 𐑏 𐓜 𐓢 𐓧 𐓬 𐓲 𐓷 𐳁 𐳆 𐳋 𐳑 𐳖 𐳛 𐳡 𐳦 𐳬 𐳱 𑣃 𑣉 𑣎 𑣓 𑣙 𑣞 𖹤 𖹩 𖹮 𖹴 𖹹 𖹿 𝐞 𝐣 𝐩 𝐮 𝐳 𝑓 𝑙 𝑟 𝑤 𝒃 𝒉 𝒎 𝒓 𝒙 𝒸 𝓀 𝓆 𝓋 𝓫 𝓰 𝓵 𝓻 𝔀 𝔠 𝔥 𝔪 𝔰 𝔵 𝕔 𝕚 𝕟 𝕥 𝕪 𝖉 𝖏 𝖔 𝖙 𝖟 𝖾 𝗄 𝗉 𝗎 𝗮 𝗳 𝗸 𝗾 𝘃 𝘣 𝘨 𝘭 𝘳 𝘸 𝙘 𝙝 𝙢 𝙨 𝙭 𝚌 𝚒 𝚗 𝚝 𝚢 𝛃 𝛉 𝛎 𝛓 𝛙 𝛟 𝛿 𝜄 𝜉 𝜏 𝜔 𝜚 𝜺 𝜿 𝝅 𝝊 𝝐 𝝰 𝝵 𝝺 𝞀 𝞅 𝞌 𝞫 𝞰 𝞶 𝞻 𝟀 𝟇 𞤣 𞤩 𞤮 𞤳 𞤹 𞤾 𞥃b", + "expected": "aa-f-k-q-v-µ-ä-é-ï-ô-ú-ā-ċ-ĕ-ġ-ī-ķ-ŀ-ʼn-ŕ-ş-ũ-ŵ-ſ-ƍ-ƛ-ƨ-ƶ-ƿ-ǐ-ǜ-ǥ-ǰ-ǽ-ȇ-ȓ-ȝ-ȧ-ȳ-ȸ-ɇ-ɐ-ɕ-ɛ-ɠ-ɥ-ɫ-ɰ-ɶ-ɻ-ʀ-ʆ-ʋ-ʑ-ʗ-ʜ-ʢ-ʧ-ʬ-ͷ-ά-β-η-μ-ς-χ-ό-ϖ-ϟ-ϫ-ϲ-ϼ-е-к-п-х-ъ-ѐ-ѕ-њ-ѡ-ѫ-ѵ-ҁ-ғ-ҟ-ҩ-ҳ-ҿ-ӊ-ӓ-ӟ-ө-ӵ-ӿ-ԉ-ԕ-ԟ-ԩ-բ-է-խ-ղ-շ-ս-ւ-ֈ-ე-კ-ჟ-ფ-ჩ-ჯ-ჴ-ჺ-ᏹ-ᲀ-ᲆ-ᴂ-ᴇ-ᴍ-ᴒ-ᴘ-ᴝ-ᴢ-ᴨ-ᵬ-ᵱ-ᵷ-ᵽ-ᶃ-ᶈ-ᶍ-ᶓ-ᶘ-ḅ-ḑ-ḛ-ḧ-ḱ-ḻ-ṇ-ṑ-ṛ-ṧ-ṱ-ṽ-ẇ-ẑ-ẙ-ẟ-ẫ-ẵ-ế-ị-ổ-ở-ừ-ỵ-ἀ-ἅ-ἒ-ἢ-ἧ-ἴ-ὂ-ὑ-ὗ-ὤ-ά-ί-ὼ-ᾃ-ᾑ-ᾖ-ᾤ-ᾱ-ᾷ-ῇ-ῖ-ΰ-ῳ-ℎ-ℼ-ⅉ-ⰲ-ⰸ-ⰽ-ⱂ-ⱈ-ⱍ-ⱓ-ⱘ-ⱝ-ⱪ-ⱶ-ⱻ-ⲋ-ⲕ-ⲡ-ⲫ-ⲵ-ⳁ-ⳋ-ⳗ-ⳡ-ⳳ-ⴅ-ⴊ-ⴏ-ⴕ-ⴚ-ⴠ-ⴥ-ꙅ-ꙑ-ꙛ-ꙥ-ꚃ-ꚍ-ꚙ-ꜩ-ꜱ-ꜽ-ꝇ-ꝑ-ꝝ-ꝧ-ꝲ-ꝷ-ꞁ-ꞑ-ꞙ-ꞣ-ꞷ-ꟃ-ꬴ-ꬹ-ꬾ-ꭄ-ꭉ-ꭎ-ꭔ-ꭙ-ꭤ-ꭱ-ꭶ-ꭼ-ꮁ-ꮆ-ꮌ-ꮑ-ꮗ-ꮜ-ꮡ-ꮧ-ꮬ-ꮲ-ꮷ-ꮼ-fl-ﬓ-a-g-l-r-w-𐐩-𐐯-𐐴-𐐹-𐐿-𐑄-𐑊-𐑏-𐓜-𐓢-𐓧-𐓬-𐓲-𐓷-𐳁-𐳆-𐳋-𐳑-𐳖-𐳛-𐳡-𐳦-𐳬-𐳱-𑣃-𑣉-𑣎-𑣓-𑣙-𑣞-𖹤-𖹩-𖹮-𖹴-𖹹-𖹿-𝐞-𝐣-𝐩-𝐮-𝐳-𝑓-𝑙-𝑟-𝑤-𝒃-𝒉-𝒎-𝒓-𝒙-𝒸-𝓀-𝓆-𝓋-𝓫-𝓰-𝓵-𝓻-𝔀-𝔠-𝔥-𝔪-𝔰-𝔵-𝕔-𝕚-𝕟-𝕥-𝕪-𝖉-𝖏-𝖔-𝖙-𝖟-𝖾-𝗄-𝗉-𝗎-𝗮-𝗳-𝗸-𝗾-𝘃-𝘣-𝘨-𝘭-𝘳-𝘸-𝙘-𝙝-𝙢-𝙨-𝙭-𝚌-𝚒-𝚗-𝚝-𝚢-𝛃-𝛉-𝛎-𝛓-𝛙-𝛟-𝛿-𝜄-𝜉-𝜏-𝜔-𝜚-𝜺-𝜿-𝝅-𝝊-𝝐-𝝰-𝝵-𝝺-𝞀-𝞅-𝞌-𝞫-𝞰-𝞶-𝞻-𝟀-𝟇-𞤣-𞤩-𞤮-𞤳-𞤹-𞤾-𞥃b" + }, + { + "name": "Mark", + "input": "à ̅ ̋ ̑ ̖ ̜ ̢ ̧ ̭ ̳ ̸ ̾ ̈́ ͉ ͏ ͕ ͚ ͠ ͦ ͫ ҄ ֑ ֖ ֜ ֢ ֧ ֭ ֳ ָ ֿ ؐ ؕ ً ّ ٖ ٜ ۗ ۜ ۤ ۭ ܳ ܹ ܿ ݄ ݊ ޫ ް ߰ ࠗ ࠝ ࠣ ࠫ ࡛ ࣘ ࣞ ࣤ ࣪ ࣰ ࣵ ࣻ ँ ़ ृ ॉ ॎ ॕ ং ী ৈ ৣ ਼ ੇ ੰ ઃ ૂ ૉ ૣ ૿ ି ୄ ୖ ி ே ௗ ా ృ ో ౣ ಾ ೄ ೌ ೣ ഼ ൃ ൊ ൣ ෑ ෘ ෞ ี ็ ์ ຶ ຼ ໌ ༹ ུ ཹ ཿ ྆ ྐ ྖ ྜྷ ྡྷ ྨ ྮ ླ ྐྵ ာ ေ ့ ွ ၙ ၤ ၬ ၴ 
ႇ ႍ ႝ ᜔ ᝲ ិ ួ ៃ ៈ ៎ ៝ ᢆ ᤤ ᤪ ᤳ ᤹ ᨚ ᩘ ᩞ ᩥ ᩪ ᩰ ᩶ ᩻ ᪳ ᪹ ᪾ ᬴ ᬺ ᬿ ᭫ ᭱ ᮂ ᮦ ᮬ ᯩ ᯯ ᰥ ᰪ ᰰ ᰶ ᳔ ᳚ ᳠ ᳥ ᳷ ᷃ ᷈ ᷎ ᷔ ᷙ ᷟ ᷥ ᷪ ᷰ ᷶ ᷼ ⃒ ⃘ ⃝ ⃣ ⃩ ⃮ ⵿ ⷥ ⷪ ⷰ ⷶ ⷻ 〫 ゚ ꙴ ꙺ ꛰ ꠤ ꢁ ꢹ ꢿ ꣄ ꣤ ꣪ ꣯ ꤨ ꥇ ꥌ ꥒ ꦳ ꦸ ꦾ ꨫ ꨰ ꨶ ꩽ ꪷ ꫬ ꯣ ꯨ ︀ ︆ ︋ ︡ ︧ ︬ 𐍶 𐨂 𐨍 𐨿 𐴧 𐽊 𐽐 𑀺 𑀿 𑁅 𑂰 𑂵 𑄀 𑄪 𑄯 𑅅 𑆳 𑆸 𑆾 𑇌 𑈰 𑈶 𑋢 𑋧 𑌂 𑍀 𑍇 𑍢 𑍪 𑍲 𑐸 𑐾 𑑃 𑒱 𑒷 𑒼 𑓂 𑖳 𑖺 𑗀 𑘳 𑘸 𑘾 𑚮 𑚳 𑜞 𑜤 𑜩 𑠯 𑠵 𑠺 𑧖 𑧞 𑨂 𑨈 𑨶 𑨼 𑩓 𑩙 𑪌 𑪒 𑪘 𑰲 𑰹 𑰿 𑲖 𑲜 𑲢 𑲧 𑲮 𑲴 𑴴 𑴽 𑵄 𑶍 𑶔 𑻵 𖫴 𖬴 𖽓 𖽙 𖽞 𖽤 𖽪 𖽯 𖽵 𖽻 𖾀 𖾆 𛲝 𝅨 𝅱 𝅿 𝆆 𝆪 𝉄 𝨄 𝨊 𝨐 𝨕 𝨛 𝨡 𝨦 𝨬 𝨲 𝨻 𝩁 𝩇 𝩌 𝩒 𝩘 𝩝 𝩣 𝩩 𝪄 𝪡 𝪧 𝪬 𞀂 𞀉 𞀎 𞀔 𞀜 𞀡 𞀩 𞄴 𞋮 𞣔 𞥇 󠄁 󠄇 󠄍 󠄒 󠄘 󠄞 󠄣 󠄩 󠄯 󠄴 󠄺 󠅀 󠅅 󠅋 󠅑 󠅖 󠅜 󠅢 󠅧 󠅭 󠅳 󠅸 󠅾 󠆄 󠆉 󠆏 󠆕 󠆚 󠆠 󠆦 󠆫 󠆱 󠆷 󠆼 󠇂 󠇈 󠇍 󠇓 󠇙 󠇞 󠇤 󠇪 󠇯b", + "expected": "à-̅-̋-̑-̖-̜-̢-̧-̭-̳-̸-̾-̈́-͉-͏-͕-͚-͠-ͦ-ͫ-҄-֑-֖-֜-֢-֧-֭-ֳ-ָ-ֿ-ؐ-ؕ-ً-ّ-ٖ-ٜ-ۗ-ۜ-ۤ-ۭ-ܳ-ܹ-ܿ-݄-݊-ޫ-ް-߰-ࠗ-ࠝ-ࠣ-ࠫ-࡛-ࣘ-ࣞ-ࣤ-࣪-ࣰ-ࣵ-ࣻ-ँ-़-ृ-ॉ-ॎ-ॕ-ং-ী-ৈ-ৣ-਼-ੇ-ੰ-ઃ-ૂ-ૉ-ૣ-૿-ି-ୄ-ୖ-ி-ே-ௗ-ా-ృ-ో-ౣ-ಾ-ೄ-ೌ-ೣ-഼-ൃ-ൊ-ൣ-ෑ-ෘ-ෞ-ี-็-์-ຶ-ຼ-໌-༹-ུ-ཹ-ཿ-྆-ྐ-ྖ-ྜྷ-ྡྷ-ྨ-ྮ-ླ-ྐྵ-ာ-ေ-့-ွ-ၙ-ၤ-ၬ-ၴ-ႇ-ႍ-ႝ-᜔-ᝲ-ិ-ួ-ៃ-ៈ-៎-៝-ᢆ-ᤤ-ᤪ-ᤳ-᤹-ᨚ-ᩘ-ᩞ-ᩥ-ᩪ-ᩰ-᩶-᩻-᪳-᪹-᪾-᬴-ᬺ-ᬿ-᭫-᭱-ᮂ-ᮦ-ᮬ-ᯩ-ᯯ-ᰥ-ᰪ-ᰰ-ᰶ-᳔-᳚-᳠-᳥-᳷-᷃-᷈-᷎-ᷔ-ᷙ-ᷟ-ᷥ-ᷪ-ᷰ-᷶-᷼-⃒-⃘-⃝-⃣-⃩-⃮-⵿-ⷥ-ⷪ-ⷰ-ⷶ-ⷻ-〫-゚-ꙴ-ꙺ-꛰-ꠤ-ꢁ-ꢹ-ꢿ-꣄-꣤-꣪-꣯-ꤨ-ꥇ-ꥌ-ꥒ-꦳-ꦸ-ꦾ-ꨫ-ꨰ-ꨶ-ꩽ-ꪷ-ꫬ-ꯣ-ꯨ-︀-︆-︋-︡-︧-︬-𐍶-𐨂-𐨍-𐨿-𐴧-𐽊-𐽐-𑀺-𑀿-𑁅-𑂰-𑂵-𑄀-𑄪-𑄯-𑅅-𑆳-𑆸-𑆾-𑇌-𑈰-𑈶-𑋢-𑋧-𑌂-𑍀-𑍇-𑍢-𑍪-𑍲-𑐸-𑐾-𑑃-𑒱-𑒷-𑒼-𑓂-𑖳-𑖺-𑗀-𑘳-𑘸-𑘾-𑚮-𑚳-𑜞-𑜤-𑜩-𑠯-𑠵-𑠺-𑧖-𑧞-𑨂-𑨈-𑨶-𑨼-𑩓-𑩙-𑪌-𑪒-𑪘-𑰲-𑰹-𑰿-𑲖-𑲜-𑲢-𑲧-𑲮-𑲴-𑴴-𑴽-𑵄-𑶍-𑶔-𑻵-𖫴-𖬴-𖽓-𖽙-𖽞-𖽤-𖽪-𖽯-𖽵-𖽻-𖾀-𖾆-𛲝-𝅨-𝅱-𝅿-𝆆-𝆪-𝉄-𝨄-𝨊-𝨐-𝨕-𝨛-𝨡-𝨦-𝨬-𝨲-𝨻-𝩁-𝩇-𝩌-𝩒-𝩘-𝩝-𝩣-𝩩-𝪄-𝪡-𝪧-𝪬-𞀂-𞀉-𞀎-𞀔-𞀜-𞀡-𞀩-𞄴-𞋮-𞣔-𞥇-󠄁-󠄇-󠄍-󠄒-󠄘-󠄞-󠄣-󠄩-󠄯-󠄴-󠄺-󠅀-󠅅-󠅋-󠅑-󠅖-󠅜-󠅢-󠅧-󠅭-󠅳-󠅸-󠅾-󠆄-󠆉-󠆏-󠆕-󠆚-󠆠-󠆦-󠆫-󠆱-󠆷-󠆼-󠇂-󠇈-󠇍-󠇓-󠇙-󠇞-󠇤-󠇪-󠇯b" + }, + { + "name": "Math_Symbol", + "input": "a+ = | ± ÷ ؆ ⁄ ⁺ ⁼ ₌ ⅀ ⅃ ⅋ ↑ ↔ ↛ ↣ ⇎ ⇒ ⇵ ⇷ ⇹ ⇼ ⇾ ∀ ∃ ∅ ∇ ∊ ∌ ∏ ∑ ∓ ∖ ∘ √ ∝ ∟ ∢ ∤ ∦ ∩ ∫ ∭ ∰ ∲ ∵ ∷ ∹ ∼ ∾ ≀ ≃ ≅ ≇ ≊ ≌ ≏ ≑ ≓ ≖ ≘ ≚ ≝ ≟ ≢ ≤ ≦ ≩ ≫ ≭ ≰ ≲ ≵ ≷ ≹ ≼ ≾ ⊀ ⊃ ⊅ ⊇ ⊊ ⊌ ⊏ ⊑ ⊓ ⊖ ⊘ ⊚ ⊝ ⊟ ⊢ ⊤ ⊦ ⊩ ⊫ ⊭ ⊰ ⊲ ⊵ ⊷ ⊹ ⊼ ⊾ ⋀ ⋃ ⋅ ⋇ ⋊ ⋌ ⋏ ⋑ ⋓ ⋖ ⋘ ⋚ ⋝ ⋟ ⋢ ⋤ ⋦ ⋩ ⋫ ⋭ ⋰ ⋲ ⋴ ⋷ ⋹ ⋼ ⋾ ⌠ ⎛ ⎝ ⎟ ⎢ ⎤ ⎧ ⎩ ⎫ ⎮ ⎰ ⎲ ⏝ ⏟ ▷ ◸ ◺ ◽ ◿ ⟀ ⟃ ⟇ ⟉ ⟌ ⟎ ⟑ ⟓ ⟕ ⟘ ⟚ ⟜ ⟟ ⟡ ⟤ ⟰ ⟲ ⟵ ⟷ ⟹ ⟼ ⟾ ⤁ ⤃ ⤅ ⤈ ⤊ ⤌ ⤏ ⤑ ⤓ ⤖ ⤘ ⤛ ⤝ ⤟ ⤢ ⤤ ⤦ ⤩ ⤫ ⤮ ⤰ ⤲ ⤵ ⤷ ⤹ ⤼ ⤾ ⥁ ⥃ ⥅ ⥈ ⥊ ⥌ ⥏ ⥑ ⥓ ⥖ ⥘ ⥛ ⥝ ⥟ ⥢ ⥤ ⥦ ⥩ ⥫ ⥮ ⥰ ⥲ ⥵ ⥷ ⥹ ⥼ ⥾ ⦀ ⦙ ⦛ ⦞ ⦠ ⦢ ⦥ ⦧ ⦩ ⦬ ⦮ ⦱ ⦳ ⦵ ⦸ ⦺ ⦼ ⦿ ⧁ ⧄ ⧆ ⧈ ⧋ ⧍ ⧏ ⧒ ⧔ ⧖ ⧝ ⧟ ⧢ ⧤ ⧦ ⧩ ⧫ ⧭ ⧰ ⧲ ⧵ ⧷ ⧹ ⧾ ⨀ ⨂ ⨅ ⨇ ⨊ ⨌ ⨎ ⨑ ⨓ ⨕ ⨘ ⨚ ⨜ ⨟ ⨡ ⨤ ⨦ ⨨ ⨫ 
⨭ ⨯ ⨲ ⨴ ⨷ ⨹ ⨻ ⨾ ⩀ ⩂ ⩅ ⩇ ⩊ ⩌ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩟ ⩡ ⩤ ⩦ ⩨ ⩫ ⩭ ⩯ ⩲ ⩴ ⩷ ⩹ ⩻ ⩾ ⪀ ⪂ ⪅ ⪇ ⪉ ⪌ ⪎ ⪑ ⪓ ⪕ ⪘ ⪚ ⪜ ⪟ ⪡ ⪤ ⪦ ⪨ ⪫ ⪭ ⪯ ⪲ ⪴ ⪷ ⪹ ⪻ ⪾ ⫀ ⫂ ⫅ ⫇ ⫉ ⫌ ⫎ ⫑ ⫓ ⫕ ⫘ ⫚ ⫝̸ ⫟ ⫡ ⫤ ⫦ ⫨ ⫫ ⫭ ⫯ ⫲ ⫴ ⫷ ⫹ ⫻ ⫾ ⬰ ⬲ ⬵ ⬷ ⬹ ⬼ ⬾ ⭁ ⭃ ⭇ ⭊ ⭌ ﹢ ﹦ < | ¬ ↑ 𝛁 𝛻 𝜵 𝞉 𝟃 𞻱b", + "expected": "a----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------b" + }, + { + "name": "Modifier_Letter", + "input": "aʰ ʱ ʲ ʳ ʴ ʵ ʶ ʷ ʸ ʹ ʺ ʻ ʼ ʽ ʾ ʿ ˀ ˁ ˆ ˇ ˈ ˉ ˊ ˋ ˌ ˍ ˎ ˏ ː ˑ ˠ ˡ ˢ ˣ ˤ ˬ ˮ ʹ ͺ ՙ ـ ۥ ۦ ߴ ߵ ߺ ࠚ ࠤ ࠨ ॱ ๆ ໆ ჼ ៗ ᡃ ᪧ ᱸ ᱹ ᱺ ᱻ ᱼ ᱽ ᴬ ᴭ ᴮ ᴯ ᴰ ᴱ ᴲ ᴳ ᴴ ᴵ ᴶ ᴷ ᴸ ᴹ ᴺ ᴻ ᴼ ᴽ ᴾ ᴿ ᵀ ᵁ ᵂ ᵃ ᵄ ᵅ ᵆ ᵇ ᵈ ᵉ ᵊ ᵋ ᵌ ᵍ ᵎ ᵏ ᵐ ᵑ ᵒ ᵓ ᵔ ᵕ ᵖ ᵗ ᵘ ᵙ ᵚ ᵛ ᵜ ᵝ ᵞ ᵟ ᵠ ᵡ ᵢ ᵣ ᵤ ᵥ ᵦ ᵧ ᵨ ᵩ ᵪ ᵸ ᶛ ᶜ ᶝ ᶞ ᶟ ᶠ ᶡ ᶢ ᶣ ᶤ ᶥ ᶦ ᶧ ᶨ ᶩ ᶪ ᶫ ᶬ ᶭ ᶮ ᶯ ᶰ ᶱ ᶲ ᶳ ᶴ ᶵ ᶶ ᶷ ᶸ ᶹ ᶺ ᶻ ᶼ ᶽ ᶾ ᶿ ⁱ ⁿ ₐ ₑ ₒ ₓ ₔ ₕ ₖ ₗ ₘ ₙ ₚ ₛ ₜ ⱼ ⱽ ⵯ ⸯ 々 〱 〲 〳 〴 〵 〻 ゝ ゞ ー ヽ ヾ ꀕ ꓸ ꓹ ꓺ ꓻ ꓼ ꓽ ꘌ ꙿ ꚜ ꚝ ꜗ ꜘ ꜙ ꜚ ꜛ ꜜ ꜝ ꜞ ꜟ ꝰ ꞈ ꟸ ꟹ ꧏ ꧦ ꩰ ꫝ ꫳ ꫴ ꭜ ꭝ ꭞ ꭟ ー ゙ ゚ 𖭀 𖭁 𖭂 𖭃 𖾓 𖾔 𖾕 𖾖 𖾗 𖾘 𖾙 𖾚 𖾛 𖾜 𖾝 𖾞 𖾟 𖿠 𖿡 𖿣 𞄷 𞄸 𞄹 𞄺 𞄻 𞄼 𞄽 𞥋b", + "expected": "aʰ-ʱ-ʲ-ʳ-ʴ-ʵ-ʶ-ʷ-ʸ-ʹ-ʺ-ʻ-ʼ-ʽ-ʾ-ʿ-ˀ-ˁ-ˆ-ˇ-ˈ-ˉ-ˊ-ˋ-ˌ-ˍ-ˎ-ˏ-ː-ˑ-ˠ-ˡ-ˢ-ˣ-ˤ-ˬ-ˮ-ʹ-ͺ-ՙ-ـ-ۥ-ۦ-ߴ-ߵ-ߺ-ࠚ-ࠤ-ࠨ-ॱ-ๆ-ໆ-ჼ-ៗ-ᡃ-ᪧ-ᱸ-ᱹ-ᱺ-ᱻ-ᱼ-ᱽ-ᴬ-ᴭ-ᴮ-ᴯ-ᴰ-ᴱ-ᴲ-ᴳ-ᴴ-ᴵ-ᴶ-ᴷ-ᴸ-ᴹ-ᴺ-ᴻ-ᴼ-ᴽ-ᴾ-ᴿ-ᵀ-ᵁ-ᵂ-ᵃ-ᵄ-ᵅ-ᵆ-ᵇ-ᵈ-ᵉ-ᵊ-ᵋ-ᵌ-ᵍ-ᵎ-ᵏ-ᵐ-ᵑ-ᵒ-ᵓ-ᵔ-ᵕ-ᵖ-ᵗ-ᵘ-ᵙ-ᵚ-ᵛ-ᵜ-ᵝ-ᵞ-ᵟ-ᵠ-ᵡ-ᵢ-ᵣ-ᵤ-ᵥ-ᵦ-ᵧ-ᵨ-ᵩ-ᵪ-ᵸ-ᶛ-ᶜ-ᶝ-ᶞ-ᶟ-ᶠ-ᶡ-ᶢ-ᶣ-ᶤ-ᶥ-ᶦ-ᶧ-ᶨ-ᶩ-ᶪ-ᶫ-ᶬ-ᶭ-ᶮ-ᶯ-ᶰ-ᶱ-ᶲ-ᶳ-ᶴ-ᶵ-ᶶ-ᶷ-ᶸ-ᶹ-ᶺ-ᶻ-ᶼ-ᶽ-ᶾ-ᶿ-ⁱ-ⁿ-ₐ-ₑ-ₒ-ₓ-ₔ-ₕ-ₖ-ₗ-ₘ-ₙ-ₚ-ₛ-ₜ-ⱼ-ⱽ-ⵯ-ⸯ-々-〱-〲-〳-〴-〵-〻-ゝ-ゞ-ー-ヽ-ヾ-ꀕ-ꓸ-ꓹ-ꓺ-ꓻ-ꓼ-ꓽ-ꘌ-ꙿ-ꚜ-ꚝ-ꜗ-ꜘ-ꜙ-ꜚ-ꜛ-ꜜ-ꜝ-ꜞ-ꜟ-ꝰ-ꞈ-ꟸ-ꟹ-ꧏ-ꧦ-ꩰ-ꫝ-ꫳ-ꫴ-ꭜ-ꭝ-ꭞ-ꭟ-ー-゙-゚-𖭀-𖭁-𖭂-𖭃-𖾓-𖾔-𖾕-𖾖-𖾗-𖾘-𖾙-𖾚-𖾛-𖾜-𖾝-𖾞-𖾟-𖿠-𖿡-𖿣-𞄷-𞄸-𞄹-𞄺-𞄻-𞄼-𞄽-𞥋b" + }, + { + "name": "Modifier_Symbol", + "input": "a^ ` ¨ ¯ ´ ¸ ˂ ˃ ˄ ˅ ˒ ˓ ˔ ˕ ˖ ˗ ˘ ˙ ˚ ˛ ˜ ˝ ˞ ˟ ˥ ˦ ˧ ˨ ˩ ˪ ˫ ˭ ˯ ˰ ˱ ˲ ˳ ˴ ˵ ˶ ˷ ˸ ˹ ˺ ˻ ˼ ˽ ˾ ˿ ͵ ΄ ΅ ᾽ ᾿ ῀ ῁ ῍ ῎ ῏ ῝ ῞ ῟ ῭ ΅ ` ´ ῾ ゛ ゜ ꜀ ꜁ ꜂ ꜃ ꜄ ꜅ ꜆ ꜇ ꜈ ꜉ ꜊ ꜋ ꜌ ꜍ ꜎ ꜏ ꜐ ꜑ ꜒ ꜓ ꜔ ꜕ ꜖ ꜠ ꜡ ꞉ ꞊ 
꭛ ﮲ ﮳ ﮴ ﮵ ﮶ ﮷ ﮸ ﮹ ﮺ ﮻ ﮼ ﮽ ﮾ ﮿ ﯀ ﯁ ^ `  ̄ 🏻 🏼 🏽 🏾 🏿b", + "expected": "a------------------------------------------------------------------------------------------------------------------------b" + }, + { + "name": "Nonspacing_Mark", + "input": "à ̄ ̉ ̍ ̒ ̖ ̛ ̟ ̤ ̩ ̭ ̲ ̶ ̻ ̿ ̈́ ͉ ͍ ͒ ͖ ͛ ͟ ͤ ͨ ͭ ҅ ֒ ֗ ֛ ֠ ֤ ֩ ֮ ֲ ַ ֻ ׂ ؐ ؕ ؚ َ ٓ ٗ ٜ ٰ ۚ ۠ ۧ ۭ ܲ ܷ ܻ ݀ ݄ ݉ ީ ޭ ߬ ߰ ࠖ ࠛ ࠠ ࠦ ࠫ ࡛ ࣖ ࣛ ࣟ ࣥ ࣩ ࣮ ࣳ ࣷ ࣼ ऀ ु ॅ ॑ ॖ ঁ ৄ ৾ ੂ ੌ ੵ ૂ ે ૺ ૾ ୁ ୍ ீ ా ై ౕ ಁ ್ ഁ ൃ ൣ ූ ื ็ ์ ິ ູ ່ ໍ ༹ ུ ཹ ཽ ྃ ྍ ྒ ྖ ྜ ྡ ྥ ྪ ྮ ླ ྷ ྼ ူ ဵ ွ ၞ ၳ ႆ ፟ ᜳ ᝲ ី ូ ់ ៏ ៝ ᢅ ᤢ ᤺ ᨛ ᩛ ᩠ ᩨ ᩬ ᩷ ᩼ ᪲ ᪷ ᪻ ᬂ ᬷ ᭂ ᭯ ᭳ ᮤ ᮫ ᯩ ᯱ ᰰ ᰶ ᳔ ᳙ ᳝ ᳣ ᳧ ᳹ ᷃ ᷈ ᷍ ᷑ ᷖ ᷚ ᷟ ᷣ ᷨ ᷭ ᷱ ᷶ ᷻ ⃐ ⃔ ⃙ ⃡ ⃩ ⃮ ⳰ ⷢ ⷦ ⷫ ⷯ ⷴ ⷹ ⷽ 〬 ꙯ ꙸ ꙼ ꛱ ꠦ ꣡ ꣦ ꣪ ꣯ ꤦ ꤫ ꥈ ꥍ ꦀ ꦶ ꦽ ꨫ ꨲ ꩌ ꪴ ꫁ ꯥ ︁ ︅ ︊ ︎ ︣ ︧ ︬ 𐋠 𐍹 𐨅 𐨎 𐨿 𐴥 𐽈 𐽍 𑀁 𑀼 𑁀 𑁅 𑂁 𑂹 𑄧 𑄫 𑄱 𑅳 𑆸 𑆼 𑇋 𑈱 𑋟 𑋧 𑌀 𑍦 𑍪 𑍲 𑐹 𑐾 𑑆 𑒵 𑒿 𑖲 𑖽 𑗝 𑘷 𑘿 𑚰 𑚵 𑜟 𑜧 𑜫 𑠳 𑠷 𑧖 𑨁 𑨅 𑨊 𑨶 𑨽 𑩒 𑩙 𑪌 𑪐 𑪕 𑰰 𑰵 𑰺 𑲒 𑲗 𑲛 𑲠 𑲤 𑲫 𑲯 𑲶 𑴴 𑴽 𑵃 𑶐 𑻴 𖫳 𖬳 𖽏 𛲝 𝅻 𝅿 𝆆 𝆊 𝆭 𝨀 𝨅 𝨊 𝨎 𝨓 𝨗 𝨜 𝨠 𝨥 𝨩 𝨮 𝨳 𝨻 𝩀 𝩄 𝩉 𝩍 𝩒 𝩗 𝩛 𝩠 𝩤 𝩩 𝩵 𝪞 𝪤 𝪨 𝪭 𞀁 𞀆 𞀋 𞀐 𞀔 𞀛 𞀠 𞀦 𞄰 𞄴 𞋮 𞣒 𞥄 𞥉 󠄂 󠄇 󠄋 󠄐 󠄔 󠄙 󠄞 󠄢 󠄧 󠄫 󠄰 󠄴 󠄹 󠄽 󠅂 󠅇 󠅋 󠅐 󠅔 󠅙 󠅝 󠅢 󠅧 󠅫 󠅰 󠅴 󠅹 󠅽 󠆂 󠆇 󠆋 󠆐 󠆔 󠆙 󠆝 󠆢 󠆦 󠆫 󠆰 󠆴 󠆹 󠆽 󠇂 󠇆 󠇋 󠇐 󠇔 󠇙 󠇝 󠇢 󠇦 󠇫 󠇯b", + "expected": "à-̄-̉-̍-̒-̖-̛-̟-̤-̩-̭-̲-̶-̻-̿-̈́-͉-͍-͒-͖-͛-͟-ͤ-ͨ-ͭ-҅-֒-֗-֛-֠-֤-֩-֮-ֲ-ַ-ֻ-ׂ-ؐ-ؕ-ؚ-َ-ٓ-ٗ-ٜ-ٰ-ۚ-۠-ۧ-ۭ-ܲ-ܷ-ܻ-݀-݄-݉-ީ-ޭ-߬-߰-ࠖ-ࠛ-ࠠ-ࠦ-ࠫ-࡛-ࣖ-ࣛ-ࣟ-ࣥ-ࣩ-࣮-ࣳ-ࣷ-ࣼ-ऀ-ु-ॅ-॑-ॖ-ঁ-ৄ-৾-ੂ-ੌ-ੵ-ૂ-ે-ૺ-૾-ୁ-୍-ீ-ా-ై-ౕ-ಁ-್-ഁ-ൃ-ൣ-ූ-ื-็-์-ິ-ູ-່-ໍ-༹-ུ-ཹ-ཽ-ྃ-ྍ-ྒ-ྖ-ྜ-ྡ-ྥ-ྪ-ྮ-ླ-ྷ-ྼ-ူ-ဵ-ွ-ၞ-ၳ-ႆ-፟-ᜳ-ᝲ-ី-ូ-់-៏-៝-ᢅ-ᤢ-᤺-ᨛ-ᩛ-᩠-ᩨ-ᩬ-᩷-᩼-᪲-᪷-᪻-ᬂ-ᬷ-ᭂ-᭯-᭳-ᮤ-᮫-ᯩ-ᯱ-ᰰ-ᰶ-᳔-᳙-᳝-᳣-᳧-᳹-᷃-᷈-᷍-᷑-ᷖ-ᷚ-ᷟ-ᷣ-ᷨ-ᷭ-ᷱ-᷶-᷻-⃐-⃔-⃙-⃡-⃩-⃮-⳰-ⷢ-ⷦ-ⷫ-ⷯ-ⷴ-ⷹ-ⷽ-〬-꙯-ꙸ-꙼-꛱-ꠦ-꣡-꣦-꣪-꣯-ꤦ-꤫-ꥈ-ꥍ-ꦀ-ꦶ-ꦽ-ꨫ-ꨲ-ꩌ-ꪴ-꫁-ꯥ-︁-︅-︊-︎-︣-︧-︬-𐋠-𐍹-𐨅-𐨎-𐨿-𐴥-𐽈-𐽍-𑀁-𑀼-𑁀-𑁅-𑂁-𑂹-𑄧-𑄫-𑄱-𑅳-𑆸-𑆼-𑇋-𑈱-𑋟-𑋧-𑌀-𑍦-𑍪-𑍲-𑐹-𑐾-𑑆-𑒵-𑒿-𑖲-𑖽-𑗝-𑘷-𑘿-𑚰-𑚵-𑜟-𑜧-𑜫-𑠳-𑠷-𑧖-𑨁-𑨅-𑨊-𑨶-𑨽-𑩒-𑩙-𑪌-𑪐-𑪕-𑰰-𑰵-𑰺-𑲒-𑲗-𑲛-𑲠-𑲤-𑲫-𑲯-𑲶-𑴴-𑴽-𑵃-𑶐-𑻴-𖫳-𖬳-𖽏-𛲝-𝅻-𝅿-𝆆-𝆊-𝆭-𝨀-𝨅-𝨊-𝨎-𝨓-𝨗-𝨜-𝨠-𝨥-𝨩-𝨮-𝨳-𝨻-𝩀-𝩄-𝩉-𝩍-𝩒-𝩗-𝩛-𝩠-𝩤-𝩩-𝩵-𝪞-𝪤-𝪨-𝪭-𞀁-𞀆-𞀋-𞀐-𞀔-𞀛-𞀠-𞀦-𞄰-𞄴-𞋮-𞣒-𞥄-𞥉-󠄂-󠄇-󠄋-󠄐-󠄔-󠄙-󠄞-󠄢-󠄧-󠄫-󠄰-󠄴-󠄹-󠄽-󠅂-󠅇-󠅋-󠅐-󠅔-󠅙-󠅝-󠅢-󠅧-󠅫-󠅰-󠅴-󠅹-󠅽-󠆂-󠆇-󠆋-󠆐-󠆔-󠆙-󠆝-󠆢-󠆦-󠆫-󠆰-󠆴-󠆹-󠆽-󠇂-󠇆-󠇋-󠇐-󠇔-󠇙-󠇝-󠇢-󠇦-󠇫-󠇯b" + }, + { + "name": "Number", + "input": "a0 4 8 ¼ ١ ٥ ۰ ۴ ۹ ߃ ߇ २ ६ ১ ৫ ৯ ৸ ੨ ੬ ૧ ૫ ୦ ୪ ୮ ୵ ௧ ௬ ௰ ౧ ౬ ౸ ౼ ೨ ೬ ൙ ൝ ൨ ൭ ൱ ൶ ෧ ෫ ๐ ๔ 
๘ ໓ ໗ ༢ ༦ ༪ ༯ ༳ ၄ ၈ ႒ ႗ ፪ ፮ ፳ ፷ ፼ ០ ៤ ៩ ៳ ៸ ᠒ ᠖ ᥇ ᥋ ᥏ ᧔ ᧘ ᪂ ᪆ ᪐ ᪕ ᪙ ᭔ ᭘ ᮲ ᮷ ᱁ ᱅ ᱐ ᱔ ᱙ ⁶ ₀ ₅ ₉ ⅔ ⅘ ⅜ Ⅱ Ⅵ Ⅹ Ⅾ ⅲ ⅷ ⅻ ⅿ ↆ ① ⑥ ⑩ ⑭ ⑲ ⑶ ⑺ ⑿ ⒃ ⒈ ⒌ ⒐ ⒕ ⒙ ⓬ ⓰ ⓴ ⓹ ⓽ ❷ ❼ ➀ ➅ ➉ ➍ ➒ 〡 〦 〸 ㆓ ㈢ ㈦ ㉈ ㉍ ㉒ ㉗ ㉛ ㉟ ㊄ ㊈ ㊴ ㊸ ㊼ ꘡ ꘥ ꘩ ꛪ ꛮ ꠳ ꣑ ꣕ ꤀ ꤄ ꤉ ꧓ ꧗ ꧲ ꧶ ꩐ ꩕ ꩙ ꯴ ꯸ 2 7 𐄈 𐄍 𐄑 𐄕 𐄚 𐄞 𐄢 𐄧 𐄫 𐄰 𐅀 𐅄 𐅉 𐅍 𐅒 𐅖 𐅚 𐅟 𐅣 𐅧 𐅬 𐅰 𐅵 𐆊 𐋣 𐋨 𐋬 𐋱 𐋵 𐋹 𐌢 𐏑 𐒠 𐒤 𐒨 𐡛 𐡟 𐡼 𐢨 𐢬 𐣼 𐤖 𐤚 𐧁 𐧅 𐧊 𐧎 𐧔 𐧙 𐧝 𐧡 𐧦 𐧪 𐧯 𐧳 𐧷 𐧼 𐩀 𐩅 𐩽 𐪟 𐫯 𐭛 𐭟 𐭼 𐮩 𐮮 𐳼 𐴰 𐴵 𐴹 𐹤 𐹨 𐹬 𐹱 𐹵 𐹹 𐹾 𐼠 𐼥 𐽓 𑁔 𑁙 𑁝 𑁢 𑁦 𑁪 𑁯 𑃳 𑃷 𑄸 𑄼 𑇑 𑇕 𑇙 𑇥 𑇩 𑇮 𑇲 𑋱 𑋶 𑑐 𑑔 𑑙 𑓓 𑓘 𑙒 𑙖 𑛁 𑛅 𑜰 𑜴 𑜸 𑣡 𑣥 𑣩 𑣮 𑣲 𑱔 𑱘 𑱜 𑱡 𑱥 𑱪 𑵑 𑵕 𑶠 𑶤 𑶨 𑿃 𑿇 𑿌 𑿐 𑿔 𒐄 𒐈 𒐍 𒐑 𒐕 𒐚 𒐞 𒐢 𒐧 𒐫 𒐰 𒐴 𒐸 𒐽 𒑁 𒑆 𒑊 𒑎 𒑓 𒑗 𒑛 𒑠 𒑤 𒑩 𒑭 𖩢 𖩧 𖭑 𖭖 𖭛 𖭟 𖺂 𖺆 𖺊 𖺏 𖺓 𝋡 𝋥 𝋩 𝋮 𝋲 𝍣 𝍧 𝍫 𝍰 𝍴 𝍸 𝟒 𝟖 𝟛 𝟟 𝟣 𝟨 𝟬 𝟱 𝟵 𝟹 𝟾 𞅂 𞅆 𞋱 𞋵 𞣇 𞣋 𞣏 𞥔 𞥘 𞱴 𞱸 𞱼 𞲁 𞲅 𞲉 𞲎 𞲒 𞲗 𞲛 𞲟 𞲤 𞲨 𞲮 𞲳 𞴃 𞴈 𞴌 𞴐 𞴕 𞴙 𞴞 𞴢 𞴦 𞴫 𞴰 𞴵 𞴹 𞴽 🄄 🄈 🄌b", + "expected": "a0-4-8--١-٥-۰-۴-۹-߃-߇-२-६-১-৫-৯--੨-੬-૧-૫-୦-୪-୮--௧-௬--౧-౬---೨-೬---൨-൭---෧-෫-๐-๔-๘-໓-໗-༢-༦----၄-၈-႒-႗------០-៤-៩---᠒-᠖-᥇-᥋-᥏-᧔-᧘-᪂-᪆-᪐-᪕-᪙-᭔-᭘-᮲-᮷-᱁-᱅-᱐-᱔-᱙--------ⅱ-ⅵ-ⅹ-ⅾ-ⅲ-ⅷ-ⅻ-ⅿ-ↆ---------------------------〡-〦-〸---------------꘡-꘥-꘩-ꛪ-ꛮ--꣑-꣕-꤀-꤄-꤉-꧓-꧗-꧲-꧶-꩐-꩕-꩙-꯴-꯸-2-7-----------𐅀-𐅄-𐅉-𐅍-𐅒-𐅖-𐅚-𐅟-𐅣-𐅧-𐅬-𐅰----------𐏑-𐒠-𐒤-𐒨----------------------------------𐴰-𐴵-𐴹---------------𑁦-𑁪-𑁯-𑃳-𑃷-𑄸-𑄼-𑇑-𑇕-𑇙-----𑋱-𑋶-𑑐-𑑔-𑑙-𑓓-𑓘-𑙒-𑙖-𑛁-𑛅-𑜰-𑜴-𑜸-𑣡-𑣥-𑣩---𑱔-𑱘-----𑵑-𑵕-𑶠-𑶤-𑶨------𒐄-𒐈-𒐍-𒐑-𒐕-𒐚-𒐞-𒐢-𒐧-𒐫-𒐰-𒐴-𒐸-𒐽-𒑁-𒑆-𒑊-𒑎-𒑓-𒑗-𒑛-𒑠-𒑤-𒑩-𒑭-𖩢-𖩧-𖭑-𖭖-------------------𝟒-𝟖-𝟛-𝟟-𝟣-𝟨-𝟬-𝟱-𝟵-𝟹-𝟾-𞅂-𞅆-𞋱-𞋵----𞥔-𞥘--------------------------------b" + }, + { + "name": "Open_Punctuation", + "input": "a( [ { ༺ ༼ ᚛ ‚ „ ⁅ ⁽ ₍ ⌈ ⌊ 〈 ❨ ❪ ❬ ❮ ❰ ❲ ❴ ⟅ ⟦ ⟨ ⟪ ⟬ ⟮ ⦃ ⦅ ⦇ ⦉ ⦋ ⦍ ⦏ ⦑ ⦓ ⦕ ⦗ ⧘ ⧚ ⧼ ⸢ ⸤ ⸦ ⸨ ⹂ 〈 《 「 『 【 〔 〖 〘 〚 〝 ﴿ ︗ ︵ ︷ ︹ ︻ ︽ ︿ ﹁ ﹃ ﹇ ﹙ ﹛ ﹝ ( [ { ⦅ 「b", + "expected": "a--------------------------------------------------------------------------b" + }, + { + "name": "Other_Letter", + "input": "aª ޠ ਹ മ ၮ ሞ ᎈ ᔨ ᙘ ᡂ ᨄ ⴿ ㄉ 㑿 㖯 㛟 㠎 㤾 㩭 㮝 㳌 㷼 㼫 䁛 䆊 䊺 䏩 䔙 䙈 䝸 䢨 䧗 䬇 䰶 䵦 仟 倏 儾 剮 厝 响 嗼 圬 塜 妋 媻 寪 崚 幉 役 您 懘 指 搷 敦 暖 柅 棵 樥 歔 沄 涳 滣 瀒 煂 牱 玡 瓐 瘀 眯 硟 禎 窾 篮 紝 繍 罼 肬 臛 茋 萺 蕪 蚙 蟉 裸 訨 識 貇 趷 軦 逖 酅 鉵 鎤 铔 阃 霳 顢 馒 髁 鯱 鴡 鹐 龀 ꃀ ꇰ ꌟ ꑏ ꗉ ꣽ ꬉ 곳 긢 꽒 낁 놱 닡 됐 땀 뙯 랟 룎 맾 묭 뱝 붌 뺼 뿫 섛 쉋 썺 쒪 엙 윉 져 쥨 쪗 쯇 쳶 츦 콕 킅 톴 틤 퐔 핃 홳 힢 罹 ﭡ ﲲ ﺒ 𐂂 𐎆 𐙂 𐠟 𐪈 𐼘 𑈃 
𑖒 𑩩 𒀜 𒅋 𒉻 𒒐 𓁼 𓆫 𓋛 𓐊 𔔋 𔘺 𖤣 𖩚 𗁨 𗆘 𗋇 𗏷 𗔦 𗙖 𗞅 𗢵 𗧤 𗬔 𗱃 𗵳 𗺢 𗿒 𘄂 𘈱 𘍡 𘒐 𘗀 𘛯 𘠧 𘥖 𘪆 𛃂 𛈼 𛱴 𞢺 𠂘 𠇇 𠋷 𠐦 𠕖 𠚅 𠞵 𠣤 𠨔 𠭃 𠱳 𠶢 𠻒 𡀁 𡄱 𡉡 𡎐 𡓀 𡗯 𡜟 𡡎 𡥾 𡪭 𡯝 𡴌 𡸼 𡽫 𢂛 𢇊 𢋺 𢐪 𢕙 𢚉 𢞸 𢣨 𢨗 𢭇 𢱶 𢶦 𢻕 𣀅 𣄴 𣉤 𣎔 𣓃 𣗳 𣜢 𣡒 𣦁 𣪱 𣯠 𣴐 𣸿 𣽯 𤂞 𤇎 𤋽 𤐭 𤕝 𤚌 𤞼 𤣫 𤨛 𤭊 𤱺 𤶩 𤻙 𥀈 𥄸 𥉧 𥎗 𥓆 𥗶 𥜦 𥡕 𥦅 𥪴 𥯤 𥴓 𥹃 𥽲 𦂢 𦇑 𦌁 𦐰 𦕠 𦚐 𦞿 𦣯 𦨞 𦭎 𦱽 𦶭 𦻜 𧀌 𧄻 𧉫 𧎚 𧓊 𧗹 𧜩 𧡙 𧦈 𧪸 𧯧 𧴗 𧹆 𧽶 𨂥 𨇕 𨌄 𨐴 𨕣 𨚓 𨟂 𨣲 𨨢 𨭑 𨲁 𨶰 𨻠 𩀏 𩄿 𩉮 𩎞 𩓍 𩗽 𩜬 𩡜 𩦌 𩪻 𩯫 𩴚 𩹊 𩽹 𪂩 𪇘 𪌈 𪐷 𪕧 𪚖 𪟯 𪤞 𪩎 𪭾 𪲭 𪷝 𪼌 𫀼 𫅫 𫊛 𫏊 𫓺 𫘩 𫝤 𫢕 𫧅 𫫴 𫰤 𫵔 𫺃 𫾳 𬃢 𬈒 𬍁 𬑱 𬖠 𬛐 𬟿 𬤯 𬩞 𬮎 𬲾 𬷭 𬼫 𭁚 𭆊 𭊹 𭏩 𭔘 𭙈 𭝷 𭢧 𭧖 𭬆 𭰵 𭵥 𭺕 𭿄 𮃴 𮈣 𮍓 𮒂 𮖲 𮛡 𮠑 𮥀 𮩰 𮮟 㰘 𪘀b", + "expected": "aª-ޠ-ਹ-മ-ၮ-ሞ-ᎈ-ᔨ-ᙘ-ᡂ-ᨄ-ⴿ-ㄉ-㑿-㖯-㛟-㠎-㤾-㩭-㮝-㳌-㷼-㼫-䁛-䆊-䊺-䏩-䔙-䙈-䝸-䢨-䧗-䬇-䰶-䵦-仟-倏-儾-剮-厝-响-嗼-圬-塜-妋-媻-寪-崚-幉-役-您-懘-指-搷-敦-暖-柅-棵-樥-歔-沄-涳-滣-瀒-煂-牱-玡-瓐-瘀-眯-硟-禎-窾-篮-紝-繍-罼-肬-臛-茋-萺-蕪-蚙-蟉-裸-訨-識-貇-趷-軦-逖-酅-鉵-鎤-铔-阃-霳-顢-馒-髁-鯱-鴡-鹐-龀-ꃀ-ꇰ-ꌟ-ꑏ-ꗉ-ꣽ-ꬉ-곳-긢-꽒-낁-놱-닡-됐-땀-뙯-랟-룎-맾-묭-뱝-붌-뺼-뿫-섛-쉋-썺-쒪-엙-윉-져-쥨-쪗-쯇-쳶-츦-콕-킅-톴-틤-퐔-핃-홳-힢-罹-ﭡ-ﲲ-ﺒ-𐂂-𐎆-𐙂-𐠟-𐪈-𐼘-𑈃-𑖒-𑩩-𒀜-𒅋-𒉻-𒒐-𓁼-𓆫-𓋛-𓐊-𔔋-𔘺-𖤣-𖩚-𗁨-𗆘-𗋇-𗏷-𗔦-𗙖-𗞅-𗢵-𗧤-𗬔-𗱃-𗵳-𗺢-𗿒-𘄂-𘈱-𘍡-𘒐-𘗀-𘛯-𘠧-𘥖-𘪆-𛃂-𛈼-𛱴-𞢺-𠂘-𠇇-𠋷-𠐦-𠕖-𠚅-𠞵-𠣤-𠨔-𠭃-𠱳-𠶢-𠻒-𡀁-𡄱-𡉡-𡎐-𡓀-𡗯-𡜟-𡡎-𡥾-𡪭-𡯝-𡴌-𡸼-𡽫-𢂛-𢇊-𢋺-𢐪-𢕙-𢚉-𢞸-𢣨-𢨗-𢭇-𢱶-𢶦-𢻕-𣀅-𣄴-𣉤-𣎔-𣓃-𣗳-𣜢-𣡒-𣦁-𣪱-𣯠-𣴐-𣸿-𣽯-𤂞-𤇎-𤋽-𤐭-𤕝-𤚌-𤞼-𤣫-𤨛-𤭊-𤱺-𤶩-𤻙-𥀈-𥄸-𥉧-𥎗-𥓆-𥗶-𥜦-𥡕-𥦅-𥪴-𥯤-𥴓-𥹃-𥽲-𦂢-𦇑-𦌁-𦐰-𦕠-𦚐-𦞿-𦣯-𦨞-𦭎-𦱽-𦶭-𦻜-𧀌-𧄻-𧉫-𧎚-𧓊-𧗹-𧜩-𧡙-𧦈-𧪸-𧯧-𧴗-𧹆-𧽶-𨂥-𨇕-𨌄-𨐴-𨕣-𨚓-𨟂-𨣲-𨨢-𨭑-𨲁-𨶰-𨻠-𩀏-𩄿-𩉮-𩎞-𩓍-𩗽-𩜬-𩡜-𩦌-𩪻-𩯫-𩴚-𩹊-𩽹-𪂩-𪇘-𪌈-𪐷-𪕧-𪚖-𪟯-𪤞-𪩎-𪭾-𪲭-𪷝-𪼌-𫀼-𫅫-𫊛-𫏊-𫓺-𫘩-𫝤-𫢕-𫧅-𫫴-𫰤-𫵔-𫺃-𫾳-𬃢-𬈒-𬍁-𬑱-𬖠-𬛐-𬟿-𬤯-𬩞-𬮎-𬲾-𬷭-𬼫-𭁚-𭆊-𭊹-𭏩-𭔘-𭙈-𭝷-𭢧-𭧖-𭬆-𭰵-𭵥-𭺕-𭿄-𮃴-𮈣-𮍓-𮒂-𮖲-𮛡-𮠑-𮥀-𮩰-𮮟-㰘-𪘀b" + }, + { + "name": "Other_Number", + "input": "a² ¹ ½ ৴ ৶ ৹ ୳ ୵ ୷ ௱ ౹ ౻ ౽ ൘ ൛ ൝ ൰ ൲ ൴ ൷ ༪ ༬ ༮ ༱ ༳ ፪ ፬ ፮ ፱ ፳ ፵ ፷ ፺ ፼ ៱ ៳ ៵ ៸ ᧚ ⁴ ⁶ ⁹ ₁ ₃ ₅ ₇ ⅐ ⅒ ⅔ ⅖ ⅙ ⅛ ⅝ ⅟ ① ④ ⑥ ⑧ ⑩ ⑫ ⑮ ⑰ ⑲ ⑴ ⑷ ⑹ ⑻ ⑽ ⑿ ⒂ ⒄ ⒆ ⒈ ⒋ ⒍ ⒏ ⒑ ⒓ ⒖ ⒘ ⒚ ⓪ ⓭ ⓯ ⓱ ⓳ ⓵ ⓸ ⓺ ⓼ ⓾ ❷ ❹ ❻ ❽ ❿ ➂ ➄ ➆ ➈ ➋ ➍ ➏ ➑ ➓ ㆓ ㆕ ㈡ ㈣ ㈥ ㈨ ㉈ ㉊ ㉌ ㉏ ㉒ ㉔ ㉖ ㉘ ㉛ ㉝ ㉟ ㊁ ㊄ ㊆ ㊈ ㊱ ㊳ ㊶ ㊸ ㊺ ㊼ ㊿ ꠱ ꠳ ꠵ 𐄈 𐄋 𐄍 𐄏 𐄑 𐄔 𐄖 𐄘 𐄚 𐄜 𐄟 𐄡 𐄣 𐄥 𐄨 𐄪 𐄬 𐄮 𐄰 𐄳 𐅶 𐅸 𐆋 𐋢 𐋥 𐋧 𐋩 𐋫 𐋮 𐋰 𐋲 𐋴 𐋶 𐋹 𐋻 𐌡 𐌣 𐡚 𐡜 𐡞 𐡹 𐡻 𐡾 𐢧 𐢩 𐢫 𐢮 𐣻 𐣽 𐣿 𐤗 𐤚 𐦼 𐧀 𐧂 𐧅 𐧇 𐧉 𐧋 𐧍 𐧒 𐧔 𐧖 𐧘 𐧛 𐧝 𐧟 𐧡 𐧣 𐧦 𐧨 𐧪 𐧬 𐧮 𐧱 𐧳 𐧵 𐧷 𐧺 𐧼 𐧾 𐩀 𐩂 𐩅 𐩇 𐩽 𐪝 𐫫 𐫭 𐫯 𐭙 𐭛 𐭞 𐭸 𐭺 𐭼 𐭿 𐮪 𐮬 𐮮 𐳺 𐳽 𐳿 𐹡 𐹣 𐹦 𐹨 𐹪 𐹬 𐹮 𐹱 𐹳 𐹵 𐹷 𐹺 𐹼 𐹾 𐼞 𐼠 𐼣 𐼥 𐽑 𐽓 𑁒 𑁕 𑁗 𑁙 𑁛 𑁞 𑁠 𑁢 𑁤 𑇡 𑇤 𑇦 𑇨 𑇪 𑇭 𑇯 𑇱 𑇳 𑜺 𑣫 𑣭 𑣯 𑣱 𑱛 𑱝 𑱟 𑱡 𑱣 𑱦 𑱨 𑱪 𑱬 𑿂 𑿄 𑿆 𑿈 𑿊 𑿍 𑿏 𑿑 𑿓 𖭜 𖭞 𖭠 𖺀 𖺂 𖺅 𖺇 𖺉 𖺋 𖺍 𖺐 𖺒 𖺔 𖺖 𝋢 𝋤 𝋦 𝋨 𝋪 𝋭 𝋯 𝋱 
𝋳 𝍢 𝍤 𝍦 𝍨 𝍪 𝍭 𝍯 𝍱 𝍳 𝍶 𝍸 𞣈 𞣊 𞣌 𞣏 𞱲 𞱴 𞱶 𞱹 𞱻 𞱽 𞱿 𞲁 𞲄 𞲆 𞲈 𞲊 𞲍 𞲏 𞲑 𞲓 𞲕 𞲘 𞲚 𞲜 𞲞 𞲠 𞲣 𞲥 𞲧 𞲩 𞲭 𞲯 𞲲 𞲴 𞴂 𞴅 𞴇 𞴉 𞴋 𞴎 𞴐 𞴒 𞴔 𞴖 𞴙 𞴛 𞴝 𞴟 𞴢 𞴤 𞴦 𞴨 𞴪 𞴭 𞴰 𞴲 𞴴 𞴷 𞴹 𞴻 𞴽 🄁 🄄 🄆 🄈 🄊 🄌b", + "expected": "a----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------b-1" + }, + { + "name": "Other_Punctuation", + "input": "a! \" # & ' , . : ; @ \\ § ¶ ¿ ; ՚ ՛ ՜ ՞ ՟ ׀ ׃ ׳ ״ ؊ ، ؛ ؞ ٪ ٫ ٭ ۔ ܁ ܂ ܃ ܅ ܆ ܈ ܉ ܋ ܌ ߷ ߸ ࠰ ࠱ ࠳ ࠴ ࠶ ࠷ ࠹ ࠺ ࠻ ࠽ ࠾ । ॥ ৽ ੶ ౷ ಄ ๏ ๚ ༄ ༅ ༇ ༈ ༊ ་ ༌ ༎ ༏ ༑ ༒ ྅ ࿐ ࿒ ࿓ ࿙ ࿚ ။ ၌ ၎ ၏ ፠ ፡ ። ፤ ፥ ፧ ፨ ᛫ ᛬ ᜵ ᜶ ៕ ៖ ៙ ៚ ᠁ ᠂ ᠄ ᠅ ᠇ ᠉ ᠊ ᥅ ᨞ ᪠ ᪡ ᪣ ᪤ ᪦ ᪨ ᪪ ᪫ ᪭ ᭚ ᭛ ᭝ ᭞ ᭠ ᯼ ᯾ ᯿ ᰼ ᰽ ᰿ ᱾ ᳀ ᳁ ᳃ ᳄ ᳆ ᳇ ᳓ ‗ † • ‣ ‥ … ‰ ‱ ″ ‴ ‶ ‷ ※ ‼ ‾ ⁁ ⁂ ⁇ ⁈ ⁊ ⁋ ⁍ ⁎ ⁐ ⁑ ⁕ ⁖ ⁘ ⁙ ⁛ ⁜ ⁞ ⳹ ⳺ ⳼ ⳾ ⵰ ⸀ ⸆ ⸇ ⸋ ⸎ ⸐ ⸑ ⸓ ⸔ ⸖ ⸘ ⸛ ⸞ ⸟ ⸫ ⸬ ⸮ ⸰ ⸲ ⸳ ⸵ ⸶ ⸸ ⸹ ⸽ ⸾ ⹁ ⹃ ⹅ ⹆ ⹇ ⹉ ⹊ ⹌ ⹍ ⹏ 、 〃 〽 ꓾ ꓿ ꘎ ꘏ ꙾ ꛲ ꛳ ꛵ ꛶ ꡴ ꡵ ꡷ ꣎ ꣸ ꣹ ꣼ ꤮ ꥟ ꧁ ꧃ ꧄ ꧆ ꧇ ꧈ ꧊ ꧋ ꧍ ꧞ ꩜ ꩝ ꩟ ꫞ ꫰ ꫱ ︐ ︑ ︓ ︔ ︖ ︙ ︰ ﹆ ﹉ ﹋ ﹌ ﹑ ﹒ ﹕ ﹖ ﹟ ﹠ ﹨ ﹪ ! " % & ' , . 
: ; @ \ 、 ・ 𐄁 𐄂 𐏐 𐕯 𐤟 𐤿 𐩑 𐩒 𐩓 𐩕 𐩖 𐩘 𐩿 𐫱 𐫲 𐫴 𐫵 𐬹 𐬺 𐬼 𐬽 𐬿 𐮙 𐮛 𐮜 𐽕 𐽗 𐽘 𑁇 𑁈 𑁊 𑁋 𑁍 𑂻 𑂾 𑂿 𑃁 𑅀 𑅂 𑅃 𑅴 𑇅 𑇆 𑇈 𑇍 𑇝 𑇞 𑈸 𑈹 𑈻 𑈼 𑊩 𑑋 𑑍 𑑎 𑑛 𑑝 𑓆 𑗂 𑗃 𑗅 𑗆 𑗈 𑗉 𑗋 𑗌 𑗎 𑗏 𑗑 𑗒 𑗔 𑗕 𑗗 𑙁 𑙂 𑙠 𑙡 𑙣 𑙤 𑙦 𑙧 𑙩 𑙪 𑙬 𑜼 𑜾 𑠻 𑨿 𑩀 𑩂 𑩃 𑩄 𑩆 𑪚 𑪜 𑪞 𑪠 𑪡 𑱁 𑱂 𑱄 𑱅 𑱱 𑻷 𑿿 𒑰 𒑲 𒑳 𒑴 𖩯 𖫵 𖬸 𖬹 𖬻 𖭄 𖺘 𖺙 𖿢 𛲟 𝪈 𝪉 𝪋 𞥞 𞥟b", + "expected": "a----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------b-2" + }, + { + "name": "Other_Symbol", + "input": "a¦ ୰ ༕ ࿂ ࿘ ᧟ ᧮ ᧽ ᭷ № ⅍ ↤ ↵ ⇅ ⇘ ⇨ ⌃ ⌖ ⌨ ⌹ ⍉ ⍘ ⍧ ⍷ ⎇ ⎗ ⎿ ⏎ ⏤ ⏳ ␃ ␒ ␡ ⑊ ⒪ Ⓔ Ⓣ ⓘ ⓨ ┍ ┝ ┬ ┻ ╋ ╚ ╪ ╹ █ ▘ ▧ ▸ ◈ ◗ ◧ ◶ ☎ ☝ ☬ ☼ ♋ ♛ ♪ ♺ ⚊ ⚙ ⚩ ⚸ ⛇ ⛗ ⛦ ⛶ ✅ ✔ ✤ ✳ ❃ ❒ ❡ ➝ ➬ ➼ ⠋ ⠛ ⠪ ⠹ ⡉ ⡘ ⡨ ⡷ ⢆ ⢖ ⢥ ⢵ ⣄ ⣓ ⣣ ⣲ ⬂ ⬑ ⬠ ⭅ ⭚ ⭪ ⭻ ⮊ ⮜ ⮫ ⮻ ⯊ ⯙ ⯩ ⯸ ⺂ ⺑ ⺡ ⺱ ⻀ ⻐ ⻟ ⻮ ⼊ ⼙ ⼩ ⼸ ⽇ ⽗ ⽦ ⽶ ⾅ ⾔ ⾤ ⾳ ⿃ ⿒ ⿻ ㆛ ㇊ ㇚ ㈅ ㈔ ㈯ ㈾ ㉥ ㉴ ㊍ ㊝ ㊬ ㋋ ㋚ ㋩ ㋹ ㌈ ㌘ ㌧ ㌶ ㍆ ㍕ ㍥ ㍴ ㎃ ㎓ ㎢ ㎲ ㏁ ㏑ ㏠ ㏯ ㏿ ䷎ ䷞ ䷭ ䷼ ꒜ ꒫ ꒻ ꠫ 𐄸 𐆁 𐆓 𐇖 𐇥 𐇴 𑿘 𑿫 𝀃 𝀒 𝀡 𝀱 𝁀 𝁐 𝁟 𝁮 𝁾 𝂍 𝂝 𝂬 𝂻 𝃋 𝃚 𝃪 𝄃 𝄒 𝄢 𝄳 𝅃 𝅒 𝅘𝅥𝅯 𝆓 𝆢 𝆶 𝇅 𝇔 𝇤 𝈊 𝈚 𝈩 𝈸 𝌅 𝌔 𝌤 𝌳 𝍂 𝍒 𝠊 𝠚 𝠩 𝠸 𝡈 𝡗 𝡧 𝡶 𝢅 𝢕 𝢤 𝢴 𝣃 𝣒 𝣢 𝣱 𝤁 𝤐 𝤠 𝤯 𝤾 𝥎 𝥝 𝥭 𝥼 𝦋 𝦛 𝦪 𝦺 𝧉 𝧘 𝧨 𝧷 𝩰 𝪀 🀆 🀖 🀥 🀹 🁈 🁗 🁧 🁶 🂆 🂡 🂲 🃃 🃓 🃣 🃲 🄛 🄫 🄺 🅊 🅙 🅨 🅻 🆊 🆚 🆩 🇱 🈁 🈝 🈭 🉀 🉤 🌎 🌝 🌭 🌼 🍋 🍛 🍪 🍺 🎉 🎘 🎨 🎷 🏇 🏖 🏥 🏵 🐉 🐙 🐨 🐷 👇 👖 👦 👵 💄 💔 💣 💳 📂 📑 📡 📰 🔀 🔏 🔟 🔮 🔽 🕍 🕜 🕬 🕻 🖊 🖚 🖩 🖹 🗈 🗗 🗧 🗶 😆 😕 😤 😴 🙃 🙓 🙢 🙱 🚁 🚐 🚠 🚯 🚾 🛎 🛧 🛺 🜎 🜝 🜭 🜼 🝌 🝛 🝪 🞆 🞕 🞥 🞴 🟃 🟓 🟩 🠑 🠠 🠯 🠿 🡖 🡬 🡻 🢒 🢢 🤃 🤔 🤣 🤲 🥂 🥑 🥡 🥰 🦃 🦓 🦢 🦷 🧆 🧗 🧧 🧶 🨆 🨕 🨤 🨴 🩃 🩓 🩰 🪕b", + "expected": "a-----------------------------------ⓔ-ⓣ-ⓘ-ⓨ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------🄺--🅙-🅨-🅻-------------------------------------------------------------------------------------------------------------------------b" + }, + { + "name": 
"Paragraph_Separator", + "input": "a
b", + "expected": "ab-1" + }, + { + "name": "Private_Use", + "input": "a                   󰂁 󰇙 󰌱 󰒈 󰗠 󰜸 󰢏 󰧧 󰬿 󰲖 󰷮 󰽆 󱂝 󱇵 󱍍 󱒤 󱗼 󱝔 󱢫 󱨃 󱭛 󱲲 󱸊 󱽢 󲂹 󲈑 󲍩 󲓀 󲘘 󲝰 󲣇 󲨟 󲭷 󲳎 󲸦 󲽾 󳃕 󳈭 󳎅 󳓜 󳘴 󳞌 󳣣 󳨻 󳮓 󳳪 󳹂 󳾚 󴃱 󴉉 󴎡 󴓸 󴙐 󴞨 󴣿 󴩗 󴮯 󴴆 󴹞 󴾶 󵄍 󵉥 󵎽 󵔔 󵙬 󵟄 󵤛 󵩳 󵯋 󵴢 󵹺 󵿒 󶄩 󶊁 󶏙 󶔰 󶚈 󶟠 󶤷 󶪏 󶯧 󶴿 󶺖 󶿮 󷅆 󷊝 󷏵 󷕍 󷚤 󷟼 󷥔 󷪫 󷰃 󷵛 󷺲 󸀊 󸅢 󸊹 󸐑 󸕩 󸛀 󸠘 󸥰 󸫇 󸰟 󸵷 󸻎 󹀦 󹅾 󹋕 󹐭 󹖅 󹛜 󹠴 󹦌 󹫣 󹰻 󹶓 󹻪 󺁂 󺆚 󺋱 󺑉 󺖡 󺛸 󺡐 󺦨 󺫿 󺱗 󺶯 󺼆 󻁞 󻆶 󻌍 󻑥 󻖽 󻜔 󻡬 󻧄 󻬛 󻱳 󻷋 󻼢 󼁺 󼇒 󼌩 󼒁 󼗙 󼜰 󼢈 󼧠 󼬷 󼲏 󼷧 󼼾 󽂖 󽇮 󽍅 󽒝 󽗵 󽝌 󽢤 󽧼 󽭓 󽲫 󽸃 󽽚 󾂲 󾈊 󾍡 󾒹 󾘑 󾝨 󾣀 󾨘 󾭯 󾳇 󾸟 󾽶 󿃎 󿈦 󿍾 󿓕 󿘭 󿞅 󿣜 󿨴 󿮌 󿳣 󿸻 󿾓 􀃬 􀉄 􀎜 􀓳 􀙋 􀞣 􀣺 􀩒 􀮪 􀴁 􀹙 􀾱 􁄈 􁉠 􁎸 􁔏 􁙧 􁞿 􁤖 􁩮 􁯆 􁴝 􁹵 􁿍 􂄤 􂉼 􂏔 􂔫 􂚃 􂟛 􂤲 􂪊 􂯢 􂴹 􂺑 􂿩 􃅀 􃊘 􃏰 􃕇 􃚟 􃟷 􃥎 􃪦 􃯾 􃵕 􃺭 􄀅 􄅜 􄊴 􄐌 􄕣 􄚻 􄠓 􄥪 􄫂 􄰚 􄵱 􄻉 􅀡 􅅸 􅋐 􅐨 􅕿 􅛗 􅠯 􅦆 􅫞 􅰶 􅶍 􅻥 􆀽 􆆔 􆋬 􆑄 􆖛 􆛳 􆡋 􆦢 􆫺 􆱒 􆶩 􆼁 􇁙 􇆰 􇌈 􇑠 􇖷 􇜏 􇡧 􇦿 􇬖 􇱮 􇷆 􇼝 􈁵 􈇍 􈌤 􈑼 􈗔 􈜫 􈢃 􈧛 􈬲 􈲊 􈷢 􈼹 􉂑 􉇩 􉍀 􉒘 􉗰 􉝇 􉢟 􉧷 􉭎 􉲦 􉷾 􉽕 􊂭 􊈅 􊍜 􊒴 􊘌 􊝣 􊢻 􊨓 􊭪 􊳂 􊸚 􊽱 􋃉 􋈡 􋍸 􋓐 􋘨 􋝿 􋣗 􋨯 􋮆 􋳞 􋸶 􋾍 􌃥 􌈽 􌎔 􌓬 􌙄 􌞛 􌣳 􌩋 􌮢 􌳺 􌹒 􌾩 􍄁 􍉙 􍎰 􍔈 􍙠 􍞷 􍤏 􍩧 􍮾 􍴖 􍹮 􍿅 􎄝 􎉵 􎏌 􎔤 􎙼 􎟓 􎤫 􎪃 􎯚 􎴲 􎺊 􎿡 􏄹 􏊑 􏏨 􏕀 􏚘 􏟯 􏥇 􏪟 􏯶 􏵎 􏺦 􏿽b", + "expected": "a----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------b-3" + }, + { + "name": "Punctuation", + "input": "a! \" % ' ) , . : ? [ ] { ¡ « · ¿ · ՛ ՝ ՟ ֊ ׀ ׆ ״ ؊ ؍ ؞ ٪ ٬ ۔ ܁ ܃ ܅ ܇ ܉ ܋ ܍ ߸ ࠰ ࠲ ࠴ ࠶ ࠸ ࠺ ࠼ ࠾ । ॰ ੶ ౷ ෴ ๏ ๛ ༅ ༇ ༉ ་ ། ༏ ༑ ༔ ༻ ༽ ࿐ ࿒ ࿔ ࿚ ။ ၍ ၏ ፠ ። ፤ ፦ ፨ ᙮ ᚜ ᛬ ᜵ ។ ៖ ៙ ᠀ ᠂ ᠄ ᠆ ᠈ ᠊ ᥅ ᨟ ᪡ ᪣ ᪥ ᪨ ᪪ ᪬ ᭚ ᭜ ᭞ ᭠ ᯽ ᯾ ᰻ ᰽ ᰿ ᱿ ᳁ ᳃ ᳅ ᳇ ‐ ‒ — ‖ ‘ ‚ “ „ † • ․ … ‰ ′ ‴ ‶ ‸ › ‼ ‾ ⁀ ⁂ ⁅ ⁇ ⁉ ⁋ ⁍ ⁏ ⁑ ⁔ ⁖ ⁘ ⁚ ⁜ ⁞ ⁾ ₎ ⌉ ⌋ 〉 ❩ ❪ ❬ ❮ ❰ ❲ ❴ ⟅ ⟦ ⟨ ⟪ ⟬ ⟮ ⦃ ⦅ ⦇ ⦉ ⦋ ⦍ ⦏ ⦑ ⦓ ⦕ ⦗ ⧘ ⧚ ⧼ ⳹ ⳻ ⳾ ⵰ ⸁ ⸃ ⸅ ⸇ ⸉ ⸋ ⸍ ⸏ ⸑ ⸓ ⸕ ⸗ ⸙ ⸛ ⸝ ⸟ ⸡ ⸣ ⸥ ⸧ ⸨ ⸪ ⸬ ⸮ ⸱ ⸳ ⸵ ⸷ ⸹ ⸻ ⸽ ⸿ ⹁ ⹃ ⹅ ⹇ ⹉ ⹋ ⹍ ⹏ 。 〈 《 「 『 【 〔 〖 〘 〚 〜 〞 〰 ゠ ꓾ ꘍ ꘏ ꙾ ꛳ ꛵ ꛷ ꡵ ꡷ ꣏ ꣹ ꣼ ꤯ ꧁ ꧃ ꧅ ꧆ ꧈ ꧊ ꧌ ꧞ ꩜ ꩞ ꫞ ꫰ ꯫ ﴿ ︑ ︓ ︕ ︗ ︙ ︱ ︳ ︵ ︷ ︹ ︻ ︽ ︿ ﹁ ﹃ ﹅ ﹇ ﹉ ﹋ ﹍ ﹏ ﹑ ﹔ ﹖ ﹘ ﹚ ﹜ ﹞ ﹠ ﹣ ﹪ ! # & ( * - / ; ? 
[ ] { ⦅ 。 」 ・ 𐄁 𐎟 𐕯 𐤟 𐩐 𐩒 𐩔 𐩖 𐩘 𐫰 𐫲 𐫴 𐫶 𐬺 𐬼 𐬾 𐮙 𐮛 𐽕 𐽗 𐽙 𑁈 𑁊 𑁌 𑂻 𑂾 𑃀 𑅀 𑅂 𑅴 𑇅 𑇇 𑇍 𑇝 𑇟 𑈹 𑈻 𑈽 𑑋 𑑍 𑑏 𑑝 𑓆 𑗂 𑗄 𑗆 𑗈 𑗊 𑗌 𑗎 𑗐 𑗒 𑗔 𑗖 𑙁 𑙃 𑙡 𑙣 𑙥 𑙧 𑙩 𑙫 𑜼 𑜾 𑧢 𑩀 𑩂 𑩄 𑩆 𑪛 𑪞 𑪠 𑪢 𑱂 𑱄 𑱰 𑻷 𑿿 𒑱 𒑳 𖩮 𖫵 𖬸 𖬺 𖭄 𖺘 𖺚 𛲟 𝪈 𝪊 𞥞 𞥟b", + "expected": "a----------------------------------------------------------------------------------------------------------------------------------⁀---------⁔---------------------------------------------------------------------------------------------------------------------------------︳-------------﹍-﹏----------------------------------------------------------------------------------------------------------------------b" + }, + { + "name": "Separator", + "input": "a                           
 
      b", + "expected": "a-------------------b" + }, + { + "name": "Space_Separator", + "input": "a                                b", + "expected": "a-----------------b" + }, + { + "name": "Spacing_Mark", + "input": "aः ऻ ा ि ी ॉ ॊ ो ौ ॎ ॏ ং ঃ া ী ে ৈ ো ৌ ৗ ਃ ਾ ਿ ੀ ઃ ા િ ી ો ૌ ଂ ଃ ା ୀ େ ୈ ୋ ୌ ୗ ா ி ு ெ ே ை ொ ோ ௌ ௗ ఁ ం ః ు ూ ృ ౄ ಃ ಾ ೀ ು ೂ ೃ ೄ ೇ ೈ ೊ ೋ ೕ ೖ ഃ ാ ി ീ െ േ ൈ ൊ ോ ൌ ൗ ං ඃ ා ෑ ෘ ෙ ේ ෛ ො ෝ ෞ ෟ ෲ ෳ ༾ ༿ ཿ ာ ေ း ျ ြ ၖ ၗ ၢ ၣ ၤ ၧ ၨ ၩ ၪ ၬ ၭ ႃ ႄ ႇ ႈ ႉ ႊ ႋ ႌ ႏ ႚ ႛ ႜ ើ ឿ ៀ េ ែ ៃ ោ ៅ ះ ៈ ᤣ ᤤ ᤥ ᤩ ᤪ ᤫ ᤰ ᤱ ᤳ ᤴ ᤵ ᤶ ᤷ ᤸ ᨙ ᨚ ᩕ ᩡ ᩣ ᩤ ᩭ ᩮ ᩯ ᩰ ᩱ ᩲ ᬄ ᬵ ᬻ ᬽ ᬾ ᭀ ᭁ ᭃ ᭄ ᮂ ᮡ ᮦ ᮧ ᮪ ᯧ ᯪ ᯫ ᯬ ᯮ ᯳ ᰤ ᰥ ᰦ ᰧ ᰨ ᰩ ᰪ ᰫ ᰴ ᰵ ᳡ ᳷ 〮 ꠣ ꠤ ꠧ ꢀ ꢁ ꢴ ꢵ ꢶ ꢷ ꢸ ꢹ ꢺ ꢻ ꢽ ꢾ ꢿ ꣀ ꣁ ꣂ ꣃ ꥒ ꥓ ꦃ ꦴ ꦵ ꦺ ꦻ ꦿ ꧀ ꨯ ꨰ ꨳ ꨴ ꩍ ꩻ ꩽ ꫫ ꫮ ꫯ ꫵ ꯣ ꯦ ꯧ ꯩ ꯪ ꯬ 𑀀 𑀂 𑂂 𑂰 𑂱 𑂲 𑂷 𑂸 𑄬 𑅆 𑆂 𑆳 𑆴 𑆵 𑆿 𑇀 𑈬 𑈭 𑈮 𑈲 𑈳 𑈵 𑋠 𑋢 𑌂 𑌃 𑌾 𑌿 𑍁 𑍂 𑍃 𑍄 𑍇 𑍈 𑍋 𑍌 𑍗 𑍢 𑍣 𑐵 𑐶 𑐷 𑑀 𑑁 𑑅 𑒰 𑒱 𑒲 𑒹 𑒻 𑒽 𑒾 𑓁 𑖯 𑖰 𑖱 𑖸 𑖹 𑖺 𑖻 𑖾 𑘰 𑘱 𑘲 𑘼 𑘾 𑚬 𑚮 𑚯 𑚶 𑜠 𑜡 𑜦 𑠬 𑠭 𑠮 𑠸 𑧑 𑧓 𑧜 𑧝 𑧞 𑧟 𑧤 𑨹 𑩗 𑩘 𑪗 𑰯 𑰾 𑲩 𑲱 𑶊 𑶋 𑶌 𑶍 𑶎 𑶓 𑶔 𑶖 𑻵 𑻶 𖽑 𖽒 𖽓 𖽕 𖽖 𖽗 𖽘 𖽙 𖽚 𖽛 𖽜 𖽝 𖽞 𖽟 𖽠 𖽡 𖽢 𖽤 𖽥 𖽦 𖽧 𖽨 𖽩 𖽪 𖽫 𖽬 𖽭 𖽮 𖽯 𖽰 𖽱 𖽳 𖽴 𖽵 𖽶 𖽷 𖽸 𖽹 𖽺 𖽻 𖽼 𖽽 𖽾 𖽿 𖾀 𖾂 𖾃 𖾄 𖾅 𖾆 𖾇 𝅥 𝅦 𝅭 𝅮 𝅯 𝅰 𝅱 𝅲b", + "expected": "aः-ऻ-ा-ि-ी-ॉ-ॊ-ो-ौ-ॎ-ॏ-ং-ঃ-া-ী-ে-ৈ-ো-ৌ-ৗ-ਃ-ਾ-ਿ-ੀ-ઃ-ા-િ-ી-ો-ૌ-ଂ-ଃ-ା-ୀ-େ-ୈ-ୋ-ୌ-ୗ-ா-ி-ு-ெ-ே-ை-ொ-ோ-ௌ-ௗ-ఁ-ం-ః-ు-ూ-ృ-ౄ-ಃ-ಾ-ೀ-ು-ೂ-ೃ-ೄ-ೇ-ೈ-ೊ-ೋ-ೕ-ೖ-ഃ-ാ-ി-ീ-െ-േ-ൈ-ൊ-ോ-ൌ-ൗ-ං-ඃ-ා-ෑ-ෘ-ෙ-ේ-ෛ-ො-ෝ-ෞ-ෟ-ෲ-ෳ-༾-༿-ཿ-ာ-ေ-း-ျ-ြ-ၖ-ၗ-ၢ-ၣ-ၤ-ၧ-ၨ-ၩ-ၪ-ၬ-ၭ-ႃ-ႄ-ႇ-ႈ-ႉ-ႊ-ႋ-ႌ-ႏ-ႚ-ႛ-ႜ-ើ-ឿ-ៀ-េ-ែ-ៃ-ោ-ៅ-ះ-ៈ-ᤣ-ᤤ-ᤥ-ᤩ-ᤪ-ᤫ-ᤰ-ᤱ-ᤳ-ᤴ-ᤵ-ᤶ-ᤷ-ᤸ-ᨙ-ᨚ-ᩕ-ᩡ-ᩣ-ᩤ-ᩭ-ᩮ-ᩯ-ᩰ-ᩱ-ᩲ-ᬄ-ᬵ-ᬻ-ᬽ-ᬾ-ᭀ-ᭁ-ᭃ-᭄-ᮂ-ᮡ-ᮦ-ᮧ-᮪-ᯧ-ᯪ-ᯫ-ᯬ-ᯮ-᯳-ᰤ-ᰥ-ᰦ-ᰧ-ᰨ-ᰩ-ᰪ-ᰫ-ᰴ-ᰵ-᳡-᳷-〮-ꠣ-ꠤ-ꠧ-ꢀ-ꢁ-ꢴ-ꢵ-ꢶ-ꢷ-ꢸ-ꢹ-ꢺ-ꢻ-ꢽ-ꢾ-ꢿ-ꣀ-ꣁ-ꣂ-ꣃ-ꥒ-꥓-ꦃ-ꦴ-ꦵ-ꦺ-ꦻ-ꦿ-꧀-ꨯ-ꨰ-ꨳ-ꨴ-ꩍ-ꩻ-ꩽ-ꫫ-ꫮ-ꫯ-ꫵ-ꯣ-ꯦ-ꯧ-ꯩ-ꯪ-꯬-𑀀-𑀂-𑂂-𑂰-𑂱-𑂲-𑂷-𑂸-𑄬-𑅆-𑆂-𑆳-𑆴-𑆵-𑆿-𑇀-𑈬-𑈭-𑈮-𑈲-𑈳-𑈵-𑋠-𑋢-𑌂-𑌃-𑌾-𑌿-𑍁-𑍂-𑍃-𑍄-𑍇-𑍈-𑍋-𑍌-𑍗-𑍢-𑍣-𑐵-𑐶-𑐷-𑑀-𑑁-𑑅-𑒰-𑒱-𑒲-𑒹-𑒻-𑒽-𑒾-𑓁-𑖯-𑖰-𑖱-𑖸-𑖹-𑖺-𑖻-𑖾-𑘰-𑘱-𑘲-𑘼-𑘾-𑚬-𑚮-𑚯-𑚶-𑜠-𑜡-𑜦-𑠬-𑠭-𑠮-𑠸-𑧑-𑧓-𑧜-𑧝-𑧞-𑧟-𑧤-𑨹-𑩗-𑩘-𑪗-𑰯-𑰾-𑲩-𑲱-𑶊-𑶋-𑶌-𑶍-𑶎-𑶓-𑶔-𑶖-𑻵-𑻶-𖽑-𖽒-𖽓-𖽕-𖽖-𖽗-𖽘-𖽙-𖽚-𖽛-𖽜-𖽝-𖽞-𖽟-𖽠-𖽡-𖽢-𖽤-𖽥-𖽦-𖽧-𖽨-𖽩-𖽪-𖽫-𖽬-𖽭-𖽮-𖽯-𖽰-𖽱-𖽳-𖽴-𖽵-𖽶-𖽷-𖽸-𖽹-𖽺-𖽻-𖽼-𖽽-𖽾-𖽿-𖾀-𖾂-𖾃-𖾄-𖾅-𖾆-𖾇-𝅥-𝅦-𝅭-𝅮-𝅯-𝅰-𝅱-𝅲b" + }, + { + "name": "Symbol", + "input": "a$ ¯ ˙ ˲ ҂ ৺ ༃ ࿂ ᎐ ᧤ ᧶ ᭩ ῝ ₢ ₵ ℉ ⅁ ↗ ↪ ↼ ⇎ ⇠ ⇳ ∅ ∗ ∩ ∻ ≎ ≠ ≲ ⊄ ⊗ ⊩ ⊻ ⋍ ⋠ ⋲ ⌄ ⌚ ⌮ ⍁ ⍓ ⍥ ⍷ ⎊ ⎜ ⎮ ⏀ ⏓ ⏥ ⏷ ␉ ␛ ⑇ ⒪ Ⓖ Ⓨ 
ⓡ ┉ ┛ ┭ ╀ ╒ ╤ ╶ █ ▛ ▭ ▿ ◑ ◤ ◶ ☈ ☚ ☭ ☿ ♑ ♣ ♵ ⚈ ⚚ ⚬ ⚾ ⛑ ⛣ ⛵ ✇ ✚ ✬ ✾ ❐ ❢ ➡ ➳ ⟇ ⟙ ⟶ ⠈ ⠚ ⠬ ⠿ ⡑ ⡣ ⡵ ⢇ ⢚ ⢬ ⢾ ⣐ ⣣ ⣵ ⤇ ⤙ ⤫ ⤾ ⥐ ⥢ ⥴ ⦝ ⦯ ⧁ ⧓ ⧪ ⧾ ⨐ ⨢ ⨴ ⩇ ⩙ ⩫ ⩽ ⪐ ⪢ ⪴ ⫆ ⫙ ⫫ ⫽ ⬏ ⬡ ⬴ ⭆ ⭘ ⭪ ⭿ ⮑ ⮥ ⮷ ⯊ ⯜ ⯮ ⳥ ⺌ ⺠ ⺲ ⻄ ⻖ ⻩ ⼇ ⼙ ⼫ ⼾ ⽐ ⽢ ⽴ ⾆ ⾙ ⾫ ⾽ ⿏ 〄 ㆜ ㇎ ㇠ ㈏ ㈬ ㈾ ㉧ ㉹ ㊖ ㊨ ㋉ ㋛ ㋮ ㌀ ㌒ ㌤ ㌷ ㍉ ㍛ ㍭ ㍿ ㎒ ㎤ ㎶ ㏈ ㏛ ㏭ ㏿ ䷑ ䷤ ䷶ ꒘ ꒪ ꒼ ꜈ ꞊ ﮶ ﹩ ↑ 𐅻 𐆐 𐇕 𐇧 𐇺 𑿠 𖬼 𝀌 𝀟 𝀱 𝁃 𝁕 𝁨 𝁺 𝂌 𝂞 𝂰 𝃃 𝃕 𝃧 𝄃 𝄖 𝄪 𝄼 𝅎 𝅘𝅥𝅯 𝆕 𝆧 𝆹𝅥𝅮 𝇏 𝇢 𝈋 𝈝 𝈯 𝉅 𝌑 𝌣 𝌵 𝍈 𝜕 𝠋 𝠝 𝠯 𝡂 𝡔 𝡦 𝡸 𝢋 𝢝 𝢯 𝣁 𝣔 𝣦 𝣸 𝤊 𝤜 𝤯 𝥁 𝥓 𝥥 𝥸 𝦊 𝦜 𝦮 𝧁 𝧓 𝧥 𝧷 𝩲 𞅏 🀋 🀝 🀳 🁆 🁘 🁪 🁼 🂏 🂭 🃂 🃕 🃧 🄔 🄦 🄸 🅊 🅝 🅲 🆄 🆖 🆩 🇴 🈓 🈥 🈷 🉣 🌏 🌡 🌳 🍆 🍘 🍪 🍼 🎎 🎡 🎳 🏅 🏗 🏪 🏼 🐎 🐠 🐳 👅 👗 👩 👻 💎 💠 💲 📄 📗 📩 📻 🔍 🔠 🔲 🕄 🕖 🕨 🕻 🖍 🖟 🖱 🗄 🗖 🗨 🗺 😍 😟 😱 🙃 🙕 🙨 🙺 🚌 🚞 🚱 🛃 🛕 🛴 🜌 🜞 🜰 🝂 🝔 🝧 🞅 🞗 🞩 🞼 🟎 🟧 🠑 🠤 🠶 🡐 🡨 🡺 🢕 🢧 🤋 🤞 🤱 🥃 🥕 🥧 🥾 🦐 🦢 🦹 🧍 🧠 🧲 🨄 🨖 🨩 🨻 🩍 🩫 🪕b", + "expected": "a-------------------------------------------------------ⓖ-ⓨ-ⓡ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------🄸--🅝-🅲-🆄------------------------------------------------------------------------------------------------------b" + }, + { + "name": "Titlecase_Letter", + "input": "aDž Lj Nj Dz ᾈ ᾉ ᾊ ᾋ ᾌ ᾍ ᾎ ᾏ ᾘ ᾙ ᾚ ᾛ ᾜ ᾝ ᾞ ᾟ ᾨ ᾩ ᾪ ᾫ ᾬ ᾭ ᾮ ᾯ ᾼ ῌ ῼb", + "expected": "adž-lj-nj-dz-ᾀ-ᾁ-ᾂ-ᾃ-ᾄ-ᾅ-ᾆ-ᾇ-ᾐ-ᾑ-ᾒ-ᾓ-ᾔ-ᾕ-ᾖ-ᾗ-ᾠ-ᾡ-ᾢ-ᾣ-ᾤ-ᾥ-ᾦ-ᾧ-ᾳ-ῃ-ῳb" + }, + { + "name": "Unassigned", + "input": "a͸ 𐗋 𑡁 𒠢 𓒆 𓲱 𔜤 𔽏 𕝻 𕾦 𖟒 𘻣 𙜏 𙼺 𚝦 𚾑 𛩯 𜌭 𜭙 𝲯 𞖃 𞿫 𮱘 𯒃 𯻍 𰛸 𰼤 𱝏 𱽻 𲞦 𲿒 𳟽 𴀩 𴡔 𵂀 𵢫 𶃗 𶤂 𷄮 𷥙 𸆅 𸦰 𹇜 𹨇 𺈳 𺩞 𻊊 𻪵 𼋡 𼬌 𽌸 𽭣 𾎏 𾮺 𿏦 𿰑 񀐽 񀱨 񁒔 񁲿 񂓫 񂴖 񃕂 񃵭 񄖙 񄷄 񅗰 񅸛 񆙇 񆹲 񇚞 񇻉 񈛵 񈼠 񉝌 񉽷 񊞣 񊿎 񋟺 񌀥 񌡑 񍁼 񍢨 񎃓 񎣿 񏄪 񏥖 񐆁 񐦭 񑇘 񑨄 񒈯 񒩛 񓊆 񓪲 񔋝 񔬉 񕌴 񕭠 񖎋 񖮷 񗏣 񗰎 񘐺 񘱥 񙒑 񙲼 񚓨 񚴓 񛔿 񛵪 񜖖 񜷁 񝗭 񝸘 񞙄 񞹯 񟚛 񟻆 񠛲 񠼝 񡝉 񡽴 񢞠 񢿋 񣟷 񤀢 񤡎 񥁹 񥢥 񦃐 񦣼 񧄧 񧥓 񨅾 񨦪 񩇕 񩨁 񪈬 񪩘 񫊃 񫪯 񬋚 񬬆 񭌱 񭭝 񮎈 񮮴 񯏟 񯰋 񰐶 񰱢 񱒍 񱲹 񲓤 񲴐 񳔻 񳵧 񴖒 񴶾 񵗩 񵸕 񶙀 񶹬 񷚗 񷻃 񸛮 񸼚 񹝅 񹽱 񺞜 񺿈 񻟳 񼀟 񼡊 񽁶 񽢡 񾃍 񾣸 񿄤 񿥏 򀅻 򀦦 򁇒 򁧽 򂈩 򂩔 򃊀 򃪫 򄋗 򄬂 򅌮 򅭙 򆎅 򆮰 򇏜 򇰇 򈐳 򈱞 򉒊 򉲶 򊓡 򊴍 򋔸 򋵤 򌖏 򌶻 򍗦 򍸒 򎘽 򎹩 򏚔 򏻀 򐛫 򐼗 򑝂 򑽮 򒞙 򒿅 򓟰 򔀜 򔡇 򕁳 򕢞 򖃊 򖣵 򗄡 򗥌 򘅸 򘦣 򙇏 򙧺 򚈦 򚩑 򛉽 򛪨 򜋔 򜫿 򝌫 򝭖 򞎂 򞮭 򟏙 򟰄 򠐰 򠱛 򡒇 򡲲 򢓞 򢴉 򣔵 򣵠 򤖌 򤶷 򥗣 򥸎 򦘺 򦹥 򧚑 򧺼 򨛨 򨼓 򩜿 򩽪 򪞖 򪿁 򫟭 򬀘 򬡄 򭁯 򭢛 򮃆 򮣲 򯄝 򯥉 򰅴 򰦠 򱇋 򱧷 򲈢 򲩎 򳉹 򳪥 򴋐 򴫼 򵌧 򵭓 򶍾 򶮪 򷏕 򷰁 򸐬 򸱘 򹒃 򹲯 򺓚 򺴆 򻔱 򻵝 򼖈 򼶴 򽗠 򽸋 򾘷 򾹢 򿚎 򿺹 󀛥 󀼐 󁜼 󁽧 󂞓 󂾾 󃟪 
󄀕 󄡁 󅁬 󅢘 󆃃 󆣯 󇄚 󇥆 󈅱 󈦝 󉇈 󉧴 󊈟 󊩋 󋉶 󋪢 󌋍 󌫹 󍌤 󍭐 󎍻 󎮧 󏏒 󏯾 󐐩 󐱕 󑒀 󑲬 󒓗 󒴃 󓔮 󓵚 󔖅 󔶱 󕗜 󕸈 󖘳 󖹟 󗚊 󗺶 󘛡 󘼍 󙜸 󙽤 󚞏 󚾻 󛟦 󜀒 󜠽 󝁩 󝢔 󞃀 󞣫 󟄗 󟥂 󠊿 󠫪 󡌖 󡭁 󢍭 󢮘 󣏄 󣯯 󤐛 󤱆 󥑲 󥲝 󦓉 󦳴 󧔠 󧵋 󨕷 󨶢 󩗎 󩷹 󪘥 󪹐 󫙼 󫺧 󬛓 󬻾 󭜪 󭽕 󮞁 󮾬 󯟘 􏿿b", + "expected": "a----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------b-4" + }, + { + "name": "Uppercase_Letter", + "input": "aA E I N R W À Å É Î Ò Ø Ü Ą Č Ė Ğ Ħ İ Ĺ Ń Ō Ŗ Ş Ũ Ű Ź Ƃ Ɗ Ɛ Ɨ Ɵ Ƨ Ư Ƶ LJ Ǒ Ǜ Ǥ Ǯ Ƿ Ȁ Ȉ Ȓ Ț Ȥ Ȭ Ȼ Ƀ Ɋ Ͳ Έ Ώ Δ Ι Ν Σ Χ Ϗ Ϙ Ϣ Ϫ Ϲ Ͽ Є Ј Ѝ Б Е К О У Ч Ь Ѡ Ѫ Ѳ Ѽ Ҍ Җ Ҟ Ҩ Ұ Һ Ӂ Ӊ Ӕ Ӝ Ӧ Ӯ Ӹ Ԁ Ԋ Ԓ Ԝ Ԥ Ԯ Դ Թ Խ Ղ Ն Պ Տ Փ Ⴁ Ⴅ Ⴊ Ⴎ Ⴓ Ⴗ Ⴜ Ⴠ Ⴥ Ꭱ Ꭶ Ꭺ Ꭾ Ꮃ Ꮇ Ꮌ Ꮐ Ꮕ Ꮙ Ꮞ Ꮢ Ꮧ Ꮫ Ꮰ Ꮴ Ꮹ Ꮽ Ᏺ Ა Ე Კ Ო Ტ Ღ Ძ Ჯ Ჴ Ჸ Ჿ Ḇ Ḑ Ḙ Ḣ Ḫ Ḵ Ḽ Ṅ Ṏ Ṗ Ṡ Ṩ Ṳ Ṻ Ẅ Ẍ ẞ Ầ Ằ Ẹ Ể Ị Ổ Ờ Ụ Ữ Ỷ Ἀ Ἄ Ἑ Ἕ Ἤ Ἰ Ἵ Ὁ Ὑ Ὠ Ὥ Ᾱ Ὴ Ὶ Ὺ Ὼ ℋ ℒ ℛ ℨ ℭ ℾ Ⰰ Ⰵ Ⰹ Ⰾ Ⱂ Ⱇ Ⱋ Ⱐ Ⱔ Ⱘ Ⱝ Ᵽ Ɑ Ⱳ Ⲃ Ⲋ Ⲕ Ⲝ Ⲧ Ⲯ Ⲹ Ⳁ Ⳋ Ⳓ Ⳛ Ⳬ Ꙃ Ꙍ Ꙕ Ꙟ Ꙧ Ꚃ Ꚋ Ꚕ Ꜣ Ꜭ Ꜷ Ꝁ Ꝉ Ꝓ Ꝛ Ꝣ Ꝭ Ᵹ Ꞇ Ꞓ Ꞟ Ꞧ Ɬ Ʝ Ꞻ Ꞔ C G L P U Y 𐐂 𐐇 𐐋 𐐐 𐐔 𐐙 𐐝 𐐢 𐐦 𐒳 𐒷 𐒼 𐓀 𐓅 𐓉 𐓎 𐓒 𐲂 𐲇 𐲋 𐲐 𐲔 𐲙 𐲝 𐲢 𐲦 𐲫 𐲯 𑢡 𑢥 𑢪 𑢮 𑢳 𑢷 𑢻 𖹀 𖹄 𖹉 𖹍 𖹒 𖹖 𖹛 𖹟 𝐄 𝐈 𝐍 𝐑 𝐖 𝐴 𝐹 𝐽 𝑁 𝑆 𝑊 𝑩 𝑭 𝑲 𝑶 𝑻 𝑿 𝒟 𝒩 𝒯 𝒳 𝓒 𝓖 𝓚 𝓟 𝓣 𝓨 𝔇 𝔎 𝔒 𝔘 𝔜 𝔽 𝕂 𝕋 𝕏 𝕯 𝕳 𝕸 𝕼 𝖀 𝖅 𝖣 𝖨 𝖬 𝖱 𝖵 𝗔 𝗘 𝗝 𝗡 𝗦 𝗪 𝘉 𝘍 𝘒 𝘖 𝘚 𝘟 𝘽 𝙂 𝙆 𝙋 𝙏 𝙔 𝙲 𝙷 𝙻 𝚀 𝚄 𝚉 𝚫 𝚰 𝚴 𝚸 𝚽 𝛢 𝛧 𝛫 𝛰 𝛴 𝛹 𝜞 𝜣 𝜧 𝜬 𝜰 𝝖 𝝚 𝝟 𝝣 𝝧 𝝬 𝞑 𝞖 𝞚 𝞟 𝞣 𝞨 𞤂 𞤇 𞤋 𞤐 𞤔 𞤙 𞤝 𞤡b", + "expected": 
"aa-e-i-n-r-w-à-å-é-î-ò-ø-ü-ą-č-ė-ğ-ħ-i̇-ĺ-ń-ō-ŗ-ş-ũ-ű-ź-ƃ-ɗ-ɛ-ɨ-ɵ-ƨ-ư-ƶ-lj-ǒ-ǜ-ǥ-ǯ-ƿ-ȁ-ȉ-ȓ-ț-ȥ-ȭ-ȼ-ƀ-ɋ-ͳ-έ-ώ-δ-ι-ν-σ-χ-ϗ-ϙ-ϣ-ϫ-ϲ-ͽ-є-ј-ѝ-б-е-к-о-у-ч-ь-ѡ-ѫ-ѳ-ѽ-ҍ-җ-ҟ-ҩ-ұ-һ-ӂ-ӊ-ӕ-ӝ-ӧ-ӯ-ӹ-ԁ-ԋ-ԓ-ԝ-ԥ-ԯ-դ-թ-խ-ղ-ն-պ-տ-փ-ⴁ-ⴅ-ⴊ-ⴎ-ⴓ-ⴗ-ⴜ-ⴠ-ⴥ-ꭱ-ꭶ-ꭺ-ꭾ-ꮃ-ꮇ-ꮌ-ꮐ-ꮕ-ꮙ-ꮞ-ꮢ-ꮧ-ꮫ-ꮰ-ꮴ-ꮹ-ꮽ-ᏺ-ა-ე-კ-ო-ტ-ღ-ძ-ჯ-ჴ-ჸ-ჿ-ḇ-ḑ-ḙ-ḣ-ḫ-ḵ-ḽ-ṅ-ṏ-ṗ-ṡ-ṩ-ṳ-ṻ-ẅ-ẍ-ß-ầ-ằ-ẹ-ể-ị-ổ-ờ-ụ-ữ-ỷ-ἀ-ἄ-ἑ-ἕ-ἤ-ἰ-ἵ-ὁ-ὑ-ὠ-ὥ-ᾱ-ὴ-ὶ-ὺ-ὼ-ℋ-ℒ-ℛ-ℨ-ℭ-ℾ-ⰰ-ⰵ-ⰹ-ⰾ-ⱂ-ⱇ-ⱋ-ⱐ-ⱔ-ⱘ-ⱝ-ᵽ-ɑ-ⱳ-ⲃ-ⲋ-ⲕ-ⲝ-ⲧ-ⲯ-ⲹ-ⳁ-ⳋ-ⳓ-ⳛ-ⳬ-ꙃ-ꙍ-ꙕ-ꙟ-ꙧ-ꚃ-ꚋ-ꚕ-ꜣ-ꜭ-ꜷ-ꝁ-ꝉ-ꝓ-ꝛ-ꝣ-ꝭ-ᵹ-ꞇ-ꞓ-ꞟ-ꞧ-ɬ-ʝ-ꞻ-ꞔ-c-g-l-p-u-y-𐐪-𐐯-𐐳-𐐸-𐐼-𐑁-𐑅-𐑊-𐑎-𐓛-𐓟-𐓤-𐓨-𐓭-𐓱-𐓶-𐓺-𐳂-𐳇-𐳋-𐳐-𐳔-𐳙-𐳝-𐳢-𐳦-𐳫-𐳯-𑣁-𑣅-𑣊-𑣎-𑣓-𑣗-𑣛-𖹠-𖹤-𖹩-𖹭-𖹲-𖹶-𖹻-𖹿-𝐄-𝐈-𝐍-𝐑-𝐖-𝐴-𝐹-𝐽-𝑁-𝑆-𝑊-𝑩-𝑭-𝑲-𝑶-𝑻-𝑿-𝒟-𝒩-𝒯-𝒳-𝓒-𝓖-𝓚-𝓟-𝓣-𝓨-𝔇-𝔎-𝔒-𝔘-𝔜-𝔽-𝕂-𝕋-𝕏-𝕯-𝕳-𝕸-𝕼-𝖀-𝖅-𝖣-𝖨-𝖬-𝖱-𝖵-𝗔-𝗘-𝗝-𝗡-𝗦-𝗪-𝘉-𝘍-𝘒-𝘖-𝘚-𝘟-𝘽-𝙂-𝙆-𝙋-𝙏-𝙔-𝙲-𝙷-𝙻-𝚀-𝚄-𝚉-𝚫-𝚰-𝚴-𝚸-𝚽-𝛢-𝛧-𝛫-𝛰-𝛴-𝛹-𝜞-𝜣-𝜧-𝜬-𝜰-𝝖-𝝚-𝝟-𝝣-𝝧-𝝬-𝞑-𝞖-𝞚-𝞟-𝞣-𝞨-𞤤-𞤩-𞤭-𞤲-𞤶-𞤻-𞤿-𞥃b" + } +] diff --git a/test/index.js b/test/index.js index 3ac8b97..2570258 100644 --- a/test/index.js +++ b/test/index.js @@ -1,210 +1,27 @@ const test = require('tape') const GithubSlugger = require('../') +const gist = require('./fixtures.json') require('./test-static') -const testCases = [ - // See `6-characters.md` - { - mesg: 'allows a dash', - text: 'heading with a - dash', - slug: 'heading-with-a---dash' - }, - { - mesg: 'allows underscores', - text: 'heading with an _ underscore', - slug: 'heading-with-an-_-underscore' - }, - { - mesg: 'filters periods', - text: 'heading with a period.txt', - slug: 'heading-with-a-periodtxt' - }, - { - mesg: 'allows two spaces even after filtering', - text: 'exchange.bind_headers(exchange, routing [, bindCallback])', - slug: 'exchangebind_headersexchange-routing--bindcallback' - }, - // Note: GH doesn’t create slugs for empty headings. - { - mesg: 'empty', - text: '', - slug: '' - }, - { - mesg: 'a space', - text: ' ', - slug: '-1' - }, - // Note: white-space in headings is trimmed off in markdown. 
- { - mesg: 'initial space', - text: ' initial space', - slug: 'initial-space' - }, - { - mesg: 'final space', - text: 'final space ', - slug: 'final-space' - }, - // Note: Apostrophe in heading is trimmed off in markdown - { - mesg: 'apostrophe’s should be trimmed', - text: 'apostrophe’s should be trimmed', - slug: 'apostrophes-should-be-trimmed' - }, - // See `7-duplicates.md` - { - mesg: 'deals with duplicates correctly', - text: 'duplicates', - slug: 'duplicates' - }, - { - mesg: 'deals with duplicates correctly-1', - text: 'duplicates', - slug: 'duplicates-1' - }, - { - mesg: 'deals with duplicates correctly-2', - text: 'duplicates', - slug: 'duplicates-2' - }, - // See `8-non-ascii.md` - { - mesg: 'gh-and-npm-slug-generation-1', - text: 'I ♥ unicode', - slug: 'i--unicode' - }, - { - mesg: 'gh-and-npm-slug-generation-2', - text: 'Dash-dash', - slug: 'dash-dash' - }, - { - mesg: 'gh-and-npm-slug-generation-3', - text: 'en–dash!', - slug: 'endash' - }, - { - mesg: 'gh-and-npm-slug-generation-4', - text: 'em–dash', - slug: 'emdash' - }, - { - mesg: 'gh-and-npm-slug-generation-5', - text: '😄 unicode emoji', - slug: '-unicode-emoji' - }, - { - mesg: 'gh-and-npm-slug-generation-6', - text: '😄-😄 unicode emoji', - slug: '--unicode-emoji' - }, - { - mesg: 'gh-and-npm-slug-generation-7', - text: '😄_😄 unicode emoji', - slug: '_-unicode-emoji' - }, - { - mesg: 'gh-and-npm-slug-generation-8', - text: '😄 - an emoji', - slug: '---an-emoji' - }, - { - mesg: 'gh-and-npm-slug-generation-9', - text: ':smile: - a gemoji', - slug: 'smile---a-gemoji' - }, - { - mesg: 'deals with non-latin chars', - text: 'Привет', - slug: 'привет' - }, - { - mesg: 'Cyrillic', - text: 'Профили пользователей', - slug: 'профили-пользователей' - }, - { - mesg: 'More non-latin', - text: 'Привет non-latin 你好', - slug: 'привет-non-latin-你好' - }, - // See `9-emoji.md` - { - mesg: 'emoji-slug-example-1', - text: ':ok: No underscore', - slug: 'ok-no-underscore' - }, - { - mesg: 'emoji-slug-example-2', - 
text: ':ok_hand: Single', - slug: 'ok_hand-single' - }, - { - mesg: 'emoji-slug-example-3', - text: ':ok_hand::hatched_chick: Two in a row with no spaces', - slug: 'ok_handhatched_chick-two-in-a-row-with-no-spaces' - }, - { - mesg: 'emoji-slug-example-4', - text: ':ok_hand: :hatched_chick: Two in a row', - slug: 'ok_hand-hatched_chick-two-in-a-row' - } -] - test('simple stuff', function (t) { const slugger = new GithubSlugger() t.equals(GithubSlugger().slug('foo'), 'foo', 'should work without new') t.equals(slugger.slug(1), '', 'should return empty string for non-strings') - // See `1-basic-usage.md` - t.equals(slugger.slug('foo'), 'foo') - t.equals(slugger.slug('foo bar'), 'foo-bar') - t.equals(slugger.slug('foo'), 'foo-1') - - // See `2-camel-case.md` - slugger.reset() - t.equals(slugger.slug('foo'), 'foo') // Note: GH doesn’t support `maintaincase`, so the actual values are commented below. - t.equals(slugger.slug('fooCamelCase', true), 'fooCamelCase') // foocamelcase - t.equals(slugger.slug('fooCamelCase'), 'foocamelcase') // foocamelcase-1 - - // See `3-prototype.md` - slugger.reset() - t.equals(slugger.slug('__proto__'), '__proto__') - t.equals(slugger.slug('__proto__'), '__proto__-1') - t.equals(slugger.slug('hasOwnProperty', true), 'hasOwnProperty') // hasownproperty - t.equals(slugger.slug('foo'), 'foo') - - t.end() -}) - -test('matching slugs', function (t) { - const slugger = new GithubSlugger() - - // See `4-matching-slugs-basic.md` - t.equals(slugger.slug('foo'), 'foo') - t.equals(slugger.slug('foo'), 'foo-1') - t.equals(slugger.slug('foo 1'), 'foo-1-1') - t.equals(slugger.slug('foo-1'), 'foo-1-2') - t.equals(slugger.slug('foo'), 'foo-2') - - // See `5-matching-slugs-again.md` - slugger.reset() - t.equals(slugger.slug('foo-1'), 'foo-1') - t.equals(slugger.slug('foo'), 'foo') - t.equals(slugger.slug('foo'), 'foo-2') + t.equals(slugger.slug('fooCamelCase', true), 'fooCamelCase', 'should support `maintainCase`') // foocamelcase + 
t.equals(slugger.slug('fooCamelCase'), 'foocamelcase', 'should support `maintainCase` (reference)') // foocamelcase-1 t.end() }) -test('github test cases', function (t) { +test('fixtures', function (t) { const slugger = new GithubSlugger() - testCases.forEach(function (test) { - t.equals(slugger.slug(test.text), test.slug, test.mesg) + gist.forEach((d) => { + t.equals(slugger.slug(d.input), d.expected, d.name) }) t.end() diff --git a/test/test-static.js b/test/test-static.js index 1eb8d25..54f40b3 100644 --- a/test/test-static.js +++ b/test/test-static.js @@ -4,14 +4,10 @@ const GithubSlugger = require('../') test('static method - simple stuff', function (t) { const slug = GithubSlugger.slug - // See `1-basic-usage.md` t.equals(slug('foo'), 'foo') t.equals(slug('foo bar'), 'foo-bar') t.equals(slug('foo'), 'foo') // idem potent - // See `2-camel-case.md` - t.equals(slug('foo'), 'foo') - // Note: GH doesn’t support `maintaincase`, so the actual values are commented below. t.equals(slug('fooCamelCase', true), 'fooCamelCase') // foocamelcase t.equals(slug('fooCamelCase'), 'foocamelcase') // foocamelcase @@ -23,7 +19,7 @@ test('static method - yielding empty strings', function (t) { const slug = GithubSlugger.slug t.equals(slug(1), '', 'should return empty string for non-strings') - t.equals(slug(' '), '') + t.equals(slug(' '), '-') t.end() })