From cfe0fcd1020ee28a5e186493e438ed9fea3e6064 Mon Sep 17 00:00:00 2001 From: Moritz Platt Date: Fri, 27 Feb 2015 23:06:09 +0100 Subject: [PATCH] changed default behaviour to omit unmapped characters --- README.md | 250 ++------------------------------------------------- bower.json | 4 +- index.js | 2 +- package.json | 4 +- 4 files changed, 11 insertions(+), 249 deletions(-) diff --git a/README.md b/README.md index f650e99..91917a7 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,6 @@ A JavaScript port of the Apache Lucene ASCII Folding Filter that converts alphab # Documentation -## Package Manager Availability - -This package is available in the npm registry (https://www.npmjs.com/package/fold-to-ascii) and in Bower. - ## Usage It is simple: @@ -17,16 +13,16 @@ It is simple: var foldToAscii = require("fold-to-ascii"); // Folding with replacement of unmapped characters with the "_" character: -console.log(foldToAscii.fold("★ Lorém ïpsum dölor.", "_")); -// Results in "_ Lorem ipsum dolor." +console.log(foldToAscii.fold("★Lorém ïpsum dölor.", "_")); +// Results in "_Lorem ipsum dolor." // Folding without replacement of unmapped characters: -console.log(foldToAscii.fold("★ Lorém ïpsum dölor.", null)); -console.log(foldToAscii.fold("★ Lorém ïpsum dölor.")); -// Results in "★ Lorem ipsum dolor." +console.log(foldToAscii.fold("★Lorém ïpsum dölor.", null)); +console.log(foldToAscii.fold("★Lorém ïpsum dölor.")); +// Both calls result in "Lorem ipsum dolor." ``` -If no replacement parameter is specified, unmapped characters will not be replaced. If you require an ASCII string, you thus must specify a replacement character within the range of ASCII characters. +If no replacement parameter is specified, unmapped characters will be replaced by the empty string. ## Tests @@ -44,238 +40,4 @@ The function to determine character codes is taken from a code example in the MD The unambiguous allocation of characters to replacements is not possible since it is language-dependent. 
For example a user from France might expect *ü* to be replaced with *u* while a user from Germany expects the replacement to be *ue*. -The replacements featured here are kept as general as possible. - -# Replacement Patterns - Character(s) | Replacement - --- | --- - À Á Â Ã Ä Å Ā Ă Ą Ə Ǎ Ǟ Ǡ Ǻ Ȁ Ȃ Ȧ Ⱥ ᴀ Ḁ Ạ Ả Ấ Ầ Ẩ Ẫ Ậ Ắ Ằ Ẳ Ẵ Ặ Ⓐ A | A - à á â ã ä å ā ă ą ǎ ǟ ǡ ǻ ȁ ȃ ȧ ɐ ə ɚ ᶏ ᶕ ḁ ẚ ạ ả ấ ầ ẩ ẫ ậ ắ ằ ẳ ẵ ặ ₐ ₔ ⓐ ⱥ Ɐ a | a - Ꜳ | AA - Æ Ǣ Ǽ ᴁ | AE - Ꜵ | AO - Ꜷ | AU - Ꜹ Ꜻ | AV - Ꜽ | AY - ⒜ | (a) - ꜳ | aa - æ ǣ ǽ ᴂ | ae - ꜵ | ao - ꜷ | au - ꜹ ꜻ | av - ꜽ | ay - Ɓ Ƃ Ƀ ʙ ᴃ Ḃ Ḅ Ḇ Ⓑ B | B - ƀ ƃ ɓ ᵬ ᶀ ḃ ḅ ḇ ⓑ b | b - ⒝ | (b) - Ç Ć Ĉ Ċ Č Ƈ Ȼ ʗ ᴄ Ḉ Ⓒ C | C - ç ć ĉ ċ č ƈ ȼ ɕ ḉ ↄ ⓒ Ꜿ ꜿ c | c - ⒞ | (c) - Ð Ď Đ Ɖ Ɗ Ƌ ᴅ ᴆ Ḋ Ḍ Ḏ Ḑ Ḓ Ⓓ Ꝺ D | D - ð ď đ ƌ ȡ ɖ ɗ ᵭ ᶁ ᶑ ḋ ḍ ḏ ḑ ḓ ⓓ ꝺ d | d - DŽ DZ | DZ - Dž Dz | Dz - ⒟ | (d) - ȸ | db - dž dz ʣ ʥ | dz - È É Ê Ë Ē Ĕ Ė Ę Ě Ǝ Ɛ Ȅ Ȇ Ȩ Ɇ ᴇ Ḕ Ḗ Ḙ Ḛ Ḝ Ẹ Ẻ Ẽ Ế Ề Ể Ễ Ệ Ⓔ ⱻ E | E - è é ê ë ē ĕ ė ę ě ǝ ȅ ȇ ȩ ɇ ɘ ɛ ɜ ɝ ɞ ʚ ᴈ ᶒ ᶓ ᶔ ḕ ḗ ḙ ḛ ḝ ẹ ẻ ẽ ế ề ể ễ ệ ₑ ⓔ ⱸ e | e - ⒠ | (e) - Ƒ Ḟ Ⓕ ꜰ Ꝼ ꟻ F | F - ƒ ᵮ ᶂ ḟ ẛ ⓕ ꝼ f | f - ⒡ | (f) - ff | ff - ffi | ffi - ffl | ffl - fi | fi - fl | fl - Ĝ Ğ Ġ Ģ Ɠ Ǥ ǥ Ǧ ǧ Ǵ ɢ ʛ Ḡ Ⓖ Ᵹ Ꝿ G | G - ĝ ğ ġ ģ ǵ ɠ ɡ ᵷ ᵹ ᶃ ḡ ⓖ ꝿ g | g - ⒢ | (g) - Ĥ Ħ Ȟ ʜ Ḣ Ḥ Ḧ Ḩ Ḫ Ⓗ Ⱨ Ⱶ H | H - ĥ ħ ȟ ɥ ɦ ʮ ʯ ḣ ḥ ḧ ḩ ḫ ẖ ⓗ ⱨ ⱶ h | h - Ƕ | HV - ⒣ | (h) - ƕ | hv - Ì Í Î Ï Ĩ Ī Ĭ Į İ Ɩ Ɨ Ǐ Ȉ Ȋ ɪ ᵻ Ḭ Ḯ Ỉ Ị Ⓘ ꟾ I | I - ì í î ï ĩ ī ĭ į ı ǐ ȉ ȋ ɨ ᴉ ᵢ ᵼ ᶖ ḭ ḯ ỉ ị ⁱ ⓘ i | i - IJ | IJ - ⒤ | (i) - ij | ij - Ĵ Ɉ ᴊ Ⓙ J | J - ĵ ǰ ȷ ɉ ɟ ʄ ʝ ⓙ ⱼ j | j - ⒥ | (j) - Ķ Ƙ Ǩ ᴋ Ḱ Ḳ Ḵ Ⓚ Ⱪ Ꝁ Ꝃ Ꝅ K | K - ķ ƙ ǩ ʞ ᶄ ḱ ḳ ḵ ⓚ ⱪ ꝁ ꝃ ꝅ k | k - ⒦ | (k) - Ĺ Ļ Ľ Ŀ Ł Ƚ ʟ ᴌ Ḷ Ḹ Ḻ Ḽ Ⓛ Ⱡ Ɫ Ꝇ Ꝉ Ꞁ L | L - ĺ ļ ľ ŀ ł ƚ ȴ ɫ ɬ ɭ ᶅ ḷ ḹ ḻ ḽ ⓛ ⱡ ꝇ ꝉ ꞁ l | l - LJ | LJ - Ỻ | LL - Lj | Lj - ⒧ | (l) - lj | lj - ỻ | ll - ʪ | ls - ʫ | lz - Ɯ ᴍ Ḿ Ṁ Ṃ Ⓜ Ɱ ꟽ ꟿ M | M - ɯ ɰ ɱ ᵯ ᶆ ḿ ṁ ṃ ⓜ m | m - ⒨ | (m) - Ñ Ń Ņ Ň Ŋ Ɲ Ǹ Ƞ ɴ ᴎ Ṅ Ṇ Ṉ Ṋ Ⓝ N | N - ñ ń ņ ň ʼn ŋ ƞ ǹ ȵ ɲ ɳ ᵰ ᶇ ṅ ṇ ṉ ṋ ⁿ ⓝ n | n - NJ | NJ - Nj | Nj - ⒩ | (n) - nj | nj - Ò Ó Ô Õ Ö Ø Ō Ŏ Ő Ɔ Ɵ Ơ Ǒ Ǫ Ǭ Ǿ Ȍ Ȏ Ȫ Ȭ Ȯ Ȱ ᴏ ᴐ Ṍ Ṏ Ṑ Ṓ Ọ Ỏ Ố Ồ Ổ Ỗ Ộ Ớ Ờ Ở Ỡ Ợ 
Ⓞ Ꝋ Ꝍ O | O - ò ó ô õ ö ø ō ŏ ő ơ ǒ ǫ ǭ ǿ ȍ ȏ ȫ ȭ ȯ ȱ ɔ ɵ ᴖ ᴗ ᶗ ṍ ṏ ṑ ṓ ọ ỏ ố ồ ổ ỗ ộ ớ ờ ở ỡ ợ ₒ ⓞ ⱺ ꝋ ꝍ o | o - Œ ɶ | OE - Ꝏ | OO - Ȣ ᴕ | OU - ⒪ | (o) - œ ᴔ | oe - ꝏ | oo - ȣ | ou - Ƥ ᴘ Ṕ Ṗ Ⓟ Ᵽ Ꝑ Ꝓ Ꝕ P | P - ƥ ᵱ ᵽ ᶈ ṕ ṗ ⓟ ꝑ ꝓ ꝕ ꟼ p | p - ⒫ | (p) - Ɋ Ⓠ Ꝗ Ꝙ Q | Q - ĸ ɋ ʠ ⓠ ꝗ ꝙ q | q - ⒬ | (q) - ȹ | qp - Ŕ Ŗ Ř Ȑ Ȓ Ɍ ʀ ʁ ᴙ ᴚ Ṙ Ṛ Ṝ Ṟ Ⓡ Ɽ Ꝛ Ꞃ R | R - ŕ ŗ ř ȑ ȓ ɍ ɼ ɽ ɾ ɿ ᵣ ᵲ ᵳ ᶉ ṙ ṛ ṝ ṟ ⓡ ꝛ ꞃ r | r - ⒭ | (r) - Ś Ŝ Ş Š Ș Ṡ Ṣ Ṥ Ṧ Ṩ Ⓢ ꜱ ꞅ S | S - ś ŝ ş š ſ ș ȿ ʂ ᵴ ᶊ ṡ ṣ ṥ ṧ ṩ ẜ ẝ ⓢ Ꞅ s | s - ẞ | SS - ⒮ | (s) - ß | ss - st | st - Ţ Ť Ŧ Ƭ Ʈ Ț Ⱦ ᴛ Ṫ Ṭ Ṯ Ṱ Ⓣ Ꞇ T | T - ţ ť ŧ ƫ ƭ ț ȶ ʇ ʈ ᵵ ṫ ṭ ṯ ṱ ẗ ⓣ ⱦ t | t - Þ Ꝧ | TH - Ꜩ | TZ - ⒯ | (t) - ʨ | tc - þ ᵺ ꝧ | th - ʦ | ts - ꜩ | tz - Ù Ú Û Ü Ũ Ū Ŭ Ů Ű Ų Ư Ǔ Ǖ Ǘ Ǚ Ǜ Ȕ Ȗ Ʉ ᴜ ᵾ Ṳ Ṵ Ṷ Ṹ Ṻ Ụ Ủ Ứ Ừ Ử Ữ Ự Ⓤ U | U - ù ú û ü ũ ū ŭ ů ű ų ư ǔ ǖ ǘ ǚ ǜ ȕ ȗ ʉ ᵤ ᶙ ṳ ṵ ṷ ṹ ṻ ụ ủ ứ ừ ử ữ ự ⓤ u | u - ⒰ | (u) - ᵫ | ue - Ʋ Ʌ ᴠ Ṽ Ṿ Ỽ Ⓥ Ꝟ Ꝩ V | V - ʋ ʌ ᵥ ᶌ ṽ ṿ ⓥ ⱱ ⱴ ꝟ v | v - Ꝡ | VY - ⒱ | (v) - ꝡ | vy - Ŵ Ƿ ᴡ Ẁ Ẃ Ẅ Ẇ Ẉ Ⓦ Ⱳ W | W - ŵ ƿ ʍ ẁ ẃ ẅ ẇ ẉ ẘ ⓦ ⱳ w | w - ⒲ | (w) - Ẋ Ẍ Ⓧ X | X - ᶍ ẋ ẍ ₓ ⓧ x | x - ⒳ | (x) - Ý Ŷ Ÿ Ƴ Ȳ Ɏ ʏ Ẏ Ỳ Ỵ Ỷ Ỹ Ỿ Ⓨ Y | Y - ý ÿ ŷ ƴ ȳ ɏ ʎ ẏ ẙ ỳ ỵ ỷ ỹ ỿ ⓨ y | y - ⒴ | (y) - Ź Ż Ž Ƶ Ȝ Ȥ ᴢ Ẑ Ẓ Ẕ Ⓩ Ⱬ Ꝣ Z | Z - ź ż ž ƶ ȝ ȥ ɀ ʐ ʑ ᵶ ᶎ ẑ ẓ ẕ ⓩ ⱬ ꝣ z | z - ⒵ | (z) - ⁰ ₀ ⓪ ⓿ 0 | 0 - ¹ ₁ ① ⓵ ❶ ➀ ➊ 1 | 1 - ⒈ | 1. - ⑴ | (1) - ² ₂ ② ⓶ ❷ ➁ ➋ 2 | 2 - ⒉ | 2. - ⑵ | (2) - ³ ₃ ③ ⓷ ❸ ➂ ➌ 3 | 3 - ⒊ | 3. - ⑶ | (3) - ⁴ ₄ ④ ⓸ ❹ ➃ ➍ 4 | 4 - ⒋ | 4. - ⑷ | (4) - ⁵ ₅ ⑤ ⓹ ❺ ➄ ➎ 5 | 5 - ⒌ | 5. - ⑸ | (5) - ⁶ ₆ ⑥ ⓺ ❻ ➅ ➏ 6 | 6 - ⒍ | 6. - ⑹ | (6) - ⁷ ₇ ⑦ ⓻ ❼ ➆ ➐ 7 | 7 - ⒎ | 7. - ⑺ | (7) - ⁸ ₈ ⑧ ⓼ ❽ ➇ ➑ 8 | 8 - ⒏ | 8. - ⑻ | (8) - ⁹ ₉ ⑨ ⓽ ❾ ➈ ➒ 9 | 9 - ⒐ | 9. - ⑼ | (9) - ⑩ ⓾ ❿ ➉ ➓ | 10 - ⒑ | 10. - ⑽ | (10) - ⑪ ⓫ | 11 - ⒒ | 11. - ⑾ | (11) - ⑫ ⓬ | 12 - ⒓ | 12. - ⑿ | (12) - ⑬ ⓭ | 13 - ⒔ | 13. - ⒀ | (13) - ⑭ ⓮ | 14 - ⒕ | 14. - ⒁ | (14) - ⑮ ⓯ | 15 - ⒖ | 15. - ⒂ | (15) - ⑯ ⓰ | 16 - ⒗ | 16. - ⒃ | (16) - ⑰ ⓱ | 17 - ⒘ | 17. - ⒄ | (17) - ⑱ ⓲ | 18 - ⒙ | 18. - ⒅ | (18) - ⑲ ⓳ | 19 - ⒚ | 19. - ⒆ | (19) - ⑳ ⓴ | 20 - ⒛ | 20. 
- ⒇ | (20) - « » “ ” „ ″ ‶ ❝ ❞ ❮ ❯ " | " - ‘ ’ ‚ ‛ ′ ‵ ‹ › ❛ ❜ ' | ' - ‐ ‑ ‒ – — ⁻ ₋ - | - - ⁅ ❲ [ | [ - ⁆ ❳ ] | ] - ⁽ ₍ ❨ ❪ ( | ( - ⸨ | (( - ⁾ ₎ ❩ ❫ ) | ) - ⸩ | )) - ❬ ❰ < | < - ❭ ❱ > | > - ❴ { | { - ❵ } | } - ⁺ ₊ + | + - ⁼ ₌ = | = - ! | ! - ‼ | !! - ⁉ | !? - # | # - $ | $ - ⁒ % | % - & | & - ⁎ * | * - , | , - . | . - ⁄ / | / - : | : - ⁏ ; | ; - ? | ? - ⁇ | ?? - ⁈ | ?! - @ | @ - \ | \\ - ‸ ^ | ^ - _ | _ - ⁓ ~ | ~ diff --git a/bower.json b/bower.json index 5af70d7..a62080f 100644 --- a/bower.json +++ b/bower.json @@ -1,11 +1,11 @@ { "name": "fold-to-ascii", "main": "index.js", - "version": "2.0.0", + "version": "2.0.1", "authors": [ "Moritz Platt " ], - "description": "A JavaScript port of the Apache Lucene ASCII Folding Filter that converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII characters (the \"Basic Latin\" Unicode block) into ASCII equivalents, if they exist.", + "description": "A JavaScript port of the Apache Lucene ASCII Folding Filter that converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII characters (the \"Basic Latin\" Unicode block) into ASCII equivalents, if they exist.", "moduleType": [ "node" ], diff --git a/index.js b/index.js index aea502f..1f7d37b 100644 --- a/index.js +++ b/index.js @@ -1575,7 +1575,7 @@ var replaceChar = function(charCode, replace, replacement) { case 0xFF5E: // ~ [FULLWIDTH TILDE] return "~"; default: - return (replace ? replacement : String.fromCharCode(charCode)); + return (replace ? 
replacement : ""); } }; diff --git a/package.json b/package.json index eee0405..cb13808 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "fold-to-ascii", - "version": "2.0.0", - "description": "A JavaScript port of the Apache Lucene ASCII Folding Filter that converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII characters (the \"Basic Latin\" Unicode block) into ASCII equivalents, if they exist.", + "version": "2.0.1", + "description": "A JavaScript port of the Apache Lucene ASCII Folding Filter that converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII characters (the \"Basic Latin\" Unicode block) into ASCII equivalents, if they exist.", "main": "index.js", "scripts": { "test": "qunit -c ./index.js -t ./test/index.js"