From 3ac191bd5558758c3ca13f0fca245e241f1a899b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Tue, 16 Apr 2024 00:06:53 +0200 Subject: [PATCH] Optimize ASCII case --- scripts/make_unicode_categories.py | 5 ++++- src/_unicode_cat_of.hpp | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/make_unicode_categories.py b/scripts/make_unicode_categories.py index 3ef477e..51d89d9 100755 --- a/scripts/make_unicode_categories.py +++ b/scripts/make_unicode_categories.py @@ -137,8 +137,11 @@ def main(input_file, output_file): print(" };", file=output_file) print(file=output_file) - print(" if (codepoint > 0x110000) codepoint = 0x110000;", file=output_file) + print(" if (JSON5EncoderCpp_expect(codepoint < 256, true)) {", file=output_file) + print(" return (demiplane_data[0][codepoint / 4] >> (2 * (codepoint % 4))) % 4;", file=output_file) + print(" }", file=output_file) print(file=output_file) + print(" if (codepoint > 0x110000) codepoint = 0x110000;", file=output_file) print(" std::uint32_t fst_row = codepoint / 0x100;", file=output_file) print(" std::uint32_t fst_col = codepoint % 0x100;", file=output_file) print(" std::uint32_t snd_row = fst_row / 64;", file=output_file) diff --git a/src/_unicode_cat_of.hpp b/src/_unicode_cat_of.hpp index ae3becc..3eae7dc 100644 --- a/src/_unicode_cat_of.hpp +++ b/src/_unicode_cat_of.hpp @@ -817,8 +817,11 @@ static unsigned unicode_cat_of(std::uint32_t codepoint) { 0x0cu, 0x0cu, 0x0cu, 0x0cu, 0x0eu, 0x0cu, 0x0cu, 0x0cu, }; - if (codepoint > 0x110000) codepoint = 0x110000; + if (JSON5EncoderCpp_expect(codepoint < 256, true)) { + return (demiplane_data[0][codepoint / 4] >> (2 * (codepoint % 4))) % 4; + } + if (codepoint > 0x110000) codepoint = 0x110000; std::uint32_t fst_row = codepoint / 0x100; std::uint32_t fst_col = codepoint % 0x100; std::uint32_t snd_row = fst_row / 64;