diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp
index 3065bcdfbb3857..b6613d69256a5d 100644
--- a/deps/simdutf/simdutf.cpp
+++ b/deps/simdutf/simdutf.cpp
@@ -1,4 +1,4 @@
-/* auto-generated on 2023-03-30 20:31:03 -0400. Do not edit! */
+/* auto-generated on 2023-04-08 11:21:57 -0400. Do not edit! */
 // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf.cpp
 /* begin file src/simdutf.cpp */
 #include "simdutf.h"
@@ -4812,7 +4812,7 @@ simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t *
   return get_active_implementation()->convert_utf16be_to_utf8_with_errors(buf, len, utf8_buffer);
 }
 simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
-  #if BIG_ENDIAN
+  #if SIMDUTF_IS_BIG_ENDIAN
   return convert_valid_utf16be_to_utf8(buf, len, utf8_buffer);
   #else
   return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer);
@@ -5294,31 +5294,31 @@ const uint8_t shufutf8[209][16] =
 /* number of two + three bytes : 145 */
 /* number of two + three + four bytes : 209 */
 const uint8_t utf8bigindex[4096][2] =
-{	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+{	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{147, 5},
- 	{0, 12},
+ 	{209, 12},
  	{150, 5},
  	{162, 5},
  	{65, 5},
- 	{0, 12},
+ 	{209, 12},
  	{153, 5},
  	{165, 5},
  	{67, 5},
@@ -5326,15 +5326,15 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{148, 6},
- 	{0, 12},
+ 	{209, 12},
  	{151, 6},
  	{163, 6},
  	{66, 6},
- 	{0, 12},
+ 	{209, 12},
  	{154, 6},
  	{166, 6},
  	{68, 6},
@@ -5342,7 +5342,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{169, 6},
  	{70, 6},
@@ -5358,15 +5358,15 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{152, 7},
  	{164, 7},
  	{145, 3},
- 	{0, 12},
+ 	{209, 12},
  	{155, 7},
  	{167, 7},
  	{69, 7},
@@ -5374,7 +5374,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{170, 7},
  	{71, 7},
@@ -5390,8 +5390,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{185, 7},
@@ -5406,7 +5406,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -5422,15 +5422,15 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
+ 	{209, 12},
  	{156, 8},
  	{168, 8},
  	{146, 4},
@@ -5438,7 +5438,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{171, 8},
  	{72, 8},
@@ -5454,8 +5454,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{186, 8},
@@ -5470,7 +5470,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -5486,10 +5486,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -5502,7 +5502,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -5518,8 +5518,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -5534,7 +5534,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -5550,23 +5550,23 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{160, 9},
  	{172, 9},
  	{147, 5},
@@ -5582,8 +5582,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{175, 9},
  	{148, 6},
  	{187, 9},
@@ -5598,7 +5598,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{111, 9},
  	{70, 6},
@@ -5614,10 +5614,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{190, 9},
  	{152, 7},
  	{164, 7},
@@ -5630,7 +5630,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{113, 9},
  	{71, 7},
@@ -5646,8 +5646,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{19, 9},
  	{35, 9},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{137, 9},
@@ -5662,7 +5662,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{21, 9},
  	{37, 9},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -5678,13 +5678,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{205, 9},
  	{156, 8},
@@ -5694,7 +5694,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{115, 9},
  	{72, 8},
@@ -5710,8 +5710,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{139, 9},
@@ -5726,7 +5726,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{22, 9},
  	{38, 9},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -5742,10 +5742,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -5758,7 +5758,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -5774,8 +5774,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -5790,7 +5790,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -5806,31 +5806,31 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{147, 5},
- 	{0, 12},
+ 	{209, 12},
  	{150, 5},
  	{162, 5},
  	{65, 5},
- 	{0, 12},
+ 	{209, 12},
  	{153, 5},
  	{165, 5},
  	{67, 5},
@@ -5838,8 +5838,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{176, 10},
  	{148, 6},
  	{188, 10},
@@ -5854,7 +5854,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{169, 6},
  	{70, 6},
@@ -5870,10 +5870,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{191, 10},
  	{152, 7},
  	{164, 7},
@@ -5886,7 +5886,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{114, 10},
  	{71, 7},
@@ -5902,8 +5902,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{138, 10},
@@ -5918,7 +5918,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -5934,13 +5934,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{206, 10},
  	{156, 8},
@@ -5950,7 +5950,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{116, 10},
  	{72, 8},
@@ -5966,8 +5966,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{140, 10},
@@ -5982,7 +5982,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{23, 10},
  	{39, 10},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -5998,10 +5998,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -6014,7 +6014,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -6030,8 +6030,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -6046,7 +6046,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -6062,23 +6062,23 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{160, 9},
  	{172, 9},
  	{147, 5},
@@ -6094,8 +6094,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{175, 9},
  	{148, 6},
  	{142, 10},
@@ -6110,7 +6110,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{111, 9},
  	{70, 6},
@@ -6126,10 +6126,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{190, 9},
  	{152, 7},
  	{164, 7},
@@ -6142,7 +6142,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{113, 9},
  	{71, 7},
@@ -6158,8 +6158,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{19, 9},
  	{35, 9},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{137, 9},
@@ -6174,7 +6174,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{21, 9},
  	{37, 9},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -6190,13 +6190,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{205, 9},
  	{156, 8},
@@ -6206,7 +6206,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{115, 9},
  	{72, 8},
@@ -6222,8 +6222,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{139, 9},
@@ -6238,7 +6238,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{22, 9},
  	{38, 9},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -6254,10 +6254,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -6270,7 +6270,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -6286,8 +6286,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -6302,7 +6302,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -6318,31 +6318,31 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{147, 5},
- 	{0, 12},
+ 	{209, 12},
  	{150, 5},
  	{162, 5},
  	{65, 5},
- 	{0, 12},
+ 	{209, 12},
  	{153, 5},
  	{165, 5},
  	{67, 5},
@@ -6350,15 +6350,15 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{148, 6},
- 	{0, 12},
+ 	{209, 12},
  	{151, 6},
  	{163, 6},
  	{66, 6},
- 	{0, 12},
+ 	{209, 12},
  	{154, 6},
  	{166, 6},
  	{68, 6},
@@ -6366,7 +6366,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{169, 6},
  	{70, 6},
@@ -6382,10 +6382,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{192, 11},
  	{152, 7},
  	{164, 7},
@@ -6398,7 +6398,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{170, 7},
  	{71, 7},
@@ -6414,8 +6414,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{185, 7},
@@ -6430,7 +6430,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -6446,13 +6446,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{207, 11},
  	{156, 8},
@@ -6462,7 +6462,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{117, 11},
  	{72, 8},
@@ -6478,8 +6478,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{141, 11},
@@ -6494,7 +6494,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -6510,10 +6510,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -6526,7 +6526,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -6542,8 +6542,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -6558,7 +6558,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -6574,23 +6574,23 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{160, 9},
  	{172, 9},
  	{147, 5},
@@ -6606,8 +6606,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{175, 9},
  	{148, 6},
  	{143, 11},
@@ -6622,7 +6622,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{111, 9},
  	{70, 6},
@@ -6638,10 +6638,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{190, 9},
  	{152, 7},
  	{164, 7},
@@ -6654,7 +6654,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{113, 9},
  	{71, 7},
@@ -6670,8 +6670,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{19, 9},
  	{35, 9},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{137, 9},
@@ -6686,7 +6686,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{21, 9},
  	{37, 9},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -6702,13 +6702,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{205, 9},
  	{156, 8},
@@ -6718,7 +6718,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{115, 9},
  	{72, 8},
@@ -6734,8 +6734,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{139, 9},
@@ -6750,7 +6750,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{22, 9},
  	{38, 9},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -6766,10 +6766,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -6782,7 +6782,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -6798,8 +6798,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -6814,7 +6814,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -6830,31 +6830,31 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{147, 5},
- 	{0, 12},
+ 	{209, 12},
  	{150, 5},
  	{162, 5},
  	{65, 5},
- 	{0, 12},
+ 	{209, 12},
  	{153, 5},
  	{165, 5},
  	{67, 5},
@@ -6862,8 +6862,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{176, 10},
  	{148, 6},
  	{188, 10},
@@ -6878,7 +6878,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{169, 6},
  	{70, 6},
@@ -6894,10 +6894,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{191, 10},
  	{152, 7},
  	{164, 7},
@@ -6910,7 +6910,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{114, 10},
  	{71, 7},
@@ -6926,8 +6926,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{138, 10},
@@ -6942,7 +6942,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -6958,13 +6958,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{206, 10},
  	{156, 8},
@@ -6974,7 +6974,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{116, 10},
  	{72, 8},
@@ -6990,8 +6990,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{140, 10},
@@ -7006,7 +7006,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{23, 10},
  	{39, 10},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -7022,10 +7022,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -7038,7 +7038,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -7054,8 +7054,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -7070,7 +7070,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -7086,23 +7086,23 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{160, 9},
  	{172, 9},
  	{147, 5},
@@ -7118,8 +7118,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{175, 9},
  	{148, 6},
  	{142, 10},
@@ -7134,7 +7134,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{111, 9},
  	{70, 6},
@@ -7150,10 +7150,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{190, 9},
  	{152, 7},
  	{164, 7},
@@ -7166,7 +7166,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{113, 9},
  	{71, 7},
@@ -7182,8 +7182,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{19, 9},
  	{35, 9},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{137, 9},
@@ -7198,7 +7198,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{21, 9},
  	{37, 9},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -7214,13 +7214,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{205, 9},
  	{156, 8},
@@ -7230,7 +7230,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{115, 9},
  	{72, 8},
@@ -7246,8 +7246,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{139, 9},
@@ -7262,7 +7262,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{22, 9},
  	{38, 9},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -7278,10 +7278,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -7294,7 +7294,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -7310,8 +7310,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -7326,7 +7326,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -7342,31 +7342,31 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{147, 5},
- 	{0, 12},
+ 	{209, 12},
  	{150, 5},
  	{162, 5},
  	{65, 5},
- 	{0, 12},
+ 	{209, 12},
  	{153, 5},
  	{165, 5},
  	{67, 5},
@@ -7374,15 +7374,15 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{148, 6},
- 	{0, 12},
+ 	{209, 12},
  	{151, 6},
  	{163, 6},
  	{66, 6},
- 	{0, 12},
+ 	{209, 12},
  	{154, 6},
  	{166, 6},
  	{68, 6},
@@ -7390,7 +7390,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{169, 6},
  	{70, 6},
@@ -7406,15 +7406,15 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{152, 7},
  	{164, 7},
  	{145, 3},
- 	{0, 12},
+ 	{209, 12},
  	{155, 7},
  	{167, 7},
  	{69, 7},
@@ -7422,7 +7422,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{170, 7},
  	{71, 7},
@@ -7438,8 +7438,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{185, 7},
@@ -7454,7 +7454,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -7470,13 +7470,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{208, 12},
  	{156, 8},
@@ -7486,7 +7486,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{171, 8},
  	{72, 8},
@@ -7502,8 +7502,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{186, 8},
@@ -7518,7 +7518,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -7534,10 +7534,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -7550,7 +7550,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -7566,8 +7566,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -7582,7 +7582,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -7598,23 +7598,23 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{160, 9},
  	{172, 9},
  	{147, 5},
@@ -7630,8 +7630,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{175, 9},
  	{148, 6},
  	{144, 12},
@@ -7646,7 +7646,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{111, 9},
  	{70, 6},
@@ -7662,10 +7662,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{190, 9},
  	{152, 7},
  	{164, 7},
@@ -7678,7 +7678,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{113, 9},
  	{71, 7},
@@ -7694,8 +7694,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{19, 9},
  	{35, 9},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{137, 9},
@@ -7710,7 +7710,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{21, 9},
  	{37, 9},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -7726,13 +7726,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{205, 9},
  	{156, 8},
@@ -7742,7 +7742,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{115, 9},
  	{72, 8},
@@ -7758,8 +7758,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{139, 9},
@@ -7774,7 +7774,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{22, 9},
  	{38, 9},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -7790,10 +7790,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -7806,7 +7806,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -7822,8 +7822,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -7838,7 +7838,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -7854,31 +7854,31 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{147, 5},
- 	{0, 12},
+ 	{209, 12},
  	{150, 5},
  	{162, 5},
  	{65, 5},
- 	{0, 12},
+ 	{209, 12},
  	{153, 5},
  	{165, 5},
  	{67, 5},
@@ -7886,8 +7886,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{176, 10},
  	{148, 6},
  	{188, 10},
@@ -7902,7 +7902,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{169, 6},
  	{70, 6},
@@ -7918,10 +7918,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{191, 10},
  	{152, 7},
  	{164, 7},
@@ -7934,7 +7934,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{114, 10},
  	{71, 7},
@@ -7950,8 +7950,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{138, 10},
@@ -7966,7 +7966,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -7982,13 +7982,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{206, 10},
  	{156, 8},
@@ -7998,7 +7998,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{116, 10},
  	{72, 8},
@@ -8014,8 +8014,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{140, 10},
@@ -8030,7 +8030,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{23, 10},
  	{39, 10},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -8046,10 +8046,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -8062,7 +8062,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -8078,8 +8078,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -8094,7 +8094,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -8110,23 +8110,23 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{160, 9},
  	{172, 9},
  	{147, 5},
@@ -8142,8 +8142,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{175, 9},
  	{148, 6},
  	{142, 10},
@@ -8158,7 +8158,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{111, 9},
  	{70, 6},
@@ -8174,10 +8174,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{190, 9},
  	{152, 7},
  	{164, 7},
@@ -8190,7 +8190,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{113, 9},
  	{71, 7},
@@ -8206,8 +8206,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{19, 9},
  	{35, 9},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{137, 9},
@@ -8222,7 +8222,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{21, 9},
  	{37, 9},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -8238,13 +8238,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{205, 9},
  	{156, 8},
@@ -8254,7 +8254,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{115, 9},
  	{72, 8},
@@ -8270,8 +8270,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{139, 9},
@@ -8286,7 +8286,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{22, 9},
  	{38, 9},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -8302,10 +8302,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -8318,7 +8318,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -8334,8 +8334,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -8350,7 +8350,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -8366,31 +8366,31 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{147, 5},
- 	{0, 12},
+ 	{209, 12},
  	{150, 5},
  	{162, 5},
  	{65, 5},
- 	{0, 12},
+ 	{209, 12},
  	{153, 5},
  	{165, 5},
  	{67, 5},
@@ -8398,15 +8398,15 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{148, 6},
- 	{0, 12},
+ 	{209, 12},
  	{151, 6},
  	{163, 6},
  	{66, 6},
- 	{0, 12},
+ 	{209, 12},
  	{154, 6},
  	{166, 6},
  	{68, 6},
@@ -8414,7 +8414,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{169, 6},
  	{70, 6},
@@ -8430,10 +8430,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{192, 11},
  	{152, 7},
  	{164, 7},
@@ -8446,7 +8446,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{170, 7},
  	{71, 7},
@@ -8462,8 +8462,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{185, 7},
@@ -8478,7 +8478,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -8494,13 +8494,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{207, 11},
  	{156, 8},
@@ -8510,7 +8510,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{117, 11},
  	{72, 8},
@@ -8526,8 +8526,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{141, 11},
@@ -8542,7 +8542,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -8558,10 +8558,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -8574,7 +8574,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -8590,8 +8590,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -8606,7 +8606,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -8622,23 +8622,23 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{160, 9},
  	{172, 9},
  	{147, 5},
@@ -8654,8 +8654,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{175, 9},
  	{148, 6},
  	{143, 11},
@@ -8670,7 +8670,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{111, 9},
  	{70, 6},
@@ -8686,10 +8686,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{190, 9},
  	{152, 7},
  	{164, 7},
@@ -8702,7 +8702,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{113, 9},
  	{71, 7},
@@ -8718,8 +8718,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{19, 9},
  	{35, 9},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{137, 9},
@@ -8734,7 +8734,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{21, 9},
  	{37, 9},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -8750,13 +8750,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{205, 9},
  	{156, 8},
@@ -8766,7 +8766,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{115, 9},
  	{72, 8},
@@ -8782,8 +8782,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{139, 9},
@@ -8798,7 +8798,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{22, 9},
  	{38, 9},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -8814,10 +8814,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -8830,7 +8830,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -8846,8 +8846,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -8862,7 +8862,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -8878,31 +8878,31 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{147, 5},
- 	{0, 12},
+ 	{209, 12},
  	{150, 5},
  	{162, 5},
  	{65, 5},
- 	{0, 12},
+ 	{209, 12},
  	{153, 5},
  	{165, 5},
  	{67, 5},
@@ -8910,8 +8910,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{176, 10},
  	{148, 6},
  	{188, 10},
@@ -8926,7 +8926,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{169, 6},
  	{70, 6},
@@ -8942,10 +8942,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{191, 10},
  	{152, 7},
  	{164, 7},
@@ -8958,7 +8958,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{114, 10},
  	{71, 7},
@@ -8974,8 +8974,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{138, 10},
@@ -8990,7 +8990,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -9006,13 +9006,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{206, 10},
  	{156, 8},
@@ -9022,7 +9022,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{116, 10},
  	{72, 8},
@@ -9038,8 +9038,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{140, 10},
@@ -9054,7 +9054,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{23, 10},
  	{39, 10},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -9070,10 +9070,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -9086,7 +9086,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -9102,8 +9102,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -9118,7 +9118,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -9134,23 +9134,23 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{146, 4},
- 	{0, 12},
+ 	{209, 12},
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{160, 9},
  	{172, 9},
  	{147, 5},
@@ -9166,8 +9166,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{175, 9},
  	{148, 6},
  	{142, 10},
@@ -9182,7 +9182,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{74, 6},
  	{92, 6},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{111, 9},
  	{70, 6},
@@ -9198,10 +9198,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{190, 9},
  	{152, 7},
  	{164, 7},
@@ -9214,7 +9214,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{113, 9},
  	{71, 7},
@@ -9230,8 +9230,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{19, 9},
  	{35, 9},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{137, 9},
@@ -9246,7 +9246,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{21, 9},
  	{37, 9},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -9262,13 +9262,13 @@ const uint8_t utf8bigindex[4096][2] =
  	{16, 7},
  	{32, 7},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{145, 3},
  	{205, 9},
  	{156, 8},
@@ -9278,7 +9278,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{149, 4},
  	{161, 4},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{159, 8},
  	{115, 9},
  	{72, 8},
@@ -9294,8 +9294,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{73, 5},
  	{91, 5},
  	{64, 4},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{174, 8},
  	{148, 6},
  	{139, 9},
@@ -9310,7 +9310,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{22, 9},
  	{38, 9},
  	{3, 8},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{110, 8},
  	{70, 6},
@@ -9326,10 +9326,10 @@ const uint8_t utf8bigindex[4096][2] =
  	{17, 8},
  	{33, 8},
  	{0, 6},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{189, 8},
  	{152, 7},
  	{164, 7},
@@ -9342,7 +9342,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{75, 7},
  	{93, 7},
  	{64, 4},
- 	{0, 12},
+ 	{209, 12},
  	{158, 7},
  	{112, 8},
  	{71, 7},
@@ -9358,8 +9358,8 @@ const uint8_t utf8bigindex[4096][2] =
  	{18, 8},
  	{34, 8},
  	{1, 7},
- 	{0, 12},
- 	{0, 12},
+ 	{209, 12},
+ 	{209, 12},
  	{173, 7},
  	{148, 6},
  	{136, 8},
@@ -9374,7 +9374,7 @@ const uint8_t utf8bigindex[4096][2] =
  	{20, 8},
  	{36, 8},
  	{2, 7},
- 	{0, 12},
+ 	{209, 12},
  	{157, 6},
  	{109, 7},
  	{70, 6},
@@ -11299,8 +11299,10 @@ template <endianness endian>
 inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char16_t* utf16_output) {
   size_t extra_len{0};
   // We potentially need to go back in time and find a leading byte.
-  size_t how_far_back = 3; // 3 bytes in the past + current position
-  if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; }
+  // In theory '3' would be sufficient, but sometimes the error can go back quite far.
+  size_t how_far_back = prior_bytes;
+  // size_t how_far_back = 3; // 3 bytes in the past + current position
+  // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; }
   bool found_leading_bytes{false};
   // important: it is i <= how_far_back and not 'i < how_far_back'.
   for(size_t i = 0; i <= how_far_back; i++) {
@@ -12206,6 +12208,14 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     utf16_output += 4;
   } else if (idx < 209) {
     // TWO (2) input code-words
+    //////////////
+    // There might be garbage inputs where a leading byte masquerades as a four-byte
+    // leading byte (by being followed by 3 continuation bytes), but is less than
+    // 0xf0. This could trigger a buffer overflow if we only counted leading
+    // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation.
+    // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs.
+    // We do so at the cost of an extra mask.
+    /////////////
     uint8x16_t sh = vld1q_u8(reinterpret_cast<const uint8_t*>(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
     uint8x16_t perm = vqtbl1q_u8(in, sh);
     uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f)));
@@ -12217,8 +12227,14 @@ size_t convert_masked_utf8_to_utf16(const char *input,
         vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x400000)))), 1));
     middlehighbyte = veorq_u8(correct, middlehighbyte);
     uint8x16_t middlehighbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlehighbyte), 4));
-    uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x07000000)));
-    uint8x16_t highbyte_shifted =vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(highbyte), 6));
+    // We deliberately carry the leading four bits if they are present, we remove
+    // them later when computing hightenbits.
+    uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0xff000000)));
+    uint8x16_t highbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(highbyte), 6));
+    // When we need to generate a surrogate pair (leading byte >= 0xF0), then
+    // the corresponding 32-bit value in 'composed' will be greater than
+    // (0xf0000000>>6), that is, greater than 0x3c00000. This can be used later
+    // to identify the location of the surrogate pairs.
     uint8x16_t composed =
         vorrq_u8(vorrq_u8(ascii, middlebyte_shifted),
                      vorrq_u8(highbyte_shifted, middlehighbyte_shifted));
@@ -12226,7 +12242,8 @@ size_t convert_masked_utf8_to_utf16(const char *input,
         vsubq_u32(vreinterpretq_u32_u8(composed), vmovq_n_u32(0x10000));
     uint32x4_t lowtenbits =
         vandq_u32(composedminus, vmovq_n_u32(0x3ff));
-    uint32x4_t hightenbits = vshrq_n_u32(composedminus, 10);
+    // Notice the 0x3ff mask:
+    uint32x4_t hightenbits = vandq_u32(vshrq_n_u32(composedminus, 10), vmovq_n_u32(0x3ff));
     uint32x4_t lowtenbitsadd =
         vaddq_u32(lowtenbits, vmovq_n_u32(0xDC00));
     uint32x4_t hightenbitsadd =
@@ -12244,13 +12261,13 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     uint32_t surrogate_buffer[4];
     vst1q_u32(surrogate_buffer, surrogates);
     for (size_t i = 0; i < 3; i++) {
-      if (basic_buffer[i] < 65536) {
-        utf16_output[0] = !match_system(big_endian) ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
-        utf16_output++;
-      } else {
+      if(basic_buffer[i] > 0x3c00000) {
         utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff);
         utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16);
         utf16_output += 2;
+      } else {
+        utf16_output[0] = !match_system(big_endian) ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
+        utf16_output++;
       }
     }
   } else {
@@ -14231,7 +14248,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
         utf8_end_of_code_point_mask >>= consumed;
       }
       // At this point there may remain between 0 and 12 bytes in the
-      // 64-byte block.These bytes will be processed again. So we have an
+      // 64-byte block. These bytes will be processed again. So we have an
       // 80% efficiency (in the worst case). In practice we expect an
       // 85% to 90% efficiency.
     }
@@ -14377,7 +14394,19 @@ using namespace simd;
     simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
       size_t pos = 0;
       char16_t* start{utf16_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the eighth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -14422,7 +14451,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -14440,7 +14469,19 @@ using namespace simd;
     simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
       size_t pos = 0;
       char16_t* start{utf16_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the eighth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -14492,7 +14533,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -14707,7 +14748,19 @@ using namespace simd;
     simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
       char32_t* start{utf32_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the fourth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -14752,7 +14805,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -14769,7 +14822,19 @@ using namespace simd;
     simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
       char32_t* start{utf32_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the fourth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -14819,7 +14884,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -19797,7 +19862,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2));
     if (big_endian) composed = _mm_shuffle_epi8(composed, swap);
     _mm_storeu_si128((__m128i *)utf16_output, composed);
-    utf16_output += 6; // We wrote 12 bytes, 6 code points.
+    utf16_output += 6; // We wrote 12 bytes, 6 code points. There is a potential overflow of 4 bytes.
   } else if (idx < 145) {
     // FOUR (4) input code-words
     const __m128i sh =
@@ -19816,9 +19881,17 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     __m128i composed_repacked = _mm_packus_epi32(composed, composed);
     if (big_endian) composed_repacked = _mm_shuffle_epi8(composed_repacked, swap);
     _mm_storeu_si128((__m128i *)utf16_output, composed_repacked);
-    utf16_output += 4;
+    utf16_output += 4; // Here we overflow by 8 bytes.
   } else if (idx < 209) {
     // TWO (2) input code-words
+    //////////////
+    // There might be garbage inputs where a leading byte masquerades as a four-byte
+    // leading byte (by being followed by 3 continuation bytes), but is not greater than
+    // 0xf0. This could trigger a buffer overflow if we only counted leading
+    // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation.
+    // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs.
+    // We do so at the cost of an extra mask.
+    /////////////
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -19831,8 +19904,14 @@ size_t convert_masked_utf8_to_utf16(const char *input,
         _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1);
     middlehighbyte = _mm_xor_si128(correct, middlehighbyte);
     const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4);
-    const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000));
+    // We deliberately carry the leading four bits in highbyte if they are present,
+    // we remove them later when computing hightenbits.
+    const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000));
     const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6);
+    // When we need to generate a surrogate pair (leading byte >= 0xF0), then
+    // the corresponding 32-bit value in 'composed' will be greater than
+    // (0xf0000000>>6), that is, greater than 0x3c00000. This can be used later
+    // to identify the location of the surrogate pairs.
     const __m128i composed =
         _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted),
                      _mm_or_si128(highbyte_shifted, middlehighbyte_shifted));
@@ -19840,7 +19919,8 @@ size_t convert_masked_utf8_to_utf16(const char *input,
         _mm_sub_epi32(composed, _mm_set1_epi32(0x10000));
     const __m128i lowtenbits =
         _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff));
-    const __m128i hightenbits = _mm_srli_epi32(composedminus, 10);
+    // Notice the 0x3ff mask:
+    const __m128i hightenbits = _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff));
     const __m128i lowtenbitsadd =
         _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00));
     const __m128i hightenbitsadd =
@@ -19858,13 +19938,13 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     uint32_t surrogate_buffer[4];
     _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates);
     for (size_t i = 0; i < 3; i++) {
-      if (basic_buffer[i] < 65536) {
-        utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
-        utf16_output++;
-      } else {
+      if(basic_buffer[i] > 0x3c00000) {
         utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff);
         utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16);
         utf16_output += 2;
+      } else  {
+        utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
+        utf16_output++;
       }
     }
   } else {
@@ -19955,7 +20035,8 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
     const __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2));
     _mm256_storeu_si256((__m256i *)utf32_output, _mm256_cvtepu16_epi32(composed));
-    utf32_output += 6; // We wrote 12 bytes, 6 code points.
+    utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential
+    // overflow of 32 - 24 = 8 bytes.
   } else if (idx < 145) {
     // FOUR (4) input code-words
     const __m128i sh =
@@ -19993,7 +20074,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
         _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted),
                      _mm_or_si128(highbyte_shifted, middlehighbyte_shifted));
     _mm_storeu_si128((__m128i *)utf32_output, composed);
-    utf32_output += 3;
+    utf32_output += 3; // We wrote 3 * 4 bytes, there is a potential overflow of 4 bytes.
   } else {
     // here we know that there is an error but we do not handle errors
   }
@@ -21810,7 +21891,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
         utf8_end_of_code_point_mask >>= consumed;
       }
       // At this point there may remain between 0 and 12 bytes in the
-      // 64-byte block.These bytes will be processed again. So we have an
+      // 64-byte block. These bytes will be processed again. So we have an
       // 80% efficiency (in the worst case). In practice we expect an
       // 85% to 90% efficiency.
     }
@@ -21956,7 +22037,19 @@ using namespace simd;
     simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
       size_t pos = 0;
       char16_t* start{utf16_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the eighth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -22001,7 +22094,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -22019,7 +22112,19 @@ using namespace simd;
     simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
       size_t pos = 0;
       char16_t* start{utf16_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the eighth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -22071,7 +22176,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -22286,7 +22391,19 @@ using namespace simd;
     simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
       char32_t* start{utf32_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the fourth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -22331,7 +22448,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -22348,7 +22465,19 @@ using namespace simd;
     simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
       char32_t* start{utf32_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the fourth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -22398,7 +22527,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -23539,7 +23668,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
         utf8_end_of_code_point_mask >>= consumed;
       }
       // At this point there may remain between 0 and 12 bytes in the
-      // 64-byte block.These bytes will be processed again. So we have an
+      // 64-byte block. These bytes will be processed again. So we have an
       // 80% efficiency (in the worst case). In practice we expect an
       // 85% to 90% efficiency.
     }
@@ -23685,7 +23814,19 @@ using namespace simd;
     simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
       size_t pos = 0;
       char16_t* start{utf16_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the eighth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -23730,7 +23871,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -23748,7 +23889,19 @@ using namespace simd;
     simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
       size_t pos = 0;
       char16_t* start{utf16_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the eighth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -23800,7 +23953,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -24015,7 +24168,19 @@ using namespace simd;
     simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
       char32_t* start{utf32_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the fourth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -24060,7 +24225,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -24077,7 +24242,19 @@ using namespace simd;
     simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
       char32_t* start{utf32_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the fourth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -24127,7 +24304,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -25144,6 +25321,14 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     utf16_output += 4;
   } else if (idx < 209) {
     // TWO (2) input code-words
+    //////////////
+    // There might be garbage inputs where a leading byte masquerades as a four-byte
+    // leading byte (by being followed by 3 continuation bytes), but is not greater than
+    // 0xf0. This could trigger a buffer overflow if we only counted leading
+    // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation.
+    // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs.
+    // We do so at the cost of an extra mask.
+    /////////////
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -25156,8 +25341,14 @@ size_t convert_masked_utf8_to_utf16(const char *input,
         _mm_srli_epi32(_mm_and_si128(perm, _mm_set1_epi32(0x400000)), 1);
     middlehighbyte = _mm_xor_si128(correct, middlehighbyte);
     const __m128i middlehighbyte_shifted = _mm_srli_epi32(middlehighbyte, 4);
-    const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0x07000000));
+    // We deliberately carry the leading four bits in highbyte if they are present,
+    // we remove them later when computing hightenbits.
+    const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi32(0xff000000));
     const __m128i highbyte_shifted = _mm_srli_epi32(highbyte, 6);
+    // When we need to generate a surrogate pair (leading byte >= 0xF0), then
+    // the corresponding 32-bit value in 'composed' will be greater than
+    // (0xf0000000>>6), that is, greater than 0x3c00000. This can be used later
+    // to identify the location of the surrogate pairs.
     const __m128i composed =
         _mm_or_si128(_mm_or_si128(ascii, middlebyte_shifted),
                      _mm_or_si128(highbyte_shifted, middlehighbyte_shifted));
@@ -25165,7 +25356,8 @@ size_t convert_masked_utf8_to_utf16(const char *input,
         _mm_sub_epi32(composed, _mm_set1_epi32(0x10000));
     const __m128i lowtenbits =
         _mm_and_si128(composedminus, _mm_set1_epi32(0x3ff));
-    const __m128i hightenbits = _mm_srli_epi32(composedminus, 10);
+    // Notice the 0x3ff mask:
+    const __m128i hightenbits = _mm_and_si128(_mm_srli_epi32(composedminus, 10), _mm_set1_epi32(0x3ff));
     const __m128i lowtenbitsadd =
         _mm_add_epi32(lowtenbits, _mm_set1_epi32(0xDC00));
     const __m128i hightenbitsadd =
@@ -25183,13 +25375,13 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     uint32_t surrogate_buffer[4];
     _mm_storeu_si128((__m128i *)surrogate_buffer, surrogates);
     for (size_t i = 0; i < 3; i++) {
-      if (basic_buffer[i] < 65536) {
-        utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
-        utf16_output++;
-      } else {
+      if(basic_buffer[i] > 0x3c00000) {
         utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff);
         utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16);
         utf16_output += 2;
+      } else {
+        utf16_output[0] = big_endian ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
+        utf16_output++;
       }
     }
   } else {
@@ -27140,7 +27332,7 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
         utf8_end_of_code_point_mask >>= consumed;
       }
       // At this point there may remain between 0 and 12 bytes in the
-      // 64-byte block.These bytes will be processed again. So we have an
+      // 64-byte block. These bytes will be processed again. So we have an
       // 80% efficiency (in the worst case). In practice we expect an
       // 85% to 90% efficiency.
     }
@@ -27286,7 +27478,19 @@ using namespace simd;
     simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
       size_t pos = 0;
       char16_t* start{utf16_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the eighth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -27331,7 +27535,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -27349,7 +27553,19 @@ using namespace simd;
     simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
       size_t pos = 0;
       char16_t* start{utf16_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the eighth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -27401,7 +27617,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -27616,7 +27832,19 @@ using namespace simd;
     simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
       char32_t* start{utf32_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the fourth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -27661,7 +27889,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
@@ -27678,7 +27906,19 @@ using namespace simd;
     simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
       char32_t* start{utf32_output};
-      const size_t safety_margin = 16; // to avoid overruns!
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then margin is the index of the fourth-last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64<int8_t> input(reinterpret_cast<const int8_t *>(in + pos));
         if(input.is_ascii()) {
@@ -27728,7 +27968,7 @@ using namespace simd;
             utf8_end_of_code_point_mask >>= consumed;
           }
           // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block.These bytes will be processed again. So we have an
+          // 64-byte block. These bytes will be processed again. So we have an
           // 80% efficiency (in the worst case). In practice we expect an
           // 85% to 90% efficiency.
         }
diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h
index bc18418fbf52cb..05e8985540655a 100644
--- a/deps/simdutf/simdutf.h
+++ b/deps/simdutf/simdutf.h
@@ -1,4 +1,4 @@
-/* auto-generated on 2023-03-30 20:31:03 -0400. Do not edit! */
+/* auto-generated on 2023-04-08 11:21:57 -0400. Do not edit! */
 // dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h
 /* begin file include/simdutf.h */
 #ifndef SIMDUTF_H
@@ -144,6 +144,8 @@
 // POWER processors. Please see https://github.com/lemire/simdutf/issues/51
 #elif defined(__s390__)
 // s390 IBM system. Big endian.
+#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
+// RISC-V 64-bit
 #else
 // The simdutf library is designed
 // for 64-bit processors and it seems that you are not
@@ -572,7 +574,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
 #define SIMDUTF_SIMDUTF_VERSION_H
 
 /** The version of simdutf being used (major.minor.revision) */
-#define SIMDUTF_VERSION "3.2.3"
+#define SIMDUTF_VERSION "3.2.7"
 
 namespace simdutf {
 enum {
@@ -587,7 +589,7 @@ enum {
   /**
    * The revision (major.minor.REVISION) of simdutf being used.
    */
-  SIMDUTF_VERSION_REVISION = 3
+  SIMDUTF_VERSION_REVISION = 7
 };
 } // namespace simdutf