From 3c9c7841cc585a866a0fa6800fb526d0209da8c0 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Tue, 9 Nov 2021 12:03:59 +0100
Subject: [PATCH] Fix bug #81598: Use C.UTF-8 as LC_CTYPE locale by default

Unfortunately, libedit is locale based and does not accept UTF-8
input when the C locale is used. This patch switches the default
locale to C.UTF-8 instead (if it is available). This makes libedit
work and I believe it shouldn't affect behavior of single-byte
locale-dependent functions that PHP otherwise uses.
---
 Zend/tests/lc_ctype_inheritance.phpt |  4 ++--
 Zend/zend_operators.c                | 11 ++++++++++-
 Zend/zend_operators.h                |  2 ++
 ext/snmp/snmp.c                      |  2 +-
 ext/standard/basic_functions.c       |  1 +
 main/main.c                          |  1 +
 6 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/Zend/tests/lc_ctype_inheritance.phpt b/Zend/tests/lc_ctype_inheritance.phpt
index 8971ff1969723..8c968f0615e6e 100644
--- a/Zend/tests/lc_ctype_inheritance.phpt
+++ b/Zend/tests/lc_ctype_inheritance.phpt
@@ -16,8 +16,8 @@ var_dump(setlocale(LC_CTYPE, "de_DE", "de-DE") !== false);
 var_dump(bin2hex(strtoupper("\xe4")));
 var_dump(preg_match('/\w/', "\xe4"));
 ?>
---EXPECT--
-string(1) "C"
+--EXPECTF--
+string(%d) "C%r(\.UTF-8)?%r"
 string(2) "e4"
 int(0)
 bool(true)
diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c
index acda841979eb5..97f298a6696df 100644
--- a/Zend/zend_operators.c
+++ b/Zend/zend_operators.c
@@ -34,8 +34,8 @@
 #include <emmintrin.h>
 #endif
 
-#ifdef ZEND_USE_TOLOWER_L
 #include <locale.h>
+#ifdef ZEND_USE_TOLOWER_L
 static _locale_t current_locale = NULL;
 /* this is true global! may lead to strange effects on ZTS, but so may setlocale() */
 #define zend_tolower(c) _tolower_l(c, current_locale)
@@ -2562,6 +2562,15 @@ ZEND_API void zend_update_current_locale(void) /* {{{ */
 /* }}} */
 #endif
 
+ZEND_API void zend_reset_lc_ctype_locale(void)
+{
+	/* Use the C.UTF-8 locale so that readline can process UTF-8 input, while not interfering
+	 * with single-byte locale-dependent functions used by PHP. */
+	if (!setlocale(LC_CTYPE, "C.UTF-8")) {
+		setlocale(LC_CTYPE, "C");
+	}
+}
+
 static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str, size_t length) /* {{{ */ {
 	unsigned char *p = (unsigned char*)str;
 	unsigned char *q = (unsigned char*)dest;
diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h
index d543b7b03c2bb..3569d6a531945 100644
--- a/Zend/zend_operators.h
+++ b/Zend/zend_operators.h
@@ -514,6 +514,8 @@ ZEND_API void zend_update_current_locale(void);
 #define zend_update_current_locale()
 #endif
 
+ZEND_API void zend_reset_lc_ctype_locale(void);
+
 /* The offset in bytes between the value and type fields of a zval */
 #define ZVAL_OFFSETOF_TYPE	\
 	(offsetof(zval, u1.type_info) - offsetof(zval, value))
diff --git a/ext/snmp/snmp.c b/ext/snmp/snmp.c
index d31995827880d..eba8a0b0543d3 100644
--- a/ext/snmp/snmp.c
+++ b/ext/snmp/snmp.c
@@ -1975,7 +1975,7 @@ PHP_MINIT_FUNCTION(snmp)
 
 	init_snmp("snmpapp");
 	/* net-snmp corrupts the CTYPE locale during initialization. */
-	setlocale(LC_CTYPE, "C");
+	zend_reset_lc_ctype_locale();
 
 #ifdef NETSNMP_DS_LIB_DONT_PERSIST_STATE
 	/* Prevent update of the snmpapp.conf file */
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c
index 876ef347ebf89..bea9ea89ad3b9 100755
--- a/ext/standard/basic_functions.c
+++ b/ext/standard/basic_functions.c
@@ -518,6 +518,7 @@ PHP_RSHUTDOWN_FUNCTION(basic) /* {{{ */
 	 * to the value in startup environment */
 	if (BG(locale_changed)) {
 		setlocale(LC_ALL, "C");
+		zend_reset_lc_ctype_locale();
 		zend_update_current_locale();
 		if (BG(ctype_string)) {
 			zend_string_release_ex(BG(ctype_string), 0);
diff --git a/main/main.c b/main/main.c
index 073d260ac6898..48c5a656f42a3 100644
--- a/main/main.c
+++ b/main/main.c
@@ -2092,6 +2092,7 @@ int php_module_startup(sapi_module_struct *sf, zend_module_entry *additional_mod
 	zuf.getenv_function = sapi_getenv;
 	zuf.resolve_path_function = php_resolve_path_for_zend;
 	zend_startup(&zuf);
+	zend_reset_lc_ctype_locale();
 	zend_update_current_locale();
 
 	zend_observer_startup();