From 3af5f47ce611dbf4c769b7de60793b87a20e8128 Mon Sep 17 00:00:00 2001 From: Calvin Buckley Date: Wed, 1 Mar 2023 13:30:15 -0400 Subject: [PATCH 001/168] `http_response_code` should warn if headers were already sent This would previously fail silently. We also return false to indicate the error. Fixes GH-10742 Closes GH-10744 --- NEWS | 2 ++ ext/standard/head.c | 12 ++++++++++++ .../tests/general_functions/http_response_code.phpt | 11 ++++++++++- sapi/fpm/tests/log-suppress-output.phpt | 4 ++-- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 92fe365e3ab6b..55c13e5a52e05 100644 --- a/NEWS +++ b/NEWS @@ -184,6 +184,8 @@ PHP NEWS . Fix GH-11010 (parse_ini_string() now preserves formatting of unquoted strings starting with numbers when the INI_SCANNER_TYPED flag is specified). (ilutov) + . Fix GH-10742 (http_response_code emits no error when headers were already + sent). (NattyNarwhal) - Streams: . Fixed bug #51056: blocking fread() will block even if data is available. 
diff --git a/ext/standard/head.c b/ext/standard/head.c index 5bdae98dfce56..7d223c646f215 100644 --- a/ext/standard/head.c +++ b/ext/standard/head.c @@ -363,6 +363,18 @@ PHP_FUNCTION(http_response_code) if (response_code) { + if (SG(headers_sent) && !SG(request_info).no_headers) { + const char *output_start_filename = php_output_get_start_filename(); + int output_start_lineno = php_output_get_start_lineno(); + + if (output_start_filename) { + php_error_docref(NULL, E_WARNING, "Cannot set response code - headers already sent " + "(output started at %s:%d)", output_start_filename, output_start_lineno); + } else { + php_error_docref(NULL, E_WARNING, "Cannot set response code - headers already sent"); + } + RETURN_FALSE; + } zend_long old_response_code; old_response_code = SG(sapi_headers).http_response_code; diff --git a/ext/standard/tests/general_functions/http_response_code.phpt b/ext/standard/tests/general_functions/http_response_code.phpt index ab290c3cefe19..8f8b87511a3b9 100644 --- a/ext/standard/tests/general_functions/http_response_code.phpt +++ b/ext/standard/tests/general_functions/http_response_code.phpt @@ -21,8 +21,17 @@ var_dump( // Get the new response code http_response_code() ); +echo "Now we've sent the headers\n"; +var_dump( + // This should fail + http_response_code(500) +); ?> ---EXPECT-- +--EXPECTF-- bool(false) bool(true) int(201) +Now we've sent the headers + +Warning: http_response_code(): Cannot set response code - headers already sent (output started at %s:%d) in %s on line %d +bool(false) diff --git a/sapi/fpm/tests/log-suppress-output.phpt b/sapi/fpm/tests/log-suppress-output.phpt index 5a5e7bb9544ba..a507180e99227 100644 --- a/sapi/fpm/tests/log-suppress-output.phpt +++ b/sapi/fpm/tests/log-suppress-output.phpt @@ -38,7 +38,7 @@ function doTestCalls(FPM\Tester &$tester, bool $expectSuppressableEntries) $tester->request(query: 'test=output', uri: '/ping')->expectBody('pong', 'text/plain'); $tester->expectAccessLog("'GET /ping?test=output' 
200", suppressable: false); - $tester->request(headers: ['X_ERROR' => 1])->expectBody('Not OK'); + $tester->request(headers: ['X_ERROR' => 1])->expectStatus('500 Internal Server Error')->expectBody('Not OK'); $tester->expectAccessLog("'GET /log-suppress-output.src.php' 500", suppressable: false); $tester->request()->expectBody('OK'); @@ -54,8 +54,8 @@ function doTestCalls(FPM\Tester &$tester, bool $expectSuppressableEntries) $src = << Date: Tue, 2 May 2023 19:05:53 +0100 Subject: [PATCH 002/168] ext/pgsql adding PGSQL_ERRORS_SQLSTATE constant support. Close GH-11181 --- NEWS | 1 + UPGRADING | 1 + ext/pgsql/pgsql.c | 6 +++++- ext/pgsql/pgsql.stub.php | 13 +++++++++++++ ext/pgsql/pgsql_arginfo.h | 8 +++++++- ext/pgsql/tests/07optional.phpt | 1 + 6 files changed, 28 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 55c13e5a52e05..e644a9fc6f49f 100644 --- a/NEWS +++ b/NEWS @@ -126,6 +126,7 @@ PHP NEWS . pg_cancel use thread safe PQcancel api instead. (David Carlier) . pg_trace new PGSQL_TRACE_SUPPRESS_TIMESTAMPS/PGSQL_TRACE_REGRESS_MODE contants support. (David Carlier) + . pg_set_error_verbosity adding PGSQL_ERRORS_SQLSTATE constant. (David Carlier) - Phar: . Fix memory leak in phar_rename_archive(). (stkeke) diff --git a/UPGRADING b/UPGRADING index a3f6a12f3dc09..6f8701179be55 100644 --- a/UPGRADING +++ b/UPGRADING @@ -220,6 +220,7 @@ PHP 8.3 UPGRADE NOTES - PGSQL: . PGSQL_TRACE_SUPPRESS_TIMESTAMPS. . PGSQL_TRACE_REGRESS_MODE. + . PGSQL_ERRORS_SQLSTATE. - Posix: . POSIX_SC_ARG_MAX.
diff --git a/ext/pgsql/pgsql.c b/ext/pgsql/pgsql.c index f2a7fb2ac296b..14488ca6157db 100644 --- a/ext/pgsql/pgsql.c +++ b/ext/pgsql/pgsql.c @@ -112,6 +112,10 @@ char pgsql_libpq_version[16]; #define PQfreemem free #endif +#if PG_VERSION_NUM < 120000 +#define PQERRORS_SQLSTATE 0 +#endif + ZEND_DECLARE_MODULE_GLOBALS(pgsql) static PHP_GINIT_FUNCTION(pgsql); @@ -2821,7 +2825,7 @@ PHP_FUNCTION(pg_set_error_verbosity) pgsql = link->conn; - if (verbosity & (PQERRORS_TERSE|PQERRORS_DEFAULT|PQERRORS_VERBOSE)) { + if (verbosity & (PQERRORS_TERSE|PQERRORS_DEFAULT|PQERRORS_VERBOSE|PQERRORS_SQLSTATE)) { RETURN_LONG(PQsetErrorVerbosity(pgsql, verbosity)); } else { RETURN_FALSE; diff --git a/ext/pgsql/pgsql.stub.php b/ext/pgsql/pgsql.stub.php index e095350543c79..fd58fb92c0045 100644 --- a/ext/pgsql/pgsql.stub.php +++ b/ext/pgsql/pgsql.stub.php @@ -183,6 +183,19 @@ * @cvalue PQERRORS_VERBOSE */ const PGSQL_ERRORS_VERBOSE = UNKNOWN; + #if PGVERSION_NUM > 110000 + /** + * @var int + * @cvalue PQERRORS_SQLSTATE + */ + const PGSQL_ERRORS_SQLSTATE = UNKNOWN; + #else + /** + * @var int + * @cvalue PQERRORS_TERSE + */ + const PGSQL_ERRORS_SQLSTATE = UNKNOWN; + #endif /* For lo_seek() */ diff --git a/ext/pgsql/pgsql_arginfo.h b/ext/pgsql/pgsql_arginfo.h index 2b8e7cd17ae6a..839842bbc3944 100644 --- a/ext/pgsql/pgsql_arginfo.h +++ b/ext/pgsql/pgsql_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: f18a73443942daa2b3695e8750c8daaea6b96194 */ + * Stub hash: f5b563282ffa08a4f79293fcf91df284433a6f5c */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_pg_connect, 0, 1, PgSql\\Connection, MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, connection_string, IS_STRING, 0) @@ -754,6 +754,12 @@ static void register_pgsql_symbols(int module_number) REGISTER_LONG_CONSTANT("PGSQL_ERRORS_TERSE", PQERRORS_TERSE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PGSQL_ERRORS_DEFAULT", PQERRORS_DEFAULT, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PGSQL_ERRORS_VERBOSE", PQERRORS_VERBOSE, CONST_PERSISTENT); +#if PGVERSION_NUM > 110000 + REGISTER_LONG_CONSTANT("PGSQL_ERRORS_SQLSTATE", PQERRORS_SQLSTATE, CONST_PERSISTENT); +#endif +#if !(PGVERSION_NUM > 110000) + REGISTER_LONG_CONSTANT("PGSQL_ERRORS_SQLSTATE", PQERRORS_TERSE, CONST_PERSISTENT); +#endif REGISTER_LONG_CONSTANT("PGSQL_SEEK_SET", SEEK_SET, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PGSQL_SEEK_CUR", SEEK_CUR, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PGSQL_SEEK_END", SEEK_END, CONST_PERSISTENT); diff --git a/ext/pgsql/tests/07optional.phpt b/ext/pgsql/tests/07optional.phpt index 6c58727e4adbe..b9ce491b2ca7a 100644 --- a/ext/pgsql/tests/07optional.phpt +++ b/ext/pgsql/tests/07optional.phpt @@ -19,6 +19,7 @@ if (function_exists('pg_set_error_verbosity')) { pg_set_error_verbosity($db, PGSQL_ERRORS_TERSE); pg_set_error_verbosity($db, PGSQL_ERRORS_DEFAULT); pg_set_error_verbosity($db, PGSQL_ERRORS_VERBOSE); + pg_set_error_verbosity($db, PGSQL_ERRORS_SQLSTATE); } echo "OK"; ?> From b33fbbfe3d470aceddea37c1cb84d91bacc1f81f Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 3 Feb 2023 00:00:42 +0100 Subject: [PATCH 003/168] Fix GH-10031: [Stream] STREAM_NOTIFY_PROGRESS over HTTP emitted irregularly for last chunk of data It's possible that the server already sent in more data than just the headers. 
Since the stream only accepts progress increments after the headers are processed, the already read data is never added to the progress. We account for this by adjusting the progress counter by the difference of already read header data and the body. For the test: Co-authored-by: aetonsi <18366087+aetonsi@users.noreply.github.com> Closes GH-10492. --- NEWS | 2 + ext/standard/http_fopen_wrapper.c | 7 ++++ ext/standard/tests/streams/gh10031.phpt | 52 +++++++++++++++++++++++++ 3 files changed, 61 insertions(+) create mode 100644 ext/standard/tests/streams/gh10031.phpt diff --git a/NEWS b/NEWS index 8938421723666..0028d6944f82c 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,8 @@ PHP NEWS source file). (ilutov) - Streams: + . Fixed bug GH-10031 ([Stream] STREAM_NOTIFY_PROGRESS over HTTP emitted + irregularly for last chunk of data). (nielsdos) . Fixed bug GH-11175 (Stream Socket Timeout). (nielsdos) . Fixed bug GH-11177 (ASAN UndefinedBehaviorSanitizer when timeout = -1 passed to stream_socket_accept/stream_socket_client). (nielsdos) diff --git a/ext/standard/http_fopen_wrapper.c b/ext/standard/http_fopen_wrapper.c index 5964efd2f9a1c..fa0dcb5e6890a 100644 --- a/ext/standard/http_fopen_wrapper.c +++ b/ext/standard/http_fopen_wrapper.c @@ -955,6 +955,13 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, if (transfer_encoding) { php_stream_filter_append(&stream->readfilters, transfer_encoding); } + + /* It's possible that the server already sent in more data than just the headers. + * We account for this by adjusting the progress counter by the difference of + * already read header data and the body.
*/ + if (stream->writepos > stream->readpos) { + php_stream_notify_progress_increment(context, stream->writepos - stream->readpos, 0); + } } return stream; diff --git a/ext/standard/tests/streams/gh10031.phpt b/ext/standard/tests/streams/gh10031.phpt new file mode 100644 index 0000000000000..aa3576dab51ad --- /dev/null +++ b/ext/standard/tests/streams/gh10031.phpt @@ -0,0 +1,52 @@ +--TEST-- +GH-10031 ([Stream] STREAM_NOTIFY_PROGRESS over HTTP emitted irregularly for last chunk of data) +--SKIPIF-- + +--INI-- +allow_url_fopen=1 +--CONFLICTS-- +server +--FILE-- + ['ignore_errors' => true,]]); +$lastBytesTransferred = 0; +stream_context_set_params($context, ['notification' => function ($code, $s, $m, $mc, $bytes_transferred, $bytes_max) +use (&$lastBytesTransferred) { + if ($code === STREAM_NOTIFY_FILE_SIZE_IS) echo "expected filesize=$bytes_max".PHP_EOL; + $lastBytesTransferred = $bytes_transferred; + @ob_flush(); +}]); + +$get = file_get_contents("http://".PHP_CLI_SERVER_ADDRESS, false, $context); + +echo "got filesize=" . strlen($get) . PHP_EOL; +var_dump($lastBytesTransferred); + +?> +--EXPECT-- +expected filesize=1000 +got filesize=1000 +int(1000) From ac5920f92b97bf2d1fc54377957b61faa790b5cc Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 29 Apr 2023 21:28:03 +0200 Subject: [PATCH 004/168] Fix GH-11141: Could not open input file: should be sent to stderr I grepped for php_printf cases in main/ and sapi/ and converted the cases which clearly indicate errors to fprintf(stderr, ...), like suggested in the linked issue. Closes GH-11163. 
--- NEWS | 4 ++++ main/main.c | 6 +++--- sapi/cli/php_cli.c | 2 +- sapi/cli/tests/007.phpt | 4 ++-- sapi/cli/tests/008.phpt | 4 ++-- sapi/cli/tests/011.phpt | 4 ++-- sapi/cli/tests/014.phpt | 4 ++-- sapi/litespeed/lsapi_main.c | 2 +- 8 files changed, 17 insertions(+), 13 deletions(-) diff --git a/NEWS b/NEWS index e644a9fc6f49f..e27a83e1b6791 100644 --- a/NEWS +++ b/NEWS @@ -153,6 +153,10 @@ PHP NEWS . Fix Segfault when using ReflectionFiber suspended by an internal function. (danog) +- SAPI: + . Fixed GH-11141 (Could not open input file: should be sent to stderr). + (nielsdos) + - Sockets: . Added SO_ATTACH_REUSEPORT_CBPF socket option, to give tighter control over socket binding for a cpu core. (David Carlier) diff --git a/main/main.c b/main/main.c index 2e0802d3b3b00..ad5ef2902f4af 100644 --- a/main/main.c +++ b/main/main.c @@ -2107,7 +2107,7 @@ zend_result php_module_startup(sapi_module_struct *sf, zend_module_entry *additi /* start up winsock services */ if (WSAStartup(wVersionRequested, &wsaData) != 0) { - php_printf("\nwinsock.dll unusable. %d\n", WSAGetLastError()); + fprintf(stderr, "\nwinsock.dll unusable. 
%d\n", WSAGetLastError()); return FAILURE; } php_win32_signal_ctrl_handler_init(); @@ -2166,7 +2166,7 @@ zend_result php_module_startup(sapi_module_struct *sf, zend_module_entry *additi * (this uses configuration parameters from php.ini) */ if (php_init_stream_wrappers(module_number) == FAILURE) { - php_printf("PHP: Unable to initialize stream url wrappers.\n"); + fprintf(stderr, "PHP: Unable to initialize stream url wrappers.\n"); return FAILURE; } @@ -2180,7 +2180,7 @@ zend_result php_module_startup(sapi_module_struct *sf, zend_module_entry *additi /* startup extensions statically compiled in */ if (php_register_internal_extensions_func() == FAILURE) { - php_printf("Unable to start builtin modules\n"); + fprintf(stderr, "Unable to start builtin modules\n"); return FAILURE; } diff --git a/sapi/cli/php_cli.c b/sapi/cli/php_cli.c index 5e84ae7379cfb..653093cee9c07 100644 --- a/sapi/cli/php_cli.c +++ b/sapi/cli/php_cli.c @@ -574,7 +574,7 @@ static zend_result cli_seek_file_begin(zend_file_handle *file_handle, char *scri { FILE *fp = VCWD_FOPEN(script_file, "rb"); if (!fp) { - php_printf("Could not open input file: %s\n", script_file); + fprintf(stderr, "Could not open input file: %s\n", script_file); return FAILURE; } diff --git a/sapi/cli/tests/007.phpt b/sapi/cli/tests/007.phpt index 2de5880a89f74..6f27586e27d96 100644 --- a/sapi/cli/tests/007.phpt +++ b/sapi/cli/tests/007.phpt @@ -45,8 +45,8 @@ string(81) " " -string(33) "Could not open input file: wrong -" +Could not open input file: wrong +NULL string(43) " " Done diff --git a/sapi/cli/tests/008.phpt b/sapi/cli/tests/008.phpt index 5edd40c84fefd..a8b205056300e 100644 --- a/sapi/cli/tests/008.phpt +++ b/sapi/cli/tests/008.phpt @@ -41,6 +41,6 @@ Stack trace: #0 {main} thrown in %s on line %d " -string(33) "Could not open input file: wrong -" +Could not open input file: wrong +NULL Done diff --git a/sapi/cli/tests/011.phpt b/sapi/cli/tests/011.phpt index bd61b260f5a94..d45cb94a08c71 100644 --- 
a/sapi/cli/tests/011.phpt +++ b/sapi/cli/tests/011.phpt @@ -50,8 +50,8 @@ echo "Done\n"; --EXPECTF-- string(%d) "No syntax errors detected in %s011.test.php " -string(40) "Could not open input file: some.unknown -" +Could not open input file: some.unknown +NULL string(%d) " Parse error: %s expecting %s{%s in %s on line %d Errors parsing %s011.test.php diff --git a/sapi/cli/tests/014.phpt b/sapi/cli/tests/014.phpt index 09f69faa521b3..734eef0c2ce95 100644 --- a/sapi/cli/tests/014.phpt +++ b/sapi/cli/tests/014.phpt @@ -40,6 +40,6 @@ string(1478) "
<?php
$test 
"var"//var
/* test class */
class test {
    private 
$var = array();

    public static function 
foo(Test $arg) {
        echo 
"hello";
        
var_dump($this);
    }
}

$o = new test;
?>
" -string(35) "Could not open input file: unknown -" +Could not open input file: unknown +NULL Done diff --git a/sapi/litespeed/lsapi_main.c b/sapi/litespeed/lsapi_main.c index 46ad97f2bf335..9c2f4e1344232 100644 --- a/sapi/litespeed/lsapi_main.c +++ b/sapi/litespeed/lsapi_main.c @@ -1347,7 +1347,7 @@ static int cli_main( int argc, char * argv[] ) php_request_shutdown( NULL ); } } else { - php_printf("Could not open input file: %s.\n", *p); + fprintf(stderr, "Could not open input file: %s.\n", *p); } } else { cli_usage(); From f18a0384c1981cf743a1ff3881ef18b7f703036f Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 6 May 2023 10:02:30 +0100 Subject: [PATCH 005/168] ext/pgsql: fix pg_trace test when trace mode is supported. (#11191) --- ext/pgsql/tests/pg_trace.phpt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/pgsql/tests/pg_trace.phpt b/ext/pgsql/tests/pg_trace.phpt index 0917959bbef77..16e483cc99213 100644 --- a/ext/pgsql/tests/pg_trace.phpt +++ b/ext/pgsql/tests/pg_trace.phpt @@ -21,6 +21,6 @@ var_dump(pg_trace($tracefile, 'w', $db, 0)); $res = pg_query($db, 'select 1'); ?> ---EXPECT-- -pg_trace(): Argument #4 ($trace_mode) cannot set as trace is unsupported +--EXPECTF-- +pg_trace(): Argument #4 ($trace_mode) %s bool(true) From fa10dfcc81bbaf31ecd34e60def18d70562b6bfd Mon Sep 17 00:00:00 2001 From: Daniel Kesselberg Date: Sat, 25 Feb 2023 10:22:09 +0100 Subject: [PATCH 006/168] Add PKCS7_NOOLDMIMETYPE and OPENSSL_CMS_OLDMIMETYPE PKCS7_NOOLDMIMETYPE to use Content-Type application/pkcs7-mime OPENSSL_CMS_OLDMIMETYPE to use Content-Type application/x-pkcs7-mime SMIME_write_PKCS7 and SMIME_write_CMS are using SMIME_write_ASN1_ex. The Content-Type application/x-pkcs7-mime is generated with the flag SMIME_OLDMIME (0x400).[^1] SMIME_write_PKCS7 set SMIME_OLDMIME by default.[^2] SMIME_write_CMS does not.[^3] I picked OPENSSL_CMS_OLDMIMETYPE over OPENSSL_CMS_NOOLDMIMETYPE because that's what the flag actually does. 
[^1]: https://github.com/openssl/openssl/blob/9a2f78e14a67eeaadefc77d05f0778fc9684d26c/crypto/asn1/asn_mime.c#L248-L251 [^2]: https://github.com/openssl/openssl/blob/9a2f78e14a67eeaadefc77d05f0778fc9684d26c/crypto/pkcs7/pk7_mime.c#L41-L43 [^3]: https://github.com/openssl/openssl/blob/9a2f78e14a67eeaadefc77d05f0778fc9684d26c/crypto/cms/cms_io.c#L93 Signed-off-by: Daniel Kesselberg --- NEWS | 4 ++++ UPGRADING | 4 ++++ ext/openssl/openssl.stub.php | 10 ++++++++++ ext/openssl/openssl_arginfo.h | 4 +++- ext/openssl/tests/openssl_cms_encrypt_basic.phpt | 15 +++++++++++++++ .../tests/openssl_pkcs7_encrypt_basic.phpt | 15 +++++++++++++++ 6 files changed, 51 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index e27a83e1b6791..b0994af905ab6 100644 --- a/NEWS +++ b/NEWS @@ -115,6 +115,10 @@ PHP NEWS . Added memfd api usage, on Linux, for zend_shared_alloc_create_lock() to create an abstract anonymous file for the opcache's lock. (Max Kellermann) +- OpenSSL: + . Added OPENSSL_CMS_OLDMIMETYPE and PKCS7_NOOLDMIMETYPE contants to switch + between mime content types. (Daniel Kesselberg) + - PCNTL: . SA_ONSTACK is now set for pcntl_signal. (Kévin Dunglas) . Added SIGINFO constant. (David Carlier) diff --git a/UPGRADING b/UPGRADING index 6f8701179be55..b7c70c34a96d1 100644 --- a/UPGRADING +++ b/UPGRADING @@ -214,6 +214,10 @@ PHP 8.3 UPGRADE NOTES . MIXED_NUMBERS (Spoofchecker). . HIDDEN_OVERLAY (Spoofchecker). +- OpenSSL: + . OPENSSL_CMS_OLDMIMETYPE + . PKCS7_NOOLDMIMETYPE + - PCNTL: . 
SIGINFO diff --git a/ext/openssl/openssl.stub.php b/ext/openssl/openssl.stub.php index a9fad2eaeae90..6ebb519ba34a2 100644 --- a/ext/openssl/openssl.stub.php +++ b/ext/openssl/openssl.stub.php @@ -161,6 +161,11 @@ * @cvalue PKCS7_NOSIGS */ const PKCS7_NOSIGS = UNKNOWN; +/** + * @var int + * @cvalue PKCS7_NOOLDMIMETYPE + */ +const PKCS7_NOOLDMIMETYPE = UNKNOWN; /** * @var int @@ -202,6 +207,11 @@ * @cvalue CMS_NOSIGS */ const OPENSSL_CMS_NOSIGS = UNKNOWN; +/** + * @var int + * @cvalue CMS_NOOLDMIMETYPE + */ +const OPENSSL_CMS_OLDMIMETYPE = UNKNOWN; /** * @var int diff --git a/ext/openssl/openssl_arginfo.h b/ext/openssl/openssl_arginfo.h index 3e1b4a778a967..b53e70242466f 100644 --- a/ext/openssl/openssl_arginfo.h +++ b/ext/openssl/openssl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: be79b4cc0d9eb4469c43f10208b86369dcc1239d */ + * Stub hash: 549946c91248fddc4d43502d32335b68cfbe71f2 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_openssl_x509_export_to_file, 0, 2, _IS_BOOL, 0) ZEND_ARG_OBJ_TYPE_MASK(0, certificate, OpenSSLCertificate, MAY_BE_STRING, NULL) @@ -578,6 +578,7 @@ static void register_openssl_symbols(int module_number) REGISTER_LONG_CONSTANT("PKCS7_NOATTR", PKCS7_NOATTR, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PKCS7_BINARY", PKCS7_BINARY, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PKCS7_NOSIGS", PKCS7_NOSIGS, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PKCS7_NOOLDMIMETYPE", PKCS7_NOOLDMIMETYPE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_DETACHED", CMS_DETACHED, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_TEXT", CMS_TEXT, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_NOINTERN", CMS_NOINTERN, CONST_PERSISTENT); @@ -586,6 +587,7 @@ static void register_openssl_symbols(int module_number) REGISTER_LONG_CONSTANT("OPENSSL_CMS_NOATTR", CMS_NOATTR, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_BINARY", CMS_BINARY, CONST_PERSISTENT); 
REGISTER_LONG_CONSTANT("OPENSSL_CMS_NOSIGS", CMS_NOSIGS, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("OPENSSL_CMS_OLDMIMETYPE", CMS_NOOLDMIMETYPE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_PKCS1_PADDING", RSA_PKCS1_PADDING, CONST_PERSISTENT); #if defined(RSA_SSLV23_PADDING) REGISTER_LONG_CONSTANT("OPENSSL_SSLV23_PADDING", RSA_SSLV23_PADDING, CONST_PERSISTENT); diff --git a/ext/openssl/tests/openssl_cms_encrypt_basic.phpt b/ext/openssl/tests/openssl_cms_encrypt_basic.phpt index cf3e7ae7f3787..14b5231fdd327 100644 --- a/ext/openssl/tests/openssl_cms_encrypt_basic.phpt +++ b/ext/openssl/tests/openssl_cms_encrypt_basic.phpt @@ -9,6 +9,9 @@ $outfile = tempnam(sys_get_temp_dir(), "cms_enc_basic"); if ($outfile === false) die("failed to get a temporary filename!"); $outfile2 = $outfile . ".out"; +$outfile3 = tempnam(sys_get_temp_dir(), "cms_enc_basic"); +if ($outfile3 === false) + die("failed to get a temporary filename!"); $single_cert = "file://" . __DIR__ . "/cert.crt"; $privkey = "file://" . __DIR__ . "/private_rsa_1024.key"; $wrongkey = "file://" . __DIR__ . 
"/private_rsa_2048.key"; @@ -33,6 +36,7 @@ var_dump(openssl_cms_encrypt($infile, $outfile, $wrong, $headers, cipher_algo: $ var_dump(openssl_cms_encrypt($infile, $outfile, $empty, $headers, cipher_algo: $cipher)); var_dump(openssl_cms_encrypt($infile, $outfile, $multi_certs, $headers, cipher_algo: $cipher)); var_dump(openssl_cms_encrypt($infile, $outfile, array_map('openssl_x509_read', $multi_certs), $headers, cipher_algo: $cipher)); +var_dump(openssl_cms_encrypt($infile, $outfile3, $single_cert, $headers, flags: OPENSSL_CMS_OLDMIMETYPE, cipher_algo: $cipher)); if (file_exists($outfile)) { echo "true\n"; @@ -42,6 +46,15 @@ if (file_exists($outfile2)) { echo "true\n"; unlink($outfile2); } + +if (file_exists($outfile3)) { + $content = file_get_contents($outfile3, false, null, 0, 256); + if (str_contains($content, 'Content-Type: application/x-pkcs7-mime; smime-type=enveloped-data; name="smime.p7m"')) { + echo "true\n"; + } + unset($content); + unlink($outfile3); +} ?> --EXPECT-- bool(true) @@ -57,5 +70,7 @@ bool(false) bool(false) bool(true) bool(true) +bool(true) +true true true diff --git a/ext/openssl/tests/openssl_pkcs7_encrypt_basic.phpt b/ext/openssl/tests/openssl_pkcs7_encrypt_basic.phpt index 070fd1c4ec8af..0ae5873c9bb49 100644 --- a/ext/openssl/tests/openssl_pkcs7_encrypt_basic.phpt +++ b/ext/openssl/tests/openssl_pkcs7_encrypt_basic.phpt @@ -11,6 +11,9 @@ if ($outfile === false) $outfile2 = tempnam(sys_get_temp_dir(), "ssl"); if ($outfile2 === false) die("failed to get a temporary filename!"); +$outfile3 = tempnam(sys_get_temp_dir(), "ssl"); +if ($outfile3 === false) + die("failed to get a temporary filename!"); $single_cert = "file://" . __DIR__ . "/cert.crt"; $privkey = "file://" . __DIR__ . 
"/private_rsa_1024.key"; @@ -34,6 +37,7 @@ var_dump(openssl_pkcs7_encrypt($infile, $outfile, $wrong, $headers, 0, $cipher)) var_dump(openssl_pkcs7_encrypt($infile, $outfile, $empty, $headers, 0, $cipher)); var_dump(openssl_pkcs7_encrypt($infile, $outfile, $multi_certs, $headers, 0, $cipher)); var_dump(openssl_pkcs7_encrypt($infile, $outfile, array_map('openssl_x509_read', $multi_certs), $headers, 0, $cipher)); +var_dump(openssl_pkcs7_encrypt($infile, $outfile3, $single_cert, $headers, PKCS7_NOOLDMIMETYPE, $cipher)); if (file_exists($outfile)) { echo "true\n"; @@ -43,6 +47,15 @@ if (file_exists($outfile2)) { echo "true\n"; unlink($outfile2); } + +if (file_exists($outfile3)) { + $content = file_get_contents($outfile3, false, null, 0, 256); + if (str_contains($content, 'Content-Type: application/pkcs7-mime; smime-type=enveloped-data; name="smime.p7m"')) { + echo "true\n"; + } + unset($content); + unlink($outfile3); +} ?> --EXPECT-- bool(true) @@ -57,5 +70,7 @@ bool(false) bool(false) bool(true) bool(true) +bool(true) +true true true From 5690e8baea1047ec44141420ca703020072d0b15 Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Sat, 6 May 2023 11:39:35 +0100 Subject: [PATCH 007/168] Add myself as a standard CODEOWNER to not miss some changes I have got some knowledge of some pieces there so happy to do reviews. --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/CODEOWNERS b/CODEOWNERS index befcdd6fce863..62a501854ac1f 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -26,6 +26,7 @@ /ext/session @Girgias /ext/sockets @devnexen /ext/spl @Girgias +/ext/standard @bukka /main @bukka /sapi/fpm @bukka /Zend @iluuu1994 From 4d4b9604ca126b54ca34df07e57c28578fcf84be Mon Sep 17 00:00:00 2001 From: Florian Moser Date: Mon, 10 Apr 2023 20:12:31 +0200 Subject: [PATCH 008/168] Fix GH-11054: Reset OpenSSL errors when using a PEM public key The error happens when the PEM contains a public key, as it will be first tried to be parsed as a certificate. 
The parsing as a certificate fails, which then leads to a corresponding error tracked by PHP with the next call to php_openssl_store_errors(). This change introduces an error marking to be able to reset the stored errors to the state before trying the certificate. Closes GH-11055 --- NEWS | 2 + ext/openssl/openssl.c | 37 +++++++++++++++++++ ext/openssl/php_openssl.h | 1 + ext/openssl/tests/bug11054.pem | 9 +++++ ext/openssl/tests/bug11054.phpt | 15 ++++++++ .../tests/openssl_error_string_basic.phpt | 2 +- .../openssl_error_string_basic_openssl3.phpt | 6 +-- 7 files changed, 66 insertions(+), 6 deletions(-) create mode 100644 ext/openssl/tests/bug11054.pem create mode 100644 ext/openssl/tests/bug11054.phpt diff --git a/NEWS b/NEWS index b0994af905ab6..3b5b6f03c88fb 100644 --- a/NEWS +++ b/NEWS @@ -118,6 +118,8 @@ PHP NEWS - OpenSSL: . Added OPENSSL_CMS_OLDMIMETYPE and PKCS7_NOOLDMIMETYPE contants to switch between mime content types. (Daniel Kesselberg) + . Fixed GH-11054: Reset OpenSSL errors when using a PEM public key. + (Florian Moser) - PCNTL: . SA_ONSTACK is now set for pcntl_signal. 
(Kévin Dunglas) diff --git a/ext/openssl/openssl.c b/ext/openssl/openssl.c index f74f25c30403e..4009a5b84df1f 100644 --- a/ext/openssl/openssl.c +++ b/ext/openssl/openssl.c @@ -476,6 +476,37 @@ void php_openssl_store_errors(void) } /* }}} */ +/* {{{ php_openssl_errors_set_mark */ +void php_openssl_errors_set_mark(void) { + if (!OPENSSL_G(errors)) { + return; + } + + if (!OPENSSL_G(errors_mark)) { + OPENSSL_G(errors_mark) = pecalloc(1, sizeof(struct php_openssl_errors), 1); + } + + memcpy(OPENSSL_G(errors_mark), OPENSSL_G(errors), sizeof(struct php_openssl_errors)); +} +/* }}} */ + +/* {{{ php_openssl_errors_restore_mark */ +void php_openssl_errors_restore_mark(void) { + if (!OPENSSL_G(errors)) { + return; + } + + struct php_openssl_errors *errors = OPENSSL_G(errors); + + if (!OPENSSL_G(errors_mark)) { + errors->top = 0; + errors->bottom = 0; + } else { + memcpy(errors, OPENSSL_G(errors_mark), sizeof(struct php_openssl_errors)); + } +} +/* }}} */ + /* openssl file path check error function */ static void php_openssl_check_path_error(uint32_t arg_num, int type, const char *format, ...) 
{ @@ -1293,6 +1324,7 @@ PHP_GINIT_FUNCTION(openssl) ZEND_TSRMLS_CACHE_UPDATE(); #endif openssl_globals->errors = NULL; + openssl_globals->errors_mark = NULL; } /* }}} */ @@ -1302,6 +1334,9 @@ PHP_GSHUTDOWN_FUNCTION(openssl) if (openssl_globals->errors) { pefree(openssl_globals->errors, 1); } + if (openssl_globals->errors_mark) { + pefree(openssl_globals->errors_mark, 1); + } } /* }}} */ @@ -3571,12 +3606,14 @@ static EVP_PKEY *php_openssl_pkey_from_zval( } /* it's an X509 file/cert of some kind, and we need to extract the data from that */ if (public_key) { + php_openssl_errors_set_mark(); cert = php_openssl_x509_from_str(Z_STR_P(val), arg_num, false, NULL); if (cert) { free_cert = 1; } else { /* not a X509 certificate, try to retrieve public key */ + php_openssl_errors_restore_mark(); BIO* in; if (is_file) { in = BIO_new_file(file_path, PHP_OPENSSL_BIO_MODE_R(PKCS7_BINARY)); diff --git a/ext/openssl/php_openssl.h b/ext/openssl/php_openssl.h index 5cfadbedc9829..304854b4bf91d 100644 --- a/ext/openssl/php_openssl.h +++ b/ext/openssl/php_openssl.h @@ -80,6 +80,7 @@ struct php_openssl_errors { ZEND_BEGIN_MODULE_GLOBALS(openssl) struct php_openssl_errors *errors; + struct php_openssl_errors *errors_mark; ZEND_END_MODULE_GLOBALS(openssl) #define OPENSSL_G(v) ZEND_MODULE_GLOBALS_ACCESSOR(openssl, v) diff --git a/ext/openssl/tests/bug11054.pem b/ext/openssl/tests/bug11054.pem new file mode 100644 index 0000000000000..60d7afa827f2c --- /dev/null +++ b/ext/openssl/tests/bug11054.pem @@ -0,0 +1,9 @@ +-----BEGIN PUBLIC KEY----- +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAvYH14fT4DPgyffkDOrHt +x0q+rxclB48h2ykgbR3QyDG2d7hMSXjtqEseO/iR1FdAv7UevIKyHFbHpJilOIwo +mEqQNxUQCWdZsWhv7ZVfG8UUgki7LKMGPruJM97vteBS101hSCaCQz+zTyVyP8Uy +nqx5zlPmcBUA92gAFfSCa+tm/lR2BY5g/20mZX/lMY0xXV1iLhfdK6RgJYXX2SdH +YR/01IgmjgTfIp7gX+xixDgGZuZY++jo8C52udFkCf5vxyG4Ed57vRfCLFOPfeY4 +r3i0Jiply65zSo8y/6KxudRtmGOfV2qb2EsMTW9PaLs3+rnhhiYBM/nR4V5ux6u6 +DwIDAQAB +-----END PUBLIC KEY----- diff --git 
a/ext/openssl/tests/bug11054.phpt b/ext/openssl/tests/bug11054.phpt new file mode 100644 index 0000000000000..25f23d5b1de3e --- /dev/null +++ b/ext/openssl/tests/bug11054.phpt @@ -0,0 +1,15 @@ +--TEST-- +Bug #11054: Calling with a PEM public key results in error +--EXTENSIONS-- +openssl +--FILE-- + +--EXPECT-- +bool(false) diff --git a/ext/openssl/tests/openssl_error_string_basic.phpt b/ext/openssl/tests/openssl_error_string_basic.phpt index e4ea264b3bf1f..02e8b3fbc49d1 100644 --- a/ext/openssl/tests/openssl_error_string_basic.phpt +++ b/ext/openssl/tests/openssl_error_string_basic.phpt @@ -123,7 +123,7 @@ expect_openssl_errors('openssl_private_decrypt', ['04065072']); // public encrypt and decrypt with failed padding check and padding @openssl_public_encrypt("data", $crypted, $public_key_file, 1000); @openssl_public_decrypt("data", $crypted, $public_key_file); -expect_openssl_errors('openssl_private_(en|de)crypt padding', [$err_pem_no_start_line, '0408F090', '04067072']); +expect_openssl_errors('openssl_private_(en|de)crypt padding', ['0408F090', '04067072']); // X509 echo "X509 errors\n"; diff --git a/ext/openssl/tests/openssl_error_string_basic_openssl3.phpt b/ext/openssl/tests/openssl_error_string_basic_openssl3.phpt index d435a53e3047f..041a0a0b5648a 100644 --- a/ext/openssl/tests/openssl_error_string_basic_openssl3.phpt +++ b/ext/openssl/tests/openssl_error_string_basic_openssl3.phpt @@ -114,9 +114,6 @@ expect_openssl_errors('openssl_pkey_export_to_file write', ['10080002']); // successful export @openssl_pkey_export($private_key_file_with_pass, $out, 'wrong pwd', $options); expect_openssl_errors('openssl_pkey_export', ['1C800064', '04800065']); -// invalid x509 for getting public key -@openssl_pkey_get_public($private_key_file); -expect_openssl_errors('openssl_pkey_get_public', [$err_pem_no_start_line]); // private encrypt with unknown padding @openssl_private_encrypt("data", $crypted, $private_key_file, 1000); 
expect_openssl_errors('openssl_private_encrypt', ['1C8000A5']); @@ -126,7 +123,7 @@ expect_openssl_errors('openssl_private_decrypt', ['0200009F', '02000072']); // public encrypt and decrypt with failed padding check and padding @openssl_public_encrypt("data", $crypted, $public_key_file, 1000); @openssl_public_decrypt("data", $crypted, $public_key_file); -expect_openssl_errors('openssl_private_(en|de)crypt padding', [$err_pem_no_start_line, '02000076', '0200008A', '02000072', '1C880004']); +expect_openssl_errors('openssl_private_(en|de)crypt padding', ['02000076', '0200008A', '02000072', '1C880004']); // X509 echo "X509 errors\n"; @@ -170,7 +167,6 @@ openssl_pkey_export_to_file opening: ok openssl_pkey_export_to_file pem: ok openssl_pkey_export_to_file write: ok openssl_pkey_export: ok -openssl_pkey_get_public: ok openssl_private_encrypt: ok openssl_private_decrypt: ok openssl_private_(en|de)crypt padding: ok From a0e71cb8114661202c7cb20a9ef21777516bae67 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 6 May 2023 18:26:57 +0200 Subject: [PATCH 009/168] Fix maximum argument count of pcntl_forkx() Closes GH-11199. --- NEWS | 3 +++ ext/pcntl/pcntl.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 3f025d79687bc..198059fc72b14 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,9 @@ PHP NEWS . Fixed bug GH-11134 (Incorrect match default branch optimization). (ilutov) . Fixed too wide OR and AND range inference. (nielsdos) +- PCNTL: + . Fixed maximum argument count of pcntl_forkx(). (nielsdos) + - PGSQL: . Fixed parameter parsing of pg_lo_export(). 
(kocsismate) diff --git a/ext/pcntl/pcntl.c b/ext/pcntl/pcntl.c index d040f96889ee8..c2b7f390fcafd 100644 --- a/ext/pcntl/pcntl.c +++ b/ext/pcntl/pcntl.c @@ -1295,7 +1295,7 @@ PHP_FUNCTION(pcntl_forkx) zend_long flags; pid_t pid; - ZEND_PARSE_PARAMETERS_START(1, 2) + ZEND_PARSE_PARAMETERS_START(1, 1) Z_PARAM_LONG(flags) ZEND_PARSE_PARAMETERS_END(); From 7c0dfc5cf58d3c445b935fa14ea8f5f13568c419 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 29 Apr 2023 21:07:50 +0200 Subject: [PATCH 010/168] Fix GH-11160: Few tests failed building with new libxml 2.11.0 It's possible to categorise the failures into 2 categories: - Changed error message. In this case we either duplicate the test and modify the error message. Or if the change in error message is small, we use the EXPECTF matchers to make the test compatible with both old and new versions of libxml2. - Missing warnings. This is caused by a change in libxml2 where the parser started using SAX APIs internally [1]. In this case the error_type passed to php_libxml_internal_error_handler() changed from PHP_LIBXML_ERROR to PHP_LIBXML_CTX_WARNING because it internally started to use the SAX handlers instead of the generic handlers. However, for the SAX handlers the current input stack is empty, so nothing is actually printed. I fixed this by falling back to a regular warning without a filename & line number reference, which mimics the old behaviour. Furthermore, this change now also shows an additional warning in a test which was previously hidden. [1] https://gitlab.gnome.org/GNOME/libxml2/-/commit/9a82b94a94bd310db426edd453b0f38c6c8f69f5 Closes GH-11162. 
--- NEWS | 4 + .../DOMDocument_loadXML_error2_gte2_11.phpt | 34 +++++++ ...> DOMDocument_loadXML_error2_pre2_11.phpt} | 4 + .../DOMDocument_load_error2_gte2_11.phpt | 34 +++++++ ...t => DOMDocument_load_error2_pre2_11.phpt} | 4 + ext/libxml/libxml.c | 2 + ext/libxml/tests/bug61367-read_2.phpt | 2 +- .../tests/libxml_disable_entity_loader_2.phpt | 2 +- ...set_external_entity_loader_variation2.phpt | 2 + ext/xml/tests/bug26614_libxml_gte2_11.phpt | 95 +++++++++++++++++++ ...bxml.phpt => bug26614_libxml_pre2_11.phpt} | 1 + 11 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 ext/dom/tests/DOMDocument_loadXML_error2_gte2_11.phpt rename ext/dom/tests/{DOMDocument_loadXML_error2.phpt => DOMDocument_loadXML_error2_pre2_11.phpt} (90%) create mode 100644 ext/dom/tests/DOMDocument_load_error2_gte2_11.phpt rename ext/dom/tests/{DOMDocument_load_error2.phpt => DOMDocument_load_error2_pre2_11.phpt} (90%) create mode 100644 ext/xml/tests/bug26614_libxml_gte2_11.phpt rename ext/xml/tests/{bug26614_libxml.phpt => bug26614_libxml_pre2_11.phpt} (96%) diff --git a/NEWS b/NEWS index 0028d6944f82c..b61cfd4cdcbbc 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,10 @@ PHP NEWS . Fixed bug GH-11189 (Exceeding memory limit in zend_hash_do_resize leaves the array in an invalid state). (Bob) +- LibXML: + . Fixed bug GH-11160 (Few tests failed building with new libxml 2.11.0). + (nielsdos) + - Opcache: . Fixed bug GH-11134 (Incorrect match default branch optimization). (ilutov) . Fixed too wide OR and AND range inference. 
(nielsdos) diff --git a/ext/dom/tests/DOMDocument_loadXML_error2_gte2_11.phpt b/ext/dom/tests/DOMDocument_loadXML_error2_gte2_11.phpt new file mode 100644 index 0000000000000..ff5ceb3fbed53 --- /dev/null +++ b/ext/dom/tests/DOMDocument_loadXML_error2_gte2_11.phpt @@ -0,0 +1,34 @@ +--TEST-- +Test DOMDocument::loadXML() detects not-well formed XML +--SKIPIF-- += 2.11'); +?> +--DESCRIPTION-- +This test verifies the method detects attributes values not closed between " or ' +Environment variables used in the test: +- XML_FILE: the xml file to load +- LOAD_OPTIONS: the second parameter to pass to the method +- EXPECTED_RESULT: the expected result +--CREDITS-- +Antonio Diaz Ruiz +--INI-- +assert.bail=true +--EXTENSIONS-- +dom +--ENV-- +XML_FILE=/not_well_formed2.xml +LOAD_OPTIONS=0 +EXPECTED_RESULT=0 +--FILE_EXTERNAL-- +domdocumentloadxml_test_method.inc +--EXPECTF-- +Warning: DOMDocument::loadXML(): AttValue: " or ' expected in Entity, line: 4 in %s on line %d + +Warning: DOMDocument::loadXML(): internal error: xmlParseStartTag: problem parsing attributes in Entity, line: 4 in %s on line %d + +Warning: DOMDocument::loadXML(): Couldn't find end of Start Tag book line 4 in Entity, line: 4 in %s on line %d + +Warning: DOMDocument::loadXML(): Opening and ending tag mismatch: books line 3 and book in Entity, line: 7 in %s on line %d + +Warning: DOMDocument::loadXML(): Extra content at the end of the document in Entity, line: 8 in %s on line %d diff --git a/ext/dom/tests/DOMDocument_loadXML_error2.phpt b/ext/dom/tests/DOMDocument_loadXML_error2_pre2_11.phpt similarity index 90% rename from ext/dom/tests/DOMDocument_loadXML_error2.phpt rename to ext/dom/tests/DOMDocument_loadXML_error2_pre2_11.phpt index a24d5215da48b..c826386f7a4e1 100644 --- a/ext/dom/tests/DOMDocument_loadXML_error2.phpt +++ b/ext/dom/tests/DOMDocument_loadXML_error2_pre2_11.phpt @@ -1,5 +1,9 @@ --TEST-- Test DOMDocument::loadXML() detects not-well formed XML +--SKIPIF-- += 21100) die('skip libxml2 test 
variant for version < 2.11'); +?> --DESCRIPTION-- This test verifies the method detects attributes values not closed between " or ' Environment variables used in the test: diff --git a/ext/dom/tests/DOMDocument_load_error2_gte2_11.phpt b/ext/dom/tests/DOMDocument_load_error2_gte2_11.phpt new file mode 100644 index 0000000000000..32b6bf161142e --- /dev/null +++ b/ext/dom/tests/DOMDocument_load_error2_gte2_11.phpt @@ -0,0 +1,34 @@ +--TEST-- +Test DOMDocument::load() detects not-well formed +--SKIPIF-- += 2.11'); +?> +--DESCRIPTION-- +This test verifies the method detects attributes values not closed between " or ' +Environment variables used in the test: +- XML_FILE: the xml file to load +- LOAD_OPTIONS: the second parameter to pass to the method +- EXPECTED_RESULT: the expected result +--CREDITS-- +Antonio Diaz Ruiz +--INI-- +assert.bail=true +--EXTENSIONS-- +dom +--ENV-- +XML_FILE=/not_well_formed2.xml +LOAD_OPTIONS=0 +EXPECTED_RESULT=0 +--FILE_EXTERNAL-- +domdocumentload_test_method.inc +--EXPECTF-- +Warning: DOMDocument::load(): AttValue: " or ' expected in %s on line %d + +Warning: DOMDocument::load(): internal error: xmlParseStartTag: problem parsing attributes in %s on line %d + +Warning: DOMDocument::load(): Couldn't find end of Start Tag book line 4 in %s on line %d + +Warning: DOMDocument::load(): Opening and ending tag mismatch: books line 3 and book in %s on line %d + +Warning: DOMDocument::load(): Extra content at the end of the document in %s on line %d diff --git a/ext/dom/tests/DOMDocument_load_error2.phpt b/ext/dom/tests/DOMDocument_load_error2_pre2_11.phpt similarity index 90% rename from ext/dom/tests/DOMDocument_load_error2.phpt rename to ext/dom/tests/DOMDocument_load_error2_pre2_11.phpt index cd13b3f901b27..695740be9ca92 100644 --- a/ext/dom/tests/DOMDocument_load_error2.phpt +++ b/ext/dom/tests/DOMDocument_load_error2_pre2_11.phpt @@ -1,5 +1,9 @@ --TEST-- Test DOMDocument::load() detects not-well formed XML +--SKIPIF-- += 21100) die('skip 
libxml2 test variant for version < 2.11'); +?> --DESCRIPTION-- This test verifies the method detects attributes values not closed between " or ' Environment variables used in the test: diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index b4099bb7b8227..3959b362a0e40 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -527,6 +527,8 @@ static void php_libxml_ctx_error_level(int level, void *ctx, const char *msg) } else { php_error_docref(NULL, level, "%s in Entity, line: %d", msg, parser->input->line); } + } else { + php_error_docref(NULL, E_WARNING, "%s", msg); } } diff --git a/ext/libxml/tests/bug61367-read_2.phpt b/ext/libxml/tests/bug61367-read_2.phpt index 8adad1ce429fa..38f12949bcbb2 100644 --- a/ext/libxml/tests/bug61367-read_2.phpt +++ b/ext/libxml/tests/bug61367-read_2.phpt @@ -56,6 +56,6 @@ bool(true) int(4) bool(true) -Warning: DOMDocument::loadXML(): I/O warning : failed to load external entity "file:///%s/test_bug_61367-read/bad" in %s on line %d +Warning: DOMDocument::loadXML(): %Sfailed to load external entity "file:///%s/test_bug_61367-read/bad" in %s on line %d Warning: Attempt to read property "nodeValue" on null in %s on line %d diff --git a/ext/libxml/tests/libxml_disable_entity_loader_2.phpt b/ext/libxml/tests/libxml_disable_entity_loader_2.phpt index ad253171625f9..182fe13cfda96 100644 --- a/ext/libxml/tests/libxml_disable_entity_loader_2.phpt +++ b/ext/libxml/tests/libxml_disable_entity_loader_2.phpt @@ -39,6 +39,6 @@ bool(true) Deprecated: Function libxml_disable_entity_loader() is deprecated in %s on line %d bool(false) -Warning: DOMDocument::loadXML(): I/O warning : failed to load external entity "%s" in %s on line %d +Warning: DOMDocument::loadXML(): %Sfailed to load external entity "%s" in %s on line %d bool(true) Done diff --git a/ext/libxml/tests/libxml_set_external_entity_loader_variation2.phpt b/ext/libxml/tests/libxml_set_external_entity_loader_variation2.phpt index 9ebf2c0e9d32a..5657b727bacd2 100644 --- 
a/ext/libxml/tests/libxml_set_external_entity_loader_variation2.phpt +++ b/ext/libxml/tests/libxml_set_external_entity_loader_variation2.phpt @@ -39,6 +39,8 @@ echo "Done.\n"; string(10) "-//FOO/BAR" string(%d) "%sfoobar.dtd" +Warning: DOMDocument::validate(): Failed to load external entity "-//FOO/BAR" in %s on line %d + Warning: DOMDocument::validate(): Could not load the external subset "foobar.dtd" in %s on line %d bool(false) bool(true) diff --git a/ext/xml/tests/bug26614_libxml_gte2_11.phpt b/ext/xml/tests/bug26614_libxml_gte2_11.phpt new file mode 100644 index 0000000000000..9a81b67686d14 --- /dev/null +++ b/ext/xml/tests/bug26614_libxml_gte2_11.phpt @@ -0,0 +1,95 @@ +--TEST-- +Bug #26614 (CDATA sections skipped on line count) +--EXTENSIONS-- +xml +--SKIPIF-- += 2.11'); +?> +--FILE-- + + + +'; + +// Case 2: replace some characters so that we get comments instead +$xmls["Comment"] =' + + +'; + +// Case 3: replace even more characters so that only textual data is left +$xmls["Text"] =' + +-!-- ATA[ +multi +line +CDATA +block +--- +'; + +function startElement($parser, $name, $attrs) { + printf("<$name> at line %d, col %d (byte %d)\n", + xml_get_current_line_number($parser), + xml_get_current_column_number($parser), + xml_get_current_byte_index($parser)); +} + +function endElement($parser, $name) { + printf(" at line %d, col %d (byte %d)\n", + xml_get_current_line_number($parser), + xml_get_current_column_number($parser), + xml_get_current_byte_index($parser)); +} + +function characterData($parser, $data) { + // dummy +} + +foreach ($xmls as $desc => $xml) { + echo "$desc\n"; + $xml_parser = xml_parser_create(); + xml_set_element_handler($xml_parser, "startElement", "endElement"); + xml_set_character_data_handler($xml_parser, "characterData"); + if (!xml_parse($xml_parser, $xml, true)) + echo "Error: ".xml_error_string(xml_get_error_code($xml_parser))."\n"; + xml_parser_free($xml_parser); +} +?> +--EXPECTF-- +CDATA + at line 2, col %d (byte 50) + at line 9, col 
%d (byte 96) +Comment + at line 2, col %d (byte 50) + at line 9, col %d (byte 96) +Text + at line 2, col %d (byte 50) + at line 9, col %d (byte 96) diff --git a/ext/xml/tests/bug26614_libxml.phpt b/ext/xml/tests/bug26614_libxml_pre2_11.phpt similarity index 96% rename from ext/xml/tests/bug26614_libxml.phpt rename to ext/xml/tests/bug26614_libxml_pre2_11.phpt index 6acf2c44b2a66..c581a08e9b8fb 100644 --- a/ext/xml/tests/bug26614_libxml.phpt +++ b/ext/xml/tests/bug26614_libxml_pre2_11.phpt @@ -5,6 +5,7 @@ xml --SKIPIF-- = 21100) die('skip libxml2 test variant for version < 2.11'); ?> --FILE-- Date: Sun, 7 May 2023 13:17:19 +0200 Subject: [PATCH 011/168] Correctly copy lineno for zval asts (#11203) The comment was incorrect. Zval ASTs store their lineno in u2, but u2 does not get copied in ZVAL_COPY. This triggers use-of-uninitialized errors with MSAN. Unfortunately, I don't have a simple reproducer. --- Zend/zend_ast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zend/zend_ast.c b/Zend/zend_ast.c index 6d5c5aaa44635..70d01bb9f85f3 100644 --- a/Zend/zend_ast.c +++ b/Zend/zend_ast.c @@ -1063,8 +1063,8 @@ static void* ZEND_FASTCALL zend_ast_tree_copy(zend_ast *ast, void *buf) new->kind = ZEND_AST_ZVAL; new->attr = ast->attr; ZVAL_COPY(&new->val, zend_ast_get_zval(ast)); + Z_LINENO(new->val) = zend_ast_get_lineno(ast); buf = (void*)((char*)buf + sizeof(zend_ast_zval)); - // Lineno gets copied with ZVAL_COPY } else if (ast->kind == ZEND_AST_CONSTANT) { zend_ast_zval *new = (zend_ast_zval*)buf; new->kind = ZEND_AST_CONSTANT; From 1820c421f172fdbc37c83af61f4fdd45d9e6c50a Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Sun, 7 May 2023 15:00:30 +0100 Subject: [PATCH 012/168] Prevent unnecessary string duplication in assert() (#11031) --- ext/standard/assert.c | 10 +++--- .../assert/assert_closures_multiple.phpt | 35 +++++++++++++++++++ 2 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 
ext/standard/tests/assert/assert_closures_multiple.phpt diff --git a/ext/standard/assert.c b/ext/standard/assert.c index 0b43033dd4d30..74467eb433f03 100644 --- a/ext/standard/assert.c +++ b/ext/standard/assert.c @@ -34,8 +34,6 @@ ZEND_DECLARE_MODULE_GLOBALS(assert) #define ASSERTG(v) ZEND_MODULE_GLOBALS_ACCESSOR(assert, v) -#define SAFE_STRING(s) ((s)?(s):"") - PHPAPI zend_class_entry *assertion_error_ce; static PHP_INI_MH(OnChangeCallback) /* {{{ */ @@ -151,9 +149,12 @@ PHP_FUNCTION(assert) zval args[4]; zval retval; uint32_t lineno = zend_get_executed_lineno(); - const char *filename = zend_get_executed_filename(); + zend_string *filename = zend_get_executed_filename_ex(); + if (UNEXPECTED(!filename)) { + filename = ZSTR_KNOWN(ZEND_STR_UNKNOWN_CAPITALIZED); + } - ZVAL_STRING(&args[0], SAFE_STRING(filename)); + ZVAL_STR(&args[0], filename); ZVAL_LONG(&args[1], lineno); ZVAL_NULL(&args[2]); @@ -166,7 +167,6 @@ PHP_FUNCTION(assert) call_user_function(NULL, NULL, &ASSERTG(callback), &retval, 3, args); } - zval_ptr_dtor(&args[0]); zval_ptr_dtor(&retval); } diff --git a/ext/standard/tests/assert/assert_closures_multiple.phpt b/ext/standard/tests/assert/assert_closures_multiple.phpt new file mode 100644 index 0000000000000..e63c355299f39 --- /dev/null +++ b/ext/standard/tests/assert/assert_closures_multiple.phpt @@ -0,0 +1,35 @@ +--TEST-- +assert() asserting multiple with callback +--INI-- +assert.active = 1 +assert.warning = 1 +assert.bail = 0 +assert.exception=1 +--FILE-- + +DONE +--EXPECT-- +DONE From 646f54b59400135353f584d34295faba838dce85 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Sat, 6 May 2023 10:56:29 +0100 Subject: [PATCH 013/168] ext/standard/array.c: use uint32_t instead of incorrect int type Drive-by indentation fixes and bool usage --- ext/standard/array.c | 96 ++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/ext/standard/array.c b/ext/standard/array.c index 
5399706320d19..73dd0adac9a9c 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -126,25 +126,25 @@ static zend_never_inline ZEND_COLD int stable_sort_fallback(Bucket *a, Bucket *b static zend_always_inline int php_array_key_compare_unstable_i(Bucket *f, Bucket *s) /* {{{ */ { - zval first; - zval second; - - if (f->key == NULL && s->key == NULL) { - return (zend_long)f->h > (zend_long)s->h ? 1 : -1; - } else if (f->key && s->key) { - return zendi_smart_strcmp(f->key, s->key); - } - if (f->key) { - ZVAL_STR(&first, f->key); - } else { - ZVAL_LONG(&first, f->h); - } - if (s->key) { - ZVAL_STR(&second, s->key); - } else { - ZVAL_LONG(&second, s->h); - } - return zend_compare(&first, &second); + zval first; + zval second; + + if (f->key == NULL && s->key == NULL) { + return (zend_long)f->h > (zend_long)s->h ? 1 : -1; + } else if (f->key && s->key) { + return zendi_smart_strcmp(f->key, s->key); + } + if (f->key) { + ZVAL_STR(&first, f->key); + } else { + ZVAL_LONG(&first, f->h); + } + if (s->key) { + ZVAL_STR(&second, s->key); + } else { + ZVAL_LONG(&second, s->h); + } + return zend_compare(&first, &second); } /* }}} */ @@ -1210,7 +1210,7 @@ static int php_data_compare(const void *f, const void *s) /* {{{ */ Return the lowest value in an array or a series of arguments */ PHP_FUNCTION(min) { - int argc; + uint32_t argc; zval *args = NULL; ZEND_PARSE_PARAMETERS_START(1, -1) @@ -1234,7 +1234,7 @@ PHP_FUNCTION(min) } else { /* mixed min ( mixed $value1 , mixed $value2 [, mixed $value3... ] ) */ zval *min, result; - int i; + uint32_t i; min = &args[0]; @@ -1257,7 +1257,7 @@ PHP_FUNCTION(min) PHP_FUNCTION(max) { zval *args = NULL; - int argc; + uint32_t argc; ZEND_PARSE_PARAMETERS_START(1, -1) Z_PARAM_VARIADIC('+', args, argc) @@ -1280,7 +1280,7 @@ PHP_FUNCTION(max) } else { /* mixed max ( mixed $value1 , mixed $value2 [, mixed $value3... 
] ) */ zval *max, result; - int i; + uint32_t i; max = &args[0]; @@ -2525,7 +2525,7 @@ static void php_compact_var(HashTable *eg_active_symbol_table, zval *return_valu php_error_docref(NULL, E_WARNING, "Undefined variable $%s", ZSTR_VAL(Z_STR_P(entry))); } } else if (Z_TYPE_P(entry) == IS_ARRAY) { - if (Z_REFCOUNTED_P(entry)) { + if (Z_REFCOUNTED_P(entry)) { if (Z_IS_RECURSIVE_P(entry)) { zend_throw_error(NULL, "Recursion detected"); return; @@ -2535,7 +2535,7 @@ static void php_compact_var(HashTable *eg_active_symbol_table, zval *return_valu ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(entry), value_ptr) { php_compact_var(eg_active_symbol_table, return_value, value_ptr, pos); } ZEND_HASH_FOREACH_END(); - if (Z_REFCOUNTED_P(entry)) { + if (Z_REFCOUNTED_P(entry)) { Z_UNPROTECT_RECURSION_P(entry); } } else { @@ -3179,8 +3179,7 @@ PHP_FUNCTION(array_push) zval *args, /* Function arguments array */ *stack, /* Input array */ new_var; /* Variable to be pushed */ - int i, /* Loop counter */ - argc; /* Number of function arguments */ + uint32_t argc; /* Number of function arguments */ ZEND_PARSE_PARAMETERS_START(1, -1) @@ -3189,7 +3188,7 @@ PHP_FUNCTION(array_push) ZEND_PARSE_PARAMETERS_END(); /* For each subsequent argument, make it a reference, increase refcount, and add it to the end of the array */ - for (i = 0; i < argc; i++) { + for (uint32_t i = 0; i < argc; i++) { ZVAL_COPY(&new_var, &args[i]); if (zend_hash_next_index_insert(Z_ARRVAL_P(stack), &new_var) == NULL) { @@ -3387,8 +3386,7 @@ PHP_FUNCTION(array_unshift) zval *args, /* Function arguments array */ *stack; /* Input stack */ HashTable new_hash; /* New hashtable for the stack */ - int argc; /* Number of function arguments */ - int i; + uint32_t argc; /* Number of function arguments */ zend_string *key; zval *value; @@ -3398,7 +3396,7 @@ PHP_FUNCTION(array_unshift) ZEND_PARSE_PARAMETERS_END(); zend_hash_init(&new_hash, zend_hash_num_elements(Z_ARRVAL_P(stack)) + argc, NULL, ZVAL_PTR_DTOR, 0); - for (i = 0; i < argc; i++) 
{ + for (uint32_t i = 0; i < argc; i++) { Z_TRY_ADDREF(args[i]); zend_hash_next_index_insert_new(&new_hash, &args[i]); } @@ -3818,8 +3816,8 @@ PHPAPI int php_array_replace_recursive(HashTable *dest, HashTable *src) /* {{{ * dest_zval = dest_entry; ZVAL_DEREF(dest_zval); if (Z_IS_RECURSIVE_P(dest_zval) || - Z_IS_RECURSIVE_P(src_zval) || - (Z_ISREF_P(src_entry) && Z_ISREF_P(dest_entry) && Z_REF_P(src_entry) == Z_REF_P(dest_entry) && (Z_REFCOUNT_P(dest_entry) % 2))) { + Z_IS_RECURSIVE_P(src_zval) || + (Z_ISREF_P(src_entry) && Z_ISREF_P(dest_entry) && Z_REF_P(src_entry) == Z_REF_P(dest_entry) && (Z_REFCOUNT_P(dest_entry) % 2))) { zend_throw_error(NULL, "Recursion detected"); return 0; } @@ -3857,7 +3855,7 @@ static zend_always_inline void php_array_replace_wrapper(INTERNAL_FUNCTION_PARAM { zval *args = NULL; zval *arg; - int argc, i; + uint32_t argc, i; HashTable *dest; ZEND_PARSE_PARAMETERS_START(1, -1) @@ -3907,7 +3905,7 @@ static zend_always_inline void php_array_merge_wrapper(INTERNAL_FUNCTION_PARAMET { zval *args = NULL; zval *arg; - int argc, i; + uint32_t argc, i; zval *src_entry; HashTable *src, *dest; uint32_t count = 0; @@ -4717,7 +4715,7 @@ static int zval_user_compare(zval *a, zval *b) /* {{{ */ static void php_array_intersect_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_type) /* {{{ */ { - int argc, i; + uint32_t argc, i; zval *args; int (*intersect_data_compare_func)(zval *, zval *) = NULL; bool ok; @@ -4799,7 +4797,8 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int { zval *args = NULL; HashTable *hash; - int arr_argc, i, c = 0; + uint32_t arr_argc, i; + int c = 0; uint32_t idx; Bucket **lists, *list, **ptrs, *p; char *param_spec; @@ -5117,7 +5116,7 @@ PHP_FUNCTION(array_uintersect_uassoc) static void php_array_diff_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_type) /* {{{ */ { - int argc, i; + uint32_t argc, i; zval *args; int (*diff_data_compare_func)(zval *, zval *) = NULL; bool ok; @@ -5194,7 +5193,8 @@ 
static void php_array_diff(INTERNAL_FUNCTION_PARAMETERS, int behavior, int data_ { zval *args = NULL; HashTable *hash; - int arr_argc, i, c; + uint32_t arr_argc, i; + int c; uint32_t idx; Bucket **lists, *list, **ptrs, *p; char *param_spec; @@ -5460,7 +5460,7 @@ PHP_FUNCTION(array_diff_ukey) PHP_FUNCTION(array_diff) { zval *args; - int argc, i; + uint32_t argc, i; uint32_t num; HashTable exclude; zval *value; @@ -5661,15 +5661,15 @@ PHP_FUNCTION(array_multisort) zval* args; zval** arrays; Bucket** indirect; - uint32_t idx; + uint32_t idx; HashTable* hash; - int argc; - int array_size; - int num_arrays = 0; + uint32_t argc; + uint32_t array_size; + uint32_t num_arrays = 0; int parse_state[MULTISORT_LAST]; /* 0 - flag not allowed 1 - flag allowed */ int sort_order = PHP_SORT_ASC; int sort_type = PHP_SORT_REGULAR; - int i, k, n; + uint32_t i, k, n; bucket_compare_func_t *func; ZEND_PARSE_PARAMETERS_START(1, -1) @@ -5755,7 +5755,7 @@ PHP_FUNCTION(array_multisort) /* Make sure the arrays are of the same size. */ array_size = zend_hash_num_elements(Z_ARRVAL_P(arrays[0])); for (i = 1; i < num_arrays; i++) { - if (zend_hash_num_elements(Z_ARRVAL_P(arrays[i])) != (uint32_t)array_size) { + if (zend_hash_num_elements(Z_ARRVAL_P(arrays[i])) != array_size) { zend_value_error("Array sizes are inconsistent"); MULTISORT_ABORT; } @@ -5815,14 +5815,14 @@ PHP_FUNCTION(array_multisort) ZVAL_COPY_VALUE(&hash->arPacked[k], &indirect[k][i].val); } } else { - int repack = 1; + bool repack = true; for (n = 0, k = 0; k < array_size; k++) { hash->arData[k] = indirect[k][i]; if (hash->arData[k].key == NULL) { hash->arData[k].h = n++; } else { - repack = 0; + repack = false; } } if (repack) { From baa07f3de376fb9f28f88a062e8686d99b66bca1 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 6 May 2023 18:23:24 +0200 Subject: [PATCH 014/168] Fix GH-11180: hash_file() appears to be restricted to 3 arguments Closes GH-11198. 
--- NEWS | 4 ++++ ext/hash/hash.c | 2 +- ext/hash/tests/hash_file_basic1.phpt | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index b61cfd4cdcbbc..bebc691d019fe 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,10 @@ PHP NEWS . Fixed bug GH-11189 (Exceeding memory limit in zend_hash_do_resize leaves the array in an invalid state). (Bob) +- Hash: + . Fixed bug GH-11180 (hash_file() appears to be restricted to 3 arguments). + (nielsdos) + - LibXML: . Fixed bug GH-11160 (Few tests failed building with new libxml 2.11.0). (nielsdos) diff --git a/ext/hash/hash.c b/ext/hash/hash.c index aa80f7429fa58..5b33d946376b3 100644 --- a/ext/hash/hash.c +++ b/ext/hash/hash.c @@ -449,7 +449,7 @@ PHP_FUNCTION(hash_file) bool raw_output = 0; HashTable *args = NULL; - ZEND_PARSE_PARAMETERS_START(2, 3) + ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STR(algo) Z_PARAM_STRING(data, data_len) Z_PARAM_OPTIONAL diff --git a/ext/hash/tests/hash_file_basic1.phpt b/ext/hash/tests/hash_file_basic1.phpt index 9ffe9a2f6cb01..334bad8c2726a 100644 --- a/ext/hash/tests/hash_file_basic1.phpt +++ b/ext/hash/tests/hash_file_basic1.phpt @@ -42,6 +42,12 @@ echo "sha512: " . hash_file('sha512', $file). "\n"; echo "snefru: " . hash_file('snefru', $file). "\n"; echo "tiger192,3: " . hash_file('tiger192,3', $file). "\n"; echo "whirlpool: " . hash_file('whirlpool', $file). "\n"; +echo "murmur3a: " . hash_file('murmur3a', $file). "\n"; +echo "murmur3a: " . hash_file('murmur3a', $file, false, ['seed' => 1234]). "\n"; +echo "murmur3c: " . hash_file('murmur3c', $file). "\n"; +echo "murmur3c: " . hash_file('murmur3c', $file, false, ['seed' => 1234]). "\n"; +echo "murmur3f: " . hash_file('murmur3f', $file). "\n"; +echo "murmur3f: " . hash_file('murmur3f', $file, false, ['seed' => 1234]). "\n"; echo "adler32(raw): " . bin2hex(hash_file('adler32', $file, TRUE)) . "\n"; echo "md5(raw): " . bin2hex(hash_file('md5', $file, TRUE)). 
"\n"; @@ -70,6 +76,12 @@ sha512: 1f42adaf938fbf136e381b164bae5f984c7f9fe60c82728bd889c14f187c7d63e81a0305 snefru: d414b2345d3e7fa1a31c044cf334bfc1fec24d89e464411998d579d24663895f tiger192,3: 7acf4ebea075fac6fc8ea0e2b4af3cfa71b9460e4c53403a whirlpool: 4248b149e000477269a4a5f1a84d97cfc3d0199b7aaf505913e6f010a6f83276029d11a9ad545374bc710eb59c7d958985023ab886ffa9ec9a23852844c764ec +murmur3a: bc6554c8 +murmur3a: 432e4379 +murmur3c: 8779de509ffc06fb27bcf5fc861504d6 +murmur3c: b43afac65c38a617323020432c170005 +murmur3f: 2b84cd546b2f18a9ab6f893194224afd +murmur3f: 6cc7716646664d6a83d68cb6563ac38e adler32(raw): ff87222e md5(raw): 704bf818448f5bbb94061332d2c889aa sha256(raw): a0f5702fa5d3670b80033d668e8732b70550392abb53841355447f8bb0f72245 From 85338569debd3f669ef5bc793822b2d9f3f1b1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Sun, 7 May 2023 10:19:06 +0200 Subject: [PATCH 015/168] Narrow bool return types to true when possible --- ext/intl/calendar/calendar.stub.php | 14 +++++++------- ext/intl/calendar/calendar_arginfo.h | 2 +- ext/intl/php_intl.stub.php | 14 +++++++------- ext/intl/php_intl_arginfo.h | 14 +++++++------- ext/pgsql/pgsql.stub.php | 4 ++-- ext/pgsql/pgsql_arginfo.h | 10 ++++++---- ext/snmp/snmp.stub.php | 8 ++++---- ext/snmp/snmp_arginfo.h | 12 +++++++----- ext/spl/spl_array.stub.php | 24 ++++++++++++------------ ext/spl/spl_array_arginfo.h | 13 +++++++------ ext/spl/spl_heap.stub.php | 2 +- ext/spl/spl_heap_arginfo.h | 2 +- ext/standard/basic_functions.stub.php | 6 +++--- ext/standard/basic_functions_arginfo.h | 13 ++++--------- 14 files changed, 69 insertions(+), 69 deletions(-) diff --git a/ext/intl/calendar/calendar.stub.php b/ext/intl/calendar/calendar.stub.php index 97807b139e0d4..748d5eaf25285 100644 --- a/ext/intl/calendar/calendar.stub.php +++ b/ext/intl/calendar/calendar.stub.php @@ -244,7 +244,7 @@ public function after(IntlCalendar $other): bool {} public function before(IntlCalendar $other): bool {} /** - * @return 
bool + * @return true * @alias intlcal_clear */ public function clear(?int $field = null) {} // TODO make return type void @@ -341,7 +341,7 @@ public function getMaximum(int $field): int|false {} public function getMinimalDaysInFirstWeek(): int|false {} /** - * @return bool + * @return true * @alias intlcal_set_minimal_days_in_first_week */ public function setMinimalDaysInFirstWeek(int $days) {} // TODO make return void @@ -432,31 +432,31 @@ public function roll(int $field, $value): bool {} public function isSet(int $field): bool {} /** - * @return bool + * @return true * @alias intlcal_set */ public function set(int $year, int $month, int $dayOfMonth = UNKNOWN, int $hour = UNKNOWN, int $minute = UNKNOWN, int $second = UNKNOWN) {} // TODO make return type void /** - * @return bool + * @return true * @alias intlcal_set_first_day_of_week */ public function setFirstDayOfWeek(int $dayOfWeek) {} // TODO make return type void /** - * @return bool + * @return true * @alias intlcal_set_lenient */ public function setLenient(bool $lenient) {} // TODO make return type void /** - * @return bool + * @return true * @alias intlcal_set_repeated_wall_time_option */ public function setRepeatedWallTimeOption(int $option) {} // TODO make return type void /** - * @return bool + * @return true * @alias intlcal_set_skipped_wall_time_option */ public function setSkippedWallTimeOption(int $option) {} // TODO make return type void diff --git a/ext/intl/calendar/calendar_arginfo.h b/ext/intl/calendar/calendar_arginfo.h index 6173d1283806b..b67149915b9d3 100644 --- a/ext/intl/calendar/calendar_arginfo.h +++ b/ext/intl/calendar/calendar_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: ef1b6e5e8ff6890ccb176c08c22499048afdfdb5 */ + * Stub hash: c1d451a668ccab343208ab5cc30ab8457d6802b9 */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_IntlCalendar___construct, 0, 0, 0) ZEND_END_ARG_INFO() diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index 8980a807919b9..eab42fcc0ff57 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -201,12 +201,12 @@ function intlcal_after(IntlCalendar $calendar, IntlCalendar $other): bool {} function intlcal_before(IntlCalendar $calendar, IntlCalendar $other): bool {} -function intlcal_set(IntlCalendar $calendar, int $year, int $month, int $dayOfMonth = UNKNOWN, int $hour = UNKNOWN, int $minute = UNKNOWN, int $second = UNKNOWN): bool {} +function intlcal_set(IntlCalendar $calendar, int $year, int $month, int $dayOfMonth = UNKNOWN, int $hour = UNKNOWN, int $minute = UNKNOWN, int $second = UNKNOWN): true {} /** @param int|bool $value */ function intlcal_roll(IntlCalendar $calendar, int $field, $value): bool {} -function intlcal_clear(IntlCalendar $calendar, ?int $field = null): bool {} +function intlcal_clear(IntlCalendar $calendar, ?int $field = null): true {} function intlcal_field_difference(IntlCalendar $calendar, float $timestamp, int $field): int|false {} @@ -228,7 +228,7 @@ function intlcal_get_maximum(IntlCalendar $calendar, int $field): int|false {} function intlcal_get_minimal_days_in_first_week(IntlCalendar $calendar): int|false {} -function intlcal_set_minimal_days_in_first_week(IntlCalendar $calendar, int $days): bool {} +function intlcal_set_minimal_days_in_first_week(IntlCalendar $calendar, int $days): true {} function intlcal_get_minimum(IntlCalendar $calendar, int $field): int|false {} @@ -248,9 +248,9 @@ function intlcal_is_equivalent_to(IntlCalendar $calendar, IntlCalendar $other): function intlcal_is_weekend(IntlCalendar $calendar, ?float $timestamp = null): bool {} -function intlcal_set_first_day_of_week(IntlCalendar $calendar, int $dayOfWeek): bool {} 
+function intlcal_set_first_day_of_week(IntlCalendar $calendar, int $dayOfWeek): true {} -function intlcal_set_lenient(IntlCalendar $calendar, bool $lenient): bool {} +function intlcal_set_lenient(IntlCalendar $calendar, bool $lenient): true {} function intlcal_get_repeated_wall_time_option(IntlCalendar $calendar): int {} @@ -258,9 +258,9 @@ function intlcal_equals(IntlCalendar $calendar, IntlCalendar $other): bool {} function intlcal_get_skipped_wall_time_option(IntlCalendar $calendar): int {} -function intlcal_set_repeated_wall_time_option(IntlCalendar $calendar, int $option): bool {} +function intlcal_set_repeated_wall_time_option(IntlCalendar $calendar, int $option): true {} -function intlcal_set_skipped_wall_time_option(IntlCalendar $calendar, int $option): bool {} +function intlcal_set_skipped_wall_time_option(IntlCalendar $calendar, int $option): true {} function intlcal_from_date_time(DateTime|string $datetime, ?string $locale = null): ?IntlCalendar {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index 8f2903315a386..c05ecb7b24973 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 136c14d9162548cd7211985ce9a5d767a90a0b99 */ + * Stub hash: c32e74bddb55455f69083a302bcaf52f654b1293 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -50,7 +50,7 @@ ZEND_END_ARG_INFO() #define arginfo_intlcal_before arginfo_intlcal_after -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set, 0, 3, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set, 0, 3, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO(0, year, IS_LONG, 0) ZEND_ARG_TYPE_INFO(0, month, IS_LONG, 0) @@ -66,7 +66,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_roll, 0, 3, _IS_BOOL, 0) ZEND_ARG_INFO(0, value) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_clear, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_clear, 0, 1, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, field, IS_LONG, 1, "null") ZEND_END_ARG_INFO() @@ -103,7 +103,7 @@ ZEND_END_ARG_INFO() #define arginfo_intlcal_get_minimal_days_in_first_week arginfo_intlcal_get_first_day_of_week -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_minimal_days_in_first_week, 0, 2, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_minimal_days_in_first_week, 0, 2, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO(0, days, IS_LONG, 0) ZEND_END_ARG_INFO() @@ -138,12 +138,12 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_is_weekend, 0, 1, _IS_BO ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, timestamp, IS_DOUBLE, 1, "null") ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_first_day_of_week, 0, 2, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_first_day_of_week, 0, 2, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) 
ZEND_ARG_TYPE_INFO(0, dayOfWeek, IS_LONG, 0) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_lenient, 0, 2, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_lenient, 0, 2, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO(0, lenient, _IS_BOOL, 0) ZEND_END_ARG_INFO() @@ -156,7 +156,7 @@ ZEND_END_ARG_INFO() #define arginfo_intlcal_get_skipped_wall_time_option arginfo_intlcal_get_repeated_wall_time_option -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_repeated_wall_time_option, 0, 2, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_repeated_wall_time_option, 0, 2, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO(0, option, IS_LONG, 0) ZEND_END_ARG_INFO() diff --git a/ext/pgsql/pgsql.stub.php b/ext/pgsql/pgsql.stub.php index fd58fb92c0045..f337251d59a75 100644 --- a/ext/pgsql/pgsql.stub.php +++ b/ext/pgsql/pgsql.stub.php @@ -469,7 +469,7 @@ function pg_pconnect(string $connection_string, int $flags = 0): PgSql\Connectio function pg_connect_poll(PgSql\Connection $connection): int {} - function pg_close(?PgSql\Connection $connection = null): bool {} + function pg_close(?PgSql\Connection $connection = null): true {} /** @refcount 1 */ function pg_dbname(?PgSql\Connection $connection = null): string {} @@ -691,7 +691,7 @@ function pg_getlastoid(PgSql\Result $result): string|int|false {} function pg_trace(string $filename, string $mode = "w", ?PgSql\Connection $connection = null, int $trace_mode = 0): bool {} - function pg_untrace(?PgSql\Connection $connection = null): bool {} + function pg_untrace(?PgSql\Connection $connection = null): true {} /** * @param PgSql\Connection $connection diff --git a/ext/pgsql/pgsql_arginfo.h b/ext/pgsql/pgsql_arginfo.h index 839842bbc3944..8fcc229637191 100644 --- a/ext/pgsql/pgsql_arginfo.h +++ b/ext/pgsql/pgsql_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php 
file instead. - * Stub hash: f5b563282ffa08a4f79293fcf91df284433a6f5c */ + * Stub hash: bf714281e441d59e0760e51df9f4050c96319794 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_pg_connect, 0, 1, PgSql\\Connection, MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, connection_string, IS_STRING, 0) @@ -12,7 +12,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_connect_poll, 0, 1, IS_LONG, ZEND_ARG_OBJ_INFO(0, connection, PgSql\\Connection, 0) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_close, 0, 0, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_close, 0, 0, IS_TRUE, 0) ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, connection, PgSql\\Connection, 1, "null") ZEND_END_ARG_INFO() @@ -41,7 +41,9 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_pg_parameter_status, 0, 1, MAY_B ZEND_ARG_TYPE_INFO(0, name, IS_STRING, 0) ZEND_END_ARG_INFO() -#define arginfo_pg_ping arginfo_pg_close +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_ping, 0, 0, _IS_BOOL, 0) + ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, connection, PgSql\\Connection, 1, "null") +ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_pg_query, 0, 1, PgSql\\Result, MAY_BE_FALSE) ZEND_ARG_INFO(0, connection) @@ -297,7 +299,7 @@ ZEND_END_ARG_INFO() #define arginfo_pg_clientencoding arginfo_pg_dbname -#define arginfo_pg_end_copy arginfo_pg_close +#define arginfo_pg_end_copy arginfo_pg_ping ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_put_line, 0, 1, _IS_BOOL, 0) ZEND_ARG_INFO(0, connection) diff --git a/ext/snmp/snmp.stub.php b/ext/snmp/snmp.stub.php index 84fc738354aeb..b02cfd44ee3c0 100644 --- a/ext/snmp/snmp.stub.php +++ b/ext/snmp/snmp.stub.php @@ -125,11 +125,11 @@ function snmpset(string $hostname, string $community, array|string $object_id, a function snmp_get_quick_print(): bool {} -function snmp_set_quick_print(bool $enable): bool {} +function snmp_set_quick_print(bool $enable): true {} -function snmp_set_enum_print(bool $enable): bool {} +function 
snmp_set_enum_print(bool $enable): true {} -function snmp_set_oid_output_format(int $format): bool {} +function snmp_set_oid_output_format(int $format): true {} /** @alias snmp_set_oid_output_format */ function snmp_set_oid_numeric_print(int $format): bool {} @@ -175,7 +175,7 @@ function snmp3_set( array|string $object_id, array|string $type, array|string $value, int $timeout = -1, int $retries = -1): bool {} -function snmp_set_valueretrieval(int $method): bool {} +function snmp_set_valueretrieval(int $method): true {} function snmp_get_valueretrieval(): int {} diff --git a/ext/snmp/snmp_arginfo.h b/ext/snmp/snmp_arginfo.h index bbe95f1dd2cfb..f3f52e4bcea9a 100644 --- a/ext/snmp/snmp_arginfo.h +++ b/ext/snmp/snmp_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: a79a697fa8c1ab2513bde03e0c2367d0caaec7d8 */ + * Stub hash: 659db99d46c15b508e992d55a1e421f48b51f6e3 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmpget, 0, 3, IS_MIXED, 0) ZEND_ARG_TYPE_INFO(0, hostname, IS_STRING, 0) @@ -36,17 +36,19 @@ ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_get_quick_print, 0, 0, _IS_BOOL, 0) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_quick_print, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_quick_print, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(0, enable, _IS_BOOL, 0) ZEND_END_ARG_INFO() #define arginfo_snmp_set_enum_print arginfo_snmp_set_quick_print -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_oid_output_format, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_oid_output_format, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(0, format, IS_LONG, 0) ZEND_END_ARG_INFO() -#define arginfo_snmp_set_oid_numeric_print arginfo_snmp_set_oid_output_format +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_oid_numeric_print, 0, 1, _IS_BOOL, 0) + ZEND_ARG_TYPE_INFO(0, format, IS_LONG, 0) +ZEND_END_ARG_INFO() 
#define arginfo_snmp2_get arginfo_snmpget @@ -103,7 +105,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp3_set, 0, 10, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, retries, IS_LONG, 0, "-1") ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_valueretrieval, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_valueretrieval, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(0, method, IS_LONG, 0) ZEND_END_ARG_INFO() diff --git a/ext/spl/spl_array.stub.php b/ext/spl/spl_array.stub.php index 64855f37d2e0b..8841fb351aefa 100644 --- a/ext/spl/spl_array.stub.php +++ b/ext/spl/spl_array.stub.php @@ -45,22 +45,22 @@ public function getFlags(): int {} public function setFlags(int $flags): void {} /** @tentative-return-type */ - public function asort(int $flags = SORT_REGULAR): bool {} + public function asort(int $flags = SORT_REGULAR): true {} /** @tentative-return-type */ - public function ksort(int $flags = SORT_REGULAR): bool {} + public function ksort(int $flags = SORT_REGULAR): true {} /** @tentative-return-type */ - public function uasort(callable $callback): bool {} + public function uasort(callable $callback): true {} /** @tentative-return-type */ - public function uksort(callable $callback): bool {} + public function uksort(callable $callback): true {} /** @tentative-return-type */ - public function natsort(): bool {} + public function natsort(): true {} /** @tentative-return-type */ - public function natcasesort(): bool {} + public function natcasesort(): true {} /** @tentative-return-type */ public function unserialize(string $data): void {} @@ -163,37 +163,37 @@ public function setFlags(int $flags): void {} * @tentative-return-type * @implementation-alias ArrayObject::asort */ - public function asort(int $flags = SORT_REGULAR): bool {} + public function asort(int $flags = SORT_REGULAR): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::ksort */ - public function ksort(int $flags = 
SORT_REGULAR): bool {} + public function ksort(int $flags = SORT_REGULAR): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::uasort */ - public function uasort(callable $callback): bool {} + public function uasort(callable $callback): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::uksort */ - public function uksort(callable $callback): bool {} + public function uksort(callable $callback): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::natsort */ - public function natsort(): bool {} + public function natsort(): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::natcasesort */ - public function natcasesort(): bool {} + public function natcasesort(): true {} /** * @tentative-return-type diff --git a/ext/spl/spl_array_arginfo.h b/ext/spl/spl_array_arginfo.h index 9c1bb5b7b2014..de2ec5f1efb04 100644 --- a/ext/spl/spl_array_arginfo.h +++ b/ext/spl/spl_array_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 13e1f68463c2abc1a1ce0e1cff5f47a12407cfc1 */ + * Stub hash: d0ce4612e25d2b8a765544c835fa2347ae9b23f2 */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_ArrayObject___construct, 0, 0, 0) ZEND_ARG_TYPE_MASK(0, array, MAY_BE_ARRAY|MAY_BE_OBJECT, "[]") @@ -40,19 +40,19 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_setF ZEND_ARG_TYPE_INFO(0, flags, IS_LONG, 0) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_asort, 0, 0, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_asort, 0, 0, IS_TRUE, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "SORT_REGULAR") ZEND_END_ARG_INFO() #define arginfo_class_ArrayObject_ksort arginfo_class_ArrayObject_asort -ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_uasort, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_uasort, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(0, callback, IS_CALLABLE, 0) ZEND_END_ARG_INFO() #define arginfo_class_ArrayObject_uksort arginfo_class_ArrayObject_uasort -ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_natsort, 0, 0, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_natsort, 0, 0, IS_TRUE, 0) ZEND_END_ARG_INFO() #define arginfo_class_ArrayObject_natcasesort arginfo_class_ArrayObject_natsort @@ -139,7 +139,8 @@ ZEND_END_ARG_INFO() #define arginfo_class_ArrayIterator_next arginfo_class_ArrayIterator_rewind -#define arginfo_class_ArrayIterator_valid arginfo_class_ArrayObject_natsort +ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayIterator_valid, 0, 0, _IS_BOOL, 0) +ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayIterator_seek, 0, 1, IS_VOID, 0) ZEND_ARG_TYPE_INFO(0, offset, IS_LONG, 0) @@ -147,7 +148,7 @@ ZEND_END_ARG_INFO() #define arginfo_class_ArrayIterator___debugInfo 
arginfo_class_ArrayObject_getArrayCopy -#define arginfo_class_RecursiveArrayIterator_hasChildren arginfo_class_ArrayObject_natsort +#define arginfo_class_RecursiveArrayIterator_hasChildren arginfo_class_ArrayIterator_valid ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_RecursiveArrayIterator_getChildren, 0, 0, RecursiveArrayIterator, 1) ZEND_END_ARG_INFO() diff --git a/ext/spl/spl_heap.stub.php b/ext/spl/spl_heap.stub.php index ac9e4151ea308..a1b4dfdbde318 100644 --- a/ext/spl/spl_heap.stub.php +++ b/ext/spl/spl_heap.stub.php @@ -23,7 +23,7 @@ class SplPriorityQueue implements Iterator, Countable /** @tentative-return-type */ public function compare(mixed $priority1, mixed $priority2): int {} - /** @return bool */ + /** @return true */ public function insert(mixed $value, mixed $priority) {} // TODO make return type void /** @tentative-return-type */ diff --git a/ext/spl/spl_heap_arginfo.h b/ext/spl/spl_heap_arginfo.h index 42d9590fea2f4..99eda7e079359 100644 --- a/ext/spl/spl_heap_arginfo.h +++ b/ext/spl/spl_heap_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 8e4784e749d6c70174a0958e73e4e9907adcd4b5 */ + * Stub hash: 4045035ec5bee0f951fa31df75c3f42c31bd8be2 */ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_SplPriorityQueue_compare, 0, 2, IS_LONG, 0) ZEND_ARG_TYPE_INFO(0, priority1, IS_MIXED, 0) diff --git a/ext/standard/basic_functions.stub.php b/ext/standard/basic_functions.stub.php index 9124626b5c5b5..b2270d66d9e25 100755 --- a/ext/standard/basic_functions.stub.php +++ b/ext/standard/basic_functions.stub.php @@ -1574,9 +1574,9 @@ function count(Countable|array $value, int $mode = COUNT_NORMAL): int {} /** @alias count */ function sizeof(Countable|array $value, int $mode = COUNT_NORMAL): int {} -function natsort(array &$array): bool {} +function natsort(array &$array): true {} -function natcasesort(array &$array): bool {} +function natcasesort(array &$array): true {} function asort(array &$array, int $flags = SORT_REGULAR): true {} @@ -1584,7 +1584,7 @@ function arsort(array &$array, int $flags = SORT_REGULAR): true {} function sort(array &$array, int $flags = SORT_REGULAR): true {} -function rsort(array &$array, int $flags = SORT_REGULAR): bool {} +function rsort(array &$array, int $flags = SORT_REGULAR): true {} function usort(array &$array, callable $callback): true {} diff --git a/ext/standard/basic_functions_arginfo.h b/ext/standard/basic_functions_arginfo.h index bb46fb87090f6..f43ca8f98fc16 100644 --- a/ext/standard/basic_functions_arginfo.h +++ b/ext/standard/basic_functions_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 73f82e392f5adf146b9b8dfb39496b3ce8465115 */ + * Stub hash: 0d2bffd95e986b632f5fd1afbf8f6464e6bc8759 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_set_time_limit, 0, 1, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO(0, seconds, IS_LONG, 0) @@ -88,7 +88,7 @@ ZEND_END_ARG_INFO() #define arginfo_sizeof arginfo_count -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_natsort, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_natsort, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) ZEND_END_ARG_INFO() @@ -100,10 +100,7 @@ ZEND_END_ARG_INFO() #define arginfo_sort arginfo_krsort -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_rsort, 0, 1, _IS_BOOL, 0) - ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "SORT_REGULAR") -ZEND_END_ARG_INFO() +#define arginfo_rsort arginfo_krsort ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_usort, 0, 2, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) @@ -189,9 +186,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_range, 0, 2, IS_ARRAY, 0) ZEND_ARG_TYPE_MASK(0, step, MAY_BE_LONG|MAY_BE_DOUBLE, "1") ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_shuffle, 0, 1, IS_TRUE, 0) - ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) -ZEND_END_ARG_INFO() +#define arginfo_shuffle arginfo_natsort ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_array_pop, 0, 1, IS_MIXED, 0) ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) From 281669aeb4b09c05b285d5727cb394c461ce9efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Sun, 7 May 2023 10:19:38 +0200 Subject: [PATCH 016/168] Add support for true standalone type when generating methodsynopsis --- build/gen_stub.php | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/build/gen_stub.php b/build/gen_stub.php index c0b02b03738f2..f3b3b5fcc7514 100755 --- a/build/gen_stub.php +++ b/build/gen_stub.php @@ -725,11 +725,7 @@ public function getTypeForDoc(DOMDocument $doc): 
DOMElement { } } else { $type = $this->types[0]; - if ($type->isBuiltin && strtolower($type->name) === "true") { - $name = "bool"; - } else { - $name = $type->name; - } + $name = $type->name; $typeElement = $doc->createElement('type', $name); } From 37e6594545393464184781b2963795c349ac4b73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Sun, 7 May 2023 23:30:12 +0200 Subject: [PATCH 017/168] Fix gmp_long/gmp_ulong typedef warning on Windows x86 (#11112) --- ext/gmp/php_gmp_int.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/gmp/php_gmp_int.h b/ext/gmp/php_gmp_int.h index d4ef5f0157332..c37fdb136d023 100644 --- a/ext/gmp/php_gmp_int.h +++ b/ext/gmp/php_gmp_int.h @@ -28,7 +28,7 @@ static inline gmp_object *php_gmp_object_from_zend_object(zend_object *zobj) { PHP_GMP_API zend_class_entry *php_gmp_class_entry(void); /* GMP and MPIR use different datatypes on different platforms */ -#ifdef PHP_WIN32 +#ifdef _WIN64 typedef zend_long gmp_long; typedef zend_ulong gmp_ulong; #else From 78ec64af44a02baf337fe0550c1bb73fb936598a Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Sun, 7 May 2023 12:01:13 +0200 Subject: [PATCH 018/168] Fix use-of-uninitialized value in phar_object.c resource would stay uninitialized if the first call to zend_parse_parameters fails, but the value is still passed to phar_add_file(). It's not used there if cont_str is provided and so didn't cause any issues. 
Closes GH-11202 --- ext/phar/phar_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/phar/phar_object.c b/ext/phar/phar_object.c index e32b530b82297..f329c3b0b17b2 100644 --- a/ext/phar/phar_object.c +++ b/ext/phar/phar_object.c @@ -3697,7 +3697,7 @@ PHP_METHOD(Phar, offsetSet) { char *fname, *cont_str = NULL; size_t fname_len, cont_len; - zval *zresource; + zval *zresource = NULL; if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "pr", &fname, &fname_len, &zresource) == FAILURE && zend_parse_parameters(ZEND_NUM_ARGS(), "ps", &fname, &fname_len, &cont_str, &cont_len) == FAILURE) { From 06fe9ff0f11d1e6005bfa06af88815baaa5e45e9 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Mon, 8 May 2023 17:00:40 +0200 Subject: [PATCH 019/168] Fix use-of-undefined in zend_fiber_object_gc of ex->call ex->call is only set for user calls, we shouldn't access it here. zend_unfinished_execution_gc_ex wouldn't actually use it for internal calls, so it didn't cause any serious issues. Closes GH-11208 --- Zend/zend_fibers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zend/zend_fibers.c b/Zend/zend_fibers.c index 53f78417f5dbf..aae7058294d5e 100644 --- a/Zend/zend_fibers.c +++ b/Zend/zend_fibers.c @@ -670,7 +670,7 @@ static HashTable *zend_fiber_object_gc(zend_object *object, zval **table, int *n HashTable *lastSymTable = NULL; zend_execute_data *ex = fiber->execute_data; for (; ex; ex = ex->prev_execute_data) { - HashTable *symTable = zend_unfinished_execution_gc_ex(ex, ex->call, buf, false); + HashTable *symTable = zend_unfinished_execution_gc_ex(ex, ZEND_USER_CODE(ex->func->type) ? 
ex->call : NULL, buf, false); if (symTable) { if (lastSymTable) { zval *val; From acc940645e985ab9f452be9a93622b41186ceecf Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 9 May 2023 19:46:45 +0200 Subject: [PATCH 020/168] Remove unnecessary NULL assignments after ecalloc in streams (#11209) ecalloc already zeroes the structure, so writing NULL is not necessary. --- main/streams/streams.c | 1 - main/streams/userspace.c | 1 - 2 files changed, 2 deletions(-) diff --git a/main/streams/streams.c b/main/streams/streams.c index f655faef10cbf..eeb3cdf101c6e 100644 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -2315,7 +2315,6 @@ PHPAPI php_stream_context *php_stream_context_alloc(void) php_stream_context *context; context = ecalloc(1, sizeof(php_stream_context)); - context->notifier = NULL; array_init(&context->options); context->res = zend_register_resource(context, php_le_stream_context()); diff --git a/main/streams/userspace.c b/main/streams/userspace.c index 33ec6c0990b13..165bd7da3ad98 100644 --- a/main/streams/userspace.c +++ b/main/streams/userspace.c @@ -469,7 +469,6 @@ PHP_FUNCTION(stream_wrapper_register) uwrap->wrapper.wops = &user_stream_wops; uwrap->wrapper.abstract = uwrap; uwrap->wrapper.is_url = ((flags & PHP_STREAM_IS_URL) != 0); - uwrap->resource = NULL; rsrc = zend_register_resource(uwrap, le_protocols); From 6ba0b0681998f24e3db9af1e96cdb7bbe186def2 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 8 May 2023 23:37:12 +0200 Subject: [PATCH 021/168] Fix GH-8426: make test fail while soap extension build If you build soap as a shared object, then these tests fail on non-Windows, or when the PHP install hasn't been make install-ed yet, but is executed from the development directory. Closes GH-11211. 
--- NEWS | 3 +++ ext/soap/tests/bug73037.phpt | 8 ++++++-- ext/soap/tests/custom_content_type.phpt | 8 ++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index bebc691d019fe..050955d207f5f 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,9 @@ PHP NEWS - PGSQL: . Fixed parameter parsing of pg_lo_export(). (kocsismate) +- Soap: + . Fixed bug GH-8426 (make test fail while soap extension build). (nielsdos) + - SPL: . Fixed bug GH-11178 (Segmentation fault in spl_array_it_get_current_data (PHP 8.1.18)). (nielsdos) diff --git a/ext/soap/tests/bug73037.phpt b/ext/soap/tests/bug73037.phpt index 4cf46eb373aa4..25fde2cb0dabe 100644 --- a/ext/soap/tests/bug73037.phpt +++ b/ext/soap/tests/bug73037.phpt @@ -59,8 +59,12 @@ function get_data($max) } $router = "bug73037_server.php"; -$args = substr(PHP_OS, 0, 3) == 'WIN' - ? ["-d", "extension_dir=" . ini_get("extension_dir"), "-d", "extension=php_soap.dll"] : []; +$args = ["-d", "extension_dir=" . ini_get("extension_dir"), "-d", "extension=" . (substr(PHP_OS, 0, 3) == "WIN" ? "php_" : "") . "soap." . PHP_SHLIB_SUFFIX]; +if (php_ini_loaded_file()) { + // Necessary such that it works from a development directory in which case extension_dir might not be the real extension dir + $args[] = "-c"; + $args[] = php_ini_loaded_file(); +} $code = <<<'PHP' $s = new SoapServer(NULL, array('uri' => 'http://here')); $s->setObject(new stdclass()); diff --git a/ext/soap/tests/custom_content_type.phpt b/ext/soap/tests/custom_content_type.phpt index b8bc8c9870113..d32f1df783591 100644 --- a/ext/soap/tests/custom_content_type.phpt +++ b/ext/soap/tests/custom_content_type.phpt @@ -13,8 +13,12 @@ soap include __DIR__ . "/../../../sapi/cli/tests/php_cli_server.inc"; -$args = substr(PHP_OS, 0, 3) == 'WIN' - ? ["-d", "extension_dir=" . ini_get("extension_dir"), "-d", "extension=php_soap.dll"] : []; +$args = ["-d", "extension_dir=" . ini_get("extension_dir"), "-d", "extension=" . (substr(PHP_OS, 0, 3) == "WIN" ? "php_" : "") . 
"soap." . PHP_SHLIB_SUFFIX]; +if (php_ini_loaded_file()) { + // Necessary such that it works from a development directory in which case extension_dir might not be the real extension dir + $args[] = "-c"; + $args[] = php_ini_loaded_file(); +} $code = <<<'PHP' /* Receive */ $content = trim(file_get_contents("php://input")) . PHP_EOL; From 175ff603c3a8ae9dd3e6ccb3fc3081b06263f989 Mon Sep 17 00:00:00 2001 From: Amedeo Baragiola Date: Tue, 9 May 2023 19:09:42 +0100 Subject: [PATCH 022/168] Fix compilation error on old GCC versions In older versions of GCC (<=4.5) designated initializers would not accept member names nested inside anonymous structures. Instead, we need to use a positional member wrapped in {}. Fixes GH-11063 Closes GH-11212 --- NEWS | 1 + Zend/zend_hash.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 050955d207f5f..6a4a49b1cfb17 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,7 @@ PHP NEWS value(s)). (nielsdos) . Fixed bug GH-11189 (Exceeding memory limit in zend_hash_do_resize leaves the array in an invalid state). (Bob) + . Fixed bug GH-11063 (Compilation error on old GCC versions). (ingamedeo) - Hash: . Fixed bug GH-11180 (hash_file() appears to be restricted to 3 arguments). 
diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index 49c0df614369b..58e5b40d05055 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -240,7 +240,7 @@ ZEND_API const HashTable zend_empty_array = { .gc.u.type_info = IS_ARRAY | (GC_IMMUTABLE << GC_FLAGS_SHIFT), .u.flags = HASH_FLAG_UNINITIALIZED, .nTableMask = HT_MIN_MASK, - .arData = (Bucket*)&uninitialized_bucket[2], + {.arData = (Bucket*)&uninitialized_bucket[2]}, .nNumUsed = 0, .nNumOfElements = 0, .nTableSize = HT_MIN_SIZE, From 975d28e278d0ba1936ac38774d7903941f3c95b9 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 10 May 2023 16:44:29 +0200 Subject: [PATCH 023/168] Fix GH-11222: foreach by-ref may jump over keys during a rehash Signed-off-by: Bob Weinand --- NEWS | 2 ++ Zend/tests/gh11222.phpt | 29 +++++++++++++++++++++++++++++ Zend/zend_hash.c | 2 +- 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 Zend/tests/gh11222.phpt diff --git a/NEWS b/NEWS index 6a4a49b1cfb17..8f51e4aec7751 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,8 @@ PHP NEWS . Fixed bug GH-11189 (Exceeding memory limit in zend_hash_do_resize leaves the array in an invalid state). (Bob) . Fixed bug GH-11063 (Compilation error on old GCC versions). (ingamedeo) + . Fixed bug GH-11222 (foreach by-ref may jump over keys during a rehash). + (Bob) - Hash: . Fixed bug GH-11180 (hash_file() appears to be restricted to 3 arguments). 
diff --git a/Zend/tests/gh11222.phpt b/Zend/tests/gh11222.phpt new file mode 100644 index 0000000000000..c2c2b5eb4881a --- /dev/null +++ b/Zend/tests/gh11222.phpt @@ -0,0 +1,29 @@ +--TEST-- +GH-11222: foreach by-ref may jump over keys during a rehash +--FILE-- + 0, 1 => 1, 2, 3, 4, 5, 6]; +foreach ($a as $k => &$v) { + if ($k == 1) { + // force that it'll be rehashed by adding enough holes + unset($a[4], $a[5]); + // actually make the array larger than 8 elements to trigger rehash + $a[] = 8; $a[] = 9; $a[] = 10; + + } + // observe the iteration jumping from key 1 to key 6, skipping keys 2 and 3 + echo "$k => $v\n"; +} + +?> +--EXPECTF-- +k => 0 +1 => 1 +2 => 2 +3 => 3 +6 => 6 +7 => 8 +8 => 9 +9 => 10 diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index 58e5b40d05055..0f7ed38347087 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -1282,7 +1282,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_rehash(HashTable *ht) } } } else { - uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, 0); + uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, i + 1); while (++i < ht->nNumUsed) { p++; From 09dd3e3daf2bfc77b056ccaf7e7825af63305e7a Mon Sep 17 00:00:00 2001 From: kocsismate Date: Wed, 10 May 2023 19:08:05 +0200 Subject: [PATCH 024/168] Narrow some more return types to true --- ext/xmlreader/php_xmlreader.stub.php | 4 ++-- ext/xmlreader/php_xmlreader_arginfo.h | 2 +- ext/xsl/php_xsl.stub.php | 4 ++-- ext/xsl/php_xsl_arginfo.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/xmlreader/php_xmlreader.stub.php b/ext/xmlreader/php_xmlreader.stub.php index ed4cc393e27bb..26cbf69a2f0df 100644 --- a/ext/xmlreader/php_xmlreader.stub.php +++ b/ext/xmlreader/php_xmlreader.stub.php @@ -149,8 +149,8 @@ class XMLReader public string $xmlLang; - /** @return bool */ - public function close() {} // TODO make the return type void + /** @return true */ + public function close() {} // TODO make return type void /** @tentative-return-type */ public function 
getAttribute(string $name): ?string {} diff --git a/ext/xmlreader/php_xmlreader_arginfo.h b/ext/xmlreader/php_xmlreader_arginfo.h index c80006570ccac..7556579d0ee30 100644 --- a/ext/xmlreader/php_xmlreader_arginfo.h +++ b/ext/xmlreader/php_xmlreader_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 0102030bdd0ca8806bbd881ed3f6788d4b18c462 */ + * Stub hash: 4751b68b857ffbf53cab6d1aa88fe8f6120d4fc6 */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_XMLReader_close, 0, 0, 0) ZEND_END_ARG_INFO() diff --git a/ext/xsl/php_xsl.stub.php b/ext/xsl/php_xsl.stub.php index 140c498523b42..52300c019ecc7 100644 --- a/ext/xsl/php_xsl.stub.php +++ b/ext/xsl/php_xsl.stub.php @@ -110,8 +110,8 @@ public function hasExsltSupport(): bool {} /** @tentative-return-type */ public function registerPHPFunctions(array|string|null $functions = null): void {} - /** @return bool */ - public function setProfiling(?string $filename) {} // TODO make the return type void + /** @return true */ + public function setProfiling(?string $filename) {} // TODO make return type void /** @tentative-return-type */ public function setSecurityPrefs(int $preferences): int {} diff --git a/ext/xsl/php_xsl_arginfo.h b/ext/xsl/php_xsl_arginfo.h index 546dc3229efa6..aabf6629cc17f 100644 --- a/ext/xsl/php_xsl_arginfo.h +++ b/ext/xsl/php_xsl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: a0615bef7b2211570d9da95a31857832a06360dd */ + * Stub hash: 7d747e7b5989c18169e67d9a9d70256583fffd8e */ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_importStylesheet, 0, 1, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO(0, stylesheet, IS_OBJECT, 0) From 8f66b67ccffad70fdd21e189539989e65bf484c2 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Wed, 10 May 2023 23:58:50 +0200 Subject: [PATCH 025/168] Fix compilation for PHP 8.1 Accidentally introduced in 175ff603c3a8ae9dd3e6ccb3fc3081b06263f989. 
arData was not part of an anonymous union. --- Zend/zend_hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index 0f7ed38347087..5032668e1bfab 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -240,7 +240,7 @@ ZEND_API const HashTable zend_empty_array = { .gc.u.type_info = IS_ARRAY | (GC_IMMUTABLE << GC_FLAGS_SHIFT), .u.flags = HASH_FLAG_UNINITIALIZED, .nTableMask = HT_MIN_MASK, - {.arData = (Bucket*)&uninitialized_bucket[2]}, + .arData = (Bucket*)&uninitialized_bucket[2], .nNumUsed = 0, .nNumOfElements = 0, .nTableSize = HT_MIN_SIZE, From ad747d93c32013a08860e19c51eaa6c8ab10ee20 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Thu, 11 May 2023 11:53:18 +0200 Subject: [PATCH 026/168] [skip ci] Remove NEWS entry for reverted change in PHP 8.1 --- NEWS | 1 - 1 file changed, 1 deletion(-) diff --git a/NEWS b/NEWS index 8f51e4aec7751..fcc2e3b58a6d5 100644 --- a/NEWS +++ b/NEWS @@ -7,7 +7,6 @@ PHP NEWS value(s)). (nielsdos) . Fixed bug GH-11189 (Exceeding memory limit in zend_hash_do_resize leaves the array in an invalid state). (Bob) - . Fixed bug GH-11063 (Compilation error on old GCC versions). (ingamedeo) . Fixed bug GH-11222 (foreach by-ref may jump over keys during a rehash). 
(Bob) From 12c30a8da35e44362cb5fedfd43651caed5a4daa Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Thu, 11 May 2023 12:46:54 +0200 Subject: [PATCH 027/168] [skip ci] Add missing --no-progress flag to ARM build --- .cirrus.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.cirrus.yml b/.cirrus.yml index c03086509cd76..b25d29a870f55 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -204,6 +204,7 @@ arm_task: -d opcache.jit=function -P -q -x -j2 -g FAIL,BORK,LEAK,XLEAK + --no-progress --offline --show-diff --show-slow 1000 @@ -216,6 +217,7 @@ arm_task: -d opcache.jit=tracing -P -q -x -j2 -g FAIL,BORK,LEAK,XLEAK + --no-progress --offline --show-diff --show-slow 1000 From 0a04c008d0df2cb5e3d54c10d2f73516730ee384 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Thu, 11 May 2023 14:33:49 +0200 Subject: [PATCH 028/168] Fix potential NULL pointer access in zend_fiber_object_gc Accidentally introduced in GH-11208. Fixes oss-fuzz #58795 --- Zend/zend_fibers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zend/zend_fibers.c b/Zend/zend_fibers.c index aae7058294d5e..6a22d5c43cf9a 100644 --- a/Zend/zend_fibers.c +++ b/Zend/zend_fibers.c @@ -670,7 +670,7 @@ static HashTable *zend_fiber_object_gc(zend_object *object, zval **table, int *n HashTable *lastSymTable = NULL; zend_execute_data *ex = fiber->execute_data; for (; ex; ex = ex->prev_execute_data) { - HashTable *symTable = zend_unfinished_execution_gc_ex(ex, ZEND_USER_CODE(ex->func->type) ? ex->call : NULL, buf, false); + HashTable *symTable = zend_unfinished_execution_gc_ex(ex, ex->func && ZEND_USER_CODE(ex->func->type) ? 
ex->call : NULL, buf, false); if (symTable) { if (lastSymTable) { zval *val; From 730f32bad90aaba236ad1d9df3470dae1f2b9939 Mon Sep 17 00:00:00 2001 From: iamluc Date: Thu, 20 Apr 2023 10:34:25 +0200 Subject: [PATCH 029/168] Keep the orig_path for xport stream Closes GH-11113 --- NEWS | 2 ++ .../tests/network/socket_get_status_basic.phpt | 4 +++- .../stream_get_meta_data_socket_basic.phpt | 4 +++- .../stream_get_meta_data_socket_variation1.phpt | 16 ++++++++++++---- .../stream_get_meta_data_socket_variation2.phpt | 16 ++++++++++++---- .../stream_get_meta_data_socket_variation3.phpt | 12 +++++++++--- .../stream_get_meta_data_socket_variation4.phpt | 12 +++++++++--- main/streams/transports.c | 4 +++- main/streams/xp_socket.c | 4 ---- 9 files changed, 53 insertions(+), 21 deletions(-) diff --git a/NEWS b/NEWS index 3b5b6f03c88fb..ce096488a9dd6 100644 --- a/NEWS +++ b/NEWS @@ -201,6 +201,8 @@ PHP NEWS - Streams: . Fixed bug #51056: blocking fread() will block even if data is available. (Jakub Zelenka) + . Added storing of the original path used to open xport stream. + (Luc Vieillescazes) - XSLTProcessor: . Fixed bug #69168 (DomNode::getNodePath() returns invalid path). 
(nielsdos) diff --git a/ext/standard/tests/network/socket_get_status_basic.phpt b/ext/standard/tests/network/socket_get_status_basic.phpt index 32a8d9ce6fbb4..215d9551e0a5d 100644 --- a/ext/standard/tests/network/socket_get_status_basic.phpt +++ b/ext/standard/tests/network/socket_get_status_basic.phpt @@ -17,7 +17,7 @@ fclose($server); ?> --EXPECTF-- -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -32,4 +32,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:%d" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_basic.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_basic.phpt index 66658bd5d0c33..b0a82616a9145 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_basic.phpt +++ b/ext/standard/tests/streams/stream_get_meta_data_socket_basic.phpt @@ -9,7 +9,7 @@ fclose($tcp_socket); ?> --EXPECTF-- -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -24,4 +24,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31330" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_variation1.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_variation1.phpt index 9db45e8c2f588..0601bd194ea16 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_variation1.phpt +++ b/ext/standard/tests/streams/stream_get_meta_data_socket_variation1.phpt @@ -38,7 +38,7 @@ var_dump(stream_get_meta_data($client)); ?> --EXPECTF-- Write some data: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -53,11 +53,13 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31331" } Read a line from the client, causing data to be buffered: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -72,11 +74,13 @@ array(7) { int(15) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31331" } Read 3 bytes of data from the 
client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -91,11 +95,13 @@ array(7) { int(12) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31331" } Close the server side socket and read the remaining data from the client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -110,4 +116,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31331" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_variation2.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_variation2.phpt index f6e6504536eeb..cd1ba6b41dd80 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_variation2.phpt +++ b/ext/standard/tests/streams/stream_get_meta_data_socket_variation2.phpt @@ -36,7 +36,7 @@ fclose($server); ?> --EXPECTF-- -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -51,11 +51,13 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31332" } Set a timeout on the client and attempt a read: -array(7) { +array(8) { ["timed_out"]=> bool(true) ["blocked"]=> @@ -70,11 +72,13 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31332" } Write some data from the server: -array(7) { +array(8) { ["timed_out"]=> bool(true) ["blocked"]=> @@ -89,11 +93,13 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31332" } Read some data from the client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -108,4 +114,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31332" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_variation3.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_variation3.phpt index ef1a22a3a1637..4413312b7eb46 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_variation3.phpt +++ 
b/ext/standard/tests/streams/stream_get_meta_data_socket_variation3.phpt @@ -31,7 +31,7 @@ fclose($server); ?> --EXPECTF-- -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -46,12 +46,14 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31333" } Set blocking to false: bool(true) -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -66,12 +68,14 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31333" } Set blocking to true: bool(true) -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -86,4 +90,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31333" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_variation4.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_variation4.phpt index cd83b1342c7bc..14e1a6b38769b 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_variation4.phpt +++ b/ext/standard/tests/streams/stream_get_meta_data_socket_variation4.phpt @@ -36,7 +36,7 @@ fclose($client); ?> --EXPECTF-- Write some data: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -51,11 +51,13 @@ array(7) { int(%i) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31334" } Read a line from the client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -70,11 +72,13 @@ array(7) { int(%i) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31334" } Close the server side socket and read the remaining data from the client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -89,4 +93,6 @@ array(7) { int(%i) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31334" } diff --git a/main/streams/transports.c b/main/streams/transports.c index 1c9a83be2c882..38850a3b541a4 100644 --- a/main/streams/transports.c +++ b/main/streams/transports.c @@ -59,7 +59,7 @@ 
PHPAPI php_stream *_php_stream_xport_create(const char *name, size_t namelen, in { php_stream *stream = NULL; php_stream_transport_factory factory = NULL; - const char *p, *protocol = NULL; + const char *p, *protocol, *orig_path = NULL; size_t n = 0; bool failed = false; bool bailout = false; @@ -94,6 +94,7 @@ PHPAPI php_stream *_php_stream_xport_create(const char *name, size_t namelen, in } } + orig_path = name; for (p = name; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) { n++; } @@ -135,6 +136,7 @@ PHPAPI php_stream *_php_stream_xport_create(const char *name, size_t namelen, in if (stream) { zend_try { php_stream_context_set(stream, context); + stream->orig_path = pestrdup(orig_path, persistent_id ? 1 : 0); if ((flags & STREAM_XPORT_SERVER) == 0) { /* client */ diff --git a/main/streams/xp_socket.c b/main/streams/xp_socket.c index 4ea0dc8e880bf..8f0a87b998043 100644 --- a/main/streams/xp_socket.c +++ b/main/streams/xp_socket.c @@ -966,9 +966,5 @@ PHPAPI php_stream *php_stream_generic_socket_factory(const char *proto, size_t p return NULL; } - if (flags == 0) { - return stream; - } - return stream; } From e3499130f1a9a1d9aa31d9763cb40e26394879c3 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Thu, 11 May 2023 00:25:21 +0200 Subject: [PATCH 030/168] Fix delayed early binding class redeclaration error If we bind the class to the runtime slot even if we're not the ones who have performed early binding we'll miss the redeclaration error in the ZEND_DECLARE_CLASS_DELAYED handler. 
Closes GH-11226 --- NEWS | 1 + .../tests/delayed_early_binding_redeclaration-1.inc | 2 ++ .../tests/delayed_early_binding_redeclaration-2.inc | 2 ++ Zend/tests/delayed_early_binding_redeclaration.phpt | 13 +++++++++++++ ext/opcache/zend_accelerator_util_funcs.c | 6 +++--- 5 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 Zend/tests/delayed_early_binding_redeclaration-1.inc create mode 100644 Zend/tests/delayed_early_binding_redeclaration-2.inc create mode 100644 Zend/tests/delayed_early_binding_redeclaration.phpt diff --git a/NEWS b/NEWS index 48dfea012478f..338c972397772 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,7 @@ PHP NEWS - Opcache: . Fixed bug GH-11134 (Incorrect match default branch optimization). (ilutov) . Fixed too wide OR and AND range inference. (nielsdos) + . Fixed missing class redeclaration error with OPcache enabled. (ilutov) - PCNTL: . Fixed maximum argument count of pcntl_forkx(). (nielsdos) diff --git a/Zend/tests/delayed_early_binding_redeclaration-1.inc b/Zend/tests/delayed_early_binding_redeclaration-1.inc new file mode 100644 index 0000000000000..abfccf90686e3 --- /dev/null +++ b/Zend/tests/delayed_early_binding_redeclaration-1.inc @@ -0,0 +1,2 @@ + +--EXPECTF-- +Fatal error: Cannot declare class Bar, because the name is already in use in %sdelayed_early_binding_redeclaration-2.inc on line %d diff --git a/ext/opcache/zend_accelerator_util_funcs.c b/ext/opcache/zend_accelerator_util_funcs.c index 9a64f92dab6ee..2ee60d4bbc493 100644 --- a/ext/opcache/zend_accelerator_util_funcs.c +++ b/ext/opcache/zend_accelerator_util_funcs.c @@ -358,9 +358,9 @@ static void zend_accel_do_delayed_early_binding( ce = zend_try_early_bind(orig_ce, parent_ce, early_binding->lcname, zv); } } - } - if (ce && early_binding->cache_slot != (uint32_t) -1) { - *(void**)((char*)run_time_cache + early_binding->cache_slot) = ce; + if (ce && early_binding->cache_slot != (uint32_t) -1) { + *(void**)((char*)run_time_cache + early_binding->cache_slot) = ce; + 
} } } CG(compiled_filename) = orig_compiled_filename; From 7b768485f3388e487c0887622092ce2df74fe1f9 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 24 Mar 2023 16:01:52 +0100 Subject: [PATCH 031/168] Fix GH-10834: exif_read_data() cannot read smaller stream wrapper chunk sizes php_stream_read() may return less than the requested amount of bytes by design. This patch introduces a static function for exif which reads from the stream in a loop until all the requested bytes are read. For the test: Co-authored-by: dotpointer Closes GH-10924. --- NEWS | 4 ++ ext/exif/exif.c | 43 +++++++++++++++----- ext/exif/tests/gh10834.phpt | 79 +++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 10 deletions(-) create mode 100644 ext/exif/tests/gh10834.phpt diff --git a/NEWS b/NEWS index fcc2e3b58a6d5..a835815dd17bc 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,10 @@ PHP NEWS . Fixed bug GH-11222 (foreach by-ref may jump over keys during a rehash). (Bob) +- Exif: + . Fixed bug GH-10834 (exif_read_data() cannot read smaller stream wrapper + chunk sizes). (nielsdos) + - Hash: . Fixed bug GH-11180 (hash_file() appears to be restricted to 3 arguments). (nielsdos) diff --git a/ext/exif/exif.c b/ext/exif/exif.c index 273149ccbb845..be77433e63e49 100644 --- a/ext/exif/exif.c +++ b/ext/exif/exif.c @@ -215,6 +215,25 @@ zend_module_entry exif_module_entry = { ZEND_GET_MODULE(exif) #endif +/* php_stream_read() may return early without reading all data, depending on the chunk size + * and whether it's a URL stream or not. This helper keeps reading until the requested amount + * is read or until there is no more data available to read. 
*/ +static ssize_t exif_read_from_stream_file_looped(php_stream *stream, char *buf, size_t count) +{ + ssize_t total_read = 0; + while (total_read < count) { + ssize_t ret = php_stream_read(stream, buf + total_read, count - total_read); + if (ret == -1) { + return -1; + } + if (ret == 0) { + break; + } + total_read += ret; + } + return total_read; +} + /* {{{ php_strnlen * get length of string if buffer if less than buffer size or buffer size */ static size_t php_strnlen(char* str, size_t maxlen) { @@ -3321,7 +3340,7 @@ static bool exif_process_IFD_TAG_impl(image_info_type *ImageInfo, char *dir_entr exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_WARNING, "Wrong file pointer: 0x%08X != 0x%08X", fgot, displacement+offset_val); return false; } - fgot = php_stream_read(ImageInfo->infile, value_ptr, byte_count); + fgot = exif_read_from_stream_file_looped(ImageInfo->infile, value_ptr, byte_count); php_stream_seek(ImageInfo->infile, fpos, SEEK_SET); if (fgot != byte_count) { EFREE_IF(outside); @@ -3854,7 +3873,7 @@ static bool exif_scan_JPEG_header(image_info_type *ImageInfo) Data[0] = (uchar)lh; Data[1] = (uchar)ll; - got = php_stream_read(ImageInfo->infile, (char*)(Data+2), itemlen-2); /* Read the whole section. */ + got = exif_read_from_stream_file_looped(ImageInfo->infile, (char*)(Data+2), itemlen-2); /* Read the whole section. 
*/ if (got != itemlen-2) { exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_WARNING, "Error reading from file: got=x%04X(=%d) != itemlen-2=x%04X(=%d)", got, got, itemlen-2, itemlen-2); return false; @@ -3872,7 +3891,7 @@ static bool exif_scan_JPEG_header(image_info_type *ImageInfo) size = ImageInfo->FileSize - fpos; sn = exif_file_sections_add(ImageInfo, M_PSEUDO, size, NULL); Data = ImageInfo->file.list[sn].data; - got = php_stream_read(ImageInfo->infile, (char*)Data, size); + got = exif_read_from_stream_file_looped(ImageInfo->infile, (char*)Data, size); if (got != size) { EXIF_ERRLOG_FILEEOF(ImageInfo) return false; @@ -4049,7 +4068,9 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_NOTICE, "Read from TIFF: filesize(x%04X), IFD dir(x%04X + x%04X)", ImageInfo->FileSize, dir_offset, 2); #endif php_stream_seek(ImageInfo->infile, dir_offset, SEEK_SET); /* we do not know the order of sections */ - php_stream_read(ImageInfo->infile, (char*)ImageInfo->file.list[sn].data, 2); + if (UNEXPECTED(exif_read_from_stream_file_looped(ImageInfo->infile, (char*)ImageInfo->file.list[sn].data, 2) != 2)) { + return false; + } num_entries = php_ifd_get16u(ImageInfo->file.list[sn].data, ImageInfo->motorola_intel); dir_size = 2/*num dir entries*/ +12/*length of entry*/*(size_t)num_entries +4/* offset to next ifd (points to thumbnail or NULL)*/; if (ImageInfo->FileSize >= dir_size && ImageInfo->FileSize - dir_size >= dir_offset) { @@ -4059,7 +4080,9 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir if (exif_file_sections_realloc(ImageInfo, sn, dir_size)) { return false; } - php_stream_read(ImageInfo->infile, (char*)(ImageInfo->file.list[sn].data+2), dir_size-2); + if (UNEXPECTED(exif_read_from_stream_file_looped(ImageInfo->infile, (char*)(ImageInfo->file.list[sn].data+2), dir_size-2) != dir_size - 2)) { + return false; + } next_offset = 
php_ifd_get32u(ImageInfo->file.list[sn].data + dir_size - 4, ImageInfo->motorola_intel); #ifdef EXIF_DEBUG exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_NOTICE, "Read from TIFF done, next offset x%04X", next_offset); @@ -4147,7 +4170,7 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir #ifdef EXIF_DEBUG exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_NOTICE, "Read from TIFF: filesize(x%04X), IFD(x%04X + x%04X)", ImageInfo->FileSize, dir_offset, ifd_size); #endif - php_stream_read(ImageInfo->infile, (char*)(ImageInfo->file.list[sn].data+dir_size), ifd_size-dir_size); + exif_read_from_stream_file_looped(ImageInfo->infile, (char*)(ImageInfo->file.list[sn].data+dir_size), ifd_size-dir_size); #ifdef EXIF_DEBUG exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_NOTICE, "Read from TIFF, done"); #endif @@ -4198,7 +4221,7 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir if (!ImageInfo->Thumbnail.data) { ImageInfo->Thumbnail.data = safe_emalloc(ImageInfo->Thumbnail.size, 1, 0); php_stream_seek(ImageInfo->infile, ImageInfo->Thumbnail.offset, SEEK_SET); - fgot = php_stream_read(ImageInfo->infile, ImageInfo->Thumbnail.data, ImageInfo->Thumbnail.size); + fgot = exif_read_from_stream_file_looped(ImageInfo->infile, ImageInfo->Thumbnail.data, ImageInfo->Thumbnail.size); if (fgot != ImageInfo->Thumbnail.size) { EXIF_ERRLOG_THUMBEOF(ImageInfo) efree(ImageInfo->Thumbnail.data); @@ -4238,7 +4261,7 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir if (!ImageInfo->Thumbnail.data && ImageInfo->Thumbnail.offset && ImageInfo->Thumbnail.size && ImageInfo->read_thumbnail) { ImageInfo->Thumbnail.data = safe_emalloc(ImageInfo->Thumbnail.size, 1, 0); php_stream_seek(ImageInfo->infile, ImageInfo->Thumbnail.offset, SEEK_SET); - fgot = php_stream_read(ImageInfo->infile, ImageInfo->Thumbnail.data, ImageInfo->Thumbnail.size); + fgot = exif_read_from_stream_file_looped(ImageInfo->infile, 
ImageInfo->Thumbnail.data, ImageInfo->Thumbnail.size); if (fgot != ImageInfo->Thumbnail.size) { EXIF_ERRLOG_THUMBEOF(ImageInfo) efree(ImageInfo->Thumbnail.data); @@ -4293,7 +4316,7 @@ static bool exif_scan_FILE_header(image_info_type *ImageInfo) if (ImageInfo->FileSize >= 2) { php_stream_seek(ImageInfo->infile, 0, SEEK_SET); - if (php_stream_read(ImageInfo->infile, (char*)file_header, 2) != 2) { + if (exif_read_from_stream_file_looped(ImageInfo->infile, (char*)file_header, 2) != 2) { return false; } if ((file_header[0]==0xff) && (file_header[1]==M_SOI)) { @@ -4304,7 +4327,7 @@ static bool exif_scan_FILE_header(image_info_type *ImageInfo) exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_WARNING, "Invalid JPEG file"); } } else if (ImageInfo->FileSize >= 8) { - if (php_stream_read(ImageInfo->infile, (char*)(file_header+2), 6) != 6) { + if (exif_read_from_stream_file_looped(ImageInfo->infile, (char*)(file_header+2), 6) != 6) { return false; } if (!memcmp(file_header, "II\x2A\x00", 4)) { diff --git a/ext/exif/tests/gh10834.phpt b/ext/exif/tests/gh10834.phpt new file mode 100644 index 0000000000000..3c9caebdb70b6 --- /dev/null +++ b/ext/exif/tests/gh10834.phpt @@ -0,0 +1,79 @@ +--TEST-- +GH-10834 (exif_read_data() cannot read smaller stream wrapper chunk sizes) +--EXTENSIONS-- +exif +--FILE-- +position >= strlen($this->data); + } + + function stream_open($path, $mode, $options, &$opened_path) { + $this->position = 0; + $this->data = file_get_contents(__DIR__.'/bug50845.jpg'); + return true; + } + + function stream_seek($offset, $whence) { + switch ($whence) { + case SEEK_SET: + if ($offset < strlen($this->data) && $offset >= 0) { + $this->position = $offset; + return true; + } else { + return false; + } + break; + case SEEK_CUR: + if ($offset >= 0) { + $this->position += $offset; + return true; + } else { + return false; + } + break; + case SEEK_END: + if (strlen($this->data) + $offset >= 0) { + $this->position = strlen($this->data) + $offset; + return true; + } else { + 
return false; + } + break; + default: + return false; + } + } + + function stream_read($count) { + $ret = substr($this->data, $this->position, $count); + $this->position += strlen($ret); + return $ret; + } + + function stream_tell() { + return $this->position; + } +} + +stream_wrapper_register('var', 'VariableStream'); + +$fp = fopen('var://myvar', 'rb'); + +stream_set_chunk_size($fp, 10); +$headers = exif_read_data($fp); +var_dump(is_array($headers)); + +fclose($fp); +?> +--EXPECT-- +bool(true) From 102953735c13943694ece1698676426c88392104 Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Sat, 15 Apr 2023 16:07:09 +0100 Subject: [PATCH 032/168] Fix GH-10461: Postpone FPM child freeing in event loop This prevents use-after-free access to the child event, which can happen when a child is killed and a message is delivered at the same time. It also fixes GH-10889 and properly fixes GH-8517, which was not previously fixed correctly. --- NEWS | 4 ++++ sapi/fpm/fpm/fpm_children.c | 25 ++++++++++++++++++++++++- sapi/fpm/fpm/fpm_children.h | 5 +++-- sapi/fpm/fpm/fpm_process_ctl.c | 2 +- sapi/fpm/fpm/fpm_stdio.c | 10 ++++------ 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/NEWS b/NEWS index a835815dd17bc..c01d4d36dfa76 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,10 @@ PHP NEWS . Fixed bug GH-10834 (exif_read_data() cannot read smaller stream wrapper chunk sizes). (nielsdos) +- FPM: + . Fixed bug GH-10461 (PHP-FPM segfault due to after free usage of + child->ev_std(out|err)). (Jakub Zelenka) + - Hash: . Fixed bug GH-11180 (hash_file() appears to be restricted to 3 arguments). 
(nielsdos) diff --git a/sapi/fpm/fpm/fpm_children.c b/sapi/fpm/fpm/fpm_children.c index 2f8e3dc4d0acc..1c9780e3de3c1 100644 --- a/sapi/fpm/fpm/fpm_children.c +++ b/sapi/fpm/fpm/fpm_children.c @@ -63,10 +63,27 @@ static void fpm_child_free(struct fpm_child_s *child) /* {{{ */ } /* }}} */ +static void fpm_postponed_child_free(struct fpm_event_s *ev, short which, void *arg) +{ + struct fpm_child_s *child = (struct fpm_child_s *) arg; + + if (child->fd_stdout != -1) { + fpm_event_del(&child->ev_stdout); + close(child->fd_stdout); + } + if (child->fd_stderr != -1) { + fpm_event_del(&child->ev_stderr); + close(child->fd_stderr); + } + + fpm_child_free((struct fpm_child_s *) child); +} + static void fpm_child_close(struct fpm_child_s *child, int in_event_loop) /* {{{ */ { if (child->fd_stdout != -1) { if (in_event_loop) { + child->postponed_free = true; fpm_event_fire(&child->ev_stdout); } if (child->fd_stdout != -1) { @@ -76,6 +93,7 @@ static void fpm_child_close(struct fpm_child_s *child, int in_event_loop) /* {{{ if (child->fd_stderr != -1) { if (in_event_loop) { + child->postponed_free = true; fpm_event_fire(&child->ev_stderr); } if (child->fd_stderr != -1) { @@ -83,7 +101,12 @@ static void fpm_child_close(struct fpm_child_s *child, int in_event_loop) /* {{{ } } - fpm_child_free(child); + if (in_event_loop && child->postponed_free) { + fpm_event_set_timer(&child->ev_free, 0, &fpm_postponed_child_free, child); + fpm_event_add(&child->ev_free, 1000); + } else { + fpm_child_free(child); + } } /* }}} */ diff --git a/sapi/fpm/fpm/fpm_children.h b/sapi/fpm/fpm/fpm_children.h index 679c34ba0383e..fe06eb3ba84cd 100644 --- a/sapi/fpm/fpm/fpm_children.h +++ b/sapi/fpm/fpm/fpm_children.h @@ -23,12 +23,13 @@ struct fpm_child_s { struct fpm_child_s *prev, *next; struct timeval started; struct fpm_worker_pool_s *wp; - struct fpm_event_s ev_stdout, ev_stderr; + struct fpm_event_s ev_stdout, ev_stderr, ev_free; int shm_slot_i; int fd_stdout, fd_stderr; void (*tracer)(struct 
fpm_child_s *); struct timeval slow_logged; - int idle_kill; + bool idle_kill; + bool postponed_free; pid_t pid; int scoreboard_i; struct zlog_stream *log_stream; diff --git a/sapi/fpm/fpm/fpm_process_ctl.c b/sapi/fpm/fpm/fpm_process_ctl.c index 48eb0003d4918..7a55d98b046fc 100644 --- a/sapi/fpm/fpm/fpm_process_ctl.c +++ b/sapi/fpm/fpm/fpm_process_ctl.c @@ -318,7 +318,7 @@ static void fpm_pctl_kill_idle_child(struct fpm_child_s *child) /* {{{ */ if (child->idle_kill) { fpm_pctl_kill(child->pid, FPM_PCTL_KILL); } else { - child->idle_kill = 1; + child->idle_kill = true; fpm_pctl_kill(child->pid, FPM_PCTL_QUIT); } } diff --git a/sapi/fpm/fpm/fpm_stdio.c b/sapi/fpm/fpm/fpm_stdio.c index 78326924acd9b..8f71e8cbfcd08 100644 --- a/sapi/fpm/fpm/fpm_stdio.c +++ b/sapi/fpm/fpm/fpm_stdio.c @@ -181,10 +181,7 @@ static void fpm_stdio_child_said(struct fpm_event_s *ev, short which, void *arg) if (!arg) { return; } - child = fpm_child_find((intptr_t) arg); - if (!child) { - return; - } + child = (struct fpm_child_s *) arg; is_stdout = (fd == child->fd_stdout); if (is_stdout) { @@ -277,6 +274,7 @@ static void fpm_stdio_child_said(struct fpm_event_s *ev, short which, void *arg) fpm_event_del(event); + child->postponed_free = true; if (is_stdout) { close(child->fd_stdout); child->fd_stdout = -1; @@ -330,10 +328,10 @@ int fpm_stdio_parent_use_pipes(struct fpm_child_s *child) /* {{{ */ child->fd_stdout = fd_stdout[0]; child->fd_stderr = fd_stderr[0]; - fpm_event_set(&child->ev_stdout, child->fd_stdout, FPM_EV_READ, fpm_stdio_child_said, (void *) (intptr_t) child->pid); + fpm_event_set(&child->ev_stdout, child->fd_stdout, FPM_EV_READ, fpm_stdio_child_said, child); fpm_event_add(&child->ev_stdout, 0); - fpm_event_set(&child->ev_stderr, child->fd_stderr, FPM_EV_READ, fpm_stdio_child_said, (void *) (intptr_t) child->pid); + fpm_event_set(&child->ev_stderr, child->fd_stderr, FPM_EV_READ, fpm_stdio_child_said, child); fpm_event_add(&child->ev_stderr, 0); return 0; } From 
e8a836eb394c956658e4ec77d7322cd7956a9b53 Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Mon, 10 Apr 2023 14:13:34 +0100 Subject: [PATCH 033/168] Expose JSON internal function to escape string --- ext/json/json.c | 15 +++++++++++++++ ext/json/json_encoder.c | 6 +----- ext/json/php_json.h | 2 ++ ext/json/php_json_encoder.h | 2 ++ 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/ext/json/json.c b/ext/json/json.c index c8f0ed5e93a3d..13efeb901e322 100644 --- a/ext/json/json.c +++ b/ext/json/json.c @@ -142,6 +142,21 @@ static PHP_MINFO_FUNCTION(json) } /* }}} */ +PHP_JSON_API zend_string *php_json_encode_string(const char *s, size_t len, int options) +{ + smart_str buf = {0}; + php_json_encoder encoder; + + php_json_encode_init(&encoder); + + if (php_json_escape_string(&buf, s, len, options, &encoder) == FAILURE) { + smart_str_free(&buf); + return NULL; + } + + return smart_str_extract(&buf); +} + PHP_JSON_API int php_json_encode_ex(smart_str *buf, zval *val, int options, zend_long depth) /* {{{ */ { php_json_encoder encoder; diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index b0f703041b068..f3523ed3258b4 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -31,10 +31,6 @@ static const char digits[] = "0123456789abcdef"; -static int php_json_escape_string( - smart_str *buf, const char *s, size_t len, - int options, php_json_encoder *encoder); - static int php_json_determine_array_type(zval *val) /* {{{ */ { zend_array *myht = Z_ARRVAL_P(val); @@ -312,7 +308,7 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso } /* }}} */ -static int php_json_escape_string( +int php_json_escape_string( smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ { diff --git a/ext/json/php_json.h b/ext/json/php_json.h index 89d04ed7f4d57..f8905b10cbb80 100644 --- a/ext/json/php_json.h +++ b/ext/json/php_json.h @@ -97,6 +97,8 @@ PHP_JSON_API ZEND_EXTERN_MODULE_GLOBALS(json) 
ZEND_TSRMLS_CACHE_EXTERN() #endif +PHP_JSON_API zend_string *php_json_encode_string(const char *s, size_t len, int options); + PHP_JSON_API int php_json_encode_ex(smart_str *buf, zval *val, int options, zend_long depth); PHP_JSON_API int php_json_encode(smart_str *buf, zval *val, int options); PHP_JSON_API int php_json_decode_ex(zval *return_value, const char *str, size_t str_len, zend_long options, zend_long depth); diff --git a/ext/json/php_json_encoder.h b/ext/json/php_json_encoder.h index 51d2d6b59ab49..a1ddd3c349e63 100644 --- a/ext/json/php_json_encoder.h +++ b/ext/json/php_json_encoder.h @@ -35,4 +35,6 @@ static inline void php_json_encode_init(php_json_encoder *encoder) int php_json_encode_zval(smart_str *buf, zval *val, int options, php_json_encoder *encoder); +int php_json_escape_string(smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder); + #endif /* PHP_JSON_ENCODER_H */ From 5e64ead64ab5eaba5d62847483c847c1836171d7 Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Mon, 10 Apr 2023 14:15:57 +0100 Subject: [PATCH 034/168] Fix bug #64539: FPM status - query_string not properly JSON encoded Closes GH-11050 --- NEWS | 2 + sapi/fpm/fpm/fpm_status.c | 34 ++++++++++--- .../tests/bug64539-status-json-encoding.phpt | 50 +++++++++++++++++++ sapi/fpm/tests/response.inc | 12 +++-- sapi/fpm/tests/tester.inc | 4 ++ 5 files changed, 90 insertions(+), 12 deletions(-) create mode 100644 sapi/fpm/tests/bug64539-status-json-encoding.phpt diff --git a/NEWS b/NEWS index c01d4d36dfa76..b5a60d5c12cb9 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,8 @@ PHP NEWS - FPM: . Fixed bug GH-10461 (PHP-FPM segfault due to after free usage of child->ev_std(out|err)). (Jakub Zelenka) + . Fixed bug #64539 (FPM status page: query_string not properly JSON encoded). + (Jakub Zelenka) - Hash: . Fixed bug GH-11180 (hash_file() appears to be restricted to 3 arguments). 
diff --git a/sapi/fpm/fpm/fpm_status.c b/sapi/fpm/fpm/fpm_status.c index 514d60d176e39..f0d869444afce 100644 --- a/sapi/fpm/fpm/fpm_status.c +++ b/sapi/fpm/fpm/fpm_status.c @@ -13,7 +13,8 @@ #include "fpm_atomic.h" #include "fpm_conf.h" #include "fpm_php.h" -#include <ext/standard/html.h> +#include "ext/standard/html.h" +#include "ext/json/php_json.h" static char *fpm_status_uri = NULL; static char *fpm_status_ping_uri = NULL; @@ -140,7 +141,8 @@ int fpm_status_handle_request(void) /* {{{ */ struct fpm_scoreboard_proc_s *proc; char *buffer, *time_format, time_buffer[64]; time_t now_epoch; - int full, encode, has_start_time; + int full, has_start_time; + bool encode_html, encode_json; char *short_syntax, *short_post; char *full_pre, *full_syntax, *full_post, *full_separator; zend_string *_GET_str; @@ -175,7 +177,8 @@ int fpm_status_handle_request(void) /* {{{ */ full = (fpm_php_get_string_from_table(_GET_str, "full") != NULL); short_syntax = short_post = NULL; full_separator = full_pre = full_syntax = full_post = NULL; - encode = 0; + encode_html = false; + encode_json = false; has_start_time = 1; scoreboard_p = fpm_scoreboard_get(); @@ -218,7 +221,7 @@ int fpm_status_handle_request(void) /* {{{ */ if (fpm_php_get_string_from_table(_GET_str, "html")) { sapi_add_header_ex(ZEND_STRL("Content-Type: text/html"), 1, 1); time_format = "%d/%b/%Y:%H:%M:%S %z"; - encode = 1; + encode_html = true; short_syntax = "\n" @@ -287,7 +290,7 @@ int fpm_status_handle_request(void) /* {{{ */ } else if (fpm_php_get_string_from_table(_GET_str, "xml")) { sapi_add_header_ex(ZEND_STRL("Content-Type: text/xml"), 1, 1); time_format = "%s"; - encode = 1; + encode_html = true; short_syntax = "\n" @@ -336,6 +339,8 @@ int fpm_status_handle_request(void) /* {{{ */ sapi_add_header_ex(ZEND_STRL("Content-Type: application/json"), 1, 1); time_format = "%s"; + encode_json = true; + short_syntax = "{" "\"pool\":\"%s\"," @@ -549,11 +554,24 @@ int fpm_status_handle_request(void) /* {{{ */ query_string = NULL; tmp_query_string 
= NULL; if (proc->query_string[0] != '\0') { - if (!encode) { - query_string = proc->query_string; + if (encode_html) { + tmp_query_string = php_escape_html_entities_ex( + (const unsigned char *) proc->query_string, + strlen(proc->query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, + NULL, /* double_encode */ 1, /* quiet */ 0); + } else if (encode_json) { + tmp_query_string = php_json_encode_string(proc->query_string, + strlen(proc->query_string), PHP_JSON_INVALID_UTF8_IGNORE); } else { - tmp_query_string = php_escape_html_entities_ex((const unsigned char *) proc->query_string, strlen(proc->query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, NULL, /* double_encode */ 1, /* quiet */ 0); + query_string = proc->query_string; + } + if (tmp_query_string) { query_string = ZSTR_VAL(tmp_query_string); + /* remove quotes around the string */ + if (encode_json && ZSTR_LEN(tmp_query_string) >= 2) { + query_string[ZSTR_LEN(tmp_query_string) - 1] = '\0'; + ++query_string; + } } } diff --git a/sapi/fpm/tests/bug64539-status-json-encoding.phpt b/sapi/fpm/tests/bug64539-status-json-encoding.phpt new file mode 100644 index 0000000000000..0d735925593a4 --- /dev/null +++ b/sapi/fpm/tests/bug64539-status-json-encoding.phpt @@ -0,0 +1,50 @@ +--TEST-- +FPM: bug64539 - status json format escaping +--SKIPIF-- + +--FILE-- +start(); +$tester->expectLogStartNotices(); +$responses = $tester + ->multiRequest([ + ['query' => 'a=b"c'], + ['uri' => '/status', 'query' => 'full&json', 'delay' => 100000], + ]); +$data = json_decode($responses[1]->getBody('application/json'), true); +var_dump(explode('?', $data['processes'][0]['request uri'])[1]); +$tester->terminate(); +$tester->expectLogTerminatingNotices(); +$tester->close(); + +?> +Done +--EXPECT-- +string(5) "a=b"c" +Done +--CLEAN-- + diff --git a/sapi/fpm/tests/response.inc b/sapi/fpm/tests/response.inc index 99290e72f41d4..c1c4566d4b12d 100644 --- a/sapi/fpm/tests/response.inc +++ b/sapi/fpm/tests/response.inc @@ -192,18 +192,22 @@ class 
Response /** * Print raw body. + * + * @param string $contentType Expect body to have specified content type. */ - public function dumpBody() + public function dumpBody(string $contentType = 'text/html') { - var_dump($this->getBody()); + var_dump($this->getBody($contentType)); } /** * Print raw body. + * + * @param string $contentType Expect body to have specified content type. */ - public function printBody() + public function printBody(string $contentType = 'text/html') { - echo $this->getBody() . "\n"; + echo $this->getBody($contentType) . "\n"; } /** diff --git a/sapi/fpm/tests/tester.inc b/sapi/fpm/tests/tester.inc index 0b6ad9d0f831b..ddab17162a227 100644 --- a/sapi/fpm/tests/tester.inc +++ b/sapi/fpm/tests/tester.inc @@ -795,6 +795,10 @@ class Tester $requestData['uri'] ?? null ); + if (isset($requestData['delay'])) { + usleep($requestData['delay']); + } + return [ 'client' => $client, 'requestId' => $client->async_request($params, false), From 4294e8d448c38fa379ebf0daf80aef2254b665bd Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Sun, 16 Apr 2023 15:54:45 +0100 Subject: [PATCH 035/168] FPM: Fix memory leak for invalid primary script file handle Closes GH-11088 --- NEWS | 1 + sapi/fpm/fpm/fpm_main.c | 19 ++++++++----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/NEWS b/NEWS index b5a60d5c12cb9..9c44373d21534 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,7 @@ PHP NEWS child->ev_std(out|err)). (Jakub Zelenka) . Fixed bug #64539 (FPM status page: query_string not properly JSON encoded). (Jakub Zelenka) + . Fixed memory leak for invalid primary script file handle. (Jakub Zelenka) - Hash: . Fixed bug GH-11180 (hash_file() appears to be restricted to 3 arguments). 
diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index 64ef27dadeb39..b91bb8d055dff 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1924,19 +1924,16 @@ consult the installation file that came with this distribution, or visit \n\ } } zend_catch { } zend_end_try(); - /* we want to serve more requests if this is fastcgi - * so cleanup and continue, request shutdown is - * handled later */ - - goto fastcgi_request_done; - } - - fpm_request_executing(); + /* We want to serve more requests if this is fastcgi so cleanup and continue, + * request shutdown is handled later. */ + } else { + fpm_request_executing(); - /* Reset exit status from the previous execution */ - EG(exit_status) = 0; + /* Reset exit status from the previous execution */ + EG(exit_status) = 0; - php_execute_script(&file_handle); + php_execute_script(&file_handle); + } /* Without opcache, or the first time with opcache, the file handle will be placed * in the CG(open_files) list by open_file_for_scanning(). 
Starting from the second From ac41608797d7d9258d3ccbf35a0ee55631d157ee Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Sun, 14 May 2023 22:10:23 +0200 Subject: [PATCH 036/168] Fix -Wenum-int-mismatch warning in ext/json/php_json_encoder.h --- ext/json/php_json_encoder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/json/php_json_encoder.h b/ext/json/php_json_encoder.h index 3174e77958124..d66d71cb5538c 100644 --- a/ext/json/php_json_encoder.h +++ b/ext/json/php_json_encoder.h @@ -35,6 +35,6 @@ static inline void php_json_encode_init(php_json_encoder *encoder) zend_result php_json_encode_zval(smart_str *buf, zval *val, int options, php_json_encoder *encoder); -int php_json_escape_string(smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder); +zend_result php_json_escape_string(smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder); #endif /* PHP_JSON_ENCODER_H */ From 6bd546462c4b1addc5453ed963192ed36a75b728 Mon Sep 17 00:00:00 2001 From: Sara Date: Mon, 15 May 2023 15:28:43 +0800 Subject: [PATCH 037/168] Cacheline demote to improve performance (#11101) Once code is emitted to JIT buffer, hint the hardware to demote the corresponding cache lines to more distant level so other CPUs can access them more quickly. This gets nearly 1% performance gain on our workload. 
Signed-off-by: Xue,Wang Signed-off-by: Tao,Su Signed-off-by: Hu,chen --- Zend/zend_cpuinfo.h | 11 +++++++++++ ext/opcache/jit/zend_jit.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/Zend/zend_cpuinfo.h b/Zend/zend_cpuinfo.h index 31e7c54e0b6f0..9d221c59e541a 100644 --- a/Zend/zend_cpuinfo.h +++ b/Zend/zend_cpuinfo.h @@ -258,4 +258,15 @@ static inline int zend_cpu_supports_pclmul(void) { } #endif +/* __builtin_cpu_supports has cldemote from gcc11 */ +#if PHP_HAVE_BUILTIN_CPU_SUPPORTS && defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000) +ZEND_NO_SANITIZE_ADDRESS +static inline int zend_cpu_supports_cldemote(void) { +#if PHP_HAVE_BUILTIN_CPU_INIT + __builtin_cpu_init(); +#endif + return __builtin_cpu_supports("cldemote"); +} +#endif + #endif diff --git a/ext/opcache/jit/zend_jit.c b/ext/opcache/jit/zend_jit.c index e5a748d8355a8..874eff576b554 100644 --- a/ext/opcache/jit/zend_jit.c +++ b/ext/opcache/jit/zend_jit.c @@ -144,6 +144,31 @@ static zend_jit_trace_info *zend_jit_get_current_trace_info(void); static uint32_t zend_jit_trace_find_exit_point(const void* addr); #endif +#if ZEND_JIT_TARGET_X86 && defined(__linux__) +# if PHP_HAVE_BUILTIN_CPU_SUPPORTS && defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000) +# define ZEND_JIT_SUPPORT_CLDEMOTE 1 +# else +# define ZEND_JIT_SUPPORT_CLDEMOTE 0 +# endif +#endif + +#if ZEND_JIT_SUPPORT_CLDEMOTE +#include +#pragma GCC push_options +#pragma GCC target("cldemote") +// check cldemote by CPUID when JIT startup +static int cpu_support_cldemote = 0; +static inline void shared_cacheline_demote(uintptr_t start, size_t size) { + uintptr_t cache_line_base = start & ~0x3F; + do { + _cldemote((void *)cache_line_base); + // next cacheline start size + cache_line_base += 64; + } while (cache_line_base < start + size); +} +#pragma GCC pop_options +#endif + static int zend_jit_assign_to_variable(dasm_State **Dst, const zend_op *opline, zend_jit_addr var_use_addr, @@ -973,6 +998,12 @@ static void 
*dasm_link_and_encode(dasm_State **dasm_state, /* flush the hardware I-cache */ JIT_CACHE_FLUSH(entry, entry + size); + /* hint to the hardware to push out the cache line that contains the linear address */ +#if ZEND_JIT_SUPPORT_CLDEMOTE + if (cpu_support_cldemote && JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + shared_cacheline_demote((uintptr_t)entry, size); + } +#endif if (trace_num) { zend_jit_trace_add_code(entry, dasm_getpclabel(dasm_state, 1)); @@ -4902,6 +4933,10 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, bool reattached) zend_jit_gdb_init(); #endif +#if ZEND_JIT_SUPPORT_CLDEMOTE + cpu_support_cldemote = zend_cpu_supports_cldemote(); +#endif + #ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP zend_write_protect = pthread_jit_write_protect_supported_np(); #endif From 0600f513b374969992f86591ad9834a32df319a8 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Wed, 10 May 2023 20:38:33 +0200 Subject: [PATCH 038/168] Implement delayed early binding for classes without parents Normally, we add classes without parents (and no interfaces or traits) directly to the class map, early binding the class. However, if the same class has already been registered, we would instead just add a ZEND_DECLARE_CLASS instruction and let the handler throw a duplicate class declaration exception. However, with opcache, if on the next request the files are included in the opposite order, we won't perform early binding. To fix this, create a ZEND_DECLARE_CLASS_DELAYED instruction instead and handle classes without parents accordingly, skipping any linking for classes that are already linked in delayed early binding. 
Fixes GH-8846 --- NEWS | 2 ++ Zend/zend_compile.c | 11 ++++++- Zend/zend_inheritance.c | 11 ++++++- ext/opcache/tests/gh8846-1.inc | 4 +++ ext/opcache/tests/gh8846-2.inc | 5 +++ ext/opcache/tests/gh8846.phpt | 39 +++++++++++++++++++++++ ext/opcache/zend_accelerator_util_funcs.c | 7 ++-- 7 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 ext/opcache/tests/gh8846-1.inc create mode 100644 ext/opcache/tests/gh8846-2.inc create mode 100644 ext/opcache/tests/gh8846.phpt diff --git a/NEWS b/NEWS index ce096488a9dd6..b2527affb28ba 100644 --- a/NEWS +++ b/NEWS @@ -35,6 +35,8 @@ PHP NEWS has inherited it from its parent). (ilutov) . Fix bug GH-11154 (Negative indices on empty array don't affect next chosen index). (ColinHDev) + . Fix bug GH-8846 (Implement delayed early binding for classes without + parents). (ilutov) - Date: . Implement More Appropriate Date/Time Exceptions RFC. (Derick) diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 084c47f45bc47..04244b0de626a 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -8057,8 +8057,11 @@ static void zend_compile_class_decl(znode *result, zend_ast *ast, bool toplevel) ce->ce_flags |= ZEND_ACC_LINKED; zend_observer_class_linked_notify(ce, lcname); return; + } else { + goto link_unbound; } } else if (!extends_ast) { +link_unbound: /* Link unbound simple class */ zend_build_properties_info_table(ce); ce->ce_flags |= ZEND_ACC_LINKED; @@ -8098,11 +8101,17 @@ static void zend_compile_class_decl(znode *result, zend_ast *ast, bool toplevel) zend_add_literal_string(&key); opline->opcode = ZEND_DECLARE_CLASS; - if (extends_ast && toplevel + if (toplevel && (CG(compiler_options) & ZEND_COMPILE_DELAYED_BINDING) /* We currently don't early-bind classes that implement interfaces or use traits */ && !ce->num_interfaces && !ce->num_traits ) { + if (!extends_ast) { + /* Use empty string for classes without parents to avoid new handler, and special + * handling of zend_early_binding. 
*/ + opline->op2_type = IS_CONST; + LITERAL_STR(opline->op2, ZSTR_EMPTY_ALLOC()); + } CG(active_op_array)->fn_flags |= ZEND_ACC_EARLY_BINDING; opline->opcode = ZEND_DECLARE_CLASS_DELAYED; opline->extended_value = zend_alloc_cache_slot(); diff --git a/Zend/zend_inheritance.c b/Zend/zend_inheritance.c index ff3a4d7080751..17bbae8335445 100644 --- a/Zend/zend_inheritance.c +++ b/Zend/zend_inheritance.c @@ -3276,8 +3276,17 @@ ZEND_API zend_class_entry *zend_try_early_bind(zend_class_entry *ce, zend_class_ inheritance_status status; zend_class_entry *proto = NULL; zend_class_entry *orig_linking_class; - uint32_t is_cacheable = ce->ce_flags & ZEND_ACC_IMMUTABLE; + if (ce->ce_flags & ZEND_ACC_LINKED) { + ZEND_ASSERT(ce->parent == NULL); + if (UNEXPECTED(!register_early_bound_ce(delayed_early_binding, lcname, ce))) { + return NULL; + } + zend_observer_class_linked_notify(ce, lcname); + return ce; + } + + uint32_t is_cacheable = ce->ce_flags & ZEND_ACC_IMMUTABLE; UPDATE_IS_CACHEABLE(parent_ce); if (is_cacheable) { if (zend_inheritance_cache_get && zend_inheritance_cache_add) { diff --git a/ext/opcache/tests/gh8846-1.inc b/ext/opcache/tests/gh8846-1.inc new file mode 100644 index 0000000000000..6169e1cfea4a9 --- /dev/null +++ b/ext/opcache/tests/gh8846-1.inc @@ -0,0 +1,4 @@ + +--CLEAN-- + +--EXPECTF-- +bool(true) +
+Fatal error: Cannot declare class Foo, because the name is already in use in %sgh8846-2.inc on line %d
+ +bool(true) +Ok diff --git a/ext/opcache/zend_accelerator_util_funcs.c b/ext/opcache/zend_accelerator_util_funcs.c index 1a67e2b6e661b..e021270eff3d3 100644 --- a/ext/opcache/zend_accelerator_util_funcs.c +++ b/ext/opcache/zend_accelerator_util_funcs.c @@ -357,9 +357,10 @@ static void zend_accel_do_delayed_early_binding( zval *zv = zend_hash_find_known_hash(EG(class_table), early_binding->rtd_key); if (zv) { zend_class_entry *orig_ce = Z_CE_P(zv); - zend_class_entry *parent_ce = - zend_hash_find_ex_ptr(EG(class_table), early_binding->lc_parent_name, 1); - if (parent_ce) { + zend_class_entry *parent_ce = !(orig_ce->ce_flags & ZEND_ACC_LINKED) + ? zend_hash_find_ex_ptr(EG(class_table), early_binding->lc_parent_name, 1) + : NULL; + if (parent_ce || (orig_ce->ce_flags & ZEND_ACC_LINKED)) { ce = zend_try_early_bind(orig_ce, parent_ce, early_binding->lcname, zv); } } From aa553af911ba6b8886849df5101dc90a9a4b24bc Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Wed, 10 May 2023 14:12:27 +0200 Subject: [PATCH 039/168] Fix segfault in mb_strrpos/mb_strripos with ASCII encoding and negative offset We're setting the encoding from PHP_FUNCTION(mb_strpos), but mbfl_strpos would discard it, setting it to mbfl_encoding_pass, making zend_memnrstr fail due to a null-pointer exception. Fixes GH-11217 Closes GH-11220 --- NEWS | 4 ++++ ext/mbstring/libmbfl/mbfl/mbfilter.c | 4 ++-- ext/mbstring/tests/gh11217.phpt | 12 ++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 ext/mbstring/tests/gh11217.phpt diff --git a/NEWS b/NEWS index 94083381373fe..3d5888b896227 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,10 @@ PHP NEWS . Fixed bug GH-11160 (Few tests failed building with new libxml 2.11.0). (nielsdos) +- MBString: + . Fix bug GH-11217 (Segfault in mb_strrpos / mb_strripos when using negative + offset and ASCII encoding). (ilutov) + - Opcache: . Fixed bug GH-11134 (Incorrect match default branch optimization). (ilutov) . 
Fixed too wide OR and AND range inference. (nielsdos) diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 5f5ce07ce6f66..a517c12c72e02 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -594,7 +594,7 @@ mbfl_strpos( const unsigned char *offset_pointer; if (haystack->encoding->no_encoding != mbfl_no_encoding_utf8) { - mbfl_string_init(&_haystack_u8); + mbfl_string_init_set(&_haystack_u8, haystack->encoding); haystack_u8 = mbfl_convert_encoding(haystack, &_haystack_u8, &mbfl_encoding_utf8); if (haystack_u8 == NULL) { result = MBFL_ERROR_ENCODING; @@ -605,7 +605,7 @@ mbfl_strpos( } if (needle->encoding->no_encoding != mbfl_no_encoding_utf8) { - mbfl_string_init(&_needle_u8); + mbfl_string_init_set(&_needle_u8, needle->encoding); needle_u8 = mbfl_convert_encoding(needle, &_needle_u8, &mbfl_encoding_utf8); if (needle_u8 == NULL) { result = MBFL_ERROR_ENCODING; diff --git a/ext/mbstring/tests/gh11217.phpt b/ext/mbstring/tests/gh11217.phpt new file mode 100644 index 0000000000000..d500f22cbd7bb --- /dev/null +++ b/ext/mbstring/tests/gh11217.phpt @@ -0,0 +1,12 @@ +--TEST-- +GH-11217: Segfault in mb_strrpos/mb_strripos with ASCII encoding and negative offset +--EXTENSIONS-- +mbstring +--FILE-- + +--EXPECT-- +int(0) +int(0) From c5a623ba5eacad833c12ec6ce88f5daeeed5a8a6 Mon Sep 17 00:00:00 2001 From: Randy Geraads Date: Wed, 10 May 2023 13:25:39 +0200 Subject: [PATCH 040/168] Added negative offset test for mb_strrpos Should expose https://github.com/php/php-src/issues/11217 --- ext/mbstring/tests/mb_strrpos_basic.phpt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ext/mbstring/tests/mb_strrpos_basic.phpt b/ext/mbstring/tests/mb_strrpos_basic.phpt index 28e038da406bc..599dfd38da12a 100644 --- a/ext/mbstring/tests/mb_strrpos_basic.phpt +++ b/ext/mbstring/tests/mb_strrpos_basic.phpt @@ -22,6 +22,9 @@ var_dump(mb_strrpos($string_ascii, 'is', 4, 'ISO-8859-1')); echo "\n-- ASCII string 
2 --\n"; var_dump(mb_strrpos($string_ascii, 'hello, world')); +echo "\n-- ASCII string with negative offset --\n"; +var_dump(mb_strrpos($string_ascii, 'hello', -1, 'ISO-8859-1')); + echo "\n-- Multibyte string 1 --\n"; $needle1 = base64_decode('44CC'); var_dump(mb_strrpos($string_mb, $needle1)); @@ -41,6 +44,9 @@ int(15) -- ASCII string 2 -- bool(false) +-- ASCII string with negative offset -- +bool(false) + -- Multibyte string 1 -- int(20) From aae39fe5a7c873798714402de0bd278062462e06 Mon Sep 17 00:00:00 2001 From: Peter Kokot Date: Mon, 15 May 2023 17:54:40 +0200 Subject: [PATCH 041/168] Fix #9483: Fix autoconf warnings due to old libtool (#11207) --- build/libtool.m4 | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/build/libtool.m4 b/build/libtool.m4 index 91a1f9022f657..8ee7b453010f1 100644 --- a/build/libtool.m4 +++ b/build/libtool.m4 @@ -271,7 +271,7 @@ $rm -r conftest* dnl autoconf 2.13 compatibility dnl _LT_AC_TRY_LINK() -AC_DEFUN(_LT_AC_TRY_LINK, [ +AC_DEFUN([_LT_AC_TRY_LINK], [ cat > conftest.$ac_ext </dev/null && hard_links=no AC_MSG_RESULT([$hard_links]) if test "$hard_links" = no; then - AC_MSG_WARN([\`$CC' does not support \`-c -o', so \`make -j' may be unsafe]) + AC_MSG_WARN(['$CC' does not support '-c -o', so 'make -j' may be unsafe]) need_locks=warn fi else @@ -1935,15 +1934,15 @@ AC_ARG_WITH([tags], if test -f "$ltmain" && test -n "$tagnames"; then if test ! 
-f "${ofile}"; then - AC_MSG_WARN([output file \`$ofile' does not exist]) + AC_MSG_WARN([output file '$ofile' does not exist]) fi if test -z "$LTCC"; then eval "`$SHELL ${ofile} --config | grep '^LTCC='`" if test -z "$LTCC"; then - AC_MSG_WARN([output file \`$ofile' does not look like a libtool script]) + AC_MSG_WARN([output file '$ofile' does not look like a libtool script]) else - AC_MSG_WARN([using \`LTCC=$LTCC', extracted from \`$ofile']) + AC_MSG_WARN([using 'LTCC=$LTCC', extracted from '$ofile']) fi fi if test -z "$LTCFLAGS"; then @@ -1966,7 +1965,7 @@ if test -f "$ltmain" && test -n "$tagnames"; then if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "${ofile}" > /dev/null then - AC_MSG_ERROR([tag name \"$tagname\" already exists]) + AC_MSG_ERROR([tag name "$tagname" already exists]) fi # Update the list of available tags. @@ -2738,8 +2737,7 @@ fi AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG], [_LT_AC_LANG_C_CONFIG]) AC_DEFUN([_LT_AC_LANG_C_CONFIG], [lt_save_CC="$CC" -AC_LANG_SAVE -AC_LANG_C +AC_LANG_PUSH([C]) # Source file extension for C test sources. ac_ext=c @@ -2807,7 +2805,7 @@ AC_MSG_RESULT([$enable_static]) AC_LIBTOOL_CONFIG($1) -AC_LANG_RESTORE +AC_LANG_POP([C]) CC="$lt_save_CC" ])# AC_LIBTOOL_LANG_C_CONFIG @@ -2819,8 +2817,7 @@ CC="$lt_save_CC" # AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'. 
AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG], [_LT_AC_LANG_CXX_CONFIG(CXX)]) AC_DEFUN([_LT_AC_LANG_CXX_CONFIG], -[AC_LANG_SAVE -AC_LANG_CPLUSPLUS +[AC_LANG_PUSH([C++]) AC_REQUIRE([AC_PROG_CXX]) AC_REQUIRE([_LT_AC_PROG_CXXCPP]) @@ -3806,7 +3803,7 @@ AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1) AC_LIBTOOL_CONFIG($1) -AC_LANG_RESTORE +AC_LANG_POP([C++]) CC=$lt_save_CC LDCXX=$LD LD=$lt_save_LD From e35cd34bcdd680d87a370502771154627c95c285 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 16 May 2023 11:22:10 +0100 Subject: [PATCH 042/168] Fix assertion warning message when no description is provided --- ext/standard/assert.c | 2 +- ext/standard/tests/assert/assert_warnings.phpt | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 ext/standard/tests/assert/assert_warnings.phpt diff --git a/ext/standard/assert.c b/ext/standard/assert.c index 74467eb433f03..316e2a515d8af 100644 --- a/ext/standard/assert.c +++ b/ext/standard/assert.c @@ -177,7 +177,7 @@ PHP_FUNCTION(assert) zend_exception_error(EG(exception), E_ERROR); } } else if (ASSERTG(warning)) { - php_error_docref(NULL, E_WARNING, "%s failed", description_str ? ZSTR_VAL(description_str) : "Assertion failed"); + php_error_docref(NULL, E_WARNING, "%s failed", description_str ? ZSTR_VAL(description_str) : "Assertion"); } if (ASSERTG(bail)) { diff --git a/ext/standard/tests/assert/assert_warnings.phpt b/ext/standard/tests/assert/assert_warnings.phpt new file mode 100644 index 0000000000000..804575dbc1daf --- /dev/null +++ b/ext/standard/tests/assert/assert_warnings.phpt @@ -0,0 +1,13 @@ +--TEST-- +assert(): warnings with no descriptions. 
+--INI-- +assert.active = 1 +assert.warning = 1 +assert.bail = 0 +assert.exception=0 +--FILE-- + +--EXPECTF-- +Warning: assert(): Assertion failed in %s on line %d From 80c8ca9c8f582cc510b1734e261d12fd6fdd820a Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 16 May 2023 11:34:41 +0100 Subject: [PATCH 043/168] Use uint32_t for variable storing ZEND_NUM_ARGS() --- ext/standard/assert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/standard/assert.c b/ext/standard/assert.c index 316e2a515d8af..e9a3ef3158f25 100644 --- a/ext/standard/assert.c +++ b/ext/standard/assert.c @@ -195,7 +195,7 @@ PHP_FUNCTION(assert_options) zval *value = NULL; zend_long what; bool oldint; - int ac = ZEND_NUM_ARGS(); + uint32_t ac = ZEND_NUM_ARGS(); zend_string *key; ZEND_PARSE_PARAMETERS_START(1, 2) From 97e29bed9e598263eb44170b36ffa0ff1c4302b8 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sat, 22 Apr 2023 17:47:22 +0200 Subject: [PATCH 044/168] Use shared, immutable array for return value of mb_list_encodings This will allow us to easily check in other mbstring functions if the list of all supported encodings, returned by mb_list_encodings, is passed in as input to another function. 
Co-authored-by: Ilija Tovilo --- Zend/Optimizer/zend_func_infos.h | 2 +- ext/mbstring/mbstring.c | 26 +++++++++++++++++++++++--- ext/mbstring/mbstring.h | 1 + ext/mbstring/mbstring.stub.php | 1 - ext/mbstring/mbstring_arginfo.h | 2 +- 5 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Zend/Optimizer/zend_func_infos.h b/Zend/Optimizer/zend_func_infos.h index a3475fab6cc8f..34b8b9c4cbbf6 100644 --- a/Zend/Optimizer/zend_func_infos.h +++ b/Zend/Optimizer/zend_func_infos.h @@ -227,7 +227,7 @@ static const func_info_t func_infos[] = { F1("mb_strtoupper", MAY_BE_STRING), F1("mb_strtolower", MAY_BE_STRING), F1("mb_detect_encoding", MAY_BE_STRING|MAY_BE_FALSE), - F1("mb_list_encodings", MAY_BE_ARRAY|MAY_BE_ARRAY_KEY_LONG|MAY_BE_ARRAY_OF_STRING), + FN("mb_list_encodings", MAY_BE_ARRAY|MAY_BE_ARRAY_KEY_LONG|MAY_BE_ARRAY_OF_STRING), F1("mb_encoding_aliases", MAY_BE_ARRAY|MAY_BE_ARRAY_KEY_LONG|MAY_BE_ARRAY_OF_STRING), F1("mb_encode_mimeheader", MAY_BE_STRING), F1("mb_decode_mimeheader", MAY_BE_STRING), diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 73dea4e5c6b81..76977ec7bc494 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1016,6 +1016,7 @@ ZEND_TSRMLS_CACHE_UPDATE(); mbstring_globals->internal_encoding_set = 0; mbstring_globals->http_output_set = 0; mbstring_globals->http_input_set = 0; + mbstring_globals->all_encodings_list = NULL; } /* }}} */ @@ -1156,6 +1157,13 @@ PHP_RSHUTDOWN_FUNCTION(mbstring) MBSTRG(outconv_enabled) = false; MBSTRG(outconv_state) = 0; + if (MBSTRG(all_encodings_list)) { + GC_DELREF(MBSTRG(all_encodings_list)); + zend_hash_destroy(MBSTRG(all_encodings_list)); + efree(MBSTRG(all_encodings_list)); + MBSTRG(all_encodings_list) = NULL; + } + #ifdef HAVE_MBREGEX PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif @@ -3205,10 +3213,22 @@ PHP_FUNCTION(mb_list_encodings) { ZEND_PARSE_PARAMETERS_NONE(); - array_init(return_value); - for (const mbfl_encoding **encodings = 
mbfl_get_supported_encodings(); *encodings; encodings++) { - add_next_index_string(return_value, (*encodings)->name); + if (MBSTRG(all_encodings_list) == NULL) { + /* Initialize shared array of supported encoding names + * This is done so that we can check if `mb_list_encodings()` is being + * passed to other mbstring functions using a cheap pointer equality check */ + HashTable *array = emalloc(sizeof(HashTable)); + zend_hash_init(array, 80, NULL, zval_ptr_dtor_str, false); + for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) { + zval tmp; + ZVAL_STRING(&tmp, (*encodings)->name); + zend_hash_next_index_insert(array, &tmp); + } + MBSTRG(all_encodings_list) = array; } + + GC_ADDREF(MBSTRG(all_encodings_list)); + RETURN_ARR(MBSTRG(all_encodings_list)); } /* }}} */ diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 0837e45cf327a..7cd58a0277310 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -88,6 +88,7 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring) size_t current_detect_order_list_size; enum mbfl_no_encoding *default_detect_order_list; size_t default_detect_order_list_size; + HashTable *all_encodings_list; int filter_illegal_mode; uint32_t filter_illegal_substchar; int current_filter_illegal_mode; diff --git a/ext/mbstring/mbstring.stub.php b/ext/mbstring/mbstring.stub.php index 0d04e34bc7823..add0a35e05b80 100644 --- a/ext/mbstring/mbstring.stub.php +++ b/ext/mbstring/mbstring.stub.php @@ -140,7 +140,6 @@ function mb_detect_encoding(string $string, array|string|null $encodings = null, /** * @return array - * @refcount 1 */ function mb_list_encodings(): array {} diff --git a/ext/mbstring/mbstring_arginfo.h b/ext/mbstring/mbstring_arginfo.h index 3bf10789e0d6c..e8985793e380f 100644 --- a/ext/mbstring/mbstring_arginfo.h +++ b/ext/mbstring/mbstring_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 0c9ac8888b8332557f7098cfb9d259757af8b3c6 */ + * Stub hash: 26a027093075613056921c4d1a7eee65d52ec5eb */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_language, 0, 0, MAY_BE_STRING|MAY_BE_BOOL) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, language, IS_STRING, 1, "null") From 3ab10da758f01cdc53839307ac943ea09cb16a58 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sun, 30 Apr 2023 14:44:00 +0200 Subject: [PATCH 045/168] Take order of candidate encodings into account when guessing text encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The documentation for mb_detect_encoding says that this function "Detects the most likely character encoding for string `string` from an ordered list of candidates". Prior to 28b346bc06, mb_detect_encoding did not really attempt to determine the "most likely" text encoding for the input string. It would just return the first candidate encoding for which the string was valid. In 28b346bc06, I amended this function so that it uses heuristics to try to guess which candidate encoding is "most likely". However, the caller did not have any way to indicate which candidate text encoding(s) they consider to be more likely, in case the heuristics applied are inconclusive. In the language of Bayesian probability, there was no way for the caller to indicate their 'prior' assignment of probabilities. Further, the documentation for mb_detect_encoding also says that the second parameter `encodings` is "a list of character encodings to try, in order". The documentation clearly implies that the order of the `encodings` argument should be significant. Therefore, amend mb_detect_encoding so that while it still uses heuristics to guess the most likely text encoding for the input string, it favors those which are earlier in the list of candidate encodings. 
One complication is that many callers of mb_detect_encoding use it in this way: mb_detect_encoding($string, mb_list_encodings()); In a majority of cases, this is bad code; mb_detect_encoding will both be much slower and the results will be less reliable than if a smaller list of candidates is used. However, since such code already exists and people are using it in production, we should not unnecessarily break it. The order of candidate encodings obviously does not express any prior belief of which candidates are more likely in this case, and treating it as if it did will degrade the accuracy of the result. Since mb_list_encodings now returns a single, immutable array on each call, we can avoid that problem by turning off the new behavior when we receive the array of encodings returned by mb_list_encodings. This implementation means that if the user does this: $a = mb_list_encodings(); mb_detect_encoding($string, $a); ...then the order of candidate encodings will not be considered. However, if the user explicitly initializes their own array of all supported legacy text encodings, then the order *will* be considered. The other functions which also follow this new behavior are: • mb_convert_variables • mb_convert_encoding (when multiple candidate input encodings are listed) Other places where "detection" (or really "guessing") of text encoding may be performed include: • mb_send_mail • Zend engine, when determining the encoding of a PHP script • mbstring processing of HTTP request contents, when http_input INI parameter is set to a list In these cases, the new logic based on order of candidate encodings is *not* enabled. It *might* be logical to consider the order of candidate encodings in some or all of these cases, but I'm not sure if that is true, so it seems wiser to avoid more behavior changes than is necessary. 
Further, ever since the new encoding detection heuristics were implemented in 28b346bc06, we have not received any complaints of user code being broken in these areas. So I am reluctant to "fix what isn't broken". Well, some might say that applying the new detection heuristics to mb_send_mail, etc. in 28b346bc06 was "fixing what wasn't broken", but (cough cough) I don't have any comment on that... --- ext/mbstring/mb_gpc.c | 2 +- ext/mbstring/mbstring.c | 49 ++++++++++++++++------ ext/mbstring/mbstring.h | 2 +- ext/mbstring/tests/mb_detect_encoding.phpt | 13 ++++++ 4 files changed, 52 insertions(+), 14 deletions(-) diff --git a/ext/mbstring/mb_gpc.c b/ext/mbstring/mb_gpc.c index 1ffe0af56bc7e..33fbd32edc663 100644 --- a/ext/mbstring/mb_gpc.c +++ b/ext/mbstring/mb_gpc.c @@ -234,7 +234,7 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i } else if (info->num_from_encodings == 1) { from_encoding = info->from_encodings[0]; } else { - from_encoding = mb_guess_encoding_for_strings((const unsigned char**)val_list, len_list, num, info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection)); + from_encoding = mb_guess_encoding_for_strings((const unsigned char**)val_list, len_list, num, info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection), false); if (!from_encoding) { if (info->report_errors) { php_error_docref(NULL, E_WARNING, "Unable to detect encoding"); diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 76977ec7bc494..17140747776eb 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -90,7 +90,7 @@ static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc); static bool mb_check_str_encoding(zend_string *str, const mbfl_encoding *encoding); -static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict); +static const mbfl_encoding* mb_guess_encoding(unsigned char *in, 
size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict, bool order_significant); static zend_string* mb_mime_header_encode(zend_string *input, const mbfl_encoding *incode, const mbfl_encoding *outcode, bool base64, char *linefeed, size_t linefeed_len, zend_long indent); @@ -452,7 +452,7 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a list_size = MBSTRG(current_detect_order_list_size); } - return (const zend_encoding*)mb_guess_encoding((unsigned char*)arg_string, arg_length, (const mbfl_encoding **)list, list_size, false); + return (const zend_encoding*)mb_guess_encoding((unsigned char*)arg_string, arg_length, (const mbfl_encoding **)list, list_size, false, false); } static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from) @@ -2695,7 +2695,7 @@ MBSTRING_API zend_string* php_mb_convert_encoding(const char *input, size_t leng from_encoding = *from_encodings; } else { /* auto detect */ - from_encoding = mb_guess_encoding((unsigned char*)input, length, from_encodings, num_from_encodings, MBSTRG(strict_detection)); + from_encoding = mb_guess_encoding((unsigned char*)input, length, from_encodings, num_from_encodings, MBSTRG(strict_detection), true); if (!from_encoding) { php_error_docref(NULL, E_WARNING, "Unable to detect character encoding"); return NULL; @@ -2996,9 +2996,10 @@ struct candidate { size_t in_len; uint64_t demerits; /* Wide bit size to prevent overflow */ unsigned int state; + float multiplier; }; -static size_t init_candidate_array(struct candidate *array, size_t length, const mbfl_encoding **encodings, const unsigned char **in, size_t *in_len, size_t n, bool strict) +static size_t init_candidate_array(struct candidate *array, size_t length, const mbfl_encoding **encodings, const unsigned char **in, size_t *in_len, size_t n, bool strict, bool 
order_significant) { size_t j = 0; @@ -3018,6 +3019,10 @@ static size_t init_candidate_array(struct candidate *array, size_t length, const array[j].enc = enc; array[j].state = 0; array[j].demerits = 0; + /* This multiplier can optionally be used to make candidate encodings listed + * first more likely to be chosen. It is a weight factor which multiplies + * the number of demerits counted for each candidate. */ + array[j].multiplier = order_significant ? 1.0 + ((0.3 * i) / length) : 1.0; j++; skip_to_next: ; } @@ -3093,10 +3098,14 @@ static size_t count_demerits(struct candidate *array, size_t length, bool strict } } + for (size_t i = 0; i < length; i++) { + array[i].demerits *= array[i].multiplier; + } + return length; } -MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned char **strings, size_t *str_lengths, size_t n, const mbfl_encoding **elist, unsigned int elist_size, bool strict) +MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned char **strings, size_t *str_lengths, size_t n, const mbfl_encoding **elist, unsigned int elist_size, bool strict, bool order_significant) { if (elist_size == 0) { return NULL; @@ -3117,7 +3126,7 @@ MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned c /* Allocate on stack; when we return, this array is automatically freed */ struct candidate *array = alloca(elist_size * sizeof(struct candidate)); - elist_size = init_candidate_array(array, elist_size, elist, strings, str_lengths, n, strict); + elist_size = init_candidate_array(array, elist_size, elist, strings, str_lengths, n, strict, order_significant); while (n--) { start_string(array, elist_size, strings[n], str_lengths[n]); @@ -3141,9 +3150,9 @@ MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned c /* When doing 'strict' detection, any string which is invalid in the candidate encoding * is rejected. 
With non-strict detection, we just continue, but apply demerits for * each invalid byte sequence */ -static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict) +static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict, bool order_significant) { - return mb_guess_encoding_for_strings((const unsigned char**)&in, &in_len, 1, elist, elist_size, strict); + return mb_guess_encoding_for_strings((const unsigned char**)&in, &in_len, 1, elist, elist_size, strict, order_significant); } /* {{{ Encodings of the given string is returned (as a string) */ @@ -3162,8 +3171,17 @@ PHP_FUNCTION(mb_detect_encoding) Z_PARAM_BOOL(strict) ZEND_PARSE_PARAMETERS_END(); + /* Should we pay attention to the order of the provided candidate encodings and prefer + * the earlier ones (if more than one candidate encoding matches)? + * If the entire list of supported encodings returned by `mb_list_encodings` is passed + * in, then don't treat the order as significant */ + bool order_significant = true; + /* make encoding list */ if (encoding_ht) { + if (encoding_ht == MBSTRG(all_encodings_list)) { + order_significant = false; + } if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) { RETURN_THROWS(); } @@ -3195,7 +3213,7 @@ PHP_FUNCTION(mb_detect_encoding) if (size == 1 && *elist == &mbfl_encoding_utf8 && (GC_FLAGS(str) & IS_STR_VALID_UTF8)) { ret = &mbfl_encoding_utf8; } else { - ret = mb_guess_encoding((unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), elist, size, strict); + ret = mb_guess_encoding((unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), elist, size, strict, order_significant); } efree(ZEND_VOIDP(elist)); @@ -3556,8 +3574,15 @@ PHP_FUNCTION(mb_convert_variables) from_encoding = MBSTRG(current_internal_encoding); + bool order_significant = true; + /* pre-conversion encoding */ if (from_enc_ht) { + if 
(from_enc_ht == MBSTRG(all_encodings_list)) { + /* If entire list of supported encodings returned by `mb_list_encodings` is passed + * in, then don't treat the order of the list as significant */ + order_significant = false; + } if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) { RETURN_THROWS(); } @@ -3595,7 +3620,7 @@ PHP_FUNCTION(mb_convert_variables) RETURN_FALSE; } } - from_encoding = mb_guess_encoding_for_strings(val_list, len_list, num, elist, elistsz, MBSTRG(strict_detection)); + from_encoding = mb_guess_encoding_for_strings(val_list, len_list, num, elist, elistsz, MBSTRG(strict_detection), order_significant); efree(ZEND_VOIDP(val_list)); efree(len_list); if (!from_encoding) { @@ -4313,7 +4338,7 @@ PHP_FUNCTION(mb_send_mail) /* Subject: */ const mbfl_encoding *enc = MBSTRG(current_internal_encoding); if (enc == &mbfl_encoding_pass) { - enc = mb_guess_encoding((unsigned char*)ZSTR_VAL(subject), ZSTR_LEN(subject), MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + enc = mb_guess_encoding((unsigned char*)ZSTR_VAL(subject), ZSTR_LEN(subject), MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection), false); } const char *line_sep = PG(mail_mixed_lf_and_crlf) ? 
"\n" : CRLF; size_t line_sep_len = strlen(line_sep); @@ -4323,7 +4348,7 @@ PHP_FUNCTION(mb_send_mail) /* message body */ const mbfl_encoding *msg_enc = MBSTRG(current_internal_encoding); if (msg_enc == &mbfl_encoding_pass) { - msg_enc = mb_guess_encoding((unsigned char*)message, message_len, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + msg_enc = mb_guess_encoding((unsigned char*)message, message_len, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection), false); } unsigned int num_errors = 0; diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 7cd58a0277310..7256605bd2462 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -67,7 +67,7 @@ MBSTRING_API size_t php_mb_mbchar_bytes(const char *s, const mbfl_encoding *enc) MBSTRING_API size_t php_mb_stripos(bool mode, zend_string *haystack, zend_string *needle, zend_long offset, const mbfl_encoding *enc); MBSTRING_API bool php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding); -MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned char **strings, size_t *str_lengths, size_t n, const mbfl_encoding **elist, unsigned int elist_size, bool strict); +MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned char **strings, size_t *str_lengths, size_t n, const mbfl_encoding **elist, unsigned int elist_size, bool strict, bool order_significant); ZEND_BEGIN_MODULE_GLOBALS(mbstring) char *internal_encoding_name; diff --git a/ext/mbstring/tests/mb_detect_encoding.phpt b/ext/mbstring/tests/mb_detect_encoding.phpt index 544375fbd2998..97136d89b8e4a 100644 --- a/ext/mbstring/tests/mb_detect_encoding.phpt +++ b/ext/mbstring/tests/mb_detect_encoding.phpt @@ -88,6 +88,17 @@ print("EUC-JP: " . mb_detect_encoding($euc_jp) . "\n"); print("SJIS: " . mb_detect_encoding($sjis) . 
"\n"); +// Thanks to Ulrik Nielsen for the following tests; the hex strings are the same file, but in two +// different text encodings +// We do not have any strong hints showing that the second one is actually UTF-8... +// but mb_detect_encoding still guesses UTF-8 because it is the first one in the list + +$win1252text = hex2bin("2320546869732066696c6520636f6e7461696e732057696e646f77732d3132353220656e636f646564206461746120616e642048544d4c20656e7469746965730a61626364650ae6f8e50af00a3c703e476f646461673c6272202f3e0a7b726561646f626a206f626a65637469643d24726573756c745b305d2e706172656e7469642061737369676e3d22646f63227d3c6272202f3e0a23205468697320697320746f20656e73757265207468617420646966666572656e74206b696e6473206f662048544d4c20656e74697469657320617265206265696e6720636f6e76657274656420636f72726563746c790af00ad00a2623383739313b0a262331373238373b0a262333383937393b0a2623353437333b0a616263646520e6f8e520f020d0203c703e476f646461673c6272202f3e207b726561646f626a206f626a65637469643d24726573756c745b305d2e706172656e7469642061737369676e3d22646f63227d3c6272202f3e202623383739313b20262331373238373b20262333383937393b202623353437333b0a232054686520666f6c6c6f77696e67206368617261637465727320617265207370656369616c206368617261637465727320746861742068617320646966666572656e7420636f646520706f696e747320696e2049534f2d383835392d3120616e642057696e646f77732d31323532202d207468617420776520646966666572656e746961746520636f72726563746c79206265747765656e2049534f2d383835392d3120616e642057696e646f77732d313235320a8c0a890a2320506f6c69736820737472696e670a50727a656a6426233337383b20646f2070727a65676c26233236313b64750a"); +echo mb_detect_encoding($win1252text, ['UTF-8', 'CP1252', 'ISO-8859-1'], true), "\n"; + +$utf8text = 
hex2bin("2320546869732066696c6520636f6e7461696e73205554462d3820656e636f64656420646174610a61626364650ac3a6c3b8c3a50ac3b00a3c703e476f646461673c6272202f3e0a7b726561646f626a206f626a65637469643d24726573756c745b305d2e706172656e7469642061737369676e3d22646f63227d3c6272202f3e0a23205468697320697320746f20656e73757265207468617420646966666572656e74206b696e6473206f662048544d4c20656e74697469657320617265206265696e6720636f6e76657274656420636f72726563746c790ac3b00ac3900ae289970ae48e870ae9a1830ae195a10a616263646520c3a6c3b8c3a520c3b020c390203c703e476f646461673c6272202f3e207b726561646f626a206f626a65637469643d24726573756c745b305d2e706172656e7469642061737369676e3d22646f63227d3c6272202f3e20e2899720e48e8720e9a18320e195a10a232054686520666f6c6c6f77696e67206368617261637465727320617265207370656369616c206368617261637465727320746861742068617320646966666572656e7420636f646520706f696e747320696e2049534f2d383835392d3120616e642057696e646f77732d31323532202d207468617420776520646966666572656e746961746520636f72726563746c79206265747765656e2049534f2d383835392d3120616e642057696e646f77732d313235320ac5920ae280b00a2320506f6c69736820737472696e670a50727a656a64c5ba20646f2070727a65676cc48564750a"); +echo mb_detect_encoding($utf8text, ['UTF-8', 'CP1252', 'ISO-8859-1'], true), "\n"; + echo "== INVALID PARAMETER ==\n"; print("INT: " . mb_detect_encoding(1234, 'EUC-JP') . "\n"); // EUC-JP @@ -393,6 +404,8 @@ SJIS JIS: JIS EUC-JP: EUC-JP SJIS: SJIS +Windows-1252 +UTF-8 == INVALID PARAMETER == INT: EUC-JP EUC-JP: EUC-JP From 7914b8cefd21fccda53df3c8ca54bba55efff205 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Thu, 11 May 2023 20:21:30 +0200 Subject: [PATCH 046/168] Use pakutoma's encoding check functions for mb_detect_encoding even in non-strict mode In 6fc8d014df, pakutoma added specialized validity checking functions for some legacy text encodings like ISO-2022-JP and UTF-7. These check functions perform a more strict validity check than the encoding conversion functions for the same text encodings. 
For example, the check function for ISO-2022-JP verifies that the string ends in the correct state required by the specification for ISO-2022-JP. These check functions are already being used to make detection of text encoding more accurate when 'strict' detection mode is enabled. However, since the default is 'non-strict' detection (a bad API design but we're stuck with it now), most users will not benefit from pakutoma's work. I was previously reluctant to enable this new logic for non-strict detection mode. My intention was to reduce the scope of behavior changes, since almost *any* behavior change may affect *some* user in a way we don't expect. However, we definitely have users whose (production) code was broken by the changes I made in 28b346bc06, and enabling pakutoma's check functions for non-strict detection mode would un-break it. (See GH-10192 as an example.) The added checks do also make sense. In non-strict detection mode, we will not immediately reject candidate encodings whose validity check function returns false; but they will be much less likely to be selected. However, failure of the validity check function is weighted less heavily than an encoding error detected by the encoding conversion function. 
--- ext/mbstring/mbstring.c | 16 ++++++++++------ ext/mbstring/tests/gh10192_utf7.phpt | 14 +++++++------- ext/mbstring/tests/mb_detect_encoding.phpt | 8 ++++++++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 17140747776eb..88bc7334253d9 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -1816,7 +1816,6 @@ static size_t mb_get_strlen(zend_string *string, const mbfl_encoding *encoding) return mb_fast_strlen_utf8((unsigned char*)ZSTR_VAL(string), ZSTR_LEN(string)); } - uint32_t wchar_buf[128]; unsigned char *in = (unsigned char*)ZSTR_VAL(string); size_t in_len = ZSTR_LEN(string); @@ -3006,19 +3005,24 @@ static size_t init_candidate_array(struct candidate *array, size_t length, const for (size_t i = 0; i < length; i++) { const mbfl_encoding *enc = encodings[i]; + array[j].enc = enc; + array[j].state = 0; + array[j].demerits = 0; + /* If any candidate encodings have specialized validation functions, use them * to eliminate as many candidates as possible */ - if (strict && enc->check != NULL) { + if (enc->check != NULL) { for (size_t k = 0; k < n; k++) { if (!enc->check((unsigned char*)in[k], in_len[k])) { - goto skip_to_next; + if (strict) { + goto skip_to_next; + } else { + array[j].demerits += 500; + } } } } - array[j].enc = enc; - array[j].state = 0; - array[j].demerits = 0; /* This multiplier can optionally be used to make candidate encodings listed * first more likely to be chosen. It is a weight factor which multiplies * the number of demerits counted for each candidate. 
*/ diff --git a/ext/mbstring/tests/gh10192_utf7.phpt b/ext/mbstring/tests/gh10192_utf7.phpt index 2930942c12c5a..9aa4eb6925463 100644 --- a/ext/mbstring/tests/gh10192_utf7.phpt +++ b/ext/mbstring/tests/gh10192_utf7.phpt @@ -75,7 +75,7 @@ foreach ($testcases as $title => $case) { --EXPECT-- non-base64 character after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -93,7 +93,7 @@ int(0) base64 character before + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -174,7 +174,7 @@ int(2) - and + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -219,7 +219,7 @@ int(2) valid direct encoding character = after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -228,7 +228,7 @@ int(2) invalid direct encoding character ~ after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -237,7 +237,7 @@ int(2) invalid direct encoding character \ after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -246,7 +246,7 @@ int(2) invalid direct encoding character ESC after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) diff --git a/ext/mbstring/tests/mb_detect_encoding.phpt b/ext/mbstring/tests/mb_detect_encoding.phpt index 97136d89b8e4a..11d5a1c31364f 100644 --- a/ext/mbstring/tests/mb_detect_encoding.phpt +++ b/ext/mbstring/tests/mb_detect_encoding.phpt @@ -78,6 +78,13 @@ echo mb_detect_encoding($test, ['UTF-8', 'ISO-8859-1']), "\n"; // Should be UTF- echo mb_detect_encoding('abc', ['UUENCODE', 'UTF-8']), "\n"; echo mb_detect_encoding('abc', ['UUENCODE', 'QPrint', 'HTML-ENTITIES', 'Base64', '7bit', '8bit', 'SJIS']), "\n"; +// This test case courtesy of Adrien Foulon +// It depends on the below use of '+' being recognized as 
invalid UTF-7 +$css = 'input[type="radio"]:checked + img { + border: 5px solid #0083ca; +}'; +echo mb_detect_encoding($css, mb_list_encodings(), true), "\n"; + echo "== DETECT ORDER ==\n"; mb_detect_order('auto'); @@ -400,6 +407,7 @@ UTF-8 UTF-8 UTF-8 SJIS +UTF-8 == DETECT ORDER == JIS: JIS EUC-JP: EUC-JP From 727e26f9f27ed0737fdbf6d2626d37a916e08c22 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 4 Dec 2022 21:59:18 +0100 Subject: [PATCH 047/168] Fix #79836 and #81705: Segfault / type confusion in concat_function The following sequence of actions was happening which caused a null pointer dereference: 1. debug_backtrace() returns an array 2. The concatenation to $c will transform the array to a string via `zval_get_string_func` for op2 and output a warning. Note that zval op1 is of type string due to the first do-while sequence. 3. The warning of an implicit "array to string conversion" triggers the ob_start callback to run. This code transforms $c (==op1) to a long. 4. The code below the 2 do-while sequences assumes that both op1 and op2 are strings, but this is no longer the case. A dereference of the string will therefore result in a null pointer dereference. The solution used here is to work with the zend_string directly instead of with the ops. For the tests: Co-authored-by: changochen1@gmail.com Co-authored-by: cmbecker69@gmx.de Co-authored-by: yukik@risec.co.jp Closes GH-10049. 
--- NEWS | 3 + Zend/tests/bug79836.phpt | 18 +++ Zend/tests/bug79836_1.phpt | 18 +++ Zend/tests/bug79836_2.phpt | 25 ++++ Zend/tests/bug81705.phpt | 19 +++ ...tring_concat_non_interned_with_itself.phpt | 21 ++++ .../class_toString_concat_with_itself.phpt | 16 +++ Zend/zend_operators.c | 118 ++++++++++++------ 8 files changed, 198 insertions(+), 40 deletions(-) create mode 100644 Zend/tests/bug79836.phpt create mode 100644 Zend/tests/bug79836_1.phpt create mode 100644 Zend/tests/bug79836_2.phpt create mode 100644 Zend/tests/bug81705.phpt create mode 100644 Zend/tests/class_toString_concat_non_interned_with_itself.phpt create mode 100644 Zend/tests/class_toString_concat_with_itself.phpt diff --git a/NEWS b/NEWS index b2527affb28ba..53e4fe73519b2 100644 --- a/NEWS +++ b/NEWS @@ -37,6 +37,9 @@ PHP NEWS index). (ColinHDev) . Fix bug GH-8846 (Implement delayed early binding for classes without parents). (ilutov) + . Fix bug #79836 (Segfault in concat_function). (nielsdos) + . Fix bug #81705 (type confusion/UAF on set_error_handler with concat + operation). (nielsdos) - Date: . Implement More Appropriate Date/Time Exceptions RFC. 
(Derick) diff --git a/Zend/tests/bug79836.phpt b/Zend/tests/bug79836.phpt new file mode 100644 index 0000000000000..5fb07396762f5 --- /dev/null +++ b/Zend/tests/bug79836.phpt @@ -0,0 +1,18 @@ +--TEST-- +Bug #79836 (Segfault in concat_function) +--INI-- +opcache.optimization_level = 0x7FFEBFFF & ~0x400 +--FILE-- + +--EXPECT-- +3 diff --git a/Zend/tests/bug79836_1.phpt b/Zend/tests/bug79836_1.phpt new file mode 100644 index 0000000000000..86e7f47671849 --- /dev/null +++ b/Zend/tests/bug79836_1.phpt @@ -0,0 +1,18 @@ +--TEST-- +Bug #79836 (Segfault in concat_function) +--INI-- +opcache.optimization_level = 0x7FFEBFFF & ~0x400 +--FILE-- + +--EXPECT-- +Done diff --git a/Zend/tests/bug79836_2.phpt b/Zend/tests/bug79836_2.phpt new file mode 100644 index 0000000000000..b02fcc13ea11b --- /dev/null +++ b/Zend/tests/bug79836_2.phpt @@ -0,0 +1,25 @@ +--TEST-- +Bug #79836 (Segfault in concat_function) +--FILE-- + +--EXPECT-- +abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabc diff --git a/Zend/tests/bug81705.phpt b/Zend/tests/bug81705.phpt new file mode 100644 index 0000000000000..1c00b1c77d4bb --- /dev/null +++ b/Zend/tests/bug81705.phpt @@ -0,0 +1,19 @@ +--TEST-- +Bug #81705 (type confusion/UAF on set_error_handler with concat operation) +--FILE-- + +--EXPECT-- +error +string(6) "aArray" \ No newline at end of file diff --git a/Zend/tests/class_toString_concat_non_interned_with_itself.phpt b/Zend/tests/class_toString_concat_non_interned_with_itself.phpt new file mode 100644 index 0000000000000..87b129ce9e796 --- /dev/null +++ b/Zend/tests/class_toString_concat_non_interned_with_itself.phpt @@ -0,0 +1,21 @@ +--TEST-- +Test concatenating a class instance that has __toString with itself that uses a non-interned string +--FILE-- + +--EXPECT-- +aaaaaa diff --git a/Zend/tests/class_toString_concat_with_itself.phpt b/Zend/tests/class_toString_concat_with_itself.phpt new file mode 100644 index 0000000000000..96d28679b2f93 --- /dev/null +++ b/Zend/tests/class_toString_concat_with_itself.phpt 
@@ -0,0 +1,16 @@ +--TEST-- +Test concatenating a class instance that has __toString with itself +--FILE-- + +--EXPECT-- +abcabc diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index a9932a6b592b6..c4fce74ebbbee 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -1940,108 +1940,146 @@ ZEND_API zend_result ZEND_FASTCALL shift_right_function(zval *result, zval *op1, ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval *op2) /* {{{ */ { zval *orig_op1 = op1; - zval op1_copy, op2_copy; - - ZVAL_UNDEF(&op1_copy); - ZVAL_UNDEF(&op2_copy); + zend_string *op1_string, *op2_string; + bool free_op1_string = false; + bool free_op2_string = false; do { - if (UNEXPECTED(Z_TYPE_P(op1) != IS_STRING)) { + if (EXPECTED(Z_TYPE_P(op1) == IS_STRING)) { + op1_string = Z_STR_P(op1); + } else { if (Z_ISREF_P(op1)) { op1 = Z_REFVAL_P(op1); - if (Z_TYPE_P(op1) == IS_STRING) break; + if (Z_TYPE_P(op1) == IS_STRING) { + op1_string = Z_STR_P(op1); + break; + } } ZEND_TRY_BINARY_OBJECT_OPERATION(ZEND_CONCAT); - ZVAL_STR(&op1_copy, zval_get_string_func(op1)); + op1_string = zval_get_string_func(op1); if (UNEXPECTED(EG(exception))) { - zval_ptr_dtor_str(&op1_copy); + zend_string_release(op1_string); if (orig_op1 != result) { ZVAL_UNDEF(result); } return FAILURE; } + free_op1_string = true; if (result == op1) { if (UNEXPECTED(op1 == op2)) { - op2 = &op1_copy; + op2_string = op1_string; + goto has_op2_string; } } - op1 = &op1_copy; } } while (0); do { - if (UNEXPECTED(Z_TYPE_P(op2) != IS_STRING)) { - if (Z_ISREF_P(op2)) { - op2 = Z_REFVAL_P(op2); - if (Z_TYPE_P(op2) == IS_STRING) break; - } + if (EXPECTED(Z_TYPE_P(op2) == IS_STRING)) { + op2_string = Z_STR_P(op2); + } else { + if (Z_ISREF_P(op2)) { + op2 = Z_REFVAL_P(op2); + if (Z_TYPE_P(op2) == IS_STRING) { + op2_string = Z_STR_P(op2); + break; + } + } + /* hold an additional reference because a userland function could free this */ + if (!free_op1_string) { + op1_string = 
zend_string_copy(op1_string); + free_op1_string = true; + } ZEND_TRY_BINARY_OP2_OBJECT_OPERATION(ZEND_CONCAT); - ZVAL_STR(&op2_copy, zval_get_string_func(op2)); + op2_string = zval_get_string_func(op2); if (UNEXPECTED(EG(exception))) { - zval_ptr_dtor_str(&op1_copy); - zval_ptr_dtor_str(&op2_copy); + zend_string_release(op1_string); + zend_string_release(op2_string); if (orig_op1 != result) { ZVAL_UNDEF(result); } return FAILURE; } - op2 = &op2_copy; + free_op2_string = true; } } while (0); - if (UNEXPECTED(Z_STRLEN_P(op1) == 0)) { - if (EXPECTED(result != op2)) { +has_op2_string:; + if (UNEXPECTED(ZSTR_LEN(op1_string) == 0)) { + if (EXPECTED(free_op2_string || result != op2)) { if (result == orig_op1) { i_zval_ptr_dtor(result); } - ZVAL_COPY(result, op2); + if (free_op2_string) { + /* transfer ownership of op2_string */ + ZVAL_STR(result, op2_string); + free_op2_string = false; + } else { + ZVAL_STR_COPY(result, op2_string); + } } - } else if (UNEXPECTED(Z_STRLEN_P(op2) == 0)) { - if (EXPECTED(result != op1)) { + } else if (UNEXPECTED(ZSTR_LEN(op2_string) == 0)) { + if (EXPECTED(free_op1_string || result != op1)) { if (result == orig_op1) { i_zval_ptr_dtor(result); } - ZVAL_COPY(result, op1); + if (free_op1_string) { + /* transfer ownership of op1_string */ + ZVAL_STR(result, op1_string); + free_op1_string = false; + } else { + ZVAL_STR_COPY(result, op1_string); + } } } else { - size_t op1_len = Z_STRLEN_P(op1); - size_t op2_len = Z_STRLEN_P(op2); + size_t op1_len = ZSTR_LEN(op1_string); + size_t op2_len = ZSTR_LEN(op2_string); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); + uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_string, op2_string); if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) { + if (free_op1_string) zend_string_release(op1_string); + if (free_op2_string) zend_string_release(op2_string); zend_throw_error(NULL, "String size 
overflow"); - zval_ptr_dtor_str(&op1_copy); - zval_ptr_dtor_str(&op2_copy); if (orig_op1 != result) { ZVAL_UNDEF(result); } return FAILURE; } - if (result == op1 && Z_REFCOUNTED_P(result)) { + if (result == op1) { + if (free_op1_string) { + /* op1_string will be used as the result, so we should not free it */ + i_zval_ptr_dtor(result); + free_op1_string = false; + } /* special case, perform operations on result */ - result_str = zend_string_extend(Z_STR_P(result), result_len, 0); + result_str = zend_string_extend(op1_string, result_len, 0); + /* account for the case where result_str == op1_string == op2_string and the realloc is done */ + if (op1_string == op2_string) { + if (free_op2_string) { + zend_string_release(op2_string); + free_op2_string = false; + } + op2_string = result_str; + } } else { result_str = zend_string_alloc(result_len, 0); - memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); + memcpy(ZSTR_VAL(result_str), ZSTR_VAL(op1_string), op1_len); if (result == orig_op1) { i_zval_ptr_dtor(result); } } GC_ADD_FLAGS(result_str, flags); - /* This has to happen first to account for the cases where result == op1 == op2 and - * the realloc is done. In this case this line will also update Z_STRVAL_P(op2) to - * point to the new string. The first op2_len bytes of result will still be the same. 
*/ ZVAL_NEW_STR(result, result_str); - - memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); + memcpy(ZSTR_VAL(result_str) + op1_len, ZSTR_VAL(op2_string), op2_len); ZSTR_VAL(result_str)[result_len] = '\0'; } - zval_ptr_dtor_str(&op1_copy); - zval_ptr_dtor_str(&op2_copy); + if (free_op1_string) zend_string_release(op1_string); + if (free_op2_string) zend_string_release(op2_string); + return SUCCESS; } /* }}} */ From bd03c0343e7df2e70fedf872eea6eb855f3b76e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= Date: Tue, 16 May 2023 20:54:26 +0200 Subject: [PATCH 048/168] Allow CTE on more CTE safe functions (#10771) --- ext/hash/hash.stub.php | 2 + ext/hash/hash_arginfo.h | 6 +- ext/standard/basic_functions.stub.php | 148 ++++++++++++++++--- ext/standard/basic_functions_arginfo.h | 78 +++++----- ext/zend_test/tests/observer_basic_01.phpt | 1 + ext/zend_test/tests/observer_basic_02.phpt | 1 + ext/zend_test/tests/observer_closure_01.phpt | 1 + 7 files changed, 175 insertions(+), 62 deletions(-) diff --git a/ext/hash/hash.stub.php b/ext/hash/hash.stub.php index 9efd72361cde7..e0439c67bf23d 100644 --- a/ext/hash/hash.stub.php +++ b/ext/hash/hash.stub.php @@ -45,12 +45,14 @@ function hash_copy(HashContext $context): HashContext {} /** * @return array + * @compile-time-eval * @refcount 1 */ function hash_algos(): array {} /** * @return array + * @compile-time-eval * @refcount 1 */ function hash_hmac_algos(): array {} diff --git a/ext/hash/hash_arginfo.h b/ext/hash/hash_arginfo.h index dca428af029b5..d52a5979cb10e 100644 --- a/ext/hash/hash_arginfo.h +++ b/ext/hash/hash_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 7168b9b3d1422d4f8ff9270c5de2f42988a55811 */ + * Stub hash: 8838801d9789d4b77d57b290d993ee37784bbd1f */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_hash, 0, 2, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, algo, IS_STRING, 0) @@ -179,8 +179,8 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(hash_update_file, arginfo_hash_update_file) ZEND_FE(hash_final, arginfo_hash_final) ZEND_FE(hash_copy, arginfo_hash_copy) - ZEND_FE(hash_algos, arginfo_hash_algos) - ZEND_FE(hash_hmac_algos, arginfo_hash_hmac_algos) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(hash_algos, arginfo_hash_algos) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(hash_hmac_algos, arginfo_hash_hmac_algos) ZEND_FE(hash_pbkdf2, arginfo_hash_pbkdf2) ZEND_FE(hash_equals, arginfo_hash_equals) ZEND_FE(hash_hkdf, arginfo_hash_hkdf) diff --git a/ext/standard/basic_functions.stub.php b/ext/standard/basic_functions.stub.php index b2270d66d9e25..effb05ff9f982 100755 --- a/ext/standard/basic_functions.stub.php +++ b/ext/standard/basic_functions.stub.php @@ -1660,6 +1660,9 @@ function array_unshift(array &$array, mixed ...$values): int {} function array_splice(array &$array, int $offset, ?int $length = null, mixed $replacement = []): array {} +/** + * @compile-time-eval + */ function array_slice(array $array, int $offset, ?int $length = null, bool $preserve_keys = false): array {} /** @@ -1708,14 +1711,21 @@ function array_values(array $array): array {} /** * @return array + * @compile-time-eval * @refcount 1 */ function array_count_values(array $array): array {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function array_column(array $array, int|string|null $column_key, int|string|null $index_key = null): array {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function array_reverse(array $array, bool $preserve_keys = false): array {} function array_pad(array $array, int $length, mixed $value): array {} @@ -1844,8 +1854,14 @@ function array_multisort(&$array, 
&...$rest): bool {} /** @return int|string|array */ function array_rand(array $array, int $num = 1): int|string|array {} +/** + * @compile-time-eval + */ function array_sum(array $array): int|float {} +/** + * @compile-time-eval + */ function array_product(array $array): int|float {} function array_reduce(array $array, callable $callback, mixed $initial = null): mixed {} @@ -1866,8 +1882,14 @@ function array_key_exists($key, array $array): bool {} */ function key_exists($key, array $array): bool {} +/** + * @compile-time-eval + */ function array_chunk(array $array, int $length, bool $preserve_keys = false): array {} +/** + * @compile-time-eval + */ function array_combine(array $keys, array $values): array {} /** @compile-time-eval */ @@ -2033,6 +2055,7 @@ function parse_ini_file(string $filename, bool $process_sections = false, int $s /** * @return array|false + * @compile-time-eval * @refcount 1 */ function parse_ini_string(string $ini_string, bool $process_sections = false, int $scanner_mode = INI_SCANNER_NORMAL): array|false {} @@ -2261,8 +2284,14 @@ function bin2hex(string $string): string {} */ function hex2bin(string $string): string|false {} +/** + * @compile-time-eval + */ function strspn(string $string, string $characters, int $offset = 0, ?int $length = null): int {} +/** + * @compile-time-eval + */ function strcspn(string $string, string $characters, int $offset = 0, ?int $length = null): int {} #ifdef HAVE_NL_LANGINFO @@ -2284,7 +2313,10 @@ function chop(string $string, string $characters = " \n\r\t\v\0"): string {} /** @compile-time-eval */ function ltrim(string $string, string $characters = " \n\r\t\v\0"): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function wordwrap(string $string, int $width = 75, string $break = "\n", bool $cut_long_words = false): string {} /** @@ -2302,7 +2334,10 @@ function implode(string|array $separator, ?array $array = null): string {} /** @alias implode */ function join(string|array 
$separator, ?array $array = null): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function strtok(string $string, ?string $token = null): string|false {} /** @compile-time-eval */ @@ -2323,7 +2358,10 @@ function dirname(string $path, int $levels = 1): string {} */ function pathinfo(string $path, int $flags = PATHINFO_ALL): array|string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function stristr(string $haystack, string $needle, bool $before_needle = false): string|false {} /** @@ -2347,7 +2385,10 @@ function strrpos(string $haystack, string $needle, int $offset = 0): int|false { /** @compile-time-eval */ function strripos(string $haystack, string $needle, int $offset = 0): int|false {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function strrchr(string $haystack, string $needle): string|false {} /** @compile-time-eval */ @@ -2359,16 +2400,25 @@ function str_starts_with(string $haystack, string $needle): bool {} /** @compile-time-eval */ function str_ends_with(string $haystack, string $needle): bool {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function chunk_split(string $string, int $length = 76, string $separator = "\r\n"): string {} /** @compile-time-eval */ function substr(string $string, int $offset, ?int $length = null): string {} -/** @return string|array */ +/** + * @return string|array + * @compile-time-eval + */ function substr_replace(array|string $string, array|string $replace, array|int $offset, array|int|null $length = null): string|array {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function quotemeta(string $string): string {} /** @compile-time-eval */ @@ -2392,6 +2442,9 @@ function lcfirst(string $string): string {} */ function ucwords(string $string, string $separators = " \t\r\n\f\v"): string {} +/** + * @compile-time-eval + */ function strtr(string $string, string|array $from, ?string $to = null): string {} 
/** @@ -2403,14 +2456,26 @@ function strrev(string $string): string {} /** @param float $percent */ function similar_text(string $string1, string $string2, &$percent = null): int {} +/** + * @compile-time-eval + */ function addcslashes(string $string, string $characters): string {} +/** + * @compile-time-eval + */ function addslashes(string $string): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function stripcslashes(string $string): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function stripslashes(string $string): string {} /** @@ -2430,9 +2495,15 @@ function str_ireplace(array|string $search, array|string $replace, string|array /** @refcount 1 */ function hebrev(string $string, int $max_chars_per_line = 0): string {} +/** + * @compile-time-eval + */ function nl2br(string $string, bool $use_xhtml = true): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function strip_tags(string $string, array|string|null $allowed_tags = null): string {} /** @@ -2455,6 +2526,7 @@ function str_repeat(string $string, int $times): string {} /** * @return array|string + * @compile-time-eval * @refcount 1 */ function count_chars(string $string, int $mode = 0): array|string {} @@ -2469,6 +2541,9 @@ function localeconv(): array {} function strnatcasecmp(string $string1, string $string2): int {} +/** + * @compile-time-eval + */ function substr_count(string $haystack, string $needle, int $offset = 0, ?int $length = null): int {} function str_pad(string $string, int $length, string $pad_string = " ", int $pad_type = STR_PAD_RIGHT): string {} @@ -2479,7 +2554,10 @@ function str_pad(string $string, int $length, string $pad_string = " ", int $pad */ function sscanf(string $string, string $format, mixed &...$vars): array|int|null {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function str_rot13(string $string): string {} /** @refcount 1 */ @@ -2497,18 +2575,26 @@ function 
str_word_count(string $string, int $format = 0, ?string $characters = n */ function str_split(string $string, int $length = 1): array {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function strpbrk(string $string, string $characters): string|false {} +/** + * @compile-time-eval + */ function substr_compare(string $haystack, string $needle, int $offset, ?int $length = null, bool $case_insensitive = false): int {} /** + * @compile-time-eval * @refcount 1 * @deprecated */ function utf8_encode(string $string): string {} /** + * @compile-time-eval * @refcount 1 * @deprecated */ @@ -2883,10 +2969,16 @@ function http_build_query(array|object $data, string $numeric_prefix = "", ?stri /* image.c */ -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function image_type_to_mime_type(int $image_type): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function image_type_to_extension(int $image_type, bool $include_dot = true): string|false {} /** @@ -3165,6 +3257,9 @@ function password_needs_rehash(string $hash, string|int|null $algo, array $optio function password_verify(#[\SensitiveParameter] string $password, string $hash): bool {} +/** + * @compile-time-eval + */ function password_algos(): array {} /* proc_open.c */ @@ -3193,10 +3288,16 @@ function proc_get_status($process): array {} /* quot_print.c */ -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function quoted_printable_decode(string $string): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function quoted_printable_encode(string $string): string {} /* soundex.c */ @@ -3527,6 +3628,7 @@ function uniqid(string $prefix = "", bool $more_entropy = false): string {} /** * @return int|string|array|null|false + * @compile-time-eval * @refcount 1 */ function parse_url(string $url, int $component = -1): int|string|array|null|false {} @@ -3592,10 +3694,16 @@ function stream_filter_register(string 
$filter_name, string $class): bool {} /* uuencode.c */ -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function convert_uuencode(string $string): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function convert_uudecode(string $string): string|false {} /* var.c */ diff --git a/ext/standard/basic_functions_arginfo.h b/ext/standard/basic_functions_arginfo.h index f43ca8f98fc16..5612ee21867e8 100644 --- a/ext/standard/basic_functions_arginfo.h +++ b/ext/standard/basic_functions_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 0d2bffd95e986b632f5fd1afbf8f6464e6bc8759 */ + * Stub hash: 9cc9c0954bd7032d363ce9a531be621274b9a7e2 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_set_time_limit, 0, 1, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO(0, seconds, IS_LONG, 0) @@ -2876,7 +2876,7 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(array_shift, arginfo_array_shift) ZEND_FE(array_unshift, arginfo_array_unshift) ZEND_FE(array_splice, arginfo_array_splice) - ZEND_FE(array_slice, arginfo_array_slice) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_slice, arginfo_array_slice) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_merge, arginfo_array_merge) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_merge_recursive, arginfo_array_merge_recursive) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_replace, arginfo_array_replace) @@ -2885,9 +2885,9 @@ static const zend_function_entry ext_functions[] = { ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_key_first, arginfo_array_key_first) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_key_last, arginfo_array_key_last) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_values, arginfo_array_values) - ZEND_FE(array_count_values, arginfo_array_count_values) - ZEND_FE(array_column, arginfo_array_column) - ZEND_FE(array_reverse, arginfo_array_reverse) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_count_values, arginfo_array_count_values) + 
ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_column, arginfo_array_column) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_reverse, arginfo_array_reverse) ZEND_FE(array_pad, arginfo_array_pad) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_flip, arginfo_array_flip) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_change_key_case, arginfo_array_change_key_case) @@ -2910,15 +2910,15 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(array_udiff_uassoc, arginfo_array_udiff_uassoc) ZEND_FE(array_multisort, arginfo_array_multisort) ZEND_FE(array_rand, arginfo_array_rand) - ZEND_FE(array_sum, arginfo_array_sum) - ZEND_FE(array_product, arginfo_array_product) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_sum, arginfo_array_sum) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_product, arginfo_array_product) ZEND_FE(array_reduce, arginfo_array_reduce) ZEND_FE(array_filter, arginfo_array_filter) ZEND_FE(array_map, arginfo_array_map) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_key_exists, arginfo_array_key_exists) ZEND_FALIAS(key_exists, array_key_exists, arginfo_key_exists) - ZEND_FE(array_chunk, arginfo_array_chunk) - ZEND_FE(array_combine, arginfo_array_combine) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_chunk, arginfo_array_chunk) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_combine, arginfo_array_combine) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_is_list, arginfo_array_is_list) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(base64_encode, arginfo_base64_encode) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(base64_decode, arginfo_base64_decode) @@ -2982,7 +2982,7 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(is_uploaded_file, arginfo_is_uploaded_file) ZEND_FE(move_uploaded_file, arginfo_move_uploaded_file) ZEND_FE(parse_ini_file, arginfo_parse_ini_file) - ZEND_FE(parse_ini_string, arginfo_parse_ini_string) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(parse_ini_string, arginfo_parse_ini_string) #if ZEND_DEBUG ZEND_FE(config_get_hash, arginfo_config_get_hash) #endif @@ -3064,8 +3064,8 @@ static const 
zend_function_entry ext_functions[] = { ZEND_FE(assert_options, arginfo_assert_options) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(bin2hex, arginfo_bin2hex) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(hex2bin, arginfo_hex2bin) - ZEND_FE(strspn, arginfo_strspn) - ZEND_FE(strcspn, arginfo_strcspn) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strspn, arginfo_strspn) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strcspn, arginfo_strcspn) #if defined(HAVE_NL_LANGINFO) ZEND_FE(nl_langinfo, arginfo_nl_langinfo) #endif @@ -3074,65 +3074,65 @@ static const zend_function_entry ext_functions[] = { ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(rtrim, arginfo_rtrim) ZEND_FALIAS(chop, rtrim, arginfo_chop) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(ltrim, arginfo_ltrim) - ZEND_FE(wordwrap, arginfo_wordwrap) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(wordwrap, arginfo_wordwrap) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(explode, arginfo_explode) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(implode, arginfo_implode) ZEND_FALIAS(join, implode, arginfo_join) - ZEND_FE(strtok, arginfo_strtok) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strtok, arginfo_strtok) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strtoupper, arginfo_strtoupper) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strtolower, arginfo_strtolower) ZEND_FE(basename, arginfo_basename) ZEND_FE(dirname, arginfo_dirname) ZEND_FE(pathinfo, arginfo_pathinfo) - ZEND_FE(stristr, arginfo_stristr) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(stristr, arginfo_stristr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strstr, arginfo_strstr) ZEND_FALIAS(strchr, strstr, arginfo_strchr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strpos, arginfo_strpos) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(stripos, arginfo_stripos) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strrpos, arginfo_strrpos) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strripos, arginfo_strripos) - ZEND_FE(strrchr, arginfo_strrchr) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strrchr, arginfo_strrchr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_contains, arginfo_str_contains) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_starts_with, 
arginfo_str_starts_with) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_ends_with, arginfo_str_ends_with) - ZEND_FE(chunk_split, arginfo_chunk_split) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(chunk_split, arginfo_chunk_split) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(substr, arginfo_substr) - ZEND_FE(substr_replace, arginfo_substr_replace) - ZEND_FE(quotemeta, arginfo_quotemeta) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(substr_replace, arginfo_substr_replace) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(quotemeta, arginfo_quotemeta) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(ord, arginfo_ord) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(chr, arginfo_chr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(ucfirst, arginfo_ucfirst) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(lcfirst, arginfo_lcfirst) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(ucwords, arginfo_ucwords) - ZEND_FE(strtr, arginfo_strtr) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strtr, arginfo_strtr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strrev, arginfo_strrev) ZEND_FE(similar_text, arginfo_similar_text) - ZEND_FE(addcslashes, arginfo_addcslashes) - ZEND_FE(addslashes, arginfo_addslashes) - ZEND_FE(stripcslashes, arginfo_stripcslashes) - ZEND_FE(stripslashes, arginfo_stripslashes) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(addcslashes, arginfo_addcslashes) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(addslashes, arginfo_addslashes) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(stripcslashes, arginfo_stripcslashes) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(stripslashes, arginfo_stripslashes) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_replace, arginfo_str_replace) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_ireplace, arginfo_str_ireplace) ZEND_FE(hebrev, arginfo_hebrev) - ZEND_FE(nl2br, arginfo_nl2br) - ZEND_FE(strip_tags, arginfo_strip_tags) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(nl2br, arginfo_nl2br) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strip_tags, arginfo_strip_tags) ZEND_FE(setlocale, arginfo_setlocale) ZEND_FE(parse_str, arginfo_parse_str) ZEND_FE(str_getcsv, arginfo_str_getcsv) ZEND_FE(str_repeat, arginfo_str_repeat) - 
ZEND_FE(count_chars, arginfo_count_chars) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(count_chars, arginfo_count_chars) ZEND_FE(strnatcmp, arginfo_strnatcmp) ZEND_FE(localeconv, arginfo_localeconv) ZEND_FE(strnatcasecmp, arginfo_strnatcasecmp) - ZEND_FE(substr_count, arginfo_substr_count) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(substr_count, arginfo_substr_count) ZEND_FE(str_pad, arginfo_str_pad) ZEND_FE(sscanf, arginfo_sscanf) - ZEND_FE(str_rot13, arginfo_str_rot13) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_rot13, arginfo_str_rot13) ZEND_FE(str_shuffle, arginfo_str_shuffle) ZEND_FE(str_word_count, arginfo_str_word_count) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_split, arginfo_str_split) - ZEND_FE(strpbrk, arginfo_strpbrk) - ZEND_FE(substr_compare, arginfo_substr_compare) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strpbrk, arginfo_strpbrk) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(substr_compare, arginfo_substr_compare) ZEND_DEP_FE(utf8_encode, arginfo_utf8_encode) ZEND_DEP_FE(utf8_decode, arginfo_utf8_decode) ZEND_FE(opendir, arginfo_opendir) @@ -3245,8 +3245,8 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(fsockopen, arginfo_fsockopen) ZEND_FE(pfsockopen, arginfo_pfsockopen) ZEND_FE(http_build_query, arginfo_http_build_query) - ZEND_FE(image_type_to_mime_type, arginfo_image_type_to_mime_type) - ZEND_FE(image_type_to_extension, arginfo_image_type_to_extension) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(image_type_to_mime_type, arginfo_image_type_to_mime_type) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(image_type_to_extension, arginfo_image_type_to_extension) ZEND_FE(getimagesize, arginfo_getimagesize) ZEND_FE(getimagesizefromstring, arginfo_getimagesizefromstring) ZEND_FE(phpinfo, arginfo_phpinfo) @@ -3329,7 +3329,7 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(password_hash, arginfo_password_hash) ZEND_FE(password_needs_rehash, arginfo_password_needs_rehash) ZEND_FE(password_verify, arginfo_password_verify) - ZEND_FE(password_algos, arginfo_password_algos) + 
ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(password_algos, arginfo_password_algos) #if defined(PHP_CAN_SUPPORT_PROC_OPEN) ZEND_FE(proc_open, arginfo_proc_open) #endif @@ -3342,8 +3342,8 @@ static const zend_function_entry ext_functions[] = { #if defined(PHP_CAN_SUPPORT_PROC_OPEN) ZEND_FE(proc_get_status, arginfo_proc_get_status) #endif - ZEND_FE(quoted_printable_decode, arginfo_quoted_printable_decode) - ZEND_FE(quoted_printable_encode, arginfo_quoted_printable_encode) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(quoted_printable_decode, arginfo_quoted_printable_decode) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(quoted_printable_encode, arginfo_quoted_printable_encode) ZEND_FE(soundex, arginfo_soundex) ZEND_FE(stream_select, arginfo_stream_select) ZEND_FE(stream_context_create, arginfo_stream_context_create) @@ -3422,7 +3422,7 @@ static const zend_function_entry ext_functions[] = { #if defined(HAVE_GETTIMEOFDAY) ZEND_FE(uniqid, arginfo_uniqid) #endif - ZEND_FE(parse_url, arginfo_parse_url) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(parse_url, arginfo_parse_url) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(urlencode, arginfo_urlencode) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(urldecode, arginfo_urldecode) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(rawurlencode, arginfo_rawurlencode) @@ -3434,8 +3434,8 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(stream_bucket_new, arginfo_stream_bucket_new) ZEND_FE(stream_get_filters, arginfo_stream_get_filters) ZEND_FE(stream_filter_register, arginfo_stream_filter_register) - ZEND_FE(convert_uuencode, arginfo_convert_uuencode) - ZEND_FE(convert_uudecode, arginfo_convert_uudecode) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(convert_uuencode, arginfo_convert_uuencode) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(convert_uudecode, arginfo_convert_uudecode) ZEND_FE(var_dump, arginfo_var_dump) ZEND_FE(var_export, arginfo_var_export) ZEND_FE(debug_zval_dump, arginfo_debug_zval_dump) diff --git a/ext/zend_test/tests/observer_basic_01.phpt b/ext/zend_test/tests/observer_basic_01.phpt 
index b208ae2341e85..4f6dea29ecc7f 100644 --- a/ext/zend_test/tests/observer_basic_01.phpt +++ b/ext/zend_test/tests/observer_basic_01.phpt @@ -5,6 +5,7 @@ zend_test --INI-- zend_test.observer.enabled=1 zend_test.observer.observe_all=1 +opcache.optimization_level=0x7FFFBFFF & ~0x0080 --FILE-- Date: Wed, 17 May 2023 17:42:05 +0200 Subject: [PATCH 049/168] [skip ci] Mark frequently failing fpm test as XFAIL Reported here: https://github.com/php/php-src/pull/11050#issuecomment-1546990346 --- sapi/fpm/tests/bug64539-status-json-encoding.phpt | 1 + 1 file changed, 1 insertion(+) diff --git a/sapi/fpm/tests/bug64539-status-json-encoding.phpt b/sapi/fpm/tests/bug64539-status-json-encoding.phpt index 0d735925593a4..7daa43ab75b13 100644 --- a/sapi/fpm/tests/bug64539-status-json-encoding.phpt +++ b/sapi/fpm/tests/bug64539-status-json-encoding.phpt @@ -1,5 +1,6 @@ --TEST-- FPM: bug64539 - status json format escaping +--XFAIL-- --SKIPIF-- From aa061cd40b1b8c935b41f43774bfcbc091e943d2 Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Fri, 19 May 2023 13:18:36 +0100 Subject: [PATCH 050/168] Fix FPM status json encoded value test Closes GH-11276 --- .../tests/bug64539-status-json-encoding.phpt | 5 +-- sapi/fpm/tests/response.inc | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/sapi/fpm/tests/bug64539-status-json-encoding.phpt b/sapi/fpm/tests/bug64539-status-json-encoding.phpt index 7daa43ab75b13..f5c856c11c8a5 100644 --- a/sapi/fpm/tests/bug64539-status-json-encoding.phpt +++ b/sapi/fpm/tests/bug64539-status-json-encoding.phpt @@ -1,6 +1,5 @@ --TEST-- FPM: bug64539 - status json format escaping ---XFAIL-- --SKIPIF-- @@ -33,8 +32,7 @@ $responses = $tester ['query' => 'a=b"c'], ['uri' => '/status', 'query' => 'full&json', 'delay' => 100000], ]); -$data = json_decode($responses[1]->getBody('application/json'), true); -var_dump(explode('?', $data['processes'][0]['request uri'])[1]); 
+$responses[1]->expectJsonBodyPatternForStatusProcessField('request uri', '\?a=b"c$'); $tester->terminate(); $tester->expectLogTerminatingNotices(); $tester->close(); @@ -42,7 +40,6 @@ $tester->close(); ?> Done --EXPECT-- -string(5) "a=b"c" Done --CLEAN-- getBody('application/json'); + $data = json_decode($rawData, true); + if (empty($data['processes']) || !is_array($data['processes'])) { + $this->error( + "The body data is not a valid status json containing processes field '$rawData'" + ); + } + foreach ($data['processes'] as $process) { + if (preg_match('|' . $pattern . '|', $process[$fieldName]) !== false) { + return $this; + } + } + + $this->error( + "No field $fieldName matched pattern $pattern for any process in status data '$rawData'" + ); + + return $this; + } + /** * @return Response */ From 1ede3137c9abe19da1fafef66d8e4038c63516b4 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 19 May 2023 12:55:39 +0200 Subject: [PATCH 051/168] Fix GH-11274: POST/PATCH request via file_get_contents + stream_context_create switches to GET after a HTTP 308 redirect RFC 7231 states that status code 307 should keep the POST method upon redirect. RFC 7538 does the same for code 308. Although it's not mandated by the RFCs that PATCH is also kept (we can choose), it seems like keeping PATCH will be the most consistent and understandable behaviour. This patch also changes an existing test because it was testing for the wrong behaviour. Closes GH-11275. --- NEWS | 2 + ext/standard/http_fopen_wrapper.c | 21 ++++++--- ext/standard/tests/http/bug67430.phpt | 2 +- ext/standard/tests/http/gh11274.phpt | 62 +++++++++++++++++++++++++++ 4 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 ext/standard/tests/http/gh11274.phpt diff --git a/NEWS b/NEWS index 9c44373d21534..0f24c304b84aa 100644 --- a/NEWS +++ b/NEWS @@ -46,6 +46,8 @@ PHP NEWS - Standard: . 
Fixed bug GH-11138 (move_uploaded_file() emits open_basedir warning for source file). (ilutov) + . Fixed bug GH-11274 (POST/PATCH request switches to GET after a HTTP 308 + redirect). (nielsdos) - Streams: . Fixed bug GH-10031 ([Stream] STREAM_NOTIFY_PROGRESS over HTTP emitted diff --git a/ext/standard/http_fopen_wrapper.c b/ext/standard/http_fopen_wrapper.c index fa0dcb5e6890a..f6a4a094b425e 100644 --- a/ext/standard/http_fopen_wrapper.c +++ b/ext/standard/http_fopen_wrapper.c @@ -79,6 +79,7 @@ #define HTTP_WRAPPER_HEADER_INIT 1 #define HTTP_WRAPPER_REDIRECTED 2 +#define HTTP_WRAPPER_KEEP_METHOD 4 static inline void strip_header(char *header_bag, char *lc_header_bag, const char *lc_header_name) @@ -140,6 +141,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *user_headers = NULL; int header_init = ((flags & HTTP_WRAPPER_HEADER_INIT) != 0); int redirected = ((flags & HTTP_WRAPPER_REDIRECTED) != 0); + int redirect_keep_method = ((flags & HTTP_WRAPPER_KEEP_METHOD) != 0); bool follow_location = 1; php_stream_filter *transfer_encoding = NULL; int response_code; @@ -363,8 +365,8 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, if (context && (tmpzval = php_stream_context_get_option(context, "http", "method")) != NULL) { if (Z_TYPE_P(tmpzval) == IS_STRING && Z_STRLEN_P(tmpzval) > 0) { /* As per the RFC, automatically redirected requests MUST NOT use other methods than - * GET and HEAD unless it can be confirmed by the user */ - if (!redirected + * GET and HEAD unless it can be confirmed by the user. 
*/ + if (!redirected || redirect_keep_method || zend_string_equals_literal(Z_STR_P(tmpzval), "GET") || zend_string_equals_literal(Z_STR_P(tmpzval), "HEAD") ) { @@ -458,7 +460,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, zend_str_tolower(ZSTR_VAL(tmp), ZSTR_LEN(tmp)); t = ZSTR_VAL(tmp); - if (!header_init) { + if (!header_init && !redirect_keep_method) { /* strip POST headers on redirect */ strip_header(user_headers, t, "content-length:"); strip_header(user_headers, t, "content-type:"); @@ -606,7 +608,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, * see bug #44603 for details. Since Content-Type maybe part of user's headers we need to do this check first. */ if ( - header_init && + (header_init || redirect_keep_method) && context && !(have_header & HTTP_HEADER_CONTENT_LENGTH) && (tmpzval = php_stream_context_get_option(context, "http", "content")) != NULL && @@ -624,7 +626,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, } /* Request content, such as for POST requests */ - if (header_init && context && + if ((header_init || redirect_keep_method) && context && (tmpzval = php_stream_context_get_option(context, "http", "content")) != NULL && Z_TYPE_P(tmpzval) == IS_STRING && Z_STRLEN_P(tmpzval) > 0) { if (!(have_header & HTTP_HEADER_CONTENT_LENGTH)) { @@ -913,9 +915,16 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, CHECK_FOR_CNTRL_CHARS(resource->pass); CHECK_FOR_CNTRL_CHARS(resource->path); } + int new_flags = HTTP_WRAPPER_REDIRECTED; + if (response_code == 307 || response_code == 308) { + /* RFC 7538 specifies that status code 308 does not allow changing the request method from POST to GET. + * RFC 7231 does the same for status code 307. + * To keep consistency between POST and PATCH requests, we'll also not change the request method from PATCH to GET, even though it's allowed it's not mandated by the RFC. 
*/ + new_flags |= HTTP_WRAPPER_KEEP_METHOD; + } stream = php_stream_url_wrap_http_ex( wrapper, new_path, mode, options, opened_path, context, - --redirect_max, HTTP_WRAPPER_REDIRECTED, response_header STREAMS_CC); + --redirect_max, new_flags, response_header STREAMS_CC); } else { php_stream_wrapper_log_error(wrapper, options, "HTTP request failed! %s", tmp_line); } diff --git a/ext/standard/tests/http/bug67430.phpt b/ext/standard/tests/http/bug67430.phpt index e72e419fc02ac..1a515537e6609 100644 --- a/ext/standard/tests/http/bug67430.phpt +++ b/ext/standard/tests/http/bug67430.phpt @@ -41,7 +41,7 @@ POST / HTTP/1.1 Host: %s:%d Connection: close -GET /foo HTTP/1.1 +POST /foo HTTP/1.1 Host: %s:%d Connection: close diff --git a/ext/standard/tests/http/gh11274.phpt b/ext/standard/tests/http/gh11274.phpt new file mode 100644 index 0000000000000..fc125bfc494cf --- /dev/null +++ b/ext/standard/tests/http/gh11274.phpt @@ -0,0 +1,62 @@ +--TEST-- +GH-11274 (POST/PATCH request via file_get_contents + stream_context_create switches to GET after a HTTP 308 redirect) +--INI-- +allow_url_fopen=1 +--CONFLICTS-- +server +--FILE-- + ['method' => 'POST', 'header' => 'Content-type: application/x-www-form-urlencoded', 'content' => http_build_query(['hello' => 'world'])]])); + echo file_get_contents("http://" . PHP_CLI_SERVER_ADDRESS . "/test$suffix", false, stream_context_create(['http' => ['method' => 'PATCH', 'header' => 'Content-type: application/x-www-form-urlencoded', 'content' => http_build_query(['hello' => 'world'])]])); + echo file_get_contents("http://" . PHP_CLI_SERVER_ADDRESS . "/test/$suffix", false, stream_context_create(['http' => ['method' => 'POST', 'header' => 'Content-type: application/x-www-form-urlencoded', 'content' => http_build_query(['hello' => 'world'])]])); + echo file_get_contents("http://" . PHP_CLI_SERVER_ADDRESS . 
"/test/$suffix", false, stream_context_create(['http' => ['method' => 'PATCH', 'header' => 'Content-type: application/x-www-form-urlencoded', 'content' => http_build_query(['hello' => 'world'])]])); +} +?> +--EXPECT-- +-- Testing unredirected request -- +method: POST; body: hello=world +method: PATCH; body: hello=world +method: POST; body: hello=world +method: PATCH; body: hello=world +-- Testing redirect status code 301 -- +method: GET; body: +method: GET; body: +method: GET; body: +method: GET; body: +-- Testing redirect status code 302 -- +method: GET; body: +method: GET; body: +method: GET; body: +method: GET; body: +-- Testing redirect status code 307 -- +method: POST; body: hello=world +method: PATCH; body: hello=world +method: POST; body: hello=world +method: PATCH; body: hello=world +-- Testing redirect status code 308 -- +method: POST; body: hello=world +method: PATCH; body: hello=world +method: POST; body: hello=world +method: PATCH; body: hello=world From 1fe7dc31ef149db20ea3813c92a45deff80c21a3 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 20 May 2023 11:43:30 +0200 Subject: [PATCH 052/168] Fix -Wstrict-prototypes warnings in fuzzer SAPI (#11277) --- sapi/fuzzer/fuzzer-sapi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sapi/fuzzer/fuzzer-sapi.c b/sapi/fuzzer/fuzzer-sapi.c index cbb09e08f2730..d0a9b553aa856 100644 --- a/sapi/fuzzer/fuzzer-sapi.c +++ b/sapi/fuzzer/fuzzer-sapi.c @@ -80,7 +80,7 @@ static void send_header(sapi_header_struct *sapi_header, void *server_context) { } -static char* read_cookies() +static char* read_cookies(void) { /* TODO: fuzz these! 
*/ return NULL; @@ -172,7 +172,7 @@ int fuzzer_init_php(const char *extra_ini) return SUCCESS; } -int fuzzer_request_startup() +int fuzzer_request_startup(void) { if (php_request_startup() == FAILURE) { php_module_shutdown(); @@ -188,7 +188,7 @@ int fuzzer_request_startup() return SUCCESS; } -void fuzzer_request_shutdown() +void fuzzer_request_shutdown(void) { zend_try { /* Destroy thrown exceptions. This does not happen as part of request shutdown. */ @@ -207,7 +207,7 @@ void fuzzer_request_shutdown() } /* Set up a dummy stack frame so that exceptions may be thrown. */ -void fuzzer_setup_dummy_frame() +void fuzzer_setup_dummy_frame(void) { static zend_execute_data execute_data; static zend_function func; From 93fa9613e162d1a0e8479ba83c4b6a399846e209 Mon Sep 17 00:00:00 2001 From: Peter Date: Tue, 16 May 2023 00:31:57 +0800 Subject: [PATCH 053/168] Fix GH-11099: Generating phar.php during cross-compile can't be done Closes GH-11243. --- NEWS | 4 ++++ ext/phar/Makefile.frag | 44 ++++++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/NEWS b/NEWS index 0f24c304b84aa..32f2b0e8dc969 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,10 @@ PHP NEWS - PGSQL: . Fixed parameter parsing of pg_lo_export(). (kocsismate) +- Phar: + . Fixed bug GH-11099 (Generating phar.php during cross-compile can't be + done). (peter279k) + - Soap: . Fixed bug GH-8426 (make test fail while soap extension build). 
(nielsdos) diff --git a/ext/phar/Makefile.frag b/ext/phar/Makefile.frag index 58789cae25b57..e5646b2029261 100644 --- a/ext/phar/Makefile.frag +++ b/ext/phar/Makefile.frag @@ -29,22 +29,38 @@ $(builddir)/phar/phar.inc: $(srcdir)/phar/phar.inc -@test -d $(builddir)/phar || mkdir $(builddir)/phar -@test -f $(builddir)/phar/phar.inc || cp $(srcdir)/phar/phar.inc $(builddir)/phar/phar.inc + +TEST_PHP_EXECUTABLE = $(shell $(PHP_EXECUTABLE) -v 2>&1) +TEST_PHP_EXECUTABLE_RES = $(shell echo "$(TEST_PHP_EXECUTABLE)" | grep -c 'Exec format error') + $(builddir)/phar.php: $(srcdir)/build_precommand.php $(srcdir)/phar/*.inc $(srcdir)/phar/*.php $(SAPI_CLI_PATH) - -@echo "Generating phar.php" - @$(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(srcdir)/build_precommand.php > $(builddir)/phar.php + -@(echo "Generating phar.php"; \ + if [ $(TEST_PHP_EXECUTABLE_RES) -ne 1 ]; then \ + $(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(srcdir)/build_precommand.php > $(builddir)/phar.php; \ + else \ + echo "Skipping phar.php generating during cross compilation"; \ + fi) $(builddir)/phar.phar: $(builddir)/phar.php $(builddir)/phar/phar.inc $(srcdir)/phar/*.inc $(srcdir)/phar/*.php $(SAPI_CLI_PATH) - -@echo "Generating phar.phar" - -@rm -f $(builddir)/phar.phar - -@rm -f $(srcdir)/phar.phar - @$(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(builddir)/phar.php pack -f $(builddir)/phar.phar -a pharcommand -c auto -x \\.svn -p 0 -s $(srcdir)/phar/phar.php -h sha1 -b "$(PHP_PHARCMD_BANG)" $(srcdir)/phar/ - -@chmod +x $(builddir)/phar.phar + -@(echo "Generating phar.phar"; \ + if [ $(TEST_PHP_EXECUTABLE_RES) -ne 1 ]; then \ + rm -f $(builddir)/phar.phar; \ + rm -f $(srcdir)/phar.phar; \ + $(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(builddir)/phar.php pack -f $(builddir)/phar.phar -a pharcommand -c auto -x \\.svn -p 0 -s $(srcdir)/phar/phar.php -h sha1 -b "$(PHP_PHARCMD_BANG)" $(srcdir)/phar/; \ + chmod +x $(builddir)/phar.phar; \ + else \ + echo "Skipping phar.phar generating 
during cross compilation"; \ + fi) install-pharcmd: pharcmd - -@$(mkinstalldirs) $(INSTALL_ROOT)$(bindir) - $(INSTALL) $(builddir)/phar.phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix).phar - -@rm -f $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix) - $(LN_S) -f $(program_prefix)phar$(program_suffix).phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix) - @$(mkinstalldirs) $(INSTALL_ROOT)$(mandir)/man1 - @$(INSTALL_DATA) $(builddir)/phar.1 $(INSTALL_ROOT)$(mandir)/man1/$(program_prefix)phar$(program_suffix).1 - @$(INSTALL_DATA) $(builddir)/phar.phar.1 $(INSTALL_ROOT)$(mandir)/man1/$(program_prefix)phar$(program_suffix).phar.1 + @(if [ $(TEST_PHP_EXECUTABLE_RES) -ne 1 ]; then \ + $(mkinstalldirs) $(INSTALL_ROOT)$(bindir); \ + $(INSTALL) $(builddir)/phar.phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix).phar; \ + rm -f $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix); \ + $(LN_S) -f $(program_prefix)phar$(program_suffix).phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix); \ + $(mkinstalldirs) $(INSTALL_ROOT)$(mandir)/man1; \ + $(INSTALL_DATA) $(builddir)/phar.1 $(INSTALL_ROOT)$(mandir)/man1/$(program_prefix)phar$(program_suffix).1; \ + $(INSTALL_DATA) $(builddir)/phar.phar.1 $(INSTALL_ROOT)$(mandir)/man1/$(program_prefix)phar$(program_suffix).phar.1; \ + else \ + echo "Skipping install-pharcmd during cross compilation"; \ + fi) From 548e0615cdfa3f13e51c07e95cde0b20de21c877 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Sat, 20 May 2023 22:49:41 +0100 Subject: [PATCH 054/168] FPM: refactor fpm_php_get_string_from_table() to better match usage (#11051) Pass the key length to improve the existence check for the key --- sapi/fpm/fpm/fpm_php.c | 18 +++++++++--------- sapi/fpm/fpm/fpm_php.h | 2 +- sapi/fpm/fpm/fpm_status.c | 13 +++++++------ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/sapi/fpm/fpm/fpm_php.c b/sapi/fpm/fpm/fpm_php.c index 
92b189668206e..aadaafbb05630 100644 --- a/sapi/fpm/fpm/fpm_php.c +++ b/sapi/fpm/fpm/fpm_php.c @@ -252,13 +252,13 @@ int fpm_php_limit_extensions(char *path) /* {{{ */ } /* }}} */ -char* fpm_php_get_string_from_table(zend_string *table, char *key) /* {{{ */ +bool fpm_php_is_key_in_table(zend_string *table, const char *key, size_t key_len) /* {{{ */ { - zval *data, *tmp; + zval *data; zend_string *str; - if (!table || !key) { - return NULL; - } + + ZEND_ASSERT(table); + ZEND_ASSERT(key); /* inspired from ext/standard/info.c */ @@ -270,12 +270,12 @@ char* fpm_php_get_string_from_table(zend_string *table, char *key) /* {{{ */ return NULL; } - ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(data), str, tmp) { - if (str && !strncmp(ZSTR_VAL(str), key, ZSTR_LEN(str))) { - return Z_STRVAL_P(tmp); + ZEND_HASH_FOREACH_STR_KEY(Z_ARRVAL_P(data), str) { + if (str && zend_string_equals_cstr(str, key, key_len)) { + return true; } } ZEND_HASH_FOREACH_END(); - return NULL; + return false; } /* }}} */ diff --git a/sapi/fpm/fpm/fpm_php.h b/sapi/fpm/fpm/fpm_php.h index 01ed65799170d..d61857c5e0e11 100644 --- a/sapi/fpm/fpm/fpm_php.h +++ b/sapi/fpm/fpm/fpm_php.h @@ -41,6 +41,6 @@ void fpm_php_soft_quit(void); int fpm_php_init_main(void); int fpm_php_apply_defines_ex(struct key_value_s *kv, int mode); int fpm_php_limit_extensions(char *path); -char* fpm_php_get_string_from_table(zend_string *table, char *key); +bool fpm_php_is_key_in_table(zend_string *table, const char *key, size_t key_len); #endif diff --git a/sapi/fpm/fpm/fpm_status.c b/sapi/fpm/fpm/fpm_status.c index 103fbafd953b2..20c5770768a73 100644 --- a/sapi/fpm/fpm/fpm_status.c +++ b/sapi/fpm/fpm/fpm_status.c @@ -145,7 +145,6 @@ int fpm_status_handle_request(void) /* {{{ */ bool encode_html, encode_json; char *short_syntax, *short_post; char *full_pre, *full_syntax, *full_post, *full_separator; - zend_string *_GET_str; if (!SG(request_info).request_uri) { return 0; @@ -170,11 +169,13 @@ int fpm_status_handle_request(void) /* {{{ */ 
/* STATUS */ if (fpm_status_uri && !strcmp(fpm_status_uri, SG(request_info).request_uri)) { + zend_string *_GET_str; + fpm_request_executing(); /* full status ? */ _GET_str = ZSTR_INIT_LITERAL("_GET", 0); - full = (fpm_php_get_string_from_table(_GET_str, "full") != NULL); + full = fpm_php_is_key_in_table(_GET_str, ZEND_STRL("full")); short_syntax = short_post = NULL; full_separator = full_pre = full_syntax = full_post = NULL; encode_html = false; @@ -218,7 +219,7 @@ int fpm_status_handle_request(void) /* {{{ */ } /* HTML */ - if (fpm_php_get_string_from_table(_GET_str, "html")) { + if (fpm_php_is_key_in_table(_GET_str, ZEND_STRL("html"))) { sapi_add_header_ex(ZEND_STRL("Content-Type: text/html"), 1, 1); time_format = "%d/%b/%Y:%H:%M:%S %z"; encode_html = true; @@ -287,7 +288,7 @@ int fpm_status_handle_request(void) /* {{{ */ } /* XML */ - } else if (fpm_php_get_string_from_table(_GET_str, "xml")) { + } else if (fpm_php_is_key_in_table(_GET_str, ZEND_STRL("xml"))) { sapi_add_header_ex(ZEND_STRL("Content-Type: text/xml"), 1, 1); time_format = "%s"; encode_html = true; @@ -335,7 +336,7 @@ int fpm_status_handle_request(void) /* {{{ */ } /* JSON */ - } else if (fpm_php_get_string_from_table(_GET_str, "json")) { + } else if (fpm_php_is_key_in_table(_GET_str, ZEND_STRL("json"))) { sapi_add_header_ex(ZEND_STRL("Content-Type: application/json"), 1, 1); time_format = "%s"; @@ -384,7 +385,7 @@ int fpm_status_handle_request(void) /* {{{ */ } /* OpenMetrics */ - } else if (fpm_php_get_string_from_table(_GET_str, "openmetrics")) { + } else if (fpm_php_is_key_in_table(_GET_str, ZEND_STRL("openmetrics"))) { sapi_add_header_ex(ZEND_STRL("Content-Type: application/openmetrics-text; version=1.0.0; charset=utf-8"), 1, 1); time_format = "%s"; From c717c79a0967fb47b784c44e8e027e86b49bb152 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Fri, 14 Apr 2023 09:00:11 +0200 Subject: [PATCH 055/168] Combine CJK encoding conversion code in a single source file This will make it easier to combine 
duplicated code between all the CJK text encodings (a significant amount is already combined in this commit, such as the repeated definitions of SJIS_DECODE and SJIS_ENCODE), but I hope to remove even more redundancy in the future. The table used to implement mb_strlen for CP932 has been changed to the same table as "SJIS-win". --- ext/mbstring/config.m4 | 20 +- ext/mbstring/config.w32 | 14 +- ext/mbstring/libmbfl/filters/mbfilter_big5.c | 660 - ext/mbstring/libmbfl/filters/mbfilter_big5.h | 46 - ext/mbstring/libmbfl/filters/mbfilter_cjk.c | 12545 ++++++++++++++++ ext/mbstring/libmbfl/filters/mbfilter_cjk.h | 48 + .../libmbfl/filters/mbfilter_cp5022x.c | 1252 -- .../libmbfl/filters/mbfilter_cp5022x.h | 50 - .../libmbfl/filters/mbfilter_cp51932.c | 412 - ext/mbstring/libmbfl/filters/mbfilter_cp932.c | 618 - ext/mbstring/libmbfl/filters/mbfilter_cp932.h | 47 - ext/mbstring/libmbfl/filters/mbfilter_cp936.c | 439 - ext/mbstring/libmbfl/filters/mbfilter_cp936.h | 42 - .../libmbfl/filters/mbfilter_euc_cn.c | 326 - .../libmbfl/filters/mbfilter_euc_cn.h | 42 - .../libmbfl/filters/mbfilter_euc_jp.c | 373 - .../libmbfl/filters/mbfilter_euc_jp.h | 42 - .../libmbfl/filters/mbfilter_euc_jp_2004.h | 39 - .../libmbfl/filters/mbfilter_euc_jp_win.c | 536 - .../libmbfl/filters/mbfilter_euc_jp_win.h | 42 - .../libmbfl/filters/mbfilter_euc_kr.c | 297 - .../libmbfl/filters/mbfilter_euc_kr.h | 42 - .../libmbfl/filters/mbfilter_euc_tw.c | 375 - .../libmbfl/filters/mbfilter_euc_tw.h | 42 - .../libmbfl/filters/mbfilter_gb18030.c | 644 - .../libmbfl/filters/mbfilter_gb18030.h | 42 - ext/mbstring/libmbfl/filters/mbfilter_hz.c | 409 - ext/mbstring/libmbfl/filters/mbfilter_hz.h | 43 - .../libmbfl/filters/mbfilter_iso2022_jp_ms.c | 584 - .../libmbfl/filters/mbfilter_iso2022_jp_ms.h | 43 - .../libmbfl/filters/mbfilter_iso2022_kr.c | 431 - .../libmbfl/filters/mbfilter_iso2022_kr.h | 42 - .../filters/mbfilter_iso2022jp_mobile.c | 757 - .../filters/mbfilter_iso2022jp_mobile.h | 39 - 
ext/mbstring/libmbfl/filters/mbfilter_jis.c | 944 -- ext/mbstring/libmbfl/filters/mbfilter_jis.h | 47 - ext/mbstring/libmbfl/filters/mbfilter_sjis.c | 2941 ---- ext/mbstring/libmbfl/filters/mbfilter_sjis.h | 46 - .../libmbfl/filters/mbfilter_sjis_2004.c | 1420 -- .../libmbfl/filters/mbfilter_sjis_2004.h | 49 - .../libmbfl/filters/mbfilter_sjis_mac.h | 39 - .../libmbfl/filters/mbfilter_sjis_mobile.h | 64 - ext/mbstring/libmbfl/filters/mbfilter_uhc.c | 297 - ext/mbstring/libmbfl/filters/mbfilter_uhc.h | 42 - .../libmbfl/filters/mbfilter_utf8_mobile.c | 66 +- .../libmbfl/filters/unicode_table_cp932_ext.h | 17 - .../libmbfl/filters/unicode_table_cp936.h | 43 - .../libmbfl/filters/unicode_table_jis.h | 29 - .../libmbfl/filters/unicode_table_uhc.h | 43 - ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 24 +- ext/mbstring/libmbfl/mbfl/mbfl_encoding.c | 24 +- ext/mbstring/tests/cp932_encoding.phpt | 3 + 52 files changed, 12667 insertions(+), 14854 deletions(-) delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_big5.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_big5.h create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cjk.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cjk.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp51932.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp932.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp932.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp936.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp936.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h delete mode 100644 
ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_gb18030.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_gb18030.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_hz.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_hz.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_jis.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_jis.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_uhc.c delete mode 100644 ext/mbstring/libmbfl/filters/mbfilter_uhc.h diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index db2298661920f..2a3da1ce88a3b 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -95,30 +95,12 @@ 
AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/html_entities.c libmbfl/filters/mbfilter_7bit.c libmbfl/filters/mbfilter_base64.c - libmbfl/filters/mbfilter_big5.c - libmbfl/filters/mbfilter_cp5022x.c - libmbfl/filters/mbfilter_cp51932.c - libmbfl/filters/mbfilter_cp932.c - libmbfl/filters/mbfilter_cp936.c - libmbfl/filters/mbfilter_gb18030.c - libmbfl/filters/mbfilter_euc_cn.c - libmbfl/filters/mbfilter_euc_jp.c - libmbfl/filters/mbfilter_euc_jp_win.c - libmbfl/filters/mbfilter_euc_kr.c - libmbfl/filters/mbfilter_euc_tw.c + libmbfl/filters/mbfilter_cjk.c libmbfl/filters/mbfilter_htmlent.c - libmbfl/filters/mbfilter_hz.c - libmbfl/filters/mbfilter_iso2022_jp_ms.c - libmbfl/filters/mbfilter_iso2022jp_mobile.c - libmbfl/filters/mbfilter_iso2022_kr.c - libmbfl/filters/mbfilter_jis.c libmbfl/filters/mbfilter_qprint.c libmbfl/filters/mbfilter_singlebyte.c - libmbfl/filters/mbfilter_sjis.c - libmbfl/filters/mbfilter_sjis_2004.c libmbfl/filters/mbfilter_ucs2.c libmbfl/filters/mbfilter_ucs4.c - libmbfl/filters/mbfilter_uhc.c libmbfl/filters/mbfilter_utf16.c libmbfl/filters/mbfilter_utf32.c libmbfl/filters/mbfilter_utf7.c diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 5ba672434356e..780fe47defd9a 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -17,17 +17,13 @@ if (PHP_MBSTRING != "no") { "ext\\mbstring\\libmbfl\\config.h", true); ADD_SOURCES("ext/mbstring/libmbfl/filters", "html_entities.c \ - mbfilter_7bit.c mbfilter_base64.c mbfilter_big5.c mbfilter_cp932.c \ - mbfilter_cp936.c mbfilter_cp51932.c mbfilter_euc_cn.c \ - mbfilter_euc_jp.c mbfilter_euc_jp_win.c mbfilter_euc_kr.c \ - mbfilter_euc_tw.c mbfilter_htmlent.c mbfilter_hz.c mbfilter_iso2022_kr.c \ - mbfilter_jis.c mbfilter_iso2022_jp_ms.c mbfilter_gb18030.c \ - mbfilter_sjis_2004.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \ - mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \ + mbfilter_7bit.c mbfilter_base64.c \ + mbfilter_cjk.c 
mbfilter_htmlent.c \ + mbfilter_qprint.c mbfilter_ucs2.c \ + mbfilter_ucs4.c mbfilter_utf16.c mbfilter_utf32.c \ mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \ mbfilter_utf8_mobile.c mbfilter_uuencode.c \ - mbfilter_cp5022x.c \ - mbfilter_iso2022jp_mobile.c mbfilter_singlebyte.c", "mbstring"); + mbfilter_singlebyte.c", "mbstring"); ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \ mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_big5.c b/ext/mbstring/libmbfl/filters/mbfilter_big5.c deleted file mode 100644 index ab10c6a5df3e4..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_big5.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Rui Hirokawa - * - */ -/* - * The source code included in this file was separated from mbfilter_tw.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_big5.h" - -#include "unicode_table_big5.h" - -static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_big5_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_big5(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_cp950_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp950(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", NULL}; - -const mbfl_encoding mbfl_encoding_big5 = { - mbfl_no_encoding_big5, - "BIG-5", - "BIG5", - mbfl_encoding_big5_aliases, - mblen_table_big5, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_big5_wchar, - &vtbl_wchar_big5, - mb_big5_to_wchar, - mb_wchar_to_big5, - NULL -}; - -const mbfl_encoding mbfl_encoding_cp950 = { - mbfl_no_encoding_cp950, - "CP950", - "BIG5", - NULL, - mblen_table_big5, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp950_wchar, - 
&vtbl_wchar_cp950, - mb_cp950_to_wchar, - mb_wchar_to_cp950, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_big5_wchar = { - mbfl_no_encoding_big5, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_big5_wchar, - mbfl_filt_conv_big5_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_big5 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_big5, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_big5, - mbfl_filt_conv_common_flush, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp950_wchar = { - mbfl_no_encoding_cp950, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_big5_wchar, - mbfl_filt_conv_big5_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp950 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp950, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_big5, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -/* 63 + 94 = 157 or 94 */ -static unsigned short cp950_pua_tbl[][4] = { - {0xe000, 0xe310, 0xfa40, 0xfefe}, - {0xe311, 0xeeb7, 0x8e40, 0xa0fe}, - {0xeeb8, 0xf6b0, 0x8140, 0x8dfe}, - {0xf6b1, 0xf70e, 0xc6a1, 0xc6fe}, - {0xf70f, 0xf848, 0xc740, 0xc8fe}, -}; - -static inline int is_in_cp950_pua(int c1, int c) -{ - if ((c1 >= 0xfa && c1 <= 0xfe) || (c1 >= 0x8e && c1 <= 0xa0) || - (c1 >= 0x81 && c1 <= 0x8d) || (c1 >= 0xc7 && c1 <= 0xc8)) { - return (c >= 0x40 && c <= 0x7e) || (c >= 0xa1 && c <= 0xfe); - } else if (c1 == 0xc6) { - return c >= 0xa1 && c <= 0xfe; - } - return 0; -} - -int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter) -{ - int k, c1, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (filter->from->no_encoding != mbfl_no_encoding_cp950 && c > 0xA0 && c <= 0xF9 && c != 0xC8) { - filter->status = 1; - filter->cache = c; - } else if 
(filter->from->no_encoding == mbfl_no_encoding_cp950 && c > 0x80 && c <= 0xFE) { - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - c1 = filter->cache; - if ((c > 0x3f && c < 0x7f) || (c > 0xa0 && c < 0xff)) { - if (c < 0x7f) { - w = (c1 - 0xa1)*157 + (c - 0x40); - } else { - w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f; - } - if (w >= 0 && w < big5_ucs_table_size) { - w = big5_ucs_table[w]; - } else { - w = 0; - } - - if (filter->from->no_encoding == mbfl_no_encoding_cp950) { - /* PUA for CP950 */ - if (is_in_cp950_pua(c1, c)) { - int c2 = (c1 << 8) | c; - - for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { - if (c2 >= cp950_pua_tbl[k][2] && c2 <= cp950_pua_tbl[k][3]) { - break; - } - } - - if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) { - w = 157*(c1 - (cp950_pua_tbl[k][2]>>8)) + c - (c >= 0xa1 ? 0x62 : 0x40) + cp950_pua_tbl[k][0]; - } else { - w = c2 - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0]; - } - } else if (c1 == 0xA1) { - if (c == 0x45) { - w = 0x2027; - } else if (c == 0x4E) { - w = 0xFE51; - } else if (c == 0x5A) { - w = 0x2574; - } else if (c == 0xC2) { - w = 0x00AF; - } else if (c == 0xC3) { - w = 0xFFE3; - } else if (c == 0xC5) { - w = 0x02CD; - } else if (c == 0xE3) { - w = 0xFF5E; - } else if (c == 0xF2) { - w = 0x2295; - } else if (c == 0xF3) { - w = 0x2299; - } else if (c == 0xFE) { - w = 0xFF0F; - } - } else if (c1 == 0xA2) { - if (c == 0x40) { - w = 0xFF3C; - } else if (c == 0x41) { - w = 0x2215; - } else if (c == 0x42) { - w = 0xFE68; - } else if (c == 0x46) { - w = 0xFFE0; - } else if (c == 0x47) { - w = 0xFFE1; - } else if (c == 0xCC) { - w = 0x5341; - } else if (c == 0xCE) { - w = 0x5345; - } - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - 
EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter) -{ - int k, s = 0; - - if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) { - s = ucs_a1_big5_table[c - ucs_a1_big5_table_min]; - } else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) { - s = ucs_a2_big5_table[c - ucs_a2_big5_table_min]; - } else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) { - s = ucs_a3_big5_table[c - ucs_a3_big5_table_min]; - } else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) { - s = ucs_i_big5_table[c - ucs_i_big5_table_min]; - } else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) { - s = ucs_r1_big5_table[c - ucs_r1_big5_table_min]; - } else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) { - s = ucs_r2_big5_table[c - ucs_r2_big5_table_min]; - } - - if (filter->to->no_encoding == mbfl_no_encoding_cp950) { - if (c >= 0xe000 && c <= 0xf848) { /* PUA for CP950 */ - for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { - if (c <= cp950_pua_tbl[k][1]) { - break; - } - } - - int c1 = c - cp950_pua_tbl[k][0]; - if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) { - int c2 = cp950_pua_tbl[k][2] >> 8; - s = ((c1 / 157) + c2) << 8; - c1 %= 157; - s |= c1 + (c1 >= 0x3f ? 
0x62 : 0x40); - } else { - s = c1 + cp950_pua_tbl[k][2]; - } - } else if (c == 0x00A2) { - s = 0; - } else if (c == 0x00A3) { - s = 0; - } else if (c == 0x00AF) { - s = 0xA1C2; - } else if (c == 0x02CD) { - s = 0xA1C5; - } else if (c == 0x0401) { - s = 0; - } else if (c >= 0x0414 && c <= 0x041C) { - s = 0; - } else if (c >= 0x0423 && c <= 0x044F) { - s = 0; - } else if (c == 0x0451) { - s = 0; - } else if (c == 0x2022) { - s = 0; - } else if (c == 0x2027) { - s = 0xA145; - } else if (c == 0x203E) { - s = 0; - } else if (c == 0x2215) { - s = 0xA241; - } else if (c == 0x223C) { - s = 0; - } else if (c == 0x2295) { - s = 0xA1F2; - } else if (c == 0x2299) { - s = 0xA1F3; - } else if (c >= 0x2460 && c <= 0x247D) { - s = 0; - } else if (c == 0x2574) { - s = 0xA15A; - } else if (c == 0x2609) { - s = 0; - } else if (c == 0x2641) { - s = 0; - } else if (c == 0x3005 || (c >= 0x302A && c <= 0x30FF)) { - s = 0; - } else if (c == 0xFE51) { - s = 0xA14E; - } else if (c == 0xFE68) { - s = 0xA242; - } else if (c == 0xFF3C) { - s = 0xA240; - } else if (c == 0xFF5E) { - s = 0xA1E3; - } else if (c == 0xFF64) { - s = 0; - } else if (c == 0xFFE0) { - s = 0xA246; - } else if (c == 0xFFE1) { - s = 0xA247; - } else if (c == 0xFFE3) { - s = 0xA1C3; - } else if (c == 0xFF0F) { - s = 0xA1FE; - } - } - - if (s <= 0) { - if (c == 0) { - s = 0; - } else { - s = -1; - } - } - - if (s >= 0) { - if (s <= 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_big5_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - e--; /* Stop the main loop 1 byte short of the end of the input */ - - while (p < e && out < 
limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c > 0xA0 && c <= 0xF9) { - /* We don't need to check p < e here; it's not possible that this pointer dereference - * will be outside the input string, because of e-- above */ - unsigned char c2 = *p++; - - if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) { - unsigned int w = (c - 0xA1)*157 + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F); - ZEND_ASSERT(w < big5_ucs_table_size); - w = big5_ucs_table[w]; - if (!w) { - if (c == 0xC8) { - p--; - } - w = MBFL_BAD_INPUT; - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - /* Finish up last byte of input string if there is one */ - if (p == e && out < limit) { - unsigned char c = *p++; - *out++ = (c <= 0x7F) ? c : MBFL_BAD_INPUT; - } - - *in_len = e - p + 1; - *in = p; - return out - buf; -} - -static void mb_wchar_to_big5(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) { - s = ucs_a1_big5_table[w - ucs_a1_big5_table_min]; - } else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) { - s = ucs_a2_big5_table[w - ucs_a2_big5_table_min]; - } else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) { - s = ucs_a3_big5_table[w - ucs_a3_big5_table_min]; - } else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) { - s = ucs_i_big5_table[w - ucs_i_big5_table_min]; - } else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) { - s = ucs_r1_big5_table[w - ucs_r1_big5_table_min]; - } else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) { - s = ucs_r2_big5_table[w - ucs_r2_big5_table_min]; - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } else if (s <= 0x80) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static size_t mb_cp950_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c > 0x80 && c <= 0xFE && p < e) { - unsigned char c2 = *p++; - - if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) { - unsigned int w = ((c - 0xA1)*157) + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F); - w = (w < big5_ucs_table_size) ? big5_ucs_table[w] : 0; - - /* PUA for CP950 */ - if (is_in_cp950_pua(c, c2)) { - unsigned int s = (c << 8) | c2; - - int k; - for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { - if (s >= cp950_pua_tbl[k][2] && s <= cp950_pua_tbl[k][3]) { - break; - } - } - - if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) { - w = 157*(c - (cp950_pua_tbl[k][2] >> 8)) + c2 - (c2 >= 0xA1 ? 
0x62 : 0x40) + cp950_pua_tbl[k][0]; - } else { - w = s - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0]; - } - } else if (c == 0xA1) { - if (c2 == 0x45) { - w = 0x2027; - } else if (c2 == 0x4E) { - w = 0xFE51; - } else if (c2 == 0x5A) { - w = 0x2574; - } else if (c2 == 0xC2) { - w = 0x00AF; - } else if (c2 == 0xC3) { - w = 0xFFE3; - } else if (c2 == 0xC5) { - w = 0x02CD; - } else if (c2 == 0xE3) { - w = 0xFF5E; - } else if (c2 == 0xF2) { - w = 0x2295; - } else if (c2 == 0xF3) { - w = 0x2299; - } else if (c2 == 0xFE) { - w = 0xFF0F; - } - } else if (c == 0xA2) { - if (c2 == 0x40) { - w = 0xFF3C; - } else if (c2 == 0x41) { - w = 0x2215; - } else if (c2 == 0x42) { - w = 0xFE68; - } else if (c2 == 0x46) { - w = 0xFFE0; - } else if (c2 == 0x47) { - w = 0xFFE1; - } else if (c2 == 0xCC) { - w = 0x5341; - } else if (c2 == 0xCE) { - w = 0x5345; - } - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_cp950(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) { - s = ucs_a1_big5_table[w - ucs_a1_big5_table_min]; - } else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) { - s = ucs_a2_big5_table[w - ucs_a2_big5_table_min]; - } else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) { - s = ucs_a3_big5_table[w - ucs_a3_big5_table_min]; - } else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) { - s = ucs_i_big5_table[w - ucs_i_big5_table_min]; - } else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) { - s = ucs_r1_big5_table[w - ucs_r1_big5_table_min]; - } else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) { 
- s = ucs_r2_big5_table[w - ucs_r2_big5_table_min]; - } - - if (w >= 0xE000 && w <= 0xF848) { - int k; - for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { - if (w <= cp950_pua_tbl[k][1]) { - break; - } - } - - int c1 = w - cp950_pua_tbl[k][0]; - if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) { - int c2 = cp950_pua_tbl[k][2] >> 8; - s = ((c1 / 157) + c2) << 8; - c1 %= 157; - s |= c1 + (c1 >= 0x3F ? 0x62 : 0x40); - } else { - s = c1 + cp950_pua_tbl[k][2]; - } - } else if (w == 0xA2 || w == 0xA3 || w == 0x401 || (w >= 0x414 && w <= 0x41C) || (w >= 0x423 && w <= 0x44F) || w == 0x451 || w == 0x2022 || w == 0x203E || w == 0x223C || (w >= 0x2460 && w <= 0x247D) || w == 0x2609 || w == 0x2641 || w == 0x3005 || (w >= 0x302A && w <= 0x30FF) || w == 0xFF64) { - s = 0; - } else if (w == 0xAF) { - s = 0xA1C2; - } else if (w == 0x2CD) { - s = 0xA1C5; - } else if (w == 0x2027) { - s = 0xA145; - } else if (w == 0x2215) { - s = 0xA241; - } else if (w == 0x2295) { - s = 0xA1F2; - } else if (w == 0x2299) { - s = 0xA1F3; - } else if (w == 0x2574) { - s = 0xA15A; - } else if (w == 0xFE51) { - s = 0xA14E; - } else if (w == 0xFE68) { - s = 0xA242; - } else if (w == 0xFF3C) { - s = 0xA240; - } else if (w == 0xFF5E) { - s = 0xA1E3; - } else if (w == 0xFFE0) { - s = 0xA246; - } else if (w == 0xFFE1) { - s = 0xA247; - } else if (w == 0xFFE3) { - s = 0xA1C3; - } else if (w == 0xFF0F) { - s = 0xA1FE; - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } else if (s <= 0x80) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_big5.h b/ext/mbstring/libmbfl/filters/mbfilter_big5.h deleted file mode 100644 index 
e475b6bd0c537..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_big5.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Rui Hirokawa - * - */ -/* - * The source code included in this files was separated from mbfilter_tw.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_BIG5_H -#define MBFL_MBFILTER_BIG5_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_big5; -extern const struct mbfl_convert_vtbl vtbl_big5_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_big5; - -extern const mbfl_encoding mbfl_encoding_cp950; -extern const struct mbfl_convert_vtbl vtbl_cp950_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp950; - -int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_BIG5_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c new file mode 100644 index 0000000000000..063274422007b --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c @@ -0,0 +1,12545 @@ +#include "mbfilter_cjk.h" + +#include "unicode_table_jis.h" +#include "unicode_table_jis2004.h" +#include "unicode_table_big5.h" +#include "unicode_table_cns11643.h" +#include "unicode_table_cp932_ext.h" +#include "unicode_table_cp936.h" +#include "unicode_table_gb18030.h" +#include "unicode_table_gb2312.h" +#include "unicode_table_uhc.h" +#include "cp932_table.h" +#include "sjis_mac2uni.h" +#include "translit_kana_jisx0201_jisx0208.h" +#include "emoji2uni.h" + +/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF + * These correspond to the letters A-Z + * To display the flag emoji for a country, two unicode codepoints are combined, + * which correspond to the two-letter code for that country + * This macro converts uppercase ASCII values to Regional Indicator codepoints */ +#define NFLAGS(c) (0x1F1A5+((unsigned int)(c))) + +static const char nflags_s[10][2] = {"CN", "DE", "ES", "FR", "GB", "IT", "JP", "KR", "RU", "US"}; +static const int nflags_code_kddi[10] = { 0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7 }; +static const int nflags_code_sb[10] = { 0x2B0A, 0x2B05, 0x2B08, 0x2B04, 0x2B07, 
0x2B06, 0x2B02, 0x2B0B, 0x2B09, 0x2B03 }; + +#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0) +#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0) + +static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"}; +static const char nflags_sb[10][2] = {"JP", "US", "FR", "DE", "IT", "GB", "ES", "RU", "CN", "KR"}; + +/* number -> (ku*94)+ten value for telephone keypad character */ +#define DOCOMO_KEYPAD(n) ((n) == 0 ? 0x296F : (0x2965 + (n))) +#define DOCOMO_KEYPAD_HASH 0x2964 + +/* `tbl` contains inclusive ranges, each represented by a pair of unsigned shorts */ +static int mbfl_bisec_srch(int w, const unsigned short *tbl, int n) +{ + int l = 0, r = n-1; + while (l <= r) { + int probe = (l + r) >> 1; + unsigned short lo = tbl[2 * probe], hi = tbl[(2 * probe) + 1]; + if (w < lo) { + r = probe - 1; + } else if (w > hi) { + l = probe + 1; + } else { + return probe; + } + } + return -1; +} + +/* `tbl` contains single values, not ranges */ +int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n) +{ + int l = 0, r = n-1; + while (l <= r) { + int probe = (l + r) >> 1; + unsigned short val = tbl[probe]; + if (w < val) { + r = probe - 1; + } else if (w > val) { + l = probe + 1; + } else { + return probe; + } + } + return -1; +} + +#define SJIS_ENCODE(c1,c2,s1,s2) \ + do { \ + s1 = ((c1 - 1) >> 1) + ((c1) < 0x5F ? 
0x71 : 0xB1); \ + s2 = c2; \ + if ((c1) & 1) { \ + if ((c2) < 0x60) { \ + s2--; \ + } \ + s2 += 0x20; \ + } else { \ + s2 += 0x7e; \ + } \ + } while (0) + +#define SJIS_DECODE(c1,c2,s1,s2) \ + do { \ + if (c1 < 0xa0) { \ + s1 = ((c1 - 0x81) << 1) + 0x21; \ + } else { \ + s1 = ((c1 - 0xc1) << 1) + 0x21; \ + } \ + s2 = c2; \ + if (c2 < 0x9f) { \ + if (c2 < 0x7f) { \ + s2++; \ + } \ + s2 -= 0x20; \ + } else { \ + s1++; \ + s2 -= 0x7e; \ + } \ + } while (0) + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * ISO-2022 variants + */ + +#define ASCII 0 +#define JISX0201_KANA 0x20 +#define JISX0208_KANJI 0x80 + +static int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ + CK((*filter->output_function)(0xa5, filter->data)); + } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ + CK((*filter->output_function)(0x203e, filter->data)); + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 
0212 second char */ + case 1: + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (filter->status == 0x80) { + if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } else { + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0x92: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0x93: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0x94: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ +/* case 0x95: */ + case 5: + 
if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + /* 2-byte (JIS X 0208 or 0212) character was truncated, + * or else escape sequence was truncated */ + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + if (c == 0) { + s = 0; + } else if (s 
<= 0) {
+			s = -1;
+		}
+	}
+	if (s >= 0) {
+		if (s < 0x80) { /* ASCII */
+			if ((filter->status & 0xff00) != 0) {
+				CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+				CK((*filter->output_function)(0x28, filter->data)); /* '(' */
+				CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
+			}
+			filter->status = 0;
+			CK((*filter->output_function)(s, filter->data));
+		} else if (s < 0x8080) { /* X 0208 */
+			if ((filter->status & 0xff00) != 0x200) {
+				CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+				CK((*filter->output_function)(0x24, filter->data)); /* '$' */
+				CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
+			}
+			filter->status = 0x200;
+			CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
+			CK((*filter->output_function)(s & 0x7f, filter->data));
+		} else if (s < 0x10000) { /* X 0212 */
+			if ((filter->status & 0xff00) != 0x300) {
+				CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+				CK((*filter->output_function)(0x24, filter->data)); /* '$' */
+				CK((*filter->output_function)(0x28, filter->data)); /* '(' */
+				CK((*filter->output_function)(0x44, filter->data)); /* 'D' */
+			}
+			filter->status = 0x300;
+			CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
+			CK((*filter->output_function)(s & 0x7f, filter->data));
+		} else { /* X 0201 latin */
+			if ((filter->status & 0xff00) != 0x400) {
+				CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+				CK((*filter->output_function)(0x28, filter->data)); /* '(' */
+				CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */
+			}
+			filter->status = 0x400;
+			CK((*filter->output_function)(s & 0x7f, filter->data));
+		}
+	} else {
+		CK(mbfl_filt_conv_illegal_output(c, filter));
+	}
+
+	return 0;
+}
+
+/* Legacy one-codepoint-at-a-time filter: encode Unicode codepoint `c` as
+ * ISO-2022-JP and pass the resulting bytes to `filter->output_function`.
+ *
+ * Only ASCII, JIS X 0208 and JIS X 0201 Latin are emitted. A table lookup
+ * result in 0x80..0x2120 (which includes the 0xA1-0xDF halfwidth kana codes)
+ * or above 0x8080 (handled as JIS X 0212 by the wchar => JIS filter) is
+ * treated as unconvertible.
+ *
+ * The high byte of `filter->status` remembers the character set selected by
+ * the last escape sequence (0 = ASCII, 0x200 = JIS X 0208, 0x400 = JIS X 0201
+ * Latin), so an escape sequence is only written when the set changes.
+ *
+ * Returns 0 on success, or -1 (via CK) if an output or error callback fails. */
+static int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
+{
+	int s;
+
+	s = 0;
+	if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
+		s = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
+	} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
+		s = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
+	} else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
+		s = ucs_i_jis_table[c - ucs_i_jis_table_min];
+	} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
+		s = ucs_r_jis_table[c - ucs_r_jis_table_min];
+	}
+
+	if (s <= 0) {
+		if (c == 0xa5) { /* YEN SIGN */
+			s = 0x1005c;
+		} else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
+			s = 0x2140;
+		} else if (c == 0x2225) { /* PARALLEL TO */
+			s = 0x2142;
+		} else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
+			s = 0x215d;
+		} else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
+			s = 0x2171;
+		} else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
+			s = 0x2172;
+		} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
+			s = 0x224c;
+		}
+		if (c == 0) {
+			s = 0;
+		} else if (s <= 0) {
+			s = -1;
+		}
+	} else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
+		s = -1;
+	}
+	if (s >= 0) {
+		if (s < 0x80) { /* ASCII */
+			if ((filter->status & 0xff00) != 0) {
+				CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+				CK((*filter->output_function)(0x28, filter->data)); /* '(' */
+				CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
+			}
+			filter->status = 0;
+			CK((*filter->output_function)(s, filter->data));
+		} else if (s < 0x10000) { /* X 0208 */
+			if ((filter->status & 0xff00) != 0x200) {
+				CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+				CK((*filter->output_function)(0x24, filter->data)); /* '$' */
+				CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
+			}
+			filter->status = 0x200;
+			CK((*filter->output_function)((s >> 8) & 0x7f, filter->data));
+			CK((*filter->output_function)(s & 0x7f, filter->data));
+		} else { /* X 0201 latin */
+			if ((filter->status & 0xff00) != 0x400) {
+				CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+				CK((*filter->output_function)(0x28, filter->data)); /* '(' */
+				CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */
+			}
+			filter->status = 0x400;
+			CK((*filter->output_function)(s & 0x7f, filter->data));
+		}
+	} else {
+		/* Unconvertible codepoint: report it through the illegal-output
+		 * handler (as the wchar => JIS filter does) instead of silently
+		 * dropping it from the output */
+		CK(mbfl_filt_conv_illegal_output(c, filter));
+	}
+
+	return 0;
+}
+
+#define ASCII 0
+#define JISX_0201_LATIN 1
+#define JISX_0201_KANA 2
+#define JISX_0208 3
+#define JISX_0212 4
+
+static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
+{
+	ZEND_ASSERT(bufsize >= 3);
+
+	unsigned char *p = *in, *e = p + *in_len;
+	uint32_t *out = buf, *limit = buf + bufsize;
+
+	while (p < e && out < limit) {
+		unsigned char c = *p++;
+
+		if (c == 0x1B) {
+			/* ESC seen; this is an escape sequence */
+			if ((e - p) < 2) {
+				*out++ = MBFL_BAD_INPUT;
+				if (p != e && (*p == '$' || *p == '('))
+					p++;
+				continue;
+			}
+
+			unsigned char c2 = *p++;
+			if (c2 == '$') {
+				unsigned char c3 = *p++;
+				if (c3 == '@' || c3 == 'B') {
+					*state = JISX_0208;
+				} else if (c3 == '(') {
+					if (p == e) {
+						*out++ = MBFL_BAD_INPUT;
+						break;
+					}
+					unsigned char c4 = *p++;
+					if (c4 == '@' || c4 == 'B') {
+						*state = JISX_0208;
+					} else if (c4 == 'D') {
+						*state = JISX_0212;
+					} else {
+						if ((limit - out) < 3) {
+							p -= 4;
+							break;
+						}
+						*out++ = MBFL_BAD_INPUT;
+						*out++ = '$';
+						*out++ = '(';
+						p--;
+					}
+				} else {
+					if ((limit - out) < 2) {
+						p -= 3;
+						break;
+					}
+					*out++ = MBFL_BAD_INPUT;
+					*out++ = '$';
+					p--;
+				}
+			} else if (c2 == '(') {
+				unsigned char c3 = *p++;
+				if (c3 == 'B' || c3 == 'H') {
+					*state = ASCII;
+				} else if (c3 == 'J') {
+					*state = JISX_0201_LATIN;
+				} else if (c3 == 'I') {
+					*state = JISX_0201_KANA;
+				} else {
+					if ((limit - out) < 2) {
+						p -= 3;
+						break;
+					}
+					*out++ = MBFL_BAD_INPUT;
+					*out++ = '(';
+					p--;
+				}
+			} else {
+				*out++ = MBFL_BAD_INPUT;
+				p--;
+			}
+		} else if (c == 0xE) {
+			/* "Kana In" marker; this is just for JIS-7/8, but we also accept it for ISO-2022-JP */
+			*state = JISX_0201_KANA;
+		} else if (c == 0xF) {
+			/* "Kana Out" marker */
+			*state = ASCII;
+		} else if (*state == JISX_0201_LATIN && c == 0x5C) { /* YEN SIGN */
+			*out++ = 0xA5;
+		} else if (*state == JISX_0201_LATIN && c
== 0x7E) { /* OVER LINE */ + *out++ = 0x203E; + } else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) { + *out++ = 0xFF40 + c; + } else if (*state >= JISX_0208 && c > 0x20 && c < 0x7F) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + if (c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + uint32_t w = 0; + if (*state == JISX_0208) { + if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + if (!w) { + w = MBFL_BAD_INPUT; + } + } else { + if (s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } + if (!w) { + w = MBFL_BAD_INPUT; + } + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + /* GR-invoked Kana; "GR" stands for "graphics right" and refers to bytes + * with the MSB bit (in the context of ISO-2022 encoding). + * + * In this regard, Wikipedia states: + * "Other, older variants known as JIS7 and JIS8 build directly on the 7-bit and 8-bit + * encodings defined by JIS X 0201 and allow use of JIS X 0201 kana from G1 without + * escape sequences, using Shift Out and Shift In or setting the eighth bit + * (GR-invoked), respectively." 
+ * + * Note that we support both the 'JIS7' use of 0xE/0xF Shift In/Shift Out codes + * and the 'JIS8' use of GR-invoked Kana */ + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x1005C; + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w != 0) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + + if (s < 0x80) { /* ASCII */ + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + 
buf->state = ASCII;
+			}
+			out = mb_convert_buf_add(out, s);
+		} else if (s < 0x8080) { /* JIS X 0208 */
+			if (buf->state != JISX_0208) {
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5);
+				out = mb_convert_buf_add3(out, 0x1B, '$', 'B');
+				buf->state = JISX_0208;
+			}
+			out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
+		} else if (s < 0x10000) { /* JIS X 0212 */
+			if (buf->state != JISX_0212) {
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6);
+				out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D');
+				buf->state = JISX_0212;
+			}
+			out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
+		} else { /* X 0201 Latin */
+			if (buf->state != JISX_0201_LATIN) {
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
+				out = mb_convert_buf_add3(out, 0x1B, '(', 'J');
+				buf->state = JISX_0201_LATIN;
+			}
+			out = mb_convert_buf_add(out, s & 0x7F);
+		}
+	}
+
+	if (end && buf->state != ASCII) {
+		MB_CONVERT_BUF_ENSURE(buf, out, limit, 3);
+		out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
+	}
+
+	MB_CONVERT_BUF_STORE(buf, out, limit);
+}
+
+/* Encode `len` Unicode codepoints from `in` as JIS and append the bytes to
+ * `buf`. Output may use ASCII, JIS X 0201 Latin, JIS X 0201 Kana, JIS X 0208
+ * and JIS X 0212.
+ *
+ * `buf->state` holds the currently selected character set; an escape sequence
+ * is written only when the set has to change. When `end` is true and the
+ * state is not ASCII, a final ESC ( B is appended to switch back to ASCII.
+ *
+ * Codepoints with no mapping (other than U+0000) are reported through
+ * MB_CONVERT_ERROR and skipped. */
+static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
+{
+	unsigned char *out, *limit;
+	MB_CONVERT_BUF_LOAD(buf, out, limit);
+	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
+
+	while (len--) {
+		uint32_t w = *in++;
+		unsigned int s = 0;
+
+		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
+			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
+		} else if (w == 0x203E) { /* OVERLINE */
+			s = 0x1007E; /* Convert to JISX 0201 OVERLINE */
+		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
+			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
+		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
+			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
+		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
+			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
+		}
+
+		if (s == 0) {
+			if (w == 0xA5) { /* YEN SIGN */
+				s = 0x1005C;
+			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
+				s = 0x2140;
+			} else if (w == 0x2225) { /* PARALLEL TO */
+				s = 0x2142;
+			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
+				s = 0x215D;
+			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
+				s = 0x2171;
+			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
+				s = 0x2172;
+			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
+				s = 0x224C;
+			} else if (w != 0) {
+				/* Pass this encoder (not mb_wchar_to_iso2022jp) as the callback,
+				 * matching how the ISO-2022-JP encoder passes itself. Presumably
+				 * the callback is what encodes the error marker, and ISO-2022-JP
+				 * rejects kana / JIS X 0212 values which JIS can represent;
+				 * NOTE(review): confirm against the MB_CONVERT_ERROR definition */
+				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_jis);
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
+				continue;
+			}
+		}
+
+		if (s < 0x80) { /* ASCII */
+			if (buf->state != ASCII) {
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
+				out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
+				buf->state = ASCII;
+			}
+			out = mb_convert_buf_add(out, s);
+		} else if (s >= 0xA1 && s <= 0xDF) {
+			if (buf->state != JISX_0201_KANA) {
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
+				out = mb_convert_buf_add3(out, 0x1B, '(', 'I');
+				buf->state = JISX_0201_KANA;
+			}
+			out = mb_convert_buf_add(out, s & 0x7F);
+		} else if (s < 0x8080) { /* JIS X 0208 */
+			if (buf->state != JISX_0208) {
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5);
+				out = mb_convert_buf_add3(out, 0x1B, '$', 'B');
+				buf->state = JISX_0208;
+			}
+			out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
+		} else if (s < 0x10000) { /* JIS X 0212 */
+			if (buf->state != JISX_0212) {
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6);
+				out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D');
+				buf->state = JISX_0212;
+			}
+			out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
+		} else { /* X 0201 Latin */
+			if (buf->state != JISX_0201_LATIN) {
+				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
+				out = mb_convert_buf_add3(out, 0x1B, '(', 'J');
+				buf->state = JISX_0201_LATIN;
+			}
+			out = mb_convert_buf_add(out, s & 0x7F);
+		}
+	}
+
+	if (end && buf->state != ASCII) {
+		MB_CONVERT_BUF_ENSURE(buf, out, limit, 3);
+		out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
+	}
+
+	
MB_CONVERT_BUF_STORE(buf, out, limit); +} + +#define JISX_0201_KANA_SO 5 + +static bool mb_check_jis(unsigned char *in, size_t in_len) +{ + unsigned char *p = in, *e = p + in_len; + unsigned int state = ASCII; + + while (p < e) { + unsigned char c = *p++; + if (c == 0x1B) { + /* ESC seen; this is an escape sequence */ + if (state == JISX_0201_KANA_SO) { + return false; + } + if ((e - p) < 2) { + return false; + } + unsigned char c2 = *p++; + if (c2 == '$') { + unsigned char c3 = *p++; + if (c3 == '@' || c3 == 'B') { + state = JISX_0208; + } else if (c3 == '(') { + if (p == e) { + return false; + } + unsigned char c4 = *p++; + if (c4 == '@' || c4 == 'B') { + state = JISX_0208; + } else if (c4 == 'D') { + state = JISX_0212; + } else { + return false; + } + } else { + return false; + } + } else if (c2 == '(') { + unsigned char c3 = *p++; + /* ESC ( H is treated as a sequence transitioning to ASCII for historical reasons. + * see https://github.com/php/php-src/pull/10828#issuecomment-1478342432. 
*/ + if (c3 == 'B' || c3 == 'H') { + state = ASCII; + } else if (c3 == 'J') { + state = JISX_0201_LATIN; + } else if (c3 == 'I') { + state = JISX_0201_KANA; + } else { + return false; + } + } else { + return false; + } + } else if (c == 0xE) { + /* "Kana In" marker */ + if (state != ASCII) { + return false; + } + state = JISX_0201_KANA_SO; + } else if (c == 0xF) { + /* "Kana Out" marker */ + if (state != JISX_0201_KANA_SO) { + return false; + } + state = ASCII; + } else if ((state == JISX_0208 || state == JISX_0212) && (c > 0x20 && c < 0x7F)) { + if (p == e) { + return false; + } + unsigned char c2 = *p++; + if (c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + if (state == JISX_0208) { + if (s < jisx0208_ucs_table_size && jisx0208_ucs_table[s]) { + continue; + } + } else { + if (s < jisx0212_ucs_table_size && jisx0212_ucs_table[s]) { + continue; + } + } + return false; + } else { + return false; + } + } else if (c < 0x80) { + continue; + } else if (c >= 0xA1 && c <= 0xDF) { + /* GR-invoked Kana */ + continue; + } else { + return false; + } + } + + return state == ASCII; +} + +static bool mb_check_iso2022jp(unsigned char *in, size_t in_len) +{ + unsigned char *p = in, *e = p + in_len; + unsigned int state = ASCII; + + while (p < e) { + unsigned char c = *p++; + if (c == 0x1B) { + /* ESC seen; this is an escape sequence */ + if ((e - p) < 2) { + return false; + } + unsigned char c2 = *p++; + if (c2 == '$') { + unsigned char c3 = *p++; + if (c3 == '@' || c3 == 'B') { + state = JISX_0208; + } else { + return false; + } + } else if (c2 == '(') { + unsigned char c3 = *p++; + if (c3 == 'B') { + state = ASCII; + } else if (c3 == 'J') { + state = JISX_0201_LATIN; + } else { + return false; + } + } else { + return false; + } + } else if (c == 0xE || c == 0xF) { + /* "Kana In" or "Kana Out" marker; ISO-2022-JP is not accepted. 
*/ + return false; + } else if (state == JISX_0208 && (c > 0x20 && c < 0x7F)) { + if (p == e) { + return false; + } + unsigned char c2 = *p++; + if (c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + if (s < jisx0208_ucs_table_size && jisx0208_ucs_table[s]) { + continue; + } + return false; + } else { + return false; + } + } else if (c < 0x80) { + continue; + } else { + return false; + } + } + + return state == ASCII; +} + +/* Unicode codepoints for emoji are above 0x1F000, but we only store 16-bits + * in our tables. Therefore, add 0x10000 to recover the true values. + * + * Again, for some emoji which are not supported by Unicode, we use codepoints + * in the Private Use Area above 0xFE000. Again, add 0xF0000 to recover the + * true value. */ +static inline int convert_emoji_cp(int cp) +{ + if (cp > 0xF000) + return cp + 0x10000; + else if (cp > 0xE000) + return cp + 0xF0000; + return cp; +} + +int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd) +{ + if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) { + if (s == 0x24C0) { /* Spain */ + EMIT_FLAG_EMOJI("ES"); + } else if (s == 0x24C1) { /* Russia */ + EMIT_FLAG_EMOJI("RU"); + } else if (s >= 0x2545 && s <= 0x254A) { + EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]); + } else if (s == 0x25BC) { + EMIT_KEYPAD_EMOJI('#'); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]); + } + } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) { + if (s == 0x2750) { /* Japan */ + EMIT_FLAG_EMOJI("JP"); + } else if (s >= 0x27A6 && s <= 0x27AE) { + EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1'); + } else if (s == 0x27F7) { /* United States */ + EMIT_FLAG_EMOJI("US"); + } else if (s == 0x2830) { + EMIT_KEYPAD_EMOJI('0'); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]); + } + } + return 0; +} + +static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter) +{ 
+ int c1, s, w, snd = 0; + + switch (filter->status & 0xF) { + case 0: + if (c == 0x1B) { + filter->status += 2; + } else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) { + CK((*filter->output_function)(0xFF40 + c, filter->data)); + } else if (filter->status == JISX0208_KANJI && c > 0x20 && c < 0x80) { + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* ASCII */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xA0 && c < 0xE0) { /* Kana */ + CK((*filter->output_function)(0xFEC0 + c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* JISX 0208, second byte */ + case 1: + w = 0; + filter->status &= ~0xF; + c1 = filter->cache; + if (c > 0x20 && c < 0x7F) { + s = ((c1 - 0x21) * 94) + c - 0x21; + + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (s >= (84 * 94) && s < (91 * 94)) { + s += 22 * 94; + w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); + if (w > 0 && snd > 0) { + (*filter->output_function)(snd, filter->data); + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC */ + case 2: + if (c == '$') { + filter->status++; + } else if (c == '(') { + 
filter->status += 3; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $ */ + case 3: + if (c == '@' || c == 'B') { + filter->status = JISX0208_KANJI; + } else if (c == '(') { + filter->status++; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $ ( */ + case 4: + if (c == '@' || c == 'B') { + filter->status = JISX0208_KANJI; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC ( */ + case 5: + if (c == 'B' || c == 'J') { + filter->status = 0; /* ASCII mode */ + } else if (c == 'I') { + filter->status = JISX0201_KANA; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + + return 0; +} + +static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter) +{ + if ((filter->status & 0xF) == 1) { + int c1 = filter->cache; + filter->cache = 0; + filter->status &= ~0xFF; + if (c == 0x20E3) { + if (c1 == '#') { + *s1 = 0x25BC; + } else if (c1 == '0') { + *s1 = 0x2830; + } else { /* Previous character was '1'-'9' */ + *s1 = 0x27A6 + (c1 - '1'); + } + return 1; + } else { + if (filter->status & 0xFF00) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + CK((*filter->output_function)(c1, filter->data)); + filter->status = 0; + } + } + + if (c == '#' || (c >= '0' && c <= '9')) { + filter->status |= 1; + filter->cache = c; + return 0; + } + + if (c == 0xA9) { /* 
Copyright sign */ + *s1 = 0x27DC; + return 1; + } else if (c == 0xAE) { /* Registered sign */ + *s1 = 0x27DD; + return 1; + } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) { + int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code2_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) { + int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code3_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) { + int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code5_val[i]; + return 1; + } + } + return 0; +} + +/* (ku*94)+ten value -> Shift-JIS byte sequence */ +#define CODE2JIS(c1,c2,s1,s2) \ + c1 = (s1)/94+0x21; \ + c2 = (s1)-94*((c1)-0x21)+0x21; \ + s1 = ((c1) << 8) | (c2); \ + s2 = 1 + +static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0, s2 = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + + if (s1 <= 0) { + if (c == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xFFE0) { /* FULLWIDTH CENT 
SIGN */ + s1 = 0x2171; + } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + + if (mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) { + /* A KDDI emoji was detected and stored in s1 */ + CODE2JIS(c1,c2,s1,s2); + s1 -= 0x1600; + } else if ((filter->status & 0xFF) == 1 && filter->cache) { + /* We are just processing one of KDDI's special emoji for a phone keypad button */ + return 0; + } + + if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { + if (c == cp932ext1_ucs_table[c1]) { + s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; + break; + } + } + + if (c == 0) { + s1 = 0; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* ASCII */ + if (filter->status & 0xFF00) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + CK((*filter->output_function)(s1, filter->data)); + filter->status = 0; + } else if (s1 > 0xA0 && s1 < 0xE0) { /* Kana */ + if ((filter->status & 0xFF00) != 0x100) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('I', filter->data)); + } + filter->status = 0x100; + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } else if (s1 < 0x7E7F) { /* JIS X 0208 */ + if ((filter->status & 0xFF00) != 0x200) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + filter->status = 0x200; + CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int 
mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter) +{ + /* Go back to ASCII mode (so strings can be safely concatenated) */ + if (filter->status & 0xFF00) { + (*filter->output_function)(0x1B, filter->data); /* ESC */ + (*filter->output_function)('(', filter->data); + (*filter->output_function)('B', filter->data); + } + + int c1 = filter->cache; + if ((filter->status & 0xFF) == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) { + (*filter->output_function)(c1, filter->data); + } + filter->status = filter->cache = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + if ((e - p) < 2) { + p = e; + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + unsigned char c3 = *p++; + + if (c2 == '$') { + if (c3 == '@' || c3 == 'B') { + *state = JISX0208_KANJI; + } else if (c3 == '(') { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c4 = *p++; + + if (c4 == '@' || c4 == 'B') { + *state = JISX0208_KANJI; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c2 == '(') { + if (c3 == 'B' || c3 == 'J') { + *state = ASCII; + } else if (c3 == 'I') { + *state = JISX0201_KANA; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + p--; + *out++ = MBFL_BAD_INPUT; + } + } else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) { + *out++ = 0xFF40 + c; + } else if (*state == JISX0208_KANJI && c >= 0x21 && c <= 0x7F) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + + if (c2 >= 0x21 && c2 <= 0x7E) { + unsigned int s = ((c - 0x21) * 94) + c2 - 0x21; + uint32_t w = 0; + + if (s <= 137) { + if (s == 
31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (s >= (84 * 94) && s < (91 * 94)) { + int snd = 0; + s += 22 * 94; + w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); + if (w && snd) { + *out++ = snd; + } + } + + if (!w) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE 
SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if ((w == '#' || (w >= '0' && w <= '9')) && len) { + uint32_t w2 = *in++; len--; + + if (w2 == 0x20E3) { + unsigned int s1 = 0; + if (w == '#') { + s1 = 0x25BC; + } else if (w == '0') { + s1 = 0x2830; + } else { /* Previous character was '1'-'9' */ + s1 = 0x27A6 + (w - '1'); + } + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } else { + in--; len++; + } + } else if (w >= NFLAGS('C') && w <= NFLAGS('U') && len) { /* C for CN, U for US */ + uint32_t w2 = *in++; len--; + + if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { + unsigned int s1 = nflags_code_kddi[i]; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + goto found_flag_emoji; + } + } + } + + in--; len++; +found_flag_emoji: ; + } + + if (w == 0xA9) { /* Copyright sign */ + unsigned int s1 = 0x27DC; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } else if (w == 0xAE) { /* Registered sign */ + unsigned int s1 = 0x27DD; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) { + int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); + if (i >= 0) { + unsigned int s1 = mb_tbl_uni_kddi2code2_value[i]; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } + } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) { + int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); + if (i >= 0) { + unsigned int 
s1 = mb_tbl_uni_kddi2code3_value[i]; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } + } else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) { + int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); + if (i >= 0) { + unsigned int s1 = mb_tbl_uni_kddi2code5_val[i]; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } + } + + if (!s || s >= 0xA1A1) { + s = 0; + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + break; + } + } + if (w == 0) + s = 0; + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7F) { + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA1 && s <= 0xDF) { + if (buf->state != JISX0201_KANA) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); + buf->state = JISX0201_KANA; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else if (s <= 0x7E7E) { + if (buf->state != JISX0208_KANJI) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX0208_KANJI; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } + + if (end && buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter) +{ + 
int k; + int c1, c2, s, s1 = 0, s2 = 0, w = 0, w1; + + switch (filter->status & 0xf) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { + CK((*filter->output_function)(c, filter->data)); + } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { + if (c == 0x5c) { + CK((*filter->output_function)(0x00a5, filter->data)); + } else if (c == 0x7e) { + CK((*filter->output_function)(0x203e, filter->data)); + } else { + CK((*filter->output_function)(c, filter->data)); + } + } else { /* ISO-2022-JP-2004 */ + if (c == 0x1b) { + filter->status += 6; + } else if ((filter->status == 0x80 || filter->status == 0x90 || filter->status == 0xa0) + && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->cache = c; + if (filter->status == 0x90) { + filter->status += 1; /* JIS X 0213 plane 1 */ + } else if (filter->status == 0xa0) { + filter->status += 4; /* JIS X 0213 plane 2 */ + } else { + filter->status += 5; /* JIS X 0208 */ + } + } else { + CK((*filter->output_function)(c, filter->data)); + } + } + } else { + if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { + if (c > 0xa0 && c < 0xff) { /* X 0213 plane 1 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->cache = 0x8E; /* So error will be reported if input is truncated right here */ + filter->status = 2; + } else if (c == 0x8f) { /* X 0213 plane 2 first char */ + filter->status = 3; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { + if (c > 0xa0 && c < 0xe0) { /* kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } else { + 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + break; + + case 1: /* kanji second char */ + filter->status &= ~0xf; + c1 = filter->cache; + + if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { + if (c > 0xa0 && c < 0xff) { + s1 = c1 - 0x80; + s2 = c - 0x80; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + break; + } + } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { + SJIS_DECODE(c1, c, s1, s2); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + break; + } + } else { /* ISO-2022-JP-2004 */ + if (c >= 0x21 && c <= 0x7E) { + s1 = c1; + s2 = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + break; + } + } + w1 = (s1 << 8) | s2; + + /* conversion for combining characters */ + if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || + (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || + (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { + k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); + if (k >= 0) { + w = jisx0213_u2_tbl[2*k]; + CK((*filter->output_function)(w, filter->data)); + w = jisx0213_u2_tbl[2*k+1]; + } + } + + /* conversion for BMP */ + if (w <= 0) { + w1 = (s1 - 0x21)*94 + s2 - 0x21; + if (w1 >= 0 && w1 < jisx0213_ucs_table_size) { + w = jisx0213_ucs_table[w1]; + } + } + + /* conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ + if (w <= 0) { + w1 = (s1 << 8) | s2; + k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + break; + + case 2: /* got 0x8e: EUC-JP-2004 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else { + 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* X 0213 plane 2 first char: EUC-JP-2004 (0x8f) */ + if (c == 0xA1 || (c >= 0xA3 && c <= 0xA5) || c == 0xA8 || (c >= 0xAC && c <= 0xAF) || (c >= 0xEE && c <= 0xFE)) { + filter->cache = c - 0x80; + filter->status++; + } else { + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 4: /* X 0213 plane 2 second char: EUC-JP-2004, ISO-2022-JP-2004 */ + filter->status &= ~0xF; + c1 = filter->cache; + if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { + c2 = c - 0x80; + } else { + c2 = c; + } + + if (c2 < 0x21 || c2 > 0x7E) { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + break; + } + + s1 = c1 - 0x21; + s2 = c2 - 0x21; + + if (((s1 >= 0 && s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || + (s1 >= 77 && s1 < 94)) && s2 >= 0 && s2 < 94) { + /* calc offset from ku */ + for (k = 0; k < jisx0213_p2_ofst_len; k++) { + if (s1 == jisx0213_p2_ofst[k]) { + break; + } + } + k -= jisx0213_p2_ofst[k]; + + /* check for japanese chars in BMP */ + s = (s1 + 94 + k)*94 + s2; + ZEND_ASSERT(s < jisx0213_ucs_table_size); + w = jisx0213_ucs_table[s]; + + /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ + if (w <= 0) { + w1 = ((c1 + k + 94) << 8) | c2; + k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 5: /* X 0208: ISO-2022-JP-2004 */ + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, 
filter->data)); + break; + + /* ESC: ISO-2022-JP-2004 */ +/* case 0x06: */ +/* case 0x16: */ +/* case 0x26: */ +/* case 0x86: */ +/* case 0x96: */ +/* case 0xa6: */ + case 6: + if (c == '$') { + filter->status++; + } else if (c == '(') { + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $: ISO-2022-JP-2004 */ +/* case 0x07: */ +/* case 0x17: */ +/* case 0x27: */ +/* case 0x87: */ +/* case 0x97: */ +/* case 0xa7: */ + case 7: + if (c == 'B') { /* JIS X 0208-1983 */ + filter->status = 0x80; + } else if (c == '(') { + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $ (: ISO-2022-JP-2004 */ +/* case 0x08: */ +/* case 0x18: */ +/* case 0x28: */ +/* case 0x88: */ +/* case 0x98: */ +/* case 0xa8: */ + case 8: + if (c == 'Q') { /* JIS X 0213 plane 1 */ + filter->status = 0x90; + } else if (c == 'P') { /* JIS X 0213 plane 2 */ + filter->status = 0xa0; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC (: ISO-2022-JP-2004 */ +/* case 0x09: */ +/* case 0x19: */ +/* case 0x29: */ +/* case 0x89: */ +/* case 0x99: */ + case 9: + if (c == 'B') { + filter->status = 0; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) +{ + int k; + int c1, c2, s1, s2; + +retry: + s1 = 0; + /* check for 1st char of combining characters */ + if 
((filter->status & 0xf) == 0 && ( + c == 0x00E6 || + (c >= 0x0254 && c <= 0x02E9) || + (c >= 0x304B && c <= 0x3053) || + (c >= 0x30AB && c <= 0x30C8) || + c == 0x31F7)) { + for (k = 0; k < jisx0213_u2_tbl_len; k++) { + if (c == jisx0213_u2_tbl[2*k]) { + filter->status++; + filter->cache = k; + return 0; + } + } + } + + /* check for 2nd char of combining characters */ + if ((filter->status & 0xf) == 1 && filter->cache >= 0 && filter->cache < jisx0213_u2_tbl_len) { + k = filter->cache; + filter->status &= ~0xf; + filter->cache = 0; + + c1 = jisx0213_u2_tbl[2*k]; + if ((c1 == 0x0254 || c1 == 0x028C || c1 == 0x0259 || c1 == 0x025A) && c == 0x0301) { + k++; + } + if (c == jisx0213_u2_tbl[2*k+1]) { + s1 = jisx0213_u2_key[k]; + } else { /* fallback */ + s1 = jisx0213_u2_fb_tbl[k]; + + if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + s2 = (s1 & 0xff) + 0x80; + s1 = ((s1 >> 8) & 0xff) + 0x80; + } else { + if (filter->status != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('Q', filter->data)); + } + filter->status = 0x200; + + s2 = s1 & 0x7f; + s1 = (s1 >> 8) & 0x7f; + } + + /* Flush out cached data */ + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + goto retry; + } + } + + /* check for major japanese chars: U+4E00 - U+9FFF */ + if (s1 <= 0) { + for (k = 0; k < uni2jis_tbl_len; k++) { + if (c >= uni2jis_tbl_range[k][0] && c <= uni2jis_tbl_range[k][1]) { + s1 = uni2jis_tbl[k][c-uni2jis_tbl_range[k][0]]; + break; + } + } + } + + /* check for japanese chars in compressed mapping area: U+1E00 - U+4DBF */ + if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) { + k = mbfl_bisec_srch(c, 
ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); + if (k >= 0) { + s1 = ucs_c1_jisx0213_ofst[k] + c - ucs_c1_jisx0213_tbl[2*k]; + } + } + + /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ + if (s1 <= 0 && c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) { + k = mbfl_bisec_srch2(c - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); + if (k >= 0) { + s1 = jisx0213_u5_jis_tbl[k]; + } + } + + if (s1 <= 0) { + /* CJK Compatibility Forms: U+FE30 - U+FE4F */ + if (c == 0xfe45) { + s1 = 0x233e; + } else if (c == 0xfe46) { + s1 = 0x233d; + } else if (c >= 0xf91d && c <= 0xf9dc) { + /* CJK Compatibility Ideographs: U+F900 - U+F92A */ + k = mbfl_bisec_srch2(c, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); + if (k >= 0) { + s1 = ucs_r2b_jisx0213_cmap_val[k]; + } + } + } + + if (s1 <= 0) { + if (c == 0) { + s1 = 0; + } else { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* ASCII */ + if (filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 && (filter->status & 0xff00)) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + filter->status = 0; + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* latin or kana */ + if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (filter->to->no_encoding == mbfl_no_encoding_sjis2004 && (s1 >= 0xA1 && s1 <= 0xDF)) { + CK((*filter->output_function)(s1, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else if (s1 < 0x7f00) { /* X 0213 plane 1 */ + if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + s2 = (s1 & 0xff) + 0x80; + s1 = ((s1 
>> 8) & 0xff) + 0x80; + } else { + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('Q', filter->data)); + } + filter->status = 0x200; + s2 = s1 & 0xff; + s1 = (s1 >> 8) & 0xff; + } + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } else { /* X 0213 plane 2 */ + if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + } else { + s2 = s1 & 0xff; + k = ((s1 >> 8) & 0xff) - 0x7f; + if (k >= 0 && k < jisx0213_p2_ofst_len) { + s1 = jisx0213_p2_ofst[k] + 0x21; + } + if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + s2 |= 0x80; + s1 |= 0x80; + CK((*filter->output_function)(0x8f, filter->data)); + } else { + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('P', filter->data)); + } + filter->status = 0x200; + } + } + + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter) +{ + int k, c1, c2, s1, s2; + + k = filter->cache; + filter->cache = 0; + + if (filter->status == 1 && k >= 0 && k <= jisx0213_u2_tbl_len) { + s1 = jisx0213_u2_fb_tbl[k]; + + if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + s2 = (s1 & 0xff) | 0x80; + s1 = ((s1 >> 8) & 0xff) | 0x80; + } else { + s2 = s1 & 0x7f; + s1 = 
(s1 >> 8) & 0x7f; + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('Q', filter->data)); + } + filter->status = 0x200; + } + + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + + /* If we had switched to a different charset, go back to ASCII mode + * This makes it possible to concatenate arbitrary valid strings + * together and get a valid string */ + if (filter->status & 0xff00) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + + filter->status = 0; + + if (filter->flush_function) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +#define ASCII 0 +#define JISX0208 1 +#define JISX0213_PLANE1 2 +#define JISX0213_PLANE2 3 + +static size_t mb_iso2022jp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + if (c == 0x1B) { + if ((e - p) < 2) { + *out++ = MBFL_BAD_INPUT; + p = e; + break; + } + unsigned char c2 = *p++; + unsigned char c3 = *p++; + if (c2 == '$') { + if (c3 == 'B') { + *state = JISX0208; + } else if (c3 == '(') { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c4 = *p++; + if (c4 == 'Q') { + *state = JISX0213_PLANE1; + } else if (c4 == 'P') { + *state = JISX0213_PLANE2; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c2 == '(') { + if (c3 == 'B') { + *state = ASCII; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + p--; + *out++ = MBFL_BAD_INPUT; + } + } 
else if (*state >= JISX0208 && c > 0x20 && c < 0x7F) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + if (c2 < 0x21 || c2 > 0x7E) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + if (*state == JISX0213_PLANE1) { + unsigned int w1 = (c << 8) | c2; + + /* Conversion for combining characters */ + if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { + int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); + if (k >= 0) { + *out++ = jisx0213_u2_tbl[2*k]; + *out++ = jisx0213_u2_tbl[2*k+1]; + continue; + } + } + + /* Conversion for BMP */ + uint32_t w = 0; + w1 = (c - 0x21)*94 + c2 - 0x21; + if (w1 < jisx0213_ucs_table_size) { + w = jisx0213_ucs_table[w1]; + } + + /* Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ + if (!w) { + int k = mbfl_bisec_srch2((c << 8) | c2, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } else if (*state == JISX0213_PLANE2) { + + unsigned int s1 = c - 0x21, s2 = c2 - 0x21; + + if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) { + int k; + for (k = 0; k < jisx0213_p2_ofst_len; k++) { + if (s1 == jisx0213_p2_ofst[k]) { + break; + } + } + k -= jisx0213_p2_ofst[k]; + + /* Check for Japanese chars in BMP */ + unsigned int s = (s1 + 94 + k)*94 + s2; + ZEND_ASSERT(s < jisx0213_ucs_table_size); + uint32_t w = jisx0213_ucs_table[s]; + + /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ + if (!w) { + k = mbfl_bisec_srch2(((c + k + 94) << 8) | c2, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? 
w : MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { /* state == JISX0208 */ + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + uint32_t w = 0; + if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + *out++ = w ? w : MBFL_BAD_INPUT; + } + } else { + *out++ = c; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_iso2022jp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + uint32_t w; + if (buf->state & 0xFF00) { + int k = (buf->state >> 8) - 1; + w = jisx0213_u2_tbl[2*k]; + buf->state &= 0xFF; + goto process_codepoint; + } + + while (len--) { + w = *in++; +process_codepoint: ; + unsigned int s = 0; + + if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { + for (int k = 0; k < jisx0213_u2_tbl_len; k++) { + if (w == jisx0213_u2_tbl[2*k]) { + if (!len) { + if (!end) { + buf->state |= (k+1) << 8; + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + } else { + uint32_t w2 = *in++; len--; + if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { + k++; + } + if (w2 == jisx0213_u2_tbl[2*k+1]) { + s = jisx0213_u2_key[k]; + break; + } + in--; len++; + } + + s = jisx0213_u2_fb_tbl[k]; + break; + } + } + } + + /* Check for major Japanese chars: U+4E00-U+9FFF */ + if (!s) { + for (int k = 0; k < uni2jis_tbl_len; k++) { + if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { + s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; + break; + } + } + } + + /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ + if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { + int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); + if (k >= 0) { + s = ucs_c1_jisx0213_ofst[k] + w - 
ucs_c1_jisx0213_tbl[2*k]; + } + } + + /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ + if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { + int k = mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); + if (k >= 0) { + s = jisx0213_u5_jis_tbl[k]; + } + } + + if (!s) { + /* CJK Compatibility Forms: U+FE30-U+FE4F */ + if (w == 0xFE45) { + s = 0x233E; + } else if (w == 0xFE46) { + s = 0x233D; + } else if (w >= 0xF91D && w <= 0xF9DC) { + /* CJK Compatibility Ideographs: U+F900-U+F92A */ + int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); + if (k >= 0) { + s = ucs_r2b_jisx0213_cmap_val[k]; + } + } + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp2004); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7F) { + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s <= 0xFF) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp2004); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7EFF) { + if (buf->state != JISX0213_PLANE1) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); + out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'Q'); + buf->state = JISX0213_PLANE1; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else { + if (buf->state != JISX0213_PLANE2) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); + out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'P'); + buf->state = JISX0213_PLANE2; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + unsigned int s2 = s & 0xFF; + int k = ((s >> 8) & 0xFF) - 0x7F; + ZEND_ASSERT(k < jisx0213_p2_ofst_len); + s = jisx0213_p2_ofst[k] + 0x21; + out = mb_convert_buf_add2(out, s, s2); + } + } + + if (end && buf->state != 
ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ + CK((*filter->output_function)(0xa5, filter->data)); + } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ + CK((*filter->output_function)(0x203e, filter->data)); + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c <= 0x97) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 0212 second char */ + case 1: + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (filter->status == 0x80) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - 
cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= 94 * 94 && s < 114 * 94) { + /* user-defined => PUA (Microsoft extended) */ + w = s - 94*94 + 0xe000; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } else { + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0x92: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0x93: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0x94: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ 
+/* case 0x95: */ + case 5: + if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + /* 2-byte (JIS X 0208 or 0212) character was truncated, or else + * escape sequence was truncated */ + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +/* Apply various transforms to input codepoint, such as converting halfwidth katakana + * to fullwidth katakana. `mode` is a bitfield which controls which transforms are + * actually performed. The bit values are defined in translit_kana_jisx0201_jisx0208.h. + * `mode` must not call for transforms which are inverses (i.e. which would cancel + * each other out). + * + * In some cases, successive input codepoints may be merged into one output codepoint. + * (That is the purpose of the `next` parameter.) If the `next` codepoint is consumed + * and should be skipped over, `*consumed` will be set to true. Otherwise, `*consumed` + * will not be modified. If there is no following codepoint, `next` should be zero. + * + * Again, in some cases, one input codepoint may convert to two output codepoints. + * If so, the second output codepoint will be stored in `*second`. + * + * Return the resulting codepoint. If none of the requested transforms apply, return + * the input codepoint unchanged. 
+ */ +uint32_t mb_convert_kana_codepoint(uint32_t c, uint32_t next, bool *consumed, uint32_t *second, unsigned int mode) +{ + if ((mode & MBFL_HAN2ZEN_ALL) && c >= 0x21 && c <= 0x7D && c != '"' && c != '\'' && c != '\\') { + return c + 0xFEE0; + } + if ((mode & MBFL_HAN2ZEN_ALPHA) && ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { + return c + 0xFEE0; + } + if ((mode & MBFL_HAN2ZEN_NUMERIC) && c >= '0' && c <= '9') { + return c + 0xFEE0; + } + if ((mode & MBFL_HAN2ZEN_SPACE) && c == ' ') { + return 0x3000; + } + + if (mode & (MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_HIRAGANA)) { + /* Convert Hankaku kana to Zenkaku kana + * Either all Hankaku kana (including katakana and hiragana) will be converted + * to Zenkaku katakana, or to Zenkaku hiragana */ + if ((mode & MBFL_HAN2ZEN_KATAKANA) && (mode & MBFL_HAN2ZEN_GLUE)) { + if (c >= 0xFF61 && c <= 0xFF9F) { + int n = c - 0xFF60; + + if (next >= 0xFF61 && next <= 0xFF9F) { + if (next == 0xFF9E && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { + *consumed = true; + return 0x3001 + hankana2zenkana_table[n]; + } + if (next == 0xFF9E && n == 19) { + *consumed = true; + return 0x30F4; + } + if (next == 0xFF9F && n >= 42 && n <= 46) { + *consumed = true; + return 0x3002 + hankana2zenkana_table[n]; + } + } + + return 0x3000 + hankana2zenkana_table[n]; + } + } + if ((mode & MBFL_HAN2ZEN_HIRAGANA) && (mode & MBFL_HAN2ZEN_GLUE)) { + if (c >= 0xFF61 && c <= 0xFF9F) { + int n = c - 0xFF60; + + if (next >= 0xFF61 && next <= 0xFF9F) { + if (next == 0xFF9E && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { + *consumed = true; + return 0x3001 + hankana2zenhira_table[n]; + } + if (next == 0xFF9F && n >= 42 && n <= 46) { + *consumed = true; + return 0x3002 + hankana2zenhira_table[n]; + } + } + + return 0x3000 + hankana2zenhira_table[n]; + } + } + if ((mode & MBFL_HAN2ZEN_KATAKANA) && c >= 0xFF61 && c <= 0xFF9F) { + return 0x3000 + hankana2zenkana_table[c - 0xFF60]; + } + if ((mode & MBFL_HAN2ZEN_HIRAGANA) && c >= 0xFF61 && c <= 
0xFF9F) { + return 0x3000 + hankana2zenhira_table[c - 0xFF60]; + } + } + + if (mode & MBFL_HAN2ZEN_SPECIAL) { /* special ascii to symbol */ + if (c == '\\' || c == 0xA5) { /* YEN SIGN */ + return 0xFFE5; /* FULLWIDTH YEN SIGN */ + } + if (c == 0x7E || c == 0x203E) { + return 0xFFE3; /* FULLWIDTH MACRON */ + } + if (c == '\'') { + return 0x2019; /* RIGHT SINGLE QUOTATION MARK */ + } + if (c == '"') { + return 0x201D; /* RIGHT DOUBLE QUOTATION MARK */ + } + } + + if (mode & (MBFL_ZEN2HAN_ALL | MBFL_ZEN2HAN_ALPHA | MBFL_ZEN2HAN_NUMERIC | MBFL_ZEN2HAN_SPACE)) { + /* Zenkaku to Hankaku */ + if ((mode & MBFL_ZEN2HAN_ALL) && c >= 0xFF01 && c <= 0xFF5D && c != 0xFF02 && c != 0xFF07 && c != 0xFF3C) { + /* all except " ' \ ~ */ + return c - 0xFEE0; + } + if ((mode & MBFL_ZEN2HAN_ALPHA) && ((c >= 0xFF21 && c <= 0xFF3A) || (c >= 0xFF41 && c <= 0xFF5A))) { + return c - 0xFEE0; + } + if ((mode & MBFL_ZEN2HAN_NUMERIC) && (c >= 0xFF10 && c <= 0xFF19)) { + return c - 0xFEE0; + } + if ((mode & MBFL_ZEN2HAN_SPACE) && (c == 0x3000)) { + return ' '; + } + if ((mode & MBFL_ZEN2HAN_ALL) && (c == 0x2212)) { /* MINUS SIGN */ + return '-'; + } + } + + if (mode & (MBFL_ZEN2HAN_KATAKANA | MBFL_ZEN2HAN_HIRAGANA)) { + /* Zenkaku kana to hankaku kana */ + if ((mode & MBFL_ZEN2HAN_KATAKANA) && c >= 0x30A1 && c <= 0x30F4) { + /* Zenkaku katakana to hankaku kana */ + int n = c - 0x30A1; + if (zenkana2hankana_table[n][1]) { + *second = 0xFF00 + zenkana2hankana_table[n][1]; + } + return 0xFF00 + zenkana2hankana_table[n][0]; + } + if ((mode & MBFL_ZEN2HAN_HIRAGANA) && c >= 0x3041 && c <= 0x3093) { + /* Zenkaku hiragana to hankaku kana */ + int n = c - 0x3041; + if (zenkana2hankana_table[n][1]) { + *second = 0xFF00 + zenkana2hankana_table[n][1]; + } + return 0xFF00 + zenkana2hankana_table[n][0]; + } + if (c == 0x3001) { + return 0xFF64; /* HALFWIDTH IDEOGRAPHIC COMMA */ + } + if (c == 0x3002) { + return 0xFF61; /* HALFWIDTH IDEOGRAPHIC FULL STOP */ + } + if (c == 0x300C) { + return 0xFF62; /* HALFWIDTH 
LEFT CORNER BRACKET */ + } + if (c == 0x300D) { + return 0xFF63; /* HALFWIDTH RIGHT CORNER BRACKET */ + } + if (c == 0x309B) { + return 0xFF9E; /* HALFWIDTH KATAKANA VOICED SOUND MARK */ + } + if (c == 0x309C) { + return 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ + } + if (c == 0x30FC) { + return 0xFF70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ + } + if (c == 0x30FB) { + return 0xFF65; /* HALFWIDTH KATAKANA MIDDLE DOT */ + } + } + + if (mode & (MBFL_ZENKAKU_HIRA2KATA | MBFL_ZENKAKU_KATA2HIRA)) { + if ((mode & MBFL_ZENKAKU_HIRA2KATA) && ((c >= 0x3041 && c <= 0x3093) || c == 0x309D || c == 0x309E)) { + /* Zenkaku hiragana to Zenkaku katakana */ + return c + 0x60; + } + if ((mode & MBFL_ZENKAKU_KATA2HIRA) && ((c >= 0x30A1 && c <= 0x30F3) || c == 0x30FD || c == 0x30FE)) { + /* Zenkaku katakana to Zenkaku hiragana */ + return c - 0x60; + } + } + + if (mode & MBFL_ZEN2HAN_SPECIAL) { /* special symbol to ascii */ + if (c == 0xFFE5 || c == 0xFF3C) { /* FULLWIDTH YEN SIGN/FULLWIDTH REVERSE SOLIDUS */ + return '\\'; + } + if (c == 0xFFE3 || c == 0x203E) { /* FULLWIDTH MACRON/OVERLINE */ + return '~'; + } + if (c == 0x2018 || c == 0x2019) { /* LEFT/RIGHT SINGLE QUOTATION MARK*/ + return '\''; + } + if (c == 0x201C || c == 0x201D) { /* LEFT/RIGHT DOUBLE QUOTATION MARK */ + return '"'; + } + } + + return c; +} + +static int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter); + +static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter) +{ + int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE; + bool consumed = false; + + if (filter->cache) { + int s = mb_convert_kana_codepoint(filter->cache, c, &consumed, NULL, mode); + filter->cache = consumed ? 
0 : c; + /* Terrible hack to get CP50220 to emit error markers in the proper + * position, not reordering them with subsequent characters */ + filter->filter_function = mbfl_filt_conv_wchar_cp50221; + mbfl_filt_conv_wchar_cp50221(s, filter); + filter->filter_function = mbfl_filt_conv_wchar_cp50220; + if (c == 0 && !consumed) { + (*filter->output_function)(0, filter->data); + } + } else if (c == 0) { + /* This case has to be handled separately, since `filter->cache == 0` means + * no codepoint is cached */ + (*filter->output_function)(0, filter->data); + } else { + filter->cache = c; + } + + return 0; +} + +static int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter) +{ + int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE; + + if (filter->cache) { + int s = mb_convert_kana_codepoint(filter->cache, 0, NULL, NULL, mode); + filter->filter_function = mbfl_filt_conv_wchar_cp50221; + mbfl_filt_conv_wchar_cp50221(s, filter); + filter->filter_function = mbfl_filt_conv_wchar_cp50220; + filter->cache = 0; + } + + return mbfl_filt_conv_any_jis_flush(filter); +} + +static int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= 
ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xE000 && c <= 0xE757) { + /* 'private'/'user' codepoints */ + s = c - 0xE000; + s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + + /* Above, we do a series of lookups in `ucs_*_jis_table` to find a + * corresponding kuten code for this Unicode codepoint + * If we get zero, that means the codepoint is not in JIS X 0208 + * On the other hand, if we get a result with the high bits set on both + * upper and lower bytes, that is not a code in JIS X 0208 but rather + * in JIS X 0213 + * In either case, check if this codepoint is one of the extensions added + * to JIS X 0208 by MicroSoft (to make CP932) */ + if (s == 0 || ((s & 0x8000) && (s & 0x80))) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s < 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) 
{ + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0; + } + CK((*filter->output_function)(s, filter->data)); + } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ + if ((filter->status & 0xff00) != 0x500) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + filter->status = 0x500; + } + CK((*filter->output_function)(s - 0x80, filter->data)); + } else if (s <= 0x927E) { /* X 0208 + extensions */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } else if (s < 0x10000) { /* X0212 */ + CK(mbfl_filt_conv_illegal_output(c, filter)); + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* 
Convert to JISX 0201 OVERLINE */ + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xE000 && c <= 0xE757) { + /* 'private'/'user' codepoints */ + s = c - 0xE000; + s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + if (s == 0 || ((s & 0x8000) && (s & 0x80))) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s <= 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SO */ + filter->status = 0; + } else if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + 
CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0; + } + CK((*filter->output_function)(s, filter->data)); + } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ + if ((filter->status & 0xff00) != 0x500) { + CK((*filter->output_function)(0x0e, filter->data)); /* SI */ + filter->status = 0x500; + } + CK((*filter->output_function)(s - 0x80, filter->data)); + } else if (s <= 0x927E) { /* X 0208 */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SO */ + filter->status = 0; + } + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } else if (s < 0x10000) { /* X0212 */ + CK(mbfl_filt_conv_illegal_output(c, filter)); + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SO */ + filter->status = 0; + } + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SO */ + } else if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + 
CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +#define ASCII 0 +#define JISX_0201_LATIN 1 +#define JISX_0201_KANA 2 +#define JISX_0208 3 +#define JISX_0212 4 + +static size_t mb_cp5022x_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + ZEND_ASSERT(bufsize >= 3); + + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + /* Escape sequence */ + if ((e - p) < 2) { + *out++ = MBFL_BAD_INPUT; + /* Duplicate error-handling behavior of legacy code */ + if (p < e && (*p == '(' || *p == '$')) + p++; + continue; + } + unsigned char c2 = *p++; + if (c2 == '$') { + unsigned char c3 = *p++; + if (c3 == '@' || c3 == 'B') { + *state = JISX_0208; + } else if (c3 == '(') { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c4 = *p++; + if (c4 == '@' || c4 == 'B') { + *state = JISX_0208; + } else if (c4 == 'D') { + *state = JISX_0212; + } else { + if ((limit - out) < 3) { + p -= 4; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '$'; + *out++ = '('; + p--; + } + } else { + if ((limit - out) < 2) { + p -= 3; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '$'; + p--; + } + } else if (c2 == '(') { + unsigned char c3 = *p++; + if (c3 == 'B' || c3 == 'H') { + *state = ASCII; + } else if (c3 == 'J') { + *state = JISX_0201_LATIN; + } else if (c3 == 'I') { + *state = JISX_0201_KANA; + } else { + if ((limit - out) < 2) { + p -= 3; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '('; + p--; + } + } else { + *out++ = MBFL_BAD_INPUT; + p--; + } + } else if (c == 0xE) { + *state = JISX_0201_KANA; + } else if (c == 0xF) { + *state = ASCII; + } else if (*state == JISX_0201_LATIN && c == 0x5C) { /* 
YEN SIGN */ + *out++ = 0xA5; + } else if (*state == JISX_0201_LATIN && c == 0x7E) { /* OVER LINE */ + *out++ = 0x203E; + } else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) { + *out++ = 0xFF40 + c; + } else if (*state >= JISX_0208 && c > 0x20 && c <= 0x97) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + if (c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + uint32_t w = 0; + if (*state == JISX_0208) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= 94*94 && s < 114*94) { + /* MicroSoft extension */ + w = s - 94*94 + 0xE000; + } + if (!w) + w = MBFL_BAD_INPUT; + } else { + if (s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } + if (!w) + w = MBFL_BAD_INPUT; + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static unsigned int lookup_wchar(uint32_t w) +{ + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min 
&& w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w <= 0xE757) { + /* Private Use Area codepoints */ + s = w - 0xE000; + s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x1005C; + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w == 0) { + return 0; + } + } + + /* Above, we do a series of lookups in `ucs_*_jis_table` to find a + * corresponding kuten code for this Unicode codepoint + * If we get zero, that means the codepoint is not in JIS X 0208 + * On the other hand, if we get a result with the high bits set on both + * upper and lower bytes, that is not a code in JIS X 0208 but rather + * in JIS X 0213 + * In either case, check if this codepoint is one of the extensions added + * to JIS X 0208 by MicroSoft (to make CP932) */ + if (!s || s >= 0x8080) { + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + return (((i / 94) + (cp932ext1_ucs_table_min / 94) + 0x21) << 8) + (i % 94) + 0x21; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (w == cp932ext2_ucs_table[i]) { + return (((i / 94) + (cp932ext2_ucs_table_min / 94) + 0x21) << 8) + (i % 94) + 0x21; + } + } + } + + return s; +} + +static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); + +static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, 
len); + + uint32_t w; + + if (buf->state & 0xFFFF00) { + /* Reprocess cached codepoint */ + w = buf->state >> 8; + buf->state &= 0xFF; + goto reprocess_codepoint; + } + + while (len--) { + w = *in++; +reprocess_codepoint: + + if (w >= 0xFF61 && w <= 0xFF9F && !len && !end) { + /* This codepoint may need to combine with the next one, + * but the 'next one' will come in a separate buffer */ + buf->state |= w << 8; + break; + } + + bool consumed = false; + w = mb_convert_kana_codepoint(w, len ? *in : 0, &consumed, NULL, MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE); + if (consumed) { + /* Two successive codepoints were converted into one */ + in++; len--; consumed = false; + } + + unsigned int s = lookup_wchar(w); + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); + } else if (s < 0x80) { + /* ASCII */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != ASCII) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA0 && s < 0xE0) { + /* JISX 0201 Kana */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != JISX_0201_KANA) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); + buf->state = JISX_0201_KANA; + } + out = mb_convert_buf_add(out, s - 0x80); + } else if (s <= 0x927E) { + /* JISX 0208 Kanji */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + if (buf->state != JISX_0208) { + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX_0208; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else if (s >= 0x10000) { + /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != JISX_0201_LATIN) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); + buf->state = JISX_0201_LATIN; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); + } + } + + if (end && buf->state != 
ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = lookup_wchar(w); + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); + } else if (s < 0x80) { + /* ASCII */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != ASCII) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA0 && s < 0xE0) { + /* JISX 0201 Kana */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != JISX_0201_KANA) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); + buf->state = JISX_0201_KANA; + } + out = mb_convert_buf_add(out, s - 0x80); + } else if (s <= 0x927E) { + /* JISX 0208 Kanji */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + if (buf->state != JISX_0208) { + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX_0208; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else if (s >= 0x10000) { + /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != JISX_0201_LATIN) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); + buf->state = JISX_0201_LATIN; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); + } + } + + if (end && buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, 
*limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = lookup_wchar(w); + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50222); + } else if (s < 0x80) { + /* ASCII */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state == JISX_0201_KANA) { + out = mb_convert_buf_add(out, 0xF); + buf->state = ASCII; + } else if (buf->state != ASCII) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA0 && s < 0xE0) { + /* JISX 0201 Kana */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + if (buf->state != JISX_0201_KANA) { + out = mb_convert_buf_add(out, 0xE); + buf->state = JISX_0201_KANA; + } + out = mb_convert_buf_add(out, s - 0x80); + } else if (s <= 0x927E) { + /* JISX 0208 Kanji */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); + if (buf->state == JISX_0201_KANA) { + out = mb_convert_buf_add(out, 0xF); + } + if (buf->state != JISX_0208) { + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX_0208; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else if (s >= 0x10000) { + /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + if (buf->state == JISX_0201_KANA) { + out = mb_convert_buf_add(out, 0xF); + } + if (buf->state != JISX_0201_LATIN) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); + buf->state = JISX_0201_LATIN; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50222); + } + } + + if (end) { + if (buf->state == JISX_0201_KANA) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 1); + out = mb_convert_buf_add(out, 0xF); + } else if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} 
+ +#define ASCII 0 +#define JISX0201_KANA 0x20 +#define JISX0208_KANJI 0x80 +#define UDC 0xA0 + +static int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status & 0xF) { + case 0: + if (c == 0x1B) { + filter->status += 2; + } else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) { + CK((*filter->output_function)(0xFF40 + c, filter->data)); + } else if ((filter->status == JISX0208_KANJI || filter->status == UDC) && c > 0x20 && c < 0x80) { + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* ASCII */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xA0 && c < 0xE0) { /* Kana */ + CK((*filter->output_function)(0xFEC0 + c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* Kanji, second byte */ + case 1: + w = 0; + filter->status &= ~0xF; + c1 = filter->cache; + if (c > 0x20 && c < 0x7F) { + s = ((c1 - 0x21) * 94) + c - 0x21; + if (filter->status == JISX0208_KANJI) { + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } else { 
/* Linear index of a 2-byte Shift-JIS code. Each lead byte contributes 188
 * trail-byte slots; lead bytes above 0x9f are counted from 0xc1, the others
 * from 0x81. Trail bytes above 0x7e are shifted down by one extra position
 * (0x7f is skipped). Both arguments are evaluated more than once. */
#define sjistoidx(c1, c2) \
	((((c1) - (((c1) > 0x9f) ? 0xc1 : 0x81)) * 188) + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))
/* Split a linear index into the two bytes of a 94x94 JIS code */
#define idxtojis1(c) (((c) / 94) + 0x21)
#define idxtojis2(c) (((c) % 94) + 0x21)

/* Translate an offset `c`, counted from Shift-JIS 0xFA40, into a 2-byte JIS
 * code (first byte in bits 8-15, second byte in bits 0-7).
 *
 * The offset is first turned into a linear Shift-JIS index and then relocated,
 * depending on which of three consecutive ranges it falls into:
 *   - from (0xfa, 0x5c) upward:        moved so (0xfa, 0x5c) lands on (0xed, 0x40)
 *   - from (0xfa, 0x55) to (0xfa, 0x5b): moved so (0xfa, 0x55) lands on (0xee, 0xfa)
 *   - from (0xfa, 0x40) to (0xfa, 0x54): moved so (0xfa, 0x40) lands on (0xee, 0xef)
 * A negative offset matches none of the ranges and is not relocated. */
static int cp932ext3_cp932ext2_jis(int c)
{
	const int range_lo = sjistoidx(0xfa, 0x40);
	const int range_mid = sjistoidx(0xfa, 0x55);
	const int range_hi = sjistoidx(0xfa, 0x5c);
	int pos = range_lo + c;
	int shift = 0;

	if (pos >= range_hi) {
		shift = range_hi - sjistoidx(0xed, 0x40);
	} else if (pos >= range_mid) {
		shift = range_mid - sjistoidx(0xee, 0xfa);
	} else if (pos >= range_lo) {
		shift = range_lo - sjistoidx(0xee, 0xef);
	}
	pos -= shift;

	return (idxtojis1(pos) << 8) | idxtojis2(pos);
}
cp932ext1_ucs_table[c1]) { + s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; + break; + } + } + + if (s1 <= 0) { + for (c1 = 0; c1 < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; c1++) { + if (c == cp932ext3_ucs_table[c1]) { + s1 = cp932ext3_cp932ext2_jis(c1); + break; + } + } + } + + if (c == 0) { + s1 = 0; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + if (filter->status & 0xFF00) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + CK((*filter->output_function)(s1, filter->data)); + filter->status = 0; + } else if (s1 > 0xA0 && s1 < 0xE0) { /* kana */ + if ((filter->status & 0xFF00) != 0x100) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('I', filter->data)); + } + filter->status = 0x100; + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } else if (s1 < 0x7E7F) { /* X 0208 */ + if ((filter->status & 0xFF00) != 0x200) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + filter->status = 0x200; + CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } else if (s1 < 0x927F) { /* UDC */ + if ((filter->status & 0xFF00) != 0x800) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('?', filter->data)); + } + filter->status = 0x800; + CK((*filter->output_function)(((s1 >> 8) - 0x5E) & 0x7F, filter->data)); + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int 
mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) +{ + /* Go back to ASCII (so strings can be safely concatenated) */ + if ((filter->status & 0xFF00) != 0) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_iso2022jpms_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + if ((e - p) < 2) { + *out++ = MBFL_BAD_INPUT; + p = e; + break; + } + unsigned char c2 = *p++; + unsigned char c3 = *p++; + + if (c2 == '$') { + if (c3 == '@' || c3 == 'B') { + *state = JISX0208_KANJI; + } else if (c3 == '(' && p < e) { + unsigned char c4 = *p++; + + if (c4 == '@' || c4 == 'B') { + *state = JISX0208_KANJI; + } else if (c4 == '?') { + *state = UDC; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c2 == '(') { + if (c3 == 'B' || c3 == 'J') { + *state = ASCII; + } else if (c3 == 'I') { + *state = JISX0201_KANA; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + p--; + *out++ = MBFL_BAD_INPUT; + } + } else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) { + *out++ = 0xFF40 + c; + } else if ((*state == JISX0208_KANJI || *state == UDC) && c >= 0x21 && c <= 0x7F) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + unsigned int w = 0; + + if (c2 >= 0x21 && c2 <= 0x7E) { + unsigned int s = ((c - 0x21) * 94) + c2 - 0x21; + if (*state == JISX0208_KANJI) { + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s 
/* Encode `len` Unicode codepoints from `in` as ISO-2022-JP-MS, appending the
 * bytes to `buf`.
 *
 * buf->state holds the character set selected by the most recent escape
 * sequence (ASCII, JISX0201_KANA, JISX0208_KANJI or UDC); an escape sequence
 * is emitted only when the required set differs from it. When `end` is true
 * and the state is not ASCII, a final ESC ( B is appended.
 *
 * Codepoints with no mapping are passed to MB_CONVERT_ERROR.
 *
 * NOTE(review): MB_CONVERT_BUF_ENSURE is defined elsewhere; the sizes passed
 * here read as "bytes about to be written for this codepoint, plus one byte
 * for each of the `len` codepoints still to come" - confirm against the macro. */
static void mb_wchar_to_iso2022jpms(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		/* Look the codepoint up in whichever Unicode -> JIS table covers it */
		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		} else if (w >= 0xE000 && w < (0xE000 + 20*94)) {
			/* Private User Area (95ku - 114ku) */
			/* First byte becomes 0x7F..0x92, i.e. above the 0x21..0x7E range used
			 * by JIS X 0208; the UDC branch below subtracts 0x5E to bring it back */
			s = ((((w - 0xE000) / 94) + 0x7F) << 8) | (((w - 0xE000) % 94) + 0x21);
		}

		/* Codepoints which the tables leave unmapped but which have a fixed JIS code */
		if (!s) {
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x216F; /* FULLWIDTH YEN SIGN */
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			}
		}

		/* Discard the table result so that the vendor extension tables are tried instead */
		if (s >= 0xA1A1) /* JISX 0212 */
			s = 0;

		if (!s && w) {
			/* Linear search of the CP932 extension tables; an index in the first
			 * table is converted to a JIS code whose first byte starts at 0x2D */
			for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
				if (w == cp932ext1_ucs_table[i]) {
					s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21;
					break;
				}
			}

			if (!s) {
				for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) {
					if (w == cp932ext3_ucs_table[i]) {
						s = cp932ext3_cp932ext2_jis(i);
						break;
					}
				}
			}
		}

		if (!s && w) {
			/* Nothing matched (U+0000 is exempt and is emitted below as a zero byte) */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s <= 0x7F) {
			/* Single byte, ASCII set */
			if (buf->state != ASCII) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
				buf->state = ASCII;
			}
			out = mb_convert_buf_add(out, s);
		} else if (s >= 0xA1 && s <= 0xDF) {
			/* Single byte, JIS X 0201 kana; written with the high bit stripped */
			if (buf->state != JISX0201_KANA) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'I');
				buf->state = JISX0201_KANA;
			}
			out = mb_convert_buf_add(out, s & 0x7F);
		} else if (s <= 0x7E7E) {
			/* Two bytes, JIS X 0208 set (also used for the CP932 extension codes found above) */
			if (buf->state != JISX0208_KANJI) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5);
				out = mb_convert_buf_add3(out, 0x1B, '$', 'B');
				buf->state = JISX0208_KANJI;
			} else {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			}
			out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0x7F);
		} else if (s < 0x927F) {
			/* Two bytes, user-defined character set; 0x7F - 0x5E = 0x21, so the
			 * first byte written starts again at 0x21 */
			if (buf->state != UDC) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6);
				out = mb_convert_buf_add4(out, 0x1B, '$', '(', '?');
				buf->state = UDC;
			} else {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			}
			out = mb_convert_buf_add2(out, ((s >> 8) - 0x5E) & 0x7F, s & 0x7F);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		}
	}

	/* buf->state is not updated here; this only runs when `end` is true */
	if (end && buf->state != ASCII) {
		MB_CONVERT_BUF_ENSURE(buf, out, limit, 3);
		out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
+ filter->status++; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* ESC $ */ + if (c == ')') { + filter->status++; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 4: /* ESC $ ) */ + filter->status = 0; + if (c != 'C') { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + /* 2-byte character was truncated */ + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s = 0; + + if ((filter->status & 0x100) == 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)(')', filter->data)); + CK((*filter->output_function)('C', filter->data)); + filter->status |= 0x100; + } + + if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; + } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; + } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; + } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; + } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; + } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; + } else if (c >= ucs_r2_uhc_table_min && c < 
ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; + } + + c1 = (s >> 8) & 0xff; + c2 = s & 0xff; + /* exclude UHC extension area */ + if (c1 < 0xa1 || c2 < 0xa1) { + s = c; + } else if (s & 0x8000) { + s -= 0x8080; + } + + if (s <= 0) { + if (c == 0) { + s = 0; + } else { + s = -1; + } + } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { + s = -1; + } + + if (s >= 0) { + if (s < 0x80 && s >= 0) { /* ASCII */ + if (filter->status & 0x10) { + CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ + filter->status &= ~0x10; + } + CK((*filter->output_function)(s, filter->data)); + } else { + if ((filter->status & 0x10) == 0) { + CK((*filter->output_function)(0x0e, filter->data)); /* shift out */ + filter->status |= 0x10; + } + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + /* Escape sequence or 2-byte character was truncated */ + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + } + /* back to ascii */ + if (filter->status & 0x10) { + CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ + } + + filter->status = filter->cache = 0; + + if (filter->flush_function) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +#define ASCII 0 +#define KSC5601 1 + +static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + if ((e - p) < 3) { + *out++ = MBFL_BAD_INPUT; + if (p < e && *p++ == '$') { + if (p < e) { + p++; + } + } + continue; + } + unsigned char c2 = *p++; + unsigned char c3 = *p++; 
+ unsigned char c4 = *p++; + if (c2 == '$' && c3 == ')' && c4 == 'C') { + *state = ASCII; + } else { + if (c3 != ')') { + p--; + if (c2 != '$') + p--; + } + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0xF) { + *state = ASCII; + } else if (c == 0xE) { + *state = KSC5601; + } else if (c >= 0x21 && c <= 0x7E && *state == KSC5601) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + unsigned int w = 0; + + if (c2 < 0x21 || c2 > 0x7E) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + if (c < 0x47) { + if (c != 0x22 || c2 <= 0x65) { + w = (c - 1)*190 + c2 - 0x41 + 0x80; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + } + } else if (c != 0x49 && c <= 0x7D) { + w = (c - 0x47)*94 + c2 - 0x21; + ZEND_ASSERT(w < uhc3_ucs_table_size); + w = uhc3_ucs_table[w]; + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else if (c < 0x80 && *state == ASCII) { + *out++ = c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +#define EMITTED_ESC_SEQUENCE 0x10 + +static void mb_wchar_to_iso2022kr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + + /* This escape sequence needs to come *somewhere* at the beginning of a line before + * we can use the Shift In/Shift Out bytes, but it only needs to come once in a string + * Rather than tracking newlines, we can just emit the sequence once at the beginning + * of the output string... 
since that will always be "the beginning of a line" */ + if (len && !(buf->state & EMITTED_ESC_SEQUENCE)) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 4 + len); + out = mb_convert_buf_add4(out, 0x1B, '$', ')', 'C'); + buf->state |= EMITTED_ESC_SEQUENCE; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; + } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; + } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; + } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; + } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; + } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; + } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; + } + + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = w; + } else { + s -= 0x8080; + } + + if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022kr); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s < 0x80) { + if ((buf->state & 1) != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add(out, 0xF); + buf->state &= ~KSC5601; + } + out = mb_convert_buf_add(out, s); + } else { + if ((buf->state & 1) != KSC5601) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); + out = mb_convert_buf_add(out, 0xE); + buf->state |= KSC5601; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + if (end && 
(buf->state & 1) != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 1); + out = mb_convert_buf_add(out, 0xF); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const struct mbfl_convert_vtbl vtbl_jis_wchar = { + mbfl_no_encoding_jis, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis_wchar, + mbfl_filt_conv_jis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_jis = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_jis, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_jis, + mbfl_filt_conv_any_jis_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_jis = { + mbfl_no_encoding_jis, + "JIS", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_jis_wchar, + &vtbl_wchar_jis, + mb_iso2022jp_to_wchar, + mb_wchar_to_jis, + mb_check_jis +}; + +static const struct mbfl_convert_vtbl vtbl_2022jp_wchar = { + mbfl_no_encoding_2022jp, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis_wchar, + mbfl_filt_conv_jis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_2022jp = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jp, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_2022jp, + mbfl_filt_conv_any_jis_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_2022jp = { + mbfl_no_encoding_2022jp, + "ISO-2022-JP", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_2022jp_wchar, + &vtbl_wchar_2022jp, + mb_iso2022jp_to_wchar, + mb_wchar_to_iso2022jp, + mb_check_iso2022jp +}; + +static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL}; + +static const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = { + mbfl_no_encoding_2022jp_kddi, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_2022jp_mobile_wchar, + mbfl_filt_conv_2022jp_mobile_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = { + 
/* Previously, a dubious 'encoding' called 'cp50220raw' was supported.
 * This was just CP50220, but the implementation was less strict regarding
 * invalid characters; it would silently pass some through.
 * This 'encoding' only existed in mbstring. In case some poor, lost soul is
 * still using it, retain minimal support by aliasing it to CP50220.
 *
 * Further, mbstring also had a made-up encoding called "JIS-ms".
 * This was the same as CP5022{0,1,2}, but without their special ways of
 * handling conversion of Unicode half-width katakana. It is aliased to
 * CP50220 as well. */
static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", "JIS-ms", NULL};
/* ISO-2022-KR is defined in RFC 1557.
 *
 * The RFC says that ESC $ ) C must appear once in an ISO-2022-KR string,
 * at the beginning of a line, before any instances of the Shift In or
 * Shift Out bytes which are used to switch between ASCII/KSC 5601 modes.
 *
 * We don't enforce that for ISO-2022-KR input. */

/* Legacy filter table for wchar -> ISO-2022-KR: per-character conversion is
 * done by mbfl_filt_conv_wchar_2022kr and end-of-string handling by
 * mbfl_filt_conv_any_2022kr_flush */
static const struct mbfl_convert_vtbl vtbl_wchar_2022kr = {
	mbfl_no_encoding_wchar,
	mbfl_no_encoding_2022kr,
	mbfl_filt_conv_common_ctor,
	NULL,
	mbfl_filt_conv_wchar_2022kr,
	mbfl_filt_conv_any_2022kr_flush,
	NULL,
};
+static const struct mbfl_convert_vtbl vtbl_2022kr_wchar = { + mbfl_no_encoding_2022kr, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_2022kr_wchar, + mbfl_filt_conv_2022kr_wchar_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_2022kr = { + mbfl_no_encoding_2022kr, + "ISO-2022-KR", + "ISO-2022-KR", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_2022kr_wchar, + &vtbl_wchar_2022kr, + mb_iso2022kr_to_wchar, + mb_wchar_to_iso2022kr, + NULL +}; + +/* + * SJIS variants + */ + +static int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter) +{ + int s1, s2, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* ASCII */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xA0 && c < 0xE0) { /* Kana */ + CK((*filter->output_function)(0xFEC0 + c, filter->data)); + } else if (c > 0x80 && c < 0xF0 && c != 0xA0) { /* Kanji, first byte */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* Kanji, second byte */ + filter->status = 0; + int c1 = filter->cache; + if (c >= 0x40 && c <= 0xFC && c != 0x7F) { + SJIS_DECODE(c1, c, s1, s2); + w = (s1 - 0x21)*94 + s2 - 0x21; + if (w >= 0 && w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + } else { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + + return 0; +} + +static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status && filter->status != 4) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0, s2; + + if (c >= ucs_a1_jis_table_min && c 
< ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s1 <= 0) { + if (c == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xAF || c == 0x203E) { /* U+00AF is MACRON, U+203E is OVERLINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215D; + } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } else if (c == 0) { + s1 = 0; + } else { + s1 = -1; + } + } else if (s1 >= 0x8080) { /* JIS X 0212; not supported */ + s1 = -1; + } + + if (s1 >= 0) { + if (s1 < 0x100) { /* Latin/Kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* Kanji */ + c1 = (s1 >> 8) & 0xFF; + c2 = s1 & 0xFF; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static const unsigned short sjis_decode_tbl1[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF +}; + +static const unsigned short sjis_decode_tbl2[] = { + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 0xFFFF, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 
136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 0xFFFF, 0xFFFF, 0xFFFF +}; + +static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + e--; /* Stop the main loop 1 byte short of the end of the input */ + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { /* Kana */ + *out++ = 0xFEC0 + c; + } else { + /* Don't need to check p < e; it's not possible to go out of bounds here, due to e-- above */ + unsigned char c2 = *p++; + /* This is only legal if c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F + * But the values in the above conversion tables have been chosen such that + * illegal values of c2 will always result in w > jisx0208_ucs_table_size, + * so we don't need to do a separate bounds check on c2 + * Likewise, the values in the conversion tables are such that illegal values + * for c will always result in w > jisx0208_ucs_table_size */ + uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2]; + if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + if (c == 0x80 || c == 0xA0 || c > 0xEF) { + p--; + } + *out++ = MBFL_BAD_INPUT; + } + } + } + + /* Finish up last byte of input string if there is one */ + if (p == e && out < limit) { + unsigned char c = *p++; + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p + 1; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char 
*out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xAF || w == 0x203E) { + s = 0x2131; /* FULLWIDTH MACRON */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w != 0) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + } else if (s >= 0x8080) { /* JIS X 0212; not supported */ + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + + if (s <= 0xFF) { + /* Latin/Kana */ + out = mb_convert_buf_add(out, s); + } else { + /* Kanji */ + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s2; + SJIS_ENCODE(c1, c2, s, s2); + out = mb_convert_buf_add2(out, s, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter) +{ + int i, j, n; + int c1, s, s1, s2, w; + + switch (filter->status) { + case 0: + if 
(c >= 0 && c < 0x80 && c != 0x5c) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else if (c > 0x80 && c <= 0xed && c != 0xa0) { /* kanji first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x5c) { + CK((*filter->output_function)(0x00a5, filter->data)); + } else if (c == 0x80) { + CK((*filter->output_function)(0x005c, filter->data)); + } else if (c == 0xa0) { + CK((*filter->output_function)(0x00a0, filter->data)); + } else if (c == 0xfd) { + CK((*filter->output_function)(0x00a9, filter->data)); + } else if (c == 0xfe) { + CK((*filter->output_function)(0x2122, filter->data)); + } else if (c == 0xff) { + CK((*filter->output_function)(0x2026, filter->data)); + CK((*filter->output_function)(0xf87f, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* kanji second char */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { + w = 0; + SJIS_DECODE(c1, c, s1, s2); + s = (s1 - 0x21)*94 + s2 - 0x21; + if (s <= 0x89) { + if (s == 0x1c) { + w = 0x2014; /* EM DASH */ + } else if (s == 0x1f) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 0x20) { + w = 0x301c; /* FULLWIDTH TILDE */ + } else if (s == 0x21) { + w = 0x2016; /* PARALLEL TO */ + } else if (s == 0x3c) { + w = 0x2212; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 0x50) { + w = 0x00a2; /* FULLWIDTH CENT SIGN */ + } else if (s == 0x51) { + w = 0x00a3; /* FULLWIDTH POUND SIGN */ + } else if (s == 0x89) { + w = 0x00ac; /* FULLWIDTH NOT SIGN */ + } + } + + /* apple gaiji area 0x8540 - 0x886d */ + if (w == 0) { + for (i=0; i<7; i++) { + if (s >= code_tbl[i][0] && s <= code_tbl[i][1]) { + w = s - code_tbl[i][0] + code_tbl[i][2]; + break; + } + } + } + + if (w == 0) { + + for (i=0; ioutput_function)(code_tbl_m[i][j], filter->data)); + } + w = 
code_tbl_m[i][n-1]; + break; + } + } + } + + if (w == 0) { + for (i=0; i<8; i++) { + if (s >= code_ofst_tbl[i][0] && s <= code_ofst_tbl[i][1]) { + w = code_map[i][s - code_ofst_tbl[i][0]]; + if (w == 0) { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + return 0; + } + s2 = 0; + if (s >= 0x043e && s <= 0x0441) { + s2 = 0xf87a; + } else if (s == 0x03b1 || s == 0x03b7) { + s2 = 0xf87f; + } else if (s == 0x04b8 || s == 0x04b9 || s == 0x04c4) { + s2 = 0x20dd; + } else if (s == 0x1ed9 || s == 0x1eda || s == 0x1ee8 || s == 0x1ef3 || + (s >= 0x1ef5 && s <= 0x1efb) || s == 0x1f05 || s == 0x1f06 || + s == 0x1f18 || (s >= 0x1ff2 && s <= 0x20a5)) { + s2 = 0xf87e; + } + if (s2 > 0) { + CK((*filter->output_function)(w, filter->data)); + w = s2; + } + break; + } + } + } + + if (w == 0 && s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter) +{ + int i, c1, c2, s1 = 0, s2 = 0, mode; + + // a1: U+0000 -> U+046F + // a2: U+2000 -> U+30FF + // i: U+4E00 -> U+9FFF + // r: U+FF00 -> U+FFFF + + switch (filter->status) { + case 1: + c1 = filter->cache; + filter->cache = filter->status = 0; + + if (c == 0xf87a) { + for (i = 0; i < 4; i++) { + if (c1 == s_form_tbl[i+34+3+3]) { + s1 = s_form_sjis_tbl[i+34+3+3]; + break; + } + } + if (s1 <= 0) { + s2 = c1; + } + } else if (c == 0x20dd) { + for (i = 0; i < 3; i++) { + if (c1 == s_form_tbl[i+34+3]) { + s1 = s_form_sjis_tbl[i+34+3]; + break; + } + } + if (s1 <= 0) { + s2 = c1; + } + } else if (c == 0xf87f) { + for (i = 0; i < 3; i++) { + if (c1 == s_form_tbl[i+34]) { + s1 = s_form_sjis_tbl[i+34]; + break; + } + } + if (s1 <= 0) { + s2 = c1; + s1 = -1; + } + } else if (c == 0xf87e) { 
+ for (i = 0; i < 34; i++) { + if (c1 == s_form_tbl[i]) { + s1 = s_form_sjis_tbl[i]; + break; + } + } + if (s1 <= 0) { + s2 = c1; + s1 = -1; + } + } else { + s2 = c1; + s1 = c; + } + + if (s2 > 0) { + for (i = 0; i < s_form_tbl_len; i++) { + if (c1 == s_form_tbl[i]) { + s1 = s_form_sjis_fallback_tbl[i]; + break; + } + } + } + + if (s1 >= 0) { + if (s1 < 0x100) { + CK((*filter->output_function)(s1, filter->data)); + } else { + CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s1 & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + if (s2 <= 0 || s1 == -1) { + break; + } + s1 = s2 = 0; + ZEND_FALLTHROUGH; + + case 0: + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + if (c == 0x5c) { + s1 = 0x80; + } else if (c == 0xa9) { + s1 = 0xfd; + } + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + if (c == 0x2122) { + s1 = 0xfe; + } else if (c == 0x2014) { + s1 = 0x213d; + } else if (c == 0x2116) { + s1 = 0x2c1d; + } + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + + if (c >= 0x2000) { + for (i = 0; i < s_form_tbl_len; i++) { + if (c == s_form_tbl[i]) { + filter->status = 1; + filter->cache = c; + return 0; + } + } + + if (c == 0xf860 || c == 0xf861 || c == 0xf862) { + /* Apple 'transcoding hint' codepoints (from private use area) */ + filter->status = 2; + filter->cache = c; + return 0; + } + } + + if (s1 <= 0) { + if (c == 0xa0) { + s1 = 0x00a0; + } else if (c == 0xa5) { /* YEN SIGN */ + /* Unicode has codepoint 0xFFE5 for a fullwidth Yen sign; + * convert codepoint 0xA5 to halfwidth Yen sign */ + s1 = 0x5c; /* HALFWIDTH YEN SIGN */ + } else if (c == 0xff3c) { /* 
FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } + } + + if (s1 <= 0) { + for (i=0; i= wchar2sjis_mac_r_tbl[i][0] && c <= wchar2sjis_mac_r_tbl[i][1]) { + s1 = c - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2]; + break; + } + } + + if (s1 <= 0) { + for (i=0; i= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) { + s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]]; + break; + } + } + } + + if (s1 <= 0) { + for (i=0; i 0) { + c1 = s1/94+0x21; + c2 = s1-94*(c1-0x21)+0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + } + + if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x100) { /* latin or kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* kanji */ + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + break; + + case 2: + c1 = filter->cache; + filter->cache = 0; + filter->status = 0; + if (c1 == 0xf860) { + for (i = 0; i < 5; i++) { + if (c == code_tbl_m[i][2]) { + filter->cache = c | 0x10000; + filter->status = 3; + break; + } + } + } else if (c1 == 0xf861) { + for (i = 0; i < 3; i++) { + if (c == code_tbl_m[i+5][2]) { + filter->cache = c | 0x20000; + filter->status = 3; + break; + } + } + } else if (c1 == 0xf862) { + for (i = 0; i < 4; i++) { + if (c == code_tbl_m[i+5+3][2]) { + filter->cache = c | 0x40000; + filter->status = 3; + break; + } + } + } + + if (filter->status == 0) { + /* Didn't find any of expected codepoints after Apple transcoding hint */ + CK(mbfl_filt_conv_illegal_output(c1, filter)); + return mbfl_filt_conv_wchar_sjis_mac(c, filter); + } + break; + + case 3: + s1 = 0; + c1 = filter->cache & 0xffff; + mode = (filter->cache & 0xf0000) >> 16; + + filter->cache = 
filter->status = 0; + + if (mode == 0x1) { + for (i = 0; i < 5; i++) { + if (c1 == code_tbl_m[i][2] && c == code_tbl_m[i][3]) { + s1 = code_tbl_m[i][0]; + break; + } + } + + if (s1 > 0) { + c1 = s1/94+0x21; + c2 = s1-94*(c1-0x21)+0x21; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(0xf860, filter)); + CK(mbfl_filt_conv_illegal_output(c1, filter)); + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else if (mode == 0x2) { + for (i = 0; i < 3; i++) { + if (c1 == code_tbl_m[i+5][2] && c == code_tbl_m[i+5][3]) { + filter->cache = c | 0x20000; + filter->status = 4; + break; + } + } + } else if (mode == 0x4) { + for (i = 0; i < 4; i++) { + if (c1 == code_tbl_m[i+8][2] && c == code_tbl_m[i+8][3]) { + filter->cache = c | 0x40000; + filter->status = 4; + break; + } + } + } + break; + + case 4: + s1 = 0; + c1 = filter->cache & 0xffff; + mode = (filter->cache & 0xf0000) >> 16; + + filter->cache = 0; + filter->status = 0; + + if (mode == 0x2) { + for (i = 0; i < 3; i++) { + if (c1 == code_tbl_m[i+5][3] && c == code_tbl_m[i+5][4]) { + s1 = code_tbl_m[i+5][0]; + break; + } + } + + if (s1 > 0) { + c1 = s1/94+0x21; + c2 = s1-94*(c1-0x21)+0x21; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(0xf861, filter)); + for (i = 0; i < 3; i++) { + if (c1 == code_tbl_m[i+5][3]) { + CK(mbfl_filt_conv_illegal_output(code_tbl_m[i+5][2], filter)); + break; + } + } + CK(mbfl_filt_conv_illegal_output(c1, filter)); + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else if (mode == 0x4) { + for (i = 0; i < 4; i++) { + if (c1 == code_tbl_m[i+8][3] && c == code_tbl_m[i+8][4]) { + filter->cache = c | 0x40000; + filter->status = 5; + break; + } + } + } + break; + + case 5: + s1 = 0; + c1 = filter->cache & 0xffff; + mode = 
(filter->cache & 0xf0000) >> 16; + + filter->cache = filter->status = 0; + + if (mode == 0x4) { + for (i = 0; i < 4; i++) { + if (c1 == code_tbl_m[i+8][4] && c == code_tbl_m[i+8][5]) { + s1 = code_tbl_m[i+8][0]; + break; + } + } + + if (s1 > 0) { + c1 = s1/94+0x21; + c2 = s1-94*(c1-0x21)+0x21; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(0xf862, filter)); + for (i = 0; i < 4; i++) { + if (c1 == code_tbl_m[i+8][4]) { + CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][2], filter)); + CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][3], filter)); + break; + } + } + CK(mbfl_filt_conv_illegal_output(c1, filter)); + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter) +{ + int i, c1, s1 = 0; + if (filter->status == 1 && filter->cache > 0) { + c1 = filter->cache; + for (i=0;i 0) { + CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s1 & 0xff, filter->data)); + } + } + filter->cache = 0; + filter->status = 0; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + /* A single SJIS-Mac kuten code can convert to up to 5 Unicode codepoints, oh my! 
*/ + ZEND_ASSERT(bufsize >= 5); + + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x80 || c == 0xA0) { + if (c == 0x5C) { + *out++ = 0xA5; + } else if (c == 0x80) { + *out++ = 0x5C; + } else { + *out++ = c; + } + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else if (c <= 0xED) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (w <= 0x89) { + if (w == 0x1C) { + *out++ = 0x2014; /* EM DASH */ + continue; + } else if (w == 0x1F) { + *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + continue; + } else if (w == 0x20) { + *out++ = 0x301C; /* FULLWIDTH TILDE */ + continue; + } else if (w == 0x21) { + *out++ = 0x2016; /* PARALLEL TO */ + continue; + } else if (w == 0x3C) { + *out++ = 0x2212; /* FULLWIDTH HYPHEN-MINUS */ + continue; + } else if (w == 0x50) { + *out++ = 0xA2; /* FULLWIDTH CENT SIGN */ + continue; + } else if (w == 0x51) { + *out++ = 0xA3; /* FULLWIDTH POUND SIGN */ + continue; + } else if (w == 0x89) { + *out++ = 0xAC; /* FULLWIDTH NOT SIGN */ + continue; + } + } else { + if (w >= 0x2F0 && w <= 0x3A3) { + for (int i = 0; i < 7; i++) { + if (w >= code_tbl[i][0] && w <= code_tbl[i][1]) { + *out++ = w - code_tbl[i][0] + code_tbl[i][2]; + goto next_iteration; + } + } + } + + if (w >= 0x340 && w <= 0x523) { + for (int i = 0; i < code_tbl_m_len; i++) { + if (w == code_tbl_m[i][0]) { + int n = 5; + if (code_tbl_m[i][1] == 0xF860) { + n = 3; + } else if (code_tbl_m[i][1] == 0xF861) { + n = 4; + } + if ((limit - out) < n) { + p -= 2; + goto finished; + } + for (int j = 1; j <= n; j++) { + *out++ = code_tbl_m[i][j]; + } + goto next_iteration; + } + } + } + + if (w >= 0x3AC && w <= 0x20A5) { + for (int i = 0; i < 8; i++) { + if (w >= code_ofst_tbl[i][0] && w <= code_ofst_tbl[i][1]) { + uint32_t w2 = code_map[i][w - 
code_ofst_tbl[i][0]]; + if (!w2) { + *out++ = MBFL_BAD_INPUT; + goto next_iteration; + } + if ((limit - out) < 2) { + p -= 2; + goto finished; + } + *out++ = w2; + if (w >= 0x43E && w <= 0x441) { + *out++ = 0xF87A; + } else if (w == 0x3B1 || w == 0x3B7) { + *out++ = 0xF87F; + } else if (w == 0x4B8 || w == 0x4B9 || w == 0x4C4) { + *out++ = 0x20DD; + } else if (w == 0x1ED9 || w == 0x1EDA || w == 0x1EE8 || w == 0x1EF3 || (w >= 0x1EF5 && w <= 0x1EFB) || w == 0x1F05 || w == 0x1F06 || w == 0x1F18 || (w >= 0x1FF2 && w <= 0x20A5)) { + *out++ = 0xF87E; + } + goto next_iteration; + } + } + } + } + + if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0xFD) { + *out++ = 0xA9; + } else if (c == 0xFE) { + *out++ = 0x2122; + } else if (c == 0xFF) { + if ((limit - out) < 2) { + p--; + break; + } + *out++ = 0x2026; + *out++ = 0xF87F; + } else { + *out++ = MBFL_BAD_INPUT; + } +next_iteration: ; + } + +finished: + *in_len = e - p; + *in = p; + return out - buf; +} + +static bool process_s_form(uint32_t w, uint32_t w2, unsigned int *s) +{ + if (w2 == 0xF87A) { + for (int i = 0; i < 4; i++) { + if (w == s_form_tbl[i+34+3+3]) { + *s = s_form_sjis_tbl[i+34+3+3]; + return true; + } + } + } else if (w2 == 0x20DD) { + for (int i = 0; i < 3; i++) { + if (w == s_form_tbl[i+34+3]) { + *s = s_form_sjis_tbl[i+34+3]; + return true; + } + } + } else if (w2 == 0xF87F) { + for (int i = 0; i < 3; i++) { + if (w == s_form_tbl[i+34]) { + *s = s_form_sjis_tbl[i+34]; + return true; + } + } + } else if (w2 == 0xF87E) { + for (int i = 0; i < 34; i++) { + if (w == s_form_tbl[i]) { + *s = s_form_sjis_tbl[i]; + return true; + } + } + } + + return false; +} + +/* For codepoints F860-F862, which are treated specially in MacJapanese */ +static int transcoding_hint_cp_width[3] = { 3, 4, 5 }; + +static void mb_wchar_to_sjismac(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned 
char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + uint32_t w; + + if (buf->state) { + w = buf->state & 0xFFFF; + if (buf->state & 0xFF000000L) { + goto resume_transcoding_hint; + } else { + buf->state = 0; + goto process_codepoint; + } + } + + while (len--) { + w = *in++; +process_codepoint: ; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + if (w == 0x5C) { + s = 0x80; + } else if (w == 0xA9) { + s = 0xFD; + } else { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + if (w == 0x2122) { + s = 0xFE; + } else if (w == 0x2014) { + s = 0x213D; + } else if (w == 0x2116) { + s = 0x2C1D; + } else { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (w >= 0x2000) { + for (int i = 0; i < s_form_tbl_len; i++) { + if (w == s_form_tbl[i]) { + if (!len) { + if (end) { + s = s_form_sjis_fallback_tbl[i]; + if (s) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); + } + } else { + buf->state = w; + } + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + uint32_t w2 = *in++; + len--; + + if (!process_s_form(w, w2, &s)) { + in--; len++; + + for (int i = 0; i < s_form_tbl_len; i++) { + if (w == s_form_tbl[i]) { + s = s_form_sjis_fallback_tbl[i]; + break; + } + } + } + + if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + + goto next_iteration; + } + } + + if (w == 0xF860 || w == 0xF861 || w == 0xF862) { + /* Apple 
'transcoding hint' codepoints (from private use area) */ + if (!len) { + if (end) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); + } else { + buf->state = w; + } + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + + uint32_t w2 = *in++; + len--; + + for (int i = 0; i < code_tbl_m_len; i++) { + if (w == code_tbl_m[i][1] && w2 == code_tbl_m[i][2]) { + /* This might be a valid transcoding hint sequence */ + int index = 3; + + if (buf->state) { +resume_transcoding_hint: + i = buf->state >> 24; + index = (buf->state >> 16) & 0xFF; + buf->state = 0; + } + + int expected = transcoding_hint_cp_width[w - 0xF860]; + + while (index <= expected) { + if (!len) { + if (end) { + for (int j = 1; j < index; j++) { + MB_CONVERT_ERROR(buf, out, limit, code_tbl_m[i][j], mb_wchar_to_sjismac); + } + } else { + buf->state = (i << 24) | (index << 16) | (w & 0xFFFF); + } + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + + w2 = *in++; + len--; + + if (w2 != code_tbl_m[i][index]) { + /* Didn't match */ + for (int j = 1; j < index; j++) { + MB_CONVERT_ERROR(buf, out, limit, code_tbl_m[i][j], mb_wchar_to_sjismac); + } + MB_CONVERT_ERROR(buf, out, limit, w2, mb_wchar_to_sjismac); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + goto next_iteration; + } + + index++; + } + + /* Successful match, emit SJIS-mac bytes */ + s = code_tbl_m[i][0]; + unsigned int c1 = (s / 94) + 0x21, c2 = (s % 94) + 0x21, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + goto next_iteration; + } + } + + /* No valid transcoding hint sequence found */ + in--; len++; + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + continue; + } + } + + if (!s) { + if (w == 0xA0) { + s = 0xA0; + } else if (w == 0xA5) { /* YEN SIGN */ + /* Unicode has codepoint 0xFFE5 for a fullwidth Yen sign; + * convert codepoint 0xA5 to halfwidth Yen sign */ + s = 0x5C; /* HALFWIDTH 
YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else { + for (int i = 0; i < wchar2sjis_mac_r_tbl_len; i++) { + if (w >= wchar2sjis_mac_r_tbl[i][0] && w <= wchar2sjis_mac_r_tbl[i][1]) { + s = w - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto found_kuten_code; + } + } + + for (int i = 0; i < wchar2sjis_mac_r_map_len; i++) { + if (w >= wchar2sjis_mac_r_map[i][0] && w <= wchar2sjis_mac_r_map[i][1]) { + s = wchar2sjis_mac_code_map[i][w - wchar2sjis_mac_r_map[i][0]]; + if (s) { + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto found_kuten_code; + } + } + } + + for (int i = 0; i < wchar2sjis_mac_wchar_tbl_len; i++) { + if (w == wchar2sjis_mac_wchar_tbl[i][0]) { + s = wchar2sjis_mac_wchar_tbl[i][1]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto found_kuten_code; + } + } + } + } + +found_kuten_code: + if ((!s && w) || s >= 0x8080) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + +next_iteration: ; + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd) +{ + /* All three mobile vendors had emoji for numbers on a telephone keypad + * Unicode doesn't have those, but it has a combining character which puts + * a 'keypad button' around the following character, making it look like + * a key on a telephone or keyboard. That combining char is codepoint 0x20E3. 
*/ + if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) { + if ((s >= DOCOMO_KEYPAD(1) && s <= DOCOMO_KEYPAD(9)) || s == DOCOMO_KEYPAD(0) || s == DOCOMO_KEYPAD_HASH) { + EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min])); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]); + } + } + return 0; +} + +int mbfilter_sjis_emoji_sb2unicode(int s, int *snd) +{ + if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) { + if (s == 0x2817 || (s >= 0x2823 && s <= 0x282C)) { + EMIT_KEYPAD_EMOJI(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]); + } + } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_sb2[s - mb_tbl_code2uni_sb2_min]); + } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) { + if (s >= 0x2B02 && s <= 0x2B0B) { + EMIT_FLAG_EMOJI(nflags_sb[s - 0x2B02]); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_sb3[s - mb_tbl_code2uni_sb3_min]); + } + } + return 0; +} + +int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter) +{ + /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji + * to a sequence of 2 codepoints, one of which is a combining character which + * adds the 'key' image around the other + * + * In the other direction, look for such sequences and convert them to a + * single emoji */ + if (filter->status == 1) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c == 0x20E3) { + if (c1 == '#') { + *s1 = 0x2964; + } else if (c1 == '0') { + *s1 = 0x296F; + } else { /* Previous character was '1'-'9' */ + *s1 = 0x2966 + (c1 - '1'); + } + return 1; + } else { + /* This character wasn't combining character to make keypad symbol, + * so pass the previous 
character through... and proceed to process the + * current character as usual + * (Single-byte ASCII characters are valid in Shift-JIS...) */ + CK((*filter->output_function)(c1, filter->data)); + } + } + + if (c == '#' || (c >= '0' && c <= '9')) { + filter->status = 1; + filter->cache = c; + return 0; + } + + if (c == 0xA9) { /* Copyright sign */ + *s1 = 0x29B5; + return 1; + } else if (c == 0x00AE) { /* Registered sign */ + *s1 = 0x29BA; + return 1; + } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) { + int i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len); + if (i >= 0) { + *s1 = mb_tbl_uni_docomo2code2_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) { + int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len); + if (i >= 0) { + *s1 = mb_tbl_uni_docomo2code3_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) { + int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len); + if (i >= 0) { + *s1 = mb_tbl_uni_docomo2code5_val[i]; + return 1; + } + } + return 0; +} + +int mbfilter_unicode2sjis_emoji_kddi_sjis(int c, int *s1, mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c == 0x20E3) { + if (c1 == '#') { + *s1 = 0x25BC; + } else if (c1 == '0') { + *s1 = 0x2830; + } else { /* Previous character was '1'-'9' */ + *s1 = 0x27a6 + (c1 - '1'); + } + return 1; + } else { + CK((*filter->output_function)(c1, filter->data)); + } + } else if (filter->status == 2) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (c1 == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) { + *s1 = nflags_code_kddi[i]; + return 1; + } 
+ } + } + + /* If none of the KDDI national flag emoji matched, then we have no way + * to convert the previous codepoint... */ + mbfl_filt_conv_illegal_output(c1, filter); + } + + if (c == '#' || (c >= '0' && c <= '9')) { + filter->status = 1; + filter->cache = c; + return 0; + } else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */ + filter->status = 2; + filter->cache = c; + return 0; + } + + if (c == 0xA9) { /* Copyright sign */ + *s1 = 0x27DC; + return 1; + } else if (c == 0xAE) { /* Registered sign */ + *s1 = 0x27DD; + return 1; + } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) { + int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code2_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) { + int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code3_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) { + int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code5_val[i]; + return 1; + } + } + return 0; +} + +int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c == 0x20E3) { + if (c1 == '#') { + *s1 = 0x2817; + } else if (c1 == '0') { + *s1 = 0x282c; + } else { /* Previous character was '1'-'9' */ + *s1 = 0x2823 + (c1 - '1'); + } + return 1; + } else { + (*filter->output_function)(c1, filter->data); + } + } else if (filter->status == 2) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (c1 == NFLAGS(nflags_s[i][0]) && c == 
NFLAGS(nflags_s[i][1])) { + *s1 = nflags_code_sb[i]; + return 1; + } + } + } + + /* If none of the SoftBank national flag emoji matched, then we have no way + * to convert the previous codepoint... */ + mbfl_filt_conv_illegal_output(c1, filter); + } + + if (c == '#' || (c >= '0' && c <= '9')) { + filter->status = 1; + filter->cache = c; + return 0; + } else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */ + filter->status = 2; + filter->cache = c; + return 0; + } + + if (c == 0xA9) { /* Copyright sign */ + *s1 = 0x2855; + return 1; + } else if (c == 0xAE) { /* Registered sign */ + *s1 = 0x2856; + return 1; + } else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) { + int i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len); + if (i >= 0) { + *s1 = mb_tbl_uni_sb2code2_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) { + int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len); + if (i >= 0) { + *s1 = mb_tbl_uni_sb2code3_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) { + int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len); + if (i >= 0) { + *s1 = mb_tbl_uni_sb2code5_val[i]; + return 1; + } + } + return 0; +} + +static int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, s1, s2, w, snd = 0; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* ASCII */ + if (filter->from == &mbfl_encoding_sjis_sb && c == 0x1B) { + /* ESC; escape sequences were used on older SoftBank phones for emoji */ + filter->cache = c; + filter->status = 2; + } else { + CK((*filter->output_function)(c, filter->data)); + } + } else if (c > 0xA0 && c < 0xE0) { /* Kana */ + CK((*filter->output_function)(0xFEC0 + c, filter->data)); + } else if (c > 0x80 && c < 0xFD && c != 0xA0) { /* Kanji, first byte */ + filter->status = 
1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* Kanji, second byte */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0x40 && c <= 0xFC && c != 0x7F) { + w = 0; + SJIS_DECODE(c1, c, s1, s2); + s = ((s1 - 0x21) * 94) + s2 - 0x21; + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + + /* Emoji */ + if (filter->from == &mbfl_encoding_sjis_docomo && s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) { + w = mbfilter_sjis_emoji_docomo2unicode(s, &snd); + if (snd > 0) { + CK((*filter->output_function)(snd, filter->data)); + } + } else if (filter->from == &mbfl_encoding_sjis_kddi && s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) { + w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); + if (snd > 0) { + CK((*filter->output_function)(snd, filter->data)); + } + } else if (filter->from == &mbfl_encoding_sjis_sb && s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) { + w = mbfilter_sjis_emoji_sb2unicode(s, &snd); + if (snd > 0) { + CK((*filter->output_function)(snd, filter->data)); + } + } + + if (w == 0) { + if (s >= 
cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ + w = s - (94*94) + 0xe000; + } + } + } + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC: Softbank Emoji */ + case 2: + if (c == '$') { + filter->cache = c; + filter->status++; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + filter->status = filter->cache = 0; + } + break; + + /* ESC $: Softbank Emoji */ + case 3: + if ((c >= 'E' && c <= 'G') || (c >= 'O' && c <= 'Q')) { + filter->cache = c; + filter->status++; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + filter->status = filter->cache = 0; + } + break; + + /* ESC $ [GEFOPQ]: Softbank Emoji */ + case 4: + c1 = filter->cache; + if (c == 0xF) { /* Terminate sequence of emoji */ + filter->status = filter->cache = 0; + return 0; + } else { + if (c1 == 'G' && c >= 0x21 && c <= 0x7a) { + s1 = (0x91 - 0x21) * 94; + } else if (c1 == 'E' && c >= 0x21 && c <= 0x7A) { + s1 = (0x8D - 0x21) * 94; + } else if (c1 == 'F' && c >= 0x21 && c <= 0x7A) { + s1 = (0x8E - 0x21) * 94; + } else if (c1 == 'O' && c >= 0x21 && c <= 0x6D) { + s1 = (0x92 - 0x21) * 94; + } else if (c1 == 'P' && c >= 0x21 && c <= 0x6C) { + s1 = (0x95 - 0x21) * 94; + } else if (c1 == 'Q' && c >= 0x21 && c <= 0x5E) { + s1 = (0x96 - 0x21) * 94; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + filter->status = filter->cache = 0; + return 0; + } + + w = mbfilter_sjis_emoji_sb2unicode(s1 + c - 0x21, &snd); + if (w > 0) { + if (snd > 0) { + CK((*filter->output_function)(snd, filter->data)); + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + 
filter->status = filter->cache = 0; + } + } + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0, s2 = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xE000 && c < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s1 = c - 0xE000; + c1 = (s1 / 94) + 0x7F; + c2 = (s1 % 94) + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + + if (s1 <= 0) { + if (c == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215D; + } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } + } + + if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + + /* CP932 vendor ext1 (13ku) */ + for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { + if (c == cp932ext1_ucs_table[c1]) { + s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; + break; + } + } + + if (s1 <= 0) { + /* CP932 vendor ext2 (115ku - 119ku) */ + for (c1 = 0; c1 < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; c1++) { + if (c == cp932ext2_ucs_table[c1]) { + s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21; + break; + } + } + } + + if (c == 0) { + s1 = 0; + } + } + + if ((filter->to == 
&mbfl_encoding_sjis_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter)) || + (filter->to == &mbfl_encoding_sjis_kddi && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter)) || + (filter->to == &mbfl_encoding_sjis_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter))) { + s1 = (((s1 / 94) + 0x21) << 8) | ((s1 % 94) + 0x21); + } + + if (filter->status) { + return 0; + } + + if (s1 >= 0) { + if (s1 < 0x100) { /* Latin/Kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* Kanji */ + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter) +{ + int c1 = filter->cache; + if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) { + filter->cache = filter->status = 0; + CK((*filter->output_function)(c1, filter->data)); + } else if (filter->status == 2) { + /* First of a pair of Regional Indicator codepoints came at the end of a string */ + filter->cache = filter->status = 0; + mbfl_filt_conv_illegal_output(c1, filter); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static const unsigned short sjis_mobile_decode_tbl1[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, 
-5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 8836, 9024, 9212, 9400, 9588, 9776, 9964, 10152, 10340, 10528, 10716, 10904, 11092, 0xFFFF, 0xFFFF, 0xFFFF +}; + +static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + /* Leave one extra space available in output buffer, since some iterations of + * main loop (below) may emit two wchars */ + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + /* Kana */ + *out++ = 0xFEC0 + c; + } else { + /* Kanji */ + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (w <= 137) { + if (w == 31) { + *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + continue; + } else if (w == 32) { + *out++ = 0xFF5E; /* FULLWIDTH TILDE */ + continue; + } else if (w == 33) { + *out++ = 0x2225; /* PARALLEL TO */ + continue; + } else if (w == 60) { + *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + continue; + } else if (w == 80) { + *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ + continue; + } else if (w == 81) { + *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ + continue; + } else if (w == 137) { + *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ + continue; + } + } + + if (w >= mb_tbl_code2uni_docomo1_min && w <= mb_tbl_code2uni_docomo1_max) { + int snd = 0; + w = 
mbfilter_sjis_emoji_docomo2unicode(w, &snd); + if (snd) { + *out++ = snd; + } + } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; + } else if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; + } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; + } else if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } else { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + *out++ = MBFL_BAD_INPUT; + continue; + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 
1 : 0)); + + uint32_t w; + unsigned int s = 0; + + if (buf->state) { + /* Continue what we were doing on the previous call */ + w = buf->state; + buf->state = 0; + goto reprocess_wchar; + } + + while (len--) { + w = *in++; +reprocess_wchar: + s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s = w - 0xE000; + s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); + goto process_emoji; + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if (w && (!s || s >= 0x8080)) { + s = 0; + + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (w == cp932ext2_ucs_table[i]) { + s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + } + +process_emoji: + /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji + * to a sequence of 2 codepoints, one of which 
is a combining character which + * adds the 'key' image around the other + * + * In the other direction, look for such sequences and convert them to a + * single emoji */ + if (w == '#' || (w >= '0' && w <= '9')) { + if (!len) { + if (end) { + goto emit_output; + } else { + /* If we are at the end of the current buffer of codepoints, but another + * buffer is coming, then remember that we have to reprocess `w` */ + buf->state = w; + break; + } + } + uint32_t w2 = *in++; len--; + if (w2 == 0x20E3) { + if (w == '#') { + s = 0x2964; + } else if (w == '0') { + s = 0x296F; + } else { /* Previous character was '1'-'9' */ + s = 0x2966 + (w - '1'); + } + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } else { + in--; len++; + } + } else if (w == 0xA9) { /* Copyright sign */ + s = (((0x29B5 / 94) + 0x21) << 8) | ((0x29B5 % 94) + 0x21); + } else if (w == 0xAE) { /* Registered sign */ + s = (((0x29BA / 94) + 0x21) << 8) | ((0x29BA % 94) + 0x21); + } else if (w >= mb_tbl_uni_docomo2code2_min && w <= mb_tbl_uni_docomo2code2_max) { + int i = mbfl_bisec_srch2(w, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len); + if (i >= 0) { + s = mb_tbl_uni_docomo2code2_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_docomo2code3_min && w <= mb_tbl_uni_docomo2code3_max) { + int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len); + if (i >= 0) { + s = mb_tbl_uni_docomo2code3_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_docomo2code5_min && w <= mb_tbl_uni_docomo2code5_max) { + int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len); + if (i >= 0) { + s = mb_tbl_uni_docomo2code5_val[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } + +emit_output: + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_docomo); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 
0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + /* Kana */ + *out++ = 0xFEC0 + c; + } else { + /* Kanji */ + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (w <= 137) { + if (w == 31) { + *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + continue; + } else if (w == 32) { + *out++ = 0xFF5E; /* FULLWIDTH TILDE */ + continue; + } else if (w == 33) { + *out++ = 0x2225; /* PARALLEL TO */ + continue; + } else if (w == 60) { + *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + continue; + } else if (w == 80) { + *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ + continue; + } else if (w == 81) { + *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ + continue; + } else if (w == 137) { + *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ + continue; + } + } + + if (w >= mb_tbl_code2uni_kddi1_min && w <= mb_tbl_code2uni_kddi2_max) { + int snd = 0; + w = mbfilter_sjis_emoji_kddi2unicode(w, &snd); + if (!w) { + w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } + } else if (snd) { + *out++ = snd; + } + } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; + } else if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + } else if (w >= 
cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; + } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; + } else if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } else { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + *out++ = MBFL_BAD_INPUT; + continue; + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0)); + + uint32_t w; + unsigned int s = 0; + + if (buf->state) { + w = buf->state; + buf->state = 0; + goto reprocess_wchar; + } + + while (len--) { + w = *in++; +reprocess_wchar: + s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s = w - 0xE000; + s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); + goto process_emoji; + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* 
FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if (w && (!s || s >= 0x8080)) { + s = 0; + + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (w == cp932ext2_ucs_table[i]) { + s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + } + +process_emoji: + if (w == '#' || (w >= '0' && w <= '9')) { + if (!len) { + if (end) { + goto emit_output; + } else { + /* If we are at the end of the current buffer of codepoints, but another + * buffer is coming, then remember that we have to reprocess `w` */ + buf->state = w; + break; + } + } + uint32_t w2 = *in++; len--; + if (w2 == 0x20E3) { + if (w == '#') { + s = 0x25BC; + } else if (w == '0') { + s = 0x2830; + } else { /* Previous character was '1'-'9' */ + s = 0x27A6 + (w - '1'); + } + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } else { + in--; len++; + } + } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */ + if (!len) { + if (end) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); + } else { + /* Reprocess `w` when this function is called again with another buffer + * of wchars */ + buf->state = w; + } + break; + } + uint32_t w2 = *in++; len--; + if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { + s = nflags_code_kddi[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto emit_output; + } + } + } + in--; len++; + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + continue; + } else if (w == 0xA9) { /* Copyright sign */ + s = (((0x27DC / 94) + 0x21) << 8) | ((0x27DC % 94) 
+ 0x21); + } else if (w == 0xAE) { /* Registered sign */ + s = (((0x27DD / 94) + 0x21) << 8) | ((0x27DD % 94) + 0x21); + } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) { + int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); + if (i >= 0) { + s = mb_tbl_uni_kddi2code2_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) { + int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); + if (i >= 0) { + s = mb_tbl_uni_kddi2code3_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) { + int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); + if (i >= 0) { + s = mb_tbl_uni_kddi2code5_val[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } + +emit_output: + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + if (*state) { + goto softbank_emoji_escapes; + } + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + /* Escape sequence */ + if (p == e || *p++ != '$' || p == e) { + *out++ = MBFL_BAD_INPUT; + continue; + } + unsigned char c2 = *p++; + if ((c2 < 'E' || c2 > 'G') && (c2 < 'O' || c2 > 'Q')) { + *out++ = 
MBFL_BAD_INPUT; + continue; + } + /* Escape sequence was valid, next should be a series of specially + * encoded Softbank emoji */ + *state = c2; + +softbank_emoji_escapes: + while (p < e && out < limit) { + c = *p++; + if (c == 0xF) { + *state = 0; + break; + } + unsigned int s = 0; + if (*state == 'G' && c >= 0x21 && c <= 0x7A) { + s = (0x91 - 0x21) * 94; + } else if (*state == 'E' && c >= 0x21 && c <= 0x7A) { + s = (0x8D - 0x21) * 94; + } else if (*state == 'F' && c >= 0x21 && c <= 0x7A) { + s = (0x8E - 0x21) * 94; + } else if (*state == 'O' && c >= 0x21 && c <= 0x6D) { + s = (0x92 - 0x21) * 94; + } else if (*state == 'P' && c >= 0x21 && c <= 0x6C) { + s = (0x95 - 0x21) * 94; + } else if (*state == 'Q' && c >= 0x21 && c <= 0x5E) { + s = (0x96 - 0x21) * 94; + } else { + *out++ = MBFL_BAD_INPUT; + *state = 0; + break; + } + + int snd = 0; + uint32_t w = mbfilter_sjis_emoji_sb2unicode(s + c - 0x21, &snd); + if (w) { + if (snd) { + *out++ = snd; + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + *state = 0; + break; + } + } + } else if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + /* Kana */ + *out++ = 0xFEC0 + c; + } else { + /* Kanji */ + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (w <= 137) { + if (w == 31) { + *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + continue; + } else if (w == 32) { + *out++ = 0xFF5E; /* FULLWIDTH TILDE */ + continue; + } else if (w == 33) { + *out++ = 0x2225; /* PARALLEL TO */ + continue; + } else if (w == 60) { + *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + continue; + } else if (w == 80) { + *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ + continue; + } else if (w == 81) { + *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ + continue; + } else if (w == 137) { + *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ + continue; + } + } + + if (w >= mb_tbl_code2uni_sb1_min && w <= mb_tbl_code2uni_sb3_max) { + int 
snd = 0; + w = mbfilter_sjis_emoji_sb2unicode(w, &snd); + if (!w) { + w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; + } else if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } + } else if (snd) { + *out++ = snd; + } + } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; + } else if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; + } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; + } else if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } else { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + *out++ = MBFL_BAD_INPUT; + continue; + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 
1 : 0)); + + uint32_t w; + unsigned int s = 0; + + if (buf->state) { + w = buf->state; + buf->state = 0; + goto reprocess_wchar; + } + + while (len--) { + w = *in++; +reprocess_wchar: + s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s = w - 0xE000; + s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); + goto process_emoji; + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if (w && (!s || s >= 0x8080)) { + s = 0; + + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (w == cp932ext2_ucs_table[i]) { + s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + } + +process_emoji: + if (w == '#' || (w >= '0' && w <= '9')) { + if (!len) { + if (end) { + goto emit_output; + } else { + /* If we are at the end of the current buffer of codepoints, but another + * 
buffer is coming, then remember that we have to reprocess `w` */ + buf->state = w; + break; + } + } + uint32_t w2 = *in++; len--; + if (w2 == 0x20E3) { + if (w == '#') { + s = 0x2817; + } else if (w == '0') { + s = 0x282c; + } else { /* Previous character was '1'-'9' */ + s = 0x2823 + (w - '1'); + } + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } else { + in--; len++; + } + } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */ + if (!len) { + if (end) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); + } else { + /* Reprocess `w` when this function is called again with + * another buffer of wchars */ + buf->state = w; + } + break; + } + uint32_t w2 = *in++; len--; + if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { + s = nflags_code_sb[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto emit_output; + } + } + } + in--; len++; + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + continue; + } else if (w == 0xA9) { /* Copyright sign */ + s = (((0x2855 / 94) + 0x21) << 8) | ((0x2855 % 94) + 0x21); + } else if (w == 0xAE) { /* Registered sign */ + s = (((0x2856 / 94) + 0x21) << 8) | ((0x2856 % 94) + 0x21); + } else if (w >= mb_tbl_uni_sb2code2_min && w <= mb_tbl_uni_sb2code2_max) { + int i = mbfl_bisec_srch2(w, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len); + if (i >= 0) { + s = mb_tbl_uni_sb2code2_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_sb2code3_min && w <= mb_tbl_uni_sb2code3_max) { + int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len); + if (i >= 0) { + s = mb_tbl_uni_sb2code3_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_sb2code5_min && w <= mb_tbl_uni_sb2code5_max) { + int i = 
mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len); + if (i >= 0) { + s = mb_tbl_uni_sb2code5_val[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } + +emit_output: + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_sjis2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + if (c == 0x5C) { + *out++ = 0xA5; + } else if (c == 0x7E) { + *out++ = 0x203E; + } else { + *out++ = c; + } + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else if (c > 0x80 && c < 0xFD && c != 0xA0) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + + if (c2 < 0x40 || c2 > 0xFC || c2 == 0x7F) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + unsigned int s1, s2; + SJIS_DECODE(c, c2, s1, s2); + unsigned int w1 = (s1 << 8) | s2, w = 0; + + /* Conversion for combining characters */ + if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { + int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); + if (k >= 0) { + *out++ = jisx0213_u2_tbl[2*k]; + *out++ = jisx0213_u2_tbl[2*k+1]; + continue; + } + } + + /* Conversion for BMP */ + w1 = (s1 - 0x21)*94 + s2 - 0x21; + if (w1 < jisx0213_ucs_table_size) { + w = 
jisx0213_ucs_table[w1]; + } + + /* Conversion for CJK Unified Ideographs extension B (U+2XXXX) */ + if (!w) { + w1 = (s1 << 8) | s2; + int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + uint32_t w; + if (buf->state) { + w = buf->state; + buf->state = 0; + goto process_codepoint; + } + + while (len--) { + w = *in++; +process_codepoint: ; + unsigned int s = 0; + + if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { + for (int k = 0; k < jisx0213_u2_tbl_len; k++) { + if (w == jisx0213_u2_tbl[2*k]) { + if (!len) { + if (!end) { + buf->state = w; + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + } else { + uint32_t w2 = *in++; len--; + if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { + k++; + } + if (w2 == jisx0213_u2_tbl[2*k+1]) { + s = jisx0213_u2_key[k]; + break; + } + in--; len++; + } + + /* Fallback */ + s = jisx0213_u2_fb_tbl[k]; + break; + } + } + } + + /* Check for major Japanese chars: U+4E00-U+9FFF */ + if (!s) { + for (int k = 0; k < uni2jis_tbl_len; k++) { + if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { + s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; + break; + } + } + } + + /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ + if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { + int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); + if (k >= 0) { + s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; + } + } + + /* Check for 
Japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ + if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { + int k = mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); + if (k >= 0) { + s = jisx0213_u5_jis_tbl[k]; + } + } + + if (!s) { + /* CJK Compatibility Forms: U+FE30-U+FE4F */ + if (w == 0xFE45) { + s = 0x233E; + } else if (w == 0xFE46) { + s = 0x233D; + } else if (w >= 0xF91D && w <= 0xF9DC) { + /* CJK Compatibility Ideographs: U+F900-U+F92A */ + int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); + if (k >= 0) { + s = ucs_r2b_jisx0213_cmap_val[k]; + } + } + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis2004); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, s1, s2, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* kanji second char */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { + w = 0; + SJIS_DECODE(c1, c, s1, s2); + s = (s1 - 0x21)*94 + s2 - 0x21; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w 
= 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ + w = s - (94*94) + 0xe000; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + filter->status = 0; + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1, s2; + + s1 = 0; + s2 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { + s1 = 0x7E; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = 
ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x7f; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + if (s1 <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x5C; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 <= 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + if (s1 >= 0) { + if (s1 < 0x100) { /* latin or kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* kanji */ + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter) +{ + if (c == 0xA5) { + CK((*filter->output_function)(0x81, 
filter->data)); + CK((*filter->output_function)(0x8F, filter->data)); + } else if (c == 0x203E) { + CK((*filter->output_function)(0x81, filter->data)); + CK((*filter->output_function)(0x50, filter->data)); + } else { + return mbfl_filt_conv_wchar_cp932(c, filter); + } + return 0; +} + +static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c > 0xA0 && c < 0xE0) { + /* Kana */ + *out++ = 0xFEC0 + c; + } else if (c > 0x80 && c < 0xFD && c != 0xA0 && p < e) { + unsigned char c2 = *p++; + + if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) { + unsigned int s1, s2, w = 0; + SJIS_DECODE(c, c2, s1, s2); + unsigned int s = (s1 - 0x21)*94 + s2 - 0x21; + + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { + w = s - (94*94) + 0xE000; + } + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { 
+ *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s1 = 0, s2 = 0, c1, c2; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w == 0x203E) { + s1 = 0x7E; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + s1 = w - 0xE000; + c1 = s1/94 + 0x7F; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + + if (w == 0xA5) { /* YEN SIGN */ + s1 = 0x5C; + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } else if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } + + if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */ + for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (cp932ext1_ucs_table[i] == w) { + s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21); + goto emit_output; + } + } + + for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { + if 
(cp932ext3_ucs_table[i] == w) { + s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21); + goto emit_output; + } + } + + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + +emit_output: + if (s1 < 0x100) { + out = mb_convert_buf_add(out, s1); + } else { + c1 = (s1 >> 8) & 0xFF; + c2 = s1 & 0xFF; + SJIS_ENCODE(c1, c2, s1, s2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s1 = 0, s2 = 0, c1, c2; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + s1 = w - 0xE000; + c1 = s1/94 + 0x7F; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + + if (w == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } else if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } + + if (!s1 || (s1 >= 0x8080 
&& !s2)) { /* not found or X 0212 */ + for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (cp932ext1_ucs_table[i] == w) { + s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21); + goto emit_output; + } + } + + for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { + if (cp932ext3_ucs_table[i] == w) { + s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21); + goto emit_output; + } + } + + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + +emit_output: + if (s1 < 0x100) { + out = mb_convert_buf_add(out, s1); + } else { + c1 = (s1 >> 8) & 0xFF; + c2 = s1 & 0xFF; + SJIS_ENCODE(c1, c2, s1, s2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const unsigned char mblen_table_sjis[] = { /* 0x81-0x9F,0xE0-0xEF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static const unsigned char mblen_table_sjismac[] = { /* 0x81-0x9F,0xE0-0xED */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static const unsigned char mblen_table_sjis_mobile[] = { /* 0x81-0x9F,0xE0-0xFC */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 +}; + +static const char *mbfl_encoding_sjis_aliases[] = {"x-sjis", "SHIFT-JIS", NULL}; + +static const struct mbfl_convert_vtbl vtbl_sjis_wchar = { + mbfl_no_encoding_sjis, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis, + mbfl_filt_conv_common_flush, + 
NULL +}; + +const mbfl_encoding mbfl_encoding_sjis = { + mbfl_no_encoding_sjis, + "SJIS", + "Shift_JIS", + mbfl_encoding_sjis_aliases, + mblen_table_sjis, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_wchar, + &vtbl_wchar_sjis, + mb_sjis_to_wchar, + mb_wchar_to_sjis, + NULL +}; + +static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL}; + +static const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = { + mbfl_no_encoding_sjis_mac, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_mac_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_mac, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis_mac, + mbfl_filt_conv_wchar_sjis_mac_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis_mac = { + mbfl_no_encoding_sjis_mac, + "SJIS-mac", + "Shift_JIS", + mbfl_encoding_sjis_mac_aliases, + mblen_table_sjismac, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_mac_wchar, + &vtbl_wchar_sjis_mac, + mb_sjismac_to_wchar, + mb_wchar_to_sjismac, + NULL +}; + +static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL}; +static const char *mbfl_encoding_sjis_kddi_aliases[] = {"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL}; +static const char *mbfl_encoding_sjis_sb_aliases[] = {"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL}; + +static const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = { + mbfl_no_encoding_sjis_docomo, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_mobile_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_docomo, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis_mobile, + mbfl_filt_conv_sjis_mobile_flush, + 
NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis_docomo = { + mbfl_no_encoding_sjis_docomo, + "SJIS-Mobile#DOCOMO", + "Shift_JIS", + mbfl_encoding_sjis_docomo_aliases, + mblen_table_sjis_mobile, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_docomo_wchar, + &vtbl_wchar_sjis_docomo, + mb_sjis_docomo_to_wchar, + mb_wchar_to_sjis_docomo, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = { + mbfl_no_encoding_sjis_kddi, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_mobile_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_kddi, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis_mobile, + mbfl_filt_conv_sjis_mobile_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis_kddi = { + mbfl_no_encoding_sjis_kddi, + "SJIS-Mobile#KDDI", + "Shift_JIS", + mbfl_encoding_sjis_kddi_aliases, + mblen_table_sjis_mobile, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_kddi_wchar, + &vtbl_wchar_sjis_kddi, + mb_sjis_kddi_to_wchar, + mb_wchar_to_sjis_kddi, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = { + mbfl_no_encoding_sjis_sb, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_mobile_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_sb, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis_mobile, + mbfl_filt_conv_sjis_mobile_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis_sb = { + mbfl_no_encoding_sjis_sb, + "SJIS-Mobile#SOFTBANK", + "Shift_JIS", + mbfl_encoding_sjis_sb_aliases, + mblen_table_sjis_mobile, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_sb_wchar, + &vtbl_wchar_sjis_sb, + mb_sjis_sb_to_wchar, + mb_wchar_to_sjis_sb, + NULL +}; + +/* Although the specification for Shift-JIS-2004 indicates that 
0x5C and + * 0x7E should (respectively) represent a Yen sign and an overbar, feedback + * from Japanese PHP users indicates that they prefer 0x5C and 0x7E to be + * treated as equivalent to U+005C and U+007E. This is the historical + * behavior of mbstring, and promotes compatibility with other software + * which handles Shift-JIS and Shift-JIS-2004 text in this way. */ + +static const char *mbfl_encoding_sjis2004_aliases[] = {"SJIS2004","Shift_JIS-2004", NULL}; + +static const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = { + mbfl_no_encoding_sjis2004, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis2004_wchar, + mbfl_filt_conv_jis2004_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis2004, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_jis2004, + mbfl_filt_conv_wchar_jis2004_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis2004 = { + mbfl_no_encoding_sjis2004, + "SJIS-2004", + "Shift_JIS", + mbfl_encoding_sjis2004_aliases, + mblen_table_sjis_mobile, /* Leading byte values used for SJIS-2004 are the same as mobile SJIS variants */ + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis2004_wchar, + &vtbl_wchar_sjis2004, + mb_sjis2004_to_wchar, + mb_wchar_to_sjis2004, + NULL +}; + +/* CP932 is Microsoft's version of Shift-JIS. + * + * What we call "SJIS-win" is a variant of CP932 which maps U+00A5 + * and U+203E the same way as eucJP-win; namely, instead of mapping + * U+00A5 (YEN SIGN) to 0x5C and U+203E (OVERLINE) to 0x7E, + * these codepoints are mapped to appropriate JIS X 0208 characters. + * + * When converting from Shift-JIS to Unicode, there is no difference + * between CP932 and "SJIS-win". + * + * Additional facts: + * + * • In the libmbfl library which formed the base for mbstring, "CP932" and + * "SJIS-win" were originally aliases. The differing mappings were added in + * December 2002. 
The libmbfl author later stated that this was done so that + * "CP932" would comply with a certain specification, while "SJIS-win" would + * maintain the existing mappings. He does not remember which specification + * it was. + * • The WHATWG specification for "Shift_JIS" (followed by web browsers) + * agrees with our mappings for "CP932". + * • Microsoft Windows' "best-fit" mappings for CP932 (via the + * WideCharToMultiByte API) convert U+00A5 to 0x5C, which also agrees with + * our mappings for "CP932". + * • glibc's iconv converts U+203E to CP932 0x7E, which again agrees with + * our mappings for "CP932". + * • When converting Shift-JIS to CP932, the conversion goes through Unicode. + * Shift-JIS 0x7E converts to U+203E, so mapping U+203E to 0x7E means that + * 0x7E will go to 0x7E when converting Shift-JIS to CP932. + */ + +static const unsigned char mblen_table_sjiswin[] = { /* 0x80-0x9F,0xE0-0xFF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL}; +static const char *mbfl_encoding_sjiswin_aliases[] = {"SJIS-ms", "SJIS-open", NULL}; + +static const struct mbfl_convert_vtbl vtbl_cp932_wchar = { + mbfl_no_encoding_cp932, + 
mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp932_wchar, + mbfl_filt_conv_cp932_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp932 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp932, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_cp932, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_cp932 = { + mbfl_no_encoding_cp932, + "CP932", + "Shift_JIS", + mbfl_encoding_cp932_aliases, + mblen_table_sjiswin, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_cp932_wchar, + &vtbl_wchar_cp932, + mb_cp932_to_wchar, + mb_wchar_to_cp932, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = { + mbfl_no_encoding_sjiswin, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp932_wchar, + mbfl_filt_conv_cp932_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjiswin, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjiswin, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjiswin = { + mbfl_no_encoding_sjiswin, + "SJIS-win", + "Shift_JIS", + mbfl_encoding_sjiswin_aliases, + mblen_table_sjiswin, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjiswin_wchar, + &vtbl_wchar_sjiswin, + mb_cp932_to_wchar, + mb_wchar_to_sjiswin, + NULL +}; + +/* + * EUC variants + */ + +static int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w = 0; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xff) { /* X 0208 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else if (c == 0x8f) { /* X 0212 first char */ + filter->status = 3; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: 
/* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + if (!w) + w = MBFL_BAD_INPUT; + } else { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 2: /* got 0x8e */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* got 0x8f, JIS X 0212 first byte */ + filter->status++; + filter->cache = c; + break; + + case 4: /* got 0x8f, JIS X 0212 second byte */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xA0 && c < 0xFF && c1 > 0xA0 && c1 < 0xFF) { + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + if (!w) + w = MBFL_BAD_INPUT; + } else { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + filter->status = 0; + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c == 0xAF) { /* U+00AF is MACRON */ + s = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= 
ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s <= 0) { + if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } else if (c == 0) { + s = 0; + } else { + s = -1; + } + } + if (s >= 0) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else if (s < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s & 0xff) | 0x80, filter->data)); + } else { /* X 0212 */ + CK((*filter->output_function)(0x8f, filter->data)); + CK((*filter->output_function)(((s >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s & 0xff) | 0x80, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xFE && p < e) { + /* JISX 0208 */ + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int s = (c - 0xA1)*94 + c2 - 0xA1; + if (s < jisx0208_ucs_table_size) { + uint32_t w = jisx0208_ucs_table[s]; + if (!w) + w = 
MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8E && p < e) { + /* Kana */ + unsigned char c2 = *p++; + *out++ = (c2 >= 0xA1 && c2 <= 0xDF) ? 0xFEC0 + c2 : MBFL_BAD_INPUT; + } else if (c == 0x8F) { + /* JISX 0212 */ + if ((e - p) >= 2) { + unsigned char c2 = *p++; + unsigned char c3 = *p++; + if (c3 >= 0xA1 && c3 <= 0xFE && c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1; + if (s < jisx0212_ucs_table_size) { + uint32_t w = jisx0212_ucs_table[s]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + p = e; /* Jump to end of string */ + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w == 0xAF) { /* U+00AF is MACRON */ + s = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s == 0) { + if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; 
+ } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + } + + if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else if (s < 0x100) { + out = mb_convert_buf_add2(out, 0x8E, s); + } else if (s < 0x8080) { + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); + out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w, n; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c >= 0xa1 && c <= 0xfe) { /* CP932 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else if (c == 0x8f) { /* X 0212 first char */ + filter->status = 3; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + w = 0; + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= 
cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= (84 * 94)) { /* user (85ku - 94ku) */ + w = s - (84 * 94) + 0xe000; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 2: /* got 0x8e, X0201 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* got 0x8f, X 0212 first char */ + filter->status++; + filter->cache = c; + break; + + case 4: /* got 0x8f, X 0212 second char */ + filter->status = 0; + c1 = filter->cache; + if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) { + s = (c1 - 0xa1)*94 + c - 0xa1; + + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + + if (w == 0x007e) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } + } else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */ + s = (c1 << 8) | c; + w = 0; + n = 0; + while (n < cp932ext3_eucjp_table_size) { + if (s == cp932ext3_eucjp_table[n]) { + if (n < (cp932ext3_ucs_table_max - cp932ext3_ucs_table_min)) { + w = cp932ext3_ucs_table[n]; + } + break; + } + n++; + } + } else if (s >= (84*94)) { /* user (85ku - 94ku) */ + w = s - (84*94) + (0xe000 + (94*10)); + } else { + w = 0; + } + + if (w == 0x00A6) { + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int 
mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + filter->status = 0; + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0; + + if (c == 0xAF) { /* U+00AF is MACRON */ + s1 = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (c == 0x203E) { + s1 = 0x7E; + } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 10*94)) { /* user (X0208 85ku - 94ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x75; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + } else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) { /* user (X0212 85ku - 94ku) */ + s1 = c - (0xe000 + 10*94); + c1 = s1/94 + 0xf5; + c2 = s1%94 + 0xa1; + s1 = (c1 << 8) | c2; + } + + if (s1 == 0xa2f1) { + s1 = 0x2d62; /* NUMERO SIGN */ + } + + if (s1 <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x5C; + } else if (c == 0x2014) { + s1 = 0x213D; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } else { + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - 
cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1 / 94 + oh + 0x21) << 8) + (c1 % 94 + 0x21); + break; + } + c1++; + } + if (s1 < 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + if (c1 < cp932ext3_eucjp_table_size) { + s1 = cp932ext3_eucjp_table[c1]; + } + break; + } + c1++; + } + } + } + + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } else { /* X 0212 */ + CK((*filter->output_function)(0x8f, filter->data)); + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xFE && p < e) { + unsigned char c2 = *p++; + + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; + + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 
0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= (84 * 94)) { + w = s - (84 * 94) + 0xE000; + } + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8E && p < e) { + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xDF) { + *out++ = 0xFEC0 + c2; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8F && p < e) { + unsigned char c2 = *p++; + if (p == e) { + *out++ = MBFL_BAD_INPUT; + continue; + } + unsigned char c3 = *p++; + + if (c2 >= 0xA1 && c2 <= 0xFE && c3 >= 0xA1 && c3 <= 0xFE) { + unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1, w = 0; + + if (s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + if (w == 0x7E) + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s >= (82*94) && s < (84*94)) { + s = (c2 << 8) | c3; + for (int i = 0; i < cp932ext3_eucjp_table_size; i++) { + if (cp932ext3_eucjp_table[i] == s) { + w = cp932ext3_ucs_table[i]; + break; + } + } + } else if (s >= (84*94)) { + w = s - (84*94) + 0xE000 + (94*10); + } + + if (w == 0xA6) + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w 
= *in++; + unsigned int s = 0; + + if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } else if (w == 0xAF) { /* U+00AF is MACRON */ + s = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (w == 0x203E) { + s = 0x7E; + } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 10*94)) { + s = w - 0xE000; + s = ((s/94 + 0x75) << 8) + (s%94) + 0x21; + } else if (w >= (0xE000 + 10*94) && w < (0xE000 + 20*94)) { + s = w - (0xE000 + 10*94); + s = ((s/94 + 0xF5) << 8) + (s%94) + 0xA1; + } + + if (s == 0xA2F1) + s = 0x2D62; /* NUMERO SIGN */ + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x5C; + } else if (w == 0x2014) { /* EM DASH */ + s = 0x213D; + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else { + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (cp932ext1_ucs_table[i] == w) { + s = (((i/94) + (cp932ext1_ucs_table_min/94) + 0x21) << 8) + (i%94) + 0x21; + break; + } + } + + if (!s) { + for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { + if (cp932ext3_ucs_table[i] == w) { + s = cp932ext3_eucjp_table[i]; + break; + } + } + } + } + } + + if (!s) { + MB_CONVERT_ERROR(buf, out, limit, 
w, mb_wchar_to_eucjpwin); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else if (s < 0x100) { + out = mb_convert_buf_add2(out, 0x8E, s); + } else if (s < 0x8080) { + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); + out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c >= 0xA1 && c <= 0xFE) { /* CP932, first byte */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + w = 0; + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + 
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 2: /* got 0x8e, X0201 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* Input string was truncated */ + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + filter->status = 0; + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1; + + s1 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */ + if (s1 <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT 
SIGN */ + s1 = 0x224c; + } else { + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 < 0) { + c1 = 0; + c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext2_ucs_table[c1]) { + s1 = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); + break; + } + c1++; + } + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xFE && p < e) { + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; + + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s 
== 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8E && p < e) { + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xDF) { + *out++ = 0xFEC0 + c2; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s >= 0x8080) s = 0; /* We don't support JIS X0213 */ + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s 
= 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else { + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (cp932ext1_ucs_table[i] == w) { + s = ((i/94 + 0x2D) << 8) + (i%94) + 0x21; + goto found_it; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (cp932ext2_ucs_table[i] == w) { + s = ((i/94 + 0x79) << 8) + (i%94) + 0x21; + goto found_it; + } + } + } +found_it: ; + } + + if (!s || s >= 0x8080) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp51932); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else if (s < 0x100) { + out = mb_convert_buf_add2(out, 0x8E, s); + } else { + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_eucjp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xFE) { + /* Kanji */ + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + if (c2 <= 0xA0 || c2 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + unsigned int s1 = c - 0x80, s2 = c2 - 0x80; + unsigned int w1 = (s1 << 8) | s2, w = 0; + + /* Conversion for combining characters */ + if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { + int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); + if (k >= 0) { 
+ *out++ = jisx0213_u2_tbl[2*k]; + *out++ = jisx0213_u2_tbl[2*k+1]; + continue; + } + } + + /* Conversion for BMP */ + w1 = (s1 - 0x21)*94 + s2 - 0x21; + if (w1 < jisx0213_ucs_table_size) { + w = jisx0213_ucs_table[w1]; + } + + /* Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ + if (!w) { + w1 = (s1 << 8) | s2; + int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } else if (c == 0x8E && p < e) { + /* Kana */ + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xDF) { + *out++ = 0xFEC0 + c2; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8F && p < e) { + unsigned char c2 = *p++; + if ((c2 == 0xA1 || (c2 >= 0xA3 && c2 <= 0xA5) || c2 == 0xA8 || (c2 >= 0xAC && c2 <= 0xAF) || (c2 >= 0xEE && c2 <= 0xFE)) && p < e) { + unsigned char c3 = *p++; + + if (c3 < 0xA1 || c3 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + unsigned int s1 = c2 - 0xA1, s2 = c3 - 0xA1; + + if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) { + int k; + for (k = 0; k < jisx0213_p2_ofst_len; k++) { + if (s1 == jisx0213_p2_ofst[k]) { + break; + } + } + k -= jisx0213_p2_ofst[k]; + + /* Check for Japanese chars in BMP */ + unsigned int s = (s1 + 94 + k)*94 + s2; + ZEND_ASSERT(s < jisx0213_ucs_table_size); + unsigned int w = jisx0213_ucs_table[s]; + + /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ + if (!w) { + k = mbfl_bisec_srch2(((c2 - 0x80 + k + 94) << 8) | (c3 - 0x80), jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? 
w : MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_eucjp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + uint32_t w; + if (buf->state) { + w = buf->state; + buf->state = 0; + goto process_codepoint; + } + + while (len--) { + w = *in++; +process_codepoint: ; + unsigned int s = 0; + + /* Check for 1st char of combining characters */ + if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { + for (int k = 0; k < jisx0213_u2_tbl_len; k++) { + if (w == jisx0213_u2_tbl[2*k]) { + if (!len) { + if (!end) { + buf->state = w; + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + } else { + uint32_t w2 = *in++; len--; + if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { + k++; + } + if (w2 == jisx0213_u2_tbl[2*k+1]) { + s = jisx0213_u2_key[k]; + break; + } + in--; len++; + } + + /* Fallback */ + s = jisx0213_u2_fb_tbl[k]; + break; + } + } + } + + /* Check for major Japanese chars: U+4E00-U+9FFF */ + if (!s) { + for (int k = 0; k < uni2jis_tbl_len; k++) { + if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { + s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; + break; + } + } + } + + /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ + if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { + int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); + if (k >= 0) { + s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; + } + } + + /* Check for Japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ + if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { + int k = mbfl_bisec_srch2(w - 
0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); + if (k >= 0) { + s = jisx0213_u5_jis_tbl[k]; + } + } + + if (!s) { + /* CJK Compatibility Forms: U+FE30-U+FE4F */ + if (w == 0xFE45) { + s = 0x233E; + } else if (w == 0xFE46) { + s = 0x233D; + } else if (w >= 0xF91D && w <= 0xF9DC) { + /* CJK Compatibility Ideographs: U+F900-U+F92A */ + int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); + if (k >= 0) { + s = ucs_r2b_jisx0213_cmap_val[k]; + } + } + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp2004); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7F) { + out = mb_convert_buf_add(out, s); + } else if (s <= 0xFF) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, 0x8E, s); + } else if (s <= 0x7EFF) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) + 0x80, (s & 0xFF) + 0x80); + } else { + unsigned int s2 = s & 0xFF; + int k = ((s >> 8) & 0xFF) - 0x7F; + ZEND_ASSERT(k < jisx0213_p2_ofst_len); + s = jisx0213_p2_ofst[k] + 0x21; + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); + out = mb_convert_buf_add3(out, 0x8F, s | 0x80, s2 | 0x80); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if ((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) { /* dbcs lead byte */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xA0 && c < 0xFF) { + w = (c1 - 0x81)*192 + c - 0x40; + ZEND_ASSERT(w < cp936_ucs_table_size); + if (w == 0x1864) { + w = 0x30FB; + } else if (w == 0x186A) { + w = 0x2015; + } else if ((w >= 
0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) { + w = 0; + } else { + w = cp936_ucs_table[w]; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { + if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261) { + s = 0; + } else { + s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; + } + } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { + if (c == 0x2015) { + s = 0xA1AA; + } else if (c == 0x2014 || (c >= 0x2170 && c <= 0x2179)) { + s = 0; + } else { + s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; + } + } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { + if (c == 0x30FB) { + s = 0xA1A4; + } else { + s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; + } + } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { + s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; + } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { + if (c == 0xFF04) { + s = 0xA1E7; + } else if (c == 0xFF5E) { + s = 0xA1AB; + } else if (c >= 0xFF01 && c <= 0xFF5D) { + s = c - 0xFF01 + 0xA3A1; + } else if (c >= 0xFFE0 && c <= 0xFFE5) { + s = ucs_hff_s_cp936_table[c - 0xFFE0]; + } + } + + /* exclude CP936 extensions */ + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = 0; + } + + if (s <= 0) { + if (c < 0x80) { + s = c; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s & 0xFF, filter->data)); + } 
+ } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) && p < e) { + unsigned char c2 = *p++; + + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int w = (c - 0x81)*192 + c2 - 0x40; + ZEND_ASSERT(w < cp936_ucs_table_size); + if (w == 0x1864) { + w = 0x30FB; + } else if (w == 0x186A) { + w = 0x2015; + } else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) { + w = 0; + } else { + w = cp936_ucs_table[w]; + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { + if (w != 0xB7 && w != 0x144 && w != 0x148 && w != 0x251 && w != 0x261) { + s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; + } + } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { + if (w == 0x2015) { + s = 0xA1AA; + } else 
if (w != 0x2014 && (w < 0x2170 || w > 0x2179)) { + s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; + } + } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { + if (w == 0x30FB) { + s = 0xA1A4; + } else { + s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; + } + } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { + s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; + } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { + if (w == 0xFF04) { + s = 0xA1E7; + } else if (w == 0xFF5E) { + s = 0xA1AB; + } else if (w >= 0xFF01 && w <= 0xFF5D) { + s = w - 0xFF01 + 0xA3A1; + } else if (w >= 0xFFE0 && w <= 0xFFE5) { + s = ucs_hff_s_cp936_table[w - 0xFFE0]; + } + } + + /* Exclude CP936 extensions */ + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = 0; + } + + if (!s) { + if (w < 0x80) { + out = mb_convert_buf_add(out, w); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euccn); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else { + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) { /* 2-byte character, first byte */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8E) { /* 4-byte character, first byte */ + filter->status = 2; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* 2-byte character, second byte */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xA0 && c < 0xFF) { + w = (c1 - 0xA1)*94 + (c - 0xA1); + if (w >= 0 && w < cns11643_1_ucs_table_size) { + w = 
cns11643_1_ucs_table[w]; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + filter->status = filter->cache = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 2: /* got 0x8e, second byte */ + if (c == 0xA1 || c == 0xA2 || c == 0xAE) { + filter->status = 3; + filter->cache = c - 0xA1; + } else { + filter->status = filter->cache = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* got 0x8e, third byte */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0xA1 && ((c1 == 0 && ((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) || + (c1 == 1 && c <= 0xF2) || (c1 == 13 && c <= 0xE7))) { + filter->status = 4; + filter->cache = (c1 << 8) + c - 0xA1; + } else { + filter->status = filter->cache = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 4: /* multi-byte character, fourth byte */ + filter->status = 0; + c1 = filter->cache; + if (c1 <= 0xDFF && c > 0xA0 && c < 0xFF) { + int plane = (c1 & 0xF00) >> 8; /* This is actually the CNS-11643 plane minus one */ + s = (c1 & 0xFF)*94 + c - 0xA1; + w = 0; + if (s >= 0) { + /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3", + * and added tens of thousands more characters in planes 4, 5, 6, and 7 + * We only support the older version of CNS-11643 + * This is the same as iconv from glibc 2.2 */ + if (plane == 0 && s < cns11643_1_ucs_table_size) { + w = cns11643_1_ucs_table[s]; + } else if (plane == 1 && s < cns11643_2_ucs_table_size) { + w = cns11643_2_ucs_table[s]; + } else if (plane == 13 && s < cns11643_14_ucs_table_size) { + w = cns11643_14_ucs_table[s]; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + filter->status = filter->cache = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } 
+ break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) { + s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min]; + } else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) { + s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min]; + } else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) { + s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min]; + } else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) { + s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min]; + } else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) { + s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min]; + } + + if (s <= 0) { + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + int plane = (s & 0x1F0000) >> 16; + if (plane <= 1) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + s = (s & 0xFFFF) | 0x8080; + CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s & 0xFF, filter->data)); + } + } else { + s = (0x8EA00000 + (plane << 16)) | ((s & 0xFFFF) | 0x8080); + CK((*filter->output_function)(0x8e , filter->data)); + CK((*filter->output_function)((s >> 16) & 0xFF, filter->data)); + CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s & 0xFF, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + return 0; +} + +static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* 2-byte or 4-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + 
+static size_t mb_euctw_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3 && p < e) { + unsigned char c2 = *p++; + + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int w = (c - 0xA1)*94 + (c2 - 0xA1); + if (w < cns11643_1_ucs_table_size) { + w = cns11643_1_ucs_table[w]; + } else { + w = 0; + } + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8E && p < e) { + unsigned char c2 = *p++; + + if ((c2 == 0xA1 || c2 == 0xA2 || c2 == 0xAE) && p < e) { + unsigned int plane = c2 - 0xA1; /* This is actually the CNS-11643 plane minus one */ + unsigned char c3 = *p++; + + if (c3 >= 0xA1 && ((plane == 0 && ((c3 >= 0xA1 && c3 <= 0xA6) || (c3 >= 0xC2 && c3 <= 0xFD)) && c3 != 0xC3) || (plane == 1 && c3 <= 0xF2) || (plane == 13 && c3 <= 0xE7)) && p < e) { + unsigned char c4 = *p++; + + if (c2 <= 0xAE && c4 > 0xA0 && c4 < 0xFF) { + unsigned int s = (c3 - 0xA1)*94 + c4 - 0xA1, w = 0; + + /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3", + * and added tens of thousands more characters in planes 4, 5, 6, and 7 + * We only support the older version of CNS-11643 + * This is the same as iconv from glibc 2.2 */ + if (plane == 0 && s < cns11643_1_ucs_table_size) { + w = cns11643_1_ucs_table[s]; + } else if (plane == 1 && s < cns11643_2_ucs_table_size) { + w = cns11643_2_ucs_table[s]; + } else if (plane == 13 && s < cns11643_14_ucs_table_size) { + w = cns11643_14_ucs_table[s]; + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + continue; + } + } + } + + *out++ = MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void 
mb_wchar_to_euctw(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_cns11643_table_min && w < ucs_a1_cns11643_table_max) { + s = ucs_a1_cns11643_table[w - ucs_a1_cns11643_table_min]; + } else if (w >= ucs_a2_cns11643_table_min && w < ucs_a2_cns11643_table_max) { + s = ucs_a2_cns11643_table[w - ucs_a2_cns11643_table_min]; + } else if (w >= ucs_a3_cns11643_table_min && w < ucs_a3_cns11643_table_max) { + s = ucs_a3_cns11643_table[w - ucs_a3_cns11643_table_min]; + } else if (w >= ucs_i_cns11643_table_min && w < ucs_i_cns11643_table_max) { + s = ucs_i_cns11643_table[w - ucs_i_cns11643_table_min]; + } else if (w >= ucs_r_cns11643_table_min && w < ucs_r_cns11643_table_max) { + s = ucs_r_cns11643_table[w - ucs_r_cns11643_table_min]; + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euctw); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } + } else { + unsigned int plane = s >> 16; + if (plane <= 1) { + if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else { + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); + out = mb_convert_buf_add4(out, 0x8E, 0xA0 + plane, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, w, flag; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9) { /* dbcs lead byte */ + filter->status = 1; + filter->cache = c; + } else { + 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0; + c1 = filter->cache; + flag = 0; + if (c1 >= 0xa1 && c1 <= 0xc6) { + flag = 1; + } else if (c1 >= 0xc7 && c1 <= 0xfe && c1 != 0xc9) { + flag = 2; + } + if (flag > 0 && c >= 0xa1 && c <= 0xfe) { + if (flag == 1) { /* 1st: 0xa1..0xc6, 2nd: 0x41..0x7a, 0x81..0xfe */ + w = (c1 - 0x81)*190 + c - 0x41; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + } else { /* 1st: 0xc7..0xc8,0xca..0xfe, 2nd: 0xa1..0xfe */ + w = (c1 - 0xc7)*94 + c - 0xa1; + ZEND_ASSERT(w < uhc3_ucs_table_size); + w = uhc3_ucs_table[w]; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; + } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; + } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; + } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; + } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; + } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; + } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; + } + + /* exclude UHC extension area (although we are using the UHC conversion tables) */ + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = 0; + } + + if (s <= 0) { + if 
(c < 0x80) { + s = c; + } else { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9 && p < e) { + unsigned char c2 = *p++; + if (c2 < 0xA1 || c2 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + if (c <= 0xC6) { + unsigned int w = (c - 0x81)*190 + c2 - 0x41; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + unsigned int w = (c - 0xC7)*94 + c2 - 0xA1; + ZEND_ASSERT(w < uhc3_ucs_table_size); + w = uhc3_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_euckr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) 
{ + s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; + } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; + } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; + } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; + } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; + } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; + } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; + } + + /* Exclude UHC extension area (although we are using the UHC conversion tables) */ + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = 0; + } + + if (!s) { + if (w < 0x80) { + out = mb_convert_buf_add(out, w); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euckr); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter) +{ + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0x80 && c < 0xfe && c != 0xc9) { /* dbcs lead byte */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0; + int c1 = filter->cache, w = 0; + + if (c1 >= 0x81 && c1 <= 0xc6 && c >= 0x41 && c <= 0xfe) { + w = (c1 - 0x81)*190 + (c - 0x41); + if (w >= 0 && w < uhc1_ucs_table_size) { + w = 
uhc1_ucs_table[w]; + } + } else if (c1 >= 0xc7 && c1 < 0xfe && c >= 0xa1 && c <= 0xfe) { + w = (c1 - 0xc7)*94 + (c - 0xa1); + if (w >= 0 && w < uhc3_ucs_table_size) { + w = uhc3_ucs_table[w]; + } + } + + if (w == 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; + } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; + } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; + } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; + } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; + } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; + } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; + } + + if (s == 0 && c != 0) { + s = -1; + } + + if (s >= 0) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t 
mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + e--; /* Stop the main loop 1 byte short of the end of the input */ + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c > 0x80 && c < 0xFE) { + /* We don't need to check p < e here; it's not possible that this pointer dereference + * will be outside the input string, because of e-- above */ + unsigned char c2 = *p++; + if (c2 < 0x41 || c2 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + unsigned int w = 0; + + if (c <= 0xC6) { + w = (c - 0x81)*190 + c2 - 0x41; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + } else if (c2 >= 0xA1) { + w = (c - 0xC7)*94 + c2 - 0xA1; + ZEND_ASSERT(w < uhc3_ucs_table_size); + w = uhc3_ucs_table[w]; + if (!w) { + /* If c == 0xC9, we shouldn't have tried to read a 2-byte char at all... but it is faster + * to fix up that rare case here rather than include an extra check in the hot path */ + if (c == 0xC9) { + p--; + } + *out++ = MBFL_BAD_INPUT; + continue; + } + } + if (!w) { + w = MBFL_BAD_INPUT; + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + /* Finish up last byte of input string if there is one */ + if (p == e && out < limit) { + unsigned char c = *p++; + *out++ = (c < 0x80) ? 
c : MBFL_BAD_INPUT; + } + + *in_len = e - p + 1; + *in = p; + return out - buf; +} + +static void mb_wchar_to_uhc(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; + } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; + } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; + } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; + } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; + } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; + } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_uhc); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char *mbfl_encoding_euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL}; + +static const struct mbfl_convert_vtbl vtbl_eucjp_wchar = { + mbfl_no_encoding_euc_jp, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_eucjp_wchar, + mbfl_filt_conv_eucjp_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_eucjp = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_euc_jp, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_eucjp, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_euc_jp = { + mbfl_no_encoding_euc_jp, + "EUC-JP", + "EUC-JP", + mbfl_encoding_euc_jp_aliases, + mblen_table_eucjp, + 0, + &vtbl_eucjp_wchar, + &vtbl_wchar_eucjp, + mb_eucjp_to_wchar, + mb_wchar_to_eucjp, + NULL +}; + +static const char *mbfl_encoding_eucjp2004_aliases[] = {"EUC_JP-2004", NULL}; + +static const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar = { + mbfl_no_encoding_eucjp2004, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis2004_wchar, + mbfl_filt_conv_jis2004_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_eucjp2004, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_jis2004, + mbfl_filt_conv_wchar_jis2004_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_eucjp2004 = { + mbfl_no_encoding_eucjp2004, + "EUC-JP-2004", + "EUC-JP", + 
mbfl_encoding_eucjp2004_aliases, + mblen_table_eucjp, + 0, + &vtbl_eucjp2004_wchar, + &vtbl_wchar_eucjp2004, + mb_eucjp2004_to_wchar, + mb_wchar_to_eucjp2004, + NULL +}; + +static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", "eucJP-ms", NULL}; + +static const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = { + mbfl_no_encoding_eucjp_win, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_eucjpwin_wchar, + mbfl_filt_conv_eucjpwin_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_eucjp_win, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_eucjpwin, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_eucjp_win = { + mbfl_no_encoding_eucjp_win, + "eucJP-win", + "EUC-JP", + mbfl_encoding_eucjp_win_aliases, + mblen_table_eucjp, + 0, + &vtbl_eucjpwin_wchar, + &vtbl_wchar_eucjpwin, + mb_eucjpwin_to_wchar, + mb_wchar_to_eucjpwin, + NULL +}; + +static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; + +static const struct mbfl_convert_vtbl vtbl_cp51932_wchar = { + mbfl_no_encoding_cp51932, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp51932_wchar, + mbfl_filt_conv_cp51932_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp51932 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp51932, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_cp51932, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_cp51932 = { + mbfl_no_encoding_cp51932, + "CP51932", + "CP51932", + mbfl_encoding_cp51932_aliases, + mblen_table_eucjp, + 0, + &vtbl_cp51932_wchar, + &vtbl_wchar_cp51932, + mb_cp51932_to_wchar, + mb_wchar_to_cp51932, + NULL +}; + +static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char *mbfl_encoding_euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", "gb2312", NULL}; + +static const struct mbfl_convert_vtbl vtbl_euccn_wchar = { + mbfl_no_encoding_euc_cn, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_euccn_wchar, + mbfl_filt_conv_euccn_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_euccn = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_euc_cn, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_euccn, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_euc_cn = { + mbfl_no_encoding_euc_cn, + "EUC-CN", + "CN-GB", + mbfl_encoding_euc_cn_aliases, + mblen_table_euccn, + 0, + &vtbl_euccn_wchar, + &vtbl_wchar_euccn, + mb_euccn_to_wchar, + mb_wchar_to_euccn, + NULL +}; + +static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL}; + +static const struct mbfl_convert_vtbl vtbl_euctw_wchar = { + mbfl_no_encoding_euc_tw, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_euctw_wchar, + mbfl_filt_conv_euctw_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_euctw = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_euc_tw, + mbfl_filt_conv_common_ctor, + NULL, + 
mbfl_filt_conv_wchar_euctw, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_euc_tw = { + mbfl_no_encoding_euc_tw, + "EUC-TW", + "EUC-TW", + mbfl_encoding_euc_tw_aliases, + mblen_table_euccn, + 0, + &vtbl_euctw_wchar, + &vtbl_wchar_euctw, + mb_euctw_to_wchar, + mb_wchar_to_euctw, + NULL +}; + +static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL}; + +static const struct mbfl_convert_vtbl vtbl_euckr_wchar = { + mbfl_no_encoding_euc_kr, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_euckr_wchar, + mbfl_filt_conv_euckr_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_euckr = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_euc_kr, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_euckr, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_euc_kr = { + mbfl_no_encoding_euc_kr, + "EUC-KR", + "EUC-KR", + mbfl_encoding_euc_kr_aliases, + mblen_table_euccn, + 0, + &vtbl_euckr_wchar, + &vtbl_wchar_euckr, + mb_euckr_to_wchar, + mb_wchar_to_euckr, + NULL +}; + +/* UHC was introduced by MicroSoft in Windows 95, and is also known as CP949. + * It is the same as EUC-KR, but with 8,822 additional characters added to + * complete all the characters in the Johab charset. 
*/ + +static const unsigned char mblen_table_81_to_fe[] = { /* 0x81-0xFE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL}; + +static const struct mbfl_convert_vtbl vtbl_uhc_wchar = { + mbfl_no_encoding_uhc, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_uhc_wchar, + mbfl_filt_conv_uhc_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_uhc = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_uhc, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_uhc, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_uhc = { + mbfl_no_encoding_uhc, + "UHC", + "UHC", + mbfl_encoding_uhc_aliases, + mblen_table_81_to_fe, + 0, + &vtbl_uhc_wchar, + &vtbl_wchar_uhc, + mb_uhc_to_wchar, + mb_wchar_to_uhc, + NULL +}; + +/* + * GB18030/CP936 + */ + +static int mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter) +{ + int k; + int c1, c2, c3, w = -1; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0x80 && c < 0xff) { /* dbcs/qbcs lead byte */ + filter->status = 1; + filter->cache 
= c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs/qbcs second byte */ + c1 = filter->cache; + filter->status = 0; + + if (c1 >= 0x81 && c1 <= 0x84 && c >= 0x30 && c <= 0x39) { + /* 4 byte range: Unicode BMP */ + filter->status = 2; + filter->cache = (c1 << 8) | c; + return 0; + } else if (c1 >= 0x90 && c1 <= 0xe3 && c >= 0x30 && c <= 0x39) { + /* 4 byte range: Unicode 16 planes */ + filter->status = 2; + filter->cache = (c1 << 8) | c; + return 0; + } else if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && (c >= 0xa1 && c <= 0xfe)) { + /* UDA part 1,2: U+E000-U+E4C5 */ + w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000; + CK((*filter->output_function)(w, filter->data)); + } else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { + /* UDA part3 : U+E4C6-U+E765*/ + w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6; + CK((*filter->output_function)(w, filter->data)); + } + + c2 = (c1 << 8) | c; + + if (w <= 0 && + ((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) || + (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) || + (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) { + for (k = 0; k < mbfl_gb18030_pua_tbl_max; k++) { + if (c2 >= mbfl_gb18030_pua_tbl[k][2] && c2 <= mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][1] - mbfl_gb18030_pua_tbl[k][0]) { + w = c2 - mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][0]; + CK((*filter->output_function)(w, filter->data)); + break; + } + } + } + + if (w <= 0) { + if ((c1 >= 0xa1 && c1 <= 0xa9 && c >= 0xa1 && c <= 0xfe) || + (c1 >= 0xb0 && c1 <= 0xf7 && c >= 0xa1 && c <= 0xfe) || + (c1 >= 0x81 && c1 <= 0xa0 && c >= 0x40 && c <= 0xfe && c != 0x7f) || + (c1 >= 0xaa && c1 <= 0xfe && c >= 0x40 && c <= 0xa0 && c != 0x7f) || + (c1 >= 0xa8 && c1 <= 0xa9 && c >= 0x40 && c <= 0xa0 && c != 0x7f)) { + w = (c1 - 0x81)*192 + c - 0x40; + ZEND_ASSERT(w < cp936_ucs_table_size); + 
CK((*filter->output_function)(cp936_ucs_table[w], filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + break; + + case 2: /* qbcs third byte */ + c1 = (filter->cache >> 8) & 0xff; + c2 = filter->cache & 0xff; + filter->status = filter->cache = 0; + if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c >= 0x81 && c <= 0xfe) { + filter->cache = (c1 << 16) | (c2 << 8) | c; + filter->status = 3; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* qbcs fourth byte */ + c1 = (filter->cache >> 16) & 0xff; + c2 = (filter->cache >> 8) & 0xff; + c3 = filter->cache & 0xff; + filter->status = filter->cache = 0; + if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c3 >= 0x81 && c3 <= 0xfe && c >= 0x30 && c <= 0x39) { + if (c1 >= 0x90 && c1 <= 0xe3) { + w = ((((c1 - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c - 0x30) + 0x10000; + if (w > 0x10FFFF) { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + return 0; + } + } else { /* Unicode BMP */ + w = (((c1 - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c - 0x30); + if (w >= 0 && w <= 39419) { + k = mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max); + w += mbfl_gb_uni_ofst[k]; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + return 0; + } + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* multi-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int 
/*
 * Conversion filter callback: encode one Unicode codepoint `c` as GB18030 and pass the
 * resulting byte(s) to filter->output_function, one call per byte.
 *
 * Working values:
 *   s  - candidate code. For a 2-byte code the lead byte is in bits 8-15 and the trail
 *        byte in bits 0-7; for a 4-byte code it holds bytes 2..4 (bits 16-23, 8-15, 0-7).
 *   s1 - first byte of a 4-byte code; stays 0 unless a 4-byte code was built.
 *
 * Codepoints for which no code is found are handed to mbfl_filt_conv_illegal_output().
 * The function itself returns 0. NOTE(review): CK() is defined outside this block and
 * presumably returns early when an output call fails - confirm against its definition.
 */
static int mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter)
{
	int k, k1, k2;
	int c1, s = 0, s1 = 0;

	/* Stage 1: CP936 range tables, with a few individual codepoints overridden by fixed values */
	if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) {
		if (c == 0x01f9) {
			s = 0xa8bf;
		} else {
			s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min];
		}
	} else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) {
		if (c == 0x20ac) { /* euro-sign */
			s = 0xa2e3;
		} else {
			s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min];
		}
	} else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) {
		s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min];
	} else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) {
		s = ucs_i_cp936_table[c - ucs_i_cp936_table_min];
	} else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) {
		/* U+F900-FA2F CJK Compatibility Ideographs */
		if (c == 0xf92c) {
			s = 0xfd9c;
		} else if (c == 0xf979) {
			s = 0xfd9d;
		} else if (c == 0xf995) {
			s = 0xfd9e;
		} else if (c == 0xf9e7) {
			s = 0xfd9f;
		} else if (c == 0xf9f1) {
			s = 0xfda0;
		} else if (c >= 0xfa0c && c <= 0xfa29) {
			s = ucs_ci_s_cp936_table[c - 0xfa0c];
		}
	} else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) {
		/* FE30h CJK Compatibility Forms */
		s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min];
	} else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) {
		/* U+FE50-FE6F Small Form Variants */
		s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min];
	} else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) {
		/* U+FF00-FFFF HW/FW Forms */
		if (c == 0xff04) {
			s = 0xa1e7;
		} else if (c == 0xff5e) {
			s = 0xa1ab;
		} else if (c >= 0xff01 && c <= 0xff5d) {
			s = c - 0xff01 + 0xa3a1;
		} else if (c >= 0xffe0 && c <= 0xffe5) {
			s = ucs_hff_s_cp936_table[c-0xffe0];
		}
	}

	/* Stage 2 (only when stage 1 found nothing):
	 * While GB18030 and CP936 are very similar, some mappings are different between these encodings;
	 * do a binary search in a table of differing codepoints to see if we have one */
	if (s <= 0 && c >= mbfl_gb18030_c_tbl_key[0] && c <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) {
		k1 = mbfl_bisec_srch2(c, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max);
		if (k1 >= 0) {
			s = mbfl_gb18030_c_tbl_val[k1];
		}
	}

	/* Stage 3: this range is not guarded by a test on `s`, so a code computed here
	 * replaces whatever the earlier stages produced */
	if (c >= 0xe000 && c <= 0xe864) { /* PUA */
		if (c < 0xe766) {
			if (c < 0xe4c6) {
				/* Rows of 94 trail bytes starting at 0xa1; the first six rows use
				 * lead bytes from 0xaa, later rows use lead bytes from 0xf8 */
				c1 = c - 0xe000;
				s = (c1 % 94) + 0xa1;
				c1 /= 94;
				s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8;
			} else {
				/* Rows of 96 trail bytes starting at 0x40 with lead bytes from 0xa1;
				 * the extra +1 from offset 0x3f onward skips trail byte 0x7f */
				c1 = c - 0xe4c6;
				s = ((c1 / 96) + 0xa1) << 8;
				c1 %= 96;
				s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40);
			}
		} else {
			/* U+E766..U+E864 */
			/* Binary search over table rows, used here as
			 * {first codepoint, last codepoint, code of first codepoint} */
			k1 = 0;
			k2 = mbfl_gb18030_pua_tbl_max;
			while (k1 < k2) {
				k = (k1 + k2) >> 1;
				if (c < mbfl_gb18030_pua_tbl[k][0]) {
					k2 = k;
				} else if (c > mbfl_gb18030_pua_tbl[k][1]) {
					k1 = k + 1;
				} else {
					s = c - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2];
					break;
				}
			}
		}
	}

	/* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */
	if (s <= 0 && c >= 0x0080 && c <= 0xffff) {
		/* BMP */
		/* `s` temporarily holds the search result; a negative result is left in `s`
		 * and reaches the illegal-output branch at the end of the function */
		s = mbfl_bisec_srch(c, mbfl_uni2gb_tbl, mbfl_gb_uni_max);
		if (s >= 0) {
			/* Split the linear index into the four bytes, last byte first:
			 * base 10 (+0x30), base 126 (+0x81), base 10 (+0x30), remainder (+0x81) */
			c1 = c - mbfl_gb_uni_ofst[s];
			s = (c1 % 10) + 0x30;
			c1 /= 10;
			s |= ((c1 % 126) + 0x81) << 8;
			c1 /= 126;
			s |= ((c1 % 10) + 0x30) << 16;
			c1 /= 10;
			s1 = c1 + 0x81;
		}
	} else if (c >= 0x10000 && c <= 0x10ffff) {
		/* Code set 3: Unicode U+10000..U+10FFFF */
		/* Same byte split as above, but the first byte starts at 0x90 */
		c1 = c - 0x10000;
		s = (c1 % 10) + 0x30;
		c1 /= 10;
		s |= ((c1 % 126) + 0x81) << 8;
		c1 /= 126;
		s |= ((c1 % 10) + 0x30) << 16;
		c1 /= 10;
		s1 = c1 + 0x90;
	}

	/* U+0000 is emitted as a single zero byte; any other codepoint that still has
	 * s == 0 has no mapping */
	if (c == 0) {
		s = 0;
	} else if (s == 0) {
		s = -1;
	}

	if (s >= 0) {
		if (s <= 0x80) { /* latin */
			CK((*filter->output_function)(s, filter->data));
		} else if (s1 > 0) { /* qbcs */
			CK((*filter->output_function)(s1 & 0xff, filter->data));
			CK((*filter->output_function)((s >> 16) & 0xff, filter->data));
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
			CK((*filter->output_function)(s & 0xff, filter->data));
		} else { /* dbcs */
			CK((*filter->output_function)((s >> 8) & 0xff, filter->data));
			CK((*filter->output_function)(s & 0xff, filter->data));
		}
	} else {
		CK(mbfl_filt_conv_illegal_output(c, filter));
	}

	return 0;
}
/*
 * Encode `len` Unicode codepoints read from `in` as GB18030, appending the bytes to `buf`.
 *
 * For each codepoint a packed code `s` is built and then written according to its size:
 *   s == 0         -> no mapping was found; reported through MB_CONVERT_ERROR
 *   s <  0x80      -> one byte
 *   s >  0xFFFFFF  -> four bytes, most significant byte first
 *   otherwise      -> two bytes, high byte first
 * U+0000 is written directly as a single zero byte before any table lookup.
 *
 * The `end` argument is not used in this function.
 * NOTE(review): the MB_CONVERT_BUF_* / MB_CONVERT_ERROR macros are defined outside this
 * block; the comments below assume ENSURE reserves at least the requested number of
 * free bytes in `buf` - confirm against their definitions.
 */
static void mb_wchar_to_gb18030(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Start with one byte per input codepoint; multi-byte branches request more below */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len);

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		/* CP936 range tables, with a few individual codepoints overridden by fixed values */
		if (w == 0) {
			out = mb_convert_buf_add(out, 0);
			continue;
		} else if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) {
			if (w == 0x1F9) {
				s = 0xA8Bf;
			} else {
				s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min];
			}
		} else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) {
			if (w == 0x20AC) { /* Euro sign */
				s = 0xA2E3;
			} else {
				s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min];
			}
		} else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) {
			s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min];
		} else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) {
			s = ucs_i_cp936_table[w - ucs_i_cp936_table_min];
		} else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) {
			/* U+F900-U+FA2F CJK Compatibility Ideographs */
			if (w == 0xF92C) {
				s = 0xFD9C;
			} else if (w == 0xF979) {
				s = 0xFD9D;
			} else if (w == 0xF995) {
				s = 0xFD9E;
			} else if (w == 0xF9E7) {
				s = 0xFD9F;
			} else if (w == 0xF9F1) {
				s = 0xFDA0;
			} else if (w >= 0xFA0C && w <= 0xFA29) {
				s = ucs_ci_s_cp936_table[w - 0xFA0C];
			}
		} else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) {
			/* CJK Compatibility Forms */
			s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min];
		} else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) {
			/* U+FE50-U+FE6F Small Form Variants */
			s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min];
		} else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) {
			/* U+FF00-U+FFFF HW/FW Forms */
			if (w == 0xFF04) {
				s = 0xA1E7;
			} else if (w == 0xFF5E) {
				s = 0xA1AB;
			} else if (w >= 0xFF01 && w <= 0xFF5D) {
				s = w - 0xFF01 + 0xA3A1;
			} else if (w >= 0xFFE0 && w <= 0xFFE5) {
				s = ucs_hff_s_cp936_table[w - 0xFFE0];
			}
		} else if (w >= 0xE000 && w <= 0xE864) {
			/* PUA */
			if (w < 0xE766) {
				if (w < 0xE4C6) {
					/* Rows of 94 trail bytes starting at 0xA1; the first six rows use
					 * lead bytes from 0xAA, later rows use lead bytes from 0xF8 */
					unsigned int c1 = w - 0xE000;
					s = (c1 % 94) + 0xA1;
					c1 /= 94;
					s |= (c1 + (c1 < 0x06 ? 0xAA : 0xF2)) << 8;
				} else {
					/* Rows of 96 trail bytes starting at 0x40 with lead bytes from 0xA1;
					 * the extra +1 from offset 0x3F onward skips trail byte 0x7F */
					unsigned int c1 = w - 0xE4C6;
					s = ((c1 / 96) + 0xA1) << 8;
					c1 %= 96;
					s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40);
				}
			} else {
				/* U+E766-U+E864 */
				/* Binary search over table rows, used here as
				 * {first codepoint, last codepoint, code of first codepoint} */
				unsigned int k1 = 0, k2 = mbfl_gb18030_pua_tbl_max;
				while (k1 < k2) {
					unsigned int k = (k1 + k2) >> 1;
					if (w < mbfl_gb18030_pua_tbl[k][0]) {
						k2 = k;
					} else if (w > mbfl_gb18030_pua_tbl[k][1]) {
						k1 = k + 1;
					} else {
						s = w - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2];
						break;
					}
				}
			}
		}

		/* While GB18030 and CP936 are very similar, some mappings are different between these encodings;
		 * do a binary search in a table of differing codepoints to see if we have one */
		if (!s && w >= mbfl_gb18030_c_tbl_key[0] && w <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) {
			int i = mbfl_bisec_srch2(w, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max);
			if (i >= 0) {
				s = mbfl_gb18030_c_tbl_val[i];
			}
		}

		/* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */
		if (!s && w >= 0x80 && w <= 0xFFFF) {
			/* BMP */
			int i = mbfl_bisec_srch(w, mbfl_uni2gb_tbl, mbfl_gb_uni_max);
			if (i >= 0) {
				/* Split the linear index into the four bytes, last byte first:
				 * base 10 (+0x30), base 126 (+0x81), base 10 (+0x30), remainder (+0x81) */
				unsigned int c1 = w - mbfl_gb_uni_ofst[i];
				s = (c1 % 10) + 0x30;
				c1 /= 10;
				s |= ((c1 % 126) + 0x81) << 8;
				c1 /= 126;
				s |= ((c1 % 10) + 0x30) << 16;
				c1 /= 10;
				s |= (c1 + 0x81) << 24;
			}
		} else if (w >= 0x10000 && w <= 0x10FFFF) {
			/* Code set 3: Unicode U+10000-U+10FFFF */
			/* Same byte split as above, but the first byte starts at 0x90 */
			unsigned int c1 = w - 0x10000;
			s = (c1 % 10) + 0x30;
			c1 /= 10;
			s |= ((c1 % 126) + 0x81) << 8;
			c1 /= 126;
			s |= ((c1 % 10) + 0x30) << 16;
			c1 /= 10;
			s |= (c1 + 0x90) << 24;
		}

		if (!s) {
			/* No mapping found; after reporting it, re-reserve room for the remaining input */
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_gb18030);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len);
		} else if (s < 0x80) {
			out = mb_convert_buf_add(out, s);
		} else if (s > 0xFFFFFF) {
			/* A byte is set in bits 24-31, so this is a 4-byte code */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4);
			out = mb_convert_buf_add4(out, (s >> 24) & 0xFF, (s >> 16) & 0xFF, (s >> 8) & 0xFF, s & 0xFF);
		} else {
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2);
			out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}
0x41 : 0x40) + 0xe4c6; + CK((*filter->output_function)(w, filter->data)); + } + + c2 = (c1 << 8) | c; + + if (w <= 0 && + ((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) || + (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) || + (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) { + for (k = 0; k < mbfl_cp936_pua_tbl_max; k++) { + if (c2 >= mbfl_cp936_pua_tbl[k][2] && + c2 <= mbfl_cp936_pua_tbl[k][2] + + mbfl_cp936_pua_tbl[k][1] - mbfl_cp936_pua_tbl[k][0]) { + w = c2 - mbfl_cp936_pua_tbl[k][2] + mbfl_cp936_pua_tbl[k][0]; + CK((*filter->output_function)(w, filter->data)); + break; + } + } + } + + if (w <= 0) { + if (c1 < 0xff && c1 > 0x80 && c >= 0x40 && c < 0xff && c != 0x7f) { + w = (c1 - 0x81)*192 + c - 0x40; + ZEND_ASSERT(w < cp936_ucs_table_size); + CK((*filter->output_function)(cp936_ucs_table[w], filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter) +{ + int k, k1, k2; + int c1, s = 0; + + if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { + /* U+0000 - U+0451 */ + s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; + } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { + /* U+2000 - U+26FF */ + if (c == 0x203e) { + s = 0xa3fe; + } else if (c == 0x2218) { + s = 0xa1e3; + } else if (c == 0x223c) { + s = 0xa1ab; + } else { + s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; + } + } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { + /* U+2F00 - U+33FF */ + s = ucs_a3_cp936_table[c - 
ucs_a3_cp936_table_min]; + } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { + /* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */ + s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; + } else if (c >= 0xe000 && c <= 0xe864) { /* PUA */ + if (c < 0xe766) { + if (c < 0xe4c6) { + c1 = c - 0xe000; + s = (c1 % 94) + 0xa1; c1 /= 94; + s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8; + } else { + c1 = c - 0xe4c6; + s = ((c1 / 96) + 0xa1) << 8; c1 %= 96; + s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40); + } + } else { + /* U+E766..U+E864 */ + k1 = 0; k2 = mbfl_cp936_pua_tbl_max; + while (k1 < k2) { + k = (k1 + k2) >> 1; + if (c < mbfl_cp936_pua_tbl[k][0]) { + k2 = k; + } else if (c > mbfl_cp936_pua_tbl[k][1]) { + k1 = k + 1; + } else { + s = c - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2]; + break; + } + } + } + } else if (c == 0xf8f5) { + s = 0xff; + } else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) { + /* U+F900-FA2F CJK Compatibility Ideographs */ + s = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min]; + } else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) { + s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min]; + } else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) { + s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min]; /* U+FE50-FE6F Small Form Variants */ + } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { + /* U+FF00-FFFF HW/FW Forms */ + if (c == 0xff04) { + s = 0xa1e7; + } else if (c == 0xff5e) { + s = 0xa1ab; + } else if (c >= 0xff01 && c <= 0xff5d) { + s = c - 0xff01 + 0xa3a1; + } else if (c >= 0xffe0 && c <= 0xffe5) { + s = ucs_hff_s_cp936_table[c-0xffe0]; + } + } + + if (s <= 0) { + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s <= 0x80 || s == 0xff) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + 
/*
 * Decode CP936 bytes from *in (at most *in_len of them) into codepoints stored in `buf`
 * (at most `bufsize` of them).
 *
 * On return *in and *in_len have been advanced past the consumed bytes, and the number
 * of codepoints written to `buf` is returned. `state` is not used in this function.
 *
 * Byte handling: values below 0x80 pass through unchanged, 0x80 becomes U+20AC, 0xFF
 * becomes U+F8F5, and 0x81-0xFE starts a two-byte character. A lead byte at the very
 * end of the input, or a trail byte below 0x40 or equal to 0x7F / 0xFF, produces
 * MBFL_BAD_INPUT.
 */
static size_t mb_cp936_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
{
	unsigned char *p = *in, *e = p + *in_len;
	uint32_t *out = buf, *limit = buf + bufsize;

	while (p < e && out < limit) {
		unsigned char c = *p++;

		if (c < 0x80) {
			*out++ = c;
		} else if (c == 0x80) {
			*out++ = 0x20AC; /* Euro sign */
		} else if (c < 0xFF) {
			/* Lead byte with no trail byte available */
			if (p >= e) {
				*out++ = MBFL_BAD_INPUT;
				continue;
			}

			/* The trail byte is consumed even when it turns out to be invalid */
			unsigned char c2 = *p++;
			if (c2 < 0x40 || c2 == 0x7F || c2 == 0xFF) {
				*out++ = MBFL_BAD_INPUT;
				continue;
			}

			if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && c2 >= 0xA1) {
				/* UDA part 1, 2: U+E000-U+E4C5 */
				*out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000;
			} else if (c >= 0xA1 && c <= 0xA7 && c2 < 0xA1) {
				/* UDA part 3: U+E4C6-U+E765 */
				*out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 0x41 : 0x40) + 0xE4C6;
			} else {
				unsigned int w = (c - 0x81)*192 + c2 - 0x40; /* Convert c, c2 into GB 2312 table lookup index */

				/* For CP936 and GB18030, certain GB 2312 byte combinations are mapped to PUA codepoints,
				 * whereas the same combinations aren't mapped to any codepoint for HZ and EUC-CN
				 * To avoid duplicating the entire GB 2312 -> Unicode lookup table, we have three
				 * auxiliary tables which are consulted instead for specific ranges of lookup indices */
				if (w >= 0x192B) {
					if (w <= 0x1EBE) {
						*out++ = cp936_pua_tbl1[w - 0x192B];
						continue;
					} else if (w >= 0x413A) {
						if (w <= 0x413E) {
							*out++ = cp936_pua_tbl2[w - 0x413A];
							continue;
						} else if (w >= 0x5DD0 && w <= 0x5E20) {
							*out++ = cp936_pua_tbl3[w - 0x5DD0];
							continue;
						}
					}
				}

				/* Indices outside the three auxiliary ranges use the main table */
				ZEND_ASSERT(w < cp936_ucs_table_size);
				*out++ = cp936_ucs_table[w];
			}
		} else {
			/* c == 0xFF */
			*out++ = 0xF8F5;
		}
	}

	*in_len = e - p;
	*in = p;
	return out - buf;
}
else if (w >= 0xE000 && w <= 0xE864) { + /* PUA */ + if (w < 0xe766) { + if (w < 0xe4c6) { + unsigned int c1 = w - 0xE000; + s = (c1 % 94) + 0xA1; + c1 /= 94; + s |= (c1 < 0x6 ? c1 + 0xAA : c1 + 0xF2) << 8; + } else { + unsigned int c1 = w - 0xE4C6; + s = ((c1 / 96) + 0xA1) << 8; + c1 %= 96; + s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40); + } + } else { + /* U+E766-U+E864 */ + unsigned int k1 = 0; + unsigned int k2 = mbfl_cp936_pua_tbl_max; + while (k1 < k2) { + int k = (k1 + k2) >> 1; + if (w < mbfl_cp936_pua_tbl[k][0]) { + k2 = k; + } else if (w > mbfl_cp936_pua_tbl[k][1]) { + k1 = k + 1; + } else { + s = w - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2]; + break; + } + } + } + } else if (w == 0xF8F5) { + s = 0xFF; + } else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) { + /* U+F900-U+FA2F CJK Compatibility Ideographs */ + s = ucs_ci_cp936_table[w - ucs_ci_cp936_table_min]; + } else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) { + s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min]; + } else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) { + /* U+FE50-U+FE6F Small Form Variants */ + s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min]; + } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { + /* U+FF00-U+FFFF HW/FW Forms */ + if (w == 0xFF04) { + s = 0xA1E7; + } else if (w == 0xFF5E) { + s = 0xA1AB; + } else if (w >= 0xFF01 && w <= 0xFF5D) { + s = w - 0xFF01 + 0xA3A1; + } else if (w >= 0xFFE0 && w <= 0xFFE5) { + s = ucs_hff_s_cp936_table[w - 0xFFE0]; + } + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp936); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } + } else if (s <= 0x80 || s == 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const char 
/*
 * Report whether the byte pair (lead byte c1, trail byte c) falls inside one of the
 * byte ranges listed in cp950_pua_tbl.
 *
 * Lead byte 0xC6 accepts only trail bytes 0xA1-0xFE. Lead bytes 0x81-0xA0, 0xC7-0xC8
 * and 0xFA-0xFE accept trail bytes 0x40-0x7E as well as 0xA1-0xFE.
 *
 * Returns 1 when the pair is in range, 0 otherwise.
 */
static inline int is_in_cp950_pua(int c1, int c)
{
	const int trail_upper = (c >= 0xa1 && c <= 0xfe);

	if (c1 == 0xc6) {
		return trail_upper;
	}

	/* 0x81-0x8d and 0x8e-0xa0 are adjacent, so together they cover 0x81-0xa0 */
	const int lead_ok = (c1 >= 0x81 && c1 <= 0xa0)
		|| (c1 >= 0xc7 && c1 <= 0xc8)
		|| (c1 >= 0xfa && c1 <= 0xfe);
	if (!lead_ok) {
		return 0;
	}

	return (c >= 0x40 && c <= 0x7e) || trail_upper;
}
0x62 : 0x40) + cp950_pua_tbl[k][0]; + } else { + w = c2 - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0]; + } + } else if (c1 == 0xA1) { + if (c == 0x45) { + w = 0x2027; + } else if (c == 0x4E) { + w = 0xFE51; + } else if (c == 0x5A) { + w = 0x2574; + } else if (c == 0xC2) { + w = 0x00AF; + } else if (c == 0xC3) { + w = 0xFFE3; + } else if (c == 0xC5) { + w = 0x02CD; + } else if (c == 0xE3) { + w = 0xFF5E; + } else if (c == 0xF2) { + w = 0x2295; + } else if (c == 0xF3) { + w = 0x2299; + } else if (c == 0xFE) { + w = 0xFF0F; + } + } else if (c1 == 0xA2) { + if (c == 0x40) { + w = 0xFF3C; + } else if (c == 0x41) { + w = 0x2215; + } else if (c == 0x42) { + w = 0xFE68; + } else if (c == 0x46) { + w = 0xFFE0; + } else if (c == 0x47) { + w = 0xFFE1; + } else if (c == 0xCC) { + w = 0x5341; + } else if (c == 0xCE) { + w = 0x5345; + } + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter) +{ + int k, s = 0; + + if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) { + s = ucs_a1_big5_table[c - ucs_a1_big5_table_min]; + } else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) { + s = ucs_a2_big5_table[c - ucs_a2_big5_table_min]; + } else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) { + s = ucs_a3_big5_table[c - ucs_a3_big5_table_min]; + } else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) { + s = ucs_i_big5_table[c - ucs_i_big5_table_min]; + } 
else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) { + s = ucs_r1_big5_table[c - ucs_r1_big5_table_min]; + } else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) { + s = ucs_r2_big5_table[c - ucs_r2_big5_table_min]; + } + + if (filter->to->no_encoding == mbfl_no_encoding_cp950) { + if (c >= 0xe000 && c <= 0xf848) { /* PUA for CP950 */ + for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { + if (c <= cp950_pua_tbl[k][1]) { + break; + } + } + + int c1 = c - cp950_pua_tbl[k][0]; + if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) { + int c2 = cp950_pua_tbl[k][2] >> 8; + s = ((c1 / 157) + c2) << 8; + c1 %= 157; + s |= c1 + (c1 >= 0x3f ? 0x62 : 0x40); + } else { + s = c1 + cp950_pua_tbl[k][2]; + } + } else if (c == 0x00A2) { + s = 0; + } else if (c == 0x00A3) { + s = 0; + } else if (c == 0x00AF) { + s = 0xA1C2; + } else if (c == 0x02CD) { + s = 0xA1C5; + } else if (c == 0x0401) { + s = 0; + } else if (c >= 0x0414 && c <= 0x041C) { + s = 0; + } else if (c >= 0x0423 && c <= 0x044F) { + s = 0; + } else if (c == 0x0451) { + s = 0; + } else if (c == 0x2022) { + s = 0; + } else if (c == 0x2027) { + s = 0xA145; + } else if (c == 0x203E) { + s = 0; + } else if (c == 0x2215) { + s = 0xA241; + } else if (c == 0x223C) { + s = 0; + } else if (c == 0x2295) { + s = 0xA1F2; + } else if (c == 0x2299) { + s = 0xA1F3; + } else if (c >= 0x2460 && c <= 0x247D) { + s = 0; + } else if (c == 0x2574) { + s = 0xA15A; + } else if (c == 0x2609) { + s = 0; + } else if (c == 0x2641) { + s = 0; + } else if (c == 0x3005 || (c >= 0x302A && c <= 0x30FF)) { + s = 0; + } else if (c == 0xFE51) { + s = 0xA14E; + } else if (c == 0xFE68) { + s = 0xA242; + } else if (c == 0xFF3C) { + s = 0xA240; + } else if (c == 0xFF5E) { + s = 0xA1E3; + } else if (c == 0xFF64) { + s = 0; + } else if (c == 0xFFE0) { + s = 0xA246; + } else if (c == 0xFFE1) { + s = 0xA247; + } else if (c == 0xFFE3) { + s = 0xA1C3; + } else if (c == 0xFF0F) { + s = 0xA1FE; + } + } + + if (s <= 0) { + if 
(c == 0) { + s = 0; + } else { + s = -1; + } + } + + if (s >= 0) { + if (s <= 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_big5_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + e--; /* Stop the main loop 1 byte short of the end of the input */ + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c > 0xA0 && c <= 0xF9) { + /* We don't need to check p < e here; it's not possible that this pointer dereference + * will be outside the input string, because of e-- above */ + unsigned char c2 = *p++; + + if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) { + unsigned int w = (c - 0xA1)*157 + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F); + ZEND_ASSERT(w < big5_ucs_table_size); + w = big5_ucs_table[w]; + if (!w) { + if (c == 0xC8) { + p--; + } + w = MBFL_BAD_INPUT; + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + /* Finish up last byte of input string if there is one */ + if (p == e && out < limit) { + unsigned char c = *p++; + *out++ = (c <= 0x7F) ? 
c : MBFL_BAD_INPUT; + } + + *in_len = e - p + 1; + *in = p; + return out - buf; +} + +static void mb_wchar_to_big5(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) { + s = ucs_a1_big5_table[w - ucs_a1_big5_table_min]; + } else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) { + s = ucs_a2_big5_table[w - ucs_a2_big5_table_min]; + } else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) { + s = ucs_a3_big5_table[w - ucs_a3_big5_table_min]; + } else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) { + s = ucs_i_big5_table[w - ucs_i_big5_table_min]; + } else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) { + s = ucs_r1_big5_table[w - ucs_r1_big5_table_min]; + } else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) { + s = ucs_r2_big5_table[w - ucs_r2_big5_table_min]; + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } else if (s <= 0x80) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_cp950_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c > 0x80 && c <= 0xFE && p < e) { + unsigned char c2 = *p++; + + if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) { + unsigned int w = ((c - 
0xA1)*157) + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F); + w = (w < big5_ucs_table_size) ? big5_ucs_table[w] : 0; + + /* PUA for CP950 */ + if (is_in_cp950_pua(c, c2)) { + unsigned int s = (c << 8) | c2; + + int k; + for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { + if (s >= cp950_pua_tbl[k][2] && s <= cp950_pua_tbl[k][3]) { + break; + } + } + + if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) { + w = 157*(c - (cp950_pua_tbl[k][2] >> 8)) + c2 - (c2 >= 0xA1 ? 0x62 : 0x40) + cp950_pua_tbl[k][0]; + } else { + w = s - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0]; + } + } else if (c == 0xA1) { + if (c2 == 0x45) { + w = 0x2027; + } else if (c2 == 0x4E) { + w = 0xFE51; + } else if (c2 == 0x5A) { + w = 0x2574; + } else if (c2 == 0xC2) { + w = 0x00AF; + } else if (c2 == 0xC3) { + w = 0xFFE3; + } else if (c2 == 0xC5) { + w = 0x02CD; + } else if (c2 == 0xE3) { + w = 0xFF5E; + } else if (c2 == 0xF2) { + w = 0x2295; + } else if (c2 == 0xF3) { + w = 0x2299; + } else if (c2 == 0xFE) { + w = 0xFF0F; + } + } else if (c == 0xA2) { + if (c2 == 0x40) { + w = 0xFF3C; + } else if (c2 == 0x41) { + w = 0x2215; + } else if (c2 == 0x42) { + w = 0xFE68; + } else if (c2 == 0x46) { + w = 0xFFE0; + } else if (c2 == 0x47) { + w = 0xFFE1; + } else if (c2 == 0xCC) { + w = 0x5341; + } else if (c2 == 0xCE) { + w = 0x5345; + } + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_cp950(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) { + s = ucs_a1_big5_table[w - ucs_a1_big5_table_min]; + } else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) { + s = 
ucs_a2_big5_table[w - ucs_a2_big5_table_min]; + } else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) { + s = ucs_a3_big5_table[w - ucs_a3_big5_table_min]; + } else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) { + s = ucs_i_big5_table[w - ucs_i_big5_table_min]; + } else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) { + s = ucs_r1_big5_table[w - ucs_r1_big5_table_min]; + } else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) { + s = ucs_r2_big5_table[w - ucs_r2_big5_table_min]; + } + + if (w >= 0xE000 && w <= 0xF848) { + int k; + for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { + if (w <= cp950_pua_tbl[k][1]) { + break; + } + } + + int c1 = w - cp950_pua_tbl[k][0]; + if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) { + int c2 = cp950_pua_tbl[k][2] >> 8; + s = ((c1 / 157) + c2) << 8; + c1 %= 157; + s |= c1 + (c1 >= 0x3F ? 0x62 : 0x40); + } else { + s = c1 + cp950_pua_tbl[k][2]; + } + } else if (w == 0xA2 || w == 0xA3 || w == 0x401 || (w >= 0x414 && w <= 0x41C) || (w >= 0x423 && w <= 0x44F) || w == 0x451 || w == 0x2022 || w == 0x203E || w == 0x223C || (w >= 0x2460 && w <= 0x247D) || w == 0x2609 || w == 0x2641 || w == 0x3005 || (w >= 0x302A && w <= 0x30FF) || w == 0xFF64) { + s = 0; + } else if (w == 0xAF) { + s = 0xA1C2; + } else if (w == 0x2CD) { + s = 0xA1C5; + } else if (w == 0x2027) { + s = 0xA145; + } else if (w == 0x2215) { + s = 0xA241; + } else if (w == 0x2295) { + s = 0xA1F2; + } else if (w == 0x2299) { + s = 0xA1F3; + } else if (w == 0x2574) { + s = 0xA15A; + } else if (w == 0xFE51) { + s = 0xA14E; + } else if (w == 0xFE68) { + s = 0xA242; + } else if (w == 0xFF3C) { + s = 0xA240; + } else if (w == 0xFF5E) { + s = 0xA1E3; + } else if (w == 0xFFE0) { + s = 0xA246; + } else if (w == 0xFFE1) { + s = 0xA247; + } else if (w == 0xFFE3) { + s = 0xA1C3; + } else if (w == 0xFF0F) { + s = 0xA1FE; + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } else if (s <= 0x80) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", NULL}; + +static const struct mbfl_convert_vtbl vtbl_big5_wchar = { + mbfl_no_encoding_big5, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_big5_wchar, + mbfl_filt_conv_big5_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_big5 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_big5, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_big5, + mbfl_filt_conv_common_flush, + NULL +}; + +const mbfl_encoding mbfl_encoding_big5 = { + mbfl_no_encoding_big5, + "BIG-5", + "BIG5", + mbfl_encoding_big5_aliases, + mblen_table_81_to_fe, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_big5_wchar, + &vtbl_wchar_big5, + mb_big5_to_wchar, + mb_wchar_to_big5, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_cp950_wchar = { + mbfl_no_encoding_cp950, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_big5_wchar, + mbfl_filt_conv_big5_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp950 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp950, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_big5, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_cp950 = { + mbfl_no_encoding_cp950, + "CP950", + "BIG5", + NULL, + mblen_table_81_to_fe, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_cp950_wchar, + &vtbl_wchar_cp950, + mb_cp950_to_wchar, + mb_wchar_to_cp950, + NULL +}; + +/* + * HZ + */ + +static int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status & 0xf) { + 
/* case 0x00: ASCII */ + /* case 0x10: GB2312 */ + case 0: + if (c == '~') { + filter->status += 2; + } else if (filter->status == 0x10 && ((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77))) { + /* DBCS first char */ + filter->cache = c; + filter->status += 1; + } else if (filter->status == 0 && c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* case 0x11: GB2312 second char */ + case 1: + filter->status &= ~0xf; + c1 = filter->cache; + if (c1 > 0x20 && c1 < 0x7F && c > 0x20 && c < 0x7F) { + s = (c1 - 1)*192 + c + 0x40; /* GB2312 */ + ZEND_ASSERT(s < cp936_ucs_table_size); + if (s == 0x1864) { + w = 0x30FB; + } else if (s == 0x186A) { + w = 0x2015; + } else if (s == 0x186C) { + w = 0x2225; + } else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) { + w = 0; + } else { + w = cp936_ucs_table[s]; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* '~' */ + case 2: + if (c == '}' && filter->status == 0x12) { + filter->status = 0; + } else if (c == '{' && filter->status == 2) { + filter->status = 0x10; + } else if (c == '~' && filter->status == 2) { + CK((*filter->output_function)('~', filter->data)); + filter->status -= 2; + } else if (c == '\n') { + /* "~\n" is a line continuation; no output is needed, nor should we shift modes */ + filter->status -= 2; + } else { + /* Invalid character after ~ */ + filter->status -= 2; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 0x11) { + /* 2-byte character was truncated */ + 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { + if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261 || c == 0x2CA || c == 0x2CB || c == 0x2D9) { + s = 0; + } else { + s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; + } + } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { + if (c == 0x2015) { + s = 0xA1AA; + } else if (c == 0x2010 || c == 0x2013 || c == 0x2014 || c == 0x2016 || c == 0x2025 || c == 0x2035 || + c == 0x2105 || c == 0x2109 || c == 0x2121 || (c >= 0x2170 && c <= 0x2179) || (c >= 0x2196 && c <= 0x2199) || + c == 0x2215 || c == 0x221F || c == 0x2223 || c == 0x2252 || c == 0x2266 || c == 0x2267 || c == 0x2295 || + (c >= 0x2550 && c <= 0x2573) || c == 0x22BF || c == 0x2609 || (c >= 0x2581 && c <= 0x258F) || + (c >= 0x2593 && c <= 0x2595) || c == 0x25BC || c == 0x25BD || (c >= 0x25E2 && c <= 0x25E5)) { + s = 0; + } else { + s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; + } + } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { + if (c == 0x30FB) { + s = 0xA1A4; + } else if (c == 0x3006 || c == 0x3007 || c == 0x3012 || c == 0x3231 || c == 0x32A3 || c >= 0x3300 || + (c >= 0x3018 && c <= 0x3040) || (c >= 0x309B && c <= 0x309E) || (c >= 0x30FC && c <= 0x30FE)) { + s = 0; + } else { + s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; + } + } else if (c >= ucs_i_gb2312_table_min && c < ucs_i_gb2312_table_max) { + s = ucs_i_gb2312_table[c - ucs_i_gb2312_table_min]; + } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { + if (c == 0xFF04) { + s = 0xA1E7; + } else if (c == 0xFF5E) { + s = 0xA1AB; + } else if (c >= 0xFF01 && c <= 0xFF5D) { + s = c - 0xFF01 + 0xA3A1; + } else if (c == 
0xFFE0 || c == 0xFFE1 || c == 0xFFE3 || c == 0xFFE5) { + s = ucs_hff_s_cp936_table[c - 0xFFE0]; + } + } + + if (s & 0x8000) { + s -= 0x8080; + } + + if (s <= 0) { + s = (c == 0) ? 0 : -1; + } else if ((s >= 0x80 && s < 0x2121) || s > 0x8080) { + s = -1; + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)('~', filter->data)); + CK((*filter->output_function)('}', filter->data)); + } + filter->status = 0; + if (s == 0x7E) { + CK((*filter->output_function)('~', filter->data)); + } + CK((*filter->output_function)(s, filter->data)); + } else { /* GB 2312-80 */ + if ((filter->status & 0xFF00) != 0x200) { + CK((*filter->output_function)('~', filter->data)); + CK((*filter->output_function)('{', filter->data)); + } + filter->status = 0x200; + CK((*filter->output_function)((s >> 8) & 0x7F, filter->data)); + CK((*filter->output_function)(s & 0x7F, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if (filter->status & 0xFF00) { + CK((*filter->output_function)('~', filter->data)); + CK((*filter->output_function)('}', filter->data)); + } + filter->status = 0; + return 0; +} + +#define ASCII 0 +#define GB2312 1 + +static size_t mb_hz_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == '~') { + if (p == e) { + break; + } + unsigned char c2 = *p++; + + if (c2 == '}' && *state == GB2312) { + *state = ASCII; + } else if (c2 == '{' && *state == ASCII) { + *state = GB2312; + } else if (c2 == '~' && *state == ASCII) { + *out++ = '~'; + } else if (c2 == '\n') { + /* "~\n" is a line continuation; no output is needed, nor should we shift modes */ + } else { + /* 
Invalid character after ~ */ + *out++ = MBFL_BAD_INPUT; + } + } else if (((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77)) && p < e && *state == GB2312) { + unsigned char c2 = *p++; + + if (c > 0x20 && c < 0x7F && c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 1)*192 + c2 + 0x40; + ZEND_ASSERT(s < cp936_ucs_table_size); + + if (s == 0x1864) { + s = 0x30FB; + } else if (s == 0x186A) { + s = 0x2015; + } else if (s == 0x186C) { + s = 0x2225; + } else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) { + s = 0; + } else { + s = cp936_ucs_table[s]; + } + if (!s) + s = MBFL_BAD_INPUT; + *out++ = s; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c < 0x80 && *state == ASCII) { + *out++ = c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_hz(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { + if (w == 0xB7 || w == 0x144 || w == 0x148 || w == 0x251 || w == 0x261 || w == 0x2CA || w == 0x2CB || w == 0x2D9) { + s = 0; + } else { + s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; + } + } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { + if (w == 0x2015) { + s = 0xA1AA; + } else if (w == 0x2010 || w == 0x2013 || w == 0x2014 || w == 0x2016 || w == 0x2025 || w == 0x2035 || w == 0x2105 || w == 0x2109 || w == 0x2121 || (w >= 0x2170 && w <= 0x2179) || (w >= 0x2196 && w <= 0x2199) || w == 0x2215 || w == 0x221F || w == 0x2223 || w == 0x2252 || w == 0x2266 || w == 0x2267 || w == 0x2295 || (w >= 0x2550 && w <= 0x2573) || w == 0x22BF || w == 0x2609 || (w >= 0x2581 && w <= 0x258F) || (w >= 0x2593 && w <= 0x2595) || w == 0x25BC || w == 0x25BD || (w >= 
0x25E2 && w <= 0x25E5)) { + s = 0; + } else { + s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; + } + } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { + if (w == 0x30FB) { + s = 0xA1A4; + } else if (w == 0x3006 || w == 0x3007 || w == 0x3012 || w == 0x3231 || w == 0x32A3 || w >= 0x3300 || (w >= 0x3018 && w <= 0x3040) || (w >= 0x309B && w <= 0x309E) || (w >= 0x30FC && w <= 0x30FE)) { + s = 0; + } else { + s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; + } + } else if (w >= ucs_i_gb2312_table_min && w < ucs_i_gb2312_table_max) { + s = ucs_i_gb2312_table[w - ucs_i_gb2312_table_min]; + } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { + if (w == 0xFF04) { + s = 0xA1E7; + } else if (w == 0xFF5E) { + s = 0xA1AB; + } else if (w >= 0xFF01 && w <= 0xFF5D) { + s = w - 0xFF01 + 0xA3A1; + } else if (w == 0xFFE0 || w == 0xFFE1 || w == 0xFFE3 || w == 0xFFE5) { + s = ucs_hff_s_cp936_table[w - 0xFFE0]; + } + } + + s &= ~0x8080; + + if ((!s && w) || (s >= 0x80 && s < 0x2121)) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_hz); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s < 0x80) { + /* ASCII */ + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); + out = mb_convert_buf_add2(out, '~', '}'); + buf->state = ASCII; + } + if (s == '~') { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, '~', '~'); + } else { + out = mb_convert_buf_add(out, s); + } + } else { + /* GB 2312-80 */ + if (buf->state != GB2312) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add2(out, '~', '{'); + buf->state = GB2312; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); + } + } + + if (end && buf->state != ASCII) { + /* If not in ASCII state, need to emit closing control chars */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, 2); + out = mb_convert_buf_add2(out, '~', '}'); + } + 
+ MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const struct mbfl_convert_vtbl vtbl_hz_wchar = { + mbfl_no_encoding_hz, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_hz_wchar, + mbfl_filt_conv_hz_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_hz = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_hz, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_hz, + mbfl_filt_conv_any_hz_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_hz = { + mbfl_no_encoding_hz, + "HZ", + "HZ-GB-2312", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_hz_wchar, + &vtbl_wchar_hz, + mb_hz_to_wchar, + mb_wchar_to_hz, + NULL +}; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.h b/ext/mbstring/libmbfl/filters/mbfilter_cjk.h new file mode 100644 index 0000000000000..0749b755e3cce --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.h @@ -0,0 +1,48 @@ +#ifndef MBFL_MBFILTER_CJK_H +#define MBFL_MBFILTER_CJK_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_jis; +extern const mbfl_encoding mbfl_encoding_2022jp; +extern const mbfl_encoding mbfl_encoding_2022jp_kddi; +extern const mbfl_encoding mbfl_encoding_2022jpms; +extern const mbfl_encoding mbfl_encoding_2022jp_2004; +extern const mbfl_encoding mbfl_encoding_cp50220; +extern const mbfl_encoding mbfl_encoding_cp50221; +extern const mbfl_encoding mbfl_encoding_cp50222; +extern const mbfl_encoding mbfl_encoding_2022kr; + +extern const mbfl_encoding mbfl_encoding_sjis; +extern const mbfl_encoding mbfl_encoding_sjis_mac; +extern const mbfl_encoding mbfl_encoding_sjis_docomo; +extern const mbfl_encoding mbfl_encoding_sjis_kddi; +extern const mbfl_encoding mbfl_encoding_sjis_sb; +extern const mbfl_encoding mbfl_encoding_sjis2004; +extern const mbfl_encoding mbfl_encoding_cp932; +extern const mbfl_encoding mbfl_encoding_sjiswin; + +extern const mbfl_encoding mbfl_encoding_euc_jp; +extern const mbfl_encoding mbfl_encoding_eucjp_win; 
+extern const mbfl_encoding mbfl_encoding_eucjp2004; +extern const mbfl_encoding mbfl_encoding_cp51932; +extern const mbfl_encoding mbfl_encoding_euc_cn; +extern const mbfl_encoding mbfl_encoding_euc_tw; +extern const mbfl_encoding mbfl_encoding_euc_kr; +extern const mbfl_encoding mbfl_encoding_uhc; + +extern const mbfl_encoding mbfl_encoding_gb18030; +extern const mbfl_encoding mbfl_encoding_cp936; +extern const mbfl_encoding mbfl_encoding_big5; +extern const mbfl_encoding mbfl_encoding_cp950; +extern const mbfl_encoding mbfl_encoding_hz; + +int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd); +int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd); +int mbfilter_sjis_emoji_sb2unicode(int s, int *snd); + +int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter); +int mbfilter_unicode2sjis_emoji_kddi_sjis(int c, int *s1, mbfl_convert_filter *filter); +int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CJK_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c deleted file mode 100644 index 93c33da9543d0..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ /dev/null @@ -1,1252 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Moriyoshi Koizumi - * - */ - -#include "mbfilter.h" -#include "mbfilter_cp5022x.h" -#include "mbfilter_jis.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" -#include "translit_kana_jisx0201_jisx0208.h" - -static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter); -static size_t mb_cp5022x_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -/* Previously, a dubious 'encoding' called 'cp50220raw' was supported - * This was just CP50220, but the implementation was less strict regarding - * invalid characters; it would silently pass some through - * This 'encoding' only existed in mbstring. 
In case some poor, lost soul is - * still using it, retain minimal support by aliasing it to CP50220 - * - * Further, mbstring also had a made-up encoding called "JIS-ms" - * This was the same as CP5022{0,1,2}, but without their special ways of - * handling conversion of Unicode half-width katakana */ -static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", "JIS-ms", NULL}; - -const mbfl_encoding mbfl_encoding_cp50220 = { - mbfl_no_encoding_cp50220, - "CP50220", - "ISO-2022-JP", - cp50220_aliases, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp50220_wchar, - &vtbl_wchar_cp50220, - mb_cp5022x_to_wchar, - mb_wchar_to_cp50220, - NULL -}; - -const mbfl_encoding mbfl_encoding_cp50221 = { - mbfl_no_encoding_cp50221, - "CP50221", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp50221_wchar, - &vtbl_wchar_cp50221, - mb_cp5022x_to_wchar, - mb_wchar_to_cp50221, - NULL -}; - -const mbfl_encoding mbfl_encoding_cp50222 = { - mbfl_no_encoding_cp50222, - "CP50222", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp50222_wchar, - &vtbl_wchar_cp50222, - mb_cp5022x_to_wchar, - mb_wchar_to_cp50222, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp50220_wchar = { - mbfl_no_encoding_cp50220, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp5022x_wchar, - mbfl_filt_conv_cp5022x_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp50220, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp50220, - mbfl_filt_conv_wchar_cp50220_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_cp50221_wchar = { - mbfl_no_encoding_cp50221, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp5022x_wchar, - mbfl_filt_conv_cp5022x_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp50221 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp50221, - mbfl_filt_conv_common_ctor, - NULL, 
- mbfl_filt_conv_wchar_cp50221, - mbfl_filt_conv_any_jis_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_cp50222_wchar = { - mbfl_no_encoding_cp50222, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp5022x_wchar, - mbfl_filt_conv_cp5022x_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp50222, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp50222, - mbfl_filt_conv_wchar_cp50222_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ - CK((*filter->output_function)(0xa5, filter->data)); - } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ - CK((*filter->output_function)(0x203e, filter->data)); - } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ - CK((*filter->output_function)(0xff40 + c, filter->data)); - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c <= 0x97) { /* kanji first char */ - filter->cache = c; - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - 
filter->status &= ~0xf; - c1 = filter->cache; - if (c > 0x20 && c < 0x7f) { - s = (c1 - 0x21)*94 + c - 0x21; - if (filter->status == 0x80) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= 94 * 94 && s < 114 * 94) { - /* user-defined => PUA (Microsoft extended) */ - w = s - 94*94 + 0xe000; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } else { - if (s >= 0 && s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC */ -/* case 0x02: */ -/* case 0x12: */ -/* case 0x22: */ -/* case 0x82: */ -/* case 0x92: */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - goto retry; - } - break; - - /* ESC $ */ -/* case 0x03: */ -/* case 0x13: */ -/* case 0x23: */ -/* case 0x83: */ -/* case 0x93: */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x24, filter->data)); - goto retry; - } - break; - - /* ESC $ ( */ -/* case 0x04: */ -/* case 0x14: */ -/* case 0x24: */ -/* case 0x84: */ -/* case 0x94: */ - case 4: - if 
(c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x24, filter->data)); - CK((*filter->output_function)(0x28, filter->data)); - goto retry; - } - break; - - /* ESC ( */ -/* case 0x05: */ -/* case 0x15: */ -/* case 0x25: */ -/* case 0x85: */ -/* case 0x95: */ - case 5: - if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x28, filter->data)); - goto retry; - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - /* 2-byte (JIS X 0208 or 0212) character was truncated, or else - * escape sequence was truncated */ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -/* Apply various transforms to input codepoint, such as converting halfwidth katakana - * to fullwidth katakana. `mode` is a bitfield which controls which transforms are - * actually performed. The bit values are defined in translit_kana_jisx0201_jisx0208.h. - * `mode` must not call for transforms which are inverses (i.e. which would cancel - * each other out). - * - * In some cases, successive input codepoints may be merged into one output codepoint. - * (That is the purpose of the `next` parameter.) If the `next` codepoint is consumed - * and should be skipped over, `*consumed` will be set to true. Otherwise, `*consumed` - * will not be modified. 
If there is no following codepoint, `next` should be zero. - * - * Again, in some cases, one input codepoint may convert to two output codepoints. - * If so, the second output codepoint will be stored in `*second`. - * - * Return the resulting codepoint. If none of the requested transforms apply, return - * the input codepoint unchanged. - */ -uint32_t mb_convert_kana_codepoint(uint32_t c, uint32_t next, bool *consumed, uint32_t *second, unsigned int mode) -{ - if ((mode & MBFL_HAN2ZEN_ALL) && c >= 0x21 && c <= 0x7D && c != '"' && c != '\'' && c != '\\') { - return c + 0xFEE0; - } - if ((mode & MBFL_HAN2ZEN_ALPHA) && ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { - return c + 0xFEE0; - } - if ((mode & MBFL_HAN2ZEN_NUMERIC) && c >= '0' && c <= '9') { - return c + 0xFEE0; - } - if ((mode & MBFL_HAN2ZEN_SPACE) && c == ' ') { - return 0x3000; - } - - if (mode & (MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_HIRAGANA)) { - /* Convert Hankaku kana to Zenkaku kana - * Either all Hankaku kana (including katakana and hiragana) will be converted - * to Zenkaku katakana, or to Zenkaku hiragana */ - if ((mode & MBFL_HAN2ZEN_KATAKANA) && (mode & MBFL_HAN2ZEN_GLUE)) { - if (c >= 0xFF61 && c <= 0xFF9F) { - int n = c - 0xFF60; - - if (next >= 0xFF61 && next <= 0xFF9F) { - if (next == 0xFF9E && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { - *consumed = true; - return 0x3001 + hankana2zenkana_table[n]; - } - if (next == 0xFF9E && n == 19) { - *consumed = true; - return 0x30F4; - } - if (next == 0xFF9F && n >= 42 && n <= 46) { - *consumed = true; - return 0x3002 + hankana2zenkana_table[n]; - } - } - - return 0x3000 + hankana2zenkana_table[n]; - } - } - if ((mode & MBFL_HAN2ZEN_HIRAGANA) && (mode & MBFL_HAN2ZEN_GLUE)) { - if (c >= 0xFF61 && c <= 0xFF9F) { - int n = c - 0xFF60; - - if (next >= 0xFF61 && next <= 0xFF9F) { - if (next == 0xFF9E && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { - *consumed = true; - return 0x3001 + hankana2zenhira_table[n]; - } - if (next == 0xFF9F 
&& n >= 42 && n <= 46) { - *consumed = true; - return 0x3002 + hankana2zenhira_table[n]; - } - } - - return 0x3000 + hankana2zenhira_table[n]; - } - } - if ((mode & MBFL_HAN2ZEN_KATAKANA) && c >= 0xFF61 && c <= 0xFF9F) { - return 0x3000 + hankana2zenkana_table[c - 0xFF60]; - } - if ((mode & MBFL_HAN2ZEN_HIRAGANA) && c >= 0xFF61 && c <= 0xFF9F) { - return 0x3000 + hankana2zenhira_table[c - 0xFF60]; - } - } - - if (mode & MBFL_HAN2ZEN_SPECIAL) { /* special ascii to symbol */ - if (c == '\\' || c == 0xA5) { /* YEN SIGN */ - return 0xFFE5; /* FULLWIDTH YEN SIGN */ - } - if (c == 0x7E || c == 0x203E) { - return 0xFFE3; /* FULLWIDTH MACRON */ - } - if (c == '\'') { - return 0x2019; /* RIGHT SINGLE QUOTATION MARK */ - } - if (c == '"') { - return 0x201D; /* RIGHT DOUBLE QUOTATION MARK */ - } - } - - if (mode & (MBFL_ZEN2HAN_ALL | MBFL_ZEN2HAN_ALPHA | MBFL_ZEN2HAN_NUMERIC | MBFL_ZEN2HAN_SPACE)) { - /* Zenkaku to Hankaku */ - if ((mode & MBFL_ZEN2HAN_ALL) && c >= 0xFF01 && c <= 0xFF5D && c != 0xFF02 && c != 0xFF07 && c != 0xFF3C) { - /* all except " ' \ ~ */ - return c - 0xFEE0; - } - if ((mode & MBFL_ZEN2HAN_ALPHA) && ((c >= 0xFF21 && c <= 0xFF3A) || (c >= 0xFF41 && c <= 0xFF5A))) { - return c - 0xFEE0; - } - if ((mode & MBFL_ZEN2HAN_NUMERIC) && (c >= 0xFF10 && c <= 0xFF19)) { - return c - 0xFEE0; - } - if ((mode & MBFL_ZEN2HAN_SPACE) && (c == 0x3000)) { - return ' '; - } - if ((mode & MBFL_ZEN2HAN_ALL) && (c == 0x2212)) { /* MINUS SIGN */ - return '-'; - } - } - - if (mode & (MBFL_ZEN2HAN_KATAKANA | MBFL_ZEN2HAN_HIRAGANA)) { - /* Zenkaku kana to hankaku kana */ - if ((mode & MBFL_ZEN2HAN_KATAKANA) && c >= 0x30A1 && c <= 0x30F4) { - /* Zenkaku katakana to hankaku kana */ - int n = c - 0x30A1; - if (zenkana2hankana_table[n][1]) { - *second = 0xFF00 + zenkana2hankana_table[n][1]; - } - return 0xFF00 + zenkana2hankana_table[n][0]; - } - if ((mode & MBFL_ZEN2HAN_HIRAGANA) && c >= 0x3041 && c <= 0x3093) { - /* Zenkaku hiragana to hankaku kana */ - int n = c - 0x3041; - if 
(zenkana2hankana_table[n][1]) { - *second = 0xFF00 + zenkana2hankana_table[n][1]; - } - return 0xFF00 + zenkana2hankana_table[n][0]; - } - if (c == 0x3001) { - return 0xFF64; /* HALFWIDTH IDEOGRAPHIC COMMA */ - } - if (c == 0x3002) { - return 0xFF61; /* HALFWIDTH IDEOGRAPHIC FULL STOP */ - } - if (c == 0x300C) { - return 0xFF62; /* HALFWIDTH LEFT CORNER BRACKET */ - } - if (c == 0x300D) { - return 0xFF63; /* HALFWIDTH RIGHT CORNER BRACKET */ - } - if (c == 0x309B) { - return 0xFF9E; /* HALFWIDTH KATAKANA VOICED SOUND MARK */ - } - if (c == 0x309C) { - return 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ - } - if (c == 0x30FC) { - return 0xFF70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ - } - if (c == 0x30FB) { - return 0xFF65; /* HALFWIDTH KATAKANA MIDDLE DOT */ - } - } - - if (mode & (MBFL_ZENKAKU_HIRA2KATA | MBFL_ZENKAKU_KATA2HIRA)) { - if ((mode & MBFL_ZENKAKU_HIRA2KATA) && ((c >= 0x3041 && c <= 0x3093) || c == 0x309D || c == 0x309E)) { - /* Zenkaku hiragana to Zenkaku katakana */ - return c + 0x60; - } - if ((mode & MBFL_ZENKAKU_KATA2HIRA) && ((c >= 0x30A1 && c <= 0x30F3) || c == 0x30FD || c == 0x30FE)) { - /* Zenkaku katakana to Zenkaku hiragana */ - return c - 0x60; - } - } - - if (mode & MBFL_ZEN2HAN_SPECIAL) { /* special symbol to ascii */ - if (c == 0xFFE5 || c == 0xFF3C) { /* FULLWIDTH YEN SIGN/FULLWIDTH REVERSE SOLIDUS */ - return '\\'; - } - if (c == 0xFFE3 || c == 0x203E) { /* FULLWIDTH MACRON/OVERLINE */ - return '~'; - } - if (c == 0x2018 || c == 0x2019) { /* LEFT/RIGHT SINGLE QUOTATION MARK*/ - return '\''; - } - if (c == 0x201C || c == 0x201D) { /* LEFT/RIGHT DOUBLE QUOTATION MARK */ - return '"'; - } - } - - return c; -} - -static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter) -{ - int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE; - bool consumed = false; - - if (filter->cache) { - int s = mb_convert_kana_codepoint(filter->cache, c, &consumed, NULL, mode); - filter->cache = consumed ? 
0 : c; - /* Terrible hack to get CP50220 to emit error markers in the proper - * position, not reordering them with subsequent characters */ - filter->filter_function = mbfl_filt_conv_wchar_cp50221; - mbfl_filt_conv_wchar_cp50221(s, filter); - filter->filter_function = mbfl_filt_conv_wchar_cp50220; - if (c == 0 && !consumed) { - (*filter->output_function)(0, filter->data); - } - } else if (c == 0) { - /* This case has to be handled separately, since `filter->cache == 0` means - * no codepoint is cached */ - (*filter->output_function)(0, filter->data); - } else { - filter->cache = c; - } - - return 0; -} - -static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter) -{ - int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE; - - if (filter->cache) { - int s = mb_convert_kana_codepoint(filter->cache, 0, NULL, NULL, mode); - filter->filter_function = mbfl_filt_conv_wchar_cp50221; - mbfl_filt_conv_wchar_cp50221(s, filter); - filter->filter_function = mbfl_filt_conv_wchar_cp50220; - filter->cache = 0; - } - - return mbfl_filt_conv_any_jis_flush(filter); -} - -int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xE000 && c <= 0xE757) { - /* 'private'/'user' codepoints */ - s = c - 0xE000; - s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); - } - - if (s <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE 
SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - } - - /* Above, we do a series of lookups in `ucs_*_jis_table` to find a - * corresponding kuten code for this Unicode codepoint - * If we get zero, that means the codepoint is not in JIS X 0208 - * On the other hand, if we get a result with the high bits set on both - * upper and lower bytes, that is not a code in JIS X 0208 but rather - * in JIS X 0213 - * In either case, check if this codepoint is one of the extensions added - * to JIS X 0208 by MicroSoft (to make CP932) */ - if (s == 0 || ((s & 0x8000) && (s & 0x80))) { - int i; - s = -1; - - for (i = 0; - i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - i++) { - const int oh = cp932ext1_ucs_table_min / 94; - - if (c == cp932ext1_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - - if (s < 0) { - const int oh = cp932ext2_ucs_table_min / 94; - const int cp932ext2_ucs_table_size = - cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - for (i = 0; i < cp932ext2_ucs_table_size; i++) { - if (c == cp932ext2_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - } - - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } - - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0; - } - CK((*filter->output_function)(s, filter->data)); - } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ - if ((filter->status & 0xff00) != 0x500) { - 
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ - filter->status = 0x500; - } - CK((*filter->output_function)(s - 0x80, filter->data)); - } else if (s <= 0x927E) { /* X 0208 + extensions */ - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0x200; - } - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } else if (s < 0x10000) { /* X0212 */ - CK(mbfl_filt_conv_illegal_output(c, filter)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -/* - * wchar => CP50222 - */ -int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xE000 && c <= 0xE757) { - /* 'private'/'user' 
codepoints */ - s = c - 0xE000; - s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); - } - - if (s <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - } - if (s == 0 || ((s & 0x8000) && (s & 0x80))) { - int i; - s = -1; - - for (i = 0; - i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - const int oh = cp932ext1_ucs_table_min / 94; - - if (c == cp932ext1_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - - if (s <= 0) { - const int oh = cp932ext2_ucs_table_min / 94; - const int cp932ext2_ucs_table_size = - cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - for (i = 0; i < cp932ext2_ucs_table_size; i++) { - if (c == cp932ext2_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - } - - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } - - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) == 0x500) { - CK((*filter->output_function)(0x0f, filter->data)); /* SO */ - filter->status = 0; - } else if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0; - } - CK((*filter->output_function)(s, filter->data)); - } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ - if ((filter->status & 0xff00) != 0x500) { - CK((*filter->output_function)(0x0e, filter->data)); /* SI */ - filter->status = 0x500; - } - CK((*filter->output_function)(s - 0x80, 
filter->data)); - } else if (s <= 0x927E) { /* X 0208 */ - if ((filter->status & 0xff00) == 0x500) { - CK((*filter->output_function)(0x0f, filter->data)); /* SO */ - filter->status = 0; - } - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0x200; - } - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } else if (s < 0x10000) { /* X0212 */ - CK(mbfl_filt_conv_illegal_output(c, filter)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) == 0x500) { - CK((*filter->output_function)(0x0f, filter->data)); /* SO */ - filter->status = 0; - } - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) -{ - /* back to latin */ - if ((filter->status & 0xff00) == 0x500) { - CK((*filter->output_function)(0x0f, filter->data)); /* SO */ - } else if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -#define ASCII 0 -#define JISX_0201_LATIN 1 -#define JISX_0201_KANA 2 -#define JISX_0208 3 -#define JISX_0212 4 - -static size_t mb_cp5022x_to_wchar(unsigned char **in, size_t *in_len, 
uint32_t *buf, size_t bufsize, unsigned int *state) -{ - ZEND_ASSERT(bufsize >= 3); - - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - /* Escape sequence */ - if ((e - p) < 2) { - *out++ = MBFL_BAD_INPUT; - /* Duplicate error-handling behavior of legacy code */ - if (p < e && (*p == '(' || *p == '$')) - p++; - continue; - } - unsigned char c2 = *p++; - if (c2 == '$') { - unsigned char c3 = *p++; - if (c3 == '@' || c3 == 'B') { - *state = JISX_0208; - } else if (c3 == '(') { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c4 = *p++; - if (c4 == '@' || c4 == 'B') { - *state = JISX_0208; - } else if (c4 == 'D') { - *state = JISX_0212; - } else { - if ((limit - out) < 3) { - p -= 4; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '$'; - *out++ = '('; - p--; - } - } else { - if ((limit - out) < 2) { - p -= 3; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '$'; - p--; - } - } else if (c2 == '(') { - unsigned char c3 = *p++; - if (c3 == 'B' || c3 == 'H') { - *state = ASCII; - } else if (c3 == 'J') { - *state = JISX_0201_LATIN; - } else if (c3 == 'I') { - *state = JISX_0201_KANA; - } else { - if ((limit - out) < 2) { - p -= 3; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '('; - p--; - } - } else { - *out++ = MBFL_BAD_INPUT; - p--; - } - } else if (c == 0xE) { - *state = JISX_0201_KANA; - } else if (c == 0xF) { - *state = ASCII; - } else if (*state == JISX_0201_LATIN && c == 0x5C) { /* YEN SIGN */ - *out++ = 0xA5; - } else if (*state == JISX_0201_LATIN && c == 0x7E) { /* OVER LINE */ - *out++ = 0x203E; - } else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) { - *out++ = 0xFF40 + c; - } else if (*state >= JISX_0208 && c > 0x20 && c <= 0x97) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - if (c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - 
uint32_t w = 0; - if (*state == JISX_0208) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= 94*94 && s < 114*94) { - /* MicroSoft extension */ - w = s - 94*94 + 0xE000; - } - if (!w) - w = MBFL_BAD_INPUT; - } else { - if (s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - } - if (!w) - w = MBFL_BAD_INPUT; - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static unsigned int lookup_wchar(uint32_t w) -{ - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w <= 0xE757) { - /* Private Use Area codepoints */ - s = w - 0xE000; - s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x1005C; - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } 
else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w == 0) { - return 0; - } - } - - /* Above, we do a series of lookups in `ucs_*_jis_table` to find a - * corresponding kuten code for this Unicode codepoint - * If we get zero, that means the codepoint is not in JIS X 0208 - * On the other hand, if we get a result with the high bits set on both - * upper and lower bytes, that is not a code in JIS X 0208 but rather - * in JIS X 0213 - * In either case, check if this codepoint is one of the extensions added - * to JIS X 0208 by MicroSoft (to make CP932) */ - if (!s || s >= 0x8080) { - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - return (((i / 94) + (cp932ext1_ucs_table_min / 94) + 0x21) << 8) + (i % 94) + 0x21; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (w == cp932ext2_ucs_table[i]) { - return (((i / 94) + (cp932ext2_ucs_table_min / 94) + 0x21) << 8) + (i % 94) + 0x21; - } - } - } - - return s; -} - -static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - - if (buf->state & 0xFFFF00) { - /* Reprocess cached codepoint */ - w = buf->state >> 8; - buf->state &= 0xFF; - goto reprocess_codepoint; - } - - while (len--) { - w = *in++; -reprocess_codepoint: - - if (w >= 0xFF61 && w <= 0xFF9F && !len && !end) { - /* This codepoint may need to combine with the next one, - * but the 'next one' will come in a separate buffer */ - buf->state |= w << 8; - break; - } - - bool consumed = false; - w = mb_convert_kana_codepoint(w, len ? 
*in : 0, &consumed, NULL, MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE); - if (consumed) { - /* Two successive codepoints were converted into one */ - in++; len--; consumed = false; - } - - unsigned int s = lookup_wchar(w); - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); - } else if (s < 0x80) { - /* ASCII */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != ASCII) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA0 && s < 0xE0) { - /* JISX 0201 Kana */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != JISX_0201_KANA) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX_0201_KANA; - } - out = mb_convert_buf_add(out, s - 0x80); - } else if (s <= 0x927E) { - /* JISX 0208 Kanji */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - if (buf->state != JISX_0208) { - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else if (s >= 0x10000) { - /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != JISX_0201_LATIN) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = lookup_wchar(w); - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, 
limit, w, mb_wchar_to_cp50221); - } else if (s < 0x80) { - /* ASCII */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != ASCII) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA0 && s < 0xE0) { - /* JISX 0201 Kana */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != JISX_0201_KANA) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX_0201_KANA; - } - out = mb_convert_buf_add(out, s - 0x80); - } else if (s <= 0x927E) { - /* JISX 0208 Kanji */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - if (buf->state != JISX_0208) { - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else if (s >= 0x10000) { - /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != JISX_0201_LATIN) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = lookup_wchar(w); - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50222); - } else if (s < 0x80) { - /* ASCII */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state == JISX_0201_KANA) { - out = mb_convert_buf_add(out, 0xF); - buf->state = ASCII; - } else if (buf->state != ASCII) { - out = 
mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA0 && s < 0xE0) { - /* JISX 0201 Kana */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - if (buf->state != JISX_0201_KANA) { - out = mb_convert_buf_add(out, 0xE); - buf->state = JISX_0201_KANA; - } - out = mb_convert_buf_add(out, s - 0x80); - } else if (s <= 0x927E) { - /* JISX 0208 Kanji */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); - if (buf->state == JISX_0201_KANA) { - out = mb_convert_buf_add(out, 0xF); - } - if (buf->state != JISX_0208) { - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else if (s >= 0x10000) { - /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - if (buf->state == JISX_0201_KANA) { - out = mb_convert_buf_add(out, 0xF); - } - if (buf->state != JISX_0201_LATIN) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50222); - } - } - - if (end) { - if (buf->state == JISX_0201_KANA) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 1); - out = mb_convert_buf_add(out, 0xF); - } else if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h deleted file mode 100644 index 0cc90f4b45889..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_CP50221_h -#define MBFL_MBFILTER_CP50221_h - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_cp50220; -extern const mbfl_encoding mbfl_encoding_cp50221; -extern const mbfl_encoding mbfl_encoding_cp50222; - -extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220; -extern const struct mbfl_convert_vtbl vtbl_cp50221_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp50221; -extern const struct mbfl_convert_vtbl vtbl_cp50222_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp50222; - -int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_CP50221_h */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c deleted file mode 100644 index d3aae8b10f56e..0000000000000 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c +++ /dev/null @@ -1,412 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_cp51932.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" - -static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; - -const mbfl_encoding mbfl_encoding_cp51932 = { - mbfl_no_encoding_cp51932, - "CP51932", - "CP51932", - mbfl_encoding_cp51932_aliases, - mblen_table_eucjp, - 0, - &vtbl_cp51932_wchar, - &vtbl_wchar_cp51932, - mb_cp51932_to_wchar, - mb_wchar_to_cp51932, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp51932_wchar = { - mbfl_no_encoding_cp51932, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp51932_wchar, - mbfl_filt_conv_cp51932_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp51932 = { - mbfl_no_encoding_wchar, - 
mbfl_no_encoding_cp51932, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp51932, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -/* - * cp51932 => wchar - */ -int -mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c >= 0xA1 && c <= 0xFE) { /* CP932, first byte */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* got first half */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xa0 && c < 0xff) { - w = 0; - s = (c1 - 0xa1)*94 + c - 0xa1; - if (s <= 137) { - if (s == 31) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xffe0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xffe1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xffe2; /* FULLWIDTH NOT SIGN */ - } - } - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - } - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* got 0x8e, X0201 kana */ 
- filter->status = 0; - if (c > 0xa0 && c < 0xe0) { - w = 0xfec0 + c; - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - /* Input string was truncated */ - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - filter->status = 0; - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -/* - * wchar => cp51932 - */ -int -mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1; - - s1 = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */ - if (s1 <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } else { - s1 = -1; - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - if (c == cp932ext1_ucs_table[c1]) { - s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); - break; 
- } - c1++; - } - if (s1 < 0) { - c1 = 0; - c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext2_ucs_table[c1]) { - s1 = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); - break; - } - c1++; - } - } - } - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* latin */ - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x100) { /* kana */ - CK((*filter->output_function)(0x8e, filter->data)); - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x8080) { /* X 0208 */ - CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xFE && p < e) { - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - 
cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8E && p < e) { - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xDF) { - *out++ = 0xFEC0 + c2; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s >= 0x8080) s = 0; /* We don't support JIS X0213 */ - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else { - for (int i 
= 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (cp932ext1_ucs_table[i] == w) { - s = ((i/94 + 0x2D) << 8) + (i%94) + 0x21; - goto found_it; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (cp932ext2_ucs_table[i] == w) { - s = ((i/94 + 0x79) << 8) + (i%94) + 0x21; - goto found_it; - } - } - } -found_it: ; - } - - if (!s || s >= 0x8080) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp51932); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else if (s < 0x100) { - out = mb_convert_buf_add2(out, 0x8E, s); - } else { - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c deleted file mode 100644 index 506c24393906d..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ /dev/null @@ -1,618 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -/* CP932 is Microsoft's version of Shift-JIS. - * - * What we call "SJIS-win" is a variant of CP932 which maps U+00A5 - * and U+203E the same way as eucJP-win; namely, instead of mapping - * U+00A5 (YEN SIGN) to 0x5C and U+203E (OVERLINE) to 0x7E, - * these codepoints are mapped to appropriate JIS X 0208 characters. - * - * When converting from Shift-JIS to Unicode, there is no difference - * between CP932 and "SJIS-win". - * - * Additional facts: - * - * • In the libmbfl library which formed the base for mbstring, "CP932" and - * "SJIS-win" were originally aliases. The differing mappings were added in - * December 2002. The libmbfl author later stated that this was done so that - * "CP932" would comply with a certain specification, while "SJIS-win" would - * maintain the existing mappings. He does not remember which specification - * it was. - * • The WHATWG specification for "Shift_JIS" (followed by web browsers) - * agrees with our mappings for "CP932". - * • Microsoft Windows' "best-fit" mappings for CP932 (via the - * WideCharToMultiByte API) convert U+00A5 to 0x5C, which also agrees with - * our mappings for "CP932". - * • glibc's iconv converts U+203E to CP932 0x7E, which again agrees with - * our mappings for "CP932". - * • When converting Shift-JIS to CP932, the conversion goes through Unicode. - * Shift-JIS 0x7E converts to U+203E, so mapping U+203E to 0x7E means that - * 0x7E will go to 0x7E when converting Shift-JIS to CP932. 
- */ - -#include "mbfilter.h" -#include "mbfilter_cp932.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" - -static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -}; - -static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL}; -static const char *mbfl_encoding_sjiswin_aliases[] = {"SJIS-ms", "SJIS-open", NULL}; - -const mbfl_encoding mbfl_encoding_cp932 = { - mbfl_no_encoding_cp932, - "CP932", - "Shift_JIS", - mbfl_encoding_cp932_aliases, - mblen_table_sjis, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp932_wchar, - &vtbl_wchar_cp932, - mb_cp932_to_wchar, - mb_wchar_to_cp932, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp932_wchar = { - mbfl_no_encoding_cp932, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - 
mbfl_filt_conv_cp932_wchar, - mbfl_filt_conv_cp932_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp932 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp932, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp932, - mbfl_filt_conv_common_flush, - NULL, -}; - -const mbfl_encoding mbfl_encoding_sjiswin = { - mbfl_no_encoding_sjiswin, - "SJIS-win", - "Shift_JIS", - mbfl_encoding_sjiswin_aliases, - mblen_table_sjis, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjiswin_wchar, - &vtbl_wchar_sjiswin, - mb_cp932_to_wchar, - mb_wchar_to_sjiswin, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = { - mbfl_no_encoding_sjiswin, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp932_wchar, - mbfl_filt_conv_cp932_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjiswin, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjiswin, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - s1--; \ - s1 >>= 1; \ - if ((c1) < 0x5f) { \ - s1 += 0x71; \ - } else { \ - s1 += 0xb1; \ - } \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - if (s1 < 0xa0) { \ - s1 -= 0x81; \ - } else { \ - s1 -= 0xc1; \ - } \ - s1 <<= 1; \ - s1 += 0x21; \ - s2 = c2; \ - if (s2 < 0x9f) { \ - if (s2 < 0x7f) { \ - s2++; \ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, s1, s2, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* kana */ - 
CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* kanji second char */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0x40 && c <= 0xfc && c != 0x7f) { - w = 0; - SJIS_DECODE(c1, c, s1, s2); - s = (s1 - 0x21)*94 + s2 - 0x21; - if (s <= 137) { - if (s == 31) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xffe0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xffe1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xffe2; /* FULLWIDTH NOT SIGN */ - } - } - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ - w = s - (94*94) + 0xe000; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - 
(*filter->output_function)(MBFL_BAD_INPUT, filter->data); - filter->status = 0; - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1, s2; - - s1 = 0; - s2 = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c == 0x203E) { - s1 = 0x7E; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ - s1 = c - 0xe000; - c1 = s1/94 + 0x7f; - c2 = s1%94 + 0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - if (s1 <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x5C; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } - } - if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - if (c == cp932ext1_ucs_table[c1]) { - s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); - break; - } - c1++; - } - if (s1 <= 0) { - c1 = 0; - c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext3_ucs_table[c1]) { - s1 = ((c1/94 + 0x93) << 8) + 
(c1%94 + 0x21); - break; - } - c1++; - } - } - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - s1 = -1; - } - } - if (s1 >= 0) { - if (s1 < 0x100) { /* latin or kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* kanji */ - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter) -{ - if (c == 0xA5) { - CK((*filter->output_function)(0x81, filter->data)); - CK((*filter->output_function)(0x8F, filter->data)); - } else if (c == 0x203E) { - CK((*filter->output_function)(0x81, filter->data)); - CK((*filter->output_function)(0x50, filter->data)); - } else { - return mbfl_filt_conv_wchar_cp932(c, filter); - } - return 0; -} - -static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c > 0xA0 && c < 0xE0) { - /* Kana */ - *out++ = 0xFEC0 + c; - } else if (c > 0x80 && c < 0xFD && c != 0xA0 && p < e) { - unsigned char c2 = *p++; - - if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) { - unsigned int s1, s2, w = 0; - SJIS_DECODE(c, c2, s1, s2); - unsigned int s = (s1 - 0x21)*94 + s2 - 0x21; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH 
NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= (94*94) && s < (114*94)) { - w = s - (94*94) + 0xE000; - } - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s1 = 0, s2 = 0, c1, c2; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w == 0x203E) { - s1 = 0x7E; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - s1 = w - 0xE000; - c1 = s1/94 + 0x7F; - c2 = s1%94 + 0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - - if (w == 0xA5) { /* YEN SIGN */ - s1 = 0x5C; - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215D; - } else if 
(w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } else if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } - - if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */ - for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (cp932ext1_ucs_table[i] == w) { - s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21); - goto emit_output; - } - } - - for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { - if (cp932ext3_ucs_table[i] == w) { - s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21); - goto emit_output; - } - } - - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - -emit_output: - if (s1 < 0x100) { - out = mb_convert_buf_add(out, s1); - } else { - c1 = (s1 >> 8) & 0xFF; - c2 = s1 & 0xFF; - SJIS_ENCODE(c1, c2, s1, s2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s1 = 0, s2 = 0, c1, c2; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - s1 = w - 0xE000; - c1 = s1/94 + 0x7F; - c2 = s1%94 + 0x21; 
- s1 = (c1 << 8) | c2; - s2 = 1; - } - - if (w == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } else if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } - - if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */ - for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (cp932ext1_ucs_table[i] == w) { - s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21); - goto emit_output; - } - } - - for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { - if (cp932ext3_ucs_table[i] == w) { - s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21); - goto emit_output; - } - } - - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - -emit_output: - if (s1 < 0x100) { - out = mb_convert_buf_add(out, s1); - } else { - c1 = (s1 >> 8) & 0xFF; - c2 = s1 & 0xFF; - SJIS_ENCODE(c1, c2, s1, s2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.h b/ext/mbstring/libmbfl/filters/mbfilter_cp932.h deleted file mode 100644 index 8dce3ab9e91d8..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_CP932_H -#define MBFL_MBFILTER_CP932_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_cp932; -extern const struct mbfl_convert_vtbl vtbl_cp932_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp932; - -extern const mbfl_encoding mbfl_encoding_sjiswin; -extern const struct mbfl_convert_vtbl vtbl_sjiswin_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjiswin; - -int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_CP932_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c deleted file mode 100644 index ba3e6c6436708..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c +++ /dev/null @@ -1,439 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this file was separated from mbfilter_cn.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_cp936.h" -#define UNICODE_TABLE_CP936_DEF -#include "unicode_table_cp936.h" - -static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_cp936_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp936(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - - -static const unsigned char mblen_table_cp936[] = { /* 0x81-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_cp936_aliases[] = {"CP-936", "GBK", NULL}; - -const mbfl_encoding mbfl_encoding_cp936 = { - mbfl_no_encoding_cp936, - "CP936", - "CP936", - mbfl_encoding_cp936_aliases, - mblen_table_cp936, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp936_wchar, - &vtbl_wchar_cp936, - mb_cp936_to_wchar, - mb_wchar_to_cp936, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp936_wchar = { - mbfl_no_encoding_cp936, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp936_wchar, - mbfl_filt_conv_cp936_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp936 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp936, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp936, - mbfl_filt_conv_common_flush, - NULL, -}; - - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter) -{ - int k; - int c1, c2, w = -1; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c == 0x80) { /* euro sign */ - CK((*filter->output_function)(0x20ac, filter->data)); - } else if (c < 0xff) { /* dbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { /* 0xff */ - CK((*filter->output_function)(0xf8f5, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - c1 = filter->cache; - - if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && - (c >= 0xa1 && c <= 0xfe)) { - /* UDA part1,2: U+E000-U+E4C5 */ - w = 94*(c1 >= 0xf8 ? 
c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000; - CK((*filter->output_function)(w, filter->data)); - } else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { - /* UDA part3 : U+E4C6-U+E765*/ - w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6; - CK((*filter->output_function)(w, filter->data)); - } - - c2 = (c1 << 8) | c; - - if (w <= 0 && - ((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) || - (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) || - (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) { - for (k = 0; k < mbfl_cp936_pua_tbl_max; k++) { - if (c2 >= mbfl_cp936_pua_tbl[k][2] && - c2 <= mbfl_cp936_pua_tbl[k][2] + - mbfl_cp936_pua_tbl[k][1] - mbfl_cp936_pua_tbl[k][0]) { - w = c2 - mbfl_cp936_pua_tbl[k][2] + mbfl_cp936_pua_tbl[k][0]; - CK((*filter->output_function)(w, filter->data)); - break; - } - } - } - - if (w <= 0) { - if (c1 < 0xff && c1 > 0x80 && c >= 0x40 && c < 0xff && c != 0x7f) { - w = (c1 - 0x81)*192 + c - 0x40; - ZEND_ASSERT(w < cp936_ucs_table_size); - CK((*filter->output_function)(cp936_ucs_table[w], filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter) -{ - int k, k1, k2; - int c1, s = 0; - - if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { - /* U+0000 - U+0451 */ - s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; - } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { - /* U+2000 - U+26FF */ - if (c == 0x203e) { - s = 0xa3fe; - } else if (c == 0x2218) { - s = 0xa1e3; - } else if 
(c == 0x223c) { - s = 0xa1ab; - } else { - s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; - } - } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { - /* U+2F00 - U+33FF */ - s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; - } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { - /* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */ - s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; - } else if (c >= 0xe000 && c <= 0xe864) { /* PUA */ - if (c < 0xe766) { - if (c < 0xe4c6) { - c1 = c - 0xe000; - s = (c1 % 94) + 0xa1; c1 /= 94; - s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8; - } else { - c1 = c - 0xe4c6; - s = ((c1 / 96) + 0xa1) << 8; c1 %= 96; - s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40); - } - } else { - /* U+E766..U+E864 */ - k1 = 0; k2 = mbfl_cp936_pua_tbl_max; - while (k1 < k2) { - k = (k1 + k2) >> 1; - if (c < mbfl_cp936_pua_tbl[k][0]) { - k2 = k; - } else if (c > mbfl_cp936_pua_tbl[k][1]) { - k1 = k + 1; - } else { - s = c - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2]; - break; - } - } - } - } else if (c == 0xf8f5) { - s = 0xff; - } else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) { - /* U+F900-FA2F CJK Compatibility Ideographs */ - s = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min]; - } else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) { - s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min]; - } else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) { - s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min]; /* U+FE50-FE6F Small Form Variants */ - } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { - /* U+FF00-FFFF HW/FW Forms */ - if (c == 0xff04) { - s = 0xa1e7; - } else if (c == 0xff5e) { - s = 0xa1ab; - } else if (c >= 0xff01 && c <= 0xff5d) { - s = c - 0xff01 + 0xa3a1; - } else if (c >= 0xffe0 && c <= 0xffe5) { - s = ucs_hff_s_cp936_table[c-0xffe0]; - } - } - - if (s <= 0) { - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - 
} - } - - if (s >= 0) { - if (s <= 0x80 || s == 0xff) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_cp936_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c == 0x80) { - *out++ = 0x20AC; /* Euro sign */ - } else if (c < 0xFF) { - if (p >= e) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned char c2 = *p++; - if (c2 < 0x40 || c2 == 0x7F || c2 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && c2 >= 0xA1) { - /* UDA part 1, 2: U+E000-U+E4C5 */ - *out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000; - } else if (c >= 0xA1 && c <= 0xA7 && c2 < 0xA1) { - /* UDA part 3: U+E4C6-U+E765*/ - *out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 
0x41 : 0x40) + 0xE4C6; - } else { - unsigned int w = (c - 0x81)*192 + c2 - 0x40; /* Convert c, c2 into GB 2312 table lookup index */ - - /* For CP936 and GB18030, certain GB 2312 byte combinations are mapped to PUA codepoints, - * whereas the same combinations aren't mapped to any codepoint for HZ and EUC-CN - * To avoid duplicating the entire GB 2312 -> Unicode lookup table, we have three - * auxiliary tables which are consulted instead for specific ranges of lookup indices */ - if (w >= 0x192B) { - if (w <= 0x1EBE) { - *out++ = cp936_pua_tbl1[w - 0x192B]; - continue; - } else if (w >= 0x413A) { - if (w <= 0x413E) { - *out++ = cp936_pua_tbl2[w - 0x413A]; - continue; - } else if (w >= 0x5DD0 && w <= 0x5E20) { - *out++ = cp936_pua_tbl3[w - 0x5DD0]; - continue; - } - } - } - - ZEND_ASSERT(w < cp936_ucs_table_size); - *out++ = cp936_ucs_table[w]; - } - } else { - *out++ = 0xF8F5; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_cp936(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { - /* U+0000-U+0451 */ - s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; - } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { - /* U+2000-U+26FF */ - if (w == 0x203E) { - s = 0xA3FE; - } else if (w == 0x2218) { - s = 0xA1E3; - } else if (w == 0x223C) { - s = 0xA1AB; - } else { - s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; - } - } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { - /* U+2F00-U+33FF */ - s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; - } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { - /* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */ - s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; - } 
else if (w >= 0xE000 && w <= 0xE864) { - /* PUA */ - if (w < 0xe766) { - if (w < 0xe4c6) { - unsigned int c1 = w - 0xE000; - s = (c1 % 94) + 0xA1; - c1 /= 94; - s |= (c1 < 0x6 ? c1 + 0xAA : c1 + 0xF2) << 8; - } else { - unsigned int c1 = w - 0xE4C6; - s = ((c1 / 96) + 0xA1) << 8; - c1 %= 96; - s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40); - } - } else { - /* U+E766-U+E864 */ - unsigned int k1 = 0; - unsigned int k2 = mbfl_cp936_pua_tbl_max; - while (k1 < k2) { - int k = (k1 + k2) >> 1; - if (w < mbfl_cp936_pua_tbl[k][0]) { - k2 = k; - } else if (w > mbfl_cp936_pua_tbl[k][1]) { - k1 = k + 1; - } else { - s = w - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2]; - break; - } - } - } - } else if (w == 0xF8F5) { - s = 0xFF; - } else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) { - /* U+F900-U+FA2F CJK Compatibility Ideographs */ - s = ucs_ci_cp936_table[w - ucs_ci_cp936_table_min]; - } else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) { - s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min]; - } else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) { - /* U+FE50-U+FE6F Small Form Variants */ - s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min]; - } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { - /* U+FF00-U+FFFF HW/FW Forms */ - if (w == 0xFF04) { - s = 0xA1E7; - } else if (w == 0xFF5E) { - s = 0xA1AB; - } else if (w >= 0xFF01 && w <= 0xFF5D) { - s = w - 0xFF01 + 0xA3A1; - } else if (w >= 0xFFE0 && w <= 0xFFE5) { - s = ucs_hff_s_cp936_table[w - 0xFFE0]; - } - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp936); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } - } else if (s <= 0x80 || s == 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git 
a/ext/mbstring/libmbfl/filters/mbfilter_cp936.h b/ext/mbstring/libmbfl/filters/mbfilter_cp936.h deleted file mode 100644 index d10391f5d2201..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_cn.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_CP936_H -#define MBFL_MBFILTER_CP936_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_cp936; -extern const struct mbfl_convert_vtbl vtbl_cp936_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp936; - -int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_CP936_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c deleted file mode 100644 index d8181d7f7c30d..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_cn.c - * by Moriyoshi Koizumi on 4 Dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_cn.h" - -#include "unicode_table_cp936.h" - -static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", "gb2312", NULL}; - -const mbfl_encoding mbfl_encoding_euc_cn = { - mbfl_no_encoding_euc_cn, - "EUC-CN", - "CN-GB", - mbfl_encoding_euc_cn_aliases, - mblen_table_euccn, - 0, - &vtbl_euccn_wchar, - &vtbl_wchar_euccn, - mb_euccn_to_wchar, - mb_wchar_to_euccn, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_euccn_wchar = { - mbfl_no_encoding_euc_cn, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_euccn_wchar, - mbfl_filt_conv_euccn_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_euccn = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_euc_cn, - mbfl_filt_conv_common_ctor, - NULL, - 
mbfl_filt_conv_wchar_euccn, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if ((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) { /* dbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xA0 && c < 0xFF) { - w = (c1 - 0x81)*192 + c - 0x40; - ZEND_ASSERT(w < cp936_ucs_table_size); - if (w == 0x1864) { - w = 0x30FB; - } else if (w == 0x186A) { - w = 0x2015; - } else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) { - w = 0; - } else { - w = cp936_ucs_table[w]; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { - if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261) { - s = 0; - } else { - s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; - } - } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { - if (c == 0x2015) { - s = 0xA1AA; - } else if (c == 0x2014 || (c >= 0x2170 && c <= 0x2179)) { - s = 0; - } else { - s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; - } - } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { - if (c == 0x30FB) { - s = 0xA1A4; - } else { - s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; - } - } 
else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { - s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; - } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { - if (c == 0xFF04) { - s = 0xA1E7; - } else if (c == 0xFF5E) { - s = 0xA1AB; - } else if (c >= 0xFF01 && c <= 0xFF5D) { - s = c - 0xFF01 + 0xA3A1; - } else if (c >= 0xFFE0 && c <= 0xFFE5) { - s = ucs_hff_s_cp936_table[c - 0xFFE0]; - } - } - - /* exclude CP936 extensions */ - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = 0; - } - - if (s <= 0) { - if (c < 0x80) { - s = c; - } else if (s <= 0) { - s = -1; - } - } - - if (s >= 0) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s & 0xFF, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) && p < e) { - unsigned char c2 = *p++; - - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int w = (c - 0x81)*192 + c2 - 0x40; - ZEND_ASSERT(w < cp936_ucs_table_size); - if (w == 0x1864) { - w = 0x30FB; - } else if (w == 0x186A) { - w = 0x2015; - } else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 
0x1DBB && w <= 0x1DC4)) { - w = 0; - } else { - w = cp936_ucs_table[w]; - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { - if (w != 0xB7 && w != 0x144 && w != 0x148 && w != 0x251 && w != 0x261) { - s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; - } - } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { - if (w == 0x2015) { - s = 0xA1AA; - } else if (w != 0x2014 && (w < 0x2170 || w > 0x2179)) { - s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; - } - } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { - if (w == 0x30FB) { - s = 0xA1A4; - } else { - s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; - } - } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { - s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; - } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { - if (w == 0xFF04) { - s = 0xA1E7; - } else if (w == 0xFF5E) { - s = 0xA1AB; - } else if (w >= 0xFF01 && w <= 0xFF5D) { - s = w - 0xFF01 + 0xA3A1; - } else if (w >= 0xFFE0 && w <= 0xFFE5) { - s = ucs_hff_s_cp936_table[w - 0xFFE0]; - } - } - - /* Exclude CP936 extensions */ - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = 0; - } - - if (!s) { - if (w < 0x80) { - out = mb_convert_buf_add(out, w); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euccn); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else { - out = mb_convert_buf_add2(out, (s >> 8) 
& 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h deleted file mode 100644 index 7ef92d8b4b87b..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_euc_cn.h - * by Moriyoshi Koizumi on 4 Dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_EUC_CN_H -#define MBFL_MBFILTER_EUC_CN_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_euc_cn; -extern const struct mbfl_convert_vtbl vtbl_euccn_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_euccn; - -int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_CN_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c deleted file mode 100644 index d9b1362d15f93..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_jp.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" - -static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL}; - -const mbfl_encoding mbfl_encoding_euc_jp = { - mbfl_no_encoding_euc_jp, - "EUC-JP", - "EUC-JP", - mbfl_encoding_euc_jp_aliases, - mblen_table_eucjp, - 0, - &vtbl_eucjp_wchar, - &vtbl_wchar_eucjp, - mb_eucjp_to_wchar, - mb_wchar_to_eucjp, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_eucjp_wchar = { - mbfl_no_encoding_euc_jp, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_eucjp_wchar, - mbfl_filt_conv_eucjp_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_eucjp = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_euc_jp, - 
mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_eucjp, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -/* - * EUC-JP => wchar - */ -int -mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w = 0; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xff) { /* X 0208 first char */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0212 first char */ - filter->status = 3; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* got first half */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xa0 && c < 0xff) { - s = (c1 - 0xa1)*94 + c - 0xa1; - if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - if (!w) - w = MBFL_BAD_INPUT; - } else { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* got 0x8e */ - filter->status = 0; - if (c > 0xa0 && c < 0xe0) { - w = 0xfec0 + c; - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* got 0x8f, JIS X 0212 first byte */ - filter->status++; - filter->cache = c; - break; - - case 4: /* got 0x8f, JIS X 0212 second byte */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xA0 && c < 0xFF && c1 > 0xA0 && c1 < 0xFF) { - s = (c1 - 0xa1)*94 + c - 0xa1; - if (s >= 0 && s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - if (!w) - w = MBFL_BAD_INPUT; - } else { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - 
break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - filter->status = 0; - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -/* - * wchar => EUC-JP - */ -int -mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c == 0xAF) { /* U+00AF is MACRON */ - s = 0xA2B4; /* Use JIS X 0212 overline */ - } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - if (s <= 0) { - if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } else if (c == 0) { - s = 0; - } else { - s = -1; - } - } - if (s >= 0) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x100) { /* kana */ - CK((*filter->output_function)(0x8e, filter->data)); - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x8080) { /* X 0208 */ - CK((*filter->output_function)(((s >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s & 0xff) | 0x80, filter->data)); - } else { /* X 0212 */ - CK((*filter->output_function)(0x8f, 
filter->data)); - CK((*filter->output_function)(((s >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s & 0xff) | 0x80, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xFE && p < e) { - /* JISX 0208 */ - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int s = (c - 0xA1)*94 + c2 - 0xA1; - if (s < jisx0208_ucs_table_size) { - uint32_t w = jisx0208_ucs_table[s]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8E && p < e) { - /* Kana */ - unsigned char c2 = *p++; - *out++ = (c2 >= 0xA1 && c2 <= 0xDF) ? 
0xFEC0 + c2 : MBFL_BAD_INPUT; - } else if (c == 0x8F) { - /* JISX 0212 */ - if ((e - p) >= 2) { - unsigned char c2 = *p++; - unsigned char c3 = *p++; - if (c3 >= 0xA1 && c3 <= 0xFE && c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1; - if (s < jisx0212_ucs_table_size) { - uint32_t w = jisx0212_ucs_table[s]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - p = e; /* Jump to end of string */ - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w == 0xAF) { /* U+00AF is MACRON */ - s = 0xA2B4; /* Use JIS X 0212 overline */ - } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s == 0) { - if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } else { - 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - } - - if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else if (s < 0x100) { - out = mb_convert_buf_add2(out, 0x8E, s); - } else if (s < 0x8080) { - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); - out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h deleted file mode 100644 index cc7aa3a6bff49..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_EUC_JP_H -#define MBFL_MBFILTER_EUC_JP_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_euc_jp; -extern const struct mbfl_convert_vtbl vtbl_eucjp_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp; - -int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_JP_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h deleted file mode 100644 index e86fad9564cd2..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.h - * by rui hirokawa on 15 aug 2011. 
- * - */ - -#ifndef MBFL_MBFILTER_EUC_JP_2004_H -#define MBFL_MBFILTER_EUC_JP_2004_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_eucjp2004; -extern const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004; - -#endif /* MBFL_MBFILTER_EUC_JP_2004_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c deleted file mode 100644 index 96b9546dde105..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_jp_win.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" - -static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", "eucJP-ms", NULL}; - -const mbfl_encoding mbfl_encoding_eucjp_win = { - mbfl_no_encoding_eucjp_win, - "eucJP-win", - "EUC-JP", - mbfl_encoding_eucjp_win_aliases, - mblen_table_eucjp, - 0, - &vtbl_eucjpwin_wchar, - &vtbl_wchar_eucjpwin, - mb_eucjpwin_to_wchar, - mb_wchar_to_eucjpwin, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = { - mbfl_no_encoding_eucjp_win, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_eucjpwin_wchar, - mbfl_filt_conv_eucjpwin_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin = { - 
mbfl_no_encoding_wchar, - mbfl_no_encoding_eucjp_win, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_eucjpwin, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w, n; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c >= 0xa1 && c <= 0xfe) { /* CP932 first char */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0212 first char */ - filter->status = 3; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* got first half */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xa0 && c < 0xff) { - w = 0; - s = (c1 - 0xa1)*94 + c - 0xa1; - if (s <= 137) { - if (s == 31) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xffe0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xffe1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xffe2; /* FULLWIDTH NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= (84 * 94)) { /* user (85ku - 94ku) */ - w = s - (84 * 94) + 0xe000; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* got 0x8e, X0201 kana */ 
- filter->status = 0; - if (c > 0xa0 && c < 0xe0) { - w = 0xfec0 + c; - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* got 0x8f, X 0212 first char */ - filter->status++; - filter->cache = c; - break; - - case 4: /* got 0x8f, X 0212 second char */ - filter->status = 0; - c1 = filter->cache; - if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) { - s = (c1 - 0xa1)*94 + c - 0xa1; - - if (s >= 0 && s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - - if (w == 0x007e) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } - } else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */ - s = (c1 << 8) | c; - w = 0; - n = 0; - while (n < cp932ext3_eucjp_table_size) { - if (s == cp932ext3_eucjp_table[n]) { - if (n < (cp932ext3_ucs_table_max - cp932ext3_ucs_table_min)) { - w = cp932ext3_ucs_table[n]; - } - break; - } - n++; - } - } else if (s >= (84*94)) { /* user (85ku - 94ku) */ - w = s - (84*94) + (0xe000 + (94*10)); - } else { - w = 0; - } - - if (w == 0x00A6) { - w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - filter->status = 0; - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1 = 0; - - if (c == 0xAF) { /* U+00AF is MACRON */ - s1 = 0xA2B4; /* Use JIS X 0212 overline */ - } else if (c == 0x203E) { - s1 = 0x7E; - } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = 
ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xe000 && c < (0xe000 + 10*94)) { /* user (X0208 85ku - 94ku) */ - s1 = c - 0xe000; - c1 = s1/94 + 0x75; - c2 = s1%94 + 0x21; - s1 = (c1 << 8) | c2; - } else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) { /* user (X0212 85ku - 94ku) */ - s1 = c - (0xe000 + 10*94); - c1 = s1/94 + 0xf5; - c2 = s1%94 + 0xa1; - s1 = (c1 << 8) | c2; - } - - if (s1 == 0xa2f1) { - s1 = 0x2d62; /* NUMERO SIGN */ - } - - if (s1 <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x5C; - } else if (c == 0x2014) { - s1 = 0x213D; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } else { - s1 = -1; - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - const int oh = cp932ext1_ucs_table_min / 94; - - if (c == cp932ext1_ucs_table[c1]) { - s1 = ((c1 / 94 + oh + 0x21) << 8) + (c1 % 94 + 0x21); - break; - } - c1++; - } - if (s1 < 0) { - c1 = 0; - c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext3_ucs_table[c1]) { - if (c1 < cp932ext3_eucjp_table_size) { - s1 = cp932ext3_eucjp_table[c1]; - } - break; - } - c1++; - } - } - } - - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - 
s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* latin */ - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x100) { /* kana */ - CK((*filter->output_function)(0x8e, filter->data)); - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x8080) { /* X 0208 */ - CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); - } else { /* X 0212 */ - CK((*filter->output_function)(0x8f, filter->data)); - CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xFE && p < e) { - unsigned char c2 = *p++; - - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= (84 * 94)) { - w = s - (84 * 94) + 0xE000; - } - } - - if (!w) - w 
= MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8E && p < e) { - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xDF) { - *out++ = 0xFEC0 + c2; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8F && p < e) { - unsigned char c2 = *p++; - if (p == e) { - *out++ = MBFL_BAD_INPUT; - continue; - } - unsigned char c3 = *p++; - - if (c2 >= 0xA1 && c2 <= 0xFE && c3 >= 0xA1 && c3 <= 0xFE) { - unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1, w = 0; - - if (s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - if (w == 0x7E) - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s >= (82*94) && s < (84*94)) { - s = (c2 << 8) | c3; - for (int i = 0; i < cp932ext3_eucjp_table_size; i++) { - if (cp932ext3_eucjp_table[i] == s) { - w = cp932ext3_ucs_table[i]; - break; - } - } - } else if (s >= (84*94)) { - w = s - (84*94) + 0xE000 + (94*10); - } - - if (w == 0xA6) - w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } else if (w == 0xAF) { /* U+00AF is MACRON */ - s = 0xA2B4; /* Use JIS X 0212 overline */ - } else if (w == 0x203E) { - s = 0x7E; - } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else 
if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 10*94)) { - s = w - 0xE000; - s = ((s/94 + 0x75) << 8) + (s%94) + 0x21; - } else if (w >= (0xE000 + 10*94) && w < (0xE000 + 20*94)) { - s = w - (0xE000 + 10*94); - s = ((s/94 + 0xF5) << 8) + (s%94) + 0xA1; - } - - if (s == 0xA2F1) - s = 0x2D62; /* NUMERO SIGN */ - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x5C; - } else if (w == 0x2014) { /* EM DASH */ - s = 0x213D; - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else { - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (cp932ext1_ucs_table[i] == w) { - s = (((i/94) + (cp932ext1_ucs_table_min/94) + 0x21) << 8) + (i%94) + 0x21; - break; - } - } - - if (!s) { - for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { - if (cp932ext3_ucs_table[i] == w) { - s = cp932ext3_eucjp_table[i]; - break; - } - } - } - } - } - - if (!s) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjpwin); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else if (s < 0x100) { - out = mb_convert_buf_add2(out, 0x8E, s); - } else if (s < 0x8080) { - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); - out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h 
b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h deleted file mode 100644 index bb1e4dc392d7b..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_EUC_JP_WIN_H -#define MBFL_MBFILTER_EUC_JP_WIN_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_eucjp_win; -extern const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin; - -int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_JP_WIN_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c deleted file mode 100644 index 2c95a80ba965c..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_kr.h" -#include "unicode_table_uhc.h" - -static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_euckr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL}; - -const mbfl_encoding mbfl_encoding_euc_kr = { - mbfl_no_encoding_euc_kr, - "EUC-KR", - "EUC-KR", - mbfl_encoding_euc_kr_aliases, - mblen_table_euckr, - 0, - &vtbl_euckr_wchar, - &vtbl_wchar_euckr, - mb_euckr_to_wchar, - mb_wchar_to_euckr, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_euckr_wchar = { - mbfl_no_encoding_euc_kr, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_euckr_wchar, - mbfl_filt_conv_euckr_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_euckr = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_euc_kr, - mbfl_filt_conv_common_ctor, - NULL, - 
mbfl_filt_conv_wchar_euckr, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, w, flag; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9) { /* dbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - c1 = filter->cache; - flag = 0; - if (c1 >= 0xa1 && c1 <= 0xc6) { - flag = 1; - } else if (c1 >= 0xc7 && c1 <= 0xfe && c1 != 0xc9) { - flag = 2; - } - if (flag > 0 && c >= 0xa1 && c <= 0xfe) { - if (flag == 1) { /* 1st: 0xa1..0xc6, 2nd: 0x41..0x7a, 0x81..0xfe */ - w = (c1 - 0x81)*190 + c - 0x41; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - } else { /* 1st: 0xc7..0xc8,0xca..0xfe, 2nd: 0xa1..0xfe */ - w = (c1 - 0xc7)*94 + c - 0xa1; - ZEND_ASSERT(w < uhc3_ucs_table_size); - w = uhc3_ucs_table[w]; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; - } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; - } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; - } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; - } else if (c 
>= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; - } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; - } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; - } - - /* exclude UHC extension area (although we are using the UHC conversion tables) */ - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = 0; - } - - if (s <= 0) { - if (c < 0x80) { - s = c; - } else { - s = -1; - } - } - - if (s >= 0) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9 && p < e) { - unsigned char c2 = *p++; - if (c2 < 0xA1 || c2 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - if (c <= 0xC6) { - unsigned int w = (c - 0x81)*190 + c2 - 0x41; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - unsigned int w = (c - 0xC7)*94 + c2 - 0xA1; - ZEND_ASSERT(w < 
uhc3_ucs_table_size); - w = uhc3_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_euckr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; - } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; - } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; - } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; - } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; - } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; - } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; - } - - /* Exclude UHC extension area (although we are using the UHC conversion tables) */ - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = 0; - } - - if (!s) { - if (w < 0x80) { - out = mb_convert_buf_add(out, w); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euckr); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h 
deleted file mode 100644 index e0c13cf53ad73..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.h - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_EUC_KR_H -#define MBFL_MBFILTER_EUC_KR_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_euc_kr; -extern const struct mbfl_convert_vtbl vtbl_euckr_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_euckr; - -int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_KR_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c deleted file mode 100644 index 522f5f4a05a5b..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c +++ /dev/null @@ -1,375 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Rui Hirokawa - * - */ -/* - * The source code included in this files was separated from mbfilter_tw.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_tw.h" - -#include "unicode_table_cns11643.h" - -static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_euctw_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_euctw(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_euctw[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - - -static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL}; - -const mbfl_encoding mbfl_encoding_euc_tw = { - mbfl_no_encoding_euc_tw, - "EUC-TW", - "EUC-TW", - mbfl_encoding_euc_tw_aliases, - mblen_table_euctw, - 0, - &vtbl_euctw_wchar, - &vtbl_wchar_euctw, - mb_euctw_to_wchar, - mb_wchar_to_euctw, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_euctw_wchar = { - mbfl_no_encoding_euc_tw, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_euctw_wchar, - mbfl_filt_conv_euctw_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_euctw = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_euc_tw, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_euctw, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) { /* 2-byte character, first byte */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8E) { /* 4-byte character, first byte */ - filter->status = 2; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* 2-byte character, second byte */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xA0 && c < 0xFF) { - w = (c1 - 0xA1)*94 + (c - 0xA1); - if (w >= 0 && w < cns11643_1_ucs_table_size) { - w = cns11643_1_ucs_table[w]; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - 
filter->status = filter->cache = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* got 0x8e, second byte */ - if (c == 0xA1 || c == 0xA2 || c == 0xAE) { - filter->status = 3; - filter->cache = c - 0xA1; - } else { - filter->status = filter->cache = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* got 0x8e, third byte */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0xA1 && ((c1 == 0 && ((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) || - (c1 == 1 && c <= 0xF2) || (c1 == 13 && c <= 0xE7))) { - filter->status = 4; - filter->cache = (c1 << 8) + c - 0xA1; - } else { - filter->status = filter->cache = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 4: /* multi-byte character, fourth byte */ - filter->status = 0; - c1 = filter->cache; - if (c1 <= 0xDFF && c > 0xA0 && c < 0xFF) { - int plane = (c1 & 0xF00) >> 8; /* This is actually the CNS-11643 plane minus one */ - s = (c1 & 0xFF)*94 + c - 0xA1; - w = 0; - if (s >= 0) { - /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3", - * and added tens of thousands more characters in planes 4, 5, 6, and 7 - * We only support the older version of CNS-11643 - * This is the same as iconv from glibc 2.2 */ - if (plane == 0 && s < cns11643_1_ucs_table_size) { - w = cns11643_1_ucs_table[s]; - } else if (plane == 1 && s < cns11643_2_ucs_table_size) { - w = cns11643_2_ucs_table[s]; - } else if (plane == 13 && s < cns11643_14_ucs_table_size) { - w = cns11643_14_ucs_table[s]; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - filter->status = filter->cache = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c 
>= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) { - s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min]; - } else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) { - s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min]; - } else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) { - s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min]; - } else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) { - s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min]; - } else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) { - s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min]; - } - - if (s <= 0) { - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } - - if (s >= 0) { - int plane = (s & 0x1F0000) >> 16; - if (plane <= 1) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - s = (s & 0xFFFF) | 0x8080; - CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s & 0xFF, filter->data)); - } - } else { - s = (0x8EA00000 + (plane << 16)) | ((s & 0xFFFF) | 0x8080); - CK((*filter->output_function)(0x8e , filter->data)); - CK((*filter->output_function)((s >> 16) & 0xFF, filter->data)); - CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s & 0xFF, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - return 0; -} - -static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - /* 2-byte or 4-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_euctw_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - 
uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3 && p < e) { - unsigned char c2 = *p++; - - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int w = (c - 0xA1)*94 + (c2 - 0xA1); - if (w < cns11643_1_ucs_table_size) { - w = cns11643_1_ucs_table[w]; - } else { - w = 0; - } - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8E && p < e) { - unsigned char c2 = *p++; - - if ((c2 == 0xA1 || c2 == 0xA2 || c2 == 0xAE) && p < e) { - unsigned int plane = c2 - 0xA1; /* This is actually the CNS-11643 plane minus one */ - unsigned char c3 = *p++; - - if (c3 >= 0xA1 && ((plane == 0 && ((c3 >= 0xA1 && c3 <= 0xA6) || (c3 >= 0xC2 && c3 <= 0xFD)) && c3 != 0xC3) || (plane == 1 && c3 <= 0xF2) || (plane == 13 && c3 <= 0xE7)) && p < e) { - unsigned char c4 = *p++; - - if (c2 <= 0xAE && c4 > 0xA0 && c4 < 0xFF) { - unsigned int s = (c3 - 0xA1)*94 + c4 - 0xA1, w = 0; - - /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3", - * and added tens of thousands more characters in planes 4, 5, 6, and 7 - * We only support the older version of CNS-11643 - * This is the same as iconv from glibc 2.2 */ - if (plane == 0 && s < cns11643_1_ucs_table_size) { - w = cns11643_1_ucs_table[s]; - } else if (plane == 1 && s < cns11643_2_ucs_table_size) { - w = cns11643_2_ucs_table[s]; - } else if (plane == 13 && s < cns11643_14_ucs_table_size) { - w = cns11643_14_ucs_table[s]; - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - continue; - } - } - } - - *out++ = MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_euctw(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - 
MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_cns11643_table_min && w < ucs_a1_cns11643_table_max) { - s = ucs_a1_cns11643_table[w - ucs_a1_cns11643_table_min]; - } else if (w >= ucs_a2_cns11643_table_min && w < ucs_a2_cns11643_table_max) { - s = ucs_a2_cns11643_table[w - ucs_a2_cns11643_table_min]; - } else if (w >= ucs_a3_cns11643_table_min && w < ucs_a3_cns11643_table_max) { - s = ucs_a3_cns11643_table[w - ucs_a3_cns11643_table_min]; - } else if (w >= ucs_i_cns11643_table_min && w < ucs_i_cns11643_table_max) { - s = ucs_i_cns11643_table[w - ucs_i_cns11643_table_min]; - } else if (w >= ucs_r_cns11643_table_min && w < ucs_r_cns11643_table_max) { - s = ucs_r_cns11643_table[w - ucs_r_cns11643_table_min]; - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euctw); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } - } else { - unsigned int plane = s >> 16; - if (plane <= 1) { - if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else { - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add4(out, 0x8E, 0xA0 + plane, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h deleted file mode 100644 index 9c2ffa480210f..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Rui Hirokawa - * - */ -/* - * The source code included in this files was separated from mbfilter_tw.h - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_EUC_TW_H -#define MBFL_MBFILTER_EUC_TW_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_euc_tw; -extern const struct mbfl_convert_vtbl vtbl_euctw_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_euctw; - -int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_TW_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c deleted file mode 100644 index 6485e735ed4ba..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c +++ /dev/null @@ -1,644 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. 
- * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this file was separated from mbfilter_cp936.c - * by rui hirokawa on 11 Aug 2011. - * - */ - -#include "mbfilter.h" -#include "mbfilter_gb18030.h" - -#include "unicode_table_cp936.h" -#include "unicode_table_gb18030.h" - -static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_gb18030_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_gb18030(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const char *mbfl_encoding_gb18030_aliases[] = {"gb-18030", "gb-18030-2000", NULL}; - -const mbfl_encoding mbfl_encoding_gb18030 = { - mbfl_no_encoding_gb18030, - "GB18030", - "GB18030", - mbfl_encoding_gb18030_aliases, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_gb18030_wchar, - &vtbl_wchar_gb18030, - mb_gb18030_to_wchar, - mb_wchar_to_gb18030, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_gb18030_wchar = { - mbfl_no_encoding_gb18030, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_gb18030_wchar, - mbfl_filt_conv_gb18030_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_gb18030 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_gb18030, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_gb18030, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } 
while (0) - -/* `tbl` contains inclusive ranges, each represented by a pair of unsigned shorts */ -int mbfl_bisec_srch(int w, const unsigned short *tbl, int n) -{ - int l = 0, r = n-1; - while (l <= r) { - int probe = (l + r) >> 1; - unsigned short lo = tbl[2 * probe], hi = tbl[(2 * probe) + 1]; - if (w < lo) { - r = probe - 1; - } else if (w > hi) { - l = probe + 1; - } else { - return probe; - } - } - return -1; -} - -/* `tbl` contains single values, not ranges */ -int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n) -{ - int l = 0, r = n-1; - while (l <= r) { - int probe = (l + r) >> 1; - unsigned short val = tbl[probe]; - if (w < val) { - r = probe - 1; - } else if (w > val) { - l = probe + 1; - } else { - return probe; - } - } - return -1; -} - -int mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter) -{ - int k; - int c1, c2, c3, w = -1; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0x80 && c < 0xff) { /* dbcs/qbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs/qbcs second byte */ - c1 = filter->cache; - filter->status = 0; - - if (c1 >= 0x81 && c1 <= 0x84 && c >= 0x30 && c <= 0x39) { - /* 4 byte range: Unicode BMP */ - filter->status = 2; - filter->cache = (c1 << 8) | c; - return 0; - } else if (c1 >= 0x90 && c1 <= 0xe3 && c >= 0x30 && c <= 0x39) { - /* 4 byte range: Unicode 16 planes */ - filter->status = 2; - filter->cache = (c1 << 8) | c; - return 0; - } else if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && (c >= 0xa1 && c <= 0xfe)) { - /* UDA part 1,2: U+E000-U+E4C5 */ - w = 94*(c1 >= 0xf8 ? 
c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000; - CK((*filter->output_function)(w, filter->data)); - } else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { - /* UDA part3 : U+E4C6-U+E765*/ - w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6; - CK((*filter->output_function)(w, filter->data)); - } - - c2 = (c1 << 8) | c; - - if (w <= 0 && - ((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) || - (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) || - (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) { - for (k = 0; k < mbfl_gb18030_pua_tbl_max; k++) { - if (c2 >= mbfl_gb18030_pua_tbl[k][2] && c2 <= mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][1] - mbfl_gb18030_pua_tbl[k][0]) { - w = c2 - mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][0]; - CK((*filter->output_function)(w, filter->data)); - break; - } - } - } - - if (w <= 0) { - if ((c1 >= 0xa1 && c1 <= 0xa9 && c >= 0xa1 && c <= 0xfe) || - (c1 >= 0xb0 && c1 <= 0xf7 && c >= 0xa1 && c <= 0xfe) || - (c1 >= 0x81 && c1 <= 0xa0 && c >= 0x40 && c <= 0xfe && c != 0x7f) || - (c1 >= 0xaa && c1 <= 0xfe && c >= 0x40 && c <= 0xa0 && c != 0x7f) || - (c1 >= 0xa8 && c1 <= 0xa9 && c >= 0x40 && c <= 0xa0 && c != 0x7f)) { - w = (c1 - 0x81)*192 + c - 0x40; - ZEND_ASSERT(w < cp936_ucs_table_size); - CK((*filter->output_function)(cp936_ucs_table[w], filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - break; - - case 2: /* qbcs third byte */ - c1 = (filter->cache >> 8) & 0xff; - c2 = filter->cache & 0xff; - filter->status = filter->cache = 0; - if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c >= 0x81 && c <= 0xfe) { - filter->cache = (c1 << 16) | (c2 << 8) | c; - filter->status = 3; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* qbcs fourth byte */ - c1 = (filter->cache >> 16) & 0xff; - c2 = (filter->cache >> 8) & 0xff; - c3 = filter->cache & 0xff; - 
filter->status = filter->cache = 0; - if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c3 >= 0x81 && c3 <= 0xfe && c >= 0x30 && c <= 0x39) { - if (c1 >= 0x90 && c1 <= 0xe3) { - w = ((((c1 - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c - 0x30) + 0x10000; - if (w > 0x10FFFF) { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - return 0; - } - } else { /* Unicode BMP */ - w = (((c1 - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c - 0x30); - if (w >= 0 && w <= 39419) { - k = mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max); - w += mbfl_gb_uni_ofst[k]; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - return 0; - } - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - /* multi-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter) -{ - int k, k1, k2; - int c1, s = 0, s1 = 0; - - if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { - if (c == 0x01f9) { - s = 0xa8bf; - } else { - s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; - } - } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { - if (c == 0x20ac) { /* euro-sign */ - s = 0xa2e3; - } else { - s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; - } - } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { - s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; - } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { - s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; - } else 
if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) { - /* U+F900-FA2F CJK Compatibility Ideographs */ - if (c == 0xf92c) { - s = 0xfd9c; - } else if (c == 0xf979) { - s = 0xfd9d; - } else if (c == 0xf995) { - s = 0xfd9e; - } else if (c == 0xf9e7) { - s = 0xfd9f; - } else if (c == 0xf9f1) { - s = 0xfda0; - } else if (c >= 0xfa0c && c <= 0xfa29) { - s = ucs_ci_s_cp936_table[c - 0xfa0c]; - } - } else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) { - /* FE30h CJK Compatibility Forms */ - s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min]; - } else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) { - /* U+FE50-FE6F Small Form Variants */ - s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min]; - } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { - /* U+FF00-FFFF HW/FW Forms */ - if (c == 0xff04) { - s = 0xa1e7; - } else if (c == 0xff5e) { - s = 0xa1ab; - } else if (c >= 0xff01 && c <= 0xff5d) { - s = c - 0xff01 + 0xa3a1; - } else if (c >= 0xffe0 && c <= 0xffe5) { - s = ucs_hff_s_cp936_table[c-0xffe0]; - } - } - - /* While GB18030 and CP936 are very similar, some mappings are different between these encodings; - * do a binary search in a table of differing codepoints to see if we have one */ - if (s <= 0 && c >= mbfl_gb18030_c_tbl_key[0] && c <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) { - k1 = mbfl_bisec_srch2(c, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max); - if (k1 >= 0) { - s = mbfl_gb18030_c_tbl_val[k1]; - } - } - - if (c >= 0xe000 && c <= 0xe864) { /* PUA */ - if (c < 0xe766) { - if (c < 0xe4c6) { - c1 = c - 0xe000; - s = (c1 % 94) + 0xa1; - c1 /= 94; - s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8; - } else { - c1 = c - 0xe4c6; - s = ((c1 / 96) + 0xa1) << 8; - c1 %= 96; - s |= c1 + (c1 >= 0x3f ? 
0x41 : 0x40); - } - } else { - /* U+E766..U+E864 */ - k1 = 0; - k2 = mbfl_gb18030_pua_tbl_max; - while (k1 < k2) { - k = (k1 + k2) >> 1; - if (c < mbfl_gb18030_pua_tbl[k][0]) { - k2 = k; - } else if (c > mbfl_gb18030_pua_tbl[k][1]) { - k1 = k + 1; - } else { - s = c - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2]; - break; - } - } - } - } - - /* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */ - if (s <= 0 && c >= 0x0080 && c <= 0xffff) { - /* BMP */ - s = mbfl_bisec_srch(c, mbfl_uni2gb_tbl, mbfl_gb_uni_max); - if (s >= 0) { - c1 = c - mbfl_gb_uni_ofst[s]; - s = (c1 % 10) + 0x30; - c1 /= 10; - s |= ((c1 % 126) + 0x81) << 8; - c1 /= 126; - s |= ((c1 % 10) + 0x30) << 16; - c1 /= 10; - s1 = c1 + 0x81; - } - } else if (c >= 0x10000 && c <= 0x10ffff) { - /* Code set 3: Unicode U+10000..U+10FFFF */ - c1 = c - 0x10000; - s = (c1 % 10) + 0x30; - c1 /= 10; - s |= ((c1 % 126) + 0x81) << 8; - c1 /= 126; - s |= ((c1 % 10) + 0x30) << 16; - c1 /= 10; - s1 = c1 + 0x90; - } - - if (c == 0) { - s = 0; - } else if (s == 0) { - s = -1; - } - - if (s >= 0) { - if (s <= 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else if (s1 > 0) { /* qbcs */ - CK((*filter->output_function)(s1 & 0xff, filter->data)); - CK((*filter->output_function)((s >> 16) & 0xff, filter->data)); - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } else { /* dbcs */ - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static const unsigned short gb18030_pua_tbl3[] = { -/* 0xFE50 */ -0x0000,0xE816,0xE817,0xE818,0x0000,0x0000,0x0000,0x0000, -0x0000,0xE81E,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -0x0000,0xE826,0x0000,0x0000,0x0000,0x0000,0xE82B,0xE82C, 
-0x0000,0x0000,0x0000,0x0000,0xE831,0xE832,0x0000,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE83B,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE843,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -0xE854,0xE855,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -/* 0xFEA0 */ -0xE864 -}; - -static size_t mb_gb18030_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c == 0x80 || c == 0xFF) { - *out++ = MBFL_BAD_INPUT; - } else { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - - if (((c >= 0x81 && c <= 0x84) || (c >= 0x90 && c <= 0xE3)) && c2 >= 0x30 && c2 <= 0x39) { - if (p >= e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c3 = *p++; - - if (c3 >= 0x81 && c3 <= 0xFE && p < e) { - unsigned char c4 = *p++; - - if (c4 >= 0x30 && c4 <= 0x39) { - if (c >= 0x90 && c <= 0xE3) { - unsigned int w = ((((c - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c4 - 0x30) + 0x10000; - *out++ = (w > 0x10FFFF) ? MBFL_BAD_INPUT : w; - } else { - /* Unicode BMP */ - unsigned int w = (((c - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c4 - 0x30); - if (w <= 39419) { - *out++ = w + mbfl_gb_uni_ofst[mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max)]; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && (c2 >= 0xA1 && c2 <= 0xFE)) { - /* UDA part 1, 2: U+E000-U+E4C5 */ - *out++ = 94*(c >= 0xF8 ? 
c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000; - } else if (c >= 0xA1 && c <= 0xA7 && c2 >= 0x40 && c2 < 0xA1 && c2 != 0x7F) { - /* UDA part 3: U+E4C6-U+E765 */ - *out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 0x41 : 0x40) + 0xE4C6; - } else if (c2 >= 0x40 && c2 != 0x7F && c2 != 0xFF) { - unsigned int w = (c - 0x81)*192 + c2 - 0x40; - - if (w >= 0x192B) { - if (w <= 0x1EBE) { - if (w != 0x1963 && w != 0x1DBF && (w < 0x1E49 || w > 0x1E55) && w != 0x1E7F) { - *out++ = cp936_pua_tbl1[w - 0x192B]; - continue; - } - } else if (w >= 0x413A) { - if (w <= 0x413E) { - *out++ = cp936_pua_tbl2[w - 0x413A]; - continue; - } else if (w >= 0x5DD0 && w <= 0x5E20) { - unsigned int c = gb18030_pua_tbl3[w - 0x5DD0]; - if (c) { - *out++ = c; - continue; - } - } - } - } - - if ((c >= 0x81 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7 && c2 >= 0xA1) || (c >= 0xAA && c <= 0xFE && c2 <= 0xA0)) { - ZEND_ASSERT(w < cp936_ucs_table_size); - *out++ = cp936_ucs_table[w]; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_gb18030(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } else if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { - if (w == 0x1F9) { - s = 0xA8Bf; - } else { - s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; - } - } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { - if (w == 0x20AC) { /* Euro sign */ - s = 0xA2E3; - } else { - s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; - } - } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { - s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; - } else if (w >= ucs_i_cp936_table_min && w < 
ucs_i_cp936_table_max) { - s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; - } else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) { - /* U+F900-U+FA2F CJK Compatibility Ideographs */ - if (w == 0xF92C) { - s = 0xFD9C; - } else if (w == 0xF979) { - s = 0xFD9D; - } else if (w == 0xF995) { - s = 0xFD9E; - } else if (w == 0xF9E7) { - s = 0xFD9F; - } else if (w == 0xF9F1) { - s = 0xFDA0; - } else if (w >= 0xFA0C && w <= 0xFA29) { - s = ucs_ci_s_cp936_table[w - 0xFA0C]; - } - } else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) { - /* CJK Compatibility Forms */ - s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min]; - } else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) { - /* U+FE50-U+FE6F Small Form Variants */ - s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min]; - } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { - /* U+FF00-U+FFFF HW/FW Forms */ - if (w == 0xFF04) { - s = 0xA1E7; - } else if (w == 0xFF5E) { - s = 0xA1AB; - } else if (w >= 0xFF01 && w <= 0xFF5D) { - s = w - 0xFF01 + 0xA3A1; - } else if (w >= 0xFFE0 && w <= 0xFFE5) { - s = ucs_hff_s_cp936_table[w - 0xFFE0]; - } - } else if (w >= 0xE000 && w <= 0xE864) { - /* PUA */ - if (w < 0xE766) { - if (w < 0xE4C6) { - unsigned int c1 = w - 0xE000; - s = (c1 % 94) + 0xA1; - c1 /= 94; - s |= (c1 + (c1 < 0x06 ? 0xAA : 0xF2)) << 8; - } else { - unsigned int c1 = w - 0xE4C6; - s = ((c1 / 96) + 0xA1) << 8; - c1 %= 96; - s |= c1 + (c1 >= 0x3F ? 
0x41 : 0x40); - } - } else { - /* U+E766-U+E864 */ - unsigned int k1 = 0, k2 = mbfl_gb18030_pua_tbl_max; - while (k1 < k2) { - unsigned int k = (k1 + k2) >> 1; - if (w < mbfl_gb18030_pua_tbl[k][0]) { - k2 = k; - } else if (w > mbfl_gb18030_pua_tbl[k][1]) { - k1 = k + 1; - } else { - s = w - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2]; - break; - } - } - } - } - - /* While GB18030 and CP936 are very similar, some mappings are different between these encodings; - * do a binary search in a table of differing codepoints to see if we have one */ - if (!s && w >= mbfl_gb18030_c_tbl_key[0] && w <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) { - int i = mbfl_bisec_srch2(w, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max); - if (i >= 0) { - s = mbfl_gb18030_c_tbl_val[i]; - } - } - - /* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */ - if (!s && w >= 0x80 && w <= 0xFFFF) { - /* BMP */ - int i = mbfl_bisec_srch(w, mbfl_uni2gb_tbl, mbfl_gb_uni_max); - if (i >= 0) { - unsigned int c1 = w - mbfl_gb_uni_ofst[i]; - s = (c1 % 10) + 0x30; - c1 /= 10; - s |= ((c1 % 126) + 0x81) << 8; - c1 /= 126; - s |= ((c1 % 10) + 0x30) << 16; - c1 /= 10; - s |= (c1 + 0x81) << 24; - } - } else if (w >= 0x10000 && w <= 0x10FFFF) { - /* Code set 3: Unicode U+10000-U+10FFFF */ - unsigned int c1 = w - 0x10000; - s = (c1 % 10) + 0x30; - c1 /= 10; - s |= ((c1 % 126) + 0x81) << 8; - c1 /= 126; - s |= ((c1 % 10) + 0x30) << 16; - c1 /= 10; - s |= (c1 + 0x90) << 24; - } - - if (!s) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_gb18030); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else if (s > 0xFFFFFF) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add4(out, (s >> 24) & 0xFF, (s >> 16) & 0xFF, (s >> 8) & 0xFF, s & 0xFF); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } 
- } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h deleted file mode 100644 index e7f0eae16bf6e..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_cn.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_GB18030_H -#define MBFL_MBFILTER_GB18030_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_gb18030; -extern const struct mbfl_convert_vtbl vtbl_gb18030_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_gb18030; - -int mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_GB18030_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_hz.c b/ext/mbstring/libmbfl/filters/mbfilter_hz.c deleted file mode 100644 index b047bfc8b7b27..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_hz.c +++ /dev/null @@ -1,409 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_cn.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_hz.h" - -#include "unicode_table_cp936.h" -#include "unicode_table_gb2312.h" - -static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_hz_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_hz(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -const mbfl_encoding mbfl_encoding_hz = { - mbfl_no_encoding_hz, - "HZ", - "HZ-GB-2312", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_hz_wchar, - &vtbl_wchar_hz, - mb_hz_to_wchar, - mb_wchar_to_hz, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_hz_wchar = { - mbfl_no_encoding_hz, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_hz_wchar, - mbfl_filt_conv_hz_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_hz = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_hz, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_hz, - mbfl_filt_conv_any_hz_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - - switch (filter->status & 0xf) { - /* case 0x00: ASCII */ - /* case 0x10: GB2312 */ - case 0: - if (c == '~') { - filter->status += 2; - } else if (filter->status == 0x10 && ((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77))) { - /* DBCS first char */ - filter->cache = c; - filter->status += 1; - } else if (filter->status == 0 && c >= 0 && c < 0x80) { /* latin, CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* case 0x11: GB2312 second char */ - case 1: - filter->status &= ~0xf; - c1 = filter->cache; - if (c1 > 0x20 && c1 < 0x7F && c > 0x20 && c < 0x7F) { - s = (c1 - 1)*192 + c + 0x40; /* GB2312 */ - ZEND_ASSERT(s < cp936_ucs_table_size); - if (s == 0x1864) { - w = 0x30FB; 
- } else if (s == 0x186A) { - w = 0x2015; - } else if (s == 0x186C) { - w = 0x2225; - } else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) { - w = 0; - } else { - w = cp936_ucs_table[s]; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* '~' */ - case 2: - if (c == '}' && filter->status == 0x12) { - filter->status = 0; - } else if (c == '{' && filter->status == 2) { - filter->status = 0x10; - } else if (c == '~' && filter->status == 2) { - CK((*filter->output_function)('~', filter->data)); - filter->status -= 2; - } else if (c == '\n') { - /* "~\n" is a line continuation; no output is needed, nor should we shift modes */ - filter->status -= 2; - } else { - /* Invalid character after ~ */ - filter->status -= 2; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 0x11) { - /* 2-byte character was truncated */ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { - if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261 || c == 0x2CA || c == 0x2CB || c == 0x2D9) { - s = 0; - } else { - s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; - } - } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { - if (c == 0x2015) { - s = 0xA1AA; - } else if (c == 0x2010 || c == 0x2013 || c == 0x2014 || c == 0x2016 || c == 0x2025 || c == 0x2035 || - c == 0x2105 || c == 0x2109 || c == 
0x2121 || (c >= 0x2170 && c <= 0x2179) || (c >= 0x2196 && c <= 0x2199) || - c == 0x2215 || c == 0x221F || c == 0x2223 || c == 0x2252 || c == 0x2266 || c == 0x2267 || c == 0x2295 || - (c >= 0x2550 && c <= 0x2573) || c == 0x22BF || c == 0x2609 || (c >= 0x2581 && c <= 0x258F) || - (c >= 0x2593 && c <= 0x2595) || c == 0x25BC || c == 0x25BD || (c >= 0x25E2 && c <= 0x25E5)) { - s = 0; - } else { - s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; - } - } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { - if (c == 0x30FB) { - s = 0xA1A4; - } else if (c == 0x3006 || c == 0x3007 || c == 0x3012 || c == 0x3231 || c == 0x32A3 || c >= 0x3300 || - (c >= 0x3018 && c <= 0x3040) || (c >= 0x309B && c <= 0x309E) || (c >= 0x30FC && c <= 0x30FE)) { - s = 0; - } else { - s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; - } - } else if (c >= ucs_i_gb2312_table_min && c < ucs_i_gb2312_table_max) { - s = ucs_i_gb2312_table[c - ucs_i_gb2312_table_min]; - } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { - if (c == 0xFF04) { - s = 0xA1E7; - } else if (c == 0xFF5E) { - s = 0xA1AB; - } else if (c >= 0xFF01 && c <= 0xFF5D) { - s = c - 0xFF01 + 0xA3A1; - } else if (c == 0xFFE0 || c == 0xFFE1 || c == 0xFFE3 || c == 0xFFE5) { - s = ucs_hff_s_cp936_table[c - 0xFFE0]; - } - } - - if (s & 0x8000) { - s -= 0x8080; - } - - if (s <= 0) { - s = (c == 0) ? 
0 : -1; - } else if ((s >= 0x80 && s < 0x2121) || s > 0x8080) { - s = -1; - } - - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)('~', filter->data)); - CK((*filter->output_function)('}', filter->data)); - } - filter->status = 0; - if (s == 0x7E) { - CK((*filter->output_function)('~', filter->data)); - } - CK((*filter->output_function)(s, filter->data)); - } else { /* GB 2312-80 */ - if ((filter->status & 0xFF00) != 0x200) { - CK((*filter->output_function)('~', filter->data)); - CK((*filter->output_function)('{', filter->data)); - } - filter->status = 0x200; - CK((*filter->output_function)((s >> 8) & 0x7F, filter->data)); - CK((*filter->output_function)(s & 0x7F, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter) -{ - /* back to latin */ - if (filter->status & 0xFF00) { - CK((*filter->output_function)('~', filter->data)); - CK((*filter->output_function)('}', filter->data)); - } - filter->status = 0; - return 0; -} - -#define ASCII 0 -#define GB2312 1 - -static size_t mb_hz_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == '~') { - if (p == e) { - break; - } - unsigned char c2 = *p++; - - if (c2 == '}' && *state == GB2312) { - *state = ASCII; - } else if (c2 == '{' && *state == ASCII) { - *state = GB2312; - } else if (c2 == '~' && *state == ASCII) { - *out++ = '~'; - } else if (c2 == '\n') { - /* "~\n" is a line continuation; no output is needed, nor should we shift modes */ - } else { - /* Invalid character after ~ */ - *out++ = MBFL_BAD_INPUT; - } - } else if (((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77)) && p < e && *state == GB2312) { - unsigned char c2 = *p++; - - 
if (c > 0x20 && c < 0x7F && c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 1)*192 + c2 + 0x40; - ZEND_ASSERT(s < cp936_ucs_table_size); - - if (s == 0x1864) { - s = 0x30FB; - } else if (s == 0x186A) { - s = 0x2015; - } else if (s == 0x186C) { - s = 0x2225; - } else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) { - s = 0; - } else { - s = cp936_ucs_table[s]; - } - if (!s) - s = MBFL_BAD_INPUT; - *out++ = s; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c < 0x80 && *state == ASCII) { - *out++ = c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_hz(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { - if (w == 0xB7 || w == 0x144 || w == 0x148 || w == 0x251 || w == 0x261 || w == 0x2CA || w == 0x2CB || w == 0x2D9) { - s = 0; - } else { - s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; - } - } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { - if (w == 0x2015) { - s = 0xA1AA; - } else if (w == 0x2010 || w == 0x2013 || w == 0x2014 || w == 0x2016 || w == 0x2025 || w == 0x2035 || w == 0x2105 || w == 0x2109 || w == 0x2121 || (w >= 0x2170 && w <= 0x2179) || (w >= 0x2196 && w <= 0x2199) || w == 0x2215 || w == 0x221F || w == 0x2223 || w == 0x2252 || w == 0x2266 || w == 0x2267 || w == 0x2295 || (w >= 0x2550 && w <= 0x2573) || w == 0x22BF || w == 0x2609 || (w >= 0x2581 && w <= 0x258F) || (w >= 0x2593 && w <= 0x2595) || w == 0x25BC || w == 0x25BD || (w >= 0x25E2 && w <= 0x25E5)) { - s = 0; - } else { - s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; - } - } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { - if (w == 
0x30FB) { - s = 0xA1A4; - } else if (w == 0x3006 || w == 0x3007 || w == 0x3012 || w == 0x3231 || w == 0x32A3 || w >= 0x3300 || (w >= 0x3018 && w <= 0x3040) || (w >= 0x309B && w <= 0x309E) || (w >= 0x30FC && w <= 0x30FE)) { - s = 0; - } else { - s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; - } - } else if (w >= ucs_i_gb2312_table_min && w < ucs_i_gb2312_table_max) { - s = ucs_i_gb2312_table[w - ucs_i_gb2312_table_min]; - } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { - if (w == 0xFF04) { - s = 0xA1E7; - } else if (w == 0xFF5E) { - s = 0xA1AB; - } else if (w >= 0xFF01 && w <= 0xFF5D) { - s = w - 0xFF01 + 0xA3A1; - } else if (w == 0xFFE0 || w == 0xFFE1 || w == 0xFFE3 || w == 0xFFE5) { - s = ucs_hff_s_cp936_table[w - 0xFFE0]; - } - } - - s &= ~0x8080; - - if ((!s && w) || (s >= 0x80 && s < 0x2121)) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_hz); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s < 0x80) { - /* ASCII */ - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); - out = mb_convert_buf_add2(out, '~', '}'); - buf->state = ASCII; - } - if (s == '~') { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, '~', '~'); - } else { - out = mb_convert_buf_add(out, s); - } - } else { - /* GB 2312-80 */ - if (buf->state != GB2312) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add2(out, '~', '{'); - buf->state = GB2312; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } - } - - if (end && buf->state != ASCII) { - /* If not in ASCII state, need to emit closing control chars */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, 2); - out = mb_convert_buf_add2(out, '~', '}'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_hz.h b/ext/mbstring/libmbfl/filters/mbfilter_hz.h deleted file mode 100644 index 
6b1dfb1564c9d..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_hz.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_cn.h - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_HZ_H -#define MBFL_MBFILTER_HZ_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_hz; -extern const struct mbfl_convert_vtbl vtbl_hz_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_hz; - -int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_HZ_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c deleted file mode 100644 index e3676d30e2904..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c +++ /dev/null @@ -1,584 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. 
All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_iso2022_jp_ms.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" - -static size_t mb_iso2022jpms_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022jpms(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter); - -static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; - -const mbfl_encoding mbfl_encoding_2022jpms = { - mbfl_no_encoding_2022jpms, - "ISO-2022-JP-MS", - "ISO-2022-JP", - mbfl_encoding_2022jpms_aliases, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022jpms_wchar, - &vtbl_wchar_2022jpms, - mb_iso2022jpms_to_wchar, - mb_wchar_to_iso2022jpms, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { - mbfl_no_encoding_2022jpms, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_2022jpms_wchar, - 
mbfl_filt_conv_2022jpms_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jpms, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_2022jpms, - mbfl_filt_conv_any_2022jpms_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define sjistoidx(c1, c2) \ - (((c1) > 0x9f) \ - ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ - : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) -#define idxtojis1(c) (((c) / 94) + 0x21) -#define idxtojis2(c) (((c) % 94) + 0x21) - -#define ASCII 0 -#define JISX0201_KANA 0x20 -#define JISX0208_KANJI 0x80 -#define UDC 0xA0 - -int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - - switch (filter->status & 0xF) { - case 0: - if (c == 0x1B) { - filter->status += 2; - } else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) { - CK((*filter->output_function)(0xFF40 + c, filter->data)); - } else if ((filter->status == JISX0208_KANJI || filter->status == UDC) && c > 0x20 && c < 0x80) { - filter->cache = c; - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* ASCII */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xA0 && c < 0xE0) { /* Kana */ - CK((*filter->output_function)(0xFEC0 + c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* Kanji, second byte */ - case 1: - w = 0; - filter->status &= ~0xF; - c1 = filter->cache; - if (c > 0x20 && c < 0x7F) { - s = ((c1 - 0x21) * 94) + c - 0x21; - if (filter->status == JISX0208_KANJI) { - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN 
*/ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } else { - if (c1 > 0x20 && c1 < 0x35) { - w = 0xE000 + ((c1 - 0x21) * 94) + c - 0x21; - } else { - w = MBFL_BAD_INPUT; - } - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC */ - case 2: - if (c == '$') { - filter->status++; - } else if (c == '(') { - filter->status += 3; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ */ - case 3: - if (c == '@' || c == 'B') { - filter->status = JISX0208_KANJI; - } else if (c == '(') { - filter->status++; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ ( */ - case 4: - if (c == '@' || c == 'B') { - filter->status = JISX0208_KANJI; - } else if (c == '?') { - filter->status = UDC; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC ( */ - case 5: - if (c == 'B' || c == 'J') { - filter->status = 0; - } else if (c == 'I') { - filter->status = JISX0201_KANA; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - - return 0; -} - - -static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - 
(*filter->output_function)(MBFL_BAD_INPUT, filter->data); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static int cp932ext3_cp932ext2_jis(int c) -{ - int idx; - - idx = sjistoidx(0xfa, 0x40) + c; - if (idx >= sjistoidx(0xfa, 0x5c)) - idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); - else if (idx >= sjistoidx(0xfa, 0x55)) - idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); - else if (idx >= sjistoidx(0xfa, 0x40)) - idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); - return idxtojis1(idx) << 8 | idxtojis2(idx); -} - -int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1 = 0, s2 = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xE000 && c < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s1 = c - 0xE000; - c1 = (s1 / 94) + 0x7f; - c2 = (s1 % 94) + 0x21; - s1 = (c1 << 8) | c2; - } - - if (s1 <= 0) { - if (c == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } - } - - if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - for (c1 = 0; 
c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { - if (c == cp932ext1_ucs_table[c1]) { - s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; - break; - } - } - - if (s1 <= 0) { - for (c1 = 0; c1 < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; c1++) { - if (c == cp932ext3_ucs_table[c1]) { - s1 = cp932ext3_cp932ext2_jis(c1); - break; - } - } - } - - if (c == 0) { - s1 = 0; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* latin */ - if (filter->status & 0xFF00) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - CK((*filter->output_function)(s1, filter->data)); - filter->status = 0; - } else if (s1 > 0xA0 && s1 < 0xE0) { /* kana */ - if ((filter->status & 0xFF00) != 0x100) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('I', filter->data)); - } - filter->status = 0x100; - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } else if (s1 < 0x7E7F) { /* X 0208 */ - if ((filter->status & 0xFF00) != 0x200) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - filter->status = 0x200; - CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } else if (s1 < 0x927F) { /* UDC */ - if ((filter->status & 0xFF00) != 0x800) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('?', filter->data)); - } - filter->status = 0x800; - CK((*filter->output_function)(((s1 >> 8) - 0x5E) & 0x7F, filter->data)); - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } - } else { - 
CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) -{ - /* Go back to ASCII (so strings can be safely concatenated) */ - if ((filter->status & 0xFF00) != 0) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_iso2022jpms_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - if ((e - p) < 2) { - *out++ = MBFL_BAD_INPUT; - p = e; - break; - } - unsigned char c2 = *p++; - unsigned char c3 = *p++; - - if (c2 == '$') { - if (c3 == '@' || c3 == 'B') { - *state = JISX0208_KANJI; - } else if (c3 == '(' && p < e) { - unsigned char c4 = *p++; - - if (c4 == '@' || c4 == 'B') { - *state = JISX0208_KANJI; - } else if (c4 == '?') { - *state = UDC; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c2 == '(') { - if (c3 == 'B' || c3 == 'J') { - *state = ASCII; - } else if (c3 == 'I') { - *state = JISX0201_KANA; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - p--; - *out++ = MBFL_BAD_INPUT; - } - } else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) { - *out++ = 0xFF40 + c; - } else if ((*state == JISX0208_KANJI || *state == UDC) && c >= 0x21 && c <= 0x7F) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - unsigned int w = 0; - - if (c2 >= 0x21 && c2 <= 0x7E) { - unsigned int s = ((c - 0x21) * 94) + c2 - 0x21; - if (*state == JISX0208_KANJI) { - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - 
} else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (!w) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - } - } else if (c >= 0x21 && c <= 0x34) { - w = 0xE000 + ((c - 0x21) * 94) + c2 - 0x21; - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_iso2022jpms(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s = ((((w - 0xE000) / 94) + 0x7F) << 8) | (((w - 
0xE000) % 94) + 0x21); - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if (s >= 0xA1A1) /* JISX 0212 */ - s = 0; - - if (!s && w) { - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - break; - } - } - - if (!s) { - for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { - if (w == cp932ext3_ucs_table[i]) { - s = cp932ext3_cp932ext2_jis(i); - break; - } - } - } - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7F) { - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA1 && s <= 0xDF) { - if (buf->state != JISX0201_KANA) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX0201_KANA; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else if (s <= 0x7E7E) { - if (buf->state != JISX0208_KANJI) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX0208_KANJI; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0x7F); - } else if (s < 0x927F) { - if (buf->state != UDC) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 
6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', '?'); - buf->state = UDC; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, ((s >> 8) - 0x5E) & 0x7F, s & 0x7F); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h deleted file mode 100644 index fdc85183d7ea2..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H -#define MBFL_MBFILTER_ISO2022_JP_MS_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_2022jpms; -extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; - -int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c deleted file mode 100644 index dcf8fc51b6637..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -/* ISO-2022-KR is defined in RFC 1557 - * - * The RFC says that ESC $ ) C must appear once in a ISO-2022-KR string, - * at the beginning of a line, before any instances of the Shift In or - * Shift Out bytes which are used to switch between ASCII/KSC 5601 modes - * - * We don't enforce that for ISO-2022-KR input */ - -#include "mbfilter.h" -#include "mbfilter_iso2022_kr.h" -#include "unicode_table_uhc.h" - -static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter); -static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022kr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -const mbfl_encoding mbfl_encoding_2022kr = { - mbfl_no_encoding_2022kr, - "ISO-2022-KR", - "ISO-2022-KR", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022kr_wchar, - &vtbl_wchar_2022kr, - mb_iso2022kr_to_wchar, - mb_wchar_to_iso2022kr, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022kr = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022kr, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_2022kr, - mbfl_filt_conv_any_2022kr_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_2022kr_wchar = { - mbfl_no_encoding_2022kr, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_2022kr_wchar, - mbfl_filt_conv_2022kr_wchar_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter) -{ - int w = 0; - - switch (filter->status & 0xf) { - /* case 0x00: ASCII */ - /* case 0x10: KSC5601 */ - case 0: - if (c == 0x1b) { /* ESC */ - filter->status += 2; - } else if (c == 0x0f) { /* shift in (ASCII) */ - filter->status = 0; - } else if (c == 0x0e) { /* shift out (KSC5601) */ - filter->status = 0x10; - } else if ((filter->status & 0x10) && 
c > 0x20 && c < 0x7f) { - /* KSC5601 lead byte */ - filter->cache = c; - filter->status = 0x11; - } else if ((filter->status & 0x10) == 0 && c >= 0 && c < 0x80) { - /* latin, CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0x10; - int c1 = filter->cache; - int flag = 0; - - if (c1 > 0x20 && c1 < 0x47) { - flag = 1; - } else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) { - flag = 2; - } - - if (flag > 0 && c > 0x20 && c < 0x7f) { - if (flag == 1) { - if (c1 != 0x22 || c <= 0x65) { - w = (c1 - 1)*190 + (c - 0x41) + 0x80; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - } - } else { - w = (c1 - 0x47)*94 + c - 0x21; - if (w < uhc3_ucs_table_size) { - w = uhc3_ucs_table[w]; - } else { - w = MBFL_BAD_INPUT; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* ESC */ - if (c == '$') { - filter->status++; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* ESC $ */ - if (c == ')') { - filter->status++; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 4: /* ESC $ ) */ - filter->status = 0; - if (c != 'C') { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - /* 2-byte character was truncated */ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_2022kr(int c, 
mbfl_convert_filter *filter) -{ - int c1, c2, s = 0; - - if ((filter->status & 0x100) == 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)(')', filter->data)); - CK((*filter->output_function)('C', filter->data)); - filter->status |= 0x100; - } - - if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; - } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; - } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; - } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; - } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; - } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; - } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; - } - - c1 = (s >> 8) & 0xff; - c2 = s & 0xff; - /* exclude UHC extension area */ - if (c1 < 0xa1 || c2 < 0xa1) { - s = c; - } else if (s & 0x8000) { - s -= 0x8080; - } - - if (s <= 0) { - if (c == 0) { - s = 0; - } else { - s = -1; - } - } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { - s = -1; - } - - if (s >= 0) { - if (s < 0x80 && s >= 0) { /* ASCII */ - if (filter->status & 0x10) { - CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ - filter->status &= ~0x10; - } - CK((*filter->output_function)(s, filter->data)); - } else { - if ((filter->status & 0x10) == 0) { - CK((*filter->output_function)(0x0e, filter->data)); /* shift out */ - filter->status |= 0x10; - } - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } 
- } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - /* Escape sequence or 2-byte character was truncated */ - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - } - /* back to ascii */ - if (filter->status & 0x10) { - CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ - } - - filter->status = filter->cache = 0; - - if (filter->flush_function) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -#define ASCII 0 -#define KSC5601 1 - -static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - if ((e - p) < 3) { - *out++ = MBFL_BAD_INPUT; - if (p < e && *p++ == '$') { - if (p < e) { - p++; - } - } - continue; - } - unsigned char c2 = *p++; - unsigned char c3 = *p++; - unsigned char c4 = *p++; - if (c2 == '$' && c3 == ')' && c4 == 'C') { - *state = ASCII; - } else { - if (c3 != ')') { - p--; - if (c2 != '$') - p--; - } - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0xF) { - *state = ASCII; - } else if (c == 0xE) { - *state = KSC5601; - } else if (c >= 0x21 && c <= 0x7E && *state == KSC5601) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - unsigned int w = 0; - - if (c2 < 0x21 || c2 > 0x7E) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - if (c < 0x47) { - if (c != 0x22 || c2 <= 0x65) { - w = (c - 1)*190 + c2 - 0x41 + 0x80; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - } - } else if (c != 0x49 && c <= 0x7D) { - w = (c - 0x47)*94 + c2 - 0x21; - ZEND_ASSERT(w < uhc3_ucs_table_size); - w = uhc3_ucs_table[w]; - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else if (c < 0x80 && *state == 
ASCII) { - *out++ = c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -#define EMITTED_ESC_SEQUENCE 0x10 - -static void mb_wchar_to_iso2022kr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - - /* This escape sequence needs to come *somewhere* at the beginning of a line before - * we can use the Shift In/Shift Out bytes, but it only needs to come once in a string - * Rather than tracking newlines, we can just emit the sequence once at the beginning - * of the output string... since that will always be "the beginning of a line" */ - if (len && !(buf->state & EMITTED_ESC_SEQUENCE)) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 4 + len); - out = mb_convert_buf_add4(out, 0x1B, '$', ')', 'C'); - buf->state |= EMITTED_ESC_SEQUENCE; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; - } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; - } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; - } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; - } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; - } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; - } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; - } - - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = w; - } else { - s -= 0x8080; - } - - if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { - 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022kr); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s < 0x80) { - if ((buf->state & 1) != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add(out, 0xF); - buf->state &= ~KSC5601; - } - out = mb_convert_buf_add(out, s); - } else { - if ((buf->state & 1) != KSC5601) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); - out = mb_convert_buf_add(out, 0xE); - buf->state |= KSC5601; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - if (end && (buf->state & 1) != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 1); - out = mb_convert_buf_add(out, 0xF); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h deleted file mode 100644 index dc6687a61478a..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.h - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_ISO2022_KR_H -#define MBFL_MBFILTER_ISO2022_KR_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_2022kr; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022kr; -extern const struct mbfl_convert_vtbl vtbl_2022kr_wchar; - -int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_ISO2022_KR_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c deleted file mode 100644 index 79b7a4714af23..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_iso2022_jp_ms.c - * by Rui Hirokawa on 25 July 2011. - * - */ - -#include "mbfilter.h" -#include "mbfilter_iso2022jp_mobile.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" -#include "emoji2uni.h" - -static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter); - -extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); - -/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF - * These correspond to the letters A-Z - * To display the flag emoji for a country, two unicode codepoints are combined, - * which correspond to the two-letter code for that country - * This macro converts uppercase ASCII values to Regional Indicator codepoints */ -#define NFLAGS(c) (0x1F1A5+((unsigned int)(c))) - -static const char nflags_s[10][2] = { - "CN","DE","ES","FR","GB","IT","JP","KR","RU","US" -}; -static const int nflags_code_kddi[10] = { - 0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7 -}; - -static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL}; - -const mbfl_encoding 
mbfl_encoding_2022jp_kddi = { - mbfl_no_encoding_2022jp_kddi, - "ISO-2022-JP-MOBILE#KDDI", - "ISO-2022-JP", - mbfl_encoding_2022jp_kddi_aliases, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022jp_kddi_wchar, - &vtbl_wchar_2022jp_kddi, - mb_iso2022jp_kddi_to_wchar, - mb_wchar_to_iso2022jp_kddi, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = { - mbfl_no_encoding_2022jp_kddi, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_2022jp_mobile_wchar, - mbfl_filt_conv_2022jp_mobile_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jp_kddi, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_2022jp_mobile, - mbfl_filt_conv_wchar_2022jp_mobile_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = ((c1 - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - if (c1 < 0xa0) { \ - s1 = ((c1 - 0x81) << 1) + 0x21; \ - } else { \ - s1 = ((c1 - 0xc1) << 1) + 0x21; \ - } \ - s2 = c2; \ - if (c2 < 0x9f) { \ - if (c2 < 0x7f) { \ - s2++; \ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -/* (ku*94)+ten value -> Shift-JIS byte sequence */ -#define CODE2JIS(c1,c2,s1,s2) \ - c1 = (s1)/94+0x21; \ - c2 = (s1)-94*((c1)-0x21)+0x21; \ - s1 = ((c1) << 8) | (c2); \ - s2 = 1 - -#define ASCII 0 -#define JISX0201_KANA 0x20 -#define JISX0208_KANJI 0x80 - -#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0) -#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0) - -static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"}; - -static inline int convert_emoji_cp(int cp) -{ - 
if (cp > 0xF000) - return cp + 0x10000; - if (cp > 0xE000) - return cp + 0xF0000; - return cp; -} - -static int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd) -{ - if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) { - if (s == 0x24C0) { /* Spain */ - EMIT_FLAG_EMOJI("ES"); - } else if (s == 0x24C1) { /* Russia */ - EMIT_FLAG_EMOJI("RU"); - } else if (s >= 0x2545 && s <= 0x254A) { - EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]); - } else if (s == 0x25BC) { - EMIT_KEYPAD_EMOJI('#'); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]); - } - } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) { - if (s == 0x2750) { /* Japan */ - EMIT_FLAG_EMOJI("JP"); - } else if (s >= 0x27A6 && s <= 0x27AE) { - EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1'); - } else if (s == 0x27F7) { /* United States */ - EMIT_FLAG_EMOJI("US"); - } else if (s == 0x2830) { - EMIT_KEYPAD_EMOJI('0'); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]); - } - } - return 0; -} - -static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w, snd = 0; - - switch (filter->status & 0xF) { - case 0: - if (c == 0x1B) { - filter->status += 2; - } else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) { - CK((*filter->output_function)(0xFF40 + c, filter->data)); - } else if (filter->status == JISX0208_KANJI && c > 0x20 && c < 0x80) { - filter->cache = c; - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* ASCII */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xA0 && c < 0xE0) { /* Kana */ - CK((*filter->output_function)(0xFEC0 + c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* JISX 0208, second byte */ - case 1: - w = 0; - filter->status &= ~0xF; - c1 = filter->cache; - if (c > 0x20 && c < 0x7F) { - s = ((c1 - 0x21) * 94) + c - 0x21; - 
- if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (s >= (84 * 94) && s < (91 * 94)) { - s += 22 * 94; - w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); - if (w > 0 && snd > 0) { - (*filter->output_function)(snd, filter->data); - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC */ - case 2: - if (c == '$') { - filter->status++; - } else if (c == '(') { - filter->status += 3; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ */ - case 3: - if (c == '@' || c == 'B') { - filter->status = JISX0208_KANJI; - } else if (c == '(') { - filter->status++; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ ( */ - case 4: - if (c == '@' || c == 'B') { - filter->status = JISX0208_KANJI; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC ( */ - case 5: - if (c == 'B' || c == 'J') { - filter->status = 0; /* ASCII mode */ - } else if (c == 'I') { - filter->status = JISX0201_KANA; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, 
filter->data)); - } - } - - return 0; -} - -static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter) -{ - if ((filter->status & 0xF) == 1) { - int c1 = filter->cache; - filter->cache = 0; - filter->status &= ~0xFF; - if (c == 0x20E3) { - if (c1 == '#') { - *s1 = 0x25BC; - } else if (c1 == '0') { - *s1 = 0x2830; - } else { /* Previous character was '1'-'9' */ - *s1 = 0x27A6 + (c1 - '1'); - } - return 1; - } else { - if (filter->status & 0xFF00) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - CK((*filter->output_function)(c1, filter->data)); - filter->status = 0; - } - } - - if (c == '#' || (c >= '0' && c <= '9')) { - filter->status |= 1; - filter->cache = c; - return 0; - } - - if (c == 0xA9) { /* Copyright sign */ - *s1 = 0x27DC; - return 1; - } else if (c == 0xAE) { /* Registered sign */ - *s1 = 0x27DD; - return 1; - } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) { - int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code2_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) { - int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code3_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) { - int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); - if (i >= 0) { - *s1 = 
mb_tbl_uni_kddi2code5_val[i]; - return 1; - } - } - return 0; -} - -static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1 = 0, s2 = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - - if (s1 <= 0) { - if (c == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } - } - - if (mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) { - /* A KDDI emoji was detected and stored in s1 */ - CODE2JIS(c1,c2,s1,s2); - s1 -= 0x1600; - } else if ((filter->status & 0xFF) == 1 && filter->cache) { - /* We are just processing one of KDDI's special emoji for a phone keypad button */ - return 0; - } - - if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { - if (c == cp932ext1_ucs_table[c1]) { - s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; - break; - } - } - - if (c == 0) { - s1 = 0; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* ASCII */ - if (filter->status & 0xFF00) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', 
filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - CK((*filter->output_function)(s1, filter->data)); - filter->status = 0; - } else if (s1 > 0xA0 && s1 < 0xE0) { /* Kana */ - if ((filter->status & 0xFF00) != 0x100) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('I', filter->data)); - } - filter->status = 0x100; - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } else if (s1 < 0x7E7F) { /* JIS X 0208 */ - if ((filter->status & 0xFF00) != 0x200) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - filter->status = 0x200; - CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter) -{ - /* Go back to ASCII mode (so strings can be safely concatenated) */ - if (filter->status & 0xFF00) { - (*filter->output_function)(0x1B, filter->data); /* ESC */ - (*filter->output_function)('(', filter->data); - (*filter->output_function)('B', filter->data); - } - - int c1 = filter->cache; - if ((filter->status & 0xFF) == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) { - (*filter->output_function)(c1, filter->data); - } - filter->status = filter->cache = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - if ((e - p) < 2) { - p = e; - *out++ = 
MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - unsigned char c3 = *p++; - - if (c2 == '$') { - if (c3 == '@' || c3 == 'B') { - *state = JISX0208_KANJI; - } else if (c3 == '(') { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c4 = *p++; - - if (c4 == '@' || c4 == 'B') { - *state = JISX0208_KANJI; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c2 == '(') { - if (c3 == 'B' || c3 == 'J') { - *state = ASCII; - } else if (c3 == 'I') { - *state = JISX0201_KANA; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - p--; - *out++ = MBFL_BAD_INPUT; - } - } else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) { - *out++ = 0xFF40 + c; - } else if (*state == JISX0208_KANJI && c >= 0x21 && c <= 0x7F) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - - if (c2 >= 0x21 && c2 <= 0x7E) { - unsigned int s = ((c - 0x21) * 94) + c2 - 0x21; - uint32_t w = 0; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (s >= (84 * 94) && s < (91 * 94)) { - int snd = 0; - s += 22 * 94; - w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); - if (w && snd) { - *out++ = snd; - } - } - - if (!w) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if ((w == '#' || (w >= '0' && w <= '9')) && len) { - uint32_t w2 = *in++; len--; - - if (w2 == 0x20E3) { - unsigned int s1 = 0; - if (w == '#') { - s1 = 0x25BC; - } else if (w == '0') { - s1 = 0x2830; - } else { /* Previous character was '1'-'9' */ - s1 = 0x27A6 + (w - '1'); - } - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } else { - in--; len++; - } - } else if (w >= NFLAGS('C') && w <= NFLAGS('U') && len) { /* C for CN, U for US */ - uint32_t w2 
= *in++; len--; - - if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { - unsigned int s1 = nflags_code_kddi[i]; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - goto found_flag_emoji; - } - } - } - - in--; len++; -found_flag_emoji: ; - } - - if (w == 0xA9) { /* Copyright sign */ - unsigned int s1 = 0x27DC; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } else if (w == 0xAE) { /* Registered sign */ - unsigned int s1 = 0x27DD; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) { - int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); - if (i >= 0) { - unsigned int s1 = mb_tbl_uni_kddi2code2_value[i]; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } - } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) { - int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); - if (i >= 0) { - unsigned int s1 = mb_tbl_uni_kddi2code3_value[i]; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } - } else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) { - int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); - if (i >= 0) { - unsigned int s1 = mb_tbl_uni_kddi2code5_val[i]; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } - } - - if (!s || s >= 0xA1A1) { - s = 0; - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - break; - } - } - if (w == 0) - s = 0; - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7F) { - if (buf->state 
!= ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA1 && s <= 0xDF) { - if (buf->state != JISX0201_KANA) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX0201_KANA; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else if (s <= 0x7E7E) { - if (buf->state != JISX0208_KANJI) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX0208_KANJI; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h deleted file mode 100644 index c2beafde64726..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_iso2022_jp_ms.h - * by Rui Hirokawa on 25 July 2011. - * - */ - -#ifndef MBFL_MBFILTER_ISO2022_JP_MOBILE_H -#define MBFL_MBFILTER_ISO2022_JP_MOBILE_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_2022jp_kddi; -extern const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi; - -#endif /* MBFL_MBFILTER_ISO2022_JP_MOBILE_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c deleted file mode 100644 index 80af0e695644c..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ /dev/null @@ -1,944 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_jis.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" - -static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static bool mb_check_iso2022jp(unsigned char *in, size_t in_len); -static bool mb_check_jis(unsigned char *in, size_t in_len); - -const mbfl_encoding mbfl_encoding_jis = { - mbfl_no_encoding_jis, - "JIS", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_jis_wchar, - &vtbl_wchar_jis, - mb_iso2022jp_to_wchar, - mb_wchar_to_jis, - mb_check_jis -}; - -const mbfl_encoding mbfl_encoding_2022jp = { - mbfl_no_encoding_2022jp, - "ISO-2022-JP", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022jp_wchar, - &vtbl_wchar_2022jp, - mb_iso2022jp_to_wchar, - mb_wchar_to_iso2022jp, - mb_check_iso2022jp -}; - -const struct mbfl_convert_vtbl vtbl_jis_wchar = { - mbfl_no_encoding_jis, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis_wchar, - mbfl_filt_conv_jis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_jis = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_jis, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis, - mbfl_filt_conv_any_jis_flush, - NULL, -}; - 
-const struct mbfl_convert_vtbl vtbl_2022jp_wchar = { - mbfl_no_encoding_2022jp, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis_wchar, - mbfl_filt_conv_jis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022jp = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jp, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_2022jp, - mbfl_filt_conv_any_jis_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -/* - * JIS => wchar - */ -int -mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ - CK((*filter->output_function)(0xa5, filter->data)); - } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ - CK((*filter->output_function)(0x203e, filter->data)); - } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ - CK((*filter->output_function)(0xff40 + c, filter->data)); - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->cache = c; - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - filter->status &= ~0xf; - c1 = filter->cache; - if (c > 0x20 && c < 0x7f) { - 
s = (c1 - 0x21)*94 + c - 0x21; - if (filter->status == 0x80) { - if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } else { - if (s >= 0 && s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC */ -/* case 0x02: */ -/* case 0x12: */ -/* case 0x22: */ -/* case 0x82: */ -/* case 0x92: */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - goto retry; - } - break; - - /* ESC $ */ -/* case 0x03: */ -/* case 0x13: */ -/* case 0x23: */ -/* case 0x83: */ -/* case 0x93: */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x24, filter->data)); - goto retry; - } - break; - - /* ESC $ ( */ -/* case 0x04: */ -/* case 0x14: */ -/* case 0x24: */ -/* case 0x84: */ -/* case 0x94: */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x24, filter->data)); - CK((*filter->output_function)(0x28, filter->data)); - goto retry; - } - break; - - /* ESC ( */ -/* case 0x05: */ -/* case 0x15: */ -/* case 0x25: */ -/* case 0x85: */ -/* case 0x95: */ - case 5: - if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - 
filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x28, filter->data)); - goto retry; - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - /* 2-byte (JIS X 0208 or 0212) character was truncated, - * or else escape sequence was truncated */ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -/* - * wchar => JIS - */ -int -mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - if (s <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if 
((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x8080) { /* X 0208 */ - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0x200; - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else if (s < 0x10000) { /* X 0212 */ - if ((filter->status & 0xff00) != 0x300) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x44, filter->data)); /* 'D' */ - } - filter->status = 0x300; - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - - -/* - * wchar => ISO-2022-JP - */ -int -mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter) -{ - int s; - - s = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = 
ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - - if (s <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { - s = -1; - } - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x10000) { /* X 0208 */ - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0x200; - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - 
CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int -mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter) -{ - /* back to latin */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - - if (filter->flush_function != NULL) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -#define ASCII 0 -#define JISX_0201_LATIN 1 -#define JISX_0201_KANA 2 -#define JISX_0208 3 -#define JISX_0212 4 - -static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - ZEND_ASSERT(bufsize >= 3); - - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - /* ESC seen; this is an escape sequence */ - if ((e - p) < 2) { - *out++ = MBFL_BAD_INPUT; - if (p != e && (*p == '$' || *p == '(')) - p++; - continue; - } - - unsigned char c2 = *p++; - if (c2 == '$') { - unsigned char c3 = *p++; - if (c3 == '@' || c3 == 'B') { - *state = JISX_0208; - } else if (c3 == '(') { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c4 = *p++; - if (c4 == '@' || c4 == 'B') { - *state = JISX_0208; - } else if (c4 == 'D') { - *state = JISX_0212; - } else { - if ((limit - out) < 3) { - p -= 4; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '$'; - *out++ = '('; - p--; - } - } else { - if ((limit - out) < 2) { - p -= 3; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '$'; - p--; - } - } else if (c2 == '(') { - unsigned char c3 = *p++; - if (c3 == 'B' || c3 == 'H') { - *state = ASCII; - } else if (c3 == 'J') { - *state = JISX_0201_LATIN; - } else if (c3 == 'I') { - *state = 
JISX_0201_KANA; - } else { - if ((limit - out) < 2) { - p -= 3; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '('; - p--; - } - } else { - *out++ = MBFL_BAD_INPUT; - p--; - } - } else if (c == 0xE) { - /* "Kana In" marker; this is just for JIS-7/8, but we also accept it for ISO-2022-JP */ - *state = JISX_0201_KANA; - } else if (c == 0xF) { - /* "Kana Out" marker */ - *state = ASCII; - } else if (*state == JISX_0201_LATIN && c == 0x5C) { /* YEN SIGN */ - *out++ = 0xA5; - } else if (*state == JISX_0201_LATIN && c == 0x7E) { /* OVER LINE */ - *out++ = 0x203E; - } else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) { - *out++ = 0xFF40 + c; - } else if (*state >= JISX_0208 && c > 0x20 && c < 0x7F) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - if (c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - uint32_t w = 0; - if (*state == JISX_0208) { - if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - if (!w) { - w = MBFL_BAD_INPUT; - } - } else { - if (s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - } - if (!w) { - w = MBFL_BAD_INPUT; - } - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - /* GR-invoked Kana; "GR" stands for "graphics right" and refers to bytes - * with the MSB bit (in the context of ISO-2022 encoding). - * - * In this regard, Wikipedia states: - * "Other, older variants known as JIS7 and JIS8 build directly on the 7-bit and 8-bit - * encodings defined by JIS X 0201 and allow use of JIS X 0201 kana from G1 without - * escape sequences, using Shift Out and Shift In or setting the eighth bit - * (GR-invoked), respectively." 
- * - * Note that we support both the 'JIS7' use of 0xE/0xF Shift In/Shift Out codes - * and the 'JIS8' use of GR-invoked Kana */ - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x1005C; - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w != 0) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - - if (s < 0x80) { /* ASCII */ - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - 
buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s < 0x8080) { /* JIS X 0208 */ - if (buf->state != JISX_0208) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5); - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } else if (s < 0x10000) { /* JIS X 0212 */ - if (buf->state != JISX_0212) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D'); - buf->state = JISX_0212; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } else { /* X 0201 Latin */ - if (buf->state != JISX_0201_LATIN) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x1005C; - } else if (w == 0xFF3C) { 
/* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w != 0) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - } - - if (s < 0x80) { /* ASCII */ - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA1 && s <= 0xDF) { - if (buf->state != JISX_0201_KANA) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX_0201_KANA; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else if (s < 0x8080) { /* JIS X 0208 */ - if (buf->state != JISX_0208) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5); - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } else if (s < 0x10000) { /* JIS X 0212 */ - if (buf->state != JISX_0212) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D'); - buf->state = JISX_0212; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } else { /* X 0201 Latin */ - if (buf->state != JISX_0201_LATIN) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - 
MB_CONVERT_BUF_STORE(buf, out, limit); -} - -#define JISX_0201_KANA_SO 5 - -static bool mb_check_jis(unsigned char *in, size_t in_len) -{ - unsigned char *p = in, *e = p + in_len; - unsigned int state = ASCII; - - while (p < e) { - unsigned char c = *p++; - if (c == 0x1B) { - /* ESC seen; this is an escape sequence */ - if (state == JISX_0201_KANA_SO) { - return false; - } - if ((e - p) < 2) { - return false; - } - unsigned char c2 = *p++; - if (c2 == '$') { - unsigned char c3 = *p++; - if (c3 == '@' || c3 == 'B') { - state = JISX_0208; - } else if (c3 == '(') { - if (p == e) { - return false; - } - unsigned char c4 = *p++; - if (c4 == '@' || c4 == 'B') { - state = JISX_0208; - } else if (c4 == 'D') { - state = JISX_0212; - } else { - return false; - } - } else { - return false; - } - } else if (c2 == '(') { - unsigned char c3 = *p++; - /* ESC ( H is treated as a sequence transitioning to ASCII for historical reasons. - * see https://github.com/php/php-src/pull/10828#issuecomment-1478342432. 
*/ - if (c3 == 'B' || c3 == 'H') { - state = ASCII; - } else if (c3 == 'J') { - state = JISX_0201_LATIN; - } else if (c3 == 'I') { - state = JISX_0201_KANA; - } else { - return false; - } - } else { - return false; - } - } else if (c == 0xE) { - /* "Kana In" marker */ - if (state != ASCII) { - return false; - } - state = JISX_0201_KANA_SO; - } else if (c == 0xF) { - /* "Kana Out" marker */ - if (state != JISX_0201_KANA_SO) { - return false; - } - state = ASCII; - } else if ((state == JISX_0208 || state == JISX_0212) && (c > 0x20 && c < 0x7F)) { - if (p == e) { - return false; - } - unsigned char c2 = *p++; - if (c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - if (state == JISX_0208) { - if (s < jisx0208_ucs_table_size && jisx0208_ucs_table[s]) { - continue; - } - } else { - if (s < jisx0212_ucs_table_size && jisx0212_ucs_table[s]) { - continue; - } - } - return false; - } else { - return false; - } - } else if (c < 0x80) { - continue; - } else if (c >= 0xA1 && c <= 0xDF) { - /* GR-invoked Kana */ - continue; - } else { - return false; - } - } - - return state == ASCII; -} - - -static bool mb_check_iso2022jp(unsigned char *in, size_t in_len) -{ - unsigned char *p = in, *e = p + in_len; - unsigned int state = ASCII; - - while (p < e) { - unsigned char c = *p++; - if (c == 0x1B) { - /* ESC seen; this is an escape sequence */ - if ((e - p) < 2) { - return false; - } - unsigned char c2 = *p++; - if (c2 == '$') { - unsigned char c3 = *p++; - if (c3 == '@' || c3 == 'B') { - state = JISX_0208; - } else { - return false; - } - } else if (c2 == '(') { - unsigned char c3 = *p++; - if (c3 == 'B') { - state = ASCII; - } else if (c3 == 'J') { - state = JISX_0201_LATIN; - } else { - return false; - } - } else { - return false; - } - } else if (c == 0xE || c == 0xF) { - /* "Kana In" or "Kana Out" marker; ISO-2022-JP is not accepted. 
*/ - return false; - } else if (state == JISX_0208 && (c > 0x20 && c < 0x7F)) { - if (p == e) { - return false; - } - unsigned char c2 = *p++; - if (c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - if (s < jisx0208_ucs_table_size && jisx0208_ucs_table[s]) { - continue; - } - return false; - } else { - return false; - } - } else if (c < 0x80) { - continue; - } else { - return false; - } - } - - return state == ASCII; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.h b/ext/mbstring/libmbfl/filters/mbfilter_jis.h deleted file mode 100644 index 55787c9acb7ac..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_JIS_H -#define MBFL_MBFILTER_JIS_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_jis; -extern const mbfl_encoding mbfl_encoding_2022jp; -extern const struct mbfl_convert_vtbl vtbl_jis_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_jis; -extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp; - -int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_JIS_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c deleted file mode 100644 index 4db34c56b0e57..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c +++ /dev/null @@ -1,2941 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_sjis.h" -#include "mbfilter_sjis_mac.h" -#include "mbfilter_sjis_mobile.h" - -#define UNICODE_TABLE_CP932_DEF -#define UNICODE_TABLE_JIS_DEF - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "sjis_mac2uni.h" -#include "emoji2uni.h" - -extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); - -static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static int mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter); -static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjismac(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter); -static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_sjis_kddi_to_wchar(unsigned char **in, 
size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -const unsigned char mblen_table_sjis[] = { /* 0x81-0x9F,0xE0-0xEF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; - -const unsigned char mblen_table_sjismac[] = { /* 0x81-0x9F,0xE0-0xED */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; - -const unsigned char mblen_table_sjis_mobile[] = { /* 0x81-0x9F,0xE0-0xFC */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 -}; - -static const char *mbfl_encoding_sjis_aliases[] = {"x-sjis", "SHIFT-JIS", NULL}; - -const mbfl_encoding mbfl_encoding_sjis = { - mbfl_no_encoding_sjis, - "SJIS", - "Shift_JIS", - mbfl_encoding_sjis_aliases, - mblen_table_sjis, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_wchar, - &vtbl_wchar_sjis, - mb_sjis_to_wchar, - mb_wchar_to_sjis, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_sjis_wchar = { - mbfl_no_encoding_sjis, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis, - mbfl_filt_conv_common_flush, - NULL -}; - -static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL}; - -const mbfl_encoding mbfl_encoding_sjis_mac = { - mbfl_no_encoding_sjis_mac, - "SJIS-mac", - "Shift_JIS", - 
mbfl_encoding_sjis_mac_aliases, - mblen_table_sjismac, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_mac_wchar, - &vtbl_wchar_sjis_mac, - mb_sjismac_to_wchar, - mb_wchar_to_sjismac, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = { - mbfl_no_encoding_sjis_mac, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_mac_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_mac, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_mac, - mbfl_filt_conv_wchar_sjis_mac_flush, - NULL, -}; - -static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL}; -static const char *mbfl_encoding_sjis_kddi_aliases[] = {"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL}; -static const char *mbfl_encoding_sjis_sb_aliases[] = {"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL}; - -const mbfl_encoding mbfl_encoding_sjis_docomo = { - mbfl_no_encoding_sjis_docomo, - "SJIS-Mobile#DOCOMO", - "Shift_JIS", - mbfl_encoding_sjis_docomo_aliases, - mblen_table_sjis_mobile, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_docomo_wchar, - &vtbl_wchar_sjis_docomo, - mb_sjis_docomo_to_wchar, - mb_wchar_to_sjis_docomo, - NULL -}; - -const mbfl_encoding mbfl_encoding_sjis_kddi = { - mbfl_no_encoding_sjis_kddi, - "SJIS-Mobile#KDDI", - "Shift_JIS", - mbfl_encoding_sjis_kddi_aliases, - mblen_table_sjis_mobile, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_kddi_wchar, - &vtbl_wchar_sjis_kddi, - mb_sjis_kddi_to_wchar, - mb_wchar_to_sjis_kddi, - NULL -}; - -const mbfl_encoding mbfl_encoding_sjis_sb = { - mbfl_no_encoding_sjis_sb, - "SJIS-Mobile#SOFTBANK", - "Shift_JIS", - mbfl_encoding_sjis_sb_aliases, - mblen_table_sjis_mobile, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_sb_wchar, - &vtbl_wchar_sjis_sb, - mb_sjis_sb_to_wchar, - mb_wchar_to_sjis_sb, - NULL -}; - -const struct 
mbfl_convert_vtbl vtbl_sjis_docomo_wchar = { - mbfl_no_encoding_sjis_docomo, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_mobile_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_docomo, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_mobile, - mbfl_filt_conv_sjis_mobile_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = { - mbfl_no_encoding_sjis_kddi, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_mobile_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_kddi, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_mobile, - mbfl_filt_conv_sjis_mobile_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = { - mbfl_no_encoding_sjis_sb, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_mobile_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_sb, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_mobile, - mbfl_filt_conv_sjis_mobile_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - s1--; \ - s1 >>= 1; \ - if ((c1) < 0x5f) { \ - s1 += 0x71; \ - } else { \ - s1 += 0xb1; \ - } \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - if (s1 < 0xa0) { \ - s1 -= 0x81; \ - } else { \ - s1 -= 0xc1; \ - } \ - s1 <<= 1; \ - s1 += 0x21; \ - s2 = c2; \ - if (s2 < 0x9f) { \ - if (s2 < 0x7f) { \ - s2++; 
\ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter) -{ - int s1, s2, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* ASCII */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xA0 && c < 0xE0) { /* Kana */ - CK((*filter->output_function)(0xFEC0 + c, filter->data)); - } else if (c > 0x80 && c < 0xF0 && c != 0xA0) { /* Kanji, first byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* Kanji, second byte */ - filter->status = 0; - int c1 = filter->cache; - if (c >= 0x40 && c <= 0xFC && c != 0x7F) { - SJIS_DECODE(c1, c, s1, s2); - w = (s1 - 0x21)*94 + s2 - 0x21; - if (w >= 0 && w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - } else { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - - return 0; -} - -static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status && filter->status != 4) { - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1, s2; - - s1 = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - if 
(s1 <= 0) { - if (c == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xAF || c == 0x203E) { /* U+00AF is MACRON, U+203E is OVERLINE */ - s1 = 0x2131; /* FULLWIDTH MACRON */ - } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215D; - } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } else if (c == 0) { - s1 = 0; - } else { - s1 = -1; - } - } else if (s1 >= 0x8080) { /* JIS X 0212; not supported */ - s1 = -1; - } - - if (s1 >= 0) { - if (s1 < 0x100) { /* Latin/Kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* Kanji */ - c1 = (s1 >> 8) & 0xFF; - c2 = s1 & 0xFF; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static const unsigned short sjis_decode_tbl1[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, 
-188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF -}; - -static const unsigned short sjis_decode_tbl2[] = { - 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 0xFFFF, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 0xFFFF, 0xFFFF, 0xFFFF -}; - -static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = 
p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - e--; /* Stop the main loop 1 byte short of the end of the input */ - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { /* Kana */ - *out++ = 0xFEC0 + c; - } else { - /* Don't need to check p < e; it's not possible to go out of bounds here, due to e-- above */ - unsigned char c2 = *p++; - /* This is only legal if c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F - * But the values in the above conversion tables have been chosen such that - * illegal values of c2 will always result in w > jisx0208_ucs_table_size, - * so we don't need to do a separate bounds check on c2 - * Likewise, the values in the conversion tables are such that illegal values - * for c will always result in w > jisx0208_ucs_table_size */ - uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2]; - if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - if (c == 0x80 || c == 0xA0 || c > 0xEF) { - p--; - } - *out++ = MBFL_BAD_INPUT; - } - } - } - - /* Finish up last byte of input string if there is one */ - if (p == e && out < limit) { - unsigned char c = *p++; - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p + 1; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && 
w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xAF || w == 0x203E) { - s = 0x2131; /* FULLWIDTH MACRON */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w != 0) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - } else if (s >= 0x8080) { /* JIS X 0212; not supported */ - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - - if (s <= 0xFF) { - /* Latin/Kana */ - out = mb_convert_buf_add(out, s); - } else { - /* Kanji */ - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s2; - SJIS_ENCODE(c1, c2, s, s2); - out = mb_convert_buf_add2(out, s, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static int mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter) -{ - int i, j, n; - int c1, s, s1, s2, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80 && c != 0x5c) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else if (c > 0x80 && c <= 0xed && c != 0xa0) { /* kanji first char */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x5c) { - CK((*filter->output_function)(0x00a5, filter->data)); - } else if (c == 0x80) { - 
CK((*filter->output_function)(0x005c, filter->data)); - } else if (c == 0xa0) { - CK((*filter->output_function)(0x00a0, filter->data)); - } else if (c == 0xfd) { - CK((*filter->output_function)(0x00a9, filter->data)); - } else if (c == 0xfe) { - CK((*filter->output_function)(0x2122, filter->data)); - } else if (c == 0xff) { - CK((*filter->output_function)(0x2026, filter->data)); - CK((*filter->output_function)(0xf87f, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* kanji second char */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0x40 && c <= 0xfc && c != 0x7f) { - w = 0; - SJIS_DECODE(c1, c, s1, s2); - s = (s1 - 0x21)*94 + s2 - 0x21; - if (s <= 0x89) { - if (s == 0x1c) { - w = 0x2014; /* EM DASH */ - } else if (s == 0x1f) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 0x20) { - w = 0x301c; /* FULLWIDTH TILDE */ - } else if (s == 0x21) { - w = 0x2016; /* PARALLEL TO */ - } else if (s == 0x3c) { - w = 0x2212; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 0x50) { - w = 0x00a2; /* FULLWIDTH CENT SIGN */ - } else if (s == 0x51) { - w = 0x00a3; /* FULLWIDTH POUND SIGN */ - } else if (s == 0x89) { - w = 0x00ac; /* FULLWIDTH NOT SIGN */ - } - } - - /* apple gaiji area 0x8540 - 0x886d */ - if (w == 0) { - for (i=0; i<7; i++) { - if (s >= code_tbl[i][0] && s <= code_tbl[i][1]) { - w = s - code_tbl[i][0] + code_tbl[i][2]; - break; - } - } - } - - if (w == 0) { - - for (i=0; ioutput_function)(code_tbl_m[i][j], filter->data)); - } - w = code_tbl_m[i][n-1]; - break; - } - } - } - - if (w == 0) { - for (i=0; i<8; i++) { - if (s >= code_ofst_tbl[i][0] && s <= code_ofst_tbl[i][1]) { - w = code_map[i][s - code_ofst_tbl[i][0]]; - if (w == 0) { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - return 0; - } - s2 = 0; - if (s >= 0x043e && s <= 0x0441) { - s2 = 0xf87a; - } else if (s == 0x03b1 || s == 0x03b7) { - s2 = 0xf87f; - } else if (s == 0x04b8 || s == 0x04b9 || s 
== 0x04c4) { - s2 = 0x20dd; - } else if (s == 0x1ed9 || s == 0x1eda || s == 0x1ee8 || s == 0x1ef3 || - (s >= 0x1ef5 && s <= 0x1efb) || s == 0x1f05 || s == 0x1f06 || - s == 0x1f18 || (s >= 0x1ff2 && s <= 0x20a5)) { - s2 = 0xf87e; - } - if (s2 > 0) { - CK((*filter->output_function)(w, filter->data)); - w = s2; - } - break; - } - } - } - - if (w == 0 && s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter) -{ - int i, c1, c2, s1 = 0, s2 = 0, mode; - - // a1: U+0000 -> U+046F - // a2: U+2000 -> U+30FF - // i: U+4E00 -> U+9FFF - // r: U+FF00 -> U+FFFF - - switch (filter->status) { - case 1: - c1 = filter->cache; - filter->cache = filter->status = 0; - - if (c == 0xf87a) { - for (i = 0; i < 4; i++) { - if (c1 == s_form_tbl[i+34+3+3]) { - s1 = s_form_sjis_tbl[i+34+3+3]; - break; - } - } - if (s1 <= 0) { - s2 = c1; - } - } else if (c == 0x20dd) { - for (i = 0; i < 3; i++) { - if (c1 == s_form_tbl[i+34+3]) { - s1 = s_form_sjis_tbl[i+34+3]; - break; - } - } - if (s1 <= 0) { - s2 = c1; - } - } else if (c == 0xf87f) { - for (i = 0; i < 3; i++) { - if (c1 == s_form_tbl[i+34]) { - s1 = s_form_sjis_tbl[i+34]; - break; - } - } - if (s1 <= 0) { - s2 = c1; - s1 = -1; - } - } else if (c == 0xf87e) { - for (i = 0; i < 34; i++) { - if (c1 == s_form_tbl[i]) { - s1 = s_form_sjis_tbl[i]; - break; - } - } - if (s1 <= 0) { - s2 = c1; - s1 = -1; - } - } else { - s2 = c1; - s1 = c; - } - - if (s2 > 0) { - for (i = 0; i < s_form_tbl_len; i++) { - if (c1 == s_form_tbl[i]) { - s1 = s_form_sjis_fallback_tbl[i]; - break; - } - } - } - - if (s1 >= 0) { - if (s1 < 0x100) { - CK((*filter->output_function)(s1, filter->data)); - } else { - 
CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s1 & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - if (s2 <= 0 || s1 == -1) { - break; - } - s1 = s2 = 0; - ZEND_FALLTHROUGH; - - case 0: - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - if (c == 0x5c) { - s1 = 0x80; - } else if (c == 0xa9) { - s1 = 0xfd; - } - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - if (c == 0x2122) { - s1 = 0xfe; - } else if (c == 0x2014) { - s1 = 0x213d; - } else if (c == 0x2116) { - s1 = 0x2c1d; - } - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - - if (c >= 0x2000) { - for (i = 0; i < s_form_tbl_len; i++) { - if (c == s_form_tbl[i]) { - filter->status = 1; - filter->cache = c; - return 0; - } - } - - if (c == 0xf860 || c == 0xf861 || c == 0xf862) { - /* Apple 'transcoding hint' codepoints (from private use area) */ - filter->status = 2; - filter->cache = c; - return 0; - } - } - - if (s1 <= 0) { - if (c == 0xa0) { - s1 = 0x00a0; - } else if (c == 0xa5) { /* YEN SIGN */ - /* Unicode has codepoint 0xFFE5 for a fullwidth Yen sign; - * convert codepoint 0xA5 to halfwidth Yen sign */ - s1 = 0x5c; /* HALFWIDTH YEN SIGN */ - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } - } - - if (s1 <= 0) { - for (i=0; i= wchar2sjis_mac_r_tbl[i][0] && c <= wchar2sjis_mac_r_tbl[i][1]) { - s1 = c - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2]; - break; - } - } - - if (s1 <= 0) { - for (i=0; i= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) { - s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]]; - break; - } - } - } - - if (s1 
<= 0) { - for (i=0; i 0) { - c1 = s1/94+0x21; - c2 = s1-94*(c1-0x21)+0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - } - - if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - c1 = 0; - - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x100) { /* latin or kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* kanji */ - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - break; - - case 2: - c1 = filter->cache; - filter->cache = 0; - filter->status = 0; - if (c1 == 0xf860) { - for (i = 0; i < 5; i++) { - if (c == code_tbl_m[i][2]) { - filter->cache = c | 0x10000; - filter->status = 3; - break; - } - } - } else if (c1 == 0xf861) { - for (i = 0; i < 3; i++) { - if (c == code_tbl_m[i+5][2]) { - filter->cache = c | 0x20000; - filter->status = 3; - break; - } - } - } else if (c1 == 0xf862) { - for (i = 0; i < 4; i++) { - if (c == code_tbl_m[i+5+3][2]) { - filter->cache = c | 0x40000; - filter->status = 3; - break; - } - } - } - - if (filter->status == 0) { - /* Didn't find any of expected codepoints after Apple transcoding hint */ - CK(mbfl_filt_conv_illegal_output(c1, filter)); - return mbfl_filt_conv_wchar_sjis_mac(c, filter); - } - break; - - case 3: - s1 = 0; - c1 = filter->cache & 0xffff; - mode = (filter->cache & 0xf0000) >> 16; - - filter->cache = filter->status = 0; - - if (mode == 0x1) { - for (i = 0; i < 5; i++) { - if (c1 == code_tbl_m[i][2] && c == code_tbl_m[i][3]) { - s1 = code_tbl_m[i][0]; - break; - } - } - - if (s1 > 0) { - c1 = s1/94+0x21; - c2 = s1-94*(c1-0x21)+0x21; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } else { - CK(mbfl_filt_conv_illegal_output(0xf860, filter)); - 
CK(mbfl_filt_conv_illegal_output(c1, filter)); - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } else if (mode == 0x2) { - for (i = 0; i < 3; i++) { - if (c1 == code_tbl_m[i+5][2] && c == code_tbl_m[i+5][3]) { - filter->cache = c | 0x20000; - filter->status = 4; - break; - } - } - } else if (mode == 0x4) { - for (i = 0; i < 4; i++) { - if (c1 == code_tbl_m[i+8][2] && c == code_tbl_m[i+8][3]) { - filter->cache = c | 0x40000; - filter->status = 4; - break; - } - } - } - break; - - case 4: - s1 = 0; - c1 = filter->cache & 0xffff; - mode = (filter->cache & 0xf0000) >> 16; - - filter->cache = 0; - filter->status = 0; - - if (mode == 0x2) { - for (i = 0; i < 3; i++) { - if (c1 == code_tbl_m[i+5][3] && c == code_tbl_m[i+5][4]) { - s1 = code_tbl_m[i+5][0]; - break; - } - } - - if (s1 > 0) { - c1 = s1/94+0x21; - c2 = s1-94*(c1-0x21)+0x21; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } else { - CK(mbfl_filt_conv_illegal_output(0xf861, filter)); - for (i = 0; i < 3; i++) { - if (c1 == code_tbl_m[i+5][3]) { - CK(mbfl_filt_conv_illegal_output(code_tbl_m[i+5][2], filter)); - break; - } - } - CK(mbfl_filt_conv_illegal_output(c1, filter)); - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } else if (mode == 0x4) { - for (i = 0; i < 4; i++) { - if (c1 == code_tbl_m[i+8][3] && c == code_tbl_m[i+8][4]) { - filter->cache = c | 0x40000; - filter->status = 5; - break; - } - } - } - break; - - case 5: - s1 = 0; - c1 = filter->cache & 0xffff; - mode = (filter->cache & 0xf0000) >> 16; - - filter->cache = filter->status = 0; - - if (mode == 0x4) { - for (i = 0; i < 4; i++) { - if (c1 == code_tbl_m[i+8][4] && c == code_tbl_m[i+8][5]) { - s1 = code_tbl_m[i+8][0]; - break; - } - } - - if (s1 > 0) { - c1 = s1/94+0x21; - c2 = s1-94*(c1-0x21)+0x21; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } else { - 
CK(mbfl_filt_conv_illegal_output(0xf862, filter)); - for (i = 0; i < 4; i++) { - if (c1 == code_tbl_m[i+8][4]) { - CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][2], filter)); - CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][3], filter)); - break; - } - } - CK(mbfl_filt_conv_illegal_output(c1, filter)); - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter) -{ - int i, c1, s1 = 0; - if (filter->status == 1 && filter->cache > 0) { - c1 = filter->cache; - for (i=0;i 0) { - CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s1 & 0xff, filter->data)); - } - } - filter->cache = 0; - filter->status = 0; - - if (filter->flush_function != NULL) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - /* A single SJIS-Mac kuten code can convert to up to 5 Unicode codepoints, oh my! 
*/ - ZEND_ASSERT(bufsize >= 5); - - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x80 || c == 0xA0) { - if (c == 0x5C) { - *out++ = 0xA5; - } else if (c == 0x80) { - *out++ = 0x5C; - } else { - *out++ = c; - } - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else if (c <= 0xED) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2]; - - if (w <= 0x89) { - if (w == 0x1C) { - *out++ = 0x2014; /* EM DASH */ - continue; - } else if (w == 0x1F) { - *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - continue; - } else if (w == 0x20) { - *out++ = 0x301C; /* FULLWIDTH TILDE */ - continue; - } else if (w == 0x21) { - *out++ = 0x2016; /* PARALLEL TO */ - continue; - } else if (w == 0x3C) { - *out++ = 0x2212; /* FULLWIDTH HYPHEN-MINUS */ - continue; - } else if (w == 0x50) { - *out++ = 0xA2; /* FULLWIDTH CENT SIGN */ - continue; - } else if (w == 0x51) { - *out++ = 0xA3; /* FULLWIDTH POUND SIGN */ - continue; - } else if (w == 0x89) { - *out++ = 0xAC; /* FULLWIDTH NOT SIGN */ - continue; - } - } else { - if (w >= 0x2F0 && w <= 0x3A3) { - for (int i = 0; i < 7; i++) { - if (w >= code_tbl[i][0] && w <= code_tbl[i][1]) { - *out++ = w - code_tbl[i][0] + code_tbl[i][2]; - goto next_iteration; - } - } - } - - if (w >= 0x340 && w <= 0x523) { - for (int i = 0; i < code_tbl_m_len; i++) { - if (w == code_tbl_m[i][0]) { - int n = 5; - if (code_tbl_m[i][1] == 0xF860) { - n = 3; - } else if (code_tbl_m[i][1] == 0xF861) { - n = 4; - } - if ((limit - out) < n) { - p -= 2; - goto finished; - } - for (int j = 1; j <= n; j++) { - *out++ = code_tbl_m[i][j]; - } - goto next_iteration; - } - } - } - - if (w >= 0x3AC && w <= 0x20A5) { - for (int i = 0; i < 8; i++) { - if (w >= code_ofst_tbl[i][0] && w <= code_ofst_tbl[i][1]) { - uint32_t w2 = code_map[i][w - 
code_ofst_tbl[i][0]]; - if (!w2) { - *out++ = MBFL_BAD_INPUT; - goto next_iteration; - } - if ((limit - out) < 2) { - p -= 2; - goto finished; - } - *out++ = w2; - if (w >= 0x43E && w <= 0x441) { - *out++ = 0xF87A; - } else if (w == 0x3B1 || w == 0x3B7) { - *out++ = 0xF87F; - } else if (w == 0x4B8 || w == 0x4B9 || w == 0x4C4) { - *out++ = 0x20DD; - } else if (w == 0x1ED9 || w == 0x1EDA || w == 0x1EE8 || w == 0x1EF3 || (w >= 0x1EF5 && w <= 0x1EFB) || w == 0x1F05 || w == 0x1F06 || w == 0x1F18 || (w >= 0x1FF2 && w <= 0x20A5)) { - *out++ = 0xF87E; - } - goto next_iteration; - } - } - } - } - - if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0xFD) { - *out++ = 0xA9; - } else if (c == 0xFE) { - *out++ = 0x2122; - } else if (c == 0xFF) { - if ((limit - out) < 2) { - p--; - break; - } - *out++ = 0x2026; - *out++ = 0xF87F; - } else { - *out++ = MBFL_BAD_INPUT; - } -next_iteration: ; - } - -finished: - *in_len = e - p; - *in = p; - return out - buf; -} - -static bool process_s_form(uint32_t w, uint32_t w2, unsigned int *s) -{ - if (w2 == 0xF87A) { - for (int i = 0; i < 4; i++) { - if (w == s_form_tbl[i+34+3+3]) { - *s = s_form_sjis_tbl[i+34+3+3]; - return true; - } - } - } else if (w2 == 0x20DD) { - for (int i = 0; i < 3; i++) { - if (w == s_form_tbl[i+34+3]) { - *s = s_form_sjis_tbl[i+34+3]; - return true; - } - } - } else if (w2 == 0xF87F) { - for (int i = 0; i < 3; i++) { - if (w == s_form_tbl[i+34]) { - *s = s_form_sjis_tbl[i+34]; - return true; - } - } - } else if (w2 == 0xF87E) { - for (int i = 0; i < 34; i++) { - if (w == s_form_tbl[i]) { - *s = s_form_sjis_tbl[i]; - return true; - } - } - } - - return false; -} - -/* For codepoints F860-F862, which are treated specially in MacJapanese */ -static int transcoding_hint_cp_width[3] = { 3, 4, 5 }; - -static void mb_wchar_to_sjismac(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned 
char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - - if (buf->state) { - w = buf->state & 0xFFFF; - if (buf->state & 0xFF000000L) { - goto resume_transcoding_hint; - } else { - buf->state = 0; - goto process_codepoint; - } - } - - while (len--) { - w = *in++; -process_codepoint: ; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - if (w == 0x5C) { - s = 0x80; - } else if (w == 0xA9) { - s = 0xFD; - } else { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - if (w == 0x2122) { - s = 0xFE; - } else if (w == 0x2014) { - s = 0x213D; - } else if (w == 0x2116) { - s = 0x2C1D; - } else { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (w >= 0x2000) { - for (int i = 0; i < s_form_tbl_len; i++) { - if (w == s_form_tbl[i]) { - if (!len) { - if (end) { - s = s_form_sjis_fallback_tbl[i]; - if (s) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); - } - } else { - buf->state = w; - } - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - uint32_t w2 = *in++; - len--; - - if (!process_s_form(w, w2, &s)) { - in--; len++; - - for (int i = 0; i < s_form_tbl_len; i++) { - if (w == s_form_tbl[i]) { - s = s_form_sjis_fallback_tbl[i]; - break; - } - } - } - - if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - - goto next_iteration; - } - } - - if (w == 0xF860 || w == 0xF861 || w == 0xF862) { - /* Apple 
'transcoding hint' codepoints (from private use area) */ - if (!len) { - if (end) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); - } else { - buf->state = w; - } - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - - uint32_t w2 = *in++; - len--; - - for (int i = 0; i < code_tbl_m_len; i++) { - if (w == code_tbl_m[i][1] && w2 == code_tbl_m[i][2]) { - /* This might be a valid transcoding hint sequence */ - int index = 3; - - if (buf->state) { -resume_transcoding_hint: - i = buf->state >> 24; - index = (buf->state >> 16) & 0xFF; - buf->state = 0; - } - - int expected = transcoding_hint_cp_width[w - 0xF860]; - - while (index <= expected) { - if (!len) { - if (end) { - for (int j = 1; j < index; j++) { - MB_CONVERT_ERROR(buf, out, limit, code_tbl_m[i][j], mb_wchar_to_sjismac); - } - } else { - buf->state = (i << 24) | (index << 16) | (w & 0xFFFF); - } - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - - w2 = *in++; - len--; - - if (w2 != code_tbl_m[i][index]) { - /* Didn't match */ - for (int j = 1; j < index; j++) { - MB_CONVERT_ERROR(buf, out, limit, code_tbl_m[i][j], mb_wchar_to_sjismac); - } - MB_CONVERT_ERROR(buf, out, limit, w2, mb_wchar_to_sjismac); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - goto next_iteration; - } - - index++; - } - - /* Successful match, emit SJIS-mac bytes */ - s = code_tbl_m[i][0]; - unsigned int c1 = (s / 94) + 0x21, c2 = (s % 94) + 0x21, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - goto next_iteration; - } - } - - /* No valid transcoding hint sequence found */ - in--; len++; - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - continue; - } - } - - if (!s) { - if (w == 0xA0) { - s = 0xA0; - } else if (w == 0xA5) { /* YEN SIGN */ - /* Unicode has codepoint 0xFFE5 for a fullwidth Yen sign; - * convert codepoint 0xA5 to halfwidth Yen sign */ - s = 0x5C; /* HALFWIDTH 
YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else { - for (int i = 0; i < wchar2sjis_mac_r_tbl_len; i++) { - if (w >= wchar2sjis_mac_r_tbl[i][0] && w <= wchar2sjis_mac_r_tbl[i][1]) { - s = w - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto found_kuten_code; - } - } - - for (int i = 0; i < wchar2sjis_mac_r_map_len; i++) { - if (w >= wchar2sjis_mac_r_map[i][0] && w <= wchar2sjis_mac_r_map[i][1]) { - s = wchar2sjis_mac_code_map[i][w - wchar2sjis_mac_r_map[i][0]]; - if (s) { - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto found_kuten_code; - } - } - } - - for (int i = 0; i < wchar2sjis_mac_wchar_tbl_len; i++) { - if (w == wchar2sjis_mac_wchar_tbl[i][0]) { - s = wchar2sjis_mac_wchar_tbl[i][1]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto found_kuten_code; - } - } - } - } - -found_kuten_code: - if ((!s && w) || s >= 0x8080) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - -next_iteration: ; - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static const char nflags_s[10][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"}; -static const int nflags_code_kddi[10] = {0x2549, 0x2546, 0x24c0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7}; -static const int nflags_code_sb[10] = {0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07, 0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03}; - -const unsigned short mbfl_docomo2uni_pua[4][3] = { - {0x28c2, 0x292f, 0xe63e}, - {0x2930, 0x2934, 0xe6ac}, - {0x2935, 0x2951, 0xe6b1}, - {0x2952, 0x29db, 0xe6ce}, -}; - -const unsigned short mbfl_kddi2uni_pua[7][3] = { - {0x26ec, 0x2838, 0xe468}, - 
{0x284c, 0x2863, 0xe5b5}, - {0x24b8, 0x24ca, 0xe5cd}, - {0x24cb, 0x2545, 0xea80}, - {0x2839, 0x284b, 0xeafb}, - {0x2546, 0x25c0, 0xeb0e}, - {0x25c1, 0x25c6, 0xeb89}, -}; - -const unsigned short mbfl_sb2uni_pua[6][3] = { - {0x27a9, 0x2802, 0xe101}, - {0x2808, 0x2861, 0xe201}, - {0x2921, 0x297a, 0xe001}, - {0x2980, 0x29cc, 0xe301}, - {0x2a99, 0x2ae4, 0xe401}, - {0x2af8, 0x2b35, 0xe501}, -}; - -const unsigned short mbfl_kddi2uni_pua_b[8][3] = { - {0x24b8, 0x24f6, 0xec40}, - {0x24f7, 0x2573, 0xec80}, - {0x2574, 0x25b2, 0xed40}, - {0x25b3, 0x25c6, 0xed80}, - {0x26ec, 0x272a, 0xef40}, - {0x272b, 0x27a7, 0xef80}, - {0x27a8, 0x27e6, 0xf040}, - {0x27e7, 0x2863, 0xf080}, -}; - -/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF - * These correspond to the letters A-Z - * To display the flag emoji for a country, two unicode codepoints are combined, - * which correspond to the two-letter code for that country - * This macro converts uppercase ASCII values to Regional Indicator codepoints */ -#define NFLAGS(c) (0x1F1A5+(int)(c)) - -int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n) -{ - for (int i = 0; i < n; i++) { - if (map[i][0] <= c && c <= map[i][1]) { - *w = c - map[i][0] + map[i][2]; - return 1; - } - } - return 0; -} - -int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n) -{ - /* Convert in reverse direction */ - for (int i = 0; i < n; i++) { - if (map[i][2] <= c && c <= map[i][2] - map[i][0] + map[i][1]) { - *w = c + map[i][0] - map[i][2]; - return 1; - } - } - return 0; -} - -/* number -> (ku*94)+ten value for telephone keypad character */ -#define DOCOMO_KEYPAD(n) ((n) == 0 ? 0x296F : (0x2965 + (n))) -#define DOCOMO_KEYPAD_HASH 0x2964 - -#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0) - -/* Unicode codepoints for emoji are above 0x1F000, but we only store 16-bits - * in our tables. Therefore, add 0x10000 to recover the true values. 
- * - * Again, for some emoji which are not supported by Unicode, we use codepoints - * in the Private Use Area above 0xFE000. Again, add 0xF0000 to recover the - * true value. */ -static inline int convert_emoji_cp(int cp) -{ - if (cp > 0xF000) - return cp + 0x10000; - else if (cp > 0xE000) - return cp + 0xF0000; - return cp; -} - -int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd) -{ - /* All three mobile vendors had emoji for numbers on a telephone keypad - * Unicode doesn't have those, but it has a combining character which puts - * a 'keypad button' around the following character, making it look like - * a key on a telephone or keyboard. That combining char is codepoint 0x20E3. */ - if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) { - if ((s >= DOCOMO_KEYPAD(1) && s <= DOCOMO_KEYPAD(9)) || s == DOCOMO_KEYPAD(0) || s == DOCOMO_KEYPAD_HASH) { - EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min])); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]); - } - } - return 0; -} - -#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0) - -static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"}; - -int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd) -{ - if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) { - if (s == 0x24C0) { /* Spain */ - EMIT_FLAG_EMOJI("ES"); - } else if (s == 0x24C1) { /* Russia */ - EMIT_FLAG_EMOJI("RU"); - } else if (s >= 0x2545 && s <= 0x254A) { - EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]); - } else if (s == 0x25BC) { - EMIT_KEYPAD_EMOJI('#'); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]); - } - } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) { - if (s == 0x2750) { /* Japan */ - EMIT_FLAG_EMOJI("JP"); - } else if (s >= 0x27A6 && s <= 0x27AE) { - 
EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1'); - } else if (s == 0x27F7) { /* United States */ - EMIT_FLAG_EMOJI("US"); - } else if (s == 0x2830) { - EMIT_KEYPAD_EMOJI('0'); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]); - } - } - return 0; -} - -static const char nflags_sb[10][2] = {"JP", "US", "FR", "DE", "IT", "GB", "ES", "RU", "CN", "KR"}; - -int mbfilter_sjis_emoji_sb2unicode(int s, int *snd) -{ - if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) { - if (s == 0x2817 || (s >= 0x2823 && s <= 0x282C)) { - EMIT_KEYPAD_EMOJI(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]); - } - } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_sb2[s - mb_tbl_code2uni_sb2_min]); - } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) { - if (s >= 0x2B02 && s <= 0x2B0B) { - EMIT_FLAG_EMOJI(nflags_sb[s - 0x2B02]); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_sb3[s - mb_tbl_code2uni_sb3_min]); - } - } - return 0; -} - -int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter) -{ - /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji - * to a sequence of 2 codepoints, one of which is a combining character which - * adds the 'key' image around the other - * - * In the other direction, look for such sequences and convert them to a - * single emoji */ - if (filter->status == 1) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c == 0x20E3) { - if (c1 == '#') { - *s1 = 0x2964; - } else if (c1 == '0') { - *s1 = 0x296F; - } else { /* Previous character was '1'-'9' */ - *s1 = 0x2966 + (c1 - '1'); - } - return 1; - } else { - /* This character wasn't combining character to make keypad symbol, - * so pass the previous character through... 
and proceed to process the - * current character as usual - * (Single-byte ASCII characters are valid in Shift-JIS...) */ - CK((*filter->output_function)(c1, filter->data)); - } - } - - if (c == '#' || (c >= '0' && c <= '9')) { - filter->status = 1; - filter->cache = c; - return 0; - } - - if (c == 0xA9) { /* Copyright sign */ - *s1 = 0x29B5; - return 1; - } else if (c == 0x00AE) { /* Registered sign */ - *s1 = 0x29BA; - return 1; - } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) { - int i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len); - if (i >= 0) { - *s1 = mb_tbl_uni_docomo2code2_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) { - int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len); - if (i >= 0) { - *s1 = mb_tbl_uni_docomo2code3_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) { - int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len); - if (i >= 0) { - *s1 = mb_tbl_uni_docomo2code5_val[i]; - return 1; - } - } - return 0; -} - -int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c == 0x20E3) { - if (c1 == '#') { - *s1 = 0x25BC; - } else if (c1 == '0') { - *s1 = 0x2830; - } else { /* Previous character was '1'-'9' */ - *s1 = 0x27a6 + (c1 - '1'); - } - return 1; - } else { - CK((*filter->output_function)(c1, filter->data)); - } - } else if (filter->status == 2) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (c1 == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) { - *s1 = nflags_code_kddi[i]; - return 1; - } - } - } - - /* If none of 
the KDDI national flag emoji matched, then we have no way - * to convert the previous codepoint... */ - mbfl_filt_conv_illegal_output(c1, filter); - } - - if (c == '#' || (c >= '0' && c <= '9')) { - filter->status = 1; - filter->cache = c; - return 0; - } else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */ - filter->status = 2; - filter->cache = c; - return 0; - } - - if (c == 0xA9) { /* Copyright sign */ - *s1 = 0x27DC; - return 1; - } else if (c == 0xAE) { /* Registered sign */ - *s1 = 0x27DD; - return 1; - } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) { - int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code2_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) { - int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code3_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) { - int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code5_val[i]; - return 1; - } - } - return 0; -} - -int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c == 0x20E3) { - if (c1 == '#') { - *s1 = 0x2817; - } else if (c1 == '0') { - *s1 = 0x282c; - } else { /* Previous character was '1'-'9' */ - *s1 = 0x2823 + (c1 - '1'); - } - return 1; - } else { - (*filter->output_function)(c1, filter->data); - } - } else if (filter->status == 2) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (c1 == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) { - *s1 = 
nflags_code_sb[i]; - return 1; - } - } - } - - /* If none of the SoftBank national flag emoji matched, then we have no way - * to convert the previous codepoint... */ - mbfl_filt_conv_illegal_output(c1, filter); - } - - if (c == '#' || (c >= '0' && c <= '9')) { - filter->status = 1; - filter->cache = c; - return 0; - } else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */ - filter->status = 2; - filter->cache = c; - return 0; - } - - if (c == 0xA9) { /* Copyright sign */ - *s1 = 0x2855; - return 1; - } else if (c == 0xAE) { /* Registered sign */ - *s1 = 0x2856; - return 1; - } else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) { - int i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len); - if (i >= 0) { - *s1 = mb_tbl_uni_sb2code2_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) { - int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len); - if (i >= 0) { - *s1 = mb_tbl_uni_sb2code3_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) { - int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len); - if (i >= 0) { - *s1 = mb_tbl_uni_sb2code5_val[i]; - return 1; - } - } - return 0; -} - -static int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, s1, s2, w, snd = 0; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* ASCII */ - if (filter->from == &mbfl_encoding_sjis_sb && c == 0x1B) { - /* ESC; escape sequences were used on older SoftBank phones for emoji */ - filter->cache = c; - filter->status = 2; - } else { - CK((*filter->output_function)(c, filter->data)); - } - } else if (c > 0xA0 && c < 0xE0) { /* Kana */ - CK((*filter->output_function)(0xFEC0 + c, filter->data)); - } else if (c > 0x80 && c < 0xFD && c != 0xA0) { /* Kanji, first byte */ - filter->status = 1; - filter->cache = c; - } else 
{ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* Kanji, second byte */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0x40 && c <= 0xFC && c != 0x7F) { - w = 0; - SJIS_DECODE(c1, c, s1, s2); - s = ((s1 - 0x21) * 94) + s2 - 0x21; - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - - /* Emoji */ - if (filter->from == &mbfl_encoding_sjis_docomo && s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) { - w = mbfilter_sjis_emoji_docomo2unicode(s, &snd); - if (snd > 0) { - CK((*filter->output_function)(snd, filter->data)); - } - } else if (filter->from == &mbfl_encoding_sjis_kddi && s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) { - w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); - if (snd > 0) { - CK((*filter->output_function)(snd, filter->data)); - } - } else if (filter->from == &mbfl_encoding_sjis_sb && s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) { - w = mbfilter_sjis_emoji_sb2unicode(s, &snd); - if (snd > 0) { - CK((*filter->output_function)(snd, filter->data)); - } - } - - if (w == 0) { - if (s >= cp932ext3_ucs_table_min && s < 
cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ - w = s - (94*94) + 0xe000; - } - } - } - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC: Softbank Emoji */ - case 2: - if (c == '$') { - filter->cache = c; - filter->status++; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - filter->status = filter->cache = 0; - } - break; - - /* ESC $: Softbank Emoji */ - case 3: - if ((c >= 'E' && c <= 'G') || (c >= 'O' && c <= 'Q')) { - filter->cache = c; - filter->status++; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - filter->status = filter->cache = 0; - } - break; - - /* ESC $ [GEFOPQ]: Softbank Emoji */ - case 4: - c1 = filter->cache; - if (c == 0xF) { /* Terminate sequence of emoji */ - filter->status = filter->cache = 0; - return 0; - } else { - if (c1 == 'G' && c >= 0x21 && c <= 0x7a) { - s1 = (0x91 - 0x21) * 94; - } else if (c1 == 'E' && c >= 0x21 && c <= 0x7A) { - s1 = (0x8D - 0x21) * 94; - } else if (c1 == 'F' && c >= 0x21 && c <= 0x7A) { - s1 = (0x8E - 0x21) * 94; - } else if (c1 == 'O' && c >= 0x21 && c <= 0x6D) { - s1 = (0x92 - 0x21) * 94; - } else if (c1 == 'P' && c >= 0x21 && c <= 0x6C) { - s1 = (0x95 - 0x21) * 94; - } else if (c1 == 'Q' && c >= 0x21 && c <= 0x5E) { - s1 = (0x96 - 0x21) * 94; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - filter->status = filter->cache = 0; - return 0; - } - - w = mbfilter_sjis_emoji_sb2unicode(s1 + c - 0x21, &snd); - if (w > 0) { - if (snd > 0) { - CK((*filter->output_function)(snd, filter->data)); - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - filter->status = filter->cache = 0; - 
} - } - } - - return 0; -} - -static int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1 = 0, s2 = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xE000 && c < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s1 = c - 0xE000; - c1 = (s1 / 94) + 0x7F; - c2 = (s1 % 94) + 0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - - if (s1 <= 0) { - if (c == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215D; - } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } - } - - if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - - /* CP932 vendor ext1 (13ku) */ - for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { - if (c == cp932ext1_ucs_table[c1]) { - s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; - break; - } - } - - if (s1 <= 0) { - /* CP932 vendor ext2 (115ku - 119ku) */ - for (c1 = 0; c1 < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; c1++) { - if (c == cp932ext2_ucs_table[c1]) { - s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21; - break; - } - } - } - - if (c == 0) { - s1 = 0; - } - } - - if ((filter->to == &mbfl_encoding_sjis_docomo && 
mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter)) || - (filter->to == &mbfl_encoding_sjis_kddi && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter)) || - (filter->to == &mbfl_encoding_sjis_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter))) { - s1 = (((s1 / 94) + 0x21) << 8) | ((s1 % 94) + 0x21); - } - - if (filter->status) { - return 0; - } - - if (s1 >= 0) { - if (s1 < 0x100) { /* Latin/Kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* Kanji */ - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter) -{ - int c1 = filter->cache; - if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) { - filter->cache = filter->status = 0; - CK((*filter->output_function)(c1, filter->data)); - } else if (filter->status == 2) { - /* First of a pair of Regional Indicator codepoints came at the end of a string */ - filter->cache = filter->status = 0; - mbfl_filt_conv_illegal_output(c1, filter); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static const unsigned short sjis_mobile_decode_tbl1[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, 
-4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 8836, 9024, 9212, 9400, 9588, 9776, 9964, 10152, 10340, 10528, 10716, 10904, 11092, 0xFFFF, 0xFFFF, 0xFFFF -}; - -static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - /* Leave one extra space available in output buffer, since some iterations of - * main loop (below) may emit two wchars */ - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - /* Kana */ - *out++ = 0xFEC0 + c; - } else { - /* Kanji */ - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - - if (w <= 137) { - if (w == 31) { - *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - continue; - } else if (w == 32) { - *out++ = 0xFF5E; /* FULLWIDTH TILDE */ - continue; - } else if (w == 33) { - *out++ = 0x2225; /* PARALLEL TO */ - continue; - } else if (w == 60) { - *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - continue; - } else if (w == 80) { - *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ - continue; - } else if (w == 81) { - *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ - continue; - } else if (w == 137) { - *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ - continue; - } - } - - if (w >= mb_tbl_code2uni_docomo1_min && w <= mb_tbl_code2uni_docomo1_max) { - int snd = 0; - w = mbfilter_sjis_emoji_docomo2unicode(w, &snd); - if (snd) { - 
*out++ = snd; - } - } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; - } else if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; - } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; - } else if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } else { - if (c == 0x80 || c == 0xA0 || c >= 0xFD) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0)); - - uint32_t w; - unsigned int s = 0; - - if (buf->state) { - /* Continue what we were doing on the previous call */ - w = buf->state; - buf->state = 0; - goto reprocess_wchar; - } - - while (len--) { - w = *in++; -reprocess_wchar: - s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s = w - 0xE000; - s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); - goto process_emoji; - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } 
else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if (w && (!s || s >= 0x8080)) { - s = 0; - - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (w == cp932ext2_ucs_table[i]) { - s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - } - -process_emoji: - /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji - * to a sequence of 2 codepoints, one of which is a combining character which - * adds the 'key' image around the other - * - * In the other direction, look for such sequences and convert them to a - * single emoji */ - if (w == '#' || (w >= '0' && w <= '9')) { - if (!len) { - if (end) { - goto emit_output; - } else { - /* If we are at the end of the current buffer of codepoints, but another - * buffer is coming, then remember that we have to reprocess `w` */ - buf->state = w; - break; - } - } - uint32_t w2 = *in++; len--; - if (w2 == 0x20E3) { - if (w == '#') { - s = 0x2964; - } else if (w == '0') { - s = 0x296F; - } else { /* Previous character was '1'-'9' */ - s = 0x2966 + (w - '1'); - } - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } else { - in--; len++; - } - } else if (w == 0xA9) { /* Copyright sign */ - s = (((0x29B5 / 94) + 0x21) << 8) | ((0x29B5 % 94) + 0x21); - } else if (w == 0xAE) { /* Registered sign */ - s = (((0x29BA / 94) + 0x21) << 8) | ((0x29BA % 94) + 0x21); - } else if (w >= 
mb_tbl_uni_docomo2code2_min && w <= mb_tbl_uni_docomo2code2_max) { - int i = mbfl_bisec_srch2(w, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len); - if (i >= 0) { - s = mb_tbl_uni_docomo2code2_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_docomo2code3_min && w <= mb_tbl_uni_docomo2code3_max) { - int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len); - if (i >= 0) { - s = mb_tbl_uni_docomo2code3_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_docomo2code5_min && w <= mb_tbl_uni_docomo2code5_max) { - int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len); - if (i >= 0) { - s = mb_tbl_uni_docomo2code5_val[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } - -emit_output: - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_docomo); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - /* Kana */ - *out++ = 0xFEC0 + c; - } else { - /* Kanji */ - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - - if (w <= 137) { - if (w == 31) { - *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - continue; - } else if 
(w == 32) { - *out++ = 0xFF5E; /* FULLWIDTH TILDE */ - continue; - } else if (w == 33) { - *out++ = 0x2225; /* PARALLEL TO */ - continue; - } else if (w == 60) { - *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - continue; - } else if (w == 80) { - *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ - continue; - } else if (w == 81) { - *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ - continue; - } else if (w == 137) { - *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ - continue; - } - } - - if (w >= mb_tbl_code2uni_kddi1_min && w <= mb_tbl_code2uni_kddi2_max) { - int snd = 0; - w = mbfilter_sjis_emoji_kddi2unicode(w, &snd); - if (!w) { - w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } - } else if (snd) { - *out++ = snd; - } - } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; - } else if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; - } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; - } else if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } else { - if (c == 0x80 || c == 0xA0 || c >= 0xFD) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 
1 : 0)); - - uint32_t w; - unsigned int s = 0; - - if (buf->state) { - w = buf->state; - buf->state = 0; - goto reprocess_wchar; - } - - while (len--) { - w = *in++; -reprocess_wchar: - s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s = w - 0xE000; - s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); - goto process_emoji; - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if (w && (!s || s >= 0x8080)) { - s = 0; - - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (w == cp932ext2_ucs_table[i]) { - s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - } - -process_emoji: - if (w == '#' || (w >= '0' && w <= '9')) { - if (!len) { - if (end) { - goto emit_output; - } else { - /* If we are at the end of the current buffer of codepoints, but another - * 
buffer is coming, then remember that we have to reprocess `w` */ - buf->state = w; - break; - } - } - uint32_t w2 = *in++; len--; - if (w2 == 0x20E3) { - if (w == '#') { - s = 0x25BC; - } else if (w == '0') { - s = 0x2830; - } else { /* Previous character was '1'-'9' */ - s = 0x27A6 + (w - '1'); - } - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } else { - in--; len++; - } - } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */ - if (!len) { - if (end) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); - } else { - /* Reprocess `w` when this function is called again with another buffer - * of wchars */ - buf->state = w; - } - break; - } - uint32_t w2 = *in++; len--; - if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { - s = nflags_code_kddi[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto emit_output; - } - } - } - in--; len++; - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - continue; - } else if (w == 0xA9) { /* Copyright sign */ - s = (((0x27DC / 94) + 0x21) << 8) | ((0x27DC % 94) + 0x21); - } else if (w == 0xAE) { /* Registered sign */ - s = (((0x27DD / 94) + 0x21) << 8) | ((0x27DD % 94) + 0x21); - } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) { - int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); - if (i >= 0) { - s = mb_tbl_uni_kddi2code2_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) { - int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); - if (i >= 0) { - s = mb_tbl_uni_kddi2code3_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_kddi2code5_min && w <= 
mb_tbl_uni_kddi2code5_max) { - int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); - if (i >= 0) { - s = mb_tbl_uni_kddi2code5_val[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } - -emit_output: - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - if (*state) { - goto softbank_emoji_escapes; - } - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - /* Escape sequence */ - if (p == e || *p++ != '$' || p == e) { - *out++ = MBFL_BAD_INPUT; - continue; - } - unsigned char c2 = *p++; - if ((c2 < 'E' || c2 > 'G') && (c2 < 'O' || c2 > 'Q')) { - *out++ = MBFL_BAD_INPUT; - continue; - } - /* Escape sequence was valid, next should be a series of specially - * encoded Softbank emoji */ - *state = c2; - -softbank_emoji_escapes: - while (p < e && out < limit) { - c = *p++; - if (c == 0xF) { - *state = 0; - break; - } - unsigned int s = 0; - if (*state == 'G' && c >= 0x21 && c <= 0x7A) { - s = (0x91 - 0x21) * 94; - } else if (*state == 'E' && c >= 0x21 && c <= 0x7A) { - s = (0x8D - 0x21) * 94; - } else if (*state == 'F' && c >= 0x21 && c <= 0x7A) { - s = (0x8E - 0x21) * 94; - } else if (*state == 'O' && c >= 0x21 && c <= 0x6D) { - s = (0x92 - 0x21) * 94; - } else if (*state == 'P' && c >= 0x21 && c <= 0x6C) { - s = (0x95 - 0x21) * 94; - } else if (*state == 'Q' && c >= 0x21 && c <= 0x5E) { - s 
= (0x96 - 0x21) * 94; - } else { - *out++ = MBFL_BAD_INPUT; - *state = 0; - break; - } - - int snd = 0; - uint32_t w = mbfilter_sjis_emoji_sb2unicode(s + c - 0x21, &snd); - if (w) { - if (snd) { - *out++ = snd; - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - *state = 0; - break; - } - } - } else if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - /* Kana */ - *out++ = 0xFEC0 + c; - } else { - /* Kanji */ - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - - if (w <= 137) { - if (w == 31) { - *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - continue; - } else if (w == 32) { - *out++ = 0xFF5E; /* FULLWIDTH TILDE */ - continue; - } else if (w == 33) { - *out++ = 0x2225; /* PARALLEL TO */ - continue; - } else if (w == 60) { - *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - continue; - } else if (w == 80) { - *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ - continue; - } else if (w == 81) { - *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ - continue; - } else if (w == 137) { - *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ - continue; - } - } - - if (w >= mb_tbl_code2uni_sb1_min && w <= mb_tbl_code2uni_sb3_max) { - int snd = 0; - w = mbfilter_sjis_emoji_sb2unicode(w, &snd); - if (!w) { - w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; - } else if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } - } else if (snd) { - *out++ = snd; - } - } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; - } else if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; - } else if (w >= cp932ext3_ucs_table_min && 
w < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; - } else if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } else { - if (c == 0x80 || c == 0xA0 || c >= 0xFD) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0)); - - uint32_t w; - unsigned int s = 0; - - if (buf->state) { - w = buf->state; - buf->state = 0; - goto reprocess_wchar; - } - - while (len--) { - w = *in++; -reprocess_wchar: - s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s = w - 0xE000; - s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); - goto process_emoji; - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if (w && (!s || s >= 0x8080)) { - s = 0; - - 
for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (w == cp932ext2_ucs_table[i]) { - s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - } - -process_emoji: - if (w == '#' || (w >= '0' && w <= '9')) { - if (!len) { - if (end) { - goto emit_output; - } else { - /* If we are at the end of the current buffer of codepoints, but another - * buffer is coming, then remember that we have to reprocess `w` */ - buf->state = w; - break; - } - } - uint32_t w2 = *in++; len--; - if (w2 == 0x20E3) { - if (w == '#') { - s = 0x2817; - } else if (w == '0') { - s = 0x282c; - } else { /* Previous character was '1'-'9' */ - s = 0x2823 + (w - '1'); - } - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } else { - in--; len++; - } - } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */ - if (!len) { - if (end) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); - } else { - /* Reprocess `w` when this function is called again with - * another buffer of wchars */ - buf->state = w; - } - break; - } - uint32_t w2 = *in++; len--; - if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { - s = nflags_code_sb[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto emit_output; - } - } - } - in--; len++; - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - continue; - } else if (w == 0xA9) { /* Copyright sign */ - s = (((0x2855 / 94) + 0x21) << 8) | ((0x2855 % 94) + 0x21); - } else if (w == 0xAE) { /* Registered sign */ - s = (((0x2856 / 94) + 0x21) << 8) | ((0x2856 % 94) + 0x21); - } else if (w >= mb_tbl_uni_sb2code2_min && w 
<= mb_tbl_uni_sb2code2_max) { - int i = mbfl_bisec_srch2(w, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len); - if (i >= 0) { - s = mb_tbl_uni_sb2code2_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_sb2code3_min && w <= mb_tbl_uni_sb2code3_max) { - int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len); - if (i >= 0) { - s = mb_tbl_uni_sb2code3_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_sb2code5_min && w <= mb_tbl_uni_sb2code5_max) { - int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len); - if (i >= 0) { - s = mb_tbl_uni_sb2code5_val[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } - -emit_output: - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis.h deleted file mode 100644 index b0689fce64388..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. 
- * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_SJIS_H -#define MBFL_MBFILTER_SJIS_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis; -extern const struct mbfl_convert_vtbl vtbl_sjis_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis; - -int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_SJIS_H */ - -/* - * charset=UTF-8 - */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c deleted file mode 100644 index bc4d932187061..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c +++ /dev/null @@ -1,1420 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_sjis.c - * by rui hirokawa on 15 aug 2011. - */ - -/* Although the specification for Shift-JIS-2004 indicates that 0x5C and - * 0x7E should (respectively) represent a Yen sign and an overbar, feedback - * from Japanese PHP users indicates that they prefer 0x5C and 0x7E to be - * treated as equivalent to U+005C and U+007E. This is the historical - * behavior of mbstring, and promotes compatibility with other software - * which handles Shift-JIS and Shift-JIS-2004 text in this way. */ - -#include "mbfilter.h" -#include "mbfilter_sjis_2004.h" -#include "mbfilter_euc_jp_2004.h" -#include "mbfilter_iso2022jp_2004.h" - -#include "unicode_table_jis2004.h" -#include "unicode_table_jis.h" - -extern const unsigned char mblen_table_sjis_mobile[]; -extern const unsigned char mblen_table_eucjp[]; - -static size_t mb_sjis2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_eucjp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_eucjp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_iso2022jp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022jp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n); -extern int 
mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); - -static const char *mbfl_encoding_sjis2004_aliases[] = {"SJIS2004","Shift_JIS-2004", NULL}; -static const char *mbfl_encoding_eucjp2004_aliases[] = {"EUC_JP-2004", NULL}; - -const mbfl_encoding mbfl_encoding_sjis2004 = { - mbfl_no_encoding_sjis2004, - "SJIS-2004", - "Shift_JIS", - mbfl_encoding_sjis2004_aliases, - mblen_table_sjis_mobile, /* Leading byte values used for SJIS-2004 are the same as mobile SJIS variants */ - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis2004_wchar, - &vtbl_wchar_sjis2004, - mb_sjis2004_to_wchar, - mb_wchar_to_sjis2004, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = { - mbfl_no_encoding_sjis2004, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis2004_wchar, - mbfl_filt_conv_jis2004_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis2004, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_wchar_jis2004_flush, - NULL, -}; - -const mbfl_encoding mbfl_encoding_eucjp2004 = { - mbfl_no_encoding_eucjp2004, - "EUC-JP-2004", - "EUC-JP", - mbfl_encoding_eucjp2004_aliases, - mblen_table_eucjp, - 0, - &vtbl_eucjp2004_wchar, - &vtbl_wchar_eucjp2004, - mb_eucjp2004_to_wchar, - mb_wchar_to_eucjp2004, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar = { - mbfl_no_encoding_eucjp2004, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis2004_wchar, - mbfl_filt_conv_jis2004_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_eucjp2004, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_wchar_jis2004_flush, - NULL, -}; - -const mbfl_encoding mbfl_encoding_2022jp_2004 = { - mbfl_no_encoding_2022jp_2004, - "ISO-2022-JP-2004", - "ISO-2022-JP-2004", - NULL, - NULL, - 
MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022jp_2004_wchar, - &vtbl_wchar_2022jp_2004, - mb_iso2022jp2004_to_wchar, - mb_wchar_to_iso2022jp2004, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar = { - mbfl_no_encoding_2022jp_2004, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis2004_wchar, - mbfl_filt_conv_jis2004_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jp_2004, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_wchar_jis2004_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - s1--; \ - s1 >>= 1; \ - if ((c1) < 0x5f) { \ - s1 += 0x71; \ - } else { \ - s1 += 0xb1; \ - } \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - if (s1 < 0xa0) { \ - s1 -= 0x81; \ - } else { \ - s1 -= 0xc1; \ - } \ - s1 <<= 1; \ - s1 += 0x21; \ - s2 = c2; \ - if (s2 < 0x9f) { \ - if (s2 < 0x7f) { \ - s2++; \ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter) -{ - int k; - int c1, c2, s, s1 = 0, s2 = 0, w = 0, w1; - - switch (filter->status & 0xf) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { - CK((*filter->output_function)(c, filter->data)); - } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { - if (c == 0x5c) { - CK((*filter->output_function)(0x00a5, filter->data)); - } else if (c == 0x7e) { - CK((*filter->output_function)(0x203e, filter->data)); - } else { - CK((*filter->output_function)(c, filter->data)); - } - } else { /* ISO-2022-JP-2004 */ - if (c == 0x1b) { - filter->status 
+= 6; - } else if ((filter->status == 0x80 || filter->status == 0x90 || filter->status == 0xa0) - && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->cache = c; - if (filter->status == 0x90) { - filter->status += 1; /* JIS X 0213 plane 1 */ - } else if (filter->status == 0xa0) { - filter->status += 4; /* JIS X 0213 plane 2 */ - } else { - filter->status += 5; /* JIS X 0208 */ - } - } else { - CK((*filter->output_function)(c, filter->data)); - } - } - } else { - if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { - if (c > 0xa0 && c < 0xff) { /* X 0213 plane 1 first char */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8e) { /* kana first char */ - filter->cache = 0x8E; /* So error will be reported if input is truncated right here */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0213 plane 2 first char */ - filter->status = 3; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { - if (c > 0xa0 && c < 0xe0) { /* kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - break; - - case 1: /* kanji second char */ - filter->status &= ~0xf; - c1 = filter->cache; - - if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { - if (c > 0xa0 && c < 0xff) { - s1 = c1 - 0x80; - s2 = c - 0x80; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - break; - } - } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { - if (c >= 0x40 && c <= 0xfc && c != 0x7f) { - SJIS_DECODE(c1, c, s1, s2); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - break; - } - } else { /* ISO-2022-JP-2004 */ - if (c 
>= 0x21 && c <= 0x7E) { - s1 = c1; - s2 = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - break; - } - } - w1 = (s1 << 8) | s2; - - /* conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || - (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || - (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); - if (k >= 0) { - w = jisx0213_u2_tbl[2*k]; - CK((*filter->output_function)(w, filter->data)); - w = jisx0213_u2_tbl[2*k+1]; - } - } - - /* conversion for BMP */ - if (w <= 0) { - w1 = (s1 - 0x21)*94 + s2 - 0x21; - if (w1 >= 0 && w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; - } - } - - /* conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ - if (w <= 0) { - w1 = (s1 << 8) | s2; - k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - break; - - case 2: /* got 0x8e: EUC-JP-2004 kana */ - filter->status = 0; - if (c > 0xa0 && c < 0xe0) { - w = 0xfec0 + c; - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* X 0213 plane 2 first char: EUC-JP-2004 (0x8f) */ - if (c == 0xA1 || (c >= 0xA3 && c <= 0xA5) || c == 0xA8 || (c >= 0xAC && c <= 0xAF) || (c >= 0xEE && c <= 0xFE)) { - filter->cache = c - 0x80; - filter->status++; - } else { - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 4: /* X 0213 plane 2 second char: EUC-JP-2004, ISO-2022-JP-2004 */ - filter->status &= ~0xF; - c1 = filter->cache; - if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { - c2 = c - 0x80; - } else { - c2 = c; - } - - if (c2 < 0x21 || c2 > 0x7E) { - 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - break; - } - - s1 = c1 - 0x21; - s2 = c2 - 0x21; - - if (((s1 >= 0 && s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || - (s1 >= 77 && s1 < 94)) && s2 >= 0 && s2 < 94) { - /* calc offset from ku */ - for (k = 0; k < jisx0213_p2_ofst_len; k++) { - if (s1 == jisx0213_p2_ofst[k]) { - break; - } - } - k -= jisx0213_p2_ofst[k]; - - /* check for japanese chars in BMP */ - s = (s1 + 94 + k)*94 + s2; - ZEND_ASSERT(s < jisx0213_ucs_table_size); - w = jisx0213_ucs_table[s]; - - /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ - if (w <= 0) { - w1 = ((c1 + k + 94) << 8) | c2; - k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 5: /* X 0208: ISO-2022-JP-2004 */ - filter->status &= ~0xf; - c1 = filter->cache; - if (c > 0x20 && c < 0x7f) { - s = (c1 - 0x21)*94 + c - 0x21; - if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - break; - - /* ESC: ISO-2022-JP-2004 */ -/* case 0x06: */ -/* case 0x16: */ -/* case 0x26: */ -/* case 0x86: */ -/* case 0x96: */ -/* case 0xa6: */ - case 6: - if (c == '$') { - filter->status++; - } else if (c == '(') { - filter->status += 3; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $: ISO-2022-JP-2004 */ -/* case 0x07: */ -/* case 0x17: */ -/* case 0x27: */ -/* case 0x87: */ -/* case 0x97: */ -/* case 0xa7: */ - case 7: - if (c == 'B') { /* JIS X 0208-1983 */ - filter->status = 0x80; - } else if (c == '(') { - filter->status++; - } else { - filter->status &= ~0xf; - 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ (: ISO-2022-JP-2004 */ -/* case 0x08: */ -/* case 0x18: */ -/* case 0x28: */ -/* case 0x88: */ -/* case 0x98: */ -/* case 0xa8: */ - case 8: - if (c == 'Q') { /* JIS X 0213 plane 1 */ - filter->status = 0x90; - } else if (c == 'P') { /* JIS X 0213 plane 2 */ - filter->status = 0xa0; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC (: ISO-2022-JP-2004 */ -/* case 0x09: */ -/* case 0x19: */ -/* case 0x29: */ -/* case 0x89: */ -/* case 0x99: */ - case 9: - if (c == 'B') { - filter->status = 0; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) -{ - int k; - int c1, c2, s1, s2; - -retry: - s1 = 0; - /* check for 1st char of combining characters */ - if ((filter->status & 0xf) == 0 && ( - c == 0x00E6 || - (c >= 0x0254 && c <= 0x02E9) || - (c >= 0x304B && c <= 0x3053) || - (c >= 0x30AB && c <= 0x30C8) || - c == 0x31F7)) { - for (k = 0; k < jisx0213_u2_tbl_len; k++) { - if (c == jisx0213_u2_tbl[2*k]) { - filter->status++; - filter->cache = k; - return 0; - } - } - } - - /* check for 2nd char of combining characters */ - if ((filter->status & 0xf) == 1 && filter->cache >= 0 && filter->cache < jisx0213_u2_tbl_len) { - k = filter->cache; - filter->status &= ~0xf; - filter->cache = 0; - - c1 = jisx0213_u2_tbl[2*k]; - if ((c1 == 0x0254 || c1 == 0x028C || c1 == 0x0259 || c1 == 0x025A) && c == 0x0301) { - k++; - } - if (c == 
jisx0213_u2_tbl[2*k+1]) { - s1 = jisx0213_u2_key[k]; - } else { /* fallback */ - s1 = jisx0213_u2_fb_tbl[k]; - - if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - s2 = (s1 & 0xff) + 0x80; - s1 = ((s1 >> 8) & 0xff) + 0x80; - } else { - if (filter->status != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('Q', filter->data)); - } - filter->status = 0x200; - - s2 = s1 & 0x7f; - s1 = (s1 >> 8) & 0x7f; - } - - /* Flush out cached data */ - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - goto retry; - } - } - - /* check for major japanese chars: U+4E00 - U+9FFF */ - if (s1 <= 0) { - for (k = 0; k < uni2jis_tbl_len; k++) { - if (c >= uni2jis_tbl_range[k][0] && c <= uni2jis_tbl_range[k][1]) { - s1 = uni2jis_tbl[k][c-uni2jis_tbl_range[k][0]]; - break; - } - } - } - - /* check for japanese chars in compressed mapping area: U+1E00 - U+4DBF */ - if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) { - k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); - if (k >= 0) { - s1 = ucs_c1_jisx0213_ofst[k] + c - ucs_c1_jisx0213_tbl[2*k]; - } - } - - /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ - if (s1 <= 0 && c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) { - k = mbfl_bisec_srch2(c - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); - if (k >= 0) { - s1 = jisx0213_u5_jis_tbl[k]; - } - } - - if (s1 <= 0) { - /* CJK Compatibility Forms: U+FE30 - U+FE4F */ - if (c == 0xfe45) { - s1 = 0x233e; - } else if (c == 0xfe46) { - s1 = 0x233d; - } else if (c >= 0xf91d && c <= 0xf9dc) { - /* CJK Compatibility Ideographs: U+F900 - U+F92A */ - k = mbfl_bisec_srch2(c, 
ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); - if (k >= 0) { - s1 = ucs_r2b_jisx0213_cmap_val[k]; - } - } - } - - if (s1 <= 0) { - if (c == 0) { - s1 = 0; - } else { - s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* ASCII */ - if (filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 && (filter->status & 0xff00)) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - filter->status = 0; - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x100) { /* latin or kana */ - if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - CK((*filter->output_function)(0x8e, filter->data)); - CK((*filter->output_function)(s1, filter->data)); - } else if (filter->to->no_encoding == mbfl_no_encoding_sjis2004 && (s1 >= 0xA1 && s1 <= 0xDF)) { - CK((*filter->output_function)(s1, filter->data)); - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } else if (s1 < 0x7f00) { /* X 0213 plane 1 */ - if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - s2 = (s1 & 0xff) + 0x80; - s1 = ((s1 >> 8) & 0xff) + 0x80; - } else { - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('Q', filter->data)); - } - filter->status = 0x200; - s2 = s1 & 0xff; - s1 = (s1 >> 8) & 0xff; - } - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } else { /* X 0213 plane 2 */ - if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - } else { - s2 = s1 & 0xff; - k = ((s1 
>> 8) & 0xff) - 0x7f; - if (k >= 0 && k < jisx0213_p2_ofst_len) { - s1 = jisx0213_p2_ofst[k] + 0x21; - } - if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - s2 |= 0x80; - s1 |= 0x80; - CK((*filter->output_function)(0x8f, filter->data)); - } else { - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('P', filter->data)); - } - filter->status = 0x200; - } - } - - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter) -{ - int k, c1, c2, s1, s2; - - k = filter->cache; - filter->cache = 0; - - if (filter->status == 1 && k >= 0 && k <= jisx0213_u2_tbl_len) { - s1 = jisx0213_u2_fb_tbl[k]; - - if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - s2 = (s1 & 0xff) | 0x80; - s1 = ((s1 >> 8) & 0xff) | 0x80; - } else { - s2 = s1 & 0x7f; - s1 = (s1 >> 8) & 0x7f; - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('Q', filter->data)); - } - filter->status = 0x200; - } - - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - - /* If we had switched to a different charset, go back to ASCII mode - * This makes it possible to concatenate arbitrary valid strings - * together and get a valid string */ - if (filter->status & 0xff00) { - CK((*filter->output_function)(0x1b, filter->data)); /* 
ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - - filter->status = 0; - - if (filter->flush_function) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_sjis2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - if (c == 0x5C) { - *out++ = 0xA5; - } else if (c == 0x7E) { - *out++ = 0x203E; - } else { - *out++ = c; - } - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else if (c > 0x80 && c < 0xFD && c != 0xA0) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - - if (c2 < 0x40 || c2 > 0xFC || c2 == 0x7F) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned int s1, s2; - SJIS_DECODE(c, c2, s1, s2); - unsigned int w1 = (s1 << 8) | s2, w = 0; - - /* Conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); - if (k >= 0) { - *out++ = jisx0213_u2_tbl[2*k]; - *out++ = jisx0213_u2_tbl[2*k+1]; - continue; - } - } - - /* Conversion for BMP */ - w1 = (s1 - 0x21)*94 + s2 - 0x21; - if (w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; - } - - /* Conversion for CJK Unified Ideographs extension B (U+2XXXX) */ - if (!w) { - w1 = (s1 << 8) | s2; - int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - if (buf->state) { - w = buf->state; - buf->state = 0; - goto process_codepoint; - } - - while (len--) { - w = *in++; -process_codepoint: ; - unsigned int s = 0; - - if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { - for (int k = 0; k < jisx0213_u2_tbl_len; k++) { - if (w == jisx0213_u2_tbl[2*k]) { - if (!len) { - if (!end) { - buf->state = w; - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - } else { - uint32_t w2 = *in++; len--; - if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { - k++; - } - if (w2 == jisx0213_u2_tbl[2*k+1]) { - s = jisx0213_u2_key[k]; - break; - } - in--; len++; - } - - /* Fallback */ - s = jisx0213_u2_fb_tbl[k]; - break; - } - } - } - - /* Check for major Japanese chars: U+4E00-U+9FFF */ - if (!s) { - for (int k = 0; k < uni2jis_tbl_len; k++) { - if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { - s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; - break; - } - } - } - - /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ - if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { - int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); - if (k >= 0) { - s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; - } - } - - /* Check for Japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ - if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { - int k = mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); - if (k >= 0) { - s = jisx0213_u5_jis_tbl[k]; - } - } - - if (!s) { - /* CJK 
Compatibility Forms: U+FE30-U+FE4F */ - if (w == 0xFE45) { - s = 0x233E; - } else if (w == 0xFE46) { - s = 0x233D; - } else if (w >= 0xF91D && w <= 0xF9DC) { - /* CJK Compatibility Ideographs: U+F900-U+F92A */ - int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); - if (k >= 0) { - s = ucs_r2b_jisx0213_cmap_val[k]; - } - } - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis2004); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static size_t mb_eucjp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xFE) { - /* Kanji */ - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - if (c2 <= 0xA0 || c2 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned int s1 = c - 0x80, s2 = c2 - 0x80; - unsigned int w1 = (s1 << 8) | s2, w = 0; - - /* Conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); - if (k >= 0) { - *out++ = jisx0213_u2_tbl[2*k]; - *out++ = jisx0213_u2_tbl[2*k+1]; - continue; - } - } - - /* Conversion for BMP */ - w1 = (s1 - 0x21)*94 + s2 - 0x21; - if (w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; - } - - /* 
Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ - if (!w) { - w1 = (s1 << 8) | s2; - int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } else if (c == 0x8E && p < e) { - /* Kana */ - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xDF) { - *out++ = 0xFEC0 + c2; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8F && p < e) { - unsigned char c2 = *p++; - if ((c2 == 0xA1 || (c2 >= 0xA3 && c2 <= 0xA5) || c2 == 0xA8 || (c2 >= 0xAC && c2 <= 0xAF) || (c2 >= 0xEE && c2 <= 0xFE)) && p < e) { - unsigned char c3 = *p++; - - if (c3 < 0xA1 || c3 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned int s1 = c2 - 0xA1, s2 = c3 - 0xA1; - - if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) { - int k; - for (k = 0; k < jisx0213_p2_ofst_len; k++) { - if (s1 == jisx0213_p2_ofst[k]) { - break; - } - } - k -= jisx0213_p2_ofst[k]; - - /* Check for Japanese chars in BMP */ - unsigned int s = (s1 + 94 + k)*94 + s2; - ZEND_ASSERT(s < jisx0213_ucs_table_size); - unsigned int w = jisx0213_ucs_table[s]; - - /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ - if (!w) { - k = mbfl_bisec_srch2(((c2 - 0x80 + k + 94) << 8) | (c3 - 0x80), jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_eucjp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - if (buf->state) { - w = buf->state; - buf->state = 0; - goto process_codepoint; - } - - while (len--) { - w = *in++; -process_codepoint: ; - unsigned int s = 0; - - /* Check for 1st char of combining characters */ - if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { - for (int k = 0; k < jisx0213_u2_tbl_len; k++) { - if (w == jisx0213_u2_tbl[2*k]) { - if (!len) { - if (!end) { - buf->state = w; - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - } else { - uint32_t w2 = *in++; len--; - if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { - k++; - } - if (w2 == jisx0213_u2_tbl[2*k+1]) { - s = jisx0213_u2_key[k]; - break; - } - in--; len++; - } - - /* Fallback */ - s = jisx0213_u2_fb_tbl[k]; - break; - } - } - } - - /* Check for major Japanese chars: U+4E00-U+9FFF */ - if (!s) { - for (int k = 0; k < uni2jis_tbl_len; k++) { - if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { - s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; - break; - } - } - } - - /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ - if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { - int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); - if (k >= 0) { - s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; - } - } - - /* Check for Japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ - if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { - int k = mbfl_bisec_srch2(w - 
0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); - if (k >= 0) { - s = jisx0213_u5_jis_tbl[k]; - } - } - - if (!s) { - /* CJK Compatibility Forms: U+FE30-U+FE4F */ - if (w == 0xFE45) { - s = 0x233E; - } else if (w == 0xFE46) { - s = 0x233D; - } else if (w >= 0xF91D && w <= 0xF9DC) { - /* CJK Compatibility Ideographs: U+F900-U+F92A */ - int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); - if (k >= 0) { - s = ucs_r2b_jisx0213_cmap_val[k]; - } - } - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp2004); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7F) { - out = mb_convert_buf_add(out, s); - } else if (s <= 0xFF) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, 0x8E, s); - } else if (s <= 0x7EFF) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) + 0x80, (s & 0xFF) + 0x80); - } else { - unsigned int s2 = s & 0xFF; - int k = ((s >> 8) & 0xFF) - 0x7F; - ZEND_ASSERT(k < jisx0213_p2_ofst_len); - s = jisx0213_p2_ofst[k] + 0x21; - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); - out = mb_convert_buf_add3(out, 0x8F, s | 0x80, s2 | 0x80); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -#define ASCII 0 -#define JISX0208 1 -#define JISX0213_PLANE1 2 -#define JISX0213_PLANE2 3 - -static size_t mb_iso2022jp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - if (c == 0x1B) { - if ((e - p) < 2) { - *out++ = MBFL_BAD_INPUT; - p = e; - break; - } - unsigned char c2 = *p++; - unsigned char c3 = *p++; - if (c2 == '$') { - if (c3 == 'B') { - *state = JISX0208; - } else if (c3 == '(') { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c4 = *p++; - if (c4 
== 'Q') { - *state = JISX0213_PLANE1; - } else if (c4 == 'P') { - *state = JISX0213_PLANE2; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c2 == '(') { - if (c3 == 'B') { - *state = ASCII; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - p--; - *out++ = MBFL_BAD_INPUT; - } - } else if (*state >= JISX0208 && c > 0x20 && c < 0x7F) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - if (c2 < 0x21 || c2 > 0x7E) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - if (*state == JISX0213_PLANE1) { - unsigned int w1 = (c << 8) | c2; - - /* Conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); - if (k >= 0) { - *out++ = jisx0213_u2_tbl[2*k]; - *out++ = jisx0213_u2_tbl[2*k+1]; - continue; - } - } - - /* Conversion for BMP */ - uint32_t w = 0; - w1 = (c - 0x21)*94 + c2 - 0x21; - if (w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; - } - - /* Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ - if (!w) { - int k = mbfl_bisec_srch2((c << 8) | c2, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else if (*state == JISX0213_PLANE2) { - - unsigned int s1 = c - 0x21, s2 = c2 - 0x21; - - if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) { - int k; - for (k = 0; k < jisx0213_p2_ofst_len; k++) { - if (s1 == jisx0213_p2_ofst[k]) { - break; - } - } - k -= jisx0213_p2_ofst[k]; - - /* Check for Japanese chars in BMP */ - unsigned int s = (s1 + 94 + k)*94 + s2; - ZEND_ASSERT(s < jisx0213_ucs_table_size); - uint32_t w = jisx0213_ucs_table[s]; - - /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ - if (!w) { - k = mbfl_bisec_srch2(((c + k + 94) << 8) | c2, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { /* state == JISX0208 */ - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - uint32_t w = 0; - if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - *out++ = w ? 
w : MBFL_BAD_INPUT; - } - } else { - *out++ = c; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_iso2022jp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - if (buf->state & 0xFF00) { - int k = (buf->state >> 8) - 1; - w = jisx0213_u2_tbl[2*k]; - buf->state &= 0xFF; - goto process_codepoint; - } - - while (len--) { - w = *in++; -process_codepoint: ; - unsigned int s = 0; - - if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { - for (int k = 0; k < jisx0213_u2_tbl_len; k++) { - if (w == jisx0213_u2_tbl[2*k]) { - if (!len) { - if (!end) { - buf->state |= (k+1) << 8; - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - } else { - uint32_t w2 = *in++; len--; - if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { - k++; - } - if (w2 == jisx0213_u2_tbl[2*k+1]) { - s = jisx0213_u2_key[k]; - break; - } - in--; len++; - } - - s = jisx0213_u2_fb_tbl[k]; - break; - } - } - } - - /* Check for major Japanese chars: U+4E00-U+9FFF */ - if (!s) { - for (int k = 0; k < uni2jis_tbl_len; k++) { - if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { - s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; - break; - } - } - } - - /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ - if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { - int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); - if (k >= 0) { - s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; - } - } - - /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ - if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { - int k = mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, 
jisx0213_u5_tbl_len); - if (k >= 0) { - s = jisx0213_u5_jis_tbl[k]; - } - } - - if (!s) { - /* CJK Compatibility Forms: U+FE30-U+FE4F */ - if (w == 0xFE45) { - s = 0x233E; - } else if (w == 0xFE46) { - s = 0x233D; - } else if (w >= 0xF91D && w <= 0xF9DC) { - /* CJK Compatibility Ideographs: U+F900-U+F92A */ - int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); - if (k >= 0) { - s = ucs_r2b_jisx0213_cmap_val[k]; - } - } - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp2004); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7F) { - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s <= 0xFF) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp2004); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7EFF) { - if (buf->state != JISX0213_PLANE1) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'Q'); - buf->state = JISX0213_PLANE1; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else { - if (buf->state != JISX0213_PLANE2) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'P'); - buf->state = JISX0213_PLANE2; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - unsigned int s2 = s & 0xFF; - int k = ((s >> 8) & 0xFF) - 0x7F; - ZEND_ASSERT(k < jisx0213_p2_ofst_len); - s = jisx0213_p2_ofst[k] + 0x21; - out = mb_convert_buf_add2(out, s, s2); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h 
b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h deleted file mode 100644 index 869fd145c1ce0..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_sjis.c - * by rui hirokawa on 15 aug 2011. 
- * - */ - -#ifndef MBFL_MBFILTER_SJIS_2004_H -#define MBFL_MBFILTER_SJIS_2004_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis2004; -extern const struct mbfl_convert_vtbl vtbl_sjis2004_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004; - -int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter); - -int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter); -int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_SJIS_2004_H */ - -/* - * charset=UTF-8 - */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h deleted file mode 100644 index 58d8eb2ab03e4..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_sjis_open.c - * by Rui Hirokawa on 25 July 2011. 
- * - */ - -#ifndef MBFL_MBFILTER_SJIS_MAC_H -#define MBFL_MBFILTER_SJIS_MAC_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis_mac; -extern const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac; - -#endif /* MBFL_MBFILTER_SJIS_MAC_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h deleted file mode 100644 index 6085e2b5a1266..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_sjis_open.c - * by Rui Hirokawa on 25 July 2011. 
- * - */ - -#ifndef MBFL_MBFILTER_SJIS_MOBILE_H -#define MBFL_MBFILTER_SJIS_MOBILE_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis_docomo; -extern const mbfl_encoding mbfl_encoding_sjis_kddi; -extern const mbfl_encoding mbfl_encoding_sjis_sb; - -extern const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo; -extern const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi; -extern const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb; - -extern const unsigned short mbfl_docomo2uni_pua[4][3]; -extern const unsigned short mbfl_kddi2uni_pua[7][3]; -extern const unsigned short mbfl_sb2uni_pua[6][3]; -extern const unsigned short mbfl_kddi2uni_pua_b[8][3]; - -int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter); - -int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd); -int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd); -int mbfilter_sjis_emoji_sb2unicode(int s, int *snd); - -int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter); -int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter); -int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter); - -int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n); -int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n); - -#endif /* MBFL_MBFILTER_SJIS_MOBILE_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_uhc.c b/ext/mbstring/libmbfl/filters/mbfilter_uhc.c deleted file mode 100644 index 8d611adb5ac3e..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_uhc.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -/* UHC was introduced by MicroSoft in Windows 95, and is also known as CP949. - * It is the same as EUC-KR, but with 8,822 additional characters added to - * complete all the characters in the Johab charset. 
*/ - -#include "mbfilter.h" -#include "mbfilter_uhc.h" -#define UNICODE_TABLE_UHC_DEF -#include "unicode_table_uhc.h" - -static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_uhc(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL}; - -const mbfl_encoding mbfl_encoding_uhc = { - mbfl_no_encoding_uhc, - "UHC", - "UHC", - mbfl_encoding_uhc_aliases, - mblen_table_uhc, - 0, - &vtbl_uhc_wchar, - &vtbl_wchar_uhc, - mb_uhc_to_wchar, - mb_wchar_to_uhc, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_uhc_wchar = { - mbfl_no_encoding_uhc, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_uhc_wchar, - mbfl_filt_conv_uhc_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_uhc = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_uhc, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_uhc, - mbfl_filt_conv_common_flush, - NULL, -}; - 
-#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter) -{ - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0x80 && c < 0xfe && c != 0xc9) { /* dbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - int c1 = filter->cache, w = 0; - - if (c1 >= 0x81 && c1 <= 0xc6 && c >= 0x41 && c <= 0xfe) { - w = (c1 - 0x81)*190 + (c - 0x41); - if (w >= 0 && w < uhc1_ucs_table_size) { - w = uhc1_ucs_table[w]; - } - } else if (c1 >= 0xc7 && c1 < 0xfe && c >= 0xa1 && c <= 0xfe) { - w = (c1 - 0xc7)*94 + (c - 0xa1); - if (w >= 0 && w < uhc3_ucs_table_size) { - w = uhc3_ucs_table[w]; - } - } - - if (w == 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; - } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; - } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; - } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; - } else if (c >= ucs_s_uhc_table_min && 
c < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; - } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; - } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; - } - - if (s == 0 && c != 0) { - s = -1; - } - - if (s >= 0) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - e--; /* Stop the main loop 1 byte short of the end of the input */ - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c > 0x80 && c < 0xFE) { - /* We don't need to check p < e here; it's not possible that this pointer dereference - * will be outside the input string, because of e-- above */ - unsigned char c2 = *p++; - if (c2 < 0x41 || c2 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - unsigned int w = 0; - - if (c <= 0xC6) { - w = (c - 0x81)*190 + c2 - 0x41; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - } else if (c2 >= 0xA1) { - w = (c - 0xC7)*94 + c2 - 0xA1; - ZEND_ASSERT(w < uhc3_ucs_table_size); - w = uhc3_ucs_table[w]; - if (!w) { - /* If c == 0xC9, we shouldn't have tried to read a 2-byte char at all... 
but it is faster - * to fix up that rare case here rather than include an extra check in the hot path */ - if (c == 0xC9) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } - } - if (!w) { - w = MBFL_BAD_INPUT; - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - /* Finish up last byte of input string if there is one */ - if (p == e && out < limit) { - unsigned char c = *p++; - *out++ = (c < 0x80) ? c : MBFL_BAD_INPUT; - } - - *in_len = e - p + 1; - *in = p; - return out - buf; -} - -static void mb_wchar_to_uhc(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; - } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; - } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; - } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; - } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; - } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; - } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_uhc); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - 
MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_uhc.h b/ext/mbstring/libmbfl/filters/mbfilter_uhc.h deleted file mode 100644 index 860d45eb86fe1..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_uhc.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_UHC_H -#define MBFL_MBFILTER_UHC_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_uhc; -extern const struct mbfl_convert_vtbl vtbl_uhc_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_uhc; - -int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_UHC_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c index 7d5fdc3e0a469..374863ce52681 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c @@ -30,7 +30,7 @@ #include "mbfilter.h" #include "mbfilter_utf8_mobile.h" -#include "mbfilter_sjis_mobile.h" +#include "mbfilter_cjk.h" #include "emoji2uni.h" @@ -47,6 +47,66 @@ static void mb_wchar_to_utf8_kddi_b(uint32_t *in, size_t len, mb_convert_buf *bu static size_t mb_utf8_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); static void mb_wchar_to_utf8_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); +static int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n) +{ + for (int i = 0; i < n; i++) { + if (map[i][0] <= c && c <= map[i][1]) { + *w = c - map[i][0] + map[i][2]; + return 1; + } + } + return 0; +} + +static int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n) +{ + /* Convert in reverse direction */ + for (int i = 0; i < n; i++) { + if (map[i][2] <= c && c <= map[i][2] - map[i][0] + map[i][1]) { + *w = c + map[i][0] - map[i][2]; + return 1; + } + } + return 0; +} + +static const unsigned short mbfl_docomo2uni_pua[4][3] = { + {0x28c2, 0x292f, 0xe63e}, + {0x2930, 0x2934, 0xe6ac}, + {0x2935, 0x2951, 0xe6b1}, + {0x2952, 0x29db, 0xe6ce}, +}; + +static const unsigned short mbfl_kddi2uni_pua[7][3] = { + {0x26ec, 0x2838, 0xe468}, + {0x284c, 0x2863, 0xe5b5}, + 
{0x24b8, 0x24ca, 0xe5cd}, + {0x24cb, 0x2545, 0xea80}, + {0x2839, 0x284b, 0xeafb}, + {0x2546, 0x25c0, 0xeb0e}, + {0x25c1, 0x25c6, 0xeb89}, +}; + +static const unsigned short mbfl_kddi2uni_pua_b[8][3] = { + {0x24b8, 0x24f6, 0xec40}, + {0x24f7, 0x2573, 0xec80}, + {0x2574, 0x25b2, 0xed40}, + {0x25b3, 0x25c6, 0xed80}, + {0x26ec, 0x272a, 0xef40}, + {0x272b, 0x27a7, 0xef80}, + {0x27a8, 0x27e6, 0xf040}, + {0x27e7, 0x2863, 0xf080}, +}; + +static const unsigned short mbfl_sb2uni_pua[6][3] = { + {0x27a9, 0x2802, 0xe101}, + {0x2808, 0x2861, 0xe201}, + {0x2921, 0x297a, 0xe001}, + {0x2980, 0x29cc, 0xe301}, + {0x2a99, 0x2ae4, 0xe401}, + {0x2af8, 0x2b35, 0xe501}, +}; + extern const unsigned char mblen_table_utf8[]; static const char *mbfl_encoding_utf8_docomo_aliases[] = {"UTF-8-DOCOMO", "UTF8-DOCOMO", NULL}; @@ -298,8 +358,8 @@ int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter) int s1, c1; if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4) > 0) || - (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) || - (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) || + (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) || + (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) || (filter->to->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6) > 0)) { c = 
c1; } diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp932_ext.h b/ext/mbstring/libmbfl/filters/unicode_table_cp932_ext.h index 8b1efdd0bed55..66944b09f128e 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_cp932_ext.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_cp932_ext.h @@ -25,8 +25,6 @@ #ifndef UNICODE_TABLE_CP932_EXT_H #define UNICODE_TABLE_CP932_EXT_H -#ifdef UNICODE_TABLE_CP932_DEF - const unsigned short cp932ext1_ucs_table[] = { /* ku 13 */ 0x2460,0x2461,0x2462,0x2463,0x2464,0x2465,0x2466,0x2467, @@ -169,19 +167,4 @@ const unsigned short cp932ext3_ucs_table[] = { const int cp932ext3_ucs_table_min = (115 - 1)*94; const int cp932ext3_ucs_table_max = (115 - 1)*94 + (sizeof (cp932ext3_ucs_table) / sizeof (unsigned short)); -#else - -extern const unsigned short cp932ext1_ucs_table[]; -extern const unsigned short cp932ext2_ucs_table[]; -extern const unsigned short cp932ext3_ucs_table[]; - -extern const int cp932ext1_ucs_table_min; -extern const int cp932ext1_ucs_table_max; -extern const int cp932ext2_ucs_table_min; -extern const int cp932ext2_ucs_table_max; -extern const int cp932ext3_ucs_table_min; -extern const int cp932ext3_ucs_table_max; - -#endif - #endif /* UNICODE_TABLE_CP932_EXT_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp936.h b/ext/mbstring/libmbfl/filters/unicode_table_cp936.h index c225c586ffb35..1d739cce29f6a 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_cp936.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_cp936.h @@ -19,17 +19,11 @@ * Suite 330, Boston, MA 02111-1307 USA * * The author of this file: Rui Hirokawa - * */ #ifndef UNICODE_TABLE_CP936_H #define UNICODE_TABLE_CP936_H -/* - * Unicode table - */ -#ifdef UNICODE_TABLE_CP936_DEF - /* CP936 -> Unicode, but without PUA codepoints used in CP936 and GB18030 */ const unsigned short cp936_ucs_table[] = { /* 0x8140 */ @@ -6634,41 +6628,4 @@ static const unsigned short mbfl_cp936_pua_tbl[][3] = { static const int mbfl_cp936_pua_tbl_max = 
sizeof(mbfl_cp936_pua_tbl)/(sizeof(unsigned short)*3); -#else - -extern const unsigned short cp936_ucs_table[]; -extern const unsigned short cp936_pua_tbl1[]; -extern const unsigned short cp936_pua_tbl2[]; -extern const unsigned short cp936_pua_tbl3[]; - -extern const unsigned short ucs_a1_cp936_table[]; -extern const unsigned short ucs_a2_cp936_table[]; -extern const unsigned short ucs_a3_cp936_table[]; -extern const unsigned short ucs_i_cp936_table[]; -extern const unsigned short ucs_cf_cp936_table[]; -extern const unsigned short ucs_sfv_cp936_table[]; - -extern const unsigned short ucs_ci_s_cp936_table[]; -extern const unsigned short ucs_hff_s_cp936_table[]; - -extern const int cp936_ucs_table_size; -extern const int ucs_a1_cp936_table_min; -extern const int ucs_a1_cp936_table_max; -extern const int ucs_a2_cp936_table_min; -extern const int ucs_a2_cp936_table_max; -extern const int ucs_a3_cp936_table_min; -extern const int ucs_a3_cp936_table_max; -extern const int ucs_i_cp936_table_min; -extern const int ucs_i_cp936_table_max; -extern const int ucs_ci_cp936_table_min; -extern const int ucs_ci_cp936_table_max; -extern const int ucs_cf_cp936_table_min; -extern const int ucs_cf_cp936_table_max; -extern const int ucs_sfv_cp936_table_min; -extern const int ucs_sfv_cp936_table_max; -extern const int ucs_hff_cp936_table_min; -extern const int ucs_hff_cp936_table_max; - -#endif - #endif /* UNICODE_TABLE_CP936_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_jis.h b/ext/mbstring/libmbfl/filters/unicode_table_jis.h index 04e6a63b9e274..de4a325ab9935 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_jis.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_jis.h @@ -21,17 +21,11 @@ * The authors of this file: PHP3 internationalization team * You can contact the primary authors; 金本 茂 , * Tsukada Takuya . 
- * */ #ifndef UNICODE_TABLE_JIS_H #define UNICODE_TABLE_JIS_H -#ifdef UNICODE_TABLE_JIS_DEF - -/* - * Unicode table - */ const unsigned short jisx0208_ucs_table[] = { /* ku 1 */ 0x3000,0x3001,0x3002,0xFF0C,0xFF0E,0x30FB,0xFF1A,0xFF1B, @@ -5846,27 +5840,4 @@ const unsigned short ucs_r_jis_table[] = { int ucs_r_jis_table_min = 0xFF00; int ucs_r_jis_table_max = 0xFF00 + (sizeof (ucs_r_jis_table) / sizeof (unsigned short)); -#else - -extern const unsigned short jisx0208_ucs_table[]; -extern const unsigned short jisx0212_ucs_table[]; -extern const unsigned short ucs_a1_jis_table[]; -extern const unsigned short ucs_a2_jis_table[]; -extern const unsigned short ucs_i_jis_table[]; -extern const unsigned short ucs_r_jis_table[]; - -extern const int jisx0208_ucs_table_size; -extern const int jisx0212_ucs_table_size; -extern const int ucs_a1_jis_table_min; -extern const int ucs_a1_jis_table_max; -extern const int ucs_a2_jis_table_min; -extern const int ucs_a2_jis_table_max; -extern const int ucs_i_jis_table_min; -extern const int ucs_i_jis_table_max; -extern int ucs_r_jis_table_min; -extern int ucs_r_jis_table_max; - -#endif - - #endif /* UNICODE_TABLE_JIS_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_uhc.h b/ext/mbstring/libmbfl/filters/unicode_table_uhc.h index 737d7921619eb..ef45c8547acc3 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_uhc.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_uhc.h @@ -25,11 +25,6 @@ #ifndef UNICODE_TABLE_UHC_H #define UNICODE_TABLE_UHC_H -/* - * Unicode table - */ -#ifdef UNICODE_TABLE_UHC_DEF - const unsigned short uhc1_ucs_table[] = { 0xac02,0xac03,0xac05,0xac06,0xac0b,0xac0c,0xac0d,0xac0e, 0xac0f,0xac18,0xac1e,0xac1f,0xac21,0xac22,0xac23,0xac25, @@ -7178,42 +7173,4 @@ const unsigned short ucs_r2_uhc_table[] = { const int ucs_r2_uhc_table_min = 0xff00; const int ucs_r2_uhc_table_max = 0xff00 + (sizeof (ucs_r2_uhc_table) / sizeof (unsigned short)); -#else - -extern const unsigned short uhc1_ucs_table[]; -extern const 
unsigned short uhc2_ucs_table[]; -extern const unsigned short uhc3_ucs_table[]; -extern const unsigned short ucs_a1_uhc_table[]; -extern const unsigned short ucs_a2_uhc_table[]; -extern const unsigned short ucs_a3_uhc_table[]; -extern const unsigned short ucs_i_uhc_table[]; -extern const unsigned short ucs_s_uhc_table[]; -extern const unsigned short ucs_r1_uhc_table[]; -extern const unsigned short ucs_r2_uhc_table[]; - -extern const int uhc1_ucs_table_size; -extern const int uhc2_ucs_table_size; -extern const int uhc3_ucs_table_size; -extern const int ucs_a1_uhc_table_min; -extern const int ucs_a1_uhc_table_max; -extern const int ucs_a2_uhc_table_min; -extern const int ucs_a2_uhc_table_max; -extern const int ucs_a3_uhc_table_min; -extern const int ucs_a3_uhc_table_max; -extern const int ucs_i_uhc_table_min; -extern const int ucs_i_uhc_table_max; -extern const int ucs_s_uhc_table_min; -extern const int ucs_s_uhc_table_max; -extern const int ucs_r1_uhc_table_min; -extern const int ucs_r1_uhc_table_max; -extern const int ucs_r2_uhc_table_min; -extern const int ucs_r2_uhc_table_max; - - - - -#endif - - - #endif /* UNICODE_TABLE_UHC_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index edad3a3b57599..47d7980d5492e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -36,30 +36,8 @@ #include "mbfilter_8bit.h" #include "mbfilter_wchar.h" -#include "filters/mbfilter_euc_cn.h" -#include "filters/mbfilter_hz.h" -#include "filters/mbfilter_euc_tw.h" -#include "filters/mbfilter_big5.h" -#include "filters/mbfilter_uhc.h" -#include "filters/mbfilter_euc_kr.h" -#include "filters/mbfilter_iso2022_kr.h" -#include "filters/mbfilter_sjis.h" -#include "filters/mbfilter_sjis_2004.h" -#include "filters/mbfilter_sjis_mobile.h" -#include "filters/mbfilter_sjis_mac.h" -#include "filters/mbfilter_cp51932.h" -#include "filters/mbfilter_jis.h" -#include "filters/mbfilter_iso2022_jp_ms.h" 
-#include "filters/mbfilter_iso2022jp_2004.h" -#include "filters/mbfilter_iso2022jp_mobile.h" -#include "filters/mbfilter_euc_jp.h" -#include "filters/mbfilter_euc_jp_2004.h" -#include "filters/mbfilter_euc_jp_win.h" -#include "filters/mbfilter_gb18030.h" -#include "filters/mbfilter_cp932.h" -#include "filters/mbfilter_cp936.h" -#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_base64.h" +#include "filters/mbfilter_cjk.h" #include "filters/mbfilter_qprint.h" #include "filters/mbfilter_uuencode.h" #include "filters/mbfilter_7bit.h" diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index 1d44756ee051a..d78e4763b4878 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -39,30 +39,8 @@ #include "mbfilter_pass.h" #include "mbfilter_8bit.h" -#include "filters/mbfilter_euc_cn.h" -#include "filters/mbfilter_hz.h" -#include "filters/mbfilter_euc_tw.h" -#include "filters/mbfilter_big5.h" -#include "filters/mbfilter_uhc.h" -#include "filters/mbfilter_euc_kr.h" -#include "filters/mbfilter_iso2022_kr.h" -#include "filters/mbfilter_sjis.h" -#include "filters/mbfilter_sjis_mobile.h" -#include "filters/mbfilter_sjis_mac.h" -#include "filters/mbfilter_sjis_2004.h" -#include "filters/mbfilter_cp51932.h" -#include "filters/mbfilter_jis.h" -#include "filters/mbfilter_iso2022_jp_ms.h" -#include "filters/mbfilter_iso2022jp_2004.h" -#include "filters/mbfilter_iso2022jp_mobile.h" -#include "filters/mbfilter_euc_jp.h" -#include "filters/mbfilter_euc_jp_win.h" -#include "filters/mbfilter_euc_jp_2004.h" -#include "filters/mbfilter_gb18030.h" -#include "filters/mbfilter_cp932.h" -#include "filters/mbfilter_cp936.h" -#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_base64.h" +#include "filters/mbfilter_cjk.h" #include "filters/mbfilter_qprint.h" #include "filters/mbfilter_uuencode.h" #include "filters/mbfilter_7bit.h" diff --git 
a/ext/mbstring/tests/cp932_encoding.phpt b/ext/mbstring/tests/cp932_encoding.phpt index df700f20286a5..b7bfee7496ccb 100644 --- a/ext/mbstring/tests/cp932_encoding.phpt +++ b/ext/mbstring/tests/cp932_encoding.phpt @@ -142,6 +142,8 @@ convertInvalidString("\xEA", "%", "SJIS-win", "UTF-8"); convertInvalidString("\x81\x20", "%", "SJIS-win", "UTF-8"); convertInvalidString("\xEA\xA9", "%", "SJIS-win", "UTF-8"); +echo 'mb_strlen("\x80\x81", "CP932") == ' . mb_strlen("\x80\x81", "CP932") . PHP_EOL; + echo "Done!\n"; ?> --EXPECT-- @@ -151,4 +153,5 @@ Unicode -> CP932 conversion works on all invalid codepoints SJIS-win verification and conversion works on all valid characters SJIS-win verification and conversion works on all invalid characters Unicode -> SJIS-win conversion works on all invalid codepoints +mb_strlen("\x80\x81", "CP932") == 2 Done! From 73633bf1c30c3723b5be35031dce29861b30aea8 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Tue, 18 Apr 2023 15:01:42 +0200 Subject: [PATCH 056/168] Optimize conversion of SJIS-2004 text to Unicode Conversion of SJIS-2004 text to UTF-8 using `mb_convert_encoding` is now about 60% faster than before. (Many other mbstring functions will also be faster now on SJIS-2004 text.) 
--- ext/mbstring/libmbfl/filters/mbfilter_cjk.c | 50 ++++------ .../libmbfl/filters/unicode_table_jis2004.h | 94 ++++++++++--------- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c index 063274422007b..4bc89a3d98591 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c @@ -1651,7 +1651,6 @@ static int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter) /* conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ if (w <= 0) { - w1 = (s1 << 8) | s2; k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); if (k >= 0) { w = jisx0213_jis_u5_tbl[k] + 0x20000; @@ -1718,8 +1717,7 @@ static int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter) /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ if (w <= 0) { - w1 = ((c1 + k + 94) << 8) | c2; - k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + k = mbfl_bisec_srch2(s, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); if (k >= 0) { w = jisx0213_jis_u5_tbl[k] + 0x20000; } @@ -2165,7 +2163,7 @@ static size_t mb_iso2022jp2004_to_wchar(unsigned char **in, size_t *in_len, uint /* Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ if (!w) { - int k = mbfl_bisec_srch2((c << 8) | c2, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); if (k >= 0) { w = jisx0213_jis_u5_tbl[k] + 0x20000; } @@ -2192,7 +2190,7 @@ static size_t mb_iso2022jp2004_to_wchar(unsigned char **in, size_t *in_len, uint /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ if (!w) { - k = mbfl_bisec_srch2(((c + k + 94) << 8) | c2, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + k = mbfl_bisec_srch2(s, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); if (k >= 0) { w = jisx0213_jis_u5_tbl[k] + 0x20000; } @@ -7142,25 +7140,17 @@ static size_t 
mb_sjis2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t } } else if (c >= 0xA1 && c <= 0xDF) { *out++ = 0xFEC0 + c; - } else if (c > 0x80 && c < 0xFD && c != 0xA0) { + } else { if (p == e) { *out++ = MBFL_BAD_INPUT; break; } unsigned char c2 = *p++; - - if (c2 < 0x40 || c2 > 0xFC || c2 == 0x7F) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned int s1, s2; - SJIS_DECODE(c, c2, s1, s2); - unsigned int w1 = (s1 << 8) | s2, w = 0; + uint32_t w1 = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; /* Conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); + if (w1 >= 0x0170 && w1 <= 0x03F1) { + int k = mbfl_bisec_srch2(w1, jisx0213_u2_key_b, jisx0213_u2_tbl_len); if (k >= 0) { *out++ = jisx0213_u2_tbl[2*k]; *out++ = jisx0213_u2_tbl[2*k+1]; @@ -7169,23 +7159,24 @@ static size_t mb_sjis2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t } /* Conversion for BMP */ - w1 = (s1 - 0x21)*94 + s2 - 0x21; if (w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; + uint32_t w = jisx0213_ucs_table[w1]; + if (w) { + *out++ = w; + continue; + } } /* Conversion for CJK Unified Ideographs extension B (U+2XXXX) */ - if (!w) { - w1 = (s1 << 8) | s2; - int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; + int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + *out++ = jisx0213_jis_u5_tbl[k] + 0x20000; + } else { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; } + *out++ = MBFL_BAD_INPUT; } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; } } @@ -9174,7 +9165,6 @@ static size_t mb_eucjp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t /* Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ if (!w) { - w1 = (s1 << 8) | s2; int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); if (k >= 0) { w = jisx0213_jis_u5_tbl[k] + 0x20000; @@ -9218,7 +9208,7 @@ static size_t mb_eucjp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ if (!w) { - k = mbfl_bisec_srch2(((c2 - 0x80 + k + 94) << 8) | (c3 - 0x80), jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + k = mbfl_bisec_srch2(s, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); if (k >= 0) { w = jisx0213_jis_u5_tbl[k] + 0x20000; } diff --git a/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h b/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h index 965a567b4b7d7..0b16114f6584d 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h @@ -4888,44 +4888,45 @@ static const unsigned short jisx0213_jis_u5_tbl[] = { 0x2218,0xA38C,0xA437,0xA5F1,0xA602,0xA61A,0xA6B2,}; static const unsigned short jisx0213_jis_u5_key[] = { -0x2E22,0x2F42,0x2F4C,0x2F60,0x2F7B,0x4F54,0x4F63,0x4F6E, -0x753A,0x7572,0x7629,0x7632,0x7660,0x776C,0x787E,0x7929, -0x7947,0x7954,0x796E,0x7A5D,0x7B33,0x7B49,0x7B6C,0x7C49, -0x7C51,0x7E66,0x7F21,0x7F2B,0x7F2E,0x7F36,0x7F46,0x7F70, -0x7F77,0x7F79,0x8030,0x8037,0x8038,0x803A,0x803B,0x803F, -0x8040,0x8045,0x8048,0x804A,0x804B,0x805B,0x8066,0x806C, -0x8122,0x8125,0x8127,0x8131,0x8132,0x8138,0x813F,0x8141, -0x814A,0x8152,0x8153,0x8159,0x815C,0x8177,0x822A,0x8231, -0x8232,0x823A,0x823D,0x8259,0x825C,0x825E,0x8263,0x826A, -0x826B,0x8272,0x8274,0x8275,0x8325,0x8332,0x833E,0x8344, -0x8347,0x8355,0x8356,0x837E,0x8422,0x842B,0x8430,0x8450, -0x8465,0x846D,0x8472,0x8524,0x8529,0x852A,0x8532,0x8534, 
-0x8535,0x8539,0x8556,0x857D,0x8623,0x8624,0x863A,0x863C, -0x863D,0x8642,0x8643,0x8644,0x8647,0x8649,0x8655,0x8656, -0x8657,0x865B,0x8677,0x8678,0x872A,0x873F,0x8740,0x8742, -0x8743,0x874E,0x8759,0x8761,0x8769,0x876A,0x8770,0x8775, -0x8823,0x8834,0x8849,0x885C,0x885E,0x885F,0x8860,0x8932, -0x8947,0x894D,0x8961,0x8964,0x8A22,0x8A33,0x8A39,0x8A53, -0x8A7B,0x8B2E,0x8B30,0x8B35,0x8B44,0x8B5D,0x8B61,0x8B66, -0x8B69,0x8B75,0x8B77,0x8B7A,0x8C21,0x8C23,0x8C24,0x8C28, -0x8C2C,0x8C3D,0x8C48,0x8C5B,0x8C75,0x8C76,0x8D32,0x8D3D, -0x8D3E,0x8D40,0x8D52,0x8D5D,0x8D5E,0x8D73,0x8D74,0x8D75, -0x8D77,0x8D7B,0x8D7D,0x8E22,0x8E24,0x8E27,0x8E2E,0x8E2F, -0x8E34,0x8E35,0x8E3D,0x8E42,0x8E4F,0x8E69,0x8E6B,0x8E72, -0x8E75,0x8E79,0x8F35,0x8F3A,0x8F46,0x8F56,0x8F58,0x8F5A, -0x8F5D,0x8F5F,0x8F63,0x8F6A,0x8F70,0x8F73,0x9044,0x904E, -0x905D,0x9075,0x907E,0x9121,0x9122,0x9133,0x9136,0x9164, -0x9165,0x916B,0x916E,0x9173,0x9229,0x922A,0x922C,0x9234, -0x923C,0x923E,0x9242,0x9256,0x9263,0x9277,0x9279,0x927A, -0x9325,0x932F,0x9332,0x9339,0x9342,0x9348,0x9359,0x935E, -0x9366,0x936B,0x937A,0x937E,0x9421,0x942C,0x942F,0x944F, -0x9450,0x9457,0x9465,0x9466,0x9471,0x9472,0x947E,0x9521, -0x952C,0x952D,0x9536,0x9537,0x953D,0x953E,0x954E,0x954F, -0x9557,0x955A,0x955C,0x955D,0x9561,0x9565,0x9567,0x9569, -0x9571,0x9622,0x9623,0x9638,0x9642,0x964C,0x9656,0x9659, -0x965D,0x9676,0x972C,0x974B,0x974C,0x9759,0x975B,0x975D, -0x9767,0x976D,0x9770,0x9825,0x9829,0x982B,0x9832,0x9835, -0x9853,0x9858,0x985A,0x986E,0x9870,0x9872,0x9876,}; +0x04C7,0x0545,0x054F,0x0563,0x057E,0x1117,0x1126,0x1131, +0x1EF1,0x1F29,0x1F3E,0x1F47,0x1F75,0x1FDF,0x204F,0x2058, +0x2076,0x2083,0x209D,0x20EA,0x211E,0x2134,0x2157,0x2192, +0x219A,0x226B,0x2284,0x228E,0x2291,0x2299,0x22A9,0x22D3, +0x22DA,0x22DC,0x22F1,0x22F8,0x22F9,0x22FB,0x22FC,0x2300, +0x2301,0x2306,0x2309,0x230B,0x230C,0x231C,0x2327,0x232D, +0x2341,0x2344,0x2346,0x2350,0x2351,0x2357,0x235E,0x2360, +0x2369,0x2371,0x2372,0x2378,0x237B,0x2396,0x23A7,0x23AE, 
+0x23AF,0x23B7,0x23BA,0x23D6,0x23D9,0x23DB,0x23E0,0x23E7, +0x23E8,0x23EF,0x23F1,0x23F2,0x2400,0x240D,0x2419,0x241F, +0x2422,0x2430,0x2431,0x2459,0x245B,0x2464,0x2469,0x2489, +0x249E,0x24A6,0x24AB,0x24BB,0x24C0,0x24C1,0x24C9,0x24CB, +0x24CC,0x24D0,0x24ED,0x2514,0x2518,0x2519,0x252F,0x2531, +0x2532,0x2537,0x2538,0x2539,0x253C,0x253E,0x254A,0x254B, +0x254C,0x2550,0x256C,0x256D,0x257D,0x2592,0x2593,0x2595, +0x2596,0x25A1,0x25AC,0x25B4,0x25BC,0x25BD,0x25C3,0x25C8, +0x25D4,0x25E5,0x25FA,0x260D,0x260F,0x2610,0x2611,0x2641, +0x2656,0x265C,0x2670,0x2673,0x268F,0x26A0,0x26A6,0x26C0, +0x26E8,0x26F9,0x26FB,0x2700,0x270F,0x2728,0x272C,0x2731, +0x2734,0x2740,0x2742,0x2745,0x274A,0x274C,0x274D,0x2751, +0x2755,0x2766,0x2771,0x2784,0x279E,0x279F,0x27B9,0x27C4, +0x27C5,0x27C7,0x27D9,0x27E4,0x27E5,0x27FA,0x27FB,0x27FC, +0x27FE,0x2802,0x2804,0x2807,0x2809,0x280C,0x2813,0x2814, +0x2819,0x281A,0x2822,0x2827,0x2834,0x284E,0x2850,0x2857, +0x285A,0x285E,0x2878,0x287D,0x2889,0x2899,0x289B,0x289D, +0x28A0,0x28A2,0x28A6,0x28AD,0x28B3,0x28B6,0x28E5,0x28EF, +0x28FE,0x2916,0x291F,0x2920,0x2921,0x2932,0x2935,0x2963, +0x2964,0x296A,0x296D,0x2972,0x2986,0x2987,0x2989,0x2991, +0x2999,0x299B,0x299F,0x29B3,0x29C0,0x29D4,0x29D6,0x29D7, +0x29E0,0x29EA,0x29ED,0x29F4,0x29FD,0x2A03,0x2A14,0x2A19, +0x2A21,0x2A26,0x2A35,0x2A39,0x2A3A,0x2A45,0x2A48,0x2A68, +0x2A69,0x2A70,0x2A7E,0x2A7F,0x2A8A,0x2A8B,0x2A97,0x2A98, +0x2AA3,0x2AA4,0x2AAD,0x2AAE,0x2AB4,0x2AB5,0x2AC5,0x2AC6, +0x2ACE,0x2AD1,0x2AD3,0x2AD4,0x2AD8,0x2ADC,0x2ADE,0x2AE0, +0x2AE8,0x2AF7,0x2AF8,0x2B0D,0x2B17,0x2B21,0x2B2B,0x2B2E, +0x2B32,0x2B4B,0x2B5F,0x2B7E,0x2B7F,0x2B8C,0x2B8E,0x2B90, +0x2B9A,0x2BA0,0x2BA3,0x2BB6,0x2BBA,0x2BBC,0x2BC3,0x2BC6, +0x2BE4,0x2BE9,0x2BEB,0x2BFF,0x2C01,0x2C03,0x2C07 +}; static const unsigned short jisx0213_u5_jis_tbl[] = { 0x2E22,0x7F21,0x7F2B,0x7F2E,0x7F36,0x7F46,0x7F70,0x7F79, @@ -5016,7 +5017,15 @@ static const unsigned short jisx0213_u2_key[] = { 0x2477,0x2478,0x2479,0x247A,0x247B,0x2577,0x2578,0x2579, 
0x257A,0x257B,0x257C,0x257D,0x257E,0x2678,0x2B44,0x2B48, 0x2B49,0x2B4A,0x2B4B,0x2B4C,0x2B4D,0x2B4E,0x2B4F,0x2B65, - 0x2B66}; + 0x2B66 +}; + +static const unsigned short jisx0213_u2_key_b[] = { + 0x0170,0x0171,0x0172,0x0173,0x0174,0x01CE,0x01CF,0x01D0, + 0x01D1,0x01D2,0x01D3,0x01D4,0x01D5,0x022D,0x03CF,0x03D3, + 0x03D4,0x03D5,0x03D6,0x03D7,0x03D8,0x03D9,0x03DA,0x03F0, + 0x03F1 +}; /* combined pairs in Unicode */ static const unsigned short jisx0213_u2_tbl[] = { @@ -5026,18 +5035,19 @@ static const unsigned short jisx0213_u2_tbl[] = { 0x30C8,0x309A,0x31F7,0x309A,0x00E6,0x0300,0x0254,0x0300, 0x0254,0x0301,0x028C,0x0300,0x028C,0x0301,0x0259,0x0300, 0x0259,0x0301,0x025A,0x0300,0x025A,0x0301,0x02E9,0x02E5, - 0x02E5,0x02E9}; + 0x02E5,0x02E9 +}; /* fallback chars for combined chars in Unicode */ static const unsigned short jisx0213_u2_fb_tbl[] = { 0x242B,0x242D,0x242F,0x2431,0x2433,0x252B,0x252D,0x252F, 0x2531,0x2533,0x253B,0x2544,0x2548,0x2675,0x295C,0x2B38, 0x2B38,0x2B37,0x2B37,0x2B30,0x2B30,0x2B43,0x2B43,0x2B64, - 0x2B60}; + 0x2B60 +}; static const int jisx0213_u2_tbl_len = sizeof(jisx0213_u2_key)/sizeof(unsigned short); - static const unsigned short jisx0213_p2_ofst[] = { 0, 7, 2, 3, 4, 11, 12, 13, 14, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93 From 175154dbcc90533f9c85e4a4b91f4d9e1deb40c8 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Tue, 18 Apr 2023 15:12:14 +0200 Subject: [PATCH 057/168] Optimize conversion of CP932 text to Unicode Conversion of CP932 text to UTF-8 using `mb_convert_encoding` is now about 20% faster than before. 
--- ext/mbstring/libmbfl/filters/mbfilter_cjk.c | 82 ++++++++++----------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c index 4bc89a3d98591..ca7d9dad6a99b 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c @@ -7486,54 +7486,54 @@ static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *bu } else if (c > 0xA0 && c < 0xE0) { /* Kana */ *out++ = 0xFEC0 + c; - } else if (c > 0x80 && c < 0xFD && c != 0xA0 && p < e) { + } else { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } unsigned char c2 = *p++; + unsigned int w = 0; + unsigned int s = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) { - unsigned int s1, s2, w = 0; - SJIS_DECODE(c, c2, s1, s2); - unsigned int s = (s1 - 0x21)*94 + s2 - 0x21; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ } + } - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - 
cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= (94*94) && s < (114*94)) { - w = s - (94*94) + 0xE000; - } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { + w = s - (94*94) + 0xE000; } + } - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; + if (!w) { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + w = MBFL_BAD_INPUT; } - } else { - *out++ = MBFL_BAD_INPUT; + *out++ = w; } } From 245daedb412d331f96a132e6bf0d9e3d3b26fb0e Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sat, 22 Apr 2023 17:48:47 +0200 Subject: [PATCH 058/168] Move kana translation tables to mbfilter_cjk.c These (static) tables were defined in a header file, which was included in two different .c files. That will result in two copies of the tables being included in the PHP binary. But the tables were only used in one of the two .c files. Move it where it is used to avoid needlessly bloating the binary. (I checked in a hex editor and confirmed that while the previous binary contained two copies of these tables, it now only contains one.) 
--- ext/mbstring/libmbfl/filters/mbfilter_cjk.c | 40 +++++++++++++++++++ .../filters/translit_kana_jisx0201_jisx0208.h | 39 ------------------ 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c index ca7d9dad6a99b..0c3c85474f56a 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c @@ -2533,6 +2533,46 @@ static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter) return 0; } +static const unsigned char hankana2zenkana_table[64] = { + 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5, + 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6, + 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9, + 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC, + 0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0, + 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, + 0xEF,0xF3,0x9B,0x9C +}; + +static const unsigned char hankana2zenhira_table[64] = { + 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45, + 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46, + 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59, + 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C, + 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80, + 0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D, + 0x8F,0x93,0x9B,0x9C +}; + +static const unsigned char zenkana2hankana_table[84][2] = { + {0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00}, + {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00}, + {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00}, + {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E}, + {0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00}, + {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E}, + {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00}, + {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00}, + {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00}, + 
{0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00}, + {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F}, + {0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E}, + {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00}, + {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00}, + {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00}, + {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00}, + {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E} +}; + /* Apply various transforms to input codepoint, such as converting halfwidth katakana * to fullwidth katakana. `mode` is a bitfield which controls which transforms are * actually performed. The bit values are defined in translit_kana_jisx0201_jisx0208.h. diff --git a/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h b/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h index 545333928b74c..ea117228893c7 100644 --- a/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h +++ b/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h @@ -47,43 +47,4 @@ #define MBFL_HAN2ZEN_GLUE 0x10000 -static const unsigned char hankana2zenkana_table[64] = { - 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5, - 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6, - 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9, - 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC, - 0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0, - 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, - 0xEF,0xF3,0x9B,0x9C -}; - -static const unsigned char hankana2zenhira_table[64] = { - 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45, - 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46, - 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59, - 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C, - 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80, - 0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D, - 0x8F,0x93,0x9B,0x9C -}; -static const unsigned char zenkana2hankana_table[84][2] = { - 
{0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00}, - {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00}, - {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00}, - {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E}, - {0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00}, - {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E}, - {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00}, - {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00}, - {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00}, - {0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00}, - {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F}, - {0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E}, - {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00}, - {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00}, - {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00}, - {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00}, - {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E} -}; - #endif /* TRANSLIT_KANA_JISX0201_JISX0208_H */ From f337c920509257f7706f7a538cc1b34379dd6bd8 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Tue, 16 May 2023 09:04:59 -0700 Subject: [PATCH 059/168] Test mb_strlen for all text encodings supported by mbstring When combining all the CJK encoding conversion code in a single file, I combined some redundant mblen tables. This check will help to ensure that all the mblen tables are correct. 
--- ext/mbstring/tests/encoding_tests.inc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ext/mbstring/tests/encoding_tests.inc b/ext/mbstring/tests/encoding_tests.inc index 978cb8db399c0..095aeae517fcc 100644 --- a/ext/mbstring/tests/encoding_tests.inc +++ b/ext/mbstring/tests/encoding_tests.inc @@ -148,6 +148,12 @@ function testAllValidChars($charMap, $fromEncoding, $toEncoding, $bothWays = tru } testValidString($fromString, $toString, $fromEncoding, $toEncoding, $bothWays); + + $strLen = mb_strlen($fromString, $fromEncoding); + if ($strLen !== mb_strlen($toString, $toEncoding)) { + echo "Length of $fromEncoding string '" . bin2hex($fromString) . "' was different than expected; mb_strlen returned $strLen" . PHP_EOL; + testFailedIncrement(); + } } } From 8e6be14372590a14474d49b484c0d1b32005d0bc Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Tue, 16 May 2023 14:58:37 -0700 Subject: [PATCH 060/168] Fix problem with CP949 conversion when 0xC9 precedes byte lower than 0xA1 This bug was introduced in e837a8800b. In that commit, I increased the performance of CP949 text conversion, but accidentally broke the case where 0xC9 (illegal byte to start a character) is followed by a valid character with a first byte less than 0xA1. The 'broken' behavior is that both the 0xC9 byte and the following valid character would be converted to error markers. 
--- ext/mbstring/libmbfl/filters/mbfilter_cjk.c | 14 +++++--------- ext/mbstring/tests/uhc_encoding.phpt | 5 +++++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c index 0c3c85474f56a..a1ecdab986253 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c @@ -10224,17 +10224,13 @@ static size_t mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, w = (c - 0xC7)*94 + c2 - 0xA1; ZEND_ASSERT(w < uhc3_ucs_table_size); w = uhc3_ucs_table[w]; - if (!w) { - /* If c == 0xC9, we shouldn't have tried to read a 2-byte char at all... but it is faster - * to fix up that rare case here rather than include an extra check in the hot path */ - if (c == 0xC9) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } } if (!w) { + /* If c == 0xC9, we shouldn't have tried to read a 2-byte char at all... but it is faster + * to fix up that rare case here rather than include an extra check in the hot path */ + if (c == 0xC9) { + p--; + } w = MBFL_BAD_INPUT; } *out++ = w; diff --git a/ext/mbstring/tests/uhc_encoding.phpt b/ext/mbstring/tests/uhc_encoding.phpt index 622569c68c699..da8ec711ebfae 100644 --- a/ext/mbstring/tests/uhc_encoding.phpt +++ b/ext/mbstring/tests/uhc_encoding.phpt @@ -14,6 +14,11 @@ testEncodingFromUTF16ConversionTable(__DIR__ . 
'/data/CP949.txt', 'UHC'); // Regression test convertInvalidString("\xE4\xA4\xB4<", "\x75\x1A\x00%", "UHC", "UTF-16BE"); +// When optimizing performance of CP949 conversion, I accidentally broke the +// case where 0xC9 appears before a valid character which starts with a +// byte lower than 0xA1 +convertInvalidString("\xC9\x9E\x98", "%\xEC\x98\x92", "UHC", "UTF-8"); + // Test "long" illegal character markers mb_substitute_character("long"); convertInvalidString("\x80", "%", "UHC", "UTF-8"); From 18ca48934730ce35f5304763064f75f1108ca0c5 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sat, 20 May 2023 21:04:16 -0700 Subject: [PATCH 061/168] Convert mbfilter_conv{,_r}_map_tbl to return bool Thanks to Girgias for pointing this out. --- .../libmbfl/filters/mbfilter_utf8_mobile.c | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c index 374863ce52681..dd253cfe689fc 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c @@ -47,27 +47,27 @@ static void mb_wchar_to_utf8_kddi_b(uint32_t *in, size_t len, mb_convert_buf *bu static size_t mb_utf8_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); static void mb_wchar_to_utf8_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n) +static bool mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n) { for (int i = 0; i < n; i++) { if (map[i][0] <= c && c <= map[i][1]) { *w = c - map[i][0] + map[i][2]; - return 1; + return true; } } - return 0; + return false; } -static int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n) +static bool mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n) { /* Convert in reverse 
direction */ for (int i = 0; i < n; i++) { if (map[i][2] <= c && c <= map[i][2] - map[i][0] + map[i][1]) { *w = c + map[i][0] - map[i][2]; - return 1; + return true; } } - return 0; + return false; } static const unsigned short mbfl_docomo2uni_pua[4][3] = { @@ -284,13 +284,13 @@ int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter) s = (filter->cache << 6) | (c & 0x3f); filter->cache = 0; - if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4) > 0) { + if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4)) { s = mbfilter_sjis_emoji_docomo2unicode(s1, &snd); - } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 7) > 0) { + } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 7)) { s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd); - } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8) > 0) { + } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8)) { s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd); - } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6) > 0) { + } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6)) { s = mbfilter_sjis_emoji_sb2unicode(s1, &snd); } @@ -357,10 +357,10 @@ int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter) if (c >= 0 && c < 0x110000) { int s1, c1; - if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4) > 0) || - 
(filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) || - (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) || - (filter->to->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6) > 0)) { + if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4)) || + (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7)) || + (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8)) || + (filter->to->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6))) { c = c1; } @@ -486,7 +486,7 @@ static size_t mb_mobile_utf8_to_wchar(unsigned char **in, size_t *in_len, uint32 } int s1 = 0, snd = 0; - if (mbfilter_conv_r_map_tbl(s, &s1, emoji_map, n) > 0) { + if (mbfilter_conv_r_map_tbl(s, &s1, emoji_map, n)) { s = convert_emoji(s1, &snd); if (snd) { *out++ = snd; @@ -552,7 +552,7 @@ static void mb_wchar_to_utf8_docomo(uint32_t *in, size_t len, mb_convert_buf *bu } } - if (s && mbfilter_conv_map_tbl(s, &c1, mbfl_docomo2uni_pua, 4) > 0) { + if (s && mbfilter_conv_map_tbl(s, &c1, mbfl_docomo2uni_pua, 4)) { w = c1; } @@ -649,7 +649,7 @@ static void mb_wchar_to_utf8_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, } process_kuten: - if (s && mbfilter_conv_map_tbl(s, &c1, emoji_map, n) > 0) { + if (s && 
mbfilter_conv_map_tbl(s, &c1, emoji_map, n)) { w = c1; } @@ -761,7 +761,7 @@ static void mb_wchar_to_utf8_sb(uint32_t *in, size_t len, mb_convert_buf *buf, b } process_kuten: - if (s && mbfilter_conv_map_tbl(s, &c1, mbfl_sb2uni_pua, 6) > 0) { + if (s && mbfilter_conv_map_tbl(s, &c1, mbfl_sb2uni_pua, 6)) { w = c1; } From 03b163b2b3f0060a7b3d6ad5b7d62f8354f1c304 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 21 May 2023 11:27:32 +0200 Subject: [PATCH 062/168] Remove unused variable err in mb_send_mail() (#11285) --- ext/mbstring/mbstring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 88bc7334253d9..758c00b4261ef 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4211,7 +4211,6 @@ PHP_FUNCTION(mb_send_mail) *head_enc, /* header transfer encoding */ *body_enc; /* body transfer encoding */ const mbfl_language *lang; - int err = 0; HashTable ht_headers; zval *s; @@ -4426,7 +4425,7 @@ PHP_FUNCTION(mb_send_mail) extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd)); } - RETVAL_BOOL(!err && php_mail(to_r, ZSTR_VAL(subject), message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)); + RETVAL_BOOL(php_mail(to_r, ZSTR_VAL(subject), message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)); if (extra_cmd) { zend_string_release_ex(extra_cmd, 0); From c230aa9be36bbe33daea8c49c28fb577c084105b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 20 May 2023 18:20:13 +0200 Subject: [PATCH 063/168] Correctly handle multiple constants in typed declaration While here also fix AST printing support. 
--- .../typed_class_constants_ast_print.phpt | 18 ++++++++++++++ ...ed_class_constants_multiple_constants.phpt | 12 ++++++++++ Zend/zend_ast.c | 4 ++++ Zend/zend_ast.h | 4 ++-- Zend/zend_compile.c | 6 ++--- Zend/zend_language_parser.y | 24 ++++++++++--------- 6 files changed, 52 insertions(+), 16 deletions(-) create mode 100644 Zend/tests/type_declarations/typed_class_constants_ast_print.phpt create mode 100644 Zend/tests/type_declarations/typed_class_constants_multiple_constants.phpt diff --git a/Zend/tests/type_declarations/typed_class_constants_ast_print.phpt b/Zend/tests/type_declarations/typed_class_constants_ast_print.phpt new file mode 100644 index 0000000000000..dd4957f2269c5 --- /dev/null +++ b/Zend/tests/type_declarations/typed_class_constants_ast_print.phpt @@ -0,0 +1,18 @@ +--TEST-- +AST printing support for typed constants +--FILE-- +getMessage(), "\n"; +} + +?> +--EXPECT-- +assert(false && new class { + public const int X = 1; +}) diff --git a/Zend/tests/type_declarations/typed_class_constants_multiple_constants.phpt b/Zend/tests/type_declarations/typed_class_constants_multiple_constants.phpt new file mode 100644 index 0000000000000..8d973b2e5d591 --- /dev/null +++ b/Zend/tests/type_declarations/typed_class_constants_multiple_constants.phpt @@ -0,0 +1,12 @@ +--TEST-- +Multiple typed constants in one declaration +--FILE-- + +--EXPECTF-- +Fatal error: Cannot use string as value for class constant Test::Y of type int in %s on line %d diff --git a/Zend/zend_ast.c b/Zend/zend_ast.c index 70d01bb9f85f3..954c9958c534d 100644 --- a/Zend/zend_ast.c +++ b/Zend/zend_ast.c @@ -1924,6 +1924,10 @@ static ZEND_COLD void zend_ast_export_ex(smart_str *str, zend_ast *ast, int prio zend_ast_export_visibility(str, ast->attr); smart_str_appends(str, "const "); + if (ast->child[2]) { + zend_ast_export_type(str, ast->child[2], indent); + smart_str_appendc(str, ' '); + } ast = ast->child[0]; diff --git a/Zend/zend_ast.h b/Zend/zend_ast.h index 73e4fed7a997a..0bbb3a820c291 
100644 --- a/Zend/zend_ast.h +++ b/Zend/zend_ast.h @@ -145,7 +145,6 @@ enum _zend_ast_kind { ZEND_AST_USE_ELEM, ZEND_AST_TRAIT_ALIAS, ZEND_AST_GROUP_USE, - ZEND_AST_CLASS_CONST_GROUP, ZEND_AST_ATTRIBUTE, ZEND_AST_MATCH, ZEND_AST_MATCH_ARM, @@ -161,6 +160,8 @@ enum _zend_ast_kind { ZEND_AST_CATCH, ZEND_AST_PROP_GROUP, ZEND_AST_PROP_ELEM, + ZEND_AST_CONST_ELEM, + ZEND_AST_CLASS_CONST_GROUP, // Pseudo node for initializing enums ZEND_AST_CONST_ENUM_INIT, @@ -169,7 +170,6 @@ enum _zend_ast_kind { ZEND_AST_FOR = 4 << ZEND_AST_NUM_CHILDREN_SHIFT, ZEND_AST_FOREACH, ZEND_AST_ENUM_CASE, - ZEND_AST_CONST_ELEM, /* 5 child nodes */ ZEND_AST_PARAM = 5 << ZEND_AST_NUM_CHILDREN_SHIFT, diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 04244b0de626a..bb12e55848b2f 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -7693,7 +7693,7 @@ static void zend_check_trait_alias_modifiers(uint32_t attr) /* {{{ */ } /* }}} */ -static void zend_compile_class_const_decl(zend_ast *ast, uint32_t flags, zend_ast *attr_ast) +static void zend_compile_class_const_decl(zend_ast *ast, uint32_t flags, zend_ast *attr_ast, zend_ast *type_ast) { zend_ast_list *list = zend_ast_get_list(ast); zend_class_entry *ce = CG(active_class_entry); @@ -7705,7 +7705,6 @@ static void zend_compile_class_const_decl(zend_ast *ast, uint32_t flags, zend_as zend_ast *name_ast = const_ast->child[0]; zend_ast **value_ast_ptr = &const_ast->child[1]; zend_ast *doc_comment_ast = const_ast->child[2]; - zend_ast *type_ast = const_ast->child[3]; zend_string *name = zval_make_interned_string(zend_ast_get_zval(name_ast)); zend_string *doc_comment = doc_comment_ast ? 
zend_string_copy(zend_ast_get_str(doc_comment_ast)) : NULL; zval value_zv; @@ -7752,8 +7751,9 @@ static void zend_compile_class_const_group(zend_ast *ast) /* {{{ */ { zend_ast *const_ast = ast->child[0]; zend_ast *attr_ast = ast->child[1]; + zend_ast *type_ast = ast->child[2]; - zend_compile_class_const_decl(const_ast, ast->attr, attr_ast); + zend_compile_class_const_decl(const_ast, ast->attr, attr_ast, type_ast); } /* }}} */ diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y index 9b663887264f0..298eaf95ad055 100644 --- a/Zend/zend_language_parser.y +++ b/Zend/zend_language_parser.y @@ -267,7 +267,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %type echo_expr_list unset_variables catch_name_list catch_list optional_variable parameter_list class_statement_list %type implements_list case_list if_stmt_without_else %type non_empty_parameter_list argument_list non_empty_argument_list property_list -%type class_const_list first_class_const_decl class_const_decl class_name_list trait_adaptations method_body non_empty_for_exprs +%type class_const_list class_const_decl class_name_list trait_adaptations method_body non_empty_for_exprs %type ctor_arguments alt_if_stmt_without_else trait_adaptation_list lexical_vars %type lexical_var_list encaps_list %type array_pair non_empty_array_pair_list array_pair_list possible_array_pair @@ -940,7 +940,10 @@ attributed_class_statement: { $$ = zend_ast_create(ZEND_AST_PROP_GROUP, $2, $3, NULL); $$->attr = $1; } | class_const_modifiers T_CONST class_const_list ';' - { $$ = zend_ast_create(ZEND_AST_CLASS_CONST_GROUP, $3, NULL); + { $$ = zend_ast_create(ZEND_AST_CLASS_CONST_GROUP, $3, NULL, NULL); + $$->attr = $1; } + | class_const_modifiers T_CONST type_expr class_const_list ';' + { $$ = zend_ast_create(ZEND_AST_CLASS_CONST_GROUP, $4, NULL, $3); $$->attr = $1; } | method_modifiers function returns_ref identifier backup_doc_comment '(' parameter_list ')' return_type backup_fn_flags method_body backup_fn_flags 
@@ -1076,21 +1079,20 @@ property: class_const_list: class_const_list ',' class_const_decl { $$ = zend_ast_list_add($1, $3); } - | first_class_const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); } -; - -first_class_const_decl: - T_STRING '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL), NULL); } - | semi_reserved '=' expr backup_doc_comment { zval zv; if (zend_lex_tstring(&zv, $1) == FAILURE) { YYABORT; } $$ = zend_ast_create(ZEND_AST_CONST_ELEM, zend_ast_create_zval(&zv), $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL), NULL); } - | type_expr identifier '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $2, $4, ($5 ? zend_ast_create_zval_from_str($5) : NULL), $1); } + | class_const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); } ; class_const_decl: - identifier '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL), NULL); } + T_STRING '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL)); } + | semi_reserved '=' expr backup_doc_comment { + zval zv; + if (zend_lex_tstring(&zv, $1) == FAILURE) { YYABORT; } + $$ = zend_ast_create(ZEND_AST_CONST_ELEM, zend_ast_create_zval(&zv), $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL)); + } ; const_decl: - T_STRING '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL), NULL); } + T_STRING '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? 
zend_ast_create_zval_from_str($4) : NULL)); } ; echo_expr_list: From 5c741644d177d521e35168c63891cb43e018f65b Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Mon, 22 May 2023 19:48:07 +0200 Subject: [PATCH 064/168] Fix string coercion for $a .= $a (#11296) free_op2_string may be set to false when the operands are not strings, and `result == op1 == op2`, by re-using the same string for both operands. In that case, the string should still be copied to result because result is not actually a string. Also change the op1 branch to stay consistent. Introduced by GH-10049 --- Zend/tests/bug79836_3.phpt | 10 ++++++++++ Zend/zend_operators.c | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 Zend/tests/bug79836_3.phpt diff --git a/Zend/tests/bug79836_3.phpt b/Zend/tests/bug79836_3.phpt new file mode 100644 index 0000000000000..75262eb460fb4 --- /dev/null +++ b/Zend/tests/bug79836_3.phpt @@ -0,0 +1,10 @@ +--TEST-- +Bug #79836 ($a .= $a should coerce to string) +--FILE-- + +--EXPECT-- +string(0) "" diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index c4fce74ebbbee..7e5e5ff3e0bdd 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -2005,7 +2005,7 @@ ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval has_op2_string:; if (UNEXPECTED(ZSTR_LEN(op1_string) == 0)) { - if (EXPECTED(free_op2_string || result != op2)) { + if (EXPECTED(result != op2 || Z_TYPE_P(result) != IS_STRING)) { if (result == orig_op1) { i_zval_ptr_dtor(result); } @@ -2018,7 +2018,7 @@ has_op2_string:; } } } else if (UNEXPECTED(ZSTR_LEN(op2_string) == 0)) { - if (EXPECTED(free_op1_string || result != op1)) { + if (EXPECTED(result != op1 || Z_TYPE_P(result) != IS_STRING)) { if (result == orig_op1) { i_zval_ptr_dtor(result); } From 7790ee8736a316f037c18c436d7d2d7a1118abaf Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Mon, 22 May 2023 22:02:27 +0200 Subject: [PATCH 065/168] Fix concat_function use-after-free on out-of-memory 
error (#11297) Introduced by GH-10049 --- Zend/tests/bug79836_4.phpt | 18 ++++++++++++++++++ Zend/zend_operators.c | 7 +++++-- 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 Zend/tests/bug79836_4.phpt diff --git a/Zend/tests/bug79836_4.phpt b/Zend/tests/bug79836_4.phpt new file mode 100644 index 0000000000000..2d6b862f42139 --- /dev/null +++ b/Zend/tests/bug79836_4.phpt @@ -0,0 +1,18 @@ +--TEST-- +Bug #79836 (use-after-free in concat_function) +--INI-- +memory_limit=10M +--FILE-- + +--EXPECTF-- +Fatal error: Allowed memory size of %d bytes exhausted%s(tried to allocate %d bytes) in %s on line %d diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index 7e5e5ff3e0bdd..0b7902d4e36c6 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -2048,13 +2048,16 @@ has_op2_string:; } if (result == op1) { + /* special case, perform operations on result */ + result_str = zend_string_extend(op1_string, result_len, 0); + /* Free result after zend_string_extend(), as it may throw an out-of-memory error. If we + * free it before we would leave the released variable on the stack with shutdown trying + * to free it again. */ if (free_op1_string) { /* op1_string will be used as the result, so we should not free it */ i_zval_ptr_dtor(result); free_op1_string = false; } - /* special case, perform operations on result */ - result_str = zend_string_extend(op1_string, result_len, 0); /* account for the case where result_str == op1_string == op2_string and the realloc is done */ if (op1_string == op2_string) { if (free_op2_string) { From 5cad1a717692a5a3f14b889c6aee538a9bbcc374 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 16 May 2023 18:03:47 +0200 Subject: [PATCH 066/168] Fix GH-11245 (In some specific cases SWITCH with one default statement will cause segfault) The block optimizer pass allows the use of sources of the preceding block if the block is a follower and not a target. 
This causes issues when trying to remove FREE instructions: if the source is not in the block of the FREE, then the FREE and source are still removed. Therefore the other successor blocks, which must consume or FREE the temporary, will still contain the FREE opline. This opline will now refer to a temporary that doesn't exist anymore, which most of the time results in a crash. For these kind of non-local scenarios, we'll let the SSA based optimizations handle those cases. Closes GH-11251. --- NEWS | 2 ++ Zend/Optimizer/block_pass.c | 10 +++++++- ext/opcache/tests/opt/gh11245_1.phpt | 33 ++++++++++++++++++++++++++ ext/opcache/tests/opt/gh11245_2.phpt | 35 ++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 ext/opcache/tests/opt/gh11245_1.phpt create mode 100644 ext/opcache/tests/opt/gh11245_2.phpt diff --git a/NEWS b/NEWS index 32f2b0e8dc969..2e714c631ba7e 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,8 @@ PHP NEWS - Opcache: . Fixed bug GH-11134 (Incorrect match default branch optimization). (ilutov) . Fixed too wide OR and AND range inference. (nielsdos) + . Fixed bug GH-11245 (In some specific cases SWITCH with one default + statement will cause segfault). (nielsdos) - PGSQL: . Fixed parameter parsing of pg_lo_export(). (kocsismate) diff --git a/Zend/Optimizer/block_pass.c b/Zend/Optimizer/block_pass.c index 72ae012066094..b17fcde4bdf85 100644 --- a/Zend/Optimizer/block_pass.c +++ b/Zend/Optimizer/block_pass.c @@ -257,6 +257,10 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array break; case ZEND_FREE: + /* Note: Only remove the source if the source is local to this block. + * If it's not local, then the other blocks successors must also eventually either FREE or consume the temporary, + * hence removing the temporary is not safe in the general case, especially when other consumers are not FREE. 
+ * A FREE may not be removed without also removing the source's result, because otherwise that would cause a memory leak. */ if (opline->op1_type == IS_TMP_VAR) { src = VAR_SOURCE(opline->op1); if (src) { @@ -265,6 +269,7 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array case ZEND_BOOL_NOT: /* T = BOOL(X), FREE(T) => T = BOOL(X) */ /* The remaining BOOL is removed by a separate optimization */ + /* The source is a bool, no source removals take place, so this may be done non-locally. */ VAR_SOURCE(opline->op1) = NULL; MAKE_NOP(opline); ++(*opt_count); @@ -283,6 +288,9 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array case ZEND_PRE_DEC_OBJ: case ZEND_PRE_INC_STATIC_PROP: case ZEND_PRE_DEC_STATIC_PROP: + if (src < op_array->opcodes + block->start) { + break; + } src->result_type = IS_UNUSED; VAR_SOURCE(opline->op1) = NULL; MAKE_NOP(opline); @@ -295,7 +303,7 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array } else if (opline->op1_type == IS_VAR) { src = VAR_SOURCE(opline->op1); /* V = OP, FREE(V) => OP. 
NOP */ - if (src && + if (src >= op_array->opcodes + block->start && src->opcode != ZEND_FETCH_R && src->opcode != ZEND_FETCH_STATIC_PROP_R && src->opcode != ZEND_FETCH_DIM_R && diff --git a/ext/opcache/tests/opt/gh11245_1.phpt b/ext/opcache/tests/opt/gh11245_1.phpt new file mode 100644 index 0000000000000..eac085ac44025 --- /dev/null +++ b/ext/opcache/tests/opt/gh11245_1.phpt @@ -0,0 +1,33 @@ +--TEST-- +GH-11245: In some specific cases SWITCH with one default statement will cause segfault (VAR variation) +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.optimization_level=0x7FFFBFFF +opcache.opt_debug_level=0x20000 +opcache.preload= +--EXTENSIONS-- +opcache +--FILE-- + +--EXPECTF-- +$_main: + ; (lines=4, args=0, vars=1, tmps=1) + ; (after optimizer) + ; %s +0000 T1 = ISSET_ISEMPTY_CV (empty) CV0($xx) +0001 JMPNZ T1 0003 +0002 RETURN null +0003 RETURN int(1) + +xx: + ; (lines=1, args=0, vars=0, tmps=0) + ; (after optimizer) + ; %s +0000 RETURN string("somegarbage") diff --git a/ext/opcache/tests/opt/gh11245_2.phpt b/ext/opcache/tests/opt/gh11245_2.phpt new file mode 100644 index 0000000000000..8e967bf9f41be --- /dev/null +++ b/ext/opcache/tests/opt/gh11245_2.phpt @@ -0,0 +1,35 @@ +--TEST-- +GH-11245: In some specific cases SWITCH with one default statement will cause segfault (TMP variation) +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.optimization_level=0x7FFFBFFF +opcache.opt_debug_level=0x20000 +opcache.preload= +--EXTENSIONS-- +opcache +--FILE-- + +--EXPECTF-- +$_main: + ; (lines=7, args=0, vars=1, tmps=2) + ; (after optimizer) + ; %s +0000 T1 = PRE_INC_STATIC_PROP string("prop") string("X") +0001 T2 = ISSET_ISEMPTY_CV (empty) CV0($xx) +0002 JMPZ T2 0005 +0003 FREE T1 +0004 RETURN null +0005 FREE T1 +0006 RETURN int(1) +LIVE RANGES: + 1: 0001 - 0005 (tmp/var) From a2af8ac3710510e7f563f9b6455299dcc7a5459c Mon Sep 17 00:00:00 2001 From: LoongT4o <109949122+LoongT4o@users.noreply.github.com> Date: Tue, 23 May 2023 19:49:19 +0800 Subject: 
[PATCH 067/168] Fix the JIT buffer relocation failure at the corner case (#11266) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid missing possible candidates due to the large address range of the free segment. Eg,  48000000-49400000 r-xs 08000000 00:0f 39322841               segment1 7ffff2ec8000-7ffff2f49000 rw-p 00000000 00:00 0              segment2 7ffff6fae000-7ffff735c000 r-xp 00200000 08:02 11538515       /usr/local/sbin/php-fpm original code will miss the opportunity between [7ffff2ec** - 7ffff2ec8000]. Fix issue #11265. Signed-off-by: Long, Tao Signed-off-by: Dmitry Stogov --- ext/opcache/shared_alloc_mmap.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ext/opcache/shared_alloc_mmap.c b/ext/opcache/shared_alloc_mmap.c index 1414ef96149d0..ca492b122157d 100644 --- a/ext/opcache/shared_alloc_mmap.c +++ b/ext/opcache/shared_alloc_mmap.c @@ -67,8 +67,13 @@ static void *find_prefered_mmap_base(size_t requested_size) while (fgets(buffer, MAXPATHLEN, f) && sscanf(buffer, "%lx-%lx", &start, &end) == 2) { if ((uintptr_t)execute_ex >= start) { /* the current segment lays before PHP .text segment or PHP .text segment itself */ + /*Search for candidates at the end of the free segment near the .text segment + to prevent candidates from being missed due to large hole*/ if (last_free_addr + requested_size <= start) { - last_candidate = last_free_addr; + last_candidate = ZEND_MM_ALIGNED_SIZE_EX(start - requested_size, huge_page_size); + if (last_candidate + requested_size > start) { + last_candidate -= huge_page_size; + } } if ((uintptr_t)execute_ex < end) { /* the current segment is PHP .text segment itself */ @@ -117,7 +122,10 @@ static void *find_prefered_mmap_base(size_t requested_size) if ((uintptr_t)execute_ex >= e_start) { /* the current segment lays before PHP .text segment or PHP .text segment itself */ if (last_free_addr + requested_size <= e_start) { - last_candidate = 
last_free_addr; + last_candidate = ZEND_MM_ALIGNED_SIZE_EX(e_start - requested_size, huge_page_size); + if (last_candidate + requested_size > e_start) { + last_candidate -= huge_page_size; + } } if ((uintptr_t)execute_ex < e_end) { /* the current segment is PHP .text segment itself */ From f9117eb82487a0566c8fee3fda798cc2eb969d4a Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 20 May 2023 01:19:28 +0200 Subject: [PATCH 068/168] Fix GH-11281: DateTimeZone::getName() does not include seconds in offset If the seconds portion is non-zero, include the seconds in the output. Closes GH-11282. --- NEWS | 4 ++++ ext/date/php_date.c | 18 +++++++++++++++--- ext/date/tests/bug81097.phpt | 2 +- ext/date/tests/bug81565.phpt | 2 +- ext/date/tests/gh11281.phpt | 33 +++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 ext/date/tests/gh11281.phpt diff --git a/NEWS b/NEWS index 2e714c631ba7e..ff7857f62e9bd 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,10 @@ PHP NEWS . Fixed bug GH-11222 (foreach by-ref may jump over keys during a rehash). (Bob) +- Date: + . Fixed bug GH-11281 (DateTimeZone::getName() does not include seconds in + offset). (nielsdos) + - Exif: . Fixed bug GH-10834 (exif_read_data() cannot read smaller stream wrapper chunk sizes). 
(nielsdos) diff --git a/ext/date/php_date.c b/ext/date/php_date.c index a0625c96c9826..92ebd4f9c688b 100644 --- a/ext/date/php_date.c +++ b/ext/date/php_date.c @@ -1957,13 +1957,25 @@ static void php_timezone_to_string(php_timezone_obj *tzobj, zval *zv) ZVAL_STRING(zv, tzobj->tzi.tz->name); break; case TIMELIB_ZONETYPE_OFFSET: { - zend_string *tmpstr = zend_string_alloc(sizeof("UTC+05:00")-1, 0); timelib_sll utc_offset = tzobj->tzi.utc_offset; + int seconds = utc_offset % 60; + size_t size; + const char *format; + if (seconds == 0) { + size = sizeof("+05:00"); + format = "%c%02d:%02d"; + } else { + size = sizeof("+05:00:01"); + format = "%c%02d:%02d:%02d"; + } + zend_string *tmpstr = zend_string_alloc(size - 1, 0); - ZSTR_LEN(tmpstr) = snprintf(ZSTR_VAL(tmpstr), sizeof("+05:00"), "%c%02d:%02d", + /* Note: if seconds == 0, the seconds argument will be excessive and therefore ignored. */ + ZSTR_LEN(tmpstr) = snprintf(ZSTR_VAL(tmpstr), size, format, utc_offset < 0 ? '-' : '+', abs((int)(utc_offset / 3600)), - abs((int)(utc_offset % 3600) / 60)); + abs((int)(utc_offset % 3600) / 60), + abs(seconds)); ZVAL_NEW_STR(zv, tmpstr); } diff --git a/ext/date/tests/bug81097.phpt b/ext/date/tests/bug81097.phpt index 2cfd7e00a9dd4..7a3baf06a6389 100644 --- a/ext/date/tests/bug81097.phpt +++ b/ext/date/tests/bug81097.phpt @@ -10,5 +10,5 @@ object(DateTimeZone)#%d (%d) { ["timezone_type"]=> int(1) ["timezone"]=> - string(6) "+01:45" + string(9) "+01:45:30" } diff --git a/ext/date/tests/bug81565.phpt b/ext/date/tests/bug81565.phpt index 34f8d869fa32a..b7392540b8f68 100644 --- a/ext/date/tests/bug81565.phpt +++ b/ext/date/tests/bug81565.phpt @@ -17,4 +17,4 @@ DateTime::__set_state(array( 'timezone_type' => 1, 'timezone' => '+00:49', )) -+01:45 ++01:45:30 diff --git a/ext/date/tests/gh11281.phpt b/ext/date/tests/gh11281.phpt new file mode 100644 index 0000000000000..be1fe30b88c9d --- /dev/null +++ b/ext/date/tests/gh11281.phpt @@ -0,0 +1,33 @@ +--TEST-- +GH-11281 
(DateTimeZone::getName() does not include seconds in offset) +--FILE-- +getName(), "\n"; +$tz = new DateTimeZone('+03:00:00'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('-03:00:00'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('+03:00:01'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('-03:00:01'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('+03:00:58'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('-03:00:58'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('+03:00:59'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('-03:00:59'); +echo $tz->getName(), "\n"; +?> +--EXPECT-- ++03:00 ++03:00 +-03:00 ++03:00:01 +-03:00:01 ++03:00:58 +-03:00:58 ++03:00:59 +-03:00:59 From d5f68b50fc451ab0d8ef9bdef814fb93d84c554a Mon Sep 17 00:00:00 2001 From: Pierrick Charron Date: Tue, 23 May 2023 16:56:58 -0400 Subject: [PATCH 069/168] PHP-8.2 is now for PHP 8.2.8-dev --- NEWS | 5 ++++- Zend/zend.h | 2 +- configure.ac | 2 +- main/php_version.h | 6 +++--- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index 14ac06359aef7..89bf5c1dda162 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,9 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -?? ??? ????, PHP 8.2.7 +?? ??? ????, PHP 8.2.8 + + +01 Jun 2023, PHP 8.2.7 - Core: . Fixed bug GH-11152 (Unable to alias namespaces containing reserved class diff --git a/Zend/zend.h b/Zend/zend.h index cd10119651ba8..a0f87c6c9dba8 100644 --- a/Zend/zend.h +++ b/Zend/zend.h @@ -20,7 +20,7 @@ #ifndef ZEND_H #define ZEND_H -#define ZEND_VERSION "4.2.7-dev" +#define ZEND_VERSION "4.2.8-dev" #define ZEND_ENGINE_3 diff --git a/configure.ac b/configure.ac index e95a37fd428e0..59b92b958ac3b 100644 --- a/configure.ac +++ b/configure.ac @@ -17,7 +17,7 @@ dnl Basic autoconf initialization, generation of config.nice. 
dnl ---------------------------------------------------------------------------- AC_PREREQ([2.68]) -AC_INIT([PHP],[8.2.7-dev],[https://github.com/php/php-src/issues],[php],[https://www.php.net]) +AC_INIT([PHP],[8.2.8-dev],[https://github.com/php/php-src/issues],[php],[https://www.php.net]) AC_CONFIG_SRCDIR([main/php_version.h]) AC_CONFIG_AUX_DIR([build]) AC_PRESERVE_HELP_ORDER diff --git a/main/php_version.h b/main/php_version.h index f24a3aa56bcde..80517df950fb5 100644 --- a/main/php_version.h +++ b/main/php_version.h @@ -2,7 +2,7 @@ /* edit configure.ac to change version number */ #define PHP_MAJOR_VERSION 8 #define PHP_MINOR_VERSION 2 -#define PHP_RELEASE_VERSION 7 +#define PHP_RELEASE_VERSION 8 #define PHP_EXTRA_VERSION "-dev" -#define PHP_VERSION "8.2.7-dev" -#define PHP_VERSION_ID 80207 +#define PHP_VERSION "8.2.8-dev" +#define PHP_VERSION_ID 80208 From 2f2fd06be0d99feac2370412d1894317a817dfa9 Mon Sep 17 00:00:00 2001 From: Ben Ramsey Date: Tue, 23 May 2023 16:19:16 -0500 Subject: [PATCH 070/168] PHP-8.1 is now for PHP 8.1.21-dev --- NEWS | 6 +++++- Zend/zend.h | 2 +- configure.ac | 2 +- main/php_version.h | 6 +++--- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index ff7857f62e9bd..4e4935cbfdaa5 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,10 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -?? ??? ????, PHP 8.1.20 +?? ??? ????, PHP 8.1.21 + + + +08 Jun 2023, PHP 8.1.20 - Core: . 
Fixed bug GH-9068 (Conditional jump or move depends on uninitialised diff --git a/Zend/zend.h b/Zend/zend.h index 5c3c1de49d5ce..e3b67150b7817 100644 --- a/Zend/zend.h +++ b/Zend/zend.h @@ -20,7 +20,7 @@ #ifndef ZEND_H #define ZEND_H -#define ZEND_VERSION "4.1.20-dev" +#define ZEND_VERSION "4.1.21-dev" #define ZEND_ENGINE_3 diff --git a/configure.ac b/configure.ac index 166d313f133bc..661df89a03de5 100644 --- a/configure.ac +++ b/configure.ac @@ -17,7 +17,7 @@ dnl Basic autoconf initialization, generation of config.nice. dnl ---------------------------------------------------------------------------- AC_PREREQ([2.68]) -AC_INIT([PHP],[8.1.20-dev],[https://github.com/php/php-src/issues],[php],[https://www.php.net]) +AC_INIT([PHP],[8.1.21-dev],[https://github.com/php/php-src/issues],[php],[https://www.php.net]) AC_CONFIG_SRCDIR([main/php_version.h]) AC_CONFIG_AUX_DIR([build]) AC_PRESERVE_HELP_ORDER diff --git a/main/php_version.h b/main/php_version.h index 8b01d87fbacc1..0cf267f2e80b5 100644 --- a/main/php_version.h +++ b/main/php_version.h @@ -2,7 +2,7 @@ /* edit configure.ac to change version number */ #define PHP_MAJOR_VERSION 8 #define PHP_MINOR_VERSION 1 -#define PHP_RELEASE_VERSION 20 +#define PHP_RELEASE_VERSION 21 #define PHP_EXTRA_VERSION "-dev" -#define PHP_VERSION "8.1.20-dev" -#define PHP_VERSION_ID 80120 +#define PHP_VERSION "8.1.21-dev" +#define PHP_VERSION_ID 80121 From 2eee46e98977f1e5b8e45ebb70f6e9cdda14b90a Mon Sep 17 00:00:00 2001 From: Mikhail Galanin <195510+negram@users.noreply.github.com> Date: Wed, 24 May 2023 11:05:09 +0100 Subject: [PATCH 071/168] ext/session: pass ini options to extra processes in tests (#11294) --- ext/session/tests/session_regenerate_id_cookie.phpt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/session/tests/session_regenerate_id_cookie.phpt b/ext/session/tests/session_regenerate_id_cookie.phpt index a61e24d2a04d4..f2375b253ed56 100644 --- a/ext/session/tests/session_regenerate_id_cookie.phpt +++ 
b/ext/session/tests/session_regenerate_id_cookie.phpt @@ -52,7 +52,8 @@ var_dump(session_destroy()); ob_end_flush(); ?>'); -var_dump(`$php -n -d session.name=PHPSESSID $file`); +$extra_arguments = getenv('TEST_PHP_EXTRA_ARGS'); +var_dump(`$php $extra_arguments -d session.name=PHPSESSID $file`); unlink($file); From f5c54fd88b3694c601e58789196d46c29ebadd08 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Tue, 23 May 2023 12:10:27 +0200 Subject: [PATCH 072/168] Fix access on NULL pointer in array_merge_recursive() Closes GH-11303 --- NEWS | 3 ++- ...ray_merge_recursive_next_key_overflow.phpt | 25 +++++++++++++++++++ Zend/zend_execute.c | 2 +- Zend/zend_execute.h | 2 ++ ext/standard/array.c | 11 +++++++- 5 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 Zend/tests/array_merge_recursive_next_key_overflow.phpt diff --git a/NEWS b/NEWS index 4e4935cbfdaa5..553d8acca6811 100644 --- a/NEWS +++ b/NEWS @@ -2,7 +2,8 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.1.21 - +- Standard: + . Fix access on NULL pointer in array_merge_recursive(). 
(ilutov) 08 Jun 2023, PHP 8.1.20 diff --git a/Zend/tests/array_merge_recursive_next_key_overflow.phpt b/Zend/tests/array_merge_recursive_next_key_overflow.phpt new file mode 100644 index 0000000000000..f7d2872957837 --- /dev/null +++ b/Zend/tests/array_merge_recursive_next_key_overflow.phpt @@ -0,0 +1,25 @@ +--TEST-- +Access on NULL pointer in array_merge_recursive() +--FILE-- + [PHP_INT_MAX => null]], + ['' => [null]], + ); +} catch (Throwable $e) { + echo $e->getMessage(), "\n"; +} + +try { + array_merge_recursive( + ['foo' => [PHP_INT_MAX => null]], + ['foo' => str_repeat('a', 2)], + ); +} catch (Throwable $e) { + echo $e->getMessage(), "\n"; +} +?> +--EXPECT-- +Cannot add element to the array as the next element is already occupied +Cannot add element to the array as the next element is already occupied diff --git a/Zend/zend_execute.c b/Zend/zend_execute.c index ad4396e5d9e07..5c9a59bb953bf 100644 --- a/Zend/zend_execute.c +++ b/Zend/zend_execute.c @@ -2231,7 +2231,7 @@ static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_use_scalar_as_array(v zend_throw_error(NULL, "Cannot use a scalar value as an array"); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_cannot_add_element(void) +ZEND_API zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_cannot_add_element(void) { zend_throw_error(NULL, "Cannot add element to the array as the next element is already occupied"); } diff --git a/Zend/zend_execute.h b/Zend/zend_execute.h index bf7b2afdf88bc..dba57320108e0 100644 --- a/Zend/zend_execute.h +++ b/Zend/zend_execute.h @@ -76,6 +76,8 @@ ZEND_API ZEND_COLD void zend_wrong_string_offset_error(void); ZEND_API ZEND_COLD void ZEND_FASTCALL zend_readonly_property_modification_error(zend_property_info *info); ZEND_API ZEND_COLD void ZEND_FASTCALL zend_readonly_property_indirect_modification_error(zend_property_info *info); +ZEND_API ZEND_COLD void ZEND_FASTCALL zend_cannot_add_element(void); + ZEND_API bool zend_verify_scalar_type_hint(uint32_t type_mask, 
zval *arg, bool strict, bool is_internal_arg); ZEND_API ZEND_COLD void zend_verify_arg_error( const zend_function *zf, const zend_arg_info *arg_info, uint32_t arg_num, zval *value); diff --git a/ext/standard/array.c b/ext/standard/array.c index fb705cd34c4e8..bfd4f95d4137f 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -3595,7 +3595,12 @@ PHPAPI int php_array_merge_recursive(HashTable *dest, HashTable *src) /* {{{ */ } } else { Z_TRY_ADDREF_P(src_zval); - zend_hash_next_index_insert(Z_ARRVAL_P(dest_zval), src_zval); + zval *zv = zend_hash_next_index_insert(Z_ARRVAL_P(dest_zval), src_zval); + if (EXPECTED(!zv)) { + Z_TRY_DELREF_P(src_zval); + zend_cannot_add_element(); + return 0; + } } zval_ptr_dtor(&tmp); } else { @@ -3604,6 +3609,10 @@ PHPAPI int php_array_merge_recursive(HashTable *dest, HashTable *src) /* {{{ */ } } else { zval *zv = zend_hash_next_index_insert(dest, src_entry); + if (UNEXPECTED(!zv)) { + zend_cannot_add_element(); + return 0; + } zval_add_ref(zv); } } ZEND_HASH_FOREACH_END(); From 7c7698f754ea19435e5b0d2bcf096e446e0bb828 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Tue, 23 May 2023 13:55:50 +0200 Subject: [PATCH 073/168] Fix preg_replace_callback_array() pattern validation Closes GH-11301 --- NEWS | 3 +++ ext/pcre/php_pcre.c | 4 ++++ ...eplace_callback_array_numeric_index_error.phpt | 15 +++++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 ext/pcre/tests/preg_replace_callback_array_numeric_index_error.phpt diff --git a/NEWS b/NEWS index f352246fb79e7..10cda5af84c63 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.2.8 +- PCRE: + . Fix preg_replace_callback_array() pattern validation. (ilutov) + - Standard: . Fix access on NULL pointer in array_merge_recursive(). 
(ilutov) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 82560ddd83a61..ea5e6a01ff065 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2425,6 +2425,10 @@ PHP_FUNCTION(preg_replace_callback_array) zend_argument_type_error(1, "must contain only valid callbacks"); goto error; } + if (!str_idx_regex) { + zend_argument_type_error(1, "must contain only string patterns as keys"); + goto error; + } ZVAL_COPY_VALUE(&fci.function_name, replace); diff --git a/ext/pcre/tests/preg_replace_callback_array_numeric_index_error.phpt b/ext/pcre/tests/preg_replace_callback_array_numeric_index_error.phpt new file mode 100644 index 0000000000000..55dfabce8649c --- /dev/null +++ b/ext/pcre/tests/preg_replace_callback_array_numeric_index_error.phpt @@ -0,0 +1,15 @@ +--TEST-- +preg_replace_callback_array() invalid pattern +--FILE-- + function () {}], + 'a', +); +?> +--EXPECTF-- +Fatal error: Uncaught TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only string patterns as keys in %s:%d +Stack trace: +#0 %s(%d): preg_replace_callback_array(Array, 'a') +#1 {main} + thrown in %s on line %d From b2ec6c24f8b5d8a2767d1fc2557424bf68608b47 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Tue, 23 May 2023 09:54:54 +0200 Subject: [PATCH 074/168] Fix exception handling in array_multisort() Closes GH-11302 --- NEWS | 1 + Zend/tests/array_multisort_exception.phpt | 13 +++++++++++++ ext/standard/array.c | 7 +++++-- 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 Zend/tests/array_multisort_exception.phpt diff --git a/NEWS b/NEWS index 553d8acca6811..d529159d54fe6 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,7 @@ PHP NEWS - Standard: . Fix access on NULL pointer in array_merge_recursive(). (ilutov) + . Fix exception handling in array_multisort(). 
(ilutov) 08 Jun 2023, PHP 8.1.20 diff --git a/Zend/tests/array_multisort_exception.phpt b/Zend/tests/array_multisort_exception.phpt new file mode 100644 index 0000000000000..8ee6007745e03 --- /dev/null +++ b/Zend/tests/array_multisort_exception.phpt @@ -0,0 +1,13 @@ +--TEST-- +Exception handling in array_multisort() +--FILE-- + new DateTime(), 0 => new DateTime()]; +array_multisort($array, SORT_STRING); +?> +--EXPECTF-- +Fatal error: Uncaught Error: Object of class DateTime could not be converted to string in %s:%d +Stack trace: +#0 %s(%d): array_multisort(Array, 2) +#1 {main} + thrown in %s on line %d diff --git a/ext/standard/array.c b/ext/standard/array.c index bfd4f95d4137f..86d70a8844e30 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -5598,6 +5598,9 @@ PHP_FUNCTION(array_multisort) /* Do the actual sort magic - bada-bim, bada-boom. */ zend_sort(indirect, array_size, sizeof(Bucket *), php_multisort_compare, (swap_func_t)array_bucket_p_sawp); + if (EG(exception)) { + goto clean_up; + } /* Restructure the arrays based on sorted indirect - this is mostly taken from zend_hash_sort() function. */ for (i = 0; i < num_arrays; i++) { @@ -5623,15 +5626,15 @@ PHP_FUNCTION(array_multisort) zend_hash_rehash(hash); } } + RETVAL_TRUE; - /* Clean up. */ +clean_up: for (i = 0; i < array_size; i++) { efree(indirect[i]); } efree(indirect); efree(func); efree(arrays); - RETURN_TRUE; } /* }}} */ From 1c733c8bbc295dbb0634371cc40952c1528f9038 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Wed, 24 May 2023 16:35:37 +0200 Subject: [PATCH 075/168] Use zend_ast_apply in zend_eval_const_expr (#11261) Supporting new constant expressions requires remembering to add them to zend_eval_const_expr, even if it only evalutes its children. This is routinely forgotten, at least by me. Use zend_ast_apply to solve this generically. 
--- Zend/zend_compile.c | 70 ++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 46 deletions(-) diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index bb12e55848b2f..6b6948b896eb8 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -10557,7 +10557,7 @@ static zend_op *zend_delayed_compile_var(znode *result, zend_ast *ast, uint32_t } /* }}} */ -static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ +static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ { zend_ast *ast = *ast_ptr; zval result; @@ -10566,10 +10566,25 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ return; } + /* Set isset fetch indicator here, opcache disallows runtime altering of the AST */ + if (ast->kind == ZEND_AST_DIM + && (ast->attr & ZEND_DIM_IS) + && ast->child[0]->kind == ZEND_AST_DIM) { + ast->child[0]->attr |= ZEND_DIM_IS; + } + + /* We don't want to evaluate the class name of ZEND_AST_CLASS_NAME nodes. We need to be able to + * differenciate between literal class names and expressions that evaluate to strings. Strings + * are not actually allowed in ::class expressions. + * + * ZEND_AST_COALESCE and ZEND_AST_CONDITIONAL will manually evaluate only the children for the + * taken paths. 
*/ + if (ast->kind != ZEND_AST_CLASS_NAME && ast->kind != ZEND_AST_COALESCE && ast->kind != ZEND_AST_CONDITIONAL) { + zend_ast_apply(ast, zend_eval_const_expr_inner, ctx); + } + switch (ast->kind) { case ZEND_AST_BINARY_OP: - zend_eval_const_expr(&ast->child[0]); - zend_eval_const_expr(&ast->child[1]); if (ast->child[0]->kind != ZEND_AST_ZVAL || ast->child[1]->kind != ZEND_AST_ZVAL) { return; } @@ -10582,8 +10597,6 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ break; case ZEND_AST_GREATER: case ZEND_AST_GREATER_EQUAL: - zend_eval_const_expr(&ast->child[0]); - zend_eval_const_expr(&ast->child[1]); if (ast->child[0]->kind != ZEND_AST_ZVAL || ast->child[1]->kind != ZEND_AST_ZVAL) { return; } @@ -10595,8 +10608,6 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ case ZEND_AST_OR: { bool child0_is_true, child1_is_true; - zend_eval_const_expr(&ast->child[0]); - zend_eval_const_expr(&ast->child[1]); if (ast->child[0]->kind != ZEND_AST_ZVAL) { return; } @@ -10620,7 +10631,6 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ break; } case ZEND_AST_UNARY_OP: - zend_eval_const_expr(&ast->child[0]); if (ast->child[0]->kind != ZEND_AST_ZVAL) { return; } @@ -10631,7 +10641,6 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ break; case ZEND_AST_UNARY_PLUS: case ZEND_AST_UNARY_MINUS: - zend_eval_const_expr(&ast->child[0]); if (ast->child[0]->kind != ZEND_AST_ZVAL) { return; } @@ -10702,13 +10711,6 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ zend_error(E_COMPILE_ERROR, "Array and string offset access syntax with curly braces is no longer supported"); } - /* Set isset fetch indicator here, opcache disallows runtime altering of the AST */ - if ((ast->attr & ZEND_DIM_IS) && ast->child[0]->kind == ZEND_AST_DIM) { - ast->child[0]->attr |= ZEND_DIM_IS; - } - - zend_eval_const_expr(&ast->child[0]); - zend_eval_const_expr(&ast->child[1]); if (ast->child[0]->kind != ZEND_AST_ZVAL || ast->child[1]->kind 
!= ZEND_AST_ZVAL) { return; } @@ -10786,9 +10788,6 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ zend_ast *name_ast; zend_string *resolved_name; - zend_eval_const_expr(&ast->child[0]); - zend_eval_const_expr(&ast->child[1]); - if (UNEXPECTED(ast->child[1]->kind != ZEND_AST_ZVAL || Z_TYPE_P(zend_ast_get_zval(ast->child[1])) != IS_STRING)) { return; @@ -10818,33 +10817,6 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ } break; } - // TODO: We should probably use zend_ast_apply to recursively walk nodes without - // special handling. It is required that all nodes that are part of a const expr - // are visited. Probably we should be distinguishing evaluation of const expr and - // normal exprs here. - case ZEND_AST_ARG_LIST: - { - zend_ast_list *list = zend_ast_get_list(ast); - for (uint32_t i = 0; i < list->children; i++) { - zend_eval_const_expr(&list->child[i]); - } - return; - } - case ZEND_AST_NEW: - zend_eval_const_expr(&ast->child[0]); - zend_eval_const_expr(&ast->child[1]); - return; - case ZEND_AST_NAMED_ARG: - zend_eval_const_expr(&ast->child[1]); - return; - case ZEND_AST_CONST_ENUM_INIT: - zend_eval_const_expr(&ast->child[2]); - return; - case ZEND_AST_PROP: - case ZEND_AST_NULLSAFE_PROP: - zend_eval_const_expr(&ast->child[0]); - zend_eval_const_expr(&ast->child[1]); - return; default: return; } @@ -10853,3 +10825,9 @@ static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ *ast_ptr = zend_ast_create_zval(&result); } /* }}} */ + + +static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ +{ + zend_eval_const_expr_inner(ast_ptr, NULL); +} From 6267601f84981577d2b9faaa033b776dddfbfb75 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Tue, 23 May 2023 19:50:09 +0200 Subject: [PATCH 076/168] Fix allocation loop in zend_shared_alloc_startup() The break is outside the if, so if it succeeds or not this will always stop after the first loop iteration instead of trying more 
allocators if the first one fails. Closes GH-11306. --- NEWS | 3 +++ ext/opcache/zend_shared_alloc.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index d529159d54fe6..332bd89835b34 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.1.21 +- Opcache: + . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) + - Standard: . Fix access on NULL pointer in array_merge_recursive(). (ilutov) . Fix exception handling in array_multisort(). (ilutov) diff --git a/ext/opcache/zend_shared_alloc.c b/ext/opcache/zend_shared_alloc.c index 53c7b61ff3f36..be931f526c1b1 100644 --- a/ext/opcache/zend_shared_alloc.c +++ b/ext/opcache/zend_shared_alloc.c @@ -179,8 +179,8 @@ int zend_shared_alloc_startup(size_t requested_size, size_t reserved_size) res = zend_shared_alloc_try(he, requested_size, &ZSMMG(shared_segments), &ZSMMG(shared_segments_count), &error_in); if (res) { /* this model works! 
*/ + break; } - break; } } } From 0b1d750d91f75cf62f6ba1efb6df816de71ff81c Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Thu, 11 Aug 2022 17:42:54 +0200 Subject: [PATCH 077/168] Allow arbitrary expressions in static variable initializer Closes GH-9301 --- UPGRADING | 2 + Zend/Optimizer/block_pass.c | 1 + Zend/Optimizer/dce.c | 13 +- Zend/Optimizer/dfa_pass.c | 1 + Zend/Optimizer/pass1.c | 1 + Zend/Optimizer/sccp.c | 2 + Zend/Optimizer/zend_cfg.c | 2 + Zend/Optimizer/zend_dfg.c | 1 + Zend/Optimizer/zend_inference.c | 12 +- Zend/Optimizer/zend_optimizer.c | 4 + Zend/Optimizer/zend_ssa.c | 1 + Zend/tests/035.phpt | 2 +- Zend/tests/bug79778.phpt | 51 +- Zend/tests/constexpr/new_anon_class.phpt | 9 +- Zend/tests/constexpr/new_arg_unpack.phpt | 9 +- .../constexpr/new_dynamic_class_name.phpt | 9 +- .../new_invalid_operation_in_arg.phpt | 13 +- Zend/tests/constexpr/new_static.phpt | 18 +- Zend/tests/static_variable_func_call.phpt | 23 + Zend/tests/static_variables_closure_bind.phpt | 14 + Zend/tests/static_variables_destructor.phpt | 36 + Zend/tests/static_variables_global.phpt | 22 + Zend/tests/static_variables_global_2.phpt | 26 + Zend/tests/static_variables_recursive.phpt | 29 + Zend/zend_closures.c | 10 +- Zend/zend_compile.c | 54 +- Zend/zend_opcode.c | 1 + Zend/zend_types.h | 8 + Zend/zend_vm_def.h | 47 +- Zend/zend_vm_execute.h | 337 ++++--- Zend/zend_vm_handlers.h | 919 +++++++++--------- Zend/zend_vm_opcodes.c | 8 +- Zend/zend_vm_opcodes.h | 3 +- ext/opcache/jit/zend_jit.c | 3 + ext/opcache/jit/zend_jit_trace.c | 1 + ext/opcache/tests/optimize_static_002.phpt | 22 + ext/opcache/tests/optimize_static_003.phpt | 24 + ext/opcache/zend_file_cache.c | 2 + ext/opcache/zend_persist.c | 1 + ext/reflection/php_reflection.c | 6 - ..._getStaticVariables_basic_extra_bleed.phpt | 31 + ext/reflection/tests/new_in_constexpr.phpt | 7 + sapi/phpdbg/phpdbg.c | 2 +- tests/lang/static_basic_002.phpt | 21 +- 44 files changed, 1118 insertions(+), 690 deletions(-) create mode 100644 
Zend/tests/static_variable_func_call.phpt create mode 100644 Zend/tests/static_variables_closure_bind.phpt create mode 100644 Zend/tests/static_variables_destructor.phpt create mode 100644 Zend/tests/static_variables_global.phpt create mode 100644 Zend/tests/static_variables_global_2.phpt create mode 100644 Zend/tests/static_variables_recursive.phpt create mode 100644 ext/opcache/tests/optimize_static_002.phpt create mode 100644 ext/opcache/tests/optimize_static_003.phpt create mode 100644 ext/reflection/tests/ReflectionMethod_getStaticVariables_basic_extra_bleed.phpt diff --git a/UPGRADING b/UPGRADING index b7c70c34a96d1..79f86cb148d6f 100644 --- a/UPGRADING +++ b/UPGRADING @@ -41,6 +41,8 @@ PHP 8.3 UPGRADE NOTES property to the class directly without traits. . Assigning a negative index n to an empty array will now make sure that the next index is n+1 instead of 0. + . Static variable initializers can now contain arbitrary expressions. + RFC: https://wiki.php.net/rfc/arbitrary_static_variable_initializers - FFI: . C functions that have a return type of void now return null instead of diff --git a/Zend/Optimizer/block_pass.c b/Zend/Optimizer/block_pass.c index f2d3fb8419e7a..ccb32e2e453d4 100644 --- a/Zend/Optimizer/block_pass.c +++ b/Zend/Optimizer/block_pass.c @@ -1010,6 +1010,7 @@ static void assemble_code_blocks(zend_cfg *cfg, zend_op_array *op_array, zend_op case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: ZEND_SET_OP_JMP_ADDR(opline, opline->op2, new_opcodes + blocks[b->successors[0]].start); break; case ZEND_CATCH: diff --git a/Zend/Optimizer/dce.c b/Zend/Optimizer/dce.c index 15c9cf5e6e22a..219b139cfc874 100644 --- a/Zend/Optimizer/dce.c +++ b/Zend/Optimizer/dce.c @@ -145,6 +145,7 @@ static inline bool may_have_side_effects( case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: /* For our purposes a jumps and branches are side effects. 
*/ return 1; case ZEND_BEGIN_SILENCE: @@ -245,15 +246,9 @@ static inline bool may_have_side_effects( if ((opline->extended_value & (ZEND_BIND_IMPLICIT|ZEND_BIND_EXPLICIT))) { return 1; } - - if ((opline->extended_value & ZEND_BIND_REF) != 0) { - zval *value = - (zval*)((char*)op_array->static_variables->arData + - (opline->extended_value & ~ZEND_BIND_REF)); - if (Z_TYPE_P(value) == IS_CONSTANT_AST) { - /* AST may contain undefined constants */ - return 1; - } + /* Modifies static variables which are observable through reflection */ + if ((opline->extended_value & ZEND_BIND_REF) && opline->op2_type != IS_UNUSED) { + return 1; } } return 0; diff --git a/Zend/Optimizer/dfa_pass.c b/Zend/Optimizer/dfa_pass.c index 3ed507c35b070..b1f568da5d920 100644 --- a/Zend/Optimizer/dfa_pass.c +++ b/Zend/Optimizer/dfa_pass.c @@ -652,6 +652,7 @@ static void zend_ssa_replace_control_link(zend_op_array *op_array, zend_ssa *ssa case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: if (ZEND_OP2_JMP_ADDR(opline) == op_array->opcodes + old->start) { ZEND_SET_OP_JMP_ADDR(opline, opline->op2, op_array->opcodes + dst->start); } diff --git a/Zend/Optimizer/pass1.c b/Zend/Optimizer/pass1.c index 00bc30160ab7b..818829fcdf6ba 100644 --- a/Zend/Optimizer/pass1.c +++ b/Zend/Optimizer/pass1.c @@ -354,6 +354,7 @@ void zend_optimizer_pass1(zend_op_array *op_array, zend_optimizer_ctx *ctx) case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: case ZEND_VERIFY_NEVER_TYPE: + case ZEND_BIND_INIT_STATIC_OR_JMP: collect_constants = 0; break; } diff --git a/Zend/Optimizer/sccp.c b/Zend/Optimizer/sccp.c index f6144f87b4fac..c35c60fd6be93 100644 --- a/Zend/Optimizer/sccp.c +++ b/Zend/Optimizer/sccp.c @@ -249,6 +249,7 @@ static bool can_replace_op1( case ZEND_ROPE_ADD: case ZEND_ROPE_END: case ZEND_BIND_STATIC: + case ZEND_BIND_INIT_STATIC_OR_JMP: case ZEND_BIND_GLOBAL: case ZEND_MAKE_REF: case ZEND_UNSET_CV: @@ -1773,6 +1774,7 @@ static void sccp_mark_feasible_successors( 
case ZEND_CATCH: case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: + case ZEND_BIND_INIT_STATIC_OR_JMP: scdf_mark_edge_feasible(scdf, block_num, block->successors[0]); scdf_mark_edge_feasible(scdf, block_num, block->successors[1]); return; diff --git a/Zend/Optimizer/zend_cfg.c b/Zend/Optimizer/zend_cfg.c index 219738e6f692b..ce7d078bb957e 100644 --- a/Zend/Optimizer/zend_cfg.c +++ b/Zend/Optimizer/zend_cfg.c @@ -369,6 +369,7 @@ ZEND_API void zend_build_cfg(zend_arena **arena, const zend_op_array *op_array, case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: BB_START(OP_JMP_ADDR(opline, opline->op2) - op_array->opcodes); BB_START(i + 1); break; @@ -522,6 +523,7 @@ ZEND_API void zend_build_cfg(zend_arena **arena, const zend_op_array *op_array, case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: block->successors_count = 2; block->successors[0] = block_map[OP_JMP_ADDR(opline, opline->op2) - op_array->opcodes]; block->successors[1] = j + 1; diff --git a/Zend/Optimizer/zend_dfg.c b/Zend/Optimizer/zend_dfg.c index 2207b594b85a5..93faec6a4d86d 100644 --- a/Zend/Optimizer/zend_dfg.c +++ b/Zend/Optimizer/zend_dfg.c @@ -150,6 +150,7 @@ static zend_always_inline void _zend_dfg_add_use_def_op(const zend_op_array *op_ case ZEND_POST_DEC: case ZEND_BIND_GLOBAL: case ZEND_BIND_STATIC: + case ZEND_BIND_INIT_STATIC_OR_JMP: case ZEND_SEND_VAR_NO_REF: case ZEND_SEND_VAR_NO_REF_EX: case ZEND_SEND_VAR_EX: diff --git a/Zend/Optimizer/zend_inference.c b/Zend/Optimizer/zend_inference.c index afe1c2339ed3a..f7298e7b43dba 100644 --- a/Zend/Optimizer/zend_inference.c +++ b/Zend/Optimizer/zend_inference.c @@ -2944,6 +2944,10 @@ static zend_always_inline zend_result _zend_update_type_info( } UPDATE_SSA_TYPE(tmp, ssa_op->op1_def); break; + case ZEND_BIND_INIT_STATIC_OR_JMP: + tmp = MAY_BE_UNDEF | MAY_BE_ANY | MAY_BE_ARRAY_KEY_ANY | MAY_BE_ARRAY_OF_ANY | MAY_BE_ARRAY_OF_REF | MAY_BE_REF; + 
UPDATE_SSA_TYPE(tmp, ssa_op->op1_def); + break; case ZEND_SEND_VAR: if (ssa_op->op1_def >= 0) { tmp = t1; @@ -4363,6 +4367,7 @@ static void zend_mark_cv_references(const zend_op_array *op_array, const zend_sc case ZEND_SEND_REF: case ZEND_SEND_VAR_EX: case ZEND_SEND_FUNC_ARG: + case ZEND_BIND_INIT_STATIC_OR_JMP: break; case ZEND_INIT_ARRAY: case ZEND_ADD_ARRAY_ELEMENT: @@ -4518,6 +4523,7 @@ ZEND_API bool zend_may_throw_ex(const zend_op *opline, const zend_ssa_op *ssa_op case ZEND_ASSIGN_REF: case ZEND_BIND_GLOBAL: case ZEND_BIND_STATIC: + case ZEND_BIND_INIT_STATIC_OR_JMP: case ZEND_FETCH_DIM_IS: case ZEND_FETCH_OBJ_IS: case ZEND_SEND_REF: @@ -4755,14 +4761,12 @@ ZEND_API bool zend_may_throw_ex(const zend_op *opline, const zend_ssa_op *ssa_op case ZEND_UNSET_VAR: return (t1 & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_RESOURCE|MAY_BE_ARRAY_OF_ARRAY)); case ZEND_BIND_STATIC: + case ZEND_BIND_INIT_STATIC_OR_JMP: if (t1 & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_RESOURCE|MAY_BE_ARRAY_OF_ARRAY)) { /* Destructor may throw. */ return 1; - } else { - zval *value = (zval*)((char*)op_array->static_variables->arData + (opline->extended_value & ~(ZEND_BIND_REF|ZEND_BIND_IMPLICIT|ZEND_BIND_EXPLICIT))); - /* May throw if initializer is CONSTANT_AST. 
*/ - return Z_TYPE_P(value) == IS_CONSTANT_AST; } + return 0; case ZEND_ASSIGN_DIM: if ((opline+1)->op1_type == IS_CV) { if (_ssa_op1_info(op_array, ssa, opline+1, ssa_op+1) & MAY_BE_UNDEF) { diff --git a/Zend/Optimizer/zend_optimizer.c b/Zend/Optimizer/zend_optimizer.c index 956a13d658399..463bbbfa84b45 100644 --- a/Zend/Optimizer/zend_optimizer.c +++ b/Zend/Optimizer/zend_optimizer.c @@ -720,6 +720,7 @@ void zend_optimizer_migrate_jump(zend_op_array *op_array, zend_op *new_opline, z case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: ZEND_SET_OP_JMP_ADDR(new_opline, new_opline->op2, ZEND_OP2_JMP_ADDR(opline)); break; case ZEND_FE_FETCH_R: @@ -763,6 +764,7 @@ void zend_optimizer_shift_jump(zend_op_array *op_array, zend_op *opline, uint32_ case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: ZEND_SET_OP_JMP_ADDR(opline, opline->op2, ZEND_OP2_JMP_ADDR(opline) - shiftlist[ZEND_OP2_JMP_ADDR(opline) - op_array->opcodes]); break; case ZEND_CATCH: @@ -1157,6 +1159,7 @@ static void zend_redo_pass_two(zend_op_array *op_array) case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: opline->op2.jmp_addr = &op_array->opcodes[opline->op2.jmp_addr - old_opcodes]; break; case ZEND_CATCH: @@ -1277,6 +1280,7 @@ static void zend_redo_pass_two_ex(zend_op_array *op_array, zend_ssa *ssa) case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: opline->op2.jmp_addr = &op_array->opcodes[opline->op2.jmp_addr - old_opcodes]; break; case ZEND_CATCH: diff --git a/Zend/Optimizer/zend_ssa.c b/Zend/Optimizer/zend_ssa.c index 67165a9b26d7a..186af8674588b 100644 --- a/Zend/Optimizer/zend_ssa.c +++ b/Zend/Optimizer/zend_ssa.c @@ -679,6 +679,7 @@ static zend_always_inline int _zend_ssa_rename_op(const zend_op_array *op_array, case ZEND_POST_DEC: case ZEND_BIND_GLOBAL: case ZEND_BIND_STATIC: + case 
ZEND_BIND_INIT_STATIC_OR_JMP: case ZEND_SEND_VAR_NO_REF: case ZEND_SEND_VAR_NO_REF_EX: case ZEND_SEND_VAR_EX: diff --git a/Zend/tests/035.phpt b/Zend/tests/035.phpt index 75df786e88fe1..a0b1ed69afc9d 100644 --- a/Zend/tests/035.phpt +++ b/Zend/tests/035.phpt @@ -3,7 +3,7 @@ Using 'static' and 'global' in global scope --FILE-- getMessage(), "\n"; +} + +var_dump($closure1); +print_r($closure1); + +const CONST_REF = 'foo'; +$closure1(); var_dump($closure1); print_r($closure1); + ?> --EXPECT-- object(Closure)#1 (1) { ["static"]=> array(1) { ["var"]=> - string(14) "" + NULL + } +} +Closure Object +( + [static] => Array + ( + [var] => + ) + +) +Undefined constant "CONST_REF" +object(Closure)#1 (1) { + ["static"]=> + array(1) { + ["var"]=> + NULL + } +} +Closure Object +( + [static] => Array + ( + [var] => + ) + +) +object(Closure)#1 (1) { + ["static"]=> + array(1) { + ["var"]=> + string(3) "foo" } } Closure Object ( [static] => Array ( - [var] => + [var] => foo ) ) diff --git a/Zend/tests/constexpr/new_anon_class.phpt b/Zend/tests/constexpr/new_anon_class.phpt index 6f2b433136d40..49fc120f6b92b 100644 --- a/Zend/tests/constexpr/new_anon_class.phpt +++ b/Zend/tests/constexpr/new_anon_class.phpt @@ -1,10 +1,13 @@ --TEST-- -New with anonymous class is not supported in constant expressions +New with anonymous class works --FILE-- ---EXPECTF-- -Fatal error: Cannot use anonymous class in constant expression in %s on line %d +--EXPECT-- +object(class@anonymous)#1 (0) { +} diff --git a/Zend/tests/constexpr/new_arg_unpack.phpt b/Zend/tests/constexpr/new_arg_unpack.phpt index 303116054ede9..4584a2e5601c2 100644 --- a/Zend/tests/constexpr/new_arg_unpack.phpt +++ b/Zend/tests/constexpr/new_arg_unpack.phpt @@ -1,10 +1,13 @@ --TEST-- -Argument unpacking in new arguments in const expr (not yet supported) +Argument unpacking in new arguments in static variable --FILE-- ---EXPECTF-- -Fatal error: Argument unpacking in constant expressions is not supported in %s on line %d +--EXPECT-- 
+object(stdClass)#1 (0) { +} diff --git a/Zend/tests/constexpr/new_dynamic_class_name.phpt b/Zend/tests/constexpr/new_dynamic_class_name.phpt index 645e3b7240b3b..60ae3ea4195d7 100644 --- a/Zend/tests/constexpr/new_dynamic_class_name.phpt +++ b/Zend/tests/constexpr/new_dynamic_class_name.phpt @@ -3,8 +3,13 @@ Dynamic class name in new is not supported --FILE-- ---EXPECTF-- -Fatal error: Cannot use dynamic class name in constant expression in %s on line %d +--EXPECT-- +object(Foo)#1 (0) { +} diff --git a/Zend/tests/constexpr/new_invalid_operation_in_arg.phpt b/Zend/tests/constexpr/new_invalid_operation_in_arg.phpt index 0537d21874e53..fd562df49ad27 100644 --- a/Zend/tests/constexpr/new_invalid_operation_in_arg.phpt +++ b/Zend/tests/constexpr/new_invalid_operation_in_arg.phpt @@ -3,8 +3,17 @@ Invalid operation in new arg in const expr --FILE-- ---EXPECTF-- -Fatal error: Constant expression contains invalid operations in %s on line %d +--EXPECT-- +array(3) { + [0]=> + int(1) + [1]=> + int(2) + [2]=> + int(3) +} diff --git a/Zend/tests/constexpr/new_static.phpt b/Zend/tests/constexpr/new_static.phpt index a626b2982475d..b4f7a332cf6a7 100644 --- a/Zend/tests/constexpr/new_static.phpt +++ b/Zend/tests/constexpr/new_static.phpt @@ -3,8 +3,20 @@ Static in new is not supported --FILE-- ---EXPECTF-- -Fatal error: "static" is not allowed in compile-time constants in %s on line %d +--EXPECT-- +object(Foo)#1 (0) { +} +object(Foo)#1 (0) { +} diff --git a/Zend/tests/static_variable_func_call.phpt b/Zend/tests/static_variable_func_call.phpt new file mode 100644 index 0000000000000..7a0ec74be5df3 --- /dev/null +++ b/Zend/tests/static_variable_func_call.phpt @@ -0,0 +1,23 @@ +--TEST-- +Static variable initializer with function call +--FILE-- + +--EXPECT-- +bar() called +bar +bar diff --git a/Zend/tests/static_variables_closure_bind.phpt b/Zend/tests/static_variables_closure_bind.phpt new file mode 100644 index 0000000000000..ee3fce78f2fdc --- /dev/null +++ 
b/Zend/tests/static_variables_closure_bind.phpt @@ -0,0 +1,14 @@ +--TEST-- +Static variable can't override bound closure variables +--FILE-- + +--EXPECTF-- +Fatal error: Duplicate declaration of static variable $a in %s on line %d diff --git a/Zend/tests/static_variables_destructor.phpt b/Zend/tests/static_variables_destructor.phpt new file mode 100644 index 0000000000000..9128c86e6b1bf --- /dev/null +++ b/Zend/tests/static_variables_destructor.phpt @@ -0,0 +1,36 @@ +--TEST-- +Static variable assign triggering destructor +--FILE-- +getMessage(), "\n"; +} +foo(false); + +?> +--EXPECT-- +bar() called +__destruct() called +int(42) diff --git a/Zend/tests/static_variables_global.phpt b/Zend/tests/static_variables_global.phpt new file mode 100644 index 0000000000000..27f9128fb502e --- /dev/null +++ b/Zend/tests/static_variables_global.phpt @@ -0,0 +1,22 @@ +--TEST-- +Global can override static variable +--FILE-- + +--EXPECT-- +int(42) +int(41) +int(42) +int(41) diff --git a/Zend/tests/static_variables_global_2.phpt b/Zend/tests/static_variables_global_2.phpt new file mode 100644 index 0000000000000..b37da7583a8d4 --- /dev/null +++ b/Zend/tests/static_variables_global_2.phpt @@ -0,0 +1,26 @@ +--TEST-- +Static variable can override global +--FILE-- + +--EXPECT-- +int(42) +int(41) +int(42) +int(42) +int(41) +int(42) diff --git a/Zend/tests/static_variables_recursive.phpt b/Zend/tests/static_variables_recursive.phpt new file mode 100644 index 0000000000000..8b4eda10f9512 --- /dev/null +++ b/Zend/tests/static_variables_recursive.phpt @@ -0,0 +1,29 @@ +--TEST-- +Static variable with recursive initializer +--FILE-- + +--EXPECT-- +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" diff --git a/Zend/zend_closures.c b/Zend/zend_closures.c index 
4c326a6c79b12..cec392dce9b21 100644 --- a/Zend/zend_closures.c +++ b/Zend/zend_closures.c @@ -597,14 +597,10 @@ static HashTable *zend_closure_get_debug_info(zend_object *object, int *is_temp) ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(static_variables, key, var) { zval copy; - if (Z_TYPE_P(var) == IS_CONSTANT_AST) { - ZVAL_STRING(&copy, "<constant ast>"); - } else { - if (Z_ISREF_P(var) && Z_REFCOUNT_P(var) == 1) { - var = Z_REFVAL_P(var); - } - ZVAL_COPY(&copy, var); + if (Z_ISREF_P(var) && Z_REFCOUNT_P(var) == 1) { + var = Z_REFVAL_P(var); } + ZVAL_COPY(&copy, var); zend_hash_add_new(Z_ARRVAL(val), key, &copy); } ZEND_HASH_FOREACH_END(); diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 6b6948b896eb8..eeb940060bba4 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -2293,6 +2293,7 @@ static inline void zend_update_jump_target(uint32_t opnum_jump, uint32_t opnum_t case ZEND_JMP_SET: case ZEND_COALESCE: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: opline->op2.opline_num = opnum_target; break; EMPTY_SWITCH_DEFAULT_CASE() @@ -4885,16 +4886,55 @@ static void zend_compile_static_var_common(zend_string *var_name, zval *value, u static void zend_compile_static_var(zend_ast *ast) /* {{{ */ { zend_ast *var_ast = ast->child[0]; - zend_ast **value_ast_ptr = &ast->child[1]; - zval value_zv; + zend_string *var_name = zend_ast_get_str(var_ast); - if (*value_ast_ptr) { - zend_const_expr_to_zval(&value_zv, value_ast_ptr, /* allow_dynamic */ true); - } else { - ZVAL_NULL(&value_zv); + if (zend_string_equals_literal(var_name, "this")) { + zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as static variable"); + } + + if (!CG(active_op_array)->static_variables) { + if (CG(active_op_array)->scope) { + CG(active_op_array)->scope->ce_flags |= ZEND_HAS_STATIC_IN_METHODS; + } + CG(active_op_array)->static_variables = zend_new_array(8); + } + + if (zend_hash_exists(CG(active_op_array)->static_variables, var_name)) { + zend_error_noreturn(E_COMPILE_ERROR, "Duplicate declaration of static
variable $%s", ZSTR_VAL(var_name)); } - zend_compile_static_var_common(zend_ast_get_str(var_ast), &value_zv, ZEND_BIND_REF); + zend_eval_const_expr(&ast->child[1]); + zend_ast *value_ast = ast->child[1]; + + if (!value_ast || value_ast->kind == ZEND_AST_ZVAL) { + zval *value_zv = value_ast + ? zend_ast_get_zval(value_ast) + : &EG(uninitialized_zval); + Z_TRY_ADDREF_P(value_zv); + zend_compile_static_var_common(var_name, value_zv, ZEND_BIND_REF); + } else { + zend_op *opline; + + zval *placeholder_ptr = zend_hash_update(CG(active_op_array)->static_variables, var_name, &EG(uninitialized_zval)); + Z_TYPE_EXTRA_P(placeholder_ptr) |= IS_STATIC_VAR_UNINITIALIZED; + uint32_t placeholder_offset = (uint32_t)((char*)placeholder_ptr - (char*)CG(active_op_array)->static_variables->arData); + + uint32_t static_def_jmp_opnum = get_next_op_number(); + opline = zend_emit_op(NULL, ZEND_BIND_INIT_STATIC_OR_JMP, NULL, NULL); + opline->op1_type = IS_CV; + opline->op1.var = lookup_cv(var_name); + opline->extended_value = placeholder_offset; + + znode expr; + zend_compile_expr(&expr, value_ast); + + opline = zend_emit_op(NULL, ZEND_BIND_STATIC, NULL, &expr); + opline->op1_type = IS_CV; + opline->op1.var = lookup_cv(var_name); + opline->extended_value = placeholder_offset | ZEND_BIND_REF; + + zend_update_jump_target_to_next(static_def_jmp_opnum); + } } /* }}} */ diff --git a/Zend/zend_opcode.c b/Zend/zend_opcode.c index 5db656b46c268..dc968bc395303 100644 --- a/Zend/zend_opcode.c +++ b/Zend/zend_opcode.c @@ -1122,6 +1122,7 @@ ZEND_API void pass_two(zend_op_array *op_array) case ZEND_FE_RESET_R: case ZEND_FE_RESET_RW: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: ZEND_PASS_TWO_UPDATE_JMP_TARGET(op_array, opline, opline->op2); break; case ZEND_ASSERT_CHECK: diff --git a/Zend/zend_types.h b/Zend/zend_types.h index c341ffa0b4d8c..af5f3821723fa 100644 --- a/Zend/zend_types.h +++ b/Zend/zend_types.h @@ -626,6 +626,9 @@ static zend_always_inline uint8_t zval_get_type(const zval* pz) 
{ #define Z_TYPE_FLAGS(zval) (zval).u1.v.type_flags #define Z_TYPE_FLAGS_P(zval_p) Z_TYPE_FLAGS(*(zval_p)) +#define Z_TYPE_EXTRA(zval) (zval).u1.v.u.extra +#define Z_TYPE_EXTRA_P(zval_p) Z_TYPE_EXTRA(*(zval_p)) + #define Z_TYPE_INFO(zval) (zval).u1.type_info #define Z_TYPE_INFO_P(zval_p) Z_TYPE_INFO(*(zval_p)) @@ -752,6 +755,11 @@ static zend_always_inline uint32_t zval_gc_info(uint32_t gc_type_info) { /* zval.u1.v.type_flags */ #define IS_TYPE_REFCOUNTED (1<<0) #define IS_TYPE_COLLECTABLE (1<<1) +/* Used for static variables to check if they have been initialized. We can't use IS_UNDEF because + * we can't store IS_UNDEF zvals in the static_variables HashTable. This needs to live in type_info + * so that the ZEND_ASSIGN overrides it but is moved to extra to avoid breaking the Z_REFCOUNTED() + * optimization that only checks for Z_TYPE_FLAGS() without `& (IS_TYPE_COLLECTABLE|IS_TYPE_REFCOUNTED)`. */ +#define IS_STATIC_VAR_UNINITIALIZED (1<<0) #if 1 /* This optimized version assumes that we have a single "type_flag" */ diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index 0b6604217fa35..7e86b29c6b4f7 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -8922,7 +8922,7 @@ ZEND_VM_HANDLER(182, ZEND_BIND_LEXICAL, TMP, CV, REF) ZEND_VM_NEXT_OPCODE(); } -ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, UNUSED, REF) +ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, ANY, REF) { USE_OPLINE HashTable *ht; @@ -8942,18 +8942,18 @@ ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, UNUSED, REF) SAVE_OPLINE(); if (opline->extended_value & ZEND_BIND_REF) { - if (Z_TYPE_P(value) == IS_CONSTANT_AST) { - if (UNEXPECTED(zval_update_constant_ex(value, EX(func)->op_array.scope) != SUCCESS)) { - HANDLE_EXCEPTION(); - } - } - i_zval_ptr_dtor(variable_ptr); if (UNEXPECTED(!Z_ISREF_P(value))) { zend_reference *ref = (zend_reference*)emalloc(sizeof(zend_reference)); GC_SET_REFCOUNT(ref, 2); GC_TYPE_INFO(ref) = GC_REFERENCE; - ZVAL_COPY_VALUE(&ref->val, value); + if (OP2_TYPE == IS_UNUSED) { + 
ZVAL_COPY_VALUE(&ref->val, value); + } else { + ZEND_ASSERT(!Z_REFCOUNTED_P(value)); + ZVAL_COPY(&ref->val, GET_OP2_ZVAL_PTR_DEREF(BP_VAR_R)); + FREE_OP2(); + } ref->sources.ptr = NULL; Z_REF_P(value) = ref; Z_TYPE_INFO_P(value) = IS_REFERENCE_EX; @@ -8961,6 +8961,9 @@ ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, UNUSED, REF) } else { Z_ADDREF_P(value); ZVAL_REF(variable_ptr, Z_REF_P(value)); + if (OP2_TYPE != IS_UNUSED) { + FREE_OP2(); + } } } else { i_zval_ptr_dtor(variable_ptr); @@ -8970,6 +8973,34 @@ ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, UNUSED, REF) ZEND_VM_NEXT_OPCODE_CHECK_EXCEPTION(); } +ZEND_VM_HANDLER(203, ZEND_BIND_INIT_STATIC_OR_JMP, CV, JMP_ADDR) +{ + USE_OPLINE + HashTable *ht; + zval *value; + zval *variable_ptr; + + variable_ptr = GET_OP1_ZVAL_PTR_PTR_UNDEF(BP_VAR_W); + + ht = ZEND_MAP_PTR_GET(EX(func)->op_array.static_variables_ptr); + if (!ht) { + ZEND_VM_NEXT_OPCODE(); + } + ZEND_ASSERT(GC_REFCOUNT(ht) == 1); + + value = (zval*)((char*)ht->arData + opline->extended_value); + if (Z_TYPE_EXTRA_P(value) & IS_STATIC_VAR_UNINITIALIZED) { + ZEND_VM_NEXT_OPCODE(); + } else { + SAVE_OPLINE(); + zval_ptr_dtor(variable_ptr); + ZEND_ASSERT(Z_TYPE_P(value) == IS_REFERENCE); + Z_ADDREF_P(value); + ZVAL_REF(variable_ptr, Z_REF_P(value)); + ZEND_VM_JMP_EX(OP_JMP_ADDR(opline, opline->op2), 1); + } +} + ZEND_VM_HOT_HANDLER(184, ZEND_FETCH_THIS, UNUSED, UNUSED) { USE_OPLINE diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index 21b927c02b895..14e3a5aca2a2a 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -40366,6 +40366,85 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FETCH_CLASS_NAME_SPEC_CV_HANDL ZEND_VM_NEXT_OPCODE(); } +static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_BIND_STATIC_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS) +{ + USE_OPLINE + HashTable *ht; + zval *value; + zval *variable_ptr; + + variable_ptr = EX_VAR(opline->op1.var); + + ht = ZEND_MAP_PTR_GET(EX(func)->op_array.static_variables_ptr); + if (!ht) { + ht = 
zend_array_dup(EX(func)->op_array.static_variables); + ZEND_MAP_PTR_SET(EX(func)->op_array.static_variables_ptr, ht); + } + ZEND_ASSERT(GC_REFCOUNT(ht) == 1); + + value = (zval*)((char*)ht->arData + (opline->extended_value & ~(ZEND_BIND_REF|ZEND_BIND_IMPLICIT|ZEND_BIND_EXPLICIT))); + + SAVE_OPLINE(); + if (opline->extended_value & ZEND_BIND_REF) { + i_zval_ptr_dtor(variable_ptr); + if (UNEXPECTED(!Z_ISREF_P(value))) { + zend_reference *ref = (zend_reference*)emalloc(sizeof(zend_reference)); + GC_SET_REFCOUNT(ref, 2); + GC_TYPE_INFO(ref) = GC_REFERENCE; + if (opline->op2_type == IS_UNUSED) { + ZVAL_COPY_VALUE(&ref->val, value); + } else { + ZEND_ASSERT(!Z_REFCOUNTED_P(value)); + ZVAL_COPY(&ref->val, get_zval_ptr_deref(opline->op2_type, opline->op2, BP_VAR_R)); + FREE_OP(opline->op2_type, opline->op2.var); + } + ref->sources.ptr = NULL; + Z_REF_P(value) = ref; + Z_TYPE_INFO_P(value) = IS_REFERENCE_EX; + ZVAL_REF(variable_ptr, ref); + } else { + Z_ADDREF_P(value); + ZVAL_REF(variable_ptr, Z_REF_P(value)); + if (opline->op2_type != IS_UNUSED) { + FREE_OP(opline->op2_type, opline->op2.var); + } + } + } else { + i_zval_ptr_dtor(variable_ptr); + ZVAL_COPY(variable_ptr, value); + } + + ZEND_VM_NEXT_OPCODE_CHECK_EXCEPTION(); +} + +static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS) +{ + USE_OPLINE + HashTable *ht; + zval *value; + zval *variable_ptr; + + variable_ptr = EX_VAR(opline->op1.var); + + ht = ZEND_MAP_PTR_GET(EX(func)->op_array.static_variables_ptr); + if (!ht) { + ZEND_VM_NEXT_OPCODE(); + } + ZEND_ASSERT(GC_REFCOUNT(ht) == 1); + + value = (zval*)((char*)ht->arData + opline->extended_value); + if (Z_TYPE_EXTRA_P(value) & IS_STATIC_VAR_UNINITIALIZED) { + ZEND_VM_NEXT_OPCODE(); + } else { + SAVE_OPLINE(); + zval_ptr_dtor(variable_ptr); + ZEND_ASSERT(Z_TYPE_P(value) == IS_REFERENCE); + Z_ADDREF_P(value); + ZVAL_REF(variable_ptr, Z_REF_P(value)); + ZEND_VM_JMP_EX(OP_JMP_ADDR(opline, opline->op2), 1); + } 
+} + static ZEND_VM_HOT ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS) { USE_OPLINE @@ -49560,54 +49639,6 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_YIELD_SPEC_CV_UNUSED_HANDLER(Z ZEND_VM_RETURN(); } -static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_BIND_STATIC_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS) -{ - USE_OPLINE - HashTable *ht; - zval *value; - zval *variable_ptr; - - variable_ptr = EX_VAR(opline->op1.var); - - ht = ZEND_MAP_PTR_GET(EX(func)->op_array.static_variables_ptr); - if (!ht) { - ht = zend_array_dup(EX(func)->op_array.static_variables); - ZEND_MAP_PTR_SET(EX(func)->op_array.static_variables_ptr, ht); - } - ZEND_ASSERT(GC_REFCOUNT(ht) == 1); - - value = (zval*)((char*)ht->arData + (opline->extended_value & ~(ZEND_BIND_REF|ZEND_BIND_IMPLICIT|ZEND_BIND_EXPLICIT))); - - SAVE_OPLINE(); - if (opline->extended_value & ZEND_BIND_REF) { - if (Z_TYPE_P(value) == IS_CONSTANT_AST) { - if (UNEXPECTED(zval_update_constant_ex(value, EX(func)->op_array.scope) != SUCCESS)) { - HANDLE_EXCEPTION(); - } - } - - i_zval_ptr_dtor(variable_ptr); - if (UNEXPECTED(!Z_ISREF_P(value))) { - zend_reference *ref = (zend_reference*)emalloc(sizeof(zend_reference)); - GC_SET_REFCOUNT(ref, 2); - GC_TYPE_INFO(ref) = GC_REFERENCE; - ZVAL_COPY_VALUE(&ref->val, value); - ref->sources.ptr = NULL; - Z_REF_P(value) = ref; - Z_TYPE_INFO_P(value) = IS_REFERENCE_EX; - ZVAL_REF(variable_ptr, ref); - } else { - Z_ADDREF_P(value); - ZVAL_REF(variable_ptr, Z_REF_P(value)); - } - } else { - i_zval_ptr_dtor(variable_ptr); - ZVAL_COPY(variable_ptr, value); - } - - ZEND_VM_NEXT_OPCODE_CHECK_EXCEPTION(); -} - static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CHECK_VAR_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS) { USE_OPLINE @@ -55758,7 +55789,7 @@ ZEND_API void execute_ex(zend_execute_data *ex) (void*)&&ZEND_NULL_LABEL, (void*)&&ZEND_NULL_LABEL, (void*)&&ZEND_BIND_LEXICAL_SPEC_TMP_CV_LABEL, - 
(void*)&&ZEND_BIND_STATIC_SPEC_CV_UNUSED_LABEL, + (void*)&&ZEND_BIND_STATIC_SPEC_CV_LABEL, (void*)&&ZEND_FETCH_THIS_SPEC_UNUSED_UNUSED_LABEL, (void*)&&ZEND_SEND_FUNC_ARG_SPEC_VAR_CONST_LABEL, (void*)&&ZEND_NULL_LABEL, @@ -55866,6 +55897,7 @@ ZEND_API void execute_ex(zend_execute_data *ex) (void*)&&ZEND_FETCH_GLOBALS_SPEC_UNUSED_UNUSED_LABEL, (void*)&&ZEND_VERIFY_NEVER_TYPE_SPEC_UNUSED_UNUSED_LABEL, (void*)&&ZEND_CALLABLE_CONVERT_SPEC_UNUSED_UNUSED_LABEL, + (void*)&&ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV_LABEL, (void*)&&ZEND_RECV_NOTYPE_SPEC_LABEL, (void*)&&ZEND_JMP_FORWARD_SPEC_LABEL, (void*)&&ZEND_NULL_LABEL, @@ -60435,6 +60467,14 @@ ZEND_API void execute_ex(zend_execute_data *ex) VM_TRACE(ZEND_FETCH_CLASS_NAME_SPEC_CV) ZEND_FETCH_CLASS_NAME_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); HYBRID_BREAK(); + HYBRID_CASE(ZEND_BIND_STATIC_SPEC_CV): + VM_TRACE(ZEND_BIND_STATIC_SPEC_CV) + ZEND_BIND_STATIC_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); + HYBRID_BREAK(); + HYBRID_CASE(ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV): + VM_TRACE(ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV) + ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); + HYBRID_BREAK(); HYBRID_CASE(ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED): VM_TRACE(ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); @@ -61079,10 +61119,6 @@ ZEND_API void execute_ex(zend_execute_data *ex) VM_TRACE(ZEND_YIELD_SPEC_CV_UNUSED) ZEND_YIELD_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); HYBRID_BREAK(); - HYBRID_CASE(ZEND_BIND_STATIC_SPEC_CV_UNUSED): - VM_TRACE(ZEND_BIND_STATIC_SPEC_CV_UNUSED) - ZEND_BIND_STATIC_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); - HYBRID_BREAK(); HYBRID_CASE(ZEND_CHECK_VAR_SPEC_CV_UNUSED): VM_TRACE(ZEND_CHECK_VAR_SPEC_CV_UNUSED) ZEND_CHECK_VAR_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); @@ -63862,7 +63898,7 @@ void 
zend_vm_init(void) ZEND_NULL_HANDLER, ZEND_NULL_HANDLER, ZEND_BIND_LEXICAL_SPEC_TMP_CV_HANDLER, - ZEND_BIND_STATIC_SPEC_CV_UNUSED_HANDLER, + ZEND_BIND_STATIC_SPEC_CV_HANDLER, ZEND_FETCH_THIS_SPEC_UNUSED_UNUSED_HANDLER, ZEND_SEND_FUNC_ARG_SPEC_VAR_CONST_HANDLER, ZEND_NULL_HANDLER, @@ -63970,6 +64006,7 @@ void zend_vm_init(void) ZEND_FETCH_GLOBALS_SPEC_UNUSED_UNUSED_HANDLER, ZEND_VERIFY_NEVER_TYPE_SPEC_UNUSED_UNUSED_HANDLER, ZEND_CALLABLE_CONVERT_SPEC_UNUSED_UNUSED_HANDLER, + ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV_HANDLER, ZEND_RECV_NOTYPE_SPEC_HANDLER, ZEND_JMP_FORWARD_SPEC_HANDLER, ZEND_NULL_HANDLER, @@ -64921,7 +64958,7 @@ void zend_vm_init(void) 1255, 1256 | SPEC_RULE_OP1, 1261 | SPEC_RULE_OP1, - 3470, + 3471, 1266 | SPEC_RULE_OP1, 1271 | SPEC_RULE_OP1, 1276 | SPEC_RULE_OP2, @@ -65079,59 +65116,59 @@ void zend_vm_init(void) 2564, 2565, 2566, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, + 2567, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, }; #if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) zend_opcode_handler_funcs = labels; @@ -65304,7 +65341,7 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2569 | SPEC_RULE_OP1 | 
SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2570 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; if (op->op1_type < op->op2_type) { zend_swap_operands(op); } @@ -65312,7 +65349,7 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2594 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2595 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; if (op->op1_type < op->op2_type) { zend_swap_operands(op); } @@ -65320,7 +65357,7 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2619 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2620 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; if (op->op1_type < op->op2_type) { zend_swap_operands(op); } @@ -65331,17 +65368,17 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2644 | SPEC_RULE_OP1 | SPEC_RULE_OP2; + spec = 2645 | SPEC_RULE_OP1 | SPEC_RULE_OP2; } else if (op1_info == MAY_BE_LONG && op2_info == MAY_BE_LONG) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2669 | SPEC_RULE_OP1 | SPEC_RULE_OP2; + spec = 2670 | SPEC_RULE_OP1 | SPEC_RULE_OP2; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2694 | SPEC_RULE_OP1 | SPEC_RULE_OP2; + spec = 2695 | SPEC_RULE_OP1 | SPEC_RULE_OP2; } break; case ZEND_MUL: @@ -65352,17 +65389,17 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2719 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2720 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } else 
if (op1_info == MAY_BE_LONG && op2_info == MAY_BE_LONG) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2744 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2745 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2769 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2770 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_IDENTICAL: @@ -65373,14 +65410,14 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2794 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2795 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2869 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2870 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op->op1_type == IS_CV && (op->op2_type & (IS_CONST|IS_CV)) && !(op1_info & (MAY_BE_UNDEF|MAY_BE_REF)) && !(op2_info & (MAY_BE_UNDEF|MAY_BE_REF))) { - spec = 3094 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 3095 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_NOT_IDENTICAL: @@ -65391,14 +65428,14 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2944 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2945 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && 
op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3019 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 3020 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op->op1_type == IS_CV && (op->op2_type & (IS_CONST|IS_CV)) && !(op1_info & (MAY_BE_UNDEF|MAY_BE_REF)) && !(op2_info & (MAY_BE_UNDEF|MAY_BE_REF))) { - spec = 3099 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 3100 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_EQUAL: @@ -65409,12 +65446,12 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2794 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2795 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2869 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2870 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_NOT_EQUAL: @@ -65425,12 +65462,12 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2944 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2945 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3019 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 3020 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | 
SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_SMALLER: @@ -65438,12 +65475,12 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3104 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; + spec = 3105 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3179 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; + spec = 3180 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; } break; case ZEND_IS_SMALLER_OR_EQUAL: @@ -65451,74 +65488,74 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3254 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; + spec = 3255 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3329 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; + spec = 3330 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; } break; case ZEND_QM_ASSIGN: if (op1_info == MAY_BE_LONG) { - spec = 3416 | SPEC_RULE_OP1; + spec = 3417 | SPEC_RULE_OP1; } else if (op1_info == MAY_BE_DOUBLE) { - spec = 3421 | SPEC_RULE_OP1; + spec = 3422 | SPEC_RULE_OP1; } else if ((op->op1_type == IS_CONST) ? 
!Z_REFCOUNTED_P(RT_CONSTANT(op, op->op1)) : (!(op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE))))) { - spec = 3426 | SPEC_RULE_OP1; + spec = 3427 | SPEC_RULE_OP1; } break; case ZEND_PRE_INC: if (res_info == MAY_BE_LONG && op1_info == MAY_BE_LONG) { - spec = 3404 | SPEC_RULE_RETVAL; + spec = 3405 | SPEC_RULE_RETVAL; } else if (op1_info == MAY_BE_LONG) { - spec = 3406 | SPEC_RULE_RETVAL; + spec = 3407 | SPEC_RULE_RETVAL; } break; case ZEND_PRE_DEC: if (res_info == MAY_BE_LONG && op1_info == MAY_BE_LONG) { - spec = 3408 | SPEC_RULE_RETVAL; + spec = 3409 | SPEC_RULE_RETVAL; } else if (op1_info == MAY_BE_LONG) { - spec = 3410 | SPEC_RULE_RETVAL; + spec = 3411 | SPEC_RULE_RETVAL; } break; case ZEND_POST_INC: if (res_info == MAY_BE_LONG && op1_info == MAY_BE_LONG) { - spec = 3412; - } else if (op1_info == MAY_BE_LONG) { spec = 3413; + } else if (op1_info == MAY_BE_LONG) { + spec = 3414; } break; case ZEND_POST_DEC: if (res_info == MAY_BE_LONG && op1_info == MAY_BE_LONG) { - spec = 3414; - } else if (op1_info == MAY_BE_LONG) { spec = 3415; + } else if (op1_info == MAY_BE_LONG) { + spec = 3416; } break; case ZEND_JMP: if (OP_JMP_ADDR(op, op->op1) > op) { - spec = 2568; + spec = 2569; } break; case ZEND_RECV: if (op->op2.num == MAY_BE_ANY) { - spec = 2567; + spec = 2568; } break; case ZEND_SEND_VAL: if (op->op1_type == IS_CONST && op->op2_type == IS_UNUSED && !Z_REFCOUNTED_P(RT_CONSTANT(op, op->op1))) { - spec = 3466; + spec = 3467; } break; case ZEND_SEND_VAR_EX: if (op->op2_type == IS_UNUSED && op->op2.num <= MAX_ARG_FLAG_NUM && (op1_info & (MAY_BE_UNDEF|MAY_BE_REF)) == 0) { - spec = 3461 | SPEC_RULE_OP1; + spec = 3462 | SPEC_RULE_OP1; } break; case ZEND_FE_FETCH_R: if (op->op2_type == IS_CV && (op1_info & (MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_ARRAY) { - spec = 3468 | SPEC_RULE_RETVAL; + spec = 3469 | SPEC_RULE_RETVAL; } break; case ZEND_FETCH_DIM_R: @@ -65526,17 +65563,17 @@ ZEND_API void ZEND_FASTCALL 
zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3431 | SPEC_RULE_OP1 | SPEC_RULE_OP2; + spec = 3432 | SPEC_RULE_OP1 | SPEC_RULE_OP2; } break; case ZEND_SEND_VAL_EX: if (op->op2_type == IS_UNUSED && op->op2.num <= MAX_ARG_FLAG_NUM && op->op1_type == IS_CONST && !Z_REFCOUNTED_P(RT_CONSTANT(op, op->op1))) { - spec = 3467; + spec = 3468; } break; case ZEND_SEND_VAR: if (op->op2_type == IS_UNUSED && (op1_info & (MAY_BE_UNDEF|MAY_BE_REF)) == 0) { - spec = 3456 | SPEC_RULE_OP1; + spec = 3457 | SPEC_RULE_OP1; } break; case ZEND_BW_OR: diff --git a/Zend/zend_vm_handlers.h b/Zend/zend_vm_handlers.h index fae2138ef912e..97dfeac30cae8 100644 --- a/Zend/zend_vm_handlers.h +++ b/Zend/zend_vm_handlers.h @@ -1291,7 +1291,7 @@ _(2450, ZEND_FETCH_CLASS_CONSTANT_SPEC_UNUSED_TMPVARCV) \ _(2452, ZEND_FETCH_CLASS_CONSTANT_SPEC_UNUSED_TMPVARCV) \ _(2458, ZEND_BIND_LEXICAL_SPEC_TMP_CV) \ - _(2459, ZEND_BIND_STATIC_SPEC_CV_UNUSED) \ + _(2459, ZEND_BIND_STATIC_SPEC_CV) \ _(2460, ZEND_FETCH_THIS_SPEC_UNUSED_UNUSED) \ _(2461, ZEND_SEND_FUNC_ARG_SPEC_VAR_CONST) \ _(2464, ZEND_SEND_FUNC_ARG_SPEC_VAR_UNUSED) \ @@ -1362,498 +1362,499 @@ _(2564, ZEND_FETCH_GLOBALS_SPEC_UNUSED_UNUSED) \ _(2565, ZEND_VERIFY_NEVER_TYPE_SPEC_UNUSED_UNUSED) \ _(2566, ZEND_CALLABLE_CONVERT_SPEC_UNUSED_UNUSED) \ - _(2567, ZEND_RECV_NOTYPE_SPEC) \ - _(2568, ZEND_JMP_FORWARD_SPEC) \ - _(2574, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2575, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2567, ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV) \ + _(2568, ZEND_RECV_NOTYPE_SPEC) \ + _(2569, ZEND_JMP_FORWARD_SPEC) \ + _(2575, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2576, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2578, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2579, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2580, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2577, 
ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2579, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2580, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2581, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2583, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2589, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2590, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2582, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2584, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2590, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2591, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2593, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2599, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ - _(2600, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2592, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2594, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2600, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ _(2601, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2603, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2604, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ - _(2605, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2602, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2604, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2605, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ _(2606, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2608, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2614, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ - _(2615, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2607, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2609, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2615, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ _(2616, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2618, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2624, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2625, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2617, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2619, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2625, 
ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2626, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2628, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2629, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2630, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2627, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2629, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2630, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2631, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2633, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2639, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2640, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2632, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2634, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2640, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2641, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2643, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2645, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ + _(2642, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2644, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ _(2646, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ - _(2648, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ - _(2649, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2650, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2647, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ + _(2649, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ + _(2650, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2651, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2653, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2654, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2655, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2652, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2654, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2655, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2656, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2658, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2664, 
ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2665, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2657, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2659, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2665, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2666, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2668, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2670, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ + _(2667, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2669, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ _(2671, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ - _(2673, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ - _(2674, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ - _(2675, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2672, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ + _(2674, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ + _(2675, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ _(2676, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2678, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2679, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ - _(2680, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2677, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2679, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2680, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ _(2681, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2683, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2689, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ - _(2690, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2682, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2684, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2690, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ _(2691, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2693, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2695, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(2692, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2694, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ _(2696, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(2698, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(2699, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ - 
_(2700, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2697, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(2699, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(2700, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2701, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2703, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2704, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2705, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2702, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2704, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2705, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2706, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2708, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2714, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2715, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2707, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2709, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2715, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2716, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2718, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2724, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2725, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2717, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2719, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2725, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2726, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2728, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2729, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2730, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2727, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2729, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2730, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2731, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2733, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2739, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2740, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2732, 
ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2734, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2740, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2741, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2743, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2749, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ - _(2750, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2742, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2744, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2750, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ _(2751, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2753, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2754, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ - _(2755, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2752, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2754, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2755, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ _(2756, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2758, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2764, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ - _(2765, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2757, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2759, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2765, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ _(2766, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2768, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2774, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2775, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2767, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2769, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2775, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2776, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2778, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2779, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2780, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2777, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2779, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2780, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2781, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) 
\ - _(2783, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2789, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2790, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2782, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2784, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2790, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2791, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2793, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2809, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2810, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2811, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2812, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2813, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2814, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2815, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2816, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2817, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2821, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2822, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2823, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2824, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2825, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2826, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2827, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2828, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2829, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2830, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2831, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2832, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2836, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2837, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2838, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2854, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2855, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2856, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2857, 
ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2858, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2859, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2860, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2861, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2862, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2866, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2867, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2868, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2884, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2885, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2886, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2887, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2888, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2889, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2890, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2891, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2892, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2896, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2897, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2898, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2899, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2900, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2901, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2902, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2903, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2904, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2905, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2906, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2907, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2911, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2912, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2913, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2929, 
ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2930, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2931, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2932, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2933, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2934, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2935, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2936, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2937, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2941, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2942, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2943, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2959, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2960, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2961, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2962, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2963, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2964, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2965, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2966, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2967, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2971, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2972, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2973, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2974, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2975, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2976, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2977, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2978, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2979, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2980, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2981, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2982, 
ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2986, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2987, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2988, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3004, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(3005, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3006, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3007, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3008, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3009, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3010, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3011, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3012, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3016, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3017, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3018, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3034, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3035, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3036, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3037, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3038, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3039, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3040, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3041, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3042, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3046, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3047, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3048, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3049, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3050, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3051, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3052, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ 
- _(3053, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3054, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3055, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3056, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3057, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3061, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3062, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3063, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3079, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3080, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3081, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3082, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3083, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3084, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3085, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3086, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3087, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3091, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3092, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3093, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3094, ZEND_IS_IDENTICAL_NOTHROW_SPEC_CV_CONST) \ - _(3098, ZEND_IS_IDENTICAL_NOTHROW_SPEC_CV_CV) \ - _(3099, ZEND_IS_NOT_IDENTICAL_NOTHROW_SPEC_CV_CONST) \ - _(3103, ZEND_IS_NOT_IDENTICAL_NOTHROW_SPEC_CV_CV) \ - _(3107, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ - _(3108, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3109, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3110, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ - _(3111, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3112, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3116, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ - _(3117, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3118, 
ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3119, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ - _(3120, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3121, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3122, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3123, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3124, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3125, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3126, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3127, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3131, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3132, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3133, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3134, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ - _(3135, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3136, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3137, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3138, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3139, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3140, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3141, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3142, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3146, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3147, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3148, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3164, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ - _(3165, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3166, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3167, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3168, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3169, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3170, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3171, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3172, 
ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3176, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3177, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3178, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3182, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3183, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3184, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3185, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3186, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3187, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3191, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3192, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3193, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3194, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3195, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3196, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3197, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3198, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3199, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3200, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3201, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3202, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3206, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3207, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3208, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3209, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3210, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3211, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3212, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3213, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3214, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3215, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3216, 
ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3217, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3221, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3222, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3223, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3239, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3240, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3241, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3242, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3243, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3244, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3245, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3246, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3247, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3251, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3252, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3253, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3257, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ - _(3258, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3259, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3260, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ - _(3261, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3262, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3266, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ - _(3267, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3268, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3269, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(3270, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3271, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3272, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3273, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3274, 
ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3275, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3276, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3277, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3281, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3282, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3283, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3284, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(3285, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3286, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3287, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3288, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3289, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3290, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3291, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3292, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3296, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3297, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3298, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3314, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(3315, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3316, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3317, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3318, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3319, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3320, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3321, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3322, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3326, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - 
_(3327, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3328, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3332, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3333, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3334, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3335, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3336, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3337, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3341, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3342, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3343, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3344, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3345, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3346, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3347, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3348, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3349, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3350, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3351, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3352, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3356, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3357, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3358, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3359, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3360, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3361, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3362, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3363, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3364, 
ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3365, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3366, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3367, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3371, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3372, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3373, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3389, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3390, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3391, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3392, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3393, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3394, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3395, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3396, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3397, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3401, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3402, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3403, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3404, ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) \ - _(3405, ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_USED) \ - _(3406, ZEND_PRE_INC_LONG_SPEC_CV_RETVAL_UNUSED) \ - _(3407, ZEND_PRE_INC_LONG_SPEC_CV_RETVAL_USED) \ - _(3408, ZEND_PRE_DEC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) \ - _(3409, ZEND_PRE_DEC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_USED) \ - _(3410, ZEND_PRE_DEC_LONG_SPEC_CV_RETVAL_UNUSED) \ - _(3411, ZEND_PRE_DEC_LONG_SPEC_CV_RETVAL_USED) \ - _(3412, ZEND_POST_INC_LONG_NO_OVERFLOW_SPEC_CV) \ - _(3413, ZEND_POST_INC_LONG_SPEC_CV) \ - _(3414, ZEND_POST_DEC_LONG_NO_OVERFLOW_SPEC_CV) \ - _(3415, ZEND_POST_DEC_LONG_SPEC_CV) \ - 
_(3416, ZEND_QM_ASSIGN_LONG_SPEC_CONST) \ - _(3417, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ + _(2792, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2794, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2810, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2811, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2812, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2813, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2814, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2815, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2816, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2817, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2818, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2822, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2823, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2824, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2825, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2826, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2827, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2828, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2829, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2830, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2831, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2832, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2833, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2837, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2838, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2839, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2855, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2856, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2857, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2858, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2859, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2860, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2861, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2862, 
ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2863, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2867, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2868, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2869, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2885, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(2886, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2887, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2888, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2889, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2890, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2891, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2892, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2893, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2897, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2898, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2899, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2900, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(2901, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2902, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2903, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2904, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2905, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2906, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2907, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2908, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2912, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2913, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2914, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2930, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(2931, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2932, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2933, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2934, 
ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2935, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2936, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2937, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2938, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2942, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2943, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2944, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2960, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2961, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2962, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2963, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2964, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2965, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2966, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2967, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2968, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2972, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2973, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2974, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2975, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2976, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2977, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2978, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2979, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2980, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2981, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2982, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2983, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2987, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2988, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2989, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3005, 
ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(3006, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3007, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3008, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3009, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3010, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3011, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3012, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3013, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3017, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3018, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3019, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3035, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3036, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3037, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3038, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3039, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3040, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3041, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3042, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3043, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3047, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3048, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3049, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3050, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3051, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3052, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3053, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3054, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3055, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3056, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3057, 
ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3058, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3062, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3063, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3064, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3080, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3081, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3082, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3083, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3084, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3085, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3086, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3087, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3088, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3092, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3093, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3094, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3095, ZEND_IS_IDENTICAL_NOTHROW_SPEC_CV_CONST) \ + _(3099, ZEND_IS_IDENTICAL_NOTHROW_SPEC_CV_CV) \ + _(3100, ZEND_IS_NOT_IDENTICAL_NOTHROW_SPEC_CV_CONST) \ + _(3104, ZEND_IS_NOT_IDENTICAL_NOTHROW_SPEC_CV_CV) \ + _(3108, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ + _(3109, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3110, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3111, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ + _(3112, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3113, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3117, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ + _(3118, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3119, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3120, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ + _(3121, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3122, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3123, 
ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3124, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3125, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3126, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3127, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3128, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3132, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3133, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3134, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3135, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ + _(3136, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3137, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3138, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3139, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3140, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3141, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3142, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3143, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3147, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3148, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3149, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3165, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ + _(3166, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3167, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3168, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3169, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3170, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3171, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3172, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3173, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3177, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3178, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3179, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3183, 
ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3184, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3185, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3186, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3187, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3188, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3192, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3193, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3194, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3195, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3196, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3197, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3198, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3199, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3200, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3201, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3202, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3203, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3207, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3208, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3209, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3210, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3211, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3212, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3213, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3214, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3215, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3216, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3217, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3218, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3222, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3223, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3224, 
ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3240, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3241, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3242, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3243, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3244, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3245, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3246, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3247, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3248, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3252, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3253, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3254, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3258, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ + _(3259, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3260, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3261, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ + _(3262, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3263, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3267, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ + _(3268, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3269, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3270, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(3271, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3272, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3273, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3274, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3275, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3276, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3277, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3278, 
ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3282, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3283, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3284, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3285, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(3286, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3287, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3288, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3289, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3290, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3291, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3292, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3293, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3297, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3298, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3299, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3315, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(3316, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3317, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3318, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3319, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3320, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3321, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3322, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3323, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3327, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3328, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3329, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3333, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3334, 
ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3335, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3336, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3337, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3338, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3342, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3343, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3344, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3345, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3346, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3347, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3348, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3349, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3350, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3351, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3352, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3353, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3357, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3358, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3359, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3360, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3361, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3362, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3363, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3364, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3365, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3366, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3367, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3368, 
ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3372, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3373, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3374, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3390, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3391, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3392, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3393, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3394, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3395, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3396, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3397, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3398, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3402, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3403, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3404, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3405, ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) \ + _(3406, ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_USED) \ + _(3407, ZEND_PRE_INC_LONG_SPEC_CV_RETVAL_UNUSED) \ + _(3408, ZEND_PRE_INC_LONG_SPEC_CV_RETVAL_USED) \ + _(3409, ZEND_PRE_DEC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) \ + _(3410, ZEND_PRE_DEC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_USED) \ + _(3411, ZEND_PRE_DEC_LONG_SPEC_CV_RETVAL_UNUSED) \ + _(3412, ZEND_PRE_DEC_LONG_SPEC_CV_RETVAL_USED) \ + _(3413, ZEND_POST_INC_LONG_NO_OVERFLOW_SPEC_CV) \ + _(3414, ZEND_POST_INC_LONG_SPEC_CV) \ + _(3415, ZEND_POST_DEC_LONG_NO_OVERFLOW_SPEC_CV) \ + _(3416, ZEND_POST_DEC_LONG_SPEC_CV) \ + _(3417, ZEND_QM_ASSIGN_LONG_SPEC_CONST) \ _(3418, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ - _(3420, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ - _(3421, ZEND_QM_ASSIGN_DOUBLE_SPEC_CONST) \ - _(3422, 
ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ + _(3419, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ + _(3421, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ + _(3422, ZEND_QM_ASSIGN_DOUBLE_SPEC_CONST) \ _(3423, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ - _(3425, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ - _(3426, ZEND_QM_ASSIGN_NOREF_SPEC_CONST) \ - _(3427, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ + _(3424, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ + _(3426, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ + _(3427, ZEND_QM_ASSIGN_NOREF_SPEC_CONST) \ _(3428, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ - _(3430, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ - _(3432, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ + _(3429, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ + _(3431, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ _(3433, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ - _(3435, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ - _(3436, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_CONST) \ - _(3437, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3434, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ + _(3436, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ + _(3437, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_CONST) \ _(3438, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ - _(3440, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ - _(3441, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_CONST) \ - _(3442, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3439, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3441, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3442, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_CONST) \ _(3443, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ - _(3445, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ - _(3451, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_CONST) \ - _(3452, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ + _(3444, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3446, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3452, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_CONST) \ _(3453, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ - _(3455, 
ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ - _(3458, ZEND_SEND_VAR_SIMPLE_SPEC_VAR) \ - _(3460, ZEND_SEND_VAR_SIMPLE_SPEC_CV) \ - _(3463, ZEND_SEND_VAR_EX_SIMPLE_SPEC_VAR_UNUSED) \ - _(3465, ZEND_SEND_VAR_EX_SIMPLE_SPEC_CV_UNUSED) \ - _(3466, ZEND_SEND_VAL_SIMPLE_SPEC_CONST) \ - _(3467, ZEND_SEND_VAL_EX_SIMPLE_SPEC_CONST) \ - _(3468, ZEND_FE_FETCH_R_SIMPLE_SPEC_VAR_CV_RETVAL_UNUSED) \ - _(3469, ZEND_FE_FETCH_R_SIMPLE_SPEC_VAR_CV_RETVAL_USED) \ - _(3469+1, ZEND_NULL) + _(3454, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ + _(3456, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ + _(3459, ZEND_SEND_VAR_SIMPLE_SPEC_VAR) \ + _(3461, ZEND_SEND_VAR_SIMPLE_SPEC_CV) \ + _(3464, ZEND_SEND_VAR_EX_SIMPLE_SPEC_VAR_UNUSED) \ + _(3466, ZEND_SEND_VAR_EX_SIMPLE_SPEC_CV_UNUSED) \ + _(3467, ZEND_SEND_VAL_SIMPLE_SPEC_CONST) \ + _(3468, ZEND_SEND_VAL_EX_SIMPLE_SPEC_CONST) \ + _(3469, ZEND_FE_FETCH_R_SIMPLE_SPEC_VAR_CV_RETVAL_UNUSED) \ + _(3470, ZEND_FE_FETCH_R_SIMPLE_SPEC_VAR_CV_RETVAL_USED) \ + _(3470+1, ZEND_NULL) diff --git a/Zend/zend_vm_opcodes.c b/Zend/zend_vm_opcodes.c index a9e4317e04e2d..e94b6c7d5f0cd 100644 --- a/Zend/zend_vm_opcodes.c +++ b/Zend/zend_vm_opcodes.c @@ -22,7 +22,7 @@ #include #include -static const char *zend_vm_opcodes_names[203] = { +static const char *zend_vm_opcodes_names[204] = { "ZEND_NOP", "ZEND_ADD", "ZEND_SUB", @@ -226,9 +226,10 @@ static const char *zend_vm_opcodes_names[203] = { "ZEND_FETCH_GLOBALS", "ZEND_VERIFY_NEVER_TYPE", "ZEND_CALLABLE_CONVERT", + "ZEND_BIND_INIT_STATIC_OR_JMP", }; -static uint32_t zend_vm_opcodes_flags[203] = { +static uint32_t zend_vm_opcodes_flags[204] = { 0x00000000, 0x00000b0b, 0x00000b0b, @@ -412,7 +413,7 @@ static uint32_t zend_vm_opcodes_flags[203] = { 0x00067000, 0x00040b73, 0x00100101, - 0x00100101, + 0x00100001, 0x00000101, 0x00001301, 0x00000101, @@ -432,6 +433,7 @@ static uint32_t zend_vm_opcodes_flags[203] = { 0x00000101, 0x00000101, 0x00000101, + 0x00002001, }; ZEND_API const char* ZEND_FASTCALL 
zend_get_opcode_name(uint8_t opcode) { diff --git a/Zend/zend_vm_opcodes.h b/Zend/zend_vm_opcodes.h index 43bd8bc252802..5531accbf0c20 100644 --- a/Zend/zend_vm_opcodes.h +++ b/Zend/zend_vm_opcodes.h @@ -285,7 +285,8 @@ END_EXTERN_C() #define ZEND_FETCH_GLOBALS 200 #define ZEND_VERIFY_NEVER_TYPE 201 #define ZEND_CALLABLE_CONVERT 202 +#define ZEND_BIND_INIT_STATIC_OR_JMP 203 -#define ZEND_VM_LAST_OPCODE 202 +#define ZEND_VM_LAST_OPCODE 203 #endif diff --git a/ext/opcache/jit/zend_jit.c b/ext/opcache/jit/zend_jit.c index 874eff576b554..f168ec190eee6 100644 --- a/ext/opcache/jit/zend_jit.c +++ b/ext/opcache/jit/zend_jit.c @@ -366,6 +366,7 @@ static int zend_jit_needs_call_chain(zend_call_info *call_info, uint32_t b, cons case ZEND_DECLARE_ANON_CLASS: case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: + case ZEND_BIND_INIT_STATIC_OR_JMP: return 1; case ZEND_DO_ICALL: case ZEND_DO_UCALL: @@ -448,6 +449,7 @@ static int zend_jit_needs_call_chain(zend_call_info *call_info, uint32_t b, cons case ZEND_DECLARE_ANON_CLASS: case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: + case ZEND_BIND_INIT_STATIC_OR_JMP: return 1; case ZEND_DO_ICALL: case ZEND_DO_UCALL: @@ -4049,6 +4051,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op case ZEND_ASSERT_CHECK: case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: + case ZEND_BIND_INIT_STATIC_OR_JMP: if (!zend_jit_handler(&dasm_state, opline, zend_may_throw(opline, ssa_op, op_array, ssa)) || !zend_jit_cond_jmp(&dasm_state, opline + 1, ssa->cfg.blocks[b].successors[0])) { diff --git a/ext/opcache/jit/zend_jit_trace.c b/ext/opcache/jit/zend_jit_trace.c index ed8cdbc95b590..191e4eebbbd2b 100644 --- a/ext/opcache/jit/zend_jit_trace.c +++ b/ext/opcache/jit/zend_jit_trace.c @@ -332,6 +332,7 @@ static int zend_jit_trace_may_exit(const zend_op_array *op_array, const zend_op case ZEND_SWITCH_LONG: case ZEND_SWITCH_STRING: case ZEND_MATCH: + case ZEND_BIND_INIT_STATIC_OR_JMP: /* branch opcodes */ return 1; case ZEND_NEW: diff --git 
a/ext/opcache/tests/optimize_static_002.phpt b/ext/opcache/tests/optimize_static_002.phpt new file mode 100644 index 0000000000000..e4299ba4f3fff --- /dev/null +++ b/ext/opcache/tests/optimize_static_002.phpt @@ -0,0 +1,22 @@ +--TEST-- +Keep BIND_STATIC when static variable has an initializer +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.optimization_level=-1 +--EXTENSIONS-- +opcache +--FILE-- +getStaticVariables()); +} +foo(); +?> +--EXPECT-- +array(1) { + ["a"]=> + int(42) +} diff --git a/ext/opcache/tests/optimize_static_003.phpt b/ext/opcache/tests/optimize_static_003.phpt new file mode 100644 index 0000000000000..1a3c98a48fb0b --- /dev/null +++ b/ext/opcache/tests/optimize_static_003.phpt @@ -0,0 +1,24 @@ +--TEST-- +Keep BIND_STATIC when static variable has an initializer +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.optimization_level=-1 +--EXTENSIONS-- +opcache +--FILE-- +getStaticVariables()); +} +foo(); +?> +--EXPECT-- +array(1) { + ["a"]=> + &int(42) +} diff --git a/ext/opcache/zend_file_cache.c b/ext/opcache/zend_file_cache.c index f4c9a77996b96..864bc4aff202c 100644 --- a/ext/opcache/zend_file_cache.c +++ b/ext/opcache/zend_file_cache.c @@ -556,6 +556,7 @@ static void zend_file_cache_serialize_op_array(zend_op_array *op_arra case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: SERIALIZE_PTR(opline->op2.jmp_addr); break; case ZEND_CATCH: @@ -1404,6 +1405,7 @@ static void zend_file_cache_unserialize_op_array(zend_op_array *op_arr case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: UNSERIALIZE_PTR(opline->op2.jmp_addr); break; case ZEND_CATCH: diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c index e21aaa069348a..791032f0e1f26 100644 --- a/ext/opcache/zend_persist.c +++ b/ext/opcache/zend_persist.c @@ -578,6 +578,7 @@ static void zend_persist_op_array_ex(zend_op_array *op_array, zend_persistent_sc case 
ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: opline->op2.jmp_addr = &new_opcodes[opline->op2.jmp_addr - op_array->opcodes]; break; case ZEND_CATCH: diff --git a/ext/reflection/php_reflection.c b/ext/reflection/php_reflection.c index 2f8acbfb00852..6eae95ba47fbe 100644 --- a/ext/reflection/php_reflection.c +++ b/ext/reflection/php_reflection.c @@ -1949,7 +1949,6 @@ ZEND_METHOD(ReflectionFunctionAbstract, getStaticVariables) { reflection_object *intern; zend_function *fptr; - zval *val; if (zend_parse_parameters_none() == FAILURE) { RETURN_THROWS(); @@ -1966,11 +1965,6 @@ ZEND_METHOD(ReflectionFunctionAbstract, getStaticVariables) ht = zend_array_dup(fptr->op_array.static_variables); ZEND_MAP_PTR_SET(fptr->op_array.static_variables_ptr, ht); } - ZEND_HASH_MAP_FOREACH_VAL(ht, val) { - if (UNEXPECTED(zval_update_constant_ex(val, fptr->common.scope) != SUCCESS)) { - RETURN_THROWS(); - } - } ZEND_HASH_FOREACH_END(); zend_hash_copy(Z_ARRVAL_P(return_value), ht, zval_add_ref); } else { RETURN_EMPTY_ARRAY(); diff --git a/ext/reflection/tests/ReflectionMethod_getStaticVariables_basic_extra_bleed.phpt b/ext/reflection/tests/ReflectionMethod_getStaticVariables_basic_extra_bleed.phpt new file mode 100644 index 0000000000000..6803e7a063dd3 --- /dev/null +++ b/ext/reflection/tests/ReflectionMethod_getStaticVariables_basic_extra_bleed.phpt @@ -0,0 +1,31 @@ +--TEST-- +ReflectionMethod::getStaticVariables() should not bleed IS_TYPE_UNINITIALIZED +--FILE-- +getStaticVariables()['a']; + + static $a = test(); + var_dump($a); + + // Technically, IS_TYPE_UNINITIALIZED does bleed, but it doesn't matter since there's no way we + // can assign it to the static variable directly instead of the reference. 
+ $staticVar = &$methodInfo->getStaticVariables()['a']; + $staticVar = $nullWithIsTypeUninitialized; +} + +foo(); +foo(); + +?> +--EXPECT-- +test() called +int(42) +NULL diff --git a/ext/reflection/tests/new_in_constexpr.phpt b/ext/reflection/tests/new_in_constexpr.phpt index c9b77c0eed216..bda5aaba6997a 100644 --- a/ext/reflection/tests/new_in_constexpr.phpt +++ b/ext/reflection/tests/new_in_constexpr.phpt @@ -9,6 +9,9 @@ function test1() { } $rf = new ReflectionFunction('test1'); +var_dump($rf->getStaticVariables()); +test1(); + $s = $rf->getStaticVariables(); var_dump($s['x'] === test1()); @@ -23,5 +26,9 @@ var_dump($rp->getDefaultValue() !== test2()); ?> --EXPECT-- +array(1) { + ["x"]=> + NULL +} bool(true) bool(true) diff --git a/sapi/phpdbg/phpdbg.c b/sapi/phpdbg/phpdbg.c index ba70244c143f0..17b7513789ee7 100644 --- a/sapi/phpdbg/phpdbg.c +++ b/sapi/phpdbg/phpdbg.c @@ -436,7 +436,7 @@ static zend_always_inline bool phpdbg_is_ignored_opcode(uint8_t opcode) { || opcode == ZEND_DECLARE_CLASS_DELAYED || opcode == ZEND_DECLARE_ANON_CLASS || opcode == ZEND_FAST_RET || opcode == ZEND_TICKS || opcode == ZEND_EXT_STMT || opcode == ZEND_EXT_FCALL_BEGIN || opcode == ZEND_EXT_FCALL_END - || opcode == ZEND_BIND_GLOBAL + || opcode == ZEND_BIND_GLOBAL || opcode == ZEND_BIND_INIT_STATIC_OR_JMP ; } diff --git a/tests/lang/static_basic_002.phpt b/tests/lang/static_basic_002.phpt index b6e9856e8222e..a8d700d7bd9c6 100644 --- a/tests/lang/static_basic_002.phpt +++ b/tests/lang/static_basic_002.phpt @@ -3,26 +3,9 @@ Multiple declarations of the same static variable --FILE-- ---EXPECT-- -int(5) -int(11) -int(14) +--EXPECTF-- +Fatal error: Duplicate declaration of static variable $a in %s on line %d From 5ff244d6e016095b0cee183c59c357cf0772e856 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Wed, 24 May 2023 20:45:20 +0200 Subject: [PATCH 078/168] [skip ci] Fix linguist-generated for zend_vm_opcodes files {h,c} is not actually a supported pattern --- .gitattributes | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index f8b91505bc983..49d5471ff6646 100644 --- a/.gitattributes +++ b/.gitattributes @@ -22,7 +22,7 @@ # Collapse generated files within a pull request. **/*_arginfo.h linguist-generated /Zend/zend_vm_execute.h linguist-generated -/Zend/zend_vm_opcodes.{h,c} linguist-generated +/Zend/zend_vm_opcodes.[ch] linguist-generated # The OSS fuzz files are binary /ext/date/tests/ossfuzz*.txt binary From eb7ec15a614c20a7af278b4d2f7aec4a73a06b64 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Wed, 24 May 2023 14:19:46 +0200 Subject: [PATCH 079/168] Use single allocation for indirect values in array_multisort Closes GH-11309 --- ext/standard/array.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/standard/array.c b/ext/standard/array.c index 86751acfb07cb..46c2c882b83d6 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -5782,8 +5782,10 @@ PHP_FUNCTION(array_multisort) * of the input arrays + 1. The last column is UNDEF to indicate the end * of the row. It also stores the original position for stable sorting. */ indirect = (Bucket **)safe_emalloc(array_size, sizeof(Bucket *), 0); + /* Move num_arrays multiplication to size because it's essentially impossible to overflow.
*/ + Bucket *indirects = (Bucket *)safe_emalloc(array_size, sizeof(Bucket) * (num_arrays + 1), 0); for (i = 0; i < array_size; i++) { - indirect[i] = (Bucket *)safe_emalloc((num_arrays + 1), sizeof(Bucket), 0); + indirect[i] = indirects + (i * (num_arrays + 1)); } for (i = 0; i < num_arrays; i++) { k = 0; @@ -5847,9 +5849,7 @@ PHP_FUNCTION(array_multisort) RETVAL_TRUE; clean_up: - for (i = 0; i < array_size; i++) { - efree(indirect[i]); - } + efree(indirects); efree(indirect); efree(func); efree(arrays); From 150825d176a6a46216e2f29eb71cf1081d840f74 Mon Sep 17 00:00:00 2001 From: Pierrick Charron Date: Wed, 24 May 2023 23:41:36 -0400 Subject: [PATCH 080/168] [skip ci] Fix release date of PHP 8.2.7 --- NEWS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS b/NEWS index a4ca945bf08d2..6b9667d91ee2f 100644 --- a/NEWS +++ b/NEWS @@ -12,7 +12,7 @@ PHP NEWS . Fix access on NULL pointer in array_merge_recursive(). (ilutov) . Fix exception handling in array_multisort(). (ilutov) -01 Jun 2023, PHP 8.2.7 +08 Jun 2023, PHP 8.2.7 - Core: . Fixed bug GH-11152 (Unable to alias namespaces containing reserved class From 8946b7b141ea72aa72d1330c27fa76c9f0af9b03 Mon Sep 17 00:00:00 2001 From: KoudelkaB <33930155+KoudelkaB@users.noreply.github.com> Date: Wed, 24 May 2023 17:23:27 +0200 Subject: [PATCH 081/168] Access violation when ALLOC_FALLBACK fixed Close GH-11312 --- NEWS | 1 + ext/opcache/zend_shared_alloc.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/NEWS b/NEWS index 332bd89835b34..35e9fb124cfbe 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,7 @@ PHP NEWS - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) + . Access violation on smm_shared_globals with ALLOC_FALLBACK. (KoudelkaB) - Standard: . Fix access on NULL pointer in array_merge_recursive(). 
(ilutov) diff --git a/ext/opcache/zend_shared_alloc.c b/ext/opcache/zend_shared_alloc.c index be931f526c1b1..afe539bf987a7 100644 --- a/ext/opcache/zend_shared_alloc.c +++ b/ext/opcache/zend_shared_alloc.c @@ -191,6 +191,7 @@ int zend_shared_alloc_startup(size_t requested_size, size_t reserved_size) } #if ENABLE_FILE_CACHE_FALLBACK if (ALLOC_FALLBACK == res) { + smm_shared_globals = NULL; return ALLOC_FALLBACK; } #endif @@ -216,6 +217,7 @@ int zend_shared_alloc_startup(size_t requested_size, size_t reserved_size) } #if ENABLE_FILE_CACHE_FALLBACK if (ALLOC_FALLBACK == res) { + smm_shared_globals = NULL; return ALLOC_FALLBACK; } #endif From cba335d61e68fa4ae9e0b36184552c76878ff615 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 22 May 2023 23:50:06 +0200 Subject: [PATCH 082/168] Fix GH-11288 and GH-11289 and GH-11290 and GH-9142: DOMExceptions and segfaults with replaceWith This replaces the implementation of before and after with one following the spec very strictly, instead of trying to figure out the state we're in by looking at the pointers. Also relaxes the condition on text node copying to prevent working on a stale node pointer. Closes GH-11299. 
--- NEWS | 4 + ext/dom/parentnode.c | 188 +++++++++++++++++++--------------- ext/dom/tests/bug80602.phpt | 88 ++++++++-------- ext/dom/tests/bug80602_2.phpt | 88 ++++++++-------- ext/dom/tests/bug80602_3.phpt | 120 ++++++++++++++++++++++ ext/dom/tests/bug80602_4.phpt | 33 ++++++ ext/dom/tests/gh11288.phpt | 67 ++++++++++++ ext/dom/tests/gh11289.phpt | 28 +++++ ext/dom/tests/gh11290.phpt | 27 +++++ ext/dom/tests/gh9142.phpt | 20 ++++ 10 files changed, 490 insertions(+), 173 deletions(-) create mode 100644 ext/dom/tests/bug80602_3.phpt create mode 100644 ext/dom/tests/bug80602_4.phpt create mode 100644 ext/dom/tests/gh11288.phpt create mode 100644 ext/dom/tests/gh11289.phpt create mode 100644 ext/dom/tests/gh11290.phpt create mode 100644 ext/dom/tests/gh9142.phpt diff --git a/NEWS b/NEWS index 35e9fb124cfbe..93b363cfac79a 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,10 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.1.21 +- DOM: + . Fixed bugs GH-11288 and GH-11289 and GH-11290 and GH-9142 (DOMExceptions + and segfaults with replaceWith). (nielsdos) + - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) . Access violation on smm_shared_globals with ALLOC_FALLBACK. 
(KoudelkaB) diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index cf823057d22ae..46c90a13e31d5 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -124,6 +124,23 @@ int dom_parent_node_child_element_count(dom_object *obj, zval *retval) } /* }}} */ +static bool dom_is_node_in_list(const zval *nodes, int nodesc, const xmlNodePtr node_to_find) +{ + for (int i = 0; i < nodesc; i++) { + if (Z_TYPE(nodes[i]) == IS_OBJECT) { + const zend_class_entry *ce = Z_OBJCE(nodes[i]); + + if (instanceof_function(ce, dom_node_class_entry)) { + if (dom_object_get_node(Z_DOMOBJ_P(nodes + i)) == node_to_find) { + return true; + } + } + } + } + + return false; +} + xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNode, zval *nodes, int nodesc) { int i; @@ -177,17 +194,16 @@ xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNod goto hierarchy_request_err; } - /* - * xmlNewDocText function will always returns same address to the second parameter if the parameters are greater than or equal to three. - * If it's text, that's fine, but if it's an object, it can cause invalid pointer because many new nodes point to the same memory address. - * So we must copy the new node to avoid this situation. - */ - if (nodesc > 1) { + /* Citing from the docs (https://gnome.pages.gitlab.gnome.org/libxml2/devhelp/libxml2-tree.html#xmlAddChild): + * "Add a new node to @parent, at the end of the child (or property) list merging adjacent TEXT nodes (in which case @cur is freed)". + * So we must take a copy if this situation arises to prevent a use-after-free. 
*/ + bool will_free = newNode->type == XML_TEXT_NODE && fragment->last && fragment->last->type == XML_TEXT_NODE; + if (will_free) { newNode = xmlCopyNode(newNode, 1); } if (!xmlAddChild(fragment, newNode)) { - if (nodesc > 1) { + if (will_free) { xmlFreeNode(newNode); } goto hierarchy_request_err; @@ -303,25 +319,64 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) xmlFree(fragment); } +static void dom_pre_insert(xmlNodePtr insertion_point, xmlNodePtr parentNode, xmlNodePtr newchild, xmlNodePtr fragment) +{ + if (!insertion_point) { + /* Place it as last node */ + if (parentNode->children) { + /* There are children */ + fragment->last->prev = parentNode->last; + newchild->prev = parentNode->last->prev; + parentNode->last->next = newchild; + } else { + /* No children, because they moved out when they became a fragment */ + parentNode->children = newchild; + parentNode->last = newchild; + } + } else { + /* Insert fragment before insertion_point */ + fragment->last->next = insertion_point; + if (insertion_point->prev) { + insertion_point->prev->next = newchild; + newchild->prev = insertion_point->prev; + } + insertion_point->prev = newchild; + if (parentNode->children == insertion_point) { + parentNode->children = newchild; + } + } +} + void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) { + /* Spec link: https://dom.spec.whatwg.org/#dom-childnode-after */ + xmlNode *prevsib = dom_object_get_node(context); xmlNodePtr newchild, parentNode; - xmlNode *fragment, *nextsib; + xmlNode *fragment; xmlDoc *doc; - bool afterlastchild; - - int stricterror = dom_get_strict_error(context->document); - if (!prevsib->parent) { - php_dom_throw_error(NO_MODIFICATION_ALLOWED_ERR, stricterror); + /* Spec step 1 */ + parentNode = prevsib->parent; + /* Spec step 2 */ + if (!parentNode) { + int stricterror = dom_get_strict_error(context->document); + php_dom_throw_error(HIERARCHY_REQUEST_ERR, stricterror); return; } + /* Spec step 3: find 
first following child not in nodes; otherwise null */ + xmlNodePtr viable_next_sibling = prevsib->next; + while (viable_next_sibling) { + if (!dom_is_node_in_list(nodes, nodesc, viable_next_sibling)) { + break; + } + viable_next_sibling = viable_next_sibling->next; + } + doc = prevsib->doc; - parentNode = prevsib->parent; - nextsib = prevsib->next; - afterlastchild = (nextsib == NULL); + + /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); if (fragment == NULL) { @@ -331,40 +386,9 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) newchild = fragment->children; if (newchild) { - /* first node and last node are both both parameters to DOMElement::after() method so nextsib and prevsib are null. */ - if (!parentNode->children) { - prevsib = nextsib = NULL; - } else if (afterlastchild) { - /* - * The new node will be inserted after last node, prevsib is last node. - * The first node is the parameter to DOMElement::after() if parentNode->children == prevsib is true - * and prevsib does not change, otherwise prevsib is parentNode->last (first node). - */ - prevsib = parentNode->children == prevsib ? prevsib : parentNode->last; - } else { - /* - * The new node will be inserted after first node, prevsib is first node. - * The first node is not the parameter to DOMElement::after() if parentNode->children == prevsib is true - * and prevsib does not change otherwise prevsib is null to mean that parentNode->children is the new node. - */ - prevsib = parentNode->children == prevsib ? 
prevsib : NULL; - } - - if (prevsib) { - fragment->last->next = prevsib->next; - if (prevsib->next) { - prevsib->next->prev = fragment->last; - } - prevsib->next = newchild; - } else { - parentNode->children = newchild; - if (nextsib) { - fragment->last->next = nextsib; - nextsib->prev = fragment->last; - } - } + /* Step 5: place fragment into the parent before viable_next_sibling */ + dom_pre_insert(viable_next_sibling, parentNode, newchild, fragment); - newchild->prev = prevsib; dom_fragment_assign_parent_node(parentNode, fragment); dom_reconcile_ns(doc, newchild); } @@ -374,17 +398,34 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) { + /* Spec link: https://dom.spec.whatwg.org/#dom-childnode-before */ + xmlNode *nextsib = dom_object_get_node(context); - xmlNodePtr newchild, prevsib, parentNode; - xmlNode *fragment, *afternextsib; + xmlNodePtr newchild, parentNode; + xmlNode *fragment; xmlDoc *doc; - bool beforefirstchild; - doc = nextsib->doc; - prevsib = nextsib->prev; - afternextsib = nextsib->next; + /* Spec step 1 */ parentNode = nextsib->parent; - beforefirstchild = !prevsib; + /* Spec step 2 */ + if (!parentNode) { + int stricterror = dom_get_strict_error(context->document); + php_dom_throw_error(HIERARCHY_REQUEST_ERR, stricterror); + return; + } + + /* Spec step 3: find first preceding sibling not in nodes; otherwise null */ + xmlNodePtr viable_previous_sibling = nextsib->prev; + while (viable_previous_sibling) { + if (!dom_is_node_in_list(nodes, nodesc, viable_previous_sibling)) { + break; + } + viable_previous_sibling = viable_previous_sibling->prev; + } + + doc = nextsib->doc; + + /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); if (fragment == NULL) { @@ -394,37 +435,14 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) newchild = fragment->children;
if (newchild) { - /* first node and last node are both both parameters to DOMElement::before() method so nextsib is null. */ - if (!parentNode->children) { - nextsib = NULL; - } else if (beforefirstchild) { - /* - * The new node will be inserted before first node, nextsib is first node and afternextsib is last node. - * The first node is not the parameter to DOMElement::before() if parentNode->children == nextsib is true - * and nextsib does not change, otherwise nextsib is the last node. - */ - nextsib = parentNode->children == nextsib ? nextsib : afternextsib; - } else { - /* - * The new node will be inserted before last node, prevsib is first node and nestsib is last node. - * The first node is not the parameter to DOMElement::before() if parentNode->children == prevsib is true - * but last node may be, so use prevsib->next to determine the value of nextsib, otherwise nextsib does not change. - */ - nextsib = parentNode->children == prevsib ? prevsib->next : nextsib; - } - - if (parentNode->children == nextsib) { - parentNode->children = newchild; + /* Step 5: if viable_previous_sibling is null, set it to the parent's first child, otherwise viable_previous_sibling's next sibling */ + if (!viable_previous_sibling) { + viable_previous_sibling = parentNode->children; } else { - prevsib->next = newchild; - } - - fragment->last->next = nextsib; - if (nextsib) { - nextsib->prev = fragment->last; + viable_previous_sibling = viable_previous_sibling->next; } - - newchild->prev = prevsib; + /* Step 6: place fragment into the parent after viable_previous_sibling */ + dom_pre_insert(viable_previous_sibling, parentNode, newchild, fragment); dom_fragment_assign_parent_node(parentNode, fragment); dom_reconcile_ns(doc, newchild); diff --git a/ext/dom/tests/bug80602.phpt b/ext/dom/tests/bug80602.phpt index 9f041f686f516..844d829cb08d0 100644 --- a/ext/dom/tests/bug80602.phpt +++ b/ext/dom/tests/bug80602.phpt @@ -8,84 +8,84 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); 
$target = $doc->documentElement->firstChild; $target->before($target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "1 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "2 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "3 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($doc->documentElement->firstChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "4 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($target, $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "5 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($doc->documentElement->lastChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "6 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($target, $doc->documentElement->firstChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "7 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($doc->documentElement->firstChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "8 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new 
\DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before('bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "9 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before('bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "10 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($target, 'bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "11 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before('bar', $target, 'baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "12 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before('bar', 'baz', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "13 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -93,19 +93,19 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($target, 'bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "14 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before('bar', $target, 'baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "15 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before('bar', 'baz', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "16 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -113,21 +113,21 @@ $doc = new \DOMDocument(); 
$doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before('bar', $target, $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "17 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($target, 'bar', $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "18 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($target, $doc->documentElement->lastChild, 'bar'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "19 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -136,43 +136,43 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before('bar', $doc->documentElement->firstChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "20 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($doc->documentElement->firstChild, 'bar', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "21 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($doc->documentElement->firstChild, $target, 'bar'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "22 ", $doc->saveXML($doc->documentElement).PHP_EOL; ?> --EXPECTF-- -foo -foo -foo -foo -foo -foo -foo -foo -barbazfoo -foobarbaz -foobarbaz -barfoobaz -barbazfoo -foobarbaz -foobarbaz -foobarbaz -barfoo -foobar -foobar -barfoo -foobar -foobar +1 foo +2 foo +3 foo +4 foo +5 foo +6 foo +7 foo +8 foo +9 barbazfoo +10 foobarbaz +11 foobarbaz +12 barfoobaz +13 barbazfoo +14 foobarbaz +15 foobarbaz +16 foobarbaz +17 barfoo +18 
foobar +19 foobar +20 barfoo +21 foobar +22 foobar diff --git a/ext/dom/tests/bug80602_2.phpt b/ext/dom/tests/bug80602_2.phpt index 1151417c0f845..7c5070f51424c 100644 --- a/ext/dom/tests/bug80602_2.phpt +++ b/ext/dom/tests/bug80602_2.phpt @@ -8,84 +8,84 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "1 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "2 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "3 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($doc->documentElement->firstChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "4 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target, $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "5 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($doc->documentElement->lastChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "6 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($target, $doc->documentElement->firstChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "7 ", 
$doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($doc->documentElement->firstChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "8 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after('bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "9 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after('bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "10 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target, 'bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "11 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after('bar', $target, 'baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "12 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after('bar', 'baz', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "13 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -93,19 +93,19 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($target, 'bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "14 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after('bar', $target, 'baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "15 ", 
$doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after('bar', 'baz', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "16 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -113,21 +113,21 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after('bar', $target, $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "17 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target, 'bar', $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "18 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target, $doc->documentElement->lastChild, 'bar'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "19 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -136,43 +136,43 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after('bar', $doc->documentElement->firstChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "20 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($doc->documentElement->firstChild, 'bar', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "21 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($doc->documentElement->firstChild, $target, 'bar'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "22 ", $doc->saveXML($doc->documentElement).PHP_EOL; ?> --EXPECTF-- -foo -foo -foo -foo -foo 
-foo -foo -foo -foobarbaz -foobarbaz -foobarbaz -barfoobaz -barbazfoo -foobarbaz -foobarbaz -foobarbaz -barfoo -foobar -foobar -barfoo -foobar -foobar +1 foo +2 foo +3 foo +4 foo +5 foo +6 foo +7 foo +8 foo +9 foobarbaz +10 foobarbaz +11 foobarbaz +12 barfoobaz +13 barbazfoo +14 foobarbaz +15 foobarbaz +16 foobarbaz +17 barfoo +18 foobar +19 foobar +20 barfoo +21 foobar +22 foobar diff --git a/ext/dom/tests/bug80602_3.phpt b/ext/dom/tests/bug80602_3.phpt new file mode 100644 index 0000000000000..f9bf67e778da5 --- /dev/null +++ b/ext/dom/tests/bug80602_3.phpt @@ -0,0 +1,120 @@ +--TEST-- +Bug #80602 (Segfault when using DOMChildNode::before()) - use-after-free variation +--FILE-- +loadXML('foo'); +$target = $doc->documentElement->lastChild; +$target->before('bar', $doc->documentElement->firstChild, 'baz'); +echo $doc->saveXML($doc->documentElement), "\n"; +var_dump($target); + +$doc = new \DOMDocument(); +$doc->loadXML('foo'); +$target = $doc->documentElement->lastChild; +// Note: after instead of before +$target->after('bar', $doc->documentElement->firstChild, 'baz'); +echo $doc->saveXML($doc->documentElement), "\n"; +var_dump($target); + +?> +--EXPECTF-- +barfoobaz +object(DOMElement)#3 (23) { + ["schemaTypeInfo"]=> + NULL + ["tagName"]=> + string(4) "last" + ["firstElementChild"]=> + NULL + ["lastElementChild"]=> + NULL + ["childElementCount"]=> + int(0) + ["previousElementSibling"]=> + NULL + ["nextElementSibling"]=> + NULL + ["nodeName"]=> + string(4) "last" + ["nodeValue"]=> + string(0) "" + ["nodeType"]=> + int(1) + ["parentNode"]=> + string(22) "(object value omitted)" + ["childNodes"]=> + string(22) "(object value omitted)" + ["firstChild"]=> + NULL + ["lastChild"]=> + NULL + ["previousSibling"]=> + string(22) "(object value omitted)" + ["nextSibling"]=> + NULL + ["attributes"]=> + string(22) "(object value omitted)" + ["ownerDocument"]=> + string(22) "(object value omitted)" + ["namespaceURI"]=> + NULL + ["prefix"]=> + string(0) "" + ["localName"]=> + 
string(4) "last" + ["baseURI"]=> + string(%d) %s + ["textContent"]=> + string(0) "" +} +barfoobaz +object(DOMElement)#2 (23) { + ["schemaTypeInfo"]=> + NULL + ["tagName"]=> + string(4) "last" + ["firstElementChild"]=> + NULL + ["lastElementChild"]=> + NULL + ["childElementCount"]=> + int(0) + ["previousElementSibling"]=> + NULL + ["nextElementSibling"]=> + NULL + ["nodeName"]=> + string(4) "last" + ["nodeValue"]=> + string(0) "" + ["nodeType"]=> + int(1) + ["parentNode"]=> + string(22) "(object value omitted)" + ["childNodes"]=> + string(22) "(object value omitted)" + ["firstChild"]=> + NULL + ["lastChild"]=> + NULL + ["previousSibling"]=> + NULL + ["nextSibling"]=> + string(22) "(object value omitted)" + ["attributes"]=> + string(22) "(object value omitted)" + ["ownerDocument"]=> + string(22) "(object value omitted)" + ["namespaceURI"]=> + NULL + ["prefix"]=> + string(0) "" + ["localName"]=> + string(4) "last" + ["baseURI"]=> + string(%d) %s + ["textContent"]=> + string(0) "" +} diff --git a/ext/dom/tests/bug80602_4.phpt b/ext/dom/tests/bug80602_4.phpt new file mode 100644 index 0000000000000..a1df8d10caa31 --- /dev/null +++ b/ext/dom/tests/bug80602_4.phpt @@ -0,0 +1,33 @@ +--TEST-- +Bug #80602 (Segfault when using DOMChildNode::before()) - after text merge variation +--FILE-- +loadXML('foobar'); +$foo = $doc->firstChild->firstChild; +$bar = $doc->firstChild->lastChild; + +$foo->after($bar); + +var_dump($doc->saveXML()); + +$foo->nodeValue = "x"; + +var_dump($doc->saveXML()); + +$bar->nodeValue = "y"; + +var_dump($doc->saveXML()); + +?> +--EXPECT-- +string(43) " +foobar +" +string(41) " +xbar +" +string(39) " +xy +" diff --git a/ext/dom/tests/gh11288.phpt b/ext/dom/tests/gh11288.phpt new file mode 100644 index 0000000000000..f70bea80d9085 --- /dev/null +++ b/ext/dom/tests/gh11288.phpt @@ -0,0 +1,67 @@ +--TEST-- +GH-11288 (Error: Couldn't fetch DOMElement introduced in 8.2.6, 8.1.19) +--FILE-- + + +Loremipsum + +HTML; + +$dom = new DOMDocument(); 
+$dom->loadHTML($html); + +$spans = iterator_to_array($dom->getElementsByTagName('span')->getIterator()); +foreach ($spans as $span) { + if ('unwrap_me' === $span->getAttribute('class')) { + $fragment = $dom->createDocumentFragment(); + $fragment->append(...$span->childNodes); + $span->parentNode?->replaceChild($fragment, $span); + } +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); + +$html = << + +Loremipsum + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$spans = iterator_to_array($dom->getElementsByTagName('span')->getIterator()); +foreach ($spans as $span) { + if ('unwrap_me' === $span->getAttribute('class')) { + $span->replaceWith(...$span->childNodes); + } +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); + +$html = << + +Loremipsum + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$spans = iterator_to_array($dom->getElementsByTagName('span')->getIterator()); +foreach ($spans as $span) { + if ('unwrap_me' === $span->getAttribute('class')) { + $span->replaceWith('abc'); + } +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); +?> +--EXPECT-- +string(108) "Loremipsum" +string(108) "Loremipsum" +string(44) "abc" diff --git a/ext/dom/tests/gh11289.phpt b/ext/dom/tests/gh11289.phpt new file mode 100644 index 0000000000000..7771a486bd66b --- /dev/null +++ b/ext/dom/tests/gh11289.phpt @@ -0,0 +1,28 @@ +--TEST-- +GH-11289 (DOMException: Not Found Error introduced in 8.2.6, 8.1.19) +--FILE-- + + + +
+ + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$divs = iterator_to_array($dom->getElementsByTagName('div')->getIterator()); +foreach ($divs as $div) { + $fragment = $dom->createDocumentFragment(); + $fragment->appendXML('

Hi!

'); + $div->replaceWith(...$fragment->childNodes); +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); +?> +--EXPECT-- +string(55) "

Hi!

" diff --git a/ext/dom/tests/gh11290.phpt b/ext/dom/tests/gh11290.phpt new file mode 100644 index 0000000000000..2900720301041 --- /dev/null +++ b/ext/dom/tests/gh11290.phpt @@ -0,0 +1,27 @@ +--TEST-- +GH-11290 (DOMElement::replaceWith causes crash) +--FILE-- + + + +

Loremipsumdolor

+ + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$spans = iterator_to_array($dom->getElementsByTagName('span')->getIterator()); +foreach ($spans as $span) { + if ('unwrap_me' === $span->getAttribute('class')) { + $span->replaceWith(...$span->childNodes); + } +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); +?> +--EXPECT-- +string(67) "

Loremipsumdolor

" diff --git a/ext/dom/tests/gh9142.phpt b/ext/dom/tests/gh9142.phpt new file mode 100644 index 0000000000000..f72dfa823f38c --- /dev/null +++ b/ext/dom/tests/gh9142.phpt @@ -0,0 +1,20 @@ +--TEST-- +GH-9142 (DOMChildNode replaceWith() double-free error when replacing elements not separated by any whitespace) +--FILE-- +OneTwo'; + +($dom = new DOMDocument('1.0', 'UTF-8'))->loadHTML($document); + +foreach ((new DOMXPath($dom))->query('//var') as $var) { + $var->replaceWith($dom->createElement('p', $var->nodeValue)); +} + +var_dump($dom->saveHTML()); + +?> +--EXPECT-- +string(154) " +

One

Two

+" From 5b033b0def799cf89e4f750f6100c2c72e8aa550 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Fri, 26 May 2023 00:30:58 +0200 Subject: [PATCH 083/168] Fix zend_jit_stop_counter_handlers() performance issues with protect_memory=1 The function repeatedly calls mprotect() which is extremely slow. In our community build, the Laravel tests went from ~6 minutes to ~4 hours. This issue only occurs with opcache.protect_memory=1. Closes GH-11323 --- ext/opcache/jit/zend_jit_trace.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ext/opcache/jit/zend_jit_trace.c b/ext/opcache/jit/zend_jit_trace.c index b0a86318a4ec8..b1fbc7b6518cc 100644 --- a/ext/opcache/jit/zend_jit_trace.c +++ b/ext/opcache/jit/zend_jit_trace.c @@ -7175,8 +7175,6 @@ static void zend_jit_stop_hot_trace_counters(zend_op_array *op_array) uint32_t i; jit_extension = (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); - zend_shared_alloc_lock(); - SHM_UNPROTECT(); for (i = 0; i < op_array->last; i++) { /* Opline with Jit-ed code handler is skipped. */ if (jit_extension->trace_info[i].trace_flags & @@ -7188,8 +7186,6 @@ static void zend_jit_stop_hot_trace_counters(zend_op_array *op_array) op_array->opcodes[i].handler = jit_extension->trace_info[i].orig_handler; } } - SHM_PROTECT(); - zend_shared_alloc_unlock(); } /* Get the tracing op_array. */ @@ -7228,6 +7224,9 @@ static void zend_jit_stop_persistent_script(zend_persistent_script *script) /* Get all scripts which are accelerated by JIT */ static void zend_jit_stop_counter_handlers(void) { + zend_shared_alloc_lock(); + /* mprotect has an extreme overhead, avoid calls to it for every function. 
*/ + SHM_UNPROTECT(); for (uint32_t i = 0; i < ZCSG(hash).max_num_entries; i++) { zend_accel_hash_entry *cache_entry; for (cache_entry = ZCSG(hash).hash_table[i]; cache_entry; cache_entry = cache_entry->next) { @@ -7237,6 +7236,8 @@ static void zend_jit_stop_counter_handlers(void) zend_jit_stop_persistent_script(script); } } + SHM_PROTECT(); + zend_shared_alloc_unlock(); } static void zend_jit_blacklist_root_trace(const zend_op *opline, size_t offset) From b5a07a7501f9cc43170c9675f20a1dcf34b360e5 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Fri, 26 May 2023 11:37:01 +0200 Subject: [PATCH 084/168] [skip ci] Fix race condition in readline test The var_dump can be preceded by the "Interactive shell" log. The var_dump does not add much to the test anyway, so just remove it. --- ext/readline/tests/bug77812-readline.phpt | 2 -- 1 file changed, 2 deletions(-) diff --git a/ext/readline/tests/bug77812-readline.phpt b/ext/readline/tests/bug77812-readline.phpt index a18686781718b..a2d6c212c536a 100644 --- a/ext/readline/tests/bug77812-readline.phpt +++ b/ext/readline/tests/bug77812-readline.phpt @@ -13,7 +13,6 @@ $php = getenv('TEST_PHP_EXECUTABLE'); $ini = getenv('TEST_PHP_EXTRA_ARGS'); $descriptorspec = [['pipe', 'r'], STDOUT, STDERR]; $proc = proc_open("$php $ini -a", $descriptorspec, $pipes); -var_dump($proc); fwrite($pipes[0], "echo << --EXPECTF-- -resource(%d) of type (process) Interactive shell php > echo << Date: Thu, 25 May 2023 23:39:48 +0200 Subject: [PATCH 085/168] Add tests for list() in assignment in array literals Array literals will constant evaluate their elements. These can include assignments, even though these are not valid constant expressions. The lhs of assignments can be a list() element (or []) which is parsed as an array with a special flag. 
--- Zend/tests/gh11320_1.phpt | 28 ++++++++++++++++++++++++++++ Zend/tests/gh11320_2.phpt | 12 ++++++++++++ Zend/tests/gh11320_3.phpt | 8 ++++++++ 3 files changed, 48 insertions(+) create mode 100644 Zend/tests/gh11320_1.phpt create mode 100644 Zend/tests/gh11320_2.phpt create mode 100644 Zend/tests/gh11320_3.phpt diff --git a/Zend/tests/gh11320_1.phpt b/Zend/tests/gh11320_1.phpt new file mode 100644 index 0000000000000..f9beef76ccf6d --- /dev/null +++ b/Zend/tests/gh11320_1.phpt @@ -0,0 +1,28 @@ +--TEST-- +GH-11320: Array literals can contain list() assignments +--FILE-- + list($x, $y) = getList()]); +var_dump([$index => [$x, $y] = getList()]); +?> +--EXPECT-- +array(1) { + [1]=> + array(2) { + [0]=> + int(2) + [1]=> + int(3) + } +} +array(1) { + [1]=> + array(2) { + [0]=> + int(2) + [1]=> + int(3) + } +} diff --git a/Zend/tests/gh11320_2.phpt b/Zend/tests/gh11320_2.phpt new file mode 100644 index 0000000000000..5173c518f387f --- /dev/null +++ b/Zend/tests/gh11320_2.phpt @@ -0,0 +1,12 @@ +--TEST-- +GH-11320: list() expressions can contain magic constants +--FILE-- + $foo) = [__FILE__ => 'foo']]; +var_dump($foo); +[[__FILE__ => $foo] = [__FILE__ => 'foo']]; +var_dump($foo); +?> +--EXPECT-- +string(3) "foo" +string(3) "foo" diff --git a/Zend/tests/gh11320_3.phpt b/Zend/tests/gh11320_3.phpt new file mode 100644 index 0000000000000..3c3ed336d0b72 --- /dev/null +++ b/Zend/tests/gh11320_3.phpt @@ -0,0 +1,8 @@ +--TEST-- +GH-11320: list() must not appear as a standalone array element +--FILE-- + +--EXPECTF-- +Fatal error: Cannot use list() as standalone expression in %s on line %d From fbe6696d49980ec52018fb4281798fb873bf0a54 Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Fri, 26 May 2023 10:46:31 +0200 Subject: [PATCH 086/168] Revert "Use zend_ast_apply in zend_eval_const_expr (#11261)" This reverts commit 1c733c8bbc295dbb0634371cc40952c1528f9038. 
Fixes GH-11320 --- Zend/zend_compile.c | 70 +++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index eeb940060bba4..5984206a8b136 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -10597,7 +10597,7 @@ static zend_op *zend_delayed_compile_var(znode *result, zend_ast *ast, uint32_t } /* }}} */ -static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ +static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ { zend_ast *ast = *ast_ptr; zval result; @@ -10606,25 +10606,10 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ return; } - /* Set isset fetch indicator here, opcache disallows runtime altering of the AST */ - if (ast->kind == ZEND_AST_DIM - && (ast->attr & ZEND_DIM_IS) - && ast->child[0]->kind == ZEND_AST_DIM) { - ast->child[0]->attr |= ZEND_DIM_IS; - } - - /* We don't want to evaluate the class name of ZEND_AST_CLASS_NAME nodes. We need to be able to - * differenciate between literal class names and expressions that evaluate to strings. Strings - * are not actually allowed in ::class expressions. - * - * ZEND_AST_COALESCE and ZEND_AST_CONDITIONAL will manually evaluate only the children for the - * taken paths. 
*/ - if (ast->kind != ZEND_AST_CLASS_NAME && ast->kind != ZEND_AST_COALESCE && ast->kind != ZEND_AST_CONDITIONAL) { - zend_ast_apply(ast, zend_eval_const_expr_inner, ctx); - } - switch (ast->kind) { case ZEND_AST_BINARY_OP: + zend_eval_const_expr(&ast->child[0]); + zend_eval_const_expr(&ast->child[1]); if (ast->child[0]->kind != ZEND_AST_ZVAL || ast->child[1]->kind != ZEND_AST_ZVAL) { return; } @@ -10637,6 +10622,8 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ break; case ZEND_AST_GREATER: case ZEND_AST_GREATER_EQUAL: + zend_eval_const_expr(&ast->child[0]); + zend_eval_const_expr(&ast->child[1]); if (ast->child[0]->kind != ZEND_AST_ZVAL || ast->child[1]->kind != ZEND_AST_ZVAL) { return; } @@ -10648,6 +10635,8 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ case ZEND_AST_OR: { bool child0_is_true, child1_is_true; + zend_eval_const_expr(&ast->child[0]); + zend_eval_const_expr(&ast->child[1]); if (ast->child[0]->kind != ZEND_AST_ZVAL) { return; } @@ -10671,6 +10660,7 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ break; } case ZEND_AST_UNARY_OP: + zend_eval_const_expr(&ast->child[0]); if (ast->child[0]->kind != ZEND_AST_ZVAL) { return; } @@ -10681,6 +10671,7 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ break; case ZEND_AST_UNARY_PLUS: case ZEND_AST_UNARY_MINUS: + zend_eval_const_expr(&ast->child[0]); if (ast->child[0]->kind != ZEND_AST_ZVAL) { return; } @@ -10751,6 +10742,13 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ zend_error(E_COMPILE_ERROR, "Array and string offset access syntax with curly braces is no longer supported"); } + /* Set isset fetch indicator here, opcache disallows runtime altering of the AST */ + if ((ast->attr & ZEND_DIM_IS) && ast->child[0]->kind == ZEND_AST_DIM) { + ast->child[0]->attr |= ZEND_DIM_IS; + } + + zend_eval_const_expr(&ast->child[0]); + 
zend_eval_const_expr(&ast->child[1]); if (ast->child[0]->kind != ZEND_AST_ZVAL || ast->child[1]->kind != ZEND_AST_ZVAL) { return; } @@ -10828,6 +10826,9 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ zend_ast *name_ast; zend_string *resolved_name; + zend_eval_const_expr(&ast->child[0]); + zend_eval_const_expr(&ast->child[1]); + if (UNEXPECTED(ast->child[1]->kind != ZEND_AST_ZVAL || Z_TYPE_P(zend_ast_get_zval(ast->child[1])) != IS_STRING)) { return; @@ -10857,6 +10858,33 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ } break; } + // TODO: We should probably use zend_ast_apply to recursively walk nodes without + // special handling. It is required that all nodes that are part of a const expr + // are visited. Probably we should be distinguishing evaluation of const expr and + // normal exprs here. + case ZEND_AST_ARG_LIST: + { + zend_ast_list *list = zend_ast_get_list(ast); + for (uint32_t i = 0; i < list->children; i++) { + zend_eval_const_expr(&list->child[i]); + } + return; + } + case ZEND_AST_NEW: + zend_eval_const_expr(&ast->child[0]); + zend_eval_const_expr(&ast->child[1]); + return; + case ZEND_AST_NAMED_ARG: + zend_eval_const_expr(&ast->child[1]); + return; + case ZEND_AST_CONST_ENUM_INIT: + zend_eval_const_expr(&ast->child[2]); + return; + case ZEND_AST_PROP: + case ZEND_AST_NULLSAFE_PROP: + zend_eval_const_expr(&ast->child[0]); + zend_eval_const_expr(&ast->child[1]); + return; default: return; } @@ -10865,9 +10893,3 @@ static void zend_eval_const_expr_inner(zend_ast **ast_ptr, void *ctx) /* {{{ */ *ast_ptr = zend_ast_create_zval(&result); } /* }}} */ - - -static void zend_eval_const_expr(zend_ast **ast_ptr) /* {{{ */ -{ - zend_eval_const_expr_inner(ast_ptr, NULL); -} From 1dfa277a966cff1988e720146de21d1bcc09e0f5 Mon Sep 17 00:00:00 2001 From: Daniil Gentili Date: Thu, 25 May 2023 20:26:28 +0200 Subject: [PATCH 087/168] Fix GCC 12 compilation on riscv64 Close GH-11321 --- NEWS | 3 +++ 
configure.ac | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/NEWS b/NEWS index 93b363cfac79a..3b0d012995477 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.1.21 +- Core: + . Fixed build for the riscv64 architecture/GCC 12. (Daniil Gentili) + - DOM: . Fixed bugs GH-11288 and GH-11289 and GH-11290 and GH-9142 (DOMExceptions and segfaults with replaceWith). (nielsdos) diff --git a/configure.ac b/configure.ac index 661df89a03de5..baf8651f044ad 100644 --- a/configure.ac +++ b/configure.ac @@ -365,6 +365,16 @@ if test "$ac_cv_func_dlopen" = "yes"; then fi AC_CHECK_LIB(m, sin) +case $host_alias in + riscv64*) + AC_CHECK_LIB(atomic, __atomic_exchange_1, [ + PHP_ADD_LIBRARY(atomic) + ], [ + AC_MSG_ERROR([Problem with enabling atomic. Please check config.log for details.]) + ]) + ;; +esac + dnl Check for inet_aton in -lc, -lbind and -lresolv. PHP_CHECK_FUNC(inet_aton, resolv, bind) From f249958cd3d83220d04409ce231bf77ff6d68b9b Mon Sep 17 00:00:00 2001 From: Ilija Tovilo Date: Sat, 27 May 2023 19:16:39 +0200 Subject: [PATCH 088/168] [skip ci] Add more patterns to run-tests.php retry list CURL: 404: Page Not Found IMAP: Can't create a temporary mailbox: [ALREADYEXISTS] Mailbox already exists Sockets: socket_bind(): Unable to bind address [98]: Address already in use --- run-tests.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run-tests.php b/run-tests.php index 14eb45eecd609..63ae57a35c8c0 100755 --- a/run-tests.php +++ b/run-tests.php @@ -2877,7 +2877,7 @@ function run_test(string $php, $file, array $env): string function error_may_be_retried(string $output): bool { - return preg_match('((timed out)|(connection refused))i', $output) === 1; + return preg_match('((timed out)|(connection refused)|(404: page not found)|(address already in use)|(mailbox already exists))i', $output) === 1; } /** From b8840115ffdd6b3d9e3554b5bf83b9e0a0638f4c 
Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 11:41:42 +0200 Subject: [PATCH 089/168] Shrink libxml_doc_props struct (#11326) These values are only ever bools, store them as bools. Reduces the size from 40 bytes to 16 bytes on my system. --- ext/libxml/php_libxml.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index ff8a634e0cf9b..de9b49d2ce3b6 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -47,14 +47,14 @@ ZEND_BEGIN_MODULE_GLOBALS(libxml) ZEND_END_MODULE_GLOBALS(libxml) typedef struct _libxml_doc_props { - int formatoutput; - int validateonparse; - int resolveexternals; - int preservewhitespace; - int substituteentities; - int stricterror; - int recover; HashTable *classmap; + bool formatoutput; + bool validateonparse; + bool resolveexternals; + bool preservewhitespace; + bool substituteentities; + bool stricterror; + bool recover; } libxml_doc_props; typedef struct _php_libxml_ref_obj { From c473787abb486c3f25bd551252b36d617267738f Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 6 Jan 2023 19:51:49 +0100 Subject: [PATCH 090/168] Fix GH-10234: Setting DOMAttr::textContent results in an empty attribute value We can't directly call xmlNodeSetContent, because it might encode the string through xmlStringLenGetNodeList for types XML_DOCUMENT_FRAG_NODE, XML_ELEMENT_NODE, XML_ATTRIBUTE_NODE. In these cases we need to use a text node to avoid the encoding. For the other cases, we *can* rely on xmlNodeSetContent because it is either a no-op, or handles the content without encoding and clears the properties field if needed. The test was taken from the issue report, for the test: Co-authored-by: ThomasWeinert Closes GH-10245. 
--- NEWS | 2 + ext/dom/node.c | 19 ++++++-- ext/dom/tests/gh10234.phpt | 93 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 ext/dom/tests/gh10234.phpt diff --git a/NEWS b/NEWS index 3b0d012995477..40fb3e328b7d1 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,8 @@ PHP NEWS - DOM: . Fixed bugs GH-11288 and GH-11289 and GH-11290 and GH-9142 (DOMExceptions and segfaults with replaceWith). (nielsdos) + . Fixed bug GH-10234 (Setting DOMAttr::textContent results in an empty + attribute value). (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/node.c b/ext/dom/node.c index 880c8cfe3e794..b291ccc99a308 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -769,17 +769,28 @@ int dom_node_text_content_write(dom_object *obj, zval *newval) return FAILURE; } - if (nodep->type == XML_ELEMENT_NODE || nodep->type == XML_ATTRIBUTE_NODE) { + const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str); + int type = nodep->type; + + /* We can't directly call xmlNodeSetContent, because it might encode the string through + * xmlStringLenGetNodeList for types XML_DOCUMENT_FRAG_NODE, XML_ELEMENT_NODE, XML_ATTRIBUTE_NODE. + * See tree.c:xmlNodeSetContent in libxml. + * In these cases we need to use a text node to avoid the encoding. + * For the other cases, we *can* rely on xmlNodeSetContent because it is either a no-op, or handles + * the content without encoding. 
*/ + if (type == XML_DOCUMENT_FRAG_NODE || type == XML_ELEMENT_NODE || type == XML_ATTRIBUTE_NODE) { if (nodep->children) { node_list_unlink(nodep->children); php_libxml_node_free_list((xmlNodePtr) nodep->children); nodep->children = NULL; } + + xmlNode *textNode = xmlNewText(xmlChars); + xmlAddChild(nodep, textNode); + } else { + xmlNodeSetContent(nodep, xmlChars); } - /* we have to use xmlNodeAddContent() to get the same behavior as with xmlNewText() */ - xmlNodeSetContent(nodep, (xmlChar *) ""); - xmlNodeAddContent(nodep, (xmlChar *) ZSTR_VAL(str)); zend_string_release_ex(str, 0); return SUCCESS; diff --git a/ext/dom/tests/gh10234.phpt b/ext/dom/tests/gh10234.phpt new file mode 100644 index 0000000000000..5edc8fc6c1ff1 --- /dev/null +++ b/ext/dom/tests/gh10234.phpt @@ -0,0 +1,93 @@ +--TEST-- +GH-10234 (Setting DOMAttr::textContent results in an empty attribute value.) +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); +$attribute = $document->documentElement->getAttributeNode('attribute'); + +echo "-- Attribute tests --\n"; + +var_dump($document->saveHTML()); +var_dump($attribute->textContent); + +$attribute->textContent = 'new value'; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = 'hello & world'; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = 'hi'; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = 'quote "test"'; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = "quote 'test'"; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = "quote '\"test\"'"; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +echo "-- Document element tests --\n"; + +$document->documentElement->textContent = 'hello & world'; +var_dump($document->documentElement->textContent); +var_dump($document->saveHTML()); + 
+$document->documentElement->textContent = 'hi'; +var_dump($document->documentElement->textContent); +var_dump($document->saveHTML()); + +$document->documentElement->textContent = 'quote "test"'; +var_dump($document->documentElement->textContent); +var_dump($document->saveHTML()); + +$document->documentElement->textContent = "quote 'test'"; +var_dump($document->documentElement->textContent); +var_dump($document->saveHTML()); +?> +--EXPECT-- +-- Attribute tests -- +string(38) " +" +string(5) "value" +string(9) "new value" +string(42) " +" +string(13) "hello & world" +string(50) " +" +string(9) "hi" +string(54) " +" +string(12) "quote "test"" +string(45) " +" +string(12) "quote 'test'" +string(45) " +" +string(14) "quote '"test"'" +string(57) " +" +-- Document element tests -- +string(13) "hello & world" +string(74) "hello & world +" +string(9) "hi" +string(78) "<b>hi</b> +" +string(12) "quote "test"" +string(69) "quote "test" +" +string(12) "quote 'test'" +string(69) "quote 'test' +" From b495a916a4f01886a11695c6fd94185655019e2c Mon Sep 17 00:00:00 2001 From: Kirill Nesmeyanov Date: Mon, 29 May 2023 16:45:00 +0300 Subject: [PATCH 091/168] Add string output escaping into zend dump (phpdbg + opcache debug) (#11337) * Add string output escaping into zend dump (phpdbg + opcache debug) * Use ZSTR_VAL macro instead direct string access * Move "escaped_string" into local switch/case scope * Add zend_string_release * Add Z_STR_P macro instead direct string access * Merge zend_string declaration and its assigment in one stmt --- Zend/Optimizer/zend_dump.c | 9 +++++++-- sapi/phpdbg/tests/print_001.phpt | 8 ++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/Zend/Optimizer/zend_dump.c b/Zend/Optimizer/zend_dump.c index bc697ba8ba9e5..9eaca19f18f57 100644 --- a/Zend/Optimizer/zend_dump.c +++ b/Zend/Optimizer/zend_dump.c @@ -23,6 +23,7 @@ #include "zend_func_info.h" #include "zend_call_graph.h" #include "zend_dump.h" +#include "ext/standard/php_string.h" void 
zend_dump_ht(HashTable *ht) { @@ -65,8 +66,12 @@ void zend_dump_const(const zval *zv) case IS_DOUBLE: fprintf(stderr, " float(%g)", Z_DVAL_P(zv)); break; - case IS_STRING: - fprintf(stderr, " string(\"%s\")", Z_STRVAL_P(zv)); + case IS_STRING:; + zend_string *escaped_string = php_addcslashes(Z_STR_P(zv), "\"\\", 2); + + fprintf(stderr, " string(\"%s\")", ZSTR_VAL(escaped_string)); + + zend_string_release(escaped_string); break; case IS_ARRAY: fprintf(stderr, " array(...)"); diff --git a/sapi/phpdbg/tests/print_001.phpt b/sapi/phpdbg/tests/print_001.phpt index c25c5178fef4c..a981cb0001f67 100644 --- a/sapi/phpdbg/tests/print_001.phpt +++ b/sapi/phpdbg/tests/print_001.phpt @@ -29,7 +29,7 @@ Foo\Bar::Foo: ; (lines=5, args=1, vars=1, tmps=1) ; %s:5-7 L0005 0000 CV0($bar) = RECV 1 -L0006 0001 INIT_NS_FCALL_BY_NAME 1 string("Foo\var_dump") +L0006 0001 INIT_NS_FCALL_BY_NAME 1 string("Foo\\var_dump") L0006 0002 SEND_VAR_EX CV0($bar) 1 L0006 0003 DO_FCALL L0007 0004 RETURN null @@ -44,10 +44,10 @@ prompt> [Context %s (9 ops)] $_main: ; (lines=9, args=0, vars=0, tmps=4) ; %s:1-21 -L0018 0000 V0 = NEW 0 string("Foo\Bar") +L0018 0000 V0 = NEW 0 string("Foo\\Bar") L0018 0001 DO_FCALL L0018 0002 INIT_METHOD_CALL 1 V0 string("Foo") -L0018 0003 SEND_VAL_EX string("test") 1 +L0018 0003 SEND_VAL_EX string("test \"quotes\"") 1 L0018 0004 DO_FCALL L0019 0005 INIT_FCALL %d %d string("foo") L0019 0006 SEND_VAL string("test") 1 @@ -72,6 +72,6 @@ namespace { var_dump(strrev($baz)); } - (new \Foo\Bar)->Foo("test"); + (new \Foo\Bar)->Foo('test "quotes"'); foo("test"); } From 761b9a44f8f097f77d0f96d479c485e9b11e51d6 Mon Sep 17 00:00:00 2001 From: divinity76 Date: Mon, 29 May 2023 16:53:00 +0200 Subject: [PATCH 092/168] Fix return value in stub file for DOMNodeList::item Not explicitly documenting the possibility of returning DOMElement causes the Intelephense linter (a popular PHP linter with ~9 million downloads: 
https://marketplace.visualstudio.com/items?itemName=bmewburn.vscode-intelephense-client) to think this code is bad: $xp->query("whatever")->item(0)->getAttribute("foo"); DOMNode does not have getAttribute (while DOMElement does). Documenting the DOMElement return type should fix Intelephense's linter. Closes GH-11342. --- NEWS | 1 + ext/dom/php_dom.stub.php | 2 +- ext/dom/php_dom_arginfo.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 40fb3e328b7d1..143418e2a30d2 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,7 @@ PHP NEWS and segfaults with replaceWith). (nielsdos) . Fixed bug GH-10234 (Setting DOMAttr::textContent results in an empty attribute value). (nielsdos) + . Fix return value in stub file for DOMNodeList::item. (divinity76) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index f26518c0ba8ec..45b54c21d6c2c 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -232,7 +232,7 @@ public function count(): int {} public function getIterator(): Iterator {} - /** @return DOMNode|DOMNameSpaceNode|null */ + /** @return DOMElement|DOMNode|DOMNameSpaceNode|null */ public function item(int $index) {} } diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index 2ac8ae45f2b26..d63b43e9b95f8 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 74698bea9c5e0635cf91345e8512b9677489510c */ + * Stub hash: a62e383b05df81ea245a7993215fb8ff4e1c7f9d */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) From bce536067c803c47e33508b5c85798e0ba038d46 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 16:16:01 +0200 Subject: [PATCH 093/168] Fix GH-11338: SplFileInfo empty getBasename with more than one slash Regressed in 13e4ce386bb7. Closes GH-11340. --- NEWS | 4 ++++ ext/spl/spl_directory.c | 4 +++- ext/spl/tests/gh11338.phpt | 47 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 ext/spl/tests/gh11338.phpt diff --git a/NEWS b/NEWS index 143418e2a30d2..e587b833339e6 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,10 @@ PHP NEWS . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) . Access violation on smm_shared_globals with ALLOC_FALLBACK. (KoudelkaB) +- SPL: + . Fixed bug GH-11338 (SplFileInfo empty getBasename with more than one + slash). (nielsdos) + - Standard: . Fix access on NULL pointer in array_merge_recursive(). (ilutov) . Fix exception handling in array_multisort(). 
(ilutov) diff --git a/ext/spl/spl_directory.c b/ext/spl/spl_directory.c index aefa2aa933e51..b00a1e66568e0 100644 --- a/ext/spl/spl_directory.c +++ b/ext/spl/spl_directory.c @@ -432,7 +432,9 @@ static void spl_filesystem_info_set_filename(spl_filesystem_object *intern, zend path_len = ZSTR_LEN(path); if (path_len > 1 && IS_SLASH_AT(ZSTR_VAL(path), path_len-1)) { - path_len--; + do { + path_len--; + } while (path_len > 1 && IS_SLASH_AT(ZSTR_VAL(path), path_len - 1)); intern->file_name = zend_string_init(ZSTR_VAL(path), path_len, 0); } else { intern->file_name = zend_string_copy(path); diff --git a/ext/spl/tests/gh11338.phpt b/ext/spl/tests/gh11338.phpt new file mode 100644 index 0000000000000..0a59cea9e7468 --- /dev/null +++ b/ext/spl/tests/gh11338.phpt @@ -0,0 +1,47 @@ +--TEST-- +GH-11338 (SplFileInfo empty getBasename with more than on slash) +--FILE-- +getBasename()); + var_dump($file->getFilename()); +} + +test('/dir/anotherdir/basedir//'); +test('/dir/anotherdir/basedir/'); +test('/dir/anotherdir/basedir'); +test('/dir/anotherdir//basedir'); +test('///'); +test('//'); +test('/'); +test(''); + +?> +--EXPECT-- +Testing: '/dir/anotherdir/basedir//' +string(7) "basedir" +string(7) "basedir" +Testing: '/dir/anotherdir/basedir/' +string(7) "basedir" +string(7) "basedir" +Testing: '/dir/anotherdir/basedir' +string(7) "basedir" +string(7) "basedir" +Testing: '/dir/anotherdir//basedir' +string(7) "basedir" +string(7) "basedir" +Testing: '///' +string(0) "" +string(1) "/" +Testing: '//' +string(0) "" +string(1) "/" +Testing: '/' +string(0) "" +string(1) "/" +Testing: '' +string(0) "" +string(0) "" From 9c59d22a7bb4dd0b4cd0b138e6cea12686c1868d Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 16:39:58 +0200 Subject: [PATCH 094/168] Fix GH-11336: php still tries to unlock the shared memory ZendSem with opcache.file_cache_only=1 but it was never locked I chose to check for the value of lock_file instead of 
checking the file_cache_only, because it is probably a little bit faster and we're going to access the lock_file variable anyway. It's also more generic. Closes GH-11341. --- NEWS | 2 ++ ext/opcache/ZendAccelerator.c | 4 ++++ ext/opcache/zend_shared_alloc.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index e587b833339e6..a0f4c4b10fe25 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,8 @@ PHP NEWS - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) . Access violation on smm_shared_globals with ALLOC_FALLBACK. (KoudelkaB) + . Fixed bug GH-11336 (php still tries to unlock the shared memory ZendSem + with opcache.file_cache_only=1 but it was never locked). (nielsdos) - SPL: . Fixed bug GH-11338 (SplFileInfo empty getBasename with more than one diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c index 97b82378780b8..ed0602394ce97 100644 --- a/ext/opcache/ZendAccelerator.c +++ b/ext/opcache/ZendAccelerator.c @@ -354,6 +354,10 @@ static inline void accel_unlock_all(void) #ifdef ZEND_WIN32 accel_deactivate_sub(); #else + if (lock_file == -1) { + return; + } + struct flock mem_usage_unlock_all; mem_usage_unlock_all.l_type = F_UNLCK; diff --git a/ext/opcache/zend_shared_alloc.c b/ext/opcache/zend_shared_alloc.c index afe539bf987a7..37f6fea9199a7 100644 --- a/ext/opcache/zend_shared_alloc.c +++ b/ext/opcache/zend_shared_alloc.c @@ -52,7 +52,7 @@ zend_smm_shared_globals *smm_shared_globals; #ifdef ZTS static MUTEX_T zts_lock; #endif -int lock_file; +int lock_file = -1; static char lockfile_name[MAXPATHLEN]; #endif From 154c2510135bf7d4b96374fb34c7c0aa0410e143 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 18:36:02 +0200 Subject: [PATCH 095/168] Fix spec compliance error for DOMDocument::getElementsByTagNameNS Spec link: https://dom.spec.whatwg.org/#concept-getelementsbytagnamens Spec says we should match any namespace when '*' is 
provided. This was however not the case: elements that didn't have a namespace were not returned. This patch fixes the error by modifying the namespace check. Closes GH-11343. --- NEWS | 2 + ext/dom/php_dom.c | 7 +- ...ementsByTagNameNS_match_any_namespace.phpt | 82 +++++++++++++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt diff --git a/NEWS b/NEWS index a0f4c4b10fe25..1b40b1bceb49b 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,8 @@ PHP NEWS . Fixed bug GH-10234 (Setting DOMAttr::textContent results in an empty attribute value). (nielsdos) . Fix return value in stub file for DOMNodeList::item. (divinity76) + . Fix spec compliance error with '*' namespace for + DOMDocument::getElementsByTagNameNS. (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 01a206c0985bd..1883767d2e48b 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1270,10 +1270,15 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l { xmlNodePtr ret = NULL; + /* Note: The spec says that ns == '' must be transformed to ns == NULL. In other words, they are equivalent. + * PHP however does not do this and internally uses the empty string everywhere when the user provides ns == NULL. + * This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". 
*/ + bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0'); + while (nodep != NULL && (*cur <= index || index == -1)) { if (nodep->type == XML_ELEMENT_NODE) { if (xmlStrEqual(nodep->name, (xmlChar *)local) || xmlStrEqual((xmlChar *)"*", (xmlChar *)local)) { - if (ns == NULL || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && (xmlStrEqual(nodep->ns->href, (xmlChar *)ns) || xmlStrEqual((xmlChar *)"*", (xmlChar *)ns)))) { + if (ns_match_any || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) { if (*cur == index) { ret = nodep; break; diff --git a/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt b/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt new file mode 100644 index 0000000000000..888d1ef9b8057 --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt @@ -0,0 +1,82 @@ +--TEST-- +DOMDocument::getElementsByTagNameNS() match any namespace +--EXTENSIONS-- +dom +--FILE-- + + +Books of the other guy.. + + + + xinclude: book.xml not found + + + + This is another namespace + + + +EOD; +$dom = new DOMDocument; + +// load the XML string defined above +$dom->loadXML($xml); + +function test($namespace, $local) { + global $dom; + $namespace_str = $namespace !== NULL ? 
"'$namespace'" : "null"; + echo "-- getElementsByTagNameNS($namespace_str, '$local') --\n"; + foreach ($dom->getElementsByTagNameNS($namespace, $local) as $element) { + echo 'local name: \'', $element->localName, '\', prefix: \'', $element->prefix, "'\n"; + } +} + +// Should *also* include objects even without a namespace +test(null, '*'); +// Should *also* include objects even without a namespace +test('*', '*'); +// Should *only* include objects without a namespace +test('', '*'); +// Should *only* include objects with the specified namespace +test('http://www.w3.org/2001/XInclude', '*'); +// Should not give any output +test('', 'fallback'); +// Should not give any output, because the null namespace is the same as the empty namespace +test(null, 'fallback'); +// Should only output the include from the empty namespace +test(null, 'include'); + +?> +--EXPECT-- +-- getElementsByTagNameNS(null, '*') -- +local name: 'chapter', prefix: '' +local name: 'title', prefix: '' +local name: 'para', prefix: '' +local name: 'error', prefix: '' +local name: 'include', prefix: '' +-- getElementsByTagNameNS('*', '*') -- +local name: 'chapter', prefix: '' +local name: 'title', prefix: '' +local name: 'para', prefix: '' +local name: 'include', prefix: 'xi' +local name: 'fallback', prefix: 'xi' +local name: 'error', prefix: '' +local name: 'include', prefix: '' +-- getElementsByTagNameNS('', '*') -- +local name: 'chapter', prefix: '' +local name: 'title', prefix: '' +local name: 'para', prefix: '' +local name: 'error', prefix: '' +local name: 'include', prefix: '' +-- getElementsByTagNameNS('http://www.w3.org/2001/XInclude', '*') -- +local name: 'include', prefix: 'xi' +local name: 'fallback', prefix: 'xi' +-- getElementsByTagNameNS('', 'fallback') -- +-- getElementsByTagNameNS(null, 'fallback') -- +-- getElementsByTagNameNS(null, 'include') -- +local name: 'include', prefix: '' From b374ec399d85d2f7051b6f92324715e76b9b11ed Mon Sep 17 00:00:00 2001 From: nielsdos 
<7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 22:29:50 +0200 Subject: [PATCH 096/168] Fix DOMElement::append() and DOMElement::prepend() hierarchy checks We could end up in an invalid hierarchy, resulting in infinite loops and eventual crashes if we don't check for the DOM hierarchy validity. Closes GH-11344. --- NEWS | 2 + ext/dom/parentnode.c | 28 ++++++ .../DOMElement_append_hierarchy_test.phpt | 89 +++++++++++++++++++ .../DOMElement_prepend_hierarchy_test.phpt | 89 +++++++++++++++++++ 4 files changed, 208 insertions(+) create mode 100644 ext/dom/tests/DOMElement_append_hierarchy_test.phpt create mode 100644 ext/dom/tests/DOMElement_prepend_hierarchy_test.phpt diff --git a/NEWS b/NEWS index 1b40b1bceb49b..42c54e587de16 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,8 @@ PHP NEWS . Fix return value in stub file for DOMNodeList::item. (divinity76) . Fix spec compliance error with '*' namespace for DOMDocument::getElementsByTagNameNS. (nielsdos) + . Fix DOMElement::append() and DOMElement::prepend() hierarchy checks. + (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). 
(nielsdos) diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 46c90a13e31d5..c99a2a5a6622a 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -255,10 +255,33 @@ static void dom_fragment_assign_parent_node(xmlNodePtr parentNode, xmlNodePtr fr fragment->last = NULL; } +static zend_result dom_hierarchy_node_list(xmlNodePtr parentNode, zval *nodes, int nodesc) +{ + for (int i = 0; i < nodesc; i++) { + if (Z_TYPE(nodes[i]) == IS_OBJECT) { + const zend_class_entry *ce = Z_OBJCE(nodes[i]); + + if (instanceof_function(ce, dom_node_class_entry)) { + if (dom_hierarchy(parentNode, dom_object_get_node(Z_DOMOBJ_P(nodes + i))) != SUCCESS) { + return FAILURE; + } + } + } + } + + return SUCCESS; +} + void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc) { xmlNode *parentNode = dom_object_get_node(context); xmlNodePtr newchild, prevsib; + + if (UNEXPECTED(dom_hierarchy_node_list(parentNode, nodes, nodesc) != SUCCESS)) { + php_dom_throw_error(HIERARCHY_REQUEST_ERR, dom_get_strict_error(context->document)); + return; + } + xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); if (fragment == NULL) { @@ -296,6 +319,11 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) return; } + if (UNEXPECTED(dom_hierarchy_node_list(parentNode, nodes, nodesc) != SUCCESS)) { + php_dom_throw_error(HIERARCHY_REQUEST_ERR, dom_get_strict_error(context->document)); + return; + } + xmlNodePtr newchild, nextsib; xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); diff --git a/ext/dom/tests/DOMElement_append_hierarchy_test.phpt b/ext/dom/tests/DOMElement_append_hierarchy_test.phpt new file mode 100644 index 0000000000000..2d70b10fe9f70 --- /dev/null +++ b/ext/dom/tests/DOMElement_append_hierarchy_test.phpt @@ -0,0 +1,89 @@ +--TEST-- +DOMElement::append() with hierarchy changes and errors +--EXTENSIONS-- +dom +--FILE-- +loadXML('

helloworld

'); + +echo "-- Append hello with world --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_hello->append($b_world); +var_dump($dom->saveHTML()); + +echo "-- Append hello with world's child --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_hello->append($b_world->firstChild); +var_dump($dom->saveHTML()); + +echo "-- Append world's child with hello --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_world->firstChild->append($b_hello); +var_dump($dom->saveHTML()); + +echo "-- Append hello with itself --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +try { + $b_hello->append($b_hello); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom->saveHTML()); + +echo "-- Append world's i tag with the parent --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +try { + $b_world->firstChild->append($b_world); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom->saveHTML()); + +echo "-- Append from another document --\n"; +$dom = clone $dom_original; +$dom2 = new DOMDocument; +$dom2->loadXML('

other

'); +try { + $dom->firstChild->firstChild->prepend($dom2->firstChild); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom2->saveHTML()); +var_dump($dom->saveHTML()); + +?> +--EXPECT-- +-- Append hello with world -- +string(39) "

helloworld

+" +-- Append hello with world's child -- +string(39) "

helloworld

+" +-- Append world's child with hello -- +string(39) "

worldhello

+" +-- Append hello with itself -- +Hierarchy Request Error +string(39) "

helloworld

+" +-- Append world's i tag with the parent -- +Hierarchy Request Error +string(39) "

helloworld

+" +-- Append from another document -- +Wrong Document Error +string(13) "

other

+" +string(39) "

helloworld

+" diff --git a/ext/dom/tests/DOMElement_prepend_hierarchy_test.phpt b/ext/dom/tests/DOMElement_prepend_hierarchy_test.phpt new file mode 100644 index 0000000000000..4d9cf24a61828 --- /dev/null +++ b/ext/dom/tests/DOMElement_prepend_hierarchy_test.phpt @@ -0,0 +1,89 @@ +--TEST-- +DOMElement::prepend() with hierarchy changes and errors +--EXTENSIONS-- +dom +--FILE-- +loadXML('

helloworld

'); + +echo "-- Prepend hello with world --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_hello->prepend($b_world); +var_dump($dom->saveHTML()); + +echo "-- Prepend hello with world's child --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_hello->prepend($b_world->firstChild); +var_dump($dom->saveHTML()); + +echo "-- Prepend world's child with hello --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_world->firstChild->prepend($b_hello); +var_dump($dom->saveHTML()); + +echo "-- Prepend hello with itself --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +try { + $b_hello->prepend($b_hello); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom->saveHTML()); + +echo "-- Prepend world's i tag with the parent --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +try { + $b_world->firstChild->prepend($b_world); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom->saveHTML()); + +echo "-- Append from another document --\n"; +$dom = clone $dom_original; +$dom2 = new DOMDocument; +$dom2->loadXML('

other

'); +try { + $dom->firstChild->firstChild->prepend($dom2->firstChild); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom2->saveHTML()); +var_dump($dom->saveHTML()); + +?> +--EXPECT-- +-- Prepend hello with world -- +string(39) "

worldhello

+" +-- Prepend hello with world's child -- +string(39) "

worldhello

+" +-- Prepend world's child with hello -- +string(39) "

helloworld

+" +-- Prepend hello with itself -- +Hierarchy Request Error +string(39) "

helloworld

+" +-- Prepend world's i tag with the parent -- +Hierarchy Request Error +string(39) "

helloworld

+" +-- Append from another document -- +Wrong Document Error +string(13) "

other

+" +string(39) "

helloworld

+" From d8102e6ba43f8affecad39d2ed9a0778304c3af2 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 22:44:31 +0200 Subject: [PATCH 097/168] Remove unnecessary tree setting in dom_zvals_to_fragment() This is already done by xmlNewDocText(). Closes GH-11345. --- ext/dom/parentnode.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 9fdfc7818e8a4..4d0fffeb9e058 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -217,8 +217,6 @@ xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNod } else if (Z_TYPE(nodes[i]) == IS_STRING) { newNode = xmlNewDocText(documentNode, (xmlChar *) Z_STRVAL(nodes[i])); - xmlSetTreeDoc(newNode, documentNode); - if (!xmlAddChild(fragment, newNode)) { xmlFreeNode(newNode); goto hierarchy_request_err; From c6655fb719c75b1db5e5e835e166d39b18aff2c0 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 29 May 2023 23:14:27 +0200 Subject: [PATCH 098/168] Implement dom_get_doc_props_read_only() I was surprised to see that getting the stricterror property showed up in the Callgrind profile of some tests. Turns out we sometimes allocate them. Fix this by returning the default in case no changes were made yet. Closes GH-11345. --- UPGRADING.INTERNALS | 5 +++++ ext/dom/document.c | 31 ++++++++++------------------ ext/dom/php_dom.c | 49 ++++++++++++++++++++++++--------------------- ext/dom/php_dom.h | 1 + 4 files changed, 43 insertions(+), 43 deletions(-) diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index b4675e22215e9..99b609a115b00 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -116,6 +116,11 @@ PHP 8.3 INTERNALS UPGRADE NOTES - The PHPAPI spl_iterator_apply() function now returns zend_result instead of int. There are no functional changes. + f. 
ext/dom + - A new function dom_get_doc_props_read_only() is added to gather the document + properties in a read-only way. This function avoids allocation when there are + no document properties changed yet. + ======================== 4. OpCode changes ======================== diff --git a/ext/dom/document.c b/ext/dom/document.c index c60198a3be110..13324645e987b 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -297,7 +297,7 @@ readonly=no int dom_document_format_output_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->formatoutput); } else { ZVAL_FALSE(retval); @@ -322,7 +322,7 @@ readonly=no int dom_document_validate_on_parse_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->validateonparse); } else { ZVAL_FALSE(retval); @@ -347,7 +347,7 @@ readonly=no int dom_document_resolve_externals_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->resolveexternals); } else { ZVAL_FALSE(retval); @@ -372,7 +372,7 @@ readonly=no int dom_document_preserve_whitespace_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->preservewhitespace); } else { ZVAL_FALSE(retval); @@ -397,7 +397,7 @@ readonly=no int dom_document_recover_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + 
libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->recover); } else { ZVAL_FALSE(retval); @@ -422,7 +422,7 @@ readonly=no int dom_document_substitue_entities_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->substituteentities); } else { ZVAL_FALSE(retval); @@ -1176,7 +1176,6 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so { xmlDocPtr ret; xmlParserCtxtPtr ctxt = NULL; - dom_doc_propsptr doc_props; dom_object *intern; php_libxml_ref_obj *document = NULL; int validate, recover, resolve_externals, keep_blanks, substitute_ent; @@ -1189,17 +1188,13 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so document = intern->document; } - doc_props = dom_get_doc_props(document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(document); validate = doc_props->validateonparse; resolve_externals = doc_props->resolveexternals; keep_blanks = doc_props->preservewhitespace; substitute_ent = doc_props->substituteentities; recover = doc_props->recover; - if (document == NULL) { - efree(doc_props); - } - xmlInitParser(); if (mode == DOM_LOAD_FILE) { @@ -1387,7 +1382,6 @@ PHP_METHOD(DOMDocument, save) size_t file_len = 0; int bytes, format, saveempty = 0; dom_object *intern; - dom_doc_propsptr doc_props; char *file; zend_long options = 0; @@ -1405,7 +1399,7 @@ PHP_METHOD(DOMDocument, save) /* encoding handled by property on doc */ - doc_props = dom_get_doc_props(intern->document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(intern->document); format = doc_props->formatoutput; if (options & LIBXML_SAVE_NOEMPTYTAG) { saveempty = xmlSaveNoEmptyTags; @@ -1433,7 +1427,6 @@ PHP_METHOD(DOMDocument, saveXML) xmlBufferPtr buf; xmlChar *mem; dom_object 
*intern, *nodeobj; - dom_doc_propsptr doc_props; int size, format, saveempty = 0; zend_long options = 0; @@ -1444,7 +1437,7 @@ PHP_METHOD(DOMDocument, saveXML) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); - doc_props = dom_get_doc_props(intern->document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(intern->document); format = doc_props->formatoutput; if (nodep != NULL) { @@ -1928,7 +1921,6 @@ PHP_METHOD(DOMDocument, saveHTMLFile) size_t file_len; int bytes, format; dom_object *intern; - dom_doc_propsptr doc_props; char *file; const char *encoding; @@ -1947,7 +1939,7 @@ PHP_METHOD(DOMDocument, saveHTMLFile) encoding = (const char *) htmlGetMetaEncoding(docp); - doc_props = dom_get_doc_props(intern->document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(intern->document); format = doc_props->formatoutput; bytes = htmlSaveFileFormat(file, docp, encoding, format); @@ -1969,7 +1961,6 @@ PHP_METHOD(DOMDocument, saveHTML) dom_object *intern, *nodeobj; xmlChar *mem = NULL; int format; - dom_doc_propsptr doc_props; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), @@ -1980,7 +1971,7 @@ PHP_METHOD(DOMDocument, saveHTML) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); - doc_props = dom_get_doc_props(intern->document); + libxml_doc_props const* doc_props = dom_get_doc_props(intern->document); format = doc_props->formatoutput; if (nodep != NULL) { diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 00725d3fb00fb..e02b0973291c5 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -140,6 +140,17 @@ int dom_node_children_valid(xmlNodePtr node) { } /* }}} end dom_node_children_valid */ +static const libxml_doc_props default_doc_props = { + .formatoutput = false, + .validateonparse = false, + .resolveexternals = false, + .preservewhitespace = true, + .substituteentities = false, + .stricterror = true, + .recover = false, + .classmap = NULL, +}; + /* {{{ dom_get_doc_props() */ dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj 
*document) { @@ -149,28 +160,31 @@ dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document) return document->doc_props; } else { doc_props = emalloc(sizeof(libxml_doc_props)); - doc_props->formatoutput = 0; - doc_props->validateonparse = 0; - doc_props->resolveexternals = 0; - doc_props->preservewhitespace = 1; - doc_props->substituteentities = 0; - doc_props->stricterror = 1; - doc_props->recover = 0; - doc_props->classmap = NULL; + memcpy(doc_props, &default_doc_props, sizeof(libxml_doc_props)); if (document) { document->doc_props = doc_props; } return doc_props; } } +/* }}} */ + +libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document) +{ + if (document && document->doc_props) { + return document->doc_props; + } else { + return &default_doc_props; + } +} static void dom_copy_doc_props(php_libxml_ref_obj *source_doc, php_libxml_ref_obj *dest_doc) { - dom_doc_propsptr source, dest; + dom_doc_propsptr dest; if (source_doc && dest_doc) { - source = dom_get_doc_props(source_doc); + libxml_doc_props const* source = dom_get_doc_props_read_only(source_doc); dest = dom_get_doc_props(dest_doc); dest->formatoutput = source->formatoutput; @@ -212,10 +226,8 @@ void dom_set_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece zend_class_entry *dom_get_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece) { - dom_doc_propsptr doc_props; - if (document) { - doc_props = dom_get_doc_props(document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(document); if (doc_props->classmap) { zend_class_entry *ce = zend_hash_find_ptr(doc_props->classmap, basece->name); if (ce) { @@ -230,16 +242,7 @@ zend_class_entry *dom_get_doc_classmap(php_libxml_ref_obj *document, zend_class_ /* {{{ dom_get_strict_error() */ int dom_get_strict_error(php_libxml_ref_obj *document) { - int stricterror; - dom_doc_propsptr doc_props; - - doc_props = dom_get_doc_props(document); - stricterror = doc_props->stricterror; - if 
(document == NULL) { - efree(doc_props); - } - - return stricterror; + return dom_get_doc_props_read_only(document)->stricterror; } /* }}} */ diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index fdfdd4e7a31ca..a7ae09384cfdc 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -97,6 +97,7 @@ typedef struct { dom_object *dom_object_get_data(xmlNodePtr obj); dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document); +libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document); zend_object *dom_objects_new(zend_class_entry *class_type); zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type); #ifdef LIBXML_XPATH_ENABLED From c50172e8121246cd11df768dae605129a3b3b19c Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Mon, 29 May 2023 11:22:44 +0900 Subject: [PATCH 099/168] Fix mb_strlen is wrong length for CP932 when 0x80. --- ext/mbstring/libmbfl/filters/mbfilter_cp932.c | 4 ++-- ext/mbstring/tests/mb_strlen.phpt | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c index 54f93f91fe207..c0732b7cf92be 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c @@ -65,7 +65,7 @@ static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter); -static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ +static const unsigned char mblen_table_sjis[] = { /* 0x81-0x9f,0xE0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -74,7 +74,7 @@ static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/ext/mbstring/tests/mb_strlen.phpt b/ext/mbstring/tests/mb_strlen.phpt index 5ebfcd1aec065..81cacaf197763 100644 --- a/ext/mbstring/tests/mb_strlen.phpt +++ b/ext/mbstring/tests/mb_strlen.phpt @@ -35,6 +35,11 @@ print "-- Testing illegal bytes 0x80,0xFD-FF --\n"; print mb_strlen("\x80\xA1", 'SJIS') . "\n"; print mb_strlen("abc\xFD\xFE\xFF", 'SJIS') . "\n"; +echo "== CP932 ==\n"; +print mb_strlen("\x80\xA1", "CP932") . "\n"; +// 0xFD, 0xFE, 0xFF is reserved. +print mb_strlen("abc\xFD\xFE\xFF", 'CP932') . "\n"; + echo "== MacJapanese ==\n"; print mb_strlen("\x80\xA1", 'MacJapanese') . "\n"; print mb_strlen("abc\xFD\xFE\xFF", 'MacJapanese') . "\n"; @@ -91,6 +96,9 @@ try { -- Testing illegal bytes 0x80,0xFD-FF -- 2 6 +== CP932 == +2 +5 == MacJapanese == 2 6 From c6ae7a55b75f67a23b3b25c4c1573b7884b1ff4d Mon Sep 17 00:00:00 2001 From: James Lucas Date: Tue, 16 May 2023 10:37:42 +1000 Subject: [PATCH 100/168] Fix bug GH-11246 cli/get_set_process_title Fail to clobber_error only when the argv is a non-contiguous area Don't increment the end_of_area if a non-contiguous area is encountered in environ Closes GH-11247 --- NEWS | 4 ++++ sapi/cli/ps_title.c | 15 ++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index 42c54e587de16..894d23c2375ae 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,10 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.1.21 +- CLI: + . Fixed bug GH-11246 (cli/get_set_process_title fails on MacOS). + (James Lucas) + - Core: . Fixed build for the riscv64 architecture/GCC 12. 
(Daniil Gentili) diff --git a/sapi/cli/ps_title.c b/sapi/cli/ps_title.c index 7b00348be67cc..01a8d05c4c1e9 100644 --- a/sapi/cli/ps_title.c +++ b/sapi/cli/ps_title.c @@ -167,19 +167,20 @@ char** save_ps_args(int argc, char** argv) end_of_area = argv[i] + strlen(argv[i]); } + if (non_contiguous_area != 0) { + goto clobber_error; + } + /* * check for contiguous environ strings following argv */ - for (i = 0; (non_contiguous_area == 0) && (environ[i] != NULL); i++) + for (i = 0; environ[i] != NULL; i++) { - if (end_of_area + 1 != environ[i]) - non_contiguous_area = 1; - end_of_area = environ[i] + strlen(environ[i]); + if (end_of_area + 1 == environ[i]) { + end_of_area = environ[i] + strlen(environ[i]); + } } - if (non_contiguous_area != 0) - goto clobber_error; - ps_buffer = argv[0]; ps_buffer_size = end_of_area - argv[0]; From 781277210553b9276939dc899790f39b93290268 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Tue, 30 May 2023 17:20:04 +0200 Subject: [PATCH 101/168] Fix GH-11347: Memory leak when calling a static method inside an xpath query It's a type confusion bug. `zend_make_callable` may change the function name of the fci to become an array, causing a crash in debug mode on `zval_ptr_dtor_str(&fci.function_name);` in `dom_xpath_ext_function_php`. On a production build it doesn't crash but only causes a leak, because the array elements are not destroyed, only the array container itself is. We can use the nogc variant because it cannot contain cycles, the potential array can only contain 2 strings. Closes GH-11350. --- NEWS | 2 ++ ext/dom/tests/gh11347.phpt | 26 ++++++++++++++++++++++++++ ext/dom/xpath.c | 2 +- 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 ext/dom/tests/gh11347.phpt diff --git a/NEWS b/NEWS index 894d23c2375ae..f2cc5b1be96a1 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,8 @@ PHP NEWS DOMDocument::getElementsByTagNameNS. (nielsdos) . 
Fix DOMElement::append() and DOMElement::prepend() hierarchy checks. (nielsdos) + . Fixed bug GH-11347 (Memory leak when calling a static method inside an + xpath query). (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/tests/gh11347.phpt b/ext/dom/tests/gh11347.phpt new file mode 100644 index 0000000000000..189231f925081 --- /dev/null +++ b/ext/dom/tests/gh11347.phpt @@ -0,0 +1,26 @@ +--TEST-- +GH-11347 (Memory leak when calling a static method inside an xpath query) +--EXTENSIONS-- +dom +--FILE-- +loadHTML('hello'); +$xpath = new DOMXpath($doc); +$xpath->registerNamespace("php", "http://php.net/xpath"); +$xpath->registerPHPFunctions(); +$xpath->query("//a[php:function('MyClass::dump', string(@href))]"); + +?> +Done +--EXPECT-- +string(15) "https://php.net" +Done diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 876d8b00dae0e..f546733a436d1 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -182,7 +182,7 @@ static void dom_xpath_ext_function_php(xmlXPathParserContextPtr ctxt, int nargs, } cleanup: zend_string_release_ex(callable, 0); - zval_ptr_dtor_str(&fci.function_name); + zval_ptr_dtor_nogc(&fci.function_name); if (fci.param_count > 0) { for (i = 0; i < nargs - 1; i++) { zval_ptr_dtor(&fci.params[i]); From 82e761eaacc49baf19a52d1cb4c100e299da12e2 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Thu, 1 Jun 2023 13:27:46 +0100 Subject: [PATCH 102/168] Fix [-Wenum-int-mismatch] compiler warnings (#11352) --- ext/bcmath/libbcmath/src/bcmath.h | 2 +- ext/ffi/ffi_parser.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/bcmath/libbcmath/src/bcmath.h b/ext/bcmath/libbcmath/src/bcmath.h index 4e32a3cbacacb..6ce1350956417 100644 --- a/ext/bcmath/libbcmath/src/bcmath.h +++ b/ext/bcmath/libbcmath/src/bcmath.h @@ -129,7 +129,7 @@ int bc_modulo(bc_num num1, bc_num num2, bc_num *resul, int scale); int bc_divmod(bc_num num1, bc_num num2, bc_num *quo, bc_num *rem, int scale); 
-int bc_raisemod(bc_num base, bc_num expo, bc_num mo, bc_num *result, int scale); +zend_result bc_raisemod(bc_num base, bc_num expo, bc_num mo, bc_num *result, int scale); void bc_raise(bc_num num1, bc_num num2, bc_num *resul, int scale); diff --git a/ext/ffi/ffi_parser.c b/ext/ffi/ffi_parser.c index eca10c27d195b..b956f885ee001 100644 --- a/ext/ffi/ffi_parser.c +++ b/ext/ffi/ffi_parser.c @@ -3552,7 +3552,7 @@ static void parse(void) { } } -int zend_ffi_parse_decl(const char *str, size_t len) { +zend_result zend_ffi_parse_decl(const char *str, size_t len) { if (SETJMP(FFI_G(bailout))==0) { FFI_G(allow_vla) = 0; FFI_G(attribute_parsing) = 0; @@ -3565,7 +3565,7 @@ int zend_ffi_parse_decl(const char *str, size_t len) { } } -int zend_ffi_parse_type(const char *str, size_t len, zend_ffi_dcl *dcl) { +zend_result zend_ffi_parse_type(const char *str, size_t len, zend_ffi_dcl *dcl) { int sym; if (SETJMP(FFI_G(bailout))==0) { From 15402454a6f6a70f3a5688a4ebe99614336a353f Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Fri, 2 Jun 2023 10:27:46 +0100 Subject: [PATCH 103/168] ext/standard/array.c: Optimize min/max functions for int/float (#11194) Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> --- ext/standard/array.c | 106 ++++++++++++++++-- .../array/max_int_float_optimisation.phpt | 61 ++++++++++ .../array/min_int_float_optimisation.phpt | 61 ++++++++++ 3 files changed, 218 insertions(+), 10 deletions(-) create mode 100644 ext/standard/tests/array/max_int_float_optimisation.phpt create mode 100644 ext/standard/tests/array/min_int_float_optimisation.phpt diff --git a/ext/standard/array.c b/ext/standard/array.c index 46c2c882b83d6..6bb146eb46888 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -1233,15 +1233,58 @@ PHP_FUNCTION(min) } } else { /* mixed min ( mixed $value1 , mixed $value2 [, mixed $value3... 
] ) */ - zval *min, result; + zval *min; uint32_t i; min = &args[0]; + zend_long min_lval; + double min_dval; - for (i = 1; i < argc; i++) { - is_smaller_function(&result, &args[i], min); - if (Z_TYPE(result) == IS_TRUE) { - min = &args[i]; + if (Z_TYPE_P(min) == IS_LONG) { + min_lval = Z_LVAL_P(min); + + for (i = 1; i < argc; i++) { + if (EXPECTED(Z_TYPE(args[i]) == IS_LONG)) { + if (min_lval > Z_LVAL(args[i])) { + min_lval = Z_LVAL(args[i]); + min = &args[i]; + } + } else if (Z_TYPE(args[i]) == IS_DOUBLE && (zend_dval_to_lval((double) min_lval) == min_lval)) { + /* if min_lval can be exactly represented as a double, go to double dedicated code */ + min_dval = (double) min_lval; + goto double_compare; + } else { + goto generic_compare; + } + } + + RETURN_LONG(min_lval); + } else if (Z_TYPE_P(min) == IS_DOUBLE) { + min_dval = Z_DVAL_P(min); + + for (i = 1; i < argc; i++) { + if (EXPECTED(Z_TYPE(args[i]) == IS_DOUBLE)) { + double_compare: + if (min_dval > Z_DVAL(args[i])) { + min_dval = Z_DVAL(args[i]); + min = &args[i]; + } + } else if (Z_TYPE(args[i]) == IS_LONG && (zend_dval_to_lval((double) Z_LVAL(args[i])) == Z_LVAL(args[i]))) { + /* if the value can be exactly represented as a double, use double dedicated code otherwise generic */ + if (min_dval > (double)Z_LVAL(args[i])) { + min_dval = (double)Z_LVAL(args[i]); + min = &args[i]; + } + } else { + goto generic_compare; + } + } + } else { + for (i = 1; i < argc; i++) { + generic_compare: + if (zend_compare(&args[i], min) < 0) { + min = &args[i]; + } } } @@ -1279,15 +1322,58 @@ PHP_FUNCTION(max) } } else { /* mixed max ( mixed $value1 , mixed $value2 [, mixed $value3... 
] ) */ - zval *max, result; + zval *max; uint32_t i; max = &args[0]; + zend_long max_lval; + double max_dval; - for (i = 1; i < argc; i++) { - is_smaller_or_equal_function(&result, &args[i], max); - if (Z_TYPE(result) == IS_FALSE) { - max = &args[i]; + if (Z_TYPE_P(max) == IS_LONG) { + max_lval = Z_LVAL_P(max); + + for (i = 1; i < argc; i++) { + if (EXPECTED(Z_TYPE(args[i]) == IS_LONG)) { + if (max_lval < Z_LVAL(args[i])) { + max_lval = Z_LVAL(args[i]); + max = &args[i]; + } + } else if (Z_TYPE(args[i]) == IS_DOUBLE && (zend_dval_to_lval((double) max_lval) == max_lval)) { + /* if max_lval can be exactly represented as a double, go to double dedicated code */ + max_dval = (double) max_lval; + goto double_compare; + } else { + goto generic_compare; + } + } + + RETURN_LONG(max_lval); + } else if (Z_TYPE_P(max) == IS_DOUBLE) { + max_dval = Z_DVAL_P(max); + + for (i = 1; i < argc; i++) { + if (EXPECTED(Z_TYPE(args[i]) == IS_DOUBLE)) { + double_compare: + if (max_dval < Z_DVAL(args[i])) { + max_dval = Z_DVAL(args[i]); + max = &args[i]; + } + } else if (Z_TYPE(args[i]) == IS_LONG && (zend_dval_to_lval((double) Z_LVAL(args[i])) == Z_LVAL(args[i]))) { + /* if the value can be exactly represented as a double, use double dedicated code otherwise generic */ + if (max_dval < (double)Z_LVAL(args[i])) { + max_dval = (double)Z_LVAL(args[i]); + max = &args[i]; + } + } else { + goto generic_compare; + } + } + } else { + for (i = 1; i < argc; i++) { + generic_compare: + if (zend_compare(&args[i], max) > 0) { + max = &args[i]; + } } } diff --git a/ext/standard/tests/array/max_int_float_optimisation.phpt b/ext/standard/tests/array/max_int_float_optimisation.phpt new file mode 100644 index 0000000000000..0f5df35d12a7c --- /dev/null +++ b/ext/standard/tests/array/max_int_float_optimisation.phpt @@ -0,0 +1,61 @@ +--TEST-- +Check max() optimisation for int and float types +--SKIPIF-- + +--FILE-- + +--EXPECT-- +Start as int optimisation: +int(10) +int(10) +int(10) +int(10) +int(10) +int(10) 
+string(2) "15" +Check that int not representable as float works: +int(-9223372036854775807) +float(1.8446744073709552E+19) +float(INF) +Start as float optimisation: +float(10.5) +float(10.5) +float(10.5) +float(10.5) +float(10.5) +float(10.5) +string(4) "15.5" +Check that int not representable as float works: +int(-9223372036854775807) +float(1.8446744073709552E+19) +float(INF) diff --git a/ext/standard/tests/array/min_int_float_optimisation.phpt b/ext/standard/tests/array/min_int_float_optimisation.phpt new file mode 100644 index 0000000000000..e383b833694c7 --- /dev/null +++ b/ext/standard/tests/array/min_int_float_optimisation.phpt @@ -0,0 +1,61 @@ +--TEST-- +Check min() optimisation for int and float types +--SKIPIF-- + +--FILE-- + +--EXPECT-- +Start as int optimisation: +int(2) +int(2) +int(2) +int(2) +int(2) +int(2) +string(1) "1" +Check that int not representable as float works: +int(9223372036854775806) +float(-1.8446744073709552E+19) +int(9223372036854775806) +Start as float optimisation: +float(2.5) +float(2.5) +float(2.5) +float(2.5) +float(2.5) +float(2.5) +string(3) "1.5" +Check that int not representable as float works: +int(9223372036854775806) +float(-1.8446744073709552E+19) +int(9223372036854775806) From f8faa8b2eeb5c193a98f6d0196ba0c4a95729417 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 2 Jun 2023 12:58:00 +0200 Subject: [PATCH 104/168] Use zval_ptr_dtor_nogc() for callable in ext/xslt (#11356) It cannot contain cycles because it's either a string or an array with 2 strings. 
--- ext/xsl/xsltprocessor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/xsl/xsltprocessor.c b/ext/xsl/xsltprocessor.c index 7f3d1e0e42170..5f1482ea31b67 100644 --- a/ext/xsl/xsltprocessor.c +++ b/ext/xsl/xsltprocessor.c @@ -289,7 +289,7 @@ static void xsl_ext_function_php(xmlXPathParserContextPtr ctxt, int nargs, int t zval_ptr_dtor(&retval); } zend_string_release_ex(callable, 0); - zval_ptr_dtor(&handler); + zval_ptr_dtor_nogc(&handler); if (fci.param_count > 0) { for (i = 0; i < nargs - 1; i++) { zval_ptr_dtor(&args[i]); From 79128afb927ab45d6cc40e82c2de88ca5a4effcf Mon Sep 17 00:00:00 2001 From: Pierrick Charron Date: Fri, 2 Jun 2023 15:05:49 -0400 Subject: [PATCH 105/168] [skip ci] Update release process to publish php-keyring.gpg in web-php --- docs/release-process.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/release-process.md b/docs/release-process.md index a082aabc65bef..1a60d1ee7035e 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -988,6 +988,20 @@ volunteers to begin the selection process for the next release managers. git push ``` + `web-php-distributions` is a submodule of `web-php`. You'll now have to update + the commit reference to reflect the change made in web-php-distributions. + + ```shell + cd /path/to/repos/php/web-php + git submodule update + cd distributions # This is the submodule referring to web-php-distributions + git pull origin master + cd .. + git add distributions + git commit --gpg-sign=YOURKEYID -m "Update php-keyring.gpg in distributions" + git push + ``` + 4. Request moderation access to php-announce@lists.php.net and primary-qa-tester@lists.php.net lists, so you are able to moderate your release announcements. 
All the announcements should be sent from your From 810507ab1b68a92a9fa9bb8a06ae5fe2b8b7992b Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Fri, 2 Jun 2023 11:22:30 +0100 Subject: [PATCH 106/168] http_fopen_wrapper: fix [-Wanalyzer-deref-before-check] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit warning: check of ‘*resource.scheme’ for NULL after already dereferencing it [-Wanalyzer-deref-before-check] 186 | use_ssl = resource->scheme && (ZSTR_LEN(resource->scheme) > 4) && ZSTR_VAL(resource->scheme)[4] == 's'; | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Although resource->scheme is already dereferenced on line 163 in the IF condition --- ext/standard/http_fopen_wrapper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/standard/http_fopen_wrapper.c b/ext/standard/http_fopen_wrapper.c index c382ba4116c09..daaaa41b00f9b 100644 --- a/ext/standard/http_fopen_wrapper.c +++ b/ext/standard/http_fopen_wrapper.c @@ -160,6 +160,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, return NULL; } + ZEND_ASSERT(resource->scheme); if (!zend_string_equals_literal_ci(resource->scheme, "http") && !zend_string_equals_literal_ci(resource->scheme, "https")) { if (!context || @@ -183,7 +184,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, return NULL; } - use_ssl = resource->scheme && (ZSTR_LEN(resource->scheme) > 4) && ZSTR_VAL(resource->scheme)[4] == 's'; + use_ssl = (ZSTR_LEN(resource->scheme) > 4) && ZSTR_VAL(resource->scheme)[4] == 's'; /* choose default ports */ if (use_ssl && resource->port == 0) resource->port = 443; From 13ad8ef40bd19734c504c30e0b0cd7608f0c38a6 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Fri, 2 Jun 2023 17:39:55 +0100 Subject: [PATCH 107/168] memory stream: fix [-Wanalyzer-deref-before-check] | 732 | ts->mode = mode && mode[0] == 'r' && mode[1] != '+' ? 
TEMP_STREAM_READONLY : 0; | | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~ Although mode is already dereferenced on line 723 in the call to strlen() --- main/streams/memory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main/streams/memory.c b/main/streams/memory.c index f53084a6c3a77..444f963761729 100644 --- a/main/streams/memory.c +++ b/main/streams/memory.c @@ -349,7 +349,7 @@ static ssize_t php_stream_temp_write(php_stream *stream, const char *buf, size_t } if (php_stream_is(ts->innerstream, PHP_STREAM_IS_MEMORY)) { zend_off_t pos = php_stream_tell(ts->innerstream); - + if (pos + count >= ts->smax) { zend_string *membuf = php_stream_memory_get_buffer(ts->innerstream); php_stream *file = php_stream_fopen_temporary_file(ts->tmpdir, "php", NULL); @@ -614,6 +614,8 @@ static php_stream * php_stream_url_wrap_rfc2397(php_stream_wrapper *wrapper, con int base64 = 0; zend_string *base64_comma = NULL; + ZEND_ASSERT(mode); + ZVAL_NULL(&meta); if (memcmp(path, "data:", 5)) { return NULL; @@ -729,7 +731,7 @@ static php_stream * php_stream_url_wrap_rfc2397(php_stream_wrapper *wrapper, con stream->ops = &php_stream_rfc2397_ops; ts = (php_stream_temp_data*)stream->abstract; assert(ts != NULL); - ts->mode = mode && mode[0] == 'r' && mode[1] != '+' ? TEMP_STREAM_READONLY : 0; + ts->mode = mode[0] == 'r' && mode[1] != '+' ? 
TEMP_STREAM_READONLY : 0; ZVAL_COPY_VALUE(&ts->meta, &meta); } if (base64_comma) { From c5d7264149ef9263bad139c27bc9497aae518a0e Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Fri, 2 Jun 2023 17:54:58 +0100 Subject: [PATCH 108/168] Fix file descriptor check Technically 0 is a valid file descriptor --- Zend/zend_gdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zend/zend_gdb.c b/Zend/zend_gdb.c index 02afb6bc6f7bc..82e8182ba822a 100644 --- a/Zend/zend_gdb.c +++ b/Zend/zend_gdb.c @@ -113,7 +113,7 @@ ZEND_API bool zend_gdb_present(void) #if defined(__linux__) /* netbsd while having this procfs part, does not hold the tracer pid */ int fd = open("/proc/self/status", O_RDONLY); - if (fd > 0) { + if (fd >= 0) { char buf[1024]; ssize_t n = read(fd, buf, sizeof(buf) - 1); char *s; From ce724d186de92612fadb6787fd2e8f2f2f76eed5 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Fri, 2 Jun 2023 17:59:19 +0100 Subject: [PATCH 109/168] Assert zend_constant exist If not the enum case does not exist and it is a programming error --- Zend/zend_enum.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Zend/zend_enum.c b/Zend/zend_enum.c index 21628f74956bb..770beb7320ddb 100644 --- a/Zend/zend_enum.c +++ b/Zend/zend_enum.c @@ -597,6 +597,7 @@ ZEND_API void zend_enum_add_case_cstr(zend_class_entry *ce, const char *name, zv ZEND_API zend_object *zend_enum_get_case(zend_class_entry *ce, zend_string *name) { zend_class_constant *c = zend_hash_find_ptr(CE_CONSTANTS_TABLE(ce), name); + ZEND_ASSERT(c && "Must be a valid enum case"); ZEND_ASSERT(ZEND_CLASS_CONST_FLAGS(c) & ZEND_CLASS_CONST_IS_CASE); if (Z_TYPE(c->value) == IS_CONSTANT_AST) { From c3f0797385b86a67543631e5ab9d78d76c08b421 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 00:13:14 +0200 Subject: [PATCH 110/168] Implement iteration cache, item cache and length cache for node list iteration (#11330) MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Implement iteration cache, item cache and length cache for node list iteration The current implementation follows the spec requirement that the list must be "live". This means that changes in the document must be reflected in the existing node lists without requiring the user to refetch the node list. The consequence is that getting any item, or the length of the list, always starts searching from the root element of the node list. This results in O(n) time to get any item or the length. If there's a for loop over the node list, this means the iterations will take O(n²) time in total. This causes real-world performance issues with potential for downtime (see GH-11308 and its references for details). We fix this by introducing a caching strategy. We cache the last iterated object in the iterator, the last requested item in the node list, and the last length computation. To invalidate the cache, we simply count the number of modifications made to the containing document. If the modification number does not match what the number was during caching, we know the document has been modified and the cache is invalid. If this ever overflows, we saturate the modification number and don't do any caching anymore. Note that we don't check for overflow on 64-bit systems because it would take hundreds of years to overflow. Fixes GH-11308. 
--- NEWS | 3 + UPGRADING.INTERNALS | 18 +++ ext/dom/document.c | 35 ++++-- ext/dom/documenttype.c | 4 +- ext/dom/dom_iterators.c | 43 ++++--- ext/dom/element.c | 9 +- ext/dom/node.c | 18 ++- ext/dom/nodelist.c | 106 +++++++++++++++--- ext/dom/parentnode.c | 10 ++ ext/dom/php_dom.c | 82 +++++++++++--- ext/dom/php_dom.h | 38 ++++++- ext/dom/processinginstruction.c | 2 + ...ocument_getElementsByTagName_liveness.phpt | 47 ++++++++ ...tElementsByTagName_liveness_simplexml.phpt | 29 +++++ ...tElementsByTagName_liveness_tree_walk.phpt | 89 +++++++++++++++ ...tsByTagName_liveness_write_properties.phpt | 43 +++++++ ...etElementsByTagName_liveness_xinclude.phpt | 43 +++++++ .../DOMDocument_item_cache_invalidation.phpt | 69 ++++++++++++ ...DOMDocument_length_cache_invalidation.phpt | 34 ++++++ ...ocument_liveness_caching_invalidation.phpt | 43 +++++++ ...getElementsByTagName_without_document.phpt | 16 +++ ext/libxml/libxml.c | 8 +- ext/libxml/php_libxml.h | 31 +++++ ext/simplexml/simplexml.c | 6 + 24 files changed, 762 insertions(+), 64 deletions(-) create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt create mode 100644 ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt create mode 100644 ext/dom/tests/DOMDocument_item_cache_invalidation.phpt create mode 100644 ext/dom/tests/DOMDocument_length_cache_invalidation.phpt create mode 100644 ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt create mode 100644 ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt diff --git a/NEWS b/NEWS index 53e4fe73519b2..808f3ed806101 100644 --- a/NEWS +++ b/NEWS @@ -44,6 +44,9 @@ PHP NEWS - Date: . Implement More Appropriate Date/Time Exceptions RFC. 
(Derick) +- DOM: + . Fix bug GH-11308 (getElementsByTagName() is O(N^2)). (nielsdos) + - Exif: . Removed unneeded codepaths in exif_process_TIFF_in_JPEG(). (nielsdos) diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 99b609a115b00..6db2d99ec59b9 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -120,6 +120,24 @@ PHP 8.3 INTERNALS UPGRADE NOTES - A new function dom_get_doc_props_read_only() is added to gather the document properties in a read-only way. This function avoids allocation when there are no document properties changed yet. + - The node list returned by DOMNode::getElementsByTagName() and + DOMNode::getElementsByTagNameNS() now caches the length and the last requested item. + This means that the length and the last requested item are not recalculated + when the node list is iterated over multiple times. + If you do not use the internal PHP dom APIs to modify the document, you need to + manually invalidate the cache using php_libxml_invalidate_node_list_cache_from_doc(). + Furthermore, the following internal APIs were added to handle the cache: + . php_dom_is_cache_tag_stale_from_doc_ptr() + . php_dom_is_cache_tag_stale_from_node() + . php_dom_mark_cache_tag_up_to_date_from_node() + - The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate + the base node of the node list. This function also no longer accepts -1 as the index argument. + - The function dom_namednode_iter() has additional arguments to avoid recomputing the length of + the strings. + + g. ext/libxml + - Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and + php_libxml_invalidate_node_list_cache() were added to invalidate the cache of a node list. ======================== 4. 
OpCode changes diff --git a/ext/dom/document.c b/ext/dom/document.c index 13324645e987b..06c4b2b97b9e3 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -777,7 +777,6 @@ PHP_METHOD(DOMDocument, getElementsByTagName) size_t name_len; dom_object *intern, *namednode; char *name; - xmlChar *local; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) { @@ -788,8 +787,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, NULL); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0); } /* }}} end dom_document_get_elements_by_tag_name */ @@ -847,6 +845,8 @@ PHP_METHOD(DOMDocument, importNode) } } + php_libxml_invalidate_node_list_cache_from_doc(docp); + DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern); } /* }}} end dom_document_import_node */ @@ -991,7 +991,6 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS) size_t uri_len, name_len; dom_object *intern, *namednode; char *uri, *name; - xmlChar *local, *nsuri; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) { @@ -1002,9 +1001,7 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - nsuri = xmlCharStrndup(uri ? uri : "", uri_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? 
uri : "", uri_len); } /* }}} end dom_document_get_elements_by_tag_name_ns */ @@ -1070,6 +1067,8 @@ PHP_METHOD(DOMDocument, normalizeDocument) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); + php_libxml_invalidate_node_list_cache_from_doc(docp); + dom_normalize((xmlNodePtr) docp); } /* }}} end dom_document_normalize_document */ @@ -1328,10 +1327,14 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) { if (id != NULL) { intern = Z_DOMOBJ_P(id); + size_t old_modification_nr = 0; if (intern != NULL) { docp = (xmlDocPtr) dom_object_get_node(intern); doc_prop = NULL; if (docp != NULL) { + const php_libxml_doc_ptr *doc_ptr = docp->_private; + ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ + old_modification_nr = doc_ptr->cache_tag.modification_nr; php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); doc_prop = intern->document->doc_props; intern->document->doc_props = NULL; @@ -1348,6 +1351,12 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) { } php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); + /* Since iterators should invalidate, we need to start the modification number from the old counter */ + if (old_modification_nr != 0) { + php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */ + doc_ptr->cache_tag.modification_nr = old_modification_nr; + php_libxml_invalidate_node_list_cache(doc_ptr); + } RETURN_TRUE; } else { @@ -1563,6 +1572,8 @@ PHP_METHOD(DOMDocument, xinclude) php_dom_remove_xinclude_nodes(root); } + php_libxml_invalidate_node_list_cache_from_doc(docp); + if (err) { RETVAL_LONG(err); } else { @@ -1871,10 +1882,14 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */ if (id != NULL && instanceof_function(Z_OBJCE_P(id), dom_document_class_entry)) { intern = Z_DOMOBJ_P(id); + size_t old_modification_nr = 0; if (intern != NULL) { docp = (xmlDocPtr) 
dom_object_get_node(intern); doc_prop = NULL; if (docp != NULL) { + const php_libxml_doc_ptr *doc_ptr = docp->_private; + ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ + old_modification_nr = doc_ptr->cache_tag.modification_nr; php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); doc_prop = intern->document->doc_props; intern->document->doc_props = NULL; @@ -1891,6 +1906,12 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */ } php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); + /* Since iterators should invalidate, we need to start the modification number from the old counter */ + if (old_modification_nr != 0) { + php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */ + doc_ptr->cache_tag.modification_nr = old_modification_nr; + php_libxml_invalidate_node_list_cache(doc_ptr); + } RETURN_TRUE; } else { diff --git a/ext/dom/documenttype.c b/ext/dom/documenttype.c index b046b05f80eff..cfc4b043edb22 100644 --- a/ext/dom/documenttype.c +++ b/ext/dom/documenttype.c @@ -65,7 +65,7 @@ int dom_documenttype_entities_read(dom_object *obj, zval *retval) entityht = (xmlHashTable *) doctypep->entities; intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, NULL); + dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, 0, NULL, 0); return SUCCESS; } @@ -93,7 +93,7 @@ int dom_documenttype_notations_read(dom_object *obj, zval *retval) notationht = (xmlHashTable *) doctypep->notations; intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, NULL); + dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, 0, NULL, 0); return SUCCESS; } diff --git a/ext/dom/dom_iterators.c b/ext/dom/dom_iterators.c index 72c97104db04d..2cf2c7bb6e7ce 100644 --- a/ext/dom/dom_iterators.c +++ b/ext/dom/dom_iterators.c @@ -179,7 +179,7 
@@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */ dom_object *intern; dom_object *nnmap; dom_nnodemap_object *objmap; - int previndex=0; + int previndex; HashTable *nodeht; zval *entry; bool do_curobj_undef = 1; @@ -205,23 +205,32 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */ do_curobj_undef = 0; } } else { - curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { + curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; curnode = curnode->next; } else { - /* Nav the tree evey time as this is LIVE */ + /* The collection is live, we nav the tree from the base object if we cannot + * use the cache to restart from the last point. */ basenode = dom_object_get_node(objmap->baseobj); - if (basenode && (basenode->type == XML_DOCUMENT_NODE || - basenode->type == XML_HTML_DOCUMENT_NODE)) { - basenode = xmlDocGetRootElement((xmlDoc *) basenode); - } else if (basenode) { - basenode = basenode->children; - } else { + if (UNEXPECTED(!basenode)) { goto err; } + if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) { + php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode); + previndex = 0; + if (basenode && (basenode->type == XML_DOCUMENT_NODE || + basenode->type == XML_HTML_DOCUMENT_NODE)) { + curnode = xmlDocGetRootElement((xmlDoc *) basenode); + } else { + curnode = basenode->children; + } + } else { + previndex = iter->index - 1; + curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; + } curnode = dom_get_elements_by_tag_name_ns_raw( - basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index); + basenode, curnode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index); } } } else { @@ -258,7 +267,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i { dom_object *intern; 
dom_nnodemap_object *objmap; - xmlNodePtr nodep, curnode=NULL; + xmlNodePtr curnode=NULL; int curindex = 0; HashTable *nodeht; zval *entry; @@ -270,6 +279,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i } iterator = emalloc(sizeof(php_dom_iterator)); zend_iterator_init(&iterator->intern); + iterator->cache_tag.modification_nr = 0; ZVAL_OBJ_COPY(&iterator->intern.data, Z_OBJ_P(object)); iterator->intern.funcs = &php_dom_iterator_funcs; @@ -288,24 +298,25 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i ZVAL_COPY(&iterator->curobj, entry); } } else { - nodep = (xmlNode *)dom_object_get_node(objmap->baseobj); - if (!nodep) { + xmlNodePtr basep = (xmlNode *)dom_object_get_node(objmap->baseobj); + if (!basep) { goto err; } if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { if (objmap->nodetype == XML_ATTRIBUTE_NODE) { - curnode = (xmlNodePtr) nodep->properties; + curnode = (xmlNodePtr) basep->properties; } else { - curnode = (xmlNodePtr) nodep->children; + curnode = (xmlNodePtr) basep->children; } } else { + xmlNodePtr nodep = basep; if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { nodep = xmlDocGetRootElement((xmlDoc *) nodep); } else { nodep = nodep->children; } curnode = dom_get_elements_by_tag_name_ns_raw( - nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0); + basep, nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0); } } } else { diff --git a/ext/dom/element.c b/ext/dom/element.c index 19cef5834657a..93d9ad5fb910a 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -511,7 +511,6 @@ PHP_METHOD(DOMElement, getElementsByTagName) size_t name_len; dom_object *intern, *namednode; char *name; - xmlChar *local; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) { @@ -522,8 +521,7 @@ PHP_METHOD(DOMElement, getElementsByTagName) 
php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, NULL); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0); } /* }}} end dom_element_get_elements_by_tag_name */ @@ -930,7 +928,6 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS) size_t uri_len, name_len; dom_object *intern, *namednode; char *uri, *name; - xmlChar *local, *nsuri; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) { @@ -941,9 +938,7 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - nsuri = xmlCharStrndup(uri ? uri : "", uri_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len); } /* }}} end dom_element_get_elements_by_tag_name_ns */ diff --git a/ext/dom/node.c b/ext/dom/node.c index fdb51bf51092f..78c9b2dca1802 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -195,6 +195,8 @@ int dom_node_node_value_write(dom_object *obj, zval *newval) break; } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + zend_string_release_ex(str, 0); return SUCCESS; } @@ -274,7 +276,7 @@ int dom_node_child_nodes_read(dom_object *obj, zval *retval) php_dom_create_iterator(retval, DOM_NODELIST); intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, NULL); + dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, 0, NULL, 0); return SUCCESS; } @@ -482,7 +484,7 @@ int dom_node_attributes_read(dom_object *obj, zval *retval) if (nodep->type == XML_ELEMENT_NODE) { php_dom_create_iterator(retval, DOM_NAMEDNODEMAP); intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, NULL); + 
dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, 0, NULL, 0); } else { ZVAL_NULL(retval); } @@ -769,6 +771,8 @@ int dom_node_text_content_write(dom_object *obj, zval *newval) return FAILURE; } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str); int type = nodep->type; @@ -897,6 +901,8 @@ PHP_METHOD(DOMNode, insertBefore) php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL); } + php_libxml_invalidate_node_list_cache_from_doc(parentp->doc); + if (ref != NULL) { DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj); if (refp->parent != parentp) { @@ -1086,6 +1092,7 @@ PHP_METHOD(DOMNode, replaceChild) nodep->doc->intSubset = (xmlDtd *) newchild; } } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); DOM_RET_OBJ(oldchild, &ret, intern); } /* }}} end dom_node_replace_child */ @@ -1127,6 +1134,7 @@ PHP_METHOD(DOMNode, removeChild) } xmlUnlinkNode(child); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); DOM_RET_OBJ(child, &ret, intern); } /* }}} end dom_node_remove_child */ @@ -1230,6 +1238,8 @@ PHP_METHOD(DOMNode, appendChild) dom_reconcile_ns(nodep->doc, new_child); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + DOM_RET_OBJ(new_child, &ret, intern); } /* }}} end dom_node_append_child */ @@ -1339,6 +1349,8 @@ PHP_METHOD(DOMNode, normalize) DOM_GET_OBJ(nodep, id, xmlNodePtr, intern); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + dom_normalize(nodep); } @@ -1571,6 +1583,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ RETURN_THROWS(); } + php_libxml_invalidate_node_list_cache_from_doc(docp); + if (xpath_array == NULL) { if (nodep->type != XML_DOCUMENT_NODE) { ctxp = xmlXPathNewContext(docp); diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index b03ebe1acd90a..20e3b18bee883 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -31,6 +31,24 @@ * Since: */ +static 
zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap) +{ + if (objmap->cached_obj) { + /* Since the DOM is a tree there can be no cycles. */ + if (GC_DELREF(&objmap->cached_obj->std) == 0) { + zend_objects_store_del(&objmap->cached_obj->std); + } + objmap->cached_obj = NULL; + objmap->cached_obj_index = 0; + } +} + +static zend_always_inline void reset_objmap_cache(dom_nnodemap_object *objmap) +{ + objmap_cache_release_cached_obj(objmap); + objmap->cached_length = -1; +} + static int get_nodelist_length(dom_object *obj) { dom_nnodemap_object *objmap = (dom_nnodemap_object *) obj->ptr; @@ -52,6 +70,17 @@ static int get_nodelist_length(dom_object *obj) return 0; } + if (!php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) { + if (objmap->cached_length >= 0) { + return objmap->cached_length; + } + /* Only the length is out-of-date, the cache tag is still valid. + * Therefore, only overwrite the length and keep the currently cached object. */ + } else { + php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, nodep); + reset_objmap_cache(objmap); + } + int count = 0; if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { xmlNodePtr curnode = nodep->children; @@ -63,15 +92,18 @@ static int get_nodelist_length(dom_object *obj) } } } else { + xmlNodePtr basep = nodep; if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { nodep = xmlDocGetRootElement((xmlDoc *) nodep); } else { nodep = nodep->children; } dom_get_elements_by_tag_name_ns_raw( - nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1); + basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, INT_MAX - 1 /* because of <= */); } + objmap->cached_length = count; + return count; } @@ -113,11 +145,12 @@ PHP_METHOD(DOMNodeList, item) zval *id; zend_long index; int ret; + bool cache_itemnode = false; dom_object *intern; xmlNodePtr itemnode = NULL; dom_nnodemap_object *objmap; - xmlNodePtr 
nodep, curnode; + xmlNodePtr basep; int count = 0; id = ZEND_THIS; @@ -145,23 +178,51 @@ PHP_METHOD(DOMNodeList, item) return; } } else if (objmap->baseobj) { - nodep = dom_object_get_node(objmap->baseobj); - if (nodep) { + basep = dom_object_get_node(objmap->baseobj); + if (basep) { + xmlNodePtr nodep = basep; + /* For now we're only able to use cache for forward search. + * TODO: in the future we could extend the logic of the node list such that backwards searches + * are also possible. */ + bool restart = true; + int relative_index = index; + if (index >= objmap->cached_obj_index && objmap->cached_obj && !php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) { + xmlNodePtr cached_obj_xml_node = dom_object_get_node(objmap->cached_obj); + + /* The node cannot be NULL if the cache is valid. If it is NULL, then it means we + * forgot an invalidation somewhere. Take the defensive programming approach and invalidate + * it here if it's NULL (except in debug mode where we would want to catch this). 
*/ + if (UNEXPECTED(cached_obj_xml_node == NULL)) { +#if ZEND_DEBUG + ZEND_UNREACHABLE(); +#endif + reset_objmap_cache(objmap); + } else { + restart = false; + relative_index -= objmap->cached_obj_index; + nodep = cached_obj_xml_node; + } + } if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { - curnode = nodep->children; - while (count < index && curnode != NULL) { + if (restart) { + nodep = nodep->children; + } + while (count < relative_index && nodep != NULL) { count++; - curnode = curnode->next; + nodep = nodep->next; } - itemnode = curnode; + itemnode = nodep; } else { - if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { - nodep = xmlDocGetRootElement((xmlDoc *) nodep); - } else { - nodep = nodep->children; + if (restart) { + if (basep->type == XML_DOCUMENT_NODE || basep->type == XML_HTML_DOCUMENT_NODE) { + nodep = xmlDocGetRootElement((xmlDoc*) basep); + } else { + nodep = basep->children; + } } - itemnode = dom_get_elements_by_tag_name_ns_raw(nodep, (char *) objmap->ns, (char *) objmap->local, &count, index); + itemnode = dom_get_elements_by_tag_name_ns_raw(basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, relative_index); } + cache_itemnode = true; } } } @@ -169,6 +230,25 @@ PHP_METHOD(DOMNodeList, item) if (itemnode) { DOM_RET_OBJ(itemnode, &ret, objmap->baseobj); + if (cache_itemnode) { + /* Hold additional reference for the cache, must happen before releasing the cache + * because we might be the last reference holder. + * Instead of storing and copying zvals, we store the object pointer directly. + * This saves us some bytes because a pointer is smaller than a zval. + * This also means we have to manually refcount the objects here, and remove the reference count + * in reset_objmap_cache() and the destructor. */ + dom_object *cached_obj = Z_DOMOBJ_P(return_value); + GC_ADDREF(&cached_obj->std); + /* If the tag is stale, all cached data is useless. 
Otherwise only the cached object is useless. */ + if (php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, itemnode)) { + php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, itemnode); + reset_objmap_cache(objmap); + } else { + objmap_cache_release_cached_obj(objmap); + } + objmap->cached_obj_index = index; + objmap->cached_obj = cached_obj; + } return; } } diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 4d0fffeb9e058..36cd6104f38a4 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -280,6 +280,8 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc) return; } + php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); if (fragment == NULL) { @@ -322,6 +324,8 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) return; } + php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + xmlNodePtr newchild, nextsib; xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -402,6 +406,8 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) doc = prevsib->doc; + php_libxml_invalidate_node_list_cache_from_doc(doc); + /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -451,6 +457,8 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) doc = nextsib->doc; + php_libxml_invalidate_node_list_cache_from_doc(doc); + /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -506,6 +514,8 @@ void dom_child_node_remove(dom_object *context) return; } + php_libxml_invalidate_node_list_cache_from_doc(context->document->ptr); + while (children) { if (children == child) { xmlUnlinkNode(child); diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 
e02b0973291c5..44c10ea6a8aec 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -942,7 +942,7 @@ void dom_objects_free_storage(zend_object *object) } /* }}} */ -void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns) /* {{{ */ +void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len) /* {{{ */ { dom_nnodemap_object *mapptr = (dom_nnodemap_object *) intern->ptr; @@ -950,11 +950,33 @@ void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xml ZVAL_OBJ_COPY(&mapptr->baseobj_zv, &basenode->std); + xmlDocPtr doc = basenode->document ? basenode->document->ptr : NULL; + mapptr->baseobj = basenode; mapptr->nodetype = ntype; mapptr->ht = ht; - mapptr->local = local; - mapptr->ns = ns; + + const xmlChar* tmp; + + if (local) { + int len = local_len > INT_MAX ? -1 : (int) local_len; + if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)local, len)) != NULL) { + mapptr->local = (xmlChar*) tmp; + } else { + mapptr->local = xmlCharStrndup(local, len); + mapptr->free_local = true; + } + } + + if (ns) { + int len = ns_len > INT_MAX ? 
-1 : (int) ns_len; + if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)ns, len)) != NULL) { + mapptr->ns = (xmlChar*) tmp; + } else { + mapptr->ns = xmlCharStrndup(ns, len); + mapptr->free_ns = true; + } + } } /* }}} */ @@ -1010,10 +1032,13 @@ void dom_nnodemap_objects_free_storage(zend_object *object) /* {{{ */ dom_nnodemap_object *objmap = (dom_nnodemap_object *)intern->ptr; if (objmap) { - if (objmap->local) { + if (objmap->cached_obj && GC_DELREF(&objmap->cached_obj->std) == 0) { + zend_objects_store_del(&objmap->cached_obj->std); + } + if (objmap->free_local) { xmlFree(objmap->local); } - if (objmap->ns) { + if (objmap->free_ns) { xmlFree(objmap->ns); } if (!Z_ISUNDEF(objmap->baseobj_zv)) { @@ -1042,7 +1067,13 @@ zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type) /* {{{ */ objmap->nodetype = 0; objmap->ht = NULL; objmap->local = NULL; + objmap->free_local = false; objmap->ns = NULL; + objmap->free_ns = false; + objmap->cache_tag.modification_nr = 0; + objmap->cached_length = -1; + objmap->cached_obj = NULL; + objmap->cached_obj_index = 0; return &intern->std; } @@ -1220,19 +1251,25 @@ bool dom_has_feature(zend_string *feature, zend_string *version) } /* }}} end dom_has_feature */ -xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */ +xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */ { + /* Can happen with detached document */ + if (UNEXPECTED(nodep == NULL)) { + return NULL; + } + xmlNodePtr ret = NULL; + bool local_match_any = local[0] == '*' && local[1] == '\0'; /* Note: The spec says that ns == '' must be transformed to ns == NULL. In other words, they are equivalent. * PHP however does not do this and internally uses the empty string everywhere when the user provides ns == NULL. 
* This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". */ bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0'); - while (nodep != NULL && (*cur <= index || index == -1)) { + while (*cur <= index) { if (nodep->type == XML_ELEMENT_NODE) { - if (xmlStrEqual(nodep->name, (xmlChar *)local) || xmlStrEqual((xmlChar *)"*", (xmlChar *)local)) { - if (ns_match_any || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) { + if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) { + if (ns_match_any || (ns[0] == '\0' && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) { if (*cur == index) { ret = nodep; break; @@ -1240,16 +1277,33 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l (*cur)++; } } - ret = dom_get_elements_by_tag_name_ns_raw(nodep->children, ns, local, cur, index); - if (ret != NULL) { - break; + + if (nodep->children) { + nodep = nodep->children; + continue; } } - nodep = nodep->next; + + if (nodep->next) { + nodep = nodep->next; + } else { + /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */ + do { + nodep = nodep->parent; + if (nodep == basep) { + return NULL; + } + /* This shouldn't happen, unless there's an invalidation bug somewhere. */ + if (UNEXPECTED(nodep == NULL)) { + zend_throw_error(NULL, "Current node in traversal is not in the document. 
Please report this as a bug in php-src."); + return NULL; + } + } while (nodep->next == NULL); + nodep = nodep->next; + } } return ret; } -/* }}} */ /* }}} end dom_element_get_elements_by_tag_name_ns_raw */ static inline bool is_empty_node(xmlNodePtr nodep) diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index a7ae09384cfdc..0602f4166eaa7 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -82,15 +82,22 @@ typedef struct _dom_nnodemap_object { dom_object *baseobj; zval baseobj_zv; int nodetype; + int cached_length; xmlHashTable *ht; xmlChar *local; xmlChar *ns; + php_libxml_cache_tag cache_tag; + dom_object *cached_obj; + int cached_obj_index; + bool free_local : 1; + bool free_ns : 1; } dom_nnodemap_object; typedef struct { zend_object_iterator intern; zval curobj; HashPosition pos; + php_libxml_cache_tag cache_tag; } php_dom_iterator; #include "domexception.h" @@ -113,14 +120,14 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns); void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep); xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName); void dom_normalize (xmlNodePtr nodep); -xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index); +xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index); void php_dom_create_implementation(zval *retval); int dom_hierarchy(xmlNodePtr parent, xmlNodePtr child); bool dom_has_feature(zend_string *feature, zend_string *version); int dom_node_is_read_only(xmlNodePtr node); int dom_node_children_valid(xmlNodePtr node); void php_dom_create_iterator(zval *return_value, int ce_type); -void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns); +void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len); xmlNodePtr create_notation(const 
xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID); xmlNode *php_dom_libxml_hash_iter(xmlHashTable *ht, int index); xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index); @@ -153,6 +160,33 @@ void dom_child_node_remove(dom_object *context); #define DOM_NODELIST 0 #define DOM_NAMEDNODEMAP 1 +static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr) +{ + ZEND_ASSERT(cache_tag != NULL); + ZEND_ASSERT(doc_ptr != NULL); + /* See overflow comment in php_libxml_invalidate_node_list_cache(). */ +#if SIZEOF_SIZE_T == 8 + return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr; +#else + return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr || UNEXPECTED(doc_ptr->cache_tag.modification_nr == SIZE_MAX); +#endif +} + +static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node) +{ + ZEND_ASSERT(node != NULL); + return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private); +} + +static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node) +{ + ZEND_ASSERT(cache_tag != NULL); + if (node->doc && node->doc->_private) { + const php_libxml_doc_ptr* doc_ptr = node->doc->_private; + cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr; + } +} + PHP_MINIT_FUNCTION(dom); PHP_MSHUTDOWN_FUNCTION(dom); PHP_MINFO_FUNCTION(dom); diff --git a/ext/dom/processinginstruction.c b/ext/dom/processinginstruction.c index 465ecb431e73a..c40d24d18ce23 100644 --- a/ext/dom/processinginstruction.c +++ b/ext/dom/processinginstruction.c @@ -128,6 +128,8 @@ int dom_processinginstruction_data_write(dom_object *obj, zval *newval) return FAILURE; } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + xmlNodeSetContentLen(nodep, (xmlChar *) 
ZSTR_VAL(str), ZSTR_LEN(str) + 1); zend_string_release_ex(str, 0); diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt new file mode 100644 index 0000000000000..2b4622d10d389 --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt @@ -0,0 +1,47 @@ +--TEST-- +DOMDocument::getElementsByTagName() is live +--EXTENSIONS-- +dom +--FILE-- +loadXML( '' ); +$root = $doc->documentElement; + +$i = 0; + +/* Note that the list is live. The explanation for the output is as follows: + Before the loop we have the following (writing only the attributes): + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + + Now the loop starts, the current element is marked with a V. $i == 0: + V + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 1 gets printed. $i == 0, which is even, so 1 gets removed, which results in: + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + Note that everything shifted to the left. + Because the list is live, the current element pointer still refers to the first index, which now corresponds to element with attribute 2. + Now the foreach body ends, which means we go to the next element, which is now 3 instead of 2. + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 3 gets printed. $i == 1, which is odd, so nothing happens and we move on to the next element: + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 4 gets printed. $i == 2, which is even, so 4 gets removed, which results in: + V + 2 3 5 6 7 8 9 10 11 12 13 14 15 + Note again everything shifted to the left. + Now the foreach body ends, which means we go to the next element, which is now 6 instead of 5. + V + 2 3 5 6 7 8 9 10 11 12 13 14 15 + 6 gets printed, etc... */ +foreach ($doc->getElementsByTagName('e') as $node) { + print $node->getAttribute('i') . 
' '; + if ($i++ % 2 == 0) + $root->removeChild($node); +} +print "\n"; +?> +--EXPECT-- +1 3 4 6 7 9 10 12 13 15 diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt new file mode 100644 index 0000000000000..0ac52cd5d662f --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt @@ -0,0 +1,29 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness with simplexml_import_dom +--EXTENSIONS-- +dom +simplexml +--FILE-- +loadXML( '' ); +$list = $doc->getElementsByTagName('e'); +print $list->item(5)->getAttribute('i')."\n"; +echo "before import\n"; +$s = simplexml_import_dom($doc->documentElement); +echo "after import\n"; + +unset($s->e[5]); +print $list->item(5)->getAttribute('i')."\n"; + +unset($s->e[5]); +print $list->item(5)->getAttribute('i')."\n"; + +?> +--EXPECT-- +6 +before import +after import +7 +8 diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt new file mode 100644 index 0000000000000..91d810df51bc6 --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt @@ -0,0 +1,89 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness tree walk +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + +echo "-- On first child, for --\n"; +$list = $doc->documentElement->firstChild->getElementsByTagName('b'); +var_dump($list->length); +for ($i = 0; $i < $list->length; $i++) { + echo $i, " ", $list->item($i)->getAttribute('i'), "\n"; +} +// Try to access one beyond to check if we don't get excess elements +var_dump($list->item($i)); + +echo "-- On first child, foreach --\n"; +foreach ($list as $item) { + echo $item->getAttribute('i'), "\n"; +} + +echo "-- On document, for --\n"; +$list = $doc->getElementsByTagName('b'); +var_dump($list->length); +for ($i = 0; $i < $list->length; $i++) { + echo $i, " ", 
$list->item($i)->getAttribute('i'), "\n"; +} +// Try to access one beyond to check if we don't get excess elements +var_dump($list->item($i)); + +echo "-- On document, foreach --\n"; +foreach ($list as $item) { + echo $item->getAttribute('i'), "\n"; +} + +echo "-- On document, after caching followed by removing --\n"; + +$list = $doc->documentElement->firstChild->getElementsByTagName('b'); +$list->item(0); // Activate item cache +$list->item(0)->remove(); +$list->item(0)->remove(); +$list->item(0)->remove(); +var_dump($list->length); +var_dump($list->item(0)); +foreach ($list as $item) { + echo "Should not execute\n"; +} + +echo "-- On document, clean list after removal --\n"; +$list = $doc->documentElement->firstChild->getElementsByTagName('b'); +var_dump($list->length); +var_dump($list->item(0)); +foreach ($list as $item) { + echo "Should not execute\n"; +} + +?> +--EXPECT-- +-- On first child, for -- +int(3) +0 1 +1 2 +2 3 +NULL +-- On first child, foreach -- +1 +2 +3 +-- On document, for -- +int(4) +0 1 +1 2 +2 3 +3 4 +NULL +-- On document, foreach -- +1 +2 +3 +4 +-- On document, after caching followed by removing -- +int(0) +NULL +-- On document, clean list after removal -- +int(0) +NULL diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt new file mode 100644 index 0000000000000..af8af51844c9d --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness affected by writing properties +--EXTENSIONS-- +dom +--FILE-- +'; +$fields = ['nodeValue', 'textContent']; + +foreach ($fields as $field) { + $doc = new DOMDocument; + $doc->loadXML($xml); + $list = $doc->getElementsByTagName('a'); + var_dump($list->item(0) === NULL); + $doc->documentElement->{$field} = 'new_content'; + var_dump($list->item(0) === NULL); + print $doc->saveXML(); +} + 
+// Shouldn't be affected +$doc = new DOMDocument; +$doc->loadXML($xml); +$list = $doc->getElementsByTagNameNS('foo', 'a'); +var_dump($list->item(0) === NULL); +$doc->documentElement->firstChild->prefix = 'ns2'; +var_dump($list->item(0) === NULL); +print $doc->saveXML(); + +?> +--EXPECT-- +bool(false) +bool(true) + +new_content +bool(false) +bool(true) + +new_content +bool(false) +bool(false) + + diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt new file mode 100644 index 0000000000000..2c14a2080569e --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness with DOMDocument::xinclude() +--EXTENSIONS-- +dom +--FILE-- + + +

Hello

+ + + +

xinclude: book.xml not found

+
+
+
+
+EOD; + +$dom = new DOMDocument; +$dom->loadXML($xml); +$elements = $dom->getElementsByTagName('p'); +var_dump($elements->item(0)->textContent); +@$dom->xinclude(); +var_dump($elements->item(1)->textContent); +echo $dom->saveXML(); + +?> +--EXPECT-- +string(5) "Hello" +string(28) "xinclude: book.xml not found" + + +

Hello

+ + +

xinclude: book.xml not found

+ +
+
diff --git a/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt b/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt new file mode 100644 index 0000000000000..dad532b8167fe --- /dev/null +++ b/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt @@ -0,0 +1,69 @@ +--TEST-- +DOMDocument node list item cache invalidation +--EXTENSIONS-- +dom +--FILE-- +loadHTML('

hello

world

'); + +$elements = $doc->getElementsByTagName('p'); +$elements->item(0); // Activate item cache +$doc->loadHTML('

A

B

C

'); +var_dump($elements); +var_dump($elements->item(0)->textContent); // First lookup +var_dump($elements->item(2)->textContent); // Uses cache +var_dump($elements->item(1)->textContent); // Does not use cache + +echo "-- Remove cached item test --\n"; + +$doc = new DOMDocument(); +$doc->loadHTML('

hello

world

!

'); + +$elements = $doc->getElementsByTagName('p'); +$item = $elements->item(0); // Activate item cache +var_dump($item->textContent); +$item->remove(); +// Now element 0 means "world", and 1 means "!" +unset($item); +$item = $elements->item(1); +var_dump($item->textContent); + +echo "-- Removal of cached item in loop test --\n"; + +$doc = new DOMDocument; +$doc->loadXML( '' ); +$root = $doc->documentElement; + +$i = 0; +$elements = $root->getElementsByTagName('e'); +for ($i = 0; $i < 11; $i++) { + $node = $elements->item($i); + print $node->getAttribute('i') . ' '; + if ($i++ % 2 == 0) + $root->removeChild( $node ); +} +print "\n"; + +?> +--EXPECTF-- +-- Switch document test -- +object(DOMNodeList)#2 (1) { + ["length"]=> + int(3) +} +string(1) "A" +string(1) "C" +string(1) "B" +-- Remove cached item test -- +string(5) "hello" +string(1) "!" +-- Removal of cached item in loop test -- +1 4 7 10 13 +Fatal error: Uncaught Error: Call to a member function getAttribute() on null in %s:%d +Stack trace: +#0 {main} + thrown in %s on line %d diff --git a/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt b/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt new file mode 100644 index 0000000000000..7a3633894a381 --- /dev/null +++ b/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt @@ -0,0 +1,34 @@ +--TEST-- +DOMDocument node list length cache invalidation +--EXTENSIONS-- +dom +--FILE-- +loadHTML('

hello

world

!

'); + +$elements = $doc->getElementsByTagName('p'); +$item = $elements->item(0); // Activate item cache +var_dump($elements->length); // Length not cached yet, should still compute +$item->remove(); +// Now element 0 means "world", and 1 means "!" +unset($item); +var_dump($elements->length); +$item = $elements->item(1); +var_dump($item->textContent); +$item = $elements->item(1); +var_dump($item->textContent); +$item = $elements->item(0); +var_dump($item->textContent); +$item = $elements->item(1); +var_dump($item->textContent); + +?> +--EXPECT-- +int(3) +int(2) +string(1) "!" +string(1) "!" +string(5) "world" +string(1) "!" diff --git a/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt new file mode 100644 index 0000000000000..e05bd1ac6f646 --- /dev/null +++ b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument liveness caching invalidation by textContent +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); +$root = $doc->documentElement; + +$i = 0; + +echo "-- Overwrite during iteration --\n"; + +foreach ($doc->getElementsByTagName('e') as $node) { + if ($i++ == 2) { + $root->textContent = 'overwrite'; + } + var_dump($node->tagName, $node->getAttribute('id')); +} + +echo "-- Empty iteration --\n"; +foreach ($doc->getElementsByTagName('e') as $node) { + echo "Should not execute\n"; +} + +echo "-- After adding an element again --\n"; +$root->appendChild(new DOMElement('e')); +foreach ($doc->getElementsByTagName('e') as $node) { + echo "Should execute once\n"; +} +?> +--EXPECT-- +-- Overwrite during iteration -- +string(1) "e" +string(1) "1" +string(1) "e" +string(1) "2" +string(1) "e" +string(1) "3" +-- Empty iteration -- +-- After adding an element again -- +Should execute once diff --git a/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt new file mode 100644 index 
0000000000000..9aebf3139cdf9 --- /dev/null +++ b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt @@ -0,0 +1,16 @@ +--TEST-- +Node list cache should not break on DOMElement::getElementsByTagName() without document +--EXTENSIONS-- +dom +--FILE-- +getElementsByTagName("b") as $x) { + var_dump($x); +} + +?> +Done +--EXPECT-- +Done diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 71ed3f911cca7..5af3443069aba 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -1163,8 +1163,14 @@ PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object, object->node->_private = private_data; } } else { + if (UNEXPECTED(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)) { + php_libxml_doc_ptr *doc_ptr = emalloc(sizeof(php_libxml_doc_ptr)); + doc_ptr->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */ + object->node = (php_libxml_node_ptr *) doc_ptr; /* downcast */ + } else { + object->node = emalloc(sizeof(php_libxml_node_ptr)); + } ret_refcount = 1; - object->node = emalloc(sizeof(php_libxml_node_ptr)); object->node->node = node; object->node->refcount = 1; object->node->_private = private_data; diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index de9b49d2ce3b6..a23ff6ee57c13 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -69,6 +69,16 @@ typedef struct _php_libxml_node_ptr { void *_private; } php_libxml_node_ptr; +typedef struct { + size_t modification_nr; +} php_libxml_cache_tag; + +/* extends php_libxml_node_ptr */ +typedef struct { + php_libxml_node_ptr node_ptr; + php_libxml_cache_tag cache_tag; +} php_libxml_doc_ptr; + typedef struct _php_libxml_node_object { php_libxml_node_ptr *node; php_libxml_ref_obj *document; @@ -81,6 +91,27 @@ static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object * return (php_libxml_node_object *)((char*)(obj) - obj->handlers->offset); } +static 
zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_doc_ptr *doc_ptr) +{ +#if SIZEOF_SIZE_T == 8 + /* If one operation happens every nanosecond, then it would still require 584 years to overflow + * the counter. So we'll just assume this never happens. */ + doc_ptr->cache_tag.modification_nr++; +#else + size_t new_modification_nr = doc_ptr->cache_tag.modification_nr + 1; + if (EXPECTED(new_modification_nr > 0)) { /* unsigned overflow; checking after addition results in one less instruction */ + doc_ptr->cache_tag.modification_nr = new_modification_nr; + } +#endif +} + +static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp) +{ + if (docp && docp->_private) { /* docp is NULL for detached nodes */ + php_libxml_invalidate_node_list_cache(docp->_private); + } +} + #define Z_LIBXML_NODE_P(zv) php_libxml_node_fetch_object(Z_OBJ_P((zv))) typedef void * (*php_libxml_export_node) (zval *object); diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index d3f2865e12036..e0340b2e3a68a 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -442,6 +442,8 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (sxe->iter.type == SXE_ITER_ATTRLIST) { attribs = 1; elements = 0; @@ -813,6 +815,8 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (Z_TYPE_P(member) == IS_LONG) { if (sxe->iter.type != SXE_ITER_ATTRLIST) { attribs = 0; @@ -1686,6 +1690,8 @@ PHP_METHOD(SimpleXMLElement, addChild) sxe = Z_SXEOBJ_P(ZEND_THIS); GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (sxe->iter.type == SXE_ITER_ATTRLIST) { php_error_docref(NULL, E_WARNING, "Cannot add element to attributes"); return; From 
5fae4b50318faf3c3011cb4b24cceb3926b2ba84 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 23:32:33 +0200 Subject: [PATCH 111/168] Struct-pack spl_dllist_object --- ext/spl/spl_dllist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/spl/spl_dllist.c b/ext/spl/spl_dllist.c index 176989936ed8f..f1445fa5d905e 100644 --- a/ext/spl/spl_dllist.c +++ b/ext/spl/spl_dllist.c @@ -72,8 +72,8 @@ typedef struct _spl_dllist_it spl_dllist_it; struct _spl_dllist_object { spl_ptr_llist *llist; - int traverse_position; spl_ptr_llist_element *traverse_pointer; + int traverse_position; int flags; zend_function *fptr_offset_get; zend_function *fptr_offset_set; From c6bffff96bf46a4345a5d2a8c526bdc8a7bdd3aa Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 00:19:28 +0200 Subject: [PATCH 112/168] Remove dead code from sxe_get_element_by_name() retnode will never be set to anything other than NULL, because the branch is always taken if the names match. 
--- ext/simplexml/simplexml.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index e0340b2e3a68a..26c77e35b9312 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -165,7 +165,6 @@ static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, { int orgtype; xmlNodePtr orgnode = node; - xmlNodePtr retnode = NULL; if (sxe->iter.type != SXE_ITER_ATTRLIST) { @@ -189,25 +188,14 @@ static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, SKIP_TEXT(node) if (node->type == XML_ELEMENT_NODE && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (!xmlStrcmp(node->name, (xmlChar *)*name)) { - if (1||retnode) - { - *type = SXE_ITER_ELEMENT; - return orgnode; - } - retnode = node; + *type = SXE_ITER_ELEMENT; + return orgnode; } } next_iter: node = node->next; } - if (retnode) - { - *type = SXE_ITER_NONE; - *name = NULL; - return retnode; - } - return NULL; } /* }}} */ From 795127942b1bdb10b728447fcbb89d881f58426b Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 00:21:26 +0200 Subject: [PATCH 113/168] Remove double class entry variable --- ext/simplexml/simplexml.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index 26c77e35b9312..435f88d40992a 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -33,13 +33,12 @@ #include "zend_interfaces.h" #include "ext/spl/spl_iterators.h" -zend_class_entry *sxe_class_entry = NULL; PHP_SXE_API zend_class_entry *ce_SimpleXMLIterator; PHP_SXE_API zend_class_entry *ce_SimpleXMLElement; PHP_SXE_API zend_class_entry *sxe_get_element_class_entry(void) /* {{{ */ { - return sxe_class_entry; + return ce_SimpleXMLElement; } /* }}} */ @@ -471,7 +470,7 @@ static zval *sxe_prop_dim_write(zend_object 
*object, zval *member, zval *value, value_str = zval_get_string(value); break; case IS_OBJECT: - if (Z_OBJCE_P(value) == sxe_class_entry) { + if (Z_OBJCE_P(value) == ce_SimpleXMLElement) { zval zval_copy; if (sxe_object_cast_ex(Z_OBJ_P(value), &zval_copy, IS_STRING) == FAILURE) { zend_throw_error(NULL, "Unable to cast node to string"); @@ -2184,7 +2183,7 @@ static zend_function* php_sxe_find_fptr_count(zend_class_entry *ce) int inherited = 0; while (parent) { - if (parent == sxe_class_entry) { + if (parent == ce_SimpleXMLElement) { break; } parent = parent->parent; @@ -2242,7 +2241,7 @@ PHP_FUNCTION(simplexml_load_file) char *ns = NULL; size_t ns_len = 0; zend_long options = 0; - zend_class_entry *ce= sxe_class_entry; + zend_class_entry *ce= ce_SimpleXMLElement; zend_function *fptr_count; bool isprefix = 0; @@ -2262,7 +2261,7 @@ PHP_FUNCTION(simplexml_load_file) } if (!ce) { - ce = sxe_class_entry; + ce = ce_SimpleXMLElement; fptr_count = NULL; } else { fptr_count = php_sxe_find_fptr_count(ce); @@ -2287,7 +2286,7 @@ PHP_FUNCTION(simplexml_load_string) char *ns = NULL; size_t ns_len = 0; zend_long options = 0; - zend_class_entry *ce= sxe_class_entry; + zend_class_entry *ce= ce_SimpleXMLElement; zend_function *fptr_count; bool isprefix = 0; @@ -2315,7 +2314,7 @@ PHP_FUNCTION(simplexml_load_string) } if (!ce) { - ce = sxe_class_entry; + ce = ce_SimpleXMLElement; fptr_count = NULL; } else { fptr_count = php_sxe_find_fptr_count(ce); @@ -2589,7 +2588,7 @@ PHP_FUNCTION(simplexml_import_dom) zval *node; php_libxml_node_object *object; xmlNodePtr nodep = NULL; - zend_class_entry *ce = sxe_class_entry; + zend_class_entry *ce = ce_SimpleXMLElement; zend_function *fptr_count; if (zend_parse_parameters(ZEND_NUM_ARGS(), "o|C!", &node, &ce) == FAILURE) { @@ -2614,7 +2613,7 @@ PHP_FUNCTION(simplexml_import_dom) if (nodep && nodep->type == XML_ELEMENT_NODE) { if (!ce) { - ce = sxe_class_entry; + ce = ce_SimpleXMLElement; fptr_count = NULL; } else { fptr_count = 
php_sxe_find_fptr_count(ce); @@ -2664,10 +2663,10 @@ ZEND_GET_MODULE(simplexml) /* {{{ PHP_MINIT_FUNCTION(simplexml) */ PHP_MINIT_FUNCTION(simplexml) { - sxe_class_entry = register_class_SimpleXMLElement(zend_ce_stringable, zend_ce_countable, spl_ce_RecursiveIterator); - sxe_class_entry->create_object = sxe_object_new; - sxe_class_entry->default_object_handlers = &sxe_object_handlers; - sxe_class_entry->get_iterator = php_sxe_get_iterator; + ce_SimpleXMLElement = register_class_SimpleXMLElement(zend_ce_stringable, zend_ce_countable, spl_ce_RecursiveIterator); + ce_SimpleXMLElement->create_object = sxe_object_new; + ce_SimpleXMLElement->default_object_handlers = &sxe_object_handlers; + ce_SimpleXMLElement->get_iterator = php_sxe_get_iterator; memcpy(&sxe_object_handlers, &std_object_handlers, sizeof(zend_object_handlers)); sxe_object_handlers.offset = XtOffsetOf(php_sxe_object, zo); @@ -2690,12 +2689,9 @@ PHP_MINIT_FUNCTION(simplexml) sxe_object_handlers.get_closure = NULL; sxe_object_handlers.get_gc = sxe_get_gc; - /* TODO: Why do we have two variables for this? */ - ce_SimpleXMLElement = sxe_class_entry; - ce_SimpleXMLIterator = register_class_SimpleXMLIterator(ce_SimpleXMLElement); - php_libxml_register_export(sxe_class_entry, simplexml_export_node); + php_libxml_register_export(ce_SimpleXMLElement, simplexml_export_node); return SUCCESS; } @@ -2704,7 +2700,7 @@ PHP_MINIT_FUNCTION(simplexml) /* {{{ PHP_MSHUTDOWN_FUNCTION(simplexml) */ PHP_MSHUTDOWN_FUNCTION(simplexml) { - sxe_class_entry = NULL; + ce_SimpleXMLElement = NULL; return SUCCESS; } /* }}} */ From 47c277bde55cf78870d7766be4b701cbc34182b7 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 00:22:21 +0200 Subject: [PATCH 114/168] Use xmlStrEqual() instead of !xmlStrCmp() This actually shows the intent more clearly, and also from the docs of xmlStrEqual: "Should be a bit more readable and faster than xmlStrcmp()". 
--- ext/simplexml/simplexml.c | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index 435f88d40992a..d8dcfabff14e2 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -100,7 +100,7 @@ static inline int match_ns(php_sxe_object *sxe, xmlNodePtr node, xmlChar *name, return 1; } - if (node->ns && !xmlStrcmp(prefix ? node->ns->prefix : node->ns->href, name)) { + if (node->ns && xmlStrEqual(prefix ? node->ns->prefix : node->ns->href, name)) { return 1; } @@ -126,7 +126,7 @@ static xmlNodePtr sxe_get_element_by_offset(php_sxe_object *sxe, zend_long offse SKIP_TEXT(node) if (node->type == XML_ELEMENT_NODE && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (sxe->iter.type == SXE_ITER_CHILD || ( - sxe->iter.type == SXE_ITER_ELEMENT && !xmlStrcmp(node->name, sxe->iter.name))) { + sxe->iter.type == SXE_ITER_ELEMENT && xmlStrEqual(node->name, sxe->iter.name))) { if (nodendx == offset) { break; } @@ -150,7 +150,7 @@ static xmlNodePtr sxe_find_element_by_name(php_sxe_object *sxe, xmlNodePtr node, while (node) { SKIP_TEXT(node) if (node->type == XML_ELEMENT_NODE && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { - if (!xmlStrcmp(node->name, name)) { + if (xmlStrEqual(node->name, name)) { return node; } } @@ -186,7 +186,7 @@ static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, while (node) { SKIP_TEXT(node) if (node->type == XML_ELEMENT_NODE && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { - if (!xmlStrcmp(node->name, (xmlChar *)*name)) { + if (xmlStrEqual(node->name, (xmlChar *)*name)) { *type = SXE_ITER_ELEMENT; return orgnode; } @@ -268,7 +268,7 @@ static zval *sxe_prop_dim_read(zend_object *object, zval *member, bool elements, if (Z_TYPE_P(member) != IS_LONG || sxe->iter.type == SXE_ITER_ATTRLIST) { if (Z_TYPE_P(member) == IS_LONG) { while (attr && nodendx <= 
Z_LVAL_P(member)) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (nodendx == Z_LVAL_P(member)) { _node_as_zval(sxe, (xmlNodePtr) attr, rv, SXE_ITER_NONE, NULL, sxe->iter.nsprefix, sxe->iter.isprefix); break; @@ -279,7 +279,7 @@ static zval *sxe_prop_dim_read(zend_object *object, zval *member, bool elements, } } else { while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && !xmlStrcmp(attr->name, (xmlChar *)name) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && xmlStrEqual(attr->name, (xmlChar *)name) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { _node_as_zval(sxe, (xmlNodePtr) attr, rv, SXE_ITER_NONE, NULL, sxe->iter.nsprefix, sxe->iter.isprefix); break; } @@ -494,7 +494,7 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, if (attribs) { if (Z_TYPE_P(member) == IS_LONG) { while (attr && nodendx <= Z_LVAL_P(member)) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (nodendx == Z_LVAL_P(member)) { is_attr = 1; ++counter; @@ -506,7 +506,7 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, } } else { while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && !xmlStrcmp(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && xmlStrEqual(attr->name, (xmlChar *)Z_STRVAL_P(member)) && 
match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { is_attr = 1; ++counter; break; @@ -545,7 +545,7 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, while (node) { SKIP_TEXT(node); - if (!xmlStrcmp(node->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if (xmlStrEqual(node->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { newnode = node; ++counter; } @@ -708,7 +708,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt int nodendx = 0; while (attr && nodendx <= Z_LVAL_P(member)) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (nodendx == Z_LVAL_P(member)) { exists = 1; break; @@ -719,7 +719,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt } } else { while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && !xmlStrcmp(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && xmlStrEqual(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { exists = 1; break; } @@ -728,7 +728,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt } } if (exists && check_empty == 1 && - (!attr->children || !attr->children->content || !attr->children->content[0] || !xmlStrcmp(attr->children->content, (const xmlChar *) "0")) ) { + (!attr->children || !attr->children->content || !attr->children->content[0] || xmlStrEqual(attr->children->content, (const xmlChar *) "0")) ) { /* Attribute with no 
content in it's text node */ exists = 0; } @@ -747,7 +747,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt exists = 1; if (check_empty == 1 && (!node->children || (node->children->type == XML_TEXT_NODE && !node->children->next && - (!node->children->content || !node->children->content[0] || !xmlStrcmp(node->children->content, (const xmlChar *) "0")))) ) { + (!node->children->content || !node->children->content[0] || xmlStrEqual(node->children->content, (const xmlChar *) "0")))) ) { exists = 0; } } @@ -832,7 +832,7 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements int nodendx = 0; while (attr && nodendx <= Z_LVAL_P(member)) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (nodendx == Z_LVAL_P(member)) { xmlUnlinkNode((xmlNodePtr) attr); php_libxml_node_free_resource((xmlNodePtr) attr); @@ -845,7 +845,7 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements } else { while (attr) { anext = attr->next; - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && !xmlStrcmp(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && xmlStrEqual(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { xmlUnlinkNode((xmlNodePtr) attr); php_libxml_node_free_resource((xmlNodePtr) attr); break; @@ -872,7 +872,7 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements SKIP_TEXT(node); - if (!xmlStrcmp(node->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if (xmlStrEqual(node->name, (xmlChar 
*)Z_STRVAL_P(member)) && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { xmlUnlinkNode(node); php_libxml_node_free_resource(node); } @@ -997,7 +997,7 @@ static int sxe_prop_is_empty(zend_object *object) /* {{{ */ attr = node ? (xmlAttrPtr)node->properties : NULL; test = sxe->iter.name && sxe->iter.type == SXE_ITER_ATTRLIST; while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr)attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr)attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { return 0; } attr = attr->next; @@ -1107,7 +1107,7 @@ static HashTable *sxe_get_prop_hash(zend_object *object, int is_debug) /* {{{ */ ZVAL_UNDEF(&zattr); test = sxe->iter.name && sxe->iter.type == SXE_ITER_ATTRLIST; while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr)attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr)attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { ZVAL_STR(&value, sxe_xmlNodeListGetString((xmlDocPtr) sxe->document->ptr, attr->children, 1)); namelen = xmlStrlen(attr->name); if (Z_ISUNDEF(zattr)) { @@ -2392,7 +2392,7 @@ static xmlNodePtr php_sxe_iterator_fetch(php_sxe_object *sxe, xmlNodePtr node, i if (sxe->iter.name) { while (node) { if (node->type == XML_ATTRIBUTE_NODE) { - if (!xmlStrcmp(node->name, sxe->iter.name) && match_ns(sxe, node, prefix, isprefix)) { + if (xmlStrEqual(node->name, sxe->iter.name) && match_ns(sxe, node, prefix, isprefix)) { break; } } @@ -2411,7 +2411,7 @@ static xmlNodePtr php_sxe_iterator_fetch(php_sxe_object *sxe, xmlNodePtr node, i } else if (sxe->iter.type == SXE_ITER_ELEMENT && sxe->iter.name) { while (node) { if (node->type == XML_ELEMENT_NODE) { - if (!xmlStrcmp(node->name, sxe->iter.name) && match_ns(sxe, node, prefix, isprefix)) { + if (xmlStrEqual(node->name, 
sxe->iter.name) && match_ns(sxe, node, prefix, isprefix)) { break; } } From ed097e30f0479176ffb608eaf115af1d77579408 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 00:27:00 +0200 Subject: [PATCH 115/168] No need for the double name pointer --- ext/simplexml/simplexml.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index d8dcfabff14e2..7d9bed8ad6c40 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -160,7 +160,7 @@ static xmlNodePtr sxe_find_element_by_name(php_sxe_object *sxe, xmlNodePtr node, return NULL; } /* }}} */ -static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, char **name, SXE_ITER *type) /* {{{ */ +static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, char *name, SXE_ITER *type) /* {{{ */ { int orgtype; xmlNodePtr orgnode = node; @@ -186,7 +186,7 @@ static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, while (node) { SKIP_TEXT(node) if (node->type == XML_ELEMENT_NODE && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { - if (xmlStrEqual(node->name, (xmlChar *)*name)) { + if (xmlStrEqual(node->name, (xmlChar *)name)) { *type = SXE_ITER_ELEMENT; return orgnode; } @@ -634,7 +634,7 @@ static zval *sxe_property_get_adr(zend_object *object, zend_string *zname, int f sxe = php_sxe_fetch_object(object); GET_NODE(sxe, node); name = ZSTR_VAL(zname); - node = sxe_get_element_by_name(sxe, node, &name, &type); + node = sxe_get_element_by_name(sxe, node, name, &type); if (node) { return NULL; } From 3da415662a3dd1fa339e208ae87c3a6b95edce89 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 4 Jun 2023 15:20:04 +0200 Subject: [PATCH 116/168] Switch DOMNodeList::item() and DOMNamedNodeMap::item() to fast ZPP (#11361) Code size impact: * DOMNodeList::item() => +96 
bytes hot, +31 bytes cold * DOMNamedNodeMap::item() => +57 bytes hot, +31 bytes cold --- ext/dom/namednodemap.c | 6 +++--- ext/dom/nodelist.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/dom/namednodemap.c b/ext/dom/namednodemap.c index 99103ce30b7ad..dadab115a1c2a 100644 --- a/ext/dom/namednodemap.c +++ b/ext/dom/namednodemap.c @@ -142,9 +142,9 @@ PHP_METHOD(DOMNamedNodeMap, item) int count; id = ZEND_THIS; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &index) == FAILURE) { - RETURN_THROWS(); - } + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_LONG(index) + ZEND_PARSE_PARAMETERS_END(); if (index < 0 || ZEND_LONG_INT_OVFL(index)) { zend_argument_value_error(1, "must be between 0 and %d", INT_MAX); RETURN_THROWS(); diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index 20e3b18bee883..55073b255016c 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -154,9 +154,9 @@ PHP_METHOD(DOMNodeList, item) int count = 0; id = ZEND_THIS; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &index) == FAILURE) { - RETURN_THROWS(); - } + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_LONG(index) + ZEND_PARSE_PARAMETERS_END(); if (index >= 0) { intern = Z_DOMOBJ_P(id); From b1d8e240e688cae810c83b364772bf140ac45f42 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 16:41:44 +0200 Subject: [PATCH 117/168] Fix bug #67440: append_node of a DOMDocumentFragment does not reconcile namespaces The test was amended from the original issue report. For the test: Co-authored-by: php@deep-freeze.ca The problem is that the regular dom_reconcile_ns() only works on a single node. We actually have to reconcile the whole tree in case a fragment was added. This also required moving some code around so that this special case could be handled separately. Closes GH-11362. 
--- NEWS | 2 + ext/dom/node.c | 66 ++++++++++------ ext/dom/parentnode.c | 18 +++-- ext/dom/php_dom.c | 75 +++++++++++++----- ext/dom/php_dom.h | 1 + ext/dom/tests/bug67440.phpt | 151 ++++++++++++++++++++++++++++++++++++ 6 files changed, 264 insertions(+), 49 deletions(-) create mode 100644 ext/dom/tests/bug67440.phpt diff --git a/NEWS b/NEWS index f2cc5b1be96a1..8395e5233c864 100644 --- a/NEWS +++ b/NEWS @@ -21,6 +21,8 @@ PHP NEWS (nielsdos) . Fixed bug GH-11347 (Memory leak when calling a static method inside an xpath query). (nielsdos) + . Fixed bug #67440 (append_node of a DOMDocumentFragment does not reconcile + namespaces). (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/node.c b/ext/dom/node.c index b291ccc99a308..bcf4ee487d38d 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -943,12 +943,20 @@ PHP_METHOD(DOMNode, insertBefore) return; } } + new_child = xmlAddPrevSibling(refp, child); + if (UNEXPECTED(NULL == new_child)) { + goto cannot_add; + } } else if (child->type == XML_DOCUMENT_FRAG_NODE) { + xmlNodePtr last = child->last; new_child = _php_dom_insert_fragment(parentp, refp->prev, refp, child, intern, childobj); - } - - if (new_child == NULL) { + dom_reconcile_ns_list(parentp->doc, new_child, last); + } else { new_child = xmlAddPrevSibling(refp, child); + if (UNEXPECTED(NULL == new_child)) { + goto cannot_add; + } + dom_reconcile_ns(parentp->doc, new_child); } } else { if (child->parent != NULL){ @@ -985,23 +993,28 @@ PHP_METHOD(DOMNode, insertBefore) return; } } + new_child = xmlAddChild(parentp, child); + if (UNEXPECTED(NULL == new_child)) { + goto cannot_add; + } } else if (child->type == XML_DOCUMENT_FRAG_NODE) { + xmlNodePtr last = child->last; new_child = _php_dom_insert_fragment(parentp, parentp->last, NULL, child, intern, childobj); - } - if (new_child == NULL) { + dom_reconcile_ns_list(parentp->doc, new_child, last); + } else { new_child = xmlAddChild(parentp, child); + if 
(UNEXPECTED(NULL == new_child)) { + goto cannot_add; + } + dom_reconcile_ns(parentp->doc, new_child); } } - if (NULL == new_child) { - zend_throw_error(NULL, "Cannot add newnode as the previous sibling of refnode"); - RETURN_THROWS(); - } - - dom_reconcile_ns(parentp->doc, new_child); - DOM_RET_OBJ(new_child, &ret, intern); - + return; +cannot_add: + zend_throw_error(NULL, "Cannot add newnode as the previous sibling of refnode"); + RETURN_THROWS(); } /* }}} end dom_node_insert_before */ @@ -1066,9 +1079,10 @@ PHP_METHOD(DOMNode, replaceChild) xmlUnlinkNode(oldchild); + xmlNodePtr last = newchild->last; newchild = _php_dom_insert_fragment(nodep, prevsib, nextsib, newchild, intern, newchildobj); if (newchild) { - dom_reconcile_ns(nodep->doc, newchild); + dom_reconcile_ns_list(nodep->doc, newchild, last); } } else if (oldchild != newchild) { xmlDtdPtr intSubset = xmlGetIntSubset(nodep->doc); @@ -1215,22 +1229,28 @@ PHP_METHOD(DOMNode, appendChild) php_libxml_node_free_resource((xmlNodePtr) lastattr); } } + new_child = xmlAddChild(nodep, child); + if (UNEXPECTED(new_child == NULL)) { + goto cannot_add; + } } else if (child->type == XML_DOCUMENT_FRAG_NODE) { + xmlNodePtr last = child->last; new_child = _php_dom_insert_fragment(nodep, nodep->last, NULL, child, intern, childobj); - } - - if (new_child == NULL) { + dom_reconcile_ns_list(nodep->doc, new_child, last); + } else { new_child = xmlAddChild(nodep, child); - if (new_child == NULL) { - // TODO Convert to Error? - php_error_docref(NULL, E_WARNING, "Couldn't append node"); - RETURN_FALSE; + if (UNEXPECTED(new_child == NULL)) { + goto cannot_add; } + dom_reconcile_ns(nodep->doc, new_child); } - dom_reconcile_ns(nodep->doc, new_child); - DOM_RET_OBJ(new_child, &ret, intern); + return; +cannot_add: + // TODO Convert to Error? 
+ php_error_docref(NULL, E_WARNING, "Couldn't append node"); + RETURN_FALSE; } /* }}} end dom_node_append_child */ diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index c99a2a5a6622a..b7e8e3ba774e3 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -298,13 +298,14 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc) parentNode->children = newchild; } - parentNode->last = fragment->last; + xmlNodePtr last = fragment->last; + parentNode->last = last; newchild->prev = prevsib; dom_fragment_assign_parent_node(parentNode, fragment); - dom_reconcile_ns(parentNode->doc, newchild); + dom_reconcile_ns_list(parentNode->doc, newchild, last); } xmlFree(fragment); @@ -335,13 +336,14 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) nextsib = parentNode->children; if (newchild) { + xmlNodePtr last = fragment->last; parentNode->children = newchild; fragment->last->next = nextsib; - nextsib->prev = fragment->last; + nextsib->prev = last; dom_fragment_assign_parent_node(parentNode, fragment); - dom_reconcile_ns(parentNode->doc, newchild); + dom_reconcile_ns_list(parentNode->doc, newchild, last); } xmlFree(fragment); @@ -414,11 +416,13 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) newchild = fragment->children; if (newchild) { + xmlNodePtr last = fragment->last; + /* Step 5: place fragment into the parent before viable_next_sibling */ dom_pre_insert(viable_next_sibling, parentNode, newchild, fragment); dom_fragment_assign_parent_node(parentNode, fragment); - dom_reconcile_ns(doc, newchild); + dom_reconcile_ns_list(doc, newchild, last); } xmlFree(fragment); @@ -463,6 +467,8 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) newchild = fragment->children; if (newchild) { + xmlNodePtr last = fragment->last; + /* Step 5: if viable_previous_sibling is null, set it to the parent's first child, otherwise viable_previous_sibling's next sibling */ if 
(!viable_previous_sibling) { viable_previous_sibling = parentNode->children; @@ -473,7 +479,7 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) dom_pre_insert(viable_previous_sibling, parentNode, newchild, fragment); dom_fragment_assign_parent_node(parentNode, fragment); - dom_reconcile_ns(doc, newchild); + dom_reconcile_ns_list(doc, newchild, last); } xmlFree(fragment); diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 1883767d2e48b..df20093221f16 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1385,38 +1385,73 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { } /* }}} end dom_set_old_ns */ -void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ +static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep) { xmlNsPtr nsptr, nsdftptr, curns, prevns = NULL; - if (nodep->type == XML_ELEMENT_NODE) { - /* Following if block primarily used for inserting nodes created via createElementNS */ - if (nodep->nsDef != NULL) { - curns = nodep->nsDef; - while (curns) { - nsdftptr = curns->next; - if (curns->href != NULL) { - if((nsptr = xmlSearchNsByHref(doc, nodep->parent, curns->href)) && - (curns->prefix == NULL || xmlStrEqual(nsptr->prefix, curns->prefix))) { - curns->next = NULL; - if (prevns == NULL) { - nodep->nsDef = nsdftptr; - } else { - prevns->next = nsdftptr; - } - dom_set_old_ns(doc, curns); - curns = prevns; + /* Following if block primarily used for inserting nodes created via createElementNS */ + if (nodep->nsDef != NULL) { + curns = nodep->nsDef; + while (curns) { + nsdftptr = curns->next; + if (curns->href != NULL) { + if((nsptr = xmlSearchNsByHref(doc, nodep->parent, curns->href)) && + (curns->prefix == NULL || xmlStrEqual(nsptr->prefix, curns->prefix))) { + curns->next = NULL; + if (prevns == NULL) { + nodep->nsDef = nsdftptr; + } else { + prevns->next = nsdftptr; } + dom_set_old_ns(doc, curns); + curns = prevns; } - prevns = curns; - curns = nsdftptr; } + prevns = curns; + curns = nsdftptr; } + 
} +} + +void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ +{ + if (nodep->type == XML_ELEMENT_NODE) { + dom_reconcile_ns_internal(doc, nodep); xmlReconciliateNs(doc, nodep); } } /* }}} */ +static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last) +{ + ZEND_ASSERT(nodep != NULL); + while (true) { + if (nodep->type == XML_ELEMENT_NODE) { + dom_reconcile_ns_internal(doc, nodep); + if (nodep->children) { + dom_reconcile_ns_list_internal(doc, nodep->children, nodep->last /* process the whole children list */); + } + } + if (nodep == last) { + break; + } + nodep = nodep->next; + } +} + +void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last) +{ + dom_reconcile_ns_list_internal(doc, nodep, last); + /* Outside of the recursion above because xmlReconciliateNs() performs its own recursion. */ + while (true) { + xmlReconciliateNs(doc, nodep); + if (nodep == last) { + break; + } + nodep = nodep->next; + } +} + /* http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-DocCrElNS diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index fdfdd4e7a31ca..924d1397ca73a 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -110,6 +110,7 @@ int dom_check_qname(char *qname, char **localname, char **prefix, int uri_len, i xmlNsPtr dom_get_ns(xmlNodePtr node, char *uri, int *errorcode, char *prefix); void dom_set_old_ns(xmlDoc *doc, xmlNs *ns); void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep); +void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last); xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName); void dom_normalize (xmlNodePtr nodep); xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index); diff --git a/ext/dom/tests/bug67440.phpt b/ext/dom/tests/bug67440.phpt new file mode 100644 index 0000000000000..3e30f69b9ae4d --- /dev/null +++ b/ext/dom/tests/bug67440.phpt @@ -0,0 +1,151 @@ +--TEST-- +Bug #67440 
(append_node of a DOMDocumentFragment does not reconcile namespaces) +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + $fragment = $document->createDocumentFragment(); + $fragment->appendChild($document->createTextNode("\n")); + $fragment->appendChild($document->createElementNS('http://example/ns', 'myns:childNode', '1')); + $fragment->appendChild($document->createTextNode("\n")); + $fragment->appendChild($document->createElementNS('http://example/ns', 'myns:childNode', '2')); + $fragment->appendChild($document->createTextNode("\n")); + return array($document, $fragment); +} + +function case1($method) { + list($document, $fragment) = createDocument(); + $document->documentElement->{$method}($fragment); + echo $document->saveXML(); +} + +function case2($method) { + list($document, $fragment) = createDocument(); + $childNodes = iterator_to_array($fragment->childNodes); + foreach ($childNodes as $childNode) { + $document->documentElement->{$method}($childNode); + } + echo $document->saveXML(); +} + +function case3($method) { + list($document, $fragment) = createDocument(); + $fragment->removeChild($fragment->firstChild); + $document->documentElement->{$method}($fragment); + echo $document->saveXML(); +} + +function case4($method) { + list($document, $fragment) = createDocument(); + $fragment->childNodes[1]->appendChild($document->createElementNS('http://example/ns2', 'myns2:childNode', '3')); + $document->documentElement->{$method}($fragment); + echo $document->saveXML(); +} + +echo "== appendChild ==\n"; +echo "-- fragment to document element --\n"; case1('appendChild'); echo "\n"; +echo "-- children manually document element --\n"; case2('appendChild'); echo "\n"; +echo "-- fragment to document where first element is not a text node --\n"; case3('appendChild'); echo "\n"; +echo "-- fragment with namespace declarations in children --\n"; case4('appendChild'); echo "\n"; + +echo "== insertBefore ==\n"; +echo "-- fragment to document element --\n"; case1('insertBefore'); 
echo "\n"; +echo "-- children manually document element --\n"; case2('insertBefore'); echo "\n"; +echo "-- fragment to document where first element is not a text node --\n"; case3('insertBefore'); echo "\n"; +echo "-- fragment with namespace declarations in children --\n"; case4('insertBefore'); echo "\n"; + +echo "== insertAfter ==\n"; +echo "-- fragment to document element --\n"; case1('insertBefore'); echo "\n"; +echo "-- children manually document element --\n"; case2('insertBefore'); echo "\n"; +echo "-- fragment to document where first element is not a text node --\n"; case3('insertBefore'); echo "\n"; +echo "-- fragment with namespace declarations in children --\n"; case4('insertBefore'); echo "\n"; + +?> +--EXPECT-- +== appendChild == +-- fragment to document element -- + + +1 +2 + + +-- children manually document element -- + + +1 +2 + + +-- fragment to document where first element is not a text node -- + +1 +2 + + +-- fragment with namespace declarations in children -- + + +13 +2 + + +== insertBefore == +-- fragment to document element -- + + +1 +2 + + +-- children manually document element -- + + +1 +2 + + +-- fragment to document where first element is not a text node -- + +1 +2 + + +-- fragment with namespace declarations in children -- + + +13 +2 + + +== insertAfter == +-- fragment to document element -- + + +1 +2 + + +-- children manually document element -- + + +1 +2 + + +-- fragment to document where first element is not a text node -- + +1 +2 + + +-- fragment with namespace declarations in children -- + + +13 +2 + From 23f70025270c040e0d378b210120d9824af10ea6 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 17:54:37 +0200 Subject: [PATCH 118/168] Fix bug #81642: DOMChildNode::replaceWith() bug when replacing a node with itself Closes GH-11363. 
--- NEWS | 2 ++ ext/dom/element.c | 5 ++- ext/dom/parentnode.c | 69 +++++++++++++++++++++++++++++++------ ext/dom/php_dom.h | 1 + ext/dom/tests/bug81642.phpt | 49 ++++++++++++++++++++++++++ 5 files changed, 112 insertions(+), 14 deletions(-) create mode 100644 ext/dom/tests/bug81642.phpt diff --git a/NEWS b/NEWS index 8395e5233c864..39605d0d3c4c0 100644 --- a/NEWS +++ b/NEWS @@ -23,6 +23,8 @@ PHP NEWS xpath query). (nielsdos) . Fixed bug #67440 (append_node of a DOMDocumentFragment does not reconcile namespaces). (nielsdos) + . Fixed bug #81642 (DOMChildNode::replaceWith() bug when replacing a node + with itself). (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/element.c b/ext/dom/element.c index 19cef5834657a..78113d72776bd 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -1234,7 +1234,7 @@ PHP_METHOD(DOMElement, prepend) } /* }}} end DOMElement::prepend */ -/* {{{ URL: https://dom.spec.whatwg.org/#dom-parentnode-prepend +/* {{{ URL: https://dom.spec.whatwg.org/#dom-parentnode-replacechildren Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMElement, replaceWith) @@ -1251,8 +1251,7 @@ PHP_METHOD(DOMElement, replaceWith) id = ZEND_THIS; DOM_GET_OBJ(context, id, xmlNodePtr, intern); - dom_parent_node_after(intern, args, argc); - dom_child_node_remove(intern); + dom_child_replace_with(intern, args, argc); } /* }}} end DOMElement::prepend */ diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index b7e8e3ba774e3..a9dfda59622b7 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -485,35 +485,45 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) xmlFree(fragment); } -void dom_child_node_remove(dom_object *context) +static zend_result dom_child_removal_preconditions(const xmlNodePtr child, int stricterror) { - xmlNode *child = dom_object_get_node(context); - xmlNodePtr children; - int stricterror; - - stricterror = dom_get_strict_error(context->document); - if 
(dom_node_is_read_only(child) == SUCCESS || (child->parent != NULL && dom_node_is_read_only(child->parent) == SUCCESS)) { php_dom_throw_error(NO_MODIFICATION_ALLOWED_ERR, stricterror); - return; + return FAILURE; } if (!child->parent) { php_dom_throw_error(NOT_FOUND_ERR, stricterror); - return; + return FAILURE; } if (dom_node_children_valid(child->parent) == FAILURE) { - return; + return FAILURE; } - children = child->parent->children; + xmlNodePtr children = child->parent->children; if (!children) { php_dom_throw_error(NOT_FOUND_ERR, stricterror); + return FAILURE; + } + + return SUCCESS; +} + +void dom_child_node_remove(dom_object *context) +{ + xmlNode *child = dom_object_get_node(context); + xmlNodePtr children; + int stricterror; + + stricterror = dom_get_strict_error(context->document); + + if (UNEXPECTED(dom_child_removal_preconditions(child, stricterror) != SUCCESS)) { return; } + children = child->parent->children; while (children) { if (children == child) { xmlUnlinkNode(child); @@ -525,4 +535,41 @@ void dom_child_node_remove(dom_object *context) php_dom_throw_error(NOT_FOUND_ERR, stricterror); } +void dom_child_replace_with(dom_object *context, zval *nodes, int nodesc) +{ + xmlNodePtr child = dom_object_get_node(context); + xmlNodePtr parentNode = child->parent; + + int stricterror = dom_get_strict_error(context->document); + if (UNEXPECTED(dom_child_removal_preconditions(child, stricterror) != SUCCESS)) { + return; + } + + xmlNodePtr insertion_point = child->next; + + xmlNodePtr fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); + if (UNEXPECTED(fragment == NULL)) { + return; + } + + xmlNodePtr newchild = fragment->children; + xmlDocPtr doc = parentNode->doc; + + if (newchild) { + xmlNodePtr last = fragment->last; + + /* Unlink and free it unless it became a part of the fragment. 
*/ + if (child->parent != fragment) { + xmlUnlinkNode(child); + } + + dom_pre_insert(insertion_point, parentNode, newchild, fragment); + + dom_fragment_assign_parent_node(parentNode, fragment); + dom_reconcile_ns_list(doc, newchild, last); + } + + xmlFree(fragment); +} + #endif diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 924d1397ca73a..ac23d1fc25bb5 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -132,6 +132,7 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc); void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc); void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc); void dom_child_node_remove(dom_object *context); +void dom_child_replace_with(dom_object *context, zval *nodes, int nodesc); #define DOM_GET_OBJ(__ptr, __id, __prtype, __intern) { \ __intern = Z_DOMOBJ_P(__id); \ diff --git a/ext/dom/tests/bug81642.phpt b/ext/dom/tests/bug81642.phpt new file mode 100644 index 0000000000000..7bf3dde50588e --- /dev/null +++ b/ext/dom/tests/bug81642.phpt @@ -0,0 +1,49 @@ +--TEST-- +Bug #81642 (DOMChildNode::replaceWith() bug when replacing a node with itself) +--EXTENSIONS-- +dom +--FILE-- +appendChild($target = $doc->createElement('test')); +$target->replaceWith($target); +var_dump($doc->saveXML()); + +// Replace with itself + another element +$doc = new DOMDocument(); +$doc->appendChild($target = $doc->createElement('test')); +$target->replaceWith($target, $doc->createElement('foo')); +var_dump($doc->saveXML()); + +// Replace with text node +$doc = new DOMDocument(); +$doc->appendChild($target = $doc->createElement('test')); +$target->replaceWith($target, 'foo'); +var_dump($doc->saveXML()); + +// Replace with text node variant 2 +$doc = new DOMDocument(); +$doc->appendChild($target = $doc->createElement('test')); +$target->replaceWith('bar', $target, 'foo'); +var_dump($doc->saveXML()); + +?> +--EXPECT-- +string(30) " + +" +string(37) " + + +" +string(34) " + +foo +" +string(38) " 
+bar + +foo +" From 0e34ac864a20bd03a35741db09f0bdf72ae56874 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 4 Jun 2023 15:13:37 +0200 Subject: [PATCH 119/168] Fix bug #77686: Removed elements are still returned by getElementById From the moment an ID is created, libxml2's behaviour is to cache that element, even if that element is not yet attached to the document. Similarly, only upon destruction of the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply ingrained in the library, and uses the cache for various purposes, it seems like a bad idea and lost cause to fight it. Instead, we'll simply walk the tree upwards to check if the node is attached to the document. Closes GH-11369. --- NEWS | 2 ++ ext/dom/document.c | 21 ++++++++++++++++++- ext/dom/tests/bug77686.phpt | 40 +++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 ext/dom/tests/bug77686.phpt diff --git a/NEWS b/NEWS index 39605d0d3c4c0..122e4a48b86bd 100644 --- a/NEWS +++ b/NEWS @@ -25,6 +25,8 @@ PHP NEWS namespaces). (nielsdos) . Fixed bug #81642 (DOMChildNode::replaceWith() bug when replacing a node with itself). (nielsdos) + . Fixed bug #77686 (Removed elements are still returned by getElementById). + (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). 
(nielsdos) diff --git a/ext/dom/document.c b/ext/dom/document.c index c60198a3be110..93091df83a04f 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1008,6 +1008,19 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS) } /* }}} end dom_document_get_elements_by_tag_name_ns */ +static bool php_dom_is_node_attached(const xmlNode *node) +{ + ZEND_ASSERT(node != NULL); + node = node->parent; + while (node != NULL) { + if (node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) { + return true; + } + node = node->parent; + } + return false; +} + /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBId Since: DOM Level 2 */ @@ -1030,7 +1043,13 @@ PHP_METHOD(DOMDocument, getElementById) attrp = xmlGetID(docp, (xmlChar *) idname); - if (attrp && attrp->parent) { + /* From the moment an ID is created, libxml2's behaviour is to cache that element, even + * if that element is not yet attached to the document. Similarly, only upon destruction of + * the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply + * ingrained in the library, and uses the cache for various purposes, it seems like a bad + * idea and lost cause to fight it. Instead, we'll simply walk the tree upwards to check + * if the node is attached to the document. */ + if (attrp && attrp->parent && php_dom_is_node_attached(attrp->parent)) { DOM_RET_OBJ((xmlNodePtr) attrp->parent, &ret, intern); } else { RETVAL_NULL(); diff --git a/ext/dom/tests/bug77686.phpt b/ext/dom/tests/bug77686.phpt new file mode 100644 index 0000000000000..ddd7c3364786c --- /dev/null +++ b/ext/dom/tests/bug77686.phpt @@ -0,0 +1,40 @@ +--TEST-- +Bug #77686 (Removed elements are still returned by getElementById) +--EXTENSIONS-- +dom +--FILE-- +loadHTML('before
hello
after'); +$body = $doc->getElementById('x'); +$div = $doc->getElementById('y'); +var_dump($doc->getElementById('y')->textContent); + +// Detached from document, should not find it anymore +$body->removeChild($div); +var_dump($doc->getElementById('y')); + +// Added again, should find it +$body->appendChild($div); +var_dump($doc->getElementById('y')->textContent); + +// Should find root element without a problem +var_dump($doc->getElementById('htmlelement')->textContent); + +// Created element but not yet attached, should not find it before it is added +$new_element = $doc->createElement('p'); +$new_element->textContent = 'my new text'; +$new_element->setAttribute('id', 'myp'); +var_dump($doc->getElementById('myp')); +$body->appendChild($new_element); +var_dump($doc->getElementById('myp')->textContent); + +?> +--EXPECT-- +string(5) "hello" +NULL +string(5) "hello" +string(16) "beforeafterhello" +NULL +string(11) "my new text" From a7202682148118b0d60be493e3adee97d8187adb Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Mon, 5 Jun 2023 14:57:41 +0200 Subject: [PATCH 120/168] Use uint32_t for the number of nodes (#11371) --- UPGRADING.INTERNALS | 2 ++ ext/dom/characterdata.c | 6 +++--- ext/dom/document.c | 4 ++-- ext/dom/documentfragment.c | 4 ++-- ext/dom/element.c | 10 +++++----- ext/dom/parentnode.c | 23 +++++++++++------------ ext/dom/php_dom.h | 10 +++++----- 7 files changed, 30 insertions(+), 29 deletions(-) diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 6db2d99ec59b9..b1a9188748fad 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -134,6 +134,8 @@ PHP 8.3 INTERNALS UPGRADE NOTES the base node of the node list. This function also no longer accepts -1 as the index argument. - The function dom_namednode_iter() has additional arguments to avoid recomputing the length of the strings. 
+ - The functions dom_parent_node_prepend(), dom_parent_node_append(), dom_parent_node_after(), and + dom_parent_node_before() now use an uint32_t argument for the number of nodes instead of int. g. ext/libxml - Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and diff --git a/ext/dom/characterdata.c b/ext/dom/characterdata.c index 85660a7b3549f..2ff65a314d3e6 100644 --- a/ext/dom/characterdata.c +++ b/ext/dom/characterdata.c @@ -364,7 +364,7 @@ PHP_METHOD(DOMCharacterData, remove) PHP_METHOD(DOMCharacterData, after) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -381,7 +381,7 @@ PHP_METHOD(DOMCharacterData, after) PHP_METHOD(DOMCharacterData, before) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -398,7 +398,7 @@ PHP_METHOD(DOMCharacterData, before) PHP_METHOD(DOMCharacterData, replaceWith) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; diff --git a/ext/dom/document.c b/ext/dom/document.c index 7dd1e7f38ac80..0660fa779e537 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -2104,7 +2104,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMDocument, append) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -2125,7 +2125,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMDocument, prepend) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; diff --git a/ext/dom/documentfragment.c b/ext/dom/documentfragment.c index 4e7f76a7de80a..a3394e88d5566 100644 --- a/ext/dom/documentfragment.c +++ b/ext/dom/documentfragment.c @@ -135,7 +135,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMDocumentFragment, append) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -156,7 +156,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMDocumentFragment, prepend) { - int argc; + uint32_t argc; zval *args, *id; 
dom_object *intern; xmlNode *context; diff --git a/ext/dom/element.c b/ext/dom/element.c index 9b8fe667707fd..8faf77e8440c9 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -1155,7 +1155,7 @@ PHP_METHOD(DOMElement, remove) PHP_METHOD(DOMElement, after) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1172,7 +1172,7 @@ PHP_METHOD(DOMElement, after) PHP_METHOD(DOMElement, before) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1192,7 +1192,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMElement, append) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1213,7 +1213,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMElement, prepend) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1234,7 +1234,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMElement, replaceWith) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 555296e2ec358..70a952935cac2 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -124,9 +124,9 @@ int dom_parent_node_child_element_count(dom_object *obj, zval *retval) } /* }}} */ -static bool dom_is_node_in_list(const zval *nodes, int nodesc, const xmlNodePtr node_to_find) +static bool dom_is_node_in_list(const zval *nodes, uint32_t nodesc, const xmlNodePtr node_to_find) { - for (int i = 0; i < nodesc; i++) { + for (uint32_t i = 0; i < nodesc; i++) { if (Z_TYPE(nodes[i]) == IS_OBJECT) { const zend_class_entry *ce = Z_OBJCE(nodes[i]); @@ -141,9 +141,8 @@ static bool dom_is_node_in_list(const zval *nodes, int nodesc, const xmlNodePtr return false; } -xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNode, zval *nodes, int nodesc) +xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNode, zval *nodes, uint32_t 
nodesc) { - int i; xmlDoc *documentNode; xmlNode *fragment; xmlNode *newNode; @@ -170,7 +169,7 @@ xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNod stricterror = dom_get_strict_error(document); - for (i = 0; i < nodesc; i++) { + for (uint32_t i = 0; i < nodesc; i++) { if (Z_TYPE(nodes[i]) == IS_OBJECT) { ce = Z_OBJCE(nodes[i]); @@ -253,9 +252,9 @@ static void dom_fragment_assign_parent_node(xmlNodePtr parentNode, xmlNodePtr fr fragment->last = NULL; } -static zend_result dom_hierarchy_node_list(xmlNodePtr parentNode, zval *nodes, int nodesc) +static zend_result dom_hierarchy_node_list(xmlNodePtr parentNode, zval *nodes, uint32_t nodesc) { - for (int i = 0; i < nodesc; i++) { + for (uint32_t i = 0; i < nodesc; i++) { if (Z_TYPE(nodes[i]) == IS_OBJECT) { const zend_class_entry *ce = Z_OBJCE(nodes[i]); @@ -270,7 +269,7 @@ static zend_result dom_hierarchy_node_list(xmlNodePtr parentNode, zval *nodes, i return SUCCESS; } -void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc) +void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc) { xmlNode *parentNode = dom_object_get_node(context); xmlNodePtr newchild, prevsib; @@ -311,7 +310,7 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc) xmlFree(fragment); } -void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) +void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc) { xmlNode *parentNode = dom_object_get_node(context); @@ -379,7 +378,7 @@ static void dom_pre_insert(xmlNodePtr insertion_point, xmlNodePtr parentNode, xm } } -void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) +void dom_parent_node_after(dom_object *context, zval *nodes, uint32_t nodesc) { /* Spec link: https://dom.spec.whatwg.org/#dom-childnode-after */ @@ -432,7 +431,7 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) xmlFree(fragment); } -void 
dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) +void dom_parent_node_before(dom_object *context, zval *nodes, uint32_t nodesc) { /* Spec link: https://dom.spec.whatwg.org/#dom-childnode-before */ @@ -544,7 +543,7 @@ void dom_child_node_remove(dom_object *context) php_dom_throw_error(NOT_FOUND_ERR, stricterror); } -void dom_child_replace_with(dom_object *context, zval *nodes, int nodesc) +void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc) { xmlNodePtr child = dom_object_get_node(context); xmlNodePtr parentNode = child->parent; diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 9e01a2ca63964..49e23213157d6 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -135,12 +135,12 @@ xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index); zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, int by_ref); void dom_set_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece, zend_class_entry *ce); -void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc); -void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc); -void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc); -void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc); +void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc); +void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc); +void dom_parent_node_after(dom_object *context, zval *nodes, uint32_t nodesc); +void dom_parent_node_before(dom_object *context, zval *nodes, uint32_t nodesc); void dom_child_node_remove(dom_object *context); -void dom_child_replace_with(dom_object *context, zval *nodes, int nodesc); +void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc); #define DOM_GET_OBJ(__ptr, __id, __prtype, __intern) { \ __intern = Z_DOMOBJ_P(__id); \ From 9ce6980b4d93d539341807087abef9217cb81351 Mon Sep 17 00:00:00 
2001 From: George Peter Banyard Date: Mon, 5 Jun 2023 13:59:04 +0100 Subject: [PATCH 121/168] Use known zend_string pointer to check for equality instead of C strings (#11370) * Compare __invoke magic method name with known zend_string pointer * Compare __sleep/__wakeup magic method name with known zend_string pointer --- Zend/zend_API.c | 6 +++--- Zend/zend_closures.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Zend/zend_API.c b/Zend/zend_API.c index e9058f3e43db9..61f15fec0c883 100644 --- a/Zend/zend_API.c +++ b/Zend/zend_API.c @@ -2678,15 +2678,15 @@ ZEND_API void zend_check_magic_method_implementation(const zend_class_entry *ce, zend_check_magic_method_public(ce, fptr, error_type); zend_check_magic_method_arg_type(0, ce, fptr, error_type, MAY_BE_ARRAY); zend_check_magic_method_return_type(ce, fptr, error_type, MAY_BE_OBJECT); - } else if (zend_string_equals_literal(lcname, "__invoke")) { + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_MAGIC_INVOKE))) { zend_check_magic_method_non_static(ce, fptr, error_type); zend_check_magic_method_public(ce, fptr, error_type); - } else if (zend_string_equals_literal(lcname, "__sleep")) { + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_SLEEP))) { zend_check_magic_method_args(0, ce, fptr, error_type); zend_check_magic_method_non_static(ce, fptr, error_type); zend_check_magic_method_public(ce, fptr, error_type); zend_check_magic_method_return_type(ce, fptr, error_type, MAY_BE_ARRAY); - } else if (zend_string_equals_literal(lcname, "__wakeup")) { + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_WAKEUP))) { zend_check_magic_method_args(0, ce, fptr, error_type); zend_check_magic_method_non_static(ce, fptr, error_type); zend_check_magic_method_public(ce, fptr, error_type); diff --git a/Zend/zend_closures.c b/Zend/zend_closures.c index cec392dce9b21..e6ef6ca1d29f2 100644 --- a/Zend/zend_closures.c +++ b/Zend/zend_closures.c @@ -324,7 +324,7 @@ static zend_result 
zend_create_closure_from_callable(zval *return_value, zval *c if (mptr->common.fn_flags & ZEND_ACC_CALL_VIA_TRAMPOLINE) { /* For Closure::fromCallable([$closure, "__invoke"]) return $closure. */ if (fcc.object && fcc.object->ce == zend_ce_closure - && zend_string_equals_literal(mptr->common.function_name, "__invoke")) { + && zend_string_equals(mptr->common.function_name, ZSTR_KNOWN(ZEND_STR_MAGIC_INVOKE))) { RETVAL_OBJ_COPY(fcc.object); zend_free_trampoline(mptr); return SUCCESS; @@ -834,7 +834,7 @@ void zend_closure_from_frame(zval *return_value, zend_execute_data *call) { /* { if (mptr->common.fn_flags & ZEND_ACC_CALL_VIA_TRAMPOLINE) { if ((ZEND_CALL_INFO(call) & ZEND_CALL_HAS_THIS) && (Z_OBJCE(call->This) == zend_ce_closure) - && zend_string_equals_literal(mptr->common.function_name, "__invoke")) { + && zend_string_equals(mptr->common.function_name, ZSTR_KNOWN(ZEND_STR_MAGIC_INVOKE))) { zend_free_trampoline(mptr); RETURN_OBJ_COPY(Z_OBJ(call->This)); } From 16a63d7b077f63b8e6139f11f5aca9c702af8e0f Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 13 May 2023 16:30:25 +0100 Subject: [PATCH 122/168] ext/pgsql: php_pgsql_convert converts E_NOTICE to TypeError/ValueError exceptions. Close GH-11238 --- NEWS | 2 ++ UPGRADING | 2 ++ ext/pgsql/pgsql.c | 33 ++++++++++++++++++--------------- ext/pgsql/tests/bug71998.phpt | 15 ++++++++++++++- ext/pgsql/tests/bug77047.phpt | 15 +++++++++++---- 5 files changed, 47 insertions(+), 20 deletions(-) diff --git a/NEWS b/NEWS index 808f3ed806101..7034346b84783 100644 --- a/NEWS +++ b/NEWS @@ -141,6 +141,8 @@ PHP NEWS . pg_trace new PGSQL_TRACE_SUPPRESS_TIMESTAMPS/PGSQL_TRACE_REGRESS_MODE contants support. (David Carlier) . pg_set_error_verbosity adding PGSQL_ERRORS_STATE constant. (David Carlier) + . pg_convert/pg_insert E_NOTICE on type errors has been converted to + ValueError/TypeError exceptions. (David Carlier) - Phar: . Fix memory leak in phar_rename_archive().
(stkeke) diff --git a/UPGRADING b/UPGRADING index 79f86cb148d6f..53ac565428e79 100644 --- a/UPGRADING +++ b/UPGRADING @@ -146,6 +146,8 @@ PHP 8.3 UPGRADE NOTES . pg_fetch_object now raises a ValueError instead of an Exception when the constructor_args argument is non empty with the class not having constructor. . pg_insert now raises a ValueError instead of a WARNING when the table specified is invalid. + . pg_insert and pg_convert now raise a ValueError or a TypeError instead of an E_NOTICE when the + value/type of a field does not match a PostgreSQL type. - Standard: . E_NOTICEs emitted by unserialize() have been promoted to E_WARNING. diff --git a/ext/pgsql/pgsql.c b/ext/pgsql/pgsql.c index 14488ca6157db..d7215f3a9ba12 100644 --- a/ext/pgsql/pgsql.c +++ b/ext/pgsql/pgsql.c @@ -4529,7 +4529,6 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * data_type = php_pgsql_get_data_type(Z_STR_P(type)); } - /* TODO: Should E_NOTICE be converted to type error if PHP type cannot be converted to field type?
*/ switch(data_type) { case PG_BOOL: @@ -4554,7 +4553,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * ZVAL_STRINGL(&new_val, "'f'", sizeof("'f'")-1); } else { - php_error_docref(NULL, E_NOTICE, "Detected invalid value (%s) for PostgreSQL %s field (%s)", Z_STRVAL_P(val), Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_value_error("%s(): Field \"%s\" must be of type bool, invalid PostgreSQL string boolean value \"%s\" given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(val)); err = 1; } } @@ -4586,7 +4585,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects string, null, long or boolelan value for PostgreSQL '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null|int|bool, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4630,7 +4629,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for pgsql '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type int|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4679,7 +4678,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for PostgreSQL '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type %s|int|null, %s given", get_active_function_name(), (data_type == PG_MONEY ? 
"money" : "float"), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4740,7 +4739,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for PostgreSQL '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4782,7 +4781,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type int|null, %s given", get_active_function_name(), ZSTR_VAL(field), zend_zval_value_name(val)); } break; @@ -4801,7 +4800,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * at all though and let the server side to handle it.*/ if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 0) == FAILURE && php_pgsql_convert_match(Z_STR_P(val), REGEX1, sizeof(REGEX1)-1, 0) == FAILURE) { - err = 1; + err = 2; } else { ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val))); @@ -4820,7 +4819,11 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or IPv4 or IPv6 address string for '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + if (err == 2) { + zend_value_error("%s(): Field \"%s\" must be a valid IPv4 or IPv6 address string, \"%s\" given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(val)); + } else { + zend_type_error("%s(): Field \"%s\" must be of type string|null, given %s", get_active_function_name(), ZSTR_VAL(field), zend_zval_value_name(val)); + } } break; @@ -4854,7 +4857,7 @@ 
PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4886,7 +4889,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4918,7 +4921,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4996,7 +4999,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; case PG_BYTEA: @@ -5037,7 +5040,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for PostgreSQL '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" 
must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -5068,7 +5071,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; diff --git a/ext/pgsql/tests/bug71998.phpt b/ext/pgsql/tests/bug71998.phpt index 854c4e4227436..96fc58ff8272e 100644 --- a/ext/pgsql/tests/bug71998.phpt +++ b/ext/pgsql/tests/bug71998.phpt @@ -57,7 +57,13 @@ $i = 0; $errors = 0; foreach ($ips as $ip) { $data = array("id" => ++$i, "remote_addr" => $ip); - $r = @pg_insert($db, 'tmp_statistics', $data); + $r = true; + try { + @pg_insert($db, 'tmp_statistics', $data); + } catch (\ValueError $e) { + echo $e->getMessage() . 
PHP_EOL; + $r = false; + } if (!$r && in_array($ip, $bad)) { $errors++; @@ -79,6 +85,13 @@ pg_close($db); ?> --EXPECT-- +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "256.257.258.259" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "fe08::7:8interface" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "schnitzel" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "10002.3.4" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "1.2.3.4.5" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "256.0.0.0" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "260.0.0.0" given array(2) { [0]=> string(1) "1" diff --git a/ext/pgsql/tests/bug77047.phpt b/ext/pgsql/tests/bug77047.phpt index 50a4d61e7b520..8995050677b8d 100644 --- a/ext/pgsql/tests/bug77047.phpt +++ b/ext/pgsql/tests/bug77047.phpt @@ -19,10 +19,18 @@ pg_query($db, "CREATE TABLE bug77047 ( t TIME WITHOUT TIME ZONE )"); -pg_insert($db, "bug77047", array("t" => "13:31")); +try { + pg_insert($db, "bug77047", array("t" => "13:31")); +} catch (\TypeError $e) { + echo $e->getMessage(); +} pg_insert($db, "bug77047", array("t" => "13:31:13")); pg_insert($db, "bug77047", array("t" => "1:2:3")); -pg_insert($db, "bug77047", array("t" => "xyz")); +try { + pg_insert($db, "bug77047", array("t" => "xyz")); +} catch (\TypeError $e) { + echo $e->getMessage() . 
PHP_EOL; +} pg_insert($db, "bug77047", array("t" => NULL)); pg_insert($db, "bug77047", array("t" => "")); @@ -33,10 +41,9 @@ while (false !== ($row = pg_fetch_row($res))) { ?> --EXPECTF-- -Notice: pg_insert(): Expects NULL or string for PostgreSQL time field (t) in %s on line %d +pg_insert(): Field "t" must be of type string|null, time given string(8) "13:31:00" string(8) "13:31:13" string(8) "01:02:03" NULL NULL - From 61e1f8aaebdd0f609ae6be5453d0bbab001cef12 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 3 Jun 2023 20:15:23 +0200 Subject: [PATCH 123/168] Let closure created from magic method accept named parameters Implements GH-11348. Closes GH-11364. --- NEWS | 2 + UPGRADING | 1 + .../trampoline_closure_named_arguments.phpt | 112 ++++++++++++++++++ Zend/zend_closures.c | 15 ++- 4 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 Zend/tests/trampoline_closure_named_arguments.phpt diff --git a/NEWS b/NEWS index 7034346b84783..57b4b57bd26dc 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,8 @@ PHP NEWS . Fix bug #79836 (Segfault in concat_function). (nielsdos) . Fix bug #81705 (type confusion/UAF on set_error_handler with concat operation). (nielsdos) + . Fix GH-11348 (Closure created from magic method does not accept named + arguments). (nielsdos) - Date: . Implement More Appropriate Date/Time Exceptions RFC. (Derick) diff --git a/UPGRADING b/UPGRADING index 53ac565428e79..5c81ef8b325c6 100644 --- a/UPGRADING +++ b/UPGRADING @@ -58,6 +58,7 @@ PHP 8.3 UPGRADE NOTES RFC: https://wiki.php.net/rfc/readonly_amendments . Class, interface, trait, and enum constants now support type declarations. RFC: https://wiki.php.net/rfc/typed_class_constants + . Closures created from magic methods can now accept named arguments. - Posix . 
posix_getrlimit() now takes an optional $res parameter to allow fetching a diff --git a/Zend/tests/trampoline_closure_named_arguments.phpt b/Zend/tests/trampoline_closure_named_arguments.phpt new file mode 100644 index 0000000000000..e209853e509cb --- /dev/null +++ b/Zend/tests/trampoline_closure_named_arguments.phpt @@ -0,0 +1,112 @@ +--TEST-- +Trampoline closure created from magic method accepts named arguments +--FILE-- +test(1, 2, a: 123); +$test->test(...)(1, 2); +$test->test(...)(1, 2, a: 123, b: $test); +$test->test(...)(a: 123, b: $test); +$test->test(...)(); + +echo "-- Static cases --\n"; +Test::testStatic(1, 2, a: 123); +Test::testStatic(...)(1, 2); +Test::testStatic(...)(1, 2, a: 123, b: $test); +Test::testStatic(...)(a: 123, b: $test); +Test::testStatic(...)(); + +?> +--EXPECT-- +-- Non-static cases -- +string(4) "test" +array(3) { + [0]=> + int(1) + [1]=> + int(2) + ["a"]=> + int(123) +} +string(4) "test" +array(2) { + [0]=> + int(1) + [1]=> + int(2) +} +string(4) "test" +array(4) { + [0]=> + int(1) + [1]=> + int(2) + ["a"]=> + int(123) + ["b"]=> + object(Test)#1 (0) { + } +} +string(4) "test" +array(2) { + ["a"]=> + int(123) + ["b"]=> + object(Test)#1 (0) { + } +} +string(4) "test" +array(0) { +} +-- Static cases -- +string(10) "testStatic" +array(3) { + [0]=> + int(1) + [1]=> + int(2) + ["a"]=> + int(123) +} +string(10) "testStatic" +array(2) { + [0]=> + int(1) + [1]=> + int(2) +} +string(10) "testStatic" +array(4) { + [0]=> + int(1) + [1]=> + int(2) + ["a"]=> + int(123) + ["b"]=> + object(Test)#1 (0) { + } +} +string(10) "testStatic" +array(2) { + ["a"]=> + int(123) + ["b"]=> + object(Test)#1 (0) { + } +} +string(10) "testStatic" +array(0) { +} diff --git a/Zend/zend_closures.c b/Zend/zend_closures.c index e6ef6ca1d29f2..69eeb3cf1ceef 100644 --- a/Zend/zend_closures.c +++ b/Zend/zend_closures.c @@ -294,7 +294,18 @@ static ZEND_NAMED_FUNCTION(zend_closure_call_magic) /* {{{ */ { fci.params = params; fci.param_count = 2; ZVAL_STR(&fci.params[0], 
EX(func)->common.function_name); - if (ZEND_NUM_ARGS()) { + if (EX_CALL_INFO() & ZEND_CALL_HAS_EXTRA_NAMED_PARAMS) { + zend_string *name; + zval *named_param_zval; + array_init_size(&fci.params[1], ZEND_NUM_ARGS() + zend_hash_num_elements(EX(extra_named_params))); + /* Avoid conversion from packed to mixed later. */ + zend_hash_real_init_mixed(Z_ARRVAL(fci.params[1])); + zend_copy_parameters_array(ZEND_NUM_ARGS(), &fci.params[1]); + ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(EX(extra_named_params), name, named_param_zval) { + Z_TRY_ADDREF_P(named_param_zval); + zend_hash_add_new(Z_ARRVAL(fci.params[1]), name, named_param_zval); + } ZEND_HASH_FOREACH_END(); + } else if (ZEND_NUM_ARGS()) { array_init_size(&fci.params[1], ZEND_NUM_ARGS()); zend_copy_parameters_array(ZEND_NUM_ARGS(), &fci.params[1]); } else { @@ -841,7 +852,7 @@ void zend_closure_from_frame(zval *return_value, zend_execute_data *call) { /* { memset(&trampoline, 0, sizeof(zend_internal_function)); trampoline.type = ZEND_INTERNAL_FUNCTION; - trampoline.fn_flags = mptr->common.fn_flags & ZEND_ACC_STATIC; + trampoline.fn_flags = mptr->common.fn_flags & (ZEND_ACC_STATIC | ZEND_ACC_VARIADIC); trampoline.handler = zend_closure_call_magic; trampoline.function_name = mptr->common.function_name; trampoline.scope = mptr->common.scope; From 50fdad83256989fadcb92e584ef77ef80d781465 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Tue, 20 Dec 2022 10:52:06 +1100 Subject: [PATCH 124/168] Set DOMAttr::$value without expanding entities The manual refers to the DOM Level 3 Core spec which says: "On setting, this creates a Text node with the unparsed contents of the string. I.e. any characters that an XML processor would recognize as markup are instead treated as literal text." PHP is expanding entities when DOMAttr::value is set, which is non-compliant and is a difference in behaviour compared to browser DOM implementations. So, when value is set, remove all children of the attribute node. 
Then create a single text node and insert that as the only child of the attribute. Add tests. --- ext/dom/attr.c | 11 +++++++- ext/dom/tests/DOMAttr_entity_expansion.phpt | 30 +++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 ext/dom/tests/DOMAttr_entity_expansion.phpt diff --git a/ext/dom/attr.c b/ext/dom/attr.c index a262aea821395..8d9d5b57f9973 100644 --- a/ext/dom/attr.c +++ b/ext/dom/attr.c @@ -136,6 +136,7 @@ int dom_attr_value_write(dom_object *obj, zval *newval) { zend_string *str; xmlAttrPtr attrp = (xmlAttrPtr) dom_object_get_node(obj); + xmlNodePtr node, next; if (attrp == NULL) { php_dom_throw_error(INVALID_STATE_ERR, 1); @@ -149,9 +150,17 @@ int dom_attr_value_write(dom_object *obj, zval *newval) if (attrp->children) { node_list_unlink(attrp->children); + node = attrp->children; + while (node) { + next = node->next; + xmlUnlinkNode(node); + xmlFreeNode(node); + node = next; + } } - xmlNodeSetContentLen((xmlNodePtr) attrp, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); + node = xmlNewTextLen((xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); + xmlAddChild((xmlNodePtr) attrp, node); zend_string_release_ex(str, 0); return SUCCESS; diff --git a/ext/dom/tests/DOMAttr_entity_expansion.phpt b/ext/dom/tests/DOMAttr_entity_expansion.phpt new file mode 100644 index 0000000000000..d8745cb9f460f --- /dev/null +++ b/ext/dom/tests/DOMAttr_entity_expansion.phpt @@ -0,0 +1,30 @@ +--TEST-- +DOMAttr entity expansion +--FILE-- +createElement('elt'); +$doc->appendChild($elt); +$elt->setAttribute('a','&'); +print $doc->saveXML($elt) . "\n"; + +$attr = $elt->getAttributeNode('a'); +$attr->value = '&'; +print $doc->saveXML($elt) . "\n"; + +$attr->removeChild($attr->firstChild); +print $doc->saveXML($elt) . "\n"; + +$elt->setAttributeNS('http://www.w3.org/2000/svg', 'svg:id','&'); +print $doc->saveXML($elt) . "\n"; + +$attr = $elt->getAttributeNodeNS('http://www.w3.org/2000/svg', 'id'); +$attr->value = '<&'; +print $doc->saveXML($elt) . 
"\n"; + +--EXPECT-- + + + + + From 74910b14037f2a31e38ad5f1eba8366e6cc407f9 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Thu, 22 Dec 2022 11:15:43 +1100 Subject: [PATCH 125/168] Factor out dom_remove_all_children() A few callers remove all children of a node. The way it was done in node.c was unsafe, because it left nodep->last dangling. It just happens to not crash if xmlNodeSetContent() is called immediately afterwards. --- ext/dom/attr.c | 15 ++------------- ext/dom/node.c | 13 ++----------- ext/dom/php_dom.c | 10 ++++++++++ ext/dom/php_dom.h | 2 ++ 4 files changed, 16 insertions(+), 24 deletions(-) diff --git a/ext/dom/attr.c b/ext/dom/attr.c index 8d9d5b57f9973..417f92a25c364 100644 --- a/ext/dom/attr.c +++ b/ext/dom/attr.c @@ -136,7 +136,6 @@ int dom_attr_value_write(dom_object *obj, zval *newval) { zend_string *str; xmlAttrPtr attrp = (xmlAttrPtr) dom_object_get_node(obj); - xmlNodePtr node, next; if (attrp == NULL) { php_dom_throw_error(INVALID_STATE_ERR, 1); @@ -148,18 +147,8 @@ int dom_attr_value_write(dom_object *obj, zval *newval) return FAILURE; } - if (attrp->children) { - node_list_unlink(attrp->children); - node = attrp->children; - while (node) { - next = node->next; - xmlUnlinkNode(node); - xmlFreeNode(node); - node = next; - } - } - - node = xmlNewTextLen((xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); + dom_remove_all_children((xmlNodePtr) attrp); + xmlNodePtr node = xmlNewTextLen((xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); xmlAddChild((xmlNodePtr) attrp, node); zend_string_release_ex(str, 0); diff --git a/ext/dom/node.c b/ext/dom/node.c index d7410fc4c7eb1..0fa4d66cbc0d9 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -179,11 +179,7 @@ int dom_node_node_value_write(dom_object *obj, zval *newval) switch (nodep->type) { case XML_ELEMENT_NODE: case XML_ATTRIBUTE_NODE: - if (nodep->children) { - node_list_unlink(nodep->children); - php_libxml_node_free_list((xmlNodePtr) nodep->children); - nodep->children = NULL; - } + 
dom_remove_all_children(nodep); ZEND_FALLTHROUGH; case XML_TEXT_NODE: case XML_COMMENT_NODE: @@ -783,12 +779,7 @@ int dom_node_text_content_write(dom_object *obj, zval *newval) * For the other cases, we *can* rely on xmlNodeSetContent because it is either a no-op, or handles * the content without encoding. */ if (type == XML_DOCUMENT_FRAG_NODE || type == XML_ELEMENT_NODE || type == XML_ATTRIBUTE_NODE) { - if (nodep->children) { - node_list_unlink(nodep->children); - php_libxml_node_free_list((xmlNodePtr) nodep->children); - nodep->children = NULL; - } - + dom_remove_all_children(nodep); xmlNode *textNode = xmlNewText(xmlChars); xmlAddChild(nodep, textNode); } else { diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 26528056ad785..6867e5acf128e 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1588,4 +1588,14 @@ static int dom_nodelist_has_dimension(zend_object *object, zval *member, int che } } /* }}} end dom_nodelist_has_dimension */ +void dom_remove_all_children(xmlNodePtr nodep) +{ + if (nodep->children) { + node_list_unlink(nodep->children); + php_libxml_node_free_list((xmlNodePtr) nodep->children); + nodep->children = NULL; + nodep->last = NULL; + } +} + #endif /* HAVE_DOM */ diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 49e23213157d6..4faeff5c599c2 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -142,6 +142,8 @@ void dom_parent_node_before(dom_object *context, zval *nodes, uint32_t nodesc); void dom_child_node_remove(dom_object *context); void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc); +void dom_remove_all_children(xmlNodePtr nodep); + #define DOM_GET_OBJ(__ptr, __id, __prtype, __intern) { \ __intern = Z_DOMOBJ_P(__id); \ if (__intern->ptr == NULL || !(__ptr = (__prtype)((php_libxml_node_ptr *)__intern->ptr)->node)) { \ From ee68c2212876f7e4a28bac2fc7d0c8c8aa950540 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Thu, 22 Dec 2022 11:43:20 +1100 Subject: [PATCH 126/168] Don't add 1 when 
calling xmlNodeSetContent() The length is passed to xmlStrndup(), which also adds 1, and adds a null terminator past the end. It worked because the length is not actually stored. Strings in libxml2 are null terminated. Passing the length just avoids a call to strlen(). --- ext/dom/characterdata.c | 2 +- ext/dom/node.c | 2 +- ext/dom/processinginstruction.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/dom/characterdata.c b/ext/dom/characterdata.c index 2ff65a314d3e6..0a0373a5c1d11 100644 --- a/ext/dom/characterdata.c +++ b/ext/dom/characterdata.c @@ -70,7 +70,7 @@ int dom_characterdata_data_write(dom_object *obj, zval *newval) return FAILURE; } - xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); + xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); zend_string_release_ex(str, 0); return SUCCESS; diff --git a/ext/dom/node.c b/ext/dom/node.c index 0fa4d66cbc0d9..1caaad0ed365c 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -185,7 +185,7 @@ int dom_node_node_value_write(dom_object *obj, zval *newval) case XML_COMMENT_NODE: case XML_CDATA_SECTION_NODE: case XML_PI_NODE: - xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); + xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); break; default: break; diff --git a/ext/dom/processinginstruction.c b/ext/dom/processinginstruction.c index c40d24d18ce23..1f85f91b28150 100644 --- a/ext/dom/processinginstruction.c +++ b/ext/dom/processinginstruction.c @@ -130,7 +130,7 @@ int dom_processinginstruction_data_write(dom_object *obj, zval *newval) php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); - xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); + xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); zend_string_release_ex(str, 0); return SUCCESS; From 076ddf2b0511947b7a23cf382176505071ecfb5a Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Thu, 22 Dec 2022 12:34:26 
+1100 Subject: [PATCH 127/168] Also avoid entity expansion in DOMAttr::$nodeValue --- ext/dom/node.c | 5 ++++- ext/dom/tests/DOMAttr_entity_expansion.phpt | 13 +++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/ext/dom/node.c b/ext/dom/node.c index 1caaad0ed365c..8aad9de12c8e8 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -177,8 +177,11 @@ int dom_node_node_value_write(dom_object *obj, zval *newval) /* Access to Element node is implemented as a convenience method */ switch (nodep->type) { - case XML_ELEMENT_NODE: case XML_ATTRIBUTE_NODE: + dom_remove_all_children(nodep); + xmlAddChild(nodep, xmlNewTextLen((xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str))); + break; + case XML_ELEMENT_NODE: dom_remove_all_children(nodep); ZEND_FALLTHROUGH; case XML_TEXT_NODE: diff --git a/ext/dom/tests/DOMAttr_entity_expansion.phpt b/ext/dom/tests/DOMAttr_entity_expansion.phpt index d8745cb9f460f..7c449071bb141 100644 --- a/ext/dom/tests/DOMAttr_entity_expansion.phpt +++ b/ext/dom/tests/DOMAttr_entity_expansion.phpt @@ -15,6 +15,13 @@ print $doc->saveXML($elt) . "\n"; $attr->removeChild($attr->firstChild); print $doc->saveXML($elt) . "\n"; +$attr->nodeValue = '&'; +print $doc->saveXML($elt) . "\n"; + +$attr->nodeValue = '&'; +print $doc->saveXML($elt) . "\n"; + +$elt->removeAttributeNode($attr); $elt->setAttributeNS('http://www.w3.org/2000/svg', 'svg:id','&'); print $doc->saveXML($elt) . "\n"; @@ -26,5 +33,7 @@ print $doc->saveXML($elt) . "\n"; - - + + + + From 0cc028c374b17271bc83d16cbb15550cf19ce0f9 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Thu, 22 Dec 2022 12:34:51 +1100 Subject: [PATCH 128/168] Changelog notes for DOMAttr value and nodeValue properties --- NEWS | 1 + UPGRADING | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/NEWS b/NEWS index 57b4b57bd26dc..e43e600f59aab 100644 --- a/NEWS +++ b/NEWS @@ -47,6 +47,7 @@ PHP NEWS . Implement More Appropriate Date/Time Exceptions RFC. (Derick) - DOM: + . 
Fix bug GH-8388 (DOMAttr unescapes character reference). (Tim Starling) . Fix bug GH-11308 (getElementsByTagName() is O(N^2)). (nielsdos) - Exif: diff --git a/UPGRADING b/UPGRADING index 5c81ef8b325c6..643c8d68ab901 100644 --- a/UPGRADING +++ b/UPGRADING @@ -44,6 +44,10 @@ PHP 8.3 UPGRADE NOTES . Static variable initializers can now contain arbitrary expressions. RFC: https://wiki.php.net/rfc/arbitrary_static_variable_initializers +- DOM: + . Assignment to DOMAttr::$value and DOMAttr::$nodeValue no longer expands + entities in the new value. + - FFI: . C functions that have a return type of void now return null instead of returning the following object object(FFI\CData:void) { } From ab774858909b89b8b807b161d547eceb523bce39 Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Mon, 5 Jun 2023 10:03:30 +1000 Subject: [PATCH 129/168] Improve test DOMAttr_entity_expansion.phpt --- ext/dom/tests/DOMAttr_entity_expansion.phpt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/ext/dom/tests/DOMAttr_entity_expansion.phpt b/ext/dom/tests/DOMAttr_entity_expansion.phpt index 7c449071bb141..e3482d1a9d739 100644 --- a/ext/dom/tests/DOMAttr_entity_expansion.phpt +++ b/ext/dom/tests/DOMAttr_entity_expansion.phpt @@ -1,5 +1,7 @@ --TEST-- DOMAttr entity expansion +--EXTENSIONS-- +dom --FILE-- saveXML($elt) . "\n"; $attr = $elt->getAttributeNode('a'); $attr->value = '&'; +print "$attr->value\n"; print $doc->saveXML($elt) . "\n"; $attr->removeChild($attr->firstChild); print $doc->saveXML($elt) . "\n"; $attr->nodeValue = '&'; +print "$attr->nodeValue\n"; print $doc->saveXML($elt) . "\n"; $attr->nodeValue = '&'; +print "$attr->nodeValue\n"; print $doc->saveXML($elt) . "\n"; $elt->removeAttributeNode($attr); @@ -27,13 +32,23 @@ print $doc->saveXML($elt) . "\n"; $attr = $elt->getAttributeNodeNS('http://www.w3.org/2000/svg', 'id'); $attr->value = '<&'; +print "$attr->value\n"; print $doc->saveXML($elt) . 
"\n"; +$node = new DOMAttr('foo','bar'); +$node->nodeValue = 'xx1yy'; +print "$node->nodeValue\n"; +?> --EXPECT-- +& +& +& +<& +xx1yy From 99fa740acb24424869b86e8ef1b192e6f6cad629 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 6 Jun 2023 11:28:19 +0100 Subject: [PATCH 130/168] Use common function for TypeError on illegal offset access (#10544) This merges all usages of emitting an offset TypeError into a new ZEND_API function zend_illegal_container_offset(const zend_string* container, const zval *offset, int type); Where the container should represent the type on which the access is attempted (e.g. string, array) The offset zval that is used, where the error message will display its type The type of access, which should be a BP_VAR_* constant, to get special message for isset/empty/unset --- Zend/tests/036.phpt | 2 +- Zend/tests/038.phpt | 2 +- Zend/tests/assign_dim_obj_null_return.phpt | 4 +- Zend/tests/bug24773.phpt | 2 +- Zend/tests/gh8821.phpt | 2 +- .../illegal_offset_unset_isset_empty.phpt | 2 +- .../tests/init_array_illegal_offset_type.phpt | 2 +- Zend/tests/isset_array.phpt | 2 +- Zend/tests/offset_array.phpt | 2 +- Zend/zend.c | 23 ++++++++++ Zend/zend.h | 2 + Zend/zend_API.c | 12 +---- Zend/zend_execute.c | 32 +++++++------- Zend/zend_vm_def.h | 4 +- Zend/zend_vm_execute.h | 44 +++++++++---------- ext/opcache/jit/zend_jit_helpers.c | 42 ++++++------------ ext/opcache/tests/jit/assign_dim_002.phpt | 2 +- ext/spl/spl_array.c | 33 +++++--------- ext/spl/spl_fixedarray.c | 8 +--- ext/spl/tests/ArrayObject_illegal_offset.phpt | 2 +- ext/spl/tests/fixedarray_001.phpt | 2 +- ext/spl/tests/fixedarray_002.phpt | 2 +- ext/spl/tests/fixedarray_003.phpt | 42 +++++++++--------- .../array/array_key_exists_variation1.phpt | 2 +- ext/standard/tests/array/bug68553.phpt | 2 +- tests/classes/tostring_001.phpt | 2 +- 26 files changed, 130 insertions(+), 146 deletions(-) diff --git a/Zend/tests/036.phpt b/Zend/tests/036.phpt index 4037d3d0e3d21..8958237b0eb8b 
100644 --- a/Zend/tests/036.phpt +++ b/Zend/tests/036.phpt @@ -11,4 +11,4 @@ try { ?> --EXPECT-- -Cannot access offset of type object on array +Cannot access offset of type Closure on array diff --git a/Zend/tests/038.phpt b/Zend/tests/038.phpt index 4f822a6f5a154..8b6441e3c25a3 100644 --- a/Zend/tests/038.phpt +++ b/Zend/tests/038.phpt @@ -11,4 +11,4 @@ try { ?> --EXPECT-- -Cannot access offset of type object on array +Cannot access offset of type Closure on array diff --git a/Zend/tests/assign_dim_obj_null_return.phpt b/Zend/tests/assign_dim_obj_null_return.phpt index 02e709818669e..e2b7f20a0c072 100644 --- a/Zend/tests/assign_dim_obj_null_return.phpt +++ b/Zend/tests/assign_dim_obj_null_return.phpt @@ -73,11 +73,11 @@ test(); --EXPECT-- Cannot add element to the array as the next element is already occupied Cannot access offset of type array on array -Cannot access offset of type object on array +Cannot access offset of type stdClass on array Cannot use a scalar value as an array Cannot add element to the array as the next element is already occupied Cannot access offset of type array on array -Cannot access offset of type object on array +Cannot access offset of type stdClass on array Cannot use a scalar value as an array Attempt to assign property "foo" on true Attempt to assign property "foo" on true diff --git a/Zend/tests/bug24773.phpt b/Zend/tests/bug24773.phpt index 4c73fd0dd00f2..f1845fa46a680 100644 --- a/Zend/tests/bug24773.phpt +++ b/Zend/tests/bug24773.phpt @@ -6,7 +6,7 @@ Bug #24773 (unset() of integers treated as arrays causes a crash) unset($array["lvl1"]["lvl2"]["b"]); ?> --EXPECTF-- -Fatal error: Uncaught TypeError: Cannot access offset of type string on string in %s:%d +Fatal error: Uncaught Error: Cannot unset string offsets in %s:%d Stack trace: #0 {main} thrown in %s on line %d diff --git a/Zend/tests/gh8821.phpt b/Zend/tests/gh8821.phpt index e6abf5c1c4f1a..7588239fc83ba 100644 --- a/Zend/tests/gh8821.phpt +++ b/Zend/tests/gh8821.phpt @@ 
-15,7 +15,7 @@ new Bravo(); ?> --EXPECTF-- -Fatal error: Uncaught TypeError: Cannot access offset of type object on array in %sgh8821.php:8 +Fatal error: Uncaught TypeError: Cannot access offset of type Alpha on array in %sgh8821.php:8 Stack trace: #0 %sgh8821.php(11): [constant expression]() #1 {main} diff --git a/Zend/tests/illegal_offset_unset_isset_empty.phpt b/Zend/tests/illegal_offset_unset_isset_empty.phpt index a09613748281b..ee837f0b61439 100644 --- a/Zend/tests/illegal_offset_unset_isset_empty.phpt +++ b/Zend/tests/illegal_offset_unset_isset_empty.phpt @@ -22,6 +22,6 @@ try { ?> --EXPECT-- -Cannot access offset of type array in unset +Cannot unset offset of type array on array Cannot access offset of type array in isset or empty Cannot access offset of type array in isset or empty diff --git a/Zend/tests/init_array_illegal_offset_type.phpt b/Zend/tests/init_array_illegal_offset_type.phpt index 2e5a0401d6e4a..ee41c0217ad77 100644 --- a/Zend/tests/init_array_illegal_offset_type.phpt +++ b/Zend/tests/init_array_illegal_offset_type.phpt @@ -12,4 +12,4 @@ try { } ?> --EXPECT-- -Cannot access offset of type object on array +Cannot access offset of type stdClass on array diff --git a/Zend/tests/isset_array.phpt b/Zend/tests/isset_array.phpt index 792483294805d..dfa3fdef51dd7 100644 --- a/Zend/tests/isset_array.phpt +++ b/Zend/tests/isset_array.phpt @@ -47,4 +47,4 @@ bool(false) Warning: Resource ID#%d used as offset, casting to integer (%d) in %s on line %d bool(false) Cannot access offset of type array in isset or empty -Cannot access offset of type object in isset or empty +Cannot access offset of type stdClass in isset or empty diff --git a/Zend/tests/offset_array.phpt b/Zend/tests/offset_array.phpt index e44244511fcf1..368ec7a020e14 100644 --- a/Zend/tests/offset_array.phpt +++ b/Zend/tests/offset_array.phpt @@ -48,6 +48,6 @@ int(1) Warning: Resource ID#%d used as offset, casting to integer (%d) in %s on line %d int(%d) -Cannot access offset of type object 
on array +Cannot access offset of type stdClass on array Cannot access offset of type array on array Done diff --git a/Zend/zend.c b/Zend/zend.c index bbddd4597042b..0e3cfb4381fad 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -1719,6 +1719,29 @@ ZEND_API ZEND_COLD void zend_throw_error(zend_class_entry *exception_ce, const c } /* }}} */ +/* type should be one of the BP_VAR_* constants, only special messages happen for isset/empty and unset */ +ZEND_API ZEND_COLD void zend_illegal_container_offset(const zend_string *container, const zval *offset, int type) +{ + switch (type) { + case BP_VAR_IS: + zend_type_error("Cannot access offset of type %s in isset or empty", + zend_zval_type_name(offset)); + return; + case BP_VAR_UNSET: + /* Consistent error for when trying to unset a string offset */ + if (zend_string_equals(container, ZSTR_KNOWN(ZEND_STR_STRING))) { + zend_throw_error(NULL, "Cannot unset string offsets"); + } else { + zend_type_error("Cannot unset offset of type %s on %s", zend_zval_type_name(offset), ZSTR_VAL(container)); + } + return; + default: + zend_type_error("Cannot access offset of type %s on %s", + zend_zval_type_name(offset), ZSTR_VAL(container)); + return; + } +} + ZEND_API ZEND_COLD void zend_type_error(const char *format, ...) /* {{{ */ { va_list va; diff --git a/Zend/zend.h b/Zend/zend.h index fd21cbfeb93cf..94440530f3b36 100644 --- a/Zend/zend.h +++ b/Zend/zend.h @@ -357,6 +357,8 @@ ZEND_API ZEND_COLD void zend_throw_error(zend_class_entry *exception_ce, const c ZEND_API ZEND_COLD void zend_type_error(const char *format, ...) ZEND_ATTRIBUTE_FORMAT(printf, 1, 2); ZEND_API ZEND_COLD void zend_argument_count_error(const char *format, ...) ZEND_ATTRIBUTE_FORMAT(printf, 1, 2); ZEND_API ZEND_COLD void zend_value_error(const char *format, ...) 
ZEND_ATTRIBUTE_FORMAT(printf, 1, 2); +/* type should be one of the BP_VAR_* constants, only special messages happen for isset/empty and unset */ +ZEND_API ZEND_COLD void zend_illegal_container_offset(const zend_string *container, const zval *offset, int type); ZEND_COLD void zenderror(const char *error); diff --git a/Zend/zend_API.c b/Zend/zend_API.c index 61f15fec0c883..2bd236a26262e 100644 --- a/Zend/zend_API.c +++ b/Zend/zend_API.c @@ -407,16 +407,6 @@ ZEND_API ZEND_COLD void ZEND_FASTCALL zend_argument_error_variadic(zend_class_en } /* }}} */ -ZEND_API ZEND_COLD void zend_illegal_array_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s on array", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -ZEND_API ZEND_COLD void zend_illegal_empty_or_isset_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s in isset or empty", zend_get_type_by_const(Z_TYPE_P(offset))); -} - ZEND_API ZEND_COLD void zend_argument_error(zend_class_entry *error_ce, uint32_t arg_num, const char *format, ...) 
/* {{{ */ { va_list va; @@ -2112,7 +2102,7 @@ ZEND_API zend_result array_set_zval_key(HashTable *ht, zval *key, zval *value) / result = zend_hash_index_update(ht, zend_dval_to_lval_safe(Z_DVAL_P(key)), value); break; default: - zend_illegal_array_offset(key); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), key, BP_VAR_W); result = NULL; } diff --git a/Zend/zend_execute.c b/Zend/zend_execute.c index caaedce98a850..29631504d5f90 100644 --- a/Zend/zend_execute.c +++ b/Zend/zend_execute.c @@ -1506,24 +1506,24 @@ static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_use_object_as_array(v zend_throw_error(NULL, "Cannot use object as array"); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_unset_offset(const zval *offset) +static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_array_offset_access(const zval *offset) { - zend_type_error("Cannot access offset of type %s in unset", zend_get_type_by_const(Z_TYPE_P(offset))); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), offset, BP_VAR_RW); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_array_offset(const zval *offset) +static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_array_offset_isset(const zval *offset) { - zend_type_error("Cannot access offset of type %s on array", zend_get_type_by_const(Z_TYPE_P(offset))); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), offset, BP_VAR_IS); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_empty_or_isset_offset(const zval *offset) +static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_array_offset_unset(const zval *offset) { - zend_type_error("Cannot access offset of type %s in isset or empty", zend_get_type_by_const(Z_TYPE_P(offset))); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), offset, BP_VAR_UNSET); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_string_offset(const zval *offset) +static 
zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_string_offset(const zval *offset, int type) { - zend_type_error("Cannot access offset of type %s on string", zend_zval_type_name(offset)); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_STRING), offset, type); } static zend_never_inline void zend_assign_to_object_dim(zend_object *obj, zval *dim, zval *value OPLINE_DC EXECUTE_DATA_DC) @@ -1651,7 +1651,7 @@ static zend_never_inline zend_long zend_check_string_offset(zval *dim, int type } return offset; } - zend_illegal_string_offset(dim); + zend_illegal_string_offset(dim, type); return 0; } case IS_UNDEF: @@ -1667,7 +1667,7 @@ static zend_never_inline zend_long zend_check_string_offset(zval *dim, int type dim = Z_REFVAL_P(dim); goto try_again; default: - zend_illegal_string_offset(dim); + zend_illegal_string_offset(dim, type); return 0; } @@ -2390,7 +2390,7 @@ static zend_never_inline uint8_t slow_index_convert(HashTable *ht, const zval *d value->lval = 1; return IS_LONG; default: - zend_illegal_array_offset(dim); + zend_illegal_array_offset_access(dim); return IS_NULL; } } @@ -2464,7 +2464,7 @@ static zend_never_inline uint8_t slow_index_convert_w(HashTable *ht, const zval value->lval = 1; return IS_LONG; default: - zend_illegal_array_offset(dim); + zend_illegal_array_offset_access(dim); return IS_NULL; } } @@ -2762,7 +2762,7 @@ static zend_always_inline void zend_fetch_dimension_address_read(zval *result, z ZVAL_NULL(result); return; } - zend_illegal_string_offset(dim); + zend_illegal_string_offset(dim, BP_VAR_R); ZVAL_NULL(result); return; } @@ -2801,7 +2801,7 @@ static zend_always_inline void zend_fetch_dimension_address_read(zval *result, z dim = Z_REFVAL_P(dim); goto try_string_offset; default: - zend_illegal_string_offset(dim); + zend_illegal_string_offset(dim, BP_VAR_R); ZVAL_NULL(result); return; } @@ -2923,7 +2923,7 @@ static zend_never_inline zval* ZEND_FASTCALL zend_find_array_dim_slow(HashTable ZVAL_UNDEFINED_OP2(); goto str_idx; } else { - 
zend_illegal_empty_or_isset_offset(offset); + zend_illegal_array_offset_isset(offset); return NULL; } } @@ -3046,7 +3046,7 @@ static zend_never_inline bool ZEND_FASTCALL zend_array_key_exists_fast(HashTable str = ZSTR_EMPTY_ALLOC(); goto str_key; } else { - zend_illegal_array_offset(key); + zend_illegal_array_offset_access(key); return 0; } } diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index 7e86b29c6b4f7..437aa4b0c3b55 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -6098,7 +6098,7 @@ ZEND_VM_C_LABEL(num_index): str = ZSTR_EMPTY_ALLOC(); ZEND_VM_C_GOTO(str_index); } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } FREE_OP2(); @@ -6610,7 +6610,7 @@ ZEND_VM_C_LABEL(num_index_dim): key = ZSTR_EMPTY_ALLOC(); ZEND_VM_C_GOTO(str_index_dim); } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index 14e3a5aca2a2a..8ec2f6946007e 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -7371,7 +7371,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CONST_C str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -9690,7 +9690,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CONST_T str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } zval_ptr_dtor_nogc(EX_VAR(opline->op2.var)); @@ -10613,7 +10613,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CONST_U str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -12063,7 
+12063,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CONST_C str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -20063,7 +20063,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_TMP_CON str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -20507,7 +20507,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_TMP_TMP str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } zval_ptr_dtor_nogc(EX_VAR(opline->op2.var)); @@ -20968,7 +20968,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_TMP_UNU str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -21372,7 +21372,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_TMP_CV_ str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -25188,7 +25188,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_VAR_CON str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -25280,7 +25280,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_VAR_CONST_HANDL key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -27627,7 +27627,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL 
ZEND_ADD_ARRAY_ELEMENT_SPEC_VAR_TMP str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } zval_ptr_dtor_nogc(EX_VAR(opline->op2.var)); @@ -27719,7 +27719,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_VAR_TMPVAR_HAND key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -29705,7 +29705,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_VAR_UNU str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -32015,7 +32015,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_VAR_CV_ str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -32107,7 +32107,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_VAR_CV_HANDLER( key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -43690,7 +43690,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CV_CONS str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -43782,7 +43782,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_CV_CONST_HANDLE key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -47330,7 +47330,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL 
ZEND_ADD_ARRAY_ELEMENT_SPEC_CV_TMPV str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } zval_ptr_dtor_nogc(EX_VAR(opline->op2.var)); @@ -47422,7 +47422,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_CV_TMPVAR_HANDL key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -49292,7 +49292,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CV_UNUS str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -52791,7 +52791,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CV_CV_H str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -52883,7 +52883,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_CV_CV_HANDLER(Z key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c index 41c7e14a804cb..f9541bd7087dc 100644 --- a/ext/opcache/jit/zend_jit_helpers.c +++ b/ext/opcache/jit/zend_jit_helpers.c @@ -27,21 +27,6 @@ static ZEND_COLD void undef_result_after_exception(void) { } } -static ZEND_COLD void zend_jit_illegal_array_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s on array", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -static ZEND_COLD void zend_jit_illegal_empty_or_isset_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s in isset or empty", 
zend_get_type_by_const(Z_TYPE_P(offset))); -} - -static ZEND_COLD void zend_jit_illegal_string_offset(zval *offset) -{ - zend_type_error("Cannot access offset of type %s on string", zend_zval_value_name(offset)); -} - static zend_never_inline zend_function* ZEND_FASTCALL _zend_jit_init_func_run_time_cache(zend_op_array *op_array) /* {{{ */ { void **run_time_cache; @@ -493,7 +478,7 @@ static void ZEND_FASTCALL zend_jit_fetch_dim_r_helper(zend_array *ht, zval *dim, hval = 1; goto num_index; default: - zend_jit_illegal_array_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_R); undef_result_after_exception(); return; } @@ -635,7 +620,7 @@ static void ZEND_FASTCALL zend_jit_fetch_dim_is_helper(zend_array *ht, zval *dim hval = 1; goto num_index; default: - zend_jit_illegal_array_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_IS); undef_result_after_exception(); return; } @@ -737,7 +722,7 @@ static int ZEND_FASTCALL zend_jit_fetch_dim_isset_helper(zend_array *ht, zval *d hval = 1; goto num_index; default: - zend_jit_illegal_empty_or_isset_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_IS); return 0; } @@ -873,7 +858,7 @@ static zval* ZEND_FASTCALL zend_jit_fetch_dim_rw_helper(zend_array *ht, zval *di hval = 1; goto num_index; default: - zend_jit_illegal_array_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_RW); undef_result_after_exception(); return NULL; } @@ -1006,7 +991,7 @@ static zval* ZEND_FASTCALL zend_jit_fetch_dim_w_helper(zend_array *ht, zval *dim hval = 1; goto num_index; default: - zend_jit_illegal_array_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_R); undef_result_after_exception(); if (EG(opline_before_exception) && (EG(opline_before_exception)+1)->opcode == ZEND_OP_DATA @@ -1029,7 +1014,8 @@ static zval* ZEND_FASTCALL zend_jit_fetch_dim_w_helper(zend_array *ht, zval *dim 
return retval; } -static zend_never_inline zend_long zend_check_string_offset(zval *dim/*, int type*/) +/* type is one of the BP_VAR_* constants */ +static zend_never_inline zend_long zend_check_string_offset(zval *dim, int type) { zend_long offset; @@ -1049,7 +1035,7 @@ static zend_never_inline zend_long zend_check_string_offset(zval *dim/*, int typ } return offset; } - zend_jit_illegal_string_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_STRING), dim, BP_VAR_R); return 0; } case IS_UNDEF: @@ -1065,7 +1051,7 @@ static zend_never_inline zend_long zend_check_string_offset(zval *dim/*, int typ dim = Z_REFVAL_P(dim); goto try_again; default: - zend_jit_illegal_string_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_STRING), dim, type); return 0; } @@ -1103,7 +1089,7 @@ static zend_string* ZEND_FASTCALL zend_jit_fetch_dim_str_r_helper(zend_string *s if (!(GC_FLAGS(str) & IS_STR_INTERNED)) { GC_ADDREF(str); } - offset = zend_check_string_offset(dim/*, BP_VAR_R*/); + offset = zend_check_string_offset(dim, BP_VAR_R); if (!(GC_FLAGS(str) & IS_STR_INTERNED) && UNEXPECTED(GC_DELREF(str) == 0)) { zend_string *ret = zend_jit_fetch_dim_str_offset(str, offset); zend_string_efree(str); @@ -1140,7 +1126,7 @@ static void ZEND_FASTCALL zend_jit_fetch_dim_str_is_helper(zend_string *str, zva dim = Z_REFVAL_P(dim); goto try_string_offset; default: - zend_jit_illegal_string_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_STRING), dim, BP_VAR_IS); break; } @@ -1242,7 +1228,7 @@ static zend_never_inline void zend_assign_to_string_offset(zval *str, zval *dim, /* The string may be destroyed while throwing the notice. * Temporarily increase the refcount to detect this situation. 
*/ GC_ADDREF(s); - offset = zend_check_string_offset(dim/*, BP_VAR_W*/); + offset = zend_check_string_offset(dim, BP_VAR_W); if (UNEXPECTED(GC_DELREF(s) == 0)) { zend_string_efree(s); if (result) { @@ -1418,7 +1404,7 @@ static zend_always_inline void ZEND_FASTCALL zend_jit_fetch_dim_obj_helper(zval zend_throw_error(NULL, "[] operator not supported for strings"); } else { if (UNEXPECTED(Z_TYPE_P(dim) != IS_LONG)) { - zend_check_string_offset(dim/*, BP_VAR_RW*/); + zend_check_string_offset(dim, BP_VAR_RW); } zend_wrong_string_offset_error(); } @@ -1606,7 +1592,7 @@ static void ZEND_FASTCALL zend_jit_assign_dim_op_helper(zval *container, zval *d zend_throw_error(NULL, "[] operator not supported for strings"); } else { if (UNEXPECTED(Z_TYPE_P(dim) != IS_LONG)) { - zend_check_string_offset(dim/*, BP_VAR_RW*/); + zend_check_string_offset(dim, BP_VAR_RW); } zend_wrong_string_offset_error(); } diff --git a/ext/opcache/tests/jit/assign_dim_002.phpt b/ext/opcache/tests/jit/assign_dim_002.phpt index 83b4bfdec7873..743ca1bf093ee 100644 --- a/ext/opcache/tests/jit/assign_dim_002.phpt +++ b/ext/opcache/tests/jit/assign_dim_002.phpt @@ -161,7 +161,7 @@ array(1) { int(1) } } -Cannot access offset of type object on array +Cannot access offset of type Closure on array array(1) { [0]=> array(2) { diff --git a/ext/spl/spl_array.c b/ext/spl/spl_array.c index 1ef0c48d272d6..0373f5a7820ea 100644 --- a/ext/spl/spl_array.c +++ b/ext/spl/spl_array.c @@ -95,21 +95,6 @@ static inline HashTable **spl_array_get_hash_table_ptr(spl_array_object* intern) } /* }}} */ -static void spl_array_illegal_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s on ArrayObject", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -static void spl_array_illegal_empty_or_isset_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s in isset or empty", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -static void spl_array_illegal_unset_offset(const zval *offset) -{ 
- zend_type_error("Cannot access offset of type %s in unset", zend_get_type_by_const(Z_TYPE_P(offset))); -} - static inline HashTable *spl_array_get_hash_table(spl_array_object* intern) { /* {{{ */ return *spl_array_get_hash_table_ptr(intern); } @@ -269,6 +254,8 @@ static void spl_hash_key_release(spl_hash_key *key) { } } +/* This function does not throw any exceptions for illegal offsets, calls to + * zend_illegal_container_offset(); need to be made if the return value is FAILURE */ static zend_result get_hash_key(spl_hash_key *key, spl_array_object *intern, zval *offset) { key->release_key = false; @@ -309,7 +296,6 @@ static zend_result get_hash_key(spl_hash_key *key, spl_array_object *intern, zva ZVAL_DEREF(offset); goto try_again; default: - spl_array_illegal_offset(offset); return FAILURE; } @@ -320,7 +306,8 @@ static zend_result get_hash_key(spl_hash_key *key, spl_array_object *intern, zva return SUCCESS; } -static zval *spl_array_get_dimension_ptr(int check_inherited, spl_array_object *intern, zval *offset, int type) /* {{{ */ +static zval *spl_array_get_dimension_ptr(bool check_inherited, spl_array_object *intern, const zend_string *ce_name, + zval *offset, int type) /* {{{ */ { zval *retval; spl_hash_key key; @@ -336,7 +323,7 @@ static zval *spl_array_get_dimension_ptr(int check_inherited, spl_array_object * } if (get_hash_key(&key, intern, offset) == FAILURE) { - spl_array_illegal_offset(offset); + zend_illegal_container_offset(ce_name, offset, type); return (type == BP_VAR_W || type == BP_VAR_RW) ? 
&EG(error_zval) : &EG(uninitialized_zval); } @@ -438,7 +425,7 @@ static zval *spl_array_read_dimension_ex(int check_inherited, zend_object *objec } } - ret = spl_array_get_dimension_ptr(check_inherited, intern, offset, type); + ret = spl_array_get_dimension_ptr(check_inherited, intern, object->ce->name, offset, type); /* When in a write context, * ZE has to be fooled into thinking this is in a reference set @@ -512,7 +499,7 @@ static void spl_array_write_dimension_ex(int check_inherited, zend_object *objec } if (get_hash_key(&key, intern, offset) == FAILURE) { - spl_array_illegal_offset(offset); + zend_illegal_container_offset(object->ce->name, offset, BP_VAR_W); zval_ptr_dtor(value); return; } @@ -553,7 +540,7 @@ static void spl_array_unset_dimension_ex(int check_inherited, zend_object *objec } if (get_hash_key(&key, intern, offset) == FAILURE) { - spl_array_illegal_unset_offset(offset); + zend_illegal_container_offset(object->ce->name, offset, BP_VAR_UNSET); return; } @@ -623,7 +610,7 @@ static bool spl_array_has_dimension_ex(bool check_inherited, zend_object *object spl_hash_key key; if (get_hash_key(&key, intern, offset) == FAILURE) { - spl_array_illegal_empty_or_isset_offset(offset); + zend_illegal_container_offset(object->ce->name, offset, BP_VAR_IS); return 0; } @@ -861,7 +848,7 @@ static zval *spl_array_get_property_ptr_ptr(zend_object *object, zend_string *na return NULL; } ZVAL_STR(&member, name); - return spl_array_get_dimension_ptr(1, intern, &member, type); + return spl_array_get_dimension_ptr(1, intern, object->ce->name, &member, type); } return zend_std_get_property_ptr_ptr(object, name, type, cache_slot); } /* }}} */ diff --git a/ext/spl/spl_fixedarray.c b/ext/spl/spl_fixedarray.c index 574b06fc4e93c..bffdbbebcedca 100644 --- a/ext/spl/spl_fixedarray.c +++ b/ext/spl/spl_fixedarray.c @@ -83,11 +83,6 @@ static bool spl_fixedarray_empty(spl_fixedarray *array) return true; } -static void spl_fixedarray_illegal_offset(const zval *offset) -{ - 
zend_type_error("Cannot access offset of type %s on FixedArray", zend_get_type_by_const(Z_TYPE_P(offset))); -} - static void spl_fixedarray_default_ctor(spl_fixedarray *array) { array->size = 0; @@ -338,7 +333,8 @@ static zend_long spl_offset_convert_to_long(zval *offset) /* {{{ */ return Z_RES_HANDLE_P(offset); } - spl_fixedarray_illegal_offset(offset); + /* Use SplFixedArray name from the CE */ + zend_illegal_container_offset(spl_ce_SplFixedArray->name, offset, BP_VAR_R); return 0; } diff --git a/ext/spl/tests/ArrayObject_illegal_offset.phpt b/ext/spl/tests/ArrayObject_illegal_offset.phpt index 08353c704c6f3..a2803e4729663 100644 --- a/ext/spl/tests/ArrayObject_illegal_offset.phpt +++ b/ext/spl/tests/ArrayObject_illegal_offset.phpt @@ -36,4 +36,4 @@ Cannot access offset of type array on ArrayObject Cannot access offset of type array on ArrayObject Cannot access offset of type array on ArrayObject Cannot access offset of type array in isset or empty -Cannot access offset of type array in unset +Cannot unset offset of type array on ArrayObject diff --git a/ext/spl/tests/fixedarray_001.phpt b/ext/spl/tests/fixedarray_001.phpt index 35a7a9cf17725..0683555934d53 100644 --- a/ext/spl/tests/fixedarray_001.phpt +++ b/ext/spl/tests/fixedarray_001.phpt @@ -46,7 +46,7 @@ var_dump($b[0]); ?> --EXPECT-- RuntimeException: Index invalid or out of range -TypeError: Cannot access offset of type string on FixedArray +TypeError: Cannot access offset of type string on SplFixedArray RuntimeException: Index invalid or out of range string(6) "value0" string(6) "value2" diff --git a/ext/spl/tests/fixedarray_002.phpt b/ext/spl/tests/fixedarray_002.phpt index 940d5996f5dbc..0ee2dcb8ba11d 100644 --- a/ext/spl/tests/fixedarray_002.phpt +++ b/ext/spl/tests/fixedarray_002.phpt @@ -71,7 +71,7 @@ var_dump(count($a), $a->getSize(), count($a) == $a->getSize()); A::offsetSet RuntimeException: Index invalid or out of range A::offsetGet -TypeError: Cannot access offset of type string on FixedArray 
+TypeError: Cannot access offset of type string on SplFixedArray A::offsetUnset RuntimeException: Index invalid or out of range A::offsetSet diff --git a/ext/spl/tests/fixedarray_003.phpt b/ext/spl/tests/fixedarray_003.phpt index d246561c1b7e8..cca9ac07e9f7a 100644 --- a/ext/spl/tests/fixedarray_003.phpt +++ b/ext/spl/tests/fixedarray_003.phpt @@ -1,5 +1,5 @@ --TEST-- -SPL: FixedArray: Non integer offset handling +SPL: SplFixedArray: Non integer offset handling --FILE-- Date: Tue, 6 Jun 2023 13:29:55 +0300 Subject: [PATCH 131/168] Fixed deoptimization info for interrupt handler --- ext/opcache/jit/zend_jit_trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/opcache/jit/zend_jit_trace.c b/ext/opcache/jit/zend_jit_trace.c index d2e3e1e1c3185..cae233c684eb3 100644 --- a/ext/opcache/jit/zend_jit_trace.c +++ b/ext/opcache/jit/zend_jit_trace.c @@ -6834,7 +6834,8 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (!(t->flags & ZEND_JIT_TRACE_USES_INITIAL_IP) || (ra && zend_jit_trace_stack_needs_deoptimization(stack, op_array->last_var + op_array->T))) { - uint32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + /* Deoptimize to the first instruction of the loop */ + uint32_t exit_point = zend_jit_trace_get_exit_point(trace_buffer[1].opline, ZEND_JIT_EXIT_TO_VM); timeout_exit_addr = zend_jit_trace_get_exit_addr(exit_point); if (!timeout_exit_addr) { From a02f7f24c619612a5431c4b348cf49520d1da804 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Tue, 6 Jun 2023 12:12:07 +0100 Subject: [PATCH 132/168] Use more appropriate types for php_array_walk() function --- ext/standard/array.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ext/standard/array.c b/ext/standard/array.c index 6bb146eb46888..c2ae5225be31d 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -1387,8 +1387,8 @@ typedef struct { zend_fcall_info_cache fci_cache; } 
php_array_walk_context; -static int php_array_walk( - php_array_walk_context *context, zval *array, zval *userdata, int recursive) +static zend_result php_array_walk( + php_array_walk_context *context, zval *array, zval *userdata, bool recursive) { zval args[3], /* Arguments to userland function */ retval, /* Return value - unused */ @@ -1396,7 +1396,7 @@ static int php_array_walk( HashTable *target_hash = HASH_OF(array); HashPosition pos; uint32_t ht_iter; - int result = SUCCESS; + zend_result result = SUCCESS; /* Create a local copy of fci, as we want to use different arguments at different * levels of recursion. */ @@ -1538,7 +1538,7 @@ PHP_FUNCTION(array_walk) Z_PARAM_ZVAL(userdata) ZEND_PARSE_PARAMETERS_END(); - php_array_walk(&context, array, userdata, 0); + php_array_walk(&context, array, userdata, /* recursive */ false); RETURN_TRUE; } /* }}} */ @@ -1557,7 +1557,7 @@ PHP_FUNCTION(array_walk_recursive) Z_PARAM_ZVAL(userdata) ZEND_PARSE_PARAMETERS_END(); - php_array_walk(&context, array, userdata, 1); + php_array_walk(&context, array, userdata, /* recursive */ true); RETURN_TRUE; } /* }}} */ From e56904a573bcdb0e9678fa8c6998ddcbbb7ead81 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 6 Jun 2023 20:06:20 +0200 Subject: [PATCH 133/168] Disable old libxml2 hack if the version does not suffer from the bug (#11379) This bug is ancient, we'll keep the code but version check it with an ifdef. This should also save some cycles.
--- ext/dom/documentfragment.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ext/dom/documentfragment.c b/ext/dom/documentfragment.c index a3394e88d5566..b52a9c820baca 100644 --- a/ext/dom/documentfragment.c +++ b/ext/dom/documentfragment.c @@ -57,6 +57,7 @@ PHP_METHOD(DOMDocumentFragment, __construct) } /* }}} end DOMDocumentFragment::__construct */ +#if LIBXML_VERSION <= 20614 /* php_dom_xmlSetTreeDoc is a custom implementation of xmlSetTreeDoc needed for hack in appendXML due to libxml bug - no need to share this function */ static void php_dom_xmlSetTreeDoc(xmlNodePtr tree, xmlDocPtr doc) /* {{{ */ @@ -90,6 +91,7 @@ static void php_dom_xmlSetTreeDoc(xmlNodePtr tree, xmlDocPtr doc) /* {{{ */ } } /* }}} */ +#endif /* {{{ */ PHP_METHOD(DOMDocumentFragment, appendXML) { @@ -118,10 +120,11 @@ PHP_METHOD(DOMDocumentFragment, appendXML) { if (err != 0) { RETURN_FALSE; } - /* Following needed due to bug in libxml2 <= 2.6.14 - ifdef after next libxml release as bug is fixed in their cvs */ +#if LIBXML_VERSION <= 20614 + /* Following needed due to bug in libxml2 <= 2.6.14 */ php_dom_xmlSetTreeDoc(lst, nodep->doc); /* End stupid hack */ +#endif xmlAddChildList(nodep,lst); } From 5b430a25fadbbbbd19f3c1f0bf66c6ebbb21dc3b Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Tue, 6 Jun 2023 19:12:48 +0100 Subject: [PATCH 134/168] Update NEWS for PHP 8.3.0alpha1 --- NEWS | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index e43e600f59aab..8cf60aeedf885 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,9 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -?? ??? ????, PHP 8.3.0alpha1 +?? ??? ????, PHP 8.3.0alpha2 + + +08 Jun 2023, PHP 8.3.0alpha1 - CLI: . 
Added pdeathsig to builtin server to terminate workers when the master From cced0ddf9d8f25c7f95f6331e5f89350f90edd40 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 5 Jun 2023 22:13:35 +0200 Subject: [PATCH 135/168] Fix test failure for init_fcall_003.phpt without opcache If opcache isn't loaded, then opcache_invalidate() will fail. Reproducible when you compile PHP without opcache, or run PHP without opcache loaded, and try to run this test. Closes GH-11378. --- ext/opcache/tests/jit/init_fcall_003.phpt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/opcache/tests/jit/init_fcall_003.phpt b/ext/opcache/tests/jit/init_fcall_003.phpt index f37344cbce4a9..180f0745c16c6 100644 --- a/ext/opcache/tests/jit/init_fcall_003.phpt +++ b/ext/opcache/tests/jit/init_fcall_003.phpt @@ -11,6 +11,8 @@ opcache.jit_hot_loop=64 opcache.jit_hot_func=127 opcache.jit_hot_return=8 opcache.jit_hot_side_exit=8 +--EXTENSIONS-- +opcache --FILE-- Date: Sun, 16 Apr 2023 15:05:03 +0200 Subject: [PATCH 136/168] Fix missing randomness check and insufficient random bytes for SOAP HTTP Digest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If php_random_bytes_throw fails, the nonce will be uninitialized, but still sent to the server. The client nonce is intended to protect against a malicious server. See section 5.10 and 5.12 of RFC 7616 [1], and bullet point 2 below. Tim pointed out that even though it's the MD5 of the nonce that gets sent, enumerating 31 bits is trivial. So we have still a stack information leak of 31 bits. Furthermore, Tim found the following issues: * The small size of cnonce might cause the server to erroneously reject a request due to a repeated (cnonce, nc) pair. As per the birthday problem 31 bits of randomness will return a duplication with 50% chance after less than 55000 requests and nc always starts counting at 1. 
* The cnonce is intended to protect the client and password against a malicious server that returns a constant server nonce where the server precomputed a rainbow table between passwords and correct client response. As storage is fairly cheap, a server could precompute the client responses for (a subset of) client nonces and still have a chance of reversing the client response with the same probability as the cnonce duplication. Precomputing the rainbow table for all 2^31 cnonces increases the rainbow table size by factor 2 billion, which is infeasible. But precomputing it for 2^14 cnonces only increases the table size by factor 16k and the server would still have a 10% chance of successfully reversing a password with a single client request. This patch fixes the issues by increasing the nonce size, and checking the return value of php_random_bytes_throw(). In the process we also get rid of the MD5 hashing of the nonce. [1] RFC 7616: https://www.rfc-editor.org/rfc/rfc7616 Co-authored-by: Tim Düsterhus --- ext/soap/php_http.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/ext/soap/php_http.c b/ext/soap/php_http.c index 1da286ad875f8..e796dba9619ac 100644 --- a/ext/soap/php_http.c +++ b/ext/soap/php_http.c @@ -664,18 +664,23 @@ int make_http_soap_request(zval *this_ptr, if ((digest = zend_hash_str_find(Z_OBJPROP_P(this_ptr), "_digest", sizeof("_digest")-1)) != NULL) { if (Z_TYPE_P(digest) == IS_ARRAY) { char HA1[33], HA2[33], response[33], cnonce[33], nc[9]; - zend_long nonce; + unsigned char nonce[16]; PHP_MD5_CTX md5ctx; unsigned char hash[16]; - php_random_bytes_throw(&nonce, sizeof(nonce)); - nonce &= 0x7fffffff; + if (UNEXPECTED(php_random_bytes_throw(&nonce, sizeof(nonce)) != SUCCESS)) { + ZEND_ASSERT(EG(exception)); + php_stream_close(stream); + convert_to_null(Z_CLIENT_HTTPURL_P(this_ptr)); + convert_to_null(Z_CLIENT_HTTPSOCKET_P(this_ptr)); + convert_to_null(Z_CLIENT_USE_PROXY_P(this_ptr)); + 
smart_str_free(&soap_headers_z); + smart_str_free(&soap_headers); + return FALSE; + } - PHP_MD5Init(&md5ctx); - snprintf(cnonce, sizeof(cnonce), ZEND_LONG_FMT, nonce); - PHP_MD5Update(&md5ctx, (unsigned char*)cnonce, strlen(cnonce)); - PHP_MD5Final(hash, &md5ctx); - make_digest(cnonce, hash); + php_hash_bin2hex(cnonce, nonce, sizeof(nonce)); + cnonce[32] = 0; if ((tmp = zend_hash_str_find(Z_ARRVAL_P(digest), "nc", sizeof("nc")-1)) != NULL && Z_TYPE_P(tmp) == IS_LONG) { From 05724482637904235b95082d06e0dc01965c73d0 Mon Sep 17 00:00:00 2001 From: Remi Collet Date: Tue, 6 Jun 2023 18:05:22 +0200 Subject: [PATCH 137/168] Fix GH-11382 add missing hash header for bin2hex --- ext/soap/php_http.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/soap/php_http.c b/ext/soap/php_http.c index e796dba9619ac..77ed21d4f0f4e 100644 --- a/ext/soap/php_http.c +++ b/ext/soap/php_http.c @@ -20,6 +20,7 @@ #include "ext/standard/base64.h" #include "ext/standard/md5.h" #include "ext/standard/php_random.h" +#include "ext/hash/php_hash.h" static char *get_http_header_value_nodup(char *headers, char *type, size_t *len); static char *get_http_header_value(char *headers, char *type); From b720ab99f8de31e878e1707f0e232f28fc6655c5 Mon Sep 17 00:00:00 2001 From: Pierrick Charron Date: Tue, 6 Jun 2023 17:59:43 -0400 Subject: [PATCH 138/168] Update NEWS --- NEWS | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 9b41aeb1ed7d5..ff0d216d435ce 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,14 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -?? ??? ????, PHP 8.0.29 +?? ??? ????, PHP 8.0.30 +08 Jun 2023, PHP 8.0.29 + +- Soap: + . Fixed bug GHSA-76gg-c692-v2mw (Missing error check and insufficient random + bytes in HTTP Digest authentication for SOAP). 
(nielsdos, timwolla) + 14 Feb 2023, PHP 8.0.28 - Core: From 5604f7ae22cbc8f0539aa49421201348895f3401 Mon Sep 17 00:00:00 2001 From: Pierrick Charron Date: Tue, 6 Jun 2023 18:06:13 -0400 Subject: [PATCH 139/168] Update NEWS --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index 122e4a48b86bd..33d789b6b1714 100644 --- a/NEWS +++ b/NEWS @@ -89,6 +89,8 @@ PHP NEWS done). (peter279k) - Soap: + . Fixed bug GHSA-76gg-c692-v2mw (Missing error check and insufficient random + bytes in HTTP Digest authentication for SOAP). (nielsdos, timwolla) . Fixed bug GH-8426 (make test fail while soap extension build). (nielsdos) - SPL: From 269d6c5942896617c1bb51d143c25f4ffe1c6259 Mon Sep 17 00:00:00 2001 From: Pierrick Charron Date: Tue, 6 Jun 2023 18:10:06 -0400 Subject: [PATCH 140/168] Update NEWS --- NEWS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS b/NEWS index 68a2d8cb71544..2551047462e89 100644 --- a/NEWS +++ b/NEWS @@ -103,6 +103,8 @@ PHP NEWS done). (peter279k) - Soap: + . Fixed bug GHSA-76gg-c692-v2mw (Missing error check and insufficient random + bytes in HTTP Digest authentication for SOAP). (nielsdos, timwolla) . Fixed bug GH-8426 (make test fail while soap extension build). (nielsdos) - SPL: From 938ebb3b61bc745a4ea0581c95ec53306f8510fb Mon Sep 17 00:00:00 2001 From: Ben Ramsey Date: Tue, 6 Jun 2023 18:16:54 -0500 Subject: [PATCH 141/168] Add example commit message to release process doc --- docs/release-process.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release-process.md b/docs/release-process.md index 1a60d1ee7035e..07c27528e0f8a 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -605,7 +605,7 @@ slightly different steps. We'll call attention where the steps differ. git fetch --all git pull --rebase upstream master cd .. 
- git commit distributions + git commit distributions -m "X.Y.Z tarballs" git push upstream master ``` From ca1905116bb25ffda5509078563f673644a1656d Mon Sep 17 00:00:00 2001 From: Ben Ramsey Date: Tue, 6 Jun 2023 18:47:36 -0500 Subject: [PATCH 142/168] Add PHP 8.3 to release process doc; remove 7.4 --- docs/release-process.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release-process.md b/docs/release-process.md index 07c27528e0f8a..f69ac6cf3b5d1 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -6,10 +6,10 @@ repository available according to the release schedule. The release schedule for each version is published on the [PHP wiki](https://wiki.php.net): +- [PHP 8.3](https://wiki.php.net/todo/php83) - [PHP 8.2](https://wiki.php.net/todo/php82) - [PHP 8.1](https://wiki.php.net/todo/php81) - [PHP 8.0](https://wiki.php.net/todo/php80) -- [PHP 7.4](https://wiki.php.net/todo/php74) The PHP project publishes builds every two weeks. From 0561783903054b038a01f9279e761dc5bc29c41a Mon Sep 17 00:00:00 2001 From: Remi Collet Date: Tue, 6 Jun 2023 19:14:18 +0200 Subject: [PATCH 143/168] ensure session.sid_length have proper value for test --- ext/session/tests/session_regenerate_id_cookie.phpt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ext/session/tests/session_regenerate_id_cookie.phpt b/ext/session/tests/session_regenerate_id_cookie.phpt index f2375b253ed56..f1dc0727205d2 100644 --- a/ext/session/tests/session_regenerate_id_cookie.phpt +++ b/ext/session/tests/session_regenerate_id_cookie.phpt @@ -2,6 +2,8 @@ Test session_regenerate_id() function : basic functionality --EXTENSIONS-- session +--INI-- +session.sid_length = 32 --SKIPIF-- Date: Wed, 7 Jun 2023 18:49:11 +0200 Subject: [PATCH 144/168] [ci skip] Fix inaccurate comment --- ext/dom/parentnode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 70a952935cac2..c6d36f0c670b2 100644 --- 
a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -566,7 +566,8 @@ void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc) if (newchild) { xmlNodePtr last = fragment->last; - /* Unlink and free it unless it became a part of the fragment. */ + /* Unlink it unless it became a part of the fragment. + * Freeing will be taken care of by the lifetime of the returned dom object. */ if (child->parent != fragment) { xmlUnlinkNode(child); } From 6e04050474d363f27046bd3c0259b59b7fa87515 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 5 Jun 2023 21:56:10 +0200 Subject: [PATCH 145/168] Remove redundant assignment on nodep->ns It's already set by xmlSetNs(). --- ext/dom/document.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ext/dom/document.c b/ext/dom/document.c index 0660fa779e537..1b26c9c7bfc73 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -859,7 +859,6 @@ PHP_METHOD(DOMDocument, createElementNS) zval *id; xmlDocPtr docp; xmlNodePtr nodep = NULL; - xmlNsPtr nsptr = NULL; int ret; size_t uri_len = 0, name_len = 0, value_len = 0; char *uri, *name, *value = NULL; @@ -880,7 +879,7 @@ PHP_METHOD(DOMDocument, createElementNS) if (xmlValidateName((xmlChar *) localname, 0) == 0) { nodep = xmlNewDocNode(docp, NULL, (xmlChar *) localname, (xmlChar *) value); if (nodep != NULL && uri != NULL) { - nsptr = xmlSearchNsByHref(nodep->doc, nodep, (xmlChar *) uri); + xmlNsPtr nsptr = xmlSearchNsByHref(nodep->doc, nodep, (xmlChar *) uri); if (nsptr == NULL) { nsptr = dom_get_ns(nodep, uri, &errorcode, prefix); } @@ -908,9 +907,6 @@ PHP_METHOD(DOMDocument, createElementNS) RETURN_FALSE; } - - nodep->ns = nsptr; - DOM_RET_OBJ(nodep, &ret, intern); } /* }}} end dom_document_create_element_ns */ From bde6f2a2f74d0b5022c2657f5fd2f2b1196ae088 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Wed, 7 Jun 2023 23:51:18 +0200 Subject: [PATCH 146/168] Fix initial array size in 
`gc_status()` (#11393) Small fix for the initial array size to reflect the number of items that will be added. --- Zend/zend_builtin_functions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zend/zend_builtin_functions.c b/Zend/zend_builtin_functions.c index b8cd96c480282..047edfb83b3cc 100644 --- a/Zend/zend_builtin_functions.c +++ b/Zend/zend_builtin_functions.c @@ -138,7 +138,7 @@ ZEND_FUNCTION(gc_status) zend_gc_get_status(&status); - array_init_size(return_value, 3); + array_init_size(return_value, 8); add_assoc_bool_ex(return_value, "running", sizeof("running")-1, status.active); add_assoc_bool_ex(return_value, "protected", sizeof("protected")-1, status.gc_protected); From 79d024ac0e2a07b93362a5a2fd4ef36f61fca8fb Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 7 Jun 2023 23:53:21 +0200 Subject: [PATCH 147/168] Allow final modifier when using a method from a trait (#11394) Fixes GH-11388. Following https://wiki.php.net/rfc/horizontalreuse which introduced traits, this should be allowed. The implementation was refactored in 3f8c729. That commit is the first time the "final" check appears AFAICT, but no reason was given for why. That commit seems to have landed in 5.4.11 and the NEWS for that version doesn't seem to mention something relevant to the behaviour change. This patch removes the restriction of the final modifier. Closes GH-11394. --- NEWS | 3 +++ UPGRADING | 1 + Zend/tests/traits/language019.phpt | 5 ++++- Zend/tests/traits/language020.phpt | 21 +++++++++++++++++++++ Zend/zend_compile.c | 2 -- 5 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 Zend/tests/traits/language020.phpt diff --git a/NEWS b/NEWS index 8cf60aeedf885..b86e31ec70fa1 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? ????, PHP 8.3.0alpha2 +- Core: + . 
Fix GH-11388 (Allow "final" modifier when importing a method from a trait). + (nielsdos) 08 Jun 2023, PHP 8.3.0alpha1 diff --git a/UPGRADING b/UPGRADING index 643c8d68ab901..1320b919206c9 100644 --- a/UPGRADING +++ b/UPGRADING @@ -63,6 +63,7 @@ PHP 8.3 UPGRADE NOTES . Class, interface, trait, and enum constants now support type declarations. RFC: https://wiki.php.net/rfc/typed_class_constants . Closures created from magic methods can now accept named arguments. + . The final modifier may now be used when using a method from a trait. - Posix . posix_getrlimit() now takes an optional $res parameter to allow fetching a diff --git a/Zend/tests/traits/language019.phpt b/Zend/tests/traits/language019.phpt index 299f661db9b38..e64ec08909f5d 100644 --- a/Zend/tests/traits/language019.phpt +++ b/Zend/tests/traits/language019.phpt @@ -10,6 +10,9 @@ class C1 { T1::foo as final; } } +class C2 extends C1 { + public function foo() {} +} ?> --EXPECTF-- -Fatal error: Cannot use "final" as method modifier in trait alias in %s on line %d +Fatal error: Cannot override final method C1::foo() in %s on line %d diff --git a/Zend/tests/traits/language020.phpt b/Zend/tests/traits/language020.phpt new file mode 100644 index 0000000000000..bbfa5c82652c2 --- /dev/null +++ b/Zend/tests/traits/language020.phpt @@ -0,0 +1,21 @@ +--TEST-- +final alias - positive test variation +--FILE-- +foo(); +?> +--EXPECT-- +Done diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 5984206a8b136..b6a4840ab558d 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -7727,8 +7727,6 @@ static void zend_check_trait_alias_modifiers(uint32_t attr) /* {{{ */ zend_error_noreturn(E_COMPILE_ERROR, "Cannot use \"static\" as method modifier in trait alias"); } else if (attr & ZEND_ACC_ABSTRACT) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot use \"abstract\" as method modifier in trait alias"); - } else if (attr & ZEND_ACC_FINAL) { - zend_error_noreturn(E_COMPILE_ERROR, "Cannot use \"final\" as method 
modifier in trait alias"); } } /* }}} */ From 06d68738b78bd7a469931ca035f3dd0cce805623 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 8 Jun 2023 14:55:18 +0300 Subject: [PATCH 148/168] Keep consistent EG(current_execute_data) after return from generator (#11380) --- Zend/zend_vm_def.h | 4 ++++ Zend/zend_vm_execute.h | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index a2db0e30da7ac..646dab4ae685a 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -4549,6 +4549,8 @@ ZEND_VM_HANDLER(161, ZEND_GENERATOR_RETURN, CONST|TMP|VAR|CV, ANY, SPEC(OBSERVER ZEND_OBSERVER_FCALL_END(generator->execute_data, &generator->retval); + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -7837,6 +7839,7 @@ ZEND_VM_HELPER(zend_dispatch_try_catch_finally_helper, ANY, ANY, uint32_t try_ca cleanup_live_vars(execute_data, op_num, 0); if (UNEXPECTED((EX_CALL_INFO() & ZEND_CALL_GENERATOR) != 0)) { zend_generator *generator = zend_get_running_generator(EXECUTE_DATA_C); + EG(current_execute_data) = EX(prev_execute_data); zend_generator_close(generator, 1); ZEND_VM_RETURN(); } else { @@ -7930,6 +7933,7 @@ ZEND_VM_HANDLER(150, ZEND_USER_OPCODE, ANY, ANY) case ZEND_USER_OPCODE_RETURN: if (UNEXPECTED((EX_CALL_INFO() & ZEND_CALL_GENERATOR) != 0)) { zend_generator *generator = zend_get_running_generator(EXECUTE_DATA_C); + EG(current_execute_data) = EX(prev_execute_data); zend_generator_close(generator, 1); ZEND_VM_RETURN(); } else { diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index 388d19e3d692b..5675f89412159 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -3061,6 +3061,7 @@ static zend_never_inline ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_dispatch_try cleanup_live_vars(execute_data, op_num, 0); if (UNEXPECTED((EX_CALL_INFO() & ZEND_CALL_GENERATOR) != 0)) { zend_generator *generator = 
zend_get_running_generator(EXECUTE_DATA_C); + EG(current_execute_data) = EX(prev_execute_data); zend_generator_close(generator, 1); ZEND_VM_RETURN(); } else { @@ -3154,6 +3155,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_USER_OPCODE_SPEC_HANDLER(ZEND_ case ZEND_USER_OPCODE_RETURN: if (UNEXPECTED((EX_CALL_INFO() & ZEND_CALL_GENERATOR) != 0)) { zend_generator *generator = zend_get_running_generator(EXECUTE_DATA_C); + EG(current_execute_data) = EX(prev_execute_data); zend_generator_close(generator, 1); ZEND_VM_RETURN(); } else { @@ -4517,6 +4519,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_CONST_HA } } + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -4562,6 +4566,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_OBSERVER zend_observer_fcall_end(generator->execute_data, &generator->retval); + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -18954,6 +18960,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_TMP_HAND } } + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -21612,6 +21620,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_VAR_HAND } } + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -38480,6 +38490,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_CV_HANDL } } + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); From d5ad75108e8e0360f4c661d8f2041673118636d6 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Thu, 8 Jun 2023 13:03:29 +0100 Subject: [PATCH 149/168] More 
usage of known zend_str instead of C string (#11381) --- Zend/zend_API.c | 4 ++-- Zend/zend_ast.c | 2 +- Zend/zend_attributes.c | 2 +- Zend/zend_builtin_functions.c | 2 +- Zend/zend_compile.c | 20 ++++++++-------- Zend/zend_constants.c | 4 ++-- Zend/zend_enum.c | 4 ++-- Zend/zend_interfaces.c | 4 ++-- ext/curl/interface.c | 2 +- ext/dom/node.c | 3 ++- ext/ftp/ftp.c | 2 +- ext/iconv/iconv.c | 2 +- ext/ldap/ldap.c | 5 ++-- ext/opcache/jit/zend_jit.c | 2 +- ext/opcache/zend_persist.c | 2 +- ext/pdo_sqlite/sqlite_statement.c | 8 +++---- ext/pgsql/pgsql.c | 2 +- ext/reflection/php_reflection.c | 4 ++-- ext/session/session.c | 4 ++-- ext/simplexml/simplexml.c | 3 ++- ext/soap/soap.c | 10 ++++---- ext/sodium/libsodium.c | 4 ++-- ext/spl/spl_array.c | 3 ++- ext/spl/spl_dllist.c | 3 ++- ext/spl/spl_fixedarray.c | 3 ++- ext/spl/spl_heap.c | 3 ++- ext/standard/array.c | 38 +++++++++++++++---------------- ext/standard/filestat.c | 2 +- ext/standard/proc_open.c | 4 ++-- ext/standard/type.c | 22 +++++++++--------- ext/xml/xml.c | 4 ++-- main/main.c | 2 +- sapi/cli/php_cli.c | 2 +- sapi/cli/php_cli_server.c | 2 +- sapi/fpm/fpm/fpm_conf.c | 2 +- sapi/phpdbg/phpdbg_frame.c | 14 ++++++------ sapi/phpdbg/phpdbg_prompt.c | 12 +++++----- sapi/phpdbg/phpdbg_watch.c | 2 +- 38 files changed, 110 insertions(+), 103 deletions(-) diff --git a/Zend/zend_API.c b/Zend/zend_API.c index 2bd236a26262e..897201c7dbde4 100644 --- a/Zend/zend_API.c +++ b/Zend/zend_API.c @@ -3529,7 +3529,7 @@ static bool zend_is_callable_check_class(zend_string *name, zend_class_entry *sc *strict_class = 1; ret = 1; } - } else if (zend_string_equals_literal(lcname, "static")) { + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_STATIC))) { zend_class_entry *called_scope = zend_get_called_scope(frame); if (!called_scope) { @@ -4560,7 +4560,7 @@ ZEND_API zend_class_constant *zend_declare_typed_class_constant(zend_class_entry } } - if (zend_string_equals_literal_ci(name, "class")) { + if 
(zend_string_equals_ci(name, ZSTR_KNOWN(ZEND_STR_CLASS))) { zend_error_noreturn(ce->type == ZEND_INTERNAL_CLASS ? E_CORE_ERROR : E_COMPILE_ERROR, "A class constant must not be called 'class'; it is reserved for class name fetching"); } diff --git a/Zend/zend_ast.c b/Zend/zend_ast.c index 954c9958c534d..525d9dfe9a742 100644 --- a/Zend/zend_ast.c +++ b/Zend/zend_ast.c @@ -101,7 +101,7 @@ ZEND_API zend_ast * ZEND_FASTCALL zend_ast_create_constant(zend_string *name, ze ZEND_API zend_ast * ZEND_FASTCALL zend_ast_create_class_const_or_name(zend_ast *class_name, zend_ast *name) { zend_string *name_str = zend_ast_get_str(name); - if (zend_string_equals_literal_ci(name_str, "class")) { + if (zend_string_equals_ci(name_str, ZSTR_KNOWN(ZEND_STR_CLASS))) { zend_string_release(name_str); return zend_ast_create(ZEND_AST_CLASS_NAME, class_name); } else { diff --git a/Zend/zend_attributes.c b/Zend/zend_attributes.c index 2dbcb47392e9a..83c3c928c56e8 100644 --- a/Zend/zend_attributes.c +++ b/Zend/zend_attributes.c @@ -114,7 +114,7 @@ ZEND_METHOD(SensitiveParameterValue, __construct) Z_PARAM_ZVAL(value) ZEND_PARSE_PARAMETERS_END(); - zend_update_property(zend_ce_sensitive_parameter_value, Z_OBJ_P(ZEND_THIS), "value", strlen("value"), value); + zend_update_property_ex(zend_ce_sensitive_parameter_value, Z_OBJ_P(ZEND_THIS), ZSTR_KNOWN(ZEND_STR_VALUE), value); } ZEND_METHOD(SensitiveParameterValue, getValue) diff --git a/Zend/zend_builtin_functions.c b/Zend/zend_builtin_functions.c index 047edfb83b3cc..acf69536d4593 100644 --- a/Zend/zend_builtin_functions.c +++ b/Zend/zend_builtin_functions.c @@ -1322,7 +1322,7 @@ ZEND_FUNCTION(get_defined_functions) } ZEND_HASH_FOREACH_END(); zend_hash_str_add_new(Z_ARRVAL_P(return_value), "internal", sizeof("internal")-1, &internal); - zend_hash_str_add_new(Z_ARRVAL_P(return_value), "user", sizeof("user")-1, &user); + zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_USER), &user); } /* }}} */ diff --git a/Zend/zend_compile.c 
b/Zend/zend_compile.c index b6a4840ab558d..0b54823d2026c 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -1429,7 +1429,7 @@ ZEND_API zend_string *zend_type_to_string(zend_type type) { } static bool is_generator_compatible_class_type(zend_string *name) { - return zend_string_equals_literal_ci(name, "Traversable") + return zend_string_equals_ci(name, ZSTR_KNOWN(ZEND_STR_TRAVERSABLE)) || zend_string_equals_literal_ci(name, "Iterator") || zend_string_equals_literal_ci(name, "Generator"); } @@ -1617,7 +1617,7 @@ uint32_t zend_get_class_fetch_type(const zend_string *name) /* {{{ */ return ZEND_FETCH_CLASS_SELF; } else if (zend_string_equals_literal_ci(name, "parent")) { return ZEND_FETCH_CLASS_PARENT; - } else if (zend_string_equals_literal_ci(name, "static")) { + } else if (zend_string_equals_ci(name, ZSTR_KNOWN(ZEND_STR_STATIC))) { return ZEND_FETCH_CLASS_STATIC; } else { return ZEND_FETCH_CLASS_DEFAULT; @@ -2821,7 +2821,7 @@ static bool is_this_fetch(zend_ast *ast) /* {{{ */ { if (ast->kind == ZEND_AST_VAR && ast->child[0]->kind == ZEND_AST_ZVAL) { zval *name = zend_ast_get_zval(ast->child[0]); - return Z_TYPE_P(name) == IS_STRING && zend_string_equals_literal(Z_STR_P(name), "this"); + return Z_TYPE_P(name) == IS_STRING && zend_string_equals(Z_STR_P(name), ZSTR_KNOWN(ZEND_STR_THIS)); } return 0; @@ -4522,7 +4522,7 @@ static zend_result zend_try_compile_special_func(znode *result, zend_string *lcn return zend_compile_func_cuf(result, args, lcname); } else if (zend_string_equals_literal(lcname, "in_array")) { return zend_compile_func_in_array(result, args); - } else if (zend_string_equals_literal(lcname, "count") + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_COUNT)) || zend_string_equals_literal(lcname, "sizeof")) { return zend_compile_func_count(result, args, lcname); } else if (zend_string_equals_literal(lcname, "get_class")) { @@ -4872,7 +4872,7 @@ static void zend_compile_static_var_common(zend_string *var_name, zval *value, u value = 
zend_hash_update(CG(active_op_array)->static_variables, var_name, value); - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as static variable"); } @@ -4888,7 +4888,7 @@ static void zend_compile_static_var(zend_ast *ast) /* {{{ */ zend_ast *var_ast = ast->child[0]; zend_string *var_name = zend_ast_get_str(var_ast); - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as static variable"); } @@ -6089,7 +6089,7 @@ static void zend_compile_try(zend_ast *ast) /* {{{ */ zend_resolve_class_name_ast(class_ast)); opline->extended_value = zend_alloc_cache_slot(); - if (var_name && zend_string_equals_literal(var_name, "this")) { + if (var_name && zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot re-assign $this"); } @@ -6925,7 +6925,7 @@ static void zend_compile_params(zend_ast *ast, zend_ast *return_type_ast, uint32 if (EX_VAR_TO_NUM(var_node.u.op.var) != i) { zend_error_noreturn(E_COMPILE_ERROR, "Redefinition of parameter $%s", ZSTR_VAL(name)); - } else if (zend_string_equals_literal(name, "this")) { + } else if (zend_string_equals(name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as parameter"); } @@ -7152,7 +7152,7 @@ static void zend_compile_closure_binding(znode *closure, zend_op_array *op_array zend_op *opline; zval *value; - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as lexical variable"); } @@ -7196,7 +7196,7 @@ static void find_implicit_binds_recursively(closure_info *info, zend_ast *ast) { return; } - if (zend_string_equals_literal(name, "this")) { + if (zend_string_equals(name, 
ZSTR_KNOWN(ZEND_STR_THIS))) { /* $this does not need to be explicitly imported. */ return; } diff --git a/Zend/zend_constants.c b/Zend/zend_constants.c index 854f9c2116ee2..edc9fc0b82d44 100644 --- a/Zend/zend_constants.c +++ b/Zend/zend_constants.c @@ -314,7 +314,7 @@ ZEND_API zval *zend_get_class_constant_ex(zend_string *class_name, zend_string * } else { ce = scope->parent; } - } else if (zend_string_equals_literal_ci(class_name, "static")) { + } else if (zend_string_equals_ci(class_name, ZSTR_KNOWN(ZEND_STR_STATIC))) { ce = zend_get_called_scope(EG(current_execute_data)); if (UNEXPECTED(!ce)) { zend_throw_error(NULL, "Cannot access \"static\" when no class scope is active"); @@ -419,7 +419,7 @@ ZEND_API zval *zend_get_constant_ex(zend_string *cname, zend_class_entry *scope, } else { ce = scope->parent; } - } else if (zend_string_equals_literal_ci(class_name, "static")) { + } else if (zend_string_equals_ci(class_name, ZSTR_KNOWN(ZEND_STR_STATIC))) { ce = zend_get_called_scope(EG(current_execute_data)); if (UNEXPECTED(!ce)) { zend_throw_error(NULL, "Cannot access \"static\" when no class scope is active"); diff --git a/Zend/zend_enum.c b/Zend/zend_enum.c index 770beb7320ddb..2e85bb08a7245 100644 --- a/Zend/zend_enum.c +++ b/Zend/zend_enum.c @@ -62,12 +62,12 @@ static void zend_verify_enum_properties(zend_class_entry *ce) zend_property_info *property_info; ZEND_HASH_MAP_FOREACH_PTR(&ce->properties_info, property_info) { - if (zend_string_equals_literal(property_info->name, "name")) { + if (zend_string_equals(property_info->name, ZSTR_KNOWN(ZEND_STR_NAME))) { continue; } if ( ce->enum_backing_type != IS_UNDEF - && zend_string_equals_literal(property_info->name, "value") + && zend_string_equals(property_info->name, ZSTR_KNOWN(ZEND_STR_VALUE)) ) { continue; } diff --git a/Zend/zend_interfaces.c b/Zend/zend_interfaces.c index 5d2f7d0ffc4a9..14593264a4803 100644 --- a/Zend/zend_interfaces.c +++ b/Zend/zend_interfaces.c @@ -344,8 +344,8 @@ static int 
zend_implement_iterator(zend_class_entry *interface, zend_class_entry &class_type->function_table, "rewind", sizeof("rewind") - 1); funcs_ptr->zf_valid = zend_hash_str_find_ptr( &class_type->function_table, "valid", sizeof("valid") - 1); - funcs_ptr->zf_key = zend_hash_str_find_ptr( - &class_type->function_table, "key", sizeof("key") - 1); + funcs_ptr->zf_key = zend_hash_find_ptr( + &class_type->function_table, ZSTR_KNOWN(ZEND_STR_KEY)); funcs_ptr->zf_current = zend_hash_str_find_ptr( &class_type->function_table, "current", sizeof("current") - 1); funcs_ptr->zf_next = zend_hash_str_find_ptr( diff --git a/ext/curl/interface.c b/ext/curl/interface.c index 3aeae5c575f82..e932cd0b0f7ab 100644 --- a/ext/curl/interface.c +++ b/ext/curl/interface.c @@ -1377,7 +1377,7 @@ static inline zend_result build_mime_structure_from_hash(php_curl *ch, zval *zpo curl_seek_callback seekfunc = seek_cb; #endif - prop = zend_read_property(curl_CURLFile_class, Z_OBJ_P(current), "name", sizeof("name")-1, 0, &rv); + prop = zend_read_property_ex(curl_CURLFile_class, Z_OBJ_P(current), ZSTR_KNOWN(ZEND_STR_NAME), /* silent */ false, &rv); ZVAL_DEREF(prop); if (Z_TYPE_P(prop) != IS_STRING) { php_error_docref(NULL, E_WARNING, "Invalid filename for key %s", ZSTR_VAL(string_key)); diff --git a/ext/dom/node.c b/ext/dom/node.c index 8aad9de12c8e8..29262f8579146 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -1622,7 +1622,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ zval *tmp; char *xquery; - tmp = zend_hash_str_find(ht, "query", sizeof("query")-1); + /* Find "query" key */ + tmp = zend_hash_find(ht, ZSTR_KNOWN(ZEND_STR_QUERY)); if (!tmp) { /* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */ zend_argument_value_error(3 + mode, "must have a \"query\" key"); diff --git a/ext/ftp/ftp.c b/ext/ftp/ftp.c index 358f4fe7bf86c..2b8dca47fab01 100644 --- a/ext/ftp/ftp.c +++ b/ext/ftp/ftp.c @@ -708,7 +708,7 @@ ftp_mlsd_parse_line(HashTable *ht, const 
char *input) { /* Extract pathname */ ZVAL_STRINGL(&zstr, sp + 1, end - sp - 1); - zend_hash_str_update(ht, "name", sizeof("name")-1, &zstr); + zend_hash_update(ht, ZSTR_KNOWN(ZEND_STR_NAME), &zstr); end = sp; while (input < end) { diff --git a/ext/iconv/iconv.c b/ext/iconv/iconv.c index c2ed3f258bc88..93e83a95b5f1c 100644 --- a/ext/iconv/iconv.c +++ b/ext/iconv/iconv.c @@ -1978,7 +1978,7 @@ PHP_FUNCTION(iconv_mime_encode) if (pref != NULL) { zval *pzval; - if ((pzval = zend_hash_str_find_deref(Z_ARRVAL_P(pref), "scheme", sizeof("scheme") - 1)) != NULL) { + if ((pzval = zend_hash_find_deref(Z_ARRVAL_P(pref), ZSTR_KNOWN(ZEND_STR_SCHEME))) != NULL) { if (Z_TYPE_P(pzval) == IS_STRING && Z_STRLEN_P(pzval) > 0) { switch (Z_STRVAL_P(pzval)[0]) { case 'B': case 'b': diff --git a/ext/ldap/ldap.c b/ext/ldap/ldap.c index 715bde8bd13e3..8a60df4edf7b6 100644 --- a/ext/ldap/ldap.c +++ b/ext/ldap/ldap.c @@ -412,7 +412,7 @@ static int _php_ldap_control_from_array(LDAP *ld, LDAPControl** ctrl, zval* arra struct berval control_value = { 0L, NULL }; int control_value_alloc = 0; - if ((val = zend_hash_str_find(Z_ARRVAL_P(array), "value", sizeof("value") - 1)) != NULL) { + if ((val = zend_hash_find(Z_ARRVAL_P(array), ZSTR_KNOWN(ZEND_STR_VALUE))) != NULL) { if (Z_TYPE_P(val) != IS_ARRAY) { tmpstring = zval_get_string(val); if (EG(exception)) { @@ -634,7 +634,8 @@ static int _php_ldap_control_from_array(LDAP *ld, LDAPControl** ctrl, zval* arra } else if ((tmp = zend_hash_str_find(Z_ARRVAL_P(val), "offset", sizeof("offset") - 1)) != NULL) { vlvInfo.ldvlv_attrvalue = NULL; vlvInfo.ldvlv_offset = zval_get_long(tmp); - if ((tmp = zend_hash_str_find(Z_ARRVAL_P(val), "count", sizeof("count") - 1)) != NULL) { + /* Find "count" key */ + if ((tmp = zend_hash_find(Z_ARRVAL_P(val), ZSTR_KNOWN(ZEND_STR_COUNT))) != NULL) { vlvInfo.ldvlv_count = zval_get_long(tmp); } else { rc = -1; diff --git a/ext/opcache/jit/zend_jit.c b/ext/opcache/jit/zend_jit.c index f168ec190eee6..1df2b9af92b04 100644 --- 
a/ext/opcache/jit/zend_jit.c +++ b/ext/opcache/jit/zend_jit.c @@ -4822,7 +4822,7 @@ ZEND_EXT_API int zend_jit_config(zend_string *jit, int stage) JIT_G(trigger) = ZEND_JIT_ON_HOT_TRACE; JIT_G(opt_flags) = ZEND_JIT_REG_ALLOC_GLOBAL | ZEND_JIT_CPU_AVX; return SUCCESS; - } else if (zend_string_equals_literal_ci(jit, "function")) { + } else if (zend_string_equals_ci(jit, ZSTR_KNOWN(ZEND_STR_FUNCTION))) { JIT_G(enabled) = 1; JIT_G(on) = 1; JIT_G(opt_level) = ZEND_JIT_LEVEL_OPT_SCRIPT; diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c index 791032f0e1f26..c8330c1e79057 100644 --- a/ext/opcache/zend_persist.c +++ b/ext/opcache/zend_persist.c @@ -1121,7 +1121,7 @@ void zend_update_parent_ce(zend_class_entry *ce) if (zend_class_implements_interface(ce, zend_ce_iterator)) { ce->iterator_funcs_ptr->zf_rewind = zend_hash_str_find_ptr(&ce->function_table, "rewind", sizeof("rewind") - 1); ce->iterator_funcs_ptr->zf_valid = zend_hash_str_find_ptr(&ce->function_table, "valid", sizeof("valid") - 1); - ce->iterator_funcs_ptr->zf_key = zend_hash_str_find_ptr(&ce->function_table, "key", sizeof("key") - 1); + ce->iterator_funcs_ptr->zf_key = zend_hash_find_ptr(&ce->function_table, ZSTR_KNOWN(ZEND_STR_KEY)); ce->iterator_funcs_ptr->zf_current = zend_hash_str_find_ptr(&ce->function_table, "current", sizeof("current") - 1); ce->iterator_funcs_ptr->zf_next = zend_hash_str_find_ptr(&ce->function_table, "next", sizeof("next") - 1); } diff --git a/ext/pdo_sqlite/sqlite_statement.c b/ext/pdo_sqlite/sqlite_statement.c index 90de059a3b7ac..c6b907f6fc22f 100644 --- a/ext/pdo_sqlite/sqlite_statement.c +++ b/ext/pdo_sqlite/sqlite_statement.c @@ -319,12 +319,12 @@ static int pdo_sqlite_stmt_col_meta(pdo_stmt_t *stmt, zend_long colno, zval *ret switch (sqlite3_column_type(S->stmt, colno)) { case SQLITE_NULL: - add_assoc_string(return_value, "native_type", "null"); + add_assoc_str(return_value, "native_type", ZSTR_KNOWN(ZEND_STR_NULL_LOWERCASE)); add_assoc_long(return_value, 
"pdo_type", PDO_PARAM_NULL); break; case SQLITE_FLOAT: - add_assoc_string(return_value, "native_type", "double"); + add_assoc_str(return_value, "native_type", ZSTR_KNOWN(ZEND_STR_DOUBLE)); add_assoc_long(return_value, "pdo_type", PDO_PARAM_STR); break; @@ -333,12 +333,12 @@ static int pdo_sqlite_stmt_col_meta(pdo_stmt_t *stmt, zend_long colno, zval *ret /* TODO Check this is correct */ ZEND_FALLTHROUGH; case SQLITE_TEXT: - add_assoc_string(return_value, "native_type", "string"); + add_assoc_str(return_value, "native_type", ZSTR_KNOWN(ZEND_STR_STRING)); add_assoc_long(return_value, "pdo_type", PDO_PARAM_STR); break; case SQLITE_INTEGER: - add_assoc_string(return_value, "native_type", "integer"); + add_assoc_str(return_value, "native_type", ZSTR_KNOWN(ZEND_STR_INTEGER)); add_assoc_long(return_value, "pdo_type", PDO_PARAM_INT); break; } diff --git a/ext/pgsql/pgsql.c b/ext/pgsql/pgsql.c index d7215f3a9ba12..e62f8f5191073 100644 --- a/ext/pgsql/pgsql.c +++ b/ext/pgsql/pgsql.c @@ -4296,7 +4296,7 @@ static php_pgsql_data_type php_pgsql_get_data_type(const zend_string *type_name) /* This is stupid way to do. I'll fix it when I decide how to support user defined types. 
(Yasuo) */ /* boolean */ - if (zend_string_equals_literal(type_name, "bool")|| zend_string_equals_literal(type_name, "boolean")) + if (zend_string_equals(type_name, ZSTR_KNOWN(ZEND_STR_BOOL)) ||zend_string_equals(type_name, ZSTR_KNOWN(ZEND_STR_BOOLEAN))) return PG_BOOL; /* object id */ if (zend_string_equals_literal(type_name, "oid")) diff --git a/ext/reflection/php_reflection.c b/ext/reflection/php_reflection.c index 6eae95ba47fbe..7d24f5b4b2291 100644 --- a/ext/reflection/php_reflection.c +++ b/ext/reflection/php_reflection.c @@ -1560,7 +1560,7 @@ ZEND_METHOD(Reflection, getModifierNames) } if (modifiers & ZEND_ACC_STATIC) { - add_next_index_stringl(return_value, "static", sizeof("static")-1); + add_next_index_str(return_value, ZSTR_KNOWN(ZEND_STR_STATIC)); } if (modifiers & (ZEND_ACC_READONLY | ZEND_ACC_READONLY_CLASS)) { @@ -7137,7 +7137,7 @@ ZEND_METHOD(ReflectionFiber, getCallable) static zval *_reflection_write_property(zend_object *object, zend_string *name, zval *value, void **cache_slot) { if (zend_hash_exists(&object->ce->properties_info, name) - && (zend_string_equals_literal(name, "name") || zend_string_equals_literal(name, "class"))) + && (zend_string_equals(name, ZSTR_KNOWN(ZEND_STR_NAME)) || zend_string_equals(name, ZSTR_KNOWN(ZEND_STR_CLASS)))) { zend_throw_exception_ex(reflection_exception_ptr, 0, "Cannot set read-only property %s::$%s", ZSTR_VAL(object->ce->name), ZSTR_VAL(name)); diff --git a/ext/session/session.c b/ext/session/session.c index c3ee25313fbbe..05898594547f9 100644 --- a/ext/session/session.c +++ b/ext/session/session.c @@ -1927,7 +1927,7 @@ PHP_FUNCTION(session_module_name) } if (name) { - if (zend_string_equals_literal_ci(name, "user")) { + if (zend_string_equals_ci(name, ZSTR_KNOWN(ZEND_STR_USER))) { zend_argument_value_error(1, "cannot be \"user\""); RETURN_THROWS(); } @@ -1967,7 +1967,7 @@ static inline void set_user_save_handler_ini(void) { zend_string *ini_name, *ini_val; ini_name = ZSTR_INIT_LITERAL("session.save_handler", 
0); - ini_val = ZSTR_INIT_LITERAL("user", 0); + ini_val = ZSTR_KNOWN(ZEND_STR_USER); PS(set_handler) = 1; zend_alter_ini_entry(ini_name, ini_val, PHP_INI_USER, PHP_INI_STAGE_RUNTIME); PS(set_handler) = 0; diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index 7d9bed8ad6c40..e219d7d07ef75 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -2191,7 +2191,8 @@ static zend_function* php_sxe_find_fptr_count(zend_class_entry *ce) } if (inherited) { - fptr_count = zend_hash_str_find_ptr(&ce->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + fptr_count = zend_hash_find_ptr(&ce->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (fptr_count->common.scope == parent) { fptr_count = NULL; } diff --git a/ext/soap/soap.c b/ext/soap/soap.c index fea43f2f82146..f750e440f31c9 100644 --- a/ext/soap/soap.c +++ b/ext/soap/soap.c @@ -584,8 +584,8 @@ PHP_METHOD(SoapFault, __toString) this_ptr = ZEND_THIS; faultcode = zend_read_property(soap_fault_class_entry, Z_OBJ_P(this_ptr), "faultcode", sizeof("faultcode")-1, 1, &rv1); faultstring = zend_read_property(soap_fault_class_entry, Z_OBJ_P(this_ptr), "faultstring", sizeof("faultstring")-1, 1, &rv2); - file = zend_read_property(soap_fault_class_entry, Z_OBJ_P(this_ptr), "file", sizeof("file")-1, 1, &rv3); - line = zend_read_property(soap_fault_class_entry, Z_OBJ_P(this_ptr), "line", sizeof("line")-1, 1, &rv4); + file = zend_read_property_ex(soap_fault_class_entry, Z_OBJ_P(this_ptr), ZSTR_KNOWN(ZEND_STR_FILE), /* silent */ true, &rv3); + line = zend_read_property_ex(soap_fault_class_entry, Z_OBJ_P(this_ptr), ZSTR_KNOWN(ZEND_STR_LINE), /* silent */ true, &rv4); zend_call_method_with_0_params( Z_OBJ_P(ZEND_THIS), Z_OBJCE_P(ZEND_THIS), NULL, "gettraceasstring", &trace); @@ -1107,7 +1107,7 @@ static void _soap_server_exception(soapServicePtr service, sdlFunctionPtr functi } else if (instanceof_function(Z_OBJCE(exception_object), zend_ce_error)) { if (service->send_errors) { zval rv; 
- zend_string *msg = zval_get_string(zend_read_property(zend_ce_error, Z_OBJ(exception_object), "message", sizeof("message")-1, 0, &rv)); + zend_string *msg = zval_get_string(zend_read_property_ex(zend_ce_error, Z_OBJ(exception_object), ZSTR_KNOWN(ZEND_STR_MESSAGE), /* silent */ false, &rv)); add_soap_fault_ex(&exception_object, this_ptr, "Server", ZSTR_VAL(msg), NULL, NULL); zend_string_release_ex(msg, 0); } else { @@ -1943,7 +1943,7 @@ PHP_METHOD(SoapClient, __construct) php_stream_context_set_option(context, "ssl", "passphrase", tmp); } } - if ((tmp = zend_hash_str_find(ht, "trace", sizeof("trace")-1)) != NULL && + if ((tmp = zend_hash_find(ht, ZSTR_KNOWN(ZEND_STR_TRACE))) != NULL && (Z_TYPE_P(tmp) == IS_TRUE || (Z_TYPE_P(tmp) == IS_LONG && Z_LVAL_P(tmp) == 1))) { ZVAL_TRUE(Z_CLIENT_TRACE_P(this_ptr)); @@ -2747,7 +2747,7 @@ static void set_soap_fault(zval *obj, char *fault_code_ns, char *fault_code, cha } ZVAL_STRING(Z_FAULT_STRING_P(obj), fault_string ? fault_string : ""); - zend_update_property_string(zend_ce_exception, Z_OBJ_P(obj), "message", sizeof("message")-1, (fault_string ? 
fault_string : "")); + zend_update_property_ex(zend_ce_exception, Z_OBJ_P(obj), ZSTR_KNOWN(ZEND_STR_MESSAGE), Z_FAULT_STRING_P(obj)); if (fault_code != NULL) { int soap_version = SOAP_GLOBAL(soap_version); diff --git a/ext/sodium/libsodium.c b/ext/sodium/libsodium.c index 6eb10f7571358..a037c7b680d53 100644 --- a/ext/sodium/libsodium.c +++ b/ext/sodium/libsodium.c @@ -125,12 +125,12 @@ ZEND_GET_MODULE(sodium) /* Remove argument information from backtrace to prevent information leaks */ static void sodium_remove_param_values_from_backtrace(zend_object *obj) { zval rv; - zval *trace = zend_read_property(zend_get_exception_base(obj), obj, "trace", sizeof("trace")-1, 0, &rv); + zval *trace = zend_read_property_ex(zend_get_exception_base(obj), obj, ZSTR_KNOWN(ZEND_STR_TRACE), /* silent */ false, &rv); if (trace && Z_TYPE_P(trace) == IS_ARRAY) { zval *frame; ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(trace), frame) { if (Z_TYPE_P(frame) == IS_ARRAY) { - zval *args = zend_hash_str_find(Z_ARRVAL_P(frame), "args", sizeof("args")-1); + zval *args = zend_hash_find(Z_ARRVAL_P(frame), ZSTR_KNOWN(ZEND_STR_ARGS)); if (args) { zval_ptr_dtor(args); ZVAL_EMPTY_ARRAY(args); diff --git a/ext/spl/spl_array.c b/ext/spl/spl_array.c index 0373f5a7820ea..2e67b3cfe8c37 100644 --- a/ext/spl/spl_array.c +++ b/ext/spl/spl_array.c @@ -211,7 +211,8 @@ static zend_object *spl_array_object_new_ex(zend_class_entry *class_type, zend_o if (intern->fptr_offset_del->common.scope == parent) { intern->fptr_offset_del = NULL; } - intern->fptr_count = zend_hash_str_find_ptr(&class_type->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + intern->fptr_count = zend_hash_find_ptr(&class_type->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (intern->fptr_count->common.scope == parent) { intern->fptr_count = NULL; } diff --git a/ext/spl/spl_dllist.c b/ext/spl/spl_dllist.c index f1445fa5d905e..74dc7731fd152 100644 --- a/ext/spl/spl_dllist.c +++ b/ext/spl/spl_dllist.c @@ -375,7 +375,8 @@ static 
zend_object *spl_dllist_object_new_ex(zend_class_entry *class_type, zend_ if (intern->fptr_offset_del->common.scope == parent) { intern->fptr_offset_del = NULL; } - intern->fptr_count = zend_hash_str_find_ptr(&class_type->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + intern->fptr_count = zend_hash_find_ptr(&class_type->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (intern->fptr_count->common.scope == parent) { intern->fptr_count = NULL; } diff --git a/ext/spl/spl_fixedarray.c b/ext/spl/spl_fixedarray.c index bffdbbebcedca..7687544fc6043 100644 --- a/ext/spl/spl_fixedarray.c +++ b/ext/spl/spl_fixedarray.c @@ -282,7 +282,8 @@ static zend_object *spl_fixedarray_object_new_ex(zend_class_entry *class_type, z ZEND_ASSERT(parent); if (UNEXPECTED(inherited)) { - zend_function *fptr_count = zend_hash_str_find_ptr(&class_type->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + zend_function *fptr_count = zend_hash_find_ptr(&class_type->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (fptr_count->common.scope == parent) { fptr_count = NULL; } diff --git a/ext/spl/spl_heap.c b/ext/spl/spl_heap.c index d781ff4ca010d..4f242d3a3c394 100644 --- a/ext/spl/spl_heap.c +++ b/ext/spl/spl_heap.c @@ -451,7 +451,8 @@ static zend_object *spl_heap_object_new_ex(zend_class_entry *class_type, zend_ob if (intern->fptr_cmp->common.scope == parent) { intern->fptr_cmp = NULL; } - intern->fptr_count = zend_hash_str_find_ptr(&class_type->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + intern->fptr_count = zend_hash_find_ptr(&class_type->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (intern->fptr_count->common.scope == parent) { intern->fptr_count = NULL; } diff --git a/ext/standard/array.c b/ext/standard/array.c index c2ae5225be31d..19269c36a9f8c 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -1777,7 +1777,7 @@ static zend_long php_extract_ref_if_exists(zend_array *arr, zend_array *symbol_t if 
(zend_string_equals_literal(var_name, "GLOBALS")) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } @@ -1823,7 +1823,7 @@ static zend_long php_extract_if_exists(zend_array *arr, zend_array *symbol_table if (zend_string_equals_literal(var_name, "GLOBALS")) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } @@ -1856,7 +1856,7 @@ static zend_long php_extract_ref_overwrite(zend_array *arr, zend_array *symbol_t if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } @@ -1906,7 +1906,7 @@ static zend_long php_extract_overwrite(zend_array *arr, zend_array *symbol_table if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } @@ -1965,7 +1965,7 @@ static zend_long php_extract_ref_prefix_if_exists(zend_array *arr, zend_array *s } php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2019,7 +2019,7 @@ static zend_long php_extract_prefix_if_exists(zend_array *arr, zend_array *symbo } php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if 
(php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2082,7 +2082,7 @@ static zend_long php_extract_ref_prefix_same(zend_array *arr, zend_array *symbol prefix: php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2108,7 +2108,7 @@ static zend_long php_extract_ref_prefix_same(zend_array *arr, zend_array *symbol if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { goto prefix; } if (Z_ISREF_P(entry)) { @@ -2154,7 +2154,7 @@ static zend_long php_extract_prefix_same(zend_array *arr, zend_array *symbol_tab prefix: php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2180,7 +2180,7 @@ static zend_long php_extract_prefix_same(zend_array *arr, zend_array *symbol_tab if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { goto prefix; } ZVAL_DEREF(entry); @@ -2213,7 +2213,7 @@ static zend_long php_extract_ref_prefix_all(zend_array *arr, zend_array *symbol_ 
zend_string_release_ex(str, 0); } if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2260,7 +2260,7 @@ static zend_long php_extract_prefix_all(zend_array *arr, zend_array *symbol_tabl zend_string_release_ex(str, 0); } if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2298,7 +2298,7 @@ static zend_long php_extract_ref_prefix_invalid(zend_array *arr, zend_array *sym ZEND_HASH_FOREACH_KEY_VAL(arr, num_key, var_name, entry) { if (var_name) { if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name)) - || zend_string_equals_literal(var_name, "this")) { + || zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (!php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { zval_ptr_dtor_str(&final_name); @@ -2316,7 +2316,7 @@ static zend_long php_extract_ref_prefix_invalid(zend_array *arr, zend_array *sym continue; } } - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2353,7 +2353,7 @@ static zend_long php_extract_prefix_invalid(zend_array *arr, zend_array *symbol_ ZEND_HASH_FOREACH_KEY_VAL(arr, num_key, var_name, entry) { if (var_name) { if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name)) - || zend_string_equals_literal(var_name, "this")) { + || zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { php_prefix_varname(&final_name, prefix, 
ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (!php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { zval_ptr_dtor_str(&final_name); @@ -2371,7 +2371,7 @@ static zend_long php_extract_prefix_invalid(zend_array *arr, zend_array *symbol_ continue; } } - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2414,7 +2414,7 @@ static zend_long php_extract_ref_skip(zend_array *arr, zend_array *symbol_table) if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { continue; } orig_var = zend_hash_find_known_hash(symbol_table, var_name); @@ -2462,7 +2462,7 @@ static zend_long php_extract_skip(zend_array *arr, zend_array *symbol_table) /* if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { continue; } orig_var = zend_hash_find_known_hash(symbol_table, var_name); @@ -2601,7 +2601,7 @@ static void php_compact_var(HashTable *eg_active_symbol_table, zval *return_valu ZVAL_DEREF(value_ptr); Z_TRY_ADDREF_P(value_ptr); zend_hash_update(Z_ARRVAL_P(return_value), Z_STR_P(entry), value_ptr); - } else if (zend_string_equals_literal(Z_STR_P(entry), "this")) { + } else if (zend_string_equals(Z_STR_P(entry), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_object *object = zend_get_this_object(EG(current_execute_data)); if (object) { ZVAL_OBJ_COPY(&data, object); diff --git a/ext/standard/filestat.c b/ext/standard/filestat.c index a55f7b6fb8ce8..4fb712cabd985 100644 --- a/ext/standard/filestat.c +++ b/ext/standard/filestat.c @@ -889,7 +889,7 @@ PHPAPI void php_stat(zend_string *filename, int type, zval *return_value) case S_IFCHR: RETURN_STRING("char"); 
case S_IFDIR: RETURN_STRING("dir"); case S_IFBLK: RETURN_STRING("block"); - case S_IFREG: RETURN_STRING("file"); + case S_IFREG: RETURN_STR(ZSTR_KNOWN(ZEND_STR_FILE)); /* "file" */ #if defined(S_IFSOCK) && !defined(PHP_WIN32) case S_IFSOCK: RETURN_STRING("socket"); #endif diff --git a/ext/standard/proc_open.c b/ext/standard/proc_open.c index 03fd0716bacf3..182860720c608 100644 --- a/ext/standard/proc_open.c +++ b/ext/standard/proc_open.c @@ -917,7 +917,7 @@ static zend_result set_proc_descriptor_from_array(zval *descitem, descriptorspec } else if (zend_string_equals_literal(ztype, "socket")) { /* Set descriptor to socketpair */ retval = set_proc_descriptor_to_socket(&descriptors[ndesc]); - } else if (zend_string_equals_literal(ztype, "file")) { + } else if (zend_string_equals(ztype, ZSTR_KNOWN(ZEND_STR_FILE))) { /* Set descriptor to file */ if ((zfile = get_string_parameter(descitem, 1, "file name parameter for 'file'")) == NULL) { goto finish; @@ -940,7 +940,7 @@ static zend_result set_proc_descriptor_from_array(zval *descitem, descriptorspec retval = redirect_proc_descriptor( &descriptors[ndesc], (int)Z_LVAL_P(ztarget), descriptors, ndesc, nindex); - } else if (zend_string_equals_literal(ztype, "null")) { + } else if (zend_string_equals(ztype, ZSTR_KNOWN(ZEND_STR_NULL_LOWERCASE))) { /* Set descriptor to blackhole (discard all data written) */ retval = set_proc_descriptor_to_blackhole(&descriptors[ndesc]); } else if (zend_string_equals_literal(ztype, "pty")) { diff --git a/ext/standard/type.c b/ext/standard/type.c index a564446bd8ba6..6cd72fc744ca4 100644 --- a/ext/standard/type.c +++ b/ext/standard/type.c @@ -100,31 +100,31 @@ PHP_FUNCTION(settype) } else { ptr = Z_REFVAL_P(var); } - if (zend_string_equals_literal_ci(type, "integer")) { + if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_INTEGER))) { convert_to_long(ptr); - } else if (zend_string_equals_literal_ci(type, "int")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_INT))) { 
convert_to_long(ptr); - } else if (zend_string_equals_literal_ci(type, "float")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_FLOAT))) { convert_to_double(ptr); - } else if (zend_string_equals_literal_ci(type, "double")) { /* deprecated */ + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_DOUBLE))) { /* deprecated */ convert_to_double(ptr); - } else if (zend_string_equals_literal_ci(type, "string")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_STRING))) { convert_to_string(ptr); - } else if (zend_string_equals_literal_ci(type, "array")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_ARRAY))) { convert_to_array(ptr); - } else if (zend_string_equals_literal_ci(type, "object")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_OBJECT))) { convert_to_object(ptr); - } else if (zend_string_equals_literal_ci(type, "bool")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_BOOL))) { convert_to_boolean(ptr); - } else if (zend_string_equals_literal_ci(type, "boolean")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_BOOLEAN))) { convert_to_boolean(ptr); - } else if (zend_string_equals_literal_ci(type, "null")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_NULL_LOWERCASE))) { convert_to_null(ptr); } else { if (ptr == &tmp) { zval_ptr_dtor(&tmp); } - if (zend_string_equals_literal_ci(type, "resource")) { + if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_RESOURCE))) { zend_value_error("Cannot convert to resource type"); } else { zend_argument_value_error(2, "must be a valid type"); diff --git a/ext/xml/xml.c b/ext/xml/xml.c index ef68c70af50f1..f49bfbb5766b5 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -762,7 +762,7 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) if (parser->lastwasopen) { zval *myval; /* check if the current tag already has a value - if yes append to that! 
*/ - if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { + if ((myval = zend_hash_find(Z_ARRVAL_P(parser->ctag), ZSTR_KNOWN(ZEND_STR_VALUE)))) { size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), @@ -781,7 +781,7 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { if (zend_string_equals_literal(Z_STR_P(mytype), "cdata")) { - if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { + if ((myval = zend_hash_find(Z_ARRVAL_P(curtag), ZSTR_KNOWN(ZEND_STR_VALUE)))) { size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), diff --git a/main/main.c b/main/main.c index ad5ef2902f4af..d918e0b73ae78 100644 --- a/main/main.c +++ b/main/main.c @@ -169,7 +169,7 @@ static PHP_INI_MH(OnSetFacility) } #endif #ifdef LOG_USER - if (zend_string_equals_literal(facility, "LOG_USER") || zend_string_equals_literal(facility, "user")) { + if (zend_string_equals(facility, ZSTR_KNOWN(ZEND_STR_USER)) || zend_string_equals_literal(facility, "LOG_USER")) { PG(syslog_facility) = LOG_USER; return SUCCESS; } diff --git a/sapi/cli/php_cli.c b/sapi/cli/php_cli.c index 653093cee9c07..a5e3755be6594 100644 --- a/sapi/cli/php_cli.c +++ b/sapi/cli/php_cli.c @@ -1076,7 +1076,7 @@ static int do_cli(int argc, char **argv) /* {{{ */ if (EG(exception)) { zval rv; - zval *msg = zend_read_property(zend_ce_exception, EG(exception), "message", sizeof("message")-1, 0, &rv); + zval *msg = zend_read_property_ex(zend_ce_exception, EG(exception), ZSTR_KNOWN(ZEND_STR_MESSAGE), /* silent */ false, 
&rv); zend_printf("Exception: %s\n", Z_STRVAL_P(msg)); zend_object_release(EG(exception)); EG(exception) = NULL; diff --git a/sapi/cli/php_cli_server.c b/sapi/cli/php_cli_server.c index 8ea04137d1229..9c71b835801eb 100644 --- a/sapi/cli/php_cli_server.c +++ b/sapi/cli/php_cli_server.c @@ -361,7 +361,7 @@ static void append_essential_headers(smart_str* buffer, php_cli_server_client *c zval *val; struct timeval tv = {0}; - if (NULL != (val = zend_hash_str_find(&client->request.headers, "host", sizeof("host")-1))) { + if (NULL != (val = zend_hash_find(&client->request.headers, ZSTR_KNOWN(ZEND_STR_HOST)))) { smart_str_appends_ex(buffer, "Host: ", persistent); smart_str_append_ex(buffer, Z_STR_P(val), persistent); smart_str_appends_ex(buffer, "\r\n", persistent); diff --git a/sapi/fpm/fpm/fpm_conf.c b/sapi/fpm/fpm/fpm_conf.c index 8f7548a402c23..4823c039b059b 100644 --- a/sapi/fpm/fpm/fpm_conf.c +++ b/sapi/fpm/fpm/fpm_conf.c @@ -534,7 +534,7 @@ static char *fpm_conf_set_pm(zval *value, void **config, intptr_t offset) /* {{{ { zend_string *val = Z_STR_P(value); struct fpm_worker_pool_config_s *c = *config; - if (zend_string_equals_literal_ci(val, "static")) { + if (zend_string_equals_ci(val, ZSTR_KNOWN(ZEND_STR_STATIC))) { c->pm = PM_STYLE_STATIC; } else if (zend_string_equals_literal_ci(val, "dynamic")) { c->pm = PM_STYLE_DYNAMIC; diff --git a/sapi/phpdbg/phpdbg_frame.c b/sapi/phpdbg/phpdbg_frame.c index 644668d8d14e5..ed0d9573436a5 100644 --- a/sapi/phpdbg/phpdbg_frame.c +++ b/sapi/phpdbg/phpdbg_frame.c @@ -171,24 +171,24 @@ static void phpdbg_dump_prototype(zval *tmp) /* {{{ */ { zval *funcname, *class, class_zv, *args, *argstmp; - funcname = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("function")); + funcname = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_FUNCTION)); - if ((class = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("object")))) { + if ((class = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_OBJECT)))) { ZVAL_NEW_STR(&class_zv, 
Z_OBJCE_P(class)->name); class = &class_zv; } else { - class = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("class")); + class = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_CLASS)); } if (class) { - zval *type = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("type")); + zval *type = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_TYPE)); phpdbg_out("%s%s%s(", Z_STRVAL_P(class), Z_STRVAL_P(type), Z_STRVAL_P(funcname)); } else { phpdbg_out("%s(", Z_STRVAL_P(funcname)); } - args = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("args")); + args = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_ARGS)); if (args) { const zend_function *func = NULL; @@ -287,8 +287,8 @@ void phpdbg_dump_backtrace(size_t num) /* {{{ */ phpdbg_out(" (internal function)\n"); } - file = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("file")); - line = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("line")); + file = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_FILE)); + line = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_LINE)); zend_hash_move_forward_ex(Z_ARRVAL(zbacktrace), &position); } diff --git a/sapi/phpdbg/phpdbg_prompt.c b/sapi/phpdbg/phpdbg_prompt.c index f8041c660f266..39befbd64ec32 100644 --- a/sapi/phpdbg/phpdbg_prompt.c +++ b/sapi/phpdbg/phpdbg_prompt.c @@ -715,8 +715,8 @@ static inline void phpdbg_handle_exception(void) /* {{{ */ EG(exception) = NULL; zend_call_known_instance_method_with_0_params(ex->ce->__tostring, ex, &tmp); - file = zval_get_string(zend_read_property(zend_get_exception_base(ex), ex, ZEND_STRL("file"), 1, &rv)); - line = zval_get_long(zend_read_property(zend_get_exception_base(ex), ex, ZEND_STRL("line"), 1, &rv)); + file = zval_get_string(zend_read_property_ex(zend_get_exception_base(ex), ex, ZSTR_KNOWN(ZEND_STR_FILE), /* silent */ true, &rv)); + line = zval_get_long(zend_read_property_ex(zend_get_exception_base(ex), ex, ZSTR_KNOWN(ZEND_STR_LINE), /* silent */ true, &rv)); if (EG(exception)) { EG(exception) = NULL; @@ -724,7 +724,7 
@@ static inline void phpdbg_handle_exception(void) /* {{{ */ } else { zend_update_property_string(zend_get_exception_base(ex), ex, ZEND_STRL("string"), Z_STRVAL(tmp)); zval_ptr_dtor(&tmp); - msg = zval_get_string(zend_read_property(zend_get_exception_base(ex), ex, ZEND_STRL("string"), 1, &rv)); + msg = zval_get_string(zend_read_property_ex(zend_get_exception_base(ex), ex, ZSTR_KNOWN(ZEND_STR_STRING), /* silent */ true, &rv)); } phpdbg_error("Uncaught %s in %s on line " ZEND_LONG_FMT, ZSTR_VAL(ex->ce->name), ZSTR_VAL(file), line); @@ -1695,9 +1695,9 @@ void phpdbg_execute_ex(zend_execute_data *execute_data) /* {{{ */ PHPDBG_G(handled_exception) = exception; zval rv; - zend_string *file = zval_get_string(zend_read_property(zend_get_exception_base(exception), exception, ZEND_STRL("file"), 1, &rv)); - zend_long line = zval_get_long(zend_read_property(zend_get_exception_base(exception), exception, ZEND_STRL("line"), 1, &rv)); - zend_string *msg = zval_get_string(zend_read_property(zend_get_exception_base(exception), exception, ZEND_STRL("message"), 1, &rv)); + zend_string *file = zval_get_string(zend_read_property_ex(zend_get_exception_base(exception), exception, ZSTR_KNOWN(ZEND_STR_FILE), /* silent */ true, &rv)); + zend_long line = zval_get_long(zend_read_property_ex(zend_get_exception_base(exception), exception, ZSTR_KNOWN(ZEND_STR_LINE), /* silent */ true, &rv)); + zend_string *msg = zval_get_string(zend_read_property_ex(zend_get_exception_base(exception), exception, ZSTR_KNOWN(ZEND_STR_MESSAGE), /* silent */ true, &rv)); phpdbg_error("Uncaught %s in %s on line " ZEND_LONG_FMT ": %.*s", ZSTR_VAL(exception->ce->name), ZSTR_VAL(file), line, diff --git a/sapi/phpdbg/phpdbg_watch.c b/sapi/phpdbg/phpdbg_watch.c index 0468d4614fd5d..d4af608ff762b 100644 --- a/sapi/phpdbg/phpdbg_watch.c +++ b/sapi/phpdbg/phpdbg_watch.c @@ -1365,7 +1365,7 @@ static int phpdbg_watchpoint_parse_symtables(char *input, size_t len, int (*call int ret; if (scope && len >= 5 && !memcmp("$this", 
input, 5)) { - zend_hash_str_add(EG(current_execute_data)->symbol_table, ZEND_STRL("this"), &EG(current_execute_data)->This); + zend_hash_add(EG(current_execute_data)->symbol_table, ZSTR_KNOWN(ZEND_STR_THIS), &EG(current_execute_data)->This); } if (callback == phpdbg_create_array_watchpoint) { From a6911cbd4d33d2301e9be4ff43a947fcb5aa111d Mon Sep 17 00:00:00 2001 From: Mikhail Galanin <195510+mikhainin@users.noreply.github.com> Date: Thu, 8 Jun 2023 15:16:47 +0100 Subject: [PATCH 150/168] Suppress warning when the test run under non-root (#11400) When we run the test under non-root user, the test gets BORKed with: Warning: pcntl_unshare(): Error 1: No privilege to use these flags in ext/pcntl/tests/pcntl_unshare_03.skip.php on line 8 skip Insufficient privileges for CLONE_NEWUSER It looks like for the root-user there is the similar warning which is already suppressed (see the following "skip"). Let us skip the test properly if we aren't able to execute it --- ext/pcntl/tests/pcntl_unshare_03.phpt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/pcntl/tests/pcntl_unshare_03.phpt b/ext/pcntl/tests/pcntl_unshare_03.phpt index 56433c0ee9307..567ff6b77b21c 100644 --- a/ext/pcntl/tests/pcntl_unshare_03.phpt +++ b/ext/pcntl/tests/pcntl_unshare_03.phpt @@ -9,7 +9,7 @@ if (!function_exists("pcntl_unshare")) die("skip pcntl_unshare is not available" if (!defined("CLONE_NEWNET")) die("skip flag unavailable"); if (posix_getuid() !== 0 && (!defined("CLONE_NEWUSER") || - (pcntl_unshare(CLONE_NEWUSER) == false && pcntl_get_last_error() == PCNTL_EPERM))) { + (@pcntl_unshare(CLONE_NEWUSER) == false && pcntl_get_last_error() == PCNTL_EPERM))) { die("skip Insufficient privileges for CLONE_NEWUSER"); } if (@pcntl_unshare(CLONE_NEWNET) == false && pcntl_get_last_error() == PCNTL_EPERM) { From 50b4df18e0b0ce079d05954cad007990bc5ac2da Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 8 Jun 2023 17:44:55 +0200 Subject: 
[PATCH 151/168] Get rid of return value for php_libxml_unregister_node() (#11398) --- ext/libxml/libxml.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 5af3443069aba..d12c215367c3c 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -113,7 +113,7 @@ static int php_libxml_clear_object(php_libxml_node_object *object) return php_libxml_decrement_doc_ref(object); } -static int php_libxml_unregister_node(xmlNodePtr nodep) +static void php_libxml_unregister_node(xmlNodePtr nodep) { php_libxml_node_object *wrapper; @@ -130,8 +130,6 @@ static int php_libxml_unregister_node(xmlNodePtr nodep) nodeptr->node = NULL; } } - - return -1; } static void php_libxml_node_free(xmlNodePtr node) @@ -209,9 +207,7 @@ PHP_LIBXML_API void php_libxml_node_free_list(xmlNodePtr node) curnode = node->next; xmlUnlinkNode(node); - if (php_libxml_unregister_node(node) == 0) { - node->doc = NULL; - } + php_libxml_unregister_node(node); php_libxml_node_free(node); } } @@ -1270,9 +1266,7 @@ PHP_LIBXML_API void php_libxml_node_free_resource(xmlNodePtr node) default: php_libxml_node_free_list((xmlNodePtr) node->properties); } - if (php_libxml_unregister_node(node) == 0) { - node->doc = NULL; - } + php_libxml_unregister_node(node); php_libxml_node_free(node); } else { php_libxml_unregister_node(node); From a38e3c999ffd1d965692898901a449d511c56381 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 5 Jun 2023 21:56:51 +0200 Subject: [PATCH 152/168] Fix #79700: Bad performance with namespaced nodes due to wrong libxml assumption * Use a prepending strategy instead of appending in dom_set_old_ns() Looping to the end of the list is wasteful. We can just put the new nodes at the front of the list. I don't believe we can fully prepend, because libxml2 may assume that the xml namespace is the first one, so we'll put the new ones as the second one. 
* Reuse namespaces from doc->oldNs if possible in dom_get_ns() * Add a test for reconciling a reused namespace * Explain why there can't be a cycle between oldNs and nsDef Closes GH-11376. Also fixes #77894. --- NEWS | 6 +++ ext/dom/php_dom.c | 53 +++++++++++++------ ext/dom/tests/reconcile_reused_namespace.phpt | 42 +++++++++++++++ 3 files changed, 85 insertions(+), 16 deletions(-) create mode 100644 ext/dom/tests/reconcile_reused_namespace.phpt diff --git a/NEWS b/NEWS index b86e31ec70fa1..13c6669e88886 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,12 @@ PHP NEWS . Fix GH-11388 (Allow "final" modifier when importing a method from a trait). (nielsdos) +- DOM: + . Fix #79700 (wrong use of libxml oldNs leads to performance problem). + (nielsdos) + . Fix #77894 (DOMNode::C14N() very slow on generated DOMDocuments even after + normalisation). (nielsdos) + 08 Jun 2023, PHP 8.3.0alpha1 - CLI: diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 6867e5acf128e..d3401f007dc49 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -1369,11 +1369,16 @@ void dom_normalize (xmlNodePtr nodep) /* {{{ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) */ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { - xmlNs *cur; - if (doc == NULL) return; + ZEND_ASSERT(ns->next == NULL); + + /* Note: we'll use a prepend strategy instead of append to + * make sure we don't lose performance when the list is long. + * As libxml2 could assume the xml node is the first one, we'll place our + * new entries after the first one. 
*/ + if (doc->oldNs == NULL) { doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); if (doc->oldNs == NULL) { @@ -1383,13 +1388,10 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { doc->oldNs->type = XML_LOCAL_NAMESPACE; doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE); doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml"); + } else { + ns->next = doc->oldNs->next; } - - cur = doc->oldNs; - while (cur->next != NULL) { - cur = cur->next; - } - cur->next = ns; + doc->oldNs->next = ns; } /* }}} end dom_set_old_ns */ @@ -1411,6 +1413,9 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep) } else { prevns->next = nsdftptr; } + /* Note: we can't get here if the ns is already on the oldNs list. + * This is because in that case the definition won't be on the node, and + * therefore won't be in the nodep->nsDef list. */ dom_set_old_ns(doc, curns); curns = prevns; } @@ -1509,22 +1514,38 @@ NAMESPACE_ERR: Raised if /* {{{ xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) */ xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) { - xmlNsPtr nsptr = NULL; - - *errorcode = 0; + xmlNsPtr nsptr; if (! ((prefix && !strcmp (prefix, "xml") && strcmp(uri, (char *)XML_XML_NAMESPACE)) || (prefix && !strcmp (prefix, "xmlns") && strcmp(uri, (char *)DOM_XMLNS_NAMESPACE)) || (prefix && !strcmp(uri, (char *)DOM_XMLNS_NAMESPACE) && strcmp (prefix, "xmlns")))) { + /* Reuse the old namespaces from doc->oldNs if possible, before creating a new one. + * This will prevent the oldNs list from growing with duplicates. */ + xmlDocPtr doc = nodep->doc; + if (doc && doc->oldNs != NULL) { + nsptr = doc->oldNs; + do { + if (xmlStrEqual(nsptr->prefix, (xmlChar *)prefix) && xmlStrEqual(nsptr->href, (xmlChar *)uri)) { + goto out; + } + nsptr = nsptr->next; + } while (nsptr); + } + /* Couldn't reuse one, create a new one. 
*/ nsptr = xmlNewNs(nodep, (xmlChar *)uri, (xmlChar *)prefix); + if (UNEXPECTED(nsptr == NULL)) { + goto err; + } + } else { + goto err; } - if (nsptr == NULL) { - *errorcode = NAMESPACE_ERR; - } - +out: + *errorcode = 0; return nsptr; - +err: + *errorcode = NAMESPACE_ERR; + return NULL; } /* }}} end dom_get_ns */ diff --git a/ext/dom/tests/reconcile_reused_namespace.phpt b/ext/dom/tests/reconcile_reused_namespace.phpt new file mode 100644 index 0000000000000..5f9ab6c0d80fa --- /dev/null +++ b/ext/dom/tests/reconcile_reused_namespace.phpt @@ -0,0 +1,42 @@ +--TEST-- +Reconcile a reused namespace from doc->oldNs +--EXTENSIONS-- +dom +--FILE-- +createElementNS('http://www.w3.org/2000/xhtml', 'html'); + +$dom->loadXML(<< + +XML); +$root = $dom->firstElementChild; + +echo "Add first\n"; +$element = $dom->createElementNS('http://example.com/B', 'p', 'Hello World'); +$root->appendChild($element); + +echo "Add second\n"; +$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World'); +$root->appendChild($element); + +echo "Add third\n"; +$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World'); +$root->appendChild($element); + +var_dump($dom->saveXML()); + +?> +--EXPECT-- +Add first +Add second +Add third +string(201) " +Hello WorldHello WorldHello World +" From 709540ccdc5a3fc25fcdfceba322e6ad3aa3ce6f Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Fri, 9 Jun 2023 14:00:53 +0200 Subject: [PATCH 153/168] Fix add/remove observer API with multiple observers installed Depending on the order in which observers were installed, some observers might have been executed twice after removal of another observer. Also, adding an observer could produce a bogus pointer. 
--- Zend/zend_observer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Zend/zend_observer.c b/Zend/zend_observer.c index 79929bfdd80e5..2cb4db914758a 100644 --- a/Zend/zend_observer.c +++ b/Zend/zend_observer.c @@ -158,9 +158,8 @@ static bool zend_observer_remove_handler(void **first_handler, void *old_handler } else { if (cur_handler != last_handler) { memmove(cur_handler, cur_handler + 1, sizeof(cur_handler) * (last_handler - cur_handler)); - } else { - *last_handler = NULL; } + *last_handler = NULL; } return true; } @@ -196,7 +195,7 @@ ZEND_API void zend_observer_add_end_handler(zend_function *function, zend_observ if (*end_handler != ZEND_OBSERVER_NOT_OBSERVED) { // there's no space for new handlers, then it's forbidden to call this function ZEND_ASSERT(end_handler[registered_observers - 1] == NULL); - memmove(end_handler + 1, end_handler, registered_observers - 1); + memmove(end_handler + 1, end_handler, sizeof(end_handler) * (registered_observers - 1)); } *end_handler = end; } From fd09728bb6a5c8f7c7320ae1e60a2db48c765ce6 Mon Sep 17 00:00:00 2001 From: James Lucas Date: Fri, 28 Apr 2023 11:02:49 +1000 Subject: [PATCH 154/168] Fix bug GH-9356: Incomplete SAN validation of IPv6 address IPv6 addresses are valid entries in subjectAltNames. Certificate Authorities may issue certificates including IPv6 addresses except if they fall within addresses in the RFC 4193 range. Google and CloudFlare provide IPv6 addresses in their DNS over HTTPS services. Internal CAs do not have those restrictions and can issue Unique local addresses in certificates. Closes GH-11145 --- NEWS | 4 ++ ext/openssl/tests/san_ipv6_peer_matching.phpt | 69 +++++++++++++++++++ ext/openssl/xp_ssl.c | 47 +++++++++++-- 3 files changed, 116 insertions(+), 4 deletions(-) create mode 100644 ext/openssl/tests/san_ipv6_peer_matching.phpt diff --git a/NEWS b/NEWS index 33d789b6b1714..8eae4ccaa5e7c 100644 --- a/NEWS +++ b/NEWS @@ -34,6 +34,10 @@ PHP NEWS . 
Fixed bug GH-11336 (php still tries to unlock the shared memory ZendSem with opcache.file_cache_only=1 but it was never locked). (nielsdos) +- OpenSSL: + . Fixed bug GH-9356 Incomplete validation of IPv6 Address fields in + subjectAltNames (James Lucas, Jakub Zelenka). + - SPL: . Fixed bug GH-11338 (SplFileInfo empty getBasename with more than one slash). (nielsdos) diff --git a/ext/openssl/tests/san_ipv6_peer_matching.phpt b/ext/openssl/tests/san_ipv6_peer_matching.phpt new file mode 100644 index 0000000000000..81966025d3969 --- /dev/null +++ b/ext/openssl/tests/san_ipv6_peer_matching.phpt @@ -0,0 +1,69 @@ +--TEST-- +IPv6 Peer verification matches SAN names +--EXTENSIONS-- +openssl +--SKIPIF-- + +--FILE-- + [ + 'local_cert' => '%s', + ]]); + + $server = stream_socket_server($serverUri, $errno, $errstr, $serverFlags, $serverCtx); + phpt_notify(); + + @stream_socket_accept($server, 1); + @stream_socket_accept($server, 1); +CODE; +$serverCode = sprintf($serverCode, $certFile); + +$clientCode = <<<'CODE' + $serverUri = "ssl://[::1]:64324"; + $clientFlags = STREAM_CLIENT_CONNECT; + $clientCtx = stream_context_create(['ssl' => [ + 'verify_peer' => false, + ]]); + + phpt_wait(); + + stream_context_set_option($clientCtx, 'ssl', 'peer_name', '2001:db8:85a3:8d3:1319:8a2e:370:7348'); + var_dump(stream_socket_client($serverUri, $errno, $errstr, 1, $clientFlags, $clientCtx)); + + stream_context_set_option($clientCtx, 'ssl', 'peer_name', '2001:db8:85a3:8d3:1319:8a2e:370:7349'); + var_dump(stream_socket_client($serverUri, $errno, $errstr, 1, $clientFlags, $clientCtx)); +CODE; + +include 'CertificateGenerator.inc'; +$certificateGenerator = new CertificateGenerator(); +$certificateGenerator->saveNewCertAsFileWithKey(null, $certFile, null, $san); + +include 'ServerClientTestCase.inc'; +ServerClientTestCase::getInstance()->run($clientCode, $serverCode); +?> +--CLEAN-- + +--EXPECTF-- +resource(%d) of type (stream) + +Warning: stream_socket_client(): Unable to locate peer certificate 
CN in %s on line %d + +Warning: stream_socket_client(): Failed to enable crypto in %s on line %d + +Warning: stream_socket_client(): Unable to connect to ssl://[::1]:64324 (Unknown error) in %s on line %d +bool(false) diff --git a/ext/openssl/xp_ssl.c b/ext/openssl/xp_ssl.c index 9aac4a0b70a28..5b3ad2c1f8863 100644 --- a/ext/openssl/xp_ssl.c +++ b/ext/openssl/xp_ssl.c @@ -39,6 +39,7 @@ #ifdef PHP_WIN32 #include "win32/winutil.h" #include "win32/time.h" +#include #include /* These are from Wincrypt.h, they conflict with OpenSSL */ #undef X509_NAME @@ -46,6 +47,10 @@ #undef X509_EXTENSIONS #endif +#ifdef HAVE_ARPA_INET_H +#include +#endif + /* Flags for determining allowed stream crypto methods */ #define STREAM_CRYPTO_IS_CLIENT (1<<0) #define STREAM_CRYPTO_METHOD_SSLv2 (1<<1) @@ -110,6 +115,21 @@ #define PHP_X509_NAME_ENTRY_TO_UTF8(ne, i, out) \ ASN1_STRING_to_UTF8(&out, X509_NAME_ENTRY_get_data(X509_NAME_get_entry(ne, i))) +/* Used for IPv6 Address peer verification */ +#define EXPAND_IPV6_ADDRESS(_str, _bytes) \ + do { \ + snprintf(_str, 40, "%X:%X:%X:%X:%X:%X:%X:%X", \ + _bytes[0] << 8 | _bytes[1], \ + _bytes[2] << 8 | _bytes[3], \ + _bytes[4] << 8 | _bytes[5], \ + _bytes[6] << 8 | _bytes[7], \ + _bytes[8] << 8 | _bytes[9], \ + _bytes[10] << 8 | _bytes[11], \ + _bytes[12] << 8 | _bytes[13], \ + _bytes[14] << 8 | _bytes[15] \ + ); \ + } while(0) + #if PHP_OPENSSL_API_VERSION < 0x10100 static RSA *php_openssl_tmp_rsa_cb(SSL *s, int is_export, int keylength); #endif @@ -421,6 +441,18 @@ static bool php_openssl_matches_san_list(X509 *peer, const char *subject_name) / GENERAL_NAMES *alt_names = X509_get_ext_d2i(peer, NID_subject_alt_name, 0, 0); int alt_name_count = sk_GENERAL_NAME_num(alt_names); +#if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) + /* detect if subject name is an IPv6 address and expand once if required */ + char subject_name_ipv6_expanded[40]; + unsigned char ipv6[16]; + bool subject_name_is_ipv6 = false; + subject_name_ipv6_expanded[0] = 0; + if 
(inet_pton(AF_INET6, subject_name, &ipv6)) { + EXPAND_IPV6_ADDRESS(subject_name_ipv6_expanded, ipv6); + subject_name_is_ipv6 = true; + } +#endif + for (i = 0; i < alt_name_count; i++) { GENERAL_NAME *san = sk_GENERAL_NAME_value(alt_names, i); @@ -459,10 +491,17 @@ static bool php_openssl_matches_san_list(X509 *peer, const char *subject_name) / return 1; } } - /* No, we aren't bothering to check IPv6 addresses. Why? - * Because IP SAN names are officially deprecated and are - * not allowed by CAs starting in 2015. Deal with it. - */ +#if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) + else if (san->d.ip->length == 16 && subject_name_is_ipv6) { + ipbuffer[0] = 0; + EXPAND_IPV6_ADDRESS(ipbuffer, san->d.iPAddress->data); + if (strcasecmp((const char*)subject_name_ipv6_expanded, (const char*)ipbuffer) == 0) { + sk_GENERAL_NAME_pop_free(alt_names, GENERAL_NAME_free); + + return 1; + } + } +#endif } } From 3fc013b2e24bcb38805975be4ae913a925075d41 Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Fri, 9 Jun 2023 16:48:00 +0100 Subject: [PATCH 155/168] Fix CS and checking for IPv6 SAN verify --- ext/openssl/xp_ssl.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/ext/openssl/xp_ssl.c b/ext/openssl/xp_ssl.c index 5b3ad2c1f8863..6890810125cef 100644 --- a/ext/openssl/xp_ssl.c +++ b/ext/openssl/xp_ssl.c @@ -115,6 +115,7 @@ #define PHP_X509_NAME_ENTRY_TO_UTF8(ne, i, out) \ ASN1_STRING_to_UTF8(&out, X509_NAME_ENTRY_get_data(X509_NAME_get_entry(ne, i))) +#if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) /* Used for IPv6 Address peer verification */ #define EXPAND_IPV6_ADDRESS(_str, _bytes) \ do { \ @@ -129,6 +130,8 @@ _bytes[14] << 8 | _bytes[15] \ ); \ } while(0) +#define HAVE_IPV6_SAN 1 +#endif #if PHP_OPENSSL_API_VERSION < 0x10100 static RSA *php_openssl_tmp_rsa_cb(SSL *s, int is_export, int keylength); @@ -441,16 +444,17 @@ static bool php_openssl_matches_san_list(X509 *peer, const char *subject_name) / GENERAL_NAMES *alt_names = 
X509_get_ext_d2i(peer, NID_subject_alt_name, 0, 0); int alt_name_count = sk_GENERAL_NAME_num(alt_names); -#if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) +#ifdef HAVE_IPV6_SAN /* detect if subject name is an IPv6 address and expand once if required */ - char subject_name_ipv6_expanded[40]; - unsigned char ipv6[16]; - bool subject_name_is_ipv6 = false; - subject_name_ipv6_expanded[0] = 0; + char subject_name_ipv6_expanded[40]; + unsigned char ipv6[16]; + bool subject_name_is_ipv6 = false; + subject_name_ipv6_expanded[0] = 0; + if (inet_pton(AF_INET6, subject_name, &ipv6)) { - EXPAND_IPV6_ADDRESS(subject_name_ipv6_expanded, ipv6); - subject_name_is_ipv6 = true; - } + EXPAND_IPV6_ADDRESS(subject_name_ipv6_expanded, ipv6); + subject_name_is_ipv6 = true; + } #endif for (i = 0; i < alt_name_count; i++) { @@ -491,7 +495,7 @@ static bool php_openssl_matches_san_list(X509 *peer, const char *subject_name) / return 1; } } -#if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) +#ifdef HAVE_IPV6_SAN else if (san->d.ip->length == 16 && subject_name_is_ipv6) { ipbuffer[0] = 0; EXPAND_IPV6_ADDRESS(ipbuffer, san->d.iPAddress->data); From f2d673fb18cc6a6c88bf588f39fd1aa9dcfec964 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Thu, 8 Jun 2023 21:22:20 +0200 Subject: [PATCH 156/168] Fix #70359 and #78577: segfaults with DOMNameSpaceNode * Fix type confusion and parent reference * Manually manage the lifetime of the parent * Add regression tests * Break out to a helper, and apply the use-after-free fix to xpath Closes GH-11402. 
--- NEWS | 3 + ext/dom/element.c | 25 ++---- ext/dom/php_dom.c | 61 ++++++++++++-- ext/dom/php_dom.h | 13 +++ ext/dom/tests/bug70359.phpt | 83 +++++++++++++++++++ ext/dom/tests/bug78577.phpt | 33 ++++++++ ext/dom/tests/xpath_domnamespacenode.phpt | 2 +- .../xpath_domnamespacenode_advanced.phpt | 75 +++++++++++++++++ ext/dom/xpath.c | 54 +++++------- 9 files changed, 292 insertions(+), 57 deletions(-) create mode 100644 ext/dom/tests/bug70359.phpt create mode 100644 ext/dom/tests/bug78577.phpt create mode 100644 ext/dom/tests/xpath_domnamespacenode_advanced.phpt diff --git a/NEWS b/NEWS index 8eae4ccaa5e7c..139c696374456 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,9 @@ PHP NEWS with itself). (nielsdos) . Fixed bug #77686 (Removed elements are still returned by getElementById). (nielsdos) + . Fixed bug #70359 (print_r() on DOMAttr causes Segfault in + php_libxml_node_free_list()). (nielsdos) + . Fixed bug #78577 (Crash in DOMNameSpace debug info handlers). (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/element.c b/ext/dom/element.c index 78113d72776bd..f84caa629cc66 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -150,6 +150,7 @@ int dom_element_schema_type_info_read(dom_object *obj, zval *retval) /* }}} */ +/* Note: the object returned is not necessarily a node, but can be an attribute or a namespace declaration. 
*/ static xmlNodePtr dom_get_dom1_attribute(xmlNodePtr elem, xmlChar *name) /* {{{ */ { int len; @@ -376,25 +377,13 @@ PHP_METHOD(DOMElement, getAttributeNode) } if (attrp->type == XML_NAMESPACE_DECL) { - xmlNsPtr curns; - xmlNodePtr nsparent; - - nsparent = attrp->_private; - curns = xmlNewNs(NULL, attrp->name, NULL); - if (attrp->children) { - curns->prefix = xmlStrdup((xmlChar *) attrp->children); - } - if (attrp->children) { - attrp = xmlNewDocNode(nodep->doc, NULL, (xmlChar *) attrp->children, attrp->name); - } else { - attrp = xmlNewDocNode(nodep->doc, NULL, (xmlChar *)"xmlns", attrp->name); - } - attrp->type = XML_NAMESPACE_DECL; - attrp->parent = nsparent; - attrp->ns = curns; + xmlNsPtr original = (xmlNsPtr) attrp; + /* Keep parent alive, because we're a fake child. */ + GC_ADDREF(&intern->std); + (void) php_dom_create_fake_namespace_decl(nodep, original, return_value, intern); + } else { + DOM_RET_OBJ((xmlNodePtr) attrp, &ret, intern); } - - DOM_RET_OBJ((xmlNodePtr) attrp, &ret, intern); } /* }}} end dom_element_get_attribute_node */ diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index df20093221f16..9e0bb1f3d1d02 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -61,6 +61,7 @@ PHP_DOM_EXPORT zend_class_entry *dom_namespace_node_class_entry; zend_object_handlers dom_object_handlers; zend_object_handlers dom_nnodemap_object_handlers; +zend_object_handlers dom_object_namespace_node_handlers; #ifdef LIBXML_XPATH_ENABLED zend_object_handlers dom_xpath_object_handlers; #endif @@ -86,6 +87,9 @@ static HashTable dom_xpath_prop_handlers; #endif /* }}} */ +static zend_object *dom_objects_namespace_node_new(zend_class_entry *class_type); +static void dom_object_namespace_node_free_storage(zend_object *object); + typedef int (*dom_read_t)(dom_object *obj, zval *retval); typedef int (*dom_write_t)(dom_object *obj, zval *newval); @@ -570,6 +574,10 @@ PHP_MINIT_FUNCTION(dom) dom_nnodemap_object_handlers.read_dimension = dom_nodelist_read_dimension; 
dom_nnodemap_object_handlers.has_dimension = dom_nodelist_has_dimension; + memcpy(&dom_object_namespace_node_handlers, &dom_object_handlers, sizeof(zend_object_handlers)); + dom_object_namespace_node_handlers.offset = XtOffsetOf(dom_object_namespace_node, dom.std); + dom_object_namespace_node_handlers.free_obj = dom_object_namespace_node_free_storage; + zend_hash_init(&classes, 0, NULL, NULL, 1); dom_domexception_class_entry = register_class_DOMException(zend_ce_exception); @@ -604,7 +612,7 @@ PHP_MINIT_FUNCTION(dom) zend_hash_add_ptr(&classes, dom_node_class_entry->name, &dom_node_prop_handlers); dom_namespace_node_class_entry = register_class_DOMNameSpaceNode(); - dom_namespace_node_class_entry->create_object = dom_objects_new; + dom_namespace_node_class_entry->create_object = dom_objects_namespace_node_new; zend_hash_init(&dom_namespace_node_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1); dom_register_prop_handler(&dom_namespace_node_prop_handlers, "nodeName", sizeof("nodeName")-1, dom_node_node_name_read, NULL); @@ -1001,10 +1009,8 @@ void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xml } /* }}} */ -static dom_object* dom_objects_set_class(zend_class_entry *class_type) /* {{{ */ +static void dom_objects_set_class_ex(zend_class_entry *class_type, dom_object *intern) { - dom_object *intern = zend_object_alloc(sizeof(dom_object), class_type); - zend_class_entry *base_class = class_type; while ((base_class->type != ZEND_INTERNAL_CLASS || base_class->info.internal.module->module_number != dom_module_entry.module_number) && base_class->parent != NULL) { base_class = base_class->parent; @@ -1014,10 +1020,14 @@ static dom_object* dom_objects_set_class(zend_class_entry *class_type) /* {{{ */ zend_object_std_init(&intern->std, class_type); object_properties_init(&intern->std, class_type); +} +static dom_object* dom_objects_set_class(zend_class_entry *class_type) +{ + dom_object *intern = zend_object_alloc(sizeof(dom_object), class_type); + 
dom_objects_set_class_ex(class_type, intern); return intern; } -/* }}} */ /* {{{ dom_objects_new */ zend_object *dom_objects_new(zend_class_entry *class_type) @@ -1028,6 +1038,25 @@ zend_object *dom_objects_new(zend_class_entry *class_type) } /* }}} */ +static zend_object *dom_objects_namespace_node_new(zend_class_entry *class_type) +{ + dom_object_namespace_node *intern = zend_object_alloc(sizeof(dom_object_namespace_node), class_type); + dom_objects_set_class_ex(class_type, &intern->dom); + intern->dom.std.handlers = &dom_object_namespace_node_handlers; + return &intern->dom.std; +} + +static void dom_object_namespace_node_free_storage(zend_object *object) +{ + dom_object_namespace_node *intern = php_dom_namespace_node_obj_from_obj(object); + if (intern->parent_intern != NULL) { + zval tmp; + ZVAL_OBJ(&tmp, &intern->parent_intern->std); + zval_ptr_dtor(&tmp); + } + dom_objects_free_storage(object); +} + #ifdef LIBXML_XPATH_ENABLED /* {{{ zend_object dom_xpath_objects_new(zend_class_entry *class_type) */ zend_object *dom_xpath_objects_new(zend_class_entry *class_type) @@ -1550,6 +1579,28 @@ xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName) { } /* }}} end dom_get_nsdecl */ +/* Note: Assumes the additional lifetime was already added in the caller. */ +xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr original, zval *return_value, dom_object *parent_intern) +{ + xmlNodePtr attrp; + xmlNsPtr curns = xmlNewNs(NULL, original->href, NULL); + if (original->prefix) { + curns->prefix = xmlStrdup(original->prefix); + attrp = xmlNewDocNode(nodep->doc, NULL, (xmlChar *) original->prefix, original->href); + } else { + attrp = xmlNewDocNode(nodep->doc, NULL, (xmlChar *)"xmlns", original->href); + } + attrp->type = XML_NAMESPACE_DECL; + attrp->parent = nodep; + attrp->ns = curns; + + php_dom_create_object(attrp, return_value, parent_intern); + /* This object must exist, because we just created an object for it via php_dom_create_object(). 
*/ + dom_object *obj = ((php_libxml_node_ptr *)attrp->_private)->_private; + php_dom_namespace_node_obj_from_obj(&obj->std)->parent_intern = parent_intern; + return attrp; +} + static zval *dom_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv) /* {{{ */ { zval offset_copy; diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index ac23d1fc25bb5..6ed382b6f84af 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -93,6 +93,18 @@ typedef struct { HashPosition pos; } php_dom_iterator; +typedef struct { + /* This may be a fake object that isn't actually in the children list of the parent. + * This is because some namespace declaration nodes aren't stored on the parent in libxml2, so we have to fake it. + * We could use a zval for this, but since this is always going to be an object let's save space... */ + dom_object *parent_intern; + dom_object dom; +} dom_object_namespace_node; + +static inline dom_object_namespace_node *php_dom_namespace_node_obj_from_obj(zend_object *obj) { + return (dom_object_namespace_node*)((char*)(obj) - XtOffsetOf(dom_object_namespace_node, dom.std)); +} + #include "domexception.h" dom_object *dom_object_get_data(xmlNodePtr obj); @@ -126,6 +138,7 @@ xmlNode *php_dom_libxml_hash_iter(xmlHashTable *ht, int index); xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index); zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, int by_ref); void dom_set_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece, zend_class_entry *ce); +xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr original, zval *return_value, dom_object *parent_intern); void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc); void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc); diff --git a/ext/dom/tests/bug70359.phpt b/ext/dom/tests/bug70359.phpt new file mode 100644 index 0000000000000..b0a5ae57a3232 --- /dev/null +++ 
b/ext/dom/tests/bug70359.phpt @@ -0,0 +1,83 @@ +--TEST-- +Bug #70359 (print_r() on DOMAttr causes Segfault in php_libxml_node_free_list()) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + +XML); +$spaceNode = $dom->documentElement->getAttributeNode('xmlns'); +print_r($spaceNode); + +echo "-- Test with parent and non-ns attribute --\n"; + +$dom = new DOMDocument(); +$dom->loadXML(<< + + + +XML); +$spaceNode = $dom->documentElement->firstElementChild->getAttributeNode('myattrib'); +var_dump($spaceNode->nodeType); +var_dump($spaceNode->nodeValue); + +$dom->documentElement->firstElementChild->remove(); +try { + print_r($spaceNode->parentNode); +} catch (\Error $e) { + echo $e->getMessage(), "\n"; +} + +echo "-- Test with parent and ns attribute --\n"; + +$dom = new DOMDocument(); +$dom->loadXML(<< + + + +XML); +$spaceNode = $dom->documentElement->firstElementChild->getAttributeNode('xmlns:xsi'); +print_r($spaceNode); + +$dom->documentElement->firstElementChild->remove(); +var_dump($spaceNode->parentNode->nodeName); // Shouldn't crash + +?> +--EXPECT-- +-- Test without parent -- +DOMNameSpaceNode Object +( + [nodeName] => xmlns + [nodeValue] => http://www.sitemaps.org/schemas/sitemap/0.9 + [nodeType] => 18 + [prefix] => + [localName] => xmlns + [namespaceURI] => http://www.sitemaps.org/schemas/sitemap/0.9 + [ownerDocument] => (object value omitted) + [parentNode] => (object value omitted) +) +-- Test with parent and non-ns attribute -- +int(2) +string(3) "bar" +Couldn't fetch DOMAttr. 
Node no longer exists +-- Test with parent and ns attribute -- +DOMNameSpaceNode Object +( + [nodeName] => xmlns:xsi + [nodeValue] => fooooooooooooooooooooo + [nodeType] => 18 + [prefix] => xsi + [localName] => xsi + [namespaceURI] => fooooooooooooooooooooo + [ownerDocument] => (object value omitted) + [parentNode] => (object value omitted) +) +string(3) "url" diff --git a/ext/dom/tests/bug78577.phpt b/ext/dom/tests/bug78577.phpt new file mode 100644 index 0000000000000..2631efc1e206c --- /dev/null +++ b/ext/dom/tests/bug78577.phpt @@ -0,0 +1,33 @@ +--TEST-- +Bug #78577 (Crash in DOMNameSpace debug info handlers) +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + +$attr = $doc->documentElement->getAttributeNode('xmlns'); +var_dump($attr); + +?> +--EXPECT-- +object(DOMNameSpaceNode)#3 (8) { + ["nodeName"]=> + string(5) "xmlns" + ["nodeValue"]=> + string(19) "http://php.net/test" + ["nodeType"]=> + int(18) + ["prefix"]=> + string(0) "" + ["localName"]=> + string(5) "xmlns" + ["namespaceURI"]=> + string(19) "http://php.net/test" + ["ownerDocument"]=> + string(22) "(object value omitted)" + ["parentNode"]=> + string(22) "(object value omitted)" +} diff --git a/ext/dom/tests/xpath_domnamespacenode.phpt b/ext/dom/tests/xpath_domnamespacenode.phpt index f0bfbed10dda6..97059c18e54da 100644 --- a/ext/dom/tests/xpath_domnamespacenode.phpt +++ b/ext/dom/tests/xpath_domnamespacenode.phpt @@ -17,7 +17,7 @@ var_dump($nodes->item(0)); ?> --EXPECT-- -object(DOMNameSpaceNode)#3 (8) { +object(DOMNameSpaceNode)#4 (8) { ["nodeName"]=> string(9) "xmlns:xml" ["nodeValue"]=> diff --git a/ext/dom/tests/xpath_domnamespacenode_advanced.phpt b/ext/dom/tests/xpath_domnamespacenode_advanced.phpt new file mode 100644 index 0000000000000..bbc49dc54652d --- /dev/null +++ b/ext/dom/tests/xpath_domnamespacenode_advanced.phpt @@ -0,0 +1,75 @@ +--TEST-- +DOMXPath::query() can return DOMNodeList with DOMNameSpaceNode items - advanced variation +--EXTENSIONS-- +dom +--FILE-- +loadXML(<<<'XML' + + Hello 
PHP! + +XML); + +$xpath = new DOMXPath($dom); +$query = '//namespace::*'; + +echo "-- All namespace attributes --\n"; + +foreach ($xpath->query($query) as $attribute) { + echo $attribute->nodeName . ' = ' . $attribute->nodeValue . PHP_EOL; + var_dump($attribute->parentNode->tagName); +} + +echo "-- All namespace attributes with removal attempt --\n"; + +foreach ($xpath->query($query) as $attribute) { + echo "Before: ", $attribute->parentNode->tagName, "\n"; + // Second & third attempt should fail because it's no longer in the document + try { + $attribute->parentNode->remove(); + } catch (\DOMException $e) { + echo $e->getMessage(), "\n"; + } + // However, it should not cause a use-after-free + echo "After: ", $attribute->parentNode->tagName, "\n"; +} + +?> +--EXPECT-- +-- All namespace attributes -- +xmlns:xml = http://www.w3.org/XML/1998/namespace +string(4) "root" +xmlns:bar = http://example.com/bar +string(4) "root" +xmlns:foo = http://example.com/foo +string(4) "root" +xmlns:xml = http://www.w3.org/XML/1998/namespace +string(5) "child" +xmlns:bar = http://example.com/bar +string(5) "child" +xmlns:foo = http://example.com/foo +string(5) "child" +xmlns:baz = http://example.com/baz +string(5) "child" +-- All namespace attributes with removal attempt -- +Before: root +After: root +Before: root +Not Found Error +After: root +Before: root +Not Found Error +After: root +Before: child +After: child +Before: child +Not Found Error +After: child +Before: child +Not Found Error +After: child +Before: child +Not Found Error +After: child diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index f546733a436d1..62e11f6b99bfb 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -101,24 +101,18 @@ static void dom_xpath_ext_function_php(xmlXPathParserContextPtr ctxt, int nargs, zval child; /* not sure, if we need this... 
it's copied from xpath.c */ if (node->type == XML_NAMESPACE_DECL) { - xmlNsPtr curns; - xmlNodePtr nsparent; - - nsparent = node->_private; - curns = xmlNewNs(NULL, node->name, NULL); - if (node->children) { - curns->prefix = xmlStrdup((xmlChar *) node->children); - } - if (node->children) { - node = xmlNewDocNode(node->doc, NULL, (xmlChar *) node->children, node->name); - } else { - node = xmlNewDocNode(node->doc, NULL, (xmlChar *) "xmlns", node->name); - } - node->type = XML_NAMESPACE_DECL; - node->parent = nsparent; - node->ns = curns; + xmlNodePtr nsparent = node->_private; + xmlNsPtr original = (xmlNsPtr) node; + + /* Make sure parent dom object exists, so we can take an extra reference. */ + zval parent_zval; /* don't destroy me, my lifetime is transfered to the fake namespace decl */ + php_dom_create_object(nsparent, &parent_zval, &intern->dom); + dom_object *parent_intern = Z_DOMOBJ_P(&parent_zval); + + node = php_dom_create_fake_namespace_decl(nsparent, original, &child, parent_intern); + } else { + php_dom_create_object(node, &child, &intern->dom); } - php_dom_create_object(node, &child, &intern->dom); add_next_index_zval(&fci.params[i], &child); } } else { @@ -421,24 +415,18 @@ static void php_xpath_eval(INTERNAL_FUNCTION_PARAMETERS, int type) /* {{{ */ zval child; if (node->type == XML_NAMESPACE_DECL) { - xmlNsPtr curns; - xmlNodePtr nsparent; + xmlNodePtr nsparent = node->_private; + xmlNsPtr original = (xmlNsPtr) node; - nsparent = node->_private; - curns = xmlNewNs(NULL, node->name, NULL); - if (node->children) { - curns->prefix = xmlStrdup((xmlChar *) node->children); - } - if (node->children) { - node = xmlNewDocNode(docp, NULL, (xmlChar *) node->children, node->name); - } else { - node = xmlNewDocNode(docp, NULL, (xmlChar *) "xmlns", node->name); - } - node->type = XML_NAMESPACE_DECL; - node->parent = nsparent; - node->ns = curns; + /* Make sure parent dom object exists, so we can take an extra reference. 
*/ + zval parent_zval; /* don't destroy me, my lifetime is transfered to the fake namespace decl */ + php_dom_create_object(nsparent, &parent_zval, &intern->dom); + dom_object *parent_intern = Z_DOMOBJ_P(&parent_zval); + + node = php_dom_create_fake_namespace_decl(nsparent, original, &child, parent_intern); + } else { + php_dom_create_object(node, &child, &intern->dom); } - php_dom_create_object(node, &child, &intern->dom); add_next_index_zval(&retval, &child); } } else { From d22d0e26dcc064e1765c5b89acbb3b5e73b8aa74 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 4 Feb 2023 02:12:49 +0100 Subject: [PATCH 157/168] Implement GH-8641: STREAM_NOTIFY_COMPLETED over HTTP never emitted This adds support for the completed event. Since the read handler could be entered twice towards the end of the stream we remember what the eof flag was before reading so we can emit the completed event when the flag changes to true. Closes GH-10505. --- NEWS | 4 +++ UPGRADING | 4 +++ ext/standard/tests/http/gh8641.phpt | 34 +++++++++++++++++++++++++ main/streams/php_stream_context.h | 4 +++ main/streams/streams.c | 39 +++++++++++++++++++++++------ 5 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 ext/standard/tests/http/gh8641.phpt diff --git a/NEWS b/NEWS index 13c6669e88886..c77f990d4a5e4 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,10 @@ PHP NEWS . Fix #77894 (DOMNode::C14N() very slow on generated DOMDocuments even after normalisation). (nielsdos) +- Streams: + . Implement GH-8641 (STREAM_NOTIFY_COMPLETED over HTTP never emitted). + (nielsdos, Jakub Zelenka) + 08 Jun 2023, PHP 8.3.0alpha1 - CLI: diff --git a/UPGRADING b/UPGRADING index 1320b919206c9..9ec95f6d63759 100644 --- a/UPGRADING +++ b/UPGRADING @@ -72,6 +72,10 @@ PHP 8.3 UPGRADE NOTES . posix_ttyname() now raises type warnings for integers following the usual ZPP semantics and value warnings for invalid file descriptor integers. +- Streams + . 
Streams can now emit the STREAM_NOTIFY_COMPLETED notification. This was previously + not implemented. + ======================================== 3. Changes in SAPI modules ======================================== diff --git a/ext/standard/tests/http/gh8641.phpt b/ext/standard/tests/http/gh8641.phpt new file mode 100644 index 0000000000000..9ccedc443dfec --- /dev/null +++ b/ext/standard/tests/http/gh8641.phpt @@ -0,0 +1,34 @@ +--TEST-- +GH-8641 ([Stream] STREAM_NOTIFY_COMPLETED over HTTP never emitted) +--SKIPIF-- + +--INI-- +allow_url_fopen=1 +--FILE-- + "stream_notification_callback")); + +$responses = array( + "data://text/plain,HTTP/1.0 200 Ok\r\nContent-Length: 11\r\n\r\nHello world", +); + +['pid' => $pid, 'uri' => $uri] = http_server($responses, $output); + +$f = file_get_contents($uri, 0, $ctx); + +http_server_kill($pid); +var_dump($f); +?> +--EXPECTF-- +8 11 11 +string(11) "Hello world" diff --git a/main/streams/php_stream_context.h b/main/streams/php_stream_context.h index c98f5420ac3e6..d4ebe29bc162e 100644 --- a/main/streams/php_stream_context.h +++ b/main/streams/php_stream_context.h @@ -94,6 +94,10 @@ END_EXTERN_C() php_stream_notification_notify((context), PHP_STREAM_NOTIFY_PROGRESS, PHP_STREAM_NOTIFY_SEVERITY_INFO, \ NULL, 0, (bsofar), (bmax), NULL); } } while(0) +#define php_stream_notify_completed(context) do { if ((context) && (context)->notifier) { \ + php_stream_notification_notify((context), PHP_STREAM_NOTIFY_COMPLETED, PHP_STREAM_NOTIFY_SEVERITY_INFO, \ + NULL, 0, (context)->notifier->progress, (context)->notifier->progress_max, NULL); } } while(0) + #define php_stream_notify_progress_init(context, sofar, bmax) do { if ((context) && (context)->notifier) { \ (context)->notifier->progress = (sofar); \ (context)->notifier->progress_max = (bmax); \ diff --git a/main/streams/streams.c b/main/streams/streams.c index eeb3cdf101c6e..14b534c998a82 100644 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -542,6 +542,9 @@ PHPAPI zend_result 
_php_stream_fill_read_buffer(php_stream *stream, size_t size) { /* allocate/fill the buffer */ + zend_result retval; + bool old_eof = stream->eof; + if (stream->readfilters.head) { size_t to_read_now = MIN(size, stream->chunk_size); char *chunk_buf; @@ -562,7 +565,8 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) justread = stream->ops->read(stream, chunk_buf, stream->chunk_size); if (justread < 0 && stream->writepos == stream->readpos) { efree(chunk_buf); - return FAILURE; + retval = FAILURE; + goto out_check_eof; } else if (justread > 0) { bucket = php_stream_bucket_new(stream, chunk_buf, justread, 0, 0); @@ -633,7 +637,8 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) * further reads should fail. */ stream->eof = 1; efree(chunk_buf); - return FAILURE; + retval = FAILURE; + goto out_is_eof; } if (justread <= 0) { @@ -643,7 +648,6 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) efree(chunk_buf); return SUCCESS; - } else { /* is there enough data in the buffer ? 
*/ if (stream->writepos - stream->readpos < (zend_off_t)size) { @@ -670,12 +674,22 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) stream->readbuflen - stream->writepos ); if (justread < 0) { - return FAILURE; + retval = FAILURE; + goto out_check_eof; } stream->writepos += justread; + retval = SUCCESS; + goto out_check_eof; } return SUCCESS; } + +out_check_eof: + if (old_eof != stream->eof) { +out_is_eof: + php_stream_notify_completed(PHP_STREAM_CONTEXT(stream)); + } + return retval; } PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) @@ -1124,6 +1138,7 @@ PHPAPI zend_string *php_stream_get_record(php_stream *stream, size_t maxlen, con static ssize_t _php_stream_write_buffer(php_stream *stream, const char *buf, size_t count) { ssize_t didwrite = 0; + ssize_t retval; /* if we have a seekable stream we need to ensure that data is written at the * current stream->position. This means invalidating the read buffer and then @@ -1134,15 +1149,19 @@ static ssize_t _php_stream_write_buffer(php_stream *stream, const char *buf, siz stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position); } + bool old_eof = stream->eof; + while (count > 0) { ssize_t justwrote = stream->ops->write(stream, buf, count); if (justwrote <= 0) { /* If we already successfully wrote some bytes and a write error occurred * later, report the successfully written bytes. */ if (didwrite == 0) { - return justwrote; + retval = justwrote; + goto out; } - return didwrite; + retval = didwrite; + goto out; } buf += justwrote; @@ -1151,7 +1170,13 @@ static ssize_t _php_stream_write_buffer(php_stream *stream, const char *buf, siz stream->position += justwrote; } - return didwrite; + retval = didwrite; + +out: + if (old_eof != stream->eof) { + php_stream_notify_completed(PHP_STREAM_CONTEXT(stream)); + } + return retval; } /* push some data through the write filter chain. 
From 49fbbea2ea5fe1f6bab0719c95a46a119ea3d91f Mon Sep 17 00:00:00 2001 From: Jakub Zelenka Date: Sat, 10 Jun 2023 18:22:26 +0100 Subject: [PATCH 158/168] Fix GH-10406: fgets on a redis socket connection fails on PHP 8.3 This is an alternative implementation for GH-10406 that resets the has_buffered_data flag after finishing stream read so it does not impact other ops->read use like for example php_stream_get_line. Closes GH-11421 --- NEWS | 2 ++ ext/standard/tests/streams/gh11418.phpt | 36 +++++++++++++++++++++++++ main/php_streams.h | 4 ++- main/streams/streams.c | 20 +++++++------- main/streams/xp_socket.c | 2 +- 5 files changed, 53 insertions(+), 11 deletions(-) create mode 100644 ext/standard/tests/streams/gh11418.phpt diff --git a/NEWS b/NEWS index c77f990d4a5e4..b2415262102ac 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,8 @@ PHP NEWS - Streams: . Implement GH-8641 (STREAM_NOTIFY_COMPLETED over HTTP never emitted). (nielsdos, Jakub Zelenka) + . Fix bug GH-10406 (fgets on a redis socket connection fails on PHP 8.3). + (Jakub Zelenka) 08 Jun 2023, PHP 8.3.0alpha1 diff --git a/ext/standard/tests/streams/gh11418.phpt b/ext/standard/tests/streams/gh11418.phpt new file mode 100644 index 0000000000000..99f70ff4c6b24 --- /dev/null +++ b/ext/standard/tests/streams/gh11418.phpt @@ -0,0 +1,36 @@ +--TEST-- +GH-11418: fgets on a redis socket connection fails on PHP 8.3 +--FILE-- +run($clientCode, $serverCode); + +?> +--EXPECT-- +Hi Hello World diff --git a/main/php_streams.h b/main/php_streams.h index 13f8fe827b08a..5acb94f3042d3 100644 --- a/main/php_streams.h +++ b/main/php_streams.h @@ -211,6 +211,9 @@ struct _php_stream { * PHP_STREAM_FCLOSE_XXX as appropriate */ uint8_t fclose_stdiocast:2; + /* flag to mark whether the stream has buffered data */ + uint8_t has_buffered_data:1; + char mode[16]; /* "rwb" etc. 
ala stdio */ uint32_t flags; /* PHP_STREAM_FLAG_XXX */ @@ -227,7 +230,6 @@ struct _php_stream { size_t readbuflen; zend_off_t readpos; zend_off_t writepos; - ssize_t didread; /* how much data to read when filling buffer */ size_t chunk_size; diff --git a/main/streams/streams.c b/main/streams/streams.c index 14b534c998a82..2a5178e294266 100644 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -694,8 +694,7 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) { - ssize_t toread = 0; - stream->didread = 0; + ssize_t toread = 0, didread = 0; while (size > 0) { @@ -714,7 +713,8 @@ PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) stream->readpos += toread; size -= toread; buf += toread; - stream->didread += toread; + didread += toread; + stream->has_buffered_data = 1; } /* ignore eof here; the underlying state might have changed */ @@ -727,14 +727,14 @@ PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) if (toread < 0) { /* Report an error if the read failed and we did not read any data * before that. Otherwise return the data we did read. */ - if (stream->didread == 0) { + if (didread == 0) { return toread; } break; } } else { if (php_stream_fill_read_buffer(stream, size) != SUCCESS) { - if (stream->didread == 0) { + if (didread == 0) { return -1; } break; @@ -751,9 +751,10 @@ PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) } } if (toread > 0) { - stream->didread += toread; + didread += toread; buf += toread; size -= toread; + stream->has_buffered_data = 1; } else { /* EOF, or temporary end of data (for non-blocking mode). 
*/ break; @@ -767,11 +768,12 @@ PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) } } - if (stream->didread > 0) { - stream->position += stream->didread; + if (didread > 0) { + stream->position += didread; + stream->has_buffered_data = 0; } - return stream->didread; + return didread; } /* Like php_stream_read(), but reading into a zend_string buffer. This has some similarity diff --git a/main/streams/xp_socket.c b/main/streams/xp_socket.c index 8f0a87b998043..6c770d77aed95 100644 --- a/main/streams/xp_socket.c +++ b/main/streams/xp_socket.c @@ -168,7 +168,7 @@ static ssize_t php_sockop_read(php_stream *stream, char *buf, size_t count) /* Special handling for blocking read. */ if (sock->is_blocked) { /* Find out if there is any data buffered from the previous read. */ - bool has_buffered_data = stream->didread > 0; + bool has_buffered_data = stream->has_buffered_data; /* No need to wait if there is any data buffered or no timeout. */ bool dont_wait = has_buffered_data || (sock->timeout.tv_sec == 0 && sock->timeout.tv_usec == 0); From e309fd84610802c67413fb48284e85495034e7a9 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 10 Jun 2023 22:48:16 +0200 Subject: [PATCH 159/168] Fix lifetime issue with getAttributeNodeNS() It's the same issue that I fixed previously in GH-11402, but in a different place. Closes GH-11422. --- NEWS | 1 + ext/dom/element.c | 20 ++++--------------- ...ifetime_parentNode_getAttributeNodeNS.phpt | 20 +++++++++++++++++++ 3 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 ext/dom/tests/bug_lifetime_parentNode_getAttributeNodeNS.phpt diff --git a/NEWS b/NEWS index 139c696374456..776745d073a38 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,7 @@ PHP NEWS . Fixed bug #70359 (print_r() on DOMAttr causes Segfault in php_libxml_node_free_list()). (nielsdos) . Fixed bug #78577 (Crash in DOMNameSpace debug info handlers). (nielsdos) + . 
Fix lifetime issue with getAttributeNodeNS(). (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). (nielsdos) diff --git a/ext/dom/element.c b/ext/dom/element.c index f84caa629cc66..44c576a07363f 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -787,7 +787,7 @@ Since: DOM Level 2 PHP_METHOD(DOMElement, getAttributeNodeNS) { zval *id; - xmlNodePtr elemp, fakeAttrp; + xmlNodePtr elemp; xmlAttrPtr attrp; dom_object *intern; size_t uri_len, name_len; @@ -808,21 +808,9 @@ PHP_METHOD(DOMElement, getAttributeNodeNS) xmlNsPtr nsptr; nsptr = dom_get_nsdecl(elemp, (xmlChar *)name); if (nsptr != NULL) { - xmlNsPtr curns; - curns = xmlNewNs(NULL, nsptr->href, NULL); - if (nsptr->prefix) { - curns->prefix = xmlStrdup((xmlChar *) nsptr->prefix); - } - if (nsptr->prefix) { - fakeAttrp = xmlNewDocNode(elemp->doc, NULL, (xmlChar *) nsptr->prefix, nsptr->href); - } else { - fakeAttrp = xmlNewDocNode(elemp->doc, NULL, (xmlChar *)"xmlns", nsptr->href); - } - fakeAttrp->type = XML_NAMESPACE_DECL; - fakeAttrp->parent = elemp; - fakeAttrp->ns = curns; - - DOM_RET_OBJ(fakeAttrp, &ret, intern); + /* Keep parent alive, because we're a fake child. 
*/ + GC_ADDREF(&intern->std); + (void) php_dom_create_fake_namespace_decl(elemp, nsptr, return_value, intern); } else { RETURN_NULL(); } diff --git a/ext/dom/tests/bug_lifetime_parentNode_getAttributeNodeNS.phpt b/ext/dom/tests/bug_lifetime_parentNode_getAttributeNodeNS.phpt new file mode 100644 index 0000000000000..3c53e08d4db76 --- /dev/null +++ b/ext/dom/tests/bug_lifetime_parentNode_getAttributeNodeNS.phpt @@ -0,0 +1,20 @@ +--TEST-- +Lifetime issue with parentNode on getAttributeNodeNS() +--EXTENSIONS-- +dom +--FILE-- + + + +'; + +$xml=new DOMDocument(); +$xml->loadXML($xmlString); +$ns2 = $xml->documentElement->getAttributeNodeNS("http://www.w3.org/2000/xmlns/", "ns2"); +$ns2->parentNode->remove(); +var_dump($ns2->parentNode->localName); + +?> +--EXPECT-- +string(4) "root" From 10d94aca4c5ee7a101ed39bc395bcc1bb9d68507 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 11 Jun 2023 23:44:58 +0200 Subject: [PATCH 160/168] Fix "invalid state error" with cloned namespace declarations Closes GH-11429. --- NEWS | 1 + ext/dom/php_dom.c | 57 +++++++++++++++++++++------ ext/dom/tests/clone_nodes.phpt | 72 ++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 13 deletions(-) create mode 100644 ext/dom/tests/clone_nodes.phpt diff --git a/NEWS b/NEWS index 776745d073a38..cad4653438930 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,7 @@ PHP NEWS php_libxml_node_free_list()). (nielsdos) . Fixed bug #78577 (Crash in DOMNameSpace debug info handlers). (nielsdos) . Fix lifetime issue with getAttributeNodeNS(). (nielsdos) + . Fix "invalid state error" with cloned namespace declarations. (nielsdos) - Opcache: . Fix allocation loop in zend_shared_alloc_startup(). 
(nielsdos) diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 9e0bb1f3d1d02..454dc54d8e211 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -89,6 +89,7 @@ static HashTable dom_xpath_prop_handlers; static zend_object *dom_objects_namespace_node_new(zend_class_entry *class_type); static void dom_object_namespace_node_free_storage(zend_object *object); +static xmlNodePtr php_dom_create_fake_namespace_decl_node_ptr(xmlNodePtr nodep, xmlNsPtr original); typedef int (*dom_read_t)(dom_object *obj, zval *retval); typedef int (*dom_write_t)(dom_object *obj, zval *newval); @@ -477,6 +478,19 @@ PHP_FUNCTION(dom_import_simplexml) static dom_object* dom_objects_set_class(zend_class_entry *class_type); +static void dom_update_refcount_after_clone(dom_object *original, xmlNodePtr original_node, dom_object *clone, xmlNodePtr cloned_node) +{ + /* If we cloned a document then we must create new doc proxy */ + if (cloned_node->doc == original_node->doc) { + clone->document = original->document; + } + php_libxml_increment_doc_ref((php_libxml_node_object *)clone, cloned_node->doc); + php_libxml_increment_node_ptr((php_libxml_node_object *)clone, cloned_node, (void *)clone); + if (original->document != clone->document) { + dom_copy_doc_props(original->document, clone->document); + } +} + static zend_object *dom_objects_store_clone_obj(zend_object *zobject) /* {{{ */ { dom_object *intern = php_dom_obj_from_obj(zobject); @@ -489,15 +503,7 @@ static zend_object *dom_objects_store_clone_obj(zend_object *zobject) /* {{{ */ if (node != NULL) { xmlNodePtr cloned_node = xmlDocCopyNode(node, node->doc, 1); if (cloned_node != NULL) { - /* If we cloned a document then we must create new doc proxy */ - if (cloned_node->doc == node->doc) { - clone->document = intern->document; - } - php_libxml_increment_doc_ref((php_libxml_node_object *)clone, cloned_node->doc); - php_libxml_increment_node_ptr((php_libxml_node_object *)clone, cloned_node, (void *)clone); - if (intern->document != 
clone->document) { - dom_copy_doc_props(intern->document, clone->document); - } + dom_update_refcount_after_clone(intern, node, clone, cloned_node); } } @@ -509,6 +515,26 @@ static zend_object *dom_objects_store_clone_obj(zend_object *zobject) /* {{{ */ } /* }}} */ +static zend_object *dom_object_namespace_node_clone_obj(zend_object *zobject) +{ + dom_object_namespace_node *intern = php_dom_namespace_node_obj_from_obj(zobject); + zend_object *clone = dom_objects_namespace_node_new(intern->dom.std.ce); + dom_object_namespace_node *clone_intern = php_dom_namespace_node_obj_from_obj(clone); + + xmlNodePtr original_node = dom_object_get_node(&intern->dom); + ZEND_ASSERT(original_node->type == XML_NAMESPACE_DECL); + xmlNodePtr cloned_node = php_dom_create_fake_namespace_decl_node_ptr(original_node->parent, original_node->ns); + + if (intern->parent_intern) { + clone_intern->parent_intern = intern->parent_intern; + GC_ADDREF(&clone_intern->parent_intern->std); + } + dom_update_refcount_after_clone(&intern->dom, original_node, &clone_intern->dom, cloned_node); + + zend_objects_clone_members(clone, &intern->dom.std); + return clone; +} + static void dom_copy_prop_handler(zval *zv) /* {{{ */ { dom_prop_handler *hnd = Z_PTR_P(zv); @@ -577,6 +603,7 @@ PHP_MINIT_FUNCTION(dom) memcpy(&dom_object_namespace_node_handlers, &dom_object_handlers, sizeof(zend_object_handlers)); dom_object_namespace_node_handlers.offset = XtOffsetOf(dom_object_namespace_node, dom.std); dom_object_namespace_node_handlers.free_obj = dom_object_namespace_node_free_storage; + dom_object_namespace_node_handlers.clone_obj = dom_object_namespace_node_clone_obj; zend_hash_init(&classes, 0, NULL, NULL, 1); @@ -1579,8 +1606,7 @@ xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName) { } /* }}} end dom_get_nsdecl */ -/* Note: Assumes the additional lifetime was already added in the caller. 
*/ -xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr original, zval *return_value, dom_object *parent_intern) +static xmlNodePtr php_dom_create_fake_namespace_decl_node_ptr(xmlNodePtr nodep, xmlNsPtr original) { xmlNodePtr attrp; xmlNsPtr curns = xmlNewNs(NULL, original->href, NULL); @@ -1593,11 +1619,16 @@ xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr origina attrp->type = XML_NAMESPACE_DECL; attrp->parent = nodep; attrp->ns = curns; + return attrp; +} +/* Note: Assumes the additional lifetime was already added in the caller. */ +xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr original, zval *return_value, dom_object *parent_intern) +{ + xmlNodePtr attrp = php_dom_create_fake_namespace_decl_node_ptr(nodep, original); php_dom_create_object(attrp, return_value, parent_intern); /* This object must exist, because we just created an object for it via php_dom_create_object(). */ - dom_object *obj = ((php_libxml_node_ptr *)attrp->_private)->_private; - php_dom_namespace_node_obj_from_obj(&obj->std)->parent_intern = parent_intern; + php_dom_namespace_node_obj_from_obj(Z_OBJ_P(return_value))->parent_intern = parent_intern; return attrp; } diff --git a/ext/dom/tests/clone_nodes.phpt b/ext/dom/tests/clone_nodes.phpt new file mode 100644 index 0000000000000..1841c702caf8d --- /dev/null +++ b/ext/dom/tests/clone_nodes.phpt @@ -0,0 +1,72 @@ +--TEST-- +Clone nodes +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + +$attr = $doc->documentElement->getAttributeNode('xmlns'); +var_dump($attr); + +$attrClone = clone $attr; +var_dump($attrClone->nodeValue); +var_dump($attrClone->parentNode->nodeName); + +unset($doc); +unset($attr); + +var_dump($attrClone->nodeValue); +var_dump($attrClone->parentNode->nodeName); + +echo "-- Clone DOMNode --\n"; + +$doc = new DOMDocument; +$doc->loadXML(''); + +$bar = $doc->documentElement->firstChild; +$barClone = clone $bar; +$bar->remove(); +unset($bar); + 
+var_dump($barClone->nodeName); + +$doc->firstElementChild->remove(); +unset($doc); + +var_dump($barClone->nodeName); +var_dump($barClone->parentNode); + +?> +--EXPECT-- +-- Clone DOMNameSpaceNode -- +object(DOMNameSpaceNode)#3 (8) { + ["nodeName"]=> + string(5) "xmlns" + ["nodeValue"]=> + string(19) "http://php.net/test" + ["nodeType"]=> + int(18) + ["prefix"]=> + string(0) "" + ["localName"]=> + string(5) "xmlns" + ["namespaceURI"]=> + string(19) "http://php.net/test" + ["ownerDocument"]=> + string(22) "(object value omitted)" + ["parentNode"]=> + string(22) "(object value omitted)" +} +string(19) "http://php.net/test" +string(3) "foo" +string(19) "http://php.net/test" +string(3) "foo" +-- Clone DOMNode -- +string(3) "bar" +string(3) "bar" +NULL From a8a3b99e00747f3a1198c526674c9dad513a203f Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Mon, 12 Jun 2023 23:58:34 +0200 Subject: [PATCH 161/168] Fix GH-11433: Unable to set CURLOPT_ACCEPT_ENCODING to NULL Closes GH-11446. --- NEWS | 4 ++ ext/curl/interface.c | 2 +- .../curl_setopt_CURLOPT_ACCEPT_ENCODING.phpt | 38 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 ext/curl/tests/curl_setopt_CURLOPT_ACCEPT_ENCODING.phpt diff --git a/NEWS b/NEWS index cad4653438930..6baaae22e8629 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,10 @@ PHP NEWS - Core: . Fixed build for the riscv64 architecture/GCC 12. (Daniil Gentili) +- Curl: + . Fixed bug GH-11433 (Unable to set CURLOPT_ACCEPT_ENCODING to NULL). + (nielsdos) + - DOM: . Fixed bugs GH-11288 and GH-11289 and GH-11290 and GH-9142 (DOMExceptions and segfaults with replaceWith). 
(nielsdos) diff --git a/ext/curl/interface.c b/ext/curl/interface.c index 025c876ad5bcd..807b27cb78c90 100644 --- a/ext/curl/interface.c +++ b/ext/curl/interface.c @@ -2493,7 +2493,6 @@ static int _php_curl_setopt(php_curl *ch, zend_long option, zval *zvalue, bool i case CURLOPT_TLSAUTH_TYPE: case CURLOPT_TLSAUTH_PASSWORD: case CURLOPT_TLSAUTH_USERNAME: - case CURLOPT_ACCEPT_ENCODING: case CURLOPT_TRANSFER_ENCODING: case CURLOPT_DNS_SERVERS: case CURLOPT_MAIL_AUTH: @@ -2553,6 +2552,7 @@ static int _php_curl_setopt(php_curl *ch, zend_long option, zval *zvalue, bool i case CURLOPT_RANGE: case CURLOPT_FTP_ACCOUNT: case CURLOPT_RTSP_SESSION_ID: + case CURLOPT_ACCEPT_ENCODING: #if LIBCURL_VERSION_NUM >= 0x072100 /* Available since 7.33.0 */ case CURLOPT_DNS_INTERFACE: case CURLOPT_DNS_LOCAL_IP4: diff --git a/ext/curl/tests/curl_setopt_CURLOPT_ACCEPT_ENCODING.phpt b/ext/curl/tests/curl_setopt_CURLOPT_ACCEPT_ENCODING.phpt new file mode 100644 index 0000000000000..c170308c2e981 --- /dev/null +++ b/ext/curl/tests/curl_setopt_CURLOPT_ACCEPT_ENCODING.phpt @@ -0,0 +1,38 @@ +--TEST-- +Test curl_setopt() with CURLOPT_ACCEPT_ENCODING +--EXTENSIONS-- +curl +--FILE-- + +--EXPECTF-- +GET /get.inc?test= HTTP/1.1 +Host: %s +Accept: */* +Accept-Encoding: gzip + +GET /get.inc?test= HTTP/1.1 +Host: %s +Accept: */* From 5c789806efb7d5f5d687d3895e8987a89a6cfbba Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Fri, 9 Jun 2023 17:06:24 +0200 Subject: [PATCH 162/168] Fix GH-11406: segfault with unpacking and magic method closure The magic method trampoline closure may be variadic. However, the arg_info for the variadic argument was not set, resulting in a crash both in reflection and in the VM. Fix it by creating an arg_info containing a single element in case of the variadic case. The variadic argument is the last one (and in this case only one) in the arg_info array. 
We make sure the argument info is equivalent to the argument info of `$closure` of the following code snippet: ``` function foo(...$arguments) {} $closure = foo(...); ``` Closes GH-11417. --- NEWS | 2 ++ .../trampoline_closure_named_arguments.phpt | 35 +++++++++++++++++++ Zend/zend_closures.c | 6 ++++ 3 files changed, 43 insertions(+) diff --git a/NEWS b/NEWS index b2415262102ac..50a2c8bb1ca82 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,8 @@ PHP NEWS - Core: . Fix GH-11388 (Allow "final" modifier when importing a method from a trait). (nielsdos) + . Fixed bug GH-11406 (segfault with unpacking and magic method closure). + (nielsdos) - DOM: . Fix #79700 (wrong use of libxml oldNs leads to performance problem). diff --git a/Zend/tests/trampoline_closure_named_arguments.phpt b/Zend/tests/trampoline_closure_named_arguments.phpt index e209853e509cb..e4ccaf16e63a6 100644 --- a/Zend/tests/trampoline_closure_named_arguments.phpt +++ b/Zend/tests/trampoline_closure_named_arguments.phpt @@ -14,12 +14,15 @@ class Test { $test = new Test; +$array = ["unpacked"]; + echo "-- Non-static cases --\n"; $test->test(1, 2, a: 123); $test->test(...)(1, 2); $test->test(...)(1, 2, a: 123, b: $test); $test->test(...)(a: 123, b: $test); $test->test(...)(); +$test->test(...)(...$array); echo "-- Static cases --\n"; Test::testStatic(1, 2, a: 123); @@ -27,6 +30,16 @@ Test::testStatic(...)(1, 2); Test::testStatic(...)(1, 2, a: 123, b: $test); Test::testStatic(...)(a: 123, b: $test); Test::testStatic(...)(); +Test::testStatic(...)(...$array); + +echo "-- Reflection tests --\n"; +$reflectionFunction = new ReflectionFunction(Test::fail(...)); +var_dump($reflectionFunction->getParameters()); +$argument = $reflectionFunction->getParameters()[0]; +var_dump($argument->isVariadic()); +$type = $argument->getType(); +var_dump($type); +var_dump($type->getName()); ?> --EXPECT-- @@ -70,6 +83,11 @@ array(2) { string(4) "test" array(0) { } +string(4) "test" +array(1) { + [0]=> + string(8) "unpacked" +} -- Static 
cases -- string(10) "testStatic" array(3) { @@ -110,3 +128,20 @@ array(2) { string(10) "testStatic" array(0) { } +string(10) "testStatic" +array(1) { + [0]=> + string(8) "unpacked" +} +-- Reflection tests -- +array(1) { + [0]=> + object(ReflectionParameter)#4 (1) { + ["name"]=> + string(9) "arguments" + } +} +bool(true) +object(ReflectionNamedType)#5 (0) { +} +string(5) "mixed" diff --git a/Zend/zend_closures.c b/Zend/zend_closures.c index 69eeb3cf1ceef..2072eac72d712 100644 --- a/Zend/zend_closures.c +++ b/Zend/zend_closures.c @@ -833,6 +833,9 @@ ZEND_API void zend_create_fake_closure(zval *res, zend_function *func, zend_clas } /* }}} */ +/* __call and __callStatic name the arguments "$arguments" in the docs. */ +static zend_internal_arg_info trampoline_arg_info[] = {ZEND_ARG_VARIADIC_TYPE_INFO(false, arguments, IS_MIXED, false)}; + void zend_closure_from_frame(zval *return_value, zend_execute_data *call) { /* {{{ */ zval instance; zend_internal_function trampoline; @@ -856,6 +859,9 @@ void zend_closure_from_frame(zval *return_value, zend_execute_data *call) { /* { trampoline.handler = zend_closure_call_magic; trampoline.function_name = mptr->common.function_name; trampoline.scope = mptr->common.scope; + if (trampoline.fn_flags & ZEND_ACC_VARIADIC) { + trampoline.arg_info = trampoline_arg_info; + } zend_free_trampoline(mptr); mptr = (zend_function *) &trampoline; From dd8514a0bdf7ce69281626b570dfcaa2594b2273 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 7 Jun 2023 21:33:47 +0100 Subject: [PATCH 163/168] ext/pgsql: adding pg_set_error_context_visibility. another level of context for pg_last_error/pg_result_error() to include or not the context in those. PQSHOW_CONTEXT_ERRORS being the default. 
Close GH-11395 --- NEWS | 2 ++ UPGRADING | 3 +++ ext/pgsql/pgsql.c | 27 +++++++++++++++++++++++++-- ext/pgsql/pgsql.stub.php | 21 +++++++++++++++++++++ ext/pgsql/pgsql_arginfo.h | 12 +++++++++++- ext/pgsql/tests/07optional.phpt | 3 +++ 6 files changed, 65 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 50a2c8bb1ca82..b68559737ab30 100644 --- a/NEWS +++ b/NEWS @@ -166,6 +166,8 @@ PHP NEWS . pg_set_error_verbosity adding PGSQL_ERRORS_STATE constant. (David Carlier) . pg_convert/pg_insert E_WARNING on type errors had been converted to ValueError/TypeError exceptions. (David Carlier) + . Added pg_set_error_context_visibility to set the context's visibility + within the error messages. (David Carlier) - Phar: . Fix memory leak in phar_rename_archive(). (stkeke) diff --git a/UPGRADING b/UPGRADING index 9ec95f6d63759..4d292bb810087 100644 --- a/UPGRADING +++ b/UPGRADING @@ -192,6 +192,9 @@ PHP 8.3 UPGRADE NOTES . Added posix_fpathconf call to get configuration value from a file descriptor. . Added posix_eaccess call to check the effective user id's permission for a path. +- PGSQL: + . Added pg_set_error_context_visibility to set the visibility of the context in error messages. + - Random: . Added Randomizer::getBytesFromString(). 
RFC: https://wiki.php.net/rfc/randomizer_additions diff --git a/ext/pgsql/pgsql.c b/ext/pgsql/pgsql.c index e62f8f5191073..68325155c23ca 100644 --- a/ext/pgsql/pgsql.c +++ b/ext/pgsql/pgsql.c @@ -2833,6 +2833,29 @@ PHP_FUNCTION(pg_set_error_verbosity) } /* }}} */ +PHP_FUNCTION(pg_set_error_context_visibility) +{ + zval *pgsql_link = NULL; + zend_long visibility; + PGconn *pgsql; + pgsql_link_handle *link; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pgsql_link, pgsql_link_ce, &visibility) == FAILURE) { + RETURN_THROWS(); + } + link = Z_PGSQL_LINK_P(pgsql_link); + CHECK_PGSQL_LINK(link); + + pgsql = link->conn; + + if (visibility == PQSHOW_CONTEXT_NEVER || visibility & (PQSHOW_CONTEXT_ERRORS|PQSHOW_CONTEXT_ALWAYS)) { + RETURN_LONG(PQsetErrorContextVisibility(pgsql, visibility)); + } else { + zend_argument_value_error(2, "must be one of PGSQL_SHOW_CONTEXT_NEVER, PGSQL_SHOW_CONTEXT_ERRORS or PGSQL_SHOW_CONTEXT_ALWAYS"); + RETURN_THROWS(); + } +} + /* {{{ Set client encoding */ PHP_FUNCTION(pg_set_client_encoding) { @@ -3331,7 +3354,7 @@ PHP_FUNCTION(pg_result_error) RETURN_FALSE; } - err = (char *)PQresultErrorMessage(pgsql_result); + err = PQresultErrorMessage(pgsql_result); RETURN_STRING(err); } /* }}} */ @@ -3365,7 +3388,7 @@ PHP_FUNCTION(pg_result_error_field) #endif |PG_DIAG_CONTEXT|PG_DIAG_SOURCE_FILE|PG_DIAG_SOURCE_LINE |PG_DIAG_SOURCE_FUNCTION)) { - field = (char *)PQresultErrorField(pgsql_result, (int)fieldcode); + field = PQresultErrorField(pgsql_result, (int)fieldcode); if (field == NULL) { RETURN_NULL(); } else { diff --git a/ext/pgsql/pgsql.stub.php b/ext/pgsql/pgsql.stub.php index f337251d59a75..1b5356bd7be99 100644 --- a/ext/pgsql/pgsql.stub.php +++ b/ext/pgsql/pgsql.stub.php @@ -462,6 +462,25 @@ */ const PGSQL_PIPELINE_ABORTED = UNKNOWN; #endif + + /* For pg_set_error_context_visibility() */ + + /** + * @var int + * @cvalue PQSHOW_CONTEXT_NEVER + */ + const PGSQL_SHOW_CONTEXT_NEVER = UNKNOWN; + /** + * @var int + * @cvalue 
PQSHOW_CONTEXT_ERRORS + */ + const PGSQL_SHOW_CONTEXT_ERRORS = UNKNOWN; + /** + * @var int + * @cvalue PQSHOW_CONTEXT_ALWAYS + */ + const PGSQL_SHOW_CONTEXT_ALWAYS = UNKNOWN; + function pg_connect(string $connection_string, int $flags = 0): PgSql\Connection|false {} @@ -951,6 +970,8 @@ function pg_exit_pipeline_mode(PgSql\Connection $connection): bool {} function pg_pipeline_sync(PgSql\Connection $connection): bool {} function pg_pipeline_status(PgSql\Connection $connection): int {} #endif + + function pg_set_error_context_visibility(PgSql\Connection $connection, int $visibility): int {} } namespace PgSql { diff --git a/ext/pgsql/pgsql_arginfo.h b/ext/pgsql/pgsql_arginfo.h index 8fcc229637191..26e0777b1cbf5 100644 --- a/ext/pgsql/pgsql_arginfo.h +++ b/ext/pgsql/pgsql_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: bf714281e441d59e0760e51df9f4050c96319794 */ + * Stub hash: a37be19da43ac0838655b0ba7e34382e9c7424f5 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_pg_connect, 0, 1, PgSql\\Connection, MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, connection_string, IS_STRING, 0) @@ -472,6 +472,11 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_pipeline_status, 0, 1, IS_LON ZEND_END_ARG_INFO() #endif +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_set_error_context_visibility, 0, 2, IS_LONG, 0) + ZEND_ARG_OBJ_INFO(0, connection, PgSql\\Connection, 0) + ZEND_ARG_TYPE_INFO(0, visibility, IS_LONG, 0) +ZEND_END_ARG_INFO() + ZEND_FUNCTION(pg_connect); ZEND_FUNCTION(pg_pconnect); @@ -574,6 +579,7 @@ ZEND_FUNCTION(pg_pipeline_sync); #if defined(LIBPQ_HAS_PIPELINING) ZEND_FUNCTION(pg_pipeline_status); #endif +ZEND_FUNCTION(pg_set_error_context_visibility); static const zend_function_entry ext_functions[] = { @@ -703,6 +709,7 @@ static const zend_function_entry ext_functions[] = { #if defined(LIBPQ_HAS_PIPELINING) ZEND_FE(pg_pipeline_status, arginfo_pg_pipeline_status) #endif + ZEND_FE(pg_set_error_context_visibility, 
arginfo_pg_set_error_context_visibility) ZEND_FE_END }; @@ -835,6 +842,9 @@ static void register_pgsql_symbols(int module_number) #if defined(LIBPQ_HAS_PIPELINING) REGISTER_LONG_CONSTANT("PGSQL_PIPELINE_ABORTED", PQ_PIPELINE_ABORTED, CONST_PERSISTENT); #endif + REGISTER_LONG_CONSTANT("PGSQL_SHOW_CONTEXT_NEVER", PQSHOW_CONTEXT_NEVER, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PGSQL_SHOW_CONTEXT_ERRORS", PQSHOW_CONTEXT_ERRORS, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PGSQL_SHOW_CONTEXT_ALWAYS", PQSHOW_CONTEXT_ALWAYS, CONST_PERSISTENT); } static zend_class_entry *register_class_PgSql_Connection(void) diff --git a/ext/pgsql/tests/07optional.phpt b/ext/pgsql/tests/07optional.phpt index b9ce491b2ca7a..41f89ca6c8c3b 100644 --- a/ext/pgsql/tests/07optional.phpt +++ b/ext/pgsql/tests/07optional.phpt @@ -21,6 +21,9 @@ if (function_exists('pg_set_error_verbosity')) { pg_set_error_verbosity($db, PGSQL_ERRORS_VERBOSE); pg_set_error_verbosity($db, PGSQL_ERRORS_SQLSTATE); } +pg_set_error_context_visibility($db, PGSQL_SHOW_CONTEXT_NEVER); +pg_set_error_context_visibility($db, PGSQL_SHOW_CONTEXT_ERRORS); +pg_set_error_context_visibility($db, PGSQL_SHOW_CONTEXT_ALWAYS); echo "OK"; ?> --EXPECT-- From ec3daea1d6c37770a51b7648b1f21fd2872be340 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Mon, 12 Jun 2023 21:48:32 +0100 Subject: [PATCH 164/168] ext/pdo_pgsql: connection status update to distinguish from truly bad quality connections. 
Close GH-11443 --- ext/pdo_pgsql/pgsql_driver.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/ext/pdo_pgsql/pgsql_driver.c b/ext/pdo_pgsql/pgsql_driver.c index ec4d5ec65866b..46b3f25f4086b 100644 --- a/ext/pdo_pgsql/pgsql_driver.c +++ b/ext/pdo_pgsql/pgsql_driver.c @@ -463,33 +463,53 @@ static int pdo_pgsql_get_attribute(pdo_dbh_t *dbh, zend_long attr, zval *return_ case PDO_ATTR_CONNECTION_STATUS: switch (PQstatus(H->server)) { case CONNECTION_STARTED: - ZVAL_STRINGL(return_value, "Waiting for connection to be made.", sizeof("Waiting for connection to be made.")-1); + ZVAL_STRINGL(return_value, "Waiting for connection to be made.", strlen("Waiting for connection to be made.")); break; case CONNECTION_MADE: case CONNECTION_OK: - ZVAL_STRINGL(return_value, "Connection OK; waiting to send.", sizeof("Connection OK; waiting to send.")-1); + ZVAL_STRINGL(return_value, "Connection OK; waiting to send.", strlen("Connection OK; waiting to send.")); break; case CONNECTION_AWAITING_RESPONSE: - ZVAL_STRINGL(return_value, "Waiting for a response from the server.", sizeof("Waiting for a response from the server.")-1); + ZVAL_STRINGL(return_value, "Waiting for a response from the server.", strlen("Waiting for a response from the server.")); break; case CONNECTION_AUTH_OK: - ZVAL_STRINGL(return_value, "Received authentication; waiting for backend start-up to finish.", sizeof("Received authentication; waiting for backend start-up to finish.")-1); + ZVAL_STRINGL(return_value, "Received authentication; waiting for backend start-up to finish.", strlen("Received authentication; waiting for backend start-up to finish.")); break; #ifdef CONNECTION_SSL_STARTUP case CONNECTION_SSL_STARTUP: - ZVAL_STRINGL(return_value, "Negotiating SSL encryption.", sizeof("Negotiating SSL encryption.")-1); + ZVAL_STRINGL(return_value, "Negotiating SSL encryption.", strlen("Negotiating SSL encryption.")); break; #endif case CONNECTION_SETENV: - 
ZVAL_STRINGL(return_value, "Negotiating environment-driven parameter settings.", sizeof("Negotiating environment-driven parameter settings.")-1); + ZVAL_STRINGL(return_value, "Negotiating environment-driven parameter settings.", strlen("Negotiating environment-driven parameter settings.")); break; +#ifdef CONNECTION_CONSUME + case CONNECTION_CONSUME: + ZVAL_STRINGL(return_value, "Flushing send queue/consuming extra data.", strlen("Flushing send queue/consuming extra data.")); + break; +#endif +#ifdef CONNECTION_GSS_STARTUP + case CONNECTION_SSL_STARTUP: + ZVAL_STRINGL(return_value, "Negotiating GSSAPI.", strlen("Negotiating GSSAPI.")); + break; +#endif +#ifdef CONNECTION_CHECK_TARGET + case CONNECTION_CHECK_TARGET: + ZVAL_STRINGL(return_value, "Connection OK; checking target server properties.", strlen("Connection OK; checking target server properties.")); + break; +#endif +#ifdef CONNECTION_CHECK_STANDBY + case CONNECTION_CHECK_STANDBY: + ZVAL_STRINGL(return_value, "Connection OK; checking if server in standby.", strlen("Connection OK; checking if server in standby.")); + break; +#endif case CONNECTION_BAD: default: - ZVAL_STRINGL(return_value, "Bad connection.", sizeof("Bad connection.")-1); + ZVAL_STRINGL(return_value, "Bad connection.", strlen("Bad connection.")); break; } break; From 4fcb3e0d343bb2a4bd405fce816020986e040434 Mon Sep 17 00:00:00 2001 From: Peter Date: Tue, 13 Jun 2023 01:19:11 +0800 Subject: [PATCH 165/168] Fix cross-compilation check in phar generation for FreeBSD FreeBSD's shell is very POSIX strict. This patch makes sure it works correctly under FreeBSD too. Closes GH-11441. --- NEWS | 3 +++ ext/phar/Makefile.frag | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 6baaae22e8629..2335b5d7d397a 100644 --- a/NEWS +++ b/NEWS @@ -47,6 +47,9 @@ PHP NEWS . Fixed bug GH-9356 Incomplete validation of IPv6 Address fields in subjectAltNames (James Lucas, Jakub Zelenka). +- Phar: + . 
Fix cross-compilation check in phar generation for FreeBSD. (peter279k) + - SPL: . Fixed bug GH-11338 (SplFileInfo empty getBasename with more than one slash). (nielsdos) diff --git a/ext/phar/Makefile.frag b/ext/phar/Makefile.frag index e5646b2029261..7a867dd7df28f 100644 --- a/ext/phar/Makefile.frag +++ b/ext/phar/Makefile.frag @@ -35,7 +35,7 @@ TEST_PHP_EXECUTABLE_RES = $(shell echo "$(TEST_PHP_EXECUTABLE)" | grep -c 'Exec $(builddir)/phar.php: $(srcdir)/build_precommand.php $(srcdir)/phar/*.inc $(srcdir)/phar/*.php $(SAPI_CLI_PATH) -@(echo "Generating phar.php"; \ - if [ $(TEST_PHP_EXECUTABLE_RES) -ne 1 ]; then \ + if [ "$(TEST_PHP_EXECUTABLE_RES)" != 1 ]; then \ $(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(srcdir)/build_precommand.php > $(builddir)/phar.php; \ else \ echo "Skipping phar.php generating during cross compilation"; \ @@ -43,7 +43,7 @@ $(builddir)/phar.php: $(srcdir)/build_precommand.php $(srcdir)/phar/*.inc $(srcd $(builddir)/phar.phar: $(builddir)/phar.php $(builddir)/phar/phar.inc $(srcdir)/phar/*.inc $(srcdir)/phar/*.php $(SAPI_CLI_PATH) -@(echo "Generating phar.phar"; \ - if [ $(TEST_PHP_EXECUTABLE_RES) -ne 1 ]; then \ + if [ "$(TEST_PHP_EXECUTABLE_RES)" != 1 ]; then \ rm -f $(builddir)/phar.phar; \ rm -f $(srcdir)/phar.phar; \ $(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(builddir)/phar.php pack -f $(builddir)/phar.phar -a pharcommand -c auto -x \\.svn -p 0 -s $(srcdir)/phar/phar.php -h sha1 -b "$(PHP_PHARCMD_BANG)" $(srcdir)/phar/; \ @@ -53,7 +53,7 @@ $(builddir)/phar.phar: $(builddir)/phar.php $(builddir)/phar/phar.inc $(srcdir)/ fi) install-pharcmd: pharcmd - @(if [ $(TEST_PHP_EXECUTABLE_RES) -ne 1 ]; then \ + @(if [ "$(TEST_PHP_EXECUTABLE_RES)" != 1 ]; then \ $(mkinstalldirs) $(INSTALL_ROOT)$(bindir); \ $(INSTALL) $(builddir)/phar.phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix).phar; \ rm -f $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix); \ From 38b5e949a34fcf6bc47d0c3e0fb7b66401c09323 
Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 13 Jun 2023 19:38:12 +0200 Subject: [PATCH 166/168] [ci skip] Add myself to CODEOWNERS for dom and libxml (#11449) --- CODEOWNERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CODEOWNERS b/CODEOWNERS index 62a501854ac1f..ef06da93a1f50 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -14,10 +14,12 @@ /ext/curl @adoy /ext/date @derickr /ext/dba @Girgias +/ext/dom @nielsdos /ext/ffi @dstogov /ext/gmp @Girgias /ext/imap @Girgias /ext/json @bukka +/ext/libxml @nielsdos /ext/mbstring @alexdowad /ext/opcache @dstogov @iluuu1994 /ext/openssl @bukka From 85a4a80da307a93493829e3b40d5ddd6a66f50a6 Mon Sep 17 00:00:00 2001 From: George Peter Banyard Date: Wed, 14 Jun 2023 05:50:23 +0100 Subject: [PATCH 167/168] ext/imap: Refactor + Update to modern property write API (#11415) Use common functions for creating and setting up similar objects The increase in verbosity compared to the ``add_property_*()`` version is somewhat unfortunate, but the behaviour of ``add_property_str()`` releasing the zend_string is suboptimal, confusing, and has me concerned. 
--- ext/imap/php_imap.c | 929 ++++++++++++++++++++++++++++++-------------- 1 file changed, 630 insertions(+), 299 deletions(-) diff --git a/ext/imap/php_imap.c b/ext/imap/php_imap.c index 7723669417af3..f9badf5585da2 100644 --- a/ext/imap/php_imap.c +++ b/ext/imap/php_imap.c @@ -83,7 +83,7 @@ MAILSTREAM DEFAULTPROTO; #endif static void _php_make_header_object(zval *myzvalue, ENVELOPE *en); -static void _php_imap_add_body(zval *arg, BODY *body); +static void _php_imap_add_body(zval *arg, const BODY *body); static zend_string* _php_imap_parse_address(ADDRESS *addresslist, zval *paddress); static zend_string* _php_rfc822_write_address(ADDRESS *addresslist); @@ -1351,6 +1351,33 @@ PHP_FUNCTION(imap_list) /* }}} */ +static void php_imap_populate_mailbox_object(zval *z_object, const FOBJECTLIST *mailbox) +{ + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "name", strlen("name"), + (char*)mailbox->LTEXT + ); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "attributes", strlen("attributes"), + mailbox->attributes + ); +#ifdef IMAP41 + zend_update_property_str( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "delimiter", strlen("delimiter"), + ZSTR_CHAR((unsigned char)mailbox->delimiter) + ); +#else + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "delimiter", strlen("delimiter"), + mailbox->delimiter + ); +#endif +} + /* {{{ Reads the list of mailboxes and returns a full array of objects containing name, attributes, and delimiter */ /* Author: CJH */ PHP_FUNCTION(imap_getmailboxes) @@ -1359,7 +1386,6 @@ PHP_FUNCTION(imap_getmailboxes) zend_string *ref, *pat; php_imap_object *imap_conn_struct; FOBJECTLIST *cur=NIL; - char *delim=NIL; if (zend_parse_parameters(ZEND_NUM_ARGS(), "OSS", &imap_conn_obj, php_imap_ce, &ref, &pat) == FAILURE) { RETURN_THROWS(); @@ -1377,24 +1403,14 @@ PHP_FUNCTION(imap_getmailboxes) } array_init(return_value); - delim = safe_emalloc(2, sizeof(char), 0); 
cur=IMAPG(imap_folder_objects); while (cur != NIL) { object_init(&mboxob); - add_property_string(&mboxob, "name", (char*)cur->LTEXT); - add_property_long(&mboxob, "attributes", cur->attributes); -#ifdef IMAP41 - delim[0] = (char)cur->delimiter; - delim[1] = 0; - add_property_string(&mboxob, "delimiter", delim); -#else - add_property_string(&mboxob, "delimiter", cur->delimiter); -#endif + php_imap_populate_mailbox_object(&mboxob, cur); php_imap_list_add_object(return_value, &mboxob); cur=cur->next; } mail_free_foblist(&IMAPG(imap_folder_objects), &IMAPG(imap_folder_objects_tail)); - efree(delim); IMAPG(folderlist_style) = FLIST_ARRAY; /* reset to default */ } /* }}} */ @@ -1431,12 +1447,42 @@ PHP_FUNCTION(imap_listscan) /* }}} */ +static void php_imap_populate_mailbox_properties_object(zval *z_object, const MAILSTREAM *imap_stream) +{ + char date[100]; + rfc822_date(date); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Date", strlen("Date"), + date + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Driver", strlen("Driver"), + imap_stream->dtb->name + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Mailbox", strlen("Mailbox"), + imap_stream->mailbox + ); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Nmsgs", strlen("Nmsgs"), + imap_stream->nmsgs + ); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Recent", strlen("Recent"), + imap_stream->recent + ); +} + /* {{{ Get mailbox properties */ PHP_FUNCTION(imap_check) { zval *imap_conn_obj; php_imap_object *imap_conn_struct; - char date[100]; if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &imap_conn_obj, php_imap_ce) == FAILURE) { RETURN_THROWS(); @@ -1449,13 +1495,8 @@ PHP_FUNCTION(imap_check) } if (imap_conn_struct->imap_stream->mailbox) { - rfc822_date(date); object_init(return_value); - add_property_string(return_value, "Date", date); - add_property_string(return_value, 
"Driver", imap_conn_struct->imap_stream->dtb->name); - add_property_string(return_value, "Mailbox", imap_conn_struct->imap_stream->mailbox); - add_property_long(return_value, "Nmsgs", imap_conn_struct->imap_stream->nmsgs); - add_property_long(return_value, "Recent", imap_conn_struct->imap_stream->recent); + php_imap_populate_mailbox_properties_object(return_value, imap_conn_struct->imap_stream); } else { RETURN_FALSE; } @@ -1555,33 +1596,83 @@ PHP_FUNCTION(imap_headerinfo) /* now run through properties that are only going to be returned from a server, not text headers */ - add_property_string(return_value, "Recent", cache->recent ? (cache->seen ? "R": "N") : " "); - add_property_string(return_value, "Unseen", (cache->recent | cache->seen) ? " " : "U"); - add_property_string(return_value, "Flagged", cache->flagged ? "F" : " "); - add_property_string(return_value, "Answered", cache->answered ? "A" : " "); - add_property_string(return_value, "Deleted", cache->deleted ? "D" : " "); - add_property_string(return_value, "Draft", cache->draft ? "X" : " "); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Recent", strlen("Recent"), + cache->recent ? ( + cache->seen ? ZSTR_CHAR('R') : ZSTR_CHAR('N') + ) : ZSTR_CHAR(' ') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Unseen", strlen("Unseen"), + (cache->recent | cache->seen) ? ZSTR_CHAR(' ') : ZSTR_CHAR('U') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Flagged", strlen("Flagged"), + cache->flagged ? ZSTR_CHAR('F') : ZSTR_CHAR(' ') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Answered", strlen("Answered"), + cache->answered ? ZSTR_CHAR('A') : ZSTR_CHAR(' ') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Deleted", strlen("Deleted"), + cache->deleted ? 
ZSTR_CHAR('D') : ZSTR_CHAR(' ') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Draft", strlen("Draft"), + cache->draft ? ZSTR_CHAR('X') : ZSTR_CHAR(' ') + ); snprintf(dummy, sizeof(dummy), "%4ld", cache->msgno); - add_property_string(return_value, "Msgno", dummy); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Msgno", strlen("Msgno"), + dummy + ); mail_date(dummy, cache); - add_property_string(return_value, "MailDate", dummy); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "MailDate", strlen("MailDate"), + dummy + ); snprintf(dummy, sizeof(dummy), "%ld", cache->rfc822_size); - add_property_string(return_value, "Size", dummy); - - add_property_long(return_value, "udate", mail_longdate(cache)); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Size", strlen("Size"), + dummy + ); + + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "udate", strlen("udate"), + mail_longdate(cache) + ); if (en->from && fromlength) { fulladdress[0] = 0x00; mail_fetchfrom(fulladdress, imap_conn_struct->imap_stream, msgno, fromlength); - add_property_string(return_value, "fetchfrom", fulladdress); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "fetchfrom", strlen("fetchfrom"), + fulladdress + ); } if (en->subject && subjectlength) { fulladdress[0] = 0x00; mail_fetchsubject(fulladdress, imap_conn_struct->imap_stream, msgno, subjectlength); - add_property_string(return_value, "fetchsubject", fulladdress); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "fetchsubject", strlen("fetchsubject"), + fulladdress + ); } } /* }}} */ @@ -1654,7 +1745,6 @@ PHP_FUNCTION(imap_getsubscribed) zend_string *ref, *pat; php_imap_object *imap_conn_struct; FOBJECTLIST *cur=NIL; - char *delim=NIL; if (zend_parse_parameters(ZEND_NUM_ARGS(), "OSS", &imap_conn_obj, 
php_imap_ce, &ref, &pat) == FAILURE) { RETURN_THROWS(); @@ -1674,24 +1764,14 @@ PHP_FUNCTION(imap_getsubscribed) } array_init(return_value); - delim = safe_emalloc(2, sizeof(char), 0); cur=IMAPG(imap_sfolder_objects); while (cur != NIL) { object_init(&mboxob); - add_property_string(&mboxob, "name", (char*)cur->LTEXT); - add_property_long(&mboxob, "attributes", cur->attributes); -#ifdef IMAP41 - delim[0] = (char)cur->delimiter; - delim[1] = 0; - add_property_string(&mboxob, "delimiter", delim); -#else - add_property_string(&mboxob, "delimiter", cur->delimiter); -#endif + php_imap_populate_mailbox_object(&mboxob, cur); php_imap_list_add_object(return_value, &mboxob); cur=cur->next; } mail_free_foblist (&IMAPG(imap_sfolder_objects), &IMAPG(imap_sfolder_objects_tail)); - efree(delim); IMAPG(folderlist_style) = FLIST_ARRAY; /* reset to default */ } /* }}} */ @@ -2001,7 +2081,6 @@ PHP_FUNCTION(imap_mailboxmsginfo) { zval *imap_conn_obj; php_imap_object *imap_conn_struct; - char date[100]; unsigned long msgno; zend_ulong unreadmsg = 0, deletedmsg = 0, msize = 0; @@ -2027,15 +2106,23 @@ PHP_FUNCTION(imap_mailboxmsginfo) } msize = msize + cache->rfc822_size; } - add_property_long(return_value, "Unread", unreadmsg); - add_property_long(return_value, "Deleted", deletedmsg); - add_property_long(return_value, "Nmsgs", imap_conn_struct->imap_stream->nmsgs); - add_property_long(return_value, "Size", msize); - rfc822_date(date); - add_property_string(return_value, "Date", date); - add_property_string(return_value, "Driver", imap_conn_struct->imap_stream->dtb->name); - add_property_string(return_value, "Mailbox", imap_conn_struct->imap_stream->mailbox); - add_property_long(return_value, "Recent", imap_conn_struct->imap_stream->recent); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Unread", strlen("Unread"), + unreadmsg + ); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Deleted", strlen("Deleted"), + deletedmsg 
+ ); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Size", strlen("Size"), + msize + ); + + php_imap_populate_mailbox_properties_object(return_value, imap_conn_struct->imap_stream); } /* }}} */ @@ -2078,13 +2165,55 @@ PHP_FUNCTION(imap_rfc822_write_address) } /* }}} */ +static void php_imap_construct_address_object(zval *z_object, const ADDRESS *address) +{ + if (address->mailbox) { + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "mailbox", strlen("mailbox"), + address->mailbox + ); + } + if (address->host) { + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "host", strlen("host"), + address->host + ); + } + if (address->personal) { + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "personal", strlen("personal"), + address->personal + ); + } + if (address->adl) { + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "adl", strlen("adl"), + address->adl + ); + } +} + +static void php_imap_construct_list_of_addresses(zval *list, const ADDRESS *const address_list) +{ + const ADDRESS *current_address = address_list; + do { + zval tmp_object; + object_init(&tmp_object); + php_imap_construct_address_object(&tmp_object, current_address); + php_imap_list_add_object(list, &tmp_object); + } while ((current_address = current_address->next)); +} + /* {{{ Parses an address string */ PHP_FUNCTION(imap_rfc822_parse_adrlist) { - zval tovals; zend_string *str, *defaulthost; char *str_copy; - ADDRESS *addresstmp; + ADDRESS *address_list; ENVELOPE *env; if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS", &str, &defaulthost) == FAILURE) { @@ -2100,28 +2229,14 @@ PHP_FUNCTION(imap_rfc822_parse_adrlist) array_init(return_value); - addresstmp = env->to; + address_list = env->to; - if (addresstmp) do { - object_init(&tovals); - if (addresstmp->mailbox) { - add_property_string(&tovals, "mailbox", addresstmp->mailbox); - } - if (addresstmp->host) { - 
add_property_string(&tovals, "host", addresstmp->host); - } - if (addresstmp->personal) { - add_property_string(&tovals, "personal", addresstmp->personal); - } - if (addresstmp->adl) { - add_property_string(&tovals, "adl", addresstmp->adl); - } - php_imap_list_add_object(return_value, &tovals); - } while ((addresstmp = addresstmp->next)); + if (address_list) { + php_imap_construct_list_of_addresses(return_value, address_list); + } mail_free_envelope(&env); } -/* }}} */ /* {{{ Convert a mime-encoded text to UTF-8 */ PHP_FUNCTION(imap_utf8) @@ -2671,24 +2786,48 @@ PHP_FUNCTION(imap_status) RETURN_THROWS(); } - object_init(return_value); - if (mail_status(imap_conn_struct->imap_stream, ZSTR_VAL(mbx), flags)) { - add_property_long(return_value, "flags", IMAPG(status_flags)); + object_init(return_value); + + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "flags", strlen("flags"), + IMAPG(status_flags) + ); if (IMAPG(status_flags) & SA_MESSAGES) { - add_property_long(return_value, "messages", IMAPG(status_messages)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "messages", strlen("messages"), + IMAPG(status_messages) + ); } if (IMAPG(status_flags) & SA_RECENT) { - add_property_long(return_value, "recent", IMAPG(status_recent)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "recent", strlen("recent"), + IMAPG(status_recent) + ); } if (IMAPG(status_flags) & SA_UNSEEN) { - add_property_long(return_value, "unseen", IMAPG(status_unseen)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "unseen", strlen("unseen"), + IMAPG(status_unseen) + ); } if (IMAPG(status_flags) & SA_UIDNEXT) { - add_property_long(return_value, "uidnext", IMAPG(status_uidnext)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "uidnext", strlen("uidnext"), + IMAPG(status_uidnext) + ); } if (IMAPG(status_flags) & SA_UIDVALIDITY) { - 
add_property_long(return_value, "uidvalidity", IMAPG(status_uidvalidity)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "uidvalidity", strlen("uidvalidity"), + IMAPG(status_uidvalidity) + ); } } else { RETURN_FALSE; @@ -2696,108 +2835,213 @@ PHP_FUNCTION(imap_status) } /* }}} */ -/* {{{ Read the structure of a specified body section of a specific message */ -PHP_FUNCTION(imap_bodystruct) +static void php_imap_populate_body_struct_object(zval *z_object, const BODY *body) { - zval *imap_conn_obj; - zend_long msgno; - zend_string *section; - php_imap_object *imap_conn_struct; - zval parametres, param, dparametres, dparam; - PARAMETER *par, *dpar; - BODY *body; - - if (zend_parse_parameters(ZEND_NUM_ARGS(), "OlS", &imap_conn_obj, php_imap_ce, &msgno, &section) == FAILURE) { - RETURN_THROWS(); - } - - GET_IMAP_STREAM(imap_conn_struct, imap_conn_obj); - - PHP_IMAP_CHECK_MSGNO(msgno, 2); - - body=mail_body(imap_conn_struct->imap_stream, msgno, (unsigned char*)ZSTR_VAL(section)); - if (body == NULL) { - RETURN_FALSE; - } - - object_init(return_value); if (body->type <= TYPEMAX) { - add_property_long(return_value, "type", body->type); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "type", strlen("type"), + body->type + ); } + if (body->encoding <= ENCMAX) { - add_property_long(return_value, "encoding", body->encoding); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "encoding", strlen("encoding"), + body->encoding + ); } if (body->subtype) { - add_property_long(return_value, "ifsubtype", 1); - add_property_string(return_value, "subtype", body->subtype); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifsubtype", strlen("ifsubtype"), + 1 + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "subtype", strlen("subtype"), + body->subtype + ); } else { - add_property_long(return_value, "ifsubtype", 0); + zend_update_property_long( + 
Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifsubtype", strlen("ifsubtype"), + 0 + ); } if (body->description) { - add_property_long(return_value, "ifdescription", 1); - add_property_string(return_value, "description", body->description); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdescription", strlen("ifdescription"), + 1 + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "description", strlen("description"), + body->description + ); } else { - add_property_long(return_value, "ifdescription", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdescription", strlen("ifdescription"), + 0 + ); } + if (body->id) { - add_property_long(return_value, "ifid", 1); - add_property_string(return_value, "id", body->id); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifid", strlen("ifid"), + 1 + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "id", strlen("id"), + body->id + ); } else { - add_property_long(return_value, "ifid", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifid", strlen("ifid"), + 0 + ); } if (body->size.lines) { - add_property_long(return_value, "lines", body->size.lines); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "lines", strlen("lines"), + body->size.lines + ); } + if (body->size.bytes) { - add_property_long(return_value, "bytes", body->size.bytes); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "bytes", strlen("bytes"), + body->size.bytes + ); } + #ifdef IMAP41 if (body->disposition.type) { - add_property_long(return_value, "ifdisposition", 1); - add_property_string(return_value, "disposition", body->disposition.type); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdisposition", strlen("ifdisposition"), + 1 + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + 
"disposition", strlen("disposition"), + body->disposition.type + ); } else { - add_property_long(return_value, "ifdisposition", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdisposition", strlen("ifdisposition"), + 0 + ); } if (body->disposition.parameter) { - dpar = body->disposition.parameter; - add_property_long(return_value, "ifdparameters", 1); - array_init(&dparametres); + PARAMETER *disposition_parameter = body->disposition.parameter; + zval z_disposition_parameter_list; + + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdparameters", strlen("ifdparameters"), + 1 + ); + array_init(&z_disposition_parameter_list); do { - object_init(&dparam); - add_property_string(&dparam, "attribute", dpar->attribute); - add_property_string(&dparam, "value", dpar->value); - php_imap_list_add_object(&dparametres, &dparam); - } while ((dpar = dpar->next)); - php_imap_hash_add_object(return_value, "dparameters", &dparametres); + zval z_disposition_parameter; + object_init(&z_disposition_parameter); + zend_update_property_string( + Z_OBJCE_P(&z_disposition_parameter), Z_OBJ_P(&z_disposition_parameter), + "attribute", strlen("attribute"), + disposition_parameter->attribute + ); + zend_update_property_string( + Z_OBJCE_P(&z_disposition_parameter), Z_OBJ_P(&z_disposition_parameter), + "value", strlen("value"), + disposition_parameter->value + ); + php_imap_list_add_object(&z_disposition_parameter_list, &z_disposition_parameter); + } while ((disposition_parameter = disposition_parameter->next)); + php_imap_hash_add_object(z_object, "dparameters", &z_disposition_parameter_list); } else { - add_property_long(return_value, "ifdparameters", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdparameters", strlen("ifdparameters"), + 0 + ); } #endif - if ((par = body->parameter)) { - add_property_long(return_value, "ifparameters", 1); + PARAMETER *body_parameters = body->parameter; + zval 
z_body_parameter_list; - array_init(&parametres); - do { - object_init(&param); - if (par->attribute) { - add_property_string(&param, "attribute", par->attribute); - } - if (par->value) { - add_property_string(&param, "value", par->value); - } + if (body_parameters) { + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifparameters", strlen("ifparameters"), + 1 + ); - php_imap_list_add_object(&parametres, &param); - } while ((par = par->next)); + array_init(&z_body_parameter_list); + do { + zval z_body_parameter; + object_init(&z_body_parameter); + zend_update_property_string( + Z_OBJCE_P(&z_body_parameter), Z_OBJ_P(&z_body_parameter), + "attribute", strlen("attribute"), + body_parameters->attribute + ); + zend_update_property_string( + Z_OBJCE_P(&z_body_parameter), Z_OBJ_P(&z_body_parameter), + "value", strlen("value"), + body_parameters->value + ); + + php_imap_list_add_object(&z_body_parameter_list, &z_body_parameter); + } while ((body_parameters = body_parameters->next)); } else { - object_init(&parametres); - add_property_long(return_value, "ifparameters", 0); + object_init(&z_body_parameter_list); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifparameters", strlen("ifparameters"), + 0 + ); + } + php_imap_hash_add_object(z_object, "parameters", &z_body_parameter_list); +} + +/* {{{ Read the structure of a specified body section of a specific message */ +PHP_FUNCTION(imap_bodystruct) +{ + zval *imap_conn_obj; + zend_long msgno; + zend_string *section; + php_imap_object *imap_conn_struct; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "OlS", &imap_conn_obj, php_imap_ce, &msgno, &section) == FAILURE) { + RETURN_THROWS(); } - php_imap_hash_add_object(return_value, "parameters", &parametres); + + GET_IMAP_STREAM(imap_conn_struct, imap_conn_obj); + + PHP_IMAP_CHECK_MSGNO(msgno, 2); + + const BODY *body = mail_body(imap_conn_struct->imap_stream, msgno, (unsigned char*)ZSTR_VAL(section)); + if (body == NULL) { + RETURN_FALSE; + } + + 
object_init(return_value); + php_imap_populate_body_struct_object(return_value, body); } /* }}} */ @@ -2837,46 +3081,118 @@ PHP_FUNCTION(imap_fetch_overview) for (i = 1; i <= imap_conn_struct->imap_stream->nmsgs; i++) { if (((elt = mail_elt (imap_conn_struct->imap_stream, i))->sequence) && (env = mail_fetch_structure (imap_conn_struct->imap_stream, i, NIL, NIL))) { + + // TODO Use part _php_make_header_object function? object_init(&myoverview); if (env->subject) { - add_property_string(&myoverview, "subject", env->subject); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "subject", strlen("subject"), + env->subject + ); } if (env->from) { env->from->next=NULL; address =_php_rfc822_write_address(env->from); if (address) { - add_property_str(&myoverview, "from", address); + zend_update_property_str( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "from", strlen("from"), + address + ); + zend_string_release(address); } } if (env->to) { env->to->next = NULL; address = _php_rfc822_write_address(env->to); if (address) { - add_property_str(&myoverview, "to", address); + zend_update_property_str( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "to", strlen("to"), + address + ); + zend_string_release(address); } } if (env->date) { - add_property_string(&myoverview, "date", (char*)env->date); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "date", strlen("date"), + (char*)env->date + ); } if (env->message_id) { - add_property_string(&myoverview, "message_id", env->message_id); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "message_id", strlen("message_id"), + env->message_id + ); } if (env->references) { - add_property_string(&myoverview, "references", env->references); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "references", strlen("references"), + env->references + ); } if (env->in_reply_to) { - add_property_string(&myoverview, 
"in_reply_to", env->in_reply_to); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "in_reply_to", strlen("in_reply_to"), + env->in_reply_to + ); } - add_property_long(&myoverview, "size", elt->rfc822_size); - add_property_long(&myoverview, "uid", mail_uid(imap_conn_struct->imap_stream, i)); - add_property_long(&myoverview, "msgno", i); - add_property_long(&myoverview, "recent", elt->recent); - add_property_long(&myoverview, "flagged", elt->flagged); - add_property_long(&myoverview, "answered", elt->answered); - add_property_long(&myoverview, "deleted", elt->deleted); - add_property_long(&myoverview, "seen", elt->seen); - add_property_long(&myoverview, "draft", elt->draft); - add_property_long(&myoverview, "udate", mail_longdate(elt)); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "size", strlen("size"), + elt->rfc822_size + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "uid", strlen("uid"), + mail_uid(imap_conn_struct->imap_stream, i) + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "msgno", strlen("msgno"), + i + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "recent", strlen("recent"), + elt->recent + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "flagged", strlen("flagged"), + elt->flagged + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "answered", strlen("answered"), + elt->answered + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "deleted", strlen("deleted"), + elt->deleted + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "seen", strlen("seen"), + elt->seen + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "draft", strlen("draft"), + elt->draft + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), 
Z_OBJ_P(&myoverview), + "udate", strlen("udate"), + mail_longdate(elt) + ); php_imap_list_add_object(return_value, &myoverview); } } @@ -3707,6 +4023,21 @@ PHP_FUNCTION(imap_last_error) } /* }}} */ +static void php_imap_populate_mime_header_object(zval *z_object, const char *charset, const char *content) +{ + ZEND_ASSERT(z_object && "Object must be initialized"); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "charset", strlen("charset"), + charset + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "text", strlen("text"), + content + ); +} + /* {{{ Decode mime header element in accordance with RFC 2047 and return array of objects containing 'charset' encoding and decoded 'text' */ PHP_FUNCTION(imap_mime_header_decode) { @@ -3736,8 +4067,7 @@ PHP_FUNCTION(imap_mime_header_decode) memcpy(text, &string[offset], charset_token-offset); text[charset_token - offset] = 0x00; object_init(&myobject); - add_property_string(&myobject, "charset", "default"); - add_property_string(&myobject, "text", text); + php_imap_populate_mime_header_object(&myobject, "default", text); zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &myobject); } if ((encoding_token = (zend_long)php_memnstr(&string[charset_token+2], "?", 1, string+end))) { /* Find token for encoding */ @@ -3762,8 +4092,7 @@ PHP_FUNCTION(imap_mime_header_decode) RETURN_FALSE; } object_init(&myobject); - add_property_string(&myobject, "charset", charset); - add_property_string(&myobject, "text", decode); + php_imap_populate_mime_header_object(&myobject, charset, decode); zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &myobject); /* only free decode if it was allocated by rfc822_qprint or rfc822_base64 */ @@ -3791,8 +4120,7 @@ PHP_FUNCTION(imap_mime_header_decode) memcpy(text, &string[charset_token], end - charset_token); /* Extract unencoded text from string */ text[end - charset_token] = 0x00; object_init(&myobject); - add_property_string(&myobject, 
"charset", "default"); - add_property_string(&myobject, "text", text); + php_imap_populate_mime_header_object(&myobject, "default", text); zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &myobject); offset = end; /* We have reached the end of the string. */ @@ -3909,25 +4237,16 @@ static zend_string* _php_rfc822_write_address(ADDRESS *addresslist) /* }}} */ #endif /* {{{ _php_imap_parse_address */ -static zend_string* _php_imap_parse_address (ADDRESS *addresslist, zval *paddress) +static zend_string* _php_imap_parse_address (ADDRESS *address_list, zval *paddress) { zend_string *fulladdress; ADDRESS *addresstmp; - zval tmpvals; - addresstmp = addresslist; + addresstmp = address_list; fulladdress = _php_rfc822_write_address(addresstmp); - addresstmp = addresslist; - do { - object_init(&tmpvals); - if (addresstmp->personal) add_property_string(&tmpvals, "personal", addresstmp->personal); - if (addresstmp->adl) add_property_string(&tmpvals, "adl", addresstmp->adl); - if (addresstmp->mailbox) add_property_string(&tmpvals, "mailbox", addresstmp->mailbox); - if (addresstmp->host) add_property_string(&tmpvals, "host", addresstmp->host); - php_imap_list_add_object(paddress, &tmpvals); - } while ((addresstmp = addresstmp->next)); + php_imap_construct_list_of_addresses(paddress, address_list); return fulladdress; } /* }}} */ @@ -3940,22 +4259,83 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) object_init(myzvalue); - if (en->remail) add_property_string(myzvalue, "remail", en->remail); - if (en->date) add_property_string(myzvalue, "date", (char*)en->date); - if (en->date) add_property_string(myzvalue, "Date", (char*)en->date); - if (en->subject) add_property_string(myzvalue, "subject", en->subject); - if (en->subject) add_property_string(myzvalue, "Subject", en->subject); - if (en->in_reply_to) add_property_string(myzvalue, "in_reply_to", en->in_reply_to); - if (en->message_id) add_property_string(myzvalue, "message_id", en->message_id); - if 
(en->newsgroups) add_property_string(myzvalue, "newsgroups", en->newsgroups); - if (en->followup_to) add_property_string(myzvalue, "followup_to", en->followup_to); - if (en->references) add_property_string(myzvalue, "references", en->references); + if (en->remail) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "remail", strlen("remail"), + en->remail + ); + } + if (en->date) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "date", strlen("date"), + (char*)en->date + ); + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "Date", strlen("Date"), + (char*)en->date + ); + } + if (en->subject) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "subject", strlen("subject"), + en->subject + ); + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "Subject", strlen("Subject"), + en->subject + ); + } + if (en->in_reply_to) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "in_reply_to", strlen("in_reply_to"), + en->in_reply_to + ); + } + if (en->message_id) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "message_id", strlen("message_id"), + en->message_id + ); + } + if (en->newsgroups) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "newsgroups", strlen("newsgroups"), + en->newsgroups + ); + } + if (en->followup_to) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "followup_to", strlen("followup_to"), + en->followup_to + ); + } + if (en->references) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "references", strlen("references"), + en->references + ); + } if (en->to) { array_init(&paddress); fulladdress = _php_imap_parse_address(en->to, &paddress); if (fulladdress) { - add_property_str(myzvalue, "toaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "toaddress", 
strlen("toaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "to", &paddress); } @@ -3964,7 +4344,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->from, &paddress); if (fulladdress) { - add_property_str(myzvalue, "fromaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "fromaddress", strlen("fromaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "from", &paddress); } @@ -3973,7 +4358,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->cc, &paddress); if (fulladdress) { - add_property_str(myzvalue, "ccaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "ccaddress", strlen("ccaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "cc", &paddress); } @@ -3982,7 +4372,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->bcc, &paddress); if (fulladdress) { - add_property_str(myzvalue, "bccaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "bccaddress", strlen("bccaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "bcc", &paddress); } @@ -3991,7 +4386,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->reply_to, &paddress); if (fulladdress) { - add_property_str(myzvalue, "reply_toaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "reply_toaddress", strlen("reply_toaddress"), + fulladdress + ); + zend_string_release(fulladdress); } 
php_imap_hash_add_object(myzvalue, "reply_to", &paddress); } @@ -4000,7 +4400,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->sender, &paddress); if (fulladdress) { - add_property_str(myzvalue, "senderaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "senderaddress", strlen("senderaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "sender", &paddress); } @@ -4009,7 +4414,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->return_path, &paddress); if (fulladdress) { - add_property_str(myzvalue, "return_pathaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "return_pathaddress", strlen("return_pathaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "return_path", &paddress); // From rebase might need? 
@@ -4019,113 +4429,34 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) /* }}} */ /* {{{ _php_imap_add_body */ -void _php_imap_add_body(zval *arg, BODY *body) +void _php_imap_add_body(zval *arg, const BODY *body) { - zval parametres, param, dparametres, dparam; - PARAMETER *par, *dpar; - PART *part; - - if (body->type <= TYPEMAX) { - add_property_long(arg, "type", body->type); - } - - if (body->encoding <= ENCMAX) { - add_property_long(arg, "encoding", body->encoding); - } - - if (body->subtype) { - add_property_long(arg, "ifsubtype", 1); - add_property_string(arg, "subtype", body->subtype); - } else { - add_property_long(arg, "ifsubtype", 0); - } - - if (body->description) { - add_property_long(arg, "ifdescription", 1); - add_property_string(arg, "description", body->description); - } else { - add_property_long(arg, "ifdescription", 0); - } - - if (body->id) { - add_property_long(arg, "ifid", 1); - add_property_string(arg, "id", body->id); - } else { - add_property_long(arg, "ifid", 0); - } - - if (body->size.lines) { - add_property_long(arg, "lines", body->size.lines); - } - - if (body->size.bytes) { - add_property_long(arg, "bytes", body->size.bytes); - } - -#ifdef IMAP41 - if (body->disposition.type) { - add_property_long(arg, "ifdisposition", 1); - add_property_string(arg, "disposition", body->disposition.type); - } else { - add_property_long(arg, "ifdisposition", 0); - } - - if (body->disposition.parameter) { - dpar = body->disposition.parameter; - add_property_long(arg, "ifdparameters", 1); - array_init(&dparametres); - do { - object_init(&dparam); - add_property_string(&dparam, "attribute", dpar->attribute); - add_property_string(&dparam, "value", dpar->value); - php_imap_list_add_object(&dparametres, &dparam); - } while ((dpar = dpar->next)); - php_imap_hash_add_object(arg, "dparameters", &dparametres); - } else { - add_property_long(arg, "ifdparameters", 0); - } -#endif - - if ((par = body->parameter)) { - add_property_long(arg,
"ifparameters", 1); - - array_init(&parametres); - do { - object_init(&param); - if (par->attribute) { - add_property_string(&param, "attribute", par->attribute); - } - if (par->value) { - add_property_string(&param, "value", par->value); - } - - php_imap_list_add_object(&parametres, &param); - } while ((par = par->next)); - } else { - object_init(&parametres); - add_property_long(arg, "ifparameters", 0); - } - php_imap_hash_add_object(arg, "parameters", &parametres); + php_imap_populate_body_struct_object(arg, body); /* multipart message ? */ if (body->type == TYPEMULTIPART) { - array_init(&parametres); - for (part = body->CONTENT_PART; part; part = part->next) { - object_init(&param); - _php_imap_add_body(&param, &part->body); - php_imap_list_add_object(&parametres, &param); + zval z_content_part_list; + + array_init(&z_content_part_list); + for (const PART *content_part = body->CONTENT_PART; content_part; content_part = content_part->next) { + zval z_content_part; + object_init(&z_content_part); + _php_imap_add_body(&z_content_part, &content_part->body); + php_imap_list_add_object(&z_content_part_list, &z_content_part); } - php_imap_hash_add_object(arg, "parts", &parametres); + php_imap_hash_add_object(arg, "parts", &z_content_part_list); } /* encapsulated message ?
*/ if ((body->type == TYPEMESSAGE) && (!strcasecmp(body->subtype, "rfc822"))) { - body = body->CONTENT_MSG_BODY; - array_init(&parametres); - object_init(&param); - _php_imap_add_body(&param, body); - php_imap_list_add_object(&parametres, &param); - php_imap_hash_add_object(arg, "parts", &parametres); + zval message_list, message; + + const BODY *message_body = body->CONTENT_MSG_BODY; + array_init(&message_list); + object_init(&message); + _php_imap_add_body(&message, message_body); + php_imap_list_add_object(&message_list, &message); + php_imap_hash_add_object(arg, "parts", &message_list); } } /* }}} */ From 96ea06a1d9b115a138bd9e16a3ecc7901ae3abf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Kocsis?= Date: Thu, 15 Jun 2023 01:06:29 +0200 Subject: [PATCH 168/168] Add test for GH-11423 --- ext/zend_test/tests/gh11423.phpt | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 ext/zend_test/tests/gh11423.phpt diff --git a/ext/zend_test/tests/gh11423.phpt b/ext/zend_test/tests/gh11423.phpt new file mode 100644 index 0000000000000..3c153229b3976 --- /dev/null +++ b/ext/zend_test/tests/gh11423.phpt @@ -0,0 +1,29 @@ +--TEST-- +GH-11423 (internal constants have their namespace lowercased) +--EXTENSIONS-- +zend_test +--FILE-- +getConstants()); + +define('NS1\ns2\Const1','value1'); +var_dump(get_defined_constants(true)["user"]); + +?> +--EXPECT-- +array(4) { + ["ZEND_TEST_DEPRECATED"]=> + int(42) + ["ZEND_CONSTANT_A"]=> + string(6) "global" + ["zendtestns2\ZEND_CONSTANT_A"]=> + string(10) "namespaced" + ["zendtestns2\zendsubns\ZEND_CONSTANT_A"]=> + string(10) "namespaced" +} +array(1) { + ["ns1\ns2\Const1"]=> + string(6) "value1" +}