diff --git a/.cirrus.yml b/.cirrus.yml index 0b5c8c0ad361a..4478d945ed937 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -385,6 +385,7 @@ arm_task: -d opcache.jit=function -P -q -x -j2 -g FAIL,BORK,LEAK,XLEAK + --no-progress --offline --show-diff --show-slow 1000 @@ -397,6 +398,7 @@ arm_task: -d opcache.jit=tracing -P -q -x -j2 -g FAIL,BORK,LEAK,XLEAK + --no-progress --offline --show-diff --show-slow 1000 diff --git a/.gitattributes b/.gitattributes index f8b91505bc983..49d5471ff6646 100644 --- a/.gitattributes +++ b/.gitattributes @@ -22,7 +22,7 @@ # Collapse generated files within a pull request. **/*_arginfo.h linguist-generated /Zend/zend_vm_execute.h linguist-generated -/Zend/zend_vm_opcodes.{h,c} linguist-generated +/Zend/zend_vm_opcodes.[ch] linguist-generated # The OSS fuzz files are bunary /ext/date/tests/ossfuzz*.txt binary diff --git a/CODEOWNERS b/CODEOWNERS index befcdd6fce863..ef06da93a1f50 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -14,10 +14,12 @@ /ext/curl @adoy /ext/date @derickr /ext/dba @Girgias +/ext/dom @nielsdos /ext/ffi @dstogov /ext/gmp @Girgias /ext/imap @Girgias /ext/json @bukka +/ext/libxml @nielsdos /ext/mbstring @alexdowad /ext/opcache @dstogov @iluuu1994 /ext/openssl @bukka @@ -26,6 +28,7 @@ /ext/session @Girgias /ext/sockets @devnexen /ext/spl @Girgias +/ext/standard @bukka /main @bukka /sapi/fpm @bukka /Zend @iluuu1994 diff --git a/NEWS b/NEWS index 92fe365e3ab6b..b68559737ab30 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,26 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| -?? ??? ????, PHP 8.3.0alpha1 +?? ??? ????, PHP 8.3.0alpha2 + +- Core: + . Fix GH-11388 (Allow "final" modifier when importing a method from a trait). + (nielsdos) + . Fixed bug GH-11406 (segfault with unpacking and magic method closure). + (nielsdos) + +- DOM: + . Fix #79700 (wrong use of libxml oldNs leads to performance problem). + (nielsdos) + . 
Fix #77894 (DOMNode::C14N() very slow on generated DOMDocuments even after + normalisation). (nielsdos) + +- Streams: + . Implement GH-8641 (STREAM_NOTIFY_COMPLETED over HTTP never emitted). + (nielsdos, Jakub Zelenka) + . Fix bug GH-10406 (fgets on a redis socket connection fails on PHP 8.3). + (Jakub Zelenka) + +08 Jun 2023, PHP 8.3.0alpha1 - CLI: . Added pdeathsig to builtin server to terminate workers when the master @@ -35,10 +55,21 @@ PHP NEWS has inherited it from its parent). (ilutov) . Fix bug GH-11154 (Negative indices on empty array don't affect next chosen index). (ColinHDev) + . Fix bug GH-8846 (Implement delayed early binding for classes without + parents). (ilutov) + . Fix bug #79836 (Segfault in concat_function). (nielsdos) + . Fix bug #81705 (type confusion/UAF on set_error_handler with concat + operation). (nielsdos) + . Fix GH-11348 (Closure created from magic method does not accept named + arguments). (nielsdos) - Date: . Implement More Appropriate Date/Time Exceptions RFC. (Derick) +- DOM: + . Fix bug GH-8388 (DOMAttr unescapes character reference). (Tim Starling) + . Fix bug GH-11308 (getElementsByTagName() is O(N^2)). (nielsdos) + - Exif: . Removed unneeded codepaths in exif_process_TIFF_in_JPEG(). (nielsdos) @@ -115,6 +146,12 @@ PHP NEWS . Added memfd api usage, on Linux, for zend_shared_alloc_create_lock() to create an abstract anonymous file for the opcache's lock. (Max Kellermann) +- OpenSSL: + . Added OPENSSL_CMS_OLDMIMETYPE and PKCS7_NOOLDMIMETYPE constants to switch + between mime content types. (Daniel Kesselberg) + . Fixed GH-11054: Reset OpenSSL errors when using a PEM public key. + (Florian Moser) + - PCNTL: . SA_ONSTACK is now set for pcntl_signal. (Kévin Dunglas) . Added SIGINFO constant. (David Carlier) @@ -126,6 +163,11 @@ PHP NEWS . pg_cancel use thread safe PQcancel api instead. (David Carlier) . pg_trace new PGSQL_TRACE_SUPPRESS_TIMESTAMPS/PGSQL_TRACE_REGRESS_MODE contants support. (David Carlier) + . 
pg_set_error_verbosity adding PGSQL_ERRORS_SQLSTATE constant. (David Carlier) + . pg_convert/pg_insert E_WARNING on type errors have been converted to + ValueError/TypeError exceptions. (David Carlier) + . Added pg_set_error_context_visibility to set the context's visibility + within the error messages. (David Carlier) - Phar: . Fix memory leak in phar_rename_archive(). (stkeke) @@ -152,6 +194,10 @@ PHP NEWS . Fix Segfault when using ReflectionFiber suspended by an internal function. (danog) +- SAPI: + . Fixed GH-11141 (Could not open input file: should be sent to stderr). + (nielsdos) + - Sockets: . Added SO_ATTACH_REUSEPORT_CBPF socket option, to give tighter control over socket binding for a cpu core. (David Carlier) @@ -184,10 +230,14 @@ PHP NEWS . Fix GH-11010 (parse_ini_string() now preserves formatting of unquoted strings starting with numbers when the INI_SCANNER_TYPED flag is specified). (ilutov) + . Fix GH-10742 (http_response_code emits no error when headers were already + sent). (NattyNarwhal) - Streams: . Fixed bug #51056: blocking fread() will block even if data is available. (Jakub Zelenka) + . Added storing of the original path used to open xport stream. + (Luc Vieillescazes) - XSLTProcessor: . Fixed bug #69168 (DomNode::getNodePath() returns invalid path). (nielsdos) diff --git a/UPGRADING b/UPGRADING index a3f6a12f3dc09..4d292bb810087 100644 --- a/UPGRADING +++ b/UPGRADING @@ -41,6 +41,12 @@ PHP 8.3 UPGRADE NOTES property to the class directly without traits. . Assigning a negative index n to an empty array will now make sure that the next index is n+1 instead of 0. + . Static variable initializers can now contain arbitrary expressions. + RFC: https://wiki.php.net/rfc/arbitrary_static_variable_initializers + +- DOM: + . Assignment to DOMAttr::$value and DOMAttr::$nodeValue no longer expands + entities in the new value. - FFI: . 
C functions that have a return type of void now return null instead of @@ -56,6 +62,8 @@ PHP 8.3 UPGRADE NOTES RFC: https://wiki.php.net/rfc/readonly_amendments . Class, interface, trait, and enum constants now support type declarations. RFC: https://wiki.php.net/rfc/typed_class_constants + . Closures created from magic methods can now accept named arguments. + . The final modifier may now be used when using a method from a trait. - Posix . posix_getrlimit() now takes an optional $res parameter to allow fetching a @@ -64,6 +72,10 @@ PHP 8.3 UPGRADE NOTES . posix_ttyname() now raises type warnings for integers following the usual ZPP semantics and value warnings for invalid file descriptor integers. +- Streams + . Streams can now emit the STREAM_NOTIFY_COMPLETED notification. This was previously + not implemented. + ======================================== 3. Changes in SAPI modules ======================================== @@ -144,6 +156,8 @@ PHP 8.3 UPGRADE NOTES . pg_fetch_object now raises a ValueError instead of an Exception when the constructor_args argument is non empty with the class not having constructor. . pg_insert now raises a ValueError instead of a WARNING when the table specified is invalid. + . pg_insert and pg_convert raise a ValueError or a TypeError instead of a WARNING when the + value/type of a field does not properly match a PostgreSQL type. - Standard: . E_NOTICEs emitted by unserialize() have been promoted to E_WARNING. @@ -178,6 +192,9 @@ PHP 8.3 UPGRADE NOTES . Added posix_fpathconf call to get configuration value from a file descriptor. . Added posix_eaccess call to check the effective user id's permission for a path. +- PGSQL: + . Added pg_set_error_context_visibility to set the visibility of the context in error messages. + - Random: . Added Randomizer::getBytesFromString(). RFC: https://wiki.php.net/rfc/randomizer_additions @@ -214,12 +231,17 @@ PHP 8.3 UPGRADE NOTES . MIXED_NUMBERS (Spoofchecker). . 
HIDDEN_OVERLAY (Spoofchecker). +- OpenSSL: + . OPENSSL_CMS_OLDMIMETYPE + . PKCS7_NOOLDMIMETYPE + - PCNTL: . SIGINFO - PGSQL: . PGSQL_TRACE_SUPPRESS_TIMESTAMPS. . PGSQL_TRACE_REGRESS_MODE. + . PGSQL_ERRORS_SQLSTATE. - Posix: . POSIX_SC_ARG_MAX. diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index b4675e22215e9..b1a9188748fad 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -116,6 +116,31 @@ PHP 8.3 INTERNALS UPGRADE NOTES - The PHPAPI spl_iterator_apply() function now returns zend_result instead of int. There are no functional changes. + f. ext/dom + - A new function dom_get_doc_props_read_only() is added to gather the document + properties in a read-only way. This function avoids allocation when there are + no document properties changed yet. + - The node list returned by DOMNode::getElementsByTagName() and + DOMNode::getElementsByTagNameNS() now caches the length and the last requested item. + This means that the length and the last requested item are not recalculated + when the node list is iterated over multiple times. + If you do not use the internal PHP dom APIs to modify the document, you need to + manually invalidate the cache using php_libxml_invalidate_node_list_cache_from_doc(). + Furthermore, the following internal APIs were added to handle the cache: + . php_dom_is_cache_tag_stale_from_doc_ptr() + . php_dom_is_cache_tag_stale_from_node() + . php_dom_mark_cache_tag_up_to_date_from_node() + - The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate + the base node of the node list. This function also no longer accepts -1 as the index argument. + - The function dom_namednode_iter() has additional arguments to avoid recomputing the length of + the strings. + - The functions dom_parent_node_prepend(), dom_parent_node_append(), dom_parent_node_after(), and + dom_parent_node_before() now use an uint32_t argument for the number of nodes instead of int. + + g. 
ext/libxml + - Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and + php_libxml_invalidate_node_list_cache() were added to invalidate the cache of a node list. + ======================== 4. OpCode changes ======================== diff --git a/Zend/Optimizer/block_pass.c b/Zend/Optimizer/block_pass.c index 22dd3c7f7b6e1..ccb32e2e453d4 100644 --- a/Zend/Optimizer/block_pass.c +++ b/Zend/Optimizer/block_pass.c @@ -264,6 +264,10 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array break; case ZEND_FREE: + /* Note: Only remove the source if the source is local to this block. + * If it's not local, then the other blocks successors must also eventually either FREE or consume the temporary, + * hence removing the temporary is not safe in the general case, especially when other consumers are not FREE. + * A FREE may not be removed without also removing the source's result, because otherwise that would cause a memory leak. */ if (opline->op1_type == IS_TMP_VAR) { src = VAR_SOURCE(opline->op1); if (src) { @@ -272,6 +276,7 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array case ZEND_BOOL_NOT: /* T = BOOL(X), FREE(T) => T = BOOL(X) */ /* The remaining BOOL is removed by a separate optimization */ + /* The source is a bool, no source removals take place, so this may be done non-locally. */ VAR_SOURCE(opline->op1) = NULL; MAKE_NOP(opline); ++(*opt_count); @@ -290,6 +295,9 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array case ZEND_PRE_DEC_OBJ: case ZEND_PRE_INC_STATIC_PROP: case ZEND_PRE_DEC_STATIC_PROP: + if (src < op_array->opcodes + block->start) { + break; + } src->result_type = IS_UNUSED; VAR_SOURCE(opline->op1) = NULL; MAKE_NOP(opline); @@ -302,7 +310,7 @@ static void zend_optimize_block(zend_basic_block *block, zend_op_array *op_array } else if (opline->op1_type == IS_VAR) { src = VAR_SOURCE(opline->op1); /* V = OP, FREE(V) => OP. 
NOP */ - if (src && + if (src >= op_array->opcodes + block->start && src->opcode != ZEND_FETCH_R && src->opcode != ZEND_FETCH_STATIC_PROP_R && src->opcode != ZEND_FETCH_DIM_R && @@ -1002,6 +1010,7 @@ static void assemble_code_blocks(zend_cfg *cfg, zend_op_array *op_array, zend_op case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: ZEND_SET_OP_JMP_ADDR(opline, opline->op2, new_opcodes + blocks[b->successors[0]].start); break; case ZEND_CATCH: diff --git a/Zend/Optimizer/dce.c b/Zend/Optimizer/dce.c index 15c9cf5e6e22a..219b139cfc874 100644 --- a/Zend/Optimizer/dce.c +++ b/Zend/Optimizer/dce.c @@ -145,6 +145,7 @@ static inline bool may_have_side_effects( case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: /* For our purposes a jumps and branches are side effects. */ return 1; case ZEND_BEGIN_SILENCE: @@ -245,15 +246,9 @@ static inline bool may_have_side_effects( if ((opline->extended_value & (ZEND_BIND_IMPLICIT|ZEND_BIND_EXPLICIT))) { return 1; } - - if ((opline->extended_value & ZEND_BIND_REF) != 0) { - zval *value = - (zval*)((char*)op_array->static_variables->arData + - (opline->extended_value & ~ZEND_BIND_REF)); - if (Z_TYPE_P(value) == IS_CONSTANT_AST) { - /* AST may contain undefined constants */ - return 1; - } + /* Modifies static variables which are observable through reflection */ + if ((opline->extended_value & ZEND_BIND_REF) && opline->op2_type != IS_UNUSED) { + return 1; } } return 0; diff --git a/Zend/Optimizer/dfa_pass.c b/Zend/Optimizer/dfa_pass.c index 3ed507c35b070..b1f568da5d920 100644 --- a/Zend/Optimizer/dfa_pass.c +++ b/Zend/Optimizer/dfa_pass.c @@ -652,6 +652,7 @@ static void zend_ssa_replace_control_link(zend_op_array *op_array, zend_ssa *ssa case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: if (ZEND_OP2_JMP_ADDR(opline) == op_array->opcodes + old->start) { ZEND_SET_OP_JMP_ADDR(opline, 
opline->op2, op_array->opcodes + dst->start); } diff --git a/Zend/Optimizer/pass1.c b/Zend/Optimizer/pass1.c index 00bc30160ab7b..818829fcdf6ba 100644 --- a/Zend/Optimizer/pass1.c +++ b/Zend/Optimizer/pass1.c @@ -354,6 +354,7 @@ void zend_optimizer_pass1(zend_op_array *op_array, zend_optimizer_ctx *ctx) case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: case ZEND_VERIFY_NEVER_TYPE: + case ZEND_BIND_INIT_STATIC_OR_JMP: collect_constants = 0; break; } diff --git a/Zend/Optimizer/sccp.c b/Zend/Optimizer/sccp.c index f6144f87b4fac..c35c60fd6be93 100644 --- a/Zend/Optimizer/sccp.c +++ b/Zend/Optimizer/sccp.c @@ -249,6 +249,7 @@ static bool can_replace_op1( case ZEND_ROPE_ADD: case ZEND_ROPE_END: case ZEND_BIND_STATIC: + case ZEND_BIND_INIT_STATIC_OR_JMP: case ZEND_BIND_GLOBAL: case ZEND_MAKE_REF: case ZEND_UNSET_CV: @@ -1773,6 +1774,7 @@ static void sccp_mark_feasible_successors( case ZEND_CATCH: case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: + case ZEND_BIND_INIT_STATIC_OR_JMP: scdf_mark_edge_feasible(scdf, block_num, block->successors[0]); scdf_mark_edge_feasible(scdf, block_num, block->successors[1]); return; diff --git a/Zend/Optimizer/zend_cfg.c b/Zend/Optimizer/zend_cfg.c index 219738e6f692b..ce7d078bb957e 100644 --- a/Zend/Optimizer/zend_cfg.c +++ b/Zend/Optimizer/zend_cfg.c @@ -369,6 +369,7 @@ ZEND_API void zend_build_cfg(zend_arena **arena, const zend_op_array *op_array, case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: BB_START(OP_JMP_ADDR(opline, opline->op2) - op_array->opcodes); BB_START(i + 1); break; @@ -522,6 +523,7 @@ ZEND_API void zend_build_cfg(zend_arena **arena, const zend_op_array *op_array, case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: block->successors_count = 2; block->successors[0] = block_map[OP_JMP_ADDR(opline, opline->op2) - op_array->opcodes]; block->successors[1] = j + 1; diff --git a/Zend/Optimizer/zend_dfg.c b/Zend/Optimizer/zend_dfg.c index 
2207b594b85a5..93faec6a4d86d 100644 --- a/Zend/Optimizer/zend_dfg.c +++ b/Zend/Optimizer/zend_dfg.c @@ -150,6 +150,7 @@ static zend_always_inline void _zend_dfg_add_use_def_op(const zend_op_array *op_ case ZEND_POST_DEC: case ZEND_BIND_GLOBAL: case ZEND_BIND_STATIC: + case ZEND_BIND_INIT_STATIC_OR_JMP: case ZEND_SEND_VAR_NO_REF: case ZEND_SEND_VAR_NO_REF_EX: case ZEND_SEND_VAR_EX: diff --git a/Zend/Optimizer/zend_dump.c b/Zend/Optimizer/zend_dump.c index bc697ba8ba9e5..9eaca19f18f57 100644 --- a/Zend/Optimizer/zend_dump.c +++ b/Zend/Optimizer/zend_dump.c @@ -23,6 +23,7 @@ #include "zend_func_info.h" #include "zend_call_graph.h" #include "zend_dump.h" +#include "ext/standard/php_string.h" void zend_dump_ht(HashTable *ht) { @@ -65,8 +66,12 @@ void zend_dump_const(const zval *zv) case IS_DOUBLE: fprintf(stderr, " float(%g)", Z_DVAL_P(zv)); break; - case IS_STRING: - fprintf(stderr, " string(\"%s\")", Z_STRVAL_P(zv)); + case IS_STRING:; + zend_string *escaped_string = php_addcslashes(Z_STR_P(zv), "\"\\", 2); + + fprintf(stderr, " string(\"%s\")", ZSTR_VAL(escaped_string)); + + zend_string_release(escaped_string); break; case IS_ARRAY: fprintf(stderr, " array(...)"); diff --git a/Zend/Optimizer/zend_func_infos.h b/Zend/Optimizer/zend_func_infos.h index a3475fab6cc8f..34b8b9c4cbbf6 100644 --- a/Zend/Optimizer/zend_func_infos.h +++ b/Zend/Optimizer/zend_func_infos.h @@ -227,7 +227,7 @@ static const func_info_t func_infos[] = { F1("mb_strtoupper", MAY_BE_STRING), F1("mb_strtolower", MAY_BE_STRING), F1("mb_detect_encoding", MAY_BE_STRING|MAY_BE_FALSE), - F1("mb_list_encodings", MAY_BE_ARRAY|MAY_BE_ARRAY_KEY_LONG|MAY_BE_ARRAY_OF_STRING), + FN("mb_list_encodings", MAY_BE_ARRAY|MAY_BE_ARRAY_KEY_LONG|MAY_BE_ARRAY_OF_STRING), F1("mb_encoding_aliases", MAY_BE_ARRAY|MAY_BE_ARRAY_KEY_LONG|MAY_BE_ARRAY_OF_STRING), F1("mb_encode_mimeheader", MAY_BE_STRING), F1("mb_decode_mimeheader", MAY_BE_STRING), diff --git a/Zend/Optimizer/zend_inference.c b/Zend/Optimizer/zend_inference.c index 
afe1c2339ed3a..f7298e7b43dba 100644 --- a/Zend/Optimizer/zend_inference.c +++ b/Zend/Optimizer/zend_inference.c @@ -2944,6 +2944,10 @@ static zend_always_inline zend_result _zend_update_type_info( } UPDATE_SSA_TYPE(tmp, ssa_op->op1_def); break; + case ZEND_BIND_INIT_STATIC_OR_JMP: + tmp = MAY_BE_UNDEF | MAY_BE_ANY | MAY_BE_ARRAY_KEY_ANY | MAY_BE_ARRAY_OF_ANY | MAY_BE_ARRAY_OF_REF | MAY_BE_REF; + UPDATE_SSA_TYPE(tmp, ssa_op->op1_def); + break; case ZEND_SEND_VAR: if (ssa_op->op1_def >= 0) { tmp = t1; @@ -4363,6 +4367,7 @@ static void zend_mark_cv_references(const zend_op_array *op_array, const zend_sc case ZEND_SEND_REF: case ZEND_SEND_VAR_EX: case ZEND_SEND_FUNC_ARG: + case ZEND_BIND_INIT_STATIC_OR_JMP: break; case ZEND_INIT_ARRAY: case ZEND_ADD_ARRAY_ELEMENT: @@ -4518,6 +4523,7 @@ ZEND_API bool zend_may_throw_ex(const zend_op *opline, const zend_ssa_op *ssa_op case ZEND_ASSIGN_REF: case ZEND_BIND_GLOBAL: case ZEND_BIND_STATIC: + case ZEND_BIND_INIT_STATIC_OR_JMP: case ZEND_FETCH_DIM_IS: case ZEND_FETCH_OBJ_IS: case ZEND_SEND_REF: @@ -4755,14 +4761,12 @@ ZEND_API bool zend_may_throw_ex(const zend_op *opline, const zend_ssa_op *ssa_op case ZEND_UNSET_VAR: return (t1 & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_RESOURCE|MAY_BE_ARRAY_OF_ARRAY)); case ZEND_BIND_STATIC: + case ZEND_BIND_INIT_STATIC_OR_JMP: if (t1 & (MAY_BE_OBJECT|MAY_BE_RESOURCE|MAY_BE_ARRAY_OF_OBJECT|MAY_BE_ARRAY_OF_RESOURCE|MAY_BE_ARRAY_OF_ARRAY)) { /* Destructor may throw. */ return 1; - } else { - zval *value = (zval*)((char*)op_array->static_variables->arData + (opline->extended_value & ~(ZEND_BIND_REF|ZEND_BIND_IMPLICIT|ZEND_BIND_EXPLICIT))); - /* May throw if initializer is CONSTANT_AST. 
*/ - return Z_TYPE_P(value) == IS_CONSTANT_AST; } + return 0; case ZEND_ASSIGN_DIM: if ((opline+1)->op1_type == IS_CV) { if (_ssa_op1_info(op_array, ssa, opline+1, ssa_op+1) & MAY_BE_UNDEF) { diff --git a/Zend/Optimizer/zend_optimizer.c b/Zend/Optimizer/zend_optimizer.c index 956a13d658399..463bbbfa84b45 100644 --- a/Zend/Optimizer/zend_optimizer.c +++ b/Zend/Optimizer/zend_optimizer.c @@ -720,6 +720,7 @@ void zend_optimizer_migrate_jump(zend_op_array *op_array, zend_op *new_opline, z case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: ZEND_SET_OP_JMP_ADDR(new_opline, new_opline->op2, ZEND_OP2_JMP_ADDR(opline)); break; case ZEND_FE_FETCH_R: @@ -763,6 +764,7 @@ void zend_optimizer_shift_jump(zend_op_array *op_array, zend_op *opline, uint32_ case ZEND_COALESCE: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: ZEND_SET_OP_JMP_ADDR(opline, opline->op2, ZEND_OP2_JMP_ADDR(opline) - shiftlist[ZEND_OP2_JMP_ADDR(opline) - op_array->opcodes]); break; case ZEND_CATCH: @@ -1157,6 +1159,7 @@ static void zend_redo_pass_two(zend_op_array *op_array) case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: opline->op2.jmp_addr = &op_array->opcodes[opline->op2.jmp_addr - old_opcodes]; break; case ZEND_CATCH: @@ -1277,6 +1280,7 @@ static void zend_redo_pass_two_ex(zend_op_array *op_array, zend_ssa *ssa) case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: opline->op2.jmp_addr = &op_array->opcodes[opline->op2.jmp_addr - old_opcodes]; break; case ZEND_CATCH: diff --git a/Zend/Optimizer/zend_ssa.c b/Zend/Optimizer/zend_ssa.c index 67165a9b26d7a..186af8674588b 100644 --- a/Zend/Optimizer/zend_ssa.c +++ b/Zend/Optimizer/zend_ssa.c @@ -679,6 +679,7 @@ static zend_always_inline int _zend_ssa_rename_op(const zend_op_array *op_array, case ZEND_POST_DEC: case ZEND_BIND_GLOBAL: case ZEND_BIND_STATIC: + case 
ZEND_BIND_INIT_STATIC_OR_JMP: case ZEND_SEND_VAR_NO_REF: case ZEND_SEND_VAR_NO_REF_EX: case ZEND_SEND_VAR_EX: diff --git a/Zend/tests/035.phpt b/Zend/tests/035.phpt index 75df786e88fe1..a0b1ed69afc9d 100644 --- a/Zend/tests/035.phpt +++ b/Zend/tests/035.phpt @@ -3,7 +3,7 @@ Using 'static' and 'global' in global scope --FILE-- --EXPECT-- -Cannot access offset of type object on array +Cannot access offset of type Closure on array diff --git a/Zend/tests/038.phpt b/Zend/tests/038.phpt index 4f822a6f5a154..8b6441e3c25a3 100644 --- a/Zend/tests/038.phpt +++ b/Zend/tests/038.phpt @@ -11,4 +11,4 @@ try { ?> --EXPECT-- -Cannot access offset of type object on array +Cannot access offset of type Closure on array diff --git a/Zend/tests/array_merge_recursive_next_key_overflow.phpt b/Zend/tests/array_merge_recursive_next_key_overflow.phpt new file mode 100644 index 0000000000000..f7d2872957837 --- /dev/null +++ b/Zend/tests/array_merge_recursive_next_key_overflow.phpt @@ -0,0 +1,25 @@ +--TEST-- +Access on NULL pointer in array_merge_recursive() +--FILE-- + [PHP_INT_MAX => null]], + ['' => [null]], + ); +} catch (Throwable $e) { + echo $e->getMessage(), "\n"; +} + +try { + array_merge_recursive( + ['foo' => [PHP_INT_MAX => null]], + ['foo' => str_repeat('a', 2)], + ); +} catch (Throwable $e) { + echo $e->getMessage(), "\n"; +} +?> +--EXPECT-- +Cannot add element to the array as the next element is already occupied +Cannot add element to the array as the next element is already occupied diff --git a/Zend/tests/array_multisort_exception.phpt b/Zend/tests/array_multisort_exception.phpt new file mode 100644 index 0000000000000..8ee6007745e03 --- /dev/null +++ b/Zend/tests/array_multisort_exception.phpt @@ -0,0 +1,13 @@ +--TEST-- +Exception handling in array_multisort() +--FILE-- + new DateTime(), 0 => new DateTime()]; +array_multisort($array, SORT_STRING); +?> +--EXPECTF-- +Fatal error: Uncaught Error: Object of class DateTime could not be converted to string in %s:%d +Stack trace: 
+#0 %s(%d): array_multisort(Array, 2) +#1 {main} + thrown in %s on line %d diff --git a/Zend/tests/assign_dim_obj_null_return.phpt b/Zend/tests/assign_dim_obj_null_return.phpt index 02e709818669e..e2b7f20a0c072 100644 --- a/Zend/tests/assign_dim_obj_null_return.phpt +++ b/Zend/tests/assign_dim_obj_null_return.phpt @@ -73,11 +73,11 @@ test(); --EXPECT-- Cannot add element to the array as the next element is already occupied Cannot access offset of type array on array -Cannot access offset of type object on array +Cannot access offset of type stdClass on array Cannot use a scalar value as an array Cannot add element to the array as the next element is already occupied Cannot access offset of type array on array -Cannot access offset of type object on array +Cannot access offset of type stdClass on array Cannot use a scalar value as an array Attempt to assign property "foo" on true Attempt to assign property "foo" on true diff --git a/Zend/tests/bug24773.phpt b/Zend/tests/bug24773.phpt index 4c73fd0dd00f2..f1845fa46a680 100644 --- a/Zend/tests/bug24773.phpt +++ b/Zend/tests/bug24773.phpt @@ -6,7 +6,7 @@ Bug #24773 (unset() of integers treated as arrays causes a crash) unset($array["lvl1"]["lvl2"]["b"]); ?> --EXPECTF-- -Fatal error: Uncaught TypeError: Cannot access offset of type string on string in %s:%d +Fatal error: Uncaught Error: Cannot unset string offsets in %s:%d Stack trace: #0 {main} thrown in %s on line %d diff --git a/Zend/tests/bug79778.phpt b/Zend/tests/bug79778.phpt index f1476a95cf85d..44c9027372767 100644 --- a/Zend/tests/bug79778.phpt +++ b/Zend/tests/bug79778.phpt @@ -5,22 +5,69 @@ Bug #79778: Assertion failure if dumping closure with unresolved static variable $closure1 = function() { static $var = CONST_REF; }; + +var_dump($closure1); +print_r($closure1); + +try { + $closure1(); +} catch (\Error $e) { + echo $e->getMessage(), "\n"; +} + +var_dump($closure1); +print_r($closure1); + +const CONST_REF = 'foo'; +$closure1(); var_dump($closure1); 
print_r($closure1); + ?> --EXPECT-- object(Closure)#1 (1) { ["static"]=> array(1) { ["var"]=> - string(14) "" + NULL + } +} +Closure Object +( + [static] => Array + ( + [var] => + ) + +) +Undefined constant "CONST_REF" +object(Closure)#1 (1) { + ["static"]=> + array(1) { + ["var"]=> + NULL + } +} +Closure Object +( + [static] => Array + ( + [var] => + ) + +) +object(Closure)#1 (1) { + ["static"]=> + array(1) { + ["var"]=> + string(3) "foo" } } Closure Object ( [static] => Array ( - [var] => + [var] => foo ) ) diff --git a/Zend/tests/bug79836.phpt b/Zend/tests/bug79836.phpt new file mode 100644 index 0000000000000..5fb07396762f5 --- /dev/null +++ b/Zend/tests/bug79836.phpt @@ -0,0 +1,18 @@ +--TEST-- +Bug #79836 (Segfault in concat_function) +--INI-- +opcache.optimization_level = 0x7FFEBFFF & ~0x400 +--FILE-- + +--EXPECT-- +3 diff --git a/Zend/tests/bug79836_1.phpt b/Zend/tests/bug79836_1.phpt new file mode 100644 index 0000000000000..86e7f47671849 --- /dev/null +++ b/Zend/tests/bug79836_1.phpt @@ -0,0 +1,18 @@ +--TEST-- +Bug #79836 (Segfault in concat_function) +--INI-- +opcache.optimization_level = 0x7FFEBFFF & ~0x400 +--FILE-- + +--EXPECT-- +Done diff --git a/Zend/tests/bug79836_2.phpt b/Zend/tests/bug79836_2.phpt new file mode 100644 index 0000000000000..b02fcc13ea11b --- /dev/null +++ b/Zend/tests/bug79836_2.phpt @@ -0,0 +1,25 @@ +--TEST-- +Bug #79836 (Segfault in concat_function) +--FILE-- + +--EXPECT-- +abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabc diff --git a/Zend/tests/bug79836_3.phpt b/Zend/tests/bug79836_3.phpt new file mode 100644 index 0000000000000..75262eb460fb4 --- /dev/null +++ b/Zend/tests/bug79836_3.phpt @@ -0,0 +1,10 @@ +--TEST-- +Bug #79836 ($a .= $a should coerce to string) +--FILE-- + +--EXPECT-- +string(0) "" diff --git a/Zend/tests/bug79836_4.phpt b/Zend/tests/bug79836_4.phpt new file mode 100644 index 0000000000000..2d6b862f42139 --- /dev/null +++ b/Zend/tests/bug79836_4.phpt @@ -0,0 +1,18 @@ +--TEST-- +Bug #79836 (use-after-free in 
concat_function) +--INI-- +memory_limit=10M +--FILE-- + +--EXPECTF-- +Fatal error: Allowed memory size of %d bytes exhausted%s(tried to allocate %d bytes) in %s on line %d diff --git a/Zend/tests/bug81705.phpt b/Zend/tests/bug81705.phpt new file mode 100644 index 0000000000000..1c00b1c77d4bb --- /dev/null +++ b/Zend/tests/bug81705.phpt @@ -0,0 +1,19 @@ +--TEST-- +Bug #81705 (type confusion/UAF on set_error_handler with concat operation) +--FILE-- + +--EXPECT-- +error +string(6) "aArray" \ No newline at end of file diff --git a/Zend/tests/class_toString_concat_non_interned_with_itself.phpt b/Zend/tests/class_toString_concat_non_interned_with_itself.phpt new file mode 100644 index 0000000000000..87b129ce9e796 --- /dev/null +++ b/Zend/tests/class_toString_concat_non_interned_with_itself.phpt @@ -0,0 +1,21 @@ +--TEST-- +Test concatenating a class instance that has __toString with itself that uses a non-interned string +--FILE-- + +--EXPECT-- +aaaaaa diff --git a/Zend/tests/class_toString_concat_with_itself.phpt b/Zend/tests/class_toString_concat_with_itself.phpt new file mode 100644 index 0000000000000..96d28679b2f93 --- /dev/null +++ b/Zend/tests/class_toString_concat_with_itself.phpt @@ -0,0 +1,16 @@ +--TEST-- +Test concatenating a class instance that has __toString with itself +--FILE-- + +--EXPECT-- +abcabc diff --git a/Zend/tests/constexpr/new_anon_class.phpt b/Zend/tests/constexpr/new_anon_class.phpt index 6f2b433136d40..49fc120f6b92b 100644 --- a/Zend/tests/constexpr/new_anon_class.phpt +++ b/Zend/tests/constexpr/new_anon_class.phpt @@ -1,10 +1,13 @@ --TEST-- -New with anonymous class is not supported in constant expressions +New with anonymous class works --FILE-- ---EXPECTF-- -Fatal error: Cannot use anonymous class in constant expression in %s on line %d +--EXPECT-- +object(class@anonymous)#1 (0) { +} diff --git a/Zend/tests/constexpr/new_arg_unpack.phpt b/Zend/tests/constexpr/new_arg_unpack.phpt index 303116054ede9..4584a2e5601c2 100644 --- 
a/Zend/tests/constexpr/new_arg_unpack.phpt +++ b/Zend/tests/constexpr/new_arg_unpack.phpt @@ -1,10 +1,13 @@ --TEST-- -Argument unpacking in new arguments in const expr (not yet supported) +Argument unpacking in new arguments in static variable --FILE-- ---EXPECTF-- -Fatal error: Argument unpacking in constant expressions is not supported in %s on line %d +--EXPECT-- +object(stdClass)#1 (0) { +} diff --git a/Zend/tests/constexpr/new_dynamic_class_name.phpt b/Zend/tests/constexpr/new_dynamic_class_name.phpt index 645e3b7240b3b..60ae3ea4195d7 100644 --- a/Zend/tests/constexpr/new_dynamic_class_name.phpt +++ b/Zend/tests/constexpr/new_dynamic_class_name.phpt @@ -3,8 +3,13 @@ Dynamic class name in new is not supported --FILE-- ---EXPECTF-- -Fatal error: Cannot use dynamic class name in constant expression in %s on line %d +--EXPECT-- +object(Foo)#1 (0) { +} diff --git a/Zend/tests/constexpr/new_invalid_operation_in_arg.phpt b/Zend/tests/constexpr/new_invalid_operation_in_arg.phpt index 0537d21874e53..fd562df49ad27 100644 --- a/Zend/tests/constexpr/new_invalid_operation_in_arg.phpt +++ b/Zend/tests/constexpr/new_invalid_operation_in_arg.phpt @@ -3,8 +3,17 @@ Invalid operation in new arg in const expr --FILE-- ---EXPECTF-- -Fatal error: Constant expression contains invalid operations in %s on line %d +--EXPECT-- +array(3) { + [0]=> + int(1) + [1]=> + int(2) + [2]=> + int(3) +} diff --git a/Zend/tests/constexpr/new_static.phpt b/Zend/tests/constexpr/new_static.phpt index a626b2982475d..b4f7a332cf6a7 100644 --- a/Zend/tests/constexpr/new_static.phpt +++ b/Zend/tests/constexpr/new_static.phpt @@ -3,8 +3,20 @@ Static in new is not supported --FILE-- ---EXPECTF-- -Fatal error: "static" is not allowed in compile-time constants in %s on line %d +--EXPECT-- +object(Foo)#1 (0) { +} +object(Foo)#1 (0) { +} diff --git a/Zend/tests/delayed_early_binding_redeclaration-1.inc b/Zend/tests/delayed_early_binding_redeclaration-1.inc new file mode 100644 index 0000000000000..abfccf90686e3 
--- /dev/null +++ b/Zend/tests/delayed_early_binding_redeclaration-1.inc @@ -0,0 +1,2 @@ + +--EXPECTF-- +Fatal error: Cannot declare class Bar, because the name is already in use in %sdelayed_early_binding_redeclaration-2.inc on line %d diff --git a/Zend/tests/gh11222.phpt b/Zend/tests/gh11222.phpt new file mode 100644 index 0000000000000..c2c2b5eb4881a --- /dev/null +++ b/Zend/tests/gh11222.phpt @@ -0,0 +1,29 @@ +--TEST-- +GH-11222: foreach by-ref may jump over keys during a rehash +--FILE-- + 0, 1 => 1, 2, 3, 4, 5, 6]; +foreach ($a as $k => &$v) { + if ($k == 1) { + // force that it'll be rehashed by adding enough holes + unset($a[4], $a[5]); + // actually make the array larger than 8 elements to trigger rehash + $a[] = 8; $a[] = 9; $a[] = 10; + + } + // observe the iteration jumping from key 1 to key 6, skipping keys 2 and 3 + echo "$k => $v\n"; +} + +?> +--EXPECTF-- +k => 0 +1 => 1 +2 => 2 +3 => 3 +6 => 6 +7 => 8 +8 => 9 +9 => 10 diff --git a/Zend/tests/gh11320_1.phpt b/Zend/tests/gh11320_1.phpt new file mode 100644 index 0000000000000..f9beef76ccf6d --- /dev/null +++ b/Zend/tests/gh11320_1.phpt @@ -0,0 +1,28 @@ +--TEST-- +GH-11320: Array literals can contain list() assignments +--FILE-- + list($x, $y) = getList()]); +var_dump([$index => [$x, $y] = getList()]); +?> +--EXPECT-- +array(1) { + [1]=> + array(2) { + [0]=> + int(2) + [1]=> + int(3) + } +} +array(1) { + [1]=> + array(2) { + [0]=> + int(2) + [1]=> + int(3) + } +} diff --git a/Zend/tests/gh11320_2.phpt b/Zend/tests/gh11320_2.phpt new file mode 100644 index 0000000000000..5173c518f387f --- /dev/null +++ b/Zend/tests/gh11320_2.phpt @@ -0,0 +1,12 @@ +--TEST-- +GH-11320: list() expressions can contain magic constants +--FILE-- + $foo) = [__FILE__ => 'foo']]; +var_dump($foo); +[[__FILE__ => $foo] = [__FILE__ => 'foo']]; +var_dump($foo); +?> +--EXPECT-- +string(3) "foo" +string(3) "foo" diff --git a/Zend/tests/gh11320_3.phpt b/Zend/tests/gh11320_3.phpt new file mode 100644 index 0000000000000..3c3ed336d0b72 
--- /dev/null +++ b/Zend/tests/gh11320_3.phpt @@ -0,0 +1,8 @@ +--TEST-- +GH-11320: list() must not appear as a standalone array element +--FILE-- + +--EXPECTF-- +Fatal error: Cannot use list() as standalone expression in %s on line %d diff --git a/Zend/tests/gh8821.phpt b/Zend/tests/gh8821.phpt index e6abf5c1c4f1a..7588239fc83ba 100644 --- a/Zend/tests/gh8821.phpt +++ b/Zend/tests/gh8821.phpt @@ -15,7 +15,7 @@ new Bravo(); ?> --EXPECTF-- -Fatal error: Uncaught TypeError: Cannot access offset of type object on array in %sgh8821.php:8 +Fatal error: Uncaught TypeError: Cannot access offset of type Alpha on array in %sgh8821.php:8 Stack trace: #0 %sgh8821.php(11): [constant expression]() #1 {main} diff --git a/Zend/tests/illegal_offset_unset_isset_empty.phpt b/Zend/tests/illegal_offset_unset_isset_empty.phpt index a09613748281b..ee837f0b61439 100644 --- a/Zend/tests/illegal_offset_unset_isset_empty.phpt +++ b/Zend/tests/illegal_offset_unset_isset_empty.phpt @@ -22,6 +22,6 @@ try { ?> --EXPECT-- -Cannot access offset of type array in unset +Cannot unset offset of type array on array Cannot access offset of type array in isset or empty Cannot access offset of type array in isset or empty diff --git a/Zend/tests/init_array_illegal_offset_type.phpt b/Zend/tests/init_array_illegal_offset_type.phpt index 2e5a0401d6e4a..ee41c0217ad77 100644 --- a/Zend/tests/init_array_illegal_offset_type.phpt +++ b/Zend/tests/init_array_illegal_offset_type.phpt @@ -12,4 +12,4 @@ try { } ?> --EXPECT-- -Cannot access offset of type object on array +Cannot access offset of type stdClass on array diff --git a/Zend/tests/isset_array.phpt b/Zend/tests/isset_array.phpt index 792483294805d..dfa3fdef51dd7 100644 --- a/Zend/tests/isset_array.phpt +++ b/Zend/tests/isset_array.phpt @@ -47,4 +47,4 @@ bool(false) Warning: Resource ID#%d used as offset, casting to integer (%d) in %s on line %d bool(false) Cannot access offset of type array in isset or empty -Cannot access offset of type object in isset or 
empty +Cannot access offset of type stdClass in isset or empty diff --git a/Zend/tests/offset_array.phpt b/Zend/tests/offset_array.phpt index e44244511fcf1..368ec7a020e14 100644 --- a/Zend/tests/offset_array.phpt +++ b/Zend/tests/offset_array.phpt @@ -48,6 +48,6 @@ int(1) Warning: Resource ID#%d used as offset, casting to integer (%d) in %s on line %d int(%d) -Cannot access offset of type object on array +Cannot access offset of type stdClass on array Cannot access offset of type array on array Done diff --git a/Zend/tests/static_variable_func_call.phpt b/Zend/tests/static_variable_func_call.phpt new file mode 100644 index 0000000000000..7a0ec74be5df3 --- /dev/null +++ b/Zend/tests/static_variable_func_call.phpt @@ -0,0 +1,23 @@ +--TEST-- +Static variable initializer with function call +--FILE-- + +--EXPECT-- +bar() called +bar +bar diff --git a/Zend/tests/static_variables_closure_bind.phpt b/Zend/tests/static_variables_closure_bind.phpt new file mode 100644 index 0000000000000..ee3fce78f2fdc --- /dev/null +++ b/Zend/tests/static_variables_closure_bind.phpt @@ -0,0 +1,14 @@ +--TEST-- +Static variable can't override bound closure variables +--FILE-- + +--EXPECTF-- +Fatal error: Duplicate declaration of static variable $a in %s on line %d diff --git a/Zend/tests/static_variables_destructor.phpt b/Zend/tests/static_variables_destructor.phpt new file mode 100644 index 0000000000000..9128c86e6b1bf --- /dev/null +++ b/Zend/tests/static_variables_destructor.phpt @@ -0,0 +1,36 @@ +--TEST-- +Static variable assign triggering destructor +--FILE-- +getMessage(), "\n"; +} +foo(false); + +?> +--EXPECT-- +bar() called +__destruct() called +int(42) diff --git a/Zend/tests/static_variables_global.phpt b/Zend/tests/static_variables_global.phpt new file mode 100644 index 0000000000000..27f9128fb502e --- /dev/null +++ b/Zend/tests/static_variables_global.phpt @@ -0,0 +1,22 @@ +--TEST-- +Global can override static variable +--FILE-- + +--EXPECT-- +int(42) +int(41) +int(42) +int(41) 
diff --git a/Zend/tests/static_variables_global_2.phpt b/Zend/tests/static_variables_global_2.phpt new file mode 100644 index 0000000000000..b37da7583a8d4 --- /dev/null +++ b/Zend/tests/static_variables_global_2.phpt @@ -0,0 +1,26 @@ +--TEST-- +Static variable can override global +--FILE-- + +--EXPECT-- +int(42) +int(41) +int(42) +int(42) +int(41) +int(42) diff --git a/Zend/tests/static_variables_recursive.phpt b/Zend/tests/static_variables_recursive.phpt new file mode 100644 index 0000000000000..8b4eda10f9512 --- /dev/null +++ b/Zend/tests/static_variables_recursive.phpt @@ -0,0 +1,29 @@ +--TEST-- +Static variable with recursive initializer +--FILE-- + +--EXPECT-- +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" +string(7) "Done 11" diff --git a/Zend/tests/traits/language019.phpt b/Zend/tests/traits/language019.phpt index 299f661db9b38..e64ec08909f5d 100644 --- a/Zend/tests/traits/language019.phpt +++ b/Zend/tests/traits/language019.phpt @@ -10,6 +10,9 @@ class C1 { T1::foo as final; } } +class C2 extends C1 { + public function foo() {} +} ?> --EXPECTF-- -Fatal error: Cannot use "final" as method modifier in trait alias in %s on line %d +Fatal error: Cannot override final method C1::foo() in %s on line %d diff --git a/Zend/tests/traits/language020.phpt b/Zend/tests/traits/language020.phpt new file mode 100644 index 0000000000000..bbfa5c82652c2 --- /dev/null +++ b/Zend/tests/traits/language020.phpt @@ -0,0 +1,21 @@ +--TEST-- +final alias - positive test variation +--FILE-- +foo(); +?> +--EXPECT-- +Done diff --git a/Zend/tests/trampoline_closure_named_arguments.phpt b/Zend/tests/trampoline_closure_named_arguments.phpt new file mode 100644 index 0000000000000..e4ccaf16e63a6 --- /dev/null +++ b/Zend/tests/trampoline_closure_named_arguments.phpt @@ -0,0 +1,147 @@ 
+--TEST-- +Trampoline closure created from magic method accepts named arguments +--FILE-- +test(1, 2, a: 123); +$test->test(...)(1, 2); +$test->test(...)(1, 2, a: 123, b: $test); +$test->test(...)(a: 123, b: $test); +$test->test(...)(); +$test->test(...)(...$array); + +echo "-- Static cases --\n"; +Test::testStatic(1, 2, a: 123); +Test::testStatic(...)(1, 2); +Test::testStatic(...)(1, 2, a: 123, b: $test); +Test::testStatic(...)(a: 123, b: $test); +Test::testStatic(...)(); +Test::testStatic(...)(...$array); + +echo "-- Reflection tests --\n"; +$reflectionFunction = new ReflectionFunction(Test::fail(...)); +var_dump($reflectionFunction->getParameters()); +$argument = $reflectionFunction->getParameters()[0]; +var_dump($argument->isVariadic()); +$type = $argument->getType(); +var_dump($type); +var_dump($type->getName()); + +?> +--EXPECT-- +-- Non-static cases -- +string(4) "test" +array(3) { + [0]=> + int(1) + [1]=> + int(2) + ["a"]=> + int(123) +} +string(4) "test" +array(2) { + [0]=> + int(1) + [1]=> + int(2) +} +string(4) "test" +array(4) { + [0]=> + int(1) + [1]=> + int(2) + ["a"]=> + int(123) + ["b"]=> + object(Test)#1 (0) { + } +} +string(4) "test" +array(2) { + ["a"]=> + int(123) + ["b"]=> + object(Test)#1 (0) { + } +} +string(4) "test" +array(0) { +} +string(4) "test" +array(1) { + [0]=> + string(8) "unpacked" +} +-- Static cases -- +string(10) "testStatic" +array(3) { + [0]=> + int(1) + [1]=> + int(2) + ["a"]=> + int(123) +} +string(10) "testStatic" +array(2) { + [0]=> + int(1) + [1]=> + int(2) +} +string(10) "testStatic" +array(4) { + [0]=> + int(1) + [1]=> + int(2) + ["a"]=> + int(123) + ["b"]=> + object(Test)#1 (0) { + } +} +string(10) "testStatic" +array(2) { + ["a"]=> + int(123) + ["b"]=> + object(Test)#1 (0) { + } +} +string(10) "testStatic" +array(0) { +} +string(10) "testStatic" +array(1) { + [0]=> + string(8) "unpacked" +} +-- Reflection tests -- +array(1) { + [0]=> + object(ReflectionParameter)#4 (1) { + ["name"]=> + string(9) "arguments" + } +} 
+bool(true) +object(ReflectionNamedType)#5 (0) { +} +string(5) "mixed" diff --git a/Zend/tests/type_declarations/typed_class_constants_ast_print.phpt b/Zend/tests/type_declarations/typed_class_constants_ast_print.phpt new file mode 100644 index 0000000000000..dd4957f2269c5 --- /dev/null +++ b/Zend/tests/type_declarations/typed_class_constants_ast_print.phpt @@ -0,0 +1,18 @@ +--TEST-- +AST printing support for typed constants +--FILE-- +getMessage(), "\n"; +} + +?> +--EXPECT-- +assert(false && new class { + public const int X = 1; +}) diff --git a/Zend/tests/type_declarations/typed_class_constants_multiple_constants.phpt b/Zend/tests/type_declarations/typed_class_constants_multiple_constants.phpt new file mode 100644 index 0000000000000..8d973b2e5d591 --- /dev/null +++ b/Zend/tests/type_declarations/typed_class_constants_multiple_constants.phpt @@ -0,0 +1,12 @@ +--TEST-- +Multiple typed constants in one declaration +--FILE-- + +--EXPECTF-- +Fatal error: Cannot use string as value for class constant Test::Y of type int in %s on line %d diff --git a/Zend/zend.c b/Zend/zend.c index bbddd4597042b..0e3cfb4381fad 100644 --- a/Zend/zend.c +++ b/Zend/zend.c @@ -1719,6 +1719,29 @@ ZEND_API ZEND_COLD void zend_throw_error(zend_class_entry *exception_ce, const c } /* }}} */ +/* type should be one of the BP_VAR_* constants, only special messages happen for isset/empty and unset */ +ZEND_API ZEND_COLD void zend_illegal_container_offset(const zend_string *container, const zval *offset, int type) +{ + switch (type) { + case BP_VAR_IS: + zend_type_error("Cannot access offset of type %s in isset or empty", + zend_zval_type_name(offset)); + return; + case BP_VAR_UNSET: + /* Consistent error for when trying to unset a string offset */ + if (zend_string_equals(container, ZSTR_KNOWN(ZEND_STR_STRING))) { + zend_throw_error(NULL, "Cannot unset string offsets"); + } else { + zend_type_error("Cannot unset offset of type %s on %s", zend_zval_type_name(offset), ZSTR_VAL(container)); + } + 
return; + default: + zend_type_error("Cannot access offset of type %s on %s", + zend_zval_type_name(offset), ZSTR_VAL(container)); + return; + } +} + ZEND_API ZEND_COLD void zend_type_error(const char *format, ...) /* {{{ */ { va_list va; diff --git a/Zend/zend.h b/Zend/zend.h index fd21cbfeb93cf..94440530f3b36 100644 --- a/Zend/zend.h +++ b/Zend/zend.h @@ -357,6 +357,8 @@ ZEND_API ZEND_COLD void zend_throw_error(zend_class_entry *exception_ce, const c ZEND_API ZEND_COLD void zend_type_error(const char *format, ...) ZEND_ATTRIBUTE_FORMAT(printf, 1, 2); ZEND_API ZEND_COLD void zend_argument_count_error(const char *format, ...) ZEND_ATTRIBUTE_FORMAT(printf, 1, 2); ZEND_API ZEND_COLD void zend_value_error(const char *format, ...) ZEND_ATTRIBUTE_FORMAT(printf, 1, 2); +/* type should be one of the BP_VAR_* constants, only special messages happen for isset/empty and unset */ +ZEND_API ZEND_COLD void zend_illegal_container_offset(const zend_string *container, const zval *offset, int type); ZEND_COLD void zenderror(const char *error); diff --git a/Zend/zend_API.c b/Zend/zend_API.c index e9058f3e43db9..897201c7dbde4 100644 --- a/Zend/zend_API.c +++ b/Zend/zend_API.c @@ -407,16 +407,6 @@ ZEND_API ZEND_COLD void ZEND_FASTCALL zend_argument_error_variadic(zend_class_en } /* }}} */ -ZEND_API ZEND_COLD void zend_illegal_array_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s on array", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -ZEND_API ZEND_COLD void zend_illegal_empty_or_isset_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s in isset or empty", zend_get_type_by_const(Z_TYPE_P(offset))); -} - ZEND_API ZEND_COLD void zend_argument_error(zend_class_entry *error_ce, uint32_t arg_num, const char *format, ...) 
/* {{{ */ { va_list va; @@ -2112,7 +2102,7 @@ ZEND_API zend_result array_set_zval_key(HashTable *ht, zval *key, zval *value) / result = zend_hash_index_update(ht, zend_dval_to_lval_safe(Z_DVAL_P(key)), value); break; default: - zend_illegal_array_offset(key); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), key, BP_VAR_W); result = NULL; } @@ -2678,15 +2668,15 @@ ZEND_API void zend_check_magic_method_implementation(const zend_class_entry *ce, zend_check_magic_method_public(ce, fptr, error_type); zend_check_magic_method_arg_type(0, ce, fptr, error_type, MAY_BE_ARRAY); zend_check_magic_method_return_type(ce, fptr, error_type, MAY_BE_OBJECT); - } else if (zend_string_equals_literal(lcname, "__invoke")) { + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_MAGIC_INVOKE))) { zend_check_magic_method_non_static(ce, fptr, error_type); zend_check_magic_method_public(ce, fptr, error_type); - } else if (zend_string_equals_literal(lcname, "__sleep")) { + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_SLEEP))) { zend_check_magic_method_args(0, ce, fptr, error_type); zend_check_magic_method_non_static(ce, fptr, error_type); zend_check_magic_method_public(ce, fptr, error_type); zend_check_magic_method_return_type(ce, fptr, error_type, MAY_BE_ARRAY); - } else if (zend_string_equals_literal(lcname, "__wakeup")) { + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_WAKEUP))) { zend_check_magic_method_args(0, ce, fptr, error_type); zend_check_magic_method_non_static(ce, fptr, error_type); zend_check_magic_method_public(ce, fptr, error_type); @@ -3539,7 +3529,7 @@ static bool zend_is_callable_check_class(zend_string *name, zend_class_entry *sc *strict_class = 1; ret = 1; } - } else if (zend_string_equals_literal(lcname, "static")) { + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_STATIC))) { zend_class_entry *called_scope = zend_get_called_scope(frame); if (!called_scope) { @@ -4570,7 +4560,7 @@ ZEND_API zend_class_constant 
*zend_declare_typed_class_constant(zend_class_entry } } - if (zend_string_equals_literal_ci(name, "class")) { + if (zend_string_equals_ci(name, ZSTR_KNOWN(ZEND_STR_CLASS))) { zend_error_noreturn(ce->type == ZEND_INTERNAL_CLASS ? E_CORE_ERROR : E_COMPILE_ERROR, "A class constant must not be called 'class'; it is reserved for class name fetching"); } diff --git a/Zend/zend_ast.c b/Zend/zend_ast.c index 6d5c5aaa44635..525d9dfe9a742 100644 --- a/Zend/zend_ast.c +++ b/Zend/zend_ast.c @@ -101,7 +101,7 @@ ZEND_API zend_ast * ZEND_FASTCALL zend_ast_create_constant(zend_string *name, ze ZEND_API zend_ast * ZEND_FASTCALL zend_ast_create_class_const_or_name(zend_ast *class_name, zend_ast *name) { zend_string *name_str = zend_ast_get_str(name); - if (zend_string_equals_literal_ci(name_str, "class")) { + if (zend_string_equals_ci(name_str, ZSTR_KNOWN(ZEND_STR_CLASS))) { zend_string_release(name_str); return zend_ast_create(ZEND_AST_CLASS_NAME, class_name); } else { @@ -1063,8 +1063,8 @@ static void* ZEND_FASTCALL zend_ast_tree_copy(zend_ast *ast, void *buf) new->kind = ZEND_AST_ZVAL; new->attr = ast->attr; ZVAL_COPY(&new->val, zend_ast_get_zval(ast)); + Z_LINENO(new->val) = zend_ast_get_lineno(ast); buf = (void*)((char*)buf + sizeof(zend_ast_zval)); - // Lineno gets copied with ZVAL_COPY } else if (ast->kind == ZEND_AST_CONSTANT) { zend_ast_zval *new = (zend_ast_zval*)buf; new->kind = ZEND_AST_CONSTANT; @@ -1924,6 +1924,10 @@ static ZEND_COLD void zend_ast_export_ex(smart_str *str, zend_ast *ast, int prio zend_ast_export_visibility(str, ast->attr); smart_str_appends(str, "const "); + if (ast->child[2]) { + zend_ast_export_type(str, ast->child[2], indent); + smart_str_appendc(str, ' '); + } ast = ast->child[0]; diff --git a/Zend/zend_ast.h b/Zend/zend_ast.h index 73e4fed7a997a..0bbb3a820c291 100644 --- a/Zend/zend_ast.h +++ b/Zend/zend_ast.h @@ -145,7 +145,6 @@ enum _zend_ast_kind { ZEND_AST_USE_ELEM, ZEND_AST_TRAIT_ALIAS, ZEND_AST_GROUP_USE, - ZEND_AST_CLASS_CONST_GROUP, 
ZEND_AST_ATTRIBUTE, ZEND_AST_MATCH, ZEND_AST_MATCH_ARM, @@ -161,6 +160,8 @@ enum _zend_ast_kind { ZEND_AST_CATCH, ZEND_AST_PROP_GROUP, ZEND_AST_PROP_ELEM, + ZEND_AST_CONST_ELEM, + ZEND_AST_CLASS_CONST_GROUP, // Pseudo node for initializing enums ZEND_AST_CONST_ENUM_INIT, @@ -169,7 +170,6 @@ enum _zend_ast_kind { ZEND_AST_FOR = 4 << ZEND_AST_NUM_CHILDREN_SHIFT, ZEND_AST_FOREACH, ZEND_AST_ENUM_CASE, - ZEND_AST_CONST_ELEM, /* 5 child nodes */ ZEND_AST_PARAM = 5 << ZEND_AST_NUM_CHILDREN_SHIFT, diff --git a/Zend/zend_attributes.c b/Zend/zend_attributes.c index 2dbcb47392e9a..83c3c928c56e8 100644 --- a/Zend/zend_attributes.c +++ b/Zend/zend_attributes.c @@ -114,7 +114,7 @@ ZEND_METHOD(SensitiveParameterValue, __construct) Z_PARAM_ZVAL(value) ZEND_PARSE_PARAMETERS_END(); - zend_update_property(zend_ce_sensitive_parameter_value, Z_OBJ_P(ZEND_THIS), "value", strlen("value"), value); + zend_update_property_ex(zend_ce_sensitive_parameter_value, Z_OBJ_P(ZEND_THIS), ZSTR_KNOWN(ZEND_STR_VALUE), value); } ZEND_METHOD(SensitiveParameterValue, getValue) diff --git a/Zend/zend_builtin_functions.c b/Zend/zend_builtin_functions.c index b8cd96c480282..acf69536d4593 100644 --- a/Zend/zend_builtin_functions.c +++ b/Zend/zend_builtin_functions.c @@ -138,7 +138,7 @@ ZEND_FUNCTION(gc_status) zend_gc_get_status(&status); - array_init_size(return_value, 3); + array_init_size(return_value, 8); add_assoc_bool_ex(return_value, "running", sizeof("running")-1, status.active); add_assoc_bool_ex(return_value, "protected", sizeof("protected")-1, status.gc_protected); @@ -1322,7 +1322,7 @@ ZEND_FUNCTION(get_defined_functions) } ZEND_HASH_FOREACH_END(); zend_hash_str_add_new(Z_ARRVAL_P(return_value), "internal", sizeof("internal")-1, &internal); - zend_hash_str_add_new(Z_ARRVAL_P(return_value), "user", sizeof("user")-1, &user); + zend_hash_add_new(Z_ARRVAL_P(return_value), ZSTR_KNOWN(ZEND_STR_USER), &user); } /* }}} */ diff --git a/Zend/zend_closures.c b/Zend/zend_closures.c index 
4c326a6c79b12..2072eac72d712 100644 --- a/Zend/zend_closures.c +++ b/Zend/zend_closures.c @@ -294,7 +294,18 @@ static ZEND_NAMED_FUNCTION(zend_closure_call_magic) /* {{{ */ { fci.params = params; fci.param_count = 2; ZVAL_STR(&fci.params[0], EX(func)->common.function_name); - if (ZEND_NUM_ARGS()) { + if (EX_CALL_INFO() & ZEND_CALL_HAS_EXTRA_NAMED_PARAMS) { + zend_string *name; + zval *named_param_zval; + array_init_size(&fci.params[1], ZEND_NUM_ARGS() + zend_hash_num_elements(EX(extra_named_params))); + /* Avoid conversion from packed to mixed later. */ + zend_hash_real_init_mixed(Z_ARRVAL(fci.params[1])); + zend_copy_parameters_array(ZEND_NUM_ARGS(), &fci.params[1]); + ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(EX(extra_named_params), name, named_param_zval) { + Z_TRY_ADDREF_P(named_param_zval); + zend_hash_add_new(Z_ARRVAL(fci.params[1]), name, named_param_zval); + } ZEND_HASH_FOREACH_END(); + } else if (ZEND_NUM_ARGS()) { array_init_size(&fci.params[1], ZEND_NUM_ARGS()); zend_copy_parameters_array(ZEND_NUM_ARGS(), &fci.params[1]); } else { @@ -324,7 +335,7 @@ static zend_result zend_create_closure_from_callable(zval *return_value, zval *c if (mptr->common.fn_flags & ZEND_ACC_CALL_VIA_TRAMPOLINE) { /* For Closure::fromCallable([$closure, "__invoke"]) return $closure. 
*/ if (fcc.object && fcc.object->ce == zend_ce_closure - && zend_string_equals_literal(mptr->common.function_name, "__invoke")) { + && zend_string_equals(mptr->common.function_name, ZSTR_KNOWN(ZEND_STR_MAGIC_INVOKE))) { RETVAL_OBJ_COPY(fcc.object); zend_free_trampoline(mptr); return SUCCESS; @@ -597,14 +608,10 @@ static HashTable *zend_closure_get_debug_info(zend_object *object, int *is_temp) ZEND_HASH_MAP_FOREACH_STR_KEY_VAL(static_variables, key, var) { zval copy; - if (Z_TYPE_P(var) == IS_CONSTANT_AST) { - ZVAL_STRING(&copy, ""); - } else { - if (Z_ISREF_P(var) && Z_REFCOUNT_P(var) == 1) { - var = Z_REFVAL_P(var); - } - ZVAL_COPY(&copy, var); + if (Z_ISREF_P(var) && Z_REFCOUNT_P(var) == 1) { + var = Z_REFVAL_P(var); } + ZVAL_COPY(&copy, var); zend_hash_add_new(Z_ARRVAL(val), key, &copy); } ZEND_HASH_FOREACH_END(); @@ -826,6 +833,9 @@ ZEND_API void zend_create_fake_closure(zval *res, zend_function *func, zend_clas } /* }}} */ +/* __call and __callStatic name the arguments "$arguments" in the docs. */ +static zend_internal_arg_info trampoline_arg_info[] = {ZEND_ARG_VARIADIC_TYPE_INFO(false, arguments, IS_MIXED, false)}; + void zend_closure_from_frame(zval *return_value, zend_execute_data *call) { /* {{{ */ zval instance; zend_internal_function trampoline; @@ -838,17 +848,20 @@ void zend_closure_from_frame(zval *return_value, zend_execute_data *call) { /* { if (mptr->common.fn_flags & ZEND_ACC_CALL_VIA_TRAMPOLINE) { if ((ZEND_CALL_INFO(call) & ZEND_CALL_HAS_THIS) && (Z_OBJCE(call->This) == zend_ce_closure) - && zend_string_equals_literal(mptr->common.function_name, "__invoke")) { + && zend_string_equals(mptr->common.function_name, ZSTR_KNOWN(ZEND_STR_MAGIC_INVOKE))) { zend_free_trampoline(mptr); RETURN_OBJ_COPY(Z_OBJ(call->This)); } memset(&trampoline, 0, sizeof(zend_internal_function)); trampoline.type = ZEND_INTERNAL_FUNCTION; - trampoline.fn_flags = mptr->common.fn_flags & ZEND_ACC_STATIC; + trampoline.fn_flags = mptr->common.fn_flags & (ZEND_ACC_STATIC | ZEND_ACC_VARIADIC);
trampoline.handler = zend_closure_call_magic; trampoline.function_name = mptr->common.function_name; trampoline.scope = mptr->common.scope; + if (trampoline.fn_flags & ZEND_ACC_VARIADIC) { + trampoline.arg_info = trampoline_arg_info; + } zend_free_trampoline(mptr); mptr = (zend_function *) &trampoline; diff --git a/Zend/zend_compile.c b/Zend/zend_compile.c index 084c47f45bc47..0b54823d2026c 100644 --- a/Zend/zend_compile.c +++ b/Zend/zend_compile.c @@ -1429,7 +1429,7 @@ ZEND_API zend_string *zend_type_to_string(zend_type type) { } static bool is_generator_compatible_class_type(zend_string *name) { - return zend_string_equals_literal_ci(name, "Traversable") + return zend_string_equals_ci(name, ZSTR_KNOWN(ZEND_STR_TRAVERSABLE)) || zend_string_equals_literal_ci(name, "Iterator") || zend_string_equals_literal_ci(name, "Generator"); } @@ -1617,7 +1617,7 @@ uint32_t zend_get_class_fetch_type(const zend_string *name) /* {{{ */ return ZEND_FETCH_CLASS_SELF; } else if (zend_string_equals_literal_ci(name, "parent")) { return ZEND_FETCH_CLASS_PARENT; - } else if (zend_string_equals_literal_ci(name, "static")) { + } else if (zend_string_equals_ci(name, ZSTR_KNOWN(ZEND_STR_STATIC))) { return ZEND_FETCH_CLASS_STATIC; } else { return ZEND_FETCH_CLASS_DEFAULT; @@ -2293,6 +2293,7 @@ static inline void zend_update_jump_target(uint32_t opnum_jump, uint32_t opnum_t case ZEND_JMP_SET: case ZEND_COALESCE: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: opline->op2.opline_num = opnum_target; break; EMPTY_SWITCH_DEFAULT_CASE() @@ -2820,7 +2821,7 @@ static bool is_this_fetch(zend_ast *ast) /* {{{ */ { if (ast->kind == ZEND_AST_VAR && ast->child[0]->kind == ZEND_AST_ZVAL) { zval *name = zend_ast_get_zval(ast->child[0]); - return Z_TYPE_P(name) == IS_STRING && zend_string_equals_literal(Z_STR_P(name), "this"); + return Z_TYPE_P(name) == IS_STRING && zend_string_equals(Z_STR_P(name), ZSTR_KNOWN(ZEND_STR_THIS)); } return 0; @@ -4521,7 +4522,7 @@ static zend_result 
zend_try_compile_special_func(znode *result, zend_string *lcn return zend_compile_func_cuf(result, args, lcname); } else if (zend_string_equals_literal(lcname, "in_array")) { return zend_compile_func_in_array(result, args); - } else if (zend_string_equals_literal(lcname, "count") + } else if (zend_string_equals(lcname, ZSTR_KNOWN(ZEND_STR_COUNT)) || zend_string_equals_literal(lcname, "sizeof")) { return zend_compile_func_count(result, args, lcname); } else if (zend_string_equals_literal(lcname, "get_class")) { @@ -4871,7 +4872,7 @@ static void zend_compile_static_var_common(zend_string *var_name, zval *value, u value = zend_hash_update(CG(active_op_array)->static_variables, var_name, value); - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as static variable"); } @@ -4885,16 +4886,55 @@ static void zend_compile_static_var_common(zend_string *var_name, zval *value, u static void zend_compile_static_var(zend_ast *ast) /* {{{ */ { zend_ast *var_ast = ast->child[0]; - zend_ast **value_ast_ptr = &ast->child[1]; - zval value_zv; + zend_string *var_name = zend_ast_get_str(var_ast); - if (*value_ast_ptr) { - zend_const_expr_to_zval(&value_zv, value_ast_ptr, /* allow_dynamic */ true); - } else { - ZVAL_NULL(&value_zv); + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { + zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as static variable"); } - zend_compile_static_var_common(zend_ast_get_str(var_ast), &value_zv, ZEND_BIND_REF); + if (!CG(active_op_array)->static_variables) { + if (CG(active_op_array)->scope) { + CG(active_op_array)->scope->ce_flags |= ZEND_HAS_STATIC_IN_METHODS; + } + CG(active_op_array)->static_variables = zend_new_array(8); + } + + if (zend_hash_exists(CG(active_op_array)->static_variables, var_name)) { + zend_error_noreturn(E_COMPILE_ERROR, "Duplicate declaration of static variable $%s", ZSTR_VAL(var_name)); + } 
+ + zend_eval_const_expr(&ast->child[1]); + zend_ast *value_ast = ast->child[1]; + + if (!value_ast || value_ast->kind == ZEND_AST_ZVAL) { + zval *value_zv = value_ast + ? zend_ast_get_zval(value_ast) + : &EG(uninitialized_zval); + Z_TRY_ADDREF_P(value_zv); + zend_compile_static_var_common(var_name, value_zv, ZEND_BIND_REF); + } else { + zend_op *opline; + + zval *placeholder_ptr = zend_hash_update(CG(active_op_array)->static_variables, var_name, &EG(uninitialized_zval)); + Z_TYPE_EXTRA_P(placeholder_ptr) |= IS_STATIC_VAR_UNINITIALIZED; + uint32_t placeholder_offset = (uint32_t)((char*)placeholder_ptr - (char*)CG(active_op_array)->static_variables->arData); + + uint32_t static_def_jmp_opnum = get_next_op_number(); + opline = zend_emit_op(NULL, ZEND_BIND_INIT_STATIC_OR_JMP, NULL, NULL); + opline->op1_type = IS_CV; + opline->op1.var = lookup_cv(var_name); + opline->extended_value = placeholder_offset; + + znode expr; + zend_compile_expr(&expr, value_ast); + + opline = zend_emit_op(NULL, ZEND_BIND_STATIC, NULL, &expr); + opline->op1_type = IS_CV; + opline->op1.var = lookup_cv(var_name); + opline->extended_value = placeholder_offset | ZEND_BIND_REF; + + zend_update_jump_target_to_next(static_def_jmp_opnum); + } } /* }}} */ @@ -6049,7 +6089,7 @@ static void zend_compile_try(zend_ast *ast) /* {{{ */ zend_resolve_class_name_ast(class_ast)); opline->extended_value = zend_alloc_cache_slot(); - if (var_name && zend_string_equals_literal(var_name, "this")) { + if (var_name && zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot re-assign $this"); } @@ -6885,7 +6925,7 @@ static void zend_compile_params(zend_ast *ast, zend_ast *return_type_ast, uint32 if (EX_VAR_TO_NUM(var_node.u.op.var) != i) { zend_error_noreturn(E_COMPILE_ERROR, "Redefinition of parameter $%s", ZSTR_VAL(name)); - } else if (zend_string_equals_literal(name, "this")) { + } else if (zend_string_equals(name, ZSTR_KNOWN(ZEND_STR_THIS))) { 
zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as parameter"); } @@ -7112,7 +7152,7 @@ static void zend_compile_closure_binding(znode *closure, zend_op_array *op_array zend_op *opline; zval *value; - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot use $this as lexical variable"); } @@ -7156,7 +7196,7 @@ static void find_implicit_binds_recursively(closure_info *info, zend_ast *ast) { return; } - if (zend_string_equals_literal(name, "this")) { + if (zend_string_equals(name, ZSTR_KNOWN(ZEND_STR_THIS))) { /* $this does not need to be explicitly imported. */ return; } @@ -7687,13 +7727,11 @@ static void zend_check_trait_alias_modifiers(uint32_t attr) /* {{{ */ zend_error_noreturn(E_COMPILE_ERROR, "Cannot use \"static\" as method modifier in trait alias"); } else if (attr & ZEND_ACC_ABSTRACT) { zend_error_noreturn(E_COMPILE_ERROR, "Cannot use \"abstract\" as method modifier in trait alias"); - } else if (attr & ZEND_ACC_FINAL) { - zend_error_noreturn(E_COMPILE_ERROR, "Cannot use \"final\" as method modifier in trait alias"); } } /* }}} */ -static void zend_compile_class_const_decl(zend_ast *ast, uint32_t flags, zend_ast *attr_ast) +static void zend_compile_class_const_decl(zend_ast *ast, uint32_t flags, zend_ast *attr_ast, zend_ast *type_ast) { zend_ast_list *list = zend_ast_get_list(ast); zend_class_entry *ce = CG(active_class_entry); @@ -7705,7 +7743,6 @@ static void zend_compile_class_const_decl(zend_ast *ast, uint32_t flags, zend_as zend_ast *name_ast = const_ast->child[0]; zend_ast **value_ast_ptr = &const_ast->child[1]; zend_ast *doc_comment_ast = const_ast->child[2]; - zend_ast *type_ast = const_ast->child[3]; zend_string *name = zval_make_interned_string(zend_ast_get_zval(name_ast)); zend_string *doc_comment = doc_comment_ast ? 
zend_string_copy(zend_ast_get_str(doc_comment_ast)) : NULL; zval value_zv; @@ -7752,8 +7789,9 @@ static void zend_compile_class_const_group(zend_ast *ast) /* {{{ */ { zend_ast *const_ast = ast->child[0]; zend_ast *attr_ast = ast->child[1]; + zend_ast *type_ast = ast->child[2]; - zend_compile_class_const_decl(const_ast, ast->attr, attr_ast); + zend_compile_class_const_decl(const_ast, ast->attr, attr_ast, type_ast); } /* }}} */ @@ -8057,8 +8095,11 @@ static void zend_compile_class_decl(znode *result, zend_ast *ast, bool toplevel) ce->ce_flags |= ZEND_ACC_LINKED; zend_observer_class_linked_notify(ce, lcname); return; + } else { + goto link_unbound; } } else if (!extends_ast) { +link_unbound: /* Link unbound simple class */ zend_build_properties_info_table(ce); ce->ce_flags |= ZEND_ACC_LINKED; @@ -8098,11 +8139,17 @@ static void zend_compile_class_decl(znode *result, zend_ast *ast, bool toplevel) zend_add_literal_string(&key); opline->opcode = ZEND_DECLARE_CLASS; - if (extends_ast && toplevel + if (toplevel && (CG(compiler_options) & ZEND_COMPILE_DELAYED_BINDING) /* We currently don't early-bind classes that implement interfaces or use traits */ && !ce->num_interfaces && !ce->num_traits ) { + if (!extends_ast) { + /* Use empty string for classes without parents to avoid new handler, and special + * handling of zend_early_binding. 
*/ + opline->op2_type = IS_CONST; + LITERAL_STR(opline->op2, ZSTR_EMPTY_ALLOC()); + } CG(active_op_array)->fn_flags |= ZEND_ACC_EARLY_BINDING; opline->opcode = ZEND_DECLARE_CLASS_DELAYED; opline->extended_value = zend_alloc_cache_slot(); diff --git a/Zend/zend_constants.c b/Zend/zend_constants.c index 854f9c2116ee2..edc9fc0b82d44 100644 --- a/Zend/zend_constants.c +++ b/Zend/zend_constants.c @@ -314,7 +314,7 @@ ZEND_API zval *zend_get_class_constant_ex(zend_string *class_name, zend_string * } else { ce = scope->parent; } - } else if (zend_string_equals_literal_ci(class_name, "static")) { + } else if (zend_string_equals_ci(class_name, ZSTR_KNOWN(ZEND_STR_STATIC))) { ce = zend_get_called_scope(EG(current_execute_data)); if (UNEXPECTED(!ce)) { zend_throw_error(NULL, "Cannot access \"static\" when no class scope is active"); @@ -419,7 +419,7 @@ ZEND_API zval *zend_get_constant_ex(zend_string *cname, zend_class_entry *scope, } else { ce = scope->parent; } - } else if (zend_string_equals_literal_ci(class_name, "static")) { + } else if (zend_string_equals_ci(class_name, ZSTR_KNOWN(ZEND_STR_STATIC))) { ce = zend_get_called_scope(EG(current_execute_data)); if (UNEXPECTED(!ce)) { zend_throw_error(NULL, "Cannot access \"static\" when no class scope is active"); diff --git a/Zend/zend_cpuinfo.h b/Zend/zend_cpuinfo.h index 31e7c54e0b6f0..9d221c59e541a 100644 --- a/Zend/zend_cpuinfo.h +++ b/Zend/zend_cpuinfo.h @@ -258,4 +258,15 @@ static inline int zend_cpu_supports_pclmul(void) { } #endif +/* __builtin_cpu_supports has cldemote from gcc11 */ +#if PHP_HAVE_BUILTIN_CPU_SUPPORTS && defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000) +ZEND_NO_SANITIZE_ADDRESS +static inline int zend_cpu_supports_cldemote(void) { +#if PHP_HAVE_BUILTIN_CPU_INIT + __builtin_cpu_init(); +#endif + return __builtin_cpu_supports("cldemote"); +} +#endif + #endif diff --git a/Zend/zend_enum.c b/Zend/zend_enum.c index 21628f74956bb..2e85bb08a7245 100644 --- a/Zend/zend_enum.c +++ b/Zend/zend_enum.c @@ -62,12 
+62,12 @@ static void zend_verify_enum_properties(zend_class_entry *ce) zend_property_info *property_info; ZEND_HASH_MAP_FOREACH_PTR(&ce->properties_info, property_info) { - if (zend_string_equals_literal(property_info->name, "name")) { + if (zend_string_equals(property_info->name, ZSTR_KNOWN(ZEND_STR_NAME))) { continue; } if ( ce->enum_backing_type != IS_UNDEF - && zend_string_equals_literal(property_info->name, "value") + && zend_string_equals(property_info->name, ZSTR_KNOWN(ZEND_STR_VALUE)) ) { continue; } @@ -597,6 +597,7 @@ ZEND_API void zend_enum_add_case_cstr(zend_class_entry *ce, const char *name, zv ZEND_API zend_object *zend_enum_get_case(zend_class_entry *ce, zend_string *name) { zend_class_constant *c = zend_hash_find_ptr(CE_CONSTANTS_TABLE(ce), name); + ZEND_ASSERT(c && "Must be a valid enum case"); ZEND_ASSERT(ZEND_CLASS_CONST_FLAGS(c) & ZEND_CLASS_CONST_IS_CASE); if (Z_TYPE(c->value) == IS_CONSTANT_AST) { diff --git a/Zend/zend_execute.c b/Zend/zend_execute.c index 42c9fcdc0fc1c..29631504d5f90 100644 --- a/Zend/zend_execute.c +++ b/Zend/zend_execute.c @@ -1506,24 +1506,24 @@ static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_use_object_as_array(v zend_throw_error(NULL, "Cannot use object as array"); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_unset_offset(const zval *offset) +static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_array_offset_access(const zval *offset) { - zend_type_error("Cannot access offset of type %s in unset", zend_get_type_by_const(Z_TYPE_P(offset))); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), offset, BP_VAR_RW); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_array_offset(const zval *offset) +static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_array_offset_isset(const zval *offset) { - zend_type_error("Cannot access offset of type %s on array", zend_get_type_by_const(Z_TYPE_P(offset))); + 
zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), offset, BP_VAR_IS); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_empty_or_isset_offset(const zval *offset) +static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_array_offset_unset(const zval *offset) { - zend_type_error("Cannot access offset of type %s in isset or empty", zend_get_type_by_const(Z_TYPE_P(offset))); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), offset, BP_VAR_UNSET); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_string_offset(const zval *offset) +static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_illegal_string_offset(const zval *offset, int type) { - zend_type_error("Cannot access offset of type %s on string", zend_zval_type_name(offset)); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_STRING), offset, type); } static zend_never_inline void zend_assign_to_object_dim(zend_object *obj, zval *dim, zval *value OPLINE_DC EXECUTE_DATA_DC) @@ -1651,7 +1651,7 @@ static zend_never_inline zend_long zend_check_string_offset(zval *dim, int type } return offset; } - zend_illegal_string_offset(dim); + zend_illegal_string_offset(dim, type); return 0; } case IS_UNDEF: @@ -1667,7 +1667,7 @@ static zend_never_inline zend_long zend_check_string_offset(zval *dim, int type dim = Z_REFVAL_P(dim); goto try_again; default: - zend_illegal_string_offset(dim); + zend_illegal_string_offset(dim, type); return 0; } @@ -2288,7 +2288,7 @@ static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_use_scalar_as_array(v zend_throw_error(NULL, "Cannot use a scalar value as an array"); } -static zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_cannot_add_element(void) +ZEND_API zend_never_inline ZEND_COLD void ZEND_FASTCALL zend_cannot_add_element(void) { zend_throw_error(NULL, "Cannot add element to the array as the next element is already occupied"); } @@ -2390,7 +2390,7 @@ static zend_never_inline uint8_t slow_index_convert(HashTable 
*ht, const zval *d value->lval = 1; return IS_LONG; default: - zend_illegal_array_offset(dim); + zend_illegal_array_offset_access(dim); return IS_NULL; } } @@ -2464,7 +2464,7 @@ static zend_never_inline uint8_t slow_index_convert_w(HashTable *ht, const zval value->lval = 1; return IS_LONG; default: - zend_illegal_array_offset(dim); + zend_illegal_array_offset_access(dim); return IS_NULL; } } @@ -2762,7 +2762,7 @@ static zend_always_inline void zend_fetch_dimension_address_read(zval *result, z ZVAL_NULL(result); return; } - zend_illegal_string_offset(dim); + zend_illegal_string_offset(dim, BP_VAR_R); ZVAL_NULL(result); return; } @@ -2801,7 +2801,7 @@ static zend_always_inline void zend_fetch_dimension_address_read(zval *result, z dim = Z_REFVAL_P(dim); goto try_string_offset; default: - zend_illegal_string_offset(dim); + zend_illegal_string_offset(dim, BP_VAR_R); ZVAL_NULL(result); return; } @@ -2923,7 +2923,7 @@ static zend_never_inline zval* ZEND_FASTCALL zend_find_array_dim_slow(HashTable ZVAL_UNDEFINED_OP2(); goto str_idx; } else { - zend_illegal_empty_or_isset_offset(offset); + zend_illegal_array_offset_isset(offset); return NULL; } } @@ -3046,7 +3046,7 @@ static zend_never_inline bool ZEND_FASTCALL zend_array_key_exists_fast(HashTable str = ZSTR_EMPTY_ALLOC(); goto str_key; } else { - zend_illegal_array_offset(key); + zend_illegal_array_offset_access(key); return 0; } } diff --git a/Zend/zend_execute.h b/Zend/zend_execute.h index dab902c383cb3..f2cb6764788b3 100644 --- a/Zend/zend_execute.h +++ b/Zend/zend_execute.h @@ -88,6 +88,8 @@ ZEND_API ZEND_COLD void ZEND_FASTCALL zend_invalid_class_constant_type_error(uin ZEND_API ZEND_COLD void ZEND_FASTCALL zend_object_released_while_assigning_to_property_error(const zend_property_info *info); +ZEND_API ZEND_COLD void ZEND_FASTCALL zend_cannot_add_element(void); + ZEND_API bool zend_verify_scalar_type_hint(uint32_t type_mask, zval *arg, bool strict, bool is_internal_arg); ZEND_API ZEND_COLD void 
zend_verify_arg_error( const zend_function *zf, const zend_arg_info *arg_info, uint32_t arg_num, zval *value); diff --git a/Zend/zend_fibers.c b/Zend/zend_fibers.c index 99b044adbd550..a43ed60dcd9b8 100644 --- a/Zend/zend_fibers.c +++ b/Zend/zend_fibers.c @@ -751,7 +751,7 @@ static HashTable *zend_fiber_object_gc(zend_object *object, zval **table, int *n HashTable *lastSymTable = NULL; zend_execute_data *ex = fiber->execute_data; for (; ex; ex = ex->prev_execute_data) { - HashTable *symTable = zend_unfinished_execution_gc_ex(ex, ex->call, buf, false); + HashTable *symTable = zend_unfinished_execution_gc_ex(ex, ex->func && ZEND_USER_CODE(ex->func->type) ? ex->call : NULL, buf, false); if (symTable) { if (lastSymTable) { zval *val; diff --git a/Zend/zend_gdb.c b/Zend/zend_gdb.c index 02afb6bc6f7bc..82e8182ba822a 100644 --- a/Zend/zend_gdb.c +++ b/Zend/zend_gdb.c @@ -113,7 +113,7 @@ ZEND_API bool zend_gdb_present(void) #if defined(__linux__) /* netbsd while having this procfs part, does not hold the tracer pid */ int fd = open("/proc/self/status", O_RDONLY); - if (fd > 0) { + if (fd >= 0) { char buf[1024]; ssize_t n = read(fd, buf, sizeof(buf) - 1); char *s; diff --git a/Zend/zend_hash.c b/Zend/zend_hash.c index 8a27bd20f9494..a8571af75c906 100644 --- a/Zend/zend_hash.c +++ b/Zend/zend_hash.c @@ -250,7 +250,7 @@ ZEND_API const HashTable zend_empty_array = { .gc.u.type_info = IS_ARRAY | (GC_IMMUTABLE << GC_FLAGS_SHIFT), .u.flags = HASH_FLAG_UNINITIALIZED, .nTableMask = HT_MIN_MASK, - .arData = (Bucket*)&uninitialized_bucket[2], + {.arData = (Bucket*)&uninitialized_bucket[2]}, .nNumUsed = 0, .nNumOfElements = 0, .nTableSize = HT_MIN_SIZE, @@ -1312,7 +1312,7 @@ ZEND_API void ZEND_FASTCALL zend_hash_rehash(HashTable *ht) } } } else { - uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, 0); + uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, i + 1); while (++i < ht->nNumUsed) { p++; diff --git a/Zend/zend_inheritance.c b/Zend/zend_inheritance.c index 
ff3a4d7080751..17bbae8335445 100644 --- a/Zend/zend_inheritance.c +++ b/Zend/zend_inheritance.c @@ -3276,8 +3276,17 @@ ZEND_API zend_class_entry *zend_try_early_bind(zend_class_entry *ce, zend_class_ inheritance_status status; zend_class_entry *proto = NULL; zend_class_entry *orig_linking_class; - uint32_t is_cacheable = ce->ce_flags & ZEND_ACC_IMMUTABLE; + if (ce->ce_flags & ZEND_ACC_LINKED) { + ZEND_ASSERT(ce->parent == NULL); + if (UNEXPECTED(!register_early_bound_ce(delayed_early_binding, lcname, ce))) { + return NULL; + } + zend_observer_class_linked_notify(ce, lcname); + return ce; + } + + uint32_t is_cacheable = ce->ce_flags & ZEND_ACC_IMMUTABLE; UPDATE_IS_CACHEABLE(parent_ce); if (is_cacheable) { if (zend_inheritance_cache_get && zend_inheritance_cache_add) { diff --git a/Zend/zend_interfaces.c b/Zend/zend_interfaces.c index 5d2f7d0ffc4a9..14593264a4803 100644 --- a/Zend/zend_interfaces.c +++ b/Zend/zend_interfaces.c @@ -344,8 +344,8 @@ static int zend_implement_iterator(zend_class_entry *interface, zend_class_entry &class_type->function_table, "rewind", sizeof("rewind") - 1); funcs_ptr->zf_valid = zend_hash_str_find_ptr( &class_type->function_table, "valid", sizeof("valid") - 1); - funcs_ptr->zf_key = zend_hash_str_find_ptr( - &class_type->function_table, "key", sizeof("key") - 1); + funcs_ptr->zf_key = zend_hash_find_ptr( + &class_type->function_table, ZSTR_KNOWN(ZEND_STR_KEY)); funcs_ptr->zf_current = zend_hash_str_find_ptr( &class_type->function_table, "current", sizeof("current") - 1); funcs_ptr->zf_next = zend_hash_str_find_ptr( diff --git a/Zend/zend_language_parser.y b/Zend/zend_language_parser.y index 9b663887264f0..298eaf95ad055 100644 --- a/Zend/zend_language_parser.y +++ b/Zend/zend_language_parser.y @@ -267,7 +267,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*); %type echo_expr_list unset_variables catch_name_list catch_list optional_variable parameter_list class_statement_list %type implements_list case_list if_stmt_without_else %type 
non_empty_parameter_list argument_list non_empty_argument_list property_list -%type class_const_list first_class_const_decl class_const_decl class_name_list trait_adaptations method_body non_empty_for_exprs +%type class_const_list class_const_decl class_name_list trait_adaptations method_body non_empty_for_exprs %type ctor_arguments alt_if_stmt_without_else trait_adaptation_list lexical_vars %type lexical_var_list encaps_list %type array_pair non_empty_array_pair_list array_pair_list possible_array_pair @@ -940,7 +940,10 @@ attributed_class_statement: { $$ = zend_ast_create(ZEND_AST_PROP_GROUP, $2, $3, NULL); $$->attr = $1; } | class_const_modifiers T_CONST class_const_list ';' - { $$ = zend_ast_create(ZEND_AST_CLASS_CONST_GROUP, $3, NULL); + { $$ = zend_ast_create(ZEND_AST_CLASS_CONST_GROUP, $3, NULL, NULL); + $$->attr = $1; } + | class_const_modifiers T_CONST type_expr class_const_list ';' + { $$ = zend_ast_create(ZEND_AST_CLASS_CONST_GROUP, $4, NULL, $3); $$->attr = $1; } | method_modifiers function returns_ref identifier backup_doc_comment '(' parameter_list ')' return_type backup_fn_flags method_body backup_fn_flags @@ -1076,21 +1079,20 @@ property: class_const_list: class_const_list ',' class_const_decl { $$ = zend_ast_list_add($1, $3); } - | first_class_const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); } -; - -first_class_const_decl: - T_STRING '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL), NULL); } - | semi_reserved '=' expr backup_doc_comment { zval zv; if (zend_lex_tstring(&zv, $1) == FAILURE) { YYABORT; } $$ = zend_ast_create(ZEND_AST_CONST_ELEM, zend_ast_create_zval(&zv), $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL), NULL); } - | type_expr identifier '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $2, $4, ($5 ? 
zend_ast_create_zval_from_str($5) : NULL), $1); } + | class_const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); } ; class_const_decl: - identifier '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL), NULL); } + T_STRING '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL)); } + | semi_reserved '=' expr backup_doc_comment { + zval zv; + if (zend_lex_tstring(&zv, $1) == FAILURE) { YYABORT; } + $$ = zend_ast_create(ZEND_AST_CONST_ELEM, zend_ast_create_zval(&zv), $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL)); + } ; const_decl: - T_STRING '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL), NULL); } + T_STRING '=' expr backup_doc_comment { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3, ($4 ? zend_ast_create_zval_from_str($4) : NULL)); } ; echo_expr_list: diff --git a/Zend/zend_observer.c b/Zend/zend_observer.c index 79929bfdd80e5..2cb4db914758a 100644 --- a/Zend/zend_observer.c +++ b/Zend/zend_observer.c @@ -158,9 +158,8 @@ static bool zend_observer_remove_handler(void **first_handler, void *old_handler } else { if (cur_handler != last_handler) { memmove(cur_handler, cur_handler + 1, sizeof(cur_handler) * (last_handler - cur_handler)); - } else { - *last_handler = NULL; } + *last_handler = NULL; } return true; } @@ -196,7 +195,7 @@ ZEND_API void zend_observer_add_end_handler(zend_function *function, zend_observ if (*end_handler != ZEND_OBSERVER_NOT_OBSERVED) { // there's no space for new handlers, then it's forbidden to call this function ZEND_ASSERT(end_handler[registered_observers - 1] == NULL); - memmove(end_handler + 1, end_handler, registered_observers - 1); + memmove(end_handler + 1, end_handler, sizeof(end_handler) * (registered_observers - 1)); } *end_handler = end; } diff --git a/Zend/zend_opcode.c 
b/Zend/zend_opcode.c index 5db656b46c268..dc968bc395303 100644 --- a/Zend/zend_opcode.c +++ b/Zend/zend_opcode.c @@ -1122,6 +1122,7 @@ ZEND_API void pass_two(zend_op_array *op_array) case ZEND_FE_RESET_R: case ZEND_FE_RESET_RW: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: ZEND_PASS_TWO_UPDATE_JMP_TARGET(op_array, opline, opline->op2); break; case ZEND_ASSERT_CHECK: diff --git a/Zend/zend_operators.c b/Zend/zend_operators.c index a9932a6b592b6..0b7902d4e36c6 100644 --- a/Zend/zend_operators.c +++ b/Zend/zend_operators.c @@ -1940,108 +1940,149 @@ ZEND_API zend_result ZEND_FASTCALL shift_right_function(zval *result, zval *op1, ZEND_API zend_result ZEND_FASTCALL concat_function(zval *result, zval *op1, zval *op2) /* {{{ */ { zval *orig_op1 = op1; - zval op1_copy, op2_copy; - - ZVAL_UNDEF(&op1_copy); - ZVAL_UNDEF(&op2_copy); + zend_string *op1_string, *op2_string; + bool free_op1_string = false; + bool free_op2_string = false; do { - if (UNEXPECTED(Z_TYPE_P(op1) != IS_STRING)) { + if (EXPECTED(Z_TYPE_P(op1) == IS_STRING)) { + op1_string = Z_STR_P(op1); + } else { if (Z_ISREF_P(op1)) { op1 = Z_REFVAL_P(op1); - if (Z_TYPE_P(op1) == IS_STRING) break; + if (Z_TYPE_P(op1) == IS_STRING) { + op1_string = Z_STR_P(op1); + break; + } } ZEND_TRY_BINARY_OBJECT_OPERATION(ZEND_CONCAT); - ZVAL_STR(&op1_copy, zval_get_string_func(op1)); + op1_string = zval_get_string_func(op1); if (UNEXPECTED(EG(exception))) { - zval_ptr_dtor_str(&op1_copy); + zend_string_release(op1_string); if (orig_op1 != result) { ZVAL_UNDEF(result); } return FAILURE; } + free_op1_string = true; if (result == op1) { if (UNEXPECTED(op1 == op2)) { - op2 = &op1_copy; + op2_string = op1_string; + goto has_op2_string; } } - op1 = &op1_copy; } } while (0); do { - if (UNEXPECTED(Z_TYPE_P(op2) != IS_STRING)) { - if (Z_ISREF_P(op2)) { - op2 = Z_REFVAL_P(op2); - if (Z_TYPE_P(op2) == IS_STRING) break; - } + if (EXPECTED(Z_TYPE_P(op2) == IS_STRING)) { + op2_string = Z_STR_P(op2); + } else { + if (Z_ISREF_P(op2)) { 
+ op2 = Z_REFVAL_P(op2); + if (Z_TYPE_P(op2) == IS_STRING) { + op2_string = Z_STR_P(op2); + break; + } + } + /* hold an additional reference because a userland function could free this */ + if (!free_op1_string) { + op1_string = zend_string_copy(op1_string); + free_op1_string = true; + } ZEND_TRY_BINARY_OP2_OBJECT_OPERATION(ZEND_CONCAT); - ZVAL_STR(&op2_copy, zval_get_string_func(op2)); + op2_string = zval_get_string_func(op2); if (UNEXPECTED(EG(exception))) { - zval_ptr_dtor_str(&op1_copy); - zval_ptr_dtor_str(&op2_copy); + zend_string_release(op1_string); + zend_string_release(op2_string); if (orig_op1 != result) { ZVAL_UNDEF(result); } return FAILURE; } - op2 = &op2_copy; + free_op2_string = true; } } while (0); - if (UNEXPECTED(Z_STRLEN_P(op1) == 0)) { - if (EXPECTED(result != op2)) { +has_op2_string:; + if (UNEXPECTED(ZSTR_LEN(op1_string) == 0)) { + if (EXPECTED(result != op2 || Z_TYPE_P(result) != IS_STRING)) { if (result == orig_op1) { i_zval_ptr_dtor(result); } - ZVAL_COPY(result, op2); + if (free_op2_string) { + /* transfer ownership of op2_string */ + ZVAL_STR(result, op2_string); + free_op2_string = false; + } else { + ZVAL_STR_COPY(result, op2_string); + } } - } else if (UNEXPECTED(Z_STRLEN_P(op2) == 0)) { - if (EXPECTED(result != op1)) { + } else if (UNEXPECTED(ZSTR_LEN(op2_string) == 0)) { + if (EXPECTED(result != op1 || Z_TYPE_P(result) != IS_STRING)) { if (result == orig_op1) { i_zval_ptr_dtor(result); } - ZVAL_COPY(result, op1); + if (free_op1_string) { + /* transfer ownership of op1_string */ + ZVAL_STR(result, op1_string); + free_op1_string = false; + } else { + ZVAL_STR_COPY(result, op1_string); + } } } else { - size_t op1_len = Z_STRLEN_P(op1); - size_t op2_len = Z_STRLEN_P(op2); + size_t op1_len = ZSTR_LEN(op1_string); + size_t op2_len = ZSTR_LEN(op2_string); size_t result_len = op1_len + op2_len; zend_string *result_str; - uint32_t flags = ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(Z_STR_P(op1), Z_STR_P(op2)); + uint32_t flags = 
ZSTR_GET_COPYABLE_CONCAT_PROPERTIES_BOTH(op1_string, op2_string); if (UNEXPECTED(op1_len > ZSTR_MAX_LEN - op2_len)) { + if (free_op1_string) zend_string_release(op1_string); + if (free_op2_string) zend_string_release(op2_string); zend_throw_error(NULL, "String size overflow"); - zval_ptr_dtor_str(&op1_copy); - zval_ptr_dtor_str(&op2_copy); if (orig_op1 != result) { ZVAL_UNDEF(result); } return FAILURE; } - if (result == op1 && Z_REFCOUNTED_P(result)) { + if (result == op1) { /* special case, perform operations on result */ - result_str = zend_string_extend(Z_STR_P(result), result_len, 0); + result_str = zend_string_extend(op1_string, result_len, 0); + /* Free result after zend_string_extend(), as it may throw an out-of-memory error. If we + * free it before we would leave the released variable on the stack with shutdown trying + * to free it again. */ + if (free_op1_string) { + /* op1_string will be used as the result, so we should not free it */ + i_zval_ptr_dtor(result); + free_op1_string = false; + } + /* account for the case where result_str == op1_string == op2_string and the realloc is done */ + if (op1_string == op2_string) { + if (free_op2_string) { + zend_string_release(op2_string); + free_op2_string = false; + } + op2_string = result_str; + } } else { result_str = zend_string_alloc(result_len, 0); - memcpy(ZSTR_VAL(result_str), Z_STRVAL_P(op1), op1_len); + memcpy(ZSTR_VAL(result_str), ZSTR_VAL(op1_string), op1_len); if (result == orig_op1) { i_zval_ptr_dtor(result); } } GC_ADD_FLAGS(result_str, flags); - /* This has to happen first to account for the cases where result == op1 == op2 and - * the realloc is done. In this case this line will also update Z_STRVAL_P(op2) to - * point to the new string. The first op2_len bytes of result will still be the same. 
*/ ZVAL_NEW_STR(result, result_str); - - memcpy(ZSTR_VAL(result_str) + op1_len, Z_STRVAL_P(op2), op2_len); + memcpy(ZSTR_VAL(result_str) + op1_len, ZSTR_VAL(op2_string), op2_len); ZSTR_VAL(result_str)[result_len] = '\0'; } - zval_ptr_dtor_str(&op1_copy); - zval_ptr_dtor_str(&op2_copy); + if (free_op1_string) zend_string_release(op1_string); + if (free_op2_string) zend_string_release(op2_string); + return SUCCESS; } /* }}} */ diff --git a/Zend/zend_types.h b/Zend/zend_types.h index c341ffa0b4d8c..af5f3821723fa 100644 --- a/Zend/zend_types.h +++ b/Zend/zend_types.h @@ -626,6 +626,9 @@ static zend_always_inline uint8_t zval_get_type(const zval* pz) { #define Z_TYPE_FLAGS(zval) (zval).u1.v.type_flags #define Z_TYPE_FLAGS_P(zval_p) Z_TYPE_FLAGS(*(zval_p)) +#define Z_TYPE_EXTRA(zval) (zval).u1.v.u.extra +#define Z_TYPE_EXTRA_P(zval_p) Z_TYPE_EXTRA(*(zval_p)) + #define Z_TYPE_INFO(zval) (zval).u1.type_info #define Z_TYPE_INFO_P(zval_p) Z_TYPE_INFO(*(zval_p)) @@ -752,6 +755,11 @@ static zend_always_inline uint32_t zval_gc_info(uint32_t gc_type_info) { /* zval.u1.v.type_flags */ #define IS_TYPE_REFCOUNTED (1<<0) #define IS_TYPE_COLLECTABLE (1<<1) +/* Used for static variables to check if they have been initialized. We can't use IS_UNDEF because + * we can't store IS_UNDEF zvals in the static_variables HashTable. This needs to live in type_info + * so that the ZEND_ASSIGN overrides it but is moved to extra to avoid breaking the Z_REFCOUNTED() + * optimization that only checks for Z_TYPE_FLAGS() without `& (IS_TYPE_COLLECTABLE|IS_TYPE_REFCOUNTED)`. 
*/ +#define IS_STATIC_VAR_UNINITIALIZED (1<<0) #if 1 /* This optimized version assumes that we have a single "type_flag" */ diff --git a/Zend/zend_vm_def.h b/Zend/zend_vm_def.h index 0b6604217fa35..b7d98e55a001a 100644 --- a/Zend/zend_vm_def.h +++ b/Zend/zend_vm_def.h @@ -4584,6 +4584,8 @@ ZEND_VM_HANDLER(161, ZEND_GENERATOR_RETURN, CONST|TMP|VAR|CV, ANY, SPEC(OBSERVER ZEND_OBSERVER_FCALL_END(generator->execute_data, &generator->retval); + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -6098,7 +6100,7 @@ ZEND_VM_C_LABEL(num_index): str = ZSTR_EMPTY_ALLOC(); ZEND_VM_C_GOTO(str_index); } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } FREE_OP2(); @@ -6610,7 +6612,7 @@ ZEND_VM_C_LABEL(num_index_dim): key = ZSTR_EMPTY_ALLOC(); ZEND_VM_C_GOTO(str_index_dim); } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -7986,6 +7988,7 @@ ZEND_VM_HELPER(zend_dispatch_try_catch_finally_helper, ANY, ANY, uint32_t try_ca cleanup_live_vars(execute_data, op_num, 0); if (UNEXPECTED((EX_CALL_INFO() & ZEND_CALL_GENERATOR) != 0)) { zend_generator *generator = zend_get_running_generator(EXECUTE_DATA_C); + EG(current_execute_data) = EX(prev_execute_data); zend_generator_close(generator, 1); ZEND_VM_RETURN(); } else { @@ -8085,6 +8088,7 @@ ZEND_VM_HANDLER(150, ZEND_USER_OPCODE, ANY, ANY) case ZEND_USER_OPCODE_RETURN: if (UNEXPECTED((EX_CALL_INFO() & ZEND_CALL_GENERATOR) != 0)) { zend_generator *generator = zend_get_running_generator(EXECUTE_DATA_C); + EG(current_execute_data) = EX(prev_execute_data); zend_generator_close(generator, 1); ZEND_VM_RETURN(); } else { @@ -8922,7 +8926,7 @@ ZEND_VM_HANDLER(182, ZEND_BIND_LEXICAL, TMP, CV, REF) ZEND_VM_NEXT_OPCODE(); } -ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, UNUSED, REF) +ZEND_VM_HANDLER(183, 
ZEND_BIND_STATIC, CV, ANY, REF) { USE_OPLINE HashTable *ht; @@ -8942,18 +8946,18 @@ ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, UNUSED, REF) SAVE_OPLINE(); if (opline->extended_value & ZEND_BIND_REF) { - if (Z_TYPE_P(value) == IS_CONSTANT_AST) { - if (UNEXPECTED(zval_update_constant_ex(value, EX(func)->op_array.scope) != SUCCESS)) { - HANDLE_EXCEPTION(); - } - } - i_zval_ptr_dtor(variable_ptr); if (UNEXPECTED(!Z_ISREF_P(value))) { zend_reference *ref = (zend_reference*)emalloc(sizeof(zend_reference)); GC_SET_REFCOUNT(ref, 2); GC_TYPE_INFO(ref) = GC_REFERENCE; - ZVAL_COPY_VALUE(&ref->val, value); + if (OP2_TYPE == IS_UNUSED) { + ZVAL_COPY_VALUE(&ref->val, value); + } else { + ZEND_ASSERT(!Z_REFCOUNTED_P(value)); + ZVAL_COPY(&ref->val, GET_OP2_ZVAL_PTR_DEREF(BP_VAR_R)); + FREE_OP2(); + } ref->sources.ptr = NULL; Z_REF_P(value) = ref; Z_TYPE_INFO_P(value) = IS_REFERENCE_EX; @@ -8961,6 +8965,9 @@ ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, UNUSED, REF) } else { Z_ADDREF_P(value); ZVAL_REF(variable_ptr, Z_REF_P(value)); + if (OP2_TYPE != IS_UNUSED) { + FREE_OP2(); + } } } else { i_zval_ptr_dtor(variable_ptr); @@ -8970,6 +8977,34 @@ ZEND_VM_HANDLER(183, ZEND_BIND_STATIC, CV, UNUSED, REF) ZEND_VM_NEXT_OPCODE_CHECK_EXCEPTION(); } +ZEND_VM_HANDLER(203, ZEND_BIND_INIT_STATIC_OR_JMP, CV, JMP_ADDR) +{ + USE_OPLINE + HashTable *ht; + zval *value; + zval *variable_ptr; + + variable_ptr = GET_OP1_ZVAL_PTR_PTR_UNDEF(BP_VAR_W); + + ht = ZEND_MAP_PTR_GET(EX(func)->op_array.static_variables_ptr); + if (!ht) { + ZEND_VM_NEXT_OPCODE(); + } + ZEND_ASSERT(GC_REFCOUNT(ht) == 1); + + value = (zval*)((char*)ht->arData + opline->extended_value); + if (Z_TYPE_EXTRA_P(value) & IS_STATIC_VAR_UNINITIALIZED) { + ZEND_VM_NEXT_OPCODE(); + } else { + SAVE_OPLINE(); + zval_ptr_dtor(variable_ptr); + ZEND_ASSERT(Z_TYPE_P(value) == IS_REFERENCE); + Z_ADDREF_P(value); + ZVAL_REF(variable_ptr, Z_REF_P(value)); + ZEND_VM_JMP_EX(OP_JMP_ADDR(opline, opline->op2), 1); + } +} + ZEND_VM_HOT_HANDLER(184, 
ZEND_FETCH_THIS, UNUSED, UNUSED) { USE_OPLINE diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h index 21b927c02b895..56fdc55ea7404 100644 --- a/Zend/zend_vm_execute.h +++ b/Zend/zend_vm_execute.h @@ -3192,6 +3192,7 @@ static zend_never_inline ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL zend_dispatch_try cleanup_live_vars(execute_data, op_num, 0); if (UNEXPECTED((EX_CALL_INFO() & ZEND_CALL_GENERATOR) != 0)) { zend_generator *generator = zend_get_running_generator(EXECUTE_DATA_C); + EG(current_execute_data) = EX(prev_execute_data); zend_generator_close(generator, 1); ZEND_VM_RETURN(); } else { @@ -3291,6 +3292,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_USER_OPCODE_SPEC_HANDLER(ZEND_ case ZEND_USER_OPCODE_RETURN: if (UNEXPECTED((EX_CALL_INFO() & ZEND_CALL_GENERATOR) != 0)) { zend_generator *generator = zend_get_running_generator(EXECUTE_DATA_C); + EG(current_execute_data) = EX(prev_execute_data); zend_generator_close(generator, 1); ZEND_VM_RETURN(); } else { @@ -4621,6 +4623,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_CONST_HA } } + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -4666,6 +4670,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_OBSERVER zend_observer_fcall_end(generator->execute_data, &generator->retval); + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -7371,7 +7377,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CONST_C str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -9690,7 +9696,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CONST_T str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); 
+ zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } zval_ptr_dtor_nogc(EX_VAR(opline->op2.var)); @@ -10613,7 +10619,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CONST_U str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -12063,7 +12069,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CONST_C str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -19308,6 +19314,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_TMP_HAND } } + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -20063,7 +20071,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_TMP_CON str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -20507,7 +20515,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_TMP_TMP str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } zval_ptr_dtor_nogc(EX_VAR(opline->op2.var)); @@ -20968,7 +20976,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_TMP_UNU str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -21372,7 +21380,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_TMP_CV_ str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); 
zval_ptr_dtor_nogc(expr_ptr); } @@ -21974,6 +21982,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_VAR_HAND } } + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -25188,7 +25198,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_VAR_CON str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -25280,7 +25290,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_VAR_CONST_HANDL key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -27627,7 +27637,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_VAR_TMP str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } zval_ptr_dtor_nogc(EX_VAR(opline->op2.var)); @@ -27719,7 +27729,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_VAR_TMPVAR_HAND key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -29705,7 +29715,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_VAR_UNU str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -32015,7 +32025,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_VAR_CV_ str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -32107,7 +32117,7 @@ static 
ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_VAR_CV_HANDLER( key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -39449,6 +39459,8 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_GENERATOR_RETURN_SPEC_CV_HANDL } } + EG(current_execute_data) = EX(prev_execute_data); + /* Close the generator to free up resources */ zend_generator_close(generator, 1); @@ -40366,6 +40378,85 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_FETCH_CLASS_NAME_SPEC_CV_HANDL ZEND_VM_NEXT_OPCODE(); } +static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_BIND_STATIC_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS) +{ + USE_OPLINE + HashTable *ht; + zval *value; + zval *variable_ptr; + + variable_ptr = EX_VAR(opline->op1.var); + + ht = ZEND_MAP_PTR_GET(EX(func)->op_array.static_variables_ptr); + if (!ht) { + ht = zend_array_dup(EX(func)->op_array.static_variables); + ZEND_MAP_PTR_SET(EX(func)->op_array.static_variables_ptr, ht); + } + ZEND_ASSERT(GC_REFCOUNT(ht) == 1); + + value = (zval*)((char*)ht->arData + (opline->extended_value & ~(ZEND_BIND_REF|ZEND_BIND_IMPLICIT|ZEND_BIND_EXPLICIT))); + + SAVE_OPLINE(); + if (opline->extended_value & ZEND_BIND_REF) { + i_zval_ptr_dtor(variable_ptr); + if (UNEXPECTED(!Z_ISREF_P(value))) { + zend_reference *ref = (zend_reference*)emalloc(sizeof(zend_reference)); + GC_SET_REFCOUNT(ref, 2); + GC_TYPE_INFO(ref) = GC_REFERENCE; + if (opline->op2_type == IS_UNUSED) { + ZVAL_COPY_VALUE(&ref->val, value); + } else { + ZEND_ASSERT(!Z_REFCOUNTED_P(value)); + ZVAL_COPY(&ref->val, get_zval_ptr_deref(opline->op2_type, opline->op2, BP_VAR_R)); + FREE_OP(opline->op2_type, opline->op2.var); + } + ref->sources.ptr = NULL; + Z_REF_P(value) = ref; + Z_TYPE_INFO_P(value) = IS_REFERENCE_EX; + ZVAL_REF(variable_ptr, ref); + } else { + Z_ADDREF_P(value); + ZVAL_REF(variable_ptr, Z_REF_P(value)); + if (opline->op2_type != IS_UNUSED) { + 
FREE_OP(opline->op2_type, opline->op2.var); + } + } + } else { + i_zval_ptr_dtor(variable_ptr); + ZVAL_COPY(variable_ptr, value); + } + + ZEND_VM_NEXT_OPCODE_CHECK_EXCEPTION(); +} + +static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS) +{ + USE_OPLINE + HashTable *ht; + zval *value; + zval *variable_ptr; + + variable_ptr = EX_VAR(opline->op1.var); + + ht = ZEND_MAP_PTR_GET(EX(func)->op_array.static_variables_ptr); + if (!ht) { + ZEND_VM_NEXT_OPCODE(); + } + ZEND_ASSERT(GC_REFCOUNT(ht) == 1); + + value = (zval*)((char*)ht->arData + opline->extended_value); + if (Z_TYPE_EXTRA_P(value) & IS_STATIC_VAR_UNINITIALIZED) { + ZEND_VM_NEXT_OPCODE(); + } else { + SAVE_OPLINE(); + zval_ptr_dtor(variable_ptr); + ZEND_ASSERT(Z_TYPE_P(value) == IS_REFERENCE); + Z_ADDREF_P(value); + ZVAL_REF(variable_ptr, Z_REF_P(value)); + ZEND_VM_JMP_EX(OP_JMP_ADDR(opline, opline->op2), 1); + } +} + static ZEND_VM_HOT ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS) { USE_OPLINE @@ -43611,7 +43702,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CV_CONS str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -43703,7 +43794,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_CV_CONST_HANDLE key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -47251,7 +47342,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CV_TMPV str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } zval_ptr_dtor_nogc(EX_VAR(opline->op2.var)); @@ -47343,7 +47434,7 @@ static 
ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_CV_TMPVAR_HANDL key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -49213,7 +49304,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CV_UNUS str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -49560,54 +49651,6 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_YIELD_SPEC_CV_UNUSED_HANDLER(Z ZEND_VM_RETURN(); } -static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_BIND_STATIC_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS) -{ - USE_OPLINE - HashTable *ht; - zval *value; - zval *variable_ptr; - - variable_ptr = EX_VAR(opline->op1.var); - - ht = ZEND_MAP_PTR_GET(EX(func)->op_array.static_variables_ptr); - if (!ht) { - ht = zend_array_dup(EX(func)->op_array.static_variables); - ZEND_MAP_PTR_SET(EX(func)->op_array.static_variables_ptr, ht); - } - ZEND_ASSERT(GC_REFCOUNT(ht) == 1); - - value = (zval*)((char*)ht->arData + (opline->extended_value & ~(ZEND_BIND_REF|ZEND_BIND_IMPLICIT|ZEND_BIND_EXPLICIT))); - - SAVE_OPLINE(); - if (opline->extended_value & ZEND_BIND_REF) { - if (Z_TYPE_P(value) == IS_CONSTANT_AST) { - if (UNEXPECTED(zval_update_constant_ex(value, EX(func)->op_array.scope) != SUCCESS)) { - HANDLE_EXCEPTION(); - } - } - - i_zval_ptr_dtor(variable_ptr); - if (UNEXPECTED(!Z_ISREF_P(value))) { - zend_reference *ref = (zend_reference*)emalloc(sizeof(zend_reference)); - GC_SET_REFCOUNT(ref, 2); - GC_TYPE_INFO(ref) = GC_REFERENCE; - ZVAL_COPY_VALUE(&ref->val, value); - ref->sources.ptr = NULL; - Z_REF_P(value) = ref; - Z_TYPE_INFO_P(value) = IS_REFERENCE_EX; - ZVAL_REF(variable_ptr, ref); - } else { - Z_ADDREF_P(value); - ZVAL_REF(variable_ptr, Z_REF_P(value)); - } - } else { - i_zval_ptr_dtor(variable_ptr); - ZVAL_COPY(variable_ptr, 
value); - } - - ZEND_VM_NEXT_OPCODE_CHECK_EXCEPTION(); -} - static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_CHECK_VAR_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS) { USE_OPLINE @@ -52760,7 +52803,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_ADD_ARRAY_ELEMENT_SPEC_CV_CV_H str = ZSTR_EMPTY_ALLOC(); goto str_index; } else { - zend_illegal_array_offset(offset); + zend_illegal_array_offset_access(offset); zval_ptr_dtor_nogc(expr_ptr); } @@ -52852,7 +52895,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_UNSET_DIM_SPEC_CV_CV_HANDLER(Z key = ZSTR_EMPTY_ALLOC(); goto str_index_dim; } else { - zend_illegal_unset_offset(offset); + zend_illegal_array_offset_unset(offset); } break; } else if (Z_ISREF_P(container)) { @@ -55758,7 +55801,7 @@ ZEND_API void execute_ex(zend_execute_data *ex) (void*)&&ZEND_NULL_LABEL, (void*)&&ZEND_NULL_LABEL, (void*)&&ZEND_BIND_LEXICAL_SPEC_TMP_CV_LABEL, - (void*)&&ZEND_BIND_STATIC_SPEC_CV_UNUSED_LABEL, + (void*)&&ZEND_BIND_STATIC_SPEC_CV_LABEL, (void*)&&ZEND_FETCH_THIS_SPEC_UNUSED_UNUSED_LABEL, (void*)&&ZEND_SEND_FUNC_ARG_SPEC_VAR_CONST_LABEL, (void*)&&ZEND_NULL_LABEL, @@ -55866,6 +55909,7 @@ ZEND_API void execute_ex(zend_execute_data *ex) (void*)&&ZEND_FETCH_GLOBALS_SPEC_UNUSED_UNUSED_LABEL, (void*)&&ZEND_VERIFY_NEVER_TYPE_SPEC_UNUSED_UNUSED_LABEL, (void*)&&ZEND_CALLABLE_CONVERT_SPEC_UNUSED_UNUSED_LABEL, + (void*)&&ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV_LABEL, (void*)&&ZEND_RECV_NOTYPE_SPEC_LABEL, (void*)&&ZEND_JMP_FORWARD_SPEC_LABEL, (void*)&&ZEND_NULL_LABEL, @@ -60435,6 +60479,14 @@ ZEND_API void execute_ex(zend_execute_data *ex) VM_TRACE(ZEND_FETCH_CLASS_NAME_SPEC_CV) ZEND_FETCH_CLASS_NAME_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); HYBRID_BREAK(); + HYBRID_CASE(ZEND_BIND_STATIC_SPEC_CV): + VM_TRACE(ZEND_BIND_STATIC_SPEC_CV) + ZEND_BIND_STATIC_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); + HYBRID_BREAK(); + HYBRID_CASE(ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV): + VM_TRACE(ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV) + 
ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); + HYBRID_BREAK(); HYBRID_CASE(ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED): VM_TRACE(ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); @@ -61079,10 +61131,6 @@ ZEND_API void execute_ex(zend_execute_data *ex) VM_TRACE(ZEND_YIELD_SPEC_CV_UNUSED) ZEND_YIELD_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); HYBRID_BREAK(); - HYBRID_CASE(ZEND_BIND_STATIC_SPEC_CV_UNUSED): - VM_TRACE(ZEND_BIND_STATIC_SPEC_CV_UNUSED) - ZEND_BIND_STATIC_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); - HYBRID_BREAK(); HYBRID_CASE(ZEND_CHECK_VAR_SPEC_CV_UNUSED): VM_TRACE(ZEND_CHECK_VAR_SPEC_CV_UNUSED) ZEND_CHECK_VAR_SPEC_CV_UNUSED_HANDLER(ZEND_OPCODE_HANDLER_ARGS_PASSTHRU); @@ -63862,7 +63910,7 @@ void zend_vm_init(void) ZEND_NULL_HANDLER, ZEND_NULL_HANDLER, ZEND_BIND_LEXICAL_SPEC_TMP_CV_HANDLER, - ZEND_BIND_STATIC_SPEC_CV_UNUSED_HANDLER, + ZEND_BIND_STATIC_SPEC_CV_HANDLER, ZEND_FETCH_THIS_SPEC_UNUSED_UNUSED_HANDLER, ZEND_SEND_FUNC_ARG_SPEC_VAR_CONST_HANDLER, ZEND_NULL_HANDLER, @@ -63970,6 +64018,7 @@ void zend_vm_init(void) ZEND_FETCH_GLOBALS_SPEC_UNUSED_UNUSED_HANDLER, ZEND_VERIFY_NEVER_TYPE_SPEC_UNUSED_UNUSED_HANDLER, ZEND_CALLABLE_CONVERT_SPEC_UNUSED_UNUSED_HANDLER, + ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV_HANDLER, ZEND_RECV_NOTYPE_SPEC_HANDLER, ZEND_JMP_FORWARD_SPEC_HANDLER, ZEND_NULL_HANDLER, @@ -64921,7 +64970,7 @@ void zend_vm_init(void) 1255, 1256 | SPEC_RULE_OP1, 1261 | SPEC_RULE_OP1, - 3470, + 3471, 1266 | SPEC_RULE_OP1, 1271 | SPEC_RULE_OP1, 1276 | SPEC_RULE_OP2, @@ -65079,59 +65128,59 @@ void zend_vm_init(void) 2564, 2565, 2566, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 
3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, - 3470, + 2567, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, + 3471, }; #if (ZEND_VM_KIND == ZEND_VM_KIND_HYBRID) zend_opcode_handler_funcs = labels; @@ -65304,7 +65353,7 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2569 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2570 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; if (op->op1_type < op->op2_type) { zend_swap_operands(op); } @@ -65312,7 +65361,7 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2594 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2595 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; if (op->op1_type < op->op2_type) { zend_swap_operands(op); } @@ -65320,7 +65369,7 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2619 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2620 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; if (op->op1_type < op->op2_type) { zend_swap_operands(op); } @@ -65331,17 +65380,17 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2644 | 
SPEC_RULE_OP1 | SPEC_RULE_OP2; + spec = 2645 | SPEC_RULE_OP1 | SPEC_RULE_OP2; } else if (op1_info == MAY_BE_LONG && op2_info == MAY_BE_LONG) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2669 | SPEC_RULE_OP1 | SPEC_RULE_OP2; + spec = 2670 | SPEC_RULE_OP1 | SPEC_RULE_OP2; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2694 | SPEC_RULE_OP1 | SPEC_RULE_OP2; + spec = 2695 | SPEC_RULE_OP1 | SPEC_RULE_OP2; } break; case ZEND_MUL: @@ -65352,17 +65401,17 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2719 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2720 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_LONG && op2_info == MAY_BE_LONG) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2744 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2745 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2769 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 2770 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_IDENTICAL: @@ -65373,14 +65422,14 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2794 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2795 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2869 | 
SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2870 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op->op1_type == IS_CV && (op->op2_type & (IS_CONST|IS_CV)) && !(op1_info & (MAY_BE_UNDEF|MAY_BE_REF)) && !(op2_info & (MAY_BE_UNDEF|MAY_BE_REF))) { - spec = 3094 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 3095 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_NOT_IDENTICAL: @@ -65391,14 +65440,14 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2944 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2945 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3019 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 3020 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op->op1_type == IS_CV && (op->op2_type & (IS_CONST|IS_CV)) && !(op1_info & (MAY_BE_UNDEF|MAY_BE_REF)) && !(op2_info & (MAY_BE_UNDEF|MAY_BE_REF))) { - spec = 3099 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; + spec = 3100 | SPEC_RULE_OP2 | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_EQUAL: @@ -65409,12 +65458,12 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2794 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2795 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2869 | 
SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2870 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_NOT_EQUAL: @@ -65425,12 +65474,12 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 2944 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 2945 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3019 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; + spec = 3020 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH | SPEC_RULE_COMMUTATIVE; } break; case ZEND_IS_SMALLER: @@ -65438,12 +65487,12 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3104 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; + spec = 3105 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3179 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; + spec = 3180 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; } break; case ZEND_IS_SMALLER_OR_EQUAL: @@ -65451,74 +65500,74 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3254 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; + spec = 3255 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; } else if (op1_info == MAY_BE_DOUBLE && op2_info == MAY_BE_DOUBLE) { if (op->op1_type == IS_CONST && 
op->op2_type == IS_CONST) { break; } - spec = 3329 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; + spec = 3330 | SPEC_RULE_OP1 | SPEC_RULE_OP2 | SPEC_RULE_SMART_BRANCH; } break; case ZEND_QM_ASSIGN: if (op1_info == MAY_BE_LONG) { - spec = 3416 | SPEC_RULE_OP1; + spec = 3417 | SPEC_RULE_OP1; } else if (op1_info == MAY_BE_DOUBLE) { - spec = 3421 | SPEC_RULE_OP1; + spec = 3422 | SPEC_RULE_OP1; } else if ((op->op1_type == IS_CONST) ? !Z_REFCOUNTED_P(RT_CONSTANT(op, op->op1)) : (!(op1_info & ((MAY_BE_ANY|MAY_BE_UNDEF)-(MAY_BE_NULL|MAY_BE_FALSE|MAY_BE_TRUE|MAY_BE_LONG|MAY_BE_DOUBLE))))) { - spec = 3426 | SPEC_RULE_OP1; + spec = 3427 | SPEC_RULE_OP1; } break; case ZEND_PRE_INC: if (res_info == MAY_BE_LONG && op1_info == MAY_BE_LONG) { - spec = 3404 | SPEC_RULE_RETVAL; + spec = 3405 | SPEC_RULE_RETVAL; } else if (op1_info == MAY_BE_LONG) { - spec = 3406 | SPEC_RULE_RETVAL; + spec = 3407 | SPEC_RULE_RETVAL; } break; case ZEND_PRE_DEC: if (res_info == MAY_BE_LONG && op1_info == MAY_BE_LONG) { - spec = 3408 | SPEC_RULE_RETVAL; + spec = 3409 | SPEC_RULE_RETVAL; } else if (op1_info == MAY_BE_LONG) { - spec = 3410 | SPEC_RULE_RETVAL; + spec = 3411 | SPEC_RULE_RETVAL; } break; case ZEND_POST_INC: if (res_info == MAY_BE_LONG && op1_info == MAY_BE_LONG) { - spec = 3412; - } else if (op1_info == MAY_BE_LONG) { spec = 3413; + } else if (op1_info == MAY_BE_LONG) { + spec = 3414; } break; case ZEND_POST_DEC: if (res_info == MAY_BE_LONG && op1_info == MAY_BE_LONG) { - spec = 3414; - } else if (op1_info == MAY_BE_LONG) { spec = 3415; + } else if (op1_info == MAY_BE_LONG) { + spec = 3416; } break; case ZEND_JMP: if (OP_JMP_ADDR(op, op->op1) > op) { - spec = 2568; + spec = 2569; } break; case ZEND_RECV: if (op->op2.num == MAY_BE_ANY) { - spec = 2567; + spec = 2568; } break; case ZEND_SEND_VAL: if (op->op1_type == IS_CONST && op->op2_type == IS_UNUSED && !Z_REFCOUNTED_P(RT_CONSTANT(op, op->op1))) { - spec = 3466; + spec = 3467; } break; case ZEND_SEND_VAR_EX: if (op->op2_type == 
IS_UNUSED && op->op2.num <= MAX_ARG_FLAG_NUM && (op1_info & (MAY_BE_UNDEF|MAY_BE_REF)) == 0) { - spec = 3461 | SPEC_RULE_OP1; + spec = 3462 | SPEC_RULE_OP1; } break; case ZEND_FE_FETCH_R: if (op->op2_type == IS_CV && (op1_info & (MAY_BE_ANY|MAY_BE_REF)) == MAY_BE_ARRAY) { - spec = 3468 | SPEC_RULE_RETVAL; + spec = 3469 | SPEC_RULE_RETVAL; } break; case ZEND_FETCH_DIM_R: @@ -65526,17 +65575,17 @@ ZEND_API void ZEND_FASTCALL zend_vm_set_opcode_handler_ex(zend_op* op, uint32_t if (op->op1_type == IS_CONST && op->op2_type == IS_CONST) { break; } - spec = 3431 | SPEC_RULE_OP1 | SPEC_RULE_OP2; + spec = 3432 | SPEC_RULE_OP1 | SPEC_RULE_OP2; } break; case ZEND_SEND_VAL_EX: if (op->op2_type == IS_UNUSED && op->op2.num <= MAX_ARG_FLAG_NUM && op->op1_type == IS_CONST && !Z_REFCOUNTED_P(RT_CONSTANT(op, op->op1))) { - spec = 3467; + spec = 3468; } break; case ZEND_SEND_VAR: if (op->op2_type == IS_UNUSED && (op1_info & (MAY_BE_UNDEF|MAY_BE_REF)) == 0) { - spec = 3456 | SPEC_RULE_OP1; + spec = 3457 | SPEC_RULE_OP1; } break; case ZEND_BW_OR: diff --git a/Zend/zend_vm_handlers.h b/Zend/zend_vm_handlers.h index fae2138ef912e..97dfeac30cae8 100644 --- a/Zend/zend_vm_handlers.h +++ b/Zend/zend_vm_handlers.h @@ -1291,7 +1291,7 @@ _(2450, ZEND_FETCH_CLASS_CONSTANT_SPEC_UNUSED_TMPVARCV) \ _(2452, ZEND_FETCH_CLASS_CONSTANT_SPEC_UNUSED_TMPVARCV) \ _(2458, ZEND_BIND_LEXICAL_SPEC_TMP_CV) \ - _(2459, ZEND_BIND_STATIC_SPEC_CV_UNUSED) \ + _(2459, ZEND_BIND_STATIC_SPEC_CV) \ _(2460, ZEND_FETCH_THIS_SPEC_UNUSED_UNUSED) \ _(2461, ZEND_SEND_FUNC_ARG_SPEC_VAR_CONST) \ _(2464, ZEND_SEND_FUNC_ARG_SPEC_VAR_UNUSED) \ @@ -1362,498 +1362,499 @@ _(2564, ZEND_FETCH_GLOBALS_SPEC_UNUSED_UNUSED) \ _(2565, ZEND_VERIFY_NEVER_TYPE_SPEC_UNUSED_UNUSED) \ _(2566, ZEND_CALLABLE_CONVERT_SPEC_UNUSED_UNUSED) \ - _(2567, ZEND_RECV_NOTYPE_SPEC) \ - _(2568, ZEND_JMP_FORWARD_SPEC) \ - _(2574, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2575, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2567, 
ZEND_BIND_INIT_STATIC_OR_JMP_SPEC_CV) \ + _(2568, ZEND_RECV_NOTYPE_SPEC) \ + _(2569, ZEND_JMP_FORWARD_SPEC) \ + _(2575, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2576, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2578, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2579, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2580, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2577, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2579, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2580, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2581, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2583, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2589, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2590, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2582, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2584, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2590, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2591, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2593, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2599, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ - _(2600, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2592, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2594, ZEND_ADD_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2600, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ _(2601, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2603, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2604, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ - _(2605, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2602, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2604, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2605, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ _(2606, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2608, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2614, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ - _(2615, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2607, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + 
_(2609, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2615, ZEND_ADD_LONG_SPEC_TMPVARCV_CONST) \ _(2616, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2618, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2624, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2625, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2617, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2619, ZEND_ADD_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2625, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2626, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2628, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2629, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2630, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2627, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2629, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2630, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2631, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2633, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2639, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2640, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2632, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2634, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2640, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2641, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2643, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2645, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ + _(2642, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2644, ZEND_ADD_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ _(2646, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ - _(2648, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ - _(2649, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2650, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2647, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ + _(2649, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_CONST_TMPVARCV) \ + _(2650, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2651, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2653, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2654, 
ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2655, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2652, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2654, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2655, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2656, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2658, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2664, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2665, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2657, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2659, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2665, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2666, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2668, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2670, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ + _(2667, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2669, ZEND_SUB_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ _(2671, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ - _(2673, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ - _(2674, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ - _(2675, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2672, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ + _(2674, ZEND_SUB_LONG_SPEC_CONST_TMPVARCV) \ + _(2675, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ _(2676, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2678, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2679, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ - _(2680, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2677, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2679, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2680, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ _(2681, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2683, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2689, ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ - _(2690, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2682, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2684, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2690, 
ZEND_SUB_LONG_SPEC_TMPVARCV_CONST) \ _(2691, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2693, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2695, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(2692, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2694, ZEND_SUB_LONG_SPEC_TMPVARCV_TMPVARCV) \ _(2696, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(2698, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(2699, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2700, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2697, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(2699, ZEND_SUB_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(2700, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2701, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2703, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2704, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2705, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2702, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2704, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2705, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2706, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2708, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2714, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2715, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2707, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2709, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2715, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2716, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2718, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2724, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2725, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2717, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2719, ZEND_SUB_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2725, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2726, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2728, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2729, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2730, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2727, 
ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2729, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2730, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2731, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2733, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2739, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ - _(2740, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2732, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2734, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2740, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_CONST) \ _(2741, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2743, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ - _(2749, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ - _(2750, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2742, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2744, ZEND_MUL_LONG_NO_OVERFLOW_SPEC_TMPVARCV_TMPVARCV) \ + _(2750, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ _(2751, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2753, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2754, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ - _(2755, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2752, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2754, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2755, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ _(2756, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2758, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2764, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ - _(2765, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2757, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2759, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2765, ZEND_MUL_LONG_SPEC_TMPVARCV_CONST) \ _(2766, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2768, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2774, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2775, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2767, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2769, ZEND_MUL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2775, 
ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2776, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2778, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2779, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2780, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2777, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2779, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2780, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2781, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2783, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2789, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2790, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2782, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2784, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2790, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_CONST) \ _(2791, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2793, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2809, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2810, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2811, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2812, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2813, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2814, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2815, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2816, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2817, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2821, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2822, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2823, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2824, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2825, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2826, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2827, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2828, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2829, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2830, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2831, 
ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2832, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2836, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2837, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2838, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2854, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2855, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2856, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2857, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2858, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2859, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2860, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2861, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2862, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2866, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2867, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2868, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2884, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2885, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2886, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2887, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2888, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2889, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2890, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2891, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2892, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2896, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2897, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2898, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2899, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2900, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2901, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2902, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2903, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - 
_(2904, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2905, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2906, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2907, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2911, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2912, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2913, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2929, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(2930, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2931, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2932, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2933, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2934, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2935, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2936, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2937, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2941, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(2942, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2943, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2959, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2960, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2961, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2962, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2963, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2964, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2965, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2966, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2967, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2971, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2972, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2973, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2974, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(2975, 
ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(2976, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(2977, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2978, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2979, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2980, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2981, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2982, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(2986, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(2987, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(2988, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3004, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(3005, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3006, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3007, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3008, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3009, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3010, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3011, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3012, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3016, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3017, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3018, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3034, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3035, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3036, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3037, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3038, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3039, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3040, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3041, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3042, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - 
_(3046, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3047, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3048, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3049, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3050, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3051, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3052, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3053, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3054, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3055, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3056, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3057, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3061, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3062, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3063, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3079, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3080, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3081, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3082, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3083, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3084, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3085, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3086, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3087, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3091, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3092, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3093, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3094, ZEND_IS_IDENTICAL_NOTHROW_SPEC_CV_CONST) \ - _(3098, ZEND_IS_IDENTICAL_NOTHROW_SPEC_CV_CV) \ - _(3099, ZEND_IS_NOT_IDENTICAL_NOTHROW_SPEC_CV_CONST) \ - _(3103, ZEND_IS_NOT_IDENTICAL_NOTHROW_SPEC_CV_CV) \ - _(3107, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ - 
_(3108, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3109, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3110, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ - _(3111, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3112, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3116, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ - _(3117, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3118, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3119, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ - _(3120, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3121, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3122, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3123, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3124, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3125, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3126, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3127, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3131, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3132, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3133, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3134, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ - _(3135, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3136, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3137, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3138, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3139, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3140, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3141, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3142, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3146, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3147, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3148, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3164, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ - _(3165, 
ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3166, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3167, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3168, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3169, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3170, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3171, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3172, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3176, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3177, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3178, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3182, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3183, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3184, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3185, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3186, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3187, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3191, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3192, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3193, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3194, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3195, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3196, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3197, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3198, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3199, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3200, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3201, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3202, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3206, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3207, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3208, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3209, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ - 
_(3210, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3211, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3212, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3213, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3214, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3215, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3216, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3217, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3221, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3222, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3223, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3239, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3240, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3241, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3242, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3243, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3244, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3245, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3246, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3247, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3251, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3252, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3253, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3257, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ - _(3258, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3259, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3260, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ - _(3261, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3262, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3266, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ - _(3267, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3268, 
ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3269, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(3270, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3271, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3272, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3273, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3274, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3275, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3276, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3277, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3281, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3282, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3283, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3284, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(3285, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3286, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3287, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3288, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3289, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3290, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3291, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3292, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3296, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3297, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3298, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3314, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ - _(3315, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3316, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3317, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3318, 
ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3319, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3320, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3321, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3322, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3326, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ - _(3327, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3328, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3332, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3333, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3334, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3335, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3336, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3337, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3341, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ - _(3342, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ - _(3343, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ - _(3344, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3345, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3346, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3347, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3348, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3349, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3350, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3351, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3352, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3356, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3357, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3358, 
ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3359, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3360, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3361, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3362, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3363, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3364, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3365, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3366, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3367, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3371, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3372, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3373, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3389, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ - _(3390, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ - _(3391, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ - _(3392, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3393, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3394, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3395, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3396, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3397, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3401, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ - _(3402, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ - _(3403, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ - _(3404, ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) \ - _(3405, ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_USED) \ - _(3406, ZEND_PRE_INC_LONG_SPEC_CV_RETVAL_UNUSED) \ - _(3407, ZEND_PRE_INC_LONG_SPEC_CV_RETVAL_USED) \ - 
_(3408, ZEND_PRE_DEC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) \ - _(3409, ZEND_PRE_DEC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_USED) \ - _(3410, ZEND_PRE_DEC_LONG_SPEC_CV_RETVAL_UNUSED) \ - _(3411, ZEND_PRE_DEC_LONG_SPEC_CV_RETVAL_USED) \ - _(3412, ZEND_POST_INC_LONG_NO_OVERFLOW_SPEC_CV) \ - _(3413, ZEND_POST_INC_LONG_SPEC_CV) \ - _(3414, ZEND_POST_DEC_LONG_NO_OVERFLOW_SPEC_CV) \ - _(3415, ZEND_POST_DEC_LONG_SPEC_CV) \ - _(3416, ZEND_QM_ASSIGN_LONG_SPEC_CONST) \ - _(3417, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ + _(2792, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2794, ZEND_MUL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2810, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2811, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2812, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2813, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2814, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2815, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2816, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2817, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2818, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2822, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2823, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2824, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2825, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2826, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2827, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2828, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2829, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2830, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2831, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2832, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2833, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2837, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2838, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2839, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ 
+ _(2855, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2856, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2857, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2858, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2859, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2860, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2861, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2862, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2863, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2867, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2868, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2869, ZEND_IS_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2885, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(2886, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2887, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2888, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2889, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2890, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2891, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2892, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2893, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2897, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2898, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2899, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2900, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(2901, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2902, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2903, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2904, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2905, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2906, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2907, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2908, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2912, 
ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2913, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2914, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2930, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(2931, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2932, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2933, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2934, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2935, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2936, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2937, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2938, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2942, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(2943, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2944, ZEND_IS_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2960, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2961, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2962, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2963, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2964, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2965, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2966, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2967, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2968, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2972, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2973, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2974, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2975, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(2976, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(2977, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(2978, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2979, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2980, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) 
\ + _(2981, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2982, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2983, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(2987, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(2988, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(2989, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3005, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(3006, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3007, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3008, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3009, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3010, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3011, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3012, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3013, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3017, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3018, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3019, ZEND_IS_NOT_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3035, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3036, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3037, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3038, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3039, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3040, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3041, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3042, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3043, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3047, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3048, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3049, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3050, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3051, 
ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3052, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3053, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3054, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3055, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3056, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3057, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3058, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3062, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3063, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3064, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3080, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3081, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3082, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3083, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3084, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3085, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3086, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3087, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3088, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3092, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3093, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3094, ZEND_IS_NOT_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3095, ZEND_IS_IDENTICAL_NOTHROW_SPEC_CV_CONST) \ + _(3099, ZEND_IS_IDENTICAL_NOTHROW_SPEC_CV_CV) \ + _(3100, ZEND_IS_NOT_IDENTICAL_NOTHROW_SPEC_CV_CONST) \ + _(3104, ZEND_IS_NOT_IDENTICAL_NOTHROW_SPEC_CV_CV) \ + _(3108, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ + _(3109, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3110, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3111, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ + _(3112, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3113, 
ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3117, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV) \ + _(3118, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3119, ZEND_IS_SMALLER_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3120, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ + _(3121, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3122, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3123, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3124, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3125, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3126, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3127, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3128, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3132, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3133, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3134, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3135, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ + _(3136, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3137, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3138, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3139, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3140, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3141, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3142, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3143, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3147, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3148, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3149, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3165, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST) \ + _(3166, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3167, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3168, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3169, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3170, 
ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3171, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3172, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3173, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3177, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3178, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3179, ZEND_IS_SMALLER_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3183, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3184, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3185, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3186, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3187, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3188, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3192, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3193, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3194, ZEND_IS_SMALLER_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3195, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3196, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3197, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3198, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3199, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3200, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3201, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3202, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3203, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3207, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3208, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3209, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3210, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3211, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3212, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3213, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3214, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ 
+ _(3215, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3216, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3217, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3218, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3222, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3223, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3224, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3240, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3241, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3242, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3243, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3244, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3245, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3246, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3247, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3248, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3252, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3253, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3254, ZEND_IS_SMALLER_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3258, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ + _(3259, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3260, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3261, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ + _(3262, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3263, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3267, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV) \ + _(3268, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3269, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3270, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(3271, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3272, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3273, 
ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3274, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3275, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3276, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3277, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3278, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3282, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3283, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3284, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3285, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(3286, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3287, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3288, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3289, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3290, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3291, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3292, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3293, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3297, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3298, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3299, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3315, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST) \ + _(3316, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3317, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3318, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3319, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3320, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3321, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3322, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3323, 
ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3327, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV) \ + _(3328, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3329, ZEND_IS_SMALLER_OR_EQUAL_LONG_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3333, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3334, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3335, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3336, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3337, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3338, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3342, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV) \ + _(3343, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPZ) \ + _(3344, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_CONST_TMPVARCV_JMPNZ) \ + _(3345, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3346, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3347, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3348, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3349, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3350, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3351, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3352, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3353, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3357, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3358, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3359, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3360, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3361, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3362, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3363, 
ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3364, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3365, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3366, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3367, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3368, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3372, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3373, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3374, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3390, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST) \ + _(3391, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPZ) \ + _(3392, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_CONST_JMPNZ) \ + _(3393, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3394, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3395, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3396, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3397, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3398, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3402, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV) \ + _(3403, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPZ) \ + _(3404, ZEND_IS_SMALLER_OR_EQUAL_DOUBLE_SPEC_TMPVARCV_TMPVARCV_JMPNZ) \ + _(3405, ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) \ + _(3406, ZEND_PRE_INC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_USED) \ + _(3407, ZEND_PRE_INC_LONG_SPEC_CV_RETVAL_UNUSED) \ + _(3408, ZEND_PRE_INC_LONG_SPEC_CV_RETVAL_USED) \ + _(3409, ZEND_PRE_DEC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_UNUSED) \ + _(3410, ZEND_PRE_DEC_LONG_NO_OVERFLOW_SPEC_CV_RETVAL_USED) \ + _(3411, ZEND_PRE_DEC_LONG_SPEC_CV_RETVAL_UNUSED) \ + _(3412, ZEND_PRE_DEC_LONG_SPEC_CV_RETVAL_USED) \ + _(3413, ZEND_POST_INC_LONG_NO_OVERFLOW_SPEC_CV) 
\ + _(3414, ZEND_POST_INC_LONG_SPEC_CV) \ + _(3415, ZEND_POST_DEC_LONG_NO_OVERFLOW_SPEC_CV) \ + _(3416, ZEND_POST_DEC_LONG_SPEC_CV) \ + _(3417, ZEND_QM_ASSIGN_LONG_SPEC_CONST) \ _(3418, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ - _(3420, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ - _(3421, ZEND_QM_ASSIGN_DOUBLE_SPEC_CONST) \ - _(3422, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ + _(3419, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ + _(3421, ZEND_QM_ASSIGN_LONG_SPEC_TMPVARCV) \ + _(3422, ZEND_QM_ASSIGN_DOUBLE_SPEC_CONST) \ _(3423, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ - _(3425, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ - _(3426, ZEND_QM_ASSIGN_NOREF_SPEC_CONST) \ - _(3427, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ + _(3424, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ + _(3426, ZEND_QM_ASSIGN_DOUBLE_SPEC_TMPVARCV) \ + _(3427, ZEND_QM_ASSIGN_NOREF_SPEC_CONST) \ _(3428, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ - _(3430, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ - _(3432, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ + _(3429, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ + _(3431, ZEND_QM_ASSIGN_NOREF_SPEC_TMPVARCV) \ _(3433, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ - _(3435, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ - _(3436, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_CONST) \ - _(3437, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3434, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ + _(3436, ZEND_FETCH_DIM_R_INDEX_SPEC_CONST_TMPVARCV) \ + _(3437, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_CONST) \ _(3438, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ - _(3440, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ - _(3441, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_CONST) \ - _(3442, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3439, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3441, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3442, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_CONST) \ _(3443, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ - _(3445, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ - _(3451, 
ZEND_FETCH_DIM_R_INDEX_SPEC_CV_CONST) \ - _(3452, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ + _(3444, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3446, ZEND_FETCH_DIM_R_INDEX_SPEC_TMPVAR_TMPVARCV) \ + _(3452, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_CONST) \ _(3453, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ - _(3455, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ - _(3458, ZEND_SEND_VAR_SIMPLE_SPEC_VAR) \ - _(3460, ZEND_SEND_VAR_SIMPLE_SPEC_CV) \ - _(3463, ZEND_SEND_VAR_EX_SIMPLE_SPEC_VAR_UNUSED) \ - _(3465, ZEND_SEND_VAR_EX_SIMPLE_SPEC_CV_UNUSED) \ - _(3466, ZEND_SEND_VAL_SIMPLE_SPEC_CONST) \ - _(3467, ZEND_SEND_VAL_EX_SIMPLE_SPEC_CONST) \ - _(3468, ZEND_FE_FETCH_R_SIMPLE_SPEC_VAR_CV_RETVAL_UNUSED) \ - _(3469, ZEND_FE_FETCH_R_SIMPLE_SPEC_VAR_CV_RETVAL_USED) \ - _(3469+1, ZEND_NULL) + _(3454, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ + _(3456, ZEND_FETCH_DIM_R_INDEX_SPEC_CV_TMPVARCV) \ + _(3459, ZEND_SEND_VAR_SIMPLE_SPEC_VAR) \ + _(3461, ZEND_SEND_VAR_SIMPLE_SPEC_CV) \ + _(3464, ZEND_SEND_VAR_EX_SIMPLE_SPEC_VAR_UNUSED) \ + _(3466, ZEND_SEND_VAR_EX_SIMPLE_SPEC_CV_UNUSED) \ + _(3467, ZEND_SEND_VAL_SIMPLE_SPEC_CONST) \ + _(3468, ZEND_SEND_VAL_EX_SIMPLE_SPEC_CONST) \ + _(3469, ZEND_FE_FETCH_R_SIMPLE_SPEC_VAR_CV_RETVAL_UNUSED) \ + _(3470, ZEND_FE_FETCH_R_SIMPLE_SPEC_VAR_CV_RETVAL_USED) \ + _(3470+1, ZEND_NULL) diff --git a/Zend/zend_vm_opcodes.c b/Zend/zend_vm_opcodes.c index a9e4317e04e2d..e94b6c7d5f0cd 100644 --- a/Zend/zend_vm_opcodes.c +++ b/Zend/zend_vm_opcodes.c @@ -22,7 +22,7 @@ #include #include -static const char *zend_vm_opcodes_names[203] = { +static const char *zend_vm_opcodes_names[204] = { "ZEND_NOP", "ZEND_ADD", "ZEND_SUB", @@ -226,9 +226,10 @@ static const char *zend_vm_opcodes_names[203] = { "ZEND_FETCH_GLOBALS", "ZEND_VERIFY_NEVER_TYPE", "ZEND_CALLABLE_CONVERT", + "ZEND_BIND_INIT_STATIC_OR_JMP", }; -static uint32_t zend_vm_opcodes_flags[203] = { +static uint32_t zend_vm_opcodes_flags[204] = { 0x00000000, 0x00000b0b, 0x00000b0b, @@ -412,7 +413,7 @@ static 
uint32_t zend_vm_opcodes_flags[203] = { 0x00067000, 0x00040b73, 0x00100101, - 0x00100101, + 0x00100001, 0x00000101, 0x00001301, 0x00000101, @@ -432,6 +433,7 @@ static uint32_t zend_vm_opcodes_flags[203] = { 0x00000101, 0x00000101, 0x00000101, + 0x00002001, }; ZEND_API const char* ZEND_FASTCALL zend_get_opcode_name(uint8_t opcode) { diff --git a/Zend/zend_vm_opcodes.h b/Zend/zend_vm_opcodes.h index 43bd8bc252802..5531accbf0c20 100644 --- a/Zend/zend_vm_opcodes.h +++ b/Zend/zend_vm_opcodes.h @@ -285,7 +285,8 @@ END_EXTERN_C() #define ZEND_FETCH_GLOBALS 200 #define ZEND_VERIFY_NEVER_TYPE 201 #define ZEND_CALLABLE_CONVERT 202 +#define ZEND_BIND_INIT_STATIC_OR_JMP 203 -#define ZEND_VM_LAST_OPCODE 202 +#define ZEND_VM_LAST_OPCODE 203 #endif diff --git a/build/gen_stub.php b/build/gen_stub.php index c0b02b03738f2..f3b3b5fcc7514 100755 --- a/build/gen_stub.php +++ b/build/gen_stub.php @@ -725,11 +725,7 @@ public function getTypeForDoc(DOMDocument $doc): DOMElement { } } else { $type = $this->types[0]; - if ($type->isBuiltin && strtolower($type->name) === "true") { - $name = "bool"; - } else { - $name = $type->name; - } + $name = $type->name; $typeElement = $doc->createElement('type', $name); } diff --git a/build/libtool.m4 b/build/libtool.m4 index 91a1f9022f657..8ee7b453010f1 100644 --- a/build/libtool.m4 +++ b/build/libtool.m4 @@ -271,7 +271,7 @@ $rm -r conftest* dnl autoconf 2.13 compatibility dnl _LT_AC_TRY_LINK() -AC_DEFUN(_LT_AC_TRY_LINK, [ +AC_DEFUN([_LT_AC_TRY_LINK], [ cat > conftest.$ac_ext </dev/null && hard_links=no AC_MSG_RESULT([$hard_links]) if test "$hard_links" = no; then - AC_MSG_WARN([\`$CC' does not support \`-c -o', so \`make -j' may be unsafe]) + AC_MSG_WARN(['$CC' does not support '-c -o', so 'make -j' may be unsafe]) need_locks=warn fi else @@ -1935,15 +1934,15 @@ AC_ARG_WITH([tags], if test -f "$ltmain" && test -n "$tagnames"; then if test ! 
-f "${ofile}"; then - AC_MSG_WARN([output file \`$ofile' does not exist]) + AC_MSG_WARN([output file '$ofile' does not exist]) fi if test -z "$LTCC"; then eval "`$SHELL ${ofile} --config | grep '^LTCC='`" if test -z "$LTCC"; then - AC_MSG_WARN([output file \`$ofile' does not look like a libtool script]) + AC_MSG_WARN([output file '$ofile' does not look like a libtool script]) else - AC_MSG_WARN([using \`LTCC=$LTCC', extracted from \`$ofile']) + AC_MSG_WARN([using 'LTCC=$LTCC', extracted from '$ofile']) fi fi if test -z "$LTCFLAGS"; then @@ -1966,7 +1965,7 @@ if test -f "$ltmain" && test -n "$tagnames"; then if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "${ofile}" > /dev/null then - AC_MSG_ERROR([tag name \"$tagname\" already exists]) + AC_MSG_ERROR([tag name "$tagname" already exists]) fi # Update the list of available tags. @@ -2738,8 +2737,7 @@ fi AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG], [_LT_AC_LANG_C_CONFIG]) AC_DEFUN([_LT_AC_LANG_C_CONFIG], [lt_save_CC="$CC" -AC_LANG_SAVE -AC_LANG_C +AC_LANG_PUSH([C]) # Source file extension for C test sources. ac_ext=c @@ -2807,7 +2805,7 @@ AC_MSG_RESULT([$enable_static]) AC_LIBTOOL_CONFIG($1) -AC_LANG_RESTORE +AC_LANG_POP([C]) CC="$lt_save_CC" ])# AC_LIBTOOL_LANG_C_CONFIG @@ -2819,8 +2817,7 @@ CC="$lt_save_CC" # AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'. 
AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG], [_LT_AC_LANG_CXX_CONFIG(CXX)]) AC_DEFUN([_LT_AC_LANG_CXX_CONFIG], -[AC_LANG_SAVE -AC_LANG_CPLUSPLUS +[AC_LANG_PUSH([C++]) AC_REQUIRE([AC_PROG_CXX]) AC_REQUIRE([_LT_AC_PROG_CXXCPP]) @@ -3806,7 +3803,7 @@ AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1) AC_LIBTOOL_CONFIG($1) -AC_LANG_RESTORE +AC_LANG_POP([C++]) CC=$lt_save_CC LDCXX=$LD LD=$lt_save_LD diff --git a/configure.ac b/configure.ac index fcb629723817b..0a475ed80f311 100644 --- a/configure.ac +++ b/configure.ac @@ -373,6 +373,16 @@ if test "$ac_cv_func_dlopen" = "yes"; then fi AC_CHECK_LIB(m, sin) +case $host_alias in + riscv64*) + AC_CHECK_LIB(atomic, __atomic_exchange_1, [ + PHP_ADD_LIBRARY(atomic) + ], [ + AC_MSG_ERROR([Problem with enabling atomic. Please check config.log for details.]) + ]) + ;; +esac + dnl Check for inet_aton in -lc, -lbind and -lresolv. PHP_CHECK_FUNC(inet_aton, resolv, bind) diff --git a/docs/release-process.md b/docs/release-process.md index a082aabc65bef..f69ac6cf3b5d1 100644 --- a/docs/release-process.md +++ b/docs/release-process.md @@ -6,10 +6,10 @@ repository available according to the release schedule. The release schedule for each version is published on the [PHP wiki](https://wiki.php.net): +- [PHP 8.3](https://wiki.php.net/todo/php83) - [PHP 8.2](https://wiki.php.net/todo/php82) - [PHP 8.1](https://wiki.php.net/todo/php81) - [PHP 8.0](https://wiki.php.net/todo/php80) -- [PHP 7.4](https://wiki.php.net/todo/php74) The PHP project publishes builds every two weeks. @@ -605,7 +605,7 @@ slightly different steps. We'll call attention where the steps differ. git fetch --all git pull --rebase upstream master cd .. - git commit distributions + git commit distributions -m "X.Y.Z tarballs" git push upstream master ``` @@ -988,6 +988,20 @@ volunteers to begin the selection process for the next release managers. git push ``` + `web-php-distributions` is a submodule of `web-php`. 
You'll now have to update + the commit reference to reflect the change made in web-php-distributions. + + ```shell + cd /path/to/repos/php/web-php + git submodule update + cd distributions # This is the submodule referring to web-php-distributions + git pull origin master + cd .. + git add distributions + git commit --gpg-sign=YOURKEYID -m "Update php-keyring.gpg in distributions" + git push + ``` + 4. Request moderation access to php-announce@lists.php.net and primary-qa-tester@lists.php.net lists, so you are able to moderate your release announcements. All the announcements should be sent from your diff --git a/ext/bcmath/libbcmath/src/bcmath.h b/ext/bcmath/libbcmath/src/bcmath.h index 4e32a3cbacacb..6ce1350956417 100644 --- a/ext/bcmath/libbcmath/src/bcmath.h +++ b/ext/bcmath/libbcmath/src/bcmath.h @@ -129,7 +129,7 @@ int bc_modulo(bc_num num1, bc_num num2, bc_num *resul, int scale); int bc_divmod(bc_num num1, bc_num num2, bc_num *quo, bc_num *rem, int scale); -int bc_raisemod(bc_num base, bc_num expo, bc_num mo, bc_num *result, int scale); +zend_result bc_raisemod(bc_num base, bc_num expo, bc_num mo, bc_num *result, int scale); void bc_raise(bc_num num1, bc_num num2, bc_num *resul, int scale); diff --git a/ext/curl/interface.c b/ext/curl/interface.c index 3aeae5c575f82..710321ba1df93 100644 --- a/ext/curl/interface.c +++ b/ext/curl/interface.c @@ -1377,7 +1377,7 @@ static inline zend_result build_mime_structure_from_hash(php_curl *ch, zval *zpo curl_seek_callback seekfunc = seek_cb; #endif - prop = zend_read_property(curl_CURLFile_class, Z_OBJ_P(current), "name", sizeof("name")-1, 0, &rv); + prop = zend_read_property_ex(curl_CURLFile_class, Z_OBJ_P(current), ZSTR_KNOWN(ZEND_STR_NAME), /* silent */ false, &rv); ZVAL_DEREF(prop); if (Z_TYPE_P(prop) != IS_STRING) { php_error_docref(NULL, E_WARNING, "Invalid filename for key %s", ZSTR_VAL(string_key)); @@ -1839,7 +1839,6 @@ static zend_result _php_curl_setopt(php_curl *ch, zend_long option, zval *zvalue case
CURLOPT_TLSAUTH_TYPE: case CURLOPT_TLSAUTH_PASSWORD: case CURLOPT_TLSAUTH_USERNAME: - case CURLOPT_ACCEPT_ENCODING: case CURLOPT_TRANSFER_ENCODING: case CURLOPT_DNS_SERVERS: case CURLOPT_MAIL_AUTH: @@ -1914,6 +1913,7 @@ static zend_result _php_curl_setopt(php_curl *ch, zend_long option, zval *zvalue case CURLOPT_RANGE: case CURLOPT_FTP_ACCOUNT: case CURLOPT_RTSP_SESSION_ID: + case CURLOPT_ACCEPT_ENCODING: #if LIBCURL_VERSION_NUM >= 0x072100 /* Available since 7.33.0 */ case CURLOPT_DNS_INTERFACE: case CURLOPT_DNS_LOCAL_IP4: diff --git a/ext/curl/tests/curl_setopt_CURLOPT_ACCEPT_ENCODING.phpt b/ext/curl/tests/curl_setopt_CURLOPT_ACCEPT_ENCODING.phpt new file mode 100644 index 0000000000000..c170308c2e981 --- /dev/null +++ b/ext/curl/tests/curl_setopt_CURLOPT_ACCEPT_ENCODING.phpt @@ -0,0 +1,38 @@ +--TEST-- +Test curl_setopt() with CURLOPT_ACCEPT_ENCODING +--EXTENSIONS-- +curl +--FILE-- + +--EXPECTF-- +GET /get.inc?test= HTTP/1.1 +Host: %s +Accept: */* +Accept-Encoding: gzip + +GET /get.inc?test= HTTP/1.1 +Host: %s +Accept: */* diff --git a/ext/date/php_date.c b/ext/date/php_date.c index 02f65e2a1b937..cb5c21621b2eb 100644 --- a/ext/date/php_date.c +++ b/ext/date/php_date.c @@ -2055,13 +2055,25 @@ static void php_timezone_to_string(php_timezone_obj *tzobj, zval *zv) ZVAL_STRING(zv, tzobj->tzi.tz->name); break; case TIMELIB_ZONETYPE_OFFSET: { - zend_string *tmpstr = zend_string_alloc(sizeof("UTC+05:00")-1, 0); timelib_sll utc_offset = tzobj->tzi.utc_offset; + int seconds = utc_offset % 60; + size_t size; + const char *format; + if (seconds == 0) { + size = sizeof("+05:00"); + format = "%c%02d:%02d"; + } else { + size = sizeof("+05:00:01"); + format = "%c%02d:%02d:%02d"; + } + zend_string *tmpstr = zend_string_alloc(size - 1, 0); - ZSTR_LEN(tmpstr) = snprintf(ZSTR_VAL(tmpstr), sizeof("+05:00"), "%c%02d:%02d", + /* Note: if seconds == 0, the seconds argument will be excessive and therefore ignored. 
*/ + ZSTR_LEN(tmpstr) = snprintf(ZSTR_VAL(tmpstr), size, format, utc_offset < 0 ? '-' : '+', abs((int)(utc_offset / 3600)), - abs((int)(utc_offset % 3600) / 60)); + abs((int)(utc_offset % 3600) / 60), + abs(seconds)); ZVAL_NEW_STR(zv, tmpstr); } diff --git a/ext/date/tests/bug81097.phpt b/ext/date/tests/bug81097.phpt index 2cfd7e00a9dd4..7a3baf06a6389 100644 --- a/ext/date/tests/bug81097.phpt +++ b/ext/date/tests/bug81097.phpt @@ -10,5 +10,5 @@ object(DateTimeZone)#%d (%d) { ["timezone_type"]=> int(1) ["timezone"]=> - string(6) "+01:45" + string(9) "+01:45:30" } diff --git a/ext/date/tests/bug81565.phpt b/ext/date/tests/bug81565.phpt index 282093c7ec329..b23e950eafdf6 100644 --- a/ext/date/tests/bug81565.phpt +++ b/ext/date/tests/bug81565.phpt @@ -17,4 +17,4 @@ echo "\n", (new DatetimeZone('+01:45:30'))->getName(); 'timezone_type' => 1, 'timezone' => '+00:49', )) -+01:45 ++01:45:30 diff --git a/ext/date/tests/gh11281.phpt b/ext/date/tests/gh11281.phpt new file mode 100644 index 0000000000000..be1fe30b88c9d --- /dev/null +++ b/ext/date/tests/gh11281.phpt @@ -0,0 +1,33 @@ +--TEST-- +GH-11281 (DateTimeZone::getName() does not include seconds in offset) +--FILE-- +getName(), "\n"; +$tz = new DateTimeZone('+03:00:00'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('-03:00:00'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('+03:00:01'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('-03:00:01'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('+03:00:58'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('-03:00:58'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('+03:00:59'); +echo $tz->getName(), "\n"; +$tz = new DateTimeZone('-03:00:59'); +echo $tz->getName(), "\n"; +?> +--EXPECT-- ++03:00 ++03:00 +-03:00 ++03:00:01 +-03:00:01 ++03:00:58 +-03:00:58 ++03:00:59 +-03:00:59 diff --git a/ext/dom/attr.c b/ext/dom/attr.c index a262aea821395..417f92a25c364 100644 --- a/ext/dom/attr.c +++ b/ext/dom/attr.c @@ -147,11 +147,9 @@ int 
dom_attr_value_write(dom_object *obj, zval *newval) return FAILURE; } - if (attrp->children) { - node_list_unlink(attrp->children); - } - - xmlNodeSetContentLen((xmlNodePtr) attrp, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); + dom_remove_all_children((xmlNodePtr) attrp); + xmlNodePtr node = xmlNewTextLen((xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); + xmlAddChild((xmlNodePtr) attrp, node); zend_string_release_ex(str, 0); return SUCCESS; diff --git a/ext/dom/characterdata.c b/ext/dom/characterdata.c index 85660a7b3549f..0a0373a5c1d11 100644 --- a/ext/dom/characterdata.c +++ b/ext/dom/characterdata.c @@ -70,7 +70,7 @@ int dom_characterdata_data_write(dom_object *obj, zval *newval) return FAILURE; } - xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); + xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); zend_string_release_ex(str, 0); return SUCCESS; @@ -364,7 +364,7 @@ PHP_METHOD(DOMCharacterData, remove) PHP_METHOD(DOMCharacterData, after) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -381,7 +381,7 @@ PHP_METHOD(DOMCharacterData, after) PHP_METHOD(DOMCharacterData, before) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -398,7 +398,7 @@ PHP_METHOD(DOMCharacterData, before) PHP_METHOD(DOMCharacterData, replaceWith) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; diff --git a/ext/dom/document.c b/ext/dom/document.c index c60198a3be110..1b26c9c7bfc73 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -297,7 +297,7 @@ readonly=no int dom_document_format_output_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->formatoutput); } else { ZVAL_FALSE(retval); @@ -322,7 +322,7 @@ readonly=no int 
dom_document_validate_on_parse_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->validateonparse); } else { ZVAL_FALSE(retval); @@ -347,7 +347,7 @@ readonly=no int dom_document_resolve_externals_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->resolveexternals); } else { ZVAL_FALSE(retval); @@ -372,7 +372,7 @@ readonly=no int dom_document_preserve_whitespace_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->preservewhitespace); } else { ZVAL_FALSE(retval); @@ -397,7 +397,7 @@ readonly=no int dom_document_recover_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->recover); } else { ZVAL_FALSE(retval); @@ -422,7 +422,7 @@ readonly=no int dom_document_substitue_entities_read(dom_object *obj, zval *retval) { if (obj->document) { - dom_doc_propsptr doc_prop = dom_get_doc_props(obj->document); + libxml_doc_props const* doc_prop = dom_get_doc_props_read_only(obj->document); ZVAL_BOOL(retval, doc_prop->substituteentities); } else { ZVAL_FALSE(retval); @@ -777,7 +777,6 @@ PHP_METHOD(DOMDocument, getElementsByTagName) size_t name_len; dom_object *intern, *namednode; char *name; - xmlChar *local; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) { @@ -788,8 +787,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName) 
php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, NULL); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0); } /* }}} end dom_document_get_elements_by_tag_name */ @@ -847,6 +845,8 @@ PHP_METHOD(DOMDocument, importNode) } } + php_libxml_invalidate_node_list_cache_from_doc(docp); + DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern); } /* }}} end dom_document_import_node */ @@ -859,7 +859,6 @@ PHP_METHOD(DOMDocument, createElementNS) zval *id; xmlDocPtr docp; xmlNodePtr nodep = NULL; - xmlNsPtr nsptr = NULL; int ret; size_t uri_len = 0, name_len = 0, value_len = 0; char *uri, *name, *value = NULL; @@ -880,7 +879,7 @@ PHP_METHOD(DOMDocument, createElementNS) if (xmlValidateName((xmlChar *) localname, 0) == 0) { nodep = xmlNewDocNode(docp, NULL, (xmlChar *) localname, (xmlChar *) value); if (nodep != NULL && uri != NULL) { - nsptr = xmlSearchNsByHref(nodep->doc, nodep, (xmlChar *) uri); + xmlNsPtr nsptr = xmlSearchNsByHref(nodep->doc, nodep, (xmlChar *) uri); if (nsptr == NULL) { nsptr = dom_get_ns(nodep, uri, &errorcode, prefix); } @@ -908,9 +907,6 @@ PHP_METHOD(DOMDocument, createElementNS) RETURN_FALSE; } - - nodep->ns = nsptr; - DOM_RET_OBJ(nodep, &ret, intern); } /* }}} end dom_document_create_element_ns */ @@ -991,7 +987,6 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS) size_t uri_len, name_len; dom_object *intern, *namednode; char *uri, *name; - xmlChar *local, *nsuri; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) { @@ -1002,12 +997,23 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - nsuri = xmlCharStrndup(uri ? 
uri : "", uri_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len); } /* }}} end dom_document_get_elements_by_tag_name_ns */ +static bool php_dom_is_node_attached(const xmlNode *node) +{ + ZEND_ASSERT(node != NULL); + node = node->parent; + while (node != NULL) { + if (node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE) { + return true; + } + node = node->parent; + } + return false; +} + /* {{{ URL: http://www.w3.org/TR/2003/WD-DOM-Level-3-Core-20030226/DOM3-Core.html#core-ID-getElBId Since: DOM Level 2 */ @@ -1030,7 +1036,13 @@ PHP_METHOD(DOMDocument, getElementById) attrp = xmlGetID(docp, (xmlChar *) idname); - if (attrp && attrp->parent) { + /* From the moment an ID is created, libxml2's behaviour is to cache that element, even + * if that element is not yet attached to the document. Similarly, only upon destruction of + * the element the ID is actually removed by libxml2. Since libxml2 has such behaviour deeply + * ingrained in the library, and uses the cache for various purposes, it seems like a bad + * idea and lost cause to fight it. Instead, we'll simply walk the tree upwards to check + * if the node is attached to the document. 
*/ + if (attrp && attrp->parent && php_dom_is_node_attached(attrp->parent)) { DOM_RET_OBJ((xmlNodePtr) attrp->parent, &ret, intern); } else { RETVAL_NULL(); @@ -1070,6 +1082,8 @@ PHP_METHOD(DOMDocument, normalizeDocument) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); + php_libxml_invalidate_node_list_cache_from_doc(docp); + dom_normalize((xmlNodePtr) docp); } /* }}} end dom_document_normalize_document */ @@ -1176,7 +1190,6 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so { xmlDocPtr ret; xmlParserCtxtPtr ctxt = NULL; - dom_doc_propsptr doc_props; dom_object *intern; php_libxml_ref_obj *document = NULL; int validate, recover, resolve_externals, keep_blanks, substitute_ent; @@ -1189,17 +1202,13 @@ static xmlDocPtr dom_document_parser(zval *id, int mode, char *source, size_t so document = intern->document; } - doc_props = dom_get_doc_props(document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(document); validate = doc_props->validateonparse; resolve_externals = doc_props->resolveexternals; keep_blanks = doc_props->preservewhitespace; substitute_ent = doc_props->substituteentities; recover = doc_props->recover; - if (document == NULL) { - efree(doc_props); - } - xmlInitParser(); if (mode == DOM_LOAD_FILE) { @@ -1333,10 +1342,14 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) { if (id != NULL) { intern = Z_DOMOBJ_P(id); + size_t old_modification_nr = 0; if (intern != NULL) { docp = (xmlDocPtr) dom_object_get_node(intern); doc_prop = NULL; if (docp != NULL) { + const php_libxml_doc_ptr *doc_ptr = docp->_private; + ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ + old_modification_nr = doc_ptr->cache_tag.modification_nr; php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); doc_prop = intern->document->doc_props; intern->document->doc_props = NULL; @@ -1353,6 +1366,12 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) { } 
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); + /* Since iterators should invalidate, we need to start the modification number from the old counter */ + if (old_modification_nr != 0) { + php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */ + doc_ptr->cache_tag.modification_nr = old_modification_nr; + php_libxml_invalidate_node_list_cache(doc_ptr); + } RETURN_TRUE; } else { @@ -1387,7 +1406,6 @@ PHP_METHOD(DOMDocument, save) size_t file_len = 0; int bytes, format, saveempty = 0; dom_object *intern; - dom_doc_propsptr doc_props; char *file; zend_long options = 0; @@ -1405,7 +1423,7 @@ PHP_METHOD(DOMDocument, save) /* encoding handled by property on doc */ - doc_props = dom_get_doc_props(intern->document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(intern->document); format = doc_props->formatoutput; if (options & LIBXML_SAVE_NOEMPTYTAG) { saveempty = xmlSaveNoEmptyTags; @@ -1433,7 +1451,6 @@ PHP_METHOD(DOMDocument, saveXML) xmlBufferPtr buf; xmlChar *mem; dom_object *intern, *nodeobj; - dom_doc_propsptr doc_props; int size, format, saveempty = 0; zend_long options = 0; @@ -1444,7 +1461,7 @@ PHP_METHOD(DOMDocument, saveXML) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); - doc_props = dom_get_doc_props(intern->document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(intern->document); format = doc_props->formatoutput; if (nodep != NULL) { @@ -1570,6 +1587,8 @@ PHP_METHOD(DOMDocument, xinclude) php_dom_remove_xinclude_nodes(root); } + php_libxml_invalidate_node_list_cache_from_doc(docp); + if (err) { RETVAL_LONG(err); } else { @@ -1878,10 +1897,14 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */ if (id != NULL && instanceof_function(Z_OBJCE_P(id), dom_document_class_entry)) { intern = Z_DOMOBJ_P(id); + size_t old_modification_nr = 0; if (intern != NULL) { docp = (xmlDocPtr) 
dom_object_get_node(intern); doc_prop = NULL; if (docp != NULL) { + const php_libxml_doc_ptr *doc_ptr = docp->_private; + ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ + old_modification_nr = doc_ptr->cache_tag.modification_nr; php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); doc_prop = intern->document->doc_props; intern->document->doc_props = NULL; @@ -1898,6 +1921,12 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */ } php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); + /* Since iterators should invalidate, we need to start the modification number from the old counter */ + if (old_modification_nr != 0) { + php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */ + doc_ptr->cache_tag.modification_nr = old_modification_nr; + php_libxml_invalidate_node_list_cache(doc_ptr); + } RETURN_TRUE; } else { @@ -1928,7 +1957,6 @@ PHP_METHOD(DOMDocument, saveHTMLFile) size_t file_len; int bytes, format; dom_object *intern; - dom_doc_propsptr doc_props; char *file; const char *encoding; @@ -1947,7 +1975,7 @@ PHP_METHOD(DOMDocument, saveHTMLFile) encoding = (const char *) htmlGetMetaEncoding(docp); - doc_props = dom_get_doc_props(intern->document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(intern->document); format = doc_props->formatoutput; bytes = htmlSaveFileFormat(file, docp, encoding, format); @@ -1969,7 +1997,6 @@ PHP_METHOD(DOMDocument, saveHTML) dom_object *intern, *nodeobj; xmlChar *mem = NULL; int format; - dom_doc_propsptr doc_props; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), @@ -1980,7 +2007,7 @@ PHP_METHOD(DOMDocument, saveHTML) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); - doc_props = dom_get_doc_props(intern->document); + libxml_doc_props const* doc_props = dom_get_doc_props(intern->document); format = doc_props->formatoutput; if (nodep != NULL) { @@ -2073,7 
+2100,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMDocument, append) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -2094,7 +2121,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMDocument, prepend) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; diff --git a/ext/dom/documentfragment.c b/ext/dom/documentfragment.c index 4e7f76a7de80a..b52a9c820baca 100644 --- a/ext/dom/documentfragment.c +++ b/ext/dom/documentfragment.c @@ -57,6 +57,7 @@ PHP_METHOD(DOMDocumentFragment, __construct) } /* }}} end DOMDocumentFragment::__construct */ +#if LIBXML_VERSION <= 20614 /* php_dom_xmlSetTreeDoc is a custom implementation of xmlSetTreeDoc needed for hack in appendXML due to libxml bug - no need to share this function */ static void php_dom_xmlSetTreeDoc(xmlNodePtr tree, xmlDocPtr doc) /* {{{ */ @@ -90,6 +91,7 @@ static void php_dom_xmlSetTreeDoc(xmlNodePtr tree, xmlDocPtr doc) /* {{{ */ } } /* }}} */ +#endif /* {{{ */ PHP_METHOD(DOMDocumentFragment, appendXML) { @@ -118,10 +120,11 @@ PHP_METHOD(DOMDocumentFragment, appendXML) { if (err != 0) { RETURN_FALSE; } - /* Following needed due to bug in libxml2 <= 2.6.14 - ifdef after next libxml release as bug is fixed in their cvs */ +#if LIBXML_VERSION <= 20614 + /* Following needed due to bug in libxml2 <= 2.6.14 */ php_dom_xmlSetTreeDoc(lst, nodep->doc); /* End stupid hack */ +#endif xmlAddChildList(nodep,lst); } @@ -135,7 +138,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMDocumentFragment, append) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -156,7 +159,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMDocumentFragment, prepend) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; diff --git a/ext/dom/documenttype.c b/ext/dom/documenttype.c index b046b05f80eff..cfc4b043edb22 100644 --- a/ext/dom/documenttype.c +++ b/ext/dom/documenttype.c @@ 
-65,7 +65,7 @@ int dom_documenttype_entities_read(dom_object *obj, zval *retval) entityht = (xmlHashTable *) doctypep->entities; intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, NULL); + dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, 0, NULL, 0); return SUCCESS; } @@ -93,7 +93,7 @@ int dom_documenttype_notations_read(dom_object *obj, zval *retval) notationht = (xmlHashTable *) doctypep->notations; intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, NULL); + dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, 0, NULL, 0); return SUCCESS; } diff --git a/ext/dom/dom_iterators.c b/ext/dom/dom_iterators.c index 72c97104db04d..2cf2c7bb6e7ce 100644 --- a/ext/dom/dom_iterators.c +++ b/ext/dom/dom_iterators.c @@ -179,7 +179,7 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */ dom_object *intern; dom_object *nnmap; dom_nnodemap_object *objmap; - int previndex=0; + int previndex; HashTable *nodeht; zval *entry; bool do_curobj_undef = 1; @@ -205,23 +205,32 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */ do_curobj_undef = 0; } } else { - curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { + curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; curnode = curnode->next; } else { - /* Nav the tree evey time as this is LIVE */ + /* The collection is live, we nav the tree from the base object if we cannot + * use the cache to restart from the last point. 
*/ basenode = dom_object_get_node(objmap->baseobj); - if (basenode && (basenode->type == XML_DOCUMENT_NODE || - basenode->type == XML_HTML_DOCUMENT_NODE)) { - basenode = xmlDocGetRootElement((xmlDoc *) basenode); - } else if (basenode) { - basenode = basenode->children; - } else { + if (UNEXPECTED(!basenode)) { goto err; } + if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) { + php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode); + previndex = 0; + if (basenode && (basenode->type == XML_DOCUMENT_NODE || + basenode->type == XML_HTML_DOCUMENT_NODE)) { + curnode = xmlDocGetRootElement((xmlDoc *) basenode); + } else { + curnode = basenode->children; + } + } else { + previndex = iter->index - 1; + curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; + } curnode = dom_get_elements_by_tag_name_ns_raw( - basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index); + basenode, curnode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index); } } } else { @@ -258,7 +267,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i { dom_object *intern; dom_nnodemap_object *objmap; - xmlNodePtr nodep, curnode=NULL; + xmlNodePtr curnode=NULL; int curindex = 0; HashTable *nodeht; zval *entry; @@ -270,6 +279,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i } iterator = emalloc(sizeof(php_dom_iterator)); zend_iterator_init(&iterator->intern); + iterator->cache_tag.modification_nr = 0; ZVAL_OBJ_COPY(&iterator->intern.data, Z_OBJ_P(object)); iterator->intern.funcs = &php_dom_iterator_funcs; @@ -288,24 +298,25 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i ZVAL_COPY(&iterator->curobj, entry); } } else { - nodep = (xmlNode *)dom_object_get_node(objmap->baseobj); - if (!nodep) { + xmlNodePtr basep = (xmlNode *)dom_object_get_node(objmap->baseobj); + if (!basep) { goto err; } if (objmap->nodetype 
== XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { if (objmap->nodetype == XML_ATTRIBUTE_NODE) { - curnode = (xmlNodePtr) nodep->properties; + curnode = (xmlNodePtr) basep->properties; } else { - curnode = (xmlNodePtr) nodep->children; + curnode = (xmlNodePtr) basep->children; } } else { + xmlNodePtr nodep = basep; if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { nodep = xmlDocGetRootElement((xmlDoc *) nodep); } else { nodep = nodep->children; } curnode = dom_get_elements_by_tag_name_ns_raw( - nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0); + basep, nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0); } } } else { diff --git a/ext/dom/element.c b/ext/dom/element.c index 19cef5834657a..198abc8891b3c 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -150,6 +150,7 @@ int dom_element_schema_type_info_read(dom_object *obj, zval *retval) /* }}} */ +/* Note: the object returned is not necessarily a node, but can be an attribute or a namespace declaration. */ static xmlNodePtr dom_get_dom1_attribute(xmlNodePtr elem, xmlChar *name) /* {{{ */ { int len; @@ -376,25 +377,13 @@ PHP_METHOD(DOMElement, getAttributeNode) } if (attrp->type == XML_NAMESPACE_DECL) { - xmlNsPtr curns; - xmlNodePtr nsparent; - - nsparent = attrp->_private; - curns = xmlNewNs(NULL, attrp->name, NULL); - if (attrp->children) { - curns->prefix = xmlStrdup((xmlChar *) attrp->children); - } - if (attrp->children) { - attrp = xmlNewDocNode(nodep->doc, NULL, (xmlChar *) attrp->children, attrp->name); - } else { - attrp = xmlNewDocNode(nodep->doc, NULL, (xmlChar *)"xmlns", attrp->name); - } - attrp->type = XML_NAMESPACE_DECL; - attrp->parent = nsparent; - attrp->ns = curns; + xmlNsPtr original = (xmlNsPtr) attrp; + /* Keep parent alive, because we're a fake child. 
*/ + GC_ADDREF(&intern->std); + (void) php_dom_create_fake_namespace_decl(nodep, original, return_value, intern); + } else { + DOM_RET_OBJ((xmlNodePtr) attrp, &ret, intern); } - - DOM_RET_OBJ((xmlNodePtr) attrp, &ret, intern); } /* }}} end dom_element_get_attribute_node */ @@ -511,7 +500,6 @@ PHP_METHOD(DOMElement, getElementsByTagName) size_t name_len; dom_object *intern, *namednode; char *name; - xmlChar *local; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) { @@ -522,8 +510,7 @@ PHP_METHOD(DOMElement, getElementsByTagName) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, NULL); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0); } /* }}} end dom_element_get_elements_by_tag_name */ @@ -798,7 +785,7 @@ Since: DOM Level 2 PHP_METHOD(DOMElement, getAttributeNodeNS) { zval *id; - xmlNodePtr elemp, fakeAttrp; + xmlNodePtr elemp; xmlAttrPtr attrp; dom_object *intern; size_t uri_len, name_len; @@ -819,21 +806,9 @@ PHP_METHOD(DOMElement, getAttributeNodeNS) xmlNsPtr nsptr; nsptr = dom_get_nsdecl(elemp, (xmlChar *)name); if (nsptr != NULL) { - xmlNsPtr curns; - curns = xmlNewNs(NULL, nsptr->href, NULL); - if (nsptr->prefix) { - curns->prefix = xmlStrdup((xmlChar *) nsptr->prefix); - } - if (nsptr->prefix) { - fakeAttrp = xmlNewDocNode(elemp->doc, NULL, (xmlChar *) nsptr->prefix, nsptr->href); - } else { - fakeAttrp = xmlNewDocNode(elemp->doc, NULL, (xmlChar *)"xmlns", nsptr->href); - } - fakeAttrp->type = XML_NAMESPACE_DECL; - fakeAttrp->parent = elemp; - fakeAttrp->ns = curns; - - DOM_RET_OBJ(fakeAttrp, &ret, intern); + /* Keep parent alive, because we're a fake child. 
*/ + GC_ADDREF(&intern->std); + (void) php_dom_create_fake_namespace_decl(elemp, nsptr, return_value, intern); } else { RETURN_NULL(); } @@ -930,7 +905,6 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS) size_t uri_len, name_len; dom_object *intern, *namednode; char *uri, *name; - xmlChar *local, *nsuri; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) { @@ -941,9 +915,7 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - nsuri = xmlCharStrndup(uri ? uri : "", uri_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len); } /* }}} end dom_element_get_elements_by_tag_name_ns */ @@ -1160,7 +1132,7 @@ PHP_METHOD(DOMElement, remove) PHP_METHOD(DOMElement, after) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1177,7 +1149,7 @@ PHP_METHOD(DOMElement, after) PHP_METHOD(DOMElement, before) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1197,7 +1169,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMElement, append) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1218,7 +1190,7 @@ Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMElement, prepend) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1234,12 +1206,12 @@ PHP_METHOD(DOMElement, prepend) } /* }}} end DOMElement::prepend */ -/* {{{ URL: https://dom.spec.whatwg.org/#dom-parentnode-prepend +/* {{{ URL: https://dom.spec.whatwg.org/#dom-parentnode-replacechildren Since: DOM Living Standard (DOM4) */ PHP_METHOD(DOMElement, replaceWith) { - int argc; + uint32_t argc; zval *args, *id; dom_object *intern; xmlNode *context; @@ -1251,8 +1223,7 @@ 
PHP_METHOD(DOMElement, replaceWith) id = ZEND_THIS; DOM_GET_OBJ(context, id, xmlNodePtr, intern); - dom_parent_node_after(intern, args, argc); - dom_child_node_remove(intern); + dom_child_replace_with(intern, args, argc); } /* }}} end DOMElement::prepend */ diff --git a/ext/dom/namednodemap.c b/ext/dom/namednodemap.c index 99103ce30b7ad..dadab115a1c2a 100644 --- a/ext/dom/namednodemap.c +++ b/ext/dom/namednodemap.c @@ -142,9 +142,9 @@ PHP_METHOD(DOMNamedNodeMap, item) int count; id = ZEND_THIS; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &index) == FAILURE) { - RETURN_THROWS(); - } + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_LONG(index) + ZEND_PARSE_PARAMETERS_END(); if (index < 0 || ZEND_LONG_INT_OVFL(index)) { zend_argument_value_error(1, "must be between 0 and %d", INT_MAX); RETURN_THROWS(); diff --git a/ext/dom/node.c b/ext/dom/node.c index bc7108e087e75..29262f8579146 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -177,24 +177,25 @@ int dom_node_node_value_write(dom_object *obj, zval *newval) /* Access to Element node is implemented as a convenience method */ switch (nodep->type) { - case XML_ELEMENT_NODE: case XML_ATTRIBUTE_NODE: - if (nodep->children) { - node_list_unlink(nodep->children); - php_libxml_node_free_list((xmlNodePtr) nodep->children); - nodep->children = NULL; - } + dom_remove_all_children(nodep); + xmlAddChild(nodep, xmlNewTextLen((xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str))); + break; + case XML_ELEMENT_NODE: + dom_remove_all_children(nodep); ZEND_FALLTHROUGH; case XML_TEXT_NODE: case XML_COMMENT_NODE: case XML_CDATA_SECTION_NODE: case XML_PI_NODE: - xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); + xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); break; default: break; } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + zend_string_release_ex(str, 0); return SUCCESS; } @@ -274,7 +275,7 @@ int dom_node_child_nodes_read(dom_object *obj, zval *retval) 
php_dom_create_iterator(retval, DOM_NODELIST); intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, NULL); + dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, 0, NULL, 0); return SUCCESS; } @@ -482,7 +483,7 @@ int dom_node_attributes_read(dom_object *obj, zval *retval) if (nodep->type == XML_ELEMENT_NODE) { php_dom_create_iterator(retval, DOM_NAMEDNODEMAP); intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, NULL); + dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, 0, NULL, 0); } else { ZVAL_NULL(retval); } @@ -769,17 +770,25 @@ int dom_node_text_content_write(dom_object *obj, zval *newval) return FAILURE; } - if (nodep->type == XML_ELEMENT_NODE || nodep->type == XML_ATTRIBUTE_NODE) { - if (nodep->children) { - node_list_unlink(nodep->children); - php_libxml_node_free_list((xmlNodePtr) nodep->children); - nodep->children = NULL; - } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + + const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str); + int type = nodep->type; + + /* We can't directly call xmlNodeSetContent, because it might encode the string through + * xmlStringLenGetNodeList for types XML_DOCUMENT_FRAG_NODE, XML_ELEMENT_NODE, XML_ATTRIBUTE_NODE. + * See tree.c:xmlNodeSetContent in libxml. + * In these cases we need to use a text node to avoid the encoding. + * For the other cases, we *can* rely on xmlNodeSetContent because it is either a no-op, or handles + * the content without encoding. 
*/ + if (type == XML_DOCUMENT_FRAG_NODE || type == XML_ELEMENT_NODE || type == XML_ATTRIBUTE_NODE) { + dom_remove_all_children(nodep); + xmlNode *textNode = xmlNewText(xmlChars); + xmlAddChild(nodep, textNode); + } else { + xmlNodeSetContent(nodep, xmlChars); } - /* we have to use xmlNodeAddContent() to get the same behavior as with xmlNewText() */ - xmlNodeSetContent(nodep, (xmlChar *) ""); - xmlNodeAddContent(nodep, (xmlChar *) ZSTR_VAL(str)); zend_string_release_ex(str, 0); return SUCCESS; @@ -886,6 +895,8 @@ PHP_METHOD(DOMNode, insertBefore) php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL); } + php_libxml_invalidate_node_list_cache_from_doc(parentp->doc); + if (ref != NULL) { DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj); if (refp->parent != parentp) { @@ -932,12 +943,20 @@ PHP_METHOD(DOMNode, insertBefore) return; } } + new_child = xmlAddPrevSibling(refp, child); + if (UNEXPECTED(NULL == new_child)) { + goto cannot_add; + } } else if (child->type == XML_DOCUMENT_FRAG_NODE) { + xmlNodePtr last = child->last; new_child = _php_dom_insert_fragment(parentp, refp->prev, refp, child, intern, childobj); - } - - if (new_child == NULL) { + dom_reconcile_ns_list(parentp->doc, new_child, last); + } else { new_child = xmlAddPrevSibling(refp, child); + if (UNEXPECTED(NULL == new_child)) { + goto cannot_add; + } + dom_reconcile_ns(parentp->doc, new_child); } } else { if (child->parent != NULL){ @@ -974,23 +993,28 @@ PHP_METHOD(DOMNode, insertBefore) return; } } + new_child = xmlAddChild(parentp, child); + if (UNEXPECTED(NULL == new_child)) { + goto cannot_add; + } } else if (child->type == XML_DOCUMENT_FRAG_NODE) { + xmlNodePtr last = child->last; new_child = _php_dom_insert_fragment(parentp, parentp->last, NULL, child, intern, childobj); - } - if (new_child == NULL) { + dom_reconcile_ns_list(parentp->doc, new_child, last); + } else { new_child = xmlAddChild(parentp, child); + if (UNEXPECTED(NULL == new_child)) { + goto cannot_add; + } + 
dom_reconcile_ns(parentp->doc, new_child); } } - if (NULL == new_child) { - zend_throw_error(NULL, "Cannot add newnode as the previous sibling of refnode"); - RETURN_THROWS(); - } - - dom_reconcile_ns(parentp->doc, new_child); - DOM_RET_OBJ(new_child, &ret, intern); - + return; +cannot_add: + zend_throw_error(NULL, "Cannot add newnode as the previous sibling of refnode"); + RETURN_THROWS(); } /* }}} end dom_node_insert_before */ @@ -1055,9 +1079,10 @@ PHP_METHOD(DOMNode, replaceChild) xmlUnlinkNode(oldchild); + xmlNodePtr last = newchild->last; newchild = _php_dom_insert_fragment(nodep, prevsib, nextsib, newchild, intern, newchildobj); if (newchild) { - dom_reconcile_ns(nodep->doc, newchild); + dom_reconcile_ns_list(nodep->doc, newchild, last); } } else if (oldchild != newchild) { xmlDtdPtr intSubset = xmlGetIntSubset(nodep->doc); @@ -1075,6 +1100,7 @@ PHP_METHOD(DOMNode, replaceChild) nodep->doc->intSubset = (xmlDtd *) newchild; } } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); DOM_RET_OBJ(oldchild, &ret, intern); } /* }}} end dom_node_replace_child */ @@ -1116,6 +1142,7 @@ PHP_METHOD(DOMNode, removeChild) } xmlUnlinkNode(child); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); DOM_RET_OBJ(child, &ret, intern); } /* }}} end dom_node_remove_child */ @@ -1204,22 +1231,30 @@ PHP_METHOD(DOMNode, appendChild) php_libxml_node_free_resource((xmlNodePtr) lastattr); } } + new_child = xmlAddChild(nodep, child); + if (UNEXPECTED(new_child == NULL)) { + goto cannot_add; + } } else if (child->type == XML_DOCUMENT_FRAG_NODE) { + xmlNodePtr last = child->last; new_child = _php_dom_insert_fragment(nodep, nodep->last, NULL, child, intern, childobj); - } - - if (new_child == NULL) { + dom_reconcile_ns_list(nodep->doc, new_child, last); + } else { new_child = xmlAddChild(nodep, child); - if (new_child == NULL) { - // TODO Convert to Error? 
- php_error_docref(NULL, E_WARNING, "Couldn't append node"); - RETURN_FALSE; + if (UNEXPECTED(new_child == NULL)) { + goto cannot_add; } + dom_reconcile_ns(nodep->doc, new_child); } - dom_reconcile_ns(nodep->doc, new_child); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); DOM_RET_OBJ(new_child, &ret, intern); + return; +cannot_add: + // TODO Convert to Error? + php_error_docref(NULL, E_WARNING, "Couldn't append node"); + RETURN_FALSE; } /* }}} end dom_node_append_child */ @@ -1328,6 +1363,8 @@ PHP_METHOD(DOMNode, normalize) DOM_GET_OBJ(nodep, id, xmlNodePtr, intern); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + dom_normalize(nodep); } @@ -1560,6 +1597,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ RETURN_THROWS(); } + php_libxml_invalidate_node_list_cache_from_doc(docp); + if (xpath_array == NULL) { if (nodep->type != XML_DOCUMENT_NODE) { ctxp = xmlXPathNewContext(docp); @@ -1583,7 +1622,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ zval *tmp; char *xquery; - tmp = zend_hash_str_find(ht, "query", sizeof("query")-1); + /* Find "query" key */ + tmp = zend_hash_find(ht, ZSTR_KNOWN(ZEND_STR_QUERY)); if (!tmp) { /* if mode == 0 then $xpath arg is 3, if mode == 1 then $xpath is 4 */ zend_argument_value_error(3 + mode, "must have a \"query\" key"); diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index b03ebe1acd90a..55073b255016c 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -31,6 +31,24 @@ * Since: */ +static zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap) +{ + if (objmap->cached_obj) { + /* Since the DOM is a tree there can be no cycles. 
*/ + if (GC_DELREF(&objmap->cached_obj->std) == 0) { + zend_objects_store_del(&objmap->cached_obj->std); + } + objmap->cached_obj = NULL; + objmap->cached_obj_index = 0; + } +} + +static zend_always_inline void reset_objmap_cache(dom_nnodemap_object *objmap) +{ + objmap_cache_release_cached_obj(objmap); + objmap->cached_length = -1; +} + static int get_nodelist_length(dom_object *obj) { dom_nnodemap_object *objmap = (dom_nnodemap_object *) obj->ptr; @@ -52,6 +70,17 @@ static int get_nodelist_length(dom_object *obj) return 0; } + if (!php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) { + if (objmap->cached_length >= 0) { + return objmap->cached_length; + } + /* Only the length is out-of-date, the cache tag is still valid. + * Therefore, only overwrite the length and keep the currently cached object. */ + } else { + php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, nodep); + reset_objmap_cache(objmap); + } + int count = 0; if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { xmlNodePtr curnode = nodep->children; @@ -63,15 +92,18 @@ static int get_nodelist_length(dom_object *obj) } } } else { + xmlNodePtr basep = nodep; if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { nodep = xmlDocGetRootElement((xmlDoc *) nodep); } else { nodep = nodep->children; } dom_get_elements_by_tag_name_ns_raw( - nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1); + basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, INT_MAX - 1 /* because of <= */); } + objmap->cached_length = count; + return count; } @@ -113,17 +145,18 @@ PHP_METHOD(DOMNodeList, item) zval *id; zend_long index; int ret; + bool cache_itemnode = false; dom_object *intern; xmlNodePtr itemnode = NULL; dom_nnodemap_object *objmap; - xmlNodePtr nodep, curnode; + xmlNodePtr basep; int count = 0; id = ZEND_THIS; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &index) == FAILURE) { - RETURN_THROWS(); - } + 
ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_LONG(index) + ZEND_PARSE_PARAMETERS_END(); if (index >= 0) { intern = Z_DOMOBJ_P(id); @@ -145,23 +178,51 @@ PHP_METHOD(DOMNodeList, item) return; } } else if (objmap->baseobj) { - nodep = dom_object_get_node(objmap->baseobj); - if (nodep) { + basep = dom_object_get_node(objmap->baseobj); + if (basep) { + xmlNodePtr nodep = basep; + /* For now we're only able to use cache for forward search. + * TODO: in the future we could extend the logic of the node list such that backwards searches + * are also possible. */ + bool restart = true; + int relative_index = index; + if (index >= objmap->cached_obj_index && objmap->cached_obj && !php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) { + xmlNodePtr cached_obj_xml_node = dom_object_get_node(objmap->cached_obj); + + /* The node cannot be NULL if the cache is valid. If it is NULL, then it means we + * forgot an invalidation somewhere. Take the defensive programming approach and invalidate + * it here if it's NULL (except in debug mode where we would want to catch this). 
*/ + if (UNEXPECTED(cached_obj_xml_node == NULL)) { +#if ZEND_DEBUG + ZEND_UNREACHABLE(); +#endif + reset_objmap_cache(objmap); + } else { + restart = false; + relative_index -= objmap->cached_obj_index; + nodep = cached_obj_xml_node; + } + } if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { - curnode = nodep->children; - while (count < index && curnode != NULL) { + if (restart) { + nodep = nodep->children; + } + while (count < relative_index && nodep != NULL) { count++; - curnode = curnode->next; + nodep = nodep->next; } - itemnode = curnode; + itemnode = nodep; } else { - if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { - nodep = xmlDocGetRootElement((xmlDoc *) nodep); - } else { - nodep = nodep->children; + if (restart) { + if (basep->type == XML_DOCUMENT_NODE || basep->type == XML_HTML_DOCUMENT_NODE) { + nodep = xmlDocGetRootElement((xmlDoc*) basep); + } else { + nodep = basep->children; + } } - itemnode = dom_get_elements_by_tag_name_ns_raw(nodep, (char *) objmap->ns, (char *) objmap->local, &count, index); + itemnode = dom_get_elements_by_tag_name_ns_raw(basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, relative_index); } + cache_itemnode = true; } } } @@ -169,6 +230,25 @@ PHP_METHOD(DOMNodeList, item) if (itemnode) { DOM_RET_OBJ(itemnode, &ret, objmap->baseobj); + if (cache_itemnode) { + /* Hold additional reference for the cache, must happen before releasing the cache + * because we might be the last reference holder. + * Instead of storing and copying zvals, we store the object pointer directly. + * This saves us some bytes because a pointer is smaller than a zval. + * This also means we have to manually refcount the objects here, and remove the reference count + * in reset_objmap_cache() and the destructor. */ + dom_object *cached_obj = Z_DOMOBJ_P(return_value); + GC_ADDREF(&cached_obj->std); + /* If the tag is stale, all cached data is useless. 
Otherwise only the cached object is useless. */ + if (php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, itemnode)) { + php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, itemnode); + reset_objmap_cache(objmap); + } else { + objmap_cache_release_cached_obj(objmap); + } + objmap->cached_obj_index = index; + objmap->cached_obj = cached_obj; + } return; } } diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 571da9b6411d6..c6d36f0c670b2 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -124,9 +124,25 @@ int dom_parent_node_child_element_count(dom_object *obj, zval *retval) } /* }}} */ -xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNode, zval *nodes, int nodesc) +static bool dom_is_node_in_list(const zval *nodes, uint32_t nodesc, const xmlNodePtr node_to_find) +{ + for (uint32_t i = 0; i < nodesc; i++) { + if (Z_TYPE(nodes[i]) == IS_OBJECT) { + const zend_class_entry *ce = Z_OBJCE(nodes[i]); + + if (instanceof_function(ce, dom_node_class_entry)) { + if (dom_object_get_node(Z_DOMOBJ_P(nodes + i)) == node_to_find) { + return true; + } + } + } + } + + return false; +} + +xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNode, zval *nodes, uint32_t nodesc) { - int i; xmlDoc *documentNode; xmlNode *fragment; xmlNode *newNode; @@ -153,7 +169,7 @@ xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNod stricterror = dom_get_strict_error(document); - for (i = 0; i < nodesc; i++) { + for (uint32_t i = 0; i < nodesc; i++) { if (Z_TYPE(nodes[i]) == IS_OBJECT) { ce = Z_OBJCE(nodes[i]); @@ -177,17 +193,16 @@ xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNod goto hierarchy_request_err; } - /* - * xmlNewDocText function will always returns same address to the second parameter if the parameters are greater than or equal to three. 
- * If it's text, that's fine, but if it's an object, it can cause invalid pointer because many new nodes point to the same memory address. - * So we must copy the new node to avoid this situation. - */ - if (nodesc > 1) { + /* Citing from the docs (https://gnome.pages.gitlab.gnome.org/libxml2/devhelp/libxml2-tree.html#xmlAddChild): + * "Add a new node to @parent, at the end of the child (or property) list merging adjacent TEXT nodes (in which case @cur is freed)". + * So we must take a copy if this situation arises to prevent a use-after-free. */ + bool will_free = newNode->type == XML_TEXT_NODE && fragment->last && fragment->last->type == XML_TEXT_NODE; + if (will_free) { newNode = xmlCopyNode(newNode, 1); } if (!xmlAddChild(fragment, newNode)) { - if (nodesc > 1) { + if (will_free) { xmlFreeNode(newNode); } goto hierarchy_request_err; @@ -201,8 +216,6 @@ xmlNode* dom_zvals_to_fragment(php_libxml_ref_obj *document, xmlNode *contextNod } else if (Z_TYPE(nodes[i]) == IS_STRING) { newNode = xmlNewDocText(documentNode, (xmlChar *) Z_STRVAL(nodes[i])); - xmlSetTreeDoc(newNode, documentNode); - if (!xmlAddChild(fragment, newNode)) { xmlFreeNode(newNode); goto hierarchy_request_err; @@ -239,10 +252,35 @@ static void dom_fragment_assign_parent_node(xmlNodePtr parentNode, xmlNodePtr fr fragment->last = NULL; } -void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc) +static zend_result dom_hierarchy_node_list(xmlNodePtr parentNode, zval *nodes, uint32_t nodesc) +{ + for (uint32_t i = 0; i < nodesc; i++) { + if (Z_TYPE(nodes[i]) == IS_OBJECT) { + const zend_class_entry *ce = Z_OBJCE(nodes[i]); + + if (instanceof_function(ce, dom_node_class_entry)) { + if (dom_hierarchy(parentNode, dom_object_get_node(Z_DOMOBJ_P(nodes + i))) != SUCCESS) { + return FAILURE; + } + } + } + } + + return SUCCESS; +} + +void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc) { xmlNode *parentNode = dom_object_get_node(context); xmlNodePtr newchild, 
prevsib; + + if (UNEXPECTED(dom_hierarchy_node_list(parentNode, nodes, nodesc) != SUCCESS)) { + php_dom_throw_error(HIERARCHY_REQUEST_ERR, dom_get_strict_error(context->document)); + return; + } + + php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); if (fragment == NULL) { @@ -259,19 +297,20 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc) parentNode->children = newchild; } - parentNode->last = fragment->last; + xmlNodePtr last = fragment->last; + parentNode->last = last; newchild->prev = prevsib; dom_fragment_assign_parent_node(parentNode, fragment); - dom_reconcile_ns(parentNode->doc, newchild); + dom_reconcile_ns_list(parentNode->doc, newchild, last); } xmlFree(fragment); } -void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) +void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc) { xmlNode *parentNode = dom_object_get_node(context); @@ -280,6 +319,13 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) return; } + if (UNEXPECTED(dom_hierarchy_node_list(parentNode, nodes, nodesc) != SUCCESS)) { + php_dom_throw_error(HIERARCHY_REQUEST_ERR, dom_get_strict_error(context->document)); + return; + } + + php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + xmlNodePtr newchild, nextsib; xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -291,37 +337,79 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) nextsib = parentNode->children; if (newchild) { + xmlNodePtr last = fragment->last; parentNode->children = newchild; fragment->last->next = nextsib; - nextsib->prev = fragment->last; + nextsib->prev = last; dom_fragment_assign_parent_node(parentNode, fragment); - dom_reconcile_ns(parentNode->doc, newchild); + dom_reconcile_ns_list(parentNode->doc, newchild, last); } xmlFree(fragment); } 
-void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) +static void dom_pre_insert(xmlNodePtr insertion_point, xmlNodePtr parentNode, xmlNodePtr newchild, xmlNodePtr fragment) +{ + if (!insertion_point) { + /* Place it as last node */ + if (parentNode->children) { + /* There are children */ + fragment->last->prev = parentNode->last; + newchild->prev = parentNode->last->prev; + parentNode->last->next = newchild; + } else { + /* No children, because they moved out when they became a fragment */ + parentNode->children = newchild; + parentNode->last = newchild; + } + } else { + /* Insert fragment before insertion_point */ + fragment->last->next = insertion_point; + if (insertion_point->prev) { + insertion_point->prev->next = newchild; + newchild->prev = insertion_point->prev; + } + insertion_point->prev = newchild; + if (parentNode->children == insertion_point) { + parentNode->children = newchild; + } + } +} + +void dom_parent_node_after(dom_object *context, zval *nodes, uint32_t nodesc) { + /* Spec link: https://dom.spec.whatwg.org/#dom-childnode-after */ + xmlNode *prevsib = dom_object_get_node(context); xmlNodePtr newchild, parentNode; - xmlNode *fragment, *nextsib; + xmlNode *fragment; xmlDoc *doc; - bool afterlastchild; - - int stricterror = dom_get_strict_error(context->document); - if (!prevsib->parent) { - php_dom_throw_error(NO_MODIFICATION_ALLOWED_ERR, stricterror); + /* Spec step 1 */ + parentNode = prevsib->parent; + /* Spec step 2 */ + if (!parentNode) { + int stricterror = dom_get_strict_error(context->document); + php_dom_throw_error(HIERARCHY_REQUEST_ERR, stricterror); return; } + /* Spec step 3: find first following child not in nodes; otherwise null */ + xmlNodePtr viable_next_sibling = prevsib->next; + while (viable_next_sibling) { + if (!dom_is_node_in_list(nodes, nodesc, viable_next_sibling)) { + break; + } + viable_next_sibling = viable_next_sibling->next; + } + doc = prevsib->doc; - parentNode = prevsib->parent; - nextsib = 
prevsib->next; - afterlastchild = (nextsib == NULL); + + php_libxml_invalidate_node_list_cache_from_doc(doc); + + /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); if (fragment == NULL) { @@ -331,60 +419,50 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) newchild = fragment->children; if (newchild) { - /* first node and last node are both both parameters to DOMElement::after() method so nextsib and prevsib are null. */ - if (!parentNode->children) { - prevsib = nextsib = NULL; - } else if (afterlastchild) { - /* - * The new node will be inserted after last node, prevsib is last node. - * The first node is the parameter to DOMElement::after() if parentNode->children == prevsib is true - * and prevsib does not change, otherwise prevsib is parentNode->last (first node). - */ - prevsib = parentNode->children == prevsib ? prevsib : parentNode->last; - } else { - /* - * The new node will be inserted after first node, prevsib is first node. - * The first node is not the parameter to DOMElement::after() if parentNode->children == prevsib is true - * and prevsib does not change otherwise prevsib is null to mean that parentNode->children is the new node. - */ - prevsib = parentNode->children == prevsib ? 
prevsib : NULL; - } + xmlNodePtr last = fragment->last; - if (prevsib) { - fragment->last->next = prevsib->next; - if (prevsib->next) { - prevsib->next->prev = fragment->last; - } - prevsib->next = newchild; - } else { - parentNode->children = newchild; - if (nextsib) { - fragment->last->next = nextsib; - nextsib->prev = fragment->last; - } - } + /* Step 5: place fragment into the parent before viable_next_sibling */ + dom_pre_insert(viable_next_sibling, parentNode, newchild, fragment); - newchild->prev = prevsib; dom_fragment_assign_parent_node(parentNode, fragment); - dom_reconcile_ns(doc, newchild); + dom_reconcile_ns_list(doc, newchild, last); } xmlFree(fragment); } -void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) +void dom_parent_node_before(dom_object *context, zval *nodes, uint32_t nodesc) { + /* Spec link: https://dom.spec.whatwg.org/#dom-childnode-before */ + xmlNode *nextsib = dom_object_get_node(context); - xmlNodePtr newchild, prevsib, parentNode; - xmlNode *fragment, *afternextsib; + xmlNodePtr newchild, parentNode; + xmlNode *fragment; xmlDoc *doc; - bool beforefirstchild; - doc = nextsib->doc; - prevsib = nextsib->prev; - afternextsib = nextsib->next; + /* Spec step 1 */ parentNode = nextsib->parent; - beforefirstchild = !prevsib; + /* Spec step 2 */ + if (!parentNode) { + int stricterror = dom_get_strict_error(context->document); + php_dom_throw_error(HIERARCHY_REQUEST_ERR, stricterror); + return; + } + + /* Spec step 3: find first preceding sibling not in nodes; otherwise null */ + xmlNodePtr viable_previous_sibling = nextsib->prev; + while (viable_previous_sibling) { + if (!dom_is_node_in_list(nodes, nodesc, viable_previous_sibling)) { + break; + } + viable_previous_sibling = viable_previous_sibling->prev; + } + + doc = nextsib->doc; + + php_libxml_invalidate_node_list_cache_from_doc(doc); + + /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); if 
(fragment == NULL) { @@ -394,74 +472,66 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) newchild = fragment->children; if (newchild) { - /* first node and last node are both both parameters to DOMElement::before() method so nextsib is null. */ - if (!parentNode->children) { - nextsib = NULL; - } else if (beforefirstchild) { - /* - * The new node will be inserted before first node, nextsib is first node and afternextsib is last node. - * The first node is not the parameter to DOMElement::before() if parentNode->children == nextsib is true - * and nextsib does not change, otherwise nextsib is the last node. - */ - nextsib = parentNode->children == nextsib ? nextsib : afternextsib; - } else { - /* - * The new node will be inserted before last node, prevsib is first node and nestsib is last node. - * The first node is not the parameter to DOMElement::before() if parentNode->children == prevsib is true - * but last node may be, so use prevsib->next to determine the value of nextsib, otherwise nextsib does not change. - */ - nextsib = parentNode->children == prevsib ? 
prevsib->next : nextsib; - } + xmlNodePtr last = fragment->last; - if (parentNode->children == nextsib) { - parentNode->children = newchild; + /* Step 5: if viable_previous_sibling is null, set it to the parent's first child, otherwise viable_previous_sibling's next sibling */ + if (!viable_previous_sibling) { + viable_previous_sibling = parentNode->children; } else { - prevsib->next = newchild; + viable_previous_sibling = viable_previous_sibling->next; } - - fragment->last->next = nextsib; - if (nextsib) { - nextsib->prev = fragment->last; - } - - newchild->prev = prevsib; + /* Step 6: place fragment into the parent before viable_previous_sibling */ + dom_pre_insert(viable_previous_sibling, parentNode, newchild, fragment); dom_fragment_assign_parent_node(parentNode, fragment); - dom_reconcile_ns(doc, newchild); + dom_reconcile_ns_list(doc, newchild, last); } xmlFree(fragment); } -void dom_child_node_remove(dom_object *context) +static zend_result dom_child_removal_preconditions(const xmlNodePtr child, int stricterror) { - xmlNode *child = dom_object_get_node(context); - xmlNodePtr children; - int stricterror; - - stricterror = dom_get_strict_error(context->document); - if (dom_node_is_read_only(child) == SUCCESS || (child->parent != NULL && dom_node_is_read_only(child->parent) == SUCCESS)) { php_dom_throw_error(NO_MODIFICATION_ALLOWED_ERR, stricterror); - return; + return FAILURE; } if (!child->parent) { php_dom_throw_error(NOT_FOUND_ERR, stricterror); - return; + return FAILURE; } if (dom_node_children_valid(child->parent) == FAILURE) { - return; + return FAILURE; } - children = child->parent->children; + xmlNodePtr children = child->parent->children; if (!children) { php_dom_throw_error(NOT_FOUND_ERR, stricterror); + return FAILURE; + } + + return SUCCESS; +} + +void dom_child_node_remove(dom_object *context) +{ + xmlNode *child = dom_object_get_node(context); + xmlNodePtr children; + int stricterror; + + stricterror = dom_get_strict_error(context->document); + + 
if (UNEXPECTED(dom_child_removal_preconditions(child, stricterror) != SUCCESS)) { return; } + children = child->parent->children; + + php_libxml_invalidate_node_list_cache_from_doc(context->document->ptr); + while (children) { if (children == child) { xmlUnlinkNode(child); @@ -473,4 +543,42 @@ void dom_child_node_remove(dom_object *context) php_dom_throw_error(NOT_FOUND_ERR, stricterror); } +void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc) +{ + xmlNodePtr child = dom_object_get_node(context); + xmlNodePtr parentNode = child->parent; + + int stricterror = dom_get_strict_error(context->document); + if (UNEXPECTED(dom_child_removal_preconditions(child, stricterror) != SUCCESS)) { + return; + } + + xmlNodePtr insertion_point = child->next; + + xmlNodePtr fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); + if (UNEXPECTED(fragment == NULL)) { + return; + } + + xmlNodePtr newchild = fragment->children; + xmlDocPtr doc = parentNode->doc; + + if (newchild) { + xmlNodePtr last = fragment->last; + + /* Unlink it unless it became a part of the fragment. + * Freeing will be taken care of by the lifetime of the returned dom object. 
*/ + if (child->parent != fragment) { + xmlUnlinkNode(child); + } + + dom_pre_insert(insertion_point, parentNode, newchild, fragment); + + dom_fragment_assign_parent_node(parentNode, fragment); + dom_reconcile_ns_list(doc, newchild, last); + } + + xmlFree(fragment); +} + #endif diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index 662c6e9ef7ce1..adb3c1377344b 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -61,6 +61,7 @@ PHP_DOM_EXPORT zend_class_entry *dom_namespace_node_class_entry; zend_object_handlers dom_object_handlers; zend_object_handlers dom_nnodemap_object_handlers; +zend_object_handlers dom_object_namespace_node_handlers; #ifdef LIBXML_XPATH_ENABLED zend_object_handlers dom_xpath_object_handlers; #endif @@ -86,6 +87,10 @@ static HashTable dom_xpath_prop_handlers; #endif /* }}} */ +static zend_object *dom_objects_namespace_node_new(zend_class_entry *class_type); +static void dom_object_namespace_node_free_storage(zend_object *object); +static xmlNodePtr php_dom_create_fake_namespace_decl_node_ptr(xmlNodePtr nodep, xmlNsPtr original); + typedef int (*dom_read_t)(dom_object *obj, zval *retval); typedef int (*dom_write_t)(dom_object *obj, zval *newval); @@ -140,6 +145,17 @@ int dom_node_children_valid(xmlNodePtr node) { } /* }}} end dom_node_children_valid */ +static const libxml_doc_props default_doc_props = { + .formatoutput = false, + .validateonparse = false, + .resolveexternals = false, + .preservewhitespace = true, + .substituteentities = false, + .stricterror = true, + .recover = false, + .classmap = NULL, +}; + /* {{{ dom_get_doc_props() */ dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document) { @@ -149,28 +165,31 @@ dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document) return document->doc_props; } else { doc_props = emalloc(sizeof(libxml_doc_props)); - doc_props->formatoutput = 0; - doc_props->validateonparse = 0; - doc_props->resolveexternals = 0; - doc_props->preservewhitespace = 1; - 
doc_props->substituteentities = 0; - doc_props->stricterror = 1; - doc_props->recover = 0; - doc_props->classmap = NULL; + memcpy(doc_props, &default_doc_props, sizeof(libxml_doc_props)); if (document) { document->doc_props = doc_props; } return doc_props; } } +/* }}} */ + +libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document) +{ + if (document && document->doc_props) { + return document->doc_props; + } else { + return &default_doc_props; + } +} static void dom_copy_doc_props(php_libxml_ref_obj *source_doc, php_libxml_ref_obj *dest_doc) { - dom_doc_propsptr source, dest; + dom_doc_propsptr dest; if (source_doc && dest_doc) { - source = dom_get_doc_props(source_doc); + libxml_doc_props const* source = dom_get_doc_props_read_only(source_doc); dest = dom_get_doc_props(dest_doc); dest->formatoutput = source->formatoutput; @@ -212,10 +231,8 @@ void dom_set_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece zend_class_entry *dom_get_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece) { - dom_doc_propsptr doc_props; - if (document) { - doc_props = dom_get_doc_props(document); + libxml_doc_props const* doc_props = dom_get_doc_props_read_only(document); if (doc_props->classmap) { zend_class_entry *ce = zend_hash_find_ptr(doc_props->classmap, basece->name); if (ce) { @@ -230,16 +247,7 @@ zend_class_entry *dom_get_doc_classmap(php_libxml_ref_obj *document, zend_class_ /* {{{ dom_get_strict_error() */ int dom_get_strict_error(php_libxml_ref_obj *document) { - int stricterror; - dom_doc_propsptr doc_props; - - doc_props = dom_get_doc_props(document); - stricterror = doc_props->stricterror; - if (document == NULL) { - efree(doc_props); - } - - return stricterror; + return dom_get_doc_props_read_only(document)->stricterror; } /* }}} */ @@ -473,6 +481,19 @@ PHP_FUNCTION(dom_import_simplexml) static dom_object* dom_objects_set_class(zend_class_entry *class_type); +static void 
dom_update_refcount_after_clone(dom_object *original, xmlNodePtr original_node, dom_object *clone, xmlNodePtr cloned_node) +{ + /* If we cloned a document then we must create new doc proxy */ + if (cloned_node->doc == original_node->doc) { + clone->document = original->document; + } + php_libxml_increment_doc_ref((php_libxml_node_object *)clone, cloned_node->doc); + php_libxml_increment_node_ptr((php_libxml_node_object *)clone, cloned_node, (void *)clone); + if (original->document != clone->document) { + dom_copy_doc_props(original->document, clone->document); + } +} + static zend_object *dom_objects_store_clone_obj(zend_object *zobject) /* {{{ */ { dom_object *intern = php_dom_obj_from_obj(zobject); @@ -485,15 +506,7 @@ static zend_object *dom_objects_store_clone_obj(zend_object *zobject) /* {{{ */ if (node != NULL) { xmlNodePtr cloned_node = xmlDocCopyNode(node, node->doc, 1); if (cloned_node != NULL) { - /* If we cloned a document then we must create new doc proxy */ - if (cloned_node->doc == node->doc) { - clone->document = intern->document; - } - php_libxml_increment_doc_ref((php_libxml_node_object *)clone, cloned_node->doc); - php_libxml_increment_node_ptr((php_libxml_node_object *)clone, cloned_node, (void *)clone); - if (intern->document != clone->document) { - dom_copy_doc_props(intern->document, clone->document); - } + dom_update_refcount_after_clone(intern, node, clone, cloned_node); } } @@ -505,6 +518,26 @@ static zend_object *dom_objects_store_clone_obj(zend_object *zobject) /* {{{ */ } /* }}} */ +static zend_object *dom_object_namespace_node_clone_obj(zend_object *zobject) +{ + dom_object_namespace_node *intern = php_dom_namespace_node_obj_from_obj(zobject); + zend_object *clone = dom_objects_namespace_node_new(intern->dom.std.ce); + dom_object_namespace_node *clone_intern = php_dom_namespace_node_obj_from_obj(clone); + + xmlNodePtr original_node = dom_object_get_node(&intern->dom); + ZEND_ASSERT(original_node->type == XML_NAMESPACE_DECL); + 
xmlNodePtr cloned_node = php_dom_create_fake_namespace_decl_node_ptr(original_node->parent, original_node->ns); + + if (intern->parent_intern) { + clone_intern->parent_intern = intern->parent_intern; + GC_ADDREF(&clone_intern->parent_intern->std); + } + dom_update_refcount_after_clone(&intern->dom, original_node, &clone_intern->dom, cloned_node); + + zend_objects_clone_members(clone, &intern->dom.std); + return clone; +} + static void dom_copy_prop_handler(zval *zv) /* {{{ */ { dom_prop_handler *hnd = Z_PTR_P(zv); @@ -570,6 +603,11 @@ PHP_MINIT_FUNCTION(dom) dom_nnodemap_object_handlers.read_dimension = dom_nodelist_read_dimension; dom_nnodemap_object_handlers.has_dimension = dom_nodelist_has_dimension; + memcpy(&dom_object_namespace_node_handlers, &dom_object_handlers, sizeof(zend_object_handlers)); + dom_object_namespace_node_handlers.offset = XtOffsetOf(dom_object_namespace_node, dom.std); + dom_object_namespace_node_handlers.free_obj = dom_object_namespace_node_free_storage; + dom_object_namespace_node_handlers.clone_obj = dom_object_namespace_node_clone_obj; + zend_hash_init(&classes, 0, NULL, NULL, 1); dom_domexception_class_entry = register_class_DOMException(zend_ce_exception); @@ -604,7 +642,7 @@ PHP_MINIT_FUNCTION(dom) zend_hash_add_ptr(&classes, dom_node_class_entry->name, &dom_node_prop_handlers); dom_namespace_node_class_entry = register_class_DOMNameSpaceNode(); - dom_namespace_node_class_entry->create_object = dom_objects_new; + dom_namespace_node_class_entry->create_object = dom_objects_namespace_node_new; zend_hash_init(&dom_namespace_node_prop_handlers, 0, NULL, dom_dtor_prop_handler, 1); dom_register_prop_handler(&dom_namespace_node_prop_handlers, "nodeName", sizeof("nodeName")-1, dom_node_node_name_read, NULL); @@ -939,7 +977,7 @@ void dom_objects_free_storage(zend_object *object) } /* }}} */ -void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns) /* {{{ */ +void 
dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len) /* {{{ */ { dom_nnodemap_object *mapptr = (dom_nnodemap_object *) intern->ptr; @@ -947,18 +985,38 @@ void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xml ZVAL_OBJ_COPY(&mapptr->baseobj_zv, &basenode->std); + xmlDocPtr doc = basenode->document ? basenode->document->ptr : NULL; + mapptr->baseobj = basenode; mapptr->nodetype = ntype; mapptr->ht = ht; - mapptr->local = local; - mapptr->ns = ns; + + const xmlChar* tmp; + + if (local) { + int len = local_len > INT_MAX ? -1 : (int) local_len; + if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)local, len)) != NULL) { + mapptr->local = (xmlChar*) tmp; + } else { + mapptr->local = xmlCharStrndup(local, len); + mapptr->free_local = true; + } + } + + if (ns) { + int len = ns_len > INT_MAX ? -1 : (int) ns_len; + if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)ns, len)) != NULL) { + mapptr->ns = (xmlChar*) tmp; + } else { + mapptr->ns = xmlCharStrndup(ns, len); + mapptr->free_ns = true; + } + } } /* }}} */ -static dom_object* dom_objects_set_class(zend_class_entry *class_type) /* {{{ */ +static void dom_objects_set_class_ex(zend_class_entry *class_type, dom_object *intern) { - dom_object *intern = zend_object_alloc(sizeof(dom_object), class_type); - zend_class_entry *base_class = class_type; while ((base_class->type != ZEND_INTERNAL_CLASS || base_class->info.internal.module->module_number != dom_module_entry.module_number) && base_class->parent != NULL) { base_class = base_class->parent; @@ -968,10 +1026,14 @@ static dom_object* dom_objects_set_class(zend_class_entry *class_type) /* {{{ */ zend_object_std_init(&intern->std, class_type); object_properties_init(&intern->std, class_type); +} +static dom_object* dom_objects_set_class(zend_class_entry *class_type) +{ + dom_object *intern = 
zend_object_alloc(sizeof(dom_object), class_type); + dom_objects_set_class_ex(class_type, intern); return intern; } -/* }}} */ /* {{{ dom_objects_new */ zend_object *dom_objects_new(zend_class_entry *class_type) @@ -982,6 +1044,25 @@ zend_object *dom_objects_new(zend_class_entry *class_type) } /* }}} */ +static zend_object *dom_objects_namespace_node_new(zend_class_entry *class_type) +{ + dom_object_namespace_node *intern = zend_object_alloc(sizeof(dom_object_namespace_node), class_type); + dom_objects_set_class_ex(class_type, &intern->dom); + intern->dom.std.handlers = &dom_object_namespace_node_handlers; + return &intern->dom.std; +} + +static void dom_object_namespace_node_free_storage(zend_object *object) +{ + dom_object_namespace_node *intern = php_dom_namespace_node_obj_from_obj(object); + if (intern->parent_intern != NULL) { + zval tmp; + ZVAL_OBJ(&tmp, &intern->parent_intern->std); + zval_ptr_dtor(&tmp); + } + dom_objects_free_storage(object); +} + #ifdef LIBXML_XPATH_ENABLED /* {{{ zend_object dom_xpath_objects_new(zend_class_entry *class_type) */ zend_object *dom_xpath_objects_new(zend_class_entry *class_type) @@ -1007,10 +1088,13 @@ void dom_nnodemap_objects_free_storage(zend_object *object) /* {{{ */ dom_nnodemap_object *objmap = (dom_nnodemap_object *)intern->ptr; if (objmap) { - if (objmap->local) { + if (objmap->cached_obj && GC_DELREF(&objmap->cached_obj->std) == 0) { + zend_objects_store_del(&objmap->cached_obj->std); + } + if (objmap->free_local) { xmlFree(objmap->local); } - if (objmap->ns) { + if (objmap->free_ns) { xmlFree(objmap->ns); } if (!Z_ISUNDEF(objmap->baseobj_zv)) { @@ -1039,7 +1123,13 @@ zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type) /* {{{ */ objmap->nodetype = 0; objmap->ht = NULL; objmap->local = NULL; + objmap->free_local = false; objmap->ns = NULL; + objmap->free_ns = false; + objmap->cache_tag.modification_nr = 0; + objmap->cached_length = -1; + objmap->cached_obj = NULL; + objmap->cached_obj_index = 0; 
return &intern->std; } @@ -1217,14 +1307,25 @@ bool dom_has_feature(zend_string *feature, zend_string *version) } /* }}} end dom_has_feature */ -xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */ +xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */ { + /* Can happen with detached document */ + if (UNEXPECTED(nodep == NULL)) { + return NULL; + } + xmlNodePtr ret = NULL; + bool local_match_any = local[0] == '*' && local[1] == '\0'; - while (nodep != NULL && (*cur <= index || index == -1)) { + /* Note: The spec says that ns == '' must be transformed to ns == NULL. In other words, they are equivalent. + * PHP however does not do this and internally uses the empty string everywhere when the user provides ns == NULL. + * This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". */ + bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0'); + + while (*cur <= index) { if (nodep->type == XML_ELEMENT_NODE) { - if (xmlStrEqual(nodep->name, (xmlChar *)local) || xmlStrEqual((xmlChar *)"*", (xmlChar *)local)) { - if (ns == NULL || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && (xmlStrEqual(nodep->ns->href, (xmlChar *)ns) || xmlStrEqual((xmlChar *)"*", (xmlChar *)ns)))) { + if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) { + if (ns_match_any || (ns[0] == '\0' && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) { if (*cur == index) { ret = nodep; break; @@ -1232,16 +1333,33 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l (*cur)++; } } - ret = dom_get_elements_by_tag_name_ns_raw(nodep->children, ns, local, cur, index); - if (ret != NULL) { - break; + + if (nodep->children) { + nodep = nodep->children; + continue; } } - nodep = nodep->next; + + if 
(nodep->next) { + nodep = nodep->next; + } else { + /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */ + do { + nodep = nodep->parent; + if (nodep == basep) { + return NULL; + } + /* This shouldn't happen, unless there's an invalidation bug somewhere. */ + if (UNEXPECTED(nodep == NULL)) { + zend_throw_error(NULL, "Current node in traversal is not in the document. Please report this as a bug in php-src."); + return NULL; + } + } while (nodep->next == NULL); + nodep = nodep->next; + } } return ret; } -/* }}} */ /* }}} end dom_element_get_elements_by_tag_name_ns_raw */ static inline bool is_empty_node(xmlNodePtr nodep) @@ -1307,11 +1425,16 @@ void dom_normalize (xmlNodePtr nodep) /* {{{ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) */ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { - xmlNs *cur; - if (doc == NULL) return; + ZEND_ASSERT(ns->next == NULL); + + /* Note: we'll use a prepend strategy instead of append to + * make sure we don't lose performance when the list is long. + * As libxml2 could assume the xml node is the first one, we'll place our + * new entries after the first one. 
*/ + if (doc->oldNs == NULL) { doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs)); if (doc->oldNs == NULL) { @@ -1321,48 +1444,83 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) { doc->oldNs->type = XML_LOCAL_NAMESPACE; doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE); doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml"); + } else { + ns->next = doc->oldNs->next; } - - cur = doc->oldNs; - while (cur->next != NULL) { - cur = cur->next; - } - cur->next = ns; + doc->oldNs->next = ns; } /* }}} end dom_set_old_ns */ -void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ +static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep) { xmlNsPtr nsptr, nsdftptr, curns, prevns = NULL; - if (nodep->type == XML_ELEMENT_NODE) { - /* Following if block primarily used for inserting nodes created via createElementNS */ - if (nodep->nsDef != NULL) { - curns = nodep->nsDef; - while (curns) { - nsdftptr = curns->next; - if (curns->href != NULL) { - if((nsptr = xmlSearchNsByHref(doc, nodep->parent, curns->href)) && - (curns->prefix == NULL || xmlStrEqual(nsptr->prefix, curns->prefix))) { - curns->next = NULL; - if (prevns == NULL) { - nodep->nsDef = nsdftptr; - } else { - prevns->next = nsdftptr; - } - dom_set_old_ns(doc, curns); - curns = prevns; + /* Following if block primarily used for inserting nodes created via createElementNS */ + if (nodep->nsDef != NULL) { + curns = nodep->nsDef; + while (curns) { + nsdftptr = curns->next; + if (curns->href != NULL) { + if((nsptr = xmlSearchNsByHref(doc, nodep->parent, curns->href)) && + (curns->prefix == NULL || xmlStrEqual(nsptr->prefix, curns->prefix))) { + curns->next = NULL; + if (prevns == NULL) { + nodep->nsDef = nsdftptr; + } else { + prevns->next = nsdftptr; } + /* Note: we can't get here if the ns is already on the oldNs list. + * This is because in that case the definition won't be on the node, and + * therefore won't be in the nodep->nsDef list. 
*/ + dom_set_old_ns(doc, curns); + curns = prevns; } - prevns = curns; - curns = nsdftptr; } + prevns = curns; + curns = nsdftptr; } + } +} + +void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep) /* {{{ */ +{ + if (nodep->type == XML_ELEMENT_NODE) { + dom_reconcile_ns_internal(doc, nodep); xmlReconciliateNs(doc, nodep); } } /* }}} */ +static void dom_reconcile_ns_list_internal(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last) +{ + ZEND_ASSERT(nodep != NULL); + while (true) { + if (nodep->type == XML_ELEMENT_NODE) { + dom_reconcile_ns_internal(doc, nodep); + if (nodep->children) { + dom_reconcile_ns_list_internal(doc, nodep->children, nodep->last /* process the whole children list */); + } + } + if (nodep == last) { + break; + } + nodep = nodep->next; + } +} + +void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last) +{ + dom_reconcile_ns_list_internal(doc, nodep, last); + /* Outside of the recursion above because xmlReconciliateNs() performs its own recursion. */ + while (true) { + xmlReconciliateNs(doc, nodep); + if (nodep == last) { + break; + } + nodep = nodep->next; + } +} + /* http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/core.html#ID-DocCrElNS @@ -1412,22 +1570,38 @@ NAMESPACE_ERR: Raised if /* {{{ xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) */ xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) { - xmlNsPtr nsptr = NULL; - - *errorcode = 0; + xmlNsPtr nsptr; if (! ((prefix && !strcmp (prefix, "xml") && strcmp(uri, (char *)XML_XML_NAMESPACE)) || (prefix && !strcmp (prefix, "xmlns") && strcmp(uri, (char *)DOM_XMLNS_NAMESPACE)) || (prefix && !strcmp(uri, (char *)DOM_XMLNS_NAMESPACE) && strcmp (prefix, "xmlns")))) { + /* Reuse the old namespaces from doc->oldNs if possible, before creating a new one. + * This will prevent the oldNs list from growing with duplicates. 
*/ + xmlDocPtr doc = nodep->doc; + if (doc && doc->oldNs != NULL) { + nsptr = doc->oldNs; + do { + if (xmlStrEqual(nsptr->prefix, (xmlChar *)prefix) && xmlStrEqual(nsptr->href, (xmlChar *)uri)) { + goto out; + } + nsptr = nsptr->next; + } while (nsptr); + } + /* Couldn't reuse one, create a new one. */ nsptr = xmlNewNs(nodep, (xmlChar *)uri, (xmlChar *)prefix); + if (UNEXPECTED(nsptr == NULL)) { + goto err; + } + } else { + goto err; } - if (nsptr == NULL) { - *errorcode = NAMESPACE_ERR; - } - +out: + *errorcode = 0; return nsptr; - +err: + *errorcode = NAMESPACE_ERR; + return NULL; } /* }}} end dom_get_ns */ @@ -1461,6 +1635,32 @@ xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName) { } /* }}} end dom_get_nsdecl */ +static xmlNodePtr php_dom_create_fake_namespace_decl_node_ptr(xmlNodePtr nodep, xmlNsPtr original) +{ + xmlNodePtr attrp; + xmlNsPtr curns = xmlNewNs(NULL, original->href, NULL); + if (original->prefix) { + curns->prefix = xmlStrdup(original->prefix); + attrp = xmlNewDocNode(nodep->doc, NULL, (xmlChar *) original->prefix, original->href); + } else { + attrp = xmlNewDocNode(nodep->doc, NULL, (xmlChar *)"xmlns", original->href); + } + attrp->type = XML_NAMESPACE_DECL; + attrp->parent = nodep; + attrp->ns = curns; + return attrp; +} + +/* Note: Assumes the additional lifetime was already added in the caller. */ +xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr original, zval *return_value, dom_object *parent_intern) +{ + xmlNodePtr attrp = php_dom_create_fake_namespace_decl_node_ptr(nodep, original); + php_dom_create_object(attrp, return_value, parent_intern); + /* This object must exist, because we just created an object for it via php_dom_create_object(). 
*/ + php_dom_namespace_node_obj_from_obj(Z_OBJ_P(return_value))->parent_intern = parent_intern; + return attrp; +} + static zval *dom_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv) /* {{{ */ { zval offset_copy; @@ -1491,4 +1691,14 @@ static int dom_nodelist_has_dimension(zend_object *object, zval *member, int che } } /* }}} end dom_nodelist_has_dimension */ +void dom_remove_all_children(xmlNodePtr nodep) +{ + if (nodep->children) { + node_list_unlink(nodep->children); + php_libxml_node_free_list((xmlNodePtr) nodep->children); + nodep->children = NULL; + nodep->last = NULL; + } +} + #endif /* HAVE_DOM */ diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index fdfdd4e7a31ca..4b327dc6b9d05 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -82,21 +82,41 @@ typedef struct _dom_nnodemap_object { dom_object *baseobj; zval baseobj_zv; int nodetype; + int cached_length; xmlHashTable *ht; xmlChar *local; xmlChar *ns; + php_libxml_cache_tag cache_tag; + dom_object *cached_obj; + int cached_obj_index; + bool free_local : 1; + bool free_ns : 1; } dom_nnodemap_object; typedef struct { zend_object_iterator intern; zval curobj; HashPosition pos; + php_libxml_cache_tag cache_tag; } php_dom_iterator; +typedef struct { + /* This may be a fake object that isn't actually in the children list of the parent. + * This is because some namespace declaration nodes aren't stored on the parent in libxml2, so we have to fake it. + * We could use a zval for this, but since this is always going to be an object let's save space... 
*/ + dom_object *parent_intern; + dom_object dom; +} dom_object_namespace_node; + +static inline dom_object_namespace_node *php_dom_namespace_node_obj_from_obj(zend_object *obj) { + return (dom_object_namespace_node*)((char*)(obj) - XtOffsetOf(dom_object_namespace_node, dom.std)); +} + #include "domexception.h" dom_object *dom_object_get_data(xmlNodePtr obj); dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document); +libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document); zend_object *dom_objects_new(zend_class_entry *class_type); zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type); #ifdef LIBXML_XPATH_ENABLED @@ -110,27 +130,32 @@ int dom_check_qname(char *qname, char **localname, char **prefix, int uri_len, i xmlNsPtr dom_get_ns(xmlNodePtr node, char *uri, int *errorcode, char *prefix); void dom_set_old_ns(xmlDoc *doc, xmlNs *ns); void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep); +void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last); xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName); void dom_normalize (xmlNodePtr nodep); -xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index); +xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index); void php_dom_create_implementation(zval *retval); int dom_hierarchy(xmlNodePtr parent, xmlNodePtr child); bool dom_has_feature(zend_string *feature, zend_string *version); int dom_node_is_read_only(xmlNodePtr node); int dom_node_children_valid(xmlNodePtr node); void php_dom_create_iterator(zval *return_value, int ce_type); -void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns); +void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len); 
xmlNodePtr create_notation(const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID); xmlNode *php_dom_libxml_hash_iter(xmlHashTable *ht, int index); xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index); zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, int by_ref); void dom_set_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece, zend_class_entry *ce); +xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr original, zval *return_value, dom_object *parent_intern); -void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc); -void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc); -void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc); -void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc); +void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc); +void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc); +void dom_parent_node_after(dom_object *context, zval *nodes, uint32_t nodesc); +void dom_parent_node_before(dom_object *context, zval *nodes, uint32_t nodesc); void dom_child_node_remove(dom_object *context); +void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc); + +void dom_remove_all_children(xmlNodePtr nodep); #define DOM_GET_OBJ(__ptr, __id, __prtype, __intern) { \ __intern = Z_DOMOBJ_P(__id); \ @@ -152,6 +177,33 @@ void dom_child_node_remove(dom_object *context); #define DOM_NODELIST 0 #define DOM_NAMEDNODEMAP 1 +static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr) +{ + ZEND_ASSERT(cache_tag != NULL); + ZEND_ASSERT(doc_ptr != NULL); + /* See overflow comment in php_libxml_invalidate_node_list_cache(). 
*/ +#if SIZEOF_SIZE_T == 8 + return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr; +#else + return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr || UNEXPECTED(doc_ptr->cache_tag.modification_nr == SIZE_MAX); +#endif +} + +static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node) +{ + ZEND_ASSERT(node != NULL); + return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private); +} + +static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node) +{ + ZEND_ASSERT(cache_tag != NULL); + if (node->doc && node->doc->_private) { + const php_libxml_doc_ptr* doc_ptr = node->doc->_private; + cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr; + } +} + PHP_MINIT_FUNCTION(dom); PHP_MSHUTDOWN_FUNCTION(dom); PHP_MINFO_FUNCTION(dom); diff --git a/ext/dom/php_dom.stub.php b/ext/dom/php_dom.stub.php index 21886e5015084..0ef5c4e5cf0bf 100644 --- a/ext/dom/php_dom.stub.php +++ b/ext/dom/php_dom.stub.php @@ -467,7 +467,7 @@ public function count(): int {} public function getIterator(): Iterator {} - /** @return DOMNode|DOMNameSpaceNode|null */ + /** @return DOMElement|DOMNode|DOMNameSpaceNode|null */ public function item(int $index) {} } diff --git a/ext/dom/php_dom_arginfo.h b/ext/dom/php_dom_arginfo.h index d1234f1404011..3a3fb9afd82e4 100644 --- a/ext/dom/php_dom_arginfo.h +++ b/ext/dom/php_dom_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 9a6a0c2a5626466aa6397f0892ee5b08ec335e14 */ + * Stub hash: 060166e1fd79f7447f0eaafb626b33e12791e93b */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0) ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0) diff --git a/ext/dom/processinginstruction.c b/ext/dom/processinginstruction.c index 465ecb431e73a..1f85f91b28150 100644 --- a/ext/dom/processinginstruction.c +++ b/ext/dom/processinginstruction.c @@ -128,7 +128,9 @@ int dom_processinginstruction_data_write(dom_object *obj, zval *newval) return FAILURE; } - xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + + xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str)); zend_string_release_ex(str, 0); return SUCCESS; diff --git a/ext/dom/tests/DOMAttr_entity_expansion.phpt b/ext/dom/tests/DOMAttr_entity_expansion.phpt new file mode 100644 index 0000000000000..e3482d1a9d739 --- /dev/null +++ b/ext/dom/tests/DOMAttr_entity_expansion.phpt @@ -0,0 +1,54 @@ +--TEST-- +DOMAttr entity expansion +--EXTENSIONS-- +dom +--FILE-- +createElement('elt'); +$doc->appendChild($elt); +$elt->setAttribute('a','&'); +print $doc->saveXML($elt) . "\n"; + +$attr = $elt->getAttributeNode('a'); +$attr->value = '&'; +print "$attr->value\n"; +print $doc->saveXML($elt) . "\n"; + +$attr->removeChild($attr->firstChild); +print $doc->saveXML($elt) . "\n"; + +$attr->nodeValue = '&'; +print "$attr->nodeValue\n"; +print $doc->saveXML($elt) . "\n"; + +$attr->nodeValue = '&'; +print "$attr->nodeValue\n"; +print $doc->saveXML($elt) . "\n"; + +$elt->removeAttributeNode($attr); +$elt->setAttributeNS('http://www.w3.org/2000/svg', 'svg:id','&'); +print $doc->saveXML($elt) . "\n"; + +$attr = $elt->getAttributeNodeNS('http://www.w3.org/2000/svg', 'id'); +$attr->value = '<&'; +print "$attr->value\n"; +print $doc->saveXML($elt) . 
"\n"; + +$node = new DOMAttr('foo','bar'); +$node->nodeValue = 'xx1yy'; +print "$node->nodeValue\n"; +?> +--EXPECT-- + +& + + +& + +& + + +<& + +xx1yy diff --git a/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt b/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt new file mode 100644 index 0000000000000..888d1ef9b8057 --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagNameNS_match_any_namespace.phpt @@ -0,0 +1,82 @@ +--TEST-- +DOMDocument::getElementsByTagNameNS() match any namespace +--EXTENSIONS-- +dom +--FILE-- + + +Books of the other guy.. + + + + xinclude: book.xml not found + + + + This is another namespace + + + +EOD; +$dom = new DOMDocument; + +// load the XML string defined above +$dom->loadXML($xml); + +function test($namespace, $local) { + global $dom; + $namespace_str = $namespace !== NULL ? "'$namespace'" : "null"; + echo "-- getElementsByTagNameNS($namespace_str, '$local') --\n"; + foreach ($dom->getElementsByTagNameNS($namespace, $local) as $element) { + echo 'local name: \'', $element->localName, '\', prefix: \'', $element->prefix, "'\n"; + } +} + +// Should *also* include objects even without a namespace +test(null, '*'); +// Should *also* include objects even without a namespace +test('*', '*'); +// Should *only* include objects without a namespace +test('', '*'); +// Should *only* include objects with the specified namespace +test('http://www.w3.org/2001/XInclude', '*'); +// Should not give any output +test('', 'fallback'); +// Should not give any output, because the null namespace is the same as the empty namespace +test(null, 'fallback'); +// Should only output the include from the empty namespace +test(null, 'include'); + +?> +--EXPECT-- +-- getElementsByTagNameNS(null, '*') -- +local name: 'chapter', prefix: '' +local name: 'title', prefix: '' +local name: 'para', prefix: '' +local name: 'error', prefix: '' +local name: 'include', prefix: '' +-- getElementsByTagNameNS('*', '*') -- 
+local name: 'chapter', prefix: '' +local name: 'title', prefix: '' +local name: 'para', prefix: '' +local name: 'include', prefix: 'xi' +local name: 'fallback', prefix: 'xi' +local name: 'error', prefix: '' +local name: 'include', prefix: '' +-- getElementsByTagNameNS('', '*') -- +local name: 'chapter', prefix: '' +local name: 'title', prefix: '' +local name: 'para', prefix: '' +local name: 'error', prefix: '' +local name: 'include', prefix: '' +-- getElementsByTagNameNS('http://www.w3.org/2001/XInclude', '*') -- +local name: 'include', prefix: 'xi' +local name: 'fallback', prefix: 'xi' +-- getElementsByTagNameNS('', 'fallback') -- +-- getElementsByTagNameNS(null, 'fallback') -- +-- getElementsByTagNameNS(null, 'include') -- +local name: 'include', prefix: '' diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt new file mode 100644 index 0000000000000..2b4622d10d389 --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt @@ -0,0 +1,47 @@ +--TEST-- +DOMDocument::getElementsByTagName() is live +--EXTENSIONS-- +dom +--FILE-- +loadXML( '' ); +$root = $doc->documentElement; + +$i = 0; + +/* Note that the list is live. The explanation for the output is as follows: + Before the loop we have the following (writing only the attributes): + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + + Now the loop starts, the current element is marked with a V. $i == 0: + V + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 1 gets printed. $i == 0, which is even, so 1 gets removed, which results in: + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + Note that everything shifted to the left. + Because the list is live, the current element pointer still refers to the first index, which now corresponds to element with attribute 2. + Now the foreach body ends, which means we go to the next element, which is now 3 instead of 2. + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 3 gets printed. 
$i == 1, which is odd, so nothing happens and we move on to the next element: + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 4 gets printed. $i == 2, which is even, so 4 gets removed, which results in: + V + 2 3 5 6 7 8 9 10 11 12 13 14 15 + Note again everything shifted to the left. + Now the foreach body ends, which means we go to the next element, which is now 6 instead of 5. + V + 2 3 5 6 7 8 9 10 11 12 13 14 15 + 6 gets printed, etc... */ +foreach ($doc->getElementsByTagName('e') as $node) { + print $node->getAttribute('i') . ' '; + if ($i++ % 2 == 0) + $root->removeChild($node); +} +print "\n"; +?> +--EXPECT-- +1 3 4 6 7 9 10 12 13 15 diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt new file mode 100644 index 0000000000000..0ac52cd5d662f --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt @@ -0,0 +1,29 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness with simplexml_import_dom +--EXTENSIONS-- +dom +simplexml +--FILE-- +loadXML( '' ); +$list = $doc->getElementsByTagName('e'); +print $list->item(5)->getAttribute('i')."\n"; +echo "before import\n"; +$s = simplexml_import_dom($doc->documentElement); +echo "after import\n"; + +unset($s->e[5]); +print $list->item(5)->getAttribute('i')."\n"; + +unset($s->e[5]); +print $list->item(5)->getAttribute('i')."\n"; + +?> +--EXPECT-- +6 +before import +after import +7 +8 diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt new file mode 100644 index 0000000000000..91d810df51bc6 --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt @@ -0,0 +1,89 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness tree walk +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + +echo "-- On first child, for --\n"; +$list = 
$doc->documentElement->firstChild->getElementsByTagName('b'); +var_dump($list->length); +for ($i = 0; $i < $list->length; $i++) { + echo $i, " ", $list->item($i)->getAttribute('i'), "\n"; +} +// Try to access one beyond to check if we don't get excess elements +var_dump($list->item($i)); + +echo "-- On first child, foreach --\n"; +foreach ($list as $item) { + echo $item->getAttribute('i'), "\n"; +} + +echo "-- On document, for --\n"; +$list = $doc->getElementsByTagName('b'); +var_dump($list->length); +for ($i = 0; $i < $list->length; $i++) { + echo $i, " ", $list->item($i)->getAttribute('i'), "\n"; +} +// Try to access one beyond to check if we don't get excess elements +var_dump($list->item($i)); + +echo "-- On document, foreach --\n"; +foreach ($list as $item) { + echo $item->getAttribute('i'), "\n"; +} + +echo "-- On document, after caching followed by removing --\n"; + +$list = $doc->documentElement->firstChild->getElementsByTagName('b'); +$list->item(0); // Activate item cache +$list->item(0)->remove(); +$list->item(0)->remove(); +$list->item(0)->remove(); +var_dump($list->length); +var_dump($list->item(0)); +foreach ($list as $item) { + echo "Should not execute\n"; +} + +echo "-- On document, clean list after removal --\n"; +$list = $doc->documentElement->firstChild->getElementsByTagName('b'); +var_dump($list->length); +var_dump($list->item(0)); +foreach ($list as $item) { + echo "Should not execute\n"; +} + +?> +--EXPECT-- +-- On first child, for -- +int(3) +0 1 +1 2 +2 3 +NULL +-- On first child, foreach -- +1 +2 +3 +-- On document, for -- +int(4) +0 1 +1 2 +2 3 +3 4 +NULL +-- On document, foreach -- +1 +2 +3 +4 +-- On document, after caching followed by removing -- +int(0) +NULL +-- On document, clean list after removal -- +int(0) +NULL diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt new file mode 100644 index 
0000000000000..af8af51844c9d --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness affected by writing properties +--EXTENSIONS-- +dom +--FILE-- +'; +$fields = ['nodeValue', 'textContent']; + +foreach ($fields as $field) { + $doc = new DOMDocument; + $doc->loadXML($xml); + $list = $doc->getElementsByTagName('a'); + var_dump($list->item(0) === NULL); + $doc->documentElement->{$field} = 'new_content'; + var_dump($list->item(0) === NULL); + print $doc->saveXML(); +} + +// Shouldn't be affected +$doc = new DOMDocument; +$doc->loadXML($xml); +$list = $doc->getElementsByTagNameNS('foo', 'a'); +var_dump($list->item(0) === NULL); +$doc->documentElement->firstChild->prefix = 'ns2'; +var_dump($list->item(0) === NULL); +print $doc->saveXML(); + +?> +--EXPECT-- +bool(false) +bool(true) + +new_content +bool(false) +bool(true) + +new_content +bool(false) +bool(false) + + diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt new file mode 100644 index 0000000000000..2c14a2080569e --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness with DOMDocument::xinclude() +--EXTENSIONS-- +dom +--FILE-- + + +

Hello

+ + + +

xinclude: book.xml not found

+
+
+
+
+EOD; + +$dom = new DOMDocument; +$dom->loadXML($xml); +$elements = $dom->getElementsByTagName('p'); +var_dump($elements->item(0)->textContent); +@$dom->xinclude(); +var_dump($elements->item(1)->textContent); +echo $dom->saveXML(); + +?> +--EXPECT-- +string(5) "Hello" +string(28) "xinclude: book.xml not found" + + +

Hello

+ + +

xinclude: book.xml not found

+ +
+
diff --git a/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt b/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt new file mode 100644 index 0000000000000..dad532b8167fe --- /dev/null +++ b/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt @@ -0,0 +1,69 @@ +--TEST-- +DOMDocument node list item cache invalidation +--EXTENSIONS-- +dom +--FILE-- +loadHTML('

hello

world

'); + +$elements = $doc->getElementsByTagName('p'); +$elements->item(0); // Activate item cache +$doc->loadHTML('

A

B

C

'); +var_dump($elements); +var_dump($elements->item(0)->textContent); // First lookup +var_dump($elements->item(2)->textContent); // Uses cache +var_dump($elements->item(1)->textContent); // Does not use cache + +echo "-- Remove cached item test --\n"; + +$doc = new DOMDocument(); +$doc->loadHTML('

hello

world

!

'); + +$elements = $doc->getElementsByTagName('p'); +$item = $elements->item(0); // Activate item cache +var_dump($item->textContent); +$item->remove(); +// Now element 0 means "world", and 1 means "!" +unset($item); +$item = $elements->item(1); +var_dump($item->textContent); + +echo "-- Removal of cached item in loop test --\n"; + +$doc = new DOMDocument; +$doc->loadXML( '' ); +$root = $doc->documentElement; + +$i = 0; +$elements = $root->getElementsByTagName('e'); +for ($i = 0; $i < 11; $i++) { + $node = $elements->item($i); + print $node->getAttribute('i') . ' '; + if ($i++ % 2 == 0) + $root->removeChild( $node ); +} +print "\n"; + +?> +--EXPECTF-- +-- Switch document test -- +object(DOMNodeList)#2 (1) { + ["length"]=> + int(3) +} +string(1) "A" +string(1) "C" +string(1) "B" +-- Remove cached item test -- +string(5) "hello" +string(1) "!" +-- Removal of cached item in loop test -- +1 4 7 10 13 +Fatal error: Uncaught Error: Call to a member function getAttribute() on null in %s:%d +Stack trace: +#0 {main} + thrown in %s on line %d diff --git a/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt b/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt new file mode 100644 index 0000000000000..7a3633894a381 --- /dev/null +++ b/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt @@ -0,0 +1,34 @@ +--TEST-- +DOMDocument node list length cache invalidation +--EXTENSIONS-- +dom +--FILE-- +loadHTML('

hello

world

!

'); + +$elements = $doc->getElementsByTagName('p'); +$item = $elements->item(0); // Activate item cache +var_dump($elements->length); // Length not cached yet, should still compute +$item->remove(); +// Now element 0 means "world", and 1 means "!" +unset($item); +var_dump($elements->length); +$item = $elements->item(1); +var_dump($item->textContent); +$item = $elements->item(1); +var_dump($item->textContent); +$item = $elements->item(0); +var_dump($item->textContent); +$item = $elements->item(1); +var_dump($item->textContent); + +?> +--EXPECT-- +int(3) +int(2) +string(1) "!" +string(1) "!" +string(5) "world" +string(1) "!" diff --git a/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt new file mode 100644 index 0000000000000..e05bd1ac6f646 --- /dev/null +++ b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument liveness caching invalidation by textContent +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); +$root = $doc->documentElement; + +$i = 0; + +echo "-- Overwrite during iteration --\n"; + +foreach ($doc->getElementsByTagName('e') as $node) { + if ($i++ == 2) { + $root->textContent = 'overwrite'; + } + var_dump($node->tagName, $node->getAttribute('id')); +} + +echo "-- Empty iteration --\n"; +foreach ($doc->getElementsByTagName('e') as $node) { + echo "Should not execute\n"; +} + +echo "-- After adding an element again --\n"; +$root->appendChild(new DOMElement('e')); +foreach ($doc->getElementsByTagName('e') as $node) { + echo "Should execute once\n"; +} +?> +--EXPECT-- +-- Overwrite during iteration -- +string(1) "e" +string(1) "1" +string(1) "e" +string(1) "2" +string(1) "e" +string(1) "3" +-- Empty iteration -- +-- After adding an element again -- +Should execute once diff --git a/ext/dom/tests/DOMDocument_loadXML_error2_gte2_11.phpt b/ext/dom/tests/DOMDocument_loadXML_error2_gte2_11.phpt new file mode 100644 index 0000000000000..ff5ceb3fbed53 
--- /dev/null +++ b/ext/dom/tests/DOMDocument_loadXML_error2_gte2_11.phpt @@ -0,0 +1,34 @@ +--TEST-- +Test DOMDocument::loadXML() detects not-well formed XML +--SKIPIF-- +<?php +if (LIBXML_VERSION < 21100) die('skip libxml2 test variant for version >= 2.11'); +?> +--DESCRIPTION-- +This test verifies the method detects attributes values not closed between " or ' +Environment variables used in the test: +- XML_FILE: the xml file to load +- LOAD_OPTIONS: the second parameter to pass to the method +- EXPECTED_RESULT: the expected result +--CREDITS-- +Antonio Diaz Ruiz +--INI-- +assert.bail=true +--EXTENSIONS-- +dom +--ENV-- +XML_FILE=/not_well_formed2.xml +LOAD_OPTIONS=0 +EXPECTED_RESULT=0 +--FILE_EXTERNAL-- +domdocumentloadxml_test_method.inc +--EXPECTF-- +Warning: DOMDocument::loadXML(): AttValue: " or ' expected in Entity, line: 4 in %s on line %d + +Warning: DOMDocument::loadXML(): internal error: xmlParseStartTag: problem parsing attributes in Entity, line: 4 in %s on line %d + +Warning: DOMDocument::loadXML(): Couldn't find end of Start Tag book line 4 in Entity, line: 4 in %s on line %d + +Warning: DOMDocument::loadXML(): Opening and ending tag mismatch: books line 3 and book in Entity, line: 7 in %s on line %d + +Warning: DOMDocument::loadXML(): Extra content at the end of the document in Entity, line: 8 in %s on line %d diff --git a/ext/dom/tests/DOMDocument_loadXML_error2.phpt b/ext/dom/tests/DOMDocument_loadXML_error2_pre2_11.phpt similarity index 90% rename from ext/dom/tests/DOMDocument_loadXML_error2.phpt rename to ext/dom/tests/DOMDocument_loadXML_error2_pre2_11.phpt index a24d5215da48b..c826386f7a4e1 100644 --- a/ext/dom/tests/DOMDocument_loadXML_error2.phpt +++ b/ext/dom/tests/DOMDocument_loadXML_error2_pre2_11.phpt @@ -1,5 +1,9 @@ --TEST-- Test DOMDocument::loadXML() detects not-well formed XML +--SKIPIF-- +<?php +if (LIBXML_VERSION >= 21100) die('skip libxml2 test variant for version < 2.11'); +?> --DESCRIPTION-- This test verifies the method detects attributes values not closed between " or ' Environment variables used in the test: diff --git
a/ext/dom/tests/DOMDocument_load_error2_gte2_11.phpt b/ext/dom/tests/DOMDocument_load_error2_gte2_11.phpt new file mode 100644 index 0000000000000..32b6bf161142e --- /dev/null +++ b/ext/dom/tests/DOMDocument_load_error2_gte2_11.phpt @@ -0,0 +1,34 @@ +--TEST-- +Test DOMDocument::load() detects not-well formed XML +--SKIPIF-- +<?php +if (LIBXML_VERSION < 21100) die('skip libxml2 test variant for version >= 2.11'); +?> +--DESCRIPTION-- +This test verifies the method detects attributes values not closed between " or ' +Environment variables used in the test: +- XML_FILE: the xml file to load +- LOAD_OPTIONS: the second parameter to pass to the method +- EXPECTED_RESULT: the expected result +--CREDITS-- +Antonio Diaz Ruiz +--INI-- +assert.bail=true +--EXTENSIONS-- +dom +--ENV-- +XML_FILE=/not_well_formed2.xml +LOAD_OPTIONS=0 +EXPECTED_RESULT=0 +--FILE_EXTERNAL-- +domdocumentload_test_method.inc +--EXPECTF-- +Warning: DOMDocument::load(): AttValue: " or ' expected in %s on line %d + +Warning: DOMDocument::load(): internal error: xmlParseStartTag: problem parsing attributes in %s on line %d + +Warning: DOMDocument::load(): Couldn't find end of Start Tag book line 4 in %s on line %d + +Warning: DOMDocument::load(): Opening and ending tag mismatch: books line 3 and book in %s on line %d + +Warning: DOMDocument::load(): Extra content at the end of the document in %s on line %d diff --git a/ext/dom/tests/DOMDocument_load_error2.phpt b/ext/dom/tests/DOMDocument_load_error2_pre2_11.phpt similarity index 90% rename from ext/dom/tests/DOMDocument_load_error2.phpt rename to ext/dom/tests/DOMDocument_load_error2_pre2_11.phpt index cd13b3f901b27..695740be9ca92 100644 --- a/ext/dom/tests/DOMDocument_load_error2.phpt +++ b/ext/dom/tests/DOMDocument_load_error2_pre2_11.phpt @@ -1,5 +1,9 @@ --TEST-- Test DOMDocument::load() detects not-well formed XML +--SKIPIF-- +<?php +if (LIBXML_VERSION >= 21100) die('skip libxml2 test variant for version < 2.11'); +?> --DESCRIPTION-- This test verifies the method detects attributes values not closed between " or ' Environment variables used in the test:
diff --git a/ext/dom/tests/DOMElement_append_hierarchy_test.phpt b/ext/dom/tests/DOMElement_append_hierarchy_test.phpt new file mode 100644 index 0000000000000..2d70b10fe9f70 --- /dev/null +++ b/ext/dom/tests/DOMElement_append_hierarchy_test.phpt @@ -0,0 +1,89 @@ +--TEST-- +DOMElement::append() with hierarchy changes and errors +--EXTENSIONS-- +dom +--FILE-- +loadXML('

helloworld

'); + +echo "-- Append hello with world --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_hello->append($b_world); +var_dump($dom->saveHTML()); + +echo "-- Append hello with world's child --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_hello->append($b_world->firstChild); +var_dump($dom->saveHTML()); + +echo "-- Append world's child with hello --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_world->firstChild->append($b_hello); +var_dump($dom->saveHTML()); + +echo "-- Append hello with itself --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +try { + $b_hello->append($b_hello); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom->saveHTML()); + +echo "-- Append world's i tag with the parent --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +try { + $b_world->firstChild->append($b_world); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom->saveHTML()); + +echo "-- Append from another document --\n"; +$dom = clone $dom_original; +$dom2 = new DOMDocument; +$dom2->loadXML('

other

'); +try { + $dom->firstChild->firstChild->prepend($dom2->firstChild); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom2->saveHTML()); +var_dump($dom->saveHTML()); + +?> +--EXPECT-- +-- Append hello with world -- +string(39) "

helloworld

+" +-- Append hello with world's child -- +string(39) "

helloworld

+" +-- Append world's child with hello -- +string(39) "

worldhello

+" +-- Append hello with itself -- +Hierarchy Request Error +string(39) "

helloworld

+" +-- Append world's i tag with the parent -- +Hierarchy Request Error +string(39) "

helloworld

+" +-- Append from another document -- +Wrong Document Error +string(13) "

other

+" +string(39) "

helloworld

+" diff --git a/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt new file mode 100644 index 0000000000000..9aebf3139cdf9 --- /dev/null +++ b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt @@ -0,0 +1,16 @@ +--TEST-- +Node list cache should not break on DOMElement::getElementsByTagName() without document +--EXTENSIONS-- +dom +--FILE-- +getElementsByTagName("b") as $x) { + var_dump($x); +} + +?> +Done +--EXPECT-- +Done diff --git a/ext/dom/tests/DOMElement_prepend_hierarchy_test.phpt b/ext/dom/tests/DOMElement_prepend_hierarchy_test.phpt new file mode 100644 index 0000000000000..4d9cf24a61828 --- /dev/null +++ b/ext/dom/tests/DOMElement_prepend_hierarchy_test.phpt @@ -0,0 +1,89 @@ +--TEST-- +DOMElement::prepend() with hierarchy changes and errors +--EXTENSIONS-- +dom +--FILE-- +loadXML('

helloworld

'); + +echo "-- Prepend hello with world --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_hello->prepend($b_world); +var_dump($dom->saveHTML()); + +echo "-- Prepend hello with world's child --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_hello->prepend($b_world->firstChild); +var_dump($dom->saveHTML()); + +echo "-- Prepend world's child with hello --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +$b_world->firstChild->prepend($b_hello); +var_dump($dom->saveHTML()); + +echo "-- Prepend hello with itself --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +try { + $b_hello->prepend($b_hello); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom->saveHTML()); + +echo "-- Prepend world's i tag with the parent --\n"; +$dom = clone $dom_original; +$b_hello = $dom->firstChild->firstChild; +$b_world = $b_hello->nextSibling; +try { + $b_world->firstChild->prepend($b_world); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom->saveHTML()); + +echo "-- Append from another document --\n"; +$dom = clone $dom_original; +$dom2 = new DOMDocument; +$dom2->loadXML('

other

'); +try { + $dom->firstChild->firstChild->prepend($dom2->firstChild); +} catch (\DOMException $e) { + echo $e->getMessage(), "\n"; +} +var_dump($dom2->saveHTML()); +var_dump($dom->saveHTML()); + +?> +--EXPECT-- +-- Prepend hello with world -- +string(39) "

worldhello

+" +-- Prepend hello with world's child -- +string(39) "

worldhello

+" +-- Prepend world's child with hello -- +string(39) "

helloworld

+" +-- Prepend hello with itself -- +Hierarchy Request Error +string(39) "

helloworld

+" +-- Prepend world's i tag with the parent -- +Hierarchy Request Error +string(39) "

helloworld

+" +-- Append from another document -- +Wrong Document Error +string(13) "

other

+" +string(39) "

helloworld

+" diff --git a/ext/dom/tests/bug67440.phpt b/ext/dom/tests/bug67440.phpt new file mode 100644 index 0000000000000..3e30f69b9ae4d --- /dev/null +++ b/ext/dom/tests/bug67440.phpt @@ -0,0 +1,151 @@ +--TEST-- +Bug #67440 (append_node of a DOMDocumentFragment does not reconcile namespaces) +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + $fragment = $document->createDocumentFragment(); + $fragment->appendChild($document->createTextNode("\n")); + $fragment->appendChild($document->createElementNS('http://example/ns', 'myns:childNode', '1')); + $fragment->appendChild($document->createTextNode("\n")); + $fragment->appendChild($document->createElementNS('http://example/ns', 'myns:childNode', '2')); + $fragment->appendChild($document->createTextNode("\n")); + return array($document, $fragment); +} + +function case1($method) { + list($document, $fragment) = createDocument(); + $document->documentElement->{$method}($fragment); + echo $document->saveXML(); +} + +function case2($method) { + list($document, $fragment) = createDocument(); + $childNodes = iterator_to_array($fragment->childNodes); + foreach ($childNodes as $childNode) { + $document->documentElement->{$method}($childNode); + } + echo $document->saveXML(); +} + +function case3($method) { + list($document, $fragment) = createDocument(); + $fragment->removeChild($fragment->firstChild); + $document->documentElement->{$method}($fragment); + echo $document->saveXML(); +} + +function case4($method) { + list($document, $fragment) = createDocument(); + $fragment->childNodes[1]->appendChild($document->createElementNS('http://example/ns2', 'myns2:childNode', '3')); + $document->documentElement->{$method}($fragment); + echo $document->saveXML(); +} + +echo "== appendChild ==\n"; +echo "-- fragment to document element --\n"; case1('appendChild'); echo "\n"; +echo "-- children manually document element --\n"; case2('appendChild'); echo "\n"; +echo "-- fragment to document where first element is not a text node --\n"; 
case3('appendChild'); echo "\n"; +echo "-- fragment with namespace declarations in children --\n"; case4('appendChild'); echo "\n"; + +echo "== insertBefore ==\n"; +echo "-- fragment to document element --\n"; case1('insertBefore'); echo "\n"; +echo "-- children manually document element --\n"; case2('insertBefore'); echo "\n"; +echo "-- fragment to document where first element is not a text node --\n"; case3('insertBefore'); echo "\n"; +echo "-- fragment with namespace declarations in children --\n"; case4('insertBefore'); echo "\n"; + +echo "== insertAfter ==\n"; +echo "-- fragment to document element --\n"; case1('insertBefore'); echo "\n"; +echo "-- children manually document element --\n"; case2('insertBefore'); echo "\n"; +echo "-- fragment to document where first element is not a text node --\n"; case3('insertBefore'); echo "\n"; +echo "-- fragment with namespace declarations in children --\n"; case4('insertBefore'); echo "\n"; + +?> +--EXPECT-- +== appendChild == +-- fragment to document element -- + + +1 +2 + + +-- children manually document element -- + + +1 +2 + + +-- fragment to document where first element is not a text node -- + +1 +2 + + +-- fragment with namespace declarations in children -- + + +13 +2 + + +== insertBefore == +-- fragment to document element -- + + +1 +2 + + +-- children manually document element -- + + +1 +2 + + +-- fragment to document where first element is not a text node -- + +1 +2 + + +-- fragment with namespace declarations in children -- + + +13 +2 + + +== insertAfter == +-- fragment to document element -- + + +1 +2 + + +-- children manually document element -- + + +1 +2 + + +-- fragment to document where first element is not a text node -- + +1 +2 + + +-- fragment with namespace declarations in children -- + + +13 +2 + diff --git a/ext/dom/tests/bug70359.phpt b/ext/dom/tests/bug70359.phpt new file mode 100644 index 0000000000000..b0a5ae57a3232 --- /dev/null +++ b/ext/dom/tests/bug70359.phpt @@ -0,0 +1,83 @@ +--TEST-- +Bug 
#70359 (print_r() on DOMAttr causes Segfault in php_libxml_node_free_list()) +--EXTENSIONS-- +dom +--FILE-- +loadXML(<< + +XML); +$spaceNode = $dom->documentElement->getAttributeNode('xmlns'); +print_r($spaceNode); + +echo "-- Test with parent and non-ns attribute --\n"; + +$dom = new DOMDocument(); +$dom->loadXML(<< + + + +XML); +$spaceNode = $dom->documentElement->firstElementChild->getAttributeNode('myattrib'); +var_dump($spaceNode->nodeType); +var_dump($spaceNode->nodeValue); + +$dom->documentElement->firstElementChild->remove(); +try { + print_r($spaceNode->parentNode); +} catch (\Error $e) { + echo $e->getMessage(), "\n"; +} + +echo "-- Test with parent and ns attribute --\n"; + +$dom = new DOMDocument(); +$dom->loadXML(<< + + + +XML); +$spaceNode = $dom->documentElement->firstElementChild->getAttributeNode('xmlns:xsi'); +print_r($spaceNode); + +$dom->documentElement->firstElementChild->remove(); +var_dump($spaceNode->parentNode->nodeName); // Shouldn't crash + +?> +--EXPECT-- +-- Test without parent -- +DOMNameSpaceNode Object +( + [nodeName] => xmlns + [nodeValue] => http://www.sitemaps.org/schemas/sitemap/0.9 + [nodeType] => 18 + [prefix] => + [localName] => xmlns + [namespaceURI] => http://www.sitemaps.org/schemas/sitemap/0.9 + [ownerDocument] => (object value omitted) + [parentNode] => (object value omitted) +) +-- Test with parent and non-ns attribute -- +int(2) +string(3) "bar" +Couldn't fetch DOMAttr. 
Node no longer exists +-- Test with parent and ns attribute -- +DOMNameSpaceNode Object +( + [nodeName] => xmlns:xsi + [nodeValue] => fooooooooooooooooooooo + [nodeType] => 18 + [prefix] => xsi + [localName] => xsi + [namespaceURI] => fooooooooooooooooooooo + [ownerDocument] => (object value omitted) + [parentNode] => (object value omitted) +) +string(3) "url" diff --git a/ext/dom/tests/bug77686.phpt b/ext/dom/tests/bug77686.phpt new file mode 100644 index 0000000000000..ddd7c3364786c --- /dev/null +++ b/ext/dom/tests/bug77686.phpt @@ -0,0 +1,40 @@ +--TEST-- +Bug #77686 (Removed elements are still returned by getElementById) +--EXTENSIONS-- +dom +--FILE-- +loadHTML('before
hello
after'); +$body = $doc->getElementById('x'); +$div = $doc->getElementById('y'); +var_dump($doc->getElementById('y')->textContent); + +// Detached from document, should not find it anymore +$body->removeChild($div); +var_dump($doc->getElementById('y')); + +// Added again, should find it +$body->appendChild($div); +var_dump($doc->getElementById('y')->textContent); + +// Should find root element without a problem +var_dump($doc->getElementById('htmlelement')->textContent); + +// Created element but not yet attached, should not find it before it is added +$new_element = $doc->createElement('p'); +$new_element->textContent = 'my new text'; +$new_element->setAttribute('id', 'myp'); +var_dump($doc->getElementById('myp')); +$body->appendChild($new_element); +var_dump($doc->getElementById('myp')->textContent); + +?> +--EXPECT-- +string(5) "hello" +NULL +string(5) "hello" +string(16) "beforeafterhello" +NULL +string(11) "my new text" diff --git a/ext/dom/tests/bug78577.phpt b/ext/dom/tests/bug78577.phpt new file mode 100644 index 0000000000000..2631efc1e206c --- /dev/null +++ b/ext/dom/tests/bug78577.phpt @@ -0,0 +1,33 @@ +--TEST-- +Bug #78577 (Crash in DOMNameSpace debug info handlers) +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + +$attr = $doc->documentElement->getAttributeNode('xmlns'); +var_dump($attr); + +?> +--EXPECT-- +object(DOMNameSpaceNode)#3 (8) { + ["nodeName"]=> + string(5) "xmlns" + ["nodeValue"]=> + string(19) "http://php.net/test" + ["nodeType"]=> + int(18) + ["prefix"]=> + string(0) "" + ["localName"]=> + string(5) "xmlns" + ["namespaceURI"]=> + string(19) "http://php.net/test" + ["ownerDocument"]=> + string(22) "(object value omitted)" + ["parentNode"]=> + string(22) "(object value omitted)" +} diff --git a/ext/dom/tests/bug80602.phpt b/ext/dom/tests/bug80602.phpt index 9f041f686f516..844d829cb08d0 100644 --- a/ext/dom/tests/bug80602.phpt +++ b/ext/dom/tests/bug80602.phpt @@ -8,84 +8,84 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = 
$doc->documentElement->firstChild; $target->before($target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "1 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "2 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "3 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($doc->documentElement->firstChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "4 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($target, $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "5 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($doc->documentElement->lastChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "6 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($target, $doc->documentElement->firstChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "7 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($doc->documentElement->firstChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "8 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); 
$doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before('bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "9 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before('bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "10 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($target, 'bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "11 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before('bar', $target, 'baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "12 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before('bar', 'baz', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "13 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -93,19 +93,19 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($target, 'bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "14 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before('bar', $target, 'baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "15 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before('bar', 'baz', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "16 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -113,21 +113,21 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); 
$target = $doc->documentElement->firstChild; $target->before('bar', $target, $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "17 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($target, 'bar', $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "18 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->before($target, $doc->documentElement->lastChild, 'bar'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "19 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -136,43 +136,43 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before('bar', $doc->documentElement->firstChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "20 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($doc->documentElement->firstChild, 'bar', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "21 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->before($doc->documentElement->firstChild, $target, 'bar'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "22 ", $doc->saveXML($doc->documentElement).PHP_EOL; ?> --EXPECTF-- -foo -foo -foo -foo -foo -foo -foo -foo -barbazfoo -foobarbaz -foobarbaz -barfoobaz -barbazfoo -foobarbaz -foobarbaz -foobarbaz -barfoo -foobar -foobar -barfoo -foobar -foobar +1 foo +2 foo +3 foo +4 foo +5 foo +6 foo +7 foo +8 foo +9 barbazfoo +10 foobarbaz +11 foobarbaz +12 barfoobaz +13 barbazfoo +14 foobarbaz +15 foobarbaz +16 foobarbaz +17 barfoo +18 foobar +19 foobar +20 
barfoo +21 foobar +22 foobar diff --git a/ext/dom/tests/bug80602_2.phpt b/ext/dom/tests/bug80602_2.phpt index 1151417c0f845..7c5070f51424c 100644 --- a/ext/dom/tests/bug80602_2.phpt +++ b/ext/dom/tests/bug80602_2.phpt @@ -8,84 +8,84 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "1 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "2 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "3 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($doc->documentElement->firstChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "4 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target, $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "5 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($doc->documentElement->lastChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "6 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($target, $doc->documentElement->firstChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "7 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = 
new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($doc->documentElement->firstChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "8 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after('bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "9 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after('bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "10 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target, 'bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "11 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after('bar', $target, 'baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "12 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after('bar', 'baz', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "13 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -93,19 +93,19 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($target, 'bar','baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "14 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after('bar', $target, 'baz'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "15 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); 
$doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after('bar', 'baz', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "16 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -113,21 +113,21 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after('bar', $target, $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "17 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target, 'bar', $doc->documentElement->lastChild); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "18 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->firstChild; $target->after($target, $doc->documentElement->lastChild, 'bar'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "19 ", $doc->saveXML($doc->documentElement).PHP_EOL; @@ -136,43 +136,43 @@ $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after('bar', $doc->documentElement->firstChild, $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "20 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($doc->documentElement->firstChild, 'bar', $target); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "21 ", $doc->saveXML($doc->documentElement).PHP_EOL; $doc = new \DOMDocument(); $doc->loadXML('foo'); $target = $doc->documentElement->lastChild; $target->after($doc->documentElement->firstChild, $target, 'bar'); -echo $doc->saveXML($doc->documentElement).PHP_EOL; +echo "22 ", $doc->saveXML($doc->documentElement).PHP_EOL; ?> --EXPECTF-- -foo -foo -foo -foo -foo -foo -foo -foo -foobarbaz -foobarbaz -foobarbaz -barfoobaz -barbazfoo 
-foobarbaz -foobarbaz -foobarbaz -barfoo -foobar -foobar -barfoo -foobar -foobar +1 foo +2 foo +3 foo +4 foo +5 foo +6 foo +7 foo +8 foo +9 foobarbaz +10 foobarbaz +11 foobarbaz +12 barfoobaz +13 barbazfoo +14 foobarbaz +15 foobarbaz +16 foobarbaz +17 barfoo +18 foobar +19 foobar +20 barfoo +21 foobar +22 foobar diff --git a/ext/dom/tests/bug80602_3.phpt b/ext/dom/tests/bug80602_3.phpt new file mode 100644 index 0000000000000..f9bf67e778da5 --- /dev/null +++ b/ext/dom/tests/bug80602_3.phpt @@ -0,0 +1,120 @@ +--TEST-- +Bug #80602 (Segfault when using DOMChildNode::before()) - use-after-free variation +--FILE-- +loadXML('foo'); +$target = $doc->documentElement->lastChild; +$target->before('bar', $doc->documentElement->firstChild, 'baz'); +echo $doc->saveXML($doc->documentElement), "\n"; +var_dump($target); + +$doc = new \DOMDocument(); +$doc->loadXML('foo'); +$target = $doc->documentElement->lastChild; +// Note: after instead of before +$target->after('bar', $doc->documentElement->firstChild, 'baz'); +echo $doc->saveXML($doc->documentElement), "\n"; +var_dump($target); + +?> +--EXPECTF-- +barfoobaz +object(DOMElement)#3 (23) { + ["schemaTypeInfo"]=> + NULL + ["tagName"]=> + string(4) "last" + ["firstElementChild"]=> + NULL + ["lastElementChild"]=> + NULL + ["childElementCount"]=> + int(0) + ["previousElementSibling"]=> + NULL + ["nextElementSibling"]=> + NULL + ["nodeName"]=> + string(4) "last" + ["nodeValue"]=> + string(0) "" + ["nodeType"]=> + int(1) + ["parentNode"]=> + string(22) "(object value omitted)" + ["childNodes"]=> + string(22) "(object value omitted)" + ["firstChild"]=> + NULL + ["lastChild"]=> + NULL + ["previousSibling"]=> + string(22) "(object value omitted)" + ["nextSibling"]=> + NULL + ["attributes"]=> + string(22) "(object value omitted)" + ["ownerDocument"]=> + string(22) "(object value omitted)" + ["namespaceURI"]=> + NULL + ["prefix"]=> + string(0) "" + ["localName"]=> + string(4) "last" + ["baseURI"]=> + string(%d) %s + ["textContent"]=> + 
string(0) "" +} +barfoobaz +object(DOMElement)#2 (23) { + ["schemaTypeInfo"]=> + NULL + ["tagName"]=> + string(4) "last" + ["firstElementChild"]=> + NULL + ["lastElementChild"]=> + NULL + ["childElementCount"]=> + int(0) + ["previousElementSibling"]=> + NULL + ["nextElementSibling"]=> + NULL + ["nodeName"]=> + string(4) "last" + ["nodeValue"]=> + string(0) "" + ["nodeType"]=> + int(1) + ["parentNode"]=> + string(22) "(object value omitted)" + ["childNodes"]=> + string(22) "(object value omitted)" + ["firstChild"]=> + NULL + ["lastChild"]=> + NULL + ["previousSibling"]=> + NULL + ["nextSibling"]=> + string(22) "(object value omitted)" + ["attributes"]=> + string(22) "(object value omitted)" + ["ownerDocument"]=> + string(22) "(object value omitted)" + ["namespaceURI"]=> + NULL + ["prefix"]=> + string(0) "" + ["localName"]=> + string(4) "last" + ["baseURI"]=> + string(%d) %s + ["textContent"]=> + string(0) "" +} diff --git a/ext/dom/tests/bug80602_4.phpt b/ext/dom/tests/bug80602_4.phpt new file mode 100644 index 0000000000000..a1df8d10caa31 --- /dev/null +++ b/ext/dom/tests/bug80602_4.phpt @@ -0,0 +1,33 @@ +--TEST-- +Bug #80602 (Segfault when using DOMChildNode::before()) - after text merge variation +--FILE-- +loadXML('foobar'); +$foo = $doc->firstChild->firstChild; +$bar = $doc->firstChild->lastChild; + +$foo->after($bar); + +var_dump($doc->saveXML()); + +$foo->nodeValue = "x"; + +var_dump($doc->saveXML()); + +$bar->nodeValue = "y"; + +var_dump($doc->saveXML()); + +?> +--EXPECT-- +string(43) " +foobar +" +string(41) " +xbar +" +string(39) " +xy +" diff --git a/ext/dom/tests/bug81642.phpt b/ext/dom/tests/bug81642.phpt new file mode 100644 index 0000000000000..7bf3dde50588e --- /dev/null +++ b/ext/dom/tests/bug81642.phpt @@ -0,0 +1,49 @@ +--TEST-- +Bug #81642 (DOMChildNode::replaceWith() bug when replacing a node with itself) +--EXTENSIONS-- +dom +--FILE-- +appendChild($target = $doc->createElement('test')); +$target->replaceWith($target); +var_dump($doc->saveXML()); 
+ +// Replace with itself + another element +$doc = new DOMDocument(); +$doc->appendChild($target = $doc->createElement('test')); +$target->replaceWith($target, $doc->createElement('foo')); +var_dump($doc->saveXML()); + +// Replace with text node +$doc = new DOMDocument(); +$doc->appendChild($target = $doc->createElement('test')); +$target->replaceWith($target, 'foo'); +var_dump($doc->saveXML()); + +// Replace with text node variant 2 +$doc = new DOMDocument(); +$doc->appendChild($target = $doc->createElement('test')); +$target->replaceWith('bar', $target, 'foo'); +var_dump($doc->saveXML()); + +?> +--EXPECT-- +string(30) " + +" +string(37) " + + +" +string(34) " + +foo +" +string(38) " +bar + +foo +" diff --git a/ext/dom/tests/bug_lifetime_parentNode_getAttributeNodeNS.phpt b/ext/dom/tests/bug_lifetime_parentNode_getAttributeNodeNS.phpt new file mode 100644 index 0000000000000..3c53e08d4db76 --- /dev/null +++ b/ext/dom/tests/bug_lifetime_parentNode_getAttributeNodeNS.phpt @@ -0,0 +1,20 @@ +--TEST-- +Lifetime issue with parentNode on getAttributeNodeNS() +--EXTENSIONS-- +dom +--FILE-- + + + +'; + +$xml=new DOMDocument(); +$xml->loadXML($xmlString); +$ns2 = $xml->documentElement->getAttributeNodeNS("http://www.w3.org/2000/xmlns/", "ns2"); +$ns2->parentNode->remove(); +var_dump($ns2->parentNode->localName); + +?> +--EXPECT-- +string(4) "root" diff --git a/ext/dom/tests/clone_nodes.phpt b/ext/dom/tests/clone_nodes.phpt new file mode 100644 index 0000000000000..1841c702caf8d --- /dev/null +++ b/ext/dom/tests/clone_nodes.phpt @@ -0,0 +1,72 @@ +--TEST-- +Clone nodes +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + +$attr = $doc->documentElement->getAttributeNode('xmlns'); +var_dump($attr); + +$attrClone = clone $attr; +var_dump($attrClone->nodeValue); +var_dump($attrClone->parentNode->nodeName); + +unset($doc); +unset($attr); + +var_dump($attrClone->nodeValue); +var_dump($attrClone->parentNode->nodeName); + +echo "-- Clone DOMNode --\n"; + +$doc = new DOMDocument; 
+$doc->loadXML(''); + +$bar = $doc->documentElement->firstChild; +$barClone = clone $bar; +$bar->remove(); +unset($bar); + +var_dump($barClone->nodeName); + +$doc->firstElementChild->remove(); +unset($doc); + +var_dump($barClone->nodeName); +var_dump($barClone->parentNode); + +?> +--EXPECT-- +-- Clone DOMNameSpaceNode -- +object(DOMNameSpaceNode)#3 (8) { + ["nodeName"]=> + string(5) "xmlns" + ["nodeValue"]=> + string(19) "http://php.net/test" + ["nodeType"]=> + int(18) + ["prefix"]=> + string(0) "" + ["localName"]=> + string(5) "xmlns" + ["namespaceURI"]=> + string(19) "http://php.net/test" + ["ownerDocument"]=> + string(22) "(object value omitted)" + ["parentNode"]=> + string(22) "(object value omitted)" +} +string(19) "http://php.net/test" +string(3) "foo" +string(19) "http://php.net/test" +string(3) "foo" +-- Clone DOMNode -- +string(3) "bar" +string(3) "bar" +NULL diff --git a/ext/dom/tests/gh10234.phpt b/ext/dom/tests/gh10234.phpt new file mode 100644 index 0000000000000..5edc8fc6c1ff1 --- /dev/null +++ b/ext/dom/tests/gh10234.phpt @@ -0,0 +1,93 @@ +--TEST-- +GH-10234 (Setting DOMAttr::textContent results in an empty attribute value.) 
+--EXTENSIONS-- +dom +--FILE-- +loadXML(''); +$attribute = $document->documentElement->getAttributeNode('attribute'); + +echo "-- Attribute tests --\n"; + +var_dump($document->saveHTML()); +var_dump($attribute->textContent); + +$attribute->textContent = 'new value'; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = 'hello & world'; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = 'hi'; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = 'quote "test"'; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = "quote 'test'"; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +$attribute->textContent = "quote '\"test\"'"; +var_dump($attribute->textContent); +var_dump($document->saveHTML()); + +echo "-- Document element tests --\n"; + +$document->documentElement->textContent = 'hello & world'; +var_dump($document->documentElement->textContent); +var_dump($document->saveHTML()); + +$document->documentElement->textContent = 'hi'; +var_dump($document->documentElement->textContent); +var_dump($document->saveHTML()); + +$document->documentElement->textContent = 'quote "test"'; +var_dump($document->documentElement->textContent); +var_dump($document->saveHTML()); + +$document->documentElement->textContent = "quote 'test'"; +var_dump($document->documentElement->textContent); +var_dump($document->saveHTML()); +?> +--EXPECT-- +-- Attribute tests -- +string(38) " +" +string(5) "value" +string(9) "new value" +string(42) " +" +string(13) "hello & world" +string(50) " +" +string(9) "hi" +string(54) " +" +string(12) "quote "test"" +string(45) " +" +string(12) "quote 'test'" +string(45) " +" +string(14) "quote '"test"'" +string(57) " +" +-- Document element tests -- +string(13) "hello & world" +string(74) "hello & world +" +string(9) "hi" +string(78) "<b>hi</b> +" +string(12) 
"quote "test"" +string(69) "quote "test" +" +string(12) "quote 'test'" +string(69) "quote 'test' +" diff --git a/ext/dom/tests/gh11288.phpt b/ext/dom/tests/gh11288.phpt new file mode 100644 index 0000000000000..f70bea80d9085 --- /dev/null +++ b/ext/dom/tests/gh11288.phpt @@ -0,0 +1,67 @@ +--TEST-- +GH-11288 (Error: Couldn't fetch DOMElement introduced in 8.2.6, 8.1.19) +--FILE-- + + +Loremipsum + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$spans = iterator_to_array($dom->getElementsByTagName('span')->getIterator()); +foreach ($spans as $span) { + if ('unwrap_me' === $span->getAttribute('class')) { + $fragment = $dom->createDocumentFragment(); + $fragment->append(...$span->childNodes); + $span->parentNode?->replaceChild($fragment, $span); + } +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); + +$html = << + +Loremipsum + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$spans = iterator_to_array($dom->getElementsByTagName('span')->getIterator()); +foreach ($spans as $span) { + if ('unwrap_me' === $span->getAttribute('class')) { + $span->replaceWith(...$span->childNodes); + } +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); + +$html = << + +Loremipsum + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$spans = iterator_to_array($dom->getElementsByTagName('span')->getIterator()); +foreach ($spans as $span) { + if ('unwrap_me' === $span->getAttribute('class')) { + $span->replaceWith('abc'); + } +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); +?> +--EXPECT-- +string(108) "Loremipsum" +string(108) "Loremipsum" +string(44) "abc" diff --git a/ext/dom/tests/gh11289.phpt b/ext/dom/tests/gh11289.phpt new file mode 100644 index 0000000000000..7771a486bd66b --- /dev/null +++ b/ext/dom/tests/gh11289.phpt @@ -0,0 +1,28 @@ +--TEST-- +GH-11289 (DOMException: Not Found Error introduced in 8.2.6, 8.1.19) +--FILE-- + + + +
+ + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$divs = iterator_to_array($dom->getElementsByTagName('div')->getIterator()); +foreach ($divs as $div) { + $fragment = $dom->createDocumentFragment(); + $fragment->appendXML('

Hi!

'); + $div->replaceWith(...$fragment->childNodes); +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); +?> +--EXPECT-- +string(55) "

Hi!

" diff --git a/ext/dom/tests/gh11290.phpt b/ext/dom/tests/gh11290.phpt new file mode 100644 index 0000000000000..2900720301041 --- /dev/null +++ b/ext/dom/tests/gh11290.phpt @@ -0,0 +1,27 @@ +--TEST-- +GH-11290 (DOMElement::replaceWith causes crash) +--FILE-- + + + +

Loremipsumdolor

+ + +HTML; + +$dom = new DOMDocument(); +$dom->loadHTML($html); + +$spans = iterator_to_array($dom->getElementsByTagName('span')->getIterator()); +foreach ($spans as $span) { + if ('unwrap_me' === $span->getAttribute('class')) { + $span->replaceWith(...$span->childNodes); + } +} + +var_dump(str_replace("\n", "", $dom->saveHTML())); +?> +--EXPECT-- +string(67) "

Loremipsumdolor

" diff --git a/ext/dom/tests/gh11347.phpt b/ext/dom/tests/gh11347.phpt new file mode 100644 index 0000000000000..189231f925081 --- /dev/null +++ b/ext/dom/tests/gh11347.phpt @@ -0,0 +1,26 @@ +--TEST-- +GH-11347 (Memory leak when calling a static method inside an xpath query) +--EXTENSIONS-- +dom +--FILE-- +loadHTML('hello'); +$xpath = new DOMXpath($doc); +$xpath->registerNamespace("php", "http://php.net/xpath"); +$xpath->registerPHPFunctions(); +$xpath->query("//a[php:function('MyClass::dump', string(@href))]"); + +?> +Done +--EXPECT-- +string(15) "https://php.net" +Done diff --git a/ext/dom/tests/gh9142.phpt b/ext/dom/tests/gh9142.phpt new file mode 100644 index 0000000000000..f72dfa823f38c --- /dev/null +++ b/ext/dom/tests/gh9142.phpt @@ -0,0 +1,20 @@ +--TEST-- +GH-9142 (DOMChildNode replaceWith() double-free error when replacing elements not separated by any whitespace) +--FILE-- +OneTwo'; + +($dom = new DOMDocument('1.0', 'UTF-8'))->loadHTML($document); + +foreach ((new DOMXPath($dom))->query('//var') as $var) { + $var->replaceWith($dom->createElement('p', $var->nodeValue)); +} + +var_dump($dom->saveHTML()); + +?> +--EXPECT-- +string(154) " +

One

Two

+" diff --git a/ext/dom/tests/reconcile_reused_namespace.phpt b/ext/dom/tests/reconcile_reused_namespace.phpt new file mode 100644 index 0000000000000..5f9ab6c0d80fa --- /dev/null +++ b/ext/dom/tests/reconcile_reused_namespace.phpt @@ -0,0 +1,42 @@ +--TEST-- +Reconcile a reused namespace from doc->oldNs +--EXTENSIONS-- +dom +--FILE-- +createElementNS('http://www.w3.org/2000/xhtml', 'html'); + +$dom->loadXML(<< + +XML); +$root = $dom->firstElementChild; + +echo "Add first\n"; +$element = $dom->createElementNS('http://example.com/B', 'p', 'Hello World'); +$root->appendChild($element); + +echo "Add second\n"; +$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World'); +$root->appendChild($element); + +echo "Add third\n"; +$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World'); +$root->appendChild($element); + +var_dump($dom->saveXML()); + +?> +--EXPECT-- +Add first +Add second +Add third +string(201) " +Hello WorldHello WorldHello World +" diff --git a/ext/dom/tests/xpath_domnamespacenode.phpt b/ext/dom/tests/xpath_domnamespacenode.phpt index f0bfbed10dda6..97059c18e54da 100644 --- a/ext/dom/tests/xpath_domnamespacenode.phpt +++ b/ext/dom/tests/xpath_domnamespacenode.phpt @@ -17,7 +17,7 @@ var_dump($nodes->item(0)); ?> --EXPECT-- -object(DOMNameSpaceNode)#3 (8) { +object(DOMNameSpaceNode)#4 (8) { ["nodeName"]=> string(9) "xmlns:xml" ["nodeValue"]=> diff --git a/ext/dom/tests/xpath_domnamespacenode_advanced.phpt b/ext/dom/tests/xpath_domnamespacenode_advanced.phpt new file mode 100644 index 0000000000000..bbc49dc54652d --- /dev/null +++ b/ext/dom/tests/xpath_domnamespacenode_advanced.phpt @@ -0,0 +1,75 @@ +--TEST-- +DOMXPath::query() can return DOMNodeList with DOMNameSpaceNode items - advanced variation +--EXTENSIONS-- +dom +--FILE-- +loadXML(<<<'XML' + + Hello PHP! 
+ +XML); + +$xpath = new DOMXPath($dom); +$query = '//namespace::*'; + +echo "-- All namespace attributes --\n"; + +foreach ($xpath->query($query) as $attribute) { + echo $attribute->nodeName . ' = ' . $attribute->nodeValue . PHP_EOL; + var_dump($attribute->parentNode->tagName); +} + +echo "-- All namespace attributes with removal attempt --\n"; + +foreach ($xpath->query($query) as $attribute) { + echo "Before: ", $attribute->parentNode->tagName, "\n"; + // Second & third attempt should fail because it's no longer in the document + try { + $attribute->parentNode->remove(); + } catch (\DOMException $e) { + echo $e->getMessage(), "\n"; + } + // However, it should not cause a use-after-free + echo "After: ", $attribute->parentNode->tagName, "\n"; +} + +?> +--EXPECT-- +-- All namespace attributes -- +xmlns:xml = http://www.w3.org/XML/1998/namespace +string(4) "root" +xmlns:bar = http://example.com/bar +string(4) "root" +xmlns:foo = http://example.com/foo +string(4) "root" +xmlns:xml = http://www.w3.org/XML/1998/namespace +string(5) "child" +xmlns:bar = http://example.com/bar +string(5) "child" +xmlns:foo = http://example.com/foo +string(5) "child" +xmlns:baz = http://example.com/baz +string(5) "child" +-- All namespace attributes with removal attempt -- +Before: root +After: root +Before: root +Not Found Error +After: root +Before: root +Not Found Error +After: root +Before: child +After: child +Before: child +Not Found Error +After: child +Before: child +Not Found Error +After: child +Before: child +Not Found Error +After: child diff --git a/ext/dom/xpath.c b/ext/dom/xpath.c index 876d8b00dae0e..62e11f6b99bfb 100644 --- a/ext/dom/xpath.c +++ b/ext/dom/xpath.c @@ -101,24 +101,18 @@ static void dom_xpath_ext_function_php(xmlXPathParserContextPtr ctxt, int nargs, zval child; /* not sure, if we need this... 
it's copied from xpath.c */ if (node->type == XML_NAMESPACE_DECL) { - xmlNsPtr curns; - xmlNodePtr nsparent; - - nsparent = node->_private; - curns = xmlNewNs(NULL, node->name, NULL); - if (node->children) { - curns->prefix = xmlStrdup((xmlChar *) node->children); - } - if (node->children) { - node = xmlNewDocNode(node->doc, NULL, (xmlChar *) node->children, node->name); - } else { - node = xmlNewDocNode(node->doc, NULL, (xmlChar *) "xmlns", node->name); - } - node->type = XML_NAMESPACE_DECL; - node->parent = nsparent; - node->ns = curns; + xmlNodePtr nsparent = node->_private; + xmlNsPtr original = (xmlNsPtr) node; + + /* Make sure parent dom object exists, so we can take an extra reference. */ + zval parent_zval; /* don't destroy me, my lifetime is transfered to the fake namespace decl */ + php_dom_create_object(nsparent, &parent_zval, &intern->dom); + dom_object *parent_intern = Z_DOMOBJ_P(&parent_zval); + + node = php_dom_create_fake_namespace_decl(nsparent, original, &child, parent_intern); + } else { + php_dom_create_object(node, &child, &intern->dom); } - php_dom_create_object(node, &child, &intern->dom); add_next_index_zval(&fci.params[i], &child); } } else { @@ -182,7 +176,7 @@ static void dom_xpath_ext_function_php(xmlXPathParserContextPtr ctxt, int nargs, } cleanup: zend_string_release_ex(callable, 0); - zval_ptr_dtor_str(&fci.function_name); + zval_ptr_dtor_nogc(&fci.function_name); if (fci.param_count > 0) { for (i = 0; i < nargs - 1; i++) { zval_ptr_dtor(&fci.params[i]); @@ -421,24 +415,18 @@ static void php_xpath_eval(INTERNAL_FUNCTION_PARAMETERS, int type) /* {{{ */ zval child; if (node->type == XML_NAMESPACE_DECL) { - xmlNsPtr curns; - xmlNodePtr nsparent; + xmlNodePtr nsparent = node->_private; + xmlNsPtr original = (xmlNsPtr) node; - nsparent = node->_private; - curns = xmlNewNs(NULL, node->name, NULL); - if (node->children) { - curns->prefix = xmlStrdup((xmlChar *) node->children); - } - if (node->children) { - node = xmlNewDocNode(docp, NULL, 
(xmlChar *) node->children, node->name); - } else { - node = xmlNewDocNode(docp, NULL, (xmlChar *) "xmlns", node->name); - } - node->type = XML_NAMESPACE_DECL; - node->parent = nsparent; - node->ns = curns; + /* Make sure parent dom object exists, so we can take an extra reference. */ + zval parent_zval; /* don't destroy me, my lifetime is transfered to the fake namespace decl */ + php_dom_create_object(nsparent, &parent_zval, &intern->dom); + dom_object *parent_intern = Z_DOMOBJ_P(&parent_zval); + + node = php_dom_create_fake_namespace_decl(nsparent, original, &child, parent_intern); + } else { + php_dom_create_object(node, &child, &intern->dom); } - php_dom_create_object(node, &child, &intern->dom); add_next_index_zval(&retval, &child); } } else { diff --git a/ext/exif/exif.c b/ext/exif/exif.c index 66a6d7de43bc4..fa5ec9c855d4a 100644 --- a/ext/exif/exif.c +++ b/ext/exif/exif.c @@ -215,6 +215,25 @@ zend_module_entry exif_module_entry = { ZEND_GET_MODULE(exif) #endif +/* php_stream_read() may return early without reading all data, depending on the chunk size + * and whether it's a URL stream or not. This helper keeps reading until the requested amount + * is read or until there is no more data available to read. 
*/ +static ssize_t exif_read_from_stream_file_looped(php_stream *stream, char *buf, size_t count) +{ + ssize_t total_read = 0; + while (total_read < count) { + ssize_t ret = php_stream_read(stream, buf + total_read, count - total_read); + if (ret == -1) { + return -1; + } + if (ret == 0) { + break; + } + total_read += ret; + } + return total_read; +} + /* {{{ php_strnlen * get length of string if buffer if less than buffer size or buffer size */ static size_t php_strnlen(char* str, size_t maxlen) { @@ -3321,7 +3340,7 @@ static bool exif_process_IFD_TAG_impl(image_info_type *ImageInfo, char *dir_entr exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_WARNING, "Wrong file pointer: 0x%08X != 0x%08X", fgot, displacement+offset_val); return false; } - fgot = php_stream_read(ImageInfo->infile, value_ptr, byte_count); + fgot = exif_read_from_stream_file_looped(ImageInfo->infile, value_ptr, byte_count); php_stream_seek(ImageInfo->infile, fpos, SEEK_SET); if (fgot != byte_count) { EFREE_IF(outside); @@ -3846,7 +3865,7 @@ static bool exif_scan_JPEG_header(image_info_type *ImageInfo) Data[0] = (uchar)lh; Data[1] = (uchar)ll; - got = php_stream_read(ImageInfo->infile, (char*)(Data+2), itemlen-2); /* Read the whole section. */ + got = exif_read_from_stream_file_looped(ImageInfo->infile, (char*)(Data+2), itemlen-2); /* Read the whole section. 
*/ if (got != itemlen-2) { exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_WARNING, "Error reading from file: got=x%04X(=%d) != itemlen-2=x%04X(=%d)", got, got, itemlen-2, itemlen-2); return false; @@ -3864,7 +3883,7 @@ static bool exif_scan_JPEG_header(image_info_type *ImageInfo) size = ImageInfo->FileSize - fpos; sn = exif_file_sections_add(ImageInfo, M_PSEUDO, size, NULL); Data = ImageInfo->file.list[sn].data; - got = php_stream_read(ImageInfo->infile, (char*)Data, size); + got = exif_read_from_stream_file_looped(ImageInfo->infile, (char*)Data, size); if (got != size) { EXIF_ERRLOG_FILEEOF(ImageInfo) return false; @@ -4041,7 +4060,9 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_NOTICE, "Read from TIFF: filesize(x%04X), IFD dir(x%04X + x%04X)", ImageInfo->FileSize, dir_offset, 2); #endif php_stream_seek(ImageInfo->infile, dir_offset, SEEK_SET); /* we do not know the order of sections */ - php_stream_read(ImageInfo->infile, (char*)ImageInfo->file.list[sn].data, 2); + if (UNEXPECTED(exif_read_from_stream_file_looped(ImageInfo->infile, (char*)ImageInfo->file.list[sn].data, 2) != 2)) { + return false; + } num_entries = php_ifd_get16u(ImageInfo->file.list[sn].data, ImageInfo->motorola_intel); dir_size = 2/*num dir entries*/ +12/*length of entry*/*(size_t)num_entries +4/* offset to next ifd (points to thumbnail or NULL)*/; if (ImageInfo->FileSize >= dir_size && ImageInfo->FileSize - dir_size >= dir_offset) { @@ -4051,7 +4072,9 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir if (exif_file_sections_realloc(ImageInfo, sn, dir_size)) { return false; } - php_stream_read(ImageInfo->infile, (char*)(ImageInfo->file.list[sn].data+2), dir_size-2); + if (UNEXPECTED(exif_read_from_stream_file_looped(ImageInfo->infile, (char*)(ImageInfo->file.list[sn].data+2), dir_size-2) != dir_size - 2)) { + return false; + } next_offset = 
php_ifd_get32u(ImageInfo->file.list[sn].data + dir_size - 4, ImageInfo->motorola_intel); #ifdef EXIF_DEBUG exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_NOTICE, "Read from TIFF done, next offset x%04X", next_offset); @@ -4139,7 +4162,7 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir #ifdef EXIF_DEBUG exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_NOTICE, "Read from TIFF: filesize(x%04X), IFD(x%04X + x%04X)", ImageInfo->FileSize, dir_offset, ifd_size); #endif - php_stream_read(ImageInfo->infile, (char*)(ImageInfo->file.list[sn].data+dir_size), ifd_size-dir_size); + exif_read_from_stream_file_looped(ImageInfo->infile, (char*)(ImageInfo->file.list[sn].data+dir_size), ifd_size-dir_size); #ifdef EXIF_DEBUG exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_NOTICE, "Read from TIFF, done"); #endif @@ -4190,7 +4213,7 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir if (!ImageInfo->Thumbnail.data) { ImageInfo->Thumbnail.data = safe_emalloc(ImageInfo->Thumbnail.size, 1, 0); php_stream_seek(ImageInfo->infile, ImageInfo->Thumbnail.offset, SEEK_SET); - fgot = php_stream_read(ImageInfo->infile, ImageInfo->Thumbnail.data, ImageInfo->Thumbnail.size); + fgot = exif_read_from_stream_file_looped(ImageInfo->infile, ImageInfo->Thumbnail.data, ImageInfo->Thumbnail.size); if (fgot != ImageInfo->Thumbnail.size) { EXIF_ERRLOG_THUMBEOF(ImageInfo) efree(ImageInfo->Thumbnail.data); @@ -4230,7 +4253,7 @@ static bool exif_process_IFD_in_TIFF_impl(image_info_type *ImageInfo, size_t dir if (!ImageInfo->Thumbnail.data && ImageInfo->Thumbnail.offset && ImageInfo->Thumbnail.size && ImageInfo->read_thumbnail) { ImageInfo->Thumbnail.data = safe_emalloc(ImageInfo->Thumbnail.size, 1, 0); php_stream_seek(ImageInfo->infile, ImageInfo->Thumbnail.offset, SEEK_SET); - fgot = php_stream_read(ImageInfo->infile, ImageInfo->Thumbnail.data, ImageInfo->Thumbnail.size); + fgot = exif_read_from_stream_file_looped(ImageInfo->infile, 
ImageInfo->Thumbnail.data, ImageInfo->Thumbnail.size); if (fgot != ImageInfo->Thumbnail.size) { EXIF_ERRLOG_THUMBEOF(ImageInfo) efree(ImageInfo->Thumbnail.data); @@ -4285,7 +4308,7 @@ static bool exif_scan_FILE_header(image_info_type *ImageInfo) if (ImageInfo->FileSize >= 2) { php_stream_seek(ImageInfo->infile, 0, SEEK_SET); - if (php_stream_read(ImageInfo->infile, (char*)file_header, 2) != 2) { + if (exif_read_from_stream_file_looped(ImageInfo->infile, (char*)file_header, 2) != 2) { return false; } if ((file_header[0]==0xff) && (file_header[1]==M_SOI)) { @@ -4296,7 +4319,7 @@ static bool exif_scan_FILE_header(image_info_type *ImageInfo) exif_error_docref(NULL EXIFERR_CC, ImageInfo, E_WARNING, "Invalid JPEG file"); } } else if (ImageInfo->FileSize >= 8) { - if (php_stream_read(ImageInfo->infile, (char*)(file_header+2), 6) != 6) { + if (exif_read_from_stream_file_looped(ImageInfo->infile, (char*)(file_header+2), 6) != 6) { return false; } if (!memcmp(file_header, "II\x2A\x00", 4)) { diff --git a/ext/exif/tests/gh10834.phpt b/ext/exif/tests/gh10834.phpt new file mode 100644 index 0000000000000..3c9caebdb70b6 --- /dev/null +++ b/ext/exif/tests/gh10834.phpt @@ -0,0 +1,79 @@ +--TEST-- +GH-10834 (exif_read_data() cannot read smaller stream wrapper chunk sizes) +--EXTENSIONS-- +exif +--FILE-- +position >= strlen($this->data); + } + + function stream_open($path, $mode, $options, &$opened_path) { + $this->position = 0; + $this->data = file_get_contents(__DIR__.'/bug50845.jpg'); + return true; + } + + function stream_seek($offset, $whence) { + switch ($whence) { + case SEEK_SET: + if ($offset < strlen($this->data) && $offset >= 0) { + $this->position = $offset; + return true; + } else { + return false; + } + break; + case SEEK_CUR: + if ($offset >= 0) { + $this->position += $offset; + return true; + } else { + return false; + } + break; + case SEEK_END: + if (strlen($this->data) + $offset >= 0) { + $this->position = strlen($this->data) + $offset; + return true; + } else { + 
return false; + } + break; + default: + return false; + } + } + + function stream_read($count) { + $ret = substr($this->data, $this->position, $count); + $this->position += strlen($ret); + return $ret; + } + + function stream_tell() { + return $this->position; + } +} + +stream_wrapper_register('var', 'VariableStream'); + +$fp = fopen('var://myvar', 'rb'); + +stream_set_chunk_size($fp, 10); +$headers = exif_read_data($fp); +var_dump(is_array($headers)); + +fclose($fp); +?> +--EXPECT-- +bool(true) diff --git a/ext/ffi/ffi_parser.c b/ext/ffi/ffi_parser.c index eca10c27d195b..b956f885ee001 100644 --- a/ext/ffi/ffi_parser.c +++ b/ext/ffi/ffi_parser.c @@ -3552,7 +3552,7 @@ static void parse(void) { } } -int zend_ffi_parse_decl(const char *str, size_t len) { +zend_result zend_ffi_parse_decl(const char *str, size_t len) { if (SETJMP(FFI_G(bailout))==0) { FFI_G(allow_vla) = 0; FFI_G(attribute_parsing) = 0; @@ -3565,7 +3565,7 @@ int zend_ffi_parse_decl(const char *str, size_t len) { } } -int zend_ffi_parse_type(const char *str, size_t len, zend_ffi_dcl *dcl) { +zend_result zend_ffi_parse_type(const char *str, size_t len, zend_ffi_dcl *dcl) { int sym; if (SETJMP(FFI_G(bailout))==0) { diff --git a/ext/ftp/ftp.c b/ext/ftp/ftp.c index 358f4fe7bf86c..2b8dca47fab01 100644 --- a/ext/ftp/ftp.c +++ b/ext/ftp/ftp.c @@ -708,7 +708,7 @@ ftp_mlsd_parse_line(HashTable *ht, const char *input) { /* Extract pathname */ ZVAL_STRINGL(&zstr, sp + 1, end - sp - 1); - zend_hash_str_update(ht, "name", sizeof("name")-1, &zstr); + zend_hash_update(ht, ZSTR_KNOWN(ZEND_STR_NAME), &zstr); end = sp; while (input < end) { diff --git a/ext/gmp/php_gmp_int.h b/ext/gmp/php_gmp_int.h index d4ef5f0157332..c37fdb136d023 100644 --- a/ext/gmp/php_gmp_int.h +++ b/ext/gmp/php_gmp_int.h @@ -28,7 +28,7 @@ static inline gmp_object *php_gmp_object_from_zend_object(zend_object *zobj) { PHP_GMP_API zend_class_entry *php_gmp_class_entry(void); /* GMP and MPIR use different datatypes on different platforms */ -#ifdef 
PHP_WIN32 +#ifdef _WIN64 typedef zend_long gmp_long; typedef zend_ulong gmp_ulong; #else diff --git a/ext/hash/hash.c b/ext/hash/hash.c index 2db70351461b4..14da238b264f5 100644 --- a/ext/hash/hash.c +++ b/ext/hash/hash.c @@ -450,7 +450,7 @@ PHP_FUNCTION(hash_file) bool raw_output = 0; HashTable *args = NULL; - ZEND_PARSE_PARAMETERS_START(2, 3) + ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STR(algo) Z_PARAM_STRING(data, data_len) Z_PARAM_OPTIONAL diff --git a/ext/hash/hash.stub.php b/ext/hash/hash.stub.php index 9efd72361cde7..e0439c67bf23d 100644 --- a/ext/hash/hash.stub.php +++ b/ext/hash/hash.stub.php @@ -45,12 +45,14 @@ function hash_copy(HashContext $context): HashContext {} /** * @return array + * @compile-time-eval * @refcount 1 */ function hash_algos(): array {} /** * @return array + * @compile-time-eval * @refcount 1 */ function hash_hmac_algos(): array {} diff --git a/ext/hash/hash_arginfo.h b/ext/hash/hash_arginfo.h index dca428af029b5..d52a5979cb10e 100644 --- a/ext/hash/hash_arginfo.h +++ b/ext/hash/hash_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 7168b9b3d1422d4f8ff9270c5de2f42988a55811 */ + * Stub hash: 8838801d9789d4b77d57b290d993ee37784bbd1f */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_hash, 0, 2, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, algo, IS_STRING, 0) @@ -179,8 +179,8 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(hash_update_file, arginfo_hash_update_file) ZEND_FE(hash_final, arginfo_hash_final) ZEND_FE(hash_copy, arginfo_hash_copy) - ZEND_FE(hash_algos, arginfo_hash_algos) - ZEND_FE(hash_hmac_algos, arginfo_hash_hmac_algos) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(hash_algos, arginfo_hash_algos) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(hash_hmac_algos, arginfo_hash_hmac_algos) ZEND_FE(hash_pbkdf2, arginfo_hash_pbkdf2) ZEND_FE(hash_equals, arginfo_hash_equals) ZEND_FE(hash_hkdf, arginfo_hash_hkdf) diff --git a/ext/hash/tests/hash_file_basic1.phpt b/ext/hash/tests/hash_file_basic1.phpt index 9ffe9a2f6cb01..334bad8c2726a 100644 --- a/ext/hash/tests/hash_file_basic1.phpt +++ b/ext/hash/tests/hash_file_basic1.phpt @@ -42,6 +42,12 @@ echo "sha512: " . hash_file('sha512', $file). "\n"; echo "snefru: " . hash_file('snefru', $file). "\n"; echo "tiger192,3: " . hash_file('tiger192,3', $file). "\n"; echo "whirlpool: " . hash_file('whirlpool', $file). "\n"; +echo "murmur3a: " . hash_file('murmur3a', $file). "\n"; +echo "murmur3a: " . hash_file('murmur3a', $file, false, ['seed' => 1234]). "\n"; +echo "murmur3c: " . hash_file('murmur3c', $file). "\n"; +echo "murmur3c: " . hash_file('murmur3c', $file, false, ['seed' => 1234]). "\n"; +echo "murmur3f: " . hash_file('murmur3f', $file). "\n"; +echo "murmur3f: " . hash_file('murmur3f', $file, false, ['seed' => 1234]). "\n"; echo "adler32(raw): " . bin2hex(hash_file('adler32', $file, TRUE)) . "\n"; echo "md5(raw): " . bin2hex(hash_file('md5', $file, TRUE)). 
"\n"; @@ -70,6 +76,12 @@ sha512: 1f42adaf938fbf136e381b164bae5f984c7f9fe60c82728bd889c14f187c7d63e81a0305 snefru: d414b2345d3e7fa1a31c044cf334bfc1fec24d89e464411998d579d24663895f tiger192,3: 7acf4ebea075fac6fc8ea0e2b4af3cfa71b9460e4c53403a whirlpool: 4248b149e000477269a4a5f1a84d97cfc3d0199b7aaf505913e6f010a6f83276029d11a9ad545374bc710eb59c7d958985023ab886ffa9ec9a23852844c764ec +murmur3a: bc6554c8 +murmur3a: 432e4379 +murmur3c: 8779de509ffc06fb27bcf5fc861504d6 +murmur3c: b43afac65c38a617323020432c170005 +murmur3f: 2b84cd546b2f18a9ab6f893194224afd +murmur3f: 6cc7716646664d6a83d68cb6563ac38e adler32(raw): ff87222e md5(raw): 704bf818448f5bbb94061332d2c889aa sha256(raw): a0f5702fa5d3670b80033d668e8732b70550392abb53841355447f8bb0f72245 diff --git a/ext/iconv/iconv.c b/ext/iconv/iconv.c index c2ed3f258bc88..93e83a95b5f1c 100644 --- a/ext/iconv/iconv.c +++ b/ext/iconv/iconv.c @@ -1978,7 +1978,7 @@ PHP_FUNCTION(iconv_mime_encode) if (pref != NULL) { zval *pzval; - if ((pzval = zend_hash_str_find_deref(Z_ARRVAL_P(pref), "scheme", sizeof("scheme") - 1)) != NULL) { + if ((pzval = zend_hash_find_deref(Z_ARRVAL_P(pref), ZSTR_KNOWN(ZEND_STR_SCHEME))) != NULL) { if (Z_TYPE_P(pzval) == IS_STRING && Z_STRLEN_P(pzval) > 0) { switch (Z_STRVAL_P(pzval)[0]) { case 'B': case 'b': diff --git a/ext/imap/php_imap.c b/ext/imap/php_imap.c index 7723669417af3..f9badf5585da2 100644 --- a/ext/imap/php_imap.c +++ b/ext/imap/php_imap.c @@ -83,7 +83,7 @@ MAILSTREAM DEFAULTPROTO; #endif static void _php_make_header_object(zval *myzvalue, ENVELOPE *en); -static void _php_imap_add_body(zval *arg, BODY *body); +static void _php_imap_add_body(zval *arg, const BODY *body); static zend_string* _php_imap_parse_address(ADDRESS *addresslist, zval *paddress); static zend_string* _php_rfc822_write_address(ADDRESS *addresslist); @@ -1351,6 +1351,33 @@ PHP_FUNCTION(imap_list) /* }}} */ +static void php_imap_populate_mailbox_object(zval *z_object, const FOBJECTLIST *mailbox) +{ + zend_update_property_string( + 
Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "name", strlen("name"), + (char*)mailbox->LTEXT + ); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "attributes", strlen("attributes"), + mailbox->attributes + ); +#ifdef IMAP41 + zend_update_property_str( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "delimiter", strlen("delimiter"), + ZSTR_CHAR((unsigned char)mailbox->delimiter) + ); +#else + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "delimiter", strlen("delimiter"), + mailbox->delimiter + ); +#endif +} + /* {{{ Reads the list of mailboxes and returns a full array of objects containing name, attributes, and delimiter */ /* Author: CJH */ PHP_FUNCTION(imap_getmailboxes) @@ -1359,7 +1386,6 @@ PHP_FUNCTION(imap_getmailboxes) zend_string *ref, *pat; php_imap_object *imap_conn_struct; FOBJECTLIST *cur=NIL; - char *delim=NIL; if (zend_parse_parameters(ZEND_NUM_ARGS(), "OSS", &imap_conn_obj, php_imap_ce, &ref, &pat) == FAILURE) { RETURN_THROWS(); @@ -1377,24 +1403,14 @@ PHP_FUNCTION(imap_getmailboxes) } array_init(return_value); - delim = safe_emalloc(2, sizeof(char), 0); cur=IMAPG(imap_folder_objects); while (cur != NIL) { object_init(&mboxob); - add_property_string(&mboxob, "name", (char*)cur->LTEXT); - add_property_long(&mboxob, "attributes", cur->attributes); -#ifdef IMAP41 - delim[0] = (char)cur->delimiter; - delim[1] = 0; - add_property_string(&mboxob, "delimiter", delim); -#else - add_property_string(&mboxob, "delimiter", cur->delimiter); -#endif + php_imap_populate_mailbox_object(&mboxob, cur); php_imap_list_add_object(return_value, &mboxob); cur=cur->next; } mail_free_foblist(&IMAPG(imap_folder_objects), &IMAPG(imap_folder_objects_tail)); - efree(delim); IMAPG(folderlist_style) = FLIST_ARRAY; /* reset to default */ } /* }}} */ @@ -1431,12 +1447,42 @@ PHP_FUNCTION(imap_listscan) /* }}} */ +static void php_imap_populate_mailbox_properties_object(zval *z_object, const MAILSTREAM *imap_stream) +{ + char date[100]; + 
rfc822_date(date); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Date", strlen("Date"), + date + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Driver", strlen("Driver"), + imap_stream->dtb->name + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Mailbox", strlen("Mailbox"), + imap_stream->mailbox + ); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Nmsgs", strlen("Nmsgs"), + imap_stream->nmsgs + ); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "Recent", strlen("Recent"), + imap_stream->recent + ); +} + /* {{{ Get mailbox properties */ PHP_FUNCTION(imap_check) { zval *imap_conn_obj; php_imap_object *imap_conn_struct; - char date[100]; if (zend_parse_parameters(ZEND_NUM_ARGS(), "O", &imap_conn_obj, php_imap_ce) == FAILURE) { RETURN_THROWS(); @@ -1449,13 +1495,8 @@ PHP_FUNCTION(imap_check) } if (imap_conn_struct->imap_stream->mailbox) { - rfc822_date(date); object_init(return_value); - add_property_string(return_value, "Date", date); - add_property_string(return_value, "Driver", imap_conn_struct->imap_stream->dtb->name); - add_property_string(return_value, "Mailbox", imap_conn_struct->imap_stream->mailbox); - add_property_long(return_value, "Nmsgs", imap_conn_struct->imap_stream->nmsgs); - add_property_long(return_value, "Recent", imap_conn_struct->imap_stream->recent); + php_imap_populate_mailbox_properties_object(return_value, imap_conn_struct->imap_stream); } else { RETURN_FALSE; } @@ -1555,33 +1596,83 @@ PHP_FUNCTION(imap_headerinfo) /* now run through properties that are only going to be returned from a server, not text headers */ - add_property_string(return_value, "Recent", cache->recent ? (cache->seen ? "R": "N") : " "); - add_property_string(return_value, "Unseen", (cache->recent | cache->seen) ? " " : "U"); - add_property_string(return_value, "Flagged", cache->flagged ? 
"F" : " "); - add_property_string(return_value, "Answered", cache->answered ? "A" : " "); - add_property_string(return_value, "Deleted", cache->deleted ? "D" : " "); - add_property_string(return_value, "Draft", cache->draft ? "X" : " "); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Recent", strlen("Recent"), + cache->recent ? ( + cache->seen ? ZSTR_CHAR('R') : ZSTR_CHAR('N') + ) : ZSTR_CHAR(' ') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Unseen", strlen("Unseen"), + (cache->recent | cache->seen) ? ZSTR_CHAR(' ') : ZSTR_CHAR('U') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Flagged", strlen("Flagged"), + cache->flagged ? ZSTR_CHAR('F') : ZSTR_CHAR(' ') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Answered", strlen("Answered"), + cache->answered ? ZSTR_CHAR('A') : ZSTR_CHAR(' ') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Deleted", strlen("Deleted"), + cache->deleted ? ZSTR_CHAR('D') : ZSTR_CHAR(' ') + ); + zend_update_property_str( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Draft", strlen("Draft"), + cache->draft ? 
ZSTR_CHAR('X') : ZSTR_CHAR(' ') + ); snprintf(dummy, sizeof(dummy), "%4ld", cache->msgno); - add_property_string(return_value, "Msgno", dummy); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Msgno", strlen("Msgno"), + dummy + ); mail_date(dummy, cache); - add_property_string(return_value, "MailDate", dummy); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "MailDate", strlen("MailDate"), + dummy + ); snprintf(dummy, sizeof(dummy), "%ld", cache->rfc822_size); - add_property_string(return_value, "Size", dummy); - - add_property_long(return_value, "udate", mail_longdate(cache)); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Size", strlen("Size"), + dummy + ); + + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "udate", strlen("udate"), + mail_longdate(cache) + ); if (en->from && fromlength) { fulladdress[0] = 0x00; mail_fetchfrom(fulladdress, imap_conn_struct->imap_stream, msgno, fromlength); - add_property_string(return_value, "fetchfrom", fulladdress); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "fetchfrom", strlen("fetchfrom"), + fulladdress + ); } if (en->subject && subjectlength) { fulladdress[0] = 0x00; mail_fetchsubject(fulladdress, imap_conn_struct->imap_stream, msgno, subjectlength); - add_property_string(return_value, "fetchsubject", fulladdress); + zend_update_property_string( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "fetchsubject", strlen("fetchsubject"), + fulladdress + ); } } /* }}} */ @@ -1654,7 +1745,6 @@ PHP_FUNCTION(imap_getsubscribed) zend_string *ref, *pat; php_imap_object *imap_conn_struct; FOBJECTLIST *cur=NIL; - char *delim=NIL; if (zend_parse_parameters(ZEND_NUM_ARGS(), "OSS", &imap_conn_obj, php_imap_ce, &ref, &pat) == FAILURE) { RETURN_THROWS(); @@ -1674,24 +1764,14 @@ PHP_FUNCTION(imap_getsubscribed) } array_init(return_value); - delim = safe_emalloc(2, 
sizeof(char), 0); cur=IMAPG(imap_sfolder_objects); while (cur != NIL) { object_init(&mboxob); - add_property_string(&mboxob, "name", (char*)cur->LTEXT); - add_property_long(&mboxob, "attributes", cur->attributes); -#ifdef IMAP41 - delim[0] = (char)cur->delimiter; - delim[1] = 0; - add_property_string(&mboxob, "delimiter", delim); -#else - add_property_string(&mboxob, "delimiter", cur->delimiter); -#endif + php_imap_populate_mailbox_object(&mboxob, cur); php_imap_list_add_object(return_value, &mboxob); cur=cur->next; } mail_free_foblist (&IMAPG(imap_sfolder_objects), &IMAPG(imap_sfolder_objects_tail)); - efree(delim); IMAPG(folderlist_style) = FLIST_ARRAY; /* reset to default */ } /* }}} */ @@ -2001,7 +2081,6 @@ PHP_FUNCTION(imap_mailboxmsginfo) { zval *imap_conn_obj; php_imap_object *imap_conn_struct; - char date[100]; unsigned long msgno; zend_ulong unreadmsg = 0, deletedmsg = 0, msize = 0; @@ -2027,15 +2106,23 @@ PHP_FUNCTION(imap_mailboxmsginfo) } msize = msize + cache->rfc822_size; } - add_property_long(return_value, "Unread", unreadmsg); - add_property_long(return_value, "Deleted", deletedmsg); - add_property_long(return_value, "Nmsgs", imap_conn_struct->imap_stream->nmsgs); - add_property_long(return_value, "Size", msize); - rfc822_date(date); - add_property_string(return_value, "Date", date); - add_property_string(return_value, "Driver", imap_conn_struct->imap_stream->dtb->name); - add_property_string(return_value, "Mailbox", imap_conn_struct->imap_stream->mailbox); - add_property_long(return_value, "Recent", imap_conn_struct->imap_stream->recent); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Unread", strlen("Unread"), + unreadmsg + ); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Deleted", strlen("Deleted"), + deletedmsg + ); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "Size", strlen("Size"), + msize + ); + + 
php_imap_populate_mailbox_properties_object(return_value, imap_conn_struct->imap_stream); } /* }}} */ @@ -2078,13 +2165,55 @@ PHP_FUNCTION(imap_rfc822_write_address) } /* }}} */ +static void php_imap_construct_address_object(zval *z_object, const ADDRESS *address) +{ + if (address->mailbox) { + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "mailbox", strlen("mailbox"), + address->mailbox + ); + } + if (address->host) { + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "host", strlen("host"), + address->host + ); + } + if (address->personal) { + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "personal", strlen("personal"), + address->personal + ); + } + if (address->adl) { + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "adl", strlen("adl"), + address->adl + ); + } +} + +static void php_imap_construct_list_of_addresses(zval *list, const ADDRESS *const address_list) +{ + const ADDRESS *current_address = address_list; + do { + zval tmp_object; + object_init(&tmp_object); + php_imap_construct_address_object(&tmp_object, current_address); + php_imap_list_add_object(list, &tmp_object); + } while ((current_address = current_address->next)); +} + /* {{{ Parses an address string */ PHP_FUNCTION(imap_rfc822_parse_adrlist) { - zval tovals; zend_string *str, *defaulthost; char *str_copy; - ADDRESS *addresstmp; + ADDRESS *address_list; ENVELOPE *env; if (zend_parse_parameters(ZEND_NUM_ARGS(), "SS", &str, &defaulthost) == FAILURE) { @@ -2100,28 +2229,14 @@ PHP_FUNCTION(imap_rfc822_parse_adrlist) array_init(return_value); - addresstmp = env->to; + address_list = env->to; - if (addresstmp) do { - object_init(&tovals); - if (addresstmp->mailbox) { - add_property_string(&tovals, "mailbox", addresstmp->mailbox); - } - if (addresstmp->host) { - add_property_string(&tovals, "host", addresstmp->host); - } - if (addresstmp->personal) { - add_property_string(&tovals, "personal", 
addresstmp->personal); - } - if (addresstmp->adl) { - add_property_string(&tovals, "adl", addresstmp->adl); - } - php_imap_list_add_object(return_value, &tovals); - } while ((addresstmp = addresstmp->next)); + if (address_list) { + php_imap_construct_list_of_addresses(return_value, address_list); + } mail_free_envelope(&env); } -/* }}} */ /* {{{ Convert a mime-encoded text to UTF-8 */ PHP_FUNCTION(imap_utf8) @@ -2671,24 +2786,48 @@ PHP_FUNCTION(imap_status) RETURN_THROWS(); } - object_init(return_value); - if (mail_status(imap_conn_struct->imap_stream, ZSTR_VAL(mbx), flags)) { - add_property_long(return_value, "flags", IMAPG(status_flags)); + object_init(return_value); + + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "flags", strlen("flags"), + IMAPG(status_flags) + ); if (IMAPG(status_flags) & SA_MESSAGES) { - add_property_long(return_value, "messages", IMAPG(status_messages)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "messages", strlen("messages"), + IMAPG(status_messages) + ); } if (IMAPG(status_flags) & SA_RECENT) { - add_property_long(return_value, "recent", IMAPG(status_recent)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "recent", strlen("recent"), + IMAPG(status_recent) + ); } if (IMAPG(status_flags) & SA_UNSEEN) { - add_property_long(return_value, "unseen", IMAPG(status_unseen)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "unseen", strlen("unseen"), + IMAPG(status_unseen) + ); } if (IMAPG(status_flags) & SA_UIDNEXT) { - add_property_long(return_value, "uidnext", IMAPG(status_uidnext)); + zend_update_property_long( + Z_OBJCE_P(return_value), Z_OBJ_P(return_value), + "uidnext", strlen("uidnext"), + IMAPG(status_uidnext) + ); } if (IMAPG(status_flags) & SA_UIDVALIDITY) { - add_property_long(return_value, "uidvalidity", IMAPG(status_uidvalidity)); + zend_update_property_long( + Z_OBJCE_P(return_value), 
Z_OBJ_P(return_value), + "uidvalidity", strlen("uidvalidity"), + IMAPG(status_uidvalidity) + ); } } else { RETURN_FALSE; @@ -2696,108 +2835,213 @@ PHP_FUNCTION(imap_status) } /* }}} */ -/* {{{ Read the structure of a specified body section of a specific message */ -PHP_FUNCTION(imap_bodystruct) +static void php_imap_populate_body_struct_object(zval *z_object, const BODY *body) { - zval *imap_conn_obj; - zend_long msgno; - zend_string *section; - php_imap_object *imap_conn_struct; - zval parametres, param, dparametres, dparam; - PARAMETER *par, *dpar; - BODY *body; - - if (zend_parse_parameters(ZEND_NUM_ARGS(), "OlS", &imap_conn_obj, php_imap_ce, &msgno, &section) == FAILURE) { - RETURN_THROWS(); - } - - GET_IMAP_STREAM(imap_conn_struct, imap_conn_obj); - - PHP_IMAP_CHECK_MSGNO(msgno, 2); - - body=mail_body(imap_conn_struct->imap_stream, msgno, (unsigned char*)ZSTR_VAL(section)); - if (body == NULL) { - RETURN_FALSE; - } - - object_init(return_value); if (body->type <= TYPEMAX) { - add_property_long(return_value, "type", body->type); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "type", strlen("type"), + body->type + ); } + if (body->encoding <= ENCMAX) { - add_property_long(return_value, "encoding", body->encoding); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "encoding", strlen("encoding"), + body->encoding + ); } if (body->subtype) { - add_property_long(return_value, "ifsubtype", 1); - add_property_string(return_value, "subtype", body->subtype); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifsubtype", strlen("ifsubtype"), + 1 + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "subtype", strlen("subtype"), + body->subtype + ); } else { - add_property_long(return_value, "ifsubtype", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifsubtype", strlen("ifsubtype"), + 0 + ); } if (body->description) { -
add_property_long(return_value, "ifdescription", 1); - add_property_string(return_value, "description", body->description); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdescription", strlen("ifdescription"), + 1 + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "description", strlen("description"), + body->description + ); } else { - add_property_long(return_value, "ifdescription", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdescription", strlen("ifdescription"), + 0 + ); } + if (body->id) { - add_property_long(return_value, "ifid", 1); - add_property_string(return_value, "id", body->id); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifid", strlen("ifid"), + 1 + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "id", strlen("id"), + body->id + ); } else { - add_property_long(return_value, "ifid", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifid", strlen("ifid"), + 0 + ); } if (body->size.lines) { - add_property_long(return_value, "lines", body->size.lines); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "lines", strlen("lines"), + body->size.lines + ); } + if (body->size.bytes) { - add_property_long(return_value, "bytes", body->size.bytes); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "bytes", strlen("bytes"), + body->size.bytes + ); } + #ifdef IMAP41 if (body->disposition.type) { - add_property_long(return_value, "ifdisposition", 1); - add_property_string(return_value, "disposition", body->disposition.type); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdisposition", strlen("ifdisposition"), + 1 + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "disposition", strlen("disposition"), + body->disposition.type + ); } else { - add_property_long(return_value,
"ifdisposition", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdisposition", strlen("ifdisposition"), + 0 + ); } if (body->disposition.parameter) { - dpar = body->disposition.parameter; - add_property_long(return_value, "ifdparameters", 1); - array_init(&dparametres); + PARAMETER *disposition_parameter = body->disposition.parameter; + zval z_disposition_parameter_list; + + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdparameters", strlen("ifdparameters"), + 1 + ); + array_init(&z_disposition_parameter_list); do { - object_init(&dparam); - add_property_string(&dparam, "attribute", dpar->attribute); - add_property_string(&dparam, "value", dpar->value); - php_imap_list_add_object(&dparametres, &dparam); - } while ((dpar = dpar->next)); - php_imap_hash_add_object(return_value, "dparameters", &dparametres); + zval z_disposition_parameter; + object_init(&z_disposition_parameter); + zend_update_property_string( + Z_OBJCE_P(&z_disposition_parameter), Z_OBJ_P(&z_disposition_parameter), + "attribute", strlen("attribute"), + disposition_parameter->attribute + ); + zend_update_property_string( + Z_OBJCE_P(&z_disposition_parameter), Z_OBJ_P(&z_disposition_parameter), + "value", strlen("value"), + disposition_parameter->value + ); + php_imap_list_add_object(&z_disposition_parameter_list, &z_disposition_parameter); + } while ((disposition_parameter = disposition_parameter->next)); + php_imap_hash_add_object(z_object, "dparameters", &z_disposition_parameter_list); } else { - add_property_long(return_value, "ifdparameters", 0); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifdparameters", strlen("ifdparameters"), + 0 + ); } #endif - if ((par = body->parameter)) { - add_property_long(return_value, "ifparameters", 1); + PARAMETER *body_parameters = body->parameter; + zval z_body_parameter_list; - array_init(&parametres); - do { - object_init(&param); - if (par->attribute) { - add_property_string(&param,
"attribute", par->attribute); - } - if (par->value) { - add_property_string(&param, "value", par->value); - } + if (body_parameters) { + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifparameters", strlen("ifparameters"), + 1 + ); - php_imap_list_add_object(&parametres, &param); - } while ((par = par->next)); + array_init(&z_body_parameter_list); + do { + zval z_body_parameter; + object_init(&z_body_parameter); + zend_update_property_string( + Z_OBJCE_P(&z_body_parameter), Z_OBJ_P(&z_body_parameter), + "attribute", strlen("attribute"), + body_parameters->attribute + ); + zend_update_property_string( + Z_OBJCE_P(&z_body_parameter), Z_OBJ_P(&z_body_parameter), + "value", strlen("value"), + body_parameters->value + ); + + php_imap_list_add_object(&z_body_parameter_list, &z_body_parameter); + } while ((body_parameters = body_parameters->next)); } else { - object_init(&parametres); - add_property_long(return_value, "ifparameters", 0); + object_init(&z_body_parameter_list); + zend_update_property_long( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "ifparameters", strlen("ifparameters"), + 0 + ); + } + php_imap_hash_add_object(z_object, "parameters", &z_body_parameter_list); +} + +/* {{{ Read the structure of a specified body section of a specific message */ +PHP_FUNCTION(imap_bodystruct) +{ + zval *imap_conn_obj; + zend_long msgno; + zend_string *section; + php_imap_object *imap_conn_struct; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "OlS", &imap_conn_obj, php_imap_ce, &msgno, &section) == FAILURE) { + RETURN_THROWS(); } - php_imap_hash_add_object(return_value, "parameters", &parametres); + + GET_IMAP_STREAM(imap_conn_struct, imap_conn_obj); + + PHP_IMAP_CHECK_MSGNO(msgno, 2); + + const BODY *body = mail_body(imap_conn_struct->imap_stream, msgno, (unsigned char*)ZSTR_VAL(section)); + if (body == NULL) { + RETURN_FALSE; + } + + object_init(return_value); + php_imap_populate_body_struct_object(return_value, body); } /* }}} */ @@ -2837,46 +3081,118 @@
PHP_FUNCTION(imap_fetch_overview) for (i = 1; i <= imap_conn_struct->imap_stream->nmsgs; i++) { if (((elt = mail_elt (imap_conn_struct->imap_stream, i))->sequence) && (env = mail_fetch_structure (imap_conn_struct->imap_stream, i, NIL, NIL))) { + + // TODO Use part _php_make_header_object function? object_init(&myoverview); if (env->subject) { - add_property_string(&myoverview, "subject", env->subject); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "subject", strlen("subject"), + env->subject + ); } if (env->from) { env->from->next=NULL; address =_php_rfc822_write_address(env->from); if (address) { - add_property_str(&myoverview, "from", address); + zend_update_property_str( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "from", strlen("from"), + address + ); + zend_string_release(address); } } if (env->to) { env->to->next = NULL; address = _php_rfc822_write_address(env->to); if (address) { - add_property_str(&myoverview, "to", address); + zend_update_property_str( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "to", strlen("to"), + address + ); + zend_string_release(address); } } if (env->date) { - add_property_string(&myoverview, "date", (char*)env->date); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "date", strlen("date"), + (char*)env->date + ); } if (env->message_id) { - add_property_string(&myoverview, "message_id", env->message_id); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "message_id", strlen("message_id"), + env->message_id + ); } if (env->references) { - add_property_string(&myoverview, "references", env->references); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "references", strlen("references"), + env->references + ); } if (env->in_reply_to) { - add_property_string(&myoverview, "in_reply_to", env->in_reply_to); + zend_update_property_string( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + 
"in_reply_to", strlen("in_reply_to"), + env->in_reply_to + ); } - add_property_long(&myoverview, "size", elt->rfc822_size); - add_property_long(&myoverview, "uid", mail_uid(imap_conn_struct->imap_stream, i)); - add_property_long(&myoverview, "msgno", i); - add_property_long(&myoverview, "recent", elt->recent); - add_property_long(&myoverview, "flagged", elt->flagged); - add_property_long(&myoverview, "answered", elt->answered); - add_property_long(&myoverview, "deleted", elt->deleted); - add_property_long(&myoverview, "seen", elt->seen); - add_property_long(&myoverview, "draft", elt->draft); - add_property_long(&myoverview, "udate", mail_longdate(elt)); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "size", strlen("size"), + elt->rfc822_size + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "uid", strlen("uid"), + mail_uid(imap_conn_struct->imap_stream, i) + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "msgno", strlen("msgno"), + i + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "recent", strlen("recent"), + elt->recent + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "flagged", strlen("flagged"), + elt->flagged + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "answered", strlen("answered"), + elt->answered + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "deleted", strlen("deleted"), + elt->deleted + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "seen", strlen("seen"), + elt->seen + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "draft", strlen("draft"), + elt->draft + ); + zend_update_property_long( + Z_OBJCE_P(&myoverview), Z_OBJ_P(&myoverview), + "udate", strlen("udate"), + mail_longdate(elt) + ); php_imap_list_add_object(return_value, &myoverview); } 
} @@ -3707,6 +4023,21 @@ PHP_FUNCTION(imap_last_error) } /* }}} */ +static void php_imap_populate_mime_header_object(zval *z_object, const char *charset, const char *content) +{ + ZEND_ASSERT(z_object && "Object must be initialized"); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "charset", strlen("charset"), + charset + ); + zend_update_property_string( + Z_OBJCE_P(z_object), Z_OBJ_P(z_object), + "text", strlen("text"), + content + ); +} + /* {{{ Decode mime header element in accordance with RFC 2047 and return array of objects containing 'charset' encoding and decoded 'text' */ PHP_FUNCTION(imap_mime_header_decode) { @@ -3736,8 +4067,7 @@ PHP_FUNCTION(imap_mime_header_decode) memcpy(text, &string[offset], charset_token-offset); text[charset_token - offset] = 0x00; object_init(&myobject); - add_property_string(&myobject, "charset", "default"); - add_property_string(&myobject, "text", text); + php_imap_populate_mime_header_object(&myobject, "default", text); zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &myobject); } if ((encoding_token = (zend_long)php_memnstr(&string[charset_token+2], "?", 1, string+end))) { /* Find token for encoding */ @@ -3762,8 +4092,7 @@ PHP_FUNCTION(imap_mime_header_decode) RETURN_FALSE; } object_init(&myobject); - add_property_string(&myobject, "charset", charset); - add_property_string(&myobject, "text", decode); + php_imap_populate_mime_header_object(&myobject, charset, decode); zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &myobject); /* only free decode if it was allocated by rfc822_qprint or rfc822_base64 */ @@ -3791,8 +4120,7 @@ PHP_FUNCTION(imap_mime_header_decode) memcpy(text, &string[charset_token], end - charset_token); /* Extract unencoded text from string */ text[end - charset_token] = 0x00; object_init(&myobject); - add_property_string(&myobject, "charset", "default"); - add_property_string(&myobject, "text", text); + php_imap_populate_mime_header_object(&myobject, "default", text); 
zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &myobject); offset = end; /* We have reached the end of the string. */ @@ -3909,25 +4237,16 @@ static zend_string* _php_rfc822_write_address(ADDRESS *addresslist) /* }}} */ #endif /* {{{ _php_imap_parse_address */ -static zend_string* _php_imap_parse_address (ADDRESS *addresslist, zval *paddress) +static zend_string* _php_imap_parse_address (ADDRESS *address_list, zval *paddress) { zend_string *fulladdress; ADDRESS *addresstmp; - zval tmpvals; - addresstmp = addresslist; + addresstmp = address_list; fulladdress = _php_rfc822_write_address(addresstmp); - addresstmp = addresslist; - do { - object_init(&tmpvals); - if (addresstmp->personal) add_property_string(&tmpvals, "personal", addresstmp->personal); - if (addresstmp->adl) add_property_string(&tmpvals, "adl", addresstmp->adl); - if (addresstmp->mailbox) add_property_string(&tmpvals, "mailbox", addresstmp->mailbox); - if (addresstmp->host) add_property_string(&tmpvals, "host", addresstmp->host); - php_imap_list_add_object(paddress, &tmpvals); - } while ((addresstmp = addresstmp->next)); + php_imap_construct_list_of_addresses(paddress, address_list); return fulladdress; } /* }}} */ @@ -3940,22 +4259,83 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) object_init(myzvalue); - if (en->remail) add_property_string(myzvalue, "remail", en->remail); - if (en->date) add_property_string(myzvalue, "date", (char*)en->date); - if (en->date) add_property_string(myzvalue, "Date", (char*)en->date); - if (en->subject) add_property_string(myzvalue, "subject", en->subject); - if (en->subject) add_property_string(myzvalue, "Subject", en->subject); - if (en->in_reply_to) add_property_string(myzvalue, "in_reply_to", en->in_reply_to); - if (en->message_id) add_property_string(myzvalue, "message_id", en->message_id); - if (en->newsgroups) add_property_string(myzvalue, "newsgroups", en->newsgroups); - if (en->followup_to) add_property_string(myzvalue, "followup_to", 
en->followup_to); - if (en->references) add_property_string(myzvalue, "references", en->references); + if (en->remail) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "remail", strlen("remail"), + en->remail + ); + } + if (en->date) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "date", strlen("date"), + (char*)en->date + ); + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "Date", strlen("Date"), + (char*)en->date + ); + } + if (en->subject) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "subject", strlen("subject"), + en->subject + ); + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "Subject", strlen("Subject"), + en->subject + ); + } + if (en->in_reply_to) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "in_reply_to", strlen("in_reply_to"), + en->in_reply_to + ); + } + if (en->message_id) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "message_id", strlen("message_id"), + en->message_id + ); + } + if (en->newsgroups) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "newsgroups", strlen("newsgroups"), + en->newsgroups + ); + } + if (en->followup_to) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "followup_to", strlen("followup_to"), + en->followup_to + ); + } + if (en->references) { + zend_update_property_string( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "references", strlen("references"), + en->references + ); + } if (en->to) { array_init(&paddress); fulladdress = _php_imap_parse_address(en->to, &paddress); if (fulladdress) { - add_property_str(myzvalue, "toaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "toaddress", strlen("toaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "to", &paddress); } @@ -3964,7 
+4344,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->from, &paddress); if (fulladdress) { - add_property_str(myzvalue, "fromaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "fromaddress", strlen("fromaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "from", &paddress); } @@ -3973,7 +4358,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->cc, &paddress); if (fulladdress) { - add_property_str(myzvalue, "ccaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "ccaddress", strlen("ccaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "cc", &paddress); } @@ -3982,7 +4372,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->bcc, &paddress); if (fulladdress) { - add_property_str(myzvalue, "bccaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "bccaddress", strlen("bccaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "bcc", &paddress); } @@ -3991,7 +4386,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->reply_to, &paddress); if (fulladdress) { - add_property_str(myzvalue, "reply_toaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "reply_toaddress", strlen("reply_toaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "reply_to", &paddress); } @@ -4000,7 +4400,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); 
fulladdress = _php_imap_parse_address(en->sender, &paddress); if (fulladdress) { - add_property_str(myzvalue, "senderaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "senderaddress", strlen("senderaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "sender", &paddress); } @@ -4009,7 +4414,12 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) array_init(&paddress); fulladdress = _php_imap_parse_address(en->return_path, &paddress); if (fulladdress) { - add_property_str(myzvalue, "return_pathaddress", fulladdress); + zend_update_property_str( + Z_OBJCE_P(myzvalue), Z_OBJ_P(myzvalue), + "return_pathaddress", strlen("return_pathaddress"), + fulladdress + ); + zend_string_release(fulladdress); } php_imap_hash_add_object(myzvalue, "return_path", &paddress); // From rebase might need? @@ -4019,113 +4429,34 @@ static void _php_make_header_object(zval *myzvalue, ENVELOPE *en) /* }}} */ /* {{{ _php_imap_add_body */ -void _php_imap_add_body(zval *arg, BODY *body) +void _php_imap_add_body(zval *arg, const BODY *body) { - zval parametres, param, dparametres, dparam; - PARAMETER *par, *dpar; - PART *part; - - if (body->type <= TYPEMAX) { - add_property_long(arg, "type", body->type); - } - - if (body->encoding <= ENCMAX) { - add_property_long(arg, "encoding", body->encoding); - } - - if (body->subtype) { - add_property_long(arg, "ifsubtype", 1); - add_property_string(arg, "subtype", body->subtype); - } else { - add_property_long(arg, "ifsubtype", 0); - } - - if (body->description) { - add_property_long(arg, "ifdescription", 1); - add_property_string(arg, "description", body->description); - } else { - add_property_long(arg, "ifdescription", 0); - } - - if (body->id) { - add_property_long(arg, "ifid", 1); - add_property_string(arg, "id", body->id); - } else { - add_property_long(arg, "ifid", 0); - } - - if (body->size.lines) { - add_property_long(arg, "lines", 
body->size.lines); - } - - if (body->size.bytes) { - add_property_long(arg, "bytes", body->size.bytes); - } - -#ifdef IMAP41 - if (body->disposition.type) { - add_property_long(arg, "ifdisposition", 1); - add_property_string(arg, "disposition", body->disposition.type); - } else { - add_property_long(arg, "ifdisposition", 0); - } - - if (body->disposition.parameter) { - dpar = body->disposition.parameter; - add_property_long(arg, "ifdparameters", 1); - array_init(&dparametres); - do { - object_init(&dparam); - add_property_string(&dparam, "attribute", dpar->attribute); - add_property_string(&dparam, "value", dpar->value); - php_imap_list_add_object(&dparametres, &dparam); - } while ((dpar = dpar->next)); - php_imap_hash_add_object(arg, "dparameters", &dparametres); - } else { - add_property_long(arg, "ifdparameters", 0); - } -#endif - - if ((par = body->parameter)) { - add_property_long(arg, "ifparameters", 1); - - array_init(&parametres); - do { - object_init(&param); - if (par->attribute) { - add_property_string(&param, "attribute", par->attribute); - } - if (par->value) { - add_property_string(&param, "value", par->value); - } - - php_imap_list_add_object(&parametres, &param); - } while ((par = par->next)); - } else { - object_init(&parametres); - add_property_long(arg, "ifparameters", 0); - } - php_imap_hash_add_object(arg, "parameters", &parametres); + php_imap_populate_body_struct_object(arg, body); /* multipart message ?
*/ if (body->type == TYPEMULTIPART) { - array_init(&parametres); - for (part = body->CONTENT_PART; part; part = part->next) { - object_init(&param); - _php_imap_add_body(&param, &part->body); - php_imap_list_add_object(&parametres, &param); + zval z_content_part_list; + + array_init(&z_content_part_list); + for (const PART *content_part = body->CONTENT_PART; content_part; content_part = content_part->next) { + zval z_content_part; + object_init(&z_content_part); + _php_imap_add_body(&z_content_part, &content_part->body); + php_imap_list_add_object(&z_content_part_list, &z_content_part); } - php_imap_hash_add_object(arg, "parts", &parametres); + php_imap_hash_add_object(arg, "parts", &z_content_part_list); } /* encapsulated message ? */ if ((body->type == TYPEMESSAGE) && (!strcasecmp(body->subtype, "rfc822"))) { - body = body->CONTENT_MSG_BODY; - array_init(&parametres); - object_init(&param); - _php_imap_add_body(&param, body); - php_imap_list_add_object(&parametres, &param); - php_imap_hash_add_object(arg, "parts", &parametres); + zval message_list, message; + + const BODY *message_body = body->CONTENT_MSG_BODY; + array_init(&message_list); + object_init(&message); + _php_imap_add_body(&message, message_body); + php_imap_list_add_object(&message_list, &message); + php_imap_hash_add_object(arg, "parts", &message_list); } } /* }}} */ diff --git a/ext/intl/calendar/calendar.stub.php b/ext/intl/calendar/calendar.stub.php index 97807b139e0d4..748d5eaf25285 100644 --- a/ext/intl/calendar/calendar.stub.php +++ b/ext/intl/calendar/calendar.stub.php @@ -244,7 +244,7 @@ public function after(IntlCalendar $other): bool {} public function before(IntlCalendar $other): bool {} /** - * @return bool + * @return true * @alias intlcal_clear */ public function clear(?int $field = null) {} // TODO make return type void @@ -341,7 +341,7 @@ public function getMaximum(int $field): int|false {} public function getMinimalDaysInFirstWeek(): int|false {} /** - * @return bool + * @return true * @alias
intlcal_set_minimal_days_in_first_week */ public function setMinimalDaysInFirstWeek(int $days) {} // TODO make return void @@ -432,31 +432,31 @@ public function roll(int $field, $value): bool {} public function isSet(int $field): bool {} /** - * @return bool + * @return true * @alias intlcal_set */ public function set(int $year, int $month, int $dayOfMonth = UNKNOWN, int $hour = UNKNOWN, int $minute = UNKNOWN, int $second = UNKNOWN) {} // TODO make return type void /** - * @return bool + * @return true * @alias intlcal_set_first_day_of_week */ public function setFirstDayOfWeek(int $dayOfWeek) {} // TODO make return type void /** - * @return bool + * @return true * @alias intlcal_set_lenient */ public function setLenient(bool $lenient) {} // TODO make return type void /** - * @return bool + * @return true * @alias intlcal_set_repeated_wall_time_option */ public function setRepeatedWallTimeOption(int $option) {} // TODO make return type void /** - * @return bool + * @return true * @alias intlcal_set_skipped_wall_time_option */ public function setSkippedWallTimeOption(int $option) {} // TODO make return type void diff --git a/ext/intl/calendar/calendar_arginfo.h b/ext/intl/calendar/calendar_arginfo.h index 6173d1283806b..b67149915b9d3 100644 --- a/ext/intl/calendar/calendar_arginfo.h +++ b/ext/intl/calendar/calendar_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: ef1b6e5e8ff6890ccb176c08c22499048afdfdb5 */ + * Stub hash: c1d451a668ccab343208ab5cc30ab8457d6802b9 */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_IntlCalendar___construct, 0, 0, 0) ZEND_END_ARG_INFO() diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index 8980a807919b9..eab42fcc0ff57 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -201,12 +201,12 @@ function intlcal_after(IntlCalendar $calendar, IntlCalendar $other): bool {} function intlcal_before(IntlCalendar $calendar, IntlCalendar $other): bool {} -function intlcal_set(IntlCalendar $calendar, int $year, int $month, int $dayOfMonth = UNKNOWN, int $hour = UNKNOWN, int $minute = UNKNOWN, int $second = UNKNOWN): bool {} +function intlcal_set(IntlCalendar $calendar, int $year, int $month, int $dayOfMonth = UNKNOWN, int $hour = UNKNOWN, int $minute = UNKNOWN, int $second = UNKNOWN): true {} /** @param int|bool $value */ function intlcal_roll(IntlCalendar $calendar, int $field, $value): bool {} -function intlcal_clear(IntlCalendar $calendar, ?int $field = null): bool {} +function intlcal_clear(IntlCalendar $calendar, ?int $field = null): true {} function intlcal_field_difference(IntlCalendar $calendar, float $timestamp, int $field): int|false {} @@ -228,7 +228,7 @@ function intlcal_get_maximum(IntlCalendar $calendar, int $field): int|false {} function intlcal_get_minimal_days_in_first_week(IntlCalendar $calendar): int|false {} -function intlcal_set_minimal_days_in_first_week(IntlCalendar $calendar, int $days): bool {} +function intlcal_set_minimal_days_in_first_week(IntlCalendar $calendar, int $days): true {} function intlcal_get_minimum(IntlCalendar $calendar, int $field): int|false {} @@ -248,9 +248,9 @@ function intlcal_is_equivalent_to(IntlCalendar $calendar, IntlCalendar $other): function intlcal_is_weekend(IntlCalendar $calendar, ?float $timestamp = null): bool {} -function intlcal_set_first_day_of_week(IntlCalendar $calendar, int $dayOfWeek): bool {} 
+function intlcal_set_first_day_of_week(IntlCalendar $calendar, int $dayOfWeek): true {} -function intlcal_set_lenient(IntlCalendar $calendar, bool $lenient): bool {} +function intlcal_set_lenient(IntlCalendar $calendar, bool $lenient): true {} function intlcal_get_repeated_wall_time_option(IntlCalendar $calendar): int {} @@ -258,9 +258,9 @@ function intlcal_equals(IntlCalendar $calendar, IntlCalendar $other): bool {} function intlcal_get_skipped_wall_time_option(IntlCalendar $calendar): int {} -function intlcal_set_repeated_wall_time_option(IntlCalendar $calendar, int $option): bool {} +function intlcal_set_repeated_wall_time_option(IntlCalendar $calendar, int $option): true {} -function intlcal_set_skipped_wall_time_option(IntlCalendar $calendar, int $option): bool {} +function intlcal_set_skipped_wall_time_option(IntlCalendar $calendar, int $option): true {} function intlcal_from_date_time(DateTime|string $datetime, ?string $locale = null): ?IntlCalendar {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index 8f2903315a386..c05ecb7b24973 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 136c14d9162548cd7211985ce9a5d767a90a0b99 */ + * Stub hash: c32e74bddb55455f69083a302bcaf52f654b1293 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -50,7 +50,7 @@ ZEND_END_ARG_INFO() #define arginfo_intlcal_before arginfo_intlcal_after -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set, 0, 3, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set, 0, 3, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO(0, year, IS_LONG, 0) ZEND_ARG_TYPE_INFO(0, month, IS_LONG, 0) @@ -66,7 +66,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_roll, 0, 3, _IS_BOOL, 0) ZEND_ARG_INFO(0, value) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_clear, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_clear, 0, 1, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, field, IS_LONG, 1, "null") ZEND_END_ARG_INFO() @@ -103,7 +103,7 @@ ZEND_END_ARG_INFO() #define arginfo_intlcal_get_minimal_days_in_first_week arginfo_intlcal_get_first_day_of_week -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_minimal_days_in_first_week, 0, 2, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_minimal_days_in_first_week, 0, 2, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO(0, days, IS_LONG, 0) ZEND_END_ARG_INFO() @@ -138,12 +138,12 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_is_weekend, 0, 1, _IS_BO ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, timestamp, IS_DOUBLE, 1, "null") ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_first_day_of_week, 0, 2, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_first_day_of_week, 0, 2, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) 
ZEND_ARG_TYPE_INFO(0, dayOfWeek, IS_LONG, 0) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_lenient, 0, 2, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_lenient, 0, 2, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO(0, lenient, _IS_BOOL, 0) ZEND_END_ARG_INFO() @@ -156,7 +156,7 @@ ZEND_END_ARG_INFO() #define arginfo_intlcal_get_skipped_wall_time_option arginfo_intlcal_get_repeated_wall_time_option -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_repeated_wall_time_option, 0, 2, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_intlcal_set_repeated_wall_time_option, 0, 2, IS_TRUE, 0) ZEND_ARG_OBJ_INFO(0, calendar, IntlCalendar, 0) ZEND_ARG_TYPE_INFO(0, option, IS_LONG, 0) ZEND_END_ARG_INFO() diff --git a/ext/json/json.c b/ext/json/json.c index 2de78e3dabc2e..d102edebb23a4 100644 --- a/ext/json/json.c +++ b/ext/json/json.c @@ -103,6 +103,21 @@ static PHP_MINFO_FUNCTION(json) } /* }}} */ +PHP_JSON_API zend_string *php_json_encode_string(const char *s, size_t len, int options) +{ + smart_str buf = {0}; + php_json_encoder encoder; + + php_json_encode_init(&encoder); + + if (php_json_escape_string(&buf, s, len, options, &encoder) == FAILURE) { + smart_str_free(&buf); + return NULL; + } + + return smart_str_extract(&buf); +} + PHP_JSON_API zend_result php_json_encode_ex(smart_str *buf, zval *val, int options, zend_long depth) /* {{{ */ { php_json_encoder encoder; diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index adb53598326bd..14fd86d73426c 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -31,10 +31,6 @@ static const char digits[] = "0123456789abcdef"; -static zend_result php_json_escape_string( - smart_str *buf, const char *s, size_t len, - int options, php_json_encoder *encoder); - static int php_json_determine_array_type(zval *val) /* {{{ */ { zend_array *myht = Z_ARRVAL_P(val); @@ -319,7 +315,7 @@ static 
zend_result php_json_encode_array(smart_str *buf, zval *val, int options, } /* }}} */ -static zend_result php_json_escape_string( +zend_result php_json_escape_string( smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ { diff --git a/ext/json/php_json.h b/ext/json/php_json.h index 3c47e44875cc7..00c87eca53c9e 100644 --- a/ext/json/php_json.h +++ b/ext/json/php_json.h @@ -99,6 +99,8 @@ PHP_JSON_API ZEND_EXTERN_MODULE_GLOBALS(json) ZEND_TSRMLS_CACHE_EXTERN() #endif +PHP_JSON_API zend_string *php_json_encode_string(const char *s, size_t len, int options); + PHP_JSON_API zend_result php_json_encode_ex(smart_str *buf, zval *val, int options, zend_long depth); PHP_JSON_API zend_result php_json_encode(smart_str *buf, zval *val, int options); PHP_JSON_API zend_result php_json_decode_ex(zval *return_value, const char *str, size_t str_len, zend_long options, zend_long depth); diff --git a/ext/json/php_json_encoder.h b/ext/json/php_json_encoder.h index e1c48e2b50922..d66d71cb5538c 100644 --- a/ext/json/php_json_encoder.h +++ b/ext/json/php_json_encoder.h @@ -35,4 +35,6 @@ static inline void php_json_encode_init(php_json_encoder *encoder) zend_result php_json_encode_zval(smart_str *buf, zval *val, int options, php_json_encoder *encoder); +zend_result php_json_escape_string(smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder); + #endif /* PHP_JSON_ENCODER_H */ diff --git a/ext/ldap/ldap.c b/ext/ldap/ldap.c index 715bde8bd13e3..8a60df4edf7b6 100644 --- a/ext/ldap/ldap.c +++ b/ext/ldap/ldap.c @@ -412,7 +412,7 @@ static int _php_ldap_control_from_array(LDAP *ld, LDAPControl** ctrl, zval* arra struct berval control_value = { 0L, NULL }; int control_value_alloc = 0; - if ((val = zend_hash_str_find(Z_ARRVAL_P(array), "value", sizeof("value") - 1)) != NULL) { + if ((val = zend_hash_find(Z_ARRVAL_P(array), ZSTR_KNOWN(ZEND_STR_VALUE))) != NULL) { if (Z_TYPE_P(val) != IS_ARRAY) { tmpstring = zval_get_string(val); if 
(EG(exception)) { @@ -634,7 +634,8 @@ static int _php_ldap_control_from_array(LDAP *ld, LDAPControl** ctrl, zval* arra } else if ((tmp = zend_hash_str_find(Z_ARRVAL_P(val), "offset", sizeof("offset") - 1)) != NULL) { vlvInfo.ldvlv_attrvalue = NULL; vlvInfo.ldvlv_offset = zval_get_long(tmp); - if ((tmp = zend_hash_str_find(Z_ARRVAL_P(val), "count", sizeof("count") - 1)) != NULL) { + /* Find "count" key */ + if ((tmp = zend_hash_find(Z_ARRVAL_P(val), ZSTR_KNOWN(ZEND_STR_COUNT))) != NULL) { vlvInfo.ldvlv_count = zval_get_long(tmp); } else { rc = -1; diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 634195cb57624..d12c215367c3c 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -113,7 +113,7 @@ static int php_libxml_clear_object(php_libxml_node_object *object) return php_libxml_decrement_doc_ref(object); } -static int php_libxml_unregister_node(xmlNodePtr nodep) +static void php_libxml_unregister_node(xmlNodePtr nodep) { php_libxml_node_object *wrapper; @@ -130,8 +130,6 @@ static int php_libxml_unregister_node(xmlNodePtr nodep) nodeptr->node = NULL; } } - - return -1; } static void php_libxml_node_free(xmlNodePtr node) @@ -209,9 +207,7 @@ PHP_LIBXML_API void php_libxml_node_free_list(xmlNodePtr node) curnode = node->next; xmlUnlinkNode(node); - if (php_libxml_unregister_node(node) == 0) { - node->doc = NULL; - } + php_libxml_unregister_node(node); php_libxml_node_free(node); } } @@ -514,6 +510,8 @@ static void php_libxml_ctx_error_level(int level, void *ctx, const char *msg) } else { php_error_docref(NULL, level, "%s in Entity, line: %d", msg, parser->input->line); } + } else { + php_error_docref(NULL, E_WARNING, "%s", msg); } } @@ -1161,8 +1159,14 @@ PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object, object->node->_private = private_data; } } else { + if (UNEXPECTED(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)) { + php_libxml_doc_ptr *doc_ptr = emalloc(sizeof(php_libxml_doc_ptr)); + 
doc_ptr->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */ + object->node = (php_libxml_node_ptr *) doc_ptr; /* downcast */ + } else { + object->node = emalloc(sizeof(php_libxml_node_ptr)); + } ret_refcount = 1; - object->node = emalloc(sizeof(php_libxml_node_ptr)); object->node->node = node; object->node->refcount = 1; object->node->_private = private_data; @@ -1262,9 +1266,7 @@ PHP_LIBXML_API void php_libxml_node_free_resource(xmlNodePtr node) default: php_libxml_node_free_list((xmlNodePtr) node->properties); } - if (php_libxml_unregister_node(node) == 0) { - node->doc = NULL; - } + php_libxml_unregister_node(node); php_libxml_node_free(node); } else { php_libxml_unregister_node(node); diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index ff8a634e0cf9b..a23ff6ee57c13 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -47,14 +47,14 @@ ZEND_BEGIN_MODULE_GLOBALS(libxml) ZEND_END_MODULE_GLOBALS(libxml) typedef struct _libxml_doc_props { - int formatoutput; - int validateonparse; - int resolveexternals; - int preservewhitespace; - int substituteentities; - int stricterror; - int recover; HashTable *classmap; + bool formatoutput; + bool validateonparse; + bool resolveexternals; + bool preservewhitespace; + bool substituteentities; + bool stricterror; + bool recover; } libxml_doc_props; typedef struct _php_libxml_ref_obj { @@ -69,6 +69,16 @@ typedef struct _php_libxml_node_ptr { void *_private; } php_libxml_node_ptr; +typedef struct { + size_t modification_nr; +} php_libxml_cache_tag; + +/* extends php_libxml_node_ptr */ +typedef struct { + php_libxml_node_ptr node_ptr; + php_libxml_cache_tag cache_tag; +} php_libxml_doc_ptr; + typedef struct _php_libxml_node_object { php_libxml_node_ptr *node; php_libxml_ref_obj *document; @@ -81,6 +91,27 @@ static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object * return (php_libxml_node_object *)((char*)(obj) - 
obj->handlers->offset); } +static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_doc_ptr *doc_ptr) +{ +#if SIZEOF_SIZE_T == 8 + /* If one operation happens every nanosecond, then it would still require 584 years to overflow + * the counter. So we'll just assume this never happens. */ + doc_ptr->cache_tag.modification_nr++; +#else + size_t new_modification_nr = doc_ptr->cache_tag.modification_nr + 1; + if (EXPECTED(new_modification_nr > 0)) { /* unsigned overflow; checking after addition results in one less instruction */ + doc_ptr->cache_tag.modification_nr = new_modification_nr; + } +#endif +} + +static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp) +{ + if (docp && docp->_private) { /* docp is NULL for detached nodes */ + php_libxml_invalidate_node_list_cache(docp->_private); + } +} + #define Z_LIBXML_NODE_P(zv) php_libxml_node_fetch_object(Z_OBJ_P((zv))) typedef void * (*php_libxml_export_node) (zval *object); diff --git a/ext/libxml/tests/bug61367-read_2.phpt b/ext/libxml/tests/bug61367-read_2.phpt index 92f1829a44dcf..bbcf696773bf0 100644 --- a/ext/libxml/tests/bug61367-read_2.phpt +++ b/ext/libxml/tests/bug61367-read_2.phpt @@ -58,6 +58,6 @@ bool(true) int(4) bool(true) -Warning: DOMDocument::loadXML(): I/O warning : failed to load external entity "file:///%s/test_bug_61367-read/bad" in %s on line %d +Warning: DOMDocument::loadXML(): %Sfailed to load external entity "file:///%s/test_bug_61367-read/bad" in %s on line %d Warning: Attempt to read property "nodeValue" on null in %s on line %d diff --git a/ext/libxml/tests/libxml_disable_entity_loader_2.phpt b/ext/libxml/tests/libxml_disable_entity_loader_2.phpt index ad253171625f9..182fe13cfda96 100644 --- a/ext/libxml/tests/libxml_disable_entity_loader_2.phpt +++ b/ext/libxml/tests/libxml_disable_entity_loader_2.phpt @@ -39,6 +39,6 @@ bool(true) Deprecated: Function libxml_disable_entity_loader() is deprecated in %s on line %d bool(false) 
-Warning: DOMDocument::loadXML(): I/O warning : failed to load external entity "%s" in %s on line %d +Warning: DOMDocument::loadXML(): %Sfailed to load external entity "%s" in %s on line %d bool(true) Done diff --git a/ext/libxml/tests/libxml_set_external_entity_loader_variation2.phpt b/ext/libxml/tests/libxml_set_external_entity_loader_variation2.phpt index 9ebf2c0e9d32a..5657b727bacd2 100644 --- a/ext/libxml/tests/libxml_set_external_entity_loader_variation2.phpt +++ b/ext/libxml/tests/libxml_set_external_entity_loader_variation2.phpt @@ -39,6 +39,8 @@ echo "Done.\n"; string(10) "-//FOO/BAR" string(%d) "%sfoobar.dtd" +Warning: DOMDocument::validate(): Failed to load external entity "-//FOO/BAR" in %s on line %d + Warning: DOMDocument::validate(): Could not load the external subset "foobar.dtd" in %s on line %d bool(false) bool(true) diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index db2298661920f..2a3da1ce88a3b 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -95,30 +95,12 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/html_entities.c libmbfl/filters/mbfilter_7bit.c libmbfl/filters/mbfilter_base64.c - libmbfl/filters/mbfilter_big5.c - libmbfl/filters/mbfilter_cp5022x.c - libmbfl/filters/mbfilter_cp51932.c - libmbfl/filters/mbfilter_cp932.c - libmbfl/filters/mbfilter_cp936.c - libmbfl/filters/mbfilter_gb18030.c - libmbfl/filters/mbfilter_euc_cn.c - libmbfl/filters/mbfilter_euc_jp.c - libmbfl/filters/mbfilter_euc_jp_win.c - libmbfl/filters/mbfilter_euc_kr.c - libmbfl/filters/mbfilter_euc_tw.c + libmbfl/filters/mbfilter_cjk.c libmbfl/filters/mbfilter_htmlent.c - libmbfl/filters/mbfilter_hz.c - libmbfl/filters/mbfilter_iso2022_jp_ms.c - libmbfl/filters/mbfilter_iso2022jp_mobile.c - libmbfl/filters/mbfilter_iso2022_kr.c - libmbfl/filters/mbfilter_jis.c libmbfl/filters/mbfilter_qprint.c libmbfl/filters/mbfilter_singlebyte.c - libmbfl/filters/mbfilter_sjis.c - libmbfl/filters/mbfilter_sjis_2004.c 
libmbfl/filters/mbfilter_ucs2.c libmbfl/filters/mbfilter_ucs4.c - libmbfl/filters/mbfilter_uhc.c libmbfl/filters/mbfilter_utf16.c libmbfl/filters/mbfilter_utf32.c libmbfl/filters/mbfilter_utf7.c diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 5ba672434356e..780fe47defd9a 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -17,17 +17,13 @@ if (PHP_MBSTRING != "no") { "ext\\mbstring\\libmbfl\\config.h", true); ADD_SOURCES("ext/mbstring/libmbfl/filters", "html_entities.c \ - mbfilter_7bit.c mbfilter_base64.c mbfilter_big5.c mbfilter_cp932.c \ - mbfilter_cp936.c mbfilter_cp51932.c mbfilter_euc_cn.c \ - mbfilter_euc_jp.c mbfilter_euc_jp_win.c mbfilter_euc_kr.c \ - mbfilter_euc_tw.c mbfilter_htmlent.c mbfilter_hz.c mbfilter_iso2022_kr.c \ - mbfilter_jis.c mbfilter_iso2022_jp_ms.c mbfilter_gb18030.c \ - mbfilter_sjis_2004.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \ - mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \ + mbfilter_7bit.c mbfilter_base64.c \ + mbfilter_cjk.c mbfilter_htmlent.c \ + mbfilter_qprint.c mbfilter_ucs2.c \ + mbfilter_ucs4.c mbfilter_utf16.c mbfilter_utf32.c \ mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \ mbfilter_utf8_mobile.c mbfilter_uuencode.c \ - mbfilter_cp5022x.c \ - mbfilter_iso2022jp_mobile.c mbfilter_singlebyte.c", "mbstring"); + mbfilter_singlebyte.c", "mbstring"); ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \ mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_big5.c b/ext/mbstring/libmbfl/filters/mbfilter_big5.c deleted file mode 100644 index ab10c6a5df3e4..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_big5.c +++ /dev/null @@ -1,660 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Rui Hirokawa - * - */ -/* - * The source code included in this file was separated from mbfilter_tw.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_big5.h" - -#include "unicode_table_big5.h" - -static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_big5_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_big5(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_cp950_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp950(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_big5[] = { /* 0x81-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", NULL}; - -const mbfl_encoding mbfl_encoding_big5 = { - mbfl_no_encoding_big5, - "BIG-5", - "BIG5", - mbfl_encoding_big5_aliases, - mblen_table_big5, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_big5_wchar, - &vtbl_wchar_big5, - mb_big5_to_wchar, - mb_wchar_to_big5, - NULL -}; - -const mbfl_encoding mbfl_encoding_cp950 = { - mbfl_no_encoding_cp950, - "CP950", - "BIG5", - NULL, - mblen_table_big5, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp950_wchar, - &vtbl_wchar_cp950, - mb_cp950_to_wchar, - mb_wchar_to_cp950, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_big5_wchar = { - mbfl_no_encoding_big5, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_big5_wchar, - mbfl_filt_conv_big5_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_big5 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_big5, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_big5, - mbfl_filt_conv_common_flush, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp950_wchar = { - mbfl_no_encoding_cp950, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_big5_wchar, - mbfl_filt_conv_big5_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp950 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp950, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_big5, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -/* 63 + 94 = 157 or 94 */ -static unsigned short 
cp950_pua_tbl[][4] = { - {0xe000, 0xe310, 0xfa40, 0xfefe}, - {0xe311, 0xeeb7, 0x8e40, 0xa0fe}, - {0xeeb8, 0xf6b0, 0x8140, 0x8dfe}, - {0xf6b1, 0xf70e, 0xc6a1, 0xc6fe}, - {0xf70f, 0xf848, 0xc740, 0xc8fe}, -}; - -static inline int is_in_cp950_pua(int c1, int c) -{ - if ((c1 >= 0xfa && c1 <= 0xfe) || (c1 >= 0x8e && c1 <= 0xa0) || - (c1 >= 0x81 && c1 <= 0x8d) || (c1 >= 0xc7 && c1 <= 0xc8)) { - return (c >= 0x40 && c <= 0x7e) || (c >= 0xa1 && c <= 0xfe); - } else if (c1 == 0xc6) { - return c >= 0xa1 && c <= 0xfe; - } - return 0; -} - -int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter) -{ - int k, c1, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (filter->from->no_encoding != mbfl_no_encoding_cp950 && c > 0xA0 && c <= 0xF9 && c != 0xC8) { - filter->status = 1; - filter->cache = c; - } else if (filter->from->no_encoding == mbfl_no_encoding_cp950 && c > 0x80 && c <= 0xFE) { - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - c1 = filter->cache; - if ((c > 0x3f && c < 0x7f) || (c > 0xa0 && c < 0xff)) { - if (c < 0x7f) { - w = (c1 - 0xa1)*157 + (c - 0x40); - } else { - w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f; - } - if (w >= 0 && w < big5_ucs_table_size) { - w = big5_ucs_table[w]; - } else { - w = 0; - } - - if (filter->from->no_encoding == mbfl_no_encoding_cp950) { - /* PUA for CP950 */ - if (is_in_cp950_pua(c1, c)) { - int c2 = (c1 << 8) | c; - - for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { - if (c2 >= cp950_pua_tbl[k][2] && c2 <= cp950_pua_tbl[k][3]) { - break; - } - } - - if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) { - w = 157*(c1 - (cp950_pua_tbl[k][2]>>8)) + c - (c >= 0xa1 ? 
0x62 : 0x40) + cp950_pua_tbl[k][0]; - } else { - w = c2 - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0]; - } - } else if (c1 == 0xA1) { - if (c == 0x45) { - w = 0x2027; - } else if (c == 0x4E) { - w = 0xFE51; - } else if (c == 0x5A) { - w = 0x2574; - } else if (c == 0xC2) { - w = 0x00AF; - } else if (c == 0xC3) { - w = 0xFFE3; - } else if (c == 0xC5) { - w = 0x02CD; - } else if (c == 0xE3) { - w = 0xFF5E; - } else if (c == 0xF2) { - w = 0x2295; - } else if (c == 0xF3) { - w = 0x2299; - } else if (c == 0xFE) { - w = 0xFF0F; - } - } else if (c1 == 0xA2) { - if (c == 0x40) { - w = 0xFF3C; - } else if (c == 0x41) { - w = 0x2215; - } else if (c == 0x42) { - w = 0xFE68; - } else if (c == 0x46) { - w = 0xFFE0; - } else if (c == 0x47) { - w = 0xFFE1; - } else if (c == 0xCC) { - w = 0x5341; - } else if (c == 0xCE) { - w = 0x5345; - } - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter) -{ - int k, s = 0; - - if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) { - s = ucs_a1_big5_table[c - ucs_a1_big5_table_min]; - } else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) { - s = ucs_a2_big5_table[c - ucs_a2_big5_table_min]; - } else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) { - s = ucs_a3_big5_table[c - ucs_a3_big5_table_min]; - } else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) { - s = ucs_i_big5_table[c - ucs_i_big5_table_min]; - } else if 
(c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) { - s = ucs_r1_big5_table[c - ucs_r1_big5_table_min]; - } else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) { - s = ucs_r2_big5_table[c - ucs_r2_big5_table_min]; - } - - if (filter->to->no_encoding == mbfl_no_encoding_cp950) { - if (c >= 0xe000 && c <= 0xf848) { /* PUA for CP950 */ - for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { - if (c <= cp950_pua_tbl[k][1]) { - break; - } - } - - int c1 = c - cp950_pua_tbl[k][0]; - if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) { - int c2 = cp950_pua_tbl[k][2] >> 8; - s = ((c1 / 157) + c2) << 8; - c1 %= 157; - s |= c1 + (c1 >= 0x3f ? 0x62 : 0x40); - } else { - s = c1 + cp950_pua_tbl[k][2]; - } - } else if (c == 0x00A2) { - s = 0; - } else if (c == 0x00A3) { - s = 0; - } else if (c == 0x00AF) { - s = 0xA1C2; - } else if (c == 0x02CD) { - s = 0xA1C5; - } else if (c == 0x0401) { - s = 0; - } else if (c >= 0x0414 && c <= 0x041C) { - s = 0; - } else if (c >= 0x0423 && c <= 0x044F) { - s = 0; - } else if (c == 0x0451) { - s = 0; - } else if (c == 0x2022) { - s = 0; - } else if (c == 0x2027) { - s = 0xA145; - } else if (c == 0x203E) { - s = 0; - } else if (c == 0x2215) { - s = 0xA241; - } else if (c == 0x223C) { - s = 0; - } else if (c == 0x2295) { - s = 0xA1F2; - } else if (c == 0x2299) { - s = 0xA1F3; - } else if (c >= 0x2460 && c <= 0x247D) { - s = 0; - } else if (c == 0x2574) { - s = 0xA15A; - } else if (c == 0x2609) { - s = 0; - } else if (c == 0x2641) { - s = 0; - } else if (c == 0x3005 || (c >= 0x302A && c <= 0x30FF)) { - s = 0; - } else if (c == 0xFE51) { - s = 0xA14E; - } else if (c == 0xFE68) { - s = 0xA242; - } else if (c == 0xFF3C) { - s = 0xA240; - } else if (c == 0xFF5E) { - s = 0xA1E3; - } else if (c == 0xFF64) { - s = 0; - } else if (c == 0xFFE0) { - s = 0xA246; - } else if (c == 0xFFE1) { - s = 0xA247; - } else if (c == 0xFFE3) { - s = 0xA1C3; - } else if (c == 0xFF0F) { - s = 0xA1FE; - } - } - - if (s <= 0) { - if (c == 
0) { - s = 0; - } else { - s = -1; - } - } - - if (s >= 0) { - if (s <= 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_big5_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - e--; /* Stop the main loop 1 byte short of the end of the input */ - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c > 0xA0 && c <= 0xF9) { - /* We don't need to check p < e here; it's not possible that this pointer dereference - * will be outside the input string, because of e-- above */ - unsigned char c2 = *p++; - - if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) { - unsigned int w = (c - 0xA1)*157 + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F); - ZEND_ASSERT(w < big5_ucs_table_size); - w = big5_ucs_table[w]; - if (!w) { - if (c == 0xC8) { - p--; - } - w = MBFL_BAD_INPUT; - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - /* Finish up last byte of input string if there is one */ - if (p == e && out < limit) { - unsigned char c = *p++; - *out++ = (c <= 0x7F) ? 
c : MBFL_BAD_INPUT; - } - - *in_len = e - p + 1; - *in = p; - return out - buf; -} - -static void mb_wchar_to_big5(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) { - s = ucs_a1_big5_table[w - ucs_a1_big5_table_min]; - } else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) { - s = ucs_a2_big5_table[w - ucs_a2_big5_table_min]; - } else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) { - s = ucs_a3_big5_table[w - ucs_a3_big5_table_min]; - } else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) { - s = ucs_i_big5_table[w - ucs_i_big5_table_min]; - } else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) { - s = ucs_r1_big5_table[w - ucs_r1_big5_table_min]; - } else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) { - s = ucs_r2_big5_table[w - ucs_r2_big5_table_min]; - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } else if (s <= 0x80) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static size_t mb_cp950_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c > 0x80 && c <= 0xFE && p < e) { - unsigned char c2 = *p++; - - if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) { - unsigned int w = ((c - 
0xA1)*157) + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F); - w = (w < big5_ucs_table_size) ? big5_ucs_table[w] : 0; - - /* PUA for CP950 */ - if (is_in_cp950_pua(c, c2)) { - unsigned int s = (c << 8) | c2; - - int k; - for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { - if (s >= cp950_pua_tbl[k][2] && s <= cp950_pua_tbl[k][3]) { - break; - } - } - - if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) { - w = 157*(c - (cp950_pua_tbl[k][2] >> 8)) + c2 - (c2 >= 0xA1 ? 0x62 : 0x40) + cp950_pua_tbl[k][0]; - } else { - w = s - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0]; - } - } else if (c == 0xA1) { - if (c2 == 0x45) { - w = 0x2027; - } else if (c2 == 0x4E) { - w = 0xFE51; - } else if (c2 == 0x5A) { - w = 0x2574; - } else if (c2 == 0xC2) { - w = 0x00AF; - } else if (c2 == 0xC3) { - w = 0xFFE3; - } else if (c2 == 0xC5) { - w = 0x02CD; - } else if (c2 == 0xE3) { - w = 0xFF5E; - } else if (c2 == 0xF2) { - w = 0x2295; - } else if (c2 == 0xF3) { - w = 0x2299; - } else if (c2 == 0xFE) { - w = 0xFF0F; - } - } else if (c == 0xA2) { - if (c2 == 0x40) { - w = 0xFF3C; - } else if (c2 == 0x41) { - w = 0x2215; - } else if (c2 == 0x42) { - w = 0xFE68; - } else if (c2 == 0x46) { - w = 0xFFE0; - } else if (c2 == 0x47) { - w = 0xFFE1; - } else if (c2 == 0xCC) { - w = 0x5341; - } else if (c2 == 0xCE) { - w = 0x5345; - } - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_cp950(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) { - s = ucs_a1_big5_table[w - ucs_a1_big5_table_min]; - } else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) { - s = 
ucs_a2_big5_table[w - ucs_a2_big5_table_min]; - } else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) { - s = ucs_a3_big5_table[w - ucs_a3_big5_table_min]; - } else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) { - s = ucs_i_big5_table[w - ucs_i_big5_table_min]; - } else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) { - s = ucs_r1_big5_table[w - ucs_r1_big5_table_min]; - } else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) { - s = ucs_r2_big5_table[w - ucs_r2_big5_table_min]; - } - - if (w >= 0xE000 && w <= 0xF848) { - int k; - for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { - if (w <= cp950_pua_tbl[k][1]) { - break; - } - } - - int c1 = w - cp950_pua_tbl[k][0]; - if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) { - int c2 = cp950_pua_tbl[k][2] >> 8; - s = ((c1 / 157) + c2) << 8; - c1 %= 157; - s |= c1 + (c1 >= 0x3F ? 0x62 : 0x40); - } else { - s = c1 + cp950_pua_tbl[k][2]; - } - } else if (w == 0xA2 || w == 0xA3 || w == 0x401 || (w >= 0x414 && w <= 0x41C) || (w >= 0x423 && w <= 0x44F) || w == 0x451 || w == 0x2022 || w == 0x203E || w == 0x223C || (w >= 0x2460 && w <= 0x247D) || w == 0x2609 || w == 0x2641 || w == 0x3005 || (w >= 0x302A && w <= 0x30FF) || w == 0xFF64) { - s = 0; - } else if (w == 0xAF) { - s = 0xA1C2; - } else if (w == 0x2CD) { - s = 0xA1C5; - } else if (w == 0x2027) { - s = 0xA145; - } else if (w == 0x2215) { - s = 0xA241; - } else if (w == 0x2295) { - s = 0xA1F2; - } else if (w == 0x2299) { - s = 0xA1F3; - } else if (w == 0x2574) { - s = 0xA15A; - } else if (w == 0xFE51) { - s = 0xA14E; - } else if (w == 0xFE68) { - s = 0xA242; - } else if (w == 0xFF3C) { - s = 0xA240; - } else if (w == 0xFF5E) { - s = 0xA1E3; - } else if (w == 0xFFE0) { - s = 0xA246; - } else if (w == 0xFFE1) { - s = 0xA247; - } else if (w == 0xFFE3) { - s = 0xA1C3; - } else if (w == 0xFF0F) { - s = 0xA1FE; - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } else if (s <= 0x80) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_big5.h b/ext/mbstring/libmbfl/filters/mbfilter_big5.h deleted file mode 100644 index e475b6bd0c537..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_big5.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Rui Hirokawa - * - */ -/* - * The source code included in this files was separated from mbfilter_tw.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_BIG5_H -#define MBFL_MBFILTER_BIG5_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_big5; -extern const struct mbfl_convert_vtbl vtbl_big5_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_big5; - -extern const mbfl_encoding mbfl_encoding_cp950; -extern const struct mbfl_convert_vtbl vtbl_cp950_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp950; - -int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_BIG5_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.c b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c new file mode 100644 index 0000000000000..13635764326f3 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.c @@ -0,0 +1,12571 @@ +#include "mbfilter_cjk.h" + +#include "unicode_table_jis.h" +#include "unicode_table_jis2004.h" +#include "unicode_table_big5.h" +#include "unicode_table_cns11643.h" +#include "unicode_table_cp932_ext.h" +#include "unicode_table_cp936.h" +#include "unicode_table_gb18030.h" +#include "unicode_table_gb2312.h" +#include "unicode_table_uhc.h" +#include "cp932_table.h" +#include "sjis_mac2uni.h" +#include "translit_kana_jisx0201_jisx0208.h" +#include "emoji2uni.h" + +/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF + * These correspond to the letters A-Z + * To display the flag emoji for a country, two unicode codepoints are combined, + * which correspond to the two-letter code for that country + * This macro converts uppercase ASCII values to Regional Indicator codepoints */ +#define NFLAGS(c) (0x1F1A5+((unsigned int)(c))) + +static const char nflags_s[10][2] = {"CN", "DE", "ES", "FR", "GB", "IT", "JP", "KR", "RU", "US"}; +static const int nflags_code_kddi[10] = { 0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7 }; +static const int nflags_code_sb[10] = { 0x2B0A, 0x2B05, 0x2B08, 0x2B04, 0x2B07, 
0x2B06, 0x2B02, 0x2B0B, 0x2B09, 0x2B03 }; + +#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0) +#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0) + +static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"}; +static const char nflags_sb[10][2] = {"JP", "US", "FR", "DE", "IT", "GB", "ES", "RU", "CN", "KR"}; + +/* number -> (ku*94)+ten value for telephone keypad character */ +#define DOCOMO_KEYPAD(n) ((n) == 0 ? 0x296F : (0x2965 + (n))) +#define DOCOMO_KEYPAD_HASH 0x2964 + +/* `tbl` contains inclusive ranges, each represented by a pair of unsigned shorts */ +static int mbfl_bisec_srch(int w, const unsigned short *tbl, int n) +{ + int l = 0, r = n-1; + while (l <= r) { + int probe = (l + r) >> 1; + unsigned short lo = tbl[2 * probe], hi = tbl[(2 * probe) + 1]; + if (w < lo) { + r = probe - 1; + } else if (w > hi) { + l = probe + 1; + } else { + return probe; + } + } + return -1; +} + +/* `tbl` contains single values, not ranges */ +int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n) +{ + int l = 0, r = n-1; + while (l <= r) { + int probe = (l + r) >> 1; + unsigned short val = tbl[probe]; + if (w < val) { + r = probe - 1; + } else if (w > val) { + l = probe + 1; + } else { + return probe; + } + } + return -1; +} + +#define SJIS_ENCODE(c1,c2,s1,s2) \ + do { \ + s1 = ((c1 - 1) >> 1) + ((c1) < 0x5F ? 
0x71 : 0xB1); \ + s2 = c2; \ + if ((c1) & 1) { \ + if ((c2) < 0x60) { \ + s2--; \ + } \ + s2 += 0x20; \ + } else { \ + s2 += 0x7e; \ + } \ + } while (0) + +#define SJIS_DECODE(c1,c2,s1,s2) \ + do { \ + if (c1 < 0xa0) { \ + s1 = ((c1 - 0x81) << 1) + 0x21; \ + } else { \ + s1 = ((c1 - 0xc1) << 1) + 0x21; \ + } \ + s2 = c2; \ + if (c2 < 0x9f) { \ + if (c2 < 0x7f) { \ + s2++; \ + } \ + s2 -= 0x20; \ + } else { \ + s1++; \ + s2 -= 0x7e; \ + } \ + } while (0) + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * ISO-2022 variants + */ + +#define ASCII 0 +#define JISX0201_KANA 0x20 +#define JISX0208_KANJI 0x80 + +static int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ + CK((*filter->output_function)(0xa5, filter->data)); + } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ + CK((*filter->output_function)(0x203e, filter->data)); + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 
0212 second char */ + case 1: + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (filter->status == 0x80) { + if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } else { + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0x92: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0x93: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0x94: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ +/* case 0x95: */ + case 5: + 
if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + /* 2-byte (JIS X 0208 or 0212) character was truncated, + * or else escape sequence was truncated */ + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + if (c == 0) { + s = 0; + } else if (s 
<= 0) { + s = -1; + } + } + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0; + CK((*filter->output_function)(s, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0x200; + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else if (s < 0x10000) { /* X 0212 */ + if ((filter->status & 0xff00) != 0x300) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x44, filter->data)); /* 'D' */ + } + filter->status = 0x300; + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter) +{ + int s; + + s = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c 
< ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { + s = -1; + } + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0; + CK((*filter->output_function)(s, filter->data)); + } else if (s < 0x10000) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0x200; + CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); + CK((*filter->output_function)(s & 0x7f, filter->data)); + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 
0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } + + return 0; +} + +#define ASCII 0 +#define JISX_0201_LATIN 1 +#define JISX_0201_KANA 2 +#define JISX_0208 3 +#define JISX_0212 4 + +static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + ZEND_ASSERT(bufsize >= 3); + + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + /* ESC seen; this is an escape sequence */ + if ((e - p) < 2) { + *out++ = MBFL_BAD_INPUT; + if (p != e && (*p == '$' || *p == '(')) + p++; + continue; + } + + unsigned char c2 = *p++; + if (c2 == '$') { + unsigned char c3 = *p++; + if (c3 == '@' || c3 == 'B') { + *state = JISX_0208; + } else if (c3 == '(') { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c4 = *p++; + if (c4 == '@' || c4 == 'B') { + *state = JISX_0208; + } else if (c4 == 'D') { + *state = JISX_0212; + } else { + if ((limit - out) < 3) { + p -= 4; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '$'; + *out++ = '('; + p--; + } + } else { + if ((limit - out) < 2) { + p -= 3; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '$'; + p--; + } + } else if (c2 == '(') { + unsigned char c3 = *p++; + if (c3 == 'B' || c3 == 'H') { + *state = ASCII; + } else if (c3 == 'J') { + *state = JISX_0201_LATIN; + } else if (c3 == 'I') { + *state = JISX_0201_KANA; + } else { + if ((limit - out) < 2) { + p -= 3; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '('; + p--; + } + } else { + *out++ = MBFL_BAD_INPUT; + p--; + } + } else if (c == 0xE) { + /* "Kana In" marker; this is just for JIS-7/8, but we also accept it for ISO-2022-JP */ + *state = JISX_0201_KANA; + } else if (c == 0xF) { + /* "Kana Out" marker */ + *state = ASCII; + } else if (*state == JISX_0201_LATIN && c == 0x5C) { /* YEN SIGN */ + *out++ = 0xA5; + } else if (*state == JISX_0201_LATIN && c 
== 0x7E) { /* OVER LINE */ + *out++ = 0x203E; + } else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) { + *out++ = 0xFF40 + c; + } else if (*state >= JISX_0208 && c > 0x20 && c < 0x7F) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + if (c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + uint32_t w = 0; + if (*state == JISX_0208) { + if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + if (!w) { + w = MBFL_BAD_INPUT; + } + } else { + if (s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } + if (!w) { + w = MBFL_BAD_INPUT; + } + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + /* GR-invoked Kana; "GR" stands for "graphics right" and refers to bytes + * with the MSB bit (in the context of ISO-2022 encoding). + * + * In this regard, Wikipedia states: + * "Other, older variants known as JIS7 and JIS8 build directly on the 7-bit and 8-bit + * encodings defined by JIS X 0201 and allow use of JIS X 0201 kana from G1 without + * escape sequences, using Shift Out and Shift In or setting the eighth bit + * (GR-invoked), respectively." 
+ * + * Note that we support both the 'JIS7' use of 0xE/0xF Shift In/Shift Out codes + * and the 'JIS8' use of GR-invoked Kana */ + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x1005C; + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w != 0) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + + if (s < 0x80) { /* ASCII */ + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + 
buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s < 0x8080) { /* JIS X 0208 */ + if (buf->state != JISX_0208) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5); + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX_0208; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); + } else if (s < 0x10000) { /* JIS X 0212 */ + if (buf->state != JISX_0212) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6); + out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D'); + buf->state = JISX_0212; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); + } else { /* X 0201 Latin */ + if (buf->state != JISX_0201_LATIN) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); + buf->state = JISX_0201_LATIN; + } + out = mb_convert_buf_add(out, s & 0x7F); + } + } + + if (end && buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x1005C; + } else if (w == 0xFF3C) { 
/* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w != 0) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + } + + if (s < 0x80) { /* ASCII */ + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA1 && s <= 0xDF) { + if (buf->state != JISX_0201_KANA) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); + buf->state = JISX_0201_KANA; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else if (s < 0x8080) { /* JIS X 0208 */ + if (buf->state != JISX_0208) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5); + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX_0208; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); + } else if (s < 0x10000) { /* JIS X 0212 */ + if (buf->state != JISX_0212) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6); + out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D'); + buf->state = JISX_0212; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); + } else { /* X 0201 Latin */ + if (buf->state != JISX_0201_LATIN) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); + buf->state = JISX_0201_LATIN; + } + out = mb_convert_buf_add(out, s & 0x7F); + } + } + + if (end && buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + 
MB_CONVERT_BUF_STORE(buf, out, limit); +} + +#define JISX_0201_KANA_SO 5 + +static bool mb_check_jis(unsigned char *in, size_t in_len) +{ + unsigned char *p = in, *e = p + in_len; + unsigned int state = ASCII; + + while (p < e) { + unsigned char c = *p++; + if (c == 0x1B) { + /* ESC seen; this is an escape sequence */ + if (state == JISX_0201_KANA_SO) { + return false; + } + if ((e - p) < 2) { + return false; + } + unsigned char c2 = *p++; + if (c2 == '$') { + unsigned char c3 = *p++; + if (c3 == '@' || c3 == 'B') { + state = JISX_0208; + } else if (c3 == '(') { + if (p == e) { + return false; + } + unsigned char c4 = *p++; + if (c4 == '@' || c4 == 'B') { + state = JISX_0208; + } else if (c4 == 'D') { + state = JISX_0212; + } else { + return false; + } + } else { + return false; + } + } else if (c2 == '(') { + unsigned char c3 = *p++; + /* ESC ( H is treated as a sequence transitioning to ASCII for historical reasons. + * see https://github.com/php/php-src/pull/10828#issuecomment-1478342432. 
*/ + if (c3 == 'B' || c3 == 'H') { + state = ASCII; + } else if (c3 == 'J') { + state = JISX_0201_LATIN; + } else if (c3 == 'I') { + state = JISX_0201_KANA; + } else { + return false; + } + } else { + return false; + } + } else if (c == 0xE) { + /* "Kana In" marker */ + if (state != ASCII) { + return false; + } + state = JISX_0201_KANA_SO; + } else if (c == 0xF) { + /* "Kana Out" marker */ + if (state != JISX_0201_KANA_SO) { + return false; + } + state = ASCII; + } else if ((state == JISX_0208 || state == JISX_0212) && (c > 0x20 && c < 0x7F)) { + if (p == e) { + return false; + } + unsigned char c2 = *p++; + if (c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + if (state == JISX_0208) { + if (s < jisx0208_ucs_table_size && jisx0208_ucs_table[s]) { + continue; + } + } else { + if (s < jisx0212_ucs_table_size && jisx0212_ucs_table[s]) { + continue; + } + } + return false; + } else { + return false; + } + } else if (c < 0x80) { + continue; + } else if (c >= 0xA1 && c <= 0xDF) { + /* GR-invoked Kana */ + continue; + } else { + return false; + } + } + + return state == ASCII; +} + +static bool mb_check_iso2022jp(unsigned char *in, size_t in_len) +{ + unsigned char *p = in, *e = p + in_len; + unsigned int state = ASCII; + + while (p < e) { + unsigned char c = *p++; + if (c == 0x1B) { + /* ESC seen; this is an escape sequence */ + if ((e - p) < 2) { + return false; + } + unsigned char c2 = *p++; + if (c2 == '$') { + unsigned char c3 = *p++; + if (c3 == '@' || c3 == 'B') { + state = JISX_0208; + } else { + return false; + } + } else if (c2 == '(') { + unsigned char c3 = *p++; + if (c3 == 'B') { + state = ASCII; + } else if (c3 == 'J') { + state = JISX_0201_LATIN; + } else { + return false; + } + } else { + return false; + } + } else if (c == 0xE || c == 0xF) { + /* "Kana In" or "Kana Out" marker; ISO-2022-JP is not accepted. 
*/ + return false; + } else if (state == JISX_0208 && (c > 0x20 && c < 0x7F)) { + if (p == e) { + return false; + } + unsigned char c2 = *p++; + if (c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + if (s < jisx0208_ucs_table_size && jisx0208_ucs_table[s]) { + continue; + } + return false; + } else { + return false; + } + } else if (c < 0x80) { + continue; + } else { + return false; + } + } + + return state == ASCII; +} + +/* Unicode codepoints for emoji are above 0x1F000, but we only store 16-bits + * in our tables. Therefore, add 0x10000 to recover the true values. + * + * Again, for some emoji which are not supported by Unicode, we use codepoints + * in the Private Use Area above 0xFE000. Again, add 0xF0000 to recover the + * true value. */ +static inline int convert_emoji_cp(int cp) +{ + if (cp > 0xF000) + return cp + 0x10000; + else if (cp > 0xE000) + return cp + 0xF0000; + return cp; +} + +int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd) +{ + if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) { + if (s == 0x24C0) { /* Spain */ + EMIT_FLAG_EMOJI("ES"); + } else if (s == 0x24C1) { /* Russia */ + EMIT_FLAG_EMOJI("RU"); + } else if (s >= 0x2545 && s <= 0x254A) { + EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]); + } else if (s == 0x25BC) { + EMIT_KEYPAD_EMOJI('#'); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]); + } + } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) { + if (s == 0x2750) { /* Japan */ + EMIT_FLAG_EMOJI("JP"); + } else if (s >= 0x27A6 && s <= 0x27AE) { + EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1'); + } else if (s == 0x27F7) { /* United States */ + EMIT_FLAG_EMOJI("US"); + } else if (s == 0x2830) { + EMIT_KEYPAD_EMOJI('0'); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]); + } + } + return 0; +} + +static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter) +{ 
+ int c1, s, w, snd = 0; + + switch (filter->status & 0xF) { + case 0: + if (c == 0x1B) { + filter->status += 2; + } else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) { + CK((*filter->output_function)(0xFF40 + c, filter->data)); + } else if (filter->status == JISX0208_KANJI && c > 0x20 && c < 0x80) { + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* ASCII */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xA0 && c < 0xE0) { /* Kana */ + CK((*filter->output_function)(0xFEC0 + c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* JISX 0208, second byte */ + case 1: + w = 0; + filter->status &= ~0xF; + c1 = filter->cache; + if (c > 0x20 && c < 0x7F) { + s = ((c1 - 0x21) * 94) + c - 0x21; + + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (s >= (84 * 94) && s < (91 * 94)) { + s += 22 * 94; + w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); + if (w > 0 && snd > 0) { + (*filter->output_function)(snd, filter->data); + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC */ + case 2: + if (c == '$') { + filter->status++; + } else if (c == '(') { + 
filter->status += 3; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $ */ + case 3: + if (c == '@' || c == 'B') { + filter->status = JISX0208_KANJI; + } else if (c == '(') { + filter->status++; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $ ( */ + case 4: + if (c == '@' || c == 'B') { + filter->status = JISX0208_KANJI; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC ( */ + case 5: + if (c == 'B' || c == 'J') { + filter->status = 0; /* ASCII mode */ + } else if (c == 'I') { + filter->status = JISX0201_KANA; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + + return 0; +} + +static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter) +{ + if ((filter->status & 0xF) == 1) { + int c1 = filter->cache; + filter->cache = 0; + filter->status &= ~0xFF; + if (c == 0x20E3) { + if (c1 == '#') { + *s1 = 0x25BC; + } else if (c1 == '0') { + *s1 = 0x2830; + } else { /* Previous character was '1'-'9' */ + *s1 = 0x27A6 + (c1 - '1'); + } + return 1; + } else { + if (filter->status & 0xFF00) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + CK((*filter->output_function)(c1, filter->data)); + filter->status = 0; + } + } + + if (c == '#' || (c >= '0' && c <= '9')) { + filter->status |= 1; + filter->cache = c; + return 0; + } + + if (c == 0xA9) { /* 
Copyright sign */ + *s1 = 0x27DC; + return 1; + } else if (c == 0xAE) { /* Registered sign */ + *s1 = 0x27DD; + return 1; + } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) { + int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code2_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) { + int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code3_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) { + int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code5_val[i]; + return 1; + } + } + return 0; +} + +/* (ku*94)+ten value -> Shift-JIS byte sequence */ +#define CODE2JIS(c1,c2,s1,s2) \ + c1 = (s1)/94+0x21; \ + c2 = (s1)-94*((c1)-0x21)+0x21; \ + s1 = ((c1) << 8) | (c2); \ + s2 = 1 + +static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0, s2 = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + + if (s1 <= 0) { + if (c == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xFFE0) { /* FULLWIDTH CENT 
SIGN */ + s1 = 0x2171; + } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + + if (mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) { + /* A KDDI emoji was detected and stored in s1 */ + CODE2JIS(c1,c2,s1,s2); + s1 -= 0x1600; + } else if ((filter->status & 0xFF) == 1 && filter->cache) { + /* We are just processing one of KDDI's special emoji for a phone keypad button */ + return 0; + } + + if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { + if (c == cp932ext1_ucs_table[c1]) { + s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; + break; + } + } + + if (c == 0) { + s1 = 0; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* ASCII */ + if (filter->status & 0xFF00) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + CK((*filter->output_function)(s1, filter->data)); + filter->status = 0; + } else if (s1 > 0xA0 && s1 < 0xE0) { /* Kana */ + if ((filter->status & 0xFF00) != 0x100) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('I', filter->data)); + } + filter->status = 0x100; + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } else if (s1 < 0x7E7F) { /* JIS X 0208 */ + if ((filter->status & 0xFF00) != 0x200) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + filter->status = 0x200; + CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int 
mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter) +{ + /* Go back to ASCII mode (so strings can be safely concatenated) */ + if (filter->status & 0xFF00) { + (*filter->output_function)(0x1B, filter->data); /* ESC */ + (*filter->output_function)('(', filter->data); + (*filter->output_function)('B', filter->data); + } + + int c1 = filter->cache; + if ((filter->status & 0xFF) == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) { + (*filter->output_function)(c1, filter->data); + } + filter->status = filter->cache = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + if ((e - p) < 2) { + p = e; + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + unsigned char c3 = *p++; + + if (c2 == '$') { + if (c3 == '@' || c3 == 'B') { + *state = JISX0208_KANJI; + } else if (c3 == '(') { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c4 = *p++; + + if (c4 == '@' || c4 == 'B') { + *state = JISX0208_KANJI; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c2 == '(') { + if (c3 == 'B' || c3 == 'J') { + *state = ASCII; + } else if (c3 == 'I') { + *state = JISX0201_KANA; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + p--; + *out++ = MBFL_BAD_INPUT; + } + } else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) { + *out++ = 0xFF40 + c; + } else if (*state == JISX0208_KANJI && c >= 0x21 && c <= 0x7F) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + + if (c2 >= 0x21 && c2 <= 0x7E) { + unsigned int s = ((c - 0x21) * 94) + c2 - 0x21; + uint32_t w = 0; + + if (s <= 137) { + if (s == 
31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (s >= (84 * 94) && s < (91 * 94)) { + int snd = 0; + s += 22 * 94; + w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); + if (w && snd) { + *out++ = snd; + } + } + + if (!w) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE 
SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if ((w == '#' || (w >= '0' && w <= '9')) && len) { + uint32_t w2 = *in++; len--; + + if (w2 == 0x20E3) { + unsigned int s1 = 0; + if (w == '#') { + s1 = 0x25BC; + } else if (w == '0') { + s1 = 0x2830; + } else { /* Previous character was '1'-'9' */ + s1 = 0x27A6 + (w - '1'); + } + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } else { + in--; len++; + } + } else if (w >= NFLAGS('C') && w <= NFLAGS('U') && len) { /* C for CN, U for US */ + uint32_t w2 = *in++; len--; + + if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { + unsigned int s1 = nflags_code_kddi[i]; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + goto found_flag_emoji; + } + } + } + + in--; len++; +found_flag_emoji: ; + } + + if (w == 0xA9) { /* Copyright sign */ + unsigned int s1 = 0x27DC; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } else if (w == 0xAE) { /* Registered sign */ + unsigned int s1 = 0x27DD; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) { + int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); + if (i >= 0) { + unsigned int s1 = mb_tbl_uni_kddi2code2_value[i]; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } + } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) { + int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); + if (i >= 0) { + unsigned int 
s1 = mb_tbl_uni_kddi2code3_value[i]; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } + } else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) { + int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); + if (i >= 0) { + unsigned int s1 = mb_tbl_uni_kddi2code5_val[i]; + s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; + } + } + + if (!s || s >= 0xA1A1) { + s = 0; + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + break; + } + } + if (w == 0) + s = 0; + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7F) { + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA1 && s <= 0xDF) { + if (buf->state != JISX0201_KANA) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); + buf->state = JISX0201_KANA; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else if (s <= 0x7E7E) { + if (buf->state != JISX0208_KANJI) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX0208_KANJI; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } + + if (end && buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter) +{ + 
int k; + int c1, c2, s, s1 = 0, s2 = 0, w = 0, w1; + + switch (filter->status & 0xf) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { + CK((*filter->output_function)(c, filter->data)); + } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { + if (c == 0x5c) { + CK((*filter->output_function)(0x00a5, filter->data)); + } else if (c == 0x7e) { + CK((*filter->output_function)(0x203e, filter->data)); + } else { + CK((*filter->output_function)(c, filter->data)); + } + } else { /* ISO-2022-JP-2004 */ + if (c == 0x1b) { + filter->status += 6; + } else if ((filter->status == 0x80 || filter->status == 0x90 || filter->status == 0xa0) + && c > 0x20 && c < 0x7f) { /* kanji first char */ + filter->cache = c; + if (filter->status == 0x90) { + filter->status += 1; /* JIS X 0213 plane 1 */ + } else if (filter->status == 0xa0) { + filter->status += 4; /* JIS X 0213 plane 2 */ + } else { + filter->status += 5; /* JIS X 0208 */ + } + } else { + CK((*filter->output_function)(c, filter->data)); + } + } + } else { + if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { + if (c > 0xa0 && c < 0xff) { /* X 0213 plane 1 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->cache = 0x8E; /* So error will be reported if input is truncated right here */ + filter->status = 2; + } else if (c == 0x8f) { /* X 0213 plane 2 first char */ + filter->status = 3; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { + if (c > 0xa0 && c < 0xe0) { /* kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } else { + 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + break; + + case 1: /* kanji second char */ + filter->status &= ~0xf; + c1 = filter->cache; + + if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { + if (c > 0xa0 && c < 0xff) { + s1 = c1 - 0x80; + s2 = c - 0x80; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + break; + } + } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { + SJIS_DECODE(c1, c, s1, s2); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + break; + } + } else { /* ISO-2022-JP-2004 */ + if (c >= 0x21 && c <= 0x7E) { + s1 = c1; + s2 = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + break; + } + } + w1 = (s1 << 8) | s2; + + /* conversion for combining characters */ + if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || + (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || + (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { + k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); + if (k >= 0) { + w = jisx0213_u2_tbl[2*k]; + CK((*filter->output_function)(w, filter->data)); + w = jisx0213_u2_tbl[2*k+1]; + } + } + + /* conversion for BMP */ + if (w <= 0) { + w1 = (s1 - 0x21)*94 + s2 - 0x21; + if (w1 >= 0 && w1 < jisx0213_ucs_table_size) { + w = jisx0213_ucs_table[w1]; + } + } + + /* conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ + if (w <= 0) { + k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + break; + + case 2: /* got 0x8e: EUC-JP-2004 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, 
filter->data)); + } + break; + + case 3: /* X 0213 plane 2 first char: EUC-JP-2004 (0x8f) */ + if (c == 0xA1 || (c >= 0xA3 && c <= 0xA5) || c == 0xA8 || (c >= 0xAC && c <= 0xAF) || (c >= 0xEE && c <= 0xFE)) { + filter->cache = c - 0x80; + filter->status++; + } else { + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 4: /* X 0213 plane 2 second char: EUC-JP-2004, ISO-2022-JP-2004 */ + filter->status &= ~0xF; + c1 = filter->cache; + if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { + c2 = c - 0x80; + } else { + c2 = c; + } + + if (c2 < 0x21 || c2 > 0x7E) { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + break; + } + + s1 = c1 - 0x21; + s2 = c2 - 0x21; + + if (((s1 >= 0 && s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || + (s1 >= 77 && s1 < 94)) && s2 >= 0 && s2 < 94) { + /* calc offset from ku */ + for (k = 0; k < jisx0213_p2_ofst_len; k++) { + if (s1 == jisx0213_p2_ofst[k]) { + break; + } + } + k -= jisx0213_p2_ofst[k]; + + /* check for japanese chars in BMP */ + s = (s1 + 94 + k)*94 + s2; + ZEND_ASSERT(s < jisx0213_ucs_table_size); + w = jisx0213_ucs_table[s]; + + /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ + if (w <= 0) { + k = mbfl_bisec_srch2(s, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 5: /* X 0208: ISO-2022-JP-2004 */ + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + break; + + /* ESC: ISO-2022-JP-2004 */ +/* case 0x06: */ +/* case 0x16: */ 
+/* case 0x26: */ +/* case 0x86: */ +/* case 0x96: */ +/* case 0xa6: */ + case 6: + if (c == '$') { + filter->status++; + } else if (c == '(') { + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $: ISO-2022-JP-2004 */ +/* case 0x07: */ +/* case 0x17: */ +/* case 0x27: */ +/* case 0x87: */ +/* case 0x97: */ +/* case 0xa7: */ + case 7: + if (c == 'B') { /* JIS X 0208-1983 */ + filter->status = 0x80; + } else if (c == '(') { + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $ (: ISO-2022-JP-2004 */ +/* case 0x08: */ +/* case 0x18: */ +/* case 0x28: */ +/* case 0x88: */ +/* case 0x98: */ +/* case 0xa8: */ + case 8: + if (c == 'Q') { /* JIS X 0213 plane 1 */ + filter->status = 0x90; + } else if (c == 'P') { /* JIS X 0213 plane 2 */ + filter->status = 0xa0; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC (: ISO-2022-JP-2004 */ +/* case 0x09: */ +/* case 0x19: */ +/* case 0x29: */ +/* case 0x89: */ +/* case 0x99: */ + case 9: + if (c == 'B') { + filter->status = 0; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) +{ + int k; + int c1, c2, s1, s2; + +retry: + s1 = 0; + /* check for 1st char of combining characters */ + if ((filter->status & 0xf) == 0 && ( + c == 0x00E6 || + (c >= 0x0254 && c <= 0x02E9) || + (c >= 
0x304B && c <= 0x3053) || + (c >= 0x30AB && c <= 0x30C8) || + c == 0x31F7)) { + for (k = 0; k < jisx0213_u2_tbl_len; k++) { + if (c == jisx0213_u2_tbl[2*k]) { + filter->status++; + filter->cache = k; + return 0; + } + } + } + + /* check for 2nd char of combining characters */ + if ((filter->status & 0xf) == 1 && filter->cache >= 0 && filter->cache < jisx0213_u2_tbl_len) { + k = filter->cache; + filter->status &= ~0xf; + filter->cache = 0; + + c1 = jisx0213_u2_tbl[2*k]; + if ((c1 == 0x0254 || c1 == 0x028C || c1 == 0x0259 || c1 == 0x025A) && c == 0x0301) { + k++; + } + if (c == jisx0213_u2_tbl[2*k+1]) { + s1 = jisx0213_u2_key[k]; + } else { /* fallback */ + s1 = jisx0213_u2_fb_tbl[k]; + + if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + s2 = (s1 & 0xff) + 0x80; + s1 = ((s1 >> 8) & 0xff) + 0x80; + } else { + if (filter->status != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('Q', filter->data)); + } + filter->status = 0x200; + + s2 = s1 & 0x7f; + s1 = (s1 >> 8) & 0x7f; + } + + /* Flush out cached data */ + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + goto retry; + } + } + + /* check for major japanese chars: U+4E00 - U+9FFF */ + if (s1 <= 0) { + for (k = 0; k < uni2jis_tbl_len; k++) { + if (c >= uni2jis_tbl_range[k][0] && c <= uni2jis_tbl_range[k][1]) { + s1 = uni2jis_tbl[k][c-uni2jis_tbl_range[k][0]]; + break; + } + } + } + + /* check for japanese chars in compressed mapping area: U+1E00 - U+4DBF */ + if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) { + k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); + if (k >= 0) { + s1 = ucs_c1_jisx0213_ofst[k] + c - 
ucs_c1_jisx0213_tbl[2*k]; + } + } + + /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ + if (s1 <= 0 && c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) { + k = mbfl_bisec_srch2(c - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); + if (k >= 0) { + s1 = jisx0213_u5_jis_tbl[k]; + } + } + + if (s1 <= 0) { + /* CJK Compatibility Forms: U+FE30 - U+FE4F */ + if (c == 0xfe45) { + s1 = 0x233e; + } else if (c == 0xfe46) { + s1 = 0x233d; + } else if (c >= 0xf91d && c <= 0xf9dc) { + /* CJK Compatibility Ideographs: U+F900 - U+F92A */ + k = mbfl_bisec_srch2(c, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); + if (k >= 0) { + s1 = ucs_r2b_jisx0213_cmap_val[k]; + } + } + } + + if (s1 <= 0) { + if (c == 0) { + s1 = 0; + } else { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* ASCII */ + if (filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 && (filter->status & 0xff00)) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + filter->status = 0; + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* latin or kana */ + if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (filter->to->no_encoding == mbfl_no_encoding_sjis2004 && (s1 >= 0xA1 && s1 <= 0xDF)) { + CK((*filter->output_function)(s1, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else if (s1 < 0x7f00) { /* X 0213 plane 1 */ + if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + s2 = (s1 & 0xff) + 0x80; + s1 = ((s1 >> 8) & 0xff) + 0x80; + } else { + if ((filter->status & 0xff00) != 0x200) { + 
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('Q', filter->data)); + } + filter->status = 0x200; + s2 = s1 & 0xff; + s1 = (s1 >> 8) & 0xff; + } + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } else { /* X 0213 plane 2 */ + if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + } else { + s2 = s1 & 0xff; + k = ((s1 >> 8) & 0xff) - 0x7f; + if (k >= 0 && k < jisx0213_p2_ofst_len) { + s1 = jisx0213_p2_ofst[k] + 0x21; + } + if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + s2 |= 0x80; + s1 |= 0x80; + CK((*filter->output_function)(0x8f, filter->data)); + } else { + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('P', filter->data)); + } + filter->status = 0x200; + } + } + + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter) +{ + int k, c1, c2, s1, s2; + + k = filter->cache; + filter->cache = 0; + + if (filter->status == 1 && k >= 0 && k <= jisx0213_u2_tbl_len) { + s1 = jisx0213_u2_fb_tbl[k]; + + if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { + s2 = (s1 & 0xff) | 0x80; + s1 = ((s1 >> 8) & 0xff) | 0x80; + } else { + s2 = s1 & 0x7f; + s1 = (s1 >> 8) & 0x7f; + if ((filter->status & 0xff00) != 0x200) { + 
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('Q', filter->data)); + } + filter->status = 0x200; + } + + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + + /* If we had switched to a different charset, go back to ASCII mode + * This makes it possible to concatenate arbitrary valid strings + * together and get a valid string */ + if (filter->status & 0xff00) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + + filter->status = 0; + + if (filter->flush_function) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +#define ASCII 0 +#define JISX0208 1 +#define JISX0213_PLANE1 2 +#define JISX0213_PLANE2 3 + +static size_t mb_iso2022jp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + if (c == 0x1B) { + if ((e - p) < 2) { + *out++ = MBFL_BAD_INPUT; + p = e; + break; + } + unsigned char c2 = *p++; + unsigned char c3 = *p++; + if (c2 == '$') { + if (c3 == 'B') { + *state = JISX0208; + } else if (c3 == '(') { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c4 = *p++; + if (c4 == 'Q') { + *state = JISX0213_PLANE1; + } else if (c4 == 'P') { + *state = JISX0213_PLANE2; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c2 == '(') { + if (c3 == 'B') { + *state = ASCII; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + p--; + *out++ = MBFL_BAD_INPUT; + } + } else if (*state >= JISX0208 && c > 0x20 && c < 0x7F) { + if (p 
== e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + if (c2 < 0x21 || c2 > 0x7E) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + if (*state == JISX0213_PLANE1) { + unsigned int w1 = (c << 8) | c2; + + /* Conversion for combining characters */ + if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { + int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); + if (k >= 0) { + *out++ = jisx0213_u2_tbl[2*k]; + *out++ = jisx0213_u2_tbl[2*k+1]; + continue; + } + } + + /* Conversion for BMP */ + uint32_t w = 0; + w1 = (c - 0x21)*94 + c2 - 0x21; + if (w1 < jisx0213_ucs_table_size) { + w = jisx0213_ucs_table[w1]; + } + + /* Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ + if (!w) { + int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } else if (*state == JISX0213_PLANE2) { + + unsigned int s1 = c - 0x21, s2 = c2 - 0x21; + + if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) { + int k; + for (k = 0; k < jisx0213_p2_ofst_len; k++) { + if (s1 == jisx0213_p2_ofst[k]) { + break; + } + } + k -= jisx0213_p2_ofst[k]; + + /* Check for Japanese chars in BMP */ + unsigned int s = (s1 + 94 + k)*94 + s2; + ZEND_ASSERT(s < jisx0213_ucs_table_size); + uint32_t w = jisx0213_ucs_table[s]; + + /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ + if (!w) { + k = mbfl_bisec_srch2(s, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? 
w : MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { /* state == JISX0208 */ + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + uint32_t w = 0; + if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } + *out++ = w ? w : MBFL_BAD_INPUT; + } + } else { + *out++ = c; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_iso2022jp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + uint32_t w; + if (buf->state & 0xFF00) { + int k = (buf->state >> 8) - 1; + w = jisx0213_u2_tbl[2*k]; + buf->state &= 0xFF; + goto process_codepoint; + } + + while (len--) { + w = *in++; +process_codepoint: ; + unsigned int s = 0; + + if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { + for (int k = 0; k < jisx0213_u2_tbl_len; k++) { + if (w == jisx0213_u2_tbl[2*k]) { + if (!len) { + if (!end) { + buf->state |= (k+1) << 8; + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + } else { + uint32_t w2 = *in++; len--; + if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { + k++; + } + if (w2 == jisx0213_u2_tbl[2*k+1]) { + s = jisx0213_u2_key[k]; + break; + } + in--; len++; + } + + s = jisx0213_u2_fb_tbl[k]; + break; + } + } + } + + /* Check for major Japanese chars: U+4E00-U+9FFF */ + if (!s) { + for (int k = 0; k < uni2jis_tbl_len; k++) { + if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { + s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; + break; + } + } + } + + /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ + if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { + int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); + if (k >= 0) { + s = ucs_c1_jisx0213_ofst[k] + w - 
ucs_c1_jisx0213_tbl[2*k]; + } + } + + /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ + if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { + int k = mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); + if (k >= 0) { + s = jisx0213_u5_jis_tbl[k]; + } + } + + if (!s) { + /* CJK Compatibility Forms: U+FE30-U+FE4F */ + if (w == 0xFE45) { + s = 0x233E; + } else if (w == 0xFE46) { + s = 0x233D; + } else if (w >= 0xF91D && w <= 0xF9DC) { + /* CJK Compatibility Ideographs: U+F900-U+F92A */ + int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); + if (k >= 0) { + s = ucs_r2b_jisx0213_cmap_val[k]; + } + } + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp2004); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7F) { + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s <= 0xFF) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp2004); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7EFF) { + if (buf->state != JISX0213_PLANE1) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); + out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'Q'); + buf->state = JISX0213_PLANE1; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else { + if (buf->state != JISX0213_PLANE2) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); + out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'P'); + buf->state = JISX0213_PLANE2; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + unsigned int s2 = s & 0xFF; + int k = ((s >> 8) & 0xFF) - 0x7F; + ZEND_ASSERT(k < jisx0213_p2_ofst_len); + s = jisx0213_p2_ofst[k] + 0x21; + out = mb_convert_buf_add2(out, s, s2); + } + } + + if (end && buf->state != 
ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0x90: X 0212 */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if (c == 0x0e) { /* "kana in" */ + filter->status = 0x20; + } else if (c == 0x0f) { /* "kana out" */ + filter->status = 0; + } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ + CK((*filter->output_function)(0xa5, filter->data)); + } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ + CK((*filter->output_function)(0x203e, filter->data)); + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c <= 0x97) { /* kanji first char */ + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0x91: X 0212 second char */ + case 1: + filter->status &= ~0xf; + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; + if (filter->status == 0x80) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - 
cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= 94 * 94 && s < 114 * 94) { + /* user-defined => PUA (Microsoft extended) */ + w = s - 94*94 + 0xe000; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } else { + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0x92: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0x93: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0x94: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x44) { /* 'D' */ + filter->status = 0x90; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ 
+/* case 0x95: */ + case 5: + if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0x10; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + /* 2-byte (JIS X 0208 or 0212) character was truncated, or else + * escape sequence was truncated */ + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static const unsigned char hankana2zenkana_table[64] = { + 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5, + 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6, + 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9, + 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC, + 0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0, + 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, + 0xEF,0xF3,0x9B,0x9C +}; + +static const unsigned char hankana2zenhira_table[64] = { + 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45, + 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46, + 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59, + 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C, + 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80, + 0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D, + 0x8F,0x93,0x9B,0x9C +}; + +static const unsigned char zenkana2hankana_table[84][2] = { + {0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00}, + {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00}, + {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00}, + {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E}, + 
{0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00}, + {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E}, + {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00}, + {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00}, + {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00}, + {0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00}, + {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F}, + {0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E}, + {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00}, + {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00}, + {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00}, + {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00}, + {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E} +}; + +/* Apply various transforms to input codepoint, such as converting halfwidth katakana + * to fullwidth katakana. `mode` is a bitfield which controls which transforms are + * actually performed. The bit values are defined in translit_kana_jisx0201_jisx0208.h. + * `mode` must not call for transforms which are inverses (i.e. which would cancel + * each other out). + * + * In some cases, successive input codepoints may be merged into one output codepoint. + * (That is the purpose of the `next` parameter.) If the `next` codepoint is consumed + * and should be skipped over, `*consumed` will be set to true. Otherwise, `*consumed` + * will not be modified. If there is no following codepoint, `next` should be zero. + * + * Again, in some cases, one input codepoint may convert to two output codepoints. + * If so, the second output codepoint will be stored in `*second`. + * + * Return the resulting codepoint. If none of the requested transforms apply, return + * the input codepoint unchanged. 
+ */ +uint32_t mb_convert_kana_codepoint(uint32_t c, uint32_t next, bool *consumed, uint32_t *second, unsigned int mode) +{ + if ((mode & MBFL_HAN2ZEN_ALL) && c >= 0x21 && c <= 0x7D && c != '"' && c != '\'' && c != '\\') { + return c + 0xFEE0; + } + if ((mode & MBFL_HAN2ZEN_ALPHA) && ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { + return c + 0xFEE0; + } + if ((mode & MBFL_HAN2ZEN_NUMERIC) && c >= '0' && c <= '9') { + return c + 0xFEE0; + } + if ((mode & MBFL_HAN2ZEN_SPACE) && c == ' ') { + return 0x3000; + } + + if (mode & (MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_HIRAGANA)) { + /* Convert Hankaku kana to Zenkaku kana + * Either all Hankaku kana (including katakana and hiragana) will be converted + * to Zenkaku katakana, or to Zenkaku hiragana */ + if ((mode & MBFL_HAN2ZEN_KATAKANA) && (mode & MBFL_HAN2ZEN_GLUE)) { + if (c >= 0xFF61 && c <= 0xFF9F) { + int n = c - 0xFF60; + + if (next >= 0xFF61 && next <= 0xFF9F) { + if (next == 0xFF9E && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { + *consumed = true; + return 0x3001 + hankana2zenkana_table[n]; + } + if (next == 0xFF9E && n == 19) { + *consumed = true; + return 0x30F4; + } + if (next == 0xFF9F && n >= 42 && n <= 46) { + *consumed = true; + return 0x3002 + hankana2zenkana_table[n]; + } + } + + return 0x3000 + hankana2zenkana_table[n]; + } + } + if ((mode & MBFL_HAN2ZEN_HIRAGANA) && (mode & MBFL_HAN2ZEN_GLUE)) { + if (c >= 0xFF61 && c <= 0xFF9F) { + int n = c - 0xFF60; + + if (next >= 0xFF61 && next <= 0xFF9F) { + if (next == 0xFF9E && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { + *consumed = true; + return 0x3001 + hankana2zenhira_table[n]; + } + if (next == 0xFF9F && n >= 42 && n <= 46) { + *consumed = true; + return 0x3002 + hankana2zenhira_table[n]; + } + } + + return 0x3000 + hankana2zenhira_table[n]; + } + } + if ((mode & MBFL_HAN2ZEN_KATAKANA) && c >= 0xFF61 && c <= 0xFF9F) { + return 0x3000 + hankana2zenkana_table[c - 0xFF60]; + } + if ((mode & MBFL_HAN2ZEN_HIRAGANA) && c >= 0xFF61 && c <= 
0xFF9F) { + return 0x3000 + hankana2zenhira_table[c - 0xFF60]; + } + } + + if (mode & MBFL_HAN2ZEN_SPECIAL) { /* special ascii to symbol */ + if (c == '\\' || c == 0xA5) { /* YEN SIGN */ + return 0xFFE5; /* FULLWIDTH YEN SIGN */ + } + if (c == 0x7E || c == 0x203E) { + return 0xFFE3; /* FULLWIDTH MACRON */ + } + if (c == '\'') { + return 0x2019; /* RIGHT SINGLE QUOTATION MARK */ + } + if (c == '"') { + return 0x201D; /* RIGHT DOUBLE QUOTATION MARK */ + } + } + + if (mode & (MBFL_ZEN2HAN_ALL | MBFL_ZEN2HAN_ALPHA | MBFL_ZEN2HAN_NUMERIC | MBFL_ZEN2HAN_SPACE)) { + /* Zenkaku to Hankaku */ + if ((mode & MBFL_ZEN2HAN_ALL) && c >= 0xFF01 && c <= 0xFF5D && c != 0xFF02 && c != 0xFF07 && c != 0xFF3C) { + /* all except " ' \ ~ */ + return c - 0xFEE0; + } + if ((mode & MBFL_ZEN2HAN_ALPHA) && ((c >= 0xFF21 && c <= 0xFF3A) || (c >= 0xFF41 && c <= 0xFF5A))) { + return c - 0xFEE0; + } + if ((mode & MBFL_ZEN2HAN_NUMERIC) && (c >= 0xFF10 && c <= 0xFF19)) { + return c - 0xFEE0; + } + if ((mode & MBFL_ZEN2HAN_SPACE) && (c == 0x3000)) { + return ' '; + } + if ((mode & MBFL_ZEN2HAN_ALL) && (c == 0x2212)) { /* MINUS SIGN */ + return '-'; + } + } + + if (mode & (MBFL_ZEN2HAN_KATAKANA | MBFL_ZEN2HAN_HIRAGANA)) { + /* Zenkaku kana to hankaku kana */ + if ((mode & MBFL_ZEN2HAN_KATAKANA) && c >= 0x30A1 && c <= 0x30F4) { + /* Zenkaku katakana to hankaku kana */ + int n = c - 0x30A1; + if (zenkana2hankana_table[n][1]) { + *second = 0xFF00 + zenkana2hankana_table[n][1]; + } + return 0xFF00 + zenkana2hankana_table[n][0]; + } + if ((mode & MBFL_ZEN2HAN_HIRAGANA) && c >= 0x3041 && c <= 0x3093) { + /* Zenkaku hiragana to hankaku kana */ + int n = c - 0x3041; + if (zenkana2hankana_table[n][1]) { + *second = 0xFF00 + zenkana2hankana_table[n][1]; + } + return 0xFF00 + zenkana2hankana_table[n][0]; + } + if (c == 0x3001) { + return 0xFF64; /* HALFWIDTH IDEOGRAPHIC COMMA */ + } + if (c == 0x3002) { + return 0xFF61; /* HALFWIDTH IDEOGRAPHIC FULL STOP */ + } + if (c == 0x300C) { + return 0xFF62; /* HALFWIDTH 
LEFT CORNER BRACKET */ + } + if (c == 0x300D) { + return 0xFF63; /* HALFWIDTH RIGHT CORNER BRACKET */ + } + if (c == 0x309B) { + return 0xFF9E; /* HALFWIDTH KATAKANA VOICED SOUND MARK */ + } + if (c == 0x309C) { + return 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ + } + if (c == 0x30FC) { + return 0xFF70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ + } + if (c == 0x30FB) { + return 0xFF65; /* HALFWIDTH KATAKANA MIDDLE DOT */ + } + } + + if (mode & (MBFL_ZENKAKU_HIRA2KATA | MBFL_ZENKAKU_KATA2HIRA)) { + if ((mode & MBFL_ZENKAKU_HIRA2KATA) && ((c >= 0x3041 && c <= 0x3093) || c == 0x309D || c == 0x309E)) { + /* Zenkaku hiragana to Zenkaku katakana */ + return c + 0x60; + } + if ((mode & MBFL_ZENKAKU_KATA2HIRA) && ((c >= 0x30A1 && c <= 0x30F3) || c == 0x30FD || c == 0x30FE)) { + /* Zenkaku katakana to Zenkaku hiragana */ + return c - 0x60; + } + } + + if (mode & MBFL_ZEN2HAN_SPECIAL) { /* special symbol to ascii */ + if (c == 0xFFE5 || c == 0xFF3C) { /* FULLWIDTH YEN SIGN/FULLWIDTH REVERSE SOLIDUS */ + return '\\'; + } + if (c == 0xFFE3 || c == 0x203E) { /* FULLWIDTH MACRON/OVERLINE */ + return '~'; + } + if (c == 0x2018 || c == 0x2019) { /* LEFT/RIGHT SINGLE QUOTATION MARK*/ + return '\''; + } + if (c == 0x201C || c == 0x201D) { /* LEFT/RIGHT DOUBLE QUOTATION MARK */ + return '"'; + } + } + + return c; +} + +static int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter); + +static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter) +{ + int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE; + bool consumed = false; + + if (filter->cache) { + int s = mb_convert_kana_codepoint(filter->cache, c, &consumed, NULL, mode); + filter->cache = consumed ? 
0 : c; + /* Terrible hack to get CP50220 to emit error markers in the proper + * position, not reordering them with subsequent characters */ + filter->filter_function = mbfl_filt_conv_wchar_cp50221; + mbfl_filt_conv_wchar_cp50221(s, filter); + filter->filter_function = mbfl_filt_conv_wchar_cp50220; + if (c == 0 && !consumed) { + (*filter->output_function)(0, filter->data); + } + } else if (c == 0) { + /* This case has to be handled separately, since `filter->cache == 0` means + * no codepoint is cached */ + (*filter->output_function)(0, filter->data); + } else { + filter->cache = c; + } + + return 0; +} + +static int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter) +{ + int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE; + + if (filter->cache) { + int s = mb_convert_kana_codepoint(filter->cache, 0, NULL, NULL, mode); + filter->filter_function = mbfl_filt_conv_wchar_cp50221; + mbfl_filt_conv_wchar_cp50221(s, filter); + filter->filter_function = mbfl_filt_conv_wchar_cp50220; + filter->cache = 0; + } + + return mbfl_filt_conv_any_jis_flush(filter); +} + +static int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= 
ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xE000 && c <= 0xE757) { + /* 'private'/'user' codepoints */ + s = c - 0xE000; + s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + + /* Above, we do a series of lookups in `ucs_*_jis_table` to find a + * corresponding kuten code for this Unicode codepoint + * If we get zero, that means the codepoint is not in JIS X 0208 + * On the other hand, if we get a result with the high bits set on both + * upper and lower bytes, that is not a code in JIS X 0208 but rather + * in JIS X 0213 + * In either case, check if this codepoint is one of the extensions added + * to JIS X 0208 by Microsoft (to make CP932) */ + if (s == 0 || ((s & 0x8000) && (s & 0x80))) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s < 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0)
{ + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0; + } + CK((*filter->output_function)(s, filter->data)); + } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ + if ((filter->status & 0xff00) != 0x500) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + filter->status = 0x500; + } + CK((*filter->output_function)(s - 0x80, filter->data)); + } else if (s <= 0x927E) { /* X 0208 + extensions */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } else if (s < 0x10000) { /* X0212 */ + CK(mbfl_filt_conv_illegal_output(c, filter)); + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* 
Convert to JISX 0201 OVERLINE */ + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xE000 && c <= 0xE757) { + /* 'private'/'user' codepoints */ + s = c - 0xE000; + s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); + } + + if (s <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s = 0x1005c; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } + } + if (s == 0 || ((s & 0x8000) && (s & 0x80))) { + int i; + s = -1; + + for (i = 0; + i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + + if (s <= 0) { + const int oh = cp932ext2_ucs_table_min / 94; + const int cp932ext2_ucs_table_size = + cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + for (i = 0; i < cp932ext2_ucs_table_size; i++) { + if (c == cp932ext2_ucs_table[i]) { + s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); + break; + } + } + } + + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SI (0x0F): shift back in from kana */ + filter->status = 0; + } else if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ +
CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0; + } + CK((*filter->output_function)(s, filter->data)); + } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ + if ((filter->status & 0xff00) != 0x500) { + CK((*filter->output_function)(0x0e, filter->data)); /* SO (0x0E): shift out to kana */ + filter->status = 0x500; + } + CK((*filter->output_function)(s - 0x80, filter->data)); + } else if (s <= 0x927E) { /* X 0208 */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SI (0x0F): shift back in from kana */ + filter->status = 0; + } + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + filter->status = 0x200; + } + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } else if (s < 0x10000) { /* X0212 */ + CK(mbfl_filt_conv_illegal_output(c, filter)); + } else { /* X 0201 latin */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SI (0x0F): shift back in from kana */ + filter->status = 0; + } + if ((filter->status & 0xff00) != 0x400) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ + } + filter->status = 0x400; + CK((*filter->output_function)(s & 0x7f, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) +{ + /* back to ASCII */ + if ((filter->status & 0xff00) == 0x500) { + CK((*filter->output_function)(0x0f, filter->data)); /* SI (0x0F): shift back in from kana */ + } else if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ +
CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +#define ASCII 0 +#define JISX_0201_LATIN 1 +#define JISX_0201_KANA 2 +#define JISX_0208 3 +#define JISX_0212 4 + +static size_t mb_cp5022x_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + ZEND_ASSERT(bufsize >= 3); + + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + /* Escape sequence */ + if ((e - p) < 2) { + *out++ = MBFL_BAD_INPUT; + /* Duplicate error-handling behavior of legacy code */ + if (p < e && (*p == '(' || *p == '$')) + p++; + continue; + } + unsigned char c2 = *p++; + if (c2 == '$') { + unsigned char c3 = *p++; + if (c3 == '@' || c3 == 'B') { + *state = JISX_0208; + } else if (c3 == '(') { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c4 = *p++; + if (c4 == '@' || c4 == 'B') { + *state = JISX_0208; + } else if (c4 == 'D') { + *state = JISX_0212; + } else { + if ((limit - out) < 3) { + p -= 4; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '$'; + *out++ = '('; + p--; + } + } else { + if ((limit - out) < 2) { + p -= 3; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '$'; + p--; + } + } else if (c2 == '(') { + unsigned char c3 = *p++; + if (c3 == 'B' || c3 == 'H') { + *state = ASCII; + } else if (c3 == 'J') { + *state = JISX_0201_LATIN; + } else if (c3 == 'I') { + *state = JISX_0201_KANA; + } else { + if ((limit - out) < 2) { + p -= 3; + break; + } + *out++ = MBFL_BAD_INPUT; + *out++ = '('; + p--; + } + } else { + *out++ = MBFL_BAD_INPUT; + p--; + } + } else if (c == 0xE) { + *state = JISX_0201_KANA; + } else if (c == 0xF) { + *state = ASCII; + } else if (*state == JISX_0201_LATIN && c == 0x5C) { /* 
YEN SIGN */ + *out++ = 0xA5; + } else if (*state == JISX_0201_LATIN && c == 0x7E) { /* OVERLINE */ + *out++ = 0x203E; + } else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) { + *out++ = 0xFF40 + c; + } else if (*state >= JISX_0208 && c > 0x20 && c <= 0x97) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + if (c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 0x21)*94 + c2 - 0x21; + uint32_t w = 0; + if (*state == JISX_0208) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= 94*94 && s < 114*94) { + /* Microsoft extension: user-defined characters => PUA */ + w = s - 94*94 + 0xE000; + } + if (!w) + w = MBFL_BAD_INPUT; + } else { + if (s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + } + if (!w) + w = MBFL_BAD_INPUT; + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static unsigned int lookup_wchar(uint32_t w) +{ + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w == 0x203E) { /* OVERLINE */ + s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min
&& w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w <= 0xE757) { + /* Private Use Area codepoints */ + s = w - 0xE000; + s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x1005C; + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w == 0) { + return 0; + } + } + + /* Above, we do a series of lookups in `ucs_*_jis_table` to find a + * corresponding kuten code for this Unicode codepoint + * If we get zero, that means the codepoint is not in JIS X 0208 + * On the other hand, if we get a result with the high bits set on both + * upper and lower bytes, that is not a code in JIS X 0208 but rather + * in JIS X 0213 + * In either case, check if this codepoint is one of the extensions added + * to JIS X 0208 by Microsoft (to make CP932). NOTE(review): the test below is `s >= 0x8080`, which is broader than the both-high-bits condition described here (it also admits the 0x10000-tagged JIS X 0201 codes set above); the legacy filter uses `(s & 0x8000) && (s & 0x80)` -- confirm the difference is intended */ + if (!s || s >= 0x8080) { + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + return (((i / 94) + (cp932ext1_ucs_table_min / 94) + 0x21) << 8) + (i % 94) + 0x21; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (w == cp932ext2_ucs_table[i]) { + return (((i / 94) + (cp932ext2_ucs_table_min / 94) + 0x21) << 8) + (i % 94) + 0x21; + } + } + } + + return s; +} + +static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); + +static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit,
len); + + uint32_t w; + + if (buf->state & 0xFFFF00) { + /* Reprocess cached codepoint */ + w = buf->state >> 8; + buf->state &= 0xFF; + goto reprocess_codepoint; + } + + while (len--) { + w = *in++; +reprocess_codepoint: + + if (w >= 0xFF61 && w <= 0xFF9F && !len && !end) { + /* This codepoint may need to combine with the next one, + * but the 'next one' will come in a separate buffer */ + buf->state |= w << 8; + break; + } + + bool consumed = false; + w = mb_convert_kana_codepoint(w, len ? *in : 0, &consumed, NULL, MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE); + if (consumed) { + /* Two successive codepoints were converted into one */ + in++; len--; consumed = false; + } + + unsigned int s = lookup_wchar(w); + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); + } else if (s < 0x80) { + /* ASCII */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != ASCII) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA0 && s < 0xE0) { + /* JISX 0201 Kana */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != JISX_0201_KANA) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); + buf->state = JISX_0201_KANA; + } + out = mb_convert_buf_add(out, s - 0x80); + } else if (s <= 0x927E) { + /* JISX 0208 Kanji */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + if (buf->state != JISX_0208) { + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX_0208; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else if (s >= 0x10000) { + /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != JISX_0201_LATIN) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); + buf->state = JISX_0201_LATIN; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); + } + } + + if (end && buf->state != 
ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = lookup_wchar(w); + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); + } else if (s < 0x80) { + /* ASCII */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != ASCII) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA0 && s < 0xE0) { + /* JISX 0201 Kana */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != JISX_0201_KANA) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); + buf->state = JISX_0201_KANA; + } + out = mb_convert_buf_add(out, s - 0x80); + } else if (s <= 0x927E) { + /* JISX 0208 Kanji */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + if (buf->state != JISX_0208) { + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX_0208; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else if (s >= 0x10000) { + /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state != JISX_0201_LATIN) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); + buf->state = JISX_0201_LATIN; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); + } + } + + if (end && buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, 
*limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = lookup_wchar(w); + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50222); + } else if (s < 0x80) { + /* ASCII */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + if (buf->state == JISX_0201_KANA) { + out = mb_convert_buf_add(out, 0xF); + buf->state = ASCII; + } else if (buf->state != ASCII) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA0 && s < 0xE0) { + /* JISX 0201 Kana */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + if (buf->state != JISX_0201_KANA) { + out = mb_convert_buf_add(out, 0xE); + buf->state = JISX_0201_KANA; + } + out = mb_convert_buf_add(out, s - 0x80); + } else if (s <= 0x927E) { + /* JISX 0208 Kanji */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); + if (buf->state == JISX_0201_KANA) { + out = mb_convert_buf_add(out, 0xF); + } + if (buf->state != JISX_0208) { + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX_0208; + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else if (s >= 0x10000) { + /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + if (buf->state == JISX_0201_KANA) { + out = mb_convert_buf_add(out, 0xF); + } + if (buf->state != JISX_0201_LATIN) { + out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); + buf->state = JISX_0201_LATIN; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50222); + } + } + + if (end) { + if (buf->state == JISX_0201_KANA) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 1); + out = mb_convert_buf_add(out, 0xF); + } else if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} 
+ +#define ASCII 0 +#define JISX0201_KANA 0x20 +#define JISX0208_KANJI 0x80 +#define UDC 0xA0 + +static int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status & 0xF) { + case 0: + if (c == 0x1B) { + filter->status += 2; + } else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) { + CK((*filter->output_function)(0xFF40 + c, filter->data)); + } else if ((filter->status == JISX0208_KANJI || filter->status == UDC) && c > 0x20 && c < 0x80) { + filter->cache = c; + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* ASCII */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xA0 && c < 0xE0) { /* Kana */ + CK((*filter->output_function)(0xFEC0 + c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* Kanji, second byte */ + case 1: + w = 0; + filter->status &= ~0xF; + c1 = filter->cache; + if (c > 0x20 && c < 0x7F) { + s = ((c1 - 0x21) * 94) + c - 0x21; + if (filter->status == JISX0208_KANJI) { + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + } else { 
+ if (c1 > 0x20 && c1 < 0x35) { + w = 0xE000 + ((c1 - 0x21) * 94) + c - 0x21; + } else { + w = MBFL_BAD_INPUT; + } + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC */ + case 2: + if (c == '$') { + filter->status++; + } else if (c == '(') { + filter->status += 3; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $ */ + case 3: + if (c == '@' || c == 'B') { + filter->status = JISX0208_KANJI; + } else if (c == '(') { + filter->status++; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC $ ( */ + case 4: + if (c == '@' || c == 'B') { + filter->status = JISX0208_KANJI; + } else if (c == '?') { + filter->status = UDC; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC ( */ + case 5: + if (c == 'B' || c == 'J') { + filter->status = 0; + } else if (c == 'I') { + filter->status = JISX0201_KANA; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + + return 0; +} + +static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +#define sjistoidx(c1, c2) \ + (((c1) > 0x9f) ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 
0x41 : 0x40))) +#define idxtojis1(c) (((c) / 94) + 0x21) +#define idxtojis2(c) (((c) % 94) + 0x21) + +static int cp932ext3_cp932ext2_jis(int c) +{ + int idx; + + idx = sjistoidx(0xfa, 0x40) + c; + if (idx >= sjistoidx(0xfa, 0x5c)) + idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); + else if (idx >= sjistoidx(0xfa, 0x55)) + idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); + else if (idx >= sjistoidx(0xfa, 0x40)) + idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); + return idxtojis1(idx) << 8 | idxtojis2(idx); +} + +static int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0, s2 = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xE000 && c < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s1 = c - 0xE000; + c1 = (s1 / 94) + 0x7f; + c2 = (s1 % 94) + 0x21; + s1 = (c1 << 8) | c2; + } + + if (s1 <= 0) { + if (c == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } + } + + if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { + if (c == 
cp932ext1_ucs_table[c1]) { + s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; + break; + } + } + + if (s1 <= 0) { + for (c1 = 0; c1 < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; c1++) { + if (c == cp932ext3_ucs_table[c1]) { + s1 = cp932ext3_cp932ext2_jis(c1); + break; + } + } + } + + if (c == 0) { + s1 = 0; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + if (filter->status & 0xFF00) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + CK((*filter->output_function)(s1, filter->data)); + filter->status = 0; + } else if (s1 > 0xA0 && s1 < 0xE0) { /* kana */ + if ((filter->status & 0xFF00) != 0x100) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('I', filter->data)); + } + filter->status = 0x100; + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } else if (s1 < 0x7E7F) { /* X 0208 */ + if ((filter->status & 0xFF00) != 0x200) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + filter->status = 0x200; + CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } else if (s1 < 0x927F) { /* UDC */ + if ((filter->status & 0xFF00) != 0x800) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('?', filter->data)); + } + filter->status = 0x800; + CK((*filter->output_function)(((s1 >> 8) - 0x5E) & 0x7F, filter->data)); + CK((*filter->output_function)(s1 & 0x7F, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int 
mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) +{ + /* Go back to ASCII (so strings can be safely concatenated) */ + if ((filter->status & 0xFF00) != 0) { + CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ + CK((*filter->output_function)('(', filter->data)); + CK((*filter->output_function)('B', filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_iso2022jpms_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + if ((e - p) < 2) { + *out++ = MBFL_BAD_INPUT; + p = e; + break; + } + unsigned char c2 = *p++; + unsigned char c3 = *p++; + + if (c2 == '$') { + if (c3 == '@' || c3 == 'B') { + *state = JISX0208_KANJI; + } else if (c3 == '(' && p < e) { + unsigned char c4 = *p++; + + if (c4 == '@' || c4 == 'B') { + *state = JISX0208_KANJI; + } else if (c4 == '?') { + *state = UDC; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c2 == '(') { + if (c3 == 'B' || c3 == 'J') { + *state = ASCII; + } else if (c3 == 'I') { + *state = JISX0201_KANA; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + p--; + *out++ = MBFL_BAD_INPUT; + } + } else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) { + *out++ = 0xFF40 + c; + } else if ((*state == JISX0208_KANJI || *state == UDC) && c >= 0x21 && c <= 0x7F) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + unsigned int w = 0; + + if (c2 >= 0x21 && c2 <= 0x7E) { + unsigned int s = ((c - 0x21) * 94) + c2 - 0x21; + if (*state == JISX0208_KANJI) { + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s 
== 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (!w) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + } else if (c >= 0x21 && c <= 0x34) { + w = 0xE000 + ((c - 0x21) * 94) + c2 - 0x21; + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_iso2022jpms(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s = ((((w - 0xE000) / 94) + 0x7F) << 8) | (((w - 0xE000) % 94) + 0x21); + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + 
s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if (s >= 0xA1A1) /* JISX 0212 */ + s = 0; + + if (!s && w) { + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + break; + } + } + + if (!s) { + for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { + if (w == cp932ext3_ucs_table[i]) { + s = cp932ext3_cp932ext2_jis(i); + break; + } + } + } + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7F) { + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + buf->state = ASCII; + } + out = mb_convert_buf_add(out, s); + } else if (s >= 0xA1 && s <= 0xDF) { + if (buf->state != JISX0201_KANA) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); + buf->state = JISX0201_KANA; + } + out = mb_convert_buf_add(out, s & 0x7F); + } else if (s <= 0x7E7E) { + if (buf->state != JISX0208_KANJI) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); + out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); + buf->state = JISX0208_KANJI; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0x7F); + } else if (s < 0x927F) { + if (buf->state != UDC) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); + out = mb_convert_buf_add4(out, 0x1B, '$', '(', '?'); + buf->state = 
UDC; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, ((s >> 8) - 0x5E) & 0x7F, s & 0x7F); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } + + if (end && buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); + out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter) +{ + int w = 0; + + switch (filter->status & 0xf) { + /* case 0x00: ASCII */ + /* case 0x10: KSC5601 */ + case 0: + if (c == 0x1b) { /* ESC */ + filter->status += 2; + } else if (c == 0x0f) { /* shift in (ASCII) */ + filter->status = 0; + } else if (c == 0x0e) { /* shift out (KSC5601) */ + filter->status = 0x10; + } else if ((filter->status & 0x10) && c > 0x20 && c < 0x7f) { + /* KSC5601 lead byte */ + filter->cache = c; + filter->status = 0x11; + } else if ((filter->status & 0x10) == 0 && c >= 0 && c < 0x80) { + /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0x10; + int c1 = filter->cache; + int flag = 0; + + if (c1 > 0x20 && c1 < 0x47) { + flag = 1; + } else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) { + flag = 2; + } + + if (flag > 0 && c > 0x20 && c < 0x7f) { + if (flag == 1) { + if (c1 != 0x22 || c <= 0x65) { + w = (c1 - 1)*190 + (c - 0x41) + 0x80; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + } + } else { + w = (c1 - 0x47)*94 + c - 0x21; + if (w < uhc3_ucs_table_size) { + w = uhc3_ucs_table[w]; + } else { + w = MBFL_BAD_INPUT; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 2: /* ESC */ + if (c == '$') { 
+ filter->status++; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* ESC $ */ + if (c == ')') { + filter->status++; + } else { + filter->status &= ~0xF; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 4: /* ESC $ ) */ + filter->status = 0; + if (c != 'C') { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + /* 2-byte character was truncated */ + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s = 0; + + if ((filter->status & 0x100) == 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)('$', filter->data)); + CK((*filter->output_function)(')', filter->data)); + CK((*filter->output_function)('C', filter->data)); + filter->status |= 0x100; + } + + if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; + } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; + } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; + } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; + } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; + } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; + } else if (c >= ucs_r2_uhc_table_min && c < 
ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; + } + + c1 = (s >> 8) & 0xff; + c2 = s & 0xff; + /* exclude UHC extension area */ + if (c1 < 0xa1 || c2 < 0xa1) { + s = c; + } else if (s & 0x8000) { + s -= 0x8080; + } + + if (s <= 0) { + if (c == 0) { + s = 0; + } else { + s = -1; + } + } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { + s = -1; + } + + if (s >= 0) { + if (s < 0x80 && s >= 0) { /* ASCII */ + if (filter->status & 0x10) { + CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ + filter->status &= ~0x10; + } + CK((*filter->output_function)(s, filter->data)); + } else { + if ((filter->status & 0x10) == 0) { + CK((*filter->output_function)(0x0e, filter->data)); /* shift out */ + filter->status |= 0x10; + } + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter) +{ + if (filter->status & 0xF) { + /* Escape sequence or 2-byte character was truncated */ + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + } + /* back to ascii */ + if (filter->status & 0x10) { + CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ + } + + filter->status = filter->cache = 0; + + if (filter->flush_function) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +#define ASCII 0 +#define KSC5601 1 + +static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + if ((e - p) < 3) { + *out++ = MBFL_BAD_INPUT; + if (p < e && *p++ == '$') { + if (p < e) { + p++; + } + } + continue; + } + unsigned char c2 = *p++; + unsigned char c3 = *p++; 
+ unsigned char c4 = *p++; + if (c2 == '$' && c3 == ')' && c4 == 'C') { + *state = ASCII; + } else { + if (c3 != ')') { + p--; + if (c2 != '$') + p--; + } + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0xF) { + *state = ASCII; + } else if (c == 0xE) { + *state = KSC5601; + } else if (c >= 0x21 && c <= 0x7E && *state == KSC5601) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + unsigned int w = 0; + + if (c2 < 0x21 || c2 > 0x7E) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + if (c < 0x47) { + if (c != 0x22 || c2 <= 0x65) { + w = (c - 1)*190 + c2 - 0x41 + 0x80; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + } + } else if (c != 0x49 && c <= 0x7D) { + w = (c - 0x47)*94 + c2 - 0x21; + ZEND_ASSERT(w < uhc3_ucs_table_size); + w = uhc3_ucs_table[w]; + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else if (c < 0x80 && *state == ASCII) { + *out++ = c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +#define EMITTED_ESC_SEQUENCE 0x10 + +static void mb_wchar_to_iso2022kr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + + /* This escape sequence needs to come *somewhere* at the beginning of a line before + * we can use the Shift In/Shift Out bytes, but it only needs to come once in a string + * Rather than tracking newlines, we can just emit the sequence once at the beginning + * of the output string... 
since that will always be "the beginning of a line" */ + if (len && !(buf->state & EMITTED_ESC_SEQUENCE)) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 4 + len); + out = mb_convert_buf_add4(out, 0x1B, '$', ')', 'C'); + buf->state |= EMITTED_ESC_SEQUENCE; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; + } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; + } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; + } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; + } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; + } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; + } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; + } + + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = w; + } else { + s -= 0x8080; + } + + if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022kr); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s < 0x80) { + if ((buf->state & 1) != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add(out, 0xF); + buf->state &= ~KSC5601; + } + out = mb_convert_buf_add(out, s); + } else { + if ((buf->state & 1) != KSC5601) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); + out = mb_convert_buf_add(out, 0xE); + buf->state |= KSC5601; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + if (end && 
(buf->state & 1) != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 1); + out = mb_convert_buf_add(out, 0xF); + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const struct mbfl_convert_vtbl vtbl_jis_wchar = { + mbfl_no_encoding_jis, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis_wchar, + mbfl_filt_conv_jis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_jis = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_jis, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_jis, + mbfl_filt_conv_any_jis_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_jis = { + mbfl_no_encoding_jis, + "JIS", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_jis_wchar, + &vtbl_wchar_jis, + mb_iso2022jp_to_wchar, + mb_wchar_to_jis, + mb_check_jis +}; + +static const struct mbfl_convert_vtbl vtbl_2022jp_wchar = { + mbfl_no_encoding_2022jp, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis_wchar, + mbfl_filt_conv_jis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_2022jp = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jp, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_2022jp, + mbfl_filt_conv_any_jis_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_2022jp = { + mbfl_no_encoding_2022jp, + "ISO-2022-JP", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_2022jp_wchar, + &vtbl_wchar_2022jp, + mb_iso2022jp_to_wchar, + mb_wchar_to_iso2022jp, + mb_check_iso2022jp +}; + +static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL}; + +static const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = { + mbfl_no_encoding_2022jp_kddi, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_2022jp_mobile_wchar, + mbfl_filt_conv_2022jp_mobile_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = { + 
mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jp_kddi, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_2022jp_mobile, + mbfl_filt_conv_wchar_2022jp_mobile_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_2022jp_kddi = { + mbfl_no_encoding_2022jp_kddi, + "ISO-2022-JP-MOBILE#KDDI", + "ISO-2022-JP", + mbfl_encoding_2022jp_kddi_aliases, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_2022jp_kddi_wchar, + &vtbl_wchar_2022jp_kddi, + mb_iso2022jp_kddi_to_wchar, + mb_wchar_to_iso2022jp_kddi, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar = { + mbfl_no_encoding_2022jp_2004, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis2004_wchar, + mbfl_filt_conv_jis2004_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jp_2004, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_jis2004, + mbfl_filt_conv_wchar_jis2004_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_2022jp_2004 = { + mbfl_no_encoding_2022jp_2004, + "ISO-2022-JP-2004", + "ISO-2022-JP-2004", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_2022jp_2004_wchar, + &vtbl_wchar_2022jp_2004, + mb_iso2022jp2004_to_wchar, + mb_wchar_to_iso2022jp2004, + NULL +}; + +/* Previously, a dubious 'encoding' called 'cp50220raw' was supported + * This was just CP50220, but the implementation was less strict regarding + * invalid characters; it would silently pass some through + * This 'encoding' only existed in mbstring. 
In case some poor, lost soul is + * still using it, retain minimal support by aliasing it to CP50220 + * + * Further, mbstring also had a made-up encoding called "JIS-ms" + * This was the same as CP5022{0,1,2}, but without their special ways of + * handling conversion of Unicode half-width katakana */ +static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", "JIS-ms", NULL}; + +static const struct mbfl_convert_vtbl vtbl_cp50220_wchar = { + mbfl_no_encoding_cp50220, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp5022x_wchar, + mbfl_filt_conv_cp5022x_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50220, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_cp50220, + mbfl_filt_conv_wchar_cp50220_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_cp50221_wchar = { + mbfl_no_encoding_cp50221, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp5022x_wchar, + mbfl_filt_conv_cp5022x_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp50221 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50221, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_cp50221, + mbfl_filt_conv_any_jis_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_cp50222_wchar = { + mbfl_no_encoding_cp50222, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp5022x_wchar, + mbfl_filt_conv_cp5022x_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp50222, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_cp50222, + mbfl_filt_conv_wchar_cp50222_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_cp50220 = { + mbfl_no_encoding_cp50220, + "CP50220", + "ISO-2022-JP", + cp50220_aliases, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_cp50220_wchar, + 
&vtbl_wchar_cp50220, + mb_cp5022x_to_wchar, + mb_wchar_to_cp50220, + NULL +}; + +const mbfl_encoding mbfl_encoding_cp50221 = { + mbfl_no_encoding_cp50221, + "CP50221", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_cp50221_wchar, + &vtbl_wchar_cp50221, + mb_cp5022x_to_wchar, + mb_wchar_to_cp50221, + NULL +}; + +const mbfl_encoding mbfl_encoding_cp50222 = { + mbfl_no_encoding_cp50222, + "CP50222", + "ISO-2022-JP", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_cp50222_wchar, + &vtbl_wchar_cp50222, + mb_cp5022x_to_wchar, + mb_wchar_to_cp50222, + NULL +}; + +static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; + +static const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { + mbfl_no_encoding_2022jpms, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_2022jpms_wchar, + mbfl_filt_conv_2022jpms_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jpms, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_2022jpms, + mbfl_filt_conv_any_2022jpms_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_2022jpms = { + mbfl_no_encoding_2022jpms, + "ISO-2022-JP-MS", + "ISO-2022-JP", + mbfl_encoding_2022jpms_aliases, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_2022jpms_wchar, + &vtbl_wchar_2022jpms, + mb_iso2022jpms_to_wchar, + mb_wchar_to_iso2022jpms, + NULL +}; + +/* ISO-2022-KR is defined in RFC 1557 + * + * The RFC says that ESC $ ) C must appear once in a ISO-2022-KR string, + * at the beginning of a line, before any instances of the Shift In or + * Shift Out bytes which are used to switch between ASCII/KSC 5601 modes + * + * We don't enforce that for ISO-2022-KR input */ + +static const struct mbfl_convert_vtbl vtbl_wchar_2022kr = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022kr, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_2022kr, + mbfl_filt_conv_any_2022kr_flush, + NULL, +}; + 
+static const struct mbfl_convert_vtbl vtbl_2022kr_wchar = { + mbfl_no_encoding_2022kr, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_2022kr_wchar, + mbfl_filt_conv_2022kr_wchar_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_2022kr = { + mbfl_no_encoding_2022kr, + "ISO-2022-KR", + "ISO-2022-KR", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_2022kr_wchar, + &vtbl_wchar_2022kr, + mb_iso2022kr_to_wchar, + mb_wchar_to_iso2022kr, + NULL +}; + +/* + * SJIS variants + */ + +static int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter) +{ + int s1, s2, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* ASCII */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xA0 && c < 0xE0) { /* Kana */ + CK((*filter->output_function)(0xFEC0 + c, filter->data)); + } else if (c > 0x80 && c < 0xF0 && c != 0xA0) { /* Kanji, first byte */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* Kanji, second byte */ + filter->status = 0; + int c1 = filter->cache; + if (c >= 0x40 && c <= 0xFC && c != 0x7F) { + SJIS_DECODE(c1, c, s1, s2); + w = (s1 - 0x21)*94 + s2 - 0x21; + if (w >= 0 && w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + } else { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + + return 0; +} + +static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status && filter->status != 4) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + } + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0, s2; + + if (c >= ucs_a1_jis_table_min && c 
< ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s1 <= 0) { + if (c == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xAF || c == 0x203E) { /* U+00AF is MACRON, U+203E is OVERLINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215D; + } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } else if (c == 0) { + s1 = 0; + } else { + s1 = -1; + } + } else if (s1 >= 0x8080) { /* JIS X 0212; not supported */ + s1 = -1; + } + + if (s1 >= 0) { + if (s1 < 0x100) { /* Latin/Kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* Kanji */ + c1 = (s1 >> 8) & 0xFF; + c2 = s1 & 0xFF; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static const unsigned short sjis_decode_tbl1[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF +}; + +static const unsigned short sjis_decode_tbl2[] = { + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 0xFFFF, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 
136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 0xFFFF, 0xFFFF, 0xFFFF +}; + +static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + e--; /* Stop the main loop 1 byte short of the end of the input */ + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { /* Kana */ + *out++ = 0xFEC0 + c; + } else { + /* Don't need to check p < e; it's not possible to go out of bounds here, due to e-- above */ + unsigned char c2 = *p++; + /* This is only legal if c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F + * But the values in the above conversion tables have been chosen such that + * illegal values of c2 will always result in w > jisx0208_ucs_table_size, + * so we don't need to do a separate bounds check on c2 + * Likewise, the values in the conversion tables are such that illegal values + * for c will always result in w > jisx0208_ucs_table_size */ + uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2]; + if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + if (c == 0x80 || c == 0xA0 || c > 0xEF) { + p--; + } + *out++ = MBFL_BAD_INPUT; + } + } + } + + /* Finish up last byte of input string if there is one */ + if (p == e && out < limit) { + unsigned char c = *p++; + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p + 1; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char 
*out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xAF || w == 0x203E) { + s = 0x2131; /* FULLWIDTH MACRON */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w != 0) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + } else if (s >= 0x8080) { /* JIS X 0212; not supported */ + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + + if (s <= 0xFF) { + /* Latin/Kana */ + out = mb_convert_buf_add(out, s); + } else { + /* Kanji */ + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s2; + SJIS_ENCODE(c1, c2, s, s2); + out = mb_convert_buf_add2(out, s, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter) +{ + int i, j, n; + int c1, s, s1, s2, w; + + switch (filter->status) { + case 0: + if 
(c >= 0 && c < 0x80 && c != 0x5c) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else if (c > 0x80 && c <= 0xed && c != 0xa0) { /* kanji first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x5c) { + CK((*filter->output_function)(0x00a5, filter->data)); + } else if (c == 0x80) { + CK((*filter->output_function)(0x005c, filter->data)); + } else if (c == 0xa0) { + CK((*filter->output_function)(0x00a0, filter->data)); + } else if (c == 0xfd) { + CK((*filter->output_function)(0x00a9, filter->data)); + } else if (c == 0xfe) { + CK((*filter->output_function)(0x2122, filter->data)); + } else if (c == 0xff) { + CK((*filter->output_function)(0x2026, filter->data)); + CK((*filter->output_function)(0xf87f, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* kanji second char */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { + w = 0; + SJIS_DECODE(c1, c, s1, s2); + s = (s1 - 0x21)*94 + s2 - 0x21; + if (s <= 0x89) { + if (s == 0x1c) { + w = 0x2014; /* EM DASH */ + } else if (s == 0x1f) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 0x20) { + w = 0x301c; /* FULLWIDTH TILDE */ + } else if (s == 0x21) { + w = 0x2016; /* PARALLEL TO */ + } else if (s == 0x3c) { + w = 0x2212; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 0x50) { + w = 0x00a2; /* FULLWIDTH CENT SIGN */ + } else if (s == 0x51) { + w = 0x00a3; /* FULLWIDTH POUND SIGN */ + } else if (s == 0x89) { + w = 0x00ac; /* FULLWIDTH NOT SIGN */ + } + } + + /* apple gaiji area 0x8540 - 0x886d */ + if (w == 0) { + for (i=0; i<7; i++) { + if (s >= code_tbl[i][0] && s <= code_tbl[i][1]) { + w = s - code_tbl[i][0] + code_tbl[i][2]; + break; + } + } + } + + if (w == 0) { + + for (i=0; ioutput_function)(code_tbl_m[i][j], filter->data)); + } + w = 
code_tbl_m[i][n-1]; + break; + } + } + } + + if (w == 0) { + for (i=0; i<8; i++) { + if (s >= code_ofst_tbl[i][0] && s <= code_ofst_tbl[i][1]) { + w = code_map[i][s - code_ofst_tbl[i][0]]; + if (w == 0) { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + return 0; + } + s2 = 0; + if (s >= 0x043e && s <= 0x0441) { + s2 = 0xf87a; + } else if (s == 0x03b1 || s == 0x03b7) { + s2 = 0xf87f; + } else if (s == 0x04b8 || s == 0x04b9 || s == 0x04c4) { + s2 = 0x20dd; + } else if (s == 0x1ed9 || s == 0x1eda || s == 0x1ee8 || s == 0x1ef3 || + (s >= 0x1ef5 && s <= 0x1efb) || s == 0x1f05 || s == 0x1f06 || + s == 0x1f18 || (s >= 0x1ff2 && s <= 0x20a5)) { + s2 = 0xf87e; + } + if (s2 > 0) { + CK((*filter->output_function)(w, filter->data)); + w = s2; + } + break; + } + } + } + + if (w == 0 && s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter) +{ + int i, c1, c2, s1 = 0, s2 = 0, mode; + + // a1: U+0000 -> U+046F + // a2: U+2000 -> U+30FF + // i: U+4E00 -> U+9FFF + // r: U+FF00 -> U+FFFF + + switch (filter->status) { + case 1: + c1 = filter->cache; + filter->cache = filter->status = 0; + + if (c == 0xf87a) { + for (i = 0; i < 4; i++) { + if (c1 == s_form_tbl[i+34+3+3]) { + s1 = s_form_sjis_tbl[i+34+3+3]; + break; + } + } + if (s1 <= 0) { + s2 = c1; + } + } else if (c == 0x20dd) { + for (i = 0; i < 3; i++) { + if (c1 == s_form_tbl[i+34+3]) { + s1 = s_form_sjis_tbl[i+34+3]; + break; + } + } + if (s1 <= 0) { + s2 = c1; + } + } else if (c == 0xf87f) { + for (i = 0; i < 3; i++) { + if (c1 == s_form_tbl[i+34]) { + s1 = s_form_sjis_tbl[i+34]; + break; + } + } + if (s1 <= 0) { + s2 = c1; + s1 = -1; + } + } else if (c == 0xf87e) { 
+ for (i = 0; i < 34; i++) { + if (c1 == s_form_tbl[i]) { + s1 = s_form_sjis_tbl[i]; + break; + } + } + if (s1 <= 0) { + s2 = c1; + s1 = -1; + } + } else { + s2 = c1; + s1 = c; + } + + if (s2 > 0) { + for (i = 0; i < s_form_tbl_len; i++) { + if (c1 == s_form_tbl[i]) { + s1 = s_form_sjis_fallback_tbl[i]; + break; + } + } + } + + if (s1 >= 0) { + if (s1 < 0x100) { + CK((*filter->output_function)(s1, filter->data)); + } else { + CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s1 & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + if (s2 <= 0 || s1 == -1) { + break; + } + s1 = s2 = 0; + ZEND_FALLTHROUGH; + + case 0: + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + if (c == 0x5c) { + s1 = 0x80; + } else if (c == 0xa9) { + s1 = 0xfd; + } + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + if (c == 0x2122) { + s1 = 0xfe; + } else if (c == 0x2014) { + s1 = 0x213d; + } else if (c == 0x2116) { + s1 = 0x2c1d; + } + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + + if (c >= 0x2000) { + for (i = 0; i < s_form_tbl_len; i++) { + if (c == s_form_tbl[i]) { + filter->status = 1; + filter->cache = c; + return 0; + } + } + + if (c == 0xf860 || c == 0xf861 || c == 0xf862) { + /* Apple 'transcoding hint' codepoints (from private use area) */ + filter->status = 2; + filter->cache = c; + return 0; + } + } + + if (s1 <= 0) { + if (c == 0xa0) { + s1 = 0x00a0; + } else if (c == 0xa5) { /* YEN SIGN */ + /* Unicode has codepoint 0xFFE5 for a fullwidth Yen sign; + * convert codepoint 0xA5 to halfwidth Yen sign */ + s1 = 0x5c; /* HALFWIDTH YEN SIGN */ + } else if (c == 0xff3c) { /* 
FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } + } + + if (s1 <= 0) { + for (i=0; i= wchar2sjis_mac_r_tbl[i][0] && c <= wchar2sjis_mac_r_tbl[i][1]) { + s1 = c - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2]; + break; + } + } + + if (s1 <= 0) { + for (i=0; i= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) { + s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]]; + break; + } + } + } + + if (s1 <= 0) { + for (i=0; i 0) { + c1 = s1/94+0x21; + c2 = s1-94*(c1-0x21)+0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + } + + if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x100) { /* latin or kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* kanji */ + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + break; + + case 2: + c1 = filter->cache; + filter->cache = 0; + filter->status = 0; + if (c1 == 0xf860) { + for (i = 0; i < 5; i++) { + if (c == code_tbl_m[i][2]) { + filter->cache = c | 0x10000; + filter->status = 3; + break; + } + } + } else if (c1 == 0xf861) { + for (i = 0; i < 3; i++) { + if (c == code_tbl_m[i+5][2]) { + filter->cache = c | 0x20000; + filter->status = 3; + break; + } + } + } else if (c1 == 0xf862) { + for (i = 0; i < 4; i++) { + if (c == code_tbl_m[i+5+3][2]) { + filter->cache = c | 0x40000; + filter->status = 3; + break; + } + } + } + + if (filter->status == 0) { + /* Didn't find any of expected codepoints after Apple transcoding hint */ + CK(mbfl_filt_conv_illegal_output(c1, filter)); + return mbfl_filt_conv_wchar_sjis_mac(c, filter); + } + break; + + case 3: + s1 = 0; + c1 = filter->cache & 0xffff; + mode = (filter->cache & 0xf0000) >> 16; + + filter->cache = 
filter->status = 0; + + if (mode == 0x1) { + for (i = 0; i < 5; i++) { + if (c1 == code_tbl_m[i][2] && c == code_tbl_m[i][3]) { + s1 = code_tbl_m[i][0]; + break; + } + } + + if (s1 > 0) { + c1 = s1/94+0x21; + c2 = s1-94*(c1-0x21)+0x21; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(0xf860, filter)); + CK(mbfl_filt_conv_illegal_output(c1, filter)); + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else if (mode == 0x2) { + for (i = 0; i < 3; i++) { + if (c1 == code_tbl_m[i+5][2] && c == code_tbl_m[i+5][3]) { + filter->cache = c | 0x20000; + filter->status = 4; + break; + } + } + } else if (mode == 0x4) { + for (i = 0; i < 4; i++) { + if (c1 == code_tbl_m[i+8][2] && c == code_tbl_m[i+8][3]) { + filter->cache = c | 0x40000; + filter->status = 4; + break; + } + } + } + break; + + case 4: + s1 = 0; + c1 = filter->cache & 0xffff; + mode = (filter->cache & 0xf0000) >> 16; + + filter->cache = 0; + filter->status = 0; + + if (mode == 0x2) { + for (i = 0; i < 3; i++) { + if (c1 == code_tbl_m[i+5][3] && c == code_tbl_m[i+5][4]) { + s1 = code_tbl_m[i+5][0]; + break; + } + } + + if (s1 > 0) { + c1 = s1/94+0x21; + c2 = s1-94*(c1-0x21)+0x21; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(0xf861, filter)); + for (i = 0; i < 3; i++) { + if (c1 == code_tbl_m[i+5][3]) { + CK(mbfl_filt_conv_illegal_output(code_tbl_m[i+5][2], filter)); + break; + } + } + CK(mbfl_filt_conv_illegal_output(c1, filter)); + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else if (mode == 0x4) { + for (i = 0; i < 4; i++) { + if (c1 == code_tbl_m[i+8][3] && c == code_tbl_m[i+8][4]) { + filter->cache = c | 0x40000; + filter->status = 5; + break; + } + } + } + break; + + case 5: + s1 = 0; + c1 = filter->cache & 0xffff; + mode = 
(filter->cache & 0xf0000) >> 16; + + filter->cache = filter->status = 0; + + if (mode == 0x4) { + for (i = 0; i < 4; i++) { + if (c1 == code_tbl_m[i+8][4] && c == code_tbl_m[i+8][5]) { + s1 = code_tbl_m[i+8][0]; + break; + } + } + + if (s1 > 0) { + c1 = s1/94+0x21; + c2 = s1-94*(c1-0x21)+0x21; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(0xf862, filter)); + for (i = 0; i < 4; i++) { + if (c1 == code_tbl_m[i+8][4]) { + CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][2], filter)); + CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][3], filter)); + break; + } + } + CK(mbfl_filt_conv_illegal_output(c1, filter)); + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter) +{ + int i, c1, s1 = 0; + if (filter->status == 1 && filter->cache > 0) { + c1 = filter->cache; + for (i=0;i 0) { + CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s1 & 0xff, filter->data)); + } + } + filter->cache = 0; + filter->status = 0; + + if (filter->flush_function != NULL) { + return (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + /* A single SJIS-Mac kuten code can convert to up to 5 Unicode codepoints, oh my! 
*/ + ZEND_ASSERT(bufsize >= 5); + + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x80 || c == 0xA0) { + if (c == 0x5C) { + *out++ = 0xA5; + } else if (c == 0x80) { + *out++ = 0x5C; + } else { + *out++ = c; + } + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else if (c <= 0xED) { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (w <= 0x89) { + if (w == 0x1C) { + *out++ = 0x2014; /* EM DASH */ + continue; + } else if (w == 0x1F) { + *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + continue; + } else if (w == 0x20) { + *out++ = 0x301C; /* FULLWIDTH TILDE */ + continue; + } else if (w == 0x21) { + *out++ = 0x2016; /* PARALLEL TO */ + continue; + } else if (w == 0x3C) { + *out++ = 0x2212; /* FULLWIDTH HYPHEN-MINUS */ + continue; + } else if (w == 0x50) { + *out++ = 0xA2; /* FULLWIDTH CENT SIGN */ + continue; + } else if (w == 0x51) { + *out++ = 0xA3; /* FULLWIDTH POUND SIGN */ + continue; + } else if (w == 0x89) { + *out++ = 0xAC; /* FULLWIDTH NOT SIGN */ + continue; + } + } else { + if (w >= 0x2F0 && w <= 0x3A3) { + for (int i = 0; i < 7; i++) { + if (w >= code_tbl[i][0] && w <= code_tbl[i][1]) { + *out++ = w - code_tbl[i][0] + code_tbl[i][2]; + goto next_iteration; + } + } + } + + if (w >= 0x340 && w <= 0x523) { + for (int i = 0; i < code_tbl_m_len; i++) { + if (w == code_tbl_m[i][0]) { + int n = 5; + if (code_tbl_m[i][1] == 0xF860) { + n = 3; + } else if (code_tbl_m[i][1] == 0xF861) { + n = 4; + } + if ((limit - out) < n) { + p -= 2; + goto finished; + } + for (int j = 1; j <= n; j++) { + *out++ = code_tbl_m[i][j]; + } + goto next_iteration; + } + } + } + + if (w >= 0x3AC && w <= 0x20A5) { + for (int i = 0; i < 8; i++) { + if (w >= code_ofst_tbl[i][0] && w <= code_ofst_tbl[i][1]) { + uint32_t w2 = code_map[i][w - 
code_ofst_tbl[i][0]]; + if (!w2) { + *out++ = MBFL_BAD_INPUT; + goto next_iteration; + } + if ((limit - out) < 2) { + p -= 2; + goto finished; + } + *out++ = w2; + if (w >= 0x43E && w <= 0x441) { + *out++ = 0xF87A; + } else if (w == 0x3B1 || w == 0x3B7) { + *out++ = 0xF87F; + } else if (w == 0x4B8 || w == 0x4B9 || w == 0x4C4) { + *out++ = 0x20DD; + } else if (w == 0x1ED9 || w == 0x1EDA || w == 0x1EE8 || w == 0x1EF3 || (w >= 0x1EF5 && w <= 0x1EFB) || w == 0x1F05 || w == 0x1F06 || w == 0x1F18 || (w >= 0x1FF2 && w <= 0x20A5)) { + *out++ = 0xF87E; + } + goto next_iteration; + } + } + } + } + + if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0xFD) { + *out++ = 0xA9; + } else if (c == 0xFE) { + *out++ = 0x2122; + } else if (c == 0xFF) { + if ((limit - out) < 2) { + p--; + break; + } + *out++ = 0x2026; + *out++ = 0xF87F; + } else { + *out++ = MBFL_BAD_INPUT; + } +next_iteration: ; + } + +finished: + *in_len = e - p; + *in = p; + return out - buf; +} + +static bool process_s_form(uint32_t w, uint32_t w2, unsigned int *s) +{ + if (w2 == 0xF87A) { + for (int i = 0; i < 4; i++) { + if (w == s_form_tbl[i+34+3+3]) { + *s = s_form_sjis_tbl[i+34+3+3]; + return true; + } + } + } else if (w2 == 0x20DD) { + for (int i = 0; i < 3; i++) { + if (w == s_form_tbl[i+34+3]) { + *s = s_form_sjis_tbl[i+34+3]; + return true; + } + } + } else if (w2 == 0xF87F) { + for (int i = 0; i < 3; i++) { + if (w == s_form_tbl[i+34]) { + *s = s_form_sjis_tbl[i+34]; + return true; + } + } + } else if (w2 == 0xF87E) { + for (int i = 0; i < 34; i++) { + if (w == s_form_tbl[i]) { + *s = s_form_sjis_tbl[i]; + return true; + } + } + } + + return false; +} + +/* For codepoints F860-F862, which are treated specially in MacJapanese */ +static int transcoding_hint_cp_width[3] = { 3, 4, 5 }; + +static void mb_wchar_to_sjismac(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned 
char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + uint32_t w; + + if (buf->state) { + w = buf->state & 0xFFFF; + if (buf->state & 0xFF000000L) { + goto resume_transcoding_hint; + } else { + buf->state = 0; + goto process_codepoint; + } + } + + while (len--) { + w = *in++; +process_codepoint: ; + unsigned int s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + if (w == 0x5C) { + s = 0x80; + } else if (w == 0xA9) { + s = 0xFD; + } else { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + if (w == 0x2122) { + s = 0xFE; + } else if (w == 0x2014) { + s = 0x213D; + } else if (w == 0x2116) { + s = 0x2C1D; + } else { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (w >= 0x2000) { + for (int i = 0; i < s_form_tbl_len; i++) { + if (w == s_form_tbl[i]) { + if (!len) { + if (end) { + s = s_form_sjis_fallback_tbl[i]; + if (s) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); + } + } else { + buf->state = w; + } + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + uint32_t w2 = *in++; + len--; + + if (!process_s_form(w, w2, &s)) { + in--; len++; + + for (int i = 0; i < s_form_tbl_len; i++) { + if (w == s_form_tbl[i]) { + s = s_form_sjis_fallback_tbl[i]; + break; + } + } + } + + if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + + goto next_iteration; + } + } + + if (w == 0xF860 || w == 0xF861 || w == 0xF862) { + /* Apple 
'transcoding hint' codepoints (from private use area) */ + if (!len) { + if (end) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); + } else { + buf->state = w; + } + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + + uint32_t w2 = *in++; + len--; + + for (int i = 0; i < code_tbl_m_len; i++) { + if (w == code_tbl_m[i][1] && w2 == code_tbl_m[i][2]) { + /* This might be a valid transcoding hint sequence */ + int index = 3; + + if (buf->state) { +resume_transcoding_hint: + i = buf->state >> 24; + index = (buf->state >> 16) & 0xFF; + buf->state = 0; + } + + int expected = transcoding_hint_cp_width[w - 0xF860]; + + while (index <= expected) { + if (!len) { + if (end) { + for (int j = 1; j < index; j++) { + MB_CONVERT_ERROR(buf, out, limit, code_tbl_m[i][j], mb_wchar_to_sjismac); + } + } else { + buf->state = (i << 24) | (index << 16) | (w & 0xFFFF); + } + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + + w2 = *in++; + len--; + + if (w2 != code_tbl_m[i][index]) { + /* Didn't match */ + for (int j = 1; j < index; j++) { + MB_CONVERT_ERROR(buf, out, limit, code_tbl_m[i][j], mb_wchar_to_sjismac); + } + MB_CONVERT_ERROR(buf, out, limit, w2, mb_wchar_to_sjismac); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + goto next_iteration; + } + + index++; + } + + /* Successful match, emit SJIS-mac bytes */ + s = code_tbl_m[i][0]; + unsigned int c1 = (s / 94) + 0x21, c2 = (s % 94) + 0x21, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + goto next_iteration; + } + } + + /* No valid transcoding hint sequence found */ + in--; len++; + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + continue; + } + } + + if (!s) { + if (w == 0xA0) { + s = 0xA0; + } else if (w == 0xA5) { /* YEN SIGN */ + /* Unicode has codepoint 0xFFE5 for a fullwidth Yen sign; + * convert codepoint 0xA5 to halfwidth Yen sign */ + s = 0x5C; /* HALFWIDTH 
YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else { + for (int i = 0; i < wchar2sjis_mac_r_tbl_len; i++) { + if (w >= wchar2sjis_mac_r_tbl[i][0] && w <= wchar2sjis_mac_r_tbl[i][1]) { + s = w - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto found_kuten_code; + } + } + + for (int i = 0; i < wchar2sjis_mac_r_map_len; i++) { + if (w >= wchar2sjis_mac_r_map[i][0] && w <= wchar2sjis_mac_r_map[i][1]) { + s = wchar2sjis_mac_code_map[i][w - wchar2sjis_mac_r_map[i][0]]; + if (s) { + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto found_kuten_code; + } + } + } + + for (int i = 0; i < wchar2sjis_mac_wchar_tbl_len; i++) { + if (w == wchar2sjis_mac_wchar_tbl[i][0]) { + s = wchar2sjis_mac_wchar_tbl[i][1]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto found_kuten_code; + } + } + } + } + +found_kuten_code: + if ((!s && w) || s >= 0x8080) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + +next_iteration: ; + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd) +{ + /* All three mobile vendors had emoji for numbers on a telephone keypad + * Unicode doesn't have those, but it has a combining character which puts + * a 'keypad button' around the following character, making it look like + * a key on a telephone or keyboard. That combining char is codepoint 0x20E3. 
*/ + if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) { + if ((s >= DOCOMO_KEYPAD(1) && s <= DOCOMO_KEYPAD(9)) || s == DOCOMO_KEYPAD(0) || s == DOCOMO_KEYPAD_HASH) { + EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min])); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]); + } + } + return 0; +} + +int mbfilter_sjis_emoji_sb2unicode(int s, int *snd) +{ + if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) { + if (s == 0x2817 || (s >= 0x2823 && s <= 0x282C)) { + EMIT_KEYPAD_EMOJI(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]); + } + } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_sb2[s - mb_tbl_code2uni_sb2_min]); + } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) { + if (s >= 0x2B02 && s <= 0x2B0B) { + EMIT_FLAG_EMOJI(nflags_sb[s - 0x2B02]); + } else { + *snd = 0; + return convert_emoji_cp(mb_tbl_code2uni_sb3[s - mb_tbl_code2uni_sb3_min]); + } + } + return 0; +} + +int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter) +{ + /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji + * to a sequence of 2 codepoints, one of which is a combining character which + * adds the 'key' image around the other + * + * In the other direction, look for such sequences and convert them to a + * single emoji */ + if (filter->status == 1) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c == 0x20E3) { + if (c1 == '#') { + *s1 = 0x2964; + } else if (c1 == '0') { + *s1 = 0x296F; + } else { /* Previous character was '1'-'9' */ + *s1 = 0x2966 + (c1 - '1'); + } + return 1; + } else { + /* This character wasn't combining character to make keypad symbol, + * so pass the previous 
character through... and proceed to process the + * current character as usual + * (Single-byte ASCII characters are valid in Shift-JIS...) */ + CK((*filter->output_function)(c1, filter->data)); + } + } + + if (c == '#' || (c >= '0' && c <= '9')) { + filter->status = 1; + filter->cache = c; + return 0; + } + + if (c == 0xA9) { /* Copyright sign */ + *s1 = 0x29B5; + return 1; + } else if (c == 0x00AE) { /* Registered sign */ + *s1 = 0x29BA; + return 1; + } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) { + int i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len); + if (i >= 0) { + *s1 = mb_tbl_uni_docomo2code2_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) { + int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len); + if (i >= 0) { + *s1 = mb_tbl_uni_docomo2code3_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) { + int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len); + if (i >= 0) { + *s1 = mb_tbl_uni_docomo2code5_val[i]; + return 1; + } + } + return 0; +} + +int mbfilter_unicode2sjis_emoji_kddi_sjis(int c, int *s1, mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c == 0x20E3) { + if (c1 == '#') { + *s1 = 0x25BC; + } else if (c1 == '0') { + *s1 = 0x2830; + } else { /* Previous character was '1'-'9' */ + *s1 = 0x27a6 + (c1 - '1'); + } + return 1; + } else { + CK((*filter->output_function)(c1, filter->data)); + } + } else if (filter->status == 2) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (c1 == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) { + *s1 = nflags_code_kddi[i]; + return 1; + } 
+ } + } + + /* If none of the KDDI national flag emoji matched, then we have no way + * to convert the previous codepoint... */ + mbfl_filt_conv_illegal_output(c1, filter); + } + + if (c == '#' || (c >= '0' && c <= '9')) { + filter->status = 1; + filter->cache = c; + return 0; + } else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */ + filter->status = 2; + filter->cache = c; + return 0; + } + + if (c == 0xA9) { /* Copyright sign */ + *s1 = 0x27DC; + return 1; + } else if (c == 0xAE) { /* Registered sign */ + *s1 = 0x27DD; + return 1; + } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) { + int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code2_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) { + int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code3_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) { + int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); + if (i >= 0) { + *s1 = mb_tbl_uni_kddi2code5_val[i]; + return 1; + } + } + return 0; +} + +int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c == 0x20E3) { + if (c1 == '#') { + *s1 = 0x2817; + } else if (c1 == '0') { + *s1 = 0x282c; + } else { /* Previous character was '1'-'9' */ + *s1 = 0x2823 + (c1 - '1'); + } + return 1; + } else { + (*filter->output_function)(c1, filter->data); + } + } else if (filter->status == 2) { + int c1 = filter->cache; + filter->cache = filter->status = 0; + if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (c1 == NFLAGS(nflags_s[i][0]) && c == 
NFLAGS(nflags_s[i][1])) { + *s1 = nflags_code_sb[i]; + return 1; + } + } + } + + /* If none of the SoftBank national flag emoji matched, then we have no way + * to convert the previous codepoint... */ + mbfl_filt_conv_illegal_output(c1, filter); + } + + if (c == '#' || (c >= '0' && c <= '9')) { + filter->status = 1; + filter->cache = c; + return 0; + } else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */ + filter->status = 2; + filter->cache = c; + return 0; + } + + if (c == 0xA9) { /* Copyright sign */ + *s1 = 0x2855; + return 1; + } else if (c == 0xAE) { /* Registered sign */ + *s1 = 0x2856; + return 1; + } else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) { + int i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len); + if (i >= 0) { + *s1 = mb_tbl_uni_sb2code2_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) { + int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len); + if (i >= 0) { + *s1 = mb_tbl_uni_sb2code3_value[i]; + return 1; + } + } else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) { + int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len); + if (i >= 0) { + *s1 = mb_tbl_uni_sb2code5_val[i]; + return 1; + } + } + return 0; +} + +static int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, s1, s2, w, snd = 0; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* ASCII */ + if (filter->from == &mbfl_encoding_sjis_sb && c == 0x1B) { + /* ESC; escape sequences were used on older SoftBank phones for emoji */ + filter->cache = c; + filter->status = 2; + } else { + CK((*filter->output_function)(c, filter->data)); + } + } else if (c > 0xA0 && c < 0xE0) { /* Kana */ + CK((*filter->output_function)(0xFEC0 + c, filter->data)); + } else if (c > 0x80 && c < 0xFD && c != 0xA0) { /* Kanji, first byte */ + filter->status = 
1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* Kanji, second byte */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0x40 && c <= 0xFC && c != 0x7F) { + w = 0; + SJIS_DECODE(c1, c, s1, s2); + s = ((s1 - 0x21) * 94) + s2 - 0x21; + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + + /* Emoji */ + if (filter->from == &mbfl_encoding_sjis_docomo && s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) { + w = mbfilter_sjis_emoji_docomo2unicode(s, &snd); + if (snd > 0) { + CK((*filter->output_function)(snd, filter->data)); + } + } else if (filter->from == &mbfl_encoding_sjis_kddi && s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) { + w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); + if (snd > 0) { + CK((*filter->output_function)(snd, filter->data)); + } + } else if (filter->from == &mbfl_encoding_sjis_sb && s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) { + w = mbfilter_sjis_emoji_sb2unicode(s, &snd); + if (snd > 0) { + CK((*filter->output_function)(snd, filter->data)); + } + } + + if (w == 0) { + if (s >= 
cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ + w = s - (94*94) + 0xe000; + } + } + } + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* ESC: Softbank Emoji */ + case 2: + if (c == '$') { + filter->cache = c; + filter->status++; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + filter->status = filter->cache = 0; + } + break; + + /* ESC $: Softbank Emoji */ + case 3: + if ((c >= 'E' && c <= 'G') || (c >= 'O' && c <= 'Q')) { + filter->cache = c; + filter->status++; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + filter->status = filter->cache = 0; + } + break; + + /* ESC $ [GEFOPQ]: Softbank Emoji */ + case 4: + c1 = filter->cache; + if (c == 0xF) { /* Terminate sequence of emoji */ + filter->status = filter->cache = 0; + return 0; + } else { + if (c1 == 'G' && c >= 0x21 && c <= 0x7a) { + s1 = (0x91 - 0x21) * 94; + } else if (c1 == 'E' && c >= 0x21 && c <= 0x7A) { + s1 = (0x8D - 0x21) * 94; + } else if (c1 == 'F' && c >= 0x21 && c <= 0x7A) { + s1 = (0x8E - 0x21) * 94; + } else if (c1 == 'O' && c >= 0x21 && c <= 0x6D) { + s1 = (0x92 - 0x21) * 94; + } else if (c1 == 'P' && c >= 0x21 && c <= 0x6C) { + s1 = (0x95 - 0x21) * 94; + } else if (c1 == 'Q' && c >= 0x21 && c <= 0x5E) { + s1 = (0x96 - 0x21) * 94; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + filter->status = filter->cache = 0; + return 0; + } + + w = mbfilter_sjis_emoji_sb2unicode(s1 + c - 0x21, &snd); + if (w > 0) { + if (snd > 0) { + CK((*filter->output_function)(snd, filter->data)); + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + 
filter->status = filter->cache = 0; + } + } + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0, s2 = 0; + + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xE000 && c < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s1 = c - 0xE000; + c1 = (s1 / 94) + 0x7F; + c2 = (s1 % 94) + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + + if (s1 <= 0) { + if (c == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215D; + } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } + } + + if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + + /* CP932 vendor ext1 (13ku) */ + for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { + if (c == cp932ext1_ucs_table[c1]) { + s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; + break; + } + } + + if (s1 <= 0) { + /* CP932 vendor ext2 (115ku - 119ku) */ + for (c1 = 0; c1 < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; c1++) { + if (c == cp932ext2_ucs_table[c1]) { + s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21; + break; + } + } + } + + if (c == 0) { + s1 = 0; + } + } + + if ((filter->to == 
&mbfl_encoding_sjis_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter)) || + (filter->to == &mbfl_encoding_sjis_kddi && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter)) || + (filter->to == &mbfl_encoding_sjis_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter))) { + s1 = (((s1 / 94) + 0x21) << 8) | ((s1 % 94) + 0x21); + } + + if (filter->status) { + return 0; + } + + if (s1 >= 0) { + if (s1 < 0x100) { /* Latin/Kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* Kanji */ + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter) +{ + int c1 = filter->cache; + if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) { + filter->cache = filter->status = 0; + CK((*filter->output_function)(c1, filter->data)); + } else if (filter->status == 2) { + /* First of a pair of Regional Indicator codepoints came at the end of a string */ + filter->cache = filter->status = 0; + mbfl_filt_conv_illegal_output(c1, filter); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static const unsigned short sjis_mobile_decode_tbl1[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, 
-5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 8836, 9024, 9212, 9400, 9588, 9776, 9964, 10152, 10340, 10528, 10716, 10904, 11092, 0xFFFF, 0xFFFF, 0xFFFF +}; + +static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + /* Leave one extra space available in output buffer, since some iterations of + * main loop (below) may emit two wchars */ + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + /* Kana */ + *out++ = 0xFEC0 + c; + } else { + /* Kanji */ + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (w <= 137) { + if (w == 31) { + *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + continue; + } else if (w == 32) { + *out++ = 0xFF5E; /* FULLWIDTH TILDE */ + continue; + } else if (w == 33) { + *out++ = 0x2225; /* PARALLEL TO */ + continue; + } else if (w == 60) { + *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + continue; + } else if (w == 80) { + *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ + continue; + } else if (w == 81) { + *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ + continue; + } else if (w == 137) { + *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ + continue; + } + } + + if (w >= mb_tbl_code2uni_docomo1_min && w <= mb_tbl_code2uni_docomo1_max) { + int snd = 0; + w = 
mbfilter_sjis_emoji_docomo2unicode(w, &snd); + if (snd) { + *out++ = snd; + } + } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; + } else if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; + } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; + } else if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } else { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + *out++ = MBFL_BAD_INPUT; + continue; + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 
1 : 0)); + + uint32_t w; + unsigned int s = 0; + + if (buf->state) { + /* Continue what we were doing on the previous call */ + w = buf->state; + buf->state = 0; + goto reprocess_wchar; + } + + while (len--) { + w = *in++; +reprocess_wchar: + s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s = w - 0xE000; + s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); + goto process_emoji; + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if (w && (!s || s >= 0x8080)) { + s = 0; + + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (w == cp932ext2_ucs_table[i]) { + s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + } + +process_emoji: + /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji + * to a sequence of 2 codepoints, one of which 
is a combining character which + * adds the 'key' image around the other + * + * In the other direction, look for such sequences and convert them to a + * single emoji */ + if (w == '#' || (w >= '0' && w <= '9')) { + if (!len) { + if (end) { + goto emit_output; + } else { + /* If we are at the end of the current buffer of codepoints, but another + * buffer is coming, then remember that we have to reprocess `w` */ + buf->state = w; + break; + } + } + uint32_t w2 = *in++; len--; + if (w2 == 0x20E3) { + if (w == '#') { + s = 0x2964; + } else if (w == '0') { + s = 0x296F; + } else { /* Previous character was '1'-'9' */ + s = 0x2966 + (w - '1'); + } + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } else { + in--; len++; + } + } else if (w == 0xA9) { /* Copyright sign */ + s = (((0x29B5 / 94) + 0x21) << 8) | ((0x29B5 % 94) + 0x21); + } else if (w == 0xAE) { /* Registered sign */ + s = (((0x29BA / 94) + 0x21) << 8) | ((0x29BA % 94) + 0x21); + } else if (w >= mb_tbl_uni_docomo2code2_min && w <= mb_tbl_uni_docomo2code2_max) { + int i = mbfl_bisec_srch2(w, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len); + if (i >= 0) { + s = mb_tbl_uni_docomo2code2_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_docomo2code3_min && w <= mb_tbl_uni_docomo2code3_max) { + int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len); + if (i >= 0) { + s = mb_tbl_uni_docomo2code3_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_docomo2code5_min && w <= mb_tbl_uni_docomo2code5_max) { + int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len); + if (i >= 0) { + s = mb_tbl_uni_docomo2code5_val[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } + +emit_output: + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_docomo); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 
0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + /* Kana */ + *out++ = 0xFEC0 + c; + } else { + /* Kanji */ + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (w <= 137) { + if (w == 31) { + *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + continue; + } else if (w == 32) { + *out++ = 0xFF5E; /* FULLWIDTH TILDE */ + continue; + } else if (w == 33) { + *out++ = 0x2225; /* PARALLEL TO */ + continue; + } else if (w == 60) { + *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + continue; + } else if (w == 80) { + *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ + continue; + } else if (w == 81) { + *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ + continue; + } else if (w == 137) { + *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ + continue; + } + } + + if (w >= mb_tbl_code2uni_kddi1_min && w <= mb_tbl_code2uni_kddi2_max) { + int snd = 0; + w = mbfilter_sjis_emoji_kddi2unicode(w, &snd); + if (!w) { + w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } + } else if (snd) { + *out++ = snd; + } + } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; + } else if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + } else if (w >= 
cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; + } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; + } else if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } else { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + *out++ = MBFL_BAD_INPUT; + continue; + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0)); + + uint32_t w; + unsigned int s = 0; + + if (buf->state) { + w = buf->state; + buf->state = 0; + goto reprocess_wchar; + } + + while (len--) { + w = *in++; +reprocess_wchar: + s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s = w - 0xE000; + s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); + goto process_emoji; + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* 
FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if (w && (!s || s >= 0x8080)) { + s = 0; + + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (w == cp932ext2_ucs_table[i]) { + s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + } + +process_emoji: + if (w == '#' || (w >= '0' && w <= '9')) { + if (!len) { + if (end) { + goto emit_output; + } else { + /* If we are at the end of the current buffer of codepoints, but another + * buffer is coming, then remember that we have to reprocess `w` */ + buf->state = w; + break; + } + } + uint32_t w2 = *in++; len--; + if (w2 == 0x20E3) { + if (w == '#') { + s = 0x25BC; + } else if (w == '0') { + s = 0x2830; + } else { /* Previous character was '1'-'9' */ + s = 0x27A6 + (w - '1'); + } + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } else { + in--; len++; + } + } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */ + if (!len) { + if (end) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); + } else { + /* Reprocess `w` when this function is called again with another buffer + * of wchars */ + buf->state = w; + } + break; + } + uint32_t w2 = *in++; len--; + if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { + s = nflags_code_kddi[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto emit_output; + } + } + } + in--; len++; + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + continue; + } else if (w == 0xA9) { /* Copyright sign */ + s = (((0x27DC / 94) + 0x21) << 8) | ((0x27DC % 94) 
+ 0x21); + } else if (w == 0xAE) { /* Registered sign */ + s = (((0x27DD / 94) + 0x21) << 8) | ((0x27DD % 94) + 0x21); + } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) { + int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); + if (i >= 0) { + s = mb_tbl_uni_kddi2code2_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) { + int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); + if (i >= 0) { + s = mb_tbl_uni_kddi2code3_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) { + int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); + if (i >= 0) { + s = mb_tbl_uni_kddi2code5_val[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } + +emit_output: + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + if (*state) { + goto softbank_emoji_escapes; + } + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == 0x1B) { + /* Escape sequence */ + if (p == e || *p++ != '$' || p == e) { + *out++ = MBFL_BAD_INPUT; + continue; + } + unsigned char c2 = *p++; + if ((c2 < 'E' || c2 > 'G') && (c2 < 'O' || c2 > 'Q')) { + *out++ = 
MBFL_BAD_INPUT; + continue; + } + /* Escape sequence was valid, next should be a series of specially + * encoded Softbank emoji */ + *state = c2; + +softbank_emoji_escapes: + while (p < e && out < limit) { + c = *p++; + if (c == 0xF) { + *state = 0; + break; + } + unsigned int s = 0; + if (*state == 'G' && c >= 0x21 && c <= 0x7A) { + s = (0x91 - 0x21) * 94; + } else if (*state == 'E' && c >= 0x21 && c <= 0x7A) { + s = (0x8D - 0x21) * 94; + } else if (*state == 'F' && c >= 0x21 && c <= 0x7A) { + s = (0x8E - 0x21) * 94; + } else if (*state == 'O' && c >= 0x21 && c <= 0x6D) { + s = (0x92 - 0x21) * 94; + } else if (*state == 'P' && c >= 0x21 && c <= 0x6C) { + s = (0x95 - 0x21) * 94; + } else if (*state == 'Q' && c >= 0x21 && c <= 0x5E) { + s = (0x96 - 0x21) * 94; + } else { + *out++ = MBFL_BAD_INPUT; + *state = 0; + break; + } + + int snd = 0; + uint32_t w = mbfilter_sjis_emoji_sb2unicode(s + c - 0x21, &snd); + if (w) { + if (snd) { + *out++ = snd; + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + *state = 0; + break; + } + } + } else if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xDF) { + /* Kana */ + *out++ = 0xFEC0 + c; + } else { + /* Kanji */ + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (w <= 137) { + if (w == 31) { + *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + continue; + } else if (w == 32) { + *out++ = 0xFF5E; /* FULLWIDTH TILDE */ + continue; + } else if (w == 33) { + *out++ = 0x2225; /* PARALLEL TO */ + continue; + } else if (w == 60) { + *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + continue; + } else if (w == 80) { + *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ + continue; + } else if (w == 81) { + *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ + continue; + } else if (w == 137) { + *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ + continue; + } + } + + if (w >= mb_tbl_code2uni_sb1_min && w <= mb_tbl_code2uni_sb3_max) { + int 
snd = 0; + w = mbfilter_sjis_emoji_sb2unicode(w, &snd); + if (!w) { + w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; + } else if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } + } else if (snd) { + *out++ = snd; + } + } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; + } else if (w < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[w]; + } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; + } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; + } else if (w >= (94*94) && w < (114*94)) { + w = w - (94*94) + 0xE000; + } else { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + *out++ = MBFL_BAD_INPUT; + continue; + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 
1 : 0)); + + uint32_t w; + unsigned int s = 0; + + if (buf->state) { + w = buf->state; + buf->state = 0; + goto reprocess_wchar; + } + + while (len--) { + w = *in++; +reprocess_wchar: + s = 0; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + /* Private User Area (95ku - 114ku) */ + s = w - 0xE000; + s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); + goto process_emoji; + } + + if (!s) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } + } + + if (w && (!s || s >= 0x8080)) { + s = 0; + + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (w == cp932ext1_ucs_table[i]) { + s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (w == cp932ext2_ucs_table[i]) { + s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; + goto process_emoji; + } + } + } + +process_emoji: + if (w == '#' || (w >= '0' && w <= '9')) { + if (!len) { + if (end) { + goto emit_output; + } else { + /* If we are at the end of the current buffer of codepoints, but another + * 
buffer is coming, then remember that we have to reprocess `w` */ + buf->state = w; + break; + } + } + uint32_t w2 = *in++; len--; + if (w2 == 0x20E3) { + if (w == '#') { + s = 0x2817; + } else if (w == '0') { + s = 0x282c; + } else { /* Previous character was '1'-'9' */ + s = 0x2823 + (w - '1'); + } + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } else { + in--; len++; + } + } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */ + if (!len) { + if (end) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); + } else { + /* Reprocess `w` when this function is called again with + * another buffer of wchars */ + buf->state = w; + } + break; + } + uint32_t w2 = *in++; len--; + if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ + for (int i = 0; i < 10; i++) { + if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { + s = nflags_code_sb[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + goto emit_output; + } + } + } + in--; len++; + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + continue; + } else if (w == 0xA9) { /* Copyright sign */ + s = (((0x2855 / 94) + 0x21) << 8) | ((0x2855 % 94) + 0x21); + } else if (w == 0xAE) { /* Registered sign */ + s = (((0x2856 / 94) + 0x21) << 8) | ((0x2856 % 94) + 0x21); + } else if (w >= mb_tbl_uni_sb2code2_min && w <= mb_tbl_uni_sb2code2_max) { + int i = mbfl_bisec_srch2(w, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len); + if (i >= 0) { + s = mb_tbl_uni_sb2code2_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_sb2code3_min && w <= mb_tbl_uni_sb2code3_max) { + int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len); + if (i >= 0) { + s = mb_tbl_uni_sb2code3_value[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } else if (w >= mb_tbl_uni_sb2code5_min && w <= mb_tbl_uni_sb2code5_max) { + int i = 
mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len); + if (i >= 0) { + s = mb_tbl_uni_sb2code5_val[i]; + s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); + } + } + +emit_output: + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_sjis2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + if (c == 0x5C) { + *out++ = 0xA5; + } else if (c == 0x7E) { + *out++ = 0x203E; + } else { + *out++ = c; + } + } else if (c >= 0xA1 && c <= 0xDF) { + *out++ = 0xFEC0 + c; + } else { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + uint32_t w1 = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + /* Conversion for combining characters */ + if (w1 >= 0x0170 && w1 <= 0x03F1) { + int k = mbfl_bisec_srch2(w1, jisx0213_u2_key_b, jisx0213_u2_tbl_len); + if (k >= 0) { + *out++ = jisx0213_u2_tbl[2*k]; + *out++ = jisx0213_u2_tbl[2*k+1]; + continue; + } + } + + /* Conversion for BMP */ + if (w1 < jisx0213_ucs_table_size) { + uint32_t w = jisx0213_ucs_table[w1]; + if (w) { + *out++ = w; + continue; + } + } + + /* Conversion for CJK Unified Ideographs extension B (U+2XXXX) */ + int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + *out++ = jisx0213_jis_u5_tbl[k] + 0x20000; + } else { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + *out++ = MBFL_BAD_INPUT; + } + } 
+ } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_sjis2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + uint32_t w; + if (buf->state) { + w = buf->state; + buf->state = 0; + goto process_codepoint; + } + + while (len--) { + w = *in++; +process_codepoint: ; + unsigned int s = 0; + + if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { + for (int k = 0; k < jisx0213_u2_tbl_len; k++) { + if (w == jisx0213_u2_tbl[2*k]) { + if (!len) { + if (!end) { + buf->state = w; + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + } else { + uint32_t w2 = *in++; len--; + if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { + k++; + } + if (w2 == jisx0213_u2_tbl[2*k+1]) { + s = jisx0213_u2_key[k]; + break; + } + in--; len++; + } + + /* Fallback */ + s = jisx0213_u2_fb_tbl[k]; + break; + } + } + } + + /* Check for major Japanese chars: U+4E00-U+9FFF */ + if (!s) { + for (int k = 0; k < uni2jis_tbl_len; k++) { + if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { + s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; + break; + } + } + } + + /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ + if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { + int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); + if (k >= 0) { + s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; + } + } + + /* Check for Japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ + if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { + int k = mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); + if (k >= 0) { + s = jisx0213_u5_jis_tbl[k]; + } + } + + if (!s) { + /* CJK Compatibility Forms: U+FE30-U+FE4F */ + if (w == 0xFE45) { + s = 
0x233E; + } else if (w == 0xFE46) { + s = 0x233D; + } else if (w >= 0xF91D && w <= 0xF9DC) { + /* CJK Compatibility Ideographs: U+F900-U+F92A */ + int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); + if (k >= 0) { + s = ucs_r2b_jisx0213_cmap_val[k]; + } + } + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis2004); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; + SJIS_ENCODE(c1, c2, s1, s2); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, s1, s2, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* kanji second char */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0x40 && c <= 0xfc && c != 0x7f) { + w = 0; + SJIS_DECODE(c1, c, s1, s2); + s = (s1 - 0x21)*94 + s2 - 0x21; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s 
< cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ + w = s - (94*94) + 0xe000; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + filter->status = 0; + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1, s2; + + s1 = 0; + s2 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c == 0x203E) { + s1 = 0x7E; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x7f; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + if (s1 <= 0) { + 
if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x5C; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 <= 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + s1 = ((c1/94 + 0x93) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + if (s1 >= 0) { + if (s1 < 0x100) { /* latin or kana */ + CK((*filter->output_function)(s1, filter->data)); + } else { /* kanji */ + c1 = (s1 >> 8) & 0xff; + c2 = s1 & 0xff; + SJIS_ENCODE(c1, c2, s1, s2); + CK((*filter->output_function)(s1, filter->data)); + CK((*filter->output_function)(s2, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter) +{ + if (c == 0xA5) { + CK((*filter->output_function)(0x81, filter->data)); + CK((*filter->output_function)(0x8F, filter->data)); + } else if (c == 0x203E) { + CK((*filter->output_function)(0x81, filter->data)); + CK((*filter->output_function)(0x50, filter->data)); + } else { + return mbfl_filt_conv_wchar_cp932(c, filter); + } + return 0; +} + +static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, 
uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c > 0xA0 && c < 0xE0) { + /* Kana */ + *out++ = 0xFEC0 + c; + } else { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + unsigned int w = 0; + unsigned int s = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; + + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { + w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; + } else if (s >= (94*94) && s < (114*94)) { + w = s - (94*94) + 0xE000; + } + } + + if (!w) { + if (c == 0x80 || c == 0xA0 || c >= 0xFD) { + p--; + } + w = MBFL_BAD_INPUT; + } + *out++ = w; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s1 = 0, s2 = 0, c1, c2; + + 
if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w == 0x203E) { + s1 = 0x7E; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + s1 = w - 0xE000; + c1 = s1/94 + 0x7F; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + + if (w == 0xA5) { /* YEN SIGN */ + s1 = 0x5C; + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } else if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } + + if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */ + for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (cp932ext1_ucs_table[i] == w) { + s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21); + goto emit_output; + } + } + + for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { + if (cp932ext3_ucs_table[i] == w) { + s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21); + goto emit_output; + } + } + + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + +emit_output: + if (s1 < 0x100) { + out = mb_convert_buf_add(out, s1); + } else { + c1 = (s1 >> 8) & 0xFF; + c2 = s1 & 0xFF; + SJIS_ENCODE(c1, c2, s1, s2); + out = mb_convert_buf_add2(out, s1, s2); + } + } 
+ + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s1 = 0, s2 = 0, c1, c2; + + if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { + s1 = w - 0xE000; + c1 = s1/94 + 0x7F; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + s2 = 1; + } + + if (w == 0xA5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224C; + } else if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } + + if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */ + for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (cp932ext1_ucs_table[i] == w) { + s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21); + goto emit_output; + } + } + + for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { + if (cp932ext3_ucs_table[i] == w) { + s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21); + goto emit_output; + } + } + + 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + +emit_output: + if (s1 < 0x100) { + out = mb_convert_buf_add(out, s1); + } else { + c1 = (s1 >> 8) & 0xFF; + c2 = s1 & 0xFF; + SJIS_ENCODE(c1, c2, s1, s2); + out = mb_convert_buf_add2(out, s1, s2); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const unsigned char mblen_table_sjis[] = { /* 0x81-0x9F,0xE0-0xEF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static const unsigned char mblen_table_sjismac[] = { /* 0x81-0x9F,0xE0-0xED */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 +}; + +static const unsigned char mblen_table_sjis_mobile[] = { /* 0x81-0x9F,0xE0-0xFC */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 +}; + +static const char *mbfl_encoding_sjis_aliases[] = {"x-sjis", "SHIFT-JIS", NULL}; + +static const struct mbfl_convert_vtbl vtbl_sjis_wchar = { + mbfl_no_encoding_sjis, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis, + mbfl_filt_conv_common_flush, + NULL +}; + +const mbfl_encoding mbfl_encoding_sjis = { + mbfl_no_encoding_sjis, + "SJIS", + "Shift_JIS", + mbfl_encoding_sjis_aliases, + mblen_table_sjis, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_wchar, + &vtbl_wchar_sjis, + mb_sjis_to_wchar, + mb_wchar_to_sjis, + NULL +}; + +static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL}; + +static const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = { + 
mbfl_no_encoding_sjis_mac, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_mac_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_mac, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis_mac, + mbfl_filt_conv_wchar_sjis_mac_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis_mac = { + mbfl_no_encoding_sjis_mac, + "SJIS-mac", + "Shift_JIS", + mbfl_encoding_sjis_mac_aliases, + mblen_table_sjismac, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_mac_wchar, + &vtbl_wchar_sjis_mac, + mb_sjismac_to_wchar, + mb_wchar_to_sjismac, + NULL +}; + +static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL}; +static const char *mbfl_encoding_sjis_kddi_aliases[] = {"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL}; +static const char *mbfl_encoding_sjis_sb_aliases[] = {"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL}; + +static const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar = { + mbfl_no_encoding_sjis_docomo, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_mobile_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_docomo, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis_mobile, + mbfl_filt_conv_sjis_mobile_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis_docomo = { + mbfl_no_encoding_sjis_docomo, + "SJIS-Mobile#DOCOMO", + "Shift_JIS", + mbfl_encoding_sjis_docomo_aliases, + mblen_table_sjis_mobile, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_docomo_wchar, + &vtbl_wchar_sjis_docomo, + mb_sjis_docomo_to_wchar, + mb_wchar_to_sjis_docomo, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = { + mbfl_no_encoding_sjis_kddi, 
+ mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_mobile_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_kddi, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis_mobile, + mbfl_filt_conv_sjis_mobile_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis_kddi = { + mbfl_no_encoding_sjis_kddi, + "SJIS-Mobile#KDDI", + "Shift_JIS", + mbfl_encoding_sjis_kddi_aliases, + mblen_table_sjis_mobile, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_kddi_wchar, + &vtbl_wchar_sjis_kddi, + mb_sjis_kddi_to_wchar, + mb_wchar_to_sjis_kddi, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = { + mbfl_no_encoding_sjis_sb, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_sjis_mobile_wchar, + mbfl_filt_conv_sjis_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis_sb, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjis_mobile, + mbfl_filt_conv_sjis_mobile_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis_sb = { + mbfl_no_encoding_sjis_sb, + "SJIS-Mobile#SOFTBANK", + "Shift_JIS", + mbfl_encoding_sjis_sb_aliases, + mblen_table_sjis_mobile, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis_sb_wchar, + &vtbl_wchar_sjis_sb, + mb_sjis_sb_to_wchar, + mb_wchar_to_sjis_sb, + NULL +}; + +/* Although the specification for Shift-JIS-2004 indicates that 0x5C and + * 0x7E should (respectively) represent a Yen sign and an overbar, feedback + * from Japanese PHP users indicates that they prefer 0x5C and 0x7E to be + * treated as equivalent to U+005C and U+007E. This is the historical + * behavior of mbstring, and promotes compatibility with other software + * which handles Shift-JIS and Shift-JIS-2004 text in this way. 
*/ + +static const char *mbfl_encoding_sjis2004_aliases[] = {"SJIS2004","Shift_JIS-2004", NULL}; + +static const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = { + mbfl_no_encoding_sjis2004, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis2004_wchar, + mbfl_filt_conv_jis2004_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjis2004, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_jis2004, + mbfl_filt_conv_wchar_jis2004_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjis2004 = { + mbfl_no_encoding_sjis2004, + "SJIS-2004", + "Shift_JIS", + mbfl_encoding_sjis2004_aliases, + mblen_table_sjis_mobile, /* Leading byte values used for SJIS-2004 are the same as mobile SJIS variants */ + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjis2004_wchar, + &vtbl_wchar_sjis2004, + mb_sjis2004_to_wchar, + mb_wchar_to_sjis2004, + NULL +}; + +/* CP932 is Microsoft's version of Shift-JIS. + * + * What we call "SJIS-win" is a variant of CP932 which maps U+00A5 + * and U+203E the same way as eucJP-win; namely, instead of mapping + * U+00A5 (YEN SIGN) to 0x5C and U+203E (OVERLINE) to 0x7E, + * these codepoints are mapped to appropriate JIS X 0208 characters. + * + * When converting from Shift-JIS to Unicode, there is no difference + * between CP932 and "SJIS-win". + * + * Additional facts: + * + * • In the libmbfl library which formed the base for mbstring, "CP932" and + * "SJIS-win" were originally aliases. The differing mappings were added in + * December 2002. The libmbfl author later stated that this was done so that + * "CP932" would comply with a certain specification, while "SJIS-win" would + * maintain the existing mappings. He does not remember which specification + * it was. + * • The WHATWG specification for "Shift_JIS" (followed by web browsers) + * agrees with our mappings for "CP932". 
+ * • Microsoft Windows' "best-fit" mappings for CP932 (via the + * WideCharToMultiByte API) convert U+00A5 to 0x5C, which also agrees with + * our mappings for "CP932". + * • glibc's iconv converts U+203E to CP932 0x7E, which again agrees with + * our mappings for "CP932". + * • When converting Shift-JIS to CP932, the conversion goes through Unicode. + * Shift-JIS 0x7E converts to U+203E, so mapping U+203E to 0x7E means that + * 0x7E will go to 0x7E when converting Shift-JIS to CP932. + */ + +static const unsigned char mblen_table_sjiswin[] = { /* 0x81-0x9F,0xE0-0xFF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +}; + +static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL}; +static const char *mbfl_encoding_sjiswin_aliases[] = {"SJIS-ms", "SJIS-open", NULL}; + +static const struct mbfl_convert_vtbl vtbl_cp932_wchar = { + mbfl_no_encoding_cp932, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp932_wchar, + mbfl_filt_conv_cp932_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp932 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp932, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_cp932, + 
mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_cp932 = { + mbfl_no_encoding_cp932, + "CP932", + "Shift_JIS", + mbfl_encoding_cp932_aliases, + mblen_table_sjiswin, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_cp932_wchar, + &vtbl_wchar_cp932, + mb_cp932_to_wchar, + mb_wchar_to_cp932, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = { + mbfl_no_encoding_sjiswin, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp932_wchar, + mbfl_filt_conv_cp932_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_sjiswin, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_sjiswin, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_sjiswin = { + mbfl_no_encoding_sjiswin, + "SJIS-win", + "Shift_JIS", + mbfl_encoding_sjiswin_aliases, + mblen_table_sjiswin, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_sjiswin_wchar, + &vtbl_wchar_sjiswin, + mb_cp932_to_wchar, + mb_wchar_to_sjiswin, + NULL +}; + +/* + * EUC variants + */ + +static int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w = 0; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xff) { /* X 0208 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else if (c == 0x8f) { /* X 0212 first char */ + filter->status = 3; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s >= 0 && s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + if (!w) + w = MBFL_BAD_INPUT; + } else { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); 
+ } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 2: /* got 0x8e */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* got 0x8f, JIS X 0212 first byte */ + filter->status++; + filter->cache = c; + break; + + case 4: /* got 0x8f, JIS X 0212 second byte */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xA0 && c < 0xFF && c1 > 0xA0 && c1 < 0xFF) { + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + if (!w) + w = MBFL_BAD_INPUT; + } else { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + filter->status = 0; + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c == 0xAF) { /* U+00AF is MACRON */ + s = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s <= 0) { + if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (c == 
0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s = 0x224c; + } else if (c == 0) { + s = 0; + } else { + s = -1; + } + } + if (s >= 0) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else if (s < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s, filter->data)); + } else if (s < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s & 0xff) | 0x80, filter->data)); + } else { /* X 0212 */ + CK((*filter->output_function)(0x8f, filter->data)); + CK((*filter->output_function)(((s >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s & 0xff) | 0x80, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xFE && p < e) { + /* JISX 0208 */ + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int s = (c - 0xA1)*94 + c2 - 0xA1; + if (s < jisx0208_ucs_table_size) { + uint32_t w = jisx0208_ucs_table[s]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8E && p < e) { + /* Kana */ + unsigned char c2 = *p++; + *out++ = (c2 >= 0xA1 && c2 <= 0xDF) ? 
0xFEC0 + c2 : MBFL_BAD_INPUT; + } else if (c == 0x8F) { + /* JISX 0212 */ + if ((e - p) >= 2) { + unsigned char c2 = *p++; + unsigned char c3 = *p++; + if (c3 >= 0xA1 && c3 <= 0xFE && c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1; + if (s < jisx0212_ucs_table_size) { + uint32_t w = jisx0212_ucs_table[s]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + p = e; /* Jump to end of string */ + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w == 0xAF) { /* U+00AF is MACRON */ + s = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s == 0) { + if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } else { + 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + continue; + } + } + + if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else if (s < 0x100) { + out = mb_convert_buf_add2(out, 0x8E, s); + } else if (s < 0x8080) { + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); + out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w, n; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c >= 0xa1 && c <= 0xfe) { /* CP932 first char */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else if (c == 0x8f) { /* X 0212 first char */ + filter->status = 3; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + w = 0; + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = 
jisx0208_ucs_table[s]; + } else if (s >= (84 * 94)) { /* user (85ku - 94ku) */ + w = s - (84 * 94) + 0xe000; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 2: /* got 0x8e, X0201 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* got 0x8f, X 0212 first char */ + filter->status++; + filter->cache = c; + break; + + case 4: /* got 0x8f, X 0212 second char */ + filter->status = 0; + c1 = filter->cache; + if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) { + s = (c1 - 0xa1)*94 + c - 0xa1; + + if (s >= 0 && s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + + if (w == 0x007e) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } + } else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */ + s = (c1 << 8) | c; + w = 0; + n = 0; + while (n < cp932ext3_eucjp_table_size) { + if (s == cp932ext3_eucjp_table[n]) { + if (n < (cp932ext3_ucs_table_max - cp932ext3_ucs_table_min)) { + w = cp932ext3_ucs_table[n]; + } + break; + } + n++; + } + } else if (s >= (84*94)) { /* user (85ku - 94ku) */ + w = s - (84*94) + (0xe000 + (94*10)); + } else { + w = 0; + } + + if (w == 0x00A6) { + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + filter->status = 0; + } + + if (filter->flush_function) { + 
(*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1 = 0; + + if (c == 0xAF) { /* U+00AF is MACRON */ + s1 = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (c == 0x203E) { + s1 = 0x7E; + } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 10*94)) { /* user (X0208 85ku - 94ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x75; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; + } else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) { /* user (X0212 85ku - 94ku) */ + s1 = c - (0xe000 + 10*94); + c1 = s1/94 + 0xf5; + c2 = s1%94 + 0xa1; + s1 = (c1 << 8) | c2; + } + + if (s1 == 0xa2f1) { + s1 = 0x2d62; /* NUMERO SIGN */ + } + + if (s1 <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x5C; + } else if (c == 0x2014) { + s1 = 0x213D; + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } else { + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + const int oh = cp932ext1_ucs_table_min / 94; + + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1 / 94 + oh + 0x21) << 8) + (c1 % 94 + 0x21); 
+ break; + } + c1++; + } + if (s1 < 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + if (c1 < cp932ext3_eucjp_table_size) { + s1 = cp932ext3_eucjp_table[c1]; + } + break; + } + c1++; + } + } + } + + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } else { /* X 0212 */ + CK((*filter->output_function)(0x8f, filter->data)); + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xFE && p < e) { + unsigned char c2 = *p++; + + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; + + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ 
+ } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= (84 * 94)) { + w = s - (84 * 94) + 0xE000; + } + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8E && p < e) { + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xDF) { + *out++ = 0xFEC0 + c2; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8F && p < e) { + unsigned char c2 = *p++; + if (p == e) { + *out++ = MBFL_BAD_INPUT; + continue; + } + unsigned char c3 = *p++; + + if (c2 >= 0xA1 && c2 <= 0xFE && c3 >= 0xA1 && c3 <= 0xFE) { + unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1, w = 0; + + if (s < jisx0212_ucs_table_size) { + w = jisx0212_ucs_table[s]; + if (w == 0x7E) + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s >= (82*94) && s < (84*94)) { + s = (c2 << 8) | c3; + for (int i = 0; i < cp932ext3_eucjp_table_size; i++) { + if (cp932ext3_eucjp_table[i] == s) { + w = cp932ext3_ucs_table[i]; + break; + } + } + } else if (s >= (84*94)) { + w = s - (84*94) + 0xE000 + (94*10); + } + + if (w == 0xA6) + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } else if (w == 0xAF) { /* U+00AF is MACRON */ + s = 0xA2B4; /* Use JIS X 0212 overline */ + } else if (w == 0x203E) { + s 
= 0x7E; + } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } else if (w >= 0xE000 && w < (0xE000 + 10*94)) { + s = w - 0xE000; + s = ((s/94 + 0x75) << 8) + (s%94) + 0x21; + } else if (w >= (0xE000 + 10*94) && w < (0xE000 + 20*94)) { + s = w - (0xE000 + 10*94); + s = ((s/94 + 0xF5) << 8) + (s%94) + 0xA1; + } + + if (s == 0xA2F1) + s = 0x2D62; /* NUMERO SIGN */ + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x5C; + } else if (w == 0x2014) { /* EM DASH */ + s = 0x213D; + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 0x224C; + } else { + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (cp932ext1_ucs_table[i] == w) { + s = (((i/94) + (cp932ext1_ucs_table_min/94) + 0x21) << 8) + (i%94) + 0x21; + break; + } + } + + if (!s) { + for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { + if (cp932ext3_ucs_table[i] == w) { + s = cp932ext3_eucjp_table[i]; + break; + } + } + } + } + } + + if (!s) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjpwin); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else if (s < 0x100) { + out = mb_convert_buf_add2(out, 0x8E, s); + } else if (s 
< 0x8080) { + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); + out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c >= 0xA1 && c <= 0xFE) { /* CP932, first byte */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8e) { /* kana first char */ + filter->status = 2; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* got first half */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xa0 && c < 0xff) { + w = 0; + s = (c1 - 0xa1)*94 + c - 0xa1; + if (s <= 137) { + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, 
filter->data)); + } + break; + + case 2: /* got 0x8e, X0201 kana */ + filter->status = 0; + if (c > 0xa0 && c < 0xe0) { + w = 0xfec0 + c; + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* Input string was truncated */ + (*filter->output_function)(MBFL_BAD_INPUT, filter->data); + filter->status = 0; + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1; + + s1 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } + if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */ + if (s1 <= 0) { + if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } else { + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = 
((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 < 0) { + c1 = 0; + c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext2_ucs_table[c1]) { + s1 = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); + break; + } + c1++; + } + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x100) { /* kana */ + CK((*filter->output_function)(0x8e, filter->data)); + CK((*filter->output_function)(s1, filter->data)); + } else if (s1 < 0x8080) { /* X 0208 */ + CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); + CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xFE && p < e) { + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; + + if (s <= 137) { + if (s == 31) { + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xFF5E; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ + } + } + + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { + 
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s < jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8E && p < e) { + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xDF) { + *out++ = 0xFEC0 + c2; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; + } + + if (s >= 0x8080) s = 0; /* We don't support JIS X0213 */ + + if (s == 0) { + if (w == 0xA5) { /* YEN SIGN */ + s = 0x216F; /* FULLWIDTH YEN SIGN */ + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ + s = 0x2140; + } else if (w == 0x2225) { /* PARALLEL TO */ + s = 0x2142; + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ + s = 0x215D; + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ + s = 0x2171; + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ + s = 0x2172; + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ + s = 
0x224C; + } else { + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { + if (cp932ext1_ucs_table[i] == w) { + s = ((i/94 + 0x2D) << 8) + (i%94) + 0x21; + goto found_it; + } + } + + for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { + if (cp932ext2_ucs_table[i] == w) { + s = ((i/94 + 0x79) << 8) + (i%94) + 0x21; + goto found_it; + } + } + } +found_it: ; + } + + if (!s || s >= 0x8080) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp51932); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else if (s < 0x100) { + out = mb_convert_buf_add2(out, 0x8E, s); + } else { + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_eucjp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize - 1; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c >= 0xA1 && c <= 0xFE) { + /* Kanji */ + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + if (c2 <= 0xA0 || c2 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + unsigned int s1 = c - 0x80, s2 = c2 - 0x80; + unsigned int w1 = (s1 << 8) | s2, w = 0; + + /* Conversion for combining characters */ + if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { + int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); + if (k >= 0) { + *out++ = jisx0213_u2_tbl[2*k]; + *out++ = jisx0213_u2_tbl[2*k+1]; + continue; + } + } + + /* Conversion for BMP */ + w1 = (s1 - 0x21)*94 + s2 - 0x21; + if (w1 < jisx0213_ucs_table_size) { + w = 
jisx0213_ucs_table[w1]; + } + + /* Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ + if (!w) { + int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? w : MBFL_BAD_INPUT; + } else if (c == 0x8E && p < e) { + /* Kana */ + unsigned char c2 = *p++; + if (c2 >= 0xA1 && c2 <= 0xDF) { + *out++ = 0xFEC0 + c2; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8F && p < e) { + unsigned char c2 = *p++; + if ((c2 == 0xA1 || (c2 >= 0xA3 && c2 <= 0xA5) || c2 == 0xA8 || (c2 >= 0xAC && c2 <= 0xAF) || (c2 >= 0xEE && c2 <= 0xFE)) && p < e) { + unsigned char c3 = *p++; + + if (c3 < 0xA1 || c3 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + unsigned int s1 = c2 - 0xA1, s2 = c3 - 0xA1; + + if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) { + int k; + for (k = 0; k < jisx0213_p2_ofst_len; k++) { + if (s1 == jisx0213_p2_ofst[k]) { + break; + } + } + k -= jisx0213_p2_ofst[k]; + + /* Check for Japanese chars in BMP */ + unsigned int s = (s1 + 94 + k)*94 + s2; + ZEND_ASSERT(s < jisx0213_ucs_table_size); + unsigned int w = jisx0213_ucs_table[s]; + + /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ + if (!w) { + k = mbfl_bisec_srch2(s, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); + if (k >= 0) { + w = jisx0213_jis_u5_tbl[k] + 0x20000; + } + } + + *out++ = w ? 
w : MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_eucjp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + uint32_t w; + if (buf->state) { + w = buf->state; + buf->state = 0; + goto process_codepoint; + } + + while (len--) { + w = *in++; +process_codepoint: ; + unsigned int s = 0; + + /* Check for 1st char of combining characters */ + if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { + for (int k = 0; k < jisx0213_u2_tbl_len; k++) { + if (w == jisx0213_u2_tbl[2*k]) { + if (!len) { + if (!end) { + buf->state = w; + MB_CONVERT_BUF_STORE(buf, out, limit); + return; + } + } else { + uint32_t w2 = *in++; len--; + if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { + k++; + } + if (w2 == jisx0213_u2_tbl[2*k+1]) { + s = jisx0213_u2_key[k]; + break; + } + in--; len++; + } + + /* Fallback */ + s = jisx0213_u2_fb_tbl[k]; + break; + } + } + } + + /* Check for major Japanese chars: U+4E00-U+9FFF */ + if (!s) { + for (int k = 0; k < uni2jis_tbl_len; k++) { + if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { + s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; + break; + } + } + } + + /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ + if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { + int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); + if (k >= 0) { + s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; + } + } + + /* Check for Japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ + if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { + int k = mbfl_bisec_srch2(w - 
0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); + if (k >= 0) { + s = jisx0213_u5_jis_tbl[k]; + } + } + + if (!s) { + /* CJK Compatibility Forms: U+FE30-U+FE4F */ + if (w == 0xFE45) { + s = 0x233E; + } else if (w == 0xFE46) { + s = 0x233D; + } else if (w >= 0xF91D && w <= 0xF9DC) { + /* CJK Compatibility Ideographs: U+F900-U+F92A */ + int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); + if (k >= 0) { + s = ucs_r2b_jisx0213_cmap_val[k]; + } + } + } + + if (!s && w) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp2004); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s <= 0x7F) { + out = mb_convert_buf_add(out, s); + } else if (s <= 0xFF) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, 0x8E, s); + } else if (s <= 0x7EFF) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) + 0x80, (s & 0xFF) + 0x80); + } else { + unsigned int s2 = s & 0xFF; + int k = ((s >> 8) & 0xFF) - 0x7F; + ZEND_ASSERT(k < jisx0213_p2_ofst_len); + s = jisx0213_p2_ofst[k] + 0x21; + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); + out = mb_convert_buf_add3(out, 0x8F, s | 0x80, s2 | 0x80); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if ((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) { /* dbcs lead byte */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xA0 && c < 0xFF) { + w = (c1 - 0x81)*192 + c - 0x40; + ZEND_ASSERT(w < cp936_ucs_table_size); + if (w == 0x1864) { + w = 0x30FB; + } else if (w == 0x186A) { + w = 0x2015; + } else if ((w >= 
0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) { + w = 0; + } else { + w = cp936_ucs_table[w]; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { + if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261) { + s = 0; + } else { + s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; + } + } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { + if (c == 0x2015) { + s = 0xA1AA; + } else if (c == 0x2014 || (c >= 0x2170 && c <= 0x2179)) { + s = 0; + } else { + s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; + } + } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { + if (c == 0x30FB) { + s = 0xA1A4; + } else { + s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; + } + } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { + s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; + } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { + if (c == 0xFF04) { + s = 0xA1E7; + } else if (c == 0xFF5E) { + s = 0xA1AB; + } else if (c >= 0xFF01 && c <= 0xFF5D) { + s = c - 0xFF01 + 0xA3A1; + } else if (c >= 0xFFE0 && c <= 0xFFE5) { + s = ucs_hff_s_cp936_table[c - 0xFFE0]; + } + } + + /* exclude CP936 extensions */ + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = 0; + } + + if (s <= 0) { + if (c < 0x80) { + s = c; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s & 0xFF, filter->data)); + } 
+ } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) && p < e) { + unsigned char c2 = *p++; + + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int w = (c - 0x81)*192 + c2 - 0x40; + ZEND_ASSERT(w < cp936_ucs_table_size); + if (w == 0x1864) { + w = 0x30FB; + } else if (w == 0x186A) { + w = 0x2015; + } else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) { + w = 0; + } else { + w = cp936_ucs_table[w]; + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { + if (w != 0xB7 && w != 0x144 && w != 0x148 && w != 0x251 && w != 0x261) { + s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; + } + } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { + if (w == 0x2015) { + s = 0xA1AA; + } else 
if (w != 0x2014 && (w < 0x2170 || w > 0x2179)) { + s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; + } + } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { + if (w == 0x30FB) { + s = 0xA1A4; + } else { + s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; + } + } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { + s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; + } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { + if (w == 0xFF04) { + s = 0xA1E7; + } else if (w == 0xFF5E) { + s = 0xA1AB; + } else if (w >= 0xFF01 && w <= 0xFF5D) { + s = w - 0xFF01 + 0xA3A1; + } else if (w >= 0xFFE0 && w <= 0xFFE5) { + s = ucs_hff_s_cp936_table[w - 0xFFE0]; + } + } + + /* Exclude CP936 extensions */ + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = 0; + } + + if (!s) { + if (w < 0x80) { + out = mb_convert_buf_add(out, w); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euccn); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else { + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) { /* 2-byte character, first byte */ + filter->status = 1; + filter->cache = c; + } else if (c == 0x8E) { /* 4-byte character, first byte */ + filter->status = 2; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* 2-byte character, second byte */ + filter->status = 0; + c1 = filter->cache; + if (c > 0xA0 && c < 0xFF) { + w = (c1 - 0xA1)*94 + (c - 0xA1); + if (w >= 0 && w < cns11643_1_ucs_table_size) { + w = 
cns11643_1_ucs_table[w]; + } else { + w = 0; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + filter->status = filter->cache = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 2: /* got 0x8e, second byte */ + if (c == 0xA1 || c == 0xA2 || c == 0xAE) { + filter->status = 3; + filter->cache = c - 0xA1; + } else { + filter->status = filter->cache = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* got 0x8e, third byte */ + filter->status = 0; + c1 = filter->cache; + if (c >= 0xA1 && ((c1 == 0 && ((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) || + (c1 == 1 && c <= 0xF2) || (c1 == 13 && c <= 0xE7))) { + filter->status = 4; + filter->cache = (c1 << 8) + c - 0xA1; + } else { + filter->status = filter->cache = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 4: /* multi-byte character, fourth byte */ + filter->status = 0; + c1 = filter->cache; + if (c1 <= 0xDFF && c > 0xA0 && c < 0xFF) { + int plane = (c1 & 0xF00) >> 8; /* This is actually the CNS-11643 plane minus one */ + s = (c1 & 0xFF)*94 + c - 0xA1; + w = 0; + if (s >= 0) { + /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3", + * and added tens of thousands more characters in planes 4, 5, 6, and 7 + * We only support the older version of CNS-11643 + * This is the same as iconv from glibc 2.2 */ + if (plane == 0 && s < cns11643_1_ucs_table_size) { + w = cns11643_1_ucs_table[s]; + } else if (plane == 1 && s < cns11643_2_ucs_table_size) { + w = cns11643_2_ucs_table[s]; + } else if (plane == 13 && s < cns11643_14_ucs_table_size) { + w = cns11643_14_ucs_table[s]; + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + filter->status = filter->cache = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } 
+ break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) { + s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min]; + } else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) { + s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min]; + } else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) { + s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min]; + } else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) { + s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min]; + } else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) { + s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min]; + } + + if (s <= 0) { + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + int plane = (s & 0x1F0000) >> 16; + if (plane <= 1) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + s = (s & 0xFFFF) | 0x8080; + CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s & 0xFF, filter->data)); + } + } else { + s = (0x8EA00000 + (plane << 16)) | ((s & 0xFFFF) | 0x8080); + CK((*filter->output_function)(0x8e , filter->data)); + CK((*filter->output_function)((s >> 16) & 0xFF, filter->data)); + CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); + CK((*filter->output_function)(s & 0xFF, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + return 0; +} + +static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* 2-byte or 4-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + 
+static size_t mb_euctw_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3 && p < e) { + unsigned char c2 = *p++; + + if (c2 >= 0xA1 && c2 <= 0xFE) { + unsigned int w = (c - 0xA1)*94 + (c2 - 0xA1); + if (w < cns11643_1_ucs_table_size) { + w = cns11643_1_ucs_table[w]; + } else { + w = 0; + } + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c == 0x8E && p < e) { + unsigned char c2 = *p++; + + if ((c2 == 0xA1 || c2 == 0xA2 || c2 == 0xAE) && p < e) { + unsigned int plane = c2 - 0xA1; /* This is actually the CNS-11643 plane minus one */ + unsigned char c3 = *p++; + + if (c3 >= 0xA1 && ((plane == 0 && ((c3 >= 0xA1 && c3 <= 0xA6) || (c3 >= 0xC2 && c3 <= 0xFD)) && c3 != 0xC3) || (plane == 1 && c3 <= 0xF2) || (plane == 13 && c3 <= 0xE7)) && p < e) { + unsigned char c4 = *p++; + + if (c2 <= 0xAE && c4 > 0xA0 && c4 < 0xFF) { + unsigned int s = (c3 - 0xA1)*94 + c4 - 0xA1, w = 0; + + /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3", + * and added tens of thousands more characters in planes 4, 5, 6, and 7 + * We only support the older version of CNS-11643 + * This is the same as iconv from glibc 2.2 */ + if (plane == 0 && s < cns11643_1_ucs_table_size) { + w = cns11643_1_ucs_table[s]; + } else if (plane == 1 && s < cns11643_2_ucs_table_size) { + w = cns11643_2_ucs_table[s]; + } else if (plane == 13 && s < cns11643_14_ucs_table_size) { + w = cns11643_14_ucs_table[s]; + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + continue; + } + } + } + + *out++ = MBFL_BAD_INPUT; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void 
mb_wchar_to_euctw(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_cns11643_table_min && w < ucs_a1_cns11643_table_max) { + s = ucs_a1_cns11643_table[w - ucs_a1_cns11643_table_min]; + } else if (w >= ucs_a2_cns11643_table_min && w < ucs_a2_cns11643_table_max) { + s = ucs_a2_cns11643_table[w - ucs_a2_cns11643_table_min]; + } else if (w >= ucs_a3_cns11643_table_min && w < ucs_a3_cns11643_table_max) { + s = ucs_a3_cns11643_table[w - ucs_a3_cns11643_table_min]; + } else if (w >= ucs_i_cns11643_table_min && w < ucs_i_cns11643_table_max) { + s = ucs_i_cns11643_table[w - ucs_i_cns11643_table_min]; + } else if (w >= ucs_r_cns11643_table_min && w < ucs_r_cns11643_table_max) { + s = ucs_r_cns11643_table[w - ucs_r_cns11643_table_min]; + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euctw); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } + } else { + unsigned int plane = s >> 16; + if (plane <= 1) { + if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else { + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); + out = mb_convert_buf_add4(out, 0x8E, 0xA0 + plane, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); + } + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, w, flag; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9) { /* dbcs lead byte */ + filter->status = 1; + filter->cache = c; + } else { + 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0; + c1 = filter->cache; + flag = 0; + if (c1 >= 0xa1 && c1 <= 0xc6) { + flag = 1; + } else if (c1 >= 0xc7 && c1 <= 0xfe && c1 != 0xc9) { + flag = 2; + } + if (flag > 0 && c >= 0xa1 && c <= 0xfe) { + if (flag == 1) { /* 1st: 0xa1..0xc6, 2nd: 0x41..0x7a, 0x81..0xfe */ + w = (c1 - 0x81)*190 + c - 0x41; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + } else { /* 1st: 0xc7..0xc8,0xca..0xfe, 2nd: 0xa1..0xfe */ + w = (c1 - 0xc7)*94 + c - 0xa1; + ZEND_ASSERT(w < uhc3_ucs_table_size); + w = uhc3_ucs_table[w]; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; + } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; + } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; + } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; + } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; + } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; + } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; + } + + /* exclude UHC extension area (although we are using the UHC conversion tables) */ + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = 0; + } + + if (s <= 0) { + if 
(c < 0x80) { + s = c; + } else { + s = -1; + } + } + + if (s >= 0) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9 && p < e) { + unsigned char c2 = *p++; + if (c2 < 0xA1 || c2 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + if (c <= 0xC6) { + unsigned int w = (c - 0x81)*190 + c2 - 0x41; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + unsigned int w = (c - 0xC7)*94 + c2 - 0xA1; + ZEND_ASSERT(w < uhc3_ucs_table_size); + w = uhc3_ucs_table[w]; + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_euckr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) 
{ + s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; + } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; + } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; + } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; + } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; + } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; + } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; + } + + /* Exclude UHC extension area (although we are using the UHC conversion tables) */ + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { + s = 0; + } + + if (!s) { + if (w < 0x80) { + out = mb_convert_buf_add(out, w); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euckr); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter) +{ + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0x80 && c < 0xfe && c != 0xc9) { /* dbcs lead byte */ + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0; + int c1 = filter->cache, w = 0; + + if (c1 >= 0x81 && c1 <= 0xc6 && c >= 0x41 && c <= 0xfe) { + w = (c1 - 0x81)*190 + (c - 0x41); + if (w >= 0 && w < uhc1_ucs_table_size) { + w = 
uhc1_ucs_table[w]; + } + } else if (c1 >= 0xc7 && c1 < 0xfe && c >= 0xa1 && c <= 0xfe) { + w = (c1 - 0xc7)*94 + (c - 0xa1); + if (w >= 0 && w < uhc3_ucs_table_size) { + w = uhc3_ucs_table[w]; + } + } + + if (w == 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; + } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; + } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; + } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; + } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; + } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; + } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; + } + + if (s == 0 && c != 0) { + s = -1; + } + + if (s >= 0) { + if (s < 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t 
mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + e--; /* Stop the main loop 1 byte short of the end of the input */ + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c > 0x80 && c < 0xFE) { + /* We don't need to check p < e here; it's not possible that this pointer dereference + * will be outside the input string, because of e-- above */ + unsigned char c2 = *p++; + if (c2 < 0x41 || c2 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + unsigned int w = 0; + + if (c <= 0xC6) { + w = (c - 0x81)*190 + c2 - 0x41; + ZEND_ASSERT(w < uhc1_ucs_table_size); + w = uhc1_ucs_table[w]; + } else if (c2 >= 0xA1) { + w = (c - 0xC7)*94 + c2 - 0xA1; + ZEND_ASSERT(w < uhc3_ucs_table_size); + w = uhc3_ucs_table[w]; + } + if (!w) { + /* If c == 0xC9, we shouldn't have tried to read a 2-byte char at all... but it is faster + * to fix up that rare case here rather than include an extra check in the hot path */ + if (c == 0xC9) { + p--; + } + w = MBFL_BAD_INPUT; + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + /* Finish up last byte of input string if there is one */ + if (p == e && out < limit) { + unsigned char c = *p++; + *out++ = (c < 0x80) ? 
c : MBFL_BAD_INPUT; + } + + *in_len = e - p + 1; + *in = p; + return out - buf; +} + +static void mb_wchar_to_uhc(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { + s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; + } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { + s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; + } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { + s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; + } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { + s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; + } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { + s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; + } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { + s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; + } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { + s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_uhc); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char *mbfl_encoding_euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL}; + +static const struct mbfl_convert_vtbl vtbl_eucjp_wchar = { + mbfl_no_encoding_euc_jp, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_eucjp_wchar, + mbfl_filt_conv_eucjp_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_eucjp = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_euc_jp, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_eucjp, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_euc_jp = { + mbfl_no_encoding_euc_jp, + "EUC-JP", + "EUC-JP", + mbfl_encoding_euc_jp_aliases, + mblen_table_eucjp, + 0, + &vtbl_eucjp_wchar, + &vtbl_wchar_eucjp, + mb_eucjp_to_wchar, + mb_wchar_to_eucjp, + NULL +}; + +static const char *mbfl_encoding_eucjp2004_aliases[] = {"EUC_JP-2004", NULL}; + +static const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar = { + mbfl_no_encoding_eucjp2004, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_jis2004_wchar, + mbfl_filt_conv_jis2004_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_eucjp2004, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_jis2004, + mbfl_filt_conv_wchar_jis2004_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_eucjp2004 = { + mbfl_no_encoding_eucjp2004, + "EUC-JP-2004", + "EUC-JP", + 
mbfl_encoding_eucjp2004_aliases, + mblen_table_eucjp, + 0, + &vtbl_eucjp2004_wchar, + &vtbl_wchar_eucjp2004, + mb_eucjp2004_to_wchar, + mb_wchar_to_eucjp2004, + NULL +}; + +static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", "eucJP-ms", NULL}; + +static const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = { + mbfl_no_encoding_eucjp_win, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_eucjpwin_wchar, + mbfl_filt_conv_eucjpwin_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_eucjp_win, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_eucjpwin, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_eucjp_win = { + mbfl_no_encoding_eucjp_win, + "eucJP-win", + "EUC-JP", + mbfl_encoding_eucjp_win_aliases, + mblen_table_eucjp, + 0, + &vtbl_eucjpwin_wchar, + &vtbl_wchar_eucjpwin, + mb_eucjpwin_to_wchar, + mb_wchar_to_eucjpwin, + NULL +}; + +static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; + +static const struct mbfl_convert_vtbl vtbl_cp51932_wchar = { + mbfl_no_encoding_cp51932, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp51932_wchar, + mbfl_filt_conv_cp51932_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp51932 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp51932, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_cp51932, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_cp51932 = { + mbfl_no_encoding_cp51932, + "CP51932", + "CP51932", + mbfl_encoding_cp51932_aliases, + mblen_table_eucjp, + 0, + &vtbl_cp51932_wchar, + &vtbl_wchar_cp51932, + mb_cp51932_to_wchar, + mb_wchar_to_cp51932, + NULL +}; + +static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char *mbfl_encoding_euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", "gb2312", NULL}; + +static const struct mbfl_convert_vtbl vtbl_euccn_wchar = { + mbfl_no_encoding_euc_cn, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_euccn_wchar, + mbfl_filt_conv_euccn_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_euccn = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_euc_cn, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_euccn, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_euc_cn = { + mbfl_no_encoding_euc_cn, + "EUC-CN", + "CN-GB", + mbfl_encoding_euc_cn_aliases, + mblen_table_euccn, + 0, + &vtbl_euccn_wchar, + &vtbl_wchar_euccn, + mb_euccn_to_wchar, + mb_wchar_to_euccn, + NULL +}; + +static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL}; + +static const struct mbfl_convert_vtbl vtbl_euctw_wchar = { + mbfl_no_encoding_euc_tw, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_euctw_wchar, + mbfl_filt_conv_euctw_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_euctw = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_euc_tw, + mbfl_filt_conv_common_ctor, + NULL, + 
mbfl_filt_conv_wchar_euctw, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_euc_tw = { + mbfl_no_encoding_euc_tw, + "EUC-TW", + "EUC-TW", + mbfl_encoding_euc_tw_aliases, + mblen_table_euccn, + 0, + &vtbl_euctw_wchar, + &vtbl_wchar_euctw, + mb_euctw_to_wchar, + mb_wchar_to_euctw, + NULL +}; + +static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL}; + +static const struct mbfl_convert_vtbl vtbl_euckr_wchar = { + mbfl_no_encoding_euc_kr, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_euckr_wchar, + mbfl_filt_conv_euckr_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_euckr = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_euc_kr, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_euckr, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_euc_kr = { + mbfl_no_encoding_euc_kr, + "EUC-KR", + "EUC-KR", + mbfl_encoding_euc_kr_aliases, + mblen_table_euccn, + 0, + &vtbl_euckr_wchar, + &vtbl_wchar_euckr, + mb_euckr_to_wchar, + mb_wchar_to_euckr, + NULL +}; + +/* UHC was introduced by MicroSoft in Windows 95, and is also known as CP949. + * It is the same as EUC-KR, but with 8,822 additional characters added to + * complete all the characters in the Johab charset. 
*/ + +static const unsigned char mblen_table_81_to_fe[] = { /* 0x81-0xFE */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 +}; + +static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL}; + +static const struct mbfl_convert_vtbl vtbl_uhc_wchar = { + mbfl_no_encoding_uhc, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_uhc_wchar, + mbfl_filt_conv_uhc_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_uhc = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_uhc, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_uhc, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_uhc = { + mbfl_no_encoding_uhc, + "UHC", + "UHC", + mbfl_encoding_uhc_aliases, + mblen_table_81_to_fe, + 0, + &vtbl_uhc_wchar, + &vtbl_wchar_uhc, + mb_uhc_to_wchar, + mb_wchar_to_uhc, + NULL +}; + +/* + * GB18030/CP936 + */ + +static int mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter) +{ + int k; + int c1, c2, c3, w = -1; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0x80 && c < 0xff) { /* dbcs/qbcs lead byte */ + filter->status = 1; + filter->cache 
= c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs/qbcs second byte */ + c1 = filter->cache; + filter->status = 0; + + if (c1 >= 0x81 && c1 <= 0x84 && c >= 0x30 && c <= 0x39) { + /* 4 byte range: Unicode BMP */ + filter->status = 2; + filter->cache = (c1 << 8) | c; + return 0; + } else if (c1 >= 0x90 && c1 <= 0xe3 && c >= 0x30 && c <= 0x39) { + /* 4 byte range: Unicode 16 planes */ + filter->status = 2; + filter->cache = (c1 << 8) | c; + return 0; + } else if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && (c >= 0xa1 && c <= 0xfe)) { + /* UDA part 1,2: U+E000-U+E4C5 */ + w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000; + CK((*filter->output_function)(w, filter->data)); + } else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { + /* UDA part3 : U+E4C6-U+E765*/ + w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6; + CK((*filter->output_function)(w, filter->data)); + } + + c2 = (c1 << 8) | c; + + if (w <= 0 && + ((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) || + (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) || + (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) { + for (k = 0; k < mbfl_gb18030_pua_tbl_max; k++) { + if (c2 >= mbfl_gb18030_pua_tbl[k][2] && c2 <= mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][1] - mbfl_gb18030_pua_tbl[k][0]) { + w = c2 - mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][0]; + CK((*filter->output_function)(w, filter->data)); + break; + } + } + } + + if (w <= 0) { + if ((c1 >= 0xa1 && c1 <= 0xa9 && c >= 0xa1 && c <= 0xfe) || + (c1 >= 0xb0 && c1 <= 0xf7 && c >= 0xa1 && c <= 0xfe) || + (c1 >= 0x81 && c1 <= 0xa0 && c >= 0x40 && c <= 0xfe && c != 0x7f) || + (c1 >= 0xaa && c1 <= 0xfe && c >= 0x40 && c <= 0xa0 && c != 0x7f) || + (c1 >= 0xa8 && c1 <= 0xa9 && c >= 0x40 && c <= 0xa0 && c != 0x7f)) { + w = (c1 - 0x81)*192 + c - 0x40; + ZEND_ASSERT(w < cp936_ucs_table_size); + 
CK((*filter->output_function)(cp936_ucs_table[w], filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + break; + + case 2: /* qbcs third byte */ + c1 = (filter->cache >> 8) & 0xff; + c2 = filter->cache & 0xff; + filter->status = filter->cache = 0; + if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c >= 0x81 && c <= 0xfe) { + filter->cache = (c1 << 16) | (c2 << 8) | c; + filter->status = 3; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 3: /* qbcs fourth byte */ + c1 = (filter->cache >> 16) & 0xff; + c2 = (filter->cache >> 8) & 0xff; + c3 = filter->cache & 0xff; + filter->status = filter->cache = 0; + if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c3 >= 0x81 && c3 <= 0xfe && c >= 0x30 && c <= 0x39) { + if (c1 >= 0x90 && c1 <= 0xe3) { + w = ((((c1 - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c - 0x30) + 0x10000; + if (w > 0x10FFFF) { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + return 0; + } + } else { /* Unicode BMP */ + w = (((c1 - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c - 0x30); + if (w >= 0 && w <= 39419) { + k = mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max); + w += mbfl_gb_uni_ofst[k]; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + return 0; + } + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* multi-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int 
mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter) +{ + int k, k1, k2; + int c1, s = 0, s1 = 0; + + if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { + if (c == 0x01f9) { + s = 0xa8bf; + } else { + s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; + } + } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { + if (c == 0x20ac) { /* euro-sign */ + s = 0xa2e3; + } else { + s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; + } + } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { + s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; + } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { + s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; + } else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) { + /* U+F900-FA2F CJK Compatibility Ideographs */ + if (c == 0xf92c) { + s = 0xfd9c; + } else if (c == 0xf979) { + s = 0xfd9d; + } else if (c == 0xf995) { + s = 0xfd9e; + } else if (c == 0xf9e7) { + s = 0xfd9f; + } else if (c == 0xf9f1) { + s = 0xfda0; + } else if (c >= 0xfa0c && c <= 0xfa29) { + s = ucs_ci_s_cp936_table[c - 0xfa0c]; + } + } else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) { + /* FE30h CJK Compatibility Forms */ + s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min]; + } else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) { + /* U+FE50-FE6F Small Form Variants */ + s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min]; + } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { + /* U+FF00-FFFF HW/FW Forms */ + if (c == 0xff04) { + s = 0xa1e7; + } else if (c == 0xff5e) { + s = 0xa1ab; + } else if (c >= 0xff01 && c <= 0xff5d) { + s = c - 0xff01 + 0xa3a1; + } else if (c >= 0xffe0 && c <= 0xffe5) { + s = ucs_hff_s_cp936_table[c-0xffe0]; + } + } + + /* While GB18030 and CP936 are very similar, some mappings are different between these encodings; + * do a binary search in a table of differing codepoints to see if we 
have one */ + if (s <= 0 && c >= mbfl_gb18030_c_tbl_key[0] && c <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) { + k1 = mbfl_bisec_srch2(c, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max); + if (k1 >= 0) { + s = mbfl_gb18030_c_tbl_val[k1]; + } + } + + if (c >= 0xe000 && c <= 0xe864) { /* PUA */ + if (c < 0xe766) { + if (c < 0xe4c6) { + c1 = c - 0xe000; + s = (c1 % 94) + 0xa1; + c1 /= 94; + s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8; + } else { + c1 = c - 0xe4c6; + s = ((c1 / 96) + 0xa1) << 8; + c1 %= 96; + s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40); + } + } else { + /* U+E766..U+E864 */ + k1 = 0; + k2 = mbfl_gb18030_pua_tbl_max; + while (k1 < k2) { + k = (k1 + k2) >> 1; + if (c < mbfl_gb18030_pua_tbl[k][0]) { + k2 = k; + } else if (c > mbfl_gb18030_pua_tbl[k][1]) { + k1 = k + 1; + } else { + s = c - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2]; + break; + } + } + } + } + + /* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */ + if (s <= 0 && c >= 0x0080 && c <= 0xffff) { + /* BMP */ + s = mbfl_bisec_srch(c, mbfl_uni2gb_tbl, mbfl_gb_uni_max); + if (s >= 0) { + c1 = c - mbfl_gb_uni_ofst[s]; + s = (c1 % 10) + 0x30; + c1 /= 10; + s |= ((c1 % 126) + 0x81) << 8; + c1 /= 126; + s |= ((c1 % 10) + 0x30) << 16; + c1 /= 10; + s1 = c1 + 0x81; + } + } else if (c >= 0x10000 && c <= 0x10ffff) { + /* Code set 3: Unicode U+10000..U+10FFFF */ + c1 = c - 0x10000; + s = (c1 % 10) + 0x30; + c1 /= 10; + s |= ((c1 % 126) + 0x81) << 8; + c1 /= 126; + s |= ((c1 % 10) + 0x30) << 16; + c1 /= 10; + s1 = c1 + 0x90; + } + + if (c == 0) { + s = 0; + } else if (s == 0) { + s = -1; + } + + if (s >= 0) { + if (s <= 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else if (s1 > 0) { /* qbcs */ + CK((*filter->output_function)(s1 & 0xff, filter->data)); + CK((*filter->output_function)((s >> 16) & 0xff, filter->data)); + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 
0xff, filter->data)); + } else { /* dbcs */ + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static const unsigned short gb18030_pua_tbl3[] = { + /* 0xFE50 */ + 0x0000,0xE816,0xE817,0xE818,0x0000,0x0000,0x0000,0x0000, + 0x0000,0xE81E,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0xE826,0x0000,0x0000,0x0000,0x0000,0xE82B,0xE82C, + 0x0000,0x0000,0x0000,0x0000,0xE831,0xE832,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE83B,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE843,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0xE854,0xE855,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, + /* 0xFEA0 */ + 0xE864 +}; + +static size_t mb_gb18030_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c == 0x80 || c == 0xFF) { + *out++ = MBFL_BAD_INPUT; + } else { + if (p == e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c2 = *p++; + + if (((c >= 0x81 && c <= 0x84) || (c >= 0x90 && c <= 0xE3)) && c2 >= 0x30 && c2 <= 0x39) { + if (p >= e) { + *out++ = MBFL_BAD_INPUT; + break; + } + unsigned char c3 = *p++; + + if (c3 >= 0x81 && c3 <= 0xFE && p < e) { + unsigned char c4 = *p++; + + if (c4 >= 0x30 && c4 <= 0x39) { + if (c >= 0x90 && c <= 0xE3) { + unsigned int w = ((((c - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c4 - 0x30) + 0x10000; + *out++ = (w > 0x10FFFF) ? 
MBFL_BAD_INPUT : w; + } else { + /* Unicode BMP */ + unsigned int w = (((c - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c4 - 0x30); + if (w <= 39419) { + *out++ = w + mbfl_gb_uni_ofst[mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max)]; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && (c2 >= 0xA1 && c2 <= 0xFE)) { + /* UDA part 1, 2: U+E000-U+E4C5 */ + *out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000; + } else if (c >= 0xA1 && c <= 0xA7 && c2 >= 0x40 && c2 < 0xA1 && c2 != 0x7F) { + /* UDA part 3: U+E4C6-U+E765 */ + *out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 0x41 : 0x40) + 0xE4C6; + } else if (c2 >= 0x40 && c2 != 0x7F && c2 != 0xFF) { + unsigned int w = (c - 0x81)*192 + c2 - 0x40; + + if (w >= 0x192B) { + if (w <= 0x1EBE) { + if (w != 0x1963 && w != 0x1DBF && (w < 0x1E49 || w > 0x1E55) && w != 0x1E7F) { + *out++ = cp936_pua_tbl1[w - 0x192B]; + continue; + } + } else if (w >= 0x413A) { + if (w <= 0x413E) { + *out++ = cp936_pua_tbl2[w - 0x413A]; + continue; + } else if (w >= 0x5DD0 && w <= 0x5E20) { + unsigned int c = gb18030_pua_tbl3[w - 0x5DD0]; + if (c) { + *out++ = c; + continue; + } + } + } + } + + if ((c >= 0x81 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7 && c2 >= 0xA1) || (c >= 0xAA && c <= 0xFE && c2 <= 0xA0)) { + ZEND_ASSERT(w < cp936_ucs_table_size); + *out++ = cp936_ucs_table[w]; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_gb18030(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w == 0) { + out = mb_convert_buf_add(out, 0); + continue; + } else if (w >= 
ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { + if (w == 0x1F9) { + s = 0xA8Bf; + } else { + s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; + } + } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { + if (w == 0x20AC) { /* Euro sign */ + s = 0xA2E3; + } else { + s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; + } + } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { + s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; + } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { + s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; + } else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) { + /* U+F900-U+FA2F CJK Compatibility Ideographs */ + if (w == 0xF92C) { + s = 0xFD9C; + } else if (w == 0xF979) { + s = 0xFD9D; + } else if (w == 0xF995) { + s = 0xFD9E; + } else if (w == 0xF9E7) { + s = 0xFD9F; + } else if (w == 0xF9F1) { + s = 0xFDA0; + } else if (w >= 0xFA0C && w <= 0xFA29) { + s = ucs_ci_s_cp936_table[w - 0xFA0C]; + } + } else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) { + /* CJK Compatibility Forms */ + s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min]; + } else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) { + /* U+FE50-U+FE6F Small Form Variants */ + s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min]; + } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { + /* U+FF00-U+FFFF HW/FW Forms */ + if (w == 0xFF04) { + s = 0xA1E7; + } else if (w == 0xFF5E) { + s = 0xA1AB; + } else if (w >= 0xFF01 && w <= 0xFF5D) { + s = w - 0xFF01 + 0xA3A1; + } else if (w >= 0xFFE0 && w <= 0xFFE5) { + s = ucs_hff_s_cp936_table[w - 0xFFE0]; + } + } else if (w >= 0xE000 && w <= 0xE864) { + /* PUA */ + if (w < 0xE766) { + if (w < 0xE4C6) { + unsigned int c1 = w - 0xE000; + s = (c1 % 94) + 0xA1; + c1 /= 94; + s |= (c1 + (c1 < 0x06 ? 
0xAA : 0xF2)) << 8; + } else { + unsigned int c1 = w - 0xE4C6; + s = ((c1 / 96) + 0xA1) << 8; + c1 %= 96; + s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40); + } + } else { + /* U+E766-U+E864 */ + unsigned int k1 = 0, k2 = mbfl_gb18030_pua_tbl_max; + while (k1 < k2) { + unsigned int k = (k1 + k2) >> 1; + if (w < mbfl_gb18030_pua_tbl[k][0]) { + k2 = k; + } else if (w > mbfl_gb18030_pua_tbl[k][1]) { + k1 = k + 1; + } else { + s = w - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2]; + break; + } + } + } + } + + /* While GB18030 and CP936 are very similar, some mappings are different between these encodings; + * do a binary search in a table of differing codepoints to see if we have one */ + if (!s && w >= mbfl_gb18030_c_tbl_key[0] && w <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) { + int i = mbfl_bisec_srch2(w, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max); + if (i >= 0) { + s = mbfl_gb18030_c_tbl_val[i]; + } + } + + /* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */ + if (!s && w >= 0x80 && w <= 0xFFFF) { + /* BMP */ + int i = mbfl_bisec_srch(w, mbfl_uni2gb_tbl, mbfl_gb_uni_max); + if (i >= 0) { + unsigned int c1 = w - mbfl_gb_uni_ofst[i]; + s = (c1 % 10) + 0x30; + c1 /= 10; + s |= ((c1 % 126) + 0x81) << 8; + c1 /= 126; + s |= ((c1 % 10) + 0x30) << 16; + c1 /= 10; + s |= (c1 + 0x81) << 24; + } + } else if (w >= 0x10000 && w <= 0x10FFFF) { + /* Code set 3: Unicode U+10000-U+10FFFF */ + unsigned int c1 = w - 0x10000; + s = (c1 % 10) + 0x30; + c1 /= 10; + s |= ((c1 % 126) + 0x81) << 8; + c1 /= 126; + s |= ((c1 % 10) + 0x30) << 16; + c1 /= 10; + s |= (c1 + 0x90) << 24; + } + + if (!s) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_gb18030); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s < 0x80) { + out = mb_convert_buf_add(out, s); + } else if (s > 0xFFFFFF) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add4(out, (s >> 24) & 0xFF, (s >> 16) & 0xFF, (s >> 8) & 0xFF, s & 
0xFF); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter) +{ + int k; + int c1, c2, w = -1; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (c == 0x80) { /* euro sign */ + CK((*filter->output_function)(0x20ac, filter->data)); + } else if (c < 0xff) { /* dbcs lead byte */ + filter->status = 1; + filter->cache = c; + } else { /* 0xff */ + CK((*filter->output_function)(0xf8f5, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0; + c1 = filter->cache; + + if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && + (c >= 0xa1 && c <= 0xfe)) { + /* UDA part1,2: U+E000-U+E4C5 */ + w = 94*(c1 >= 0xf8 ? c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000; + CK((*filter->output_function)(w, filter->data)); + } else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { + /* UDA part3 : U+E4C6-U+E765*/ + w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 
0x41 : 0x40) + 0xe4c6; + CK((*filter->output_function)(w, filter->data)); + } + + c2 = (c1 << 8) | c; + + if (w <= 0 && + ((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) || + (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) || + (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) { + for (k = 0; k < mbfl_cp936_pua_tbl_max; k++) { + if (c2 >= mbfl_cp936_pua_tbl[k][2] && + c2 <= mbfl_cp936_pua_tbl[k][2] + + mbfl_cp936_pua_tbl[k][1] - mbfl_cp936_pua_tbl[k][0]) { + w = c2 - mbfl_cp936_pua_tbl[k][2] + mbfl_cp936_pua_tbl[k][0]; + CK((*filter->output_function)(w, filter->data)); + break; + } + } + } + + if (w <= 0) { + if (c1 < 0xff && c1 > 0x80 && c >= 0x40 && c < 0xff && c != 0x7f) { + w = (c1 - 0x81)*192 + c - 0x40; + ZEND_ASSERT(w < cp936_ucs_table_size); + CK((*filter->output_function)(cp936_ucs_table[w], filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter) +{ + int k, k1, k2; + int c1, s = 0; + + if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { + /* U+0000 - U+0451 */ + s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; + } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { + /* U+2000 - U+26FF */ + if (c == 0x203e) { + s = 0xa3fe; + } else if (c == 0x2218) { + s = 0xa1e3; + } else if (c == 0x223c) { + s = 0xa1ab; + } else { + s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; + } + } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { + /* U+2F00 - U+33FF */ + s = ucs_a3_cp936_table[c - 
ucs_a3_cp936_table_min]; + } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { + /* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */ + s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; + } else if (c >= 0xe000 && c <= 0xe864) { /* PUA */ + if (c < 0xe766) { + if (c < 0xe4c6) { + c1 = c - 0xe000; + s = (c1 % 94) + 0xa1; c1 /= 94; + s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8; + } else { + c1 = c - 0xe4c6; + s = ((c1 / 96) + 0xa1) << 8; c1 %= 96; + s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40); + } + } else { + /* U+E766..U+E864 */ + k1 = 0; k2 = mbfl_cp936_pua_tbl_max; + while (k1 < k2) { + k = (k1 + k2) >> 1; + if (c < mbfl_cp936_pua_tbl[k][0]) { + k2 = k; + } else if (c > mbfl_cp936_pua_tbl[k][1]) { + k1 = k + 1; + } else { + s = c - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2]; + break; + } + } + } + } else if (c == 0xf8f5) { + s = 0xff; + } else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) { + /* U+F900-FA2F CJK Compatibility Ideographs */ + s = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min]; + } else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) { + s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min]; + } else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) { + s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min]; /* U+FE50-FE6F Small Form Variants */ + } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { + /* U+FF00-FFFF HW/FW Forms */ + if (c == 0xff04) { + s = 0xa1e7; + } else if (c == 0xff5e) { + s = 0xa1ab; + } else if (c >= 0xff01 && c <= 0xff5d) { + s = c - 0xff01 + 0xa3a1; + } else if (c >= 0xffe0 && c <= 0xffe5) { + s = ucs_hff_s_cp936_table[c-0xffe0]; + } + } + + if (s <= 0) { + if (c == 0) { + s = 0; + } else if (s <= 0) { + s = -1; + } + } + + if (s >= 0) { + if (s <= 0x80 || s == 0xff) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + 
CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_cp936_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c < 0x80) { + *out++ = c; + } else if (c == 0x80) { + *out++ = 0x20AC; /* Euro sign */ + } else if (c < 0xFF) { + if (p >= e) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + unsigned char c2 = *p++; + if (c2 < 0x40 || c2 == 0x7F || c2 == 0xFF) { + *out++ = MBFL_BAD_INPUT; + continue; + } + + if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && c2 >= 0xA1) { + /* UDA part 1, 2: U+E000-U+E4C5 */ + *out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000; + } else if (c >= 0xA1 && c <= 0xA7 && c2 < 0xA1) { + /* UDA part 3: U+E4C6-U+E765*/ + *out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 
0x41 : 0x40) + 0xE4C6; + } else { + unsigned int w = (c - 0x81)*192 + c2 - 0x40; /* Convert c, c2 into GB 2312 table lookup index */ + + /* For CP936 and GB18030, certain GB 2312 byte combinations are mapped to PUA codepoints, + * whereas the same combinations aren't mapped to any codepoint for HZ and EUC-CN + * To avoid duplicating the entire GB 2312 -> Unicode lookup table, we have three + * auxiliary tables which are consulted instead for specific ranges of lookup indices */ + if (w >= 0x192B) { + if (w <= 0x1EBE) { + *out++ = cp936_pua_tbl1[w - 0x192B]; + continue; + } else if (w >= 0x413A) { + if (w <= 0x413E) { + *out++ = cp936_pua_tbl2[w - 0x413A]; + continue; + } else if (w >= 0x5DD0 && w <= 0x5E20) { + *out++ = cp936_pua_tbl3[w - 0x5DD0]; + continue; + } + } + } + + ZEND_ASSERT(w < cp936_ucs_table_size); + *out++ = cp936_ucs_table[w]; + } + } else { + *out++ = 0xF8F5; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_cp936(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { + /* U+0000-U+0451 */ + s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; + } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { + /* U+2000-U+26FF */ + if (w == 0x203E) { + s = 0xA3FE; + } else if (w == 0x2218) { + s = 0xA1E3; + } else if (w == 0x223C) { + s = 0xA1AB; + } else { + s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; + } + } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { + /* U+2F00-U+33FF */ + s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; + } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { + /* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */ + s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; + } 
else if (w >= 0xE000 && w <= 0xE864) { + /* PUA */ + if (w < 0xe766) { + if (w < 0xe4c6) { + unsigned int c1 = w - 0xE000; + s = (c1 % 94) + 0xA1; + c1 /= 94; + s |= (c1 < 0x6 ? c1 + 0xAA : c1 + 0xF2) << 8; + } else { + unsigned int c1 = w - 0xE4C6; + s = ((c1 / 96) + 0xA1) << 8; + c1 %= 96; + s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40); + } + } else { + /* U+E766-U+E864 */ + unsigned int k1 = 0; + unsigned int k2 = mbfl_cp936_pua_tbl_max; + while (k1 < k2) { + int k = (k1 + k2) >> 1; + if (w < mbfl_cp936_pua_tbl[k][0]) { + k2 = k; + } else if (w > mbfl_cp936_pua_tbl[k][1]) { + k1 = k + 1; + } else { + s = w - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2]; + break; + } + } + } + } else if (w == 0xF8F5) { + s = 0xFF; + } else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) { + /* U+F900-U+FA2F CJK Compatibility Ideographs */ + s = ucs_ci_cp936_table[w - ucs_ci_cp936_table_min]; + } else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) { + s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min]; + } else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) { + /* U+FE50-U+FE6F Small Form Variants */ + s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min]; + } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { + /* U+FF00-U+FFFF HW/FW Forms */ + if (w == 0xFF04) { + s = 0xA1E7; + } else if (w == 0xFF5E) { + s = 0xA1AB; + } else if (w >= 0xFF01 && w <= 0xFF5D) { + s = w - 0xFF01 + 0xA3A1; + } else if (w >= 0xFFE0 && w <= 0xFFE5) { + s = ucs_hff_s_cp936_table[w - 0xFFE0]; + } + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp936); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); + } + } else if (s <= 0x80 || s == 0xFF) { + out = mb_convert_buf_add(out, s); + } else { + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const char 
*mbfl_encoding_gb18030_aliases[] = {"gb-18030", "gb-18030-2000", NULL}; + +static const struct mbfl_convert_vtbl vtbl_gb18030_wchar = { + mbfl_no_encoding_gb18030, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_gb18030_wchar, + mbfl_filt_conv_gb18030_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_gb18030 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_gb18030, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_gb18030, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_gb18030 = { + mbfl_no_encoding_gb18030, + "GB18030", + "GB18030", + mbfl_encoding_gb18030_aliases, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_gb18030_wchar, + &vtbl_wchar_gb18030, + mb_gb18030_to_wchar, + mb_wchar_to_gb18030, + NULL +}; + +static const char *mbfl_encoding_cp936_aliases[] = {"CP-936", "GBK", NULL}; + +static const struct mbfl_convert_vtbl vtbl_cp936_wchar = { + mbfl_no_encoding_cp936, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_cp936_wchar, + mbfl_filt_conv_cp936_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp936 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp936, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_cp936, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_cp936 = { + mbfl_no_encoding_cp936, + "CP936", + "CP936", + mbfl_encoding_cp936_aliases, + mblen_table_81_to_fe, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_cp936_wchar, + &vtbl_wchar_cp936, + mb_cp936_to_wchar, + mb_wchar_to_cp936, + NULL +}; + +/* + * BIG5/CP950 + */ + +/* 63 + 94 = 157 or 94 */ +static unsigned short cp950_pua_tbl[][4] = { + {0xe000, 0xe310, 0xfa40, 0xfefe}, + {0xe311, 0xeeb7, 0x8e40, 0xa0fe}, + {0xeeb8, 0xf6b0, 0x8140, 0x8dfe}, + {0xf6b1, 0xf70e, 0xc6a1, 0xc6fe}, + {0xf70f, 0xf848, 0xc740, 0xc8fe}, +}; + +static inline int is_in_cp950_pua(int c1, int c) +{ + if ((c1 >= 0xfa && c1 <= 0xfe) || (c1 
>= 0x8e && c1 <= 0xa0) || (c1 >= 0x81 && c1 <= 0x8d) || (c1 >= 0xc7 && c1 <= 0xc8)) { + return (c >= 0x40 && c <= 0x7e) || (c >= 0xa1 && c <= 0xfe); + } else if (c1 == 0xc6) { + return c >= 0xa1 && c <= 0xfe; + } + return 0; +} + +static int mbfl_filt_conv_big5_wchar(int c, mbfl_convert_filter *filter) +{ + int k, c1, w; + + switch (filter->status) { + case 0: + if (c >= 0 && c < 0x80) { /* latin */ + CK((*filter->output_function)(c, filter->data)); + } else if (filter->from->no_encoding != mbfl_no_encoding_cp950 && c > 0xA0 && c <= 0xF9 && c != 0xC8) { + filter->status = 1; + filter->cache = c; + } else if (filter->from->no_encoding == mbfl_no_encoding_cp950 && c > 0x80 && c <= 0xFE) { + filter->status = 1; + filter->cache = c; + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + case 1: /* dbcs second byte */ + filter->status = 0; + c1 = filter->cache; + if ((c > 0x3f && c < 0x7f) || (c > 0xa0 && c < 0xff)) { + if (c < 0x7f) { + w = (c1 - 0xa1)*157 + (c - 0x40); + } else { + w = (c1 - 0xa1)*157 + (c - 0xa1) + 0x3f; + } + if (w >= 0 && w < big5_ucs_table_size) { + w = big5_ucs_table[w]; + } else { + w = 0; + } + + if (filter->from->no_encoding == mbfl_no_encoding_cp950) { + /* PUA for CP950 */ + if (is_in_cp950_pua(c1, c)) { + int c2 = (c1 << 8) | c; + + for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { + if (c2 >= cp950_pua_tbl[k][2] && c2 <= cp950_pua_tbl[k][3]) { + break; + } + } + + if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) { + w = 157*(c1 - (cp950_pua_tbl[k][2]>>8)) + c - (c >= 0xa1 ? 
0x62 : 0x40) + cp950_pua_tbl[k][0]; + } else { + w = c2 - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0]; + } + } else if (c1 == 0xA1) { + if (c == 0x45) { + w = 0x2027; + } else if (c == 0x4E) { + w = 0xFE51; + } else if (c == 0x5A) { + w = 0x2574; + } else if (c == 0xC2) { + w = 0x00AF; + } else if (c == 0xC3) { + w = 0xFFE3; + } else if (c == 0xC5) { + w = 0x02CD; + } else if (c == 0xE3) { + w = 0xFF5E; + } else if (c == 0xF2) { + w = 0x2295; + } else if (c == 0xF3) { + w = 0x2299; + } else if (c == 0xFE) { + w = 0xFF0F; + } + } else if (c1 == 0xA2) { + if (c == 0x40) { + w = 0xFF3C; + } else if (c == 0x41) { + w = 0x2215; + } else if (c == 0x42) { + w = 0xFE68; + } else if (c == 0x46) { + w = 0xFFE0; + } else if (c == 0x47) { + w = 0xFFE1; + } else if (c == 0xCC) { + w = 0x5341; + } else if (c == 0xCE) { + w = 0x5345; + } + } + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_big5_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 1) { + /* 2-byte character was truncated */ + filter->status = 0; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_big5(int c, mbfl_convert_filter *filter) +{ + int k, s = 0; + + if (c >= ucs_a1_big5_table_min && c < ucs_a1_big5_table_max) { + s = ucs_a1_big5_table[c - ucs_a1_big5_table_min]; + } else if (c >= ucs_a2_big5_table_min && c < ucs_a2_big5_table_max) { + s = ucs_a2_big5_table[c - ucs_a2_big5_table_min]; + } else if (c >= ucs_a3_big5_table_min && c < ucs_a3_big5_table_max) { + s = ucs_a3_big5_table[c - ucs_a3_big5_table_min]; + } else if (c >= ucs_i_big5_table_min && c < ucs_i_big5_table_max) { + s = ucs_i_big5_table[c - ucs_i_big5_table_min]; + } 
else if (c >= ucs_r1_big5_table_min && c < ucs_r1_big5_table_max) { + s = ucs_r1_big5_table[c - ucs_r1_big5_table_min]; + } else if (c >= ucs_r2_big5_table_min && c < ucs_r2_big5_table_max) { + s = ucs_r2_big5_table[c - ucs_r2_big5_table_min]; + } + + if (filter->to->no_encoding == mbfl_no_encoding_cp950) { + if (c >= 0xe000 && c <= 0xf848) { /* PUA for CP950 */ + for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { + if (c <= cp950_pua_tbl[k][1]) { + break; + } + } + + int c1 = c - cp950_pua_tbl[k][0]; + if ((cp950_pua_tbl[k][2] & 0xff) == 0x40) { + int c2 = cp950_pua_tbl[k][2] >> 8; + s = ((c1 / 157) + c2) << 8; + c1 %= 157; + s |= c1 + (c1 >= 0x3f ? 0x62 : 0x40); + } else { + s = c1 + cp950_pua_tbl[k][2]; + } + } else if (c == 0x00A2) { + s = 0; + } else if (c == 0x00A3) { + s = 0; + } else if (c == 0x00AF) { + s = 0xA1C2; + } else if (c == 0x02CD) { + s = 0xA1C5; + } else if (c == 0x0401) { + s = 0; + } else if (c >= 0x0414 && c <= 0x041C) { + s = 0; + } else if (c >= 0x0423 && c <= 0x044F) { + s = 0; + } else if (c == 0x0451) { + s = 0; + } else if (c == 0x2022) { + s = 0; + } else if (c == 0x2027) { + s = 0xA145; + } else if (c == 0x203E) { + s = 0; + } else if (c == 0x2215) { + s = 0xA241; + } else if (c == 0x223C) { + s = 0; + } else if (c == 0x2295) { + s = 0xA1F2; + } else if (c == 0x2299) { + s = 0xA1F3; + } else if (c >= 0x2460 && c <= 0x247D) { + s = 0; + } else if (c == 0x2574) { + s = 0xA15A; + } else if (c == 0x2609) { + s = 0; + } else if (c == 0x2641) { + s = 0; + } else if (c == 0x3005 || (c >= 0x302A && c <= 0x30FF)) { + s = 0; + } else if (c == 0xFE51) { + s = 0xA14E; + } else if (c == 0xFE68) { + s = 0xA242; + } else if (c == 0xFF3C) { + s = 0xA240; + } else if (c == 0xFF5E) { + s = 0xA1E3; + } else if (c == 0xFF64) { + s = 0; + } else if (c == 0xFFE0) { + s = 0xA246; + } else if (c == 0xFFE1) { + s = 0xA247; + } else if (c == 0xFFE3) { + s = 0xA1C3; + } else if (c == 0xFF0F) { + s = 0xA1FE; + } + } + + if (s <= 0) { + if 
(c == 0) { + s = 0; + } else { + s = -1; + } + } + + if (s >= 0) { + if (s <= 0x80) { /* latin */ + CK((*filter->output_function)(s, filter->data)); + } else { + CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s & 0xff, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static size_t mb_big5_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + e--; /* Stop the main loop 1 byte short of the end of the input */ + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c > 0xA0 && c <= 0xF9) { + /* We don't need to check p < e here; it's not possible that this pointer dereference + * will be outside the input string, because of e-- above */ + unsigned char c2 = *p++; + + if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) { + unsigned int w = (c - 0xA1)*157 + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F); + ZEND_ASSERT(w < big5_ucs_table_size); + w = big5_ucs_table[w]; + if (!w) { + if (c == 0xC8) { + p--; + } + w = MBFL_BAD_INPUT; + } + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + /* Finish up last byte of input string if there is one */ + if (p == e && out < limit) { + unsigned char c = *p++; + *out++ = (c <= 0x7F) ? 
c : MBFL_BAD_INPUT; + } + + *in_len = e - p + 1; + *in = p; + return out - buf; +} + +static void mb_wchar_to_big5(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) { + s = ucs_a1_big5_table[w - ucs_a1_big5_table_min]; + } else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) { + s = ucs_a2_big5_table[w - ucs_a2_big5_table_min]; + } else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) { + s = ucs_a3_big5_table[w - ucs_a3_big5_table_min]; + } else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) { + s = ucs_i_big5_table[w - ucs_i_big5_table_min]; + } else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) { + s = ucs_r1_big5_table[w - ucs_r1_big5_table_min]; + } else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) { + s = ucs_r2_big5_table[w - ucs_r2_big5_table_min]; + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } else if (s <= 0x80) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static size_t mb_cp950_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c <= 0x7F) { + *out++ = c; + } else if (c > 0x80 && c <= 0xFE && p < e) { + unsigned char c2 = *p++; + + if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE)) { + unsigned int w = ((c - 
0xA1)*157) + c2 - ((c2 <= 0x7E) ? 0x40 : 0xA1 - 0x3F); + w = (w < big5_ucs_table_size) ? big5_ucs_table[w] : 0; + + /* PUA for CP950 */ + if (is_in_cp950_pua(c, c2)) { + unsigned int s = (c << 8) | c2; + + int k; + for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { + if (s >= cp950_pua_tbl[k][2] && s <= cp950_pua_tbl[k][3]) { + break; + } + } + + if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) { + w = 157*(c - (cp950_pua_tbl[k][2] >> 8)) + c2 - (c2 >= 0xA1 ? 0x62 : 0x40) + cp950_pua_tbl[k][0]; + } else { + w = s - cp950_pua_tbl[k][2] + cp950_pua_tbl[k][0]; + } + } else if (c == 0xA1) { + if (c2 == 0x45) { + w = 0x2027; + } else if (c2 == 0x4E) { + w = 0xFE51; + } else if (c2 == 0x5A) { + w = 0x2574; + } else if (c2 == 0xC2) { + w = 0x00AF; + } else if (c2 == 0xC3) { + w = 0xFFE3; + } else if (c2 == 0xC5) { + w = 0x02CD; + } else if (c2 == 0xE3) { + w = 0xFF5E; + } else if (c2 == 0xF2) { + w = 0x2295; + } else if (c2 == 0xF3) { + w = 0x2299; + } else if (c2 == 0xFE) { + w = 0xFF0F; + } + } else if (c == 0xA2) { + if (c2 == 0x40) { + w = 0xFF3C; + } else if (c2 == 0x41) { + w = 0x2215; + } else if (c2 == 0x42) { + w = 0xFE68; + } else if (c2 == 0x46) { + w = 0xFFE0; + } else if (c2 == 0x47) { + w = 0xFFE1; + } else if (c2 == 0xCC) { + w = 0x5341; + } else if (c2 == 0xCE) { + w = 0x5345; + } + } + + if (!w) + w = MBFL_BAD_INPUT; + *out++ = w; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_cp950(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_big5_table_min && w < ucs_a1_big5_table_max) { + s = ucs_a1_big5_table[w - ucs_a1_big5_table_min]; + } else if (w >= ucs_a2_big5_table_min && w < ucs_a2_big5_table_max) { + s = 
ucs_a2_big5_table[w - ucs_a2_big5_table_min]; + } else if (w >= ucs_a3_big5_table_min && w < ucs_a3_big5_table_max) { + s = ucs_a3_big5_table[w - ucs_a3_big5_table_min]; + } else if (w >= ucs_i_big5_table_min && w < ucs_i_big5_table_max) { + s = ucs_i_big5_table[w - ucs_i_big5_table_min]; + } else if (w >= ucs_r1_big5_table_min && w < ucs_r1_big5_table_max) { + s = ucs_r1_big5_table[w - ucs_r1_big5_table_min]; + } else if (w >= ucs_r2_big5_table_min && w < ucs_r2_big5_table_max) { + s = ucs_r2_big5_table[w - ucs_r2_big5_table_min]; + } + + if (w >= 0xE000 && w <= 0xF848) { + int k; + for (k = 0; k < sizeof(cp950_pua_tbl) / (sizeof(unsigned short)*4); k++) { + if (w <= cp950_pua_tbl[k][1]) { + break; + } + } + + int c1 = w - cp950_pua_tbl[k][0]; + if ((cp950_pua_tbl[k][2] & 0xFF) == 0x40) { + int c2 = cp950_pua_tbl[k][2] >> 8; + s = ((c1 / 157) + c2) << 8; + c1 %= 157; + s |= c1 + (c1 >= 0x3F ? 0x62 : 0x40); + } else { + s = c1 + cp950_pua_tbl[k][2]; + } + } else if (w == 0xA2 || w == 0xA3 || w == 0x401 || (w >= 0x414 && w <= 0x41C) || (w >= 0x423 && w <= 0x44F) || w == 0x451 || w == 0x2022 || w == 0x203E || w == 0x223C || (w >= 0x2460 && w <= 0x247D) || w == 0x2609 || w == 0x2641 || w == 0x3005 || (w >= 0x302A && w <= 0x30FF) || w == 0xFF64) { + s = 0; + } else if (w == 0xAF) { + s = 0xA1C2; + } else if (w == 0x2CD) { + s = 0xA1C5; + } else if (w == 0x2027) { + s = 0xA145; + } else if (w == 0x2215) { + s = 0xA241; + } else if (w == 0x2295) { + s = 0xA1F2; + } else if (w == 0x2299) { + s = 0xA1F3; + } else if (w == 0x2574) { + s = 0xA15A; + } else if (w == 0xFE51) { + s = 0xA14E; + } else if (w == 0xFE68) { + s = 0xA242; + } else if (w == 0xFF3C) { + s = 0xA240; + } else if (w == 0xFF5E) { + s = 0xA1E3; + } else if (w == 0xFFE0) { + s = 0xA246; + } else if (w == 0xFFE1) { + s = 0xA247; + } else if (w == 0xFFE3) { + s = 0xA1C3; + } else if (w == 0xFF0F) { + s = 0xA1FE; + } + + if (!s) { + if (w == 0) { + out = mb_convert_buf_add(out, 0); + } else { + 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_big5); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } + } else if (s <= 0x80) { + out = mb_convert_buf_add(out, s); + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); + } + } + + MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const char *mbfl_encoding_big5_aliases[] = {"CN-BIG5", "BIG-FIVE", "BIGFIVE", NULL}; + +static const struct mbfl_convert_vtbl vtbl_big5_wchar = { + mbfl_no_encoding_big5, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_big5_wchar, + mbfl_filt_conv_big5_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_big5 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_big5, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_big5, + mbfl_filt_conv_common_flush, + NULL +}; + +const mbfl_encoding mbfl_encoding_big5 = { + mbfl_no_encoding_big5, + "BIG-5", + "BIG5", + mbfl_encoding_big5_aliases, + mblen_table_81_to_fe, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_big5_wchar, + &vtbl_wchar_big5, + mb_big5_to_wchar, + mb_wchar_to_big5, + NULL +}; + +static const struct mbfl_convert_vtbl vtbl_cp950_wchar = { + mbfl_no_encoding_cp950, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_big5_wchar, + mbfl_filt_conv_big5_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_cp950 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp950, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_big5, + mbfl_filt_conv_common_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_cp950 = { + mbfl_no_encoding_cp950, + "CP950", + "BIG5", + NULL, + mblen_table_81_to_fe, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_cp950_wchar, + &vtbl_wchar_cp950, + mb_cp950_to_wchar, + mb_wchar_to_cp950, + NULL +}; + +/* + * HZ + */ + +static int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + + switch (filter->status & 0xf) { + 
/* case 0x00: ASCII */ + /* case 0x10: GB2312 */ + case 0: + if (c == '~') { + filter->status += 2; + } else if (filter->status == 0x10 && ((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77))) { + /* DBCS first char */ + filter->cache = c; + filter->status += 1; + } else if (filter->status == 0 && c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* case 0x11: GB2312 second char */ + case 1: + filter->status &= ~0xf; + c1 = filter->cache; + if (c1 > 0x20 && c1 < 0x7F && c > 0x20 && c < 0x7F) { + s = (c1 - 1)*192 + c + 0x40; /* GB2312 */ + ZEND_ASSERT(s < cp936_ucs_table_size); + if (s == 0x1864) { + w = 0x30FB; + } else if (s == 0x186A) { + w = 0x2015; + } else if (s == 0x186C) { + w = 0x2225; + } else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) { + w = 0; + } else { + w = cp936_ucs_table[s]; + } + + if (w <= 0) { + w = MBFL_BAD_INPUT; + } + + CK((*filter->output_function)(w, filter->data)); + } else { + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + /* '~' */ + case 2: + if (c == '}' && filter->status == 0x12) { + filter->status = 0; + } else if (c == '{' && filter->status == 2) { + filter->status = 0x10; + } else if (c == '~' && filter->status == 2) { + CK((*filter->output_function)('~', filter->data)); + filter->status -= 2; + } else if (c == '\n') { + /* "~\n" is a line continuation; no output is needed, nor should we shift modes */ + filter->status -= 2; + } else { + /* Invalid character after ~ */ + filter->status -= 2; + CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + break; + + EMPTY_SWITCH_DEFAULT_CASE(); + } + + return 0; +} + +static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter) +{ + if (filter->status == 0x11) { + /* 2-byte character was truncated */ + 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); + } + + filter->status = 0; + + if (filter->flush_function) { + (*filter->flush_function)(filter->data); + } + + return 0; +} + +static int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter) +{ + int s = 0; + + if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { + if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261 || c == 0x2CA || c == 0x2CB || c == 0x2D9) { + s = 0; + } else { + s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; + } + } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { + if (c == 0x2015) { + s = 0xA1AA; + } else if (c == 0x2010 || c == 0x2013 || c == 0x2014 || c == 0x2016 || c == 0x2025 || c == 0x2035 || + c == 0x2105 || c == 0x2109 || c == 0x2121 || (c >= 0x2170 && c <= 0x2179) || (c >= 0x2196 && c <= 0x2199) || + c == 0x2215 || c == 0x221F || c == 0x2223 || c == 0x2252 || c == 0x2266 || c == 0x2267 || c == 0x2295 || + (c >= 0x2550 && c <= 0x2573) || c == 0x22BF || c == 0x2609 || (c >= 0x2581 && c <= 0x258F) || + (c >= 0x2593 && c <= 0x2595) || c == 0x25BC || c == 0x25BD || (c >= 0x25E2 && c <= 0x25E5)) { + s = 0; + } else { + s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; + } + } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { + if (c == 0x30FB) { + s = 0xA1A4; + } else if (c == 0x3006 || c == 0x3007 || c == 0x3012 || c == 0x3231 || c == 0x32A3 || c >= 0x3300 || + (c >= 0x3018 && c <= 0x3040) || (c >= 0x309B && c <= 0x309E) || (c >= 0x30FC && c <= 0x30FE)) { + s = 0; + } else { + s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; + } + } else if (c >= ucs_i_gb2312_table_min && c < ucs_i_gb2312_table_max) { + s = ucs_i_gb2312_table[c - ucs_i_gb2312_table_min]; + } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { + if (c == 0xFF04) { + s = 0xA1E7; + } else if (c == 0xFF5E) { + s = 0xA1AB; + } else if (c >= 0xFF01 && c <= 0xFF5D) { + s = c - 0xFF01 + 0xA3A1; + } else if (c == 
0xFFE0 || c == 0xFFE1 || c == 0xFFE3 || c == 0xFFE5) { + s = ucs_hff_s_cp936_table[c - 0xFFE0]; + } + } + + if (s & 0x8000) { + s -= 0x8080; + } + + if (s <= 0) { + s = (c == 0) ? 0 : -1; + } else if ((s >= 0x80 && s < 0x2121) || s > 0x8080) { + s = -1; + } + + if (s >= 0) { + if (s < 0x80) { /* ASCII */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)('~', filter->data)); + CK((*filter->output_function)('}', filter->data)); + } + filter->status = 0; + if (s == 0x7E) { + CK((*filter->output_function)('~', filter->data)); + } + CK((*filter->output_function)(s, filter->data)); + } else { /* GB 2312-80 */ + if ((filter->status & 0xFF00) != 0x200) { + CK((*filter->output_function)('~', filter->data)); + CK((*filter->output_function)('{', filter->data)); + } + filter->status = 0x200; + CK((*filter->output_function)((s >> 8) & 0x7F, filter->data)); + CK((*filter->output_function)(s & 0x7F, filter->data)); + } + } else { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + + return 0; +} + +static int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter) +{ + /* back to latin */ + if (filter->status & 0xFF00) { + CK((*filter->output_function)('~', filter->data)); + CK((*filter->output_function)('}', filter->data)); + } + filter->status = 0; + return 0; +} + +#define ASCII 0 +#define GB2312 1 + +static size_t mb_hz_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) +{ + unsigned char *p = *in, *e = p + *in_len; + uint32_t *out = buf, *limit = buf + bufsize; + + while (p < e && out < limit) { + unsigned char c = *p++; + + if (c == '~') { + if (p == e) { + break; + } + unsigned char c2 = *p++; + + if (c2 == '}' && *state == GB2312) { + *state = ASCII; + } else if (c2 == '{' && *state == ASCII) { + *state = GB2312; + } else if (c2 == '~' && *state == ASCII) { + *out++ = '~'; + } else if (c2 == '\n') { + /* "~\n" is a line continuation; no output is needed, nor should we shift modes */ + } else { + /* 
Invalid character after ~ */ + *out++ = MBFL_BAD_INPUT; + } + } else if (((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77)) && p < e && *state == GB2312) { + unsigned char c2 = *p++; + + if (c > 0x20 && c < 0x7F && c2 > 0x20 && c2 < 0x7F) { + unsigned int s = (c - 1)*192 + c2 + 0x40; + ZEND_ASSERT(s < cp936_ucs_table_size); + + if (s == 0x1864) { + s = 0x30FB; + } else if (s == 0x186A) { + s = 0x2015; + } else if (s == 0x186C) { + s = 0x2225; + } else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) { + s = 0; + } else { + s = cp936_ucs_table[s]; + } + if (!s) + s = MBFL_BAD_INPUT; + *out++ = s; + } else { + *out++ = MBFL_BAD_INPUT; + } + } else if (c < 0x80 && *state == ASCII) { + *out++ = c; + } else { + *out++ = MBFL_BAD_INPUT; + } + } + + *in_len = e - p; + *in = p; + return out - buf; +} + +static void mb_wchar_to_hz(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) +{ + unsigned char *out, *limit; + MB_CONVERT_BUF_LOAD(buf, out, limit); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + + while (len--) { + uint32_t w = *in++; + unsigned int s = 0; + + if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { + if (w == 0xB7 || w == 0x144 || w == 0x148 || w == 0x251 || w == 0x261 || w == 0x2CA || w == 0x2CB || w == 0x2D9) { + s = 0; + } else { + s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; + } + } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { + if (w == 0x2015) { + s = 0xA1AA; + } else if (w == 0x2010 || w == 0x2013 || w == 0x2014 || w == 0x2016 || w == 0x2025 || w == 0x2035 || w == 0x2105 || w == 0x2109 || w == 0x2121 || (w >= 0x2170 && w <= 0x2179) || (w >= 0x2196 && w <= 0x2199) || w == 0x2215 || w == 0x221F || w == 0x2223 || w == 0x2252 || w == 0x2266 || w == 0x2267 || w == 0x2295 || (w >= 0x2550 && w <= 0x2573) || w == 0x22BF || w == 0x2609 || (w >= 0x2581 && w <= 0x258F) || (w >= 0x2593 && w <= 0x2595) || w == 0x25BC || w == 0x25BD || (w >= 
0x25E2 && w <= 0x25E5)) { + s = 0; + } else { + s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; + } + } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { + if (w == 0x30FB) { + s = 0xA1A4; + } else if (w == 0x3006 || w == 0x3007 || w == 0x3012 || w == 0x3231 || w == 0x32A3 || w >= 0x3300 || (w >= 0x3018 && w <= 0x3040) || (w >= 0x309B && w <= 0x309E) || (w >= 0x30FC && w <= 0x30FE)) { + s = 0; + } else { + s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; + } + } else if (w >= ucs_i_gb2312_table_min && w < ucs_i_gb2312_table_max) { + s = ucs_i_gb2312_table[w - ucs_i_gb2312_table_min]; + } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { + if (w == 0xFF04) { + s = 0xA1E7; + } else if (w == 0xFF5E) { + s = 0xA1AB; + } else if (w >= 0xFF01 && w <= 0xFF5D) { + s = w - 0xFF01 + 0xA3A1; + } else if (w == 0xFFE0 || w == 0xFFE1 || w == 0xFFE3 || w == 0xFFE5) { + s = ucs_hff_s_cp936_table[w - 0xFFE0]; + } + } + + s &= ~0x8080; + + if ((!s && w) || (s >= 0x80 && s < 0x2121)) { + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_hz); + MB_CONVERT_BUF_ENSURE(buf, out, limit, len); + } else if (s < 0x80) { + /* ASCII */ + if (buf->state != ASCII) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); + out = mb_convert_buf_add2(out, '~', '}'); + buf->state = ASCII; + } + if (s == '~') { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + out = mb_convert_buf_add2(out, '~', '~'); + } else { + out = mb_convert_buf_add(out, s); + } + } else { + /* GB 2312-80 */ + if (buf->state != GB2312) { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); + out = mb_convert_buf_add2(out, '~', '{'); + buf->state = GB2312; + } else { + MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); + } + out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); + } + } + + if (end && buf->state != ASCII) { + /* If not in ASCII state, need to emit closing control chars */ + MB_CONVERT_BUF_ENSURE(buf, out, limit, 2); + out = mb_convert_buf_add2(out, '~', '}'); + } + 
+ MB_CONVERT_BUF_STORE(buf, out, limit); +} + +static const struct mbfl_convert_vtbl vtbl_hz_wchar = { + mbfl_no_encoding_hz, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_hz_wchar, + mbfl_filt_conv_hz_wchar_flush, + NULL, +}; + +static const struct mbfl_convert_vtbl vtbl_wchar_hz = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_hz, + mbfl_filt_conv_common_ctor, + NULL, + mbfl_filt_conv_wchar_hz, + mbfl_filt_conv_any_hz_flush, + NULL, +}; + +const mbfl_encoding mbfl_encoding_hz = { + mbfl_no_encoding_hz, + "HZ", + "HZ-GB-2312", + NULL, + NULL, + MBFL_ENCTYPE_GL_UNSAFE, + &vtbl_hz_wchar, + &vtbl_wchar_hz, + mb_hz_to_wchar, + mb_wchar_to_hz, + NULL +}; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cjk.h b/ext/mbstring/libmbfl/filters/mbfilter_cjk.h new file mode 100644 index 0000000000000..0749b755e3cce --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cjk.h @@ -0,0 +1,48 @@ +#ifndef MBFL_MBFILTER_CJK_H +#define MBFL_MBFILTER_CJK_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_jis; +extern const mbfl_encoding mbfl_encoding_2022jp; +extern const mbfl_encoding mbfl_encoding_2022jp_kddi; +extern const mbfl_encoding mbfl_encoding_2022jpms; +extern const mbfl_encoding mbfl_encoding_2022jp_2004; +extern const mbfl_encoding mbfl_encoding_cp50220; +extern const mbfl_encoding mbfl_encoding_cp50221; +extern const mbfl_encoding mbfl_encoding_cp50222; +extern const mbfl_encoding mbfl_encoding_2022kr; + +extern const mbfl_encoding mbfl_encoding_sjis; +extern const mbfl_encoding mbfl_encoding_sjis_mac; +extern const mbfl_encoding mbfl_encoding_sjis_docomo; +extern const mbfl_encoding mbfl_encoding_sjis_kddi; +extern const mbfl_encoding mbfl_encoding_sjis_sb; +extern const mbfl_encoding mbfl_encoding_sjis2004; +extern const mbfl_encoding mbfl_encoding_cp932; +extern const mbfl_encoding mbfl_encoding_sjiswin; + +extern const mbfl_encoding mbfl_encoding_euc_jp; +extern const mbfl_encoding mbfl_encoding_eucjp_win; 
+extern const mbfl_encoding mbfl_encoding_eucjp2004; +extern const mbfl_encoding mbfl_encoding_cp51932; +extern const mbfl_encoding mbfl_encoding_euc_cn; +extern const mbfl_encoding mbfl_encoding_euc_tw; +extern const mbfl_encoding mbfl_encoding_euc_kr; +extern const mbfl_encoding mbfl_encoding_uhc; + +extern const mbfl_encoding mbfl_encoding_gb18030; +extern const mbfl_encoding mbfl_encoding_cp936; +extern const mbfl_encoding mbfl_encoding_big5; +extern const mbfl_encoding mbfl_encoding_cp950; +extern const mbfl_encoding mbfl_encoding_hz; + +int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd); +int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd); +int mbfilter_sjis_emoji_sb2unicode(int s, int *snd); + +int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter); +int mbfilter_unicode2sjis_emoji_kddi_sjis(int c, int *s1, mbfl_convert_filter *filter); +int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CJK_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c deleted file mode 100644 index 93c33da9543d0..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.c +++ /dev/null @@ -1,1252 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Moriyoshi Koizumi - * - */ - -#include "mbfilter.h" -#include "mbfilter_cp5022x.h" -#include "mbfilter_jis.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" -#include "translit_kana_jisx0201_jisx0208.h" - -static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter); -static size_t mb_cp5022x_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -/* Previously, a dubious 'encoding' called 'cp50220raw' was supported - * This was just CP50220, but the implementation was less strict regarding - * invalid characters; it would silently pass some through - * This 'encoding' only existed in mbstring. 
In case some poor, lost soul is - * still using it, retain minimal support by aliasing it to CP50220 - * - * Further, mbstring also had a made-up encoding called "JIS-ms" - * This was the same as CP5022{0,1,2}, but without their special ways of - * handling conversion of Unicode half-width katakana */ -static const char *cp50220_aliases[] = {"cp50220raw", "cp50220-raw", "JIS-ms", NULL}; - -const mbfl_encoding mbfl_encoding_cp50220 = { - mbfl_no_encoding_cp50220, - "CP50220", - "ISO-2022-JP", - cp50220_aliases, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp50220_wchar, - &vtbl_wchar_cp50220, - mb_cp5022x_to_wchar, - mb_wchar_to_cp50220, - NULL -}; - -const mbfl_encoding mbfl_encoding_cp50221 = { - mbfl_no_encoding_cp50221, - "CP50221", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp50221_wchar, - &vtbl_wchar_cp50221, - mb_cp5022x_to_wchar, - mb_wchar_to_cp50221, - NULL -}; - -const mbfl_encoding mbfl_encoding_cp50222 = { - mbfl_no_encoding_cp50222, - "CP50222", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp50222_wchar, - &vtbl_wchar_cp50222, - mb_cp5022x_to_wchar, - mb_wchar_to_cp50222, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp50220_wchar = { - mbfl_no_encoding_cp50220, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp5022x_wchar, - mbfl_filt_conv_cp5022x_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp50220 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp50220, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp50220, - mbfl_filt_conv_wchar_cp50220_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_cp50221_wchar = { - mbfl_no_encoding_cp50221, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp5022x_wchar, - mbfl_filt_conv_cp5022x_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp50221 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp50221, - mbfl_filt_conv_common_ctor, - NULL, 
- mbfl_filt_conv_wchar_cp50221, - mbfl_filt_conv_any_jis_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_cp50222_wchar = { - mbfl_no_encoding_cp50222, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp5022x_wchar, - mbfl_filt_conv_cp5022x_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp50222 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp50222, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp50222, - mbfl_filt_conv_wchar_cp50222_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ - CK((*filter->output_function)(0xa5, filter->data)); - } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ - CK((*filter->output_function)(0x203e, filter->data)); - } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ - CK((*filter->output_function)(0xff40 + c, filter->data)); - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c <= 0x97) { /* kanji first char */ - filter->cache = c; - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - 
filter->status &= ~0xf; - c1 = filter->cache; - if (c > 0x20 && c < 0x7f) { - s = (c1 - 0x21)*94 + c - 0x21; - if (filter->status == 0x80) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= 94 * 94 && s < 114 * 94) { - /* user-defined => PUA (Microsoft extended) */ - w = s - 94*94 + 0xe000; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } else { - if (s >= 0 && s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC */ -/* case 0x02: */ -/* case 0x12: */ -/* case 0x22: */ -/* case 0x82: */ -/* case 0x92: */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - goto retry; - } - break; - - /* ESC $ */ -/* case 0x03: */ -/* case 0x13: */ -/* case 0x23: */ -/* case 0x83: */ -/* case 0x93: */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x24, filter->data)); - goto retry; - } - break; - - /* ESC $ ( */ -/* case 0x04: */ -/* case 0x14: */ -/* case 0x24: */ -/* case 0x84: */ -/* case 0x94: */ - case 4: - if 
(c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x24, filter->data)); - CK((*filter->output_function)(0x28, filter->data)); - goto retry; - } - break; - - /* ESC ( */ -/* case 0x05: */ -/* case 0x15: */ -/* case 0x25: */ -/* case 0x85: */ -/* case 0x95: */ - case 5: - if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x28, filter->data)); - goto retry; - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_cp5022x_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - /* 2-byte (JIS X 0208 or 0212) character was truncated, or else - * escape sequence was truncated */ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -/* Apply various transforms to input codepoint, such as converting halfwidth katakana - * to fullwidth katakana. `mode` is a bitfield which controls which transforms are - * actually performed. The bit values are defined in translit_kana_jisx0201_jisx0208.h. - * `mode` must not call for transforms which are inverses (i.e. which would cancel - * each other out). - * - * In some cases, successive input codepoints may be merged into one output codepoint. - * (That is the purpose of the `next` parameter.) If the `next` codepoint is consumed - * and should be skipped over, `*consumed` will be set to true. Otherwise, `*consumed` - * will not be modified. 
If there is no following codepoint, `next` should be zero. - * - * Again, in some cases, one input codepoint may convert to two output codepoints. - * If so, the second output codepoint will be stored in `*second`. - * - * Return the resulting codepoint. If none of the requested transforms apply, return - * the input codepoint unchanged. - */ -uint32_t mb_convert_kana_codepoint(uint32_t c, uint32_t next, bool *consumed, uint32_t *second, unsigned int mode) -{ - if ((mode & MBFL_HAN2ZEN_ALL) && c >= 0x21 && c <= 0x7D && c != '"' && c != '\'' && c != '\\') { - return c + 0xFEE0; - } - if ((mode & MBFL_HAN2ZEN_ALPHA) && ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))) { - return c + 0xFEE0; - } - if ((mode & MBFL_HAN2ZEN_NUMERIC) && c >= '0' && c <= '9') { - return c + 0xFEE0; - } - if ((mode & MBFL_HAN2ZEN_SPACE) && c == ' ') { - return 0x3000; - } - - if (mode & (MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_HIRAGANA)) { - /* Convert Hankaku kana to Zenkaku kana - * Either all Hankaku kana (including katakana and hiragana) will be converted - * to Zenkaku katakana, or to Zenkaku hiragana */ - if ((mode & MBFL_HAN2ZEN_KATAKANA) && (mode & MBFL_HAN2ZEN_GLUE)) { - if (c >= 0xFF61 && c <= 0xFF9F) { - int n = c - 0xFF60; - - if (next >= 0xFF61 && next <= 0xFF9F) { - if (next == 0xFF9E && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { - *consumed = true; - return 0x3001 + hankana2zenkana_table[n]; - } - if (next == 0xFF9E && n == 19) { - *consumed = true; - return 0x30F4; - } - if (next == 0xFF9F && n >= 42 && n <= 46) { - *consumed = true; - return 0x3002 + hankana2zenkana_table[n]; - } - } - - return 0x3000 + hankana2zenkana_table[n]; - } - } - if ((mode & MBFL_HAN2ZEN_HIRAGANA) && (mode & MBFL_HAN2ZEN_GLUE)) { - if (c >= 0xFF61 && c <= 0xFF9F) { - int n = c - 0xFF60; - - if (next >= 0xFF61 && next <= 0xFF9F) { - if (next == 0xFF9E && ((n >= 22 && n <= 36) || (n >= 42 && n <= 46))) { - *consumed = true; - return 0x3001 + hankana2zenhira_table[n]; - } - if (next == 0xFF9F 
&& n >= 42 && n <= 46) { - *consumed = true; - return 0x3002 + hankana2zenhira_table[n]; - } - } - - return 0x3000 + hankana2zenhira_table[n]; - } - } - if ((mode & MBFL_HAN2ZEN_KATAKANA) && c >= 0xFF61 && c <= 0xFF9F) { - return 0x3000 + hankana2zenkana_table[c - 0xFF60]; - } - if ((mode & MBFL_HAN2ZEN_HIRAGANA) && c >= 0xFF61 && c <= 0xFF9F) { - return 0x3000 + hankana2zenhira_table[c - 0xFF60]; - } - } - - if (mode & MBFL_HAN2ZEN_SPECIAL) { /* special ascii to symbol */ - if (c == '\\' || c == 0xA5) { /* YEN SIGN */ - return 0xFFE5; /* FULLWIDTH YEN SIGN */ - } - if (c == 0x7E || c == 0x203E) { - return 0xFFE3; /* FULLWIDTH MACRON */ - } - if (c == '\'') { - return 0x2019; /* RIGHT SINGLE QUOTATION MARK */ - } - if (c == '"') { - return 0x201D; /* RIGHT DOUBLE QUOTATION MARK */ - } - } - - if (mode & (MBFL_ZEN2HAN_ALL | MBFL_ZEN2HAN_ALPHA | MBFL_ZEN2HAN_NUMERIC | MBFL_ZEN2HAN_SPACE)) { - /* Zenkaku to Hankaku */ - if ((mode & MBFL_ZEN2HAN_ALL) && c >= 0xFF01 && c <= 0xFF5D && c != 0xFF02 && c != 0xFF07 && c != 0xFF3C) { - /* all except " ' \ ~ */ - return c - 0xFEE0; - } - if ((mode & MBFL_ZEN2HAN_ALPHA) && ((c >= 0xFF21 && c <= 0xFF3A) || (c >= 0xFF41 && c <= 0xFF5A))) { - return c - 0xFEE0; - } - if ((mode & MBFL_ZEN2HAN_NUMERIC) && (c >= 0xFF10 && c <= 0xFF19)) { - return c - 0xFEE0; - } - if ((mode & MBFL_ZEN2HAN_SPACE) && (c == 0x3000)) { - return ' '; - } - if ((mode & MBFL_ZEN2HAN_ALL) && (c == 0x2212)) { /* MINUS SIGN */ - return '-'; - } - } - - if (mode & (MBFL_ZEN2HAN_KATAKANA | MBFL_ZEN2HAN_HIRAGANA)) { - /* Zenkaku kana to hankaku kana */ - if ((mode & MBFL_ZEN2HAN_KATAKANA) && c >= 0x30A1 && c <= 0x30F4) { - /* Zenkaku katakana to hankaku kana */ - int n = c - 0x30A1; - if (zenkana2hankana_table[n][1]) { - *second = 0xFF00 + zenkana2hankana_table[n][1]; - } - return 0xFF00 + zenkana2hankana_table[n][0]; - } - if ((mode & MBFL_ZEN2HAN_HIRAGANA) && c >= 0x3041 && c <= 0x3093) { - /* Zenkaku hiragana to hankaku kana */ - int n = c - 0x3041; - if 
(zenkana2hankana_table[n][1]) { - *second = 0xFF00 + zenkana2hankana_table[n][1]; - } - return 0xFF00 + zenkana2hankana_table[n][0]; - } - if (c == 0x3001) { - return 0xFF64; /* HALFWIDTH IDEOGRAPHIC COMMA */ - } - if (c == 0x3002) { - return 0xFF61; /* HALFWIDTH IDEOGRAPHIC FULL STOP */ - } - if (c == 0x300C) { - return 0xFF62; /* HALFWIDTH LEFT CORNER BRACKET */ - } - if (c == 0x300D) { - return 0xFF63; /* HALFWIDTH RIGHT CORNER BRACKET */ - } - if (c == 0x309B) { - return 0xFF9E; /* HALFWIDTH KATAKANA VOICED SOUND MARK */ - } - if (c == 0x309C) { - return 0xff9f; /* HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK */ - } - if (c == 0x30FC) { - return 0xFF70; /* HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK */ - } - if (c == 0x30FB) { - return 0xFF65; /* HALFWIDTH KATAKANA MIDDLE DOT */ - } - } - - if (mode & (MBFL_ZENKAKU_HIRA2KATA | MBFL_ZENKAKU_KATA2HIRA)) { - if ((mode & MBFL_ZENKAKU_HIRA2KATA) && ((c >= 0x3041 && c <= 0x3093) || c == 0x309D || c == 0x309E)) { - /* Zenkaku hiragana to Zenkaku katakana */ - return c + 0x60; - } - if ((mode & MBFL_ZENKAKU_KATA2HIRA) && ((c >= 0x30A1 && c <= 0x30F3) || c == 0x30FD || c == 0x30FE)) { - /* Zenkaku katakana to Zenkaku hiragana */ - return c - 0x60; - } - } - - if (mode & MBFL_ZEN2HAN_SPECIAL) { /* special symbol to ascii */ - if (c == 0xFFE5 || c == 0xFF3C) { /* FULLWIDTH YEN SIGN/FULLWIDTH REVERSE SOLIDUS */ - return '\\'; - } - if (c == 0xFFE3 || c == 0x203E) { /* FULLWIDTH MACRON/OVERLINE */ - return '~'; - } - if (c == 0x2018 || c == 0x2019) { /* LEFT/RIGHT SINGLE QUOTATION MARK*/ - return '\''; - } - if (c == 0x201C || c == 0x201D) { /* LEFT/RIGHT DOUBLE QUOTATION MARK */ - return '"'; - } - } - - return c; -} - -static int mbfl_filt_conv_wchar_cp50220(int c, mbfl_convert_filter *filter) -{ - int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE; - bool consumed = false; - - if (filter->cache) { - int s = mb_convert_kana_codepoint(filter->cache, c, &consumed, NULL, mode); - filter->cache = consumed ? 
0 : c; - /* Terrible hack to get CP50220 to emit error markers in the proper - * position, not reordering them with subsequent characters */ - filter->filter_function = mbfl_filt_conv_wchar_cp50221; - mbfl_filt_conv_wchar_cp50221(s, filter); - filter->filter_function = mbfl_filt_conv_wchar_cp50220; - if (c == 0 && !consumed) { - (*filter->output_function)(0, filter->data); - } - } else if (c == 0) { - /* This case has to be handled separately, since `filter->cache == 0` means - * no codepoint is cached */ - (*filter->output_function)(0, filter->data); - } else { - filter->cache = c; - } - - return 0; -} - -static int mbfl_filt_conv_wchar_cp50220_flush(mbfl_convert_filter *filter) -{ - int mode = MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE; - - if (filter->cache) { - int s = mb_convert_kana_codepoint(filter->cache, 0, NULL, NULL, mode); - filter->filter_function = mbfl_filt_conv_wchar_cp50221; - mbfl_filt_conv_wchar_cp50221(s, filter); - filter->filter_function = mbfl_filt_conv_wchar_cp50220; - filter->cache = 0; - } - - return mbfl_filt_conv_any_jis_flush(filter); -} - -int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xE000 && c <= 0xE757) { - /* 'private'/'user' codepoints */ - s = c - 0xE000; - s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); - } - - if (s <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE 
SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - } - - /* Above, we do a series of lookups in `ucs_*_jis_table` to find a - * corresponding kuten code for this Unicode codepoint - * If we get zero, that means the codepoint is not in JIS X 0208 - * On the other hand, if we get a result with the high bits set on both - * upper and lower bytes, that is not a code in JIS X 0208 but rather - * in JIS X 0213 - * In either case, check if this codepoint is one of the extensions added - * to JIS X 0208 by MicroSoft (to make CP932) */ - if (s == 0 || ((s & 0x8000) && (s & 0x80))) { - int i; - s = -1; - - for (i = 0; - i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - i++) { - const int oh = cp932ext1_ucs_table_min / 94; - - if (c == cp932ext1_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - - if (s < 0) { - const int oh = cp932ext2_ucs_table_min / 94; - const int cp932ext2_ucs_table_size = - cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - for (i = 0; i < cp932ext2_ucs_table_size; i++) { - if (c == cp932ext2_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - } - - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } - - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0; - } - CK((*filter->output_function)(s, filter->data)); - } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ - if ((filter->status & 0xff00) != 0x500) { - 
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ - filter->status = 0x500; - } - CK((*filter->output_function)(s - 0x80, filter->data)); - } else if (s <= 0x927E) { /* X 0208 + extensions */ - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0x200; - } - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } else if (s < 0x10000) { /* X0212 */ - CK(mbfl_filt_conv_illegal_output(c, filter)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -/* - * wchar => CP50222 - */ -int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xE000 && c <= 0xE757) { - /* 'private'/'user' 
codepoints */ - s = c - 0xE000; - s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); - } - - if (s <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - } - if (s == 0 || ((s & 0x8000) && (s & 0x80))) { - int i; - s = -1; - - for (i = 0; - i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - const int oh = cp932ext1_ucs_table_min / 94; - - if (c == cp932ext1_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - - if (s <= 0) { - const int oh = cp932ext2_ucs_table_min / 94; - const int cp932ext2_ucs_table_size = - cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - for (i = 0; i < cp932ext2_ucs_table_size; i++) { - if (c == cp932ext2_ucs_table[i]) { - s = ((i / 94 + oh + 0x21) << 8) + (i % 94 + 0x21); - break; - } - } - } - - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } - - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) == 0x500) { - CK((*filter->output_function)(0x0f, filter->data)); /* SO */ - filter->status = 0; - } else if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0; - } - CK((*filter->output_function)(s, filter->data)); - } else if (s >= 0xa0 && s < 0xe0) { /* X 0201 kana */ - if ((filter->status & 0xff00) != 0x500) { - CK((*filter->output_function)(0x0e, filter->data)); /* SI */ - filter->status = 0x500; - } - CK((*filter->output_function)(s - 0x80, 
filter->data)); - } else if (s <= 0x927E) { /* X 0208 */ - if ((filter->status & 0xff00) == 0x500) { - CK((*filter->output_function)(0x0f, filter->data)); /* SO */ - filter->status = 0; - } - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - filter->status = 0x200; - } - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } else if (s < 0x10000) { /* X0212 */ - CK(mbfl_filt_conv_illegal_output(c, filter)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) == 0x500) { - CK((*filter->output_function)(0x0f, filter->data)); /* SO */ - filter->status = 0; - } - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_wchar_cp50222_flush(mbfl_convert_filter *filter) -{ - /* back to latin */ - if ((filter->status & 0xff00) == 0x500) { - CK((*filter->output_function)(0x0f, filter->data)); /* SO */ - } else if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -#define ASCII 0 -#define JISX_0201_LATIN 1 -#define JISX_0201_KANA 2 -#define JISX_0208 3 -#define JISX_0212 4 - -static size_t mb_cp5022x_to_wchar(unsigned char **in, size_t *in_len, 
uint32_t *buf, size_t bufsize, unsigned int *state) -{ - ZEND_ASSERT(bufsize >= 3); - - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - /* Escape sequence */ - if ((e - p) < 2) { - *out++ = MBFL_BAD_INPUT; - /* Duplicate error-handling behavior of legacy code */ - if (p < e && (*p == '(' || *p == '$')) - p++; - continue; - } - unsigned char c2 = *p++; - if (c2 == '$') { - unsigned char c3 = *p++; - if (c3 == '@' || c3 == 'B') { - *state = JISX_0208; - } else if (c3 == '(') { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c4 = *p++; - if (c4 == '@' || c4 == 'B') { - *state = JISX_0208; - } else if (c4 == 'D') { - *state = JISX_0212; - } else { - if ((limit - out) < 3) { - p -= 4; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '$'; - *out++ = '('; - p--; - } - } else { - if ((limit - out) < 2) { - p -= 3; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '$'; - p--; - } - } else if (c2 == '(') { - unsigned char c3 = *p++; - if (c3 == 'B' || c3 == 'H') { - *state = ASCII; - } else if (c3 == 'J') { - *state = JISX_0201_LATIN; - } else if (c3 == 'I') { - *state = JISX_0201_KANA; - } else { - if ((limit - out) < 2) { - p -= 3; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '('; - p--; - } - } else { - *out++ = MBFL_BAD_INPUT; - p--; - } - } else if (c == 0xE) { - *state = JISX_0201_KANA; - } else if (c == 0xF) { - *state = ASCII; - } else if (*state == JISX_0201_LATIN && c == 0x5C) { /* YEN SIGN */ - *out++ = 0xA5; - } else if (*state == JISX_0201_LATIN && c == 0x7E) { /* OVER LINE */ - *out++ = 0x203E; - } else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) { - *out++ = 0xFF40 + c; - } else if (*state >= JISX_0208 && c > 0x20 && c <= 0x97) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - if (c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - 
uint32_t w = 0; - if (*state == JISX_0208) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= 94*94 && s < 114*94) { - /* MicroSoft extension */ - w = s - 94*94 + 0xE000; - } - if (!w) - w = MBFL_BAD_INPUT; - } else { - if (s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - } - if (!w) - w = MBFL_BAD_INPUT; - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static unsigned int lookup_wchar(uint32_t w) -{ - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w <= 0xE757) { - /* Private Use Area codepoints */ - s = w - 0xE000; - s = ((s / 94) + 0x7F) << 8 | ((s % 94) + 0x21); - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x1005C; - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } 
else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w == 0) { - return 0; - } - } - - /* Above, we do a series of lookups in `ucs_*_jis_table` to find a - * corresponding kuten code for this Unicode codepoint - * If we get zero, that means the codepoint is not in JIS X 0208 - * On the other hand, if we get a result with the high bits set on both - * upper and lower bytes, that is not a code in JIS X 0208 but rather - * in JIS X 0213 - * In either case, check if this codepoint is one of the extensions added - * to JIS X 0208 by MicroSoft (to make CP932) */ - if (!s || s >= 0x8080) { - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - return (((i / 94) + (cp932ext1_ucs_table_min / 94) + 0x21) << 8) + (i % 94) + 0x21; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (w == cp932ext2_ucs_table[i]) { - return (((i / 94) + (cp932ext2_ucs_table_min / 94) + 0x21) << 8) + (i % 94) + 0x21; - } - } - } - - return s; -} - -static void mb_wchar_to_cp50220(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - - if (buf->state & 0xFFFF00) { - /* Reprocess cached codepoint */ - w = buf->state >> 8; - buf->state &= 0xFF; - goto reprocess_codepoint; - } - - while (len--) { - w = *in++; -reprocess_codepoint: - - if (w >= 0xFF61 && w <= 0xFF9F && !len && !end) { - /* This codepoint may need to combine with the next one, - * but the 'next one' will come in a separate buffer */ - buf->state |= w << 8; - break; - } - - bool consumed = false; - w = mb_convert_kana_codepoint(w, len ? 
*in : 0, &consumed, NULL, MBFL_HAN2ZEN_KATAKANA | MBFL_HAN2ZEN_GLUE); - if (consumed) { - /* Two successive codepoints were converted into one */ - in++; len--; consumed = false; - } - - unsigned int s = lookup_wchar(w); - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); - } else if (s < 0x80) { - /* ASCII */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != ASCII) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA0 && s < 0xE0) { - /* JISX 0201 Kana */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != JISX_0201_KANA) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX_0201_KANA; - } - out = mb_convert_buf_add(out, s - 0x80); - } else if (s <= 0x927E) { - /* JISX 0208 Kanji */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - if (buf->state != JISX_0208) { - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else if (s >= 0x10000) { - /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != JISX_0201_LATIN) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static void mb_wchar_to_cp50221(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = lookup_wchar(w); - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, 
limit, w, mb_wchar_to_cp50221); - } else if (s < 0x80) { - /* ASCII */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != ASCII) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA0 && s < 0xE0) { - /* JISX 0201 Kana */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != JISX_0201_KANA) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX_0201_KANA; - } - out = mb_convert_buf_add(out, s - 0x80); - } else if (s <= 0x927E) { - /* JISX 0208 Kanji */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - if (buf->state != JISX_0208) { - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else if (s >= 0x10000) { - /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state != JISX_0201_LATIN) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50221); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static void mb_wchar_to_cp50222(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = lookup_wchar(w); - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50222); - } else if (s < 0x80) { - /* ASCII */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - if (buf->state == JISX_0201_KANA) { - out = mb_convert_buf_add(out, 0xF); - buf->state = ASCII; - } else if (buf->state != ASCII) { - out = 
mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA0 && s < 0xE0) { - /* JISX 0201 Kana */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - if (buf->state != JISX_0201_KANA) { - out = mb_convert_buf_add(out, 0xE); - buf->state = JISX_0201_KANA; - } - out = mb_convert_buf_add(out, s - 0x80); - } else if (s <= 0x927E) { - /* JISX 0208 Kanji */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); - if (buf->state == JISX_0201_KANA) { - out = mb_convert_buf_add(out, 0xF); - } - if (buf->state != JISX_0208) { - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else if (s >= 0x10000) { - /* JISX 0201 Latin; we 'tag' these by adding 0x10000 */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - if (buf->state == JISX_0201_KANA) { - out = mb_convert_buf_add(out, 0xF); - } - if (buf->state != JISX_0201_LATIN) { - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp50222); - } - } - - if (end) { - if (buf->state == JISX_0201_KANA) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 1); - out = mb_convert_buf_add(out, 0xF); - } else if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h b/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h deleted file mode 100644 index 0cc90f4b45889..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp5022x.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_CP50221_h -#define MBFL_MBFILTER_CP50221_h - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_cp50220; -extern const mbfl_encoding mbfl_encoding_cp50221; -extern const mbfl_encoding mbfl_encoding_cp50222; - -extern const struct mbfl_convert_vtbl vtbl_cp50220_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp50220; -extern const struct mbfl_convert_vtbl vtbl_cp50221_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp50221; -extern const struct mbfl_convert_vtbl vtbl_cp50222_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp50222; - -int mbfl_filt_conv_cp5022x_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp50221(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp50222(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_CP50221_h */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c deleted file mode 100644 index d3aae8b10f56e..0000000000000 --- 
a/ext/mbstring/libmbfl/filters/mbfilter_cp51932.c +++ /dev/null @@ -1,412 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_cp51932.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" - -static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_cp51932_aliases[] = {"cp51932", NULL}; - -const mbfl_encoding mbfl_encoding_cp51932 = { - mbfl_no_encoding_cp51932, - "CP51932", - "CP51932", - mbfl_encoding_cp51932_aliases, - mblen_table_eucjp, - 0, - &vtbl_cp51932_wchar, - &vtbl_wchar_cp51932, - mb_cp51932_to_wchar, - mb_wchar_to_cp51932, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp51932_wchar = { - mbfl_no_encoding_cp51932, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp51932_wchar, - mbfl_filt_conv_cp51932_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp51932 = { - mbfl_no_encoding_wchar, - 
mbfl_no_encoding_cp51932, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp51932, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -/* - * cp51932 => wchar - */ -int -mbfl_filt_conv_cp51932_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c >= 0xA1 && c <= 0xFE) { /* CP932, first byte */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* got first half */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xa0 && c < 0xff) { - w = 0; - s = (c1 - 0xa1)*94 + c - 0xa1; - if (s <= 137) { - if (s == 31) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xffe0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xffe1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xffe2; /* FULLWIDTH NOT SIGN */ - } - } - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - } - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* got 0x8e, X0201 kana */ 
- filter->status = 0; - if (c > 0xa0 && c < 0xe0) { - w = 0xfec0 + c; - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - /* Input string was truncated */ - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - filter->status = 0; - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -/* - * wchar => cp51932 - */ -int -mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1; - - s1 = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - if (s1 >= 0x8080) s1 = -1; /* we don't support JIS X0213 */ - if (s1 <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } else { - s1 = -1; - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - if (c == cp932ext1_ucs_table[c1]) { - s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); - break; 
- } - c1++; - } - if (s1 < 0) { - c1 = 0; - c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext2_ucs_table[c1]) { - s1 = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); - break; - } - c1++; - } - } - } - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* latin */ - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x100) { /* kana */ - CK((*filter->output_function)(0x8e, filter->data)); - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x8080) { /* X 0208 */ - CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xFE && p < e) { - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - 
cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8E && p < e) { - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xDF) { - *out++ = 0xFEC0 + c2; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s >= 0x8080) s = 0; /* We don't support JIS X0213 */ - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else { - for (int i 
= 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (cp932ext1_ucs_table[i] == w) { - s = ((i/94 + 0x2D) << 8) + (i%94) + 0x21; - goto found_it; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (cp932ext2_ucs_table[i] == w) { - s = ((i/94 + 0x79) << 8) + (i%94) + 0x21; - goto found_it; - } - } - } -found_it: ; - } - - if (!s || s >= 0x8080) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp51932); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else if (s < 0x100) { - out = mb_convert_buf_add2(out, 0x8E, s); - } else { - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c b/ext/mbstring/libmbfl/filters/mbfilter_cp932.c deleted file mode 100644 index 506c24393906d..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.c +++ /dev/null @@ -1,618 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -/* CP932 is Microsoft's version of Shift-JIS. - * - * What we call "SJIS-win" is a variant of CP932 which maps U+00A5 - * and U+203E the same way as eucJP-win; namely, instead of mapping - * U+00A5 (YEN SIGN) to 0x5C and U+203E (OVERLINE) to 0x7E, - * these codepoints are mapped to appropriate JIS X 0208 characters. - * - * When converting from Shift-JIS to Unicode, there is no difference - * between CP932 and "SJIS-win". - * - * Additional facts: - * - * • In the libmbfl library which formed the base for mbstring, "CP932" and - * "SJIS-win" were originally aliases. The differing mappings were added in - * December 2002. The libmbfl author later stated that this was done so that - * "CP932" would comply with a certain specification, while "SJIS-win" would - * maintain the existing mappings. He does not remember which specification - * it was. - * • The WHATWG specification for "Shift_JIS" (followed by web browsers) - * agrees with our mappings for "CP932". - * • Microsoft Windows' "best-fit" mappings for CP932 (via the - * WideCharToMultiByte API) convert U+00A5 to 0x5C, which also agrees with - * our mappings for "CP932". - * • glibc's iconv converts U+203E to CP932 0x7E, which again agrees with - * our mappings for "CP932". - * • When converting Shift-JIS to CP932, the conversion goes through Unicode. - * Shift-JIS 0x7E converts to U+203E, so mapping U+203E to 0x7E means that - * 0x7E will go to 0x7E when converting Shift-JIS to CP932. 
- */ - -#include "mbfilter.h" -#include "mbfilter_cp932.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" - -static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -}; - -static const char *mbfl_encoding_cp932_aliases[] = {"MS932", "Windows-31J", "MS_Kanji", NULL}; -static const char *mbfl_encoding_sjiswin_aliases[] = {"SJIS-ms", "SJIS-open", NULL}; - -const mbfl_encoding mbfl_encoding_cp932 = { - mbfl_no_encoding_cp932, - "CP932", - "Shift_JIS", - mbfl_encoding_cp932_aliases, - mblen_table_sjis, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp932_wchar, - &vtbl_wchar_cp932, - mb_cp932_to_wchar, - mb_wchar_to_cp932, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp932_wchar = { - mbfl_no_encoding_cp932, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - 
mbfl_filt_conv_cp932_wchar, - mbfl_filt_conv_cp932_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp932 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp932, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp932, - mbfl_filt_conv_common_flush, - NULL, -}; - -const mbfl_encoding mbfl_encoding_sjiswin = { - mbfl_no_encoding_sjiswin, - "SJIS-win", - "Shift_JIS", - mbfl_encoding_sjiswin_aliases, - mblen_table_sjis, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjiswin_wchar, - &vtbl_wchar_sjiswin, - mb_cp932_to_wchar, - mb_wchar_to_sjiswin, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_sjiswin_wchar = { - mbfl_no_encoding_sjiswin, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp932_wchar, - mbfl_filt_conv_cp932_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjiswin = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjiswin, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjiswin, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - s1--; \ - s1 >>= 1; \ - if ((c1) < 0x5f) { \ - s1 += 0x71; \ - } else { \ - s1 += 0xb1; \ - } \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - if (s1 < 0xa0) { \ - s1 -= 0x81; \ - } else { \ - s1 -= 0xc1; \ - } \ - s1 <<= 1; \ - s1 += 0x21; \ - s2 = c2; \ - if (s2 < 0x9f) { \ - if (s2 < 0x7f) { \ - s2++; \ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, s1, s2, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* kana */ - 
CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* kanji second char */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0x40 && c <= 0xfc && c != 0x7f) { - w = 0; - SJIS_DECODE(c1, c, s1, s2); - s = (s1 - 0x21)*94 + s2 - 0x21; - if (s <= 137) { - if (s == 31) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xffe0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xffe1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xffe2; /* FULLWIDTH NOT SIGN */ - } - } - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ - w = s - (94*94) + 0xe000; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_cp932_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - 
(*filter->output_function)(MBFL_BAD_INPUT, filter->data); - filter->status = 0; - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1, s2; - - s1 = 0; - s2 = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c == 0x203E) { - s1 = 0x7E; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ - s1 = c - 0xe000; - c1 = s1/94 + 0x7f; - c2 = s1%94 + 0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - if (s1 <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x5C; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } - } - if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - if (c == cp932ext1_ucs_table[c1]) { - s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); - break; - } - c1++; - } - if (s1 <= 0) { - c1 = 0; - c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext3_ucs_table[c1]) { - s1 = ((c1/94 + 0x93) << 8) + 
(c1%94 + 0x21); - break; - } - c1++; - } - } - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - s1 = -1; - } - } - if (s1 >= 0) { - if (s1 < 0x100) { /* latin or kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* kanji */ - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter) -{ - if (c == 0xA5) { - CK((*filter->output_function)(0x81, filter->data)); - CK((*filter->output_function)(0x8F, filter->data)); - } else if (c == 0x203E) { - CK((*filter->output_function)(0x81, filter->data)); - CK((*filter->output_function)(0x50, filter->data)); - } else { - return mbfl_filt_conv_wchar_cp932(c, filter); - } - return 0; -} - -static size_t mb_cp932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c > 0xA0 && c < 0xE0) { - /* Kana */ - *out++ = 0xFEC0 + c; - } else if (c > 0x80 && c < 0xFD && c != 0xA0 && p < e) { - unsigned char c2 = *p++; - - if (c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F) { - unsigned int s1, s2, w = 0; - SJIS_DECODE(c, c2, s1, s2); - unsigned int s = (s1 - 0x21)*94 + s2 - 0x21; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH 
NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } else if (s >= cp932ext3_ucs_table_min && s < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= (94*94) && s < (114*94)) { - w = s - (94*94) + 0xE000; - } - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_cp932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s1 = 0, s2 = 0, c1, c2; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w == 0x203E) { - s1 = 0x7E; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - s1 = w - 0xE000; - c1 = s1/94 + 0x7F; - c2 = s1%94 + 0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - - if (w == 0xA5) { /* YEN SIGN */ - s1 = 0x5C; - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215D; - } else if 
(w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } else if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } - - if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */ - for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (cp932ext1_ucs_table[i] == w) { - s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21); - goto emit_output; - } - } - - for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { - if (cp932ext3_ucs_table[i] == w) { - s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21); - goto emit_output; - } - } - - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - -emit_output: - if (s1 < 0x100) { - out = mb_convert_buf_add(out, s1); - } else { - c1 = (s1 >> 8) & 0xFF; - c2 = s1 & 0xFF; - SJIS_ENCODE(c1, c2, s1, s2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static void mb_wchar_to_sjiswin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s1 = 0, s2 = 0, c1, c2; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - s1 = w - 0xE000; - c1 = s1/94 + 0x7F; - c2 = s1%94 + 0x21; 
- s1 = (c1 << 8) | c2; - s2 = 1; - } - - if (w == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } else if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } - - if (!s1 || (s1 >= 0x8080 && !s2)) { /* not found or X 0212 */ - for (unsigned int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (cp932ext1_ucs_table[i] == w) { - s1 = ((i/94 + 0x2D) << 8) + (i%94 + 0x21); - goto emit_output; - } - } - - for (unsigned int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { - if (cp932ext3_ucs_table[i] == w) { - s1 = ((i/94 + 0x93) << 8) + (i%94 + 0x21); - goto emit_output; - } - } - - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp932); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - -emit_output: - if (s1 < 0x100) { - out = mb_convert_buf_add(out, s1); - } else { - c1 = (s1 >> 8) & 0xFF; - c2 = s1 & 0xFF; - SJIS_ENCODE(c1, c2, s1, s2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp932.h b/ext/mbstring/libmbfl/filters/mbfilter_cp932.h deleted file mode 100644 index 8dce3ab9e91d8..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp932.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_CP932_H -#define MBFL_MBFILTER_CP932_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_cp932; -extern const struct mbfl_convert_vtbl vtbl_cp932_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp932; - -extern const mbfl_encoding mbfl_encoding_sjiswin; -extern const struct mbfl_convert_vtbl vtbl_sjiswin_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjiswin; - -int mbfl_filt_conv_cp932_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp932(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_sjiswin(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_CP932_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c b/ext/mbstring/libmbfl/filters/mbfilter_cp936.c deleted file mode 100644 index ba3e6c6436708..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.c +++ /dev/null @@ -1,439 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this file was separated from mbfilter_cn.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_cp936.h" -#define UNICODE_TABLE_CP936_DEF -#include "unicode_table_cp936.h" - -static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_cp936_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_cp936(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - - -static const unsigned char mblen_table_cp936[] = { /* 0x81-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_cp936_aliases[] = {"CP-936", "GBK", NULL}; - -const mbfl_encoding mbfl_encoding_cp936 = { - mbfl_no_encoding_cp936, - "CP936", - "CP936", - mbfl_encoding_cp936_aliases, - mblen_table_cp936, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_cp936_wchar, - &vtbl_wchar_cp936, - mb_cp936_to_wchar, - mb_wchar_to_cp936, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_cp936_wchar = { - mbfl_no_encoding_cp936, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_cp936_wchar, - mbfl_filt_conv_cp936_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_cp936 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_cp936, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_cp936, - mbfl_filt_conv_common_flush, - NULL, -}; - - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter) -{ - int k; - int c1, c2, w = -1; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c == 0x80) { /* euro sign */ - CK((*filter->output_function)(0x20ac, filter->data)); - } else if (c < 0xff) { /* dbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { /* 0xff */ - CK((*filter->output_function)(0xf8f5, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - c1 = filter->cache; - - if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && - (c >= 0xa1 && c <= 0xfe)) { - /* UDA part1,2: U+E000-U+E4C5 */ - w = 94*(c1 >= 0xf8 ? 
c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000; - CK((*filter->output_function)(w, filter->data)); - } else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { - /* UDA part3 : U+E4C6-U+E765*/ - w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6; - CK((*filter->output_function)(w, filter->data)); - } - - c2 = (c1 << 8) | c; - - if (w <= 0 && - ((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) || - (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) || - (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) { - for (k = 0; k < mbfl_cp936_pua_tbl_max; k++) { - if (c2 >= mbfl_cp936_pua_tbl[k][2] && - c2 <= mbfl_cp936_pua_tbl[k][2] + - mbfl_cp936_pua_tbl[k][1] - mbfl_cp936_pua_tbl[k][0]) { - w = c2 - mbfl_cp936_pua_tbl[k][2] + mbfl_cp936_pua_tbl[k][0]; - CK((*filter->output_function)(w, filter->data)); - break; - } - } - } - - if (w <= 0) { - if (c1 < 0xff && c1 > 0x80 && c >= 0x40 && c < 0xff && c != 0x7f) { - w = (c1 - 0x81)*192 + c - 0x40; - ZEND_ASSERT(w < cp936_ucs_table_size); - CK((*filter->output_function)(cp936_ucs_table[w], filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_cp936_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter) -{ - int k, k1, k2; - int c1, s = 0; - - if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { - /* U+0000 - U+0451 */ - s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; - } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { - /* U+2000 - U+26FF */ - if (c == 0x203e) { - s = 0xa3fe; - } else if (c == 0x2218) { - s = 0xa1e3; - } else if 
(c == 0x223c) { - s = 0xa1ab; - } else { - s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; - } - } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { - /* U+2F00 - U+33FF */ - s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; - } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { - /* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */ - s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; - } else if (c >= 0xe000 && c <= 0xe864) { /* PUA */ - if (c < 0xe766) { - if (c < 0xe4c6) { - c1 = c - 0xe000; - s = (c1 % 94) + 0xa1; c1 /= 94; - s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8; - } else { - c1 = c - 0xe4c6; - s = ((c1 / 96) + 0xa1) << 8; c1 %= 96; - s |= c1 + (c1 >= 0x3f ? 0x41 : 0x40); - } - } else { - /* U+E766..U+E864 */ - k1 = 0; k2 = mbfl_cp936_pua_tbl_max; - while (k1 < k2) { - k = (k1 + k2) >> 1; - if (c < mbfl_cp936_pua_tbl[k][0]) { - k2 = k; - } else if (c > mbfl_cp936_pua_tbl[k][1]) { - k1 = k + 1; - } else { - s = c - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2]; - break; - } - } - } - } else if (c == 0xf8f5) { - s = 0xff; - } else if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) { - /* U+F900-FA2F CJK Compatibility Ideographs */ - s = ucs_ci_cp936_table[c - ucs_ci_cp936_table_min]; - } else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) { - s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min]; - } else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) { - s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min]; /* U+FE50-FE6F Small Form Variants */ - } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { - /* U+FF00-FFFF HW/FW Forms */ - if (c == 0xff04) { - s = 0xa1e7; - } else if (c == 0xff5e) { - s = 0xa1ab; - } else if (c >= 0xff01 && c <= 0xff5d) { - s = c - 0xff01 + 0xa3a1; - } else if (c >= 0xffe0 && c <= 0xffe5) { - s = ucs_hff_s_cp936_table[c-0xffe0]; - } - } - - if (s <= 0) { - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - 
} - } - - if (s >= 0) { - if (s <= 0x80 || s == 0xff) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_cp936_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c == 0x80) { - *out++ = 0x20AC; /* Euro sign */ - } else if (c < 0xFF) { - if (p >= e) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned char c2 = *p++; - if (c2 < 0x40 || c2 == 0x7F || c2 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && c2 >= 0xA1) { - /* UDA part 1, 2: U+E000-U+E4C5 */ - *out++ = 94*(c >= 0xF8 ? c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000; - } else if (c >= 0xA1 && c <= 0xA7 && c2 < 0xA1) { - /* UDA part 3: U+E4C6-U+E765*/ - *out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 
0x41 : 0x40) + 0xE4C6; - } else { - unsigned int w = (c - 0x81)*192 + c2 - 0x40; /* Convert c, c2 into GB 2312 table lookup index */ - - /* For CP936 and GB18030, certain GB 2312 byte combinations are mapped to PUA codepoints, - * whereas the same combinations aren't mapped to any codepoint for HZ and EUC-CN - * To avoid duplicating the entire GB 2312 -> Unicode lookup table, we have three - * auxiliary tables which are consulted instead for specific ranges of lookup indices */ - if (w >= 0x192B) { - if (w <= 0x1EBE) { - *out++ = cp936_pua_tbl1[w - 0x192B]; - continue; - } else if (w >= 0x413A) { - if (w <= 0x413E) { - *out++ = cp936_pua_tbl2[w - 0x413A]; - continue; - } else if (w >= 0x5DD0 && w <= 0x5E20) { - *out++ = cp936_pua_tbl3[w - 0x5DD0]; - continue; - } - } - } - - ZEND_ASSERT(w < cp936_ucs_table_size); - *out++ = cp936_ucs_table[w]; - } - } else { - *out++ = 0xF8F5; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_cp936(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { - /* U+0000-U+0451 */ - s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; - } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { - /* U+2000-U+26FF */ - if (w == 0x203E) { - s = 0xA3FE; - } else if (w == 0x2218) { - s = 0xA1E3; - } else if (w == 0x223C) { - s = 0xA1AB; - } else { - s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; - } - } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { - /* U+2F00-U+33FF */ - s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; - } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { - /* U+4D00-9FFF CJK Unified Ideographs (+ Extension A) */ - s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; - } 
else if (w >= 0xE000 && w <= 0xE864) { - /* PUA */ - if (w < 0xe766) { - if (w < 0xe4c6) { - unsigned int c1 = w - 0xE000; - s = (c1 % 94) + 0xA1; - c1 /= 94; - s |= (c1 < 0x6 ? c1 + 0xAA : c1 + 0xF2) << 8; - } else { - unsigned int c1 = w - 0xE4C6; - s = ((c1 / 96) + 0xA1) << 8; - c1 %= 96; - s |= c1 + (c1 >= 0x3F ? 0x41 : 0x40); - } - } else { - /* U+E766-U+E864 */ - unsigned int k1 = 0; - unsigned int k2 = mbfl_cp936_pua_tbl_max; - while (k1 < k2) { - int k = (k1 + k2) >> 1; - if (w < mbfl_cp936_pua_tbl[k][0]) { - k2 = k; - } else if (w > mbfl_cp936_pua_tbl[k][1]) { - k1 = k + 1; - } else { - s = w - mbfl_cp936_pua_tbl[k][0] + mbfl_cp936_pua_tbl[k][2]; - break; - } - } - } - } else if (w == 0xF8F5) { - s = 0xFF; - } else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) { - /* U+F900-U+FA2F CJK Compatibility Ideographs */ - s = ucs_ci_cp936_table[w - ucs_ci_cp936_table_min]; - } else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) { - s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min]; - } else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) { - /* U+FE50-U+FE6F Small Form Variants */ - s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min]; - } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { - /* U+FF00-U+FFFF HW/FW Forms */ - if (w == 0xFF04) { - s = 0xA1E7; - } else if (w == 0xFF5E) { - s = 0xA1AB; - } else if (w >= 0xFF01 && w <= 0xFF5D) { - s = w - 0xFF01 + 0xA3A1; - } else if (w >= 0xFFE0 && w <= 0xFFE5) { - s = ucs_hff_s_cp936_table[w - 0xFFE0]; - } - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_cp936); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } - } else if (s <= 0x80 || s == 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git 
a/ext/mbstring/libmbfl/filters/mbfilter_cp936.h b/ext/mbstring/libmbfl/filters/mbfilter_cp936.h deleted file mode 100644 index d10391f5d2201..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_cp936.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_cn.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_CP936_H -#define MBFL_MBFILTER_CP936_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_cp936; -extern const struct mbfl_convert_vtbl vtbl_cp936_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_cp936; - -int mbfl_filt_conv_cp936_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_cp936(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_CP936_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c deleted file mode 100644 index d8181d7f7c30d..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_cn.c - * by Moriyoshi Koizumi on 4 Dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_cn.h" - -#include "unicode_table_cp936.h" - -static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_euc_cn_aliases[] = {"CN-GB", "EUC_CN", "eucCN", "x-euc-cn", "gb2312", NULL}; - -const mbfl_encoding mbfl_encoding_euc_cn = { - mbfl_no_encoding_euc_cn, - "EUC-CN", - "CN-GB", - mbfl_encoding_euc_cn_aliases, - mblen_table_euccn, - 0, - &vtbl_euccn_wchar, - &vtbl_wchar_euccn, - mb_euccn_to_wchar, - mb_wchar_to_euccn, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_euccn_wchar = { - mbfl_no_encoding_euc_cn, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_euccn_wchar, - mbfl_filt_conv_euccn_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_euccn = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_euc_cn, - mbfl_filt_conv_common_ctor, - NULL, - 
mbfl_filt_conv_wchar_euccn, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if ((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) { /* dbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xA0 && c < 0xFF) { - w = (c1 - 0x81)*192 + c - 0x40; - ZEND_ASSERT(w < cp936_ucs_table_size); - if (w == 0x1864) { - w = 0x30FB; - } else if (w == 0x186A) { - w = 0x2015; - } else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) { - w = 0; - } else { - w = cp936_ucs_table[w]; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { - if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261) { - s = 0; - } else { - s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; - } - } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { - if (c == 0x2015) { - s = 0xA1AA; - } else if (c == 0x2014 || (c >= 0x2170 && c <= 0x2179)) { - s = 0; - } else { - s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; - } - } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { - if (c == 0x30FB) { - s = 0xA1A4; - } else { - s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; - } - } 
else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { - s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; - } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { - if (c == 0xFF04) { - s = 0xA1E7; - } else if (c == 0xFF5E) { - s = 0xA1AB; - } else if (c >= 0xFF01 && c <= 0xFF5D) { - s = c - 0xFF01 + 0xA3A1; - } else if (c >= 0xFFE0 && c <= 0xFFE5) { - s = ucs_hff_s_cp936_table[c - 0xFFE0]; - } - } - - /* exclude CP936 extensions */ - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = 0; - } - - if (s <= 0) { - if (c < 0x80) { - s = c; - } else if (s <= 0) { - s = -1; - } - } - - if (s >= 0) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s & 0xFF, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) && p < e) { - unsigned char c2 = *p++; - - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int w = (c - 0x81)*192 + c2 - 0x40; - ZEND_ASSERT(w < cp936_ucs_table_size); - if (w == 0x1864) { - w = 0x30FB; - } else if (w == 0x186A) { - w = 0x2015; - } else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 
0x1DBB && w <= 0x1DC4)) { - w = 0; - } else { - w = cp936_ucs_table[w]; - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { - if (w != 0xB7 && w != 0x144 && w != 0x148 && w != 0x251 && w != 0x261) { - s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; - } - } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { - if (w == 0x2015) { - s = 0xA1AA; - } else if (w != 0x2014 && (w < 0x2170 || w > 0x2179)) { - s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; - } - } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { - if (w == 0x30FB) { - s = 0xA1A4; - } else { - s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; - } - } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { - s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; - } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { - if (w == 0xFF04) { - s = 0xA1E7; - } else if (w == 0xFF5E) { - s = 0xA1AB; - } else if (w >= 0xFF01 && w <= 0xFF5D) { - s = w - 0xFF01 + 0xA3A1; - } else if (w >= 0xFFE0 && w <= 0xFFE5) { - s = ucs_hff_s_cp936_table[w - 0xFFE0]; - } - } - - /* Exclude CP936 extensions */ - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = 0; - } - - if (!s) { - if (w < 0x80) { - out = mb_convert_buf_add(out, w); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euccn); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else { - out = mb_convert_buf_add2(out, (s >> 8) 
& 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h deleted file mode 100644 index 7ef92d8b4b87b..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_cn.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_euc_cn.h - * by Moriyoshi Koizumi on 4 Dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_EUC_CN_H -#define MBFL_MBFILTER_EUC_CN_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_euc_cn; -extern const struct mbfl_convert_vtbl vtbl_euccn_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_euccn; - -int mbfl_filt_conv_euccn_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_euccn(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_CN_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c deleted file mode 100644 index d9b1362d15f93..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.c +++ /dev/null @@ -1,373 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_jp.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" - -static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_euc_jp_aliases[] = {"EUC", "EUC_JP", "eucJP", "x-euc-jp", NULL}; - -const mbfl_encoding mbfl_encoding_euc_jp = { - mbfl_no_encoding_euc_jp, - "EUC-JP", - "EUC-JP", - mbfl_encoding_euc_jp_aliases, - mblen_table_eucjp, - 0, - &vtbl_eucjp_wchar, - &vtbl_wchar_eucjp, - mb_eucjp_to_wchar, - mb_wchar_to_eucjp, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_eucjp_wchar = { - mbfl_no_encoding_euc_jp, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_eucjp_wchar, - mbfl_filt_conv_eucjp_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_eucjp = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_euc_jp, - 
mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_eucjp, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -/* - * EUC-JP => wchar - */ -int -mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w = 0; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xff) { /* X 0208 first char */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0212 first char */ - filter->status = 3; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* got first half */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xa0 && c < 0xff) { - s = (c1 - 0xa1)*94 + c - 0xa1; - if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - if (!w) - w = MBFL_BAD_INPUT; - } else { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* got 0x8e */ - filter->status = 0; - if (c > 0xa0 && c < 0xe0) { - w = 0xfec0 + c; - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* got 0x8f, JIS X 0212 first byte */ - filter->status++; - filter->cache = c; - break; - - case 4: /* got 0x8f, JIS X 0212 second byte */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xA0 && c < 0xFF && c1 > 0xA0 && c1 < 0xFF) { - s = (c1 - 0xa1)*94 + c - 0xa1; - if (s >= 0 && s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - if (!w) - w = MBFL_BAD_INPUT; - } else { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - 
break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_eucjp_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - filter->status = 0; - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -/* - * wchar => EUC-JP - */ -int -mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c == 0xAF) { /* U+00AF is MACRON */ - s = 0xA2B4; /* Use JIS X 0212 overline */ - } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - if (s <= 0) { - if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } else if (c == 0) { - s = 0; - } else { - s = -1; - } - } - if (s >= 0) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x100) { /* kana */ - CK((*filter->output_function)(0x8e, filter->data)); - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x8080) { /* X 0208 */ - CK((*filter->output_function)(((s >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s & 0xff) | 0x80, filter->data)); - } else { /* X 0212 */ - CK((*filter->output_function)(0x8f, 
filter->data)); - CK((*filter->output_function)(((s >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s & 0xff) | 0x80, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_eucjp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xFE && p < e) { - /* JISX 0208 */ - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int s = (c - 0xA1)*94 + c2 - 0xA1; - if (s < jisx0208_ucs_table_size) { - uint32_t w = jisx0208_ucs_table[s]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8E && p < e) { - /* Kana */ - unsigned char c2 = *p++; - *out++ = (c2 >= 0xA1 && c2 <= 0xDF) ? 
0xFEC0 + c2 : MBFL_BAD_INPUT; - } else if (c == 0x8F) { - /* JISX 0212 */ - if ((e - p) >= 2) { - unsigned char c2 = *p++; - unsigned char c3 = *p++; - if (c3 >= 0xA1 && c3 <= 0xFE && c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1; - if (s < jisx0212_ucs_table_size) { - uint32_t w = jisx0212_ucs_table[s]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - p = e; /* Jump to end of string */ - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_eucjp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w == 0xAF) { /* U+00AF is MACRON */ - s = 0xA2B4; /* Use JIS X 0212 overline */ - } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s == 0) { - if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } else { - 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - } - - if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else if (s < 0x100) { - out = mb_convert_buf_add2(out, 0x8E, s); - } else if (s < 0x8080) { - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); - out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h deleted file mode 100644 index cc7aa3a6bff49..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_EUC_JP_H -#define MBFL_MBFILTER_EUC_JP_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_euc_jp; -extern const struct mbfl_convert_vtbl vtbl_eucjp_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp; - -int mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_JP_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h deleted file mode 100644 index e86fad9564cd2..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_2004.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.h - * by rui hirokawa on 15 aug 2011. 
- * - */ - -#ifndef MBFL_MBFILTER_EUC_JP_2004_H -#define MBFL_MBFILTER_EUC_JP_2004_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_eucjp2004; -extern const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004; - -#endif /* MBFL_MBFILTER_EUC_JP_2004_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c deleted file mode 100644 index 96b9546dde105..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_jp_win.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" - -static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_eucjp_win_aliases[] = {"eucJP-open", "eucJP-ms", NULL}; - -const mbfl_encoding mbfl_encoding_eucjp_win = { - mbfl_no_encoding_eucjp_win, - "eucJP-win", - "EUC-JP", - mbfl_encoding_eucjp_win_aliases, - mblen_table_eucjp, - 0, - &vtbl_eucjpwin_wchar, - &vtbl_wchar_eucjpwin, - mb_eucjpwin_to_wchar, - mb_wchar_to_eucjpwin, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = { - mbfl_no_encoding_eucjp_win, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_eucjpwin_wchar, - mbfl_filt_conv_eucjpwin_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin = { - 
mbfl_no_encoding_wchar, - mbfl_no_encoding_eucjp_win, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_eucjpwin, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w, n; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c >= 0xa1 && c <= 0xfe) { /* CP932 first char */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8e) { /* kana first char */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0212 first char */ - filter->status = 3; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* got first half */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xa0 && c < 0xff) { - w = 0; - s = (c1 - 0xa1)*94 + c - 0xa1; - if (s <= 137) { - if (s == 31) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xffe0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xffe1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xffe2; /* FULLWIDTH NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= (84 * 94)) { /* user (85ku - 94ku) */ - w = s - (84 * 94) + 0xe000; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* got 0x8e, X0201 kana */ 
- filter->status = 0; - if (c > 0xa0 && c < 0xe0) { - w = 0xfec0 + c; - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* got 0x8f, X 0212 first char */ - filter->status++; - filter->cache = c; - break; - - case 4: /* got 0x8f, X 0212 second char */ - filter->status = 0; - c1 = filter->cache; - if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff) { - s = (c1 - 0xa1)*94 + c - 0xa1; - - if (s >= 0 && s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - - if (w == 0x007e) { - w = 0xff5e; /* FULLWIDTH TILDE */ - } - } else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */ - s = (c1 << 8) | c; - w = 0; - n = 0; - while (n < cp932ext3_eucjp_table_size) { - if (s == cp932ext3_eucjp_table[n]) { - if (n < (cp932ext3_ucs_table_max - cp932ext3_ucs_table_min)) { - w = cp932ext3_ucs_table[n]; - } - break; - } - n++; - } - } else if (s >= (84*94)) { /* user (85ku - 94ku) */ - w = s - (84*94) + (0xe000 + (94*10)); - } else { - w = 0; - } - - if (w == 0x00A6) { - w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - filter->status = 0; - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1 = 0; - - if (c == 0xAF) { /* U+00AF is MACRON */ - s1 = 0xA2B4; /* Use JIS X 0212 overline */ - } else if (c == 0x203E) { - s1 = 0x7E; - } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = 
ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xe000 && c < (0xe000 + 10*94)) { /* user (X0208 85ku - 94ku) */ - s1 = c - 0xe000; - c1 = s1/94 + 0x75; - c2 = s1%94 + 0x21; - s1 = (c1 << 8) | c2; - } else if (c >= (0xe000 + 10*94) && c < (0xe000 + 20*94)) { /* user (X0212 85ku - 94ku) */ - s1 = c - (0xe000 + 10*94); - c1 = s1/94 + 0xf5; - c2 = s1%94 + 0xa1; - s1 = (c1 << 8) | c2; - } - - if (s1 == 0xa2f1) { - s1 = 0x2d62; /* NUMERO SIGN */ - } - - if (s1 <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s1 = 0x5C; - } else if (c == 0x2014) { - s1 = 0x213D; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } else { - s1 = -1; - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - const int oh = cp932ext1_ucs_table_min / 94; - - if (c == cp932ext1_ucs_table[c1]) { - s1 = ((c1 / 94 + oh + 0x21) << 8) + (c1 % 94 + 0x21); - break; - } - c1++; - } - if (s1 < 0) { - c1 = 0; - c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext3_ucs_table[c1]) { - if (c1 < cp932ext3_eucjp_table_size) { - s1 = cp932ext3_eucjp_table[c1]; - } - break; - } - c1++; - } - } - } - - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - 
s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* latin */ - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x100) { /* kana */ - CK((*filter->output_function)(0x8e, filter->data)); - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x8080) { /* X 0208 */ - CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); - } else { /* X 0212 */ - CK((*filter->output_function)(0x8f, filter->data)); - CK((*filter->output_function)(((s1 >> 8) & 0xff) | 0x80, filter->data)); - CK((*filter->output_function)((s1 & 0xff) | 0x80, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xFE && p < e) { - unsigned char c2 = *p++; - - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= (84 * 94)) { - w = s - (84 * 94) + 0xE000; - } - } - - if (!w) - w 
= MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8E && p < e) { - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xDF) { - *out++ = 0xFEC0 + c2; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8F && p < e) { - unsigned char c2 = *p++; - if (p == e) { - *out++ = MBFL_BAD_INPUT; - continue; - } - unsigned char c3 = *p++; - - if (c2 >= 0xA1 && c2 <= 0xFE && c3 >= 0xA1 && c3 <= 0xFE) { - unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1, w = 0; - - if (s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - if (w == 0x7E) - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s >= (82*94) && s < (84*94)) { - s = (c2 << 8) | c3; - for (int i = 0; i < cp932ext3_eucjp_table_size; i++) { - if (cp932ext3_eucjp_table[i] == s) { - w = cp932ext3_ucs_table[i]; - break; - } - } - } else if (s >= (84*94)) { - w = s - (84*94) + 0xE000 + (94*10); - } - - if (w == 0xA6) - w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } else if (w == 0xAF) { /* U+00AF is MACRON */ - s = 0xA2B4; /* Use JIS X 0212 overline */ - } else if (w == 0x203E) { - s = 0x7E; - } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else 
if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 10*94)) { - s = w - 0xE000; - s = ((s/94 + 0x75) << 8) + (s%94) + 0x21; - } else if (w >= (0xE000 + 10*94) && w < (0xE000 + 20*94)) { - s = w - (0xE000 + 10*94); - s = ((s/94 + 0xF5) << 8) + (s%94) + 0xA1; - } - - if (s == 0xA2F1) - s = 0x2D62; /* NUMERO SIGN */ - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x5C; - } else if (w == 0x2014) { /* EM DASH */ - s = 0x213D; - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else { - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (cp932ext1_ucs_table[i] == w) { - s = (((i/94) + (cp932ext1_ucs_table_min/94) + 0x21) << 8) + (i%94) + 0x21; - break; - } - } - - if (!s) { - for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { - if (cp932ext3_ucs_table[i] == w) { - s = cp932ext3_eucjp_table[i]; - break; - } - } - } - } - } - - if (!s) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjpwin); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else if (s < 0x100) { - out = mb_convert_buf_add2(out, 0x8E, s); - } else if (s < 0x8080) { - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); - out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h 
b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h deleted file mode 100644 index bb1e4dc392d7b..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_EUC_JP_WIN_H -#define MBFL_MBFILTER_EUC_JP_WIN_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_eucjp_win; -extern const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_eucjpwin; - -int mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_JP_WIN_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c deleted file mode 100644 index 2c95a80ba965c..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_kr.h" -#include "unicode_table_uhc.h" - -static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_euckr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_euckr[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_euc_kr_aliases[] = {"EUC_KR", "eucKR", "x-euc-kr", NULL}; - -const mbfl_encoding mbfl_encoding_euc_kr = { - mbfl_no_encoding_euc_kr, - "EUC-KR", - "EUC-KR", - mbfl_encoding_euc_kr_aliases, - mblen_table_euckr, - 0, - &vtbl_euckr_wchar, - &vtbl_wchar_euckr, - mb_euckr_to_wchar, - mb_wchar_to_euckr, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_euckr_wchar = { - mbfl_no_encoding_euc_kr, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_euckr_wchar, - mbfl_filt_conv_euckr_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_euckr = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_euc_kr, - mbfl_filt_conv_common_ctor, - NULL, - 
mbfl_filt_conv_wchar_euckr, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, w, flag; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9) { /* dbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - c1 = filter->cache; - flag = 0; - if (c1 >= 0xa1 && c1 <= 0xc6) { - flag = 1; - } else if (c1 >= 0xc7 && c1 <= 0xfe && c1 != 0xc9) { - flag = 2; - } - if (flag > 0 && c >= 0xa1 && c <= 0xfe) { - if (flag == 1) { /* 1st: 0xa1..0xc6, 2nd: 0x41..0x7a, 0x81..0xfe */ - w = (c1 - 0x81)*190 + c - 0x41; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - } else { /* 1st: 0xc7..0xc8,0xca..0xfe, 2nd: 0xa1..0xfe */ - w = (c1 - 0xc7)*94 + c - 0xa1; - ZEND_ASSERT(w < uhc3_ucs_table_size); - w = uhc3_ucs_table[w]; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; - } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; - } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; - } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; - } else if (c 
>= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; - } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; - } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; - } - - /* exclude UHC extension area (although we are using the UHC conversion tables) */ - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = 0; - } - - if (s <= 0) { - if (c < 0x80) { - s = c; - } else { - s = -1; - } - } - - if (s >= 0) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_euckr_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_euckr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (((c >= 0xA1 && c <= 0xAC) || (c >= 0xB0 && c <= 0xFD)) && c != 0xC9 && p < e) { - unsigned char c2 = *p++; - if (c2 < 0xA1 || c2 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - if (c <= 0xC6) { - unsigned int w = (c - 0x81)*190 + c2 - 0x41; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - unsigned int w = (c - 0xC7)*94 + c2 - 0xA1; - ZEND_ASSERT(w < 
uhc3_ucs_table_size); - w = uhc3_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_euckr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; - } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; - } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; - } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; - } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; - } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; - } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; - } - - /* Exclude UHC extension area (although we are using the UHC conversion tables) */ - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = 0; - } - - if (!s) { - if (w < 0x80) { - out = mb_convert_buf_add(out, w); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euckr); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h 
deleted file mode 100644 index e0c13cf53ad73..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_kr.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.h - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_EUC_KR_H -#define MBFL_MBFILTER_EUC_KR_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_euc_kr; -extern const struct mbfl_convert_vtbl vtbl_euckr_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_euckr; - -int mbfl_filt_conv_euckr_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_euckr(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_KR_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c deleted file mode 100644 index 522f5f4a05a5b..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.c +++ /dev/null @@ -1,375 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Rui Hirokawa - * - */ -/* - * The source code included in this files was separated from mbfilter_tw.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_euc_tw.h" - -#include "unicode_table_cns11643.h" - -static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_euctw_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_euctw(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_euctw[] = { /* 0xA1-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - - -static const char *mbfl_encoding_euc_tw_aliases[] = {"EUC_TW", "eucTW", "x-euc-tw", NULL}; - -const mbfl_encoding mbfl_encoding_euc_tw = { - mbfl_no_encoding_euc_tw, - "EUC-TW", - "EUC-TW", - mbfl_encoding_euc_tw_aliases, - mblen_table_euctw, - 0, - &vtbl_euctw_wchar, - &vtbl_wchar_euctw, - mb_euctw_to_wchar, - mb_wchar_to_euctw, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_euctw_wchar = { - mbfl_no_encoding_euc_tw, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_euctw_wchar, - mbfl_filt_conv_euctw_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_euctw = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_euc_tw, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_euctw, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) { /* 2-byte character, first byte */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8E) { /* 4-byte character, first byte */ - filter->status = 2; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* 2-byte character, second byte */ - filter->status = 0; - c1 = filter->cache; - if (c > 0xA0 && c < 0xFF) { - w = (c1 - 0xA1)*94 + (c - 0xA1); - if (w >= 0 && w < cns11643_1_ucs_table_size) { - w = cns11643_1_ucs_table[w]; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - 
filter->status = filter->cache = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* got 0x8e, second byte */ - if (c == 0xA1 || c == 0xA2 || c == 0xAE) { - filter->status = 3; - filter->cache = c - 0xA1; - } else { - filter->status = filter->cache = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* got 0x8e, third byte */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0xA1 && ((c1 == 0 && ((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3) || - (c1 == 1 && c <= 0xF2) || (c1 == 13 && c <= 0xE7))) { - filter->status = 4; - filter->cache = (c1 << 8) + c - 0xA1; - } else { - filter->status = filter->cache = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 4: /* multi-byte character, fourth byte */ - filter->status = 0; - c1 = filter->cache; - if (c1 <= 0xDFF && c > 0xA0 && c < 0xFF) { - int plane = (c1 & 0xF00) >> 8; /* This is actually the CNS-11643 plane minus one */ - s = (c1 & 0xFF)*94 + c - 0xA1; - w = 0; - if (s >= 0) { - /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3", - * and added tens of thousands more characters in planes 4, 5, 6, and 7 - * We only support the older version of CNS-11643 - * This is the same as iconv from glibc 2.2 */ - if (plane == 0 && s < cns11643_1_ucs_table_size) { - w = cns11643_1_ucs_table[s]; - } else if (plane == 1 && s < cns11643_2_ucs_table_size) { - w = cns11643_2_ucs_table[s]; - } else if (plane == 13 && s < cns11643_14_ucs_table_size) { - w = cns11643_14_ucs_table[s]; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - filter->status = filter->cache = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c 
>= ucs_a1_cns11643_table_min && c < ucs_a1_cns11643_table_max) { - s = ucs_a1_cns11643_table[c - ucs_a1_cns11643_table_min]; - } else if (c >= ucs_a2_cns11643_table_min && c < ucs_a2_cns11643_table_max) { - s = ucs_a2_cns11643_table[c - ucs_a2_cns11643_table_min]; - } else if (c >= ucs_a3_cns11643_table_min && c < ucs_a3_cns11643_table_max) { - s = ucs_a3_cns11643_table[c - ucs_a3_cns11643_table_min]; - } else if (c >= ucs_i_cns11643_table_min && c < ucs_i_cns11643_table_max) { - s = ucs_i_cns11643_table[c - ucs_i_cns11643_table_min]; - } else if (c >= ucs_r_cns11643_table_min && c < ucs_r_cns11643_table_max) { - s = ucs_r_cns11643_table[c - ucs_r_cns11643_table_min]; - } - - if (s <= 0) { - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } - - if (s >= 0) { - int plane = (s & 0x1F0000) >> 16; - if (plane <= 1) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - s = (s & 0xFFFF) | 0x8080; - CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s & 0xFF, filter->data)); - } - } else { - s = (0x8EA00000 + (plane << 16)) | ((s & 0xFFFF) | 0x8080); - CK((*filter->output_function)(0x8e , filter->data)); - CK((*filter->output_function)((s >> 16) & 0xFF, filter->data)); - CK((*filter->output_function)((s >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s & 0xFF, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - return 0; -} - -static int mbfl_filt_conv_euctw_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - /* 2-byte or 4-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_euctw_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - 
uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (((c >= 0xA1 && c <= 0xA6) || (c >= 0xC2 && c <= 0xFD)) && c != 0xC3 && p < e) { - unsigned char c2 = *p++; - - if (c2 >= 0xA1 && c2 <= 0xFE) { - unsigned int w = (c - 0xA1)*94 + (c2 - 0xA1); - if (w < cns11643_1_ucs_table_size) { - w = cns11643_1_ucs_table[w]; - } else { - w = 0; - } - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8E && p < e) { - unsigned char c2 = *p++; - - if ((c2 == 0xA1 || c2 == 0xA2 || c2 == 0xAE) && p < e) { - unsigned int plane = c2 - 0xA1; /* This is actually the CNS-11643 plane minus one */ - unsigned char c3 = *p++; - - if (c3 >= 0xA1 && ((plane == 0 && ((c3 >= 0xA1 && c3 <= 0xA6) || (c3 >= 0xC2 && c3 <= 0xFD)) && c3 != 0xC3) || (plane == 1 && c3 <= 0xF2) || (plane == 13 && c3 <= 0xE7)) && p < e) { - unsigned char c4 = *p++; - - if (c2 <= 0xAE && c4 > 0xA0 && c4 < 0xFF) { - unsigned int s = (c3 - 0xA1)*94 + c4 - 0xA1, w = 0; - - /* A later version of CNS-11643 moved all the characters in "plane 14" to "plane 3", - * and added tens of thousands more characters in planes 4, 5, 6, and 7 - * We only support the older version of CNS-11643 - * This is the same as iconv from glibc 2.2 */ - if (plane == 0 && s < cns11643_1_ucs_table_size) { - w = cns11643_1_ucs_table[s]; - } else if (plane == 1 && s < cns11643_2_ucs_table_size) { - w = cns11643_2_ucs_table[s]; - } else if (plane == 13 && s < cns11643_14_ucs_table_size) { - w = cns11643_14_ucs_table[s]; - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - continue; - } - } - } - - *out++ = MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_euctw(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - 
MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_cns11643_table_min && w < ucs_a1_cns11643_table_max) { - s = ucs_a1_cns11643_table[w - ucs_a1_cns11643_table_min]; - } else if (w >= ucs_a2_cns11643_table_min && w < ucs_a2_cns11643_table_max) { - s = ucs_a2_cns11643_table[w - ucs_a2_cns11643_table_min]; - } else if (w >= ucs_a3_cns11643_table_min && w < ucs_a3_cns11643_table_max) { - s = ucs_a3_cns11643_table[w - ucs_a3_cns11643_table_min]; - } else if (w >= ucs_i_cns11643_table_min && w < ucs_i_cns11643_table_max) { - s = ucs_i_cns11643_table[w - ucs_i_cns11643_table_min]; - } else if (w >= ucs_r_cns11643_table_min && w < ucs_r_cns11643_table_max) { - s = ucs_r_cns11643_table[w - ucs_r_cns11643_table_min]; - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euctw); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - } - } else { - unsigned int plane = s >> 16; - if (plane <= 1) { - if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else { - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add4(out, 0x8E, 0xA0 + plane, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); - } - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h b/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h deleted file mode 100644 index 9c2ffa480210f..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_tw.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: Rui Hirokawa - * - */ -/* - * The source code included in this files was separated from mbfilter_tw.h - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_EUC_TW_H -#define MBFL_MBFILTER_EUC_TW_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_euc_tw; -extern const struct mbfl_convert_vtbl vtbl_euctw_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_euctw; - -int mbfl_filt_conv_euctw_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_euctw(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_EUC_TW_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c deleted file mode 100644 index 6485e735ed4ba..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.c +++ /dev/null @@ -1,644 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. 
- * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this file was separated from mbfilter_cp936.c - * by rui hirokawa on 11 Aug 2011. - * - */ - -#include "mbfilter.h" -#include "mbfilter_gb18030.h" - -#include "unicode_table_cp936.h" -#include "unicode_table_gb18030.h" - -static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_gb18030_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_gb18030(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const char *mbfl_encoding_gb18030_aliases[] = {"gb-18030", "gb-18030-2000", NULL}; - -const mbfl_encoding mbfl_encoding_gb18030 = { - mbfl_no_encoding_gb18030, - "GB18030", - "GB18030", - mbfl_encoding_gb18030_aliases, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_gb18030_wchar, - &vtbl_wchar_gb18030, - mb_gb18030_to_wchar, - mb_wchar_to_gb18030, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_gb18030_wchar = { - mbfl_no_encoding_gb18030, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_gb18030_wchar, - mbfl_filt_conv_gb18030_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_gb18030 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_gb18030, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_gb18030, - mbfl_filt_conv_common_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } 
while (0) - -/* `tbl` contains inclusive ranges, each represented by a pair of unsigned shorts */ -int mbfl_bisec_srch(int w, const unsigned short *tbl, int n) -{ - int l = 0, r = n-1; - while (l <= r) { - int probe = (l + r) >> 1; - unsigned short lo = tbl[2 * probe], hi = tbl[(2 * probe) + 1]; - if (w < lo) { - r = probe - 1; - } else if (w > hi) { - l = probe + 1; - } else { - return probe; - } - } - return -1; -} - -/* `tbl` contains single values, not ranges */ -int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n) -{ - int l = 0, r = n-1; - while (l <= r) { - int probe = (l + r) >> 1; - unsigned short val = tbl[probe]; - if (w < val) { - r = probe - 1; - } else if (w > val) { - l = probe + 1; - } else { - return probe; - } - } - return -1; -} - -int mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter) -{ - int k; - int c1, c2, c3, w = -1; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0x80 && c < 0xff) { /* dbcs/qbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs/qbcs second byte */ - c1 = filter->cache; - filter->status = 0; - - if (c1 >= 0x81 && c1 <= 0x84 && c >= 0x30 && c <= 0x39) { - /* 4 byte range: Unicode BMP */ - filter->status = 2; - filter->cache = (c1 << 8) | c; - return 0; - } else if (c1 >= 0x90 && c1 <= 0xe3 && c >= 0x30 && c <= 0x39) { - /* 4 byte range: Unicode 16 planes */ - filter->status = 2; - filter->cache = (c1 << 8) | c; - return 0; - } else if (((c1 >= 0xaa && c1 <= 0xaf) || (c1 >= 0xf8 && c1 <= 0xfe)) && (c >= 0xa1 && c <= 0xfe)) { - /* UDA part 1,2: U+E000-U+E4C5 */ - w = 94*(c1 >= 0xf8 ? 
c1 - 0xf2 : c1 - 0xaa) + (c - 0xa1) + 0xe000; - CK((*filter->output_function)(w, filter->data)); - } else if (c1 >= 0xa1 && c1 <= 0xa7 && c >= 0x40 && c < 0xa1 && c != 0x7f) { - /* UDA part3 : U+E4C6-U+E765*/ - w = 96*(c1 - 0xa1) + c - (c >= 0x80 ? 0x41 : 0x40) + 0xe4c6; - CK((*filter->output_function)(w, filter->data)); - } - - c2 = (c1 << 8) | c; - - if (w <= 0 && - ((c2 >= 0xa2ab && c2 <= 0xa9f0 + (0xe80f-0xe801)) || - (c2 >= 0xd7fa && c2 <= 0xd7fa + (0xe814-0xe810)) || - (c2 >= 0xfe50 && c2 <= 0xfe80 + (0xe864-0xe844)))) { - for (k = 0; k < mbfl_gb18030_pua_tbl_max; k++) { - if (c2 >= mbfl_gb18030_pua_tbl[k][2] && c2 <= mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][1] - mbfl_gb18030_pua_tbl[k][0]) { - w = c2 - mbfl_gb18030_pua_tbl[k][2] + mbfl_gb18030_pua_tbl[k][0]; - CK((*filter->output_function)(w, filter->data)); - break; - } - } - } - - if (w <= 0) { - if ((c1 >= 0xa1 && c1 <= 0xa9 && c >= 0xa1 && c <= 0xfe) || - (c1 >= 0xb0 && c1 <= 0xf7 && c >= 0xa1 && c <= 0xfe) || - (c1 >= 0x81 && c1 <= 0xa0 && c >= 0x40 && c <= 0xfe && c != 0x7f) || - (c1 >= 0xaa && c1 <= 0xfe && c >= 0x40 && c <= 0xa0 && c != 0x7f) || - (c1 >= 0xa8 && c1 <= 0xa9 && c >= 0x40 && c <= 0xa0 && c != 0x7f)) { - w = (c1 - 0x81)*192 + c - 0x40; - ZEND_ASSERT(w < cp936_ucs_table_size); - CK((*filter->output_function)(cp936_ucs_table[w], filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - break; - - case 2: /* qbcs third byte */ - c1 = (filter->cache >> 8) & 0xff; - c2 = filter->cache & 0xff; - filter->status = filter->cache = 0; - if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c >= 0x81 && c <= 0xfe) { - filter->cache = (c1 << 16) | (c2 << 8) | c; - filter->status = 3; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* qbcs fourth byte */ - c1 = (filter->cache >> 16) & 0xff; - c2 = (filter->cache >> 8) & 0xff; - c3 = filter->cache & 0xff; - 
filter->status = filter->cache = 0; - if (((c1 >= 0x81 && c1 <= 0x84) || (c1 >= 0x90 && c1 <= 0xe3)) && c2 >= 0x30 && c2 <= 0x39 && c3 >= 0x81 && c3 <= 0xfe && c >= 0x30 && c <= 0x39) { - if (c1 >= 0x90 && c1 <= 0xe3) { - w = ((((c1 - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c - 0x30) + 0x10000; - if (w > 0x10FFFF) { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - return 0; - } - } else { /* Unicode BMP */ - w = (((c1 - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c - 0x30); - if (w >= 0 && w <= 39419) { - k = mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max); - w += mbfl_gb_uni_ofst[k]; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - return 0; - } - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_gb18030_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status) { - /* multi-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter) -{ - int k, k1, k2; - int c1, s = 0, s1 = 0; - - if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { - if (c == 0x01f9) { - s = 0xa8bf; - } else { - s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; - } - } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { - if (c == 0x20ac) { /* euro-sign */ - s = 0xa2e3; - } else { - s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; - } - } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { - s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; - } else if (c >= ucs_i_cp936_table_min && c < ucs_i_cp936_table_max) { - s = ucs_i_cp936_table[c - ucs_i_cp936_table_min]; - } else 
if (c >= ucs_ci_cp936_table_min && c < ucs_ci_cp936_table_max) { - /* U+F900-FA2F CJK Compatibility Ideographs */ - if (c == 0xf92c) { - s = 0xfd9c; - } else if (c == 0xf979) { - s = 0xfd9d; - } else if (c == 0xf995) { - s = 0xfd9e; - } else if (c == 0xf9e7) { - s = 0xfd9f; - } else if (c == 0xf9f1) { - s = 0xfda0; - } else if (c >= 0xfa0c && c <= 0xfa29) { - s = ucs_ci_s_cp936_table[c - 0xfa0c]; - } - } else if (c >= ucs_cf_cp936_table_min && c < ucs_cf_cp936_table_max) { - /* FE30h CJK Compatibility Forms */ - s = ucs_cf_cp936_table[c - ucs_cf_cp936_table_min]; - } else if (c >= ucs_sfv_cp936_table_min && c < ucs_sfv_cp936_table_max) { - /* U+FE50-FE6F Small Form Variants */ - s = ucs_sfv_cp936_table[c - ucs_sfv_cp936_table_min]; - } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { - /* U+FF00-FFFF HW/FW Forms */ - if (c == 0xff04) { - s = 0xa1e7; - } else if (c == 0xff5e) { - s = 0xa1ab; - } else if (c >= 0xff01 && c <= 0xff5d) { - s = c - 0xff01 + 0xa3a1; - } else if (c >= 0xffe0 && c <= 0xffe5) { - s = ucs_hff_s_cp936_table[c-0xffe0]; - } - } - - /* While GB18030 and CP936 are very similar, some mappings are different between these encodings; - * do a binary search in a table of differing codepoints to see if we have one */ - if (s <= 0 && c >= mbfl_gb18030_c_tbl_key[0] && c <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) { - k1 = mbfl_bisec_srch2(c, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max); - if (k1 >= 0) { - s = mbfl_gb18030_c_tbl_val[k1]; - } - } - - if (c >= 0xe000 && c <= 0xe864) { /* PUA */ - if (c < 0xe766) { - if (c < 0xe4c6) { - c1 = c - 0xe000; - s = (c1 % 94) + 0xa1; - c1 /= 94; - s |= (c1 < 0x06 ? c1 + 0xaa : c1 + 0xf2) << 8; - } else { - c1 = c - 0xe4c6; - s = ((c1 / 96) + 0xa1) << 8; - c1 %= 96; - s |= c1 + (c1 >= 0x3f ? 
0x41 : 0x40); - } - } else { - /* U+E766..U+E864 */ - k1 = 0; - k2 = mbfl_gb18030_pua_tbl_max; - while (k1 < k2) { - k = (k1 + k2) >> 1; - if (c < mbfl_gb18030_pua_tbl[k][0]) { - k2 = k; - } else if (c > mbfl_gb18030_pua_tbl[k][1]) { - k1 = k + 1; - } else { - s = c - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2]; - break; - } - } - } - } - - /* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */ - if (s <= 0 && c >= 0x0080 && c <= 0xffff) { - /* BMP */ - s = mbfl_bisec_srch(c, mbfl_uni2gb_tbl, mbfl_gb_uni_max); - if (s >= 0) { - c1 = c - mbfl_gb_uni_ofst[s]; - s = (c1 % 10) + 0x30; - c1 /= 10; - s |= ((c1 % 126) + 0x81) << 8; - c1 /= 126; - s |= ((c1 % 10) + 0x30) << 16; - c1 /= 10; - s1 = c1 + 0x81; - } - } else if (c >= 0x10000 && c <= 0x10ffff) { - /* Code set 3: Unicode U+10000..U+10FFFF */ - c1 = c - 0x10000; - s = (c1 % 10) + 0x30; - c1 /= 10; - s |= ((c1 % 126) + 0x81) << 8; - c1 /= 126; - s |= ((c1 % 10) + 0x30) << 16; - c1 /= 10; - s1 = c1 + 0x90; - } - - if (c == 0) { - s = 0; - } else if (s == 0) { - s = -1; - } - - if (s >= 0) { - if (s <= 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else if (s1 > 0) { /* qbcs */ - CK((*filter->output_function)(s1 & 0xff, filter->data)); - CK((*filter->output_function)((s >> 16) & 0xff, filter->data)); - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } else { /* dbcs */ - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static const unsigned short gb18030_pua_tbl3[] = { -/* 0xFE50 */ -0x0000,0xE816,0xE817,0xE818,0x0000,0x0000,0x0000,0x0000, -0x0000,0xE81E,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -0x0000,0xE826,0x0000,0x0000,0x0000,0x0000,0xE82B,0xE82C, 
-0x0000,0x0000,0x0000,0x0000,0xE831,0xE832,0x0000,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE83B,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xE843,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -0xE854,0xE855,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, -/* 0xFEA0 */ -0xE864 -}; - -static size_t mb_gb18030_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c == 0x80 || c == 0xFF) { - *out++ = MBFL_BAD_INPUT; - } else { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - - if (((c >= 0x81 && c <= 0x84) || (c >= 0x90 && c <= 0xE3)) && c2 >= 0x30 && c2 <= 0x39) { - if (p >= e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c3 = *p++; - - if (c3 >= 0x81 && c3 <= 0xFE && p < e) { - unsigned char c4 = *p++; - - if (c4 >= 0x30 && c4 <= 0x39) { - if (c >= 0x90 && c <= 0xE3) { - unsigned int w = ((((c - 0x90)*10 + (c2 - 0x30))*126 + (c3 - 0x81)))*10 + (c4 - 0x30) + 0x10000; - *out++ = (w > 0x10FFFF) ? MBFL_BAD_INPUT : w; - } else { - /* Unicode BMP */ - unsigned int w = (((c - 0x81)*10 + (c2 - 0x30))*126 + (c3 - 0x81))*10 + (c4 - 0x30); - if (w <= 39419) { - *out++ = w + mbfl_gb_uni_ofst[mbfl_bisec_srch(w, mbfl_gb2uni_tbl, mbfl_gb_uni_max)]; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (((c >= 0xAA && c <= 0xAF) || (c >= 0xF8 && c <= 0xFE)) && (c2 >= 0xA1 && c2 <= 0xFE)) { - /* UDA part 1, 2: U+E000-U+E4C5 */ - *out++ = 94*(c >= 0xF8 ? 
c - 0xF2 : c - 0xAA) + (c2 - 0xA1) + 0xE000; - } else if (c >= 0xA1 && c <= 0xA7 && c2 >= 0x40 && c2 < 0xA1 && c2 != 0x7F) { - /* UDA part 3: U+E4C6-U+E765 */ - *out++ = 96*(c - 0xA1) + c2 - (c2 >= 0x80 ? 0x41 : 0x40) + 0xE4C6; - } else if (c2 >= 0x40 && c2 != 0x7F && c2 != 0xFF) { - unsigned int w = (c - 0x81)*192 + c2 - 0x40; - - if (w >= 0x192B) { - if (w <= 0x1EBE) { - if (w != 0x1963 && w != 0x1DBF && (w < 0x1E49 || w > 0x1E55) && w != 0x1E7F) { - *out++ = cp936_pua_tbl1[w - 0x192B]; - continue; - } - } else if (w >= 0x413A) { - if (w <= 0x413E) { - *out++ = cp936_pua_tbl2[w - 0x413A]; - continue; - } else if (w >= 0x5DD0 && w <= 0x5E20) { - unsigned int c = gb18030_pua_tbl3[w - 0x5DD0]; - if (c) { - *out++ = c; - continue; - } - } - } - } - - if ((c >= 0x81 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7 && c2 >= 0xA1) || (c >= 0xAA && c <= 0xFE && c2 <= 0xA0)) { - ZEND_ASSERT(w < cp936_ucs_table_size); - *out++ = cp936_ucs_table[w]; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_gb18030(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w == 0) { - out = mb_convert_buf_add(out, 0); - continue; - } else if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { - if (w == 0x1F9) { - s = 0xA8Bf; - } else { - s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; - } - } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { - if (w == 0x20AC) { /* Euro sign */ - s = 0xA2E3; - } else { - s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; - } - } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { - s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; - } else if (w >= ucs_i_cp936_table_min && w < 
ucs_i_cp936_table_max) { - s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; - } else if (w >= ucs_ci_cp936_table_min && w < ucs_ci_cp936_table_max) { - /* U+F900-U+FA2F CJK Compatibility Ideographs */ - if (w == 0xF92C) { - s = 0xFD9C; - } else if (w == 0xF979) { - s = 0xFD9D; - } else if (w == 0xF995) { - s = 0xFD9E; - } else if (w == 0xF9E7) { - s = 0xFD9F; - } else if (w == 0xF9F1) { - s = 0xFDA0; - } else if (w >= 0xFA0C && w <= 0xFA29) { - s = ucs_ci_s_cp936_table[w - 0xFA0C]; - } - } else if (w >= ucs_cf_cp936_table_min && w < ucs_cf_cp936_table_max) { - /* CJK Compatibility Forms */ - s = ucs_cf_cp936_table[w - ucs_cf_cp936_table_min]; - } else if (w >= ucs_sfv_cp936_table_min && w < ucs_sfv_cp936_table_max) { - /* U+FE50-U+FE6F Small Form Variants */ - s = ucs_sfv_cp936_table[w - ucs_sfv_cp936_table_min]; - } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { - /* U+FF00-U+FFFF HW/FW Forms */ - if (w == 0xFF04) { - s = 0xA1E7; - } else if (w == 0xFF5E) { - s = 0xA1AB; - } else if (w >= 0xFF01 && w <= 0xFF5D) { - s = w - 0xFF01 + 0xA3A1; - } else if (w >= 0xFFE0 && w <= 0xFFE5) { - s = ucs_hff_s_cp936_table[w - 0xFFE0]; - } - } else if (w >= 0xE000 && w <= 0xE864) { - /* PUA */ - if (w < 0xE766) { - if (w < 0xE4C6) { - unsigned int c1 = w - 0xE000; - s = (c1 % 94) + 0xA1; - c1 /= 94; - s |= (c1 + (c1 < 0x06 ? 0xAA : 0xF2)) << 8; - } else { - unsigned int c1 = w - 0xE4C6; - s = ((c1 / 96) + 0xA1) << 8; - c1 %= 96; - s |= c1 + (c1 >= 0x3F ? 
0x41 : 0x40); - } - } else { - /* U+E766-U+E864 */ - unsigned int k1 = 0, k2 = mbfl_gb18030_pua_tbl_max; - while (k1 < k2) { - unsigned int k = (k1 + k2) >> 1; - if (w < mbfl_gb18030_pua_tbl[k][0]) { - k2 = k; - } else if (w > mbfl_gb18030_pua_tbl[k][1]) { - k1 = k + 1; - } else { - s = w - mbfl_gb18030_pua_tbl[k][0] + mbfl_gb18030_pua_tbl[k][2]; - break; - } - } - } - } - - /* While GB18030 and CP936 are very similar, some mappings are different between these encodings; - * do a binary search in a table of differing codepoints to see if we have one */ - if (!s && w >= mbfl_gb18030_c_tbl_key[0] && w <= mbfl_gb18030_c_tbl_key[mbfl_gb18030_c_tbl_max-1]) { - int i = mbfl_bisec_srch2(w, mbfl_gb18030_c_tbl_key, mbfl_gb18030_c_tbl_max); - if (i >= 0) { - s = mbfl_gb18030_c_tbl_val[i]; - } - } - - /* If we have not yet found a suitable mapping for this codepoint, it requires a 4-byte code */ - if (!s && w >= 0x80 && w <= 0xFFFF) { - /* BMP */ - int i = mbfl_bisec_srch(w, mbfl_uni2gb_tbl, mbfl_gb_uni_max); - if (i >= 0) { - unsigned int c1 = w - mbfl_gb_uni_ofst[i]; - s = (c1 % 10) + 0x30; - c1 /= 10; - s |= ((c1 % 126) + 0x81) << 8; - c1 /= 126; - s |= ((c1 % 10) + 0x30) << 16; - c1 /= 10; - s |= (c1 + 0x81) << 24; - } - } else if (w >= 0x10000 && w <= 0x10FFFF) { - /* Code set 3: Unicode U+10000-U+10FFFF */ - unsigned int c1 = w - 0x10000; - s = (c1 % 10) + 0x30; - c1 /= 10; - s |= ((c1 % 126) + 0x81) << 8; - c1 /= 126; - s |= ((c1 % 10) + 0x30) << 16; - c1 /= 10; - s |= (c1 + 0x90) << 24; - } - - if (!s) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_gb18030); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else if (s > 0xFFFFFF) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add4(out, (s >> 24) & 0xFF, (s >> 16) & 0xFF, (s >> 8) & 0xFF, s & 0xFF); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } 
- } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h b/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h deleted file mode 100644 index e7f0eae16bf6e..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_gb18030.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_cn.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_GB18030_H -#define MBFL_MBFILTER_GB18030_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_gb18030; -extern const struct mbfl_convert_vtbl vtbl_gb18030_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_gb18030; - -int mbfl_filt_conv_gb18030_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_gb18030(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_GB18030_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_hz.c b/ext/mbstring/libmbfl/filters/mbfilter_hz.c deleted file mode 100644 index b047bfc8b7b27..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_hz.c +++ /dev/null @@ -1,409 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_cn.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#include "mbfilter.h" -#include "mbfilter_hz.h" - -#include "unicode_table_cp936.h" -#include "unicode_table_gb2312.h" - -static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_hz_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_hz(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -const mbfl_encoding mbfl_encoding_hz = { - mbfl_no_encoding_hz, - "HZ", - "HZ-GB-2312", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_hz_wchar, - &vtbl_wchar_hz, - mb_hz_to_wchar, - mb_wchar_to_hz, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_hz_wchar = { - mbfl_no_encoding_hz, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_hz_wchar, - mbfl_filt_conv_hz_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_hz = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_hz, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_hz, - mbfl_filt_conv_any_hz_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - - switch (filter->status & 0xf) { - /* case 0x00: ASCII */ - /* case 0x10: GB2312 */ - case 0: - if (c == '~') { - filter->status += 2; - } else if (filter->status == 0x10 && ((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77))) { - /* DBCS first char */ - filter->cache = c; - filter->status += 1; - } else if (filter->status == 0 && c >= 0 && c < 0x80) { /* latin, CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* case 0x11: GB2312 second char */ - case 1: - filter->status &= ~0xf; - c1 = filter->cache; - if (c1 > 0x20 && c1 < 0x7F && c > 0x20 && c < 0x7F) { - s = (c1 - 1)*192 + c + 0x40; /* GB2312 */ - ZEND_ASSERT(s < cp936_ucs_table_size); - if (s == 0x1864) { - w = 0x30FB; 
- } else if (s == 0x186A) { - w = 0x2015; - } else if (s == 0x186C) { - w = 0x2225; - } else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) { - w = 0; - } else { - w = cp936_ucs_table[s]; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* '~' */ - case 2: - if (c == '}' && filter->status == 0x12) { - filter->status = 0; - } else if (c == '{' && filter->status == 2) { - filter->status = 0x10; - } else if (c == '~' && filter->status == 2) { - CK((*filter->output_function)('~', filter->data)); - filter->status -= 2; - } else if (c == '\n') { - /* "~\n" is a line continuation; no output is needed, nor should we shift modes */ - filter->status -= 2; - } else { - /* Invalid character after ~ */ - filter->status -= 2; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_hz_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 0x11) { - /* 2-byte character was truncated */ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_cp936_table_min && c < ucs_a1_cp936_table_max) { - if (c == 0xB7 || c == 0x144 || c == 0x148 || c == 0x251 || c == 0x261 || c == 0x2CA || c == 0x2CB || c == 0x2D9) { - s = 0; - } else { - s = ucs_a1_cp936_table[c - ucs_a1_cp936_table_min]; - } - } else if (c >= ucs_a2_cp936_table_min && c < ucs_a2_cp936_table_max) { - if (c == 0x2015) { - s = 0xA1AA; - } else if (c == 0x2010 || c == 0x2013 || c == 0x2014 || c == 0x2016 || c == 0x2025 || c == 0x2035 || - c == 0x2105 || c == 0x2109 || c == 
0x2121 || (c >= 0x2170 && c <= 0x2179) || (c >= 0x2196 && c <= 0x2199) || - c == 0x2215 || c == 0x221F || c == 0x2223 || c == 0x2252 || c == 0x2266 || c == 0x2267 || c == 0x2295 || - (c >= 0x2550 && c <= 0x2573) || c == 0x22BF || c == 0x2609 || (c >= 0x2581 && c <= 0x258F) || - (c >= 0x2593 && c <= 0x2595) || c == 0x25BC || c == 0x25BD || (c >= 0x25E2 && c <= 0x25E5)) { - s = 0; - } else { - s = ucs_a2_cp936_table[c - ucs_a2_cp936_table_min]; - } - } else if (c >= ucs_a3_cp936_table_min && c < ucs_a3_cp936_table_max) { - if (c == 0x30FB) { - s = 0xA1A4; - } else if (c == 0x3006 || c == 0x3007 || c == 0x3012 || c == 0x3231 || c == 0x32A3 || c >= 0x3300 || - (c >= 0x3018 && c <= 0x3040) || (c >= 0x309B && c <= 0x309E) || (c >= 0x30FC && c <= 0x30FE)) { - s = 0; - } else { - s = ucs_a3_cp936_table[c - ucs_a3_cp936_table_min]; - } - } else if (c >= ucs_i_gb2312_table_min && c < ucs_i_gb2312_table_max) { - s = ucs_i_gb2312_table[c - ucs_i_gb2312_table_min]; - } else if (c >= ucs_hff_cp936_table_min && c < ucs_hff_cp936_table_max) { - if (c == 0xFF04) { - s = 0xA1E7; - } else if (c == 0xFF5E) { - s = 0xA1AB; - } else if (c >= 0xFF01 && c <= 0xFF5D) { - s = c - 0xFF01 + 0xA3A1; - } else if (c == 0xFFE0 || c == 0xFFE1 || c == 0xFFE3 || c == 0xFFE5) { - s = ucs_hff_s_cp936_table[c - 0xFFE0]; - } - } - - if (s & 0x8000) { - s -= 0x8080; - } - - if (s <= 0) { - s = (c == 0) ? 
0 : -1; - } else if ((s >= 0x80 && s < 0x2121) || s > 0x8080) { - s = -1; - } - - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)('~', filter->data)); - CK((*filter->output_function)('}', filter->data)); - } - filter->status = 0; - if (s == 0x7E) { - CK((*filter->output_function)('~', filter->data)); - } - CK((*filter->output_function)(s, filter->data)); - } else { /* GB 2312-80 */ - if ((filter->status & 0xFF00) != 0x200) { - CK((*filter->output_function)('~', filter->data)); - CK((*filter->output_function)('{', filter->data)); - } - filter->status = 0x200; - CK((*filter->output_function)((s >> 8) & 0x7F, filter->data)); - CK((*filter->output_function)(s & 0x7F, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter) -{ - /* back to latin */ - if (filter->status & 0xFF00) { - CK((*filter->output_function)('~', filter->data)); - CK((*filter->output_function)('}', filter->data)); - } - filter->status = 0; - return 0; -} - -#define ASCII 0 -#define GB2312 1 - -static size_t mb_hz_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == '~') { - if (p == e) { - break; - } - unsigned char c2 = *p++; - - if (c2 == '}' && *state == GB2312) { - *state = ASCII; - } else if (c2 == '{' && *state == ASCII) { - *state = GB2312; - } else if (c2 == '~' && *state == ASCII) { - *out++ = '~'; - } else if (c2 == '\n') { - /* "~\n" is a line continuation; no output is needed, nor should we shift modes */ - } else { - /* Invalid character after ~ */ - *out++ = MBFL_BAD_INPUT; - } - } else if (((c > 0x20 && c <= 0x29) || (c >= 0x30 && c <= 0x77)) && p < e && *state == GB2312) { - unsigned char c2 = *p++; - - 
if (c > 0x20 && c < 0x7F && c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 1)*192 + c2 + 0x40; - ZEND_ASSERT(s < cp936_ucs_table_size); - - if (s == 0x1864) { - s = 0x30FB; - } else if (s == 0x186A) { - s = 0x2015; - } else if (s == 0x186C) { - s = 0x2225; - } else if ((s >= 0x1920 && s <= 0x192A) || s == 0x1963 || (s >= 0x1C60 && s <= 0x1C7F) || (s >= 0x1DBB && s <= 0x1DC4)) { - s = 0; - } else { - s = cp936_ucs_table[s]; - } - if (!s) - s = MBFL_BAD_INPUT; - *out++ = s; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c < 0x80 && *state == ASCII) { - *out++ = c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_hz(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { - if (w == 0xB7 || w == 0x144 || w == 0x148 || w == 0x251 || w == 0x261 || w == 0x2CA || w == 0x2CB || w == 0x2D9) { - s = 0; - } else { - s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; - } - } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { - if (w == 0x2015) { - s = 0xA1AA; - } else if (w == 0x2010 || w == 0x2013 || w == 0x2014 || w == 0x2016 || w == 0x2025 || w == 0x2035 || w == 0x2105 || w == 0x2109 || w == 0x2121 || (w >= 0x2170 && w <= 0x2179) || (w >= 0x2196 && w <= 0x2199) || w == 0x2215 || w == 0x221F || w == 0x2223 || w == 0x2252 || w == 0x2266 || w == 0x2267 || w == 0x2295 || (w >= 0x2550 && w <= 0x2573) || w == 0x22BF || w == 0x2609 || (w >= 0x2581 && w <= 0x258F) || (w >= 0x2593 && w <= 0x2595) || w == 0x25BC || w == 0x25BD || (w >= 0x25E2 && w <= 0x25E5)) { - s = 0; - } else { - s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; - } - } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { - if (w == 
0x30FB) { - s = 0xA1A4; - } else if (w == 0x3006 || w == 0x3007 || w == 0x3012 || w == 0x3231 || w == 0x32A3 || w >= 0x3300 || (w >= 0x3018 && w <= 0x3040) || (w >= 0x309B && w <= 0x309E) || (w >= 0x30FC && w <= 0x30FE)) { - s = 0; - } else { - s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; - } - } else if (w >= ucs_i_gb2312_table_min && w < ucs_i_gb2312_table_max) { - s = ucs_i_gb2312_table[w - ucs_i_gb2312_table_min]; - } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { - if (w == 0xFF04) { - s = 0xA1E7; - } else if (w == 0xFF5E) { - s = 0xA1AB; - } else if (w >= 0xFF01 && w <= 0xFF5D) { - s = w - 0xFF01 + 0xA3A1; - } else if (w == 0xFFE0 || w == 0xFFE1 || w == 0xFFE3 || w == 0xFFE5) { - s = ucs_hff_s_cp936_table[w - 0xFFE0]; - } - } - - s &= ~0x8080; - - if ((!s && w) || (s >= 0x80 && s < 0x2121)) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_hz); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s < 0x80) { - /* ASCII */ - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); - out = mb_convert_buf_add2(out, '~', '}'); - buf->state = ASCII; - } - if (s == '~') { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, '~', '~'); - } else { - out = mb_convert_buf_add(out, s); - } - } else { - /* GB 2312-80 */ - if (buf->state != GB2312) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add2(out, '~', '{'); - buf->state = GB2312; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } - } - - if (end && buf->state != ASCII) { - /* If not in ASCII state, need to emit closing control chars */ - MB_CONVERT_BUF_ENSURE(buf, out, limit, 2); - out = mb_convert_buf_add2(out, '~', '}'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_hz.h b/ext/mbstring/libmbfl/filters/mbfilter_hz.h deleted file mode 100644 index 
6b1dfb1564c9d..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_hz.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_cn.h - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_HZ_H -#define MBFL_MBFILTER_HZ_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_hz; -extern const struct mbfl_convert_vtbl vtbl_hz_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_hz; - -int mbfl_filt_conv_hz_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_hz(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_any_hz_flush(mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_HZ_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c deleted file mode 100644 index e3676d30e2904..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c +++ /dev/null @@ -1,584 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. 
All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_iso2022_jp_ms.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" - -static size_t mb_iso2022jpms_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022jpms(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter); - -static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; - -const mbfl_encoding mbfl_encoding_2022jpms = { - mbfl_no_encoding_2022jpms, - "ISO-2022-JP-MS", - "ISO-2022-JP", - mbfl_encoding_2022jpms_aliases, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022jpms_wchar, - &vtbl_wchar_2022jpms, - mb_iso2022jpms_to_wchar, - mb_wchar_to_iso2022jpms, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { - mbfl_no_encoding_2022jpms, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_2022jpms_wchar, - 
mbfl_filt_conv_2022jpms_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jpms, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_2022jpms, - mbfl_filt_conv_any_2022jpms_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define sjistoidx(c1, c2) \ - (((c1) > 0x9f) \ - ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ - : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40))) -#define idxtojis1(c) (((c) / 94) + 0x21) -#define idxtojis2(c) (((c) % 94) + 0x21) - -#define ASCII 0 -#define JISX0201_KANA 0x20 -#define JISX0208_KANJI 0x80 -#define UDC 0xA0 - -int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - - switch (filter->status & 0xF) { - case 0: - if (c == 0x1B) { - filter->status += 2; - } else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) { - CK((*filter->output_function)(0xFF40 + c, filter->data)); - } else if ((filter->status == JISX0208_KANJI || filter->status == UDC) && c > 0x20 && c < 0x80) { - filter->cache = c; - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* ASCII */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xA0 && c < 0xE0) { /* Kana */ - CK((*filter->output_function)(0xFEC0 + c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* Kanji, second byte */ - case 1: - w = 0; - filter->status &= ~0xF; - c1 = filter->cache; - if (c > 0x20 && c < 0x7F) { - s = ((c1 - 0x21) * 94) + c - 0x21; - if (filter->status == JISX0208_KANJI) { - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN 
*/ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } else { - if (c1 > 0x20 && c1 < 0x35) { - w = 0xE000 + ((c1 - 0x21) * 94) + c - 0x21; - } else { - w = MBFL_BAD_INPUT; - } - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC */ - case 2: - if (c == '$') { - filter->status++; - } else if (c == '(') { - filter->status += 3; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ */ - case 3: - if (c == '@' || c == 'B') { - filter->status = JISX0208_KANJI; - } else if (c == '(') { - filter->status++; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ ( */ - case 4: - if (c == '@' || c == 'B') { - filter->status = JISX0208_KANJI; - } else if (c == '?') { - filter->status = UDC; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC ( */ - case 5: - if (c == 'B' || c == 'J') { - filter->status = 0; - } else if (c == 'I') { - filter->status = JISX0201_KANA; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - - return 0; -} - - -static int mbfl_filt_conv_2022jpms_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - 
(*filter->output_function)(MBFL_BAD_INPUT, filter->data); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static int cp932ext3_cp932ext2_jis(int c) -{ - int idx; - - idx = sjistoidx(0xfa, 0x40) + c; - if (idx >= sjistoidx(0xfa, 0x5c)) - idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); - else if (idx >= sjistoidx(0xfa, 0x55)) - idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); - else if (idx >= sjistoidx(0xfa, 0x40)) - idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); - return idxtojis1(idx) << 8 | idxtojis2(idx); -} - -int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1 = 0, s2 = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xE000 && c < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s1 = c - 0xE000; - c1 = (s1 / 94) + 0x7f; - c2 = (s1 % 94) + 0x21; - s1 = (c1 << 8) | c2; - } - - if (s1 <= 0) { - if (c == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } - } - - if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - for (c1 = 0; 
c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { - if (c == cp932ext1_ucs_table[c1]) { - s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; - break; - } - } - - if (s1 <= 0) { - for (c1 = 0; c1 < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; c1++) { - if (c == cp932ext3_ucs_table[c1]) { - s1 = cp932ext3_cp932ext2_jis(c1); - break; - } - } - } - - if (c == 0) { - s1 = 0; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* latin */ - if (filter->status & 0xFF00) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - CK((*filter->output_function)(s1, filter->data)); - filter->status = 0; - } else if (s1 > 0xA0 && s1 < 0xE0) { /* kana */ - if ((filter->status & 0xFF00) != 0x100) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('I', filter->data)); - } - filter->status = 0x100; - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } else if (s1 < 0x7E7F) { /* X 0208 */ - if ((filter->status & 0xFF00) != 0x200) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - filter->status = 0x200; - CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } else if (s1 < 0x927F) { /* UDC */ - if ((filter->status & 0xFF00) != 0x800) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('?', filter->data)); - } - filter->status = 0x800; - CK((*filter->output_function)(((s1 >> 8) - 0x5E) & 0x7F, filter->data)); - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } - } else { - 
CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) -{ - /* Go back to ASCII (so strings can be safely concatenated) */ - if ((filter->status & 0xFF00) != 0) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_iso2022jpms_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - if ((e - p) < 2) { - *out++ = MBFL_BAD_INPUT; - p = e; - break; - } - unsigned char c2 = *p++; - unsigned char c3 = *p++; - - if (c2 == '$') { - if (c3 == '@' || c3 == 'B') { - *state = JISX0208_KANJI; - } else if (c3 == '(' && p < e) { - unsigned char c4 = *p++; - - if (c4 == '@' || c4 == 'B') { - *state = JISX0208_KANJI; - } else if (c4 == '?') { - *state = UDC; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c2 == '(') { - if (c3 == 'B' || c3 == 'J') { - *state = ASCII; - } else if (c3 == 'I') { - *state = JISX0201_KANA; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - p--; - *out++ = MBFL_BAD_INPUT; - } - } else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) { - *out++ = 0xFF40 + c; - } else if ((*state == JISX0208_KANJI || *state == UDC) && c >= 0x21 && c <= 0x7F) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - unsigned int w = 0; - - if (c2 >= 0x21 && c2 <= 0x7E) { - unsigned int s = ((c - 0x21) * 94) + c2 - 0x21; - if (*state == JISX0208_KANJI) { - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - 
} else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (!w) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - } - } else if (c >= 0x21 && c <= 0x34) { - w = 0xE000 + ((c - 0x21) * 94) + c2 - 0x21; - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_iso2022jpms(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s = ((((w - 0xE000) / 94) + 0x7F) << 8) | (((w - 
0xE000) % 94) + 0x21); - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if (s >= 0xA1A1) /* JISX 0212 */ - s = 0; - - if (!s && w) { - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - break; - } - } - - if (!s) { - for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { - if (w == cp932ext3_ucs_table[i]) { - s = cp932ext3_cp932ext2_jis(i); - break; - } - } - } - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7F) { - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA1 && s <= 0xDF) { - if (buf->state != JISX0201_KANA) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX0201_KANA; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else if (s <= 0x7E7E) { - if (buf->state != JISX0208_KANJI) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX0208_KANJI; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0x7F); - } else if (s < 0x927F) { - if (buf->state != UDC) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 
6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', '?'); - buf->state = UDC; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, ((s >> 8) - 0x5E) & 0x7F, s & 0x7F); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jpms); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h deleted file mode 100644 index fdc85183d7ea2..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H -#define MBFL_MBFILTER_ISO2022_JP_MS_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_2022jpms; -extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; - -int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c deleted file mode 100644 index dcf8fc51b6637..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -/* ISO-2022-KR is defined in RFC 1557 - * - * The RFC says that ESC $ ) C must appear once in a ISO-2022-KR string, - * at the beginning of a line, before any instances of the Shift In or - * Shift Out bytes which are used to switch between ASCII/KSC 5601 modes - * - * We don't enforce that for ISO-2022-KR input */ - -#include "mbfilter.h" -#include "mbfilter_iso2022_kr.h" -#include "unicode_table_uhc.h" - -static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter); -static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022kr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -const mbfl_encoding mbfl_encoding_2022kr = { - mbfl_no_encoding_2022kr, - "ISO-2022-KR", - "ISO-2022-KR", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022kr_wchar, - &vtbl_wchar_2022kr, - mb_iso2022kr_to_wchar, - mb_wchar_to_iso2022kr, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022kr = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022kr, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_2022kr, - mbfl_filt_conv_any_2022kr_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_2022kr_wchar = { - mbfl_no_encoding_2022kr, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_2022kr_wchar, - mbfl_filt_conv_2022kr_wchar_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter) -{ - int w = 0; - - switch (filter->status & 0xf) { - /* case 0x00: ASCII */ - /* case 0x10: KSC5601 */ - case 0: - if (c == 0x1b) { /* ESC */ - filter->status += 2; - } else if (c == 0x0f) { /* shift in (ASCII) */ - filter->status = 0; - } else if (c == 0x0e) { /* shift out (KSC5601) */ - filter->status = 0x10; - } else if ((filter->status & 0x10) && 
c > 0x20 && c < 0x7f) { - /* KSC5601 lead byte */ - filter->cache = c; - filter->status = 0x11; - } else if ((filter->status & 0x10) == 0 && c >= 0 && c < 0x80) { - /* latin, CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0x10; - int c1 = filter->cache; - int flag = 0; - - if (c1 > 0x20 && c1 < 0x47) { - flag = 1; - } else if (c1 >= 0x47 && c1 <= 0x7e && c1 != 0x49) { - flag = 2; - } - - if (flag > 0 && c > 0x20 && c < 0x7f) { - if (flag == 1) { - if (c1 != 0x22 || c <= 0x65) { - w = (c1 - 1)*190 + (c - 0x41) + 0x80; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - } - } else { - w = (c1 - 0x47)*94 + c - 0x21; - if (w < uhc3_ucs_table_size) { - w = uhc3_ucs_table[w]; - } else { - w = MBFL_BAD_INPUT; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 2: /* ESC */ - if (c == '$') { - filter->status++; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* ESC $ */ - if (c == ')') { - filter->status++; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 4: /* ESC $ ) */ - filter->status = 0; - if (c != 'C') { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_2022kr_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - /* 2-byte character was truncated */ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_2022kr(int c, 
mbfl_convert_filter *filter) -{ - int c1, c2, s = 0; - - if ((filter->status & 0x100) == 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)(')', filter->data)); - CK((*filter->output_function)('C', filter->data)); - filter->status |= 0x100; - } - - if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; - } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; - } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; - } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; - } else if (c >= ucs_s_uhc_table_min && c < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; - } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; - } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; - } - - c1 = (s >> 8) & 0xff; - c2 = s & 0xff; - /* exclude UHC extension area */ - if (c1 < 0xa1 || c2 < 0xa1) { - s = c; - } else if (s & 0x8000) { - s -= 0x8080; - } - - if (s <= 0) { - if (c == 0) { - s = 0; - } else { - s = -1; - } - } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { - s = -1; - } - - if (s >= 0) { - if (s < 0x80 && s >= 0) { /* ASCII */ - if (filter->status & 0x10) { - CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ - filter->status &= ~0x10; - } - CK((*filter->output_function)(s, filter->data)); - } else { - if ((filter->status & 0x10) == 0) { - CK((*filter->output_function)(0x0e, filter->data)); /* shift out */ - filter->status |= 0x10; - } - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } 
- } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_any_2022kr_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - /* Escape sequence or 2-byte character was truncated */ - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - } - /* back to ascii */ - if (filter->status & 0x10) { - CK((*filter->output_function)(0x0f, filter->data)); /* shift in */ - } - - filter->status = filter->cache = 0; - - if (filter->flush_function) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -#define ASCII 0 -#define KSC5601 1 - -static size_t mb_iso2022kr_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - if ((e - p) < 3) { - *out++ = MBFL_BAD_INPUT; - if (p < e && *p++ == '$') { - if (p < e) { - p++; - } - } - continue; - } - unsigned char c2 = *p++; - unsigned char c3 = *p++; - unsigned char c4 = *p++; - if (c2 == '$' && c3 == ')' && c4 == 'C') { - *state = ASCII; - } else { - if (c3 != ')') { - p--; - if (c2 != '$') - p--; - } - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0xF) { - *state = ASCII; - } else if (c == 0xE) { - *state = KSC5601; - } else if (c >= 0x21 && c <= 0x7E && *state == KSC5601) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - unsigned int w = 0; - - if (c2 < 0x21 || c2 > 0x7E) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - if (c < 0x47) { - if (c != 0x22 || c2 <= 0x65) { - w = (c - 1)*190 + c2 - 0x41 + 0x80; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - } - } else if (c != 0x49 && c <= 0x7D) { - w = (c - 0x47)*94 + c2 - 0x21; - ZEND_ASSERT(w < uhc3_ucs_table_size); - w = uhc3_ucs_table[w]; - } - - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else if (c < 0x80 && *state == 
ASCII) { - *out++ = c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -#define EMITTED_ESC_SEQUENCE 0x10 - -static void mb_wchar_to_iso2022kr(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - - /* This escape sequence needs to come *somewhere* at the beginning of a line before - * we can use the Shift In/Shift Out bytes, but it only needs to come once in a string - * Rather than tracking newlines, we can just emit the sequence once at the beginning - * of the output string... since that will always be "the beginning of a line" */ - if (len && !(buf->state & EMITTED_ESC_SEQUENCE)) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 4 + len); - out = mb_convert_buf_add4(out, 0x1B, '$', ')', 'C'); - buf->state |= EMITTED_ESC_SEQUENCE; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; - } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; - } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; - } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; - } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; - } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; - } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; - } - - if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { - s = w; - } else { - s -= 0x8080; - } - - if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { - 
MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022kr); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s < 0x80) { - if ((buf->state & 1) != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add(out, 0xF); - buf->state &= ~KSC5601; - } - out = mb_convert_buf_add(out, s); - } else { - if ((buf->state & 1) != KSC5601) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); - out = mb_convert_buf_add(out, 0xE); - buf->state |= KSC5601; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - if (end && (buf->state & 1) != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 1); - out = mb_convert_buf_add(out, 0xF); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h deleted file mode 100644 index dc6687a61478a..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_kr.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.h - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_ISO2022_KR_H -#define MBFL_MBFILTER_ISO2022_KR_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_2022kr; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022kr; -extern const struct mbfl_convert_vtbl vtbl_2022kr_wchar; - -int mbfl_filt_conv_2022kr_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_2022kr(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_ISO2022_KR_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c deleted file mode 100644 index 79b7a4714af23..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.c +++ /dev/null @@ -1,757 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_iso2022_jp_ms.c - * by Rui Hirokawa on 25 July 2011. - * - */ - -#include "mbfilter.h" -#include "mbfilter_iso2022jp_mobile.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "cp932_table.h" -#include "emoji2uni.h" - -static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter); - -extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); - -/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF - * These correspond to the letters A-Z - * To display the flag emoji for a country, two unicode codepoints are combined, - * which correspond to the two-letter code for that country - * This macro converts uppercase ASCII values to Regional Indicator codepoints */ -#define NFLAGS(c) (0x1F1A5+((unsigned int)(c))) - -static const char nflags_s[10][2] = { - "CN","DE","ES","FR","GB","IT","JP","KR","RU","US" -}; -static const int nflags_code_kddi[10] = { - 0x2549, 0x2546, 0x24C0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254A, 0x24C1, 0x27F7 -}; - -static const char *mbfl_encoding_2022jp_kddi_aliases[] = {"ISO-2022-JP-KDDI", NULL}; - -const mbfl_encoding 
mbfl_encoding_2022jp_kddi = { - mbfl_no_encoding_2022jp_kddi, - "ISO-2022-JP-MOBILE#KDDI", - "ISO-2022-JP", - mbfl_encoding_2022jp_kddi_aliases, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022jp_kddi_wchar, - &vtbl_wchar_2022jp_kddi, - mb_iso2022jp_kddi_to_wchar, - mb_wchar_to_iso2022jp_kddi, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar = { - mbfl_no_encoding_2022jp_kddi, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_2022jp_mobile_wchar, - mbfl_filt_conv_2022jp_mobile_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jp_kddi, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_2022jp_mobile, - mbfl_filt_conv_wchar_2022jp_mobile_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = ((c1 - 1) >> 1) + ((c1) < 0x5F ? 0x71 : 0xB1); \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - if (c1 < 0xa0) { \ - s1 = ((c1 - 0x81) << 1) + 0x21; \ - } else { \ - s1 = ((c1 - 0xc1) << 1) + 0x21; \ - } \ - s2 = c2; \ - if (c2 < 0x9f) { \ - if (c2 < 0x7f) { \ - s2++; \ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -/* (ku*94)+ten value -> Shift-JIS byte sequence */ -#define CODE2JIS(c1,c2,s1,s2) \ - c1 = (s1)/94+0x21; \ - c2 = (s1)-94*((c1)-0x21)+0x21; \ - s1 = ((c1) << 8) | (c2); \ - s2 = 1 - -#define ASCII 0 -#define JISX0201_KANA 0x20 -#define JISX0208_KANJI 0x80 - -#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0) -#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0) - -static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"}; - -static inline int convert_emoji_cp(int cp) -{ - 
if (cp > 0xF000) - return cp + 0x10000; - if (cp > 0xE000) - return cp + 0xF0000; - return cp; -} - -static int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd) -{ - if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) { - if (s == 0x24C0) { /* Spain */ - EMIT_FLAG_EMOJI("ES"); - } else if (s == 0x24C1) { /* Russia */ - EMIT_FLAG_EMOJI("RU"); - } else if (s >= 0x2545 && s <= 0x254A) { - EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]); - } else if (s == 0x25BC) { - EMIT_KEYPAD_EMOJI('#'); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]); - } - } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) { - if (s == 0x2750) { /* Japan */ - EMIT_FLAG_EMOJI("JP"); - } else if (s >= 0x27A6 && s <= 0x27AE) { - EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1'); - } else if (s == 0x27F7) { /* United States */ - EMIT_FLAG_EMOJI("US"); - } else if (s == 0x2830) { - EMIT_KEYPAD_EMOJI('0'); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]); - } - } - return 0; -} - -static int mbfl_filt_conv_2022jp_mobile_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w, snd = 0; - - switch (filter->status & 0xF) { - case 0: - if (c == 0x1B) { - filter->status += 2; - } else if (filter->status == JISX0201_KANA && c > 0x20 && c < 0x60) { - CK((*filter->output_function)(0xFF40 + c, filter->data)); - } else if (filter->status == JISX0208_KANJI && c > 0x20 && c < 0x80) { - filter->cache = c; - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* ASCII */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xA0 && c < 0xE0) { /* Kana */ - CK((*filter->output_function)(0xFEC0 + c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* JISX 0208, second byte */ - case 1: - w = 0; - filter->status &= ~0xF; - c1 = filter->cache; - if (c > 0x20 && c < 0x7F) { - s = ((c1 - 0x21) * 94) + c - 0x21; - 
- if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (s >= (84 * 94) && s < (91 * 94)) { - s += 22 * 94; - w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); - if (w > 0 && snd > 0) { - (*filter->output_function)(snd, filter->data); - } - } - - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC */ - case 2: - if (c == '$') { - filter->status++; - } else if (c == '(') { - filter->status += 3; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ */ - case 3: - if (c == '@' || c == 'B') { - filter->status = JISX0208_KANJI; - } else if (c == '(') { - filter->status++; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ ( */ - case 4: - if (c == '@' || c == 'B') { - filter->status = JISX0208_KANJI; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC ( */ - case 5: - if (c == 'B' || c == 'J') { - filter->status = 0; /* ASCII mode */ - } else if (c == 'I') { - filter->status = JISX0201_KANA; - } else { - filter->status &= ~0xF; - CK((*filter->output_function)(MBFL_BAD_INPUT, 
filter->data)); - } - } - - return 0; -} - -static int mbfl_filt_conv_2022jp_mobile_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter) -{ - if ((filter->status & 0xF) == 1) { - int c1 = filter->cache; - filter->cache = 0; - filter->status &= ~0xFF; - if (c == 0x20E3) { - if (c1 == '#') { - *s1 = 0x25BC; - } else if (c1 == '0') { - *s1 = 0x2830; - } else { /* Previous character was '1'-'9' */ - *s1 = 0x27A6 + (c1 - '1'); - } - return 1; - } else { - if (filter->status & 0xFF00) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - CK((*filter->output_function)(c1, filter->data)); - filter->status = 0; - } - } - - if (c == '#' || (c >= '0' && c <= '9')) { - filter->status |= 1; - filter->cache = c; - return 0; - } - - if (c == 0xA9) { /* Copyright sign */ - *s1 = 0x27DC; - return 1; - } else if (c == 0xAE) { /* Registered sign */ - *s1 = 0x27DD; - return 1; - } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) { - int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code2_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) { - int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code3_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) { - int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); - if (i >= 0) { - *s1 = 
mb_tbl_uni_kddi2code5_val[i]; - return 1; - } - } - return 0; -} - -static int mbfl_filt_conv_wchar_2022jp_mobile(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1 = 0, s2 = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - - if (s1 <= 0) { - if (c == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215d; - } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224c; - } - } - - if (mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0) { - /* A KDDI emoji was detected and stored in s1 */ - CODE2JIS(c1,c2,s1,s2); - s1 -= 0x1600; - } else if ((filter->status & 0xFF) == 1 && filter->cache) { - /* We are just processing one of KDDI's special emoji for a phone keypad button */ - return 0; - } - - if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { - if (c == cp932ext1_ucs_table[c1]) { - s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; - break; - } - } - - if (c == 0) { - s1 = 0; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* ASCII */ - if (filter->status & 0xFF00) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', 
filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - CK((*filter->output_function)(s1, filter->data)); - filter->status = 0; - } else if (s1 > 0xA0 && s1 < 0xE0) { /* Kana */ - if ((filter->status & 0xFF00) != 0x100) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('I', filter->data)); - } - filter->status = 0x100; - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } else if (s1 < 0x7E7F) { /* JIS X 0208 */ - if ((filter->status & 0xFF00) != 0x200) { - CK((*filter->output_function)(0x1B, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - filter->status = 0x200; - CK((*filter->output_function)((s1 >> 8) & 0xFF, filter->data)); - CK((*filter->output_function)(s1 & 0x7F, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static int mbfl_filt_conv_wchar_2022jp_mobile_flush(mbfl_convert_filter *filter) -{ - /* Go back to ASCII mode (so strings can be safely concatenated) */ - if (filter->status & 0xFF00) { - (*filter->output_function)(0x1B, filter->data); /* ESC */ - (*filter->output_function)('(', filter->data); - (*filter->output_function)('B', filter->data); - } - - int c1 = filter->cache; - if ((filter->status & 0xFF) == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) { - (*filter->output_function)(c1, filter->data); - } - filter->status = filter->cache = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_iso2022jp_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - if ((e - p) < 2) { - p = e; - *out++ = 
MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - unsigned char c3 = *p++; - - if (c2 == '$') { - if (c3 == '@' || c3 == 'B') { - *state = JISX0208_KANJI; - } else if (c3 == '(') { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c4 = *p++; - - if (c4 == '@' || c4 == 'B') { - *state = JISX0208_KANJI; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c2 == '(') { - if (c3 == 'B' || c3 == 'J') { - *state = ASCII; - } else if (c3 == 'I') { - *state = JISX0201_KANA; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - p--; - *out++ = MBFL_BAD_INPUT; - } - } else if (*state == JISX0201_KANA && c >= 0x21 && c <= 0x5F) { - *out++ = 0xFF40 + c; - } else if (*state == JISX0208_KANJI && c >= 0x21 && c <= 0x7F) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - - if (c2 >= 0x21 && c2 <= 0x7E) { - unsigned int s = ((c - 0x21) * 94) + c2 - 0x21; - uint32_t w = 0; - - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - - if (s >= (84 * 94) && s < (91 * 94)) { - int snd = 0; - s += 22 * 94; - w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); - if (w && snd) { - *out++ = snd; - } - } - - if (!w) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_iso2022jp_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if ((w == '#' || (w >= '0' && w <= '9')) && len) { - uint32_t w2 = *in++; len--; - - if (w2 == 0x20E3) { - unsigned int s1 = 0; - if (w == '#') { - s1 = 0x25BC; - } else if (w == '0') { - s1 = 0x2830; - } else { /* Previous character was '1'-'9' */ - s1 = 0x27A6 + (w - '1'); - } - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } else { - in--; len++; - } - } else if (w >= NFLAGS('C') && w <= NFLAGS('U') && len) { /* C for CN, U for US */ - uint32_t w2 
= *in++; len--; - - if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { - unsigned int s1 = nflags_code_kddi[i]; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - goto found_flag_emoji; - } - } - } - - in--; len++; -found_flag_emoji: ; - } - - if (w == 0xA9) { /* Copyright sign */ - unsigned int s1 = 0x27DC; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } else if (w == 0xAE) { /* Registered sign */ - unsigned int s1 = 0x27DD; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) { - int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); - if (i >= 0) { - unsigned int s1 = mb_tbl_uni_kddi2code2_value[i]; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } - } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) { - int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); - if (i >= 0) { - unsigned int s1 = mb_tbl_uni_kddi2code3_value[i]; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } - } else if (w >= mb_tbl_uni_kddi2code5_min && w <= mb_tbl_uni_kddi2code5_max) { - int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); - if (i >= 0) { - unsigned int s1 = mb_tbl_uni_kddi2code5_val[i]; - s = (((s1 / 94) + 0x21) << 8) + ((s1 % 94) + 0x21) - 0x1600; - } - } - - if (!s || s >= 0xA1A1) { - s = 0; - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - break; - } - } - if (w == 0) - s = 0; - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7F) { - if (buf->state 
!= ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA1 && s <= 0xDF) { - if (buf->state != JISX0201_KANA) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX0201_KANA; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else if (s <= 0x7E7E) { - if (buf->state != JISX0208_KANJI) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 5); - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX0208_KANJI; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp_kddi); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h deleted file mode 100644 index c2beafde64726..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso2022jp_mobile.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_iso2022_jp_ms.h - * by Rui Hirokawa on 25 July 2011. - * - */ - -#ifndef MBFL_MBFILTER_ISO2022_JP_MOBILE_H -#define MBFL_MBFILTER_ISO2022_JP_MOBILE_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_2022jp_kddi; -extern const struct mbfl_convert_vtbl vtbl_2022jp_kddi_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp_kddi; - -#endif /* MBFL_MBFILTER_ISO2022_JP_MOBILE_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c deleted file mode 100644 index 80af0e695644c..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ /dev/null @@ -1,944 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_jis.h" - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" - -static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static bool mb_check_iso2022jp(unsigned char *in, size_t in_len); -static bool mb_check_jis(unsigned char *in, size_t in_len); - -const mbfl_encoding mbfl_encoding_jis = { - mbfl_no_encoding_jis, - "JIS", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_jis_wchar, - &vtbl_wchar_jis, - mb_iso2022jp_to_wchar, - mb_wchar_to_jis, - mb_check_jis -}; - -const mbfl_encoding mbfl_encoding_2022jp = { - mbfl_no_encoding_2022jp, - "ISO-2022-JP", - "ISO-2022-JP", - NULL, - NULL, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022jp_wchar, - &vtbl_wchar_2022jp, - mb_iso2022jp_to_wchar, - mb_wchar_to_iso2022jp, - mb_check_iso2022jp -}; - -const struct mbfl_convert_vtbl vtbl_jis_wchar = { - mbfl_no_encoding_jis, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis_wchar, - mbfl_filt_conv_jis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_jis = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_jis, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis, - mbfl_filt_conv_any_jis_flush, - NULL, -}; - 
-const struct mbfl_convert_vtbl vtbl_2022jp_wchar = { - mbfl_no_encoding_2022jp, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis_wchar, - mbfl_filt_conv_jis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022jp = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jp, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_2022jp, - mbfl_filt_conv_any_jis_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -/* - * JIS => wchar - */ -int -mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, w; - -retry: - switch (filter->status & 0xf) { -/* case 0x00: ASCII */ -/* case 0x10: X 0201 latin */ -/* case 0x20: X 0201 kana */ -/* case 0x80: X 0208 */ -/* case 0x90: X 0212 */ - case 0: - if (c == 0x1b) { - filter->status += 2; - } else if (c == 0x0e) { /* "kana in" */ - filter->status = 0x20; - } else if (c == 0x0f) { /* "kana out" */ - filter->status = 0; - } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ - CK((*filter->output_function)(0xa5, filter->data)); - } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ - CK((*filter->output_function)(0x203e, filter->data)); - } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ - CK((*filter->output_function)(0xff40 + c, filter->data)); - } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->cache = c; - filter->status += 1; - } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - -/* case 0x81: X 0208 second char */ -/* case 0x91: X 0212 second char */ - case 1: - filter->status &= ~0xf; - c1 = filter->cache; - if (c > 0x20 && c < 0x7f) { - 
s = (c1 - 0x21)*94 + c - 0x21; - if (filter->status == 0x80) { - if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } else { - if (s >= 0 && s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - } else { - w = 0; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC */ -/* case 0x02: */ -/* case 0x12: */ -/* case 0x22: */ -/* case 0x82: */ -/* case 0x92: */ - case 2: - if (c == 0x24) { /* '$' */ - filter->status++; - } else if (c == 0x28) { /* '(' */ - filter->status += 3; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - goto retry; - } - break; - - /* ESC $ */ -/* case 0x03: */ -/* case 0x13: */ -/* case 0x23: */ -/* case 0x83: */ -/* case 0x93: */ - case 3: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x28) { /* '(' */ - filter->status++; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x24, filter->data)); - goto retry; - } - break; - - /* ESC $ ( */ -/* case 0x04: */ -/* case 0x14: */ -/* case 0x24: */ -/* case 0x84: */ -/* case 0x94: */ - case 4: - if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ - filter->status = 0x80; - } else if (c == 0x44) { /* 'D' */ - filter->status = 0x90; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x24, filter->data)); - CK((*filter->output_function)(0x28, filter->data)); - goto retry; - } - break; - - /* ESC ( */ -/* case 0x05: */ -/* case 0x15: */ -/* case 0x25: */ -/* case 0x85: */ -/* case 0x95: */ - case 5: - if (c == 0x42 || c == 0x48) { /* 'B' or 'H' */ - filter->status = 0; - } else if (c == 0x4a) { /* 'J' */ - 
filter->status = 0x10; - } else if (c == 0x49) { /* 'I' */ - filter->status = 0x20; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - CK((*filter->output_function)(0x28, filter->data)); - goto retry; - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - /* 2-byte (JIS X 0208 or 0212) character was truncated, - * or else escape sequence was truncated */ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -/* - * wchar => JIS - */ -int -mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - if (s <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if 
((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x8080) { /* X 0208 */ - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0x200; - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else if (s < 0x10000) { /* X 0212 */ - if ((filter->status & 0xff00) != 0x300) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x44, filter->data)); /* 'D' */ - } - filter->status = 0x300; - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - - -/* - * wchar => ISO-2022-JP - */ -int -mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter) -{ - int s; - - s = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s = 
ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - - if (s <= 0) { - if (c == 0xa5) { /* YEN SIGN */ - s = 0x1005c; - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215d; - } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ - s = 0x224c; - } - if (c == 0) { - s = 0; - } else if (s <= 0) { - s = -1; - } - } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { - s = -1; - } - if (s >= 0) { - if (s < 0x80) { /* ASCII */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x10000) { /* X 0208 */ - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x24, filter->data)); /* '$' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0x200; - CK((*filter->output_function)((s >> 8) & 0x7f, filter->data)); - CK((*filter->output_function)(s & 0x7f, filter->data)); - } else { /* X 0201 latin */ - if ((filter->status & 0xff00) != 0x400) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x4a, filter->data)); /* 'J' */ - } - filter->status = 0x400; - 
CK((*filter->output_function)(s & 0x7f, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int -mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter) -{ - /* back to latin */ - if ((filter->status & 0xff00) != 0) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ - } - filter->status = 0; - - if (filter->flush_function != NULL) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -#define ASCII 0 -#define JISX_0201_LATIN 1 -#define JISX_0201_KANA 2 -#define JISX_0208 3 -#define JISX_0212 4 - -static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - ZEND_ASSERT(bufsize >= 3); - - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - /* ESC seen; this is an escape sequence */ - if ((e - p) < 2) { - *out++ = MBFL_BAD_INPUT; - if (p != e && (*p == '$' || *p == '(')) - p++; - continue; - } - - unsigned char c2 = *p++; - if (c2 == '$') { - unsigned char c3 = *p++; - if (c3 == '@' || c3 == 'B') { - *state = JISX_0208; - } else if (c3 == '(') { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c4 = *p++; - if (c4 == '@' || c4 == 'B') { - *state = JISX_0208; - } else if (c4 == 'D') { - *state = JISX_0212; - } else { - if ((limit - out) < 3) { - p -= 4; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '$'; - *out++ = '('; - p--; - } - } else { - if ((limit - out) < 2) { - p -= 3; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '$'; - p--; - } - } else if (c2 == '(') { - unsigned char c3 = *p++; - if (c3 == 'B' || c3 == 'H') { - *state = ASCII; - } else if (c3 == 'J') { - *state = JISX_0201_LATIN; - } else if (c3 == 'I') { - *state = 
JISX_0201_KANA; - } else { - if ((limit - out) < 2) { - p -= 3; - break; - } - *out++ = MBFL_BAD_INPUT; - *out++ = '('; - p--; - } - } else { - *out++ = MBFL_BAD_INPUT; - p--; - } - } else if (c == 0xE) { - /* "Kana In" marker; this is just for JIS-7/8, but we also accept it for ISO-2022-JP */ - *state = JISX_0201_KANA; - } else if (c == 0xF) { - /* "Kana Out" marker */ - *state = ASCII; - } else if (*state == JISX_0201_LATIN && c == 0x5C) { /* YEN SIGN */ - *out++ = 0xA5; - } else if (*state == JISX_0201_LATIN && c == 0x7E) { /* OVER LINE */ - *out++ = 0x203E; - } else if (*state == JISX_0201_KANA && c > 0x20 && c < 0x60) { - *out++ = 0xFF40 + c; - } else if (*state >= JISX_0208 && c > 0x20 && c < 0x7F) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - if (c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - uint32_t w = 0; - if (*state == JISX_0208) { - if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - if (!w) { - w = MBFL_BAD_INPUT; - } - } else { - if (s < jisx0212_ucs_table_size) { - w = jisx0212_ucs_table[s]; - } - if (!w) { - w = MBFL_BAD_INPUT; - } - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c < 0x80) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - /* GR-invoked Kana; "GR" stands for "graphics right" and refers to bytes - * with the MSB bit (in the context of ISO-2022 encoding). - * - * In this regard, Wikipedia states: - * "Other, older variants known as JIS7 and JIS8 build directly on the 7-bit and 8-bit - * encodings defined by JIS X 0201 and allow use of JIS X 0201 kana from G1 without - * escape sequences, using Shift Out and Shift In or setting the eighth bit - * (GR-invoked), respectively." 
- * - * Note that we support both the 'JIS7' use of 0xE/0xF Shift In/Shift Out codes - * and the 'JIS8' use of GR-invoked Kana */ - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x1005C; - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w != 0) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - - if (s < 0x80) { /* ASCII */ - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - 
buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s < 0x8080) { /* JIS X 0208 */ - if (buf->state != JISX_0208) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5); - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } else if (s < 0x10000) { /* JIS X 0212 */ - if (buf->state != JISX_0212) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D'); - buf->state = JISX_0212; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } else { /* X 0201 Latin */ - if (buf->state != JISX_0201_LATIN) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w == 0x203E) { /* OVERLINE */ - s = 0x1007E; /* Convert to JISX 0201 OVERLINE */ - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x1005C; - } else if (w == 0xFF3C) { 
/* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w != 0) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - } - - if (s < 0x80) { /* ASCII */ - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s >= 0xA1 && s <= 0xDF) { - if (buf->state != JISX_0201_KANA) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'I'); - buf->state = JISX_0201_KANA; - } - out = mb_convert_buf_add(out, s & 0x7F); - } else if (s < 0x8080) { /* JIS X 0208 */ - if (buf->state != JISX_0208) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5); - out = mb_convert_buf_add3(out, 0x1B, '$', 'B'); - buf->state = JISX_0208; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } else if (s < 0x10000) { /* JIS X 0212 */ - if (buf->state != JISX_0212) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D'); - buf->state = JISX_0212; - } - out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F); - } else { /* X 0201 Latin */ - if (buf->state != JISX_0201_LATIN) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'J'); - buf->state = JISX_0201_LATIN; - } - out = mb_convert_buf_add(out, s & 0x7F); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - 
MB_CONVERT_BUF_STORE(buf, out, limit); -} - -#define JISX_0201_KANA_SO 5 - -static bool mb_check_jis(unsigned char *in, size_t in_len) -{ - unsigned char *p = in, *e = p + in_len; - unsigned int state = ASCII; - - while (p < e) { - unsigned char c = *p++; - if (c == 0x1B) { - /* ESC seen; this is an escape sequence */ - if (state == JISX_0201_KANA_SO) { - return false; - } - if ((e - p) < 2) { - return false; - } - unsigned char c2 = *p++; - if (c2 == '$') { - unsigned char c3 = *p++; - if (c3 == '@' || c3 == 'B') { - state = JISX_0208; - } else if (c3 == '(') { - if (p == e) { - return false; - } - unsigned char c4 = *p++; - if (c4 == '@' || c4 == 'B') { - state = JISX_0208; - } else if (c4 == 'D') { - state = JISX_0212; - } else { - return false; - } - } else { - return false; - } - } else if (c2 == '(') { - unsigned char c3 = *p++; - /* ESC ( H is treated as a sequence transitioning to ASCII for historical reasons. - * see https://github.com/php/php-src/pull/10828#issuecomment-1478342432. 
*/ - if (c3 == 'B' || c3 == 'H') { - state = ASCII; - } else if (c3 == 'J') { - state = JISX_0201_LATIN; - } else if (c3 == 'I') { - state = JISX_0201_KANA; - } else { - return false; - } - } else { - return false; - } - } else if (c == 0xE) { - /* "Kana In" marker */ - if (state != ASCII) { - return false; - } - state = JISX_0201_KANA_SO; - } else if (c == 0xF) { - /* "Kana Out" marker */ - if (state != JISX_0201_KANA_SO) { - return false; - } - state = ASCII; - } else if ((state == JISX_0208 || state == JISX_0212) && (c > 0x20 && c < 0x7F)) { - if (p == e) { - return false; - } - unsigned char c2 = *p++; - if (c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - if (state == JISX_0208) { - if (s < jisx0208_ucs_table_size && jisx0208_ucs_table[s]) { - continue; - } - } else { - if (s < jisx0212_ucs_table_size && jisx0212_ucs_table[s]) { - continue; - } - } - return false; - } else { - return false; - } - } else if (c < 0x80) { - continue; - } else if (c >= 0xA1 && c <= 0xDF) { - /* GR-invoked Kana */ - continue; - } else { - return false; - } - } - - return state == ASCII; -} - - -static bool mb_check_iso2022jp(unsigned char *in, size_t in_len) -{ - unsigned char *p = in, *e = p + in_len; - unsigned int state = ASCII; - - while (p < e) { - unsigned char c = *p++; - if (c == 0x1B) { - /* ESC seen; this is an escape sequence */ - if ((e - p) < 2) { - return false; - } - unsigned char c2 = *p++; - if (c2 == '$') { - unsigned char c3 = *p++; - if (c3 == '@' || c3 == 'B') { - state = JISX_0208; - } else { - return false; - } - } else if (c2 == '(') { - unsigned char c3 = *p++; - if (c3 == 'B') { - state = ASCII; - } else if (c3 == 'J') { - state = JISX_0201_LATIN; - } else { - return false; - } - } else { - return false; - } - } else if (c == 0xE || c == 0xF) { - /* "Kana In" or "Kana Out" marker; ISO-2022-JP is not accepted. 
*/ - return false; - } else if (state == JISX_0208 && (c > 0x20 && c < 0x7F)) { - if (p == e) { - return false; - } - unsigned char c2 = *p++; - if (c2 > 0x20 && c2 < 0x7F) { - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - if (s < jisx0208_ucs_table_size && jisx0208_ucs_table[s]) { - continue; - } - return false; - } else { - return false; - } - } else if (c < 0x80) { - continue; - } else { - return false; - } - } - - return state == ASCII; -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.h b/ext/mbstring/libmbfl/filters/mbfilter_jis.h deleted file mode 100644 index 55787c9acb7ac..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_JIS_H -#define MBFL_MBFILTER_JIS_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_jis; -extern const mbfl_encoding mbfl_encoding_2022jp; -extern const struct mbfl_convert_vtbl vtbl_jis_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_jis; -extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp; - -int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_any_jis_flush(mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_JIS_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis.c deleted file mode 100644 index 4db34c56b0e57..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.c +++ /dev/null @@ -1,2941 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this file was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#include "mbfilter.h" -#include "mbfilter_sjis.h" -#include "mbfilter_sjis_mac.h" -#include "mbfilter_sjis_mobile.h" - -#define UNICODE_TABLE_CP932_DEF -#define UNICODE_TABLE_JIS_DEF - -#include "unicode_table_cp932_ext.h" -#include "unicode_table_jis.h" -#include "sjis_mac2uni.h" -#include "emoji2uni.h" - -extern int mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); - -static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static int mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter); -static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjismac(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter); -static int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter); -static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_sjis_kddi_to_wchar(unsigned char **in, 
size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -const unsigned char mblen_table_sjis[] = { /* 0x81-0x9F,0xE0-0xEF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; - -const unsigned char mblen_table_sjismac[] = { /* 0x81-0x9F,0xE0-0xED */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; - -const unsigned char mblen_table_sjis_mobile[] = { /* 0x81-0x9F,0xE0-0xFC */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1 -}; - -static const char *mbfl_encoding_sjis_aliases[] = {"x-sjis", "SHIFT-JIS", NULL}; - -const mbfl_encoding mbfl_encoding_sjis = { - mbfl_no_encoding_sjis, - "SJIS", - "Shift_JIS", - mbfl_encoding_sjis_aliases, - mblen_table_sjis, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_wchar, - &vtbl_wchar_sjis, - mb_sjis_to_wchar, - mb_wchar_to_sjis, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_sjis_wchar = { - mbfl_no_encoding_sjis, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis, - mbfl_filt_conv_common_flush, - NULL -}; - -static const char *mbfl_encoding_sjis_mac_aliases[] = {"MacJapanese", "x-Mac-Japanese", NULL}; - -const mbfl_encoding mbfl_encoding_sjis_mac = { - mbfl_no_encoding_sjis_mac, - "SJIS-mac", - "Shift_JIS", - 
mbfl_encoding_sjis_mac_aliases, - mblen_table_sjismac, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_mac_wchar, - &vtbl_wchar_sjis_mac, - mb_sjismac_to_wchar, - mb_wchar_to_sjismac, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar = { - mbfl_no_encoding_sjis_mac, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_mac_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_mac, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_mac, - mbfl_filt_conv_wchar_sjis_mac_flush, - NULL, -}; - -static const char *mbfl_encoding_sjis_docomo_aliases[] = {"SJIS-DOCOMO", "shift_jis-imode", "x-sjis-emoji-docomo", NULL}; -static const char *mbfl_encoding_sjis_kddi_aliases[] = {"SJIS-KDDI", "shift_jis-kddi", "x-sjis-emoji-kddi", NULL}; -static const char *mbfl_encoding_sjis_sb_aliases[] = {"SJIS-SOFTBANK", "shift_jis-softbank", "x-sjis-emoji-softbank", NULL}; - -const mbfl_encoding mbfl_encoding_sjis_docomo = { - mbfl_no_encoding_sjis_docomo, - "SJIS-Mobile#DOCOMO", - "Shift_JIS", - mbfl_encoding_sjis_docomo_aliases, - mblen_table_sjis_mobile, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_docomo_wchar, - &vtbl_wchar_sjis_docomo, - mb_sjis_docomo_to_wchar, - mb_wchar_to_sjis_docomo, - NULL -}; - -const mbfl_encoding mbfl_encoding_sjis_kddi = { - mbfl_no_encoding_sjis_kddi, - "SJIS-Mobile#KDDI", - "Shift_JIS", - mbfl_encoding_sjis_kddi_aliases, - mblen_table_sjis_mobile, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_kddi_wchar, - &vtbl_wchar_sjis_kddi, - mb_sjis_kddi_to_wchar, - mb_wchar_to_sjis_kddi, - NULL -}; - -const mbfl_encoding mbfl_encoding_sjis_sb = { - mbfl_no_encoding_sjis_sb, - "SJIS-Mobile#SOFTBANK", - "Shift_JIS", - mbfl_encoding_sjis_sb_aliases, - mblen_table_sjis_mobile, - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis_sb_wchar, - &vtbl_wchar_sjis_sb, - mb_sjis_sb_to_wchar, - mb_wchar_to_sjis_sb, - NULL -}; - -const struct 
mbfl_convert_vtbl vtbl_sjis_docomo_wchar = { - mbfl_no_encoding_sjis_docomo, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_mobile_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_docomo, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_mobile, - mbfl_filt_conv_sjis_mobile_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar = { - mbfl_no_encoding_sjis_kddi, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_mobile_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_kddi, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_mobile, - mbfl_filt_conv_sjis_mobile_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar = { - mbfl_no_encoding_sjis_sb, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_sjis_mobile_wchar, - mbfl_filt_conv_sjis_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis_sb, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_sjis_mobile, - mbfl_filt_conv_sjis_mobile_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - s1--; \ - s1 >>= 1; \ - if ((c1) < 0x5f) { \ - s1 += 0x71; \ - } else { \ - s1 += 0xb1; \ - } \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - if (s1 < 0xa0) { \ - s1 -= 0x81; \ - } else { \ - s1 -= 0xc1; \ - } \ - s1 <<= 1; \ - s1 += 0x21; \ - s2 = c2; \ - if (s2 < 0x9f) { \ - if (s2 < 0x7f) { \ - s2++; 
\ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter) -{ - int s1, s2, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* ASCII */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xA0 && c < 0xE0) { /* Kana */ - CK((*filter->output_function)(0xFEC0 + c, filter->data)); - } else if (c > 0x80 && c < 0xF0 && c != 0xA0) { /* Kanji, first byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* Kanji, second byte */ - filter->status = 0; - int c1 = filter->cache; - if (c >= 0x40 && c <= 0xFC && c != 0x7F) { - SJIS_DECODE(c1, c, s1, s2); - w = (s1 - 0x21)*94 + s2 - 0x21; - if (w >= 0 && w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - } else { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - - return 0; -} - -static int mbfl_filt_conv_sjis_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status && filter->status != 4) { - (*filter->output_function)(MBFL_BAD_INPUT, filter->data); - } - filter->status = 0; - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1, s2; - - s1 = 0; - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - if 
(s1 <= 0) { - if (c == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xAF || c == 0x203E) { /* U+00AF is MACRON, U+203E is OVERLINE */ - s1 = 0x2131; /* FULLWIDTH MACRON */ - } else if (c == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215D; - } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } else if (c == 0) { - s1 = 0; - } else { - s1 = -1; - } - } else if (s1 >= 0x8080) { /* JIS X 0212; not supported */ - s1 = -1; - } - - if (s1 >= 0) { - if (s1 < 0x100) { /* Latin/Kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* Kanji */ - c1 = (s1 >> 8) & 0xFF; - c2 = s1 & 0xFF; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static const unsigned short sjis_decode_tbl1[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, -4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, 
-188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF -}; - -static const unsigned short sjis_decode_tbl2[] = { - 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 0xFFFF, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 0xFFFF, 0xFFFF, 0xFFFF -}; - -static size_t mb_sjis_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = 
p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - e--; /* Stop the main loop 1 byte short of the end of the input */ - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { /* Kana */ - *out++ = 0xFEC0 + c; - } else { - /* Don't need to check p < e; it's not possible to go out of bounds here, due to e-- above */ - unsigned char c2 = *p++; - /* This is only legal if c2 >= 0x40 && c2 <= 0xFC && c2 != 0x7F - * But the values in the above conversion tables have been chosen such that - * illegal values of c2 will always result in w > jisx0208_ucs_table_size, - * so we don't need to do a separate bounds check on c2 - * Likewise, the values in the conversion tables are such that illegal values - * for c will always result in w > jisx0208_ucs_table_size */ - uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2]; - if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - if (c == 0x80 || c == 0xA0 || c > 0xEF) { - p--; - } - *out++ = MBFL_BAD_INPUT; - } - } - } - - /* Finish up last byte of input string if there is one */ - if (p == e && out < limit) { - unsigned char c = *p++; - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p + 1; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && 
w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (s == 0) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xAF || w == 0x203E) { - s = 0x2131; /* FULLWIDTH MACRON */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } else if (w != 0) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - } else if (s >= 0x8080) { /* JIS X 0212; not supported */ - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); - continue; - } - - if (s <= 0xFF) { - /* Latin/Kana */ - out = mb_convert_buf_add(out, s); - } else { - /* Kanji */ - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s2; - SJIS_ENCODE(c1, c2, s, s2); - out = mb_convert_buf_add2(out, s, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static int mbfl_filt_conv_sjis_mac_wchar(int c, mbfl_convert_filter *filter) -{ - int i, j, n; - int c1, s, s1, s2, w; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80 && c != 0x5c) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0xa0 && c < 0xe0) { /* kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else if (c > 0x80 && c <= 0xed && c != 0xa0) { /* kanji first char */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x5c) { - CK((*filter->output_function)(0x00a5, filter->data)); - } else if (c == 0x80) { - 
CK((*filter->output_function)(0x005c, filter->data)); - } else if (c == 0xa0) { - CK((*filter->output_function)(0x00a0, filter->data)); - } else if (c == 0xfd) { - CK((*filter->output_function)(0x00a9, filter->data)); - } else if (c == 0xfe) { - CK((*filter->output_function)(0x2122, filter->data)); - } else if (c == 0xff) { - CK((*filter->output_function)(0x2026, filter->data)); - CK((*filter->output_function)(0xf87f, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* kanji second char */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0x40 && c <= 0xfc && c != 0x7f) { - w = 0; - SJIS_DECODE(c1, c, s1, s2); - s = (s1 - 0x21)*94 + s2 - 0x21; - if (s <= 0x89) { - if (s == 0x1c) { - w = 0x2014; /* EM DASH */ - } else if (s == 0x1f) { - w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 0x20) { - w = 0x301c; /* FULLWIDTH TILDE */ - } else if (s == 0x21) { - w = 0x2016; /* PARALLEL TO */ - } else if (s == 0x3c) { - w = 0x2212; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 0x50) { - w = 0x00a2; /* FULLWIDTH CENT SIGN */ - } else if (s == 0x51) { - w = 0x00a3; /* FULLWIDTH POUND SIGN */ - } else if (s == 0x89) { - w = 0x00ac; /* FULLWIDTH NOT SIGN */ - } - } - - /* apple gaiji area 0x8540 - 0x886d */ - if (w == 0) { - for (i=0; i<7; i++) { - if (s >= code_tbl[i][0] && s <= code_tbl[i][1]) { - w = s - code_tbl[i][0] + code_tbl[i][2]; - break; - } - } - } - - if (w == 0) { - - for (i=0; ioutput_function)(code_tbl_m[i][j], filter->data)); - } - w = code_tbl_m[i][n-1]; - break; - } - } - } - - if (w == 0) { - for (i=0; i<8; i++) { - if (s >= code_ofst_tbl[i][0] && s <= code_ofst_tbl[i][1]) { - w = code_map[i][s - code_ofst_tbl[i][0]]; - if (w == 0) { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - return 0; - } - s2 = 0; - if (s >= 0x043e && s <= 0x0441) { - s2 = 0xf87a; - } else if (s == 0x03b1 || s == 0x03b7) { - s2 = 0xf87f; - } else if (s == 0x04b8 || s == 0x04b9 || s 
== 0x04c4) { - s2 = 0x20dd; - } else if (s == 0x1ed9 || s == 0x1eda || s == 0x1ee8 || s == 0x1ef3 || - (s >= 0x1ef5 && s <= 0x1efb) || s == 0x1f05 || s == 0x1f06 || - s == 0x1f18 || (s >= 0x1ff2 && s <= 0x20a5)) { - s2 = 0xf87e; - } - if (s2 > 0) { - CK((*filter->output_function)(w, filter->data)); - w = s2; - } - break; - } - } - } - - if (w == 0 && s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_wchar_sjis_mac(int c, mbfl_convert_filter *filter) -{ - int i, c1, c2, s1 = 0, s2 = 0, mode; - - // a1: U+0000 -> U+046F - // a2: U+2000 -> U+30FF - // i: U+4E00 -> U+9FFF - // r: U+FF00 -> U+FFFF - - switch (filter->status) { - case 1: - c1 = filter->cache; - filter->cache = filter->status = 0; - - if (c == 0xf87a) { - for (i = 0; i < 4; i++) { - if (c1 == s_form_tbl[i+34+3+3]) { - s1 = s_form_sjis_tbl[i+34+3+3]; - break; - } - } - if (s1 <= 0) { - s2 = c1; - } - } else if (c == 0x20dd) { - for (i = 0; i < 3; i++) { - if (c1 == s_form_tbl[i+34+3]) { - s1 = s_form_sjis_tbl[i+34+3]; - break; - } - } - if (s1 <= 0) { - s2 = c1; - } - } else if (c == 0xf87f) { - for (i = 0; i < 3; i++) { - if (c1 == s_form_tbl[i+34]) { - s1 = s_form_sjis_tbl[i+34]; - break; - } - } - if (s1 <= 0) { - s2 = c1; - s1 = -1; - } - } else if (c == 0xf87e) { - for (i = 0; i < 34; i++) { - if (c1 == s_form_tbl[i]) { - s1 = s_form_sjis_tbl[i]; - break; - } - } - if (s1 <= 0) { - s2 = c1; - s1 = -1; - } - } else { - s2 = c1; - s1 = c; - } - - if (s2 > 0) { - for (i = 0; i < s_form_tbl_len; i++) { - if (c1 == s_form_tbl[i]) { - s1 = s_form_sjis_fallback_tbl[i]; - break; - } - } - } - - if (s1 >= 0) { - if (s1 < 0x100) { - CK((*filter->output_function)(s1, filter->data)); - } else { - 
CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s1 & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - if (s2 <= 0 || s1 == -1) { - break; - } - s1 = s2 = 0; - ZEND_FALLTHROUGH; - - case 0: - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - if (c == 0x5c) { - s1 = 0x80; - } else if (c == 0xa9) { - s1 = 0xfd; - } - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - if (c == 0x2122) { - s1 = 0xfe; - } else if (c == 0x2014) { - s1 = 0x213d; - } else if (c == 0x2116) { - s1 = 0x2c1d; - } - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } - - if (c >= 0x2000) { - for (i = 0; i < s_form_tbl_len; i++) { - if (c == s_form_tbl[i]) { - filter->status = 1; - filter->cache = c; - return 0; - } - } - - if (c == 0xf860 || c == 0xf861 || c == 0xf862) { - /* Apple 'transcoding hint' codepoints (from private use area) */ - filter->status = 2; - filter->cache = c; - return 0; - } - } - - if (s1 <= 0) { - if (c == 0xa0) { - s1 = 0x00a0; - } else if (c == 0xa5) { /* YEN SIGN */ - /* Unicode has codepoint 0xFFE5 for a fullwidth Yen sign; - * convert codepoint 0xA5 to halfwidth Yen sign */ - s1 = 0x5c; /* HALFWIDTH YEN SIGN */ - } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } - } - - if (s1 <= 0) { - for (i=0; i= wchar2sjis_mac_r_tbl[i][0] && c <= wchar2sjis_mac_r_tbl[i][1]) { - s1 = c - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2]; - break; - } - } - - if (s1 <= 0) { - for (i=0; i= wchar2sjis_mac_r_map[i][0] && c <= wchar2sjis_mac_r_map[i][1]) { - s1 = wchar2sjis_mac_code_map[i][c-wchar2sjis_mac_r_map[i][0]]; - break; - } - } - } - - if (s1 
<= 0) { - for (i=0; i 0) { - c1 = s1/94+0x21; - c2 = s1-94*(c1-0x21)+0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - } - - if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - c1 = 0; - - if (c == 0) { - s1 = 0; - } else if (s1 <= 0) { - s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x100) { /* latin or kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* kanji */ - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - break; - - case 2: - c1 = filter->cache; - filter->cache = 0; - filter->status = 0; - if (c1 == 0xf860) { - for (i = 0; i < 5; i++) { - if (c == code_tbl_m[i][2]) { - filter->cache = c | 0x10000; - filter->status = 3; - break; - } - } - } else if (c1 == 0xf861) { - for (i = 0; i < 3; i++) { - if (c == code_tbl_m[i+5][2]) { - filter->cache = c | 0x20000; - filter->status = 3; - break; - } - } - } else if (c1 == 0xf862) { - for (i = 0; i < 4; i++) { - if (c == code_tbl_m[i+5+3][2]) { - filter->cache = c | 0x40000; - filter->status = 3; - break; - } - } - } - - if (filter->status == 0) { - /* Didn't find any of expected codepoints after Apple transcoding hint */ - CK(mbfl_filt_conv_illegal_output(c1, filter)); - return mbfl_filt_conv_wchar_sjis_mac(c, filter); - } - break; - - case 3: - s1 = 0; - c1 = filter->cache & 0xffff; - mode = (filter->cache & 0xf0000) >> 16; - - filter->cache = filter->status = 0; - - if (mode == 0x1) { - for (i = 0; i < 5; i++) { - if (c1 == code_tbl_m[i][2] && c == code_tbl_m[i][3]) { - s1 = code_tbl_m[i][0]; - break; - } - } - - if (s1 > 0) { - c1 = s1/94+0x21; - c2 = s1-94*(c1-0x21)+0x21; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } else { - CK(mbfl_filt_conv_illegal_output(0xf860, filter)); - 
CK(mbfl_filt_conv_illegal_output(c1, filter)); - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } else if (mode == 0x2) { - for (i = 0; i < 3; i++) { - if (c1 == code_tbl_m[i+5][2] && c == code_tbl_m[i+5][3]) { - filter->cache = c | 0x20000; - filter->status = 4; - break; - } - } - } else if (mode == 0x4) { - for (i = 0; i < 4; i++) { - if (c1 == code_tbl_m[i+8][2] && c == code_tbl_m[i+8][3]) { - filter->cache = c | 0x40000; - filter->status = 4; - break; - } - } - } - break; - - case 4: - s1 = 0; - c1 = filter->cache & 0xffff; - mode = (filter->cache & 0xf0000) >> 16; - - filter->cache = 0; - filter->status = 0; - - if (mode == 0x2) { - for (i = 0; i < 3; i++) { - if (c1 == code_tbl_m[i+5][3] && c == code_tbl_m[i+5][4]) { - s1 = code_tbl_m[i+5][0]; - break; - } - } - - if (s1 > 0) { - c1 = s1/94+0x21; - c2 = s1-94*(c1-0x21)+0x21; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } else { - CK(mbfl_filt_conv_illegal_output(0xf861, filter)); - for (i = 0; i < 3; i++) { - if (c1 == code_tbl_m[i+5][3]) { - CK(mbfl_filt_conv_illegal_output(code_tbl_m[i+5][2], filter)); - break; - } - } - CK(mbfl_filt_conv_illegal_output(c1, filter)); - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } else if (mode == 0x4) { - for (i = 0; i < 4; i++) { - if (c1 == code_tbl_m[i+8][3] && c == code_tbl_m[i+8][4]) { - filter->cache = c | 0x40000; - filter->status = 5; - break; - } - } - } - break; - - case 5: - s1 = 0; - c1 = filter->cache & 0xffff; - mode = (filter->cache & 0xf0000) >> 16; - - filter->cache = filter->status = 0; - - if (mode == 0x4) { - for (i = 0; i < 4; i++) { - if (c1 == code_tbl_m[i+8][4] && c == code_tbl_m[i+8][5]) { - s1 = code_tbl_m[i+8][0]; - break; - } - } - - if (s1 > 0) { - c1 = s1/94+0x21; - c2 = s1-94*(c1-0x21)+0x21; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } else { - 
CK(mbfl_filt_conv_illegal_output(0xf862, filter)); - for (i = 0; i < 4; i++) { - if (c1 == code_tbl_m[i+8][4]) { - CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][2], filter)); - CK(mbfl_filt_conv_illegal_output( code_tbl_m[i+8][3], filter)); - break; - } - } - CK(mbfl_filt_conv_illegal_output(c1, filter)); - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_wchar_sjis_mac_flush(mbfl_convert_filter *filter) -{ - int i, c1, s1 = 0; - if (filter->status == 1 && filter->cache > 0) { - c1 = filter->cache; - for (i=0;i 0) { - CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s1 & 0xff, filter->data)); - } - } - filter->cache = 0; - filter->status = 0; - - if (filter->flush_function != NULL) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_sjismac_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - /* A single SJIS-Mac kuten code can convert to up to 5 Unicode codepoints, oh my! 
*/ - ZEND_ASSERT(bufsize >= 5); - - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x80 || c == 0xA0) { - if (c == 0x5C) { - *out++ = 0xA5; - } else if (c == 0x80) { - *out++ = 0x5C; - } else { - *out++ = c; - } - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else if (c <= 0xED) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - uint32_t w = sjis_decode_tbl1[c] + sjis_decode_tbl2[c2]; - - if (w <= 0x89) { - if (w == 0x1C) { - *out++ = 0x2014; /* EM DASH */ - continue; - } else if (w == 0x1F) { - *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - continue; - } else if (w == 0x20) { - *out++ = 0x301C; /* FULLWIDTH TILDE */ - continue; - } else if (w == 0x21) { - *out++ = 0x2016; /* PARALLEL TO */ - continue; - } else if (w == 0x3C) { - *out++ = 0x2212; /* FULLWIDTH HYPHEN-MINUS */ - continue; - } else if (w == 0x50) { - *out++ = 0xA2; /* FULLWIDTH CENT SIGN */ - continue; - } else if (w == 0x51) { - *out++ = 0xA3; /* FULLWIDTH POUND SIGN */ - continue; - } else if (w == 0x89) { - *out++ = 0xAC; /* FULLWIDTH NOT SIGN */ - continue; - } - } else { - if (w >= 0x2F0 && w <= 0x3A3) { - for (int i = 0; i < 7; i++) { - if (w >= code_tbl[i][0] && w <= code_tbl[i][1]) { - *out++ = w - code_tbl[i][0] + code_tbl[i][2]; - goto next_iteration; - } - } - } - - if (w >= 0x340 && w <= 0x523) { - for (int i = 0; i < code_tbl_m_len; i++) { - if (w == code_tbl_m[i][0]) { - int n = 5; - if (code_tbl_m[i][1] == 0xF860) { - n = 3; - } else if (code_tbl_m[i][1] == 0xF861) { - n = 4; - } - if ((limit - out) < n) { - p -= 2; - goto finished; - } - for (int j = 1; j <= n; j++) { - *out++ = code_tbl_m[i][j]; - } - goto next_iteration; - } - } - } - - if (w >= 0x3AC && w <= 0x20A5) { - for (int i = 0; i < 8; i++) { - if (w >= code_ofst_tbl[i][0] && w <= code_ofst_tbl[i][1]) { - uint32_t w2 = code_map[i][w - 
code_ofst_tbl[i][0]]; - if (!w2) { - *out++ = MBFL_BAD_INPUT; - goto next_iteration; - } - if ((limit - out) < 2) { - p -= 2; - goto finished; - } - *out++ = w2; - if (w >= 0x43E && w <= 0x441) { - *out++ = 0xF87A; - } else if (w == 0x3B1 || w == 0x3B7) { - *out++ = 0xF87F; - } else if (w == 0x4B8 || w == 0x4B9 || w == 0x4C4) { - *out++ = 0x20DD; - } else if (w == 0x1ED9 || w == 0x1EDA || w == 0x1EE8 || w == 0x1EF3 || (w >= 0x1EF5 && w <= 0x1EFB) || w == 0x1F05 || w == 0x1F06 || w == 0x1F18 || (w >= 0x1FF2 && w <= 0x20A5)) { - *out++ = 0xF87E; - } - goto next_iteration; - } - } - } - } - - if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - if (!w) - w = MBFL_BAD_INPUT; - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0xFD) { - *out++ = 0xA9; - } else if (c == 0xFE) { - *out++ = 0x2122; - } else if (c == 0xFF) { - if ((limit - out) < 2) { - p--; - break; - } - *out++ = 0x2026; - *out++ = 0xF87F; - } else { - *out++ = MBFL_BAD_INPUT; - } -next_iteration: ; - } - -finished: - *in_len = e - p; - *in = p; - return out - buf; -} - -static bool process_s_form(uint32_t w, uint32_t w2, unsigned int *s) -{ - if (w2 == 0xF87A) { - for (int i = 0; i < 4; i++) { - if (w == s_form_tbl[i+34+3+3]) { - *s = s_form_sjis_tbl[i+34+3+3]; - return true; - } - } - } else if (w2 == 0x20DD) { - for (int i = 0; i < 3; i++) { - if (w == s_form_tbl[i+34+3]) { - *s = s_form_sjis_tbl[i+34+3]; - return true; - } - } - } else if (w2 == 0xF87F) { - for (int i = 0; i < 3; i++) { - if (w == s_form_tbl[i+34]) { - *s = s_form_sjis_tbl[i+34]; - return true; - } - } - } else if (w2 == 0xF87E) { - for (int i = 0; i < 34; i++) { - if (w == s_form_tbl[i]) { - *s = s_form_sjis_tbl[i]; - return true; - } - } - } - - return false; -} - -/* For codepoints F860-F862, which are treated specially in MacJapanese */ -static int transcoding_hint_cp_width[3] = { 3, 4, 5 }; - -static void mb_wchar_to_sjismac(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned 
char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - - if (buf->state) { - w = buf->state & 0xFFFF; - if (buf->state & 0xFF000000L) { - goto resume_transcoding_hint; - } else { - buf->state = 0; - goto process_codepoint; - } - } - - while (len--) { - w = *in++; -process_codepoint: ; - unsigned int s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - if (w == 0x5C) { - s = 0x80; - } else if (w == 0xA9) { - s = 0xFD; - } else { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - if (w == 0x2122) { - s = 0xFE; - } else if (w == 0x2014) { - s = 0x213D; - } else if (w == 0x2116) { - s = 0x2C1D; - } else { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } - - if (w >= 0x2000) { - for (int i = 0; i < s_form_tbl_len; i++) { - if (w == s_form_tbl[i]) { - if (!len) { - if (end) { - s = s_form_sjis_fallback_tbl[i]; - if (s) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); - } - } else { - buf->state = w; - } - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - uint32_t w2 = *in++; - len--; - - if (!process_s_form(w, w2, &s)) { - in--; len++; - - for (int i = 0; i < s_form_tbl_len; i++) { - if (w == s_form_tbl[i]) { - s = s_form_sjis_fallback_tbl[i]; - break; - } - } - } - - if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - - goto next_iteration; - } - } - - if (w == 0xF860 || w == 0xF861 || w == 0xF862) { - /* Apple 
'transcoding hint' codepoints (from private use area) */ - if (!len) { - if (end) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); - } else { - buf->state = w; - } - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - - uint32_t w2 = *in++; - len--; - - for (int i = 0; i < code_tbl_m_len; i++) { - if (w == code_tbl_m[i][1] && w2 == code_tbl_m[i][2]) { - /* This might be a valid transcoding hint sequence */ - int index = 3; - - if (buf->state) { -resume_transcoding_hint: - i = buf->state >> 24; - index = (buf->state >> 16) & 0xFF; - buf->state = 0; - } - - int expected = transcoding_hint_cp_width[w - 0xF860]; - - while (index <= expected) { - if (!len) { - if (end) { - for (int j = 1; j < index; j++) { - MB_CONVERT_ERROR(buf, out, limit, code_tbl_m[i][j], mb_wchar_to_sjismac); - } - } else { - buf->state = (i << 24) | (index << 16) | (w & 0xFFFF); - } - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - - w2 = *in++; - len--; - - if (w2 != code_tbl_m[i][index]) { - /* Didn't match */ - for (int j = 1; j < index; j++) { - MB_CONVERT_ERROR(buf, out, limit, code_tbl_m[i][j], mb_wchar_to_sjismac); - } - MB_CONVERT_ERROR(buf, out, limit, w2, mb_wchar_to_sjismac); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - goto next_iteration; - } - - index++; - } - - /* Successful match, emit SJIS-mac bytes */ - s = code_tbl_m[i][0]; - unsigned int c1 = (s / 94) + 0x21, c2 = (s % 94) + 0x21, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - goto next_iteration; - } - } - - /* No valid transcoding hint sequence found */ - in--; len++; - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - continue; - } - } - - if (!s) { - if (w == 0xA0) { - s = 0xA0; - } else if (w == 0xA5) { /* YEN SIGN */ - /* Unicode has codepoint 0xFFE5 for a fullwidth Yen sign; - * convert codepoint 0xA5 to halfwidth Yen sign */ - s = 0x5C; /* HALFWIDTH 
YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else { - for (int i = 0; i < wchar2sjis_mac_r_tbl_len; i++) { - if (w >= wchar2sjis_mac_r_tbl[i][0] && w <= wchar2sjis_mac_r_tbl[i][1]) { - s = w - wchar2sjis_mac_r_tbl[i][0] + wchar2sjis_mac_r_tbl[i][2]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto found_kuten_code; - } - } - - for (int i = 0; i < wchar2sjis_mac_r_map_len; i++) { - if (w >= wchar2sjis_mac_r_map[i][0] && w <= wchar2sjis_mac_r_map[i][1]) { - s = wchar2sjis_mac_code_map[i][w - wchar2sjis_mac_r_map[i][0]]; - if (s) { - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto found_kuten_code; - } - } - } - - for (int i = 0; i < wchar2sjis_mac_wchar_tbl_len; i++) { - if (w == wchar2sjis_mac_wchar_tbl[i][0]) { - s = wchar2sjis_mac_wchar_tbl[i][1]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto found_kuten_code; - } - } - } - } - -found_kuten_code: - if ((!s && w) || s >= 0x8080) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjismac); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - -next_iteration: ; - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static const char nflags_s[10][2] = {"CN","DE","ES","FR","GB","IT","JP","KR","RU","US"}; -static const int nflags_code_kddi[10] = {0x2549, 0x2546, 0x24c0, 0x2545, 0x2548, 0x2547, 0x2750, 0x254a, 0x24c1, 0x27f7}; -static const int nflags_code_sb[10] = {0x2b0a, 0x2b05, 0x2b08, 0x2b04, 0x2b07, 0x2b06, 0x2b02, 0x2b0b, 0x2b09, 0x2b03}; - -const unsigned short mbfl_docomo2uni_pua[4][3] = { - {0x28c2, 0x292f, 0xe63e}, - {0x2930, 0x2934, 0xe6ac}, - {0x2935, 0x2951, 0xe6b1}, - {0x2952, 0x29db, 0xe6ce}, -}; - -const unsigned short mbfl_kddi2uni_pua[7][3] = { - {0x26ec, 0x2838, 0xe468}, - 
{0x284c, 0x2863, 0xe5b5}, - {0x24b8, 0x24ca, 0xe5cd}, - {0x24cb, 0x2545, 0xea80}, - {0x2839, 0x284b, 0xeafb}, - {0x2546, 0x25c0, 0xeb0e}, - {0x25c1, 0x25c6, 0xeb89}, -}; - -const unsigned short mbfl_sb2uni_pua[6][3] = { - {0x27a9, 0x2802, 0xe101}, - {0x2808, 0x2861, 0xe201}, - {0x2921, 0x297a, 0xe001}, - {0x2980, 0x29cc, 0xe301}, - {0x2a99, 0x2ae4, 0xe401}, - {0x2af8, 0x2b35, 0xe501}, -}; - -const unsigned short mbfl_kddi2uni_pua_b[8][3] = { - {0x24b8, 0x24f6, 0xec40}, - {0x24f7, 0x2573, 0xec80}, - {0x2574, 0x25b2, 0xed40}, - {0x25b3, 0x25c6, 0xed80}, - {0x26ec, 0x272a, 0xef40}, - {0x272b, 0x27a7, 0xef80}, - {0x27a8, 0x27e6, 0xf040}, - {0x27e7, 0x2863, 0xf080}, -}; - -/* Regional Indicator Unicode codepoints are from 0x1F1E6-0x1F1FF - * These correspond to the letters A-Z - * To display the flag emoji for a country, two unicode codepoints are combined, - * which correspond to the two-letter code for that country - * This macro converts uppercase ASCII values to Regional Indicator codepoints */ -#define NFLAGS(c) (0x1F1A5+(int)(c)) - -int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n) -{ - for (int i = 0; i < n; i++) { - if (map[i][0] <= c && c <= map[i][1]) { - *w = c - map[i][0] + map[i][2]; - return 1; - } - } - return 0; -} - -int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n) -{ - /* Convert in reverse direction */ - for (int i = 0; i < n; i++) { - if (map[i][2] <= c && c <= map[i][2] - map[i][0] + map[i][1]) { - *w = c + map[i][0] - map[i][2]; - return 1; - } - } - return 0; -} - -/* number -> (ku*94)+ten value for telephone keypad character */ -#define DOCOMO_KEYPAD(n) ((n) == 0 ? 0x296F : (0x2965 + (n))) -#define DOCOMO_KEYPAD_HASH 0x2964 - -#define EMIT_KEYPAD_EMOJI(c) do { *snd = (c); return 0x20E3; } while(0) - -/* Unicode codepoints for emoji are above 0x1F000, but we only store 16-bits - * in our tables. Therefore, add 0x10000 to recover the true values. 
- * - * Again, for some emoji which are not supported by Unicode, we use codepoints - * in the Private Use Area above 0xFE000. Again, add 0xF0000 to recover the - * true value. */ -static inline int convert_emoji_cp(int cp) -{ - if (cp > 0xF000) - return cp + 0x10000; - else if (cp > 0xE000) - return cp + 0xF0000; - return cp; -} - -int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd) -{ - /* All three mobile vendors had emoji for numbers on a telephone keypad - * Unicode doesn't have those, but it has a combining character which puts - * a 'keypad button' around the following character, making it look like - * a key on a telephone or keyboard. That combining char is codepoint 0x20E3. */ - if (s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) { - if ((s >= DOCOMO_KEYPAD(1) && s <= DOCOMO_KEYPAD(9)) || s == DOCOMO_KEYPAD(0) || s == DOCOMO_KEYPAD_HASH) { - EMIT_KEYPAD_EMOJI(convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min])); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_docomo1[s - mb_tbl_code2uni_docomo1_min]); - } - } - return 0; -} - -#define EMIT_FLAG_EMOJI(country) do { *snd = NFLAGS((country)[0]); return NFLAGS((country)[1]); } while(0) - -static const char nflags_kddi[6][2] = {"FR", "DE", "IT", "GB", "CN", "KR"}; - -int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd) -{ - if (s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi1_max) { - if (s == 0x24C0) { /* Spain */ - EMIT_FLAG_EMOJI("ES"); - } else if (s == 0x24C1) { /* Russia */ - EMIT_FLAG_EMOJI("RU"); - } else if (s >= 0x2545 && s <= 0x254A) { - EMIT_FLAG_EMOJI(nflags_kddi[s - 0x2545]); - } else if (s == 0x25BC) { - EMIT_KEYPAD_EMOJI('#'); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_kddi1[s - mb_tbl_code2uni_kddi1_min]); - } - } else if (s >= mb_tbl_code2uni_kddi2_min && s <= mb_tbl_code2uni_kddi2_max) { - if (s == 0x2750) { /* Japan */ - EMIT_FLAG_EMOJI("JP"); - } else if (s >= 0x27A6 && s <= 0x27AE) { - 
EMIT_KEYPAD_EMOJI(s - 0x27A6 + '1'); - } else if (s == 0x27F7) { /* United States */ - EMIT_FLAG_EMOJI("US"); - } else if (s == 0x2830) { - EMIT_KEYPAD_EMOJI('0'); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_kddi2[s - mb_tbl_code2uni_kddi2_min]); - } - } - return 0; -} - -static const char nflags_sb[10][2] = {"JP", "US", "FR", "DE", "IT", "GB", "ES", "RU", "CN", "KR"}; - -int mbfilter_sjis_emoji_sb2unicode(int s, int *snd) -{ - if (s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb1_max) { - if (s == 0x2817 || (s >= 0x2823 && s <= 0x282C)) { - EMIT_KEYPAD_EMOJI(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_sb1[s - mb_tbl_code2uni_sb1_min]); - } - } else if (s >= mb_tbl_code2uni_sb2_min && s <= mb_tbl_code2uni_sb2_max) { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_sb2[s - mb_tbl_code2uni_sb2_min]); - } else if (s >= mb_tbl_code2uni_sb3_min && s <= mb_tbl_code2uni_sb3_max) { - if (s >= 0x2B02 && s <= 0x2B0B) { - EMIT_FLAG_EMOJI(nflags_sb[s - 0x2B02]); - } else { - *snd = 0; - return convert_emoji_cp(mb_tbl_code2uni_sb3[s - mb_tbl_code2uni_sb3_min]); - } - } - return 0; -} - -int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter) -{ - /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji - * to a sequence of 2 codepoints, one of which is a combining character which - * adds the 'key' image around the other - * - * In the other direction, look for such sequences and convert them to a - * single emoji */ - if (filter->status == 1) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c == 0x20E3) { - if (c1 == '#') { - *s1 = 0x2964; - } else if (c1 == '0') { - *s1 = 0x296F; - } else { /* Previous character was '1'-'9' */ - *s1 = 0x2966 + (c1 - '1'); - } - return 1; - } else { - /* This character wasn't combining character to make keypad symbol, - * so pass the previous character through... 
and proceed to process the - * current character as usual - * (Single-byte ASCII characters are valid in Shift-JIS...) */ - CK((*filter->output_function)(c1, filter->data)); - } - } - - if (c == '#' || (c >= '0' && c <= '9')) { - filter->status = 1; - filter->cache = c; - return 0; - } - - if (c == 0xA9) { /* Copyright sign */ - *s1 = 0x29B5; - return 1; - } else if (c == 0x00AE) { /* Registered sign */ - *s1 = 0x29BA; - return 1; - } else if (c >= mb_tbl_uni_docomo2code2_min && c <= mb_tbl_uni_docomo2code2_max) { - int i = mbfl_bisec_srch2(c, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len); - if (i >= 0) { - *s1 = mb_tbl_uni_docomo2code2_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_docomo2code3_min && c <= mb_tbl_uni_docomo2code3_max) { - int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len); - if (i >= 0) { - *s1 = mb_tbl_uni_docomo2code3_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_docomo2code5_min && c <= mb_tbl_uni_docomo2code5_max) { - int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len); - if (i >= 0) { - *s1 = mb_tbl_uni_docomo2code5_val[i]; - return 1; - } - } - return 0; -} - -int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c == 0x20E3) { - if (c1 == '#') { - *s1 = 0x25BC; - } else if (c1 == '0') { - *s1 = 0x2830; - } else { /* Previous character was '1'-'9' */ - *s1 = 0x27a6 + (c1 - '1'); - } - return 1; - } else { - CK((*filter->output_function)(c1, filter->data)); - } - } else if (filter->status == 2) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (c1 == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) { - *s1 = nflags_code_kddi[i]; - return 1; - } - } - } - - /* If none of 
the KDDI national flag emoji matched, then we have no way - * to convert the previous codepoint... */ - mbfl_filt_conv_illegal_output(c1, filter); - } - - if (c == '#' || (c >= '0' && c <= '9')) { - filter->status = 1; - filter->cache = c; - return 0; - } else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */ - filter->status = 2; - filter->cache = c; - return 0; - } - - if (c == 0xA9) { /* Copyright sign */ - *s1 = 0x27DC; - return 1; - } else if (c == 0xAE) { /* Registered sign */ - *s1 = 0x27DD; - return 1; - } else if (c >= mb_tbl_uni_kddi2code2_min && c <= mb_tbl_uni_kddi2code2_max) { - int i = mbfl_bisec_srch2(c, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code2_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_kddi2code3_min && c <= mb_tbl_uni_kddi2code3_max) { - int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code3_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_kddi2code5_min && c <= mb_tbl_uni_kddi2code5_max) { - int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); - if (i >= 0) { - *s1 = mb_tbl_uni_kddi2code5_val[i]; - return 1; - } - } - return 0; -} - -int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c == 0x20E3) { - if (c1 == '#') { - *s1 = 0x2817; - } else if (c1 == '0') { - *s1 = 0x282c; - } else { /* Previous character was '1'-'9' */ - *s1 = 0x2823 + (c1 - '1'); - } - return 1; - } else { - (*filter->output_function)(c1, filter->data); - } - } else if (filter->status == 2) { - int c1 = filter->cache; - filter->cache = filter->status = 0; - if (c >= NFLAGS('B') && c <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (c1 == NFLAGS(nflags_s[i][0]) && c == NFLAGS(nflags_s[i][1])) { - *s1 = 
nflags_code_sb[i]; - return 1; - } - } - } - - /* If none of the SoftBank national flag emoji matched, then we have no way - * to convert the previous codepoint... */ - mbfl_filt_conv_illegal_output(c1, filter); - } - - if (c == '#' || (c >= '0' && c <= '9')) { - filter->status = 1; - filter->cache = c; - return 0; - } else if (c >= NFLAGS('C') && c <= NFLAGS('U')) { /* C for CN, U for US */ - filter->status = 2; - filter->cache = c; - return 0; - } - - if (c == 0xA9) { /* Copyright sign */ - *s1 = 0x2855; - return 1; - } else if (c == 0xAE) { /* Registered sign */ - *s1 = 0x2856; - return 1; - } else if (c >= mb_tbl_uni_sb2code2_min && c <= mb_tbl_uni_sb2code2_max) { - int i = mbfl_bisec_srch2(c, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len); - if (i >= 0) { - *s1 = mb_tbl_uni_sb2code2_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_sb2code3_min && c <= mb_tbl_uni_sb2code3_max) { - int i = mbfl_bisec_srch2(c - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len); - if (i >= 0) { - *s1 = mb_tbl_uni_sb2code3_value[i]; - return 1; - } - } else if (c >= mb_tbl_uni_sb2code5_min && c <= mb_tbl_uni_sb2code5_max) { - int i = mbfl_bisec_srch2(c - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len); - if (i >= 0) { - *s1 = mb_tbl_uni_sb2code5_val[i]; - return 1; - } - } - return 0; -} - -static int mbfl_filt_conv_sjis_mobile_wchar(int c, mbfl_convert_filter *filter) -{ - int c1, s, s1, s2, w, snd = 0; - - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* ASCII */ - if (filter->from == &mbfl_encoding_sjis_sb && c == 0x1B) { - /* ESC; escape sequences were used on older SoftBank phones for emoji */ - filter->cache = c; - filter->status = 2; - } else { - CK((*filter->output_function)(c, filter->data)); - } - } else if (c > 0xA0 && c < 0xE0) { /* Kana */ - CK((*filter->output_function)(0xFEC0 + c, filter->data)); - } else if (c > 0x80 && c < 0xFD && c != 0xA0) { /* Kanji, first byte */ - filter->status = 1; - filter->cache = c; - } else 
{ - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* Kanji, second byte */ - filter->status = 0; - c1 = filter->cache; - if (c >= 0x40 && c <= 0xFC && c != 0x7F) { - w = 0; - SJIS_DECODE(c1, c, s1, s2); - s = ((s1 - 0x21) * 94) + s2 - 0x21; - if (s <= 137) { - if (s == 31) { - w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - } else if (s == 32) { - w = 0xFF5E; /* FULLWIDTH TILDE */ - } else if (s == 33) { - w = 0x2225; /* PARALLEL TO */ - } else if (s == 60) { - w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - } else if (s == 80) { - w = 0xFFE0; /* FULLWIDTH CENT SIGN */ - } else if (s == 81) { - w = 0xFFE1; /* FULLWIDTH POUND SIGN */ - } else if (s == 137) { - w = 0xFFE2; /* FULLWIDTH NOT SIGN */ - } - } - if (w == 0) { - if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ - w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; - } else if (s >= 0 && s < jisx0208_ucs_table_size) { /* X 0208 */ - w = jisx0208_ucs_table[s]; - } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ - w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; - } - - /* Emoji */ - if (filter->from == &mbfl_encoding_sjis_docomo && s >= mb_tbl_code2uni_docomo1_min && s <= mb_tbl_code2uni_docomo1_max) { - w = mbfilter_sjis_emoji_docomo2unicode(s, &snd); - if (snd > 0) { - CK((*filter->output_function)(snd, filter->data)); - } - } else if (filter->from == &mbfl_encoding_sjis_kddi && s >= mb_tbl_code2uni_kddi1_min && s <= mb_tbl_code2uni_kddi2_max) { - w = mbfilter_sjis_emoji_kddi2unicode(s, &snd); - if (snd > 0) { - CK((*filter->output_function)(snd, filter->data)); - } - } else if (filter->from == &mbfl_encoding_sjis_sb && s >= mb_tbl_code2uni_sb1_min && s <= mb_tbl_code2uni_sb3_max) { - w = mbfilter_sjis_emoji_sb2unicode(s, &snd); - if (snd > 0) { - CK((*filter->output_function)(snd, filter->data)); - } - } - - if (w == 0) { - if (s >= cp932ext3_ucs_table_min && s < 
cp932ext3_ucs_table_max) { /* vendor ext3 (115ku - 119ku) */ - w = cp932ext3_ucs_table[s - cp932ext3_ucs_table_min]; - } else if (s >= (94*94) && s < (114*94)) { /* user (95ku - 114ku) */ - w = s - (94*94) + 0xe000; - } - } - } - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC: Softbank Emoji */ - case 2: - if (c == '$') { - filter->cache = c; - filter->status++; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - filter->status = filter->cache = 0; - } - break; - - /* ESC $: Softbank Emoji */ - case 3: - if ((c >= 'E' && c <= 'G') || (c >= 'O' && c <= 'Q')) { - filter->cache = c; - filter->status++; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - filter->status = filter->cache = 0; - } - break; - - /* ESC $ [GEFOPQ]: Softbank Emoji */ - case 4: - c1 = filter->cache; - if (c == 0xF) { /* Terminate sequence of emoji */ - filter->status = filter->cache = 0; - return 0; - } else { - if (c1 == 'G' && c >= 0x21 && c <= 0x7a) { - s1 = (0x91 - 0x21) * 94; - } else if (c1 == 'E' && c >= 0x21 && c <= 0x7A) { - s1 = (0x8D - 0x21) * 94; - } else if (c1 == 'F' && c >= 0x21 && c <= 0x7A) { - s1 = (0x8E - 0x21) * 94; - } else if (c1 == 'O' && c >= 0x21 && c <= 0x6D) { - s1 = (0x92 - 0x21) * 94; - } else if (c1 == 'P' && c >= 0x21 && c <= 0x6C) { - s1 = (0x95 - 0x21) * 94; - } else if (c1 == 'Q' && c >= 0x21 && c <= 0x5E) { - s1 = (0x96 - 0x21) * 94; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - filter->status = filter->cache = 0; - return 0; - } - - w = mbfilter_sjis_emoji_sb2unicode(s1 + c - 0x21, &snd); - if (w > 0) { - if (snd > 0) { - CK((*filter->output_function)(snd, filter->data)); - } - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - filter->status = filter->cache = 0; - 
} - } - } - - return 0; -} - -static int mbfl_filt_conv_wchar_sjis_mobile(int c, mbfl_convert_filter *filter) -{ - int c1, c2, s1 = 0, s2 = 0; - - if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { - s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; - } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { - s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; - } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { - s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; - } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { - s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; - } else if (c >= 0xE000 && c < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s1 = c - 0xE000; - c1 = (s1 / 94) + 0x7F; - c2 = (s1 % 94) + 0x21; - s1 = (c1 << 8) | c2; - s2 = 1; - } - - if (s1 <= 0) { - if (c == 0xA5) { /* YEN SIGN */ - s1 = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (c == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s1 = 0x2140; - } else if (c == 0x2225) { /* PARALLEL TO */ - s1 = 0x2142; - } else if (c == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s1 = 0x215D; - } else if (c == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s1 = 0x2171; - } else if (c == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s1 = 0x2172; - } else if (c == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s1 = 0x224C; - } - } - - if ((s1 <= 0) || (s1 >= 0x8080 && s2 == 0)) { /* not found or X 0212 */ - s1 = -1; - - /* CP932 vendor ext1 (13ku) */ - for (c1 = 0; c1 < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; c1++) { - if (c == cp932ext1_ucs_table[c1]) { - s1 = (((c1 / 94) + 0x2D) << 8) + (c1 % 94) + 0x21; - break; - } - } - - if (s1 <= 0) { - /* CP932 vendor ext2 (115ku - 119ku) */ - for (c1 = 0; c1 < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; c1++) { - if (c == cp932ext2_ucs_table[c1]) { - s1 = (((c1 / 94) + 0x79) << 8) + (c1 % 94) + 0x21; - break; - } - } - } - - if (c == 0) { - s1 = 0; - } - } - - if ((filter->to == &mbfl_encoding_sjis_docomo && 
mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter)) || - (filter->to == &mbfl_encoding_sjis_kddi && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter)) || - (filter->to == &mbfl_encoding_sjis_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter))) { - s1 = (((s1 / 94) + 0x21) << 8) | ((s1 % 94) + 0x21); - } - - if (filter->status) { - return 0; - } - - if (s1 >= 0) { - if (s1 < 0x100) { /* Latin/Kana */ - CK((*filter->output_function)(s1, filter->data)); - } else { /* Kanji */ - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter) -{ - int c1 = filter->cache; - if (filter->status == 1 && (c1 == '#' || (c1 >= '0' && c1 <= '9'))) { - filter->cache = filter->status = 0; - CK((*filter->output_function)(c1, filter->data)); - } else if (filter->status == 2) { - /* First of a pair of Regional Indicator codepoints came at the end of a string */ - filter->cache = filter->status = 0; - mbfl_filt_conv_illegal_output(c1, filter); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -static const unsigned short sjis_mobile_decode_tbl1[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFFFF, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 0xFFFF, -6016, -5828, -5640, -5452, -5264, -5076, -4888, 
-4700, -4512, -4324, -4136, -3948, -3760, -3572, -3384, -3196, -3008, -2820, -2632, -2444, -2256, -2068, -1880, -1692, -1504, -1316, -1128, -940, -752, -564, -376, -188, 0, 188, 376, 564, 752, 940, 1128, 1316, 1504, 1692, 1880, 2068, 2256, 2444, 2632, 2820, 3008, 3196, 3384, 3572, 3760, 3948, 4136, 4324, 4512, 4700, 4888, 5076, 5264, 5452, 5640, 5828, 6016, 6204, 6392, 6580, 6768, 6956, 7144, 7332, 7520, 7708, 7896, 8084, 8272, 8460, 8648, 8836, 9024, 9212, 9400, 9588, 9776, 9964, 10152, 10340, 10528, 10716, 10904, 11092, 0xFFFF, 0xFFFF, 0xFFFF -}; - -static size_t mb_sjis_docomo_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - /* Leave one extra space available in output buffer, since some iterations of - * main loop (below) may emit two wchars */ - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - /* Kana */ - *out++ = 0xFEC0 + c; - } else { - /* Kanji */ - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - - if (w <= 137) { - if (w == 31) { - *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - continue; - } else if (w == 32) { - *out++ = 0xFF5E; /* FULLWIDTH TILDE */ - continue; - } else if (w == 33) { - *out++ = 0x2225; /* PARALLEL TO */ - continue; - } else if (w == 60) { - *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - continue; - } else if (w == 80) { - *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ - continue; - } else if (w == 81) { - *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ - continue; - } else if (w == 137) { - *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ - continue; - } - } - - if (w >= mb_tbl_code2uni_docomo1_min && w <= mb_tbl_code2uni_docomo1_max) { - int snd = 0; - w = mbfilter_sjis_emoji_docomo2unicode(w, &snd); - if (snd) { - 
*out++ = snd; - } - } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; - } else if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; - } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; - } else if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } else { - if (c == 0x80 || c == 0xA0 || c >= 0xFD) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis_docomo(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0)); - - uint32_t w; - unsigned int s = 0; - - if (buf->state) { - /* Continue what we were doing on the previous call */ - w = buf->state; - buf->state = 0; - goto reprocess_wchar; - } - - while (len--) { - w = *in++; -reprocess_wchar: - s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s = w - 0xE000; - s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); - goto process_emoji; - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } 
else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if (w && (!s || s >= 0x8080)) { - s = 0; - - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (w == cp932ext2_ucs_table[i]) { - s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - } - -process_emoji: - /* When converting SJIS-Mobile to Unicode, we convert keypad symbol emoji - * to a sequence of 2 codepoints, one of which is a combining character which - * adds the 'key' image around the other - * - * In the other direction, look for such sequences and convert them to a - * single emoji */ - if (w == '#' || (w >= '0' && w <= '9')) { - if (!len) { - if (end) { - goto emit_output; - } else { - /* If we are at the end of the current buffer of codepoints, but another - * buffer is coming, then remember that we have to reprocess `w` */ - buf->state = w; - break; - } - } - uint32_t w2 = *in++; len--; - if (w2 == 0x20E3) { - if (w == '#') { - s = 0x2964; - } else if (w == '0') { - s = 0x296F; - } else { /* Previous character was '1'-'9' */ - s = 0x2966 + (w - '1'); - } - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } else { - in--; len++; - } - } else if (w == 0xA9) { /* Copyright sign */ - s = (((0x29B5 / 94) + 0x21) << 8) | ((0x29B5 % 94) + 0x21); - } else if (w == 0xAE) { /* Registered sign */ - s = (((0x29BA / 94) + 0x21) << 8) | ((0x29BA % 94) + 0x21); - } else if (w >= 
mb_tbl_uni_docomo2code2_min && w <= mb_tbl_uni_docomo2code2_max) { - int i = mbfl_bisec_srch2(w, mb_tbl_uni_docomo2code2_key, mb_tbl_uni_docomo2code2_len); - if (i >= 0) { - s = mb_tbl_uni_docomo2code2_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_docomo2code3_min && w <= mb_tbl_uni_docomo2code3_max) { - int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_docomo2code3_key, mb_tbl_uni_docomo2code3_len); - if (i >= 0) { - s = mb_tbl_uni_docomo2code3_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_docomo2code5_min && w <= mb_tbl_uni_docomo2code5_max) { - int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_docomo2code5_key, mb_tbl_uni_docomo2code5_len); - if (i >= 0) { - s = mb_tbl_uni_docomo2code5_val[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } - -emit_output: - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_docomo); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static size_t mb_sjis_kddi_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - /* Kana */ - *out++ = 0xFEC0 + c; - } else { - /* Kanji */ - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - - if (w <= 137) { - if (w == 31) { - *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - continue; - } else if 
(w == 32) { - *out++ = 0xFF5E; /* FULLWIDTH TILDE */ - continue; - } else if (w == 33) { - *out++ = 0x2225; /* PARALLEL TO */ - continue; - } else if (w == 60) { - *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - continue; - } else if (w == 80) { - *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ - continue; - } else if (w == 81) { - *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ - continue; - } else if (w == 137) { - *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ - continue; - } - } - - if (w >= mb_tbl_code2uni_kddi1_min && w <= mb_tbl_code2uni_kddi2_max) { - int snd = 0; - w = mbfilter_sjis_emoji_kddi2unicode(w, &snd); - if (!w) { - w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } - } else if (snd) { - *out++ = snd; - } - } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; - } else if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; - } else if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; - } else if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } else { - if (c == 0x80 || c == 0xA0 || c >= 0xFD) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 
1 : 0)); - - uint32_t w; - unsigned int s = 0; - - if (buf->state) { - w = buf->state; - buf->state = 0; - goto reprocess_wchar; - } - - while (len--) { - w = *in++; -reprocess_wchar: - s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s = w - 0xE000; - s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); - goto process_emoji; - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3c) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if (w && (!s || s >= 0x8080)) { - s = 0; - - for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (w == cp932ext2_ucs_table[i]) { - s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - } - -process_emoji: - if (w == '#' || (w >= '0' && w <= '9')) { - if (!len) { - if (end) { - goto emit_output; - } else { - /* If we are at the end of the current buffer of codepoints, but another - * 
buffer is coming, then remember that we have to reprocess `w` */ - buf->state = w; - break; - } - } - uint32_t w2 = *in++; len--; - if (w2 == 0x20E3) { - if (w == '#') { - s = 0x25BC; - } else if (w == '0') { - s = 0x2830; - } else { /* Previous character was '1'-'9' */ - s = 0x27A6 + (w - '1'); - } - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } else { - in--; len++; - } - } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */ - if (!len) { - if (end) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); - } else { - /* Reprocess `w` when this function is called again with another buffer - * of wchars */ - buf->state = w; - } - break; - } - uint32_t w2 = *in++; len--; - if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { - s = nflags_code_kddi[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto emit_output; - } - } - } - in--; len++; - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - continue; - } else if (w == 0xA9) { /* Copyright sign */ - s = (((0x27DC / 94) + 0x21) << 8) | ((0x27DC % 94) + 0x21); - } else if (w == 0xAE) { /* Registered sign */ - s = (((0x27DD / 94) + 0x21) << 8) | ((0x27DD % 94) + 0x21); - } else if (w >= mb_tbl_uni_kddi2code2_min && w <= mb_tbl_uni_kddi2code2_max) { - int i = mbfl_bisec_srch2(w, mb_tbl_uni_kddi2code2_key, mb_tbl_uni_kddi2code2_len); - if (i >= 0) { - s = mb_tbl_uni_kddi2code2_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_kddi2code3_min && w <= mb_tbl_uni_kddi2code3_max) { - int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_kddi2code3_key, mb_tbl_uni_kddi2code3_len); - if (i >= 0) { - s = mb_tbl_uni_kddi2code3_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_kddi2code5_min && w <= 
mb_tbl_uni_kddi2code5_max) { - int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_kddi2code5_key, mb_tbl_uni_kddi2code5_len); - if (i >= 0) { - s = mb_tbl_uni_kddi2code5_val[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } - -emit_output: - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_kddi); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static size_t mb_sjis_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - if (*state) { - goto softbank_emoji_escapes; - } - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c == 0x1B) { - /* Escape sequence */ - if (p == e || *p++ != '$' || p == e) { - *out++ = MBFL_BAD_INPUT; - continue; - } - unsigned char c2 = *p++; - if ((c2 < 'E' || c2 > 'G') && (c2 < 'O' || c2 > 'Q')) { - *out++ = MBFL_BAD_INPUT; - continue; - } - /* Escape sequence was valid, next should be a series of specially - * encoded Softbank emoji */ - *state = c2; - -softbank_emoji_escapes: - while (p < e && out < limit) { - c = *p++; - if (c == 0xF) { - *state = 0; - break; - } - unsigned int s = 0; - if (*state == 'G' && c >= 0x21 && c <= 0x7A) { - s = (0x91 - 0x21) * 94; - } else if (*state == 'E' && c >= 0x21 && c <= 0x7A) { - s = (0x8D - 0x21) * 94; - } else if (*state == 'F' && c >= 0x21 && c <= 0x7A) { - s = (0x8E - 0x21) * 94; - } else if (*state == 'O' && c >= 0x21 && c <= 0x6D) { - s = (0x92 - 0x21) * 94; - } else if (*state == 'P' && c >= 0x21 && c <= 0x6C) { - s = (0x95 - 0x21) * 94; - } else if (*state == 'Q' && c >= 0x21 && c <= 0x5E) { - s 
= (0x96 - 0x21) * 94; - } else { - *out++ = MBFL_BAD_INPUT; - *state = 0; - break; - } - - int snd = 0; - uint32_t w = mbfilter_sjis_emoji_sb2unicode(s + c - 0x21, &snd); - if (w) { - if (snd) { - *out++ = snd; - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - *state = 0; - break; - } - } - } else if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xDF) { - /* Kana */ - *out++ = 0xFEC0 + c; - } else { - /* Kanji */ - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - uint32_t w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - - if (w <= 137) { - if (w == 31) { - *out++ = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ - continue; - } else if (w == 32) { - *out++ = 0xFF5E; /* FULLWIDTH TILDE */ - continue; - } else if (w == 33) { - *out++ = 0x2225; /* PARALLEL TO */ - continue; - } else if (w == 60) { - *out++ = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ - continue; - } else if (w == 80) { - *out++ = 0xFFE0; /* FULLWIDTH CENT SIGN */ - continue; - } else if (w == 81) { - *out++ = 0xFFE1; /* FULLWIDTH POUND SIGN */ - continue; - } else if (w == 137) { - *out++ = 0xFFE2; /* FULLWIDTH NOT SIGN */ - continue; - } - } - - if (w >= mb_tbl_code2uni_sb1_min && w <= mb_tbl_code2uni_sb3_max) { - int snd = 0; - w = mbfilter_sjis_emoji_sb2unicode(w, &snd); - if (!w) { - w = sjis_mobile_decode_tbl1[c] + sjis_decode_tbl2[c2]; - if (w >= cp932ext3_ucs_table_min && w < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; - } else if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } - } else if (snd) { - *out++ = snd; - } - } else if (w >= cp932ext1_ucs_table_min && w < cp932ext1_ucs_table_max) { - w = cp932ext1_ucs_table[w - cp932ext1_ucs_table_min]; - } else if (w < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[w]; - } else if (w >= cp932ext2_ucs_table_min && w < cp932ext2_ucs_table_max) { - w = cp932ext2_ucs_table[w - cp932ext2_ucs_table_min]; - } else if (w >= cp932ext3_ucs_table_min && 
w < cp932ext3_ucs_table_max) { - w = cp932ext3_ucs_table[w - cp932ext3_ucs_table_min]; - } else if (w >= (94*94) && w < (114*94)) { - w = w - (94*94) + 0xE000; - } else { - if (c == 0x80 || c == 0xA0 || c >= 0xFD) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + (buf->state ? 1 : 0)); - - uint32_t w; - unsigned int s = 0; - - if (buf->state) { - w = buf->state; - buf->state = 0; - goto reprocess_wchar; - } - - while (len--) { - w = *in++; -reprocess_wchar: - s = 0; - - if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { - s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; - } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { - s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; - } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { - s = ucs_i_jis_table[w - ucs_i_jis_table_min]; - } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { - s = ucs_r_jis_table[w - ucs_r_jis_table_min]; - } else if (w >= 0xE000 && w < (0xE000 + 20*94)) { - /* Private User Area (95ku - 114ku) */ - s = w - 0xE000; - s = (((s / 94) + 0x7F) << 8) | ((s % 94) + 0x21); - goto process_emoji; - } - - if (!s) { - if (w == 0xA5) { /* YEN SIGN */ - s = 0x216F; /* FULLWIDTH YEN SIGN */ - } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ - s = 0x2140; - } else if (w == 0x2225) { /* PARALLEL TO */ - s = 0x2142; - } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ - s = 0x215D; - } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ - s = 0x2171; - } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ - s = 0x2172; - } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ - s = 0x224C; - } - } - - if (w && (!s || s >= 0x8080)) { - s = 0; - - 
for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { - if (w == cp932ext1_ucs_table[i]) { - s = (((i / 94) + 0x2D) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - - for (int i = 0; i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) { - if (w == cp932ext2_ucs_table[i]) { - s = (((i / 94) + 0x79) << 8) + (i % 94) + 0x21; - goto process_emoji; - } - } - } - -process_emoji: - if (w == '#' || (w >= '0' && w <= '9')) { - if (!len) { - if (end) { - goto emit_output; - } else { - /* If we are at the end of the current buffer of codepoints, but another - * buffer is coming, then remember that we have to reprocess `w` */ - buf->state = w; - break; - } - } - uint32_t w2 = *in++; len--; - if (w2 == 0x20E3) { - if (w == '#') { - s = 0x2817; - } else if (w == '0') { - s = 0x282c; - } else { /* Previous character was '1'-'9' */ - s = 0x2823 + (w - '1'); - } - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } else { - in--; len++; - } - } else if (w >= NFLAGS('C') && w <= NFLAGS('U')) { /* C for CN, U for US */ - if (!len) { - if (end) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); - } else { - /* Reprocess `w` when this function is called again with - * another buffer of wchars */ - buf->state = w; - } - break; - } - uint32_t w2 = *in++; len--; - if (w2 >= NFLAGS('B') && w2 <= NFLAGS('U')) { /* B for GB, U for RU */ - for (int i = 0; i < 10; i++) { - if (w == NFLAGS(nflags_s[i][0]) && w2 == NFLAGS(nflags_s[i][1])) { - s = nflags_code_sb[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - goto emit_output; - } - } - } - in--; len++; - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - continue; - } else if (w == 0xA9) { /* Copyright sign */ - s = (((0x2855 / 94) + 0x21) << 8) | ((0x2855 % 94) + 0x21); - } else if (w == 0xAE) { /* Registered sign */ - s = (((0x2856 / 94) + 0x21) << 8) | ((0x2856 % 94) + 0x21); - } else if (w >= mb_tbl_uni_sb2code2_min && w 
<= mb_tbl_uni_sb2code2_max) { - int i = mbfl_bisec_srch2(w, mb_tbl_uni_sb2code2_key, mb_tbl_uni_sb2code2_len); - if (i >= 0) { - s = mb_tbl_uni_sb2code2_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_sb2code3_min && w <= mb_tbl_uni_sb2code3_max) { - int i = mbfl_bisec_srch2(w - 0x10000, mb_tbl_uni_sb2code3_key, mb_tbl_uni_sb2code3_len); - if (i >= 0) { - s = mb_tbl_uni_sb2code3_value[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } else if (w >= mb_tbl_uni_sb2code5_min && w <= mb_tbl_uni_sb2code5_max) { - int i = mbfl_bisec_srch2(w - 0xF0000, mb_tbl_uni_sb2code5_key, mb_tbl_uni_sb2code5_len); - if (i >= 0) { - s = mb_tbl_uni_sb2code5_val[i]; - s = (((s / 94) + 0x21) << 8) | ((s % 94) + 0x21); - } - } - -emit_output: - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis_sb); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis.h deleted file mode 100644 index b0689fce64388..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. 
- * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_ja.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -#ifndef MBFL_MBFILTER_SJIS_H -#define MBFL_MBFILTER_SJIS_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis; -extern const struct mbfl_convert_vtbl vtbl_sjis_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis; - -int mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_SJIS_H */ - -/* - * charset=UTF-8 - */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c deleted file mode 100644 index bc4d932187061..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c +++ /dev/null @@ -1,1420 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_sjis.c - * by rui hirokawa on 15 aug 2011. - */ - -/* Although the specification for Shift-JIS-2004 indicates that 0x5C and - * 0x7E should (respectively) represent a Yen sign and an overbar, feedback - * from Japanese PHP users indicates that they prefer 0x5C and 0x7E to be - * treated as equivalent to U+005C and U+007E. This is the historical - * behavior of mbstring, and promotes compatibility with other software - * which handles Shift-JIS and Shift-JIS-2004 text in this way. */ - -#include "mbfilter.h" -#include "mbfilter_sjis_2004.h" -#include "mbfilter_euc_jp_2004.h" -#include "mbfilter_iso2022jp_2004.h" - -#include "unicode_table_jis2004.h" -#include "unicode_table_jis.h" - -extern const unsigned char mblen_table_sjis_mobile[]; -extern const unsigned char mblen_table_eucjp[]; - -static size_t mb_sjis2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_sjis2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_eucjp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_eucjp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); -static size_t mb_iso2022jp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_iso2022jp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -extern int mbfl_bisec_srch(int w, const unsigned short *tbl, int n); -extern int 
mbfl_bisec_srch2(int w, const unsigned short tbl[], int n); - -static const char *mbfl_encoding_sjis2004_aliases[] = {"SJIS2004","Shift_JIS-2004", NULL}; -static const char *mbfl_encoding_eucjp2004_aliases[] = {"EUC_JP-2004", NULL}; - -const mbfl_encoding mbfl_encoding_sjis2004 = { - mbfl_no_encoding_sjis2004, - "SJIS-2004", - "Shift_JIS", - mbfl_encoding_sjis2004_aliases, - mblen_table_sjis_mobile, /* Leading byte values used for SJIS-2004 are the same as mobile SJIS variants */ - MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_sjis2004_wchar, - &vtbl_wchar_sjis2004, - mb_sjis2004_to_wchar, - mb_wchar_to_sjis2004, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_sjis2004_wchar = { - mbfl_no_encoding_sjis2004, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis2004_wchar, - mbfl_filt_conv_jis2004_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_sjis2004 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_sjis2004, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_wchar_jis2004_flush, - NULL, -}; - -const mbfl_encoding mbfl_encoding_eucjp2004 = { - mbfl_no_encoding_eucjp2004, - "EUC-JP-2004", - "EUC-JP", - mbfl_encoding_eucjp2004_aliases, - mblen_table_eucjp, - 0, - &vtbl_eucjp2004_wchar, - &vtbl_wchar_eucjp2004, - mb_eucjp2004_to_wchar, - mb_wchar_to_eucjp2004, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_eucjp2004_wchar = { - mbfl_no_encoding_eucjp2004, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis2004_wchar, - mbfl_filt_conv_jis2004_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_eucjp2004 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_eucjp2004, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_wchar_jis2004_flush, - NULL, -}; - -const mbfl_encoding mbfl_encoding_2022jp_2004 = { - mbfl_no_encoding_2022jp_2004, - "ISO-2022-JP-2004", - "ISO-2022-JP-2004", - NULL, - NULL, - 
MBFL_ENCTYPE_GL_UNSAFE, - &vtbl_2022jp_2004_wchar, - &vtbl_wchar_2022jp_2004, - mb_iso2022jp2004_to_wchar, - mb_wchar_to_iso2022jp2004, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_2022jp_2004_wchar = { - mbfl_no_encoding_2022jp_2004, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_jis2004_wchar, - mbfl_filt_conv_jis2004_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_2022jp_2004 = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jp_2004, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_jis2004, - mbfl_filt_conv_wchar_jis2004_flush, - NULL, -}; - -#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -#define SJIS_ENCODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - s1--; \ - s1 >>= 1; \ - if ((c1) < 0x5f) { \ - s1 += 0x71; \ - } else { \ - s1 += 0xb1; \ - } \ - s2 = c2; \ - if ((c1) & 1) { \ - if ((c2) < 0x60) { \ - s2--; \ - } \ - s2 += 0x20; \ - } else { \ - s2 += 0x7e; \ - } \ - } while (0) - -#define SJIS_DECODE(c1,c2,s1,s2) \ - do { \ - s1 = c1; \ - if (s1 < 0xa0) { \ - s1 -= 0x81; \ - } else { \ - s1 -= 0xc1; \ - } \ - s1 <<= 1; \ - s1 += 0x21; \ - s2 = c2; \ - if (s2 < 0x9f) { \ - if (s2 < 0x7f) { \ - s2++; \ - } \ - s2 -= 0x20; \ - } else { \ - s1++; \ - s2 -= 0x7e; \ - } \ - } while (0) - -int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter) -{ - int k; - int c1, c2, s, s1 = 0, s2 = 0, w = 0, w1; - - switch (filter->status & 0xf) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { - CK((*filter->output_function)(c, filter->data)); - } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { - if (c == 0x5c) { - CK((*filter->output_function)(0x00a5, filter->data)); - } else if (c == 0x7e) { - CK((*filter->output_function)(0x203e, filter->data)); - } else { - CK((*filter->output_function)(c, filter->data)); - } - } else { /* ISO-2022-JP-2004 */ - if (c == 0x1b) { - filter->status 
+= 6; - } else if ((filter->status == 0x80 || filter->status == 0x90 || filter->status == 0xa0) - && c > 0x20 && c < 0x7f) { /* kanji first char */ - filter->cache = c; - if (filter->status == 0x90) { - filter->status += 1; /* JIS X 0213 plane 1 */ - } else if (filter->status == 0xa0) { - filter->status += 4; /* JIS X 0213 plane 2 */ - } else { - filter->status += 5; /* JIS X 0208 */ - } - } else { - CK((*filter->output_function)(c, filter->data)); - } - } - } else { - if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { - if (c > 0xa0 && c < 0xff) { /* X 0213 plane 1 first char */ - filter->status = 1; - filter->cache = c; - } else if (c == 0x8e) { /* kana first char */ - filter->cache = 0x8E; /* So error will be reported if input is truncated right here */ - filter->status = 2; - } else if (c == 0x8f) { /* X 0213 plane 2 first char */ - filter->status = 3; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { - if (c > 0xa0 && c < 0xe0) { /* kana */ - CK((*filter->output_function)(0xfec0 + c, filter->data)); - } else if (c > 0x80 && c < 0xfd && c != 0xa0) { /* kanji first char */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - } - break; - - case 1: /* kanji second char */ - filter->status &= ~0xf; - c1 = filter->cache; - - if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { - if (c > 0xa0 && c < 0xff) { - s1 = c1 - 0x80; - s2 = c - 0x80; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - break; - } - } else if (filter->from->no_encoding == mbfl_no_encoding_sjis2004) { - if (c >= 0x40 && c <= 0xfc && c != 0x7f) { - SJIS_DECODE(c1, c, s1, s2); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - break; - } - } else { /* ISO-2022-JP-2004 */ - if (c 
>= 0x21 && c <= 0x7E) { - s1 = c1; - s2 = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - break; - } - } - w1 = (s1 << 8) | s2; - - /* conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || - (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || - (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); - if (k >= 0) { - w = jisx0213_u2_tbl[2*k]; - CK((*filter->output_function)(w, filter->data)); - w = jisx0213_u2_tbl[2*k+1]; - } - } - - /* conversion for BMP */ - if (w <= 0) { - w1 = (s1 - 0x21)*94 + s2 - 0x21; - if (w1 >= 0 && w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; - } - } - - /* conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ - if (w <= 0) { - w1 = (s1 << 8) | s2; - k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - break; - - case 2: /* got 0x8e: EUC-JP-2004 kana */ - filter->status = 0; - if (c > 0xa0 && c < 0xe0) { - w = 0xfec0 + c; - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 3: /* X 0213 plane 2 first char: EUC-JP-2004 (0x8f) */ - if (c == 0xA1 || (c >= 0xA3 && c <= 0xA5) || c == 0xA8 || (c >= 0xAC && c <= 0xAF) || (c >= 0xEE && c <= 0xFE)) { - filter->cache = c - 0x80; - filter->status++; - } else { - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 4: /* X 0213 plane 2 second char: EUC-JP-2004, ISO-2022-JP-2004 */ - filter->status &= ~0xF; - c1 = filter->cache; - if (filter->from->no_encoding == mbfl_no_encoding_eucjp2004) { - c2 = c - 0x80; - } else { - c2 = c; - } - - if (c2 < 0x21 || c2 > 0x7E) { - 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - break; - } - - s1 = c1 - 0x21; - s2 = c2 - 0x21; - - if (((s1 >= 0 && s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || - (s1 >= 77 && s1 < 94)) && s2 >= 0 && s2 < 94) { - /* calc offset from ku */ - for (k = 0; k < jisx0213_p2_ofst_len; k++) { - if (s1 == jisx0213_p2_ofst[k]) { - break; - } - } - k -= jisx0213_p2_ofst[k]; - - /* check for japanese chars in BMP */ - s = (s1 + 94 + k)*94 + s2; - ZEND_ASSERT(s < jisx0213_ucs_table_size); - w = jisx0213_ucs_table[s]; - - /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ - if (w <= 0) { - w1 = ((c1 + k + 94) << 8) | c2; - k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 5: /* X 0208: ISO-2022-JP-2004 */ - filter->status &= ~0xf; - c1 = filter->cache; - if (c > 0x20 && c < 0x7f) { - s = (c1 - 0x21)*94 + c - 0x21; - if (s >= 0 && s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - } - - if (w <= 0) { - w = MBFL_BAD_INPUT; - } - - CK((*filter->output_function)(w, filter->data)); - break; - - /* ESC: ISO-2022-JP-2004 */ -/* case 0x06: */ -/* case 0x16: */ -/* case 0x26: */ -/* case 0x86: */ -/* case 0x96: */ -/* case 0xa6: */ - case 6: - if (c == '$') { - filter->status++; - } else if (c == '(') { - filter->status += 3; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $: ISO-2022-JP-2004 */ -/* case 0x07: */ -/* case 0x17: */ -/* case 0x27: */ -/* case 0x87: */ -/* case 0x97: */ -/* case 0xa7: */ - case 7: - if (c == 'B') { /* JIS X 0208-1983 */ - filter->status = 0x80; - } else if (c == '(') { - filter->status++; - } else { - filter->status &= ~0xf; - 
CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC $ (: ISO-2022-JP-2004 */ -/* case 0x08: */ -/* case 0x18: */ -/* case 0x28: */ -/* case 0x88: */ -/* case 0x98: */ -/* case 0xa8: */ - case 8: - if (c == 'Q') { /* JIS X 0213 plane 1 */ - filter->status = 0x90; - } else if (c == 'P') { /* JIS X 0213 plane 2 */ - filter->status = 0xa0; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - /* ESC (: ISO-2022-JP-2004 */ -/* case 0x09: */ -/* case 0x19: */ -/* case 0x29: */ -/* case 0x89: */ -/* case 0x99: */ - case 9: - if (c == 'B') { - filter->status = 0; - } else { - filter->status &= ~0xf; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status & 0xF) { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - filter->status = 0; - - if (filter->flush_function) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter) -{ - int k; - int c1, c2, s1, s2; - -retry: - s1 = 0; - /* check for 1st char of combining characters */ - if ((filter->status & 0xf) == 0 && ( - c == 0x00E6 || - (c >= 0x0254 && c <= 0x02E9) || - (c >= 0x304B && c <= 0x3053) || - (c >= 0x30AB && c <= 0x30C8) || - c == 0x31F7)) { - for (k = 0; k < jisx0213_u2_tbl_len; k++) { - if (c == jisx0213_u2_tbl[2*k]) { - filter->status++; - filter->cache = k; - return 0; - } - } - } - - /* check for 2nd char of combining characters */ - if ((filter->status & 0xf) == 1 && filter->cache >= 0 && filter->cache < jisx0213_u2_tbl_len) { - k = filter->cache; - filter->status &= ~0xf; - filter->cache = 0; - - c1 = jisx0213_u2_tbl[2*k]; - if ((c1 == 0x0254 || c1 == 0x028C || c1 == 0x0259 || c1 == 0x025A) && c == 0x0301) { - k++; - } - if (c == 
jisx0213_u2_tbl[2*k+1]) { - s1 = jisx0213_u2_key[k]; - } else { /* fallback */ - s1 = jisx0213_u2_fb_tbl[k]; - - if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - s2 = (s1 & 0xff) + 0x80; - s1 = ((s1 >> 8) & 0xff) + 0x80; - } else { - if (filter->status != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('Q', filter->data)); - } - filter->status = 0x200; - - s2 = s1 & 0x7f; - s1 = (s1 >> 8) & 0x7f; - } - - /* Flush out cached data */ - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - goto retry; - } - } - - /* check for major japanese chars: U+4E00 - U+9FFF */ - if (s1 <= 0) { - for (k = 0; k < uni2jis_tbl_len; k++) { - if (c >= uni2jis_tbl_range[k][0] && c <= uni2jis_tbl_range[k][1]) { - s1 = uni2jis_tbl[k][c-uni2jis_tbl_range[k][0]]; - break; - } - } - } - - /* check for japanese chars in compressed mapping area: U+1E00 - U+4DBF */ - if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) { - k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); - if (k >= 0) { - s1 = ucs_c1_jisx0213_ofst[k] + c - ucs_c1_jisx0213_tbl[2*k]; - } - } - - /* check for japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ - if (s1 <= 0 && c >= jisx0213_u5_tbl_min && c <= jisx0213_u5_tbl_max) { - k = mbfl_bisec_srch2(c - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); - if (k >= 0) { - s1 = jisx0213_u5_jis_tbl[k]; - } - } - - if (s1 <= 0) { - /* CJK Compatibility Forms: U+FE30 - U+FE4F */ - if (c == 0xfe45) { - s1 = 0x233e; - } else if (c == 0xfe46) { - s1 = 0x233d; - } else if (c >= 0xf91d && c <= 0xf9dc) { - /* CJK Compatibility Ideographs: U+F900 - U+F92A */ - k = mbfl_bisec_srch2(c, 
ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); - if (k >= 0) { - s1 = ucs_r2b_jisx0213_cmap_val[k]; - } - } - } - - if (s1 <= 0) { - if (c == 0) { - s1 = 0; - } else { - s1 = -1; - } - } - - if (s1 >= 0) { - if (s1 < 0x80) { /* ASCII */ - if (filter->to->no_encoding == mbfl_no_encoding_2022jp_2004 && (filter->status & 0xff00)) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - filter->status = 0; - CK((*filter->output_function)(s1, filter->data)); - } else if (s1 < 0x100) { /* latin or kana */ - if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - CK((*filter->output_function)(0x8e, filter->data)); - CK((*filter->output_function)(s1, filter->data)); - } else if (filter->to->no_encoding == mbfl_no_encoding_sjis2004 && (s1 >= 0xA1 && s1 <= 0xDF)) { - CK((*filter->output_function)(s1, filter->data)); - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - } else if (s1 < 0x7f00) { /* X 0213 plane 1 */ - if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - s2 = (s1 & 0xff) + 0x80; - s1 = ((s1 >> 8) & 0xff) + 0x80; - } else { - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('Q', filter->data)); - } - filter->status = 0x200; - s2 = s1 & 0xff; - s1 = (s1 >> 8) & 0xff; - } - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } else { /* X 0213 plane 2 */ - if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - } else { - s2 = s1 & 0xff; - k = ((s1 
>> 8) & 0xff) - 0x7f; - if (k >= 0 && k < jisx0213_p2_ofst_len) { - s1 = jisx0213_p2_ofst[k] + 0x21; - } - if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - s2 |= 0x80; - s1 |= 0x80; - CK((*filter->output_function)(0x8f, filter->data)); - } else { - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('P', filter->data)); - } - filter->status = 0x200; - } - } - - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter) -{ - int k, c1, c2, s1, s2; - - k = filter->cache; - filter->cache = 0; - - if (filter->status == 1 && k >= 0 && k <= jisx0213_u2_tbl_len) { - s1 = jisx0213_u2_fb_tbl[k]; - - if (filter->to->no_encoding == mbfl_no_encoding_sjis2004) { - c1 = (s1 >> 8) & 0xff; - c2 = s1 & 0xff; - SJIS_ENCODE(c1, c2, s1, s2); - } else if (filter->to->no_encoding == mbfl_no_encoding_eucjp2004) { - s2 = (s1 & 0xff) | 0x80; - s1 = ((s1 >> 8) & 0xff) | 0x80; - } else { - s2 = s1 & 0x7f; - s1 = (s1 >> 8) & 0x7f; - if ((filter->status & 0xff00) != 0x200) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)('$', filter->data)); - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('Q', filter->data)); - } - filter->status = 0x200; - } - - CK((*filter->output_function)(s1, filter->data)); - CK((*filter->output_function)(s2, filter->data)); - } - - /* If we had switched to a different charset, go back to ASCII mode - * This makes it possible to concatenate arbitrary valid strings - * together and get a valid string */ - if (filter->status & 0xff00) { - CK((*filter->output_function)(0x1b, filter->data)); /* 
ESC */ - CK((*filter->output_function)('(', filter->data)); - CK((*filter->output_function)('B', filter->data)); - } - - filter->status = 0; - - if (filter->flush_function) { - return (*filter->flush_function)(filter->data); - } - - return 0; -} - -static size_t mb_sjis2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - if (c == 0x5C) { - *out++ = 0xA5; - } else if (c == 0x7E) { - *out++ = 0x203E; - } else { - *out++ = c; - } - } else if (c >= 0xA1 && c <= 0xDF) { - *out++ = 0xFEC0 + c; - } else if (c > 0x80 && c < 0xFD && c != 0xA0) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - - if (c2 < 0x40 || c2 > 0xFC || c2 == 0x7F) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned int s1, s2; - SJIS_DECODE(c, c2, s1, s2); - unsigned int w1 = (s1 << 8) | s2, w = 0; - - /* Conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); - if (k >= 0) { - *out++ = jisx0213_u2_tbl[2*k]; - *out++ = jisx0213_u2_tbl[2*k+1]; - continue; - } - } - - /* Conversion for BMP */ - w1 = (s1 - 0x21)*94 + s2 - 0x21; - if (w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; - } - - /* Conversion for CJK Unified Ideographs extension B (U+2XXXX) */ - if (!w) { - w1 = (s1 << 8) | s2; - int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_sjis2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - if (buf->state) { - w = buf->state; - buf->state = 0; - goto process_codepoint; - } - - while (len--) { - w = *in++; -process_codepoint: ; - unsigned int s = 0; - - if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { - for (int k = 0; k < jisx0213_u2_tbl_len; k++) { - if (w == jisx0213_u2_tbl[2*k]) { - if (!len) { - if (!end) { - buf->state = w; - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - } else { - uint32_t w2 = *in++; len--; - if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { - k++; - } - if (w2 == jisx0213_u2_tbl[2*k+1]) { - s = jisx0213_u2_key[k]; - break; - } - in--; len++; - } - - /* Fallback */ - s = jisx0213_u2_fb_tbl[k]; - break; - } - } - } - - /* Check for major Japanese chars: U+4E00-U+9FFF */ - if (!s) { - for (int k = 0; k < uni2jis_tbl_len; k++) { - if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { - s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; - break; - } - } - } - - /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ - if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { - int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); - if (k >= 0) { - s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; - } - } - - /* Check for Japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ - if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { - int k = mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); - if (k >= 0) { - s = jisx0213_u5_jis_tbl[k]; - } - } - - if (!s) { - /* CJK 
Compatibility Forms: U+FE30-U+FE4F */ - if (w == 0xFE45) { - s = 0x233E; - } else if (w == 0xFE46) { - s = 0x233D; - } else if (w >= 0xF91D && w <= 0xF9DC) { - /* CJK Compatibility Ideographs: U+F900-U+F92A */ - int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); - if (k >= 0) { - s = ucs_r2b_jisx0213_cmap_val[k]; - } - } - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_sjis2004); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0xFF) { - out = mb_convert_buf_add(out, s); - } else { - unsigned int c1 = (s >> 8) & 0xFF, c2 = s & 0xFF, s1, s2; - SJIS_ENCODE(c1, c2, s1, s2); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, s1, s2); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -static size_t mb_eucjp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - *out++ = c; - } else if (c >= 0xA1 && c <= 0xFE) { - /* Kanji */ - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - if (c2 <= 0xA0 || c2 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned int s1 = c - 0x80, s2 = c2 - 0x80; - unsigned int w1 = (s1 << 8) | s2, w = 0; - - /* Conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); - if (k >= 0) { - *out++ = jisx0213_u2_tbl[2*k]; - *out++ = jisx0213_u2_tbl[2*k+1]; - continue; - } - } - - /* Conversion for BMP */ - w1 = (s1 - 0x21)*94 + s2 - 0x21; - if (w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; - } - - /* 
Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ - if (!w) { - w1 = (s1 << 8) | s2; - int k = mbfl_bisec_srch2(w1, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } else if (c == 0x8E && p < e) { - /* Kana */ - unsigned char c2 = *p++; - if (c2 >= 0xA1 && c2 <= 0xDF) { - *out++ = 0xFEC0 + c2; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c == 0x8F && p < e) { - unsigned char c2 = *p++; - if ((c2 == 0xA1 || (c2 >= 0xA3 && c2 <= 0xA5) || c2 == 0xA8 || (c2 >= 0xAC && c2 <= 0xAF) || (c2 >= 0xEE && c2 <= 0xFE)) && p < e) { - unsigned char c3 = *p++; - - if (c3 < 0xA1 || c3 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - unsigned int s1 = c2 - 0xA1, s2 = c3 - 0xA1; - - if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) { - int k; - for (k = 0; k < jisx0213_p2_ofst_len; k++) { - if (s1 == jisx0213_p2_ofst[k]) { - break; - } - } - k -= jisx0213_p2_ofst[k]; - - /* Check for Japanese chars in BMP */ - unsigned int s = (s1 + 94 + k)*94 + s2; - ZEND_ASSERT(s < jisx0213_ucs_table_size); - unsigned int w = jisx0213_ucs_table[s]; - - /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ - if (!w) { - k = mbfl_bisec_srch2(((c2 - 0x80 + k + 94) << 8) | (c3 - 0x80), jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_eucjp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - if (buf->state) { - w = buf->state; - buf->state = 0; - goto process_codepoint; - } - - while (len--) { - w = *in++; -process_codepoint: ; - unsigned int s = 0; - - /* Check for 1st char of combining characters */ - if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { - for (int k = 0; k < jisx0213_u2_tbl_len; k++) { - if (w == jisx0213_u2_tbl[2*k]) { - if (!len) { - if (!end) { - buf->state = w; - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - } else { - uint32_t w2 = *in++; len--; - if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { - k++; - } - if (w2 == jisx0213_u2_tbl[2*k+1]) { - s = jisx0213_u2_key[k]; - break; - } - in--; len++; - } - - /* Fallback */ - s = jisx0213_u2_fb_tbl[k]; - break; - } - } - } - - /* Check for major Japanese chars: U+4E00-U+9FFF */ - if (!s) { - for (int k = 0; k < uni2jis_tbl_len; k++) { - if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { - s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; - break; - } - } - } - - /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ - if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { - int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); - if (k >= 0) { - s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; - } - } - - /* Check for Japanese chars in CJK Unified Ideographs ext.B (U+2XXXX) */ - if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { - int k = mbfl_bisec_srch2(w - 
0x20000, jisx0213_u5_jis_key, jisx0213_u5_tbl_len); - if (k >= 0) { - s = jisx0213_u5_jis_tbl[k]; - } - } - - if (!s) { - /* CJK Compatibility Forms: U+FE30-U+FE4F */ - if (w == 0xFE45) { - s = 0x233E; - } else if (w == 0xFE46) { - s = 0x233D; - } else if (w >= 0xF91D && w <= 0xF9DC) { - /* CJK Compatibility Ideographs: U+F900-U+F92A */ - int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); - if (k >= 0) { - s = ucs_r2b_jisx0213_cmap_val[k]; - } - } - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjp2004); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7F) { - out = mb_convert_buf_add(out, s); - } else if (s <= 0xFF) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, 0x8E, s); - } else if (s <= 0x7EFF) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) + 0x80, (s & 0xFF) + 0x80); - } else { - unsigned int s2 = s & 0xFF; - int k = ((s >> 8) & 0xFF) - 0x7F; - ZEND_ASSERT(k < jisx0213_p2_ofst_len); - s = jisx0213_p2_ofst[k] + 0x21; - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 3); - out = mb_convert_buf_add3(out, 0x8F, s | 0x80, s2 | 0x80); - } - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} - -#define ASCII 0 -#define JISX0208 1 -#define JISX0213_PLANE1 2 -#define JISX0213_PLANE2 3 - -static size_t mb_iso2022jp2004_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize - 1; - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c <= 0x7F) { - if (c == 0x1B) { - if ((e - p) < 2) { - *out++ = MBFL_BAD_INPUT; - p = e; - break; - } - unsigned char c2 = *p++; - unsigned char c3 = *p++; - if (c2 == '$') { - if (c3 == 'B') { - *state = JISX0208; - } else if (c3 == '(') { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c4 = *p++; - if (c4 
== 'Q') { - *state = JISX0213_PLANE1; - } else if (c4 == 'P') { - *state = JISX0213_PLANE2; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } else if (c2 == '(') { - if (c3 == 'B') { - *state = ASCII; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { - p--; - *out++ = MBFL_BAD_INPUT; - } - } else if (*state >= JISX0208 && c > 0x20 && c < 0x7F) { - if (p == e) { - *out++ = MBFL_BAD_INPUT; - break; - } - unsigned char c2 = *p++; - if (c2 < 0x21 || c2 > 0x7E) { - *out++ = MBFL_BAD_INPUT; - continue; - } - - if (*state == JISX0213_PLANE1) { - unsigned int w1 = (c << 8) | c2; - - /* Conversion for combining characters */ - if ((w1 >= 0x2477 && w1 <= 0x2479) || (w1 >= 0x2479 && w1 <= 0x247B) || (w1 >= 0x2577 && w1 <= 0x257E) || w1 == 0x2678 || w1 == 0x2B44 || (w1 >= 0x2B48 && w1 <= 0x2B4F) || (w1 >= 0x2B65 && w1 <= 0x2B66)) { - int k = mbfl_bisec_srch2(w1, jisx0213_u2_key, jisx0213_u2_tbl_len); - if (k >= 0) { - *out++ = jisx0213_u2_tbl[2*k]; - *out++ = jisx0213_u2_tbl[2*k+1]; - continue; - } - } - - /* Conversion for BMP */ - uint32_t w = 0; - w1 = (c - 0x21)*94 + c2 - 0x21; - if (w1 < jisx0213_ucs_table_size) { - w = jisx0213_ucs_table[w1]; - } - - /* Conversion for CJK Unified Ideographs ext.B (U+2XXXX) */ - if (!w) { - int k = mbfl_bisec_srch2((c << 8) | c2, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? 
w : MBFL_BAD_INPUT; - } else if (*state == JISX0213_PLANE2) { - - unsigned int s1 = c - 0x21, s2 = c2 - 0x21; - - if (((s1 <= 4 && s1 != 1) || s1 == 7 || (s1 >= 11 && s1 <= 14) || (s1 >= 77 && s1 < 94)) && s2 < 94) { - int k; - for (k = 0; k < jisx0213_p2_ofst_len; k++) { - if (s1 == jisx0213_p2_ofst[k]) { - break; - } - } - k -= jisx0213_p2_ofst[k]; - - /* Check for Japanese chars in BMP */ - unsigned int s = (s1 + 94 + k)*94 + s2; - ZEND_ASSERT(s < jisx0213_ucs_table_size); - uint32_t w = jisx0213_ucs_table[s]; - - /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ - if (!w) { - k = mbfl_bisec_srch2(((c + k + 94) << 8) | c2, jisx0213_jis_u5_key, jisx0213_u5_tbl_len); - if (k >= 0) { - w = jisx0213_jis_u5_tbl[k] + 0x20000; - } - } - - *out++ = w ? w : MBFL_BAD_INPUT; - } else { - *out++ = MBFL_BAD_INPUT; - } - } else { /* state == JISX0208 */ - unsigned int s = (c - 0x21)*94 + c2 - 0x21; - uint32_t w = 0; - if (s < jisx0208_ucs_table_size) { - w = jisx0208_ucs_table[s]; - } - *out++ = w ? 
w : MBFL_BAD_INPUT; - } - } else { - *out++ = c; - } - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - *in_len = e - p; - *in = p; - return out - buf; -} - -static void mb_wchar_to_iso2022jp2004(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - uint32_t w; - if (buf->state & 0xFF00) { - int k = (buf->state >> 8) - 1; - w = jisx0213_u2_tbl[2*k]; - buf->state &= 0xFF; - goto process_codepoint; - } - - while (len--) { - w = *in++; -process_codepoint: ; - unsigned int s = 0; - - if (w == 0xE6 || (w >= 0x254 && w <= 0x2E9) || (w >= 0x304B && w <= 0x3053) || (w >= 0x30AB && w <= 0x30C8) || w == 0x31F7) { - for (int k = 0; k < jisx0213_u2_tbl_len; k++) { - if (w == jisx0213_u2_tbl[2*k]) { - if (!len) { - if (!end) { - buf->state |= (k+1) << 8; - MB_CONVERT_BUF_STORE(buf, out, limit); - return; - } - } else { - uint32_t w2 = *in++; len--; - if ((w == 0x254 || w == 0x28C || w == 0x259 || w == 0x25A) && w2 == 0x301) { - k++; - } - if (w2 == jisx0213_u2_tbl[2*k+1]) { - s = jisx0213_u2_key[k]; - break; - } - in--; len++; - } - - s = jisx0213_u2_fb_tbl[k]; - break; - } - } - } - - /* Check for major Japanese chars: U+4E00-U+9FFF */ - if (!s) { - for (int k = 0; k < uni2jis_tbl_len; k++) { - if (w >= uni2jis_tbl_range[k][0] && w <= uni2jis_tbl_range[k][1]) { - s = uni2jis_tbl[k][w - uni2jis_tbl_range[k][0]]; - break; - } - } - } - - /* Check for Japanese chars in compressed mapping area: U+1E00-U+4DBF */ - if (!s && w >= ucs_c1_jisx0213_min && w <= ucs_c1_jisx0213_max) { - int k = mbfl_bisec_srch(w, ucs_c1_jisx0213_tbl, ucs_c1_jisx0213_tbl_len); - if (k >= 0) { - s = ucs_c1_jisx0213_ofst[k] + w - ucs_c1_jisx0213_tbl[2*k]; - } - } - - /* Check for Japanese chars in CJK Unified Ideographs ext B (U+2XXXX) */ - if (!s && w >= jisx0213_u5_tbl_min && w <= jisx0213_u5_tbl_max) { - int k = mbfl_bisec_srch2(w - 0x20000, jisx0213_u5_jis_key, 
jisx0213_u5_tbl_len); - if (k >= 0) { - s = jisx0213_u5_jis_tbl[k]; - } - } - - if (!s) { - /* CJK Compatibility Forms: U+FE30-U+FE4F */ - if (w == 0xFE45) { - s = 0x233E; - } else if (w == 0xFE46) { - s = 0x233D; - } else if (w >= 0xF91D && w <= 0xF9DC) { - /* CJK Compatibility Ideographs: U+F900-U+F92A */ - int k = mbfl_bisec_srch2(w, ucs_r2b_jisx0213_cmap_key, ucs_r2b_jisx0213_cmap_len); - if (k >= 0) { - s = ucs_r2b_jisx0213_cmap_val[k]; - } - } - } - - if (!s && w) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp2004); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7F) { - if (buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 4); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - buf->state = ASCII; - } - out = mb_convert_buf_add(out, s); - } else if (s <= 0xFF) { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_iso2022jp2004); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } else if (s <= 0x7EFF) { - if (buf->state != JISX0213_PLANE1) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'Q'); - buf->state = JISX0213_PLANE1; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } else { - if (buf->state != JISX0213_PLANE2) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 6); - out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'P'); - buf->state = JISX0213_PLANE2; - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - } - unsigned int s2 = s & 0xFF; - int k = ((s >> 8) & 0xFF) - 0x7F; - ZEND_ASSERT(k < jisx0213_p2_ofst_len); - s = jisx0213_p2_ofst[k] + 0x21; - out = mb_convert_buf_add2(out, s, s2); - } - } - - if (end && buf->state != ASCII) { - MB_CONVERT_BUF_ENSURE(buf, out, limit, 3); - out = mb_convert_buf_add3(out, 0x1B, '(', 'B'); - } - - MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h 
b/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h deleted file mode 100644 index 869fd145c1ce0..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_sjis.c - * by rui hirokawa on 15 aug 2011. 
- * - */ - -#ifndef MBFL_MBFILTER_SJIS_2004_H -#define MBFL_MBFILTER_SJIS_2004_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis2004; -extern const struct mbfl_convert_vtbl vtbl_sjis2004_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis2004; - -int mbfl_filt_conv_jis2004_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_jis2004(int c, mbfl_convert_filter *filter); - -int mbfl_filt_conv_wchar_jis2004_flush(mbfl_convert_filter *filter); -int mbfl_filt_conv_jis2004_wchar_flush(mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_SJIS_2004_H */ - -/* - * charset=UTF-8 - */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h deleted file mode 100644 index 58d8eb2ab03e4..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mac.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_sjis_open.c - * by Rui Hirokawa on 25 July 2011. 
- * - */ - -#ifndef MBFL_MBFILTER_SJIS_MAC_H -#define MBFL_MBFILTER_SJIS_MAC_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis_mac; -extern const struct mbfl_convert_vtbl vtbl_sjis_mac_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_mac; - -#endif /* MBFL_MBFILTER_SJIS_MAC_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h b/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h deleted file mode 100644 index 6085e2b5a1266..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_sjis_mobile.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * the source code included in this files was separated from mbfilter_sjis_open.c - * by Rui Hirokawa on 25 July 2011. 
- * - */ - -#ifndef MBFL_MBFILTER_SJIS_MOBILE_H -#define MBFL_MBFILTER_SJIS_MOBILE_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_sjis_docomo; -extern const mbfl_encoding mbfl_encoding_sjis_kddi; -extern const mbfl_encoding mbfl_encoding_sjis_sb; - -extern const struct mbfl_convert_vtbl vtbl_sjis_docomo_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_docomo; -extern const struct mbfl_convert_vtbl vtbl_sjis_kddi_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_kddi; -extern const struct mbfl_convert_vtbl vtbl_sjis_sb_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_sjis_sb; - -extern const unsigned short mbfl_docomo2uni_pua[4][3]; -extern const unsigned short mbfl_kddi2uni_pua[7][3]; -extern const unsigned short mbfl_sb2uni_pua[6][3]; -extern const unsigned short mbfl_kddi2uni_pua_b[8][3]; - -int mbfl_filt_conv_sjis_mobile_flush(mbfl_convert_filter *filter); - -int mbfilter_sjis_emoji_docomo2unicode(int s, int *snd); -int mbfilter_sjis_emoji_kddi2unicode(int s, int *snd); -int mbfilter_sjis_emoji_sb2unicode(int s, int *snd); - -int mbfilter_unicode2sjis_emoji_docomo(int c, int *s1, mbfl_convert_filter *filter); -int mbfilter_unicode2sjis_emoji_kddi(int c, int *s1, mbfl_convert_filter *filter); -int mbfilter_unicode2sjis_emoji_sb(int c, int *s1, mbfl_convert_filter *filter); - -int mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n); -int mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n); - -#endif /* MBFL_MBFILTER_SJIS_MOBILE_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_uhc.c b/ext/mbstring/libmbfl/filters/mbfilter_uhc.c deleted file mode 100644 index 8d611adb5ac3e..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_uhc.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. 
- * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.c - * by moriyoshi koizumi on 4 dec 2002. - * - */ - -/* UHC was introduced by MicroSoft in Windows 95, and is also known as CP949. - * It is the same as EUC-KR, but with 8,822 additional characters added to - * complete all the characters in the Johab charset. 
*/ - -#include "mbfilter.h" -#include "mbfilter_uhc.h" -#define UNICODE_TABLE_UHC_DEF -#include "unicode_table_uhc.h" - -static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter); -static size_t mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); -static void mb_wchar_to_uhc(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); - -static const unsigned char mblen_table_uhc[] = { /* 0x81-0xFE */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 -}; - -static const char *mbfl_encoding_uhc_aliases[] = {"CP949", NULL}; - -const mbfl_encoding mbfl_encoding_uhc = { - mbfl_no_encoding_uhc, - "UHC", - "UHC", - mbfl_encoding_uhc_aliases, - mblen_table_uhc, - 0, - &vtbl_uhc_wchar, - &vtbl_wchar_uhc, - mb_uhc_to_wchar, - mb_wchar_to_uhc, - NULL -}; - -const struct mbfl_convert_vtbl vtbl_uhc_wchar = { - mbfl_no_encoding_uhc, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_uhc_wchar, - mbfl_filt_conv_uhc_wchar_flush, - NULL, -}; - -const struct mbfl_convert_vtbl vtbl_wchar_uhc = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_uhc, - mbfl_filt_conv_common_ctor, - NULL, - mbfl_filt_conv_wchar_uhc, - mbfl_filt_conv_common_flush, - NULL, -}; - 
-#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) - -int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter) -{ - switch (filter->status) { - case 0: - if (c >= 0 && c < 0x80) { /* latin */ - CK((*filter->output_function)(c, filter->data)); - } else if (c > 0x80 && c < 0xfe && c != 0xc9) { /* dbcs lead byte */ - filter->status = 1; - filter->cache = c; - } else { - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - break; - - case 1: /* dbcs second byte */ - filter->status = 0; - int c1 = filter->cache, w = 0; - - if (c1 >= 0x81 && c1 <= 0xc6 && c >= 0x41 && c <= 0xfe) { - w = (c1 - 0x81)*190 + (c - 0x41); - if (w >= 0 && w < uhc1_ucs_table_size) { - w = uhc1_ucs_table[w]; - } - } else if (c1 >= 0xc7 && c1 < 0xfe && c >= 0xa1 && c <= 0xfe) { - w = (c1 - 0xc7)*94 + (c - 0xa1); - if (w >= 0 && w < uhc3_ucs_table_size) { - w = uhc3_ucs_table[w]; - } - } - - if (w == 0) { - w = MBFL_BAD_INPUT; - } - CK((*filter->output_function)(w, filter->data)); - break; - - EMPTY_SWITCH_DEFAULT_CASE(); - } - - return 0; -} - -static int mbfl_filt_conv_uhc_wchar_flush(mbfl_convert_filter *filter) -{ - if (filter->status == 1) { - /* 2-byte character was truncated */ - filter->status = 0; - CK((*filter->output_function)(MBFL_BAD_INPUT, filter->data)); - } - - if (filter->flush_function) { - (*filter->flush_function)(filter->data); - } - - return 0; -} - -int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter) -{ - int s = 0; - - if (c >= ucs_a1_uhc_table_min && c < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[c - ucs_a1_uhc_table_min]; - } else if (c >= ucs_a2_uhc_table_min && c < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[c - ucs_a2_uhc_table_min]; - } else if (c >= ucs_a3_uhc_table_min && c < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[c - ucs_a3_uhc_table_min]; - } else if (c >= ucs_i_uhc_table_min && c < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[c - ucs_i_uhc_table_min]; - } else if (c >= ucs_s_uhc_table_min && 
c < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[c - ucs_s_uhc_table_min]; - } else if (c >= ucs_r1_uhc_table_min && c < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[c - ucs_r1_uhc_table_min]; - } else if (c >= ucs_r2_uhc_table_min && c < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[c - ucs_r2_uhc_table_min]; - } - - if (s == 0 && c != 0) { - s = -1; - } - - if (s >= 0) { - if (s < 0x80) { /* latin */ - CK((*filter->output_function)(s, filter->data)); - } else { - CK((*filter->output_function)((s >> 8) & 0xff, filter->data)); - CK((*filter->output_function)(s & 0xff, filter->data)); - } - } else { - CK(mbfl_filt_conv_illegal_output(c, filter)); - } - - return 0; -} - -static size_t mb_uhc_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) -{ - unsigned char *p = *in, *e = p + *in_len; - uint32_t *out = buf, *limit = buf + bufsize; - - e--; /* Stop the main loop 1 byte short of the end of the input */ - - while (p < e && out < limit) { - unsigned char c = *p++; - - if (c < 0x80) { - *out++ = c; - } else if (c > 0x80 && c < 0xFE) { - /* We don't need to check p < e here; it's not possible that this pointer dereference - * will be outside the input string, because of e-- above */ - unsigned char c2 = *p++; - if (c2 < 0x41 || c2 == 0xFF) { - *out++ = MBFL_BAD_INPUT; - continue; - } - unsigned int w = 0; - - if (c <= 0xC6) { - w = (c - 0x81)*190 + c2 - 0x41; - ZEND_ASSERT(w < uhc1_ucs_table_size); - w = uhc1_ucs_table[w]; - } else if (c2 >= 0xA1) { - w = (c - 0xC7)*94 + c2 - 0xA1; - ZEND_ASSERT(w < uhc3_ucs_table_size); - w = uhc3_ucs_table[w]; - if (!w) { - /* If c == 0xC9, we shouldn't have tried to read a 2-byte char at all... 
but it is faster - * to fix up that rare case here rather than include an extra check in the hot path */ - if (c == 0xC9) { - p--; - } - *out++ = MBFL_BAD_INPUT; - continue; - } - } - if (!w) { - w = MBFL_BAD_INPUT; - } - *out++ = w; - } else { - *out++ = MBFL_BAD_INPUT; - } - } - - /* Finish up last byte of input string if there is one */ - if (p == e && out < limit) { - unsigned char c = *p++; - *out++ = (c < 0x80) ? c : MBFL_BAD_INPUT; - } - - *in_len = e - p + 1; - *in = p; - return out - buf; -} - -static void mb_wchar_to_uhc(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) -{ - unsigned char *out, *limit; - MB_CONVERT_BUF_LOAD(buf, out, limit); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - - while (len--) { - uint32_t w = *in++; - unsigned int s = 0; - - if (w >= ucs_a1_uhc_table_min && w < ucs_a1_uhc_table_max) { - s = ucs_a1_uhc_table[w - ucs_a1_uhc_table_min]; - } else if (w >= ucs_a2_uhc_table_min && w < ucs_a2_uhc_table_max) { - s = ucs_a2_uhc_table[w - ucs_a2_uhc_table_min]; - } else if (w >= ucs_a3_uhc_table_min && w < ucs_a3_uhc_table_max) { - s = ucs_a3_uhc_table[w - ucs_a3_uhc_table_min]; - } else if (w >= ucs_i_uhc_table_min && w < ucs_i_uhc_table_max) { - s = ucs_i_uhc_table[w - ucs_i_uhc_table_min]; - } else if (w >= ucs_s_uhc_table_min && w < ucs_s_uhc_table_max) { - s = ucs_s_uhc_table[w - ucs_s_uhc_table_min]; - } else if (w >= ucs_r1_uhc_table_min && w < ucs_r1_uhc_table_max) { - s = ucs_r1_uhc_table[w - ucs_r1_uhc_table_min]; - } else if (w >= ucs_r2_uhc_table_min && w < ucs_r2_uhc_table_max) { - s = ucs_r2_uhc_table[w - ucs_r2_uhc_table_min]; - } - - if (!s) { - if (w == 0) { - out = mb_convert_buf_add(out, 0); - } else { - MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_uhc); - MB_CONVERT_BUF_ENSURE(buf, out, limit, len); - } - } else if (s < 0x80) { - out = mb_convert_buf_add(out, s); - } else { - MB_CONVERT_BUF_ENSURE(buf, out, limit, len + 2); - out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); - } - } - - 
MB_CONVERT_BUF_STORE(buf, out, limit); -} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_uhc.h b/ext/mbstring/libmbfl/filters/mbfilter_uhc.h deleted file mode 100644 index 860d45eb86fe1..0000000000000 --- a/ext/mbstring/libmbfl/filters/mbfilter_uhc.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * "streamable kanji code filter and converter" - * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. - * - * LICENSE NOTICES - * - * This file is part of "streamable kanji code filter and converter", - * which is distributed under the terms of GNU Lesser General Public - * License (version 2) as published by the Free Software Foundation. - * - * This software is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with "streamable kanji code filter and converter"; - * if not, write to the Free Software Foundation, Inc., 59 Temple Place, - * Suite 330, Boston, MA 02111-1307 USA - * - * The author of this file: - * - */ -/* - * The source code included in this files was separated from mbfilter_kr.h - * by moriyoshi koizumi on 4 dec 2002. 
- * - */ - -#ifndef MBFL_MBFILTER_UHC_H -#define MBFL_MBFILTER_UHC_H - -#include "mbfilter.h" - -extern const mbfl_encoding mbfl_encoding_uhc; -extern const struct mbfl_convert_vtbl vtbl_uhc_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_uhc; - -int mbfl_filt_conv_uhc_wchar(int c, mbfl_convert_filter *filter); -int mbfl_filt_conv_wchar_uhc(int c, mbfl_convert_filter *filter); - -#endif /* MBFL_MBFILTER_UHC_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c index 7d5fdc3e0a469..dd253cfe689fc 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_utf8_mobile.c @@ -30,7 +30,7 @@ #include "mbfilter.h" #include "mbfilter_utf8_mobile.h" -#include "mbfilter_sjis_mobile.h" +#include "mbfilter_cjk.h" #include "emoji2uni.h" @@ -47,6 +47,66 @@ static void mb_wchar_to_utf8_kddi_b(uint32_t *in, size_t len, mb_convert_buf *bu static size_t mb_utf8_sb_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); static void mb_wchar_to_utf8_sb(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); +static bool mbfilter_conv_map_tbl(int c, int *w, const unsigned short map[][3], int n) +{ + for (int i = 0; i < n; i++) { + if (map[i][0] <= c && c <= map[i][1]) { + *w = c - map[i][0] + map[i][2]; + return true; + } + } + return false; +} + +static bool mbfilter_conv_r_map_tbl(int c, int *w, const unsigned short map[][3], int n) +{ + /* Convert in reverse direction */ + for (int i = 0; i < n; i++) { + if (map[i][2] <= c && c <= map[i][2] - map[i][0] + map[i][1]) { + *w = c + map[i][0] - map[i][2]; + return true; + } + } + return false; +} + +static const unsigned short mbfl_docomo2uni_pua[4][3] = { + {0x28c2, 0x292f, 0xe63e}, + {0x2930, 0x2934, 0xe6ac}, + {0x2935, 0x2951, 0xe6b1}, + {0x2952, 0x29db, 0xe6ce}, +}; + +static const unsigned short mbfl_kddi2uni_pua[7][3] = { + {0x26ec, 0x2838, 0xe468}, + {0x284c, 
0x2863, 0xe5b5}, + {0x24b8, 0x24ca, 0xe5cd}, + {0x24cb, 0x2545, 0xea80}, + {0x2839, 0x284b, 0xeafb}, + {0x2546, 0x25c0, 0xeb0e}, + {0x25c1, 0x25c6, 0xeb89}, +}; + +static const unsigned short mbfl_kddi2uni_pua_b[8][3] = { + {0x24b8, 0x24f6, 0xec40}, + {0x24f7, 0x2573, 0xec80}, + {0x2574, 0x25b2, 0xed40}, + {0x25b3, 0x25c6, 0xed80}, + {0x26ec, 0x272a, 0xef40}, + {0x272b, 0x27a7, 0xef80}, + {0x27a8, 0x27e6, 0xf040}, + {0x27e7, 0x2863, 0xf080}, +}; + +static const unsigned short mbfl_sb2uni_pua[6][3] = { + {0x27a9, 0x2802, 0xe101}, + {0x2808, 0x2861, 0xe201}, + {0x2921, 0x297a, 0xe001}, + {0x2980, 0x29cc, 0xe301}, + {0x2a99, 0x2ae4, 0xe401}, + {0x2af8, 0x2b35, 0xe501}, +}; + extern const unsigned char mblen_table_utf8[]; static const char *mbfl_encoding_utf8_docomo_aliases[] = {"UTF-8-DOCOMO", "UTF8-DOCOMO", NULL}; @@ -224,13 +284,13 @@ int mbfl_filt_conv_utf8_mobile_wchar(int c, mbfl_convert_filter *filter) s = (filter->cache << 6) | (c & 0x3f); filter->cache = 0; - if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4) > 0) { + if (filter->from->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_conv_r_map_tbl(s, &s1, mbfl_docomo2uni_pua, 4)) { s = mbfilter_sjis_emoji_docomo2unicode(s1, &snd); - } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 7) > 0) { + } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua, 7)) { s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd); - } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8) > 0) { + } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_conv_r_map_tbl(s, &s1, mbfl_kddi2uni_pua_b, 8)) { s = mbfilter_sjis_emoji_kddi2unicode(s1, &snd); - } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb && 
mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6) > 0) { + } else if (filter->from->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_conv_r_map_tbl(s, &s1, mbfl_sb2uni_pua, 6)) { s = mbfilter_sjis_emoji_sb2unicode(s1, &snd); } @@ -297,10 +357,10 @@ int mbfl_filt_conv_wchar_utf8_mobile(int c, mbfl_convert_filter *filter) if (c >= 0 && c < 0x110000) { int s1, c1; - if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4) > 0) || - (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7) > 0) || - (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8) > 0) || - (filter->to->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6) > 0)) { + if ((filter->to->no_encoding == mbfl_no_encoding_utf8_docomo && mbfilter_unicode2sjis_emoji_docomo(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_docomo2uni_pua, 4)) || + (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_a && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua, 7)) || + (filter->to->no_encoding == mbfl_no_encoding_utf8_kddi_b && mbfilter_unicode2sjis_emoji_kddi_sjis(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_kddi2uni_pua_b, 8)) || + (filter->to->no_encoding == mbfl_no_encoding_utf8_sb && mbfilter_unicode2sjis_emoji_sb(c, &s1, filter) > 0 && mbfilter_conv_map_tbl(s1, &c1, mbfl_sb2uni_pua, 6))) { c = c1; } @@ -426,7 +486,7 @@ static size_t mb_mobile_utf8_to_wchar(unsigned char **in, size_t *in_len, uint32 } int s1 = 0, snd = 0; - if (mbfilter_conv_r_map_tbl(s, &s1, emoji_map, n) > 0) { + if 
(mbfilter_conv_r_map_tbl(s, &s1, emoji_map, n)) { s = convert_emoji(s1, &snd); if (snd) { *out++ = snd; @@ -492,7 +552,7 @@ static void mb_wchar_to_utf8_docomo(uint32_t *in, size_t len, mb_convert_buf *bu } } - if (s && mbfilter_conv_map_tbl(s, &c1, mbfl_docomo2uni_pua, 4) > 0) { + if (s && mbfilter_conv_map_tbl(s, &c1, mbfl_docomo2uni_pua, 4)) { w = c1; } @@ -589,7 +649,7 @@ static void mb_wchar_to_utf8_kddi(uint32_t *in, size_t len, mb_convert_buf *buf, } process_kuten: - if (s && mbfilter_conv_map_tbl(s, &c1, emoji_map, n) > 0) { + if (s && mbfilter_conv_map_tbl(s, &c1, emoji_map, n)) { w = c1; } @@ -701,7 +761,7 @@ static void mb_wchar_to_utf8_sb(uint32_t *in, size_t len, mb_convert_buf *buf, b } process_kuten: - if (s && mbfilter_conv_map_tbl(s, &c1, mbfl_sb2uni_pua, 6) > 0) { + if (s && mbfilter_conv_map_tbl(s, &c1, mbfl_sb2uni_pua, 6)) { w = c1; } diff --git a/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h b/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h index 545333928b74c..ea117228893c7 100644 --- a/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h +++ b/ext/mbstring/libmbfl/filters/translit_kana_jisx0201_jisx0208.h @@ -47,43 +47,4 @@ #define MBFL_HAN2ZEN_GLUE 0x10000 -static const unsigned char hankana2zenkana_table[64] = { - 0x00,0x02,0x0C,0x0D,0x01,0xFB,0xF2,0xA1,0xA3,0xA5, - 0xA7,0xA9,0xE3,0xE5,0xE7,0xC3,0xFC,0xA2,0xA4,0xA6, - 0xA8,0xAA,0xAB,0xAD,0xAF,0xB1,0xB3,0xB5,0xB7,0xB9, - 0xBB,0xBD,0xBF,0xC1,0xC4,0xC6,0xC8,0xCA,0xCB,0xCC, - 0xCD,0xCE,0xCF,0xD2,0xD5,0xD8,0xDB,0xDE,0xDF,0xE0, - 0xE1,0xE2,0xE4,0xE6,0xE8,0xE9,0xEA,0xEB,0xEC,0xED, - 0xEF,0xF3,0x9B,0x9C -}; - -static const unsigned char hankana2zenhira_table[64] = { - 0x00,0x02,0x0C,0x0D,0x01,0xFB,0x92,0x41,0x43,0x45, - 0x47,0x49,0x83,0x85,0x87,0x63,0xFC,0x42,0x44,0x46, - 0x48,0x4A,0x4B,0x4D,0x4F,0x51,0x53,0x55,0x57,0x59, - 0x5B,0x5D,0x5F,0x61,0x64,0x66,0x68,0x6A,0x6B,0x6C, - 0x6D,0x6E,0x6F,0x72,0x75,0x78,0x7B,0x7E,0x7F,0x80, - 
0x81,0x82,0x84,0x86,0x88,0x89,0x8A,0x8B,0x8C,0x8D, - 0x8F,0x93,0x9B,0x9C -}; -static const unsigned char zenkana2hankana_table[84][2] = { - {0x67,0x00},{0x71,0x00},{0x68,0x00},{0x72,0x00},{0x69,0x00}, - {0x73,0x00},{0x6A,0x00},{0x74,0x00},{0x6B,0x00},{0x75,0x00}, - {0x76,0x00},{0x76,0x9E},{0x77,0x00},{0x77,0x9E},{0x78,0x00}, - {0x78,0x9E},{0x79,0x00},{0x79,0x9E},{0x7A,0x00},{0x7A,0x9E}, - {0x7B,0x00},{0x7B,0x9E},{0x7C,0x00},{0x7C,0x9E},{0x7D,0x00}, - {0x7D,0x9E},{0x7E,0x00},{0x7E,0x9E},{0x7F,0x00},{0x7F,0x9E}, - {0x80,0x00},{0x80,0x9E},{0x81,0x00},{0x81,0x9E},{0x6F,0x00}, - {0x82,0x00},{0x82,0x9E},{0x83,0x00},{0x83,0x9E},{0x84,0x00}, - {0x84,0x9E},{0x85,0x00},{0x86,0x00},{0x87,0x00},{0x88,0x00}, - {0x89,0x00},{0x8A,0x00},{0x8A,0x9E},{0x8A,0x9F},{0x8B,0x00}, - {0x8B,0x9E},{0x8B,0x9F},{0x8C,0x00},{0x8C,0x9E},{0x8C,0x9F}, - {0x8D,0x00},{0x8D,0x9E},{0x8D,0x9F},{0x8E,0x00},{0x8E,0x9E}, - {0x8E,0x9F},{0x8F,0x00},{0x90,0x00},{0x91,0x00},{0x92,0x00}, - {0x93,0x00},{0x6C,0x00},{0x94,0x00},{0x6D,0x00},{0x95,0x00}, - {0x6E,0x00},{0x96,0x00},{0x97,0x00},{0x98,0x00},{0x99,0x00}, - {0x9A,0x00},{0x9B,0x00},{0x9C,0x00},{0x9C,0x00},{0x72,0x00}, - {0x74,0x00},{0x66,0x00},{0x9D,0x00},{0x73,0x9E} -}; - #endif /* TRANSLIT_KANA_JISX0201_JISX0208_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp932_ext.h b/ext/mbstring/libmbfl/filters/unicode_table_cp932_ext.h index 8b1efdd0bed55..66944b09f128e 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_cp932_ext.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_cp932_ext.h @@ -25,8 +25,6 @@ #ifndef UNICODE_TABLE_CP932_EXT_H #define UNICODE_TABLE_CP932_EXT_H -#ifdef UNICODE_TABLE_CP932_DEF - const unsigned short cp932ext1_ucs_table[] = { /* ku 13 */ 0x2460,0x2461,0x2462,0x2463,0x2464,0x2465,0x2466,0x2467, @@ -169,19 +167,4 @@ const unsigned short cp932ext3_ucs_table[] = { const int cp932ext3_ucs_table_min = (115 - 1)*94; const int cp932ext3_ucs_table_max = (115 - 1)*94 + (sizeof (cp932ext3_ucs_table) / sizeof (unsigned 
short)); -#else - -extern const unsigned short cp932ext1_ucs_table[]; -extern const unsigned short cp932ext2_ucs_table[]; -extern const unsigned short cp932ext3_ucs_table[]; - -extern const int cp932ext1_ucs_table_min; -extern const int cp932ext1_ucs_table_max; -extern const int cp932ext2_ucs_table_min; -extern const int cp932ext2_ucs_table_max; -extern const int cp932ext3_ucs_table_min; -extern const int cp932ext3_ucs_table_max; - -#endif - #endif /* UNICODE_TABLE_CP932_EXT_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp936.h b/ext/mbstring/libmbfl/filters/unicode_table_cp936.h index c225c586ffb35..1d739cce29f6a 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_cp936.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_cp936.h @@ -19,17 +19,11 @@ * Suite 330, Boston, MA 02111-1307 USA * * The author of this file: Rui Hirokawa - * */ #ifndef UNICODE_TABLE_CP936_H #define UNICODE_TABLE_CP936_H -/* - * Unicode table - */ -#ifdef UNICODE_TABLE_CP936_DEF - /* CP936 -> Unicode, but without PUA codepoints used in CP936 and GB18030 */ const unsigned short cp936_ucs_table[] = { /* 0x8140 */ @@ -6634,41 +6628,4 @@ static const unsigned short mbfl_cp936_pua_tbl[][3] = { static const int mbfl_cp936_pua_tbl_max = sizeof(mbfl_cp936_pua_tbl)/(sizeof(unsigned short)*3); -#else - -extern const unsigned short cp936_ucs_table[]; -extern const unsigned short cp936_pua_tbl1[]; -extern const unsigned short cp936_pua_tbl2[]; -extern const unsigned short cp936_pua_tbl3[]; - -extern const unsigned short ucs_a1_cp936_table[]; -extern const unsigned short ucs_a2_cp936_table[]; -extern const unsigned short ucs_a3_cp936_table[]; -extern const unsigned short ucs_i_cp936_table[]; -extern const unsigned short ucs_cf_cp936_table[]; -extern const unsigned short ucs_sfv_cp936_table[]; - -extern const unsigned short ucs_ci_s_cp936_table[]; -extern const unsigned short ucs_hff_s_cp936_table[]; - -extern const int cp936_ucs_table_size; -extern const int ucs_a1_cp936_table_min; 
-extern const int ucs_a1_cp936_table_max; -extern const int ucs_a2_cp936_table_min; -extern const int ucs_a2_cp936_table_max; -extern const int ucs_a3_cp936_table_min; -extern const int ucs_a3_cp936_table_max; -extern const int ucs_i_cp936_table_min; -extern const int ucs_i_cp936_table_max; -extern const int ucs_ci_cp936_table_min; -extern const int ucs_ci_cp936_table_max; -extern const int ucs_cf_cp936_table_min; -extern const int ucs_cf_cp936_table_max; -extern const int ucs_sfv_cp936_table_min; -extern const int ucs_sfv_cp936_table_max; -extern const int ucs_hff_cp936_table_min; -extern const int ucs_hff_cp936_table_max; - -#endif - #endif /* UNICODE_TABLE_CP936_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_jis.h b/ext/mbstring/libmbfl/filters/unicode_table_jis.h index 04e6a63b9e274..de4a325ab9935 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_jis.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_jis.h @@ -21,17 +21,11 @@ * The authors of this file: PHP3 internationalization team * You can contact the primary authors; 金本 茂 , * Tsukada Takuya . 
- * */ #ifndef UNICODE_TABLE_JIS_H #define UNICODE_TABLE_JIS_H -#ifdef UNICODE_TABLE_JIS_DEF - -/* - * Unicode table - */ const unsigned short jisx0208_ucs_table[] = { /* ku 1 */ 0x3000,0x3001,0x3002,0xFF0C,0xFF0E,0x30FB,0xFF1A,0xFF1B, @@ -5846,27 +5840,4 @@ const unsigned short ucs_r_jis_table[] = { int ucs_r_jis_table_min = 0xFF00; int ucs_r_jis_table_max = 0xFF00 + (sizeof (ucs_r_jis_table) / sizeof (unsigned short)); -#else - -extern const unsigned short jisx0208_ucs_table[]; -extern const unsigned short jisx0212_ucs_table[]; -extern const unsigned short ucs_a1_jis_table[]; -extern const unsigned short ucs_a2_jis_table[]; -extern const unsigned short ucs_i_jis_table[]; -extern const unsigned short ucs_r_jis_table[]; - -extern const int jisx0208_ucs_table_size; -extern const int jisx0212_ucs_table_size; -extern const int ucs_a1_jis_table_min; -extern const int ucs_a1_jis_table_max; -extern const int ucs_a2_jis_table_min; -extern const int ucs_a2_jis_table_max; -extern const int ucs_i_jis_table_min; -extern const int ucs_i_jis_table_max; -extern int ucs_r_jis_table_min; -extern int ucs_r_jis_table_max; - -#endif - - #endif /* UNICODE_TABLE_JIS_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h b/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h index 965a567b4b7d7..0b16114f6584d 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h @@ -4888,44 +4888,45 @@ static const unsigned short jisx0213_jis_u5_tbl[] = { 0x2218,0xA38C,0xA437,0xA5F1,0xA602,0xA61A,0xA6B2,}; static const unsigned short jisx0213_jis_u5_key[] = { -0x2E22,0x2F42,0x2F4C,0x2F60,0x2F7B,0x4F54,0x4F63,0x4F6E, -0x753A,0x7572,0x7629,0x7632,0x7660,0x776C,0x787E,0x7929, -0x7947,0x7954,0x796E,0x7A5D,0x7B33,0x7B49,0x7B6C,0x7C49, -0x7C51,0x7E66,0x7F21,0x7F2B,0x7F2E,0x7F36,0x7F46,0x7F70, -0x7F77,0x7F79,0x8030,0x8037,0x8038,0x803A,0x803B,0x803F, -0x8040,0x8045,0x8048,0x804A,0x804B,0x805B,0x8066,0x806C, 
-0x8122,0x8125,0x8127,0x8131,0x8132,0x8138,0x813F,0x8141, -0x814A,0x8152,0x8153,0x8159,0x815C,0x8177,0x822A,0x8231, -0x8232,0x823A,0x823D,0x8259,0x825C,0x825E,0x8263,0x826A, -0x826B,0x8272,0x8274,0x8275,0x8325,0x8332,0x833E,0x8344, -0x8347,0x8355,0x8356,0x837E,0x8422,0x842B,0x8430,0x8450, -0x8465,0x846D,0x8472,0x8524,0x8529,0x852A,0x8532,0x8534, -0x8535,0x8539,0x8556,0x857D,0x8623,0x8624,0x863A,0x863C, -0x863D,0x8642,0x8643,0x8644,0x8647,0x8649,0x8655,0x8656, -0x8657,0x865B,0x8677,0x8678,0x872A,0x873F,0x8740,0x8742, -0x8743,0x874E,0x8759,0x8761,0x8769,0x876A,0x8770,0x8775, -0x8823,0x8834,0x8849,0x885C,0x885E,0x885F,0x8860,0x8932, -0x8947,0x894D,0x8961,0x8964,0x8A22,0x8A33,0x8A39,0x8A53, -0x8A7B,0x8B2E,0x8B30,0x8B35,0x8B44,0x8B5D,0x8B61,0x8B66, -0x8B69,0x8B75,0x8B77,0x8B7A,0x8C21,0x8C23,0x8C24,0x8C28, -0x8C2C,0x8C3D,0x8C48,0x8C5B,0x8C75,0x8C76,0x8D32,0x8D3D, -0x8D3E,0x8D40,0x8D52,0x8D5D,0x8D5E,0x8D73,0x8D74,0x8D75, -0x8D77,0x8D7B,0x8D7D,0x8E22,0x8E24,0x8E27,0x8E2E,0x8E2F, -0x8E34,0x8E35,0x8E3D,0x8E42,0x8E4F,0x8E69,0x8E6B,0x8E72, -0x8E75,0x8E79,0x8F35,0x8F3A,0x8F46,0x8F56,0x8F58,0x8F5A, -0x8F5D,0x8F5F,0x8F63,0x8F6A,0x8F70,0x8F73,0x9044,0x904E, -0x905D,0x9075,0x907E,0x9121,0x9122,0x9133,0x9136,0x9164, -0x9165,0x916B,0x916E,0x9173,0x9229,0x922A,0x922C,0x9234, -0x923C,0x923E,0x9242,0x9256,0x9263,0x9277,0x9279,0x927A, -0x9325,0x932F,0x9332,0x9339,0x9342,0x9348,0x9359,0x935E, -0x9366,0x936B,0x937A,0x937E,0x9421,0x942C,0x942F,0x944F, -0x9450,0x9457,0x9465,0x9466,0x9471,0x9472,0x947E,0x9521, -0x952C,0x952D,0x9536,0x9537,0x953D,0x953E,0x954E,0x954F, -0x9557,0x955A,0x955C,0x955D,0x9561,0x9565,0x9567,0x9569, -0x9571,0x9622,0x9623,0x9638,0x9642,0x964C,0x9656,0x9659, -0x965D,0x9676,0x972C,0x974B,0x974C,0x9759,0x975B,0x975D, -0x9767,0x976D,0x9770,0x9825,0x9829,0x982B,0x9832,0x9835, -0x9853,0x9858,0x985A,0x986E,0x9870,0x9872,0x9876,}; +0x04C7,0x0545,0x054F,0x0563,0x057E,0x1117,0x1126,0x1131, +0x1EF1,0x1F29,0x1F3E,0x1F47,0x1F75,0x1FDF,0x204F,0x2058, 
+0x2076,0x2083,0x209D,0x20EA,0x211E,0x2134,0x2157,0x2192, +0x219A,0x226B,0x2284,0x228E,0x2291,0x2299,0x22A9,0x22D3, +0x22DA,0x22DC,0x22F1,0x22F8,0x22F9,0x22FB,0x22FC,0x2300, +0x2301,0x2306,0x2309,0x230B,0x230C,0x231C,0x2327,0x232D, +0x2341,0x2344,0x2346,0x2350,0x2351,0x2357,0x235E,0x2360, +0x2369,0x2371,0x2372,0x2378,0x237B,0x2396,0x23A7,0x23AE, +0x23AF,0x23B7,0x23BA,0x23D6,0x23D9,0x23DB,0x23E0,0x23E7, +0x23E8,0x23EF,0x23F1,0x23F2,0x2400,0x240D,0x2419,0x241F, +0x2422,0x2430,0x2431,0x2459,0x245B,0x2464,0x2469,0x2489, +0x249E,0x24A6,0x24AB,0x24BB,0x24C0,0x24C1,0x24C9,0x24CB, +0x24CC,0x24D0,0x24ED,0x2514,0x2518,0x2519,0x252F,0x2531, +0x2532,0x2537,0x2538,0x2539,0x253C,0x253E,0x254A,0x254B, +0x254C,0x2550,0x256C,0x256D,0x257D,0x2592,0x2593,0x2595, +0x2596,0x25A1,0x25AC,0x25B4,0x25BC,0x25BD,0x25C3,0x25C8, +0x25D4,0x25E5,0x25FA,0x260D,0x260F,0x2610,0x2611,0x2641, +0x2656,0x265C,0x2670,0x2673,0x268F,0x26A0,0x26A6,0x26C0, +0x26E8,0x26F9,0x26FB,0x2700,0x270F,0x2728,0x272C,0x2731, +0x2734,0x2740,0x2742,0x2745,0x274A,0x274C,0x274D,0x2751, +0x2755,0x2766,0x2771,0x2784,0x279E,0x279F,0x27B9,0x27C4, +0x27C5,0x27C7,0x27D9,0x27E4,0x27E5,0x27FA,0x27FB,0x27FC, +0x27FE,0x2802,0x2804,0x2807,0x2809,0x280C,0x2813,0x2814, +0x2819,0x281A,0x2822,0x2827,0x2834,0x284E,0x2850,0x2857, +0x285A,0x285E,0x2878,0x287D,0x2889,0x2899,0x289B,0x289D, +0x28A0,0x28A2,0x28A6,0x28AD,0x28B3,0x28B6,0x28E5,0x28EF, +0x28FE,0x2916,0x291F,0x2920,0x2921,0x2932,0x2935,0x2963, +0x2964,0x296A,0x296D,0x2972,0x2986,0x2987,0x2989,0x2991, +0x2999,0x299B,0x299F,0x29B3,0x29C0,0x29D4,0x29D6,0x29D7, +0x29E0,0x29EA,0x29ED,0x29F4,0x29FD,0x2A03,0x2A14,0x2A19, +0x2A21,0x2A26,0x2A35,0x2A39,0x2A3A,0x2A45,0x2A48,0x2A68, +0x2A69,0x2A70,0x2A7E,0x2A7F,0x2A8A,0x2A8B,0x2A97,0x2A98, +0x2AA3,0x2AA4,0x2AAD,0x2AAE,0x2AB4,0x2AB5,0x2AC5,0x2AC6, +0x2ACE,0x2AD1,0x2AD3,0x2AD4,0x2AD8,0x2ADC,0x2ADE,0x2AE0, +0x2AE8,0x2AF7,0x2AF8,0x2B0D,0x2B17,0x2B21,0x2B2B,0x2B2E, +0x2B32,0x2B4B,0x2B5F,0x2B7E,0x2B7F,0x2B8C,0x2B8E,0x2B90, 
+0x2B9A,0x2BA0,0x2BA3,0x2BB6,0x2BBA,0x2BBC,0x2BC3,0x2BC6, +0x2BE4,0x2BE9,0x2BEB,0x2BFF,0x2C01,0x2C03,0x2C07 +}; static const unsigned short jisx0213_u5_jis_tbl[] = { 0x2E22,0x7F21,0x7F2B,0x7F2E,0x7F36,0x7F46,0x7F70,0x7F79, @@ -5016,7 +5017,15 @@ static const unsigned short jisx0213_u2_key[] = { 0x2477,0x2478,0x2479,0x247A,0x247B,0x2577,0x2578,0x2579, 0x257A,0x257B,0x257C,0x257D,0x257E,0x2678,0x2B44,0x2B48, 0x2B49,0x2B4A,0x2B4B,0x2B4C,0x2B4D,0x2B4E,0x2B4F,0x2B65, - 0x2B66}; + 0x2B66 +}; + +static const unsigned short jisx0213_u2_key_b[] = { + 0x0170,0x0171,0x0172,0x0173,0x0174,0x01CE,0x01CF,0x01D0, + 0x01D1,0x01D2,0x01D3,0x01D4,0x01D5,0x022D,0x03CF,0x03D3, + 0x03D4,0x03D5,0x03D6,0x03D7,0x03D8,0x03D9,0x03DA,0x03F0, + 0x03F1 +}; /* combined pairs in Unicode */ static const unsigned short jisx0213_u2_tbl[] = { @@ -5026,18 +5035,19 @@ static const unsigned short jisx0213_u2_tbl[] = { 0x30C8,0x309A,0x31F7,0x309A,0x00E6,0x0300,0x0254,0x0300, 0x0254,0x0301,0x028C,0x0300,0x028C,0x0301,0x0259,0x0300, 0x0259,0x0301,0x025A,0x0300,0x025A,0x0301,0x02E9,0x02E5, - 0x02E5,0x02E9}; + 0x02E5,0x02E9 +}; /* fallback chars for combined chars in Unicode */ static const unsigned short jisx0213_u2_fb_tbl[] = { 0x242B,0x242D,0x242F,0x2431,0x2433,0x252B,0x252D,0x252F, 0x2531,0x2533,0x253B,0x2544,0x2548,0x2675,0x295C,0x2B38, 0x2B38,0x2B37,0x2B37,0x2B30,0x2B30,0x2B43,0x2B43,0x2B64, - 0x2B60}; + 0x2B60 +}; static const int jisx0213_u2_tbl_len = sizeof(jisx0213_u2_key)/sizeof(unsigned short); - static const unsigned short jisx0213_p2_ofst[] = { 0, 7, 2, 3, 4, 11, 12, 13, 14, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93 diff --git a/ext/mbstring/libmbfl/filters/unicode_table_uhc.h b/ext/mbstring/libmbfl/filters/unicode_table_uhc.h index 737d7921619eb..ef45c8547acc3 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_uhc.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_uhc.h @@ -25,11 +25,6 @@ #ifndef UNICODE_TABLE_UHC_H #define UNICODE_TABLE_UHC_H -/* - * Unicode table 
- */ -#ifdef UNICODE_TABLE_UHC_DEF - const unsigned short uhc1_ucs_table[] = { 0xac02,0xac03,0xac05,0xac06,0xac0b,0xac0c,0xac0d,0xac0e, 0xac0f,0xac18,0xac1e,0xac1f,0xac21,0xac22,0xac23,0xac25, @@ -7178,42 +7173,4 @@ const unsigned short ucs_r2_uhc_table[] = { const int ucs_r2_uhc_table_min = 0xff00; const int ucs_r2_uhc_table_max = 0xff00 + (sizeof (ucs_r2_uhc_table) / sizeof (unsigned short)); -#else - -extern const unsigned short uhc1_ucs_table[]; -extern const unsigned short uhc2_ucs_table[]; -extern const unsigned short uhc3_ucs_table[]; -extern const unsigned short ucs_a1_uhc_table[]; -extern const unsigned short ucs_a2_uhc_table[]; -extern const unsigned short ucs_a3_uhc_table[]; -extern const unsigned short ucs_i_uhc_table[]; -extern const unsigned short ucs_s_uhc_table[]; -extern const unsigned short ucs_r1_uhc_table[]; -extern const unsigned short ucs_r2_uhc_table[]; - -extern const int uhc1_ucs_table_size; -extern const int uhc2_ucs_table_size; -extern const int uhc3_ucs_table_size; -extern const int ucs_a1_uhc_table_min; -extern const int ucs_a1_uhc_table_max; -extern const int ucs_a2_uhc_table_min; -extern const int ucs_a2_uhc_table_max; -extern const int ucs_a3_uhc_table_min; -extern const int ucs_a3_uhc_table_max; -extern const int ucs_i_uhc_table_min; -extern const int ucs_i_uhc_table_max; -extern const int ucs_s_uhc_table_min; -extern const int ucs_s_uhc_table_max; -extern const int ucs_r1_uhc_table_min; -extern const int ucs_r1_uhc_table_max; -extern const int ucs_r2_uhc_table_min; -extern const int ucs_r2_uhc_table_max; - - - - -#endif - - - #endif /* UNICODE_TABLE_UHC_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index edad3a3b57599..47d7980d5492e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -36,30 +36,8 @@ #include "mbfilter_8bit.h" #include "mbfilter_wchar.h" -#include "filters/mbfilter_euc_cn.h" -#include "filters/mbfilter_hz.h" 
-#include "filters/mbfilter_euc_tw.h" -#include "filters/mbfilter_big5.h" -#include "filters/mbfilter_uhc.h" -#include "filters/mbfilter_euc_kr.h" -#include "filters/mbfilter_iso2022_kr.h" -#include "filters/mbfilter_sjis.h" -#include "filters/mbfilter_sjis_2004.h" -#include "filters/mbfilter_sjis_mobile.h" -#include "filters/mbfilter_sjis_mac.h" -#include "filters/mbfilter_cp51932.h" -#include "filters/mbfilter_jis.h" -#include "filters/mbfilter_iso2022_jp_ms.h" -#include "filters/mbfilter_iso2022jp_2004.h" -#include "filters/mbfilter_iso2022jp_mobile.h" -#include "filters/mbfilter_euc_jp.h" -#include "filters/mbfilter_euc_jp_2004.h" -#include "filters/mbfilter_euc_jp_win.h" -#include "filters/mbfilter_gb18030.h" -#include "filters/mbfilter_cp932.h" -#include "filters/mbfilter_cp936.h" -#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_base64.h" +#include "filters/mbfilter_cjk.h" #include "filters/mbfilter_qprint.h" #include "filters/mbfilter_uuencode.h" #include "filters/mbfilter_7bit.h" diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index 1d44756ee051a..d78e4763b4878 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -39,30 +39,8 @@ #include "mbfilter_pass.h" #include "mbfilter_8bit.h" -#include "filters/mbfilter_euc_cn.h" -#include "filters/mbfilter_hz.h" -#include "filters/mbfilter_euc_tw.h" -#include "filters/mbfilter_big5.h" -#include "filters/mbfilter_uhc.h" -#include "filters/mbfilter_euc_kr.h" -#include "filters/mbfilter_iso2022_kr.h" -#include "filters/mbfilter_sjis.h" -#include "filters/mbfilter_sjis_mobile.h" -#include "filters/mbfilter_sjis_mac.h" -#include "filters/mbfilter_sjis_2004.h" -#include "filters/mbfilter_cp51932.h" -#include "filters/mbfilter_jis.h" -#include "filters/mbfilter_iso2022_jp_ms.h" -#include "filters/mbfilter_iso2022jp_2004.h" -#include "filters/mbfilter_iso2022jp_mobile.h" -#include 
"filters/mbfilter_euc_jp.h" -#include "filters/mbfilter_euc_jp_win.h" -#include "filters/mbfilter_euc_jp_2004.h" -#include "filters/mbfilter_gb18030.h" -#include "filters/mbfilter_cp932.h" -#include "filters/mbfilter_cp936.h" -#include "filters/mbfilter_cp5022x.h" #include "filters/mbfilter_base64.h" +#include "filters/mbfilter_cjk.h" #include "filters/mbfilter_qprint.h" #include "filters/mbfilter_uuencode.h" #include "filters/mbfilter_7bit.h" diff --git a/ext/mbstring/mb_gpc.c b/ext/mbstring/mb_gpc.c index 1ffe0af56bc7e..33fbd32edc663 100644 --- a/ext/mbstring/mb_gpc.c +++ b/ext/mbstring/mb_gpc.c @@ -234,7 +234,7 @@ const mbfl_encoding *_php_mb_encoding_handler_ex(const php_mb_encoding_handler_i } else if (info->num_from_encodings == 1) { from_encoding = info->from_encodings[0]; } else { - from_encoding = mb_guess_encoding_for_strings((const unsigned char**)val_list, len_list, num, info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection)); + from_encoding = mb_guess_encoding_for_strings((const unsigned char**)val_list, len_list, num, info->from_encodings, info->num_from_encodings, MBSTRG(strict_detection), false); if (!from_encoding) { if (info->report_errors) { php_error_docref(NULL, E_WARNING, "Unable to detect encoding"); diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 73dea4e5c6b81..758c00b4261ef 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -90,7 +90,7 @@ static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc); static bool mb_check_str_encoding(zend_string *str, const mbfl_encoding *encoding); -static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict); +static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict, bool order_significant); static zend_string* mb_mime_header_encode(zend_string *input, const 
mbfl_encoding *incode, const mbfl_encoding *outcode, bool base64, char *linefeed, size_t linefeed_len, zend_long indent); @@ -452,7 +452,7 @@ static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *a list_size = MBSTRG(current_detect_order_list_size); } - return (const zend_encoding*)mb_guess_encoding((unsigned char*)arg_string, arg_length, (const mbfl_encoding **)list, list_size, false); + return (const zend_encoding*)mb_guess_encoding((unsigned char*)arg_string, arg_length, (const mbfl_encoding **)list, list_size, false, false); } static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from) @@ -1016,6 +1016,7 @@ ZEND_TSRMLS_CACHE_UPDATE(); mbstring_globals->internal_encoding_set = 0; mbstring_globals->http_output_set = 0; mbstring_globals->http_input_set = 0; + mbstring_globals->all_encodings_list = NULL; } /* }}} */ @@ -1156,6 +1157,13 @@ PHP_RSHUTDOWN_FUNCTION(mbstring) MBSTRG(outconv_enabled) = false; MBSTRG(outconv_state) = 0; + if (MBSTRG(all_encodings_list)) { + GC_DELREF(MBSTRG(all_encodings_list)); + zend_hash_destroy(MBSTRG(all_encodings_list)); + efree(MBSTRG(all_encodings_list)); + MBSTRG(all_encodings_list) = NULL; + } + #ifdef HAVE_MBREGEX PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif @@ -1808,7 +1816,6 @@ static size_t mb_get_strlen(zend_string *string, const mbfl_encoding *encoding) return mb_fast_strlen_utf8((unsigned char*)ZSTR_VAL(string), ZSTR_LEN(string)); } - uint32_t wchar_buf[128]; unsigned char *in = (unsigned char*)ZSTR_VAL(string); size_t in_len = ZSTR_LEN(string); @@ -2687,7 +2694,7 @@ MBSTRING_API zend_string* php_mb_convert_encoding(const char *input, size_t leng from_encoding = *from_encodings; } else { /* auto detect */ - from_encoding = mb_guess_encoding((unsigned char*)input, length, from_encodings, num_from_encodings, MBSTRG(strict_detection)); + 
from_encoding = mb_guess_encoding((unsigned char*)input, length, from_encodings, num_from_encodings, MBSTRG(strict_detection), true); if (!from_encoding) { php_error_docref(NULL, E_WARNING, "Unable to detect character encoding"); return NULL; @@ -2988,28 +2995,38 @@ struct candidate { size_t in_len; uint64_t demerits; /* Wide bit size to prevent overflow */ unsigned int state; + float multiplier; }; -static size_t init_candidate_array(struct candidate *array, size_t length, const mbfl_encoding **encodings, const unsigned char **in, size_t *in_len, size_t n, bool strict) +static size_t init_candidate_array(struct candidate *array, size_t length, const mbfl_encoding **encodings, const unsigned char **in, size_t *in_len, size_t n, bool strict, bool order_significant) { size_t j = 0; for (size_t i = 0; i < length; i++) { const mbfl_encoding *enc = encodings[i]; + array[j].enc = enc; + array[j].state = 0; + array[j].demerits = 0; + /* If any candidate encodings have specialized validation functions, use them * to eliminate as many candidates as possible */ - if (strict && enc->check != NULL) { + if (enc->check != NULL) { for (size_t k = 0; k < n; k++) { if (!enc->check((unsigned char*)in[k], in_len[k])) { - goto skip_to_next; + if (strict) { + goto skip_to_next; + } else { + array[j].demerits += 500; + } } } } - array[j].enc = enc; - array[j].state = 0; - array[j].demerits = 0; + /* This multiplier can optionally be used to make candidate encodings listed + * first more likely to be chosen. It is a weight factor which multiplies + * the number of demerits counted for each candidate. */ + array[j].multiplier = order_significant ? 
1.0 + ((0.3 * i) / length) : 1.0; j++; skip_to_next: ; } @@ -3085,10 +3102,14 @@ static size_t count_demerits(struct candidate *array, size_t length, bool strict } } + for (size_t i = 0; i < length; i++) { + array[i].demerits *= array[i].multiplier; + } + return length; } -MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned char **strings, size_t *str_lengths, size_t n, const mbfl_encoding **elist, unsigned int elist_size, bool strict) +MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned char **strings, size_t *str_lengths, size_t n, const mbfl_encoding **elist, unsigned int elist_size, bool strict, bool order_significant) { if (elist_size == 0) { return NULL; @@ -3109,7 +3130,7 @@ MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned c /* Allocate on stack; when we return, this array is automatically freed */ struct candidate *array = alloca(elist_size * sizeof(struct candidate)); - elist_size = init_candidate_array(array, elist_size, elist, strings, str_lengths, n, strict); + elist_size = init_candidate_array(array, elist_size, elist, strings, str_lengths, n, strict, order_significant); while (n--) { start_string(array, elist_size, strings[n], str_lengths[n]); @@ -3133,9 +3154,9 @@ MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned c /* When doing 'strict' detection, any string which is invalid in the candidate encoding * is rejected. 
With non-strict detection, we just continue, but apply demerits for * each invalid byte sequence */ -static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict) +static const mbfl_encoding* mb_guess_encoding(unsigned char *in, size_t in_len, const mbfl_encoding **elist, unsigned int elist_size, bool strict, bool order_significant) { - return mb_guess_encoding_for_strings((const unsigned char**)&in, &in_len, 1, elist, elist_size, strict); + return mb_guess_encoding_for_strings((const unsigned char**)&in, &in_len, 1, elist, elist_size, strict, order_significant); } /* {{{ Encodings of the given string is returned (as a string) */ @@ -3154,8 +3175,17 @@ PHP_FUNCTION(mb_detect_encoding) Z_PARAM_BOOL(strict) ZEND_PARSE_PARAMETERS_END(); + /* Should we pay attention to the order of the provided candidate encodings and prefer + * the earlier ones (if more than one candidate encoding matches)? + * If the entire list of supported encodings returned by `mb_list_encodings` is passed + * in, then don't treat the order as significant */ + bool order_significant = true; + /* make encoding list */ if (encoding_ht) { + if (encoding_ht == MBSTRG(all_encodings_list)) { + order_significant = false; + } if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) { RETURN_THROWS(); } @@ -3187,7 +3217,7 @@ PHP_FUNCTION(mb_detect_encoding) if (size == 1 && *elist == &mbfl_encoding_utf8 && (GC_FLAGS(str) & IS_STR_VALID_UTF8)) { ret = &mbfl_encoding_utf8; } else { - ret = mb_guess_encoding((unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), elist, size, strict); + ret = mb_guess_encoding((unsigned char*)ZSTR_VAL(str), ZSTR_LEN(str), elist, size, strict, order_significant); } efree(ZEND_VOIDP(elist)); @@ -3205,10 +3235,22 @@ PHP_FUNCTION(mb_list_encodings) { ZEND_PARSE_PARAMETERS_NONE(); - array_init(return_value); - for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); 
*encodings; encodings++) { - add_next_index_string(return_value, (*encodings)->name); + if (MBSTRG(all_encodings_list) == NULL) { + /* Initialize shared array of supported encoding names + * This is done so that we can check if `mb_list_encodings()` is being + * passed to other mbstring functions using a cheap pointer equality check */ + HashTable *array = emalloc(sizeof(HashTable)); + zend_hash_init(array, 80, NULL, zval_ptr_dtor_str, false); + for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) { + zval tmp; + ZVAL_STRING(&tmp, (*encodings)->name); + zend_hash_next_index_insert(array, &tmp); + } + MBSTRG(all_encodings_list) = array; } + + GC_ADDREF(MBSTRG(all_encodings_list)); + RETURN_ARR(MBSTRG(all_encodings_list)); } /* }}} */ @@ -3536,8 +3578,15 @@ PHP_FUNCTION(mb_convert_variables) from_encoding = MBSTRG(current_internal_encoding); + bool order_significant = true; + /* pre-conversion encoding */ if (from_enc_ht) { + if (from_enc_ht == MBSTRG(all_encodings_list)) { + /* If entire list of supported encodings returned by `mb_list_encodings` is passed + * in, then don't treat the order of the list as significant */ + order_significant = false; + } if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) { RETURN_THROWS(); } @@ -3575,7 +3624,7 @@ PHP_FUNCTION(mb_convert_variables) RETURN_FALSE; } } - from_encoding = mb_guess_encoding_for_strings(val_list, len_list, num, elist, elistsz, MBSTRG(strict_detection)); + from_encoding = mb_guess_encoding_for_strings(val_list, len_list, num, elist, elistsz, MBSTRG(strict_detection), order_significant); efree(ZEND_VOIDP(val_list)); efree(len_list); if (!from_encoding) { @@ -4162,7 +4211,6 @@ PHP_FUNCTION(mb_send_mail) *head_enc, /* header transfer encoding */ *body_enc; /* body transfer encoding */ const mbfl_language *lang; - int err = 0; HashTable ht_headers; zval *s; @@ -4293,7 +4341,7 @@ PHP_FUNCTION(mb_send_mail) /* Subject: */ const mbfl_encoding *enc 
= MBSTRG(current_internal_encoding); if (enc == &mbfl_encoding_pass) { - enc = mb_guess_encoding((unsigned char*)ZSTR_VAL(subject), ZSTR_LEN(subject), MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + enc = mb_guess_encoding((unsigned char*)ZSTR_VAL(subject), ZSTR_LEN(subject), MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection), false); } const char *line_sep = PG(mail_mixed_lf_and_crlf) ? "\n" : CRLF; size_t line_sep_len = strlen(line_sep); @@ -4303,7 +4351,7 @@ PHP_FUNCTION(mb_send_mail) /* message body */ const mbfl_encoding *msg_enc = MBSTRG(current_internal_encoding); if (msg_enc == &mbfl_encoding_pass) { - msg_enc = mb_guess_encoding((unsigned char*)message, message_len, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection)); + msg_enc = mb_guess_encoding((unsigned char*)message, message_len, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection), false); } unsigned int num_errors = 0; @@ -4377,7 +4425,7 @@ PHP_FUNCTION(mb_send_mail) extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd)); } - RETVAL_BOOL(!err && php_mail(to_r, ZSTR_VAL(subject), message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)); + RETVAL_BOOL(php_mail(to_r, ZSTR_VAL(subject), message, ZSTR_VAL(str_headers), extra_cmd ? 
ZSTR_VAL(extra_cmd) : NULL)); if (extra_cmd) { zend_string_release_ex(extra_cmd, 0); diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 0837e45cf327a..7256605bd2462 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -67,7 +67,7 @@ MBSTRING_API size_t php_mb_mbchar_bytes(const char *s, const mbfl_encoding *enc) MBSTRING_API size_t php_mb_stripos(bool mode, zend_string *haystack, zend_string *needle, zend_long offset, const mbfl_encoding *enc); MBSTRING_API bool php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding); -MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned char **strings, size_t *str_lengths, size_t n, const mbfl_encoding **elist, unsigned int elist_size, bool strict); +MBSTRING_API const mbfl_encoding* mb_guess_encoding_for_strings(const unsigned char **strings, size_t *str_lengths, size_t n, const mbfl_encoding **elist, unsigned int elist_size, bool strict, bool order_significant); ZEND_BEGIN_MODULE_GLOBALS(mbstring) char *internal_encoding_name; @@ -88,6 +88,7 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring) size_t current_detect_order_list_size; enum mbfl_no_encoding *default_detect_order_list; size_t default_detect_order_list_size; + HashTable *all_encodings_list; int filter_illegal_mode; uint32_t filter_illegal_substchar; int current_filter_illegal_mode; diff --git a/ext/mbstring/mbstring.stub.php b/ext/mbstring/mbstring.stub.php index 0d04e34bc7823..add0a35e05b80 100644 --- a/ext/mbstring/mbstring.stub.php +++ b/ext/mbstring/mbstring.stub.php @@ -140,7 +140,6 @@ function mb_detect_encoding(string $string, array|string|null $encodings = null, /** * @return array - * @refcount 1 */ function mb_list_encodings(): array {} diff --git a/ext/mbstring/mbstring_arginfo.h b/ext/mbstring/mbstring_arginfo.h index 3bf10789e0d6c..e8985793e380f 100644 --- a/ext/mbstring/mbstring_arginfo.h +++ b/ext/mbstring/mbstring_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit 
the .stub.php file instead. - * Stub hash: 0c9ac8888b8332557f7098cfb9d259757af8b3c6 */ + * Stub hash: 26a027093075613056921c4d1a7eee65d52ec5eb */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_mb_language, 0, 0, MAY_BE_STRING|MAY_BE_BOOL) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, language, IS_STRING, 1, "null") diff --git a/ext/mbstring/tests/cp932_encoding.phpt b/ext/mbstring/tests/cp932_encoding.phpt index df700f20286a5..b7bfee7496ccb 100644 --- a/ext/mbstring/tests/cp932_encoding.phpt +++ b/ext/mbstring/tests/cp932_encoding.phpt @@ -142,6 +142,8 @@ convertInvalidString("\xEA", "%", "SJIS-win", "UTF-8"); convertInvalidString("\x81\x20", "%", "SJIS-win", "UTF-8"); convertInvalidString("\xEA\xA9", "%", "SJIS-win", "UTF-8"); +echo 'mb_strlen("\x80\x81", "CP932") == ' . mb_strlen("\x80\x81", "CP932") . PHP_EOL; + echo "Done!\n"; ?> --EXPECT-- @@ -151,4 +153,5 @@ Unicode -> CP932 conversion works on all invalid codepoints SJIS-win verification and conversion works on all valid characters SJIS-win verification and conversion works on all invalid characters Unicode -> SJIS-win conversion works on all invalid codepoints +mb_strlen("\x80\x81", "CP932") == 2 Done! diff --git a/ext/mbstring/tests/encoding_tests.inc b/ext/mbstring/tests/encoding_tests.inc index 978cb8db399c0..095aeae517fcc 100644 --- a/ext/mbstring/tests/encoding_tests.inc +++ b/ext/mbstring/tests/encoding_tests.inc @@ -148,6 +148,12 @@ function testAllValidChars($charMap, $fromEncoding, $toEncoding, $bothWays = tru } testValidString($fromString, $toString, $fromEncoding, $toEncoding, $bothWays); + + $strLen = mb_strlen($fromString, $fromEncoding); + if ($strLen !== mb_strlen($toString, $toEncoding)) { + echo "Length of $fromEncoding string '" . bin2hex($fromString) . "' was different than expected; mb_strlen returned $strLen" . 
PHP_EOL; + testFailedIncrement(); + } } } diff --git a/ext/mbstring/tests/gh10192_utf7.phpt b/ext/mbstring/tests/gh10192_utf7.phpt index 2930942c12c5a..9aa4eb6925463 100644 --- a/ext/mbstring/tests/gh10192_utf7.phpt +++ b/ext/mbstring/tests/gh10192_utf7.phpt @@ -75,7 +75,7 @@ foreach ($testcases as $title => $case) { --EXPECT-- non-base64 character after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -93,7 +93,7 @@ int(0) base64 character before + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -174,7 +174,7 @@ int(2) - and + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -219,7 +219,7 @@ int(2) valid direct encoding character = after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -228,7 +228,7 @@ int(2) invalid direct encoding character ~ after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -237,7 +237,7 @@ int(2) invalid direct encoding character \ after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) @@ -246,7 +246,7 @@ int(2) invalid direct encoding character ESC after + string(5) "UTF-8" -string(5) "UTF-7" +string(5) "UTF-8" bool(false) string(5) "UTF-7" bool(false) diff --git a/ext/mbstring/tests/gh11217.phpt b/ext/mbstring/tests/gh11217.phpt new file mode 100644 index 0000000000000..d500f22cbd7bb --- /dev/null +++ b/ext/mbstring/tests/gh11217.phpt @@ -0,0 +1,12 @@ +--TEST-- +GH-11217: Segfault in mb_strrpos/mb_strripos with ASCII encoding and negative offset +--EXTENSIONS-- +mbstring +--FILE-- + +--EXPECT-- +int(0) +int(0) diff --git a/ext/mbstring/tests/mb_detect_encoding.phpt b/ext/mbstring/tests/mb_detect_encoding.phpt index 544375fbd2998..11d5a1c31364f 100644 --- a/ext/mbstring/tests/mb_detect_encoding.phpt +++ 
b/ext/mbstring/tests/mb_detect_encoding.phpt @@ -78,6 +78,13 @@ echo mb_detect_encoding($test, ['UTF-8', 'ISO-8859-1']), "\n"; // Should be UTF- echo mb_detect_encoding('abc', ['UUENCODE', 'UTF-8']), "\n"; echo mb_detect_encoding('abc', ['UUENCODE', 'QPrint', 'HTML-ENTITIES', 'Base64', '7bit', '8bit', 'SJIS']), "\n"; +// This test case courtesy of Adrien Foulon +// It depends on the below use of '+' being recognized as invalid UTF-7 +$css = 'input[type="radio"]:checked + img { + border: 5px solid #0083ca; +}'; +echo mb_detect_encoding($css, mb_list_encodings(), true), "\n"; + echo "== DETECT ORDER ==\n"; mb_detect_order('auto'); @@ -88,6 +95,17 @@ print("EUC-JP: " . mb_detect_encoding($euc_jp) . "\n"); print("SJIS: " . mb_detect_encoding($sjis) . "\n"); +// Thanks to Ulrik Nielsen for the following tests; the hex strings are the same file, but in two +// different text encodings +// We do not have any strong hints showing that the second one is actually UTF-8... +// but mb_detect_encoding still guesses UTF-8 because it is the first one in the list + +$win1252text = 
hex2bin("2320546869732066696c6520636f6e7461696e732057696e646f77732d3132353220656e636f646564206461746120616e642048544d4c20656e7469746965730a61626364650ae6f8e50af00a3c703e476f646461673c6272202f3e0a7b726561646f626a206f626a65637469643d24726573756c745b305d2e706172656e7469642061737369676e3d22646f63227d3c6272202f3e0a23205468697320697320746f20656e73757265207468617420646966666572656e74206b696e6473206f662048544d4c20656e74697469657320617265206265696e6720636f6e76657274656420636f72726563746c790af00ad00a2623383739313b0a262331373238373b0a262333383937393b0a2623353437333b0a616263646520e6f8e520f020d0203c703e476f646461673c6272202f3e207b726561646f626a206f626a65637469643d24726573756c745b305d2e706172656e7469642061737369676e3d22646f63227d3c6272202f3e202623383739313b20262331373238373b20262333383937393b202623353437333b0a232054686520666f6c6c6f77696e67206368617261637465727320617265207370656369616c206368617261637465727320746861742068617320646966666572656e7420636f646520706f696e747320696e2049534f2d383835392d3120616e642057696e646f77732d31323532202d207468617420776520646966666572656e746961746520636f72726563746c79206265747765656e2049534f2d383835392d3120616e642057696e646f77732d313235320a8c0a890a2320506f6c69736820737472696e670a50727a656a6426233337383b20646f2070727a65676c26233236313b64750a"); +echo mb_detect_encoding($win1252text, ['UTF-8', 'CP1252', 'ISO-8859-1'], true), "\n"; + +$utf8text = 
hex2bin("2320546869732066696c6520636f6e7461696e73205554462d3820656e636f64656420646174610a61626364650ac3a6c3b8c3a50ac3b00a3c703e476f646461673c6272202f3e0a7b726561646f626a206f626a65637469643d24726573756c745b305d2e706172656e7469642061737369676e3d22646f63227d3c6272202f3e0a23205468697320697320746f20656e73757265207468617420646966666572656e74206b696e6473206f662048544d4c20656e74697469657320617265206265696e6720636f6e76657274656420636f72726563746c790ac3b00ac3900ae289970ae48e870ae9a1830ae195a10a616263646520c3a6c3b8c3a520c3b020c390203c703e476f646461673c6272202f3e207b726561646f626a206f626a65637469643d24726573756c745b305d2e706172656e7469642061737369676e3d22646f63227d3c6272202f3e20e2899720e48e8720e9a18320e195a10a232054686520666f6c6c6f77696e67206368617261637465727320617265207370656369616c206368617261637465727320746861742068617320646966666572656e7420636f646520706f696e747320696e2049534f2d383835392d3120616e642057696e646f77732d31323532202d207468617420776520646966666572656e746961746520636f72726563746c79206265747765656e2049534f2d383835392d3120616e642057696e646f77732d313235320ac5920ae280b00a2320506f6c69736820737472696e670a50727a656a64c5ba20646f2070727a65676cc48564750a"); +echo mb_detect_encoding($utf8text, ['UTF-8', 'CP1252', 'ISO-8859-1'], true), "\n"; + echo "== INVALID PARAMETER ==\n"; print("INT: " . mb_detect_encoding(1234, 'EUC-JP') . "\n"); // EUC-JP @@ -389,10 +407,13 @@ UTF-8 UTF-8 UTF-8 SJIS +UTF-8 == DETECT ORDER == JIS: JIS EUC-JP: EUC-JP SJIS: SJIS +Windows-1252 +UTF-8 == INVALID PARAMETER == INT: EUC-JP EUC-JP: EUC-JP diff --git a/ext/mbstring/tests/mb_strlen.phpt b/ext/mbstring/tests/mb_strlen.phpt index b3fb28309bcbe..c8279a3c8bfd5 100644 --- a/ext/mbstring/tests/mb_strlen.phpt +++ b/ext/mbstring/tests/mb_strlen.phpt @@ -35,6 +35,13 @@ print "-- Testing illegal bytes 0x80,0xFD-FF --\n"; print mb_strlen("\x80\xA1", 'SJIS') . "\n"; print mb_strlen("abc\xFD\xFE\xFF", 'SJIS') . "\n"; +echo "== CP932 ==\n"; +print mb_strlen("\x80\xA1", "CP932") . 
"\n"; +// 0xFD, 0xFE, 0xFF is reserved. +print mb_strlen("abc\xFD\xFE\xFF", 'CP932') . "\n"; +print mb_strlen("\x80\xA1", "SJIS-win") . "\n"; +print mb_strlen("abc\xFD\xFE\xFF", 'SJIS-win') . "\n"; + echo "== MacJapanese ==\n"; print mb_strlen("\x80\xA1", 'MacJapanese') . "\n"; print mb_strlen("abc\xFD\xFE\xFF", 'MacJapanese') . "\n"; @@ -107,6 +114,11 @@ try { -- Testing illegal bytes 0x80,0xFD-FF -- 2 6 +== CP932 == +2 +6 +2 +6 == MacJapanese == 2 7 diff --git a/ext/mbstring/tests/mb_strrpos_basic.phpt b/ext/mbstring/tests/mb_strrpos_basic.phpt index 28e038da406bc..599dfd38da12a 100644 --- a/ext/mbstring/tests/mb_strrpos_basic.phpt +++ b/ext/mbstring/tests/mb_strrpos_basic.phpt @@ -22,6 +22,9 @@ var_dump(mb_strrpos($string_ascii, 'is', 4, 'ISO-8859-1')); echo "\n-- ASCII string 2 --\n"; var_dump(mb_strrpos($string_ascii, 'hello, world')); +echo "\n-- ASCII string with negative offset --\n"; +var_dump(mb_strrpos($string_ascii, 'hello', -1, 'ISO-8859-1')); + echo "\n-- Multibyte string 1 --\n"; $needle1 = base64_decode('44CC'); var_dump(mb_strrpos($string_mb, $needle1)); @@ -41,6 +44,9 @@ int(15) -- ASCII string 2 -- bool(false) +-- ASCII string with negative offset -- +bool(false) + -- Multibyte string 1 -- int(20) diff --git a/ext/mbstring/tests/uhc_encoding.phpt b/ext/mbstring/tests/uhc_encoding.phpt index 622569c68c699..da8ec711ebfae 100644 --- a/ext/mbstring/tests/uhc_encoding.phpt +++ b/ext/mbstring/tests/uhc_encoding.phpt @@ -14,6 +14,11 @@ testEncodingFromUTF16ConversionTable(__DIR__ . 
'/data/CP949.txt', 'UHC'); // Regression test convertInvalidString("\xE4\xA4\xB4<", "\x75\x1A\x00%", "UHC", "UTF-16BE"); +// When optimizing performance of CP949 conversion, I accidentally broke the +// case where 0xC9 appears before a valid character which starts with a +// byte lower than 0xA1 +convertInvalidString("\xC9\x9E\x98", "%\xEC\x98\x92", "UHC", "UTF-8"); + // Test "long" illegal character markers mb_substitute_character("long"); convertInvalidString("\x80", "%", "UHC", "UTF-8"); diff --git a/ext/opcache/ZendAccelerator.c b/ext/opcache/ZendAccelerator.c index 11ab472631b56..a4113f954ef18 100644 --- a/ext/opcache/ZendAccelerator.c +++ b/ext/opcache/ZendAccelerator.c @@ -390,6 +390,10 @@ static inline void accel_unlock_all(void) #ifdef ZEND_WIN32 accel_deactivate_sub(); #else + if (lock_file == -1) { + return; + } + struct flock mem_usage_unlock_all; mem_usage_unlock_all.l_type = F_UNLCK; diff --git a/ext/opcache/jit/zend_jit.c b/ext/opcache/jit/zend_jit.c index e5a748d8355a8..1df2b9af92b04 100644 --- a/ext/opcache/jit/zend_jit.c +++ b/ext/opcache/jit/zend_jit.c @@ -144,6 +144,31 @@ static zend_jit_trace_info *zend_jit_get_current_trace_info(void); static uint32_t zend_jit_trace_find_exit_point(const void* addr); #endif +#if ZEND_JIT_TARGET_X86 && defined(__linux__) +# if PHP_HAVE_BUILTIN_CPU_SUPPORTS && defined(__GNUC__) && (ZEND_GCC_VERSION >= 11000) +# define ZEND_JIT_SUPPORT_CLDEMOTE 1 +# else +# define ZEND_JIT_SUPPORT_CLDEMOTE 0 +# endif +#endif + +#if ZEND_JIT_SUPPORT_CLDEMOTE +#include <immintrin.h> +#pragma GCC push_options +#pragma GCC target("cldemote") +// check cldemote by CPUID when JIT startup +static int cpu_support_cldemote = 0; +static inline void shared_cacheline_demote(uintptr_t start, size_t size) { + uintptr_t cache_line_base = start & ~0x3F; + do { + _cldemote((void *)cache_line_base); + // next cacheline start size + cache_line_base += 64; + } while (cache_line_base < start + size); +} +#pragma GCC pop_options +#endif + static int 
zend_jit_assign_to_variable(dasm_State **Dst, const zend_op *opline, zend_jit_addr var_use_addr, @@ -341,6 +366,7 @@ static int zend_jit_needs_call_chain(zend_call_info *call_info, uint32_t b, cons case ZEND_DECLARE_ANON_CLASS: case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: + case ZEND_BIND_INIT_STATIC_OR_JMP: return 1; case ZEND_DO_ICALL: case ZEND_DO_UCALL: @@ -423,6 +449,7 @@ static int zend_jit_needs_call_chain(zend_call_info *call_info, uint32_t b, cons case ZEND_DECLARE_ANON_CLASS: case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: + case ZEND_BIND_INIT_STATIC_OR_JMP: return 1; case ZEND_DO_ICALL: case ZEND_DO_UCALL: @@ -973,6 +1000,12 @@ static void *dasm_link_and_encode(dasm_State **dasm_state, /* flush the hardware I-cache */ JIT_CACHE_FLUSH(entry, entry + size); + /* hint to the hardware to push out the cache line that contains the linear address */ +#if ZEND_JIT_SUPPORT_CLDEMOTE + if (cpu_support_cldemote && JIT_G(trigger) == ZEND_JIT_ON_HOT_TRACE) { + shared_cacheline_demote((uintptr_t)entry, size); + } +#endif if (trace_num) { zend_jit_trace_add_code(entry, dasm_getpclabel(dasm_state, 1)); @@ -4018,6 +4051,7 @@ static int zend_jit(const zend_op_array *op_array, zend_ssa *ssa, const zend_op case ZEND_ASSERT_CHECK: case ZEND_FE_FETCH_R: case ZEND_FE_FETCH_RW: + case ZEND_BIND_INIT_STATIC_OR_JMP: if (!zend_jit_handler(&dasm_state, opline, zend_may_throw(opline, ssa_op, op_array, ssa)) || !zend_jit_cond_jmp(&dasm_state, opline + 1, ssa->cfg.blocks[b].successors[0])) { @@ -4788,7 +4822,7 @@ ZEND_EXT_API int zend_jit_config(zend_string *jit, int stage) JIT_G(trigger) = ZEND_JIT_ON_HOT_TRACE; JIT_G(opt_flags) = ZEND_JIT_REG_ALLOC_GLOBAL | ZEND_JIT_CPU_AVX; return SUCCESS; - } else if (zend_string_equals_literal_ci(jit, "function")) { + } else if (zend_string_equals_ci(jit, ZSTR_KNOWN(ZEND_STR_FUNCTION))) { JIT_G(enabled) = 1; JIT_G(on) = 1; JIT_G(opt_level) = ZEND_JIT_LEVEL_OPT_SCRIPT; @@ -4902,6 +4936,10 @@ ZEND_EXT_API int zend_jit_startup(void *buf, size_t size, 
bool reattached) zend_jit_gdb_init(); #endif +#if ZEND_JIT_SUPPORT_CLDEMOTE + cpu_support_cldemote = zend_cpu_supports_cldemote(); +#endif + #ifdef HAVE_PTHREAD_JIT_WRITE_PROTECT_NP zend_write_protect = pthread_jit_write_protect_supported_np(); #endif diff --git a/ext/opcache/jit/zend_jit_helpers.c b/ext/opcache/jit/zend_jit_helpers.c index 41c7e14a804cb..f9541bd7087dc 100644 --- a/ext/opcache/jit/zend_jit_helpers.c +++ b/ext/opcache/jit/zend_jit_helpers.c @@ -27,21 +27,6 @@ static ZEND_COLD void undef_result_after_exception(void) { } } -static ZEND_COLD void zend_jit_illegal_array_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s on array", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -static ZEND_COLD void zend_jit_illegal_empty_or_isset_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s in isset or empty", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -static ZEND_COLD void zend_jit_illegal_string_offset(zval *offset) -{ - zend_type_error("Cannot access offset of type %s on string", zend_zval_value_name(offset)); -} - static zend_never_inline zend_function* ZEND_FASTCALL _zend_jit_init_func_run_time_cache(zend_op_array *op_array) /* {{{ */ { void **run_time_cache; @@ -493,7 +478,7 @@ static void ZEND_FASTCALL zend_jit_fetch_dim_r_helper(zend_array *ht, zval *dim, hval = 1; goto num_index; default: - zend_jit_illegal_array_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_R); undef_result_after_exception(); return; } @@ -635,7 +620,7 @@ static void ZEND_FASTCALL zend_jit_fetch_dim_is_helper(zend_array *ht, zval *dim hval = 1; goto num_index; default: - zend_jit_illegal_array_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_IS); undef_result_after_exception(); return; } @@ -737,7 +722,7 @@ static int ZEND_FASTCALL zend_jit_fetch_dim_isset_helper(zend_array *ht, zval *d hval = 1; goto num_index; default: - 
zend_jit_illegal_empty_or_isset_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_IS); return 0; } @@ -873,7 +858,7 @@ static zval* ZEND_FASTCALL zend_jit_fetch_dim_rw_helper(zend_array *ht, zval *di hval = 1; goto num_index; default: - zend_jit_illegal_array_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_RW); undef_result_after_exception(); return NULL; } @@ -1006,7 +991,7 @@ static zval* ZEND_FASTCALL zend_jit_fetch_dim_w_helper(zend_array *ht, zval *dim hval = 1; goto num_index; default: - zend_jit_illegal_array_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_ARRAY), dim, BP_VAR_R); undef_result_after_exception(); if (EG(opline_before_exception) && (EG(opline_before_exception)+1)->opcode == ZEND_OP_DATA @@ -1029,7 +1014,8 @@ static zval* ZEND_FASTCALL zend_jit_fetch_dim_w_helper(zend_array *ht, zval *dim return retval; } -static zend_never_inline zend_long zend_check_string_offset(zval *dim/*, int type*/) +/* type is one of the BP_VAR_* constants */ +static zend_never_inline zend_long zend_check_string_offset(zval *dim, int type) { zend_long offset; @@ -1049,7 +1035,7 @@ static zend_never_inline zend_long zend_check_string_offset(zval *dim/*, int typ } return offset; } - zend_jit_illegal_string_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_STRING), dim, BP_VAR_R); return 0; } case IS_UNDEF: @@ -1065,7 +1051,7 @@ static zend_never_inline zend_long zend_check_string_offset(zval *dim/*, int typ dim = Z_REFVAL_P(dim); goto try_again; default: - zend_jit_illegal_string_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_STRING), dim, type); return 0; } @@ -1103,7 +1089,7 @@ static zend_string* ZEND_FASTCALL zend_jit_fetch_dim_str_r_helper(zend_string *s if (!(GC_FLAGS(str) & IS_STR_INTERNED)) { GC_ADDREF(str); } - offset = zend_check_string_offset(dim/*, BP_VAR_R*/); + offset = zend_check_string_offset(dim, BP_VAR_R); if (!(GC_FLAGS(str) & 
IS_STR_INTERNED) && UNEXPECTED(GC_DELREF(str) == 0)) { zend_string *ret = zend_jit_fetch_dim_str_offset(str, offset); zend_string_efree(str); @@ -1140,7 +1126,7 @@ static void ZEND_FASTCALL zend_jit_fetch_dim_str_is_helper(zend_string *str, zva dim = Z_REFVAL_P(dim); goto try_string_offset; default: - zend_jit_illegal_string_offset(dim); + zend_illegal_container_offset(ZSTR_KNOWN(ZEND_STR_STRING), dim, BP_VAR_IS); break; } @@ -1242,7 +1228,7 @@ static zend_never_inline void zend_assign_to_string_offset(zval *str, zval *dim, /* The string may be destroyed while throwing the notice. * Temporarily increase the refcount to detect this situation. */ GC_ADDREF(s); - offset = zend_check_string_offset(dim/*, BP_VAR_W*/); + offset = zend_check_string_offset(dim, BP_VAR_W); if (UNEXPECTED(GC_DELREF(s) == 0)) { zend_string_efree(s); if (result) { @@ -1418,7 +1404,7 @@ static zend_always_inline void ZEND_FASTCALL zend_jit_fetch_dim_obj_helper(zval zend_throw_error(NULL, "[] operator not supported for strings"); } else { if (UNEXPECTED(Z_TYPE_P(dim) != IS_LONG)) { - zend_check_string_offset(dim/*, BP_VAR_RW*/); + zend_check_string_offset(dim, BP_VAR_RW); } zend_wrong_string_offset_error(); } @@ -1606,7 +1592,7 @@ static void ZEND_FASTCALL zend_jit_assign_dim_op_helper(zval *container, zval *d zend_throw_error(NULL, "[] operator not supported for strings"); } else { if (UNEXPECTED(Z_TYPE_P(dim) != IS_LONG)) { - zend_check_string_offset(dim/*, BP_VAR_RW*/); + zend_check_string_offset(dim, BP_VAR_RW); } zend_wrong_string_offset_error(); } diff --git a/ext/opcache/jit/zend_jit_trace.c b/ext/opcache/jit/zend_jit_trace.c index ed8cdbc95b590..33fced0787793 100644 --- a/ext/opcache/jit/zend_jit_trace.c +++ b/ext/opcache/jit/zend_jit_trace.c @@ -332,6 +332,7 @@ static int zend_jit_trace_may_exit(const zend_op_array *op_array, const zend_op case ZEND_SWITCH_LONG: case ZEND_SWITCH_STRING: case ZEND_MATCH: + case ZEND_BIND_INIT_STATIC_OR_JMP: /* branch opcodes */ return 1; case ZEND_NEW: 
@@ -6835,7 +6836,8 @@ static const void *zend_jit_trace(zend_jit_trace_rec *trace_buffer, uint32_t par if (!(t->flags & ZEND_JIT_TRACE_USES_INITIAL_IP) || (ra && zend_jit_trace_stack_needs_deoptimization(stack, op_array->last_var + op_array->T))) { - uint32_t exit_point = zend_jit_trace_get_exit_point(opline, ZEND_JIT_EXIT_TO_VM); + /* Deoptimize to the first instruction of the loop */ + uint32_t exit_point = zend_jit_trace_get_exit_point(trace_buffer[1].opline, ZEND_JIT_EXIT_TO_VM); timeout_exit_addr = zend_jit_trace_get_exit_addr(exit_point); if (!timeout_exit_addr) { @@ -7187,8 +7189,6 @@ static void zend_jit_stop_hot_trace_counters(zend_op_array *op_array) uint32_t i; jit_extension = (zend_jit_op_array_trace_extension*)ZEND_FUNC_INFO(op_array); - zend_shared_alloc_lock(); - SHM_UNPROTECT(); for (i = 0; i < op_array->last; i++) { /* Opline with Jit-ed code handler is skipped. */ if (jit_extension->trace_info[i].trace_flags & @@ -7200,8 +7200,6 @@ static void zend_jit_stop_hot_trace_counters(zend_op_array *op_array) op_array->opcodes[i].handler = jit_extension->trace_info[i].orig_handler; } } - SHM_PROTECT(); - zend_shared_alloc_unlock(); } /* Get the tracing op_array. */ @@ -7240,6 +7238,9 @@ static void zend_jit_stop_persistent_script(zend_persistent_script *script) /* Get all scripts which are accelerated by JIT */ static void zend_jit_stop_counter_handlers(void) { + zend_shared_alloc_lock(); + /* mprotect has an extreme overhead, avoid calls to it for every function. 
*/ + SHM_UNPROTECT(); for (uint32_t i = 0; i < ZCSG(hash).max_num_entries; i++) { zend_accel_hash_entry *cache_entry; for (cache_entry = ZCSG(hash).hash_table[i]; cache_entry; cache_entry = cache_entry->next) { @@ -7249,6 +7250,8 @@ static void zend_jit_stop_counter_handlers(void) zend_jit_stop_persistent_script(script); } } + SHM_PROTECT(); + zend_shared_alloc_unlock(); } static void zend_jit_blacklist_root_trace(const zend_op *opline, size_t offset) diff --git a/ext/opcache/shared_alloc_mmap.c b/ext/opcache/shared_alloc_mmap.c index 1414ef96149d0..ca492b122157d 100644 --- a/ext/opcache/shared_alloc_mmap.c +++ b/ext/opcache/shared_alloc_mmap.c @@ -67,8 +67,13 @@ static void *find_prefered_mmap_base(size_t requested_size) while (fgets(buffer, MAXPATHLEN, f) && sscanf(buffer, "%lx-%lx", &start, &end) == 2) { if ((uintptr_t)execute_ex >= start) { /* the current segment lays before PHP .text segment or PHP .text segment itself */ + /*Search for candidates at the end of the free segment near the .text segment + to prevent candidates from being missed due to large hole*/ if (last_free_addr + requested_size <= start) { - last_candidate = last_free_addr; + last_candidate = ZEND_MM_ALIGNED_SIZE_EX(start - requested_size, huge_page_size); + if (last_candidate + requested_size > start) { + last_candidate -= huge_page_size; + } } if ((uintptr_t)execute_ex < end) { /* the current segment is PHP .text segment itself */ @@ -117,7 +122,10 @@ static void *find_prefered_mmap_base(size_t requested_size) if ((uintptr_t)execute_ex >= e_start) { /* the current segment lays before PHP .text segment or PHP .text segment itself */ if (last_free_addr + requested_size <= e_start) { - last_candidate = last_free_addr; + last_candidate = ZEND_MM_ALIGNED_SIZE_EX(e_start - requested_size, huge_page_size); + if (last_candidate + requested_size > e_start) { + last_candidate -= huge_page_size; + } } if ((uintptr_t)execute_ex < e_end) { /* the current segment is PHP .text segment itself */ diff 
--git a/ext/opcache/tests/gh8846-1.inc b/ext/opcache/tests/gh8846-1.inc new file mode 100644 index 0000000000000..6169e1cfea4a9 --- /dev/null +++ b/ext/opcache/tests/gh8846-1.inc @@ -0,0 +1,4 @@ + +--CLEAN-- + +--EXPECTF-- +bool(true) +
+Fatal error: Cannot declare class Foo, because the name is already in use in %sgh8846-2.inc on line %d
+ +bool(true) +Ok diff --git a/ext/opcache/tests/jit/assign_dim_002.phpt b/ext/opcache/tests/jit/assign_dim_002.phpt index 83b4bfdec7873..743ca1bf093ee 100644 --- a/ext/opcache/tests/jit/assign_dim_002.phpt +++ b/ext/opcache/tests/jit/assign_dim_002.phpt @@ -161,7 +161,7 @@ array(1) { int(1) } } -Cannot access offset of type object on array +Cannot access offset of type Closure on array array(1) { [0]=> array(2) { diff --git a/ext/opcache/tests/jit/init_fcall_003.phpt b/ext/opcache/tests/jit/init_fcall_003.phpt index f37344cbce4a9..180f0745c16c6 100644 --- a/ext/opcache/tests/jit/init_fcall_003.phpt +++ b/ext/opcache/tests/jit/init_fcall_003.phpt @@ -11,6 +11,8 @@ opcache.jit_hot_loop=64 opcache.jit_hot_func=127 opcache.jit_hot_return=8 opcache.jit_hot_side_exit=8 +--EXTENSIONS-- +opcache --FILE-- +--EXPECTF-- +$_main: + ; (lines=4, args=0, vars=1, tmps=1) + ; (after optimizer) + ; %s +0000 T1 = ISSET_ISEMPTY_CV (empty) CV0($xx) +0001 JMPNZ T1 0003 +0002 RETURN null +0003 RETURN int(1) + +xx: + ; (lines=1, args=0, vars=0, tmps=0) + ; (after optimizer) + ; %s +0000 RETURN string("somegarbage") diff --git a/ext/opcache/tests/opt/gh11245_2.phpt b/ext/opcache/tests/opt/gh11245_2.phpt new file mode 100644 index 0000000000000..8e967bf9f41be --- /dev/null +++ b/ext/opcache/tests/opt/gh11245_2.phpt @@ -0,0 +1,35 @@ +--TEST-- +GH-11245: In some specific cases SWITCH with one default statement will cause segfault (TMP variation) +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.optimization_level=0x7FFFBFFF +opcache.opt_debug_level=0x20000 +opcache.preload= +--EXTENSIONS-- +opcache +--FILE-- + +--EXPECTF-- +$_main: + ; (lines=7, args=0, vars=1, tmps=2) + ; (after optimizer) + ; %s +0000 T1 = PRE_INC_STATIC_PROP string("prop") string("X") +0001 T2 = ISSET_ISEMPTY_CV (empty) CV0($xx) +0002 JMPZ T2 0005 +0003 FREE T1 +0004 RETURN null +0005 FREE T1 +0006 RETURN int(1) +LIVE RANGES: + 1: 0001 - 0005 (tmp/var) diff --git a/ext/opcache/tests/optimize_static_002.phpt 
b/ext/opcache/tests/optimize_static_002.phpt new file mode 100644 index 0000000000000..e4299ba4f3fff --- /dev/null +++ b/ext/opcache/tests/optimize_static_002.phpt @@ -0,0 +1,22 @@ +--TEST-- +Keep BIND_STATIC when static variable has an initializer +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.optimization_level=-1 +--EXTENSIONS-- +opcache +--FILE-- +getStaticVariables()); +} +foo(); +?> +--EXPECT-- +array(1) { + ["a"]=> + int(42) +} diff --git a/ext/opcache/tests/optimize_static_003.phpt b/ext/opcache/tests/optimize_static_003.phpt new file mode 100644 index 0000000000000..1a3c98a48fb0b --- /dev/null +++ b/ext/opcache/tests/optimize_static_003.phpt @@ -0,0 +1,24 @@ +--TEST-- +Keep BIND_STATIC when static variable has an initializer +--INI-- +opcache.enable=1 +opcache.enable_cli=1 +opcache.optimization_level=-1 +--EXTENSIONS-- +opcache +--FILE-- +getStaticVariables()); +} +foo(); +?> +--EXPECT-- +array(1) { + ["a"]=> + &int(42) +} diff --git a/ext/opcache/zend_accelerator_util_funcs.c b/ext/opcache/zend_accelerator_util_funcs.c index b99a50b212828..e021270eff3d3 100644 --- a/ext/opcache/zend_accelerator_util_funcs.c +++ b/ext/opcache/zend_accelerator_util_funcs.c @@ -357,15 +357,16 @@ static void zend_accel_do_delayed_early_binding( zval *zv = zend_hash_find_known_hash(EG(class_table), early_binding->rtd_key); if (zv) { zend_class_entry *orig_ce = Z_CE_P(zv); - zend_class_entry *parent_ce = - zend_hash_find_ex_ptr(EG(class_table), early_binding->lc_parent_name, 1); - if (parent_ce) { + zend_class_entry *parent_ce = !(orig_ce->ce_flags & ZEND_ACC_LINKED) + ? 
zend_hash_find_ex_ptr(EG(class_table), early_binding->lc_parent_name, 1) + : NULL; + if (parent_ce || (orig_ce->ce_flags & ZEND_ACC_LINKED)) { ce = zend_try_early_bind(orig_ce, parent_ce, early_binding->lcname, zv); } } - } - if (ce && early_binding->cache_slot != (uint32_t) -1) { - *(void**)((char*)run_time_cache + early_binding->cache_slot) = ce; + if (ce && early_binding->cache_slot != (uint32_t) -1) { + *(void**)((char*)run_time_cache + early_binding->cache_slot) = ce; + } } } CG(compiled_filename) = orig_compiled_filename; diff --git a/ext/opcache/zend_file_cache.c b/ext/opcache/zend_file_cache.c index f4c9a77996b96..864bc4aff202c 100644 --- a/ext/opcache/zend_file_cache.c +++ b/ext/opcache/zend_file_cache.c @@ -556,6 +556,7 @@ static void zend_file_cache_serialize_op_array(zend_op_array *op_arra case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: SERIALIZE_PTR(opline->op2.jmp_addr); break; case ZEND_CATCH: @@ -1404,6 +1405,7 @@ static void zend_file_cache_unserialize_op_array(zend_op_array *op_arr case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: UNSERIALIZE_PTR(opline->op2.jmp_addr); break; case ZEND_CATCH: diff --git a/ext/opcache/zend_persist.c b/ext/opcache/zend_persist.c index e21aaa069348a..c8330c1e79057 100644 --- a/ext/opcache/zend_persist.c +++ b/ext/opcache/zend_persist.c @@ -578,6 +578,7 @@ static void zend_persist_op_array_ex(zend_op_array *op_array, zend_persistent_sc case ZEND_FE_RESET_RW: case ZEND_ASSERT_CHECK: case ZEND_JMP_NULL: + case ZEND_BIND_INIT_STATIC_OR_JMP: opline->op2.jmp_addr = &new_opcodes[opline->op2.jmp_addr - op_array->opcodes]; break; case ZEND_CATCH: @@ -1120,7 +1121,7 @@ void zend_update_parent_ce(zend_class_entry *ce) if (zend_class_implements_interface(ce, zend_ce_iterator)) { ce->iterator_funcs_ptr->zf_rewind = zend_hash_str_find_ptr(&ce->function_table, "rewind", sizeof("rewind") - 1); 
ce->iterator_funcs_ptr->zf_valid = zend_hash_str_find_ptr(&ce->function_table, "valid", sizeof("valid") - 1); - ce->iterator_funcs_ptr->zf_key = zend_hash_str_find_ptr(&ce->function_table, "key", sizeof("key") - 1); + ce->iterator_funcs_ptr->zf_key = zend_hash_find_ptr(&ce->function_table, ZSTR_KNOWN(ZEND_STR_KEY)); ce->iterator_funcs_ptr->zf_current = zend_hash_str_find_ptr(&ce->function_table, "current", sizeof("current") - 1); ce->iterator_funcs_ptr->zf_next = zend_hash_str_find_ptr(&ce->function_table, "next", sizeof("next") - 1); } diff --git a/ext/opcache/zend_shared_alloc.c b/ext/opcache/zend_shared_alloc.c index 6fbf8ea20ae12..befcbe442abb1 100644 --- a/ext/opcache/zend_shared_alloc.c +++ b/ext/opcache/zend_shared_alloc.c @@ -59,7 +59,7 @@ zend_smm_shared_globals *smm_shared_globals; #ifdef ZTS static MUTEX_T zts_lock; #endif -int lock_file; +int lock_file = -1; static char lockfile_name[MAXPATHLEN]; #endif @@ -199,8 +199,8 @@ int zend_shared_alloc_startup(size_t requested_size, size_t reserved_size) res = zend_shared_alloc_try(he, requested_size, &ZSMMG(shared_segments), &ZSMMG(shared_segments_count), &error_in); if (res) { /* this model works! 
*/ + break; } - break; } } } @@ -211,6 +211,7 @@ int zend_shared_alloc_startup(size_t requested_size, size_t reserved_size) } #if ENABLE_FILE_CACHE_FALLBACK if (ALLOC_FALLBACK == res) { + smm_shared_globals = NULL; return ALLOC_FALLBACK; } #endif @@ -236,6 +237,7 @@ int zend_shared_alloc_startup(size_t requested_size, size_t reserved_size) } #if ENABLE_FILE_CACHE_FALLBACK if (ALLOC_FALLBACK == res) { + smm_shared_globals = NULL; return ALLOC_FALLBACK; } #endif diff --git a/ext/openssl/openssl.c b/ext/openssl/openssl.c index f74f25c30403e..4009a5b84df1f 100644 --- a/ext/openssl/openssl.c +++ b/ext/openssl/openssl.c @@ -476,6 +476,37 @@ void php_openssl_store_errors(void) } /* }}} */ +/* {{{ php_openssl_errors_set_mark */ +void php_openssl_errors_set_mark(void) { + if (!OPENSSL_G(errors)) { + return; + } + + if (!OPENSSL_G(errors_mark)) { + OPENSSL_G(errors_mark) = pecalloc(1, sizeof(struct php_openssl_errors), 1); + } + + memcpy(OPENSSL_G(errors_mark), OPENSSL_G(errors), sizeof(struct php_openssl_errors)); +} +/* }}} */ + +/* {{{ php_openssl_errors_restore_mark */ +void php_openssl_errors_restore_mark(void) { + if (!OPENSSL_G(errors)) { + return; + } + + struct php_openssl_errors *errors = OPENSSL_G(errors); + + if (!OPENSSL_G(errors_mark)) { + errors->top = 0; + errors->bottom = 0; + } else { + memcpy(errors, OPENSSL_G(errors_mark), sizeof(struct php_openssl_errors)); + } +} +/* }}} */ + /* openssl file path check error function */ static void php_openssl_check_path_error(uint32_t arg_num, int type, const char *format, ...) 
{ @@ -1293,6 +1324,7 @@ PHP_GINIT_FUNCTION(openssl) ZEND_TSRMLS_CACHE_UPDATE(); #endif openssl_globals->errors = NULL; + openssl_globals->errors_mark = NULL; } /* }}} */ @@ -1302,6 +1334,9 @@ PHP_GSHUTDOWN_FUNCTION(openssl) if (openssl_globals->errors) { pefree(openssl_globals->errors, 1); } + if (openssl_globals->errors_mark) { + pefree(openssl_globals->errors_mark, 1); + } } /* }}} */ @@ -3571,12 +3606,14 @@ static EVP_PKEY *php_openssl_pkey_from_zval( } /* it's an X509 file/cert of some kind, and we need to extract the data from that */ if (public_key) { + php_openssl_errors_set_mark(); cert = php_openssl_x509_from_str(Z_STR_P(val), arg_num, false, NULL); if (cert) { free_cert = 1; } else { /* not a X509 certificate, try to retrieve public key */ + php_openssl_errors_restore_mark(); BIO* in; if (is_file) { in = BIO_new_file(file_path, PHP_OPENSSL_BIO_MODE_R(PKCS7_BINARY)); diff --git a/ext/openssl/openssl.stub.php b/ext/openssl/openssl.stub.php index a9fad2eaeae90..6ebb519ba34a2 100644 --- a/ext/openssl/openssl.stub.php +++ b/ext/openssl/openssl.stub.php @@ -161,6 +161,11 @@ * @cvalue PKCS7_NOSIGS */ const PKCS7_NOSIGS = UNKNOWN; +/** + * @var int + * @cvalue PKCS7_NOOLDMIMETYPE + */ +const PKCS7_NOOLDMIMETYPE = UNKNOWN; /** * @var int @@ -202,6 +207,11 @@ * @cvalue CMS_NOSIGS */ const OPENSSL_CMS_NOSIGS = UNKNOWN; +/** + * @var int + * @cvalue CMS_NOOLDMIMETYPE + */ +const OPENSSL_CMS_OLDMIMETYPE = UNKNOWN; /** * @var int diff --git a/ext/openssl/openssl_arginfo.h b/ext/openssl/openssl_arginfo.h index 3e1b4a778a967..b53e70242466f 100644 --- a/ext/openssl/openssl_arginfo.h +++ b/ext/openssl/openssl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: be79b4cc0d9eb4469c43f10208b86369dcc1239d */ + * Stub hash: 549946c91248fddc4d43502d32335b68cfbe71f2 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_openssl_x509_export_to_file, 0, 2, _IS_BOOL, 0) ZEND_ARG_OBJ_TYPE_MASK(0, certificate, OpenSSLCertificate, MAY_BE_STRING, NULL) @@ -578,6 +578,7 @@ static void register_openssl_symbols(int module_number) REGISTER_LONG_CONSTANT("PKCS7_NOATTR", PKCS7_NOATTR, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PKCS7_BINARY", PKCS7_BINARY, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PKCS7_NOSIGS", PKCS7_NOSIGS, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PKCS7_NOOLDMIMETYPE", PKCS7_NOOLDMIMETYPE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_DETACHED", CMS_DETACHED, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_TEXT", CMS_TEXT, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_NOINTERN", CMS_NOINTERN, CONST_PERSISTENT); @@ -586,6 +587,7 @@ static void register_openssl_symbols(int module_number) REGISTER_LONG_CONSTANT("OPENSSL_CMS_NOATTR", CMS_NOATTR, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_BINARY", CMS_BINARY, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_CMS_NOSIGS", CMS_NOSIGS, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("OPENSSL_CMS_OLDMIMETYPE", CMS_NOOLDMIMETYPE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("OPENSSL_PKCS1_PADDING", RSA_PKCS1_PADDING, CONST_PERSISTENT); #if defined(RSA_SSLV23_PADDING) REGISTER_LONG_CONSTANT("OPENSSL_SSLV23_PADDING", RSA_SSLV23_PADDING, CONST_PERSISTENT); diff --git a/ext/openssl/php_openssl.h b/ext/openssl/php_openssl.h index 5cfadbedc9829..304854b4bf91d 100644 --- a/ext/openssl/php_openssl.h +++ b/ext/openssl/php_openssl.h @@ -80,6 +80,7 @@ struct php_openssl_errors { ZEND_BEGIN_MODULE_GLOBALS(openssl) struct php_openssl_errors *errors; + struct php_openssl_errors *errors_mark; ZEND_END_MODULE_GLOBALS(openssl) #define OPENSSL_G(v) ZEND_MODULE_GLOBALS_ACCESSOR(openssl, v) diff --git a/ext/openssl/tests/bug11054.pem 
b/ext/openssl/tests/bug11054.pem new file mode 100644 index 0000000000000..60d7afa827f2c --- /dev/null +++ b/ext/openssl/tests/bug11054.pem @@ -0,0 +1,9 @@ +-----BEGIN PUBLIC KEY----- +MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAvYH14fT4DPgyffkDOrHt +x0q+rxclB48h2ykgbR3QyDG2d7hMSXjtqEseO/iR1FdAv7UevIKyHFbHpJilOIwo +mEqQNxUQCWdZsWhv7ZVfG8UUgki7LKMGPruJM97vteBS101hSCaCQz+zTyVyP8Uy +nqx5zlPmcBUA92gAFfSCa+tm/lR2BY5g/20mZX/lMY0xXV1iLhfdK6RgJYXX2SdH +YR/01IgmjgTfIp7gX+xixDgGZuZY++jo8C52udFkCf5vxyG4Ed57vRfCLFOPfeY4 +r3i0Jiply65zSo8y/6KxudRtmGOfV2qb2EsMTW9PaLs3+rnhhiYBM/nR4V5ux6u6 +DwIDAQAB +-----END PUBLIC KEY----- diff --git a/ext/openssl/tests/bug11054.phpt b/ext/openssl/tests/bug11054.phpt new file mode 100644 index 0000000000000..25f23d5b1de3e --- /dev/null +++ b/ext/openssl/tests/bug11054.phpt @@ -0,0 +1,15 @@ +--TEST-- +Bug #11054: Calling with a PEM public key results in error +--EXTENSIONS-- +openssl +--FILE-- + +--EXPECT-- +bool(false) diff --git a/ext/openssl/tests/openssl_cms_encrypt_basic.phpt b/ext/openssl/tests/openssl_cms_encrypt_basic.phpt index cf3e7ae7f3787..14b5231fdd327 100644 --- a/ext/openssl/tests/openssl_cms_encrypt_basic.phpt +++ b/ext/openssl/tests/openssl_cms_encrypt_basic.phpt @@ -9,6 +9,9 @@ $outfile = tempnam(sys_get_temp_dir(), "cms_enc_basic"); if ($outfile === false) die("failed to get a temporary filename!"); $outfile2 = $outfile . ".out"; +$outfile3 = tempnam(sys_get_temp_dir(), "cms_enc_basic"); +if ($outfile3 === false) + die("failed to get a temporary filename!"); $single_cert = "file://" . __DIR__ . "/cert.crt"; $privkey = "file://" . __DIR__ . "/private_rsa_1024.key"; $wrongkey = "file://" . __DIR__ . 
"/private_rsa_2048.key"; @@ -33,6 +36,7 @@ var_dump(openssl_cms_encrypt($infile, $outfile, $wrong, $headers, cipher_algo: $ var_dump(openssl_cms_encrypt($infile, $outfile, $empty, $headers, cipher_algo: $cipher)); var_dump(openssl_cms_encrypt($infile, $outfile, $multi_certs, $headers, cipher_algo: $cipher)); var_dump(openssl_cms_encrypt($infile, $outfile, array_map('openssl_x509_read', $multi_certs), $headers, cipher_algo: $cipher)); +var_dump(openssl_cms_encrypt($infile, $outfile3, $single_cert, $headers, flags: OPENSSL_CMS_OLDMIMETYPE, cipher_algo: $cipher)); if (file_exists($outfile)) { echo "true\n"; @@ -42,6 +46,15 @@ if (file_exists($outfile2)) { echo "true\n"; unlink($outfile2); } + +if (file_exists($outfile3)) { + $content = file_get_contents($outfile3, false, null, 0, 256); + if (str_contains($content, 'Content-Type: application/x-pkcs7-mime; smime-type=enveloped-data; name="smime.p7m"')) { + echo "true\n"; + } + unset($content); + unlink($outfile3); +} ?> --EXPECT-- bool(true) @@ -57,5 +70,7 @@ bool(false) bool(false) bool(true) bool(true) +bool(true) +true true true diff --git a/ext/openssl/tests/openssl_error_string_basic.phpt b/ext/openssl/tests/openssl_error_string_basic.phpt index e4ea264b3bf1f..02e8b3fbc49d1 100644 --- a/ext/openssl/tests/openssl_error_string_basic.phpt +++ b/ext/openssl/tests/openssl_error_string_basic.phpt @@ -123,7 +123,7 @@ expect_openssl_errors('openssl_private_decrypt', ['04065072']); // public encrypt and decrypt with failed padding check and padding @openssl_public_encrypt("data", $crypted, $public_key_file, 1000); @openssl_public_decrypt("data", $crypted, $public_key_file); -expect_openssl_errors('openssl_private_(en|de)crypt padding', [$err_pem_no_start_line, '0408F090', '04067072']); +expect_openssl_errors('openssl_private_(en|de)crypt padding', ['0408F090', '04067072']); // X509 echo "X509 errors\n"; diff --git a/ext/openssl/tests/openssl_error_string_basic_openssl3.phpt 
b/ext/openssl/tests/openssl_error_string_basic_openssl3.phpt index d435a53e3047f..041a0a0b5648a 100644 --- a/ext/openssl/tests/openssl_error_string_basic_openssl3.phpt +++ b/ext/openssl/tests/openssl_error_string_basic_openssl3.phpt @@ -114,9 +114,6 @@ expect_openssl_errors('openssl_pkey_export_to_file write', ['10080002']); // successful export @openssl_pkey_export($private_key_file_with_pass, $out, 'wrong pwd', $options); expect_openssl_errors('openssl_pkey_export', ['1C800064', '04800065']); -// invalid x509 for getting public key -@openssl_pkey_get_public($private_key_file); -expect_openssl_errors('openssl_pkey_get_public', [$err_pem_no_start_line]); // private encrypt with unknown padding @openssl_private_encrypt("data", $crypted, $private_key_file, 1000); expect_openssl_errors('openssl_private_encrypt', ['1C8000A5']); @@ -126,7 +123,7 @@ expect_openssl_errors('openssl_private_decrypt', ['0200009F', '02000072']); // public encrypt and decrypt with failed padding check and padding @openssl_public_encrypt("data", $crypted, $public_key_file, 1000); @openssl_public_decrypt("data", $crypted, $public_key_file); -expect_openssl_errors('openssl_private_(en|de)crypt padding', [$err_pem_no_start_line, '02000076', '0200008A', '02000072', '1C880004']); +expect_openssl_errors('openssl_private_(en|de)crypt padding', ['02000076', '0200008A', '02000072', '1C880004']); // X509 echo "X509 errors\n"; @@ -170,7 +167,6 @@ openssl_pkey_export_to_file opening: ok openssl_pkey_export_to_file pem: ok openssl_pkey_export_to_file write: ok openssl_pkey_export: ok -openssl_pkey_get_public: ok openssl_private_encrypt: ok openssl_private_decrypt: ok openssl_private_(en|de)crypt padding: ok diff --git a/ext/openssl/tests/openssl_pkcs7_encrypt_basic.phpt b/ext/openssl/tests/openssl_pkcs7_encrypt_basic.phpt index 070fd1c4ec8af..0ae5873c9bb49 100644 --- a/ext/openssl/tests/openssl_pkcs7_encrypt_basic.phpt +++ b/ext/openssl/tests/openssl_pkcs7_encrypt_basic.phpt @@ -11,6 +11,9 @@ if ($outfile 
=== false) $outfile2 = tempnam(sys_get_temp_dir(), "ssl"); if ($outfile2 === false) die("failed to get a temporary filename!"); +$outfile3 = tempnam(sys_get_temp_dir(), "ssl"); +if ($outfile3 === false) + die("failed to get a temporary filename!"); $single_cert = "file://" . __DIR__ . "/cert.crt"; $privkey = "file://" . __DIR__ . "/private_rsa_1024.key"; @@ -34,6 +37,7 @@ var_dump(openssl_pkcs7_encrypt($infile, $outfile, $wrong, $headers, 0, $cipher)) var_dump(openssl_pkcs7_encrypt($infile, $outfile, $empty, $headers, 0, $cipher)); var_dump(openssl_pkcs7_encrypt($infile, $outfile, $multi_certs, $headers, 0, $cipher)); var_dump(openssl_pkcs7_encrypt($infile, $outfile, array_map('openssl_x509_read', $multi_certs), $headers, 0, $cipher)); +var_dump(openssl_pkcs7_encrypt($infile, $outfile3, $single_cert, $headers, PKCS7_NOOLDMIMETYPE, $cipher)); if (file_exists($outfile)) { echo "true\n"; @@ -43,6 +47,15 @@ if (file_exists($outfile2)) { echo "true\n"; unlink($outfile2); } + +if (file_exists($outfile3)) { + $content = file_get_contents($outfile3, false, null, 0, 256); + if (str_contains($content, 'Content-Type: application/pkcs7-mime; smime-type=enveloped-data; name="smime.p7m"')) { + echo "true\n"; + } + unset($content); + unlink($outfile3); +} ?> --EXPECT-- bool(true) @@ -57,5 +70,7 @@ bool(false) bool(false) bool(true) bool(true) +bool(true) +true true true diff --git a/ext/openssl/tests/san_ipv6_peer_matching.phpt b/ext/openssl/tests/san_ipv6_peer_matching.phpt new file mode 100644 index 0000000000000..81966025d3969 --- /dev/null +++ b/ext/openssl/tests/san_ipv6_peer_matching.phpt @@ -0,0 +1,69 @@ +--TEST-- +IPv6 Peer verification matches SAN names +--EXTENSIONS-- +openssl +--SKIPIF-- + +--FILE-- + [ + 'local_cert' => '%s', + ]]); + + $server = stream_socket_server($serverUri, $errno, $errstr, $serverFlags, $serverCtx); + phpt_notify(); + + @stream_socket_accept($server, 1); + @stream_socket_accept($server, 1); +CODE; +$serverCode = sprintf($serverCode, $certFile); 
+ +$clientCode = <<<'CODE' + $serverUri = "ssl://[::1]:64324"; + $clientFlags = STREAM_CLIENT_CONNECT; + $clientCtx = stream_context_create(['ssl' => [ + 'verify_peer' => false, + ]]); + + phpt_wait(); + + stream_context_set_option($clientCtx, 'ssl', 'peer_name', '2001:db8:85a3:8d3:1319:8a2e:370:7348'); + var_dump(stream_socket_client($serverUri, $errno, $errstr, 1, $clientFlags, $clientCtx)); + + stream_context_set_option($clientCtx, 'ssl', 'peer_name', '2001:db8:85a3:8d3:1319:8a2e:370:7349'); + var_dump(stream_socket_client($serverUri, $errno, $errstr, 1, $clientFlags, $clientCtx)); +CODE; + +include 'CertificateGenerator.inc'; +$certificateGenerator = new CertificateGenerator(); +$certificateGenerator->saveNewCertAsFileWithKey(null, $certFile, null, $san); + +include 'ServerClientTestCase.inc'; +ServerClientTestCase::getInstance()->run($clientCode, $serverCode); +?> +--CLEAN-- + +--EXPECTF-- +resource(%d) of type (stream) + +Warning: stream_socket_client(): Unable to locate peer certificate CN in %s on line %d + +Warning: stream_socket_client(): Failed to enable crypto in %s on line %d + +Warning: stream_socket_client(): Unable to connect to ssl://[::1]:64324 (Unknown error) in %s on line %d +bool(false) diff --git a/ext/openssl/xp_ssl.c b/ext/openssl/xp_ssl.c index 7b604be043a5c..38acea244a217 100644 --- a/ext/openssl/xp_ssl.c +++ b/ext/openssl/xp_ssl.c @@ -39,6 +39,7 @@ #ifdef PHP_WIN32 #include "win32/winutil.h" #include "win32/time.h" +#include #include /* These are from Wincrypt.h, they conflict with OpenSSL */ #undef X509_NAME @@ -50,6 +51,10 @@ # define MSG_DONTWAIT 0 #endif +#ifdef HAVE_ARPA_INET_H +#include +#endif + /* Flags for determining allowed stream crypto methods */ #define STREAM_CRYPTO_IS_CLIENT (1<<0) #define STREAM_CRYPTO_METHOD_SSLv2 (1<<1) @@ -125,6 +130,24 @@ #define PHP_X509_NAME_ENTRY_TO_UTF8(ne, i, out) \ ASN1_STRING_to_UTF8(&out, X509_NAME_ENTRY_get_data(X509_NAME_get_entry(ne, i))) +#if defined(HAVE_IPV6) && defined(HAVE_INET_PTON) 
+/* Used for IPv6 Address peer verification */ +#define EXPAND_IPV6_ADDRESS(_str, _bytes) \ + do { \ + snprintf(_str, 40, "%X:%X:%X:%X:%X:%X:%X:%X", \ + _bytes[0] << 8 | _bytes[1], \ + _bytes[2] << 8 | _bytes[3], \ + _bytes[4] << 8 | _bytes[5], \ + _bytes[6] << 8 | _bytes[7], \ + _bytes[8] << 8 | _bytes[9], \ + _bytes[10] << 8 | _bytes[11], \ + _bytes[12] << 8 | _bytes[13], \ + _bytes[14] << 8 | _bytes[15] \ + ); \ + } while(0) +#define HAVE_IPV6_SAN 1 +#endif + #if PHP_OPENSSL_API_VERSION < 0x10100 static RSA *php_openssl_tmp_rsa_cb(SSL *s, int is_export, int keylength); #endif @@ -436,6 +459,19 @@ static bool php_openssl_matches_san_list(X509 *peer, const char *subject_name) / GENERAL_NAMES *alt_names = X509_get_ext_d2i(peer, NID_subject_alt_name, 0, 0); int alt_name_count = sk_GENERAL_NAME_num(alt_names); +#ifdef HAVE_IPV6_SAN + /* detect if subject name is an IPv6 address and expand once if required */ + char subject_name_ipv6_expanded[40]; + unsigned char ipv6[16]; + bool subject_name_is_ipv6 = false; + subject_name_ipv6_expanded[0] = 0; + + if (inet_pton(AF_INET6, subject_name, &ipv6)) { + EXPAND_IPV6_ADDRESS(subject_name_ipv6_expanded, ipv6); + subject_name_is_ipv6 = true; + } +#endif + for (i = 0; i < alt_name_count; i++) { GENERAL_NAME *san = sk_GENERAL_NAME_value(alt_names, i); @@ -474,10 +510,17 @@ static bool php_openssl_matches_san_list(X509 *peer, const char *subject_name) / return 1; } } - /* No, we aren't bothering to check IPv6 addresses. Why? - * Because IP SAN names are officially deprecated and are - * not allowed by CAs starting in 2015. Deal with it. 
- */ +#ifdef HAVE_IPV6_SAN + else if (san->d.ip->length == 16 && subject_name_is_ipv6) { + ipbuffer[0] = 0; + EXPAND_IPV6_ADDRESS(ipbuffer, san->d.iPAddress->data); + if (strcasecmp((const char*)subject_name_ipv6_expanded, (const char*)ipbuffer) == 0) { + sk_GENERAL_NAME_pop_free(alt_names, GENERAL_NAME_free); + + return 1; + } + } +#endif } } diff --git a/ext/pcntl/pcntl.c b/ext/pcntl/pcntl.c index bd4c32ca408c2..e55808743b62c 100644 --- a/ext/pcntl/pcntl.c +++ b/ext/pcntl/pcntl.c @@ -1295,7 +1295,7 @@ PHP_FUNCTION(pcntl_forkx) zend_long flags; pid_t pid; - ZEND_PARSE_PARAMETERS_START(1, 2) + ZEND_PARSE_PARAMETERS_START(1, 1) Z_PARAM_LONG(flags) ZEND_PARSE_PARAMETERS_END(); diff --git a/ext/pcntl/tests/pcntl_unshare_03.phpt b/ext/pcntl/tests/pcntl_unshare_03.phpt index 56433c0ee9307..567ff6b77b21c 100644 --- a/ext/pcntl/tests/pcntl_unshare_03.phpt +++ b/ext/pcntl/tests/pcntl_unshare_03.phpt @@ -9,7 +9,7 @@ if (!function_exists("pcntl_unshare")) die("skip pcntl_unshare is not available" if (!defined("CLONE_NEWNET")) die("skip flag unavailable"); if (posix_getuid() !== 0 && (!defined("CLONE_NEWUSER") || - (pcntl_unshare(CLONE_NEWUSER) == false && pcntl_get_last_error() == PCNTL_EPERM))) { + (@pcntl_unshare(CLONE_NEWUSER) == false && pcntl_get_last_error() == PCNTL_EPERM))) { die("skip Insufficient privileges for CLONE_NEWUSER"); } if (@pcntl_unshare(CLONE_NEWNET) == false && pcntl_get_last_error() == PCNTL_EPERM) { diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 626bb20fb4832..6ad0b6eb76fa4 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2406,6 +2406,10 @@ PHP_FUNCTION(preg_replace_callback_array) zend_argument_type_error(1, "must contain only valid callbacks"); goto error; } + if (!str_idx_regex) { + zend_argument_type_error(1, "must contain only string patterns as keys"); + goto error; + } ZVAL_COPY_VALUE(&fci.function_name, replace); diff --git a/ext/pcre/tests/preg_replace_callback_array_numeric_index_error.phpt 
b/ext/pcre/tests/preg_replace_callback_array_numeric_index_error.phpt new file mode 100644 index 0000000000000..55dfabce8649c --- /dev/null +++ b/ext/pcre/tests/preg_replace_callback_array_numeric_index_error.phpt @@ -0,0 +1,15 @@ +--TEST-- +preg_replace_callback_array() invalid pattern +--FILE-- + function () {}], + 'a', +); +?> +--EXPECTF-- +Fatal error: Uncaught TypeError: preg_replace_callback_array(): Argument #1 ($pattern) must contain only string patterns as keys in %s:%d +Stack trace: +#0 %s(%d): preg_replace_callback_array(Array, 'a') +#1 {main} + thrown in %s on line %d diff --git a/ext/pdo_pgsql/pgsql_driver.c b/ext/pdo_pgsql/pgsql_driver.c index ec4d5ec65866b..46b3f25f4086b 100644 --- a/ext/pdo_pgsql/pgsql_driver.c +++ b/ext/pdo_pgsql/pgsql_driver.c @@ -463,33 +463,53 @@ static int pdo_pgsql_get_attribute(pdo_dbh_t *dbh, zend_long attr, zval *return_ case PDO_ATTR_CONNECTION_STATUS: switch (PQstatus(H->server)) { case CONNECTION_STARTED: - ZVAL_STRINGL(return_value, "Waiting for connection to be made.", sizeof("Waiting for connection to be made.")-1); + ZVAL_STRINGL(return_value, "Waiting for connection to be made.", strlen("Waiting for connection to be made.")); break; case CONNECTION_MADE: case CONNECTION_OK: - ZVAL_STRINGL(return_value, "Connection OK; waiting to send.", sizeof("Connection OK; waiting to send.")-1); + ZVAL_STRINGL(return_value, "Connection OK; waiting to send.", strlen("Connection OK; waiting to send.")); break; case CONNECTION_AWAITING_RESPONSE: - ZVAL_STRINGL(return_value, "Waiting for a response from the server.", sizeof("Waiting for a response from the server.")-1); + ZVAL_STRINGL(return_value, "Waiting for a response from the server.", strlen("Waiting for a response from the server.")); break; case CONNECTION_AUTH_OK: - ZVAL_STRINGL(return_value, "Received authentication; waiting for backend start-up to finish.", sizeof("Received authentication; waiting for backend start-up to finish.")-1); + ZVAL_STRINGL(return_value, "Received 
authentication; waiting for backend start-up to finish.", strlen("Received authentication; waiting for backend start-up to finish.")); break; #ifdef CONNECTION_SSL_STARTUP case CONNECTION_SSL_STARTUP: - ZVAL_STRINGL(return_value, "Negotiating SSL encryption.", sizeof("Negotiating SSL encryption.")-1); + ZVAL_STRINGL(return_value, "Negotiating SSL encryption.", strlen("Negotiating SSL encryption.")); break; #endif case CONNECTION_SETENV: - ZVAL_STRINGL(return_value, "Negotiating environment-driven parameter settings.", sizeof("Negotiating environment-driven parameter settings.")-1); + ZVAL_STRINGL(return_value, "Negotiating environment-driven parameter settings.", strlen("Negotiating environment-driven parameter settings.")); break; +#ifdef CONNECTION_CONSUME + case CONNECTION_CONSUME: + ZVAL_STRINGL(return_value, "Flushing send queue/consuming extra data.", strlen("Flushing send queue/consuming extra data.")); + break; +#endif +#ifdef CONNECTION_GSS_STARTUP + case CONNECTION_GSS_STARTUP: + ZVAL_STRINGL(return_value, "Negotiating GSSAPI.", strlen("Negotiating GSSAPI.")); + break; +#endif +#ifdef CONNECTION_CHECK_TARGET + case CONNECTION_CHECK_TARGET: + ZVAL_STRINGL(return_value, "Connection OK; checking target server properties.", strlen("Connection OK; checking target server properties.")); + break; +#endif +#ifdef CONNECTION_CHECK_STANDBY + case CONNECTION_CHECK_STANDBY: + ZVAL_STRINGL(return_value, "Connection OK; checking if server in standby.", strlen("Connection OK; checking if server in standby.")); + break; +#endif case CONNECTION_BAD: default: - ZVAL_STRINGL(return_value, "Bad connection.", sizeof("Bad connection.")-1); + ZVAL_STRINGL(return_value, "Bad connection.", strlen("Bad connection.")); break; } break; diff --git a/ext/pdo_sqlite/sqlite_statement.c b/ext/pdo_sqlite/sqlite_statement.c index 90de059a3b7ac..c6b907f6fc22f 100644 --- a/ext/pdo_sqlite/sqlite_statement.c +++ b/ext/pdo_sqlite/sqlite_statement.c @@ -319,12 +319,12 @@ static int
pdo_sqlite_stmt_col_meta(pdo_stmt_t *stmt, zend_long colno, zval *ret switch (sqlite3_column_type(S->stmt, colno)) { case SQLITE_NULL: - add_assoc_string(return_value, "native_type", "null"); + add_assoc_str(return_value, "native_type", ZSTR_KNOWN(ZEND_STR_NULL_LOWERCASE)); add_assoc_long(return_value, "pdo_type", PDO_PARAM_NULL); break; case SQLITE_FLOAT: - add_assoc_string(return_value, "native_type", "double"); + add_assoc_str(return_value, "native_type", ZSTR_KNOWN(ZEND_STR_DOUBLE)); add_assoc_long(return_value, "pdo_type", PDO_PARAM_STR); break; @@ -333,12 +333,12 @@ static int pdo_sqlite_stmt_col_meta(pdo_stmt_t *stmt, zend_long colno, zval *ret /* TODO Check this is correct */ ZEND_FALLTHROUGH; case SQLITE_TEXT: - add_assoc_string(return_value, "native_type", "string"); + add_assoc_str(return_value, "native_type", ZSTR_KNOWN(ZEND_STR_STRING)); add_assoc_long(return_value, "pdo_type", PDO_PARAM_STR); break; case SQLITE_INTEGER: - add_assoc_string(return_value, "native_type", "integer"); + add_assoc_str(return_value, "native_type", ZSTR_KNOWN(ZEND_STR_INTEGER)); add_assoc_long(return_value, "pdo_type", PDO_PARAM_INT); break; } diff --git a/ext/pgsql/pgsql.c b/ext/pgsql/pgsql.c index f2a7fb2ac296b..68325155c23ca 100644 --- a/ext/pgsql/pgsql.c +++ b/ext/pgsql/pgsql.c @@ -112,6 +112,10 @@ char pgsql_libpq_version[16]; #define PQfreemem free #endif +#if PG_VERSION_NUM < 120000 +#define PQERRORS_SQLSTATE 0 +#endif + ZEND_DECLARE_MODULE_GLOBALS(pgsql) static PHP_GINIT_FUNCTION(pgsql); @@ -2821,7 +2825,7 @@ PHP_FUNCTION(pg_set_error_verbosity) pgsql = link->conn; - if (verbosity & (PQERRORS_TERSE|PQERRORS_DEFAULT|PQERRORS_VERBOSE)) { + if (verbosity & (PQERRORS_TERSE|PQERRORS_DEFAULT|PQERRORS_VERBOSE|PQERRORS_SQLSTATE)) { RETURN_LONG(PQsetErrorVerbosity(pgsql, verbosity)); } else { RETURN_FALSE; @@ -2829,6 +2833,29 @@ PHP_FUNCTION(pg_set_error_verbosity) } /* }}} */ +PHP_FUNCTION(pg_set_error_context_visibility) +{ + zval *pgsql_link = NULL; + zend_long visibility; + 
PGconn *pgsql; + pgsql_link_handle *link; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "Ol", &pgsql_link, pgsql_link_ce, &visibility) == FAILURE) { + RETURN_THROWS(); + } + link = Z_PGSQL_LINK_P(pgsql_link); + CHECK_PGSQL_LINK(link); + + pgsql = link->conn; + + if (visibility == PQSHOW_CONTEXT_NEVER || visibility == PQSHOW_CONTEXT_ERRORS || visibility == PQSHOW_CONTEXT_ALWAYS) { + RETURN_LONG(PQsetErrorContextVisibility(pgsql, visibility)); + } else { + zend_argument_value_error(2, "must be one of PGSQL_SHOW_CONTEXT_NEVER, PGSQL_SHOW_CONTEXT_ERRORS or PGSQL_SHOW_CONTEXT_ALWAYS"); + RETURN_THROWS(); + } +} + /* {{{ Set client encoding */ PHP_FUNCTION(pg_set_client_encoding) { @@ -3327,7 +3354,7 @@ PHP_FUNCTION(pg_result_error) RETURN_FALSE; } - err = (char *)PQresultErrorMessage(pgsql_result); + err = PQresultErrorMessage(pgsql_result); RETURN_STRING(err); } /* }}} */ @@ -3361,7 +3388,7 @@ PHP_FUNCTION(pg_result_error_field) #endif |PG_DIAG_CONTEXT|PG_DIAG_SOURCE_FILE|PG_DIAG_SOURCE_LINE |PG_DIAG_SOURCE_FUNCTION)) { - field = (char *)PQresultErrorField(pgsql_result, (int)fieldcode); + field = PQresultErrorField(pgsql_result, (int)fieldcode); if (field == NULL) { RETURN_NULL(); } else { @@ -4292,7 +4319,7 @@ static php_pgsql_data_type php_pgsql_get_data_type(const zend_string *type_name) /* This is stupid way to do. I'll fix it when I decide how to support user defined types. (Yasuo) */ /* boolean */ - if (zend_string_equals_literal(type_name, "bool")|| zend_string_equals_literal(type_name, "boolean")) + if (zend_string_equals(type_name, ZSTR_KNOWN(ZEND_STR_BOOL)) ||zend_string_equals(type_name, ZSTR_KNOWN(ZEND_STR_BOOLEAN))) return PG_BOOL; /* object id */ if (zend_string_equals_literal(type_name, "oid")) @@ -4525,7 +4552,6 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * data_type = php_pgsql_get_data_type(Z_STR_P(type)); } - /* TODO: Should E_NOTICE be converted to type error if PHP type cannot be converted to field type? 
*/ switch(data_type) { case PG_BOOL: @@ -4550,7 +4576,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * ZVAL_STRINGL(&new_val, "'f'", sizeof("'f'")-1); } else { - php_error_docref(NULL, E_NOTICE, "Detected invalid value (%s) for PostgreSQL %s field (%s)", Z_STRVAL_P(val), Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_value_error("%s(): Field \"%s\" must be of type bool, invalid PostgreSQL string boolean value \"%s\" given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(val)); err = 1; } } @@ -4582,7 +4608,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects string, null, long or boolelan value for PostgreSQL '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null|int|bool, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4626,7 +4652,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for pgsql '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type int|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4675,7 +4701,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for PostgreSQL '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type %s|int|null, %s given", get_active_function_name(), ZSTR_VAL(field), (data_type == PG_MONEY ? 
"money" : "float"), Z_STRVAL_P(type)); } break; @@ -4736,7 +4762,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for PostgreSQL '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4778,7 +4804,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type int|null, %s given", get_active_function_name(), ZSTR_VAL(field), zend_zval_value_name(val)); } break; @@ -4797,7 +4823,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * at all though and let the server side to handle it.*/ if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 0) == FAILURE && php_pgsql_convert_match(Z_STR_P(val), REGEX1, sizeof(REGEX1)-1, 0) == FAILURE) { - err = 1; + err = 2; } else { ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val))); @@ -4816,7 +4842,11 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or IPv4 or IPv6 address string for '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + if (err == 2) { + zend_value_error("%s(): Field \"%s\" must be a valid IPv4 or IPv6 address string, \"%s\" given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(val)); + } else { + zend_type_error("%s(): Field \"%s\" must be of type string|null, given %s", get_active_function_name(), ZSTR_VAL(field), zend_zval_value_name(val)); + } } break; @@ -4850,7 +4880,7 @@ 
PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4882,7 +4912,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4914,7 +4944,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -4992,7 +5022,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; case PG_BYTEA: @@ -5033,7 +5063,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL, string, long or double value for PostgreSQL '%s' (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" 
must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; @@ -5064,7 +5094,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string * } PGSQL_CONV_CHECK_IGNORE(); if (err) { - php_error_docref(NULL, E_NOTICE, "Expects NULL or string for PostgreSQL %s field (%s)", Z_STRVAL_P(type), ZSTR_VAL(field)); + zend_type_error("%s(): Field \"%s\" must be of type string|null, %s given", get_active_function_name(), ZSTR_VAL(field), Z_STRVAL_P(type)); } break; diff --git a/ext/pgsql/pgsql.stub.php b/ext/pgsql/pgsql.stub.php index e095350543c79..1b5356bd7be99 100644 --- a/ext/pgsql/pgsql.stub.php +++ b/ext/pgsql/pgsql.stub.php @@ -183,6 +183,19 @@ * @cvalue PQERRORS_VERBOSE */ const PGSQL_ERRORS_VERBOSE = UNKNOWN; + #if PGVERSION_NUM > 110000 + /** + * @var int + * @cvalue PQERRORS_SQLSTATE + */ + const PGSQL_ERRORS_SQLSTATE = UNKNOWN; + #else + /** + * @var int + * @cvalue PQERRORS_TERSE + */ + const PGSQL_ERRORS_SQLSTATE = UNKNOWN; + #endif /* For lo_seek() */ @@ -449,6 +462,25 @@ */ const PGSQL_PIPELINE_ABORTED = UNKNOWN; #endif + + /* For pg_set_error_context_visibility() */ + + /** + * @var int + * @cvalue PQSHOW_CONTEXT_NEVER + */ + const PGSQL_SHOW_CONTEXT_NEVER = UNKNOWN; + /** + * @var int + * @cvalue PQSHOW_CONTEXT_ERRORS + */ + const PGSQL_SHOW_CONTEXT_ERRORS = UNKNOWN; + /** + * @var int + * @cvalue PQSHOW_CONTEXT_ALWAYS + */ + const PGSQL_SHOW_CONTEXT_ALWAYS = UNKNOWN; + function pg_connect(string $connection_string, int $flags = 0): PgSql\Connection|false {} @@ -456,7 +488,7 @@ function pg_pconnect(string $connection_string, int $flags = 0): PgSql\Connectio function pg_connect_poll(PgSql\Connection $connection): int {} - function pg_close(?PgSql\Connection $connection = null): bool {} + function pg_close(?PgSql\Connection $connection = null): true {} /** @refcount 1 */ function pg_dbname(?PgSql\Connection $connection = null): string {} @@ -678,7 +710,7 @@ function 
pg_getlastoid(PgSql\Result $result): string|int|false {} function pg_trace(string $filename, string $mode = "w", ?PgSql\Connection $connection = null, int $trace_mode = 0): bool {} - function pg_untrace(?PgSql\Connection $connection = null): bool {} + function pg_untrace(?PgSql\Connection $connection = null): true {} /** * @param PgSql\Connection $connection @@ -938,6 +970,8 @@ function pg_exit_pipeline_mode(PgSql\Connection $connection): bool {} function pg_pipeline_sync(PgSql\Connection $connection): bool {} function pg_pipeline_status(PgSql\Connection $connection): int {} #endif + + function pg_set_error_context_visibility(PgSql\Connection $connection, int $visibility): int {} } namespace PgSql { diff --git a/ext/pgsql/pgsql_arginfo.h b/ext/pgsql/pgsql_arginfo.h index 2b8e7cd17ae6a..26e0777b1cbf5 100644 --- a/ext/pgsql/pgsql_arginfo.h +++ b/ext/pgsql/pgsql_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: f18a73443942daa2b3695e8750c8daaea6b96194 */ + * Stub hash: a37be19da43ac0838655b0ba7e34382e9c7424f5 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_pg_connect, 0, 1, PgSql\\Connection, MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, connection_string, IS_STRING, 0) @@ -12,7 +12,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_connect_poll, 0, 1, IS_LONG, ZEND_ARG_OBJ_INFO(0, connection, PgSql\\Connection, 0) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_close, 0, 0, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_close, 0, 0, IS_TRUE, 0) ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, connection, PgSql\\Connection, 1, "null") ZEND_END_ARG_INFO() @@ -41,7 +41,9 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_pg_parameter_status, 0, 1, MAY_B ZEND_ARG_TYPE_INFO(0, name, IS_STRING, 0) ZEND_END_ARG_INFO() -#define arginfo_pg_ping arginfo_pg_close +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_ping, 0, 0, _IS_BOOL, 0) + ZEND_ARG_OBJ_INFO_WITH_DEFAULT_VALUE(0, connection, 
PgSql\\Connection, 1, "null") +ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_OBJ_TYPE_MASK_EX(arginfo_pg_query, 0, 1, PgSql\\Result, MAY_BE_FALSE) ZEND_ARG_INFO(0, connection) @@ -297,7 +299,7 @@ ZEND_END_ARG_INFO() #define arginfo_pg_clientencoding arginfo_pg_dbname -#define arginfo_pg_end_copy arginfo_pg_close +#define arginfo_pg_end_copy arginfo_pg_ping ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_put_line, 0, 1, _IS_BOOL, 0) ZEND_ARG_INFO(0, connection) @@ -470,6 +472,11 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_pipeline_status, 0, 1, IS_LON ZEND_END_ARG_INFO() #endif +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_pg_set_error_context_visibility, 0, 2, IS_LONG, 0) + ZEND_ARG_OBJ_INFO(0, connection, PgSql\\Connection, 0) + ZEND_ARG_TYPE_INFO(0, visibility, IS_LONG, 0) +ZEND_END_ARG_INFO() + ZEND_FUNCTION(pg_connect); ZEND_FUNCTION(pg_pconnect); @@ -572,6 +579,7 @@ ZEND_FUNCTION(pg_pipeline_sync); #if defined(LIBPQ_HAS_PIPELINING) ZEND_FUNCTION(pg_pipeline_status); #endif +ZEND_FUNCTION(pg_set_error_context_visibility); static const zend_function_entry ext_functions[] = { @@ -701,6 +709,7 @@ static const zend_function_entry ext_functions[] = { #if defined(LIBPQ_HAS_PIPELINING) ZEND_FE(pg_pipeline_status, arginfo_pg_pipeline_status) #endif + ZEND_FE(pg_set_error_context_visibility, arginfo_pg_set_error_context_visibility) ZEND_FE_END }; @@ -754,6 +763,12 @@ static void register_pgsql_symbols(int module_number) REGISTER_LONG_CONSTANT("PGSQL_ERRORS_TERSE", PQERRORS_TERSE, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PGSQL_ERRORS_DEFAULT", PQERRORS_DEFAULT, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PGSQL_ERRORS_VERBOSE", PQERRORS_VERBOSE, CONST_PERSISTENT); +#if PGVERSION_NUM > 110000 + REGISTER_LONG_CONSTANT("PGSQL_ERRORS_SQLSTATE", PQERRORS_SQLSTATE, CONST_PERSISTENT); +#endif +#if !(PGVERSION_NUM > 110000) + REGISTER_LONG_CONSTANT("PGSQL_ERRORS_SQLSTATE", PQERRORS_TERSE, CONST_PERSISTENT); +#endif REGISTER_LONG_CONSTANT("PGSQL_SEEK_SET", 
SEEK_SET, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PGSQL_SEEK_CUR", SEEK_CUR, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PGSQL_SEEK_END", SEEK_END, CONST_PERSISTENT); @@ -827,6 +842,9 @@ static void register_pgsql_symbols(int module_number) #if defined(LIBPQ_HAS_PIPELINING) REGISTER_LONG_CONSTANT("PGSQL_PIPELINE_ABORTED", PQ_PIPELINE_ABORTED, CONST_PERSISTENT); #endif + REGISTER_LONG_CONSTANT("PGSQL_SHOW_CONTEXT_NEVER", PQSHOW_CONTEXT_NEVER, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PGSQL_SHOW_CONTEXT_ERRORS", PQSHOW_CONTEXT_ERRORS, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PGSQL_SHOW_CONTEXT_ALWAYS", PQSHOW_CONTEXT_ALWAYS, CONST_PERSISTENT); } static zend_class_entry *register_class_PgSql_Connection(void) diff --git a/ext/pgsql/tests/07optional.phpt b/ext/pgsql/tests/07optional.phpt index 6c58727e4adbe..41f89ca6c8c3b 100644 --- a/ext/pgsql/tests/07optional.phpt +++ b/ext/pgsql/tests/07optional.phpt @@ -19,7 +19,11 @@ if (function_exists('pg_set_error_verbosity')) { pg_set_error_verbosity($db, PGSQL_ERRORS_TERSE); pg_set_error_verbosity($db, PGSQL_ERRORS_DEFAULT); pg_set_error_verbosity($db, PGSQL_ERRORS_VERBOSE); + pg_set_error_verbosity($db, PGSQL_ERRORS_SQLSTATE); } +pg_set_error_context_visibility($db, PGSQL_SHOW_CONTEXT_NEVER); +pg_set_error_context_visibility($db, PGSQL_SHOW_CONTEXT_ERRORS); +pg_set_error_context_visibility($db, PGSQL_SHOW_CONTEXT_ALWAYS); echo "OK"; ?> --EXPECT-- diff --git a/ext/pgsql/tests/bug71998.phpt b/ext/pgsql/tests/bug71998.phpt index 854c4e4227436..96fc58ff8272e 100644 --- a/ext/pgsql/tests/bug71998.phpt +++ b/ext/pgsql/tests/bug71998.phpt @@ -57,7 +57,13 @@ $i = 0; $errors = 0; foreach ($ips as $ip) { $data = array("id" => ++$i, "remote_addr" => $ip); - $r = @pg_insert($db, 'tmp_statistics', $data); + $r = true; + try { + @pg_insert($db, 'tmp_statistics', $data); + } catch (\ValueError $e) { + echo $e->getMessage() . 
PHP_EOL; + $r = false; + } if (!$r && in_array($ip, $bad)) { $errors++; @@ -79,6 +85,13 @@ pg_close($db); ?> --EXPECT-- +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "256.257.258.259" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "fe08::7:8interface" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "schnitzel" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "10002.3.4" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "1.2.3.4.5" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "256.0.0.0" given +pg_insert(): Field "remote_addr" must be a valid IPv4 or IPv6 address string, "260.0.0.0" given array(2) { [0]=> string(1) "1" diff --git a/ext/pgsql/tests/bug77047.phpt b/ext/pgsql/tests/bug77047.phpt index 50a4d61e7b520..8995050677b8d 100644 --- a/ext/pgsql/tests/bug77047.phpt +++ b/ext/pgsql/tests/bug77047.phpt @@ -19,10 +19,18 @@ pg_query($db, "CREATE TABLE bug77047 ( t TIME WITHOUT TIME ZONE )"); -pg_insert($db, "bug77047", array("t" => "13:31")); +try { + pg_insert($db, "bug77047", array("t" => "13:31")); +} catch (\TypeError $e) { + echo $e->getMessage(); +} pg_insert($db, "bug77047", array("t" => "13:31:13")); pg_insert($db, "bug77047", array("t" => "1:2:3")); -pg_insert($db, "bug77047", array("t" => "xyz")); +try { + pg_insert($db, "bug77047", array("t" => "xyz")); +} catch (\TypeError $e) { + echo $e->getMessage() . 
PHP_EOL; +} pg_insert($db, "bug77047", array("t" => NULL)); pg_insert($db, "bug77047", array("t" => "")); @@ -33,10 +41,9 @@ while (false !== ($row = pg_fetch_row($res))) { ?> --EXPECTF-- -Notice: pg_insert(): Expects NULL or string for PostgreSQL time field (t) in %s on line %d +pg_insert(): Field "t" must be of type string|null, time given string(8) "13:31:00" string(8) "13:31:13" string(8) "01:02:03" NULL NULL - diff --git a/ext/pgsql/tests/pg_trace.phpt b/ext/pgsql/tests/pg_trace.phpt index 0917959bbef77..16e483cc99213 100644 --- a/ext/pgsql/tests/pg_trace.phpt +++ b/ext/pgsql/tests/pg_trace.phpt @@ -21,6 +21,6 @@ var_dump(pg_trace($tracefile, 'w', $db, 0)); $res = pg_query($db, 'select 1'); ?> ---EXPECT-- -pg_trace(): Argument #4 ($trace_mode) cannot set as trace is unsupported +--EXPECTF-- +pg_trace(): Argument #4 ($trace_mode) %s bool(true) diff --git a/ext/phar/Makefile.frag b/ext/phar/Makefile.frag index 58789cae25b57..7a867dd7df28f 100644 --- a/ext/phar/Makefile.frag +++ b/ext/phar/Makefile.frag @@ -29,22 +29,38 @@ $(builddir)/phar/phar.inc: $(srcdir)/phar/phar.inc -@test -d $(builddir)/phar || mkdir $(builddir)/phar -@test -f $(builddir)/phar/phar.inc || cp $(srcdir)/phar/phar.inc $(builddir)/phar/phar.inc + +TEST_PHP_EXECUTABLE = $(shell $(PHP_EXECUTABLE) -v 2>&1) +TEST_PHP_EXECUTABLE_RES = $(shell echo "$(TEST_PHP_EXECUTABLE)" | grep -c 'Exec format error') + $(builddir)/phar.php: $(srcdir)/build_precommand.php $(srcdir)/phar/*.inc $(srcdir)/phar/*.php $(SAPI_CLI_PATH) - -@echo "Generating phar.php" - @$(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(srcdir)/build_precommand.php > $(builddir)/phar.php + -@(echo "Generating phar.php"; \ + if [ "$(TEST_PHP_EXECUTABLE_RES)" != 1 ]; then \ + $(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(srcdir)/build_precommand.php > $(builddir)/phar.php; \ + else \ + echo "Skipping phar.php generating during cross compilation"; \ + fi) $(builddir)/phar.phar: $(builddir)/phar.php $(builddir)/phar/phar.inc 
$(srcdir)/phar/*.inc $(srcdir)/phar/*.php $(SAPI_CLI_PATH) - -@echo "Generating phar.phar" - -@rm -f $(builddir)/phar.phar - -@rm -f $(srcdir)/phar.phar - @$(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(builddir)/phar.php pack -f $(builddir)/phar.phar -a pharcommand -c auto -x \\.svn -p 0 -s $(srcdir)/phar/phar.php -h sha1 -b "$(PHP_PHARCMD_BANG)" $(srcdir)/phar/ - -@chmod +x $(builddir)/phar.phar + -@(echo "Generating phar.phar"; \ + if [ "$(TEST_PHP_EXECUTABLE_RES)" != 1 ]; then \ + rm -f $(builddir)/phar.phar; \ + rm -f $(srcdir)/phar.phar; \ + $(PHP_PHARCMD_EXECUTABLE) $(PHP_PHARCMD_SETTINGS) $(builddir)/phar.php pack -f $(builddir)/phar.phar -a pharcommand -c auto -x \\.svn -p 0 -s $(srcdir)/phar/phar.php -h sha1 -b "$(PHP_PHARCMD_BANG)" $(srcdir)/phar/; \ + chmod +x $(builddir)/phar.phar; \ + else \ + echo "Skipping phar.phar generating during cross compilation"; \ + fi) install-pharcmd: pharcmd - -@$(mkinstalldirs) $(INSTALL_ROOT)$(bindir) - $(INSTALL) $(builddir)/phar.phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix).phar - -@rm -f $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix) - $(LN_S) -f $(program_prefix)phar$(program_suffix).phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix) - @$(mkinstalldirs) $(INSTALL_ROOT)$(mandir)/man1 - @$(INSTALL_DATA) $(builddir)/phar.1 $(INSTALL_ROOT)$(mandir)/man1/$(program_prefix)phar$(program_suffix).1 - @$(INSTALL_DATA) $(builddir)/phar.phar.1 $(INSTALL_ROOT)$(mandir)/man1/$(program_prefix)phar$(program_suffix).phar.1 + @(if [ "$(TEST_PHP_EXECUTABLE_RES)" != 1 ]; then \ + $(mkinstalldirs) $(INSTALL_ROOT)$(bindir); \ + $(INSTALL) $(builddir)/phar.phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix).phar; \ + rm -f $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix); \ + $(LN_S) -f $(program_prefix)phar$(program_suffix).phar $(INSTALL_ROOT)$(bindir)/$(program_prefix)phar$(program_suffix); \ + $(mkinstalldirs) $(INSTALL_ROOT)$(mandir)/man1; 
\ + $(INSTALL_DATA) $(builddir)/phar.1 $(INSTALL_ROOT)$(mandir)/man1/$(program_prefix)phar$(program_suffix).1; \ + $(INSTALL_DATA) $(builddir)/phar.phar.1 $(INSTALL_ROOT)$(mandir)/man1/$(program_prefix)phar$(program_suffix).phar.1; \ + else \ + echo "Skipping install-pharcmd during cross compilation"; \ + fi) diff --git a/ext/phar/phar_object.c b/ext/phar/phar_object.c index 47a4ca541d12e..004b3707861d8 100644 --- a/ext/phar/phar_object.c +++ b/ext/phar/phar_object.c @@ -3709,7 +3709,7 @@ PHP_METHOD(Phar, offsetSet) { char *fname, *cont_str = NULL; size_t fname_len, cont_len; - zval *zresource; + zval *zresource = NULL; if (zend_parse_parameters_ex(ZEND_PARSE_PARAMS_QUIET, ZEND_NUM_ARGS(), "pr", &fname, &fname_len, &zresource) == FAILURE && zend_parse_parameters(ZEND_NUM_ARGS(), "ps", &fname, &fname_len, &cont_str, &cont_len) == FAILURE) { diff --git a/ext/readline/tests/bug77812-readline.phpt b/ext/readline/tests/bug77812-readline.phpt index a18686781718b..a2d6c212c536a 100644 --- a/ext/readline/tests/bug77812-readline.phpt +++ b/ext/readline/tests/bug77812-readline.phpt @@ -13,7 +13,6 @@ $php = getenv('TEST_PHP_EXECUTABLE'); $ini = getenv('TEST_PHP_EXTRA_ARGS'); $descriptorspec = [['pipe', 'r'], STDOUT, STDERR]; $proc = proc_open("$php $ini -a", $descriptorspec, $pipes); -var_dump($proc); fwrite($pipes[0], "echo << --EXPECTF-- -resource(%d) of type (process) Interactive shell php > echo <<op_array.static_variables); ZEND_MAP_PTR_SET(fptr->op_array.static_variables_ptr, ht); } - ZEND_HASH_MAP_FOREACH_VAL(ht, val) { - if (UNEXPECTED(zval_update_constant_ex(val, fptr->common.scope) != SUCCESS)) { - RETURN_THROWS(); - } - } ZEND_HASH_FOREACH_END(); zend_hash_copy(Z_ARRVAL_P(return_value), ht, zval_add_ref); } else { RETURN_EMPTY_ARRAY(); @@ -7143,7 +7137,7 @@ ZEND_METHOD(ReflectionFiber, getCallable) static zval *_reflection_write_property(zend_object *object, zend_string *name, zval *value, void **cache_slot) { if (zend_hash_exists(&object->ce->properties_info, 
name) - && (zend_string_equals_literal(name, "name") || zend_string_equals_literal(name, "class"))) + && (zend_string_equals(name, ZSTR_KNOWN(ZEND_STR_NAME)) || zend_string_equals(name, ZSTR_KNOWN(ZEND_STR_CLASS)))) { zend_throw_exception_ex(reflection_exception_ptr, 0, "Cannot set read-only property %s::$%s", ZSTR_VAL(object->ce->name), ZSTR_VAL(name)); diff --git a/ext/reflection/tests/ReflectionMethod_getStaticVariables_basic_extra_bleed.phpt b/ext/reflection/tests/ReflectionMethod_getStaticVariables_basic_extra_bleed.phpt new file mode 100644 index 0000000000000..6803e7a063dd3 --- /dev/null +++ b/ext/reflection/tests/ReflectionMethod_getStaticVariables_basic_extra_bleed.phpt @@ -0,0 +1,31 @@ +--TEST-- +ReflectionMethod::getStaticVariables() should not bleed IS_TYPE_UNINITIALIZED +--FILE-- +getStaticVariables()['a']; + + static $a = test(); + var_dump($a); + + // Technically, IS_TYPE_UNINITIALIZED does bleed, but it doesn't matter since there's no way we + // can assign it to the static variable directly instead of the reference. 
+ $staticVar = &$methodInfo->getStaticVariables()['a']; + $staticVar = $nullWithIsTypeUninitialized; +} + +foo(); +foo(); + +?> +--EXPECT-- +test() called +int(42) +NULL diff --git a/ext/reflection/tests/new_in_constexpr.phpt b/ext/reflection/tests/new_in_constexpr.phpt index c9b77c0eed216..bda5aaba6997a 100644 --- a/ext/reflection/tests/new_in_constexpr.phpt +++ b/ext/reflection/tests/new_in_constexpr.phpt @@ -9,6 +9,9 @@ function test1() { } $rf = new ReflectionFunction('test1'); +var_dump($rf->getStaticVariables()); +test1(); + $s = $rf->getStaticVariables(); var_dump($s['x'] === test1()); @@ -23,5 +26,9 @@ var_dump($rp->getDefaultValue() !== test2()); ?> --EXPECT-- +array(1) { + ["x"]=> + NULL +} bool(true) bool(true) diff --git a/ext/session/session.c b/ext/session/session.c index c3ee25313fbbe..05898594547f9 100644 --- a/ext/session/session.c +++ b/ext/session/session.c @@ -1927,7 +1927,7 @@ PHP_FUNCTION(session_module_name) } if (name) { - if (zend_string_equals_literal_ci(name, "user")) { + if (zend_string_equals_ci(name, ZSTR_KNOWN(ZEND_STR_USER))) { zend_argument_value_error(1, "cannot be \"user\""); RETURN_THROWS(); } @@ -1967,7 +1967,7 @@ static inline void set_user_save_handler_ini(void) { zend_string *ini_name, *ini_val; ini_name = ZSTR_INIT_LITERAL("session.save_handler", 0); - ini_val = ZSTR_INIT_LITERAL("user", 0); + ini_val = ZSTR_KNOWN(ZEND_STR_USER); PS(set_handler) = 1; zend_alter_ini_entry(ini_name, ini_val, PHP_INI_USER, PHP_INI_STAGE_RUNTIME); PS(set_handler) = 0; diff --git a/ext/session/tests/session_regenerate_id_cookie.phpt b/ext/session/tests/session_regenerate_id_cookie.phpt index a61e24d2a04d4..f1dc0727205d2 100644 --- a/ext/session/tests/session_regenerate_id_cookie.phpt +++ b/ext/session/tests/session_regenerate_id_cookie.phpt @@ -2,6 +2,8 @@ Test session_regenerate_id() function : basic functionality --EXTENSIONS-- session +--INI-- +session.sid_length = 32 --SKIPIF-- '); -var_dump(`$php -n -d session.name=PHPSESSID $file`); 
+$extra_arguments = getenv('TEST_PHP_EXTRA_ARGS'); +var_dump(`$php $extra_arguments -d session.name=PHPSESSID $file`); unlink($file); diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index d3f2865e12036..e219d7d07ef75 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -33,13 +33,12 @@ #include "zend_interfaces.h" #include "ext/spl/spl_iterators.h" -zend_class_entry *sxe_class_entry = NULL; PHP_SXE_API zend_class_entry *ce_SimpleXMLIterator; PHP_SXE_API zend_class_entry *ce_SimpleXMLElement; PHP_SXE_API zend_class_entry *sxe_get_element_class_entry(void) /* {{{ */ { - return sxe_class_entry; + return ce_SimpleXMLElement; } /* }}} */ @@ -101,7 +100,7 @@ static inline int match_ns(php_sxe_object *sxe, xmlNodePtr node, xmlChar *name, return 1; } - if (node->ns && !xmlStrcmp(prefix ? node->ns->prefix : node->ns->href, name)) { + if (node->ns && xmlStrEqual(prefix ? node->ns->prefix : node->ns->href, name)) { return 1; } @@ -127,7 +126,7 @@ static xmlNodePtr sxe_get_element_by_offset(php_sxe_object *sxe, zend_long offse SKIP_TEXT(node) if (node->type == XML_ELEMENT_NODE && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (sxe->iter.type == SXE_ITER_CHILD || ( - sxe->iter.type == SXE_ITER_ELEMENT && !xmlStrcmp(node->name, sxe->iter.name))) { + sxe->iter.type == SXE_ITER_ELEMENT && xmlStrEqual(node->name, sxe->iter.name))) { if (nodendx == offset) { break; } @@ -151,7 +150,7 @@ static xmlNodePtr sxe_find_element_by_name(php_sxe_object *sxe, xmlNodePtr node, while (node) { SKIP_TEXT(node) if (node->type == XML_ELEMENT_NODE && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { - if (!xmlStrcmp(node->name, name)) { + if (xmlStrEqual(node->name, name)) { return node; } } @@ -161,11 +160,10 @@ static xmlNodePtr sxe_find_element_by_name(php_sxe_object *sxe, xmlNodePtr node, return NULL; } /* }}} */ -static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, char **name, SXE_ITER *type) /* 
{{{ */ +static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, char *name, SXE_ITER *type) /* {{{ */ { int orgtype; xmlNodePtr orgnode = node; - xmlNodePtr retnode = NULL; if (sxe->iter.type != SXE_ITER_ATTRLIST) { @@ -188,26 +186,15 @@ static xmlNodePtr sxe_get_element_by_name(php_sxe_object *sxe, xmlNodePtr node, while (node) { SKIP_TEXT(node) if (node->type == XML_ELEMENT_NODE && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { - if (!xmlStrcmp(node->name, (xmlChar *)*name)) { - if (1||retnode) - { - *type = SXE_ITER_ELEMENT; - return orgnode; - } - retnode = node; + if (xmlStrEqual(node->name, (xmlChar *)name)) { + *type = SXE_ITER_ELEMENT; + return orgnode; } } next_iter: node = node->next; } - if (retnode) - { - *type = SXE_ITER_NONE; - *name = NULL; - return retnode; - } - return NULL; } /* }}} */ @@ -281,7 +268,7 @@ static zval *sxe_prop_dim_read(zend_object *object, zval *member, bool elements, if (Z_TYPE_P(member) != IS_LONG || sxe->iter.type == SXE_ITER_ATTRLIST) { if (Z_TYPE_P(member) == IS_LONG) { while (attr && nodendx <= Z_LVAL_P(member)) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (nodendx == Z_LVAL_P(member)) { _node_as_zval(sxe, (xmlNodePtr) attr, rv, SXE_ITER_NONE, NULL, sxe->iter.nsprefix, sxe->iter.isprefix); break; @@ -292,7 +279,7 @@ static zval *sxe_prop_dim_read(zend_object *object, zval *member, bool elements, } } else { while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && !xmlStrcmp(attr->name, (xmlChar *)name) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && xmlStrEqual(attr->name, (xmlChar *)name) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, 
sxe->iter.isprefix)) { _node_as_zval(sxe, (xmlNodePtr) attr, rv, SXE_ITER_NONE, NULL, sxe->iter.nsprefix, sxe->iter.isprefix); break; } @@ -442,6 +429,8 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (sxe->iter.type == SXE_ITER_ATTRLIST) { attribs = 1; elements = 0; @@ -481,7 +470,7 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, value_str = zval_get_string(value); break; case IS_OBJECT: - if (Z_OBJCE_P(value) == sxe_class_entry) { + if (Z_OBJCE_P(value) == ce_SimpleXMLElement) { zval zval_copy; if (sxe_object_cast_ex(Z_OBJ_P(value), &zval_copy, IS_STRING) == FAILURE) { zend_throw_error(NULL, "Unable to cast node to string"); @@ -505,7 +494,7 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, if (attribs) { if (Z_TYPE_P(member) == IS_LONG) { while (attr && nodendx <= Z_LVAL_P(member)) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (nodendx == Z_LVAL_P(member)) { is_attr = 1; ++counter; @@ -517,7 +506,7 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, } } else { while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && !xmlStrcmp(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && xmlStrEqual(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { is_attr = 1; ++counter; break; @@ -556,7 +545,7 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, while (node) { SKIP_TEXT(node); - if 
(!xmlStrcmp(node->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if (xmlStrEqual(node->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { newnode = node; ++counter; } @@ -645,7 +634,7 @@ static zval *sxe_property_get_adr(zend_object *object, zend_string *zname, int f sxe = php_sxe_fetch_object(object); GET_NODE(sxe, node); name = ZSTR_VAL(zname); - node = sxe_get_element_by_name(sxe, node, &name, &type); + node = sxe_get_element_by_name(sxe, node, name, &type); if (node) { return NULL; } @@ -719,7 +708,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt int nodendx = 0; while (attr && nodendx <= Z_LVAL_P(member)) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (nodendx == Z_LVAL_P(member)) { exists = 1; break; @@ -730,7 +719,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt } } else { while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && !xmlStrcmp(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && xmlStrEqual(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { exists = 1; break; } @@ -739,7 +728,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt } } if (exists && check_empty == 1 && - (!attr->children || !attr->children->content || !attr->children->content[0] || !xmlStrcmp(attr->children->content, (const xmlChar *) "0")) ) { + (!attr->children || !attr->children->content || !attr->children->content[0] || 
xmlStrEqual(attr->children->content, (const xmlChar *) "0")) ) { /* Attribute with no content in it's text node */ exists = 0; } @@ -758,7 +747,7 @@ static int sxe_prop_dim_exists(zend_object *object, zval *member, int check_empt exists = 1; if (check_empty == 1 && (!node->children || (node->children->type == XML_TEXT_NODE && !node->children->next && - (!node->children->content || !node->children->content[0] || !xmlStrcmp(node->children->content, (const xmlChar *) "0")))) ) { + (!node->children->content || !node->children->content[0] || xmlStrEqual(node->children->content, (const xmlChar *) "0")))) ) { exists = 0; } } @@ -813,6 +802,8 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (Z_TYPE_P(member) == IS_LONG) { if (sxe->iter.type != SXE_ITER_ATTRLIST) { attribs = 0; @@ -841,7 +832,7 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements int nodendx = 0; while (attr && nodendx <= Z_LVAL_P(member)) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { if (nodendx == Z_LVAL_P(member)) { xmlUnlinkNode((xmlNodePtr) attr); php_libxml_node_free_resource((xmlNodePtr) attr); @@ -854,7 +845,7 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements } else { while (attr) { anext = attr->next; - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && !xmlStrcmp(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && xmlStrEqual(attr->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, (xmlNodePtr) attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { 
xmlUnlinkNode((xmlNodePtr) attr); php_libxml_node_free_resource((xmlNodePtr) attr); break; @@ -881,7 +872,7 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements SKIP_TEXT(node); - if (!xmlStrcmp(node->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if (xmlStrEqual(node->name, (xmlChar *)Z_STRVAL_P(member)) && match_ns(sxe, node, sxe->iter.nsprefix, sxe->iter.isprefix)) { xmlUnlinkNode(node); php_libxml_node_free_resource(node); } @@ -1006,7 +997,7 @@ static int sxe_prop_is_empty(zend_object *object) /* {{{ */ attr = node ? (xmlAttrPtr)node->properties : NULL; test = sxe->iter.name && sxe->iter.type == SXE_ITER_ATTRLIST; while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr)attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr)attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { return 0; } attr = attr->next; @@ -1116,7 +1107,7 @@ static HashTable *sxe_get_prop_hash(zend_object *object, int is_debug) /* {{{ */ ZVAL_UNDEF(&zattr); test = sxe->iter.name && sxe->iter.type == SXE_ITER_ATTRLIST; while (attr) { - if ((!test || !xmlStrcmp(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr)attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { + if ((!test || xmlStrEqual(attr->name, sxe->iter.name)) && match_ns(sxe, (xmlNodePtr)attr, sxe->iter.nsprefix, sxe->iter.isprefix)) { ZVAL_STR(&value, sxe_xmlNodeListGetString((xmlDocPtr) sxe->document->ptr, attr->children, 1)); namelen = xmlStrlen(attr->name); if (Z_ISUNDEF(zattr)) { @@ -1686,6 +1677,8 @@ PHP_METHOD(SimpleXMLElement, addChild) sxe = Z_SXEOBJ_P(ZEND_THIS); GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (sxe->iter.type == SXE_ITER_ATTRLIST) { php_error_docref(NULL, E_WARNING, "Cannot add element to attributes"); return; @@ -2190,7 +2183,7 @@ static zend_function* 
php_sxe_find_fptr_count(zend_class_entry *ce) int inherited = 0; while (parent) { - if (parent == sxe_class_entry) { + if (parent == ce_SimpleXMLElement) { break; } parent = parent->parent; @@ -2198,7 +2191,8 @@ static zend_function* php_sxe_find_fptr_count(zend_class_entry *ce) } if (inherited) { - fptr_count = zend_hash_str_find_ptr(&ce->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + fptr_count = zend_hash_find_ptr(&ce->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (fptr_count->common.scope == parent) { fptr_count = NULL; } @@ -2248,7 +2242,7 @@ PHP_FUNCTION(simplexml_load_file) char *ns = NULL; size_t ns_len = 0; zend_long options = 0; - zend_class_entry *ce= sxe_class_entry; + zend_class_entry *ce= ce_SimpleXMLElement; zend_function *fptr_count; bool isprefix = 0; @@ -2268,7 +2262,7 @@ PHP_FUNCTION(simplexml_load_file) } if (!ce) { - ce = sxe_class_entry; + ce = ce_SimpleXMLElement; fptr_count = NULL; } else { fptr_count = php_sxe_find_fptr_count(ce); @@ -2293,7 +2287,7 @@ PHP_FUNCTION(simplexml_load_string) char *ns = NULL; size_t ns_len = 0; zend_long options = 0; - zend_class_entry *ce= sxe_class_entry; + zend_class_entry *ce= ce_SimpleXMLElement; zend_function *fptr_count; bool isprefix = 0; @@ -2321,7 +2315,7 @@ PHP_FUNCTION(simplexml_load_string) } if (!ce) { - ce = sxe_class_entry; + ce = ce_SimpleXMLElement; fptr_count = NULL; } else { fptr_count = php_sxe_find_fptr_count(ce); @@ -2399,7 +2393,7 @@ static xmlNodePtr php_sxe_iterator_fetch(php_sxe_object *sxe, xmlNodePtr node, i if (sxe->iter.name) { while (node) { if (node->type == XML_ATTRIBUTE_NODE) { - if (!xmlStrcmp(node->name, sxe->iter.name) && match_ns(sxe, node, prefix, isprefix)) { + if (xmlStrEqual(node->name, sxe->iter.name) && match_ns(sxe, node, prefix, isprefix)) { break; } } @@ -2418,7 +2412,7 @@ static xmlNodePtr php_sxe_iterator_fetch(php_sxe_object *sxe, xmlNodePtr node, i } else if (sxe->iter.type == SXE_ITER_ELEMENT && sxe->iter.name) { while (node) { 
if (node->type == XML_ELEMENT_NODE) { - if (!xmlStrcmp(node->name, sxe->iter.name) && match_ns(sxe, node, prefix, isprefix)) { + if (xmlStrEqual(node->name, sxe->iter.name) && match_ns(sxe, node, prefix, isprefix)) { break; } } @@ -2595,7 +2589,7 @@ PHP_FUNCTION(simplexml_import_dom) zval *node; php_libxml_node_object *object; xmlNodePtr nodep = NULL; - zend_class_entry *ce = sxe_class_entry; + zend_class_entry *ce = ce_SimpleXMLElement; zend_function *fptr_count; if (zend_parse_parameters(ZEND_NUM_ARGS(), "o|C!", &node, &ce) == FAILURE) { @@ -2620,7 +2614,7 @@ PHP_FUNCTION(simplexml_import_dom) if (nodep && nodep->type == XML_ELEMENT_NODE) { if (!ce) { - ce = sxe_class_entry; + ce = ce_SimpleXMLElement; fptr_count = NULL; } else { fptr_count = php_sxe_find_fptr_count(ce); @@ -2670,10 +2664,10 @@ ZEND_GET_MODULE(simplexml) /* {{{ PHP_MINIT_FUNCTION(simplexml) */ PHP_MINIT_FUNCTION(simplexml) { - sxe_class_entry = register_class_SimpleXMLElement(zend_ce_stringable, zend_ce_countable, spl_ce_RecursiveIterator); - sxe_class_entry->create_object = sxe_object_new; - sxe_class_entry->default_object_handlers = &sxe_object_handlers; - sxe_class_entry->get_iterator = php_sxe_get_iterator; + ce_SimpleXMLElement = register_class_SimpleXMLElement(zend_ce_stringable, zend_ce_countable, spl_ce_RecursiveIterator); + ce_SimpleXMLElement->create_object = sxe_object_new; + ce_SimpleXMLElement->default_object_handlers = &sxe_object_handlers; + ce_SimpleXMLElement->get_iterator = php_sxe_get_iterator; memcpy(&sxe_object_handlers, &std_object_handlers, sizeof(zend_object_handlers)); sxe_object_handlers.offset = XtOffsetOf(php_sxe_object, zo); @@ -2696,12 +2690,9 @@ PHP_MINIT_FUNCTION(simplexml) sxe_object_handlers.get_closure = NULL; sxe_object_handlers.get_gc = sxe_get_gc; - /* TODO: Why do we have two variables for this? 
*/ - ce_SimpleXMLElement = sxe_class_entry; - ce_SimpleXMLIterator = register_class_SimpleXMLIterator(ce_SimpleXMLElement); - php_libxml_register_export(sxe_class_entry, simplexml_export_node); + php_libxml_register_export(ce_SimpleXMLElement, simplexml_export_node); return SUCCESS; } @@ -2710,7 +2701,7 @@ PHP_MINIT_FUNCTION(simplexml) /* {{{ PHP_MSHUTDOWN_FUNCTION(simplexml) */ PHP_MSHUTDOWN_FUNCTION(simplexml) { - sxe_class_entry = NULL; + ce_SimpleXMLElement = NULL; return SUCCESS; } /* }}} */ diff --git a/ext/snmp/snmp.stub.php b/ext/snmp/snmp.stub.php index 84fc738354aeb..b02cfd44ee3c0 100644 --- a/ext/snmp/snmp.stub.php +++ b/ext/snmp/snmp.stub.php @@ -125,11 +125,11 @@ function snmpset(string $hostname, string $community, array|string $object_id, a function snmp_get_quick_print(): bool {} -function snmp_set_quick_print(bool $enable): bool {} +function snmp_set_quick_print(bool $enable): true {} -function snmp_set_enum_print(bool $enable): bool {} +function snmp_set_enum_print(bool $enable): true {} -function snmp_set_oid_output_format(int $format): bool {} +function snmp_set_oid_output_format(int $format): true {} /** @alias snmp_set_oid_output_format */ function snmp_set_oid_numeric_print(int $format): bool {} @@ -175,7 +175,7 @@ function snmp3_set( array|string $object_id, array|string $type, array|string $value, int $timeout = -1, int $retries = -1): bool {} -function snmp_set_valueretrieval(int $method): bool {} +function snmp_set_valueretrieval(int $method): true {} function snmp_get_valueretrieval(): int {} diff --git a/ext/snmp/snmp_arginfo.h b/ext/snmp/snmp_arginfo.h index bbe95f1dd2cfb..f3f52e4bcea9a 100644 --- a/ext/snmp/snmp_arginfo.h +++ b/ext/snmp/snmp_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: a79a697fa8c1ab2513bde03e0c2367d0caaec7d8 */ + * Stub hash: 659db99d46c15b508e992d55a1e421f48b51f6e3 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmpget, 0, 3, IS_MIXED, 0) ZEND_ARG_TYPE_INFO(0, hostname, IS_STRING, 0) @@ -36,17 +36,19 @@ ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_get_quick_print, 0, 0, _IS_BOOL, 0) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_quick_print, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_quick_print, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(0, enable, _IS_BOOL, 0) ZEND_END_ARG_INFO() #define arginfo_snmp_set_enum_print arginfo_snmp_set_quick_print -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_oid_output_format, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_oid_output_format, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(0, format, IS_LONG, 0) ZEND_END_ARG_INFO() -#define arginfo_snmp_set_oid_numeric_print arginfo_snmp_set_oid_output_format +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_oid_numeric_print, 0, 1, _IS_BOOL, 0) + ZEND_ARG_TYPE_INFO(0, format, IS_LONG, 0) +ZEND_END_ARG_INFO() #define arginfo_snmp2_get arginfo_snmpget @@ -103,7 +105,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp3_set, 0, 10, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, retries, IS_LONG, 0, "-1") ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_valueretrieval, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_snmp_set_valueretrieval, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(0, method, IS_LONG, 0) ZEND_END_ARG_INFO() diff --git a/ext/soap/php_http.c b/ext/soap/php_http.c index 29a918fb8a2cb..5a887bd1e1e04 100644 --- a/ext/soap/php_http.c +++ b/ext/soap/php_http.c @@ -20,6 +20,7 @@ #include "ext/standard/base64.h" #include "ext/standard/md5.h" #include "ext/random/php_random.h" +#include "ext/hash/php_hash.h" static char 
*get_http_header_value_nodup(char *headers, char *type, size_t *len); static char *get_http_header_value(char *headers, char *type); @@ -657,18 +658,23 @@ int make_http_soap_request(zval *this_ptr, has_authorization = 1; if (Z_TYPE_P(digest) == IS_ARRAY) { char HA1[33], HA2[33], response[33], cnonce[33], nc[9]; - zend_long nonce; + unsigned char nonce[16]; PHP_MD5_CTX md5ctx; unsigned char hash[16]; - php_random_bytes_throw(&nonce, sizeof(nonce)); - nonce &= 0x7fffffff; + if (UNEXPECTED(php_random_bytes_throw(&nonce, sizeof(nonce)) != SUCCESS)) { + ZEND_ASSERT(EG(exception)); + php_stream_close(stream); + convert_to_null(Z_CLIENT_HTTPURL_P(this_ptr)); + convert_to_null(Z_CLIENT_HTTPSOCKET_P(this_ptr)); + convert_to_null(Z_CLIENT_USE_PROXY_P(this_ptr)); + smart_str_free(&soap_headers_z); + smart_str_free(&soap_headers); + return FALSE; + } - PHP_MD5Init(&md5ctx); - snprintf(cnonce, sizeof(cnonce), ZEND_LONG_FMT, nonce); - PHP_MD5Update(&md5ctx, (unsigned char*)cnonce, strlen(cnonce)); - PHP_MD5Final(hash, &md5ctx); - make_digest(cnonce, hash); + php_hash_bin2hex(cnonce, nonce, sizeof(nonce)); + cnonce[32] = 0; if ((tmp = zend_hash_str_find(Z_ARRVAL_P(digest), "nc", sizeof("nc")-1)) != NULL && Z_TYPE_P(tmp) == IS_LONG) { diff --git a/ext/soap/soap.c b/ext/soap/soap.c index fea43f2f82146..f750e440f31c9 100644 --- a/ext/soap/soap.c +++ b/ext/soap/soap.c @@ -584,8 +584,8 @@ PHP_METHOD(SoapFault, __toString) this_ptr = ZEND_THIS; faultcode = zend_read_property(soap_fault_class_entry, Z_OBJ_P(this_ptr), "faultcode", sizeof("faultcode")-1, 1, &rv1); faultstring = zend_read_property(soap_fault_class_entry, Z_OBJ_P(this_ptr), "faultstring", sizeof("faultstring")-1, 1, &rv2); - file = zend_read_property(soap_fault_class_entry, Z_OBJ_P(this_ptr), "file", sizeof("file")-1, 1, &rv3); - line = zend_read_property(soap_fault_class_entry, Z_OBJ_P(this_ptr), "line", sizeof("line")-1, 1, &rv4); + file = zend_read_property_ex(soap_fault_class_entry, Z_OBJ_P(this_ptr), 
ZSTR_KNOWN(ZEND_STR_FILE), /* silent */ true, &rv3); + line = zend_read_property_ex(soap_fault_class_entry, Z_OBJ_P(this_ptr), ZSTR_KNOWN(ZEND_STR_LINE), /* silent */ true, &rv4); zend_call_method_with_0_params( Z_OBJ_P(ZEND_THIS), Z_OBJCE_P(ZEND_THIS), NULL, "gettraceasstring", &trace); @@ -1107,7 +1107,7 @@ static void _soap_server_exception(soapServicePtr service, sdlFunctionPtr functi } else if (instanceof_function(Z_OBJCE(exception_object), zend_ce_error)) { if (service->send_errors) { zval rv; - zend_string *msg = zval_get_string(zend_read_property(zend_ce_error, Z_OBJ(exception_object), "message", sizeof("message")-1, 0, &rv)); + zend_string *msg = zval_get_string(zend_read_property_ex(zend_ce_error, Z_OBJ(exception_object), ZSTR_KNOWN(ZEND_STR_MESSAGE), /* silent */ false, &rv)); add_soap_fault_ex(&exception_object, this_ptr, "Server", ZSTR_VAL(msg), NULL, NULL); zend_string_release_ex(msg, 0); } else { @@ -1943,7 +1943,7 @@ PHP_METHOD(SoapClient, __construct) php_stream_context_set_option(context, "ssl", "passphrase", tmp); } } - if ((tmp = zend_hash_str_find(ht, "trace", sizeof("trace")-1)) != NULL && + if ((tmp = zend_hash_find(ht, ZSTR_KNOWN(ZEND_STR_TRACE))) != NULL && (Z_TYPE_P(tmp) == IS_TRUE || (Z_TYPE_P(tmp) == IS_LONG && Z_LVAL_P(tmp) == 1))) { ZVAL_TRUE(Z_CLIENT_TRACE_P(this_ptr)); @@ -2747,7 +2747,7 @@ static void set_soap_fault(zval *obj, char *fault_code_ns, char *fault_code, cha } ZVAL_STRING(Z_FAULT_STRING_P(obj), fault_string ? fault_string : ""); - zend_update_property_string(zend_ce_exception, Z_OBJ_P(obj), "message", sizeof("message")-1, (fault_string ? 
fault_string : "")); + zend_update_property_ex(zend_ce_exception, Z_OBJ_P(obj), ZSTR_KNOWN(ZEND_STR_MESSAGE), Z_FAULT_STRING_P(obj)); if (fault_code != NULL) { int soap_version = SOAP_GLOBAL(soap_version); diff --git a/ext/soap/tests/bug73037.phpt b/ext/soap/tests/bug73037.phpt index 3853e50c26913..001940a4d37ed 100644 --- a/ext/soap/tests/bug73037.phpt +++ b/ext/soap/tests/bug73037.phpt @@ -59,8 +59,12 @@ function get_data($max) } $router = "bug73037_server.php"; -$args = substr(PHP_OS, 0, 3) == 'WIN' - ? ["-d", "extension_dir=" . ini_get("extension_dir"), "-d", "extension=php_soap.dll"] : []; +$args = ["-d", "extension_dir=" . ini_get("extension_dir"), "-d", "extension=" . (substr(PHP_OS, 0, 3) == "WIN" ? "php_" : "") . "soap." . PHP_SHLIB_SUFFIX]; +if (php_ini_loaded_file()) { + // Necessary such that it works from a development directory in which case extension_dir might not be the real extension dir + $args[] = "-c"; + $args[] = php_ini_loaded_file(); +} $code = <<<'PHP' $s = new SoapServer(NULL, array('uri' => 'http://here')); $s->setObject(new stdclass()); diff --git a/ext/soap/tests/custom_content_type.phpt b/ext/soap/tests/custom_content_type.phpt index b8bc8c9870113..d32f1df783591 100644 --- a/ext/soap/tests/custom_content_type.phpt +++ b/ext/soap/tests/custom_content_type.phpt @@ -13,8 +13,12 @@ soap include __DIR__ . "/../../../sapi/cli/tests/php_cli_server.inc"; -$args = substr(PHP_OS, 0, 3) == 'WIN' - ? ["-d", "extension_dir=" . ini_get("extension_dir"), "-d", "extension=php_soap.dll"] : []; +$args = ["-d", "extension_dir=" . ini_get("extension_dir"), "-d", "extension=" . (substr(PHP_OS, 0, 3) == "WIN" ? "php_" : "") . "soap." . PHP_SHLIB_SUFFIX]; +if (php_ini_loaded_file()) { + // Necessary such that it works from a development directory in which case extension_dir might not be the real extension dir + $args[] = "-c"; + $args[] = php_ini_loaded_file(); +} $code = <<<'PHP' /* Receive */ $content = trim(file_get_contents("php://input")) . 
PHP_EOL; diff --git a/ext/sodium/libsodium.c b/ext/sodium/libsodium.c index 6eb10f7571358..a037c7b680d53 100644 --- a/ext/sodium/libsodium.c +++ b/ext/sodium/libsodium.c @@ -125,12 +125,12 @@ ZEND_GET_MODULE(sodium) /* Remove argument information from backtrace to prevent information leaks */ static void sodium_remove_param_values_from_backtrace(zend_object *obj) { zval rv; - zval *trace = zend_read_property(zend_get_exception_base(obj), obj, "trace", sizeof("trace")-1, 0, &rv); + zval *trace = zend_read_property_ex(zend_get_exception_base(obj), obj, ZSTR_KNOWN(ZEND_STR_TRACE), /* silent */ false, &rv); if (trace && Z_TYPE_P(trace) == IS_ARRAY) { zval *frame; ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(trace), frame) { if (Z_TYPE_P(frame) == IS_ARRAY) { - zval *args = zend_hash_str_find(Z_ARRVAL_P(frame), "args", sizeof("args")-1); + zval *args = zend_hash_find(Z_ARRVAL_P(frame), ZSTR_KNOWN(ZEND_STR_ARGS)); if (args) { zval_ptr_dtor(args); ZVAL_EMPTY_ARRAY(args); diff --git a/ext/spl/spl_array.c b/ext/spl/spl_array.c index 1ef0c48d272d6..2e67b3cfe8c37 100644 --- a/ext/spl/spl_array.c +++ b/ext/spl/spl_array.c @@ -95,21 +95,6 @@ static inline HashTable **spl_array_get_hash_table_ptr(spl_array_object* intern) } /* }}} */ -static void spl_array_illegal_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s on ArrayObject", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -static void spl_array_illegal_empty_or_isset_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s in isset or empty", zend_get_type_by_const(Z_TYPE_P(offset))); -} - -static void spl_array_illegal_unset_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s in unset", zend_get_type_by_const(Z_TYPE_P(offset))); -} - static inline HashTable *spl_array_get_hash_table(spl_array_object* intern) { /* {{{ */ return *spl_array_get_hash_table_ptr(intern); } @@ -226,7 +211,8 @@ static zend_object *spl_array_object_new_ex(zend_class_entry 
*class_type, zend_o if (intern->fptr_offset_del->common.scope == parent) { intern->fptr_offset_del = NULL; } - intern->fptr_count = zend_hash_str_find_ptr(&class_type->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + intern->fptr_count = zend_hash_find_ptr(&class_type->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (intern->fptr_count->common.scope == parent) { intern->fptr_count = NULL; } @@ -269,6 +255,8 @@ static void spl_hash_key_release(spl_hash_key *key) { } } +/* This function does not throw any exceptions for illegal offsets, calls to + * zend_illegal_container_offset(); need to be made if the return value is FAILURE */ static zend_result get_hash_key(spl_hash_key *key, spl_array_object *intern, zval *offset) { key->release_key = false; @@ -309,7 +297,6 @@ static zend_result get_hash_key(spl_hash_key *key, spl_array_object *intern, zva ZVAL_DEREF(offset); goto try_again; default: - spl_array_illegal_offset(offset); return FAILURE; } @@ -320,7 +307,8 @@ static zend_result get_hash_key(spl_hash_key *key, spl_array_object *intern, zva return SUCCESS; } -static zval *spl_array_get_dimension_ptr(int check_inherited, spl_array_object *intern, zval *offset, int type) /* {{{ */ +static zval *spl_array_get_dimension_ptr(bool check_inherited, spl_array_object *intern, const zend_string *ce_name, + zval *offset, int type) /* {{{ */ { zval *retval; spl_hash_key key; @@ -336,7 +324,7 @@ static zval *spl_array_get_dimension_ptr(int check_inherited, spl_array_object * } if (get_hash_key(&key, intern, offset) == FAILURE) { - spl_array_illegal_offset(offset); + zend_illegal_container_offset(ce_name, offset, type); return (type == BP_VAR_W || type == BP_VAR_RW) ? 
&EG(error_zval) : &EG(uninitialized_zval); } @@ -438,7 +426,7 @@ static zval *spl_array_read_dimension_ex(int check_inherited, zend_object *objec } } - ret = spl_array_get_dimension_ptr(check_inherited, intern, offset, type); + ret = spl_array_get_dimension_ptr(check_inherited, intern, object->ce->name, offset, type); /* When in a write context, * ZE has to be fooled into thinking this is in a reference set @@ -512,7 +500,7 @@ static void spl_array_write_dimension_ex(int check_inherited, zend_object *objec } if (get_hash_key(&key, intern, offset) == FAILURE) { - spl_array_illegal_offset(offset); + zend_illegal_container_offset(object->ce->name, offset, BP_VAR_W); zval_ptr_dtor(value); return; } @@ -553,7 +541,7 @@ static void spl_array_unset_dimension_ex(int check_inherited, zend_object *objec } if (get_hash_key(&key, intern, offset) == FAILURE) { - spl_array_illegal_unset_offset(offset); + zend_illegal_container_offset(object->ce->name, offset, BP_VAR_UNSET); return; } @@ -623,7 +611,7 @@ static bool spl_array_has_dimension_ex(bool check_inherited, zend_object *object spl_hash_key key; if (get_hash_key(&key, intern, offset) == FAILURE) { - spl_array_illegal_empty_or_isset_offset(offset); + zend_illegal_container_offset(object->ce->name, offset, BP_VAR_IS); return 0; } @@ -861,7 +849,7 @@ static zval *spl_array_get_property_ptr_ptr(zend_object *object, zend_string *na return NULL; } ZVAL_STR(&member, name); - return spl_array_get_dimension_ptr(1, intern, &member, type); + return spl_array_get_dimension_ptr(1, intern, object->ce->name, &member, type); } return zend_std_get_property_ptr_ptr(object, name, type, cache_slot); } /* }}} */ diff --git a/ext/spl/spl_array.stub.php b/ext/spl/spl_array.stub.php index 64855f37d2e0b..8841fb351aefa 100644 --- a/ext/spl/spl_array.stub.php +++ b/ext/spl/spl_array.stub.php @@ -45,22 +45,22 @@ public function getFlags(): int {} public function setFlags(int $flags): void {} /** @tentative-return-type */ - public function asort(int 
$flags = SORT_REGULAR): bool {} + public function asort(int $flags = SORT_REGULAR): true {} /** @tentative-return-type */ - public function ksort(int $flags = SORT_REGULAR): bool {} + public function ksort(int $flags = SORT_REGULAR): true {} /** @tentative-return-type */ - public function uasort(callable $callback): bool {} + public function uasort(callable $callback): true {} /** @tentative-return-type */ - public function uksort(callable $callback): bool {} + public function uksort(callable $callback): true {} /** @tentative-return-type */ - public function natsort(): bool {} + public function natsort(): true {} /** @tentative-return-type */ - public function natcasesort(): bool {} + public function natcasesort(): true {} /** @tentative-return-type */ public function unserialize(string $data): void {} @@ -163,37 +163,37 @@ public function setFlags(int $flags): void {} * @tentative-return-type * @implementation-alias ArrayObject::asort */ - public function asort(int $flags = SORT_REGULAR): bool {} + public function asort(int $flags = SORT_REGULAR): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::ksort */ - public function ksort(int $flags = SORT_REGULAR): bool {} + public function ksort(int $flags = SORT_REGULAR): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::uasort */ - public function uasort(callable $callback): bool {} + public function uasort(callable $callback): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::uksort */ - public function uksort(callable $callback): bool {} + public function uksort(callable $callback): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::natsort */ - public function natsort(): bool {} + public function natsort(): true {} /** * @tentative-return-type * @implementation-alias ArrayObject::natcasesort */ - public function natcasesort(): bool {} + public function natcasesort(): true {} /** * @tentative-return-type diff --git 
a/ext/spl/spl_array_arginfo.h b/ext/spl/spl_array_arginfo.h index 9c1bb5b7b2014..de2ec5f1efb04 100644 --- a/ext/spl/spl_array_arginfo.h +++ b/ext/spl/spl_array_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 13e1f68463c2abc1a1ce0e1cff5f47a12407cfc1 */ + * Stub hash: d0ce4612e25d2b8a765544c835fa2347ae9b23f2 */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_ArrayObject___construct, 0, 0, 0) ZEND_ARG_TYPE_MASK(0, array, MAY_BE_ARRAY|MAY_BE_OBJECT, "[]") @@ -40,19 +40,19 @@ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_setF ZEND_ARG_TYPE_INFO(0, flags, IS_LONG, 0) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_asort, 0, 0, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_asort, 0, 0, IS_TRUE, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "SORT_REGULAR") ZEND_END_ARG_INFO() #define arginfo_class_ArrayObject_ksort arginfo_class_ArrayObject_asort -ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_uasort, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_uasort, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(0, callback, IS_CALLABLE, 0) ZEND_END_ARG_INFO() #define arginfo_class_ArrayObject_uksort arginfo_class_ArrayObject_uasort -ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_natsort, 0, 0, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayObject_natsort, 0, 0, IS_TRUE, 0) ZEND_END_ARG_INFO() #define arginfo_class_ArrayObject_natcasesort arginfo_class_ArrayObject_natsort @@ -139,7 +139,8 @@ ZEND_END_ARG_INFO() #define arginfo_class_ArrayIterator_next arginfo_class_ArrayIterator_rewind -#define arginfo_class_ArrayIterator_valid arginfo_class_ArrayObject_natsort +ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayIterator_valid, 0, 0, _IS_BOOL, 0) 
+ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_ArrayIterator_seek, 0, 1, IS_VOID, 0) ZEND_ARG_TYPE_INFO(0, offset, IS_LONG, 0) @@ -147,7 +148,7 @@ ZEND_END_ARG_INFO() #define arginfo_class_ArrayIterator___debugInfo arginfo_class_ArrayObject_getArrayCopy -#define arginfo_class_RecursiveArrayIterator_hasChildren arginfo_class_ArrayObject_natsort +#define arginfo_class_RecursiveArrayIterator_hasChildren arginfo_class_ArrayIterator_valid ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_OBJ_INFO_EX(arginfo_class_RecursiveArrayIterator_getChildren, 0, 0, RecursiveArrayIterator, 1) ZEND_END_ARG_INFO() diff --git a/ext/spl/spl_directory.c b/ext/spl/spl_directory.c index 029edcdfb21de..378f707f44c3b 100644 --- a/ext/spl/spl_directory.c +++ b/ext/spl/spl_directory.c @@ -454,7 +454,9 @@ static void spl_filesystem_info_set_filename(spl_filesystem_object *intern, zend path_len = ZSTR_LEN(path); if (path_len > 1 && IS_SLASH_AT(ZSTR_VAL(path), path_len-1)) { - path_len--; + do { + path_len--; + } while (path_len > 1 && IS_SLASH_AT(ZSTR_VAL(path), path_len - 1)); intern->file_name = zend_string_init(ZSTR_VAL(path), path_len, 0); } else { intern->file_name = zend_string_copy(path); diff --git a/ext/spl/spl_dllist.c b/ext/spl/spl_dllist.c index 176989936ed8f..74dc7731fd152 100644 --- a/ext/spl/spl_dllist.c +++ b/ext/spl/spl_dllist.c @@ -72,8 +72,8 @@ typedef struct _spl_dllist_it spl_dllist_it; struct _spl_dllist_object { spl_ptr_llist *llist; - int traverse_position; spl_ptr_llist_element *traverse_pointer; + int traverse_position; int flags; zend_function *fptr_offset_get; zend_function *fptr_offset_set; @@ -375,7 +375,8 @@ static zend_object *spl_dllist_object_new_ex(zend_class_entry *class_type, zend_ if (intern->fptr_offset_del->common.scope == parent) { intern->fptr_offset_del = NULL; } - intern->fptr_count = zend_hash_str_find_ptr(&class_type->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + intern->fptr_count = 
zend_hash_find_ptr(&class_type->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (intern->fptr_count->common.scope == parent) { intern->fptr_count = NULL; } diff --git a/ext/spl/spl_fixedarray.c b/ext/spl/spl_fixedarray.c index 574b06fc4e93c..7687544fc6043 100644 --- a/ext/spl/spl_fixedarray.c +++ b/ext/spl/spl_fixedarray.c @@ -83,11 +83,6 @@ static bool spl_fixedarray_empty(spl_fixedarray *array) return true; } -static void spl_fixedarray_illegal_offset(const zval *offset) -{ - zend_type_error("Cannot access offset of type %s on FixedArray", zend_get_type_by_const(Z_TYPE_P(offset))); -} - static void spl_fixedarray_default_ctor(spl_fixedarray *array) { array->size = 0; @@ -287,7 +282,8 @@ static zend_object *spl_fixedarray_object_new_ex(zend_class_entry *class_type, z ZEND_ASSERT(parent); if (UNEXPECTED(inherited)) { - zend_function *fptr_count = zend_hash_str_find_ptr(&class_type->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + zend_function *fptr_count = zend_hash_find_ptr(&class_type->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if (fptr_count->common.scope == parent) { fptr_count = NULL; } @@ -338,7 +334,8 @@ static zend_long spl_offset_convert_to_long(zval *offset) /* {{{ */ return Z_RES_HANDLE_P(offset); } - spl_fixedarray_illegal_offset(offset); + /* Use SplFixedArray name from the CE */ + zend_illegal_container_offset(spl_ce_SplFixedArray->name, offset, BP_VAR_R); return 0; } diff --git a/ext/spl/spl_heap.c b/ext/spl/spl_heap.c index d781ff4ca010d..4f242d3a3c394 100644 --- a/ext/spl/spl_heap.c +++ b/ext/spl/spl_heap.c @@ -451,7 +451,8 @@ static zend_object *spl_heap_object_new_ex(zend_class_entry *class_type, zend_ob if (intern->fptr_cmp->common.scope == parent) { intern->fptr_cmp = NULL; } - intern->fptr_count = zend_hash_str_find_ptr(&class_type->function_table, "count", sizeof("count") - 1); + /* Find count() method */ + intern->fptr_count = zend_hash_find_ptr(&class_type->function_table, ZSTR_KNOWN(ZEND_STR_COUNT)); if 
(intern->fptr_count->common.scope == parent) { intern->fptr_count = NULL; } diff --git a/ext/spl/spl_heap.stub.php b/ext/spl/spl_heap.stub.php index ac9e4151ea308..a1b4dfdbde318 100644 --- a/ext/spl/spl_heap.stub.php +++ b/ext/spl/spl_heap.stub.php @@ -23,7 +23,7 @@ class SplPriorityQueue implements Iterator, Countable /** @tentative-return-type */ public function compare(mixed $priority1, mixed $priority2): int {} - /** @return bool */ + /** @return true */ public function insert(mixed $value, mixed $priority) {} // TODO make return type void /** @tentative-return-type */ diff --git a/ext/spl/spl_heap_arginfo.h b/ext/spl/spl_heap_arginfo.h index 42d9590fea2f4..99eda7e079359 100644 --- a/ext/spl/spl_heap_arginfo.h +++ b/ext/spl/spl_heap_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 8e4784e749d6c70174a0958e73e4e9907adcd4b5 */ + * Stub hash: 4045035ec5bee0f951fa31df75c3f42c31bd8be2 */ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_SplPriorityQueue_compare, 0, 2, IS_LONG, 0) ZEND_ARG_TYPE_INFO(0, priority1, IS_MIXED, 0) diff --git a/ext/spl/tests/ArrayObject_illegal_offset.phpt b/ext/spl/tests/ArrayObject_illegal_offset.phpt index 08353c704c6f3..a2803e4729663 100644 --- a/ext/spl/tests/ArrayObject_illegal_offset.phpt +++ b/ext/spl/tests/ArrayObject_illegal_offset.phpt @@ -36,4 +36,4 @@ Cannot access offset of type array on ArrayObject Cannot access offset of type array on ArrayObject Cannot access offset of type array on ArrayObject Cannot access offset of type array in isset or empty -Cannot access offset of type array in unset +Cannot unset offset of type array on ArrayObject diff --git a/ext/spl/tests/fixedarray_001.phpt b/ext/spl/tests/fixedarray_001.phpt index 35a7a9cf17725..0683555934d53 100644 --- a/ext/spl/tests/fixedarray_001.phpt +++ b/ext/spl/tests/fixedarray_001.phpt @@ -46,7 +46,7 @@ var_dump($b[0]); ?> --EXPECT-- RuntimeException: Index invalid or out of range -TypeError: Cannot 
access offset of type string on FixedArray +TypeError: Cannot access offset of type string on SplFixedArray RuntimeException: Index invalid or out of range string(6) "value0" string(6) "value2" diff --git a/ext/spl/tests/fixedarray_002.phpt b/ext/spl/tests/fixedarray_002.phpt index 940d5996f5dbc..0ee2dcb8ba11d 100644 --- a/ext/spl/tests/fixedarray_002.phpt +++ b/ext/spl/tests/fixedarray_002.phpt @@ -71,7 +71,7 @@ var_dump(count($a), $a->getSize(), count($a) == $a->getSize()); A::offsetSet RuntimeException: Index invalid or out of range A::offsetGet -TypeError: Cannot access offset of type string on FixedArray +TypeError: Cannot access offset of type string on SplFixedArray A::offsetUnset RuntimeException: Index invalid or out of range A::offsetSet diff --git a/ext/spl/tests/fixedarray_003.phpt b/ext/spl/tests/fixedarray_003.phpt index d246561c1b7e8..cca9ac07e9f7a 100644 --- a/ext/spl/tests/fixedarray_003.phpt +++ b/ext/spl/tests/fixedarray_003.phpt @@ -1,5 +1,5 @@ --TEST-- -SPL: FixedArray: Non integer offset handling +SPL: SplFixedArray: Non integer offset handling --FILE-- getBasename()); + var_dump($file->getFilename()); +} + +test('/dir/anotherdir/basedir//'); +test('/dir/anotherdir/basedir/'); +test('/dir/anotherdir/basedir'); +test('/dir/anotherdir//basedir'); +test('///'); +test('//'); +test('/'); +test(''); + +?> +--EXPECT-- +Testing: '/dir/anotherdir/basedir//' +string(7) "basedir" +string(7) "basedir" +Testing: '/dir/anotherdir/basedir/' +string(7) "basedir" +string(7) "basedir" +Testing: '/dir/anotherdir/basedir' +string(7) "basedir" +string(7) "basedir" +Testing: '/dir/anotherdir//basedir' +string(7) "basedir" +string(7) "basedir" +Testing: '///' +string(0) "" +string(1) "/" +Testing: '//' +string(0) "" +string(1) "/" +Testing: '/' +string(0) "" +string(1) "/" +Testing: '' +string(0) "" +string(0) "" diff --git a/ext/standard/array.c b/ext/standard/array.c index 5399706320d19..19269c36a9f8c 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ 
-126,25 +126,25 @@ static zend_never_inline ZEND_COLD int stable_sort_fallback(Bucket *a, Bucket *b static zend_always_inline int php_array_key_compare_unstable_i(Bucket *f, Bucket *s) /* {{{ */ { - zval first; - zval second; - - if (f->key == NULL && s->key == NULL) { - return (zend_long)f->h > (zend_long)s->h ? 1 : -1; - } else if (f->key && s->key) { - return zendi_smart_strcmp(f->key, s->key); - } - if (f->key) { - ZVAL_STR(&first, f->key); - } else { - ZVAL_LONG(&first, f->h); - } - if (s->key) { - ZVAL_STR(&second, s->key); - } else { - ZVAL_LONG(&second, s->h); - } - return zend_compare(&first, &second); + zval first; + zval second; + + if (f->key == NULL && s->key == NULL) { + return (zend_long)f->h > (zend_long)s->h ? 1 : -1; + } else if (f->key && s->key) { + return zendi_smart_strcmp(f->key, s->key); + } + if (f->key) { + ZVAL_STR(&first, f->key); + } else { + ZVAL_LONG(&first, f->h); + } + if (s->key) { + ZVAL_STR(&second, s->key); + } else { + ZVAL_LONG(&second, s->h); + } + return zend_compare(&first, &second); } /* }}} */ @@ -1210,7 +1210,7 @@ static int php_data_compare(const void *f, const void *s) /* {{{ */ Return the lowest value in an array or a series of arguments */ PHP_FUNCTION(min) { - int argc; + uint32_t argc; zval *args = NULL; ZEND_PARSE_PARAMETERS_START(1, -1) @@ -1233,15 +1233,58 @@ PHP_FUNCTION(min) } } else { /* mixed min ( mixed $value1 , mixed $value2 [, mixed $value3... 
] ) */ - zval *min, result; - int i; + zval *min; + uint32_t i; min = &args[0]; + zend_long min_lval; + double min_dval; - for (i = 1; i < argc; i++) { - is_smaller_function(&result, &args[i], min); - if (Z_TYPE(result) == IS_TRUE) { - min = &args[i]; + if (Z_TYPE_P(min) == IS_LONG) { + min_lval = Z_LVAL_P(min); + + for (i = 1; i < argc; i++) { + if (EXPECTED(Z_TYPE(args[i]) == IS_LONG)) { + if (min_lval > Z_LVAL(args[i])) { + min_lval = Z_LVAL(args[i]); + min = &args[i]; + } + } else if (Z_TYPE(args[i]) == IS_DOUBLE && (zend_dval_to_lval((double) min_lval) == min_lval)) { + /* if min_lval can be exactly represented as a double, go to double dedicated code */ + min_dval = (double) min_lval; + goto double_compare; + } else { + goto generic_compare; + } + } + + RETURN_LONG(min_lval); + } else if (Z_TYPE_P(min) == IS_DOUBLE) { + min_dval = Z_DVAL_P(min); + + for (i = 1; i < argc; i++) { + if (EXPECTED(Z_TYPE(args[i]) == IS_DOUBLE)) { + double_compare: + if (min_dval > Z_DVAL(args[i])) { + min_dval = Z_DVAL(args[i]); + min = &args[i]; + } + } else if (Z_TYPE(args[i]) == IS_LONG && (zend_dval_to_lval((double) Z_LVAL(args[i])) == Z_LVAL(args[i]))) { + /* if the value can be exactly represented as a double, use double dedicated code otherwise generic */ + if (min_dval > (double)Z_LVAL(args[i])) { + min_dval = (double)Z_LVAL(args[i]); + min = &args[i]; + } + } else { + goto generic_compare; + } + } + } else { + for (i = 1; i < argc; i++) { + generic_compare: + if (zend_compare(&args[i], min) < 0) { + min = &args[i]; + } } } @@ -1257,7 +1300,7 @@ PHP_FUNCTION(min) PHP_FUNCTION(max) { zval *args = NULL; - int argc; + uint32_t argc; ZEND_PARSE_PARAMETERS_START(1, -1) Z_PARAM_VARIADIC('+', args, argc) @@ -1279,15 +1322,58 @@ PHP_FUNCTION(max) } } else { /* mixed max ( mixed $value1 , mixed $value2 [, mixed $value3... 
] ) */ - zval *max, result; - int i; + zval *max; + uint32_t i; max = &args[0]; + zend_long max_lval; + double max_dval; - for (i = 1; i < argc; i++) { - is_smaller_or_equal_function(&result, &args[i], max); - if (Z_TYPE(result) == IS_FALSE) { - max = &args[i]; + if (Z_TYPE_P(max) == IS_LONG) { + max_lval = Z_LVAL_P(max); + + for (i = 1; i < argc; i++) { + if (EXPECTED(Z_TYPE(args[i]) == IS_LONG)) { + if (max_lval < Z_LVAL(args[i])) { + max_lval = Z_LVAL(args[i]); + max = &args[i]; + } + } else if (Z_TYPE(args[i]) == IS_DOUBLE && (zend_dval_to_lval((double) max_lval) == max_lval)) { + /* if max_lval can be exactly represented as a double, go to double dedicated code */ + max_dval = (double) max_lval; + goto double_compare; + } else { + goto generic_compare; + } + } + + RETURN_LONG(max_lval); + } else if (Z_TYPE_P(max) == IS_DOUBLE) { + max_dval = Z_DVAL_P(max); + + for (i = 1; i < argc; i++) { + if (EXPECTED(Z_TYPE(args[i]) == IS_DOUBLE)) { + double_compare: + if (max_dval < Z_DVAL(args[i])) { + max_dval = Z_DVAL(args[i]); + max = &args[i]; + } + } else if (Z_TYPE(args[i]) == IS_LONG && (zend_dval_to_lval((double) Z_LVAL(args[i])) == Z_LVAL(args[i]))) { + /* if the value can be exactly represented as a double, use double dedicated code otherwise generic */ + if (max_dval < (double)Z_LVAL(args[i])) { + max_dval = (double)Z_LVAL(args[i]); + max = &args[i]; + } + } else { + goto generic_compare; + } + } + } else { + for (i = 1; i < argc; i++) { + generic_compare: + if (zend_compare(&args[i], max) > 0) { + max = &args[i]; + } } } @@ -1301,8 +1387,8 @@ typedef struct { zend_fcall_info_cache fci_cache; } php_array_walk_context; -static int php_array_walk( - php_array_walk_context *context, zval *array, zval *userdata, int recursive) +static zend_result php_array_walk( + php_array_walk_context *context, zval *array, zval *userdata, bool recursive) { zval args[3], /* Arguments to userland function */ retval, /* Return value - unused */ @@ -1310,7 +1396,7 @@ static int 
php_array_walk( HashTable *target_hash = HASH_OF(array); HashPosition pos; uint32_t ht_iter; - int result = SUCCESS; + zend_result result = SUCCESS; /* Create a local copy of fci, as we want to use different arguments at different * levels of recursion. */ @@ -1452,7 +1538,7 @@ PHP_FUNCTION(array_walk) Z_PARAM_ZVAL(userdata) ZEND_PARSE_PARAMETERS_END(); - php_array_walk(&context, array, userdata, 0); + php_array_walk(&context, array, userdata, /* recursive */ false); RETURN_TRUE; } /* }}} */ @@ -1471,7 +1557,7 @@ PHP_FUNCTION(array_walk_recursive) Z_PARAM_ZVAL(userdata) ZEND_PARSE_PARAMETERS_END(); - php_array_walk(&context, array, userdata, 1); + php_array_walk(&context, array, userdata, /* recursive */ true); RETURN_TRUE; } /* }}} */ @@ -1691,7 +1777,7 @@ static zend_long php_extract_ref_if_exists(zend_array *arr, zend_array *symbol_t if (zend_string_equals_literal(var_name, "GLOBALS")) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } @@ -1737,7 +1823,7 @@ static zend_long php_extract_if_exists(zend_array *arr, zend_array *symbol_table if (zend_string_equals_literal(var_name, "GLOBALS")) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } @@ -1770,7 +1856,7 @@ static zend_long php_extract_ref_overwrite(zend_array *arr, zend_array *symbol_t if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } @@ -1820,7 +1906,7 @@ static zend_long php_extract_overwrite(zend_array *arr, zend_array *symbol_table if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - 
if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } @@ -1879,7 +1965,7 @@ static zend_long php_extract_ref_prefix_if_exists(zend_array *arr, zend_array *s } php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -1933,7 +2019,7 @@ static zend_long php_extract_prefix_if_exists(zend_array *arr, zend_array *symbo } php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -1996,7 +2082,7 @@ static zend_long php_extract_ref_prefix_same(zend_array *arr, zend_array *symbol prefix: php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2022,7 +2108,7 @@ static zend_long php_extract_ref_prefix_same(zend_array *arr, zend_array *symbol if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { goto prefix; } if (Z_ISREF_P(entry)) { @@ -2068,7 +2154,7 @@ static zend_long php_extract_prefix_same(zend_array 
*arr, zend_array *symbol_tab prefix: php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2094,7 +2180,7 @@ static zend_long php_extract_prefix_same(zend_array *arr, zend_array *symbol_tab if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { goto prefix; } ZVAL_DEREF(entry); @@ -2127,7 +2213,7 @@ static zend_long php_extract_ref_prefix_all(zend_array *arr, zend_array *symbol_ zend_string_release_ex(str, 0); } if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2174,7 +2260,7 @@ static zend_long php_extract_prefix_all(zend_array *arr, zend_array *symbol_tabl zend_string_release_ex(str, 0); } if (php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2212,7 +2298,7 @@ static zend_long php_extract_ref_prefix_invalid(zend_array *arr, zend_array *sym ZEND_HASH_FOREACH_KEY_VAL(arr, num_key, var_name, entry) { if (var_name) { if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name)) - || zend_string_equals_literal(var_name, "this")) { + || zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 
1); if (!php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { zval_ptr_dtor_str(&final_name); @@ -2230,7 +2316,7 @@ static zend_long php_extract_ref_prefix_invalid(zend_array *arr, zend_array *sym continue; } } - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2267,7 +2353,7 @@ static zend_long php_extract_prefix_invalid(zend_array *arr, zend_array *symbol_ ZEND_HASH_FOREACH_KEY_VAL(arr, num_key, var_name, entry) { if (var_name) { if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name)) - || zend_string_equals_literal(var_name, "this")) { + || zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { php_prefix_varname(&final_name, prefix, ZSTR_VAL(var_name), ZSTR_LEN(var_name), 1); if (!php_valid_var_name(Z_STRVAL(final_name), Z_STRLEN(final_name))) { zval_ptr_dtor_str(&final_name); @@ -2285,7 +2371,7 @@ static zend_long php_extract_prefix_invalid(zend_array *arr, zend_array *symbol_ continue; } } - if (zend_string_equals_literal(Z_STR(final_name), "this")) { + if (zend_string_equals(Z_STR(final_name), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_throw_error(NULL, "Cannot re-assign $this"); return -1; } else { @@ -2328,7 +2414,7 @@ static zend_long php_extract_ref_skip(zend_array *arr, zend_array *symbol_table) if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { continue; } orig_var = zend_hash_find_known_hash(symbol_table, var_name); @@ -2376,7 +2462,7 @@ static zend_long php_extract_skip(zend_array *arr, zend_array *symbol_table) /* if (!php_valid_var_name(ZSTR_VAL(var_name), ZSTR_LEN(var_name))) { continue; } - if (zend_string_equals_literal(var_name, "this")) { + if (zend_string_equals(var_name, ZSTR_KNOWN(ZEND_STR_THIS))) { continue; } orig_var = 
zend_hash_find_known_hash(symbol_table, var_name); @@ -2515,7 +2601,7 @@ static void php_compact_var(HashTable *eg_active_symbol_table, zval *return_valu ZVAL_DEREF(value_ptr); Z_TRY_ADDREF_P(value_ptr); zend_hash_update(Z_ARRVAL_P(return_value), Z_STR_P(entry), value_ptr); - } else if (zend_string_equals_literal(Z_STR_P(entry), "this")) { + } else if (zend_string_equals(Z_STR_P(entry), ZSTR_KNOWN(ZEND_STR_THIS))) { zend_object *object = zend_get_this_object(EG(current_execute_data)); if (object) { ZVAL_OBJ_COPY(&data, object); @@ -2525,7 +2611,7 @@ static void php_compact_var(HashTable *eg_active_symbol_table, zval *return_valu php_error_docref(NULL, E_WARNING, "Undefined variable $%s", ZSTR_VAL(Z_STR_P(entry))); } } else if (Z_TYPE_P(entry) == IS_ARRAY) { - if (Z_REFCOUNTED_P(entry)) { + if (Z_REFCOUNTED_P(entry)) { if (Z_IS_RECURSIVE_P(entry)) { zend_throw_error(NULL, "Recursion detected"); return; @@ -2535,7 +2621,7 @@ static void php_compact_var(HashTable *eg_active_symbol_table, zval *return_valu ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(entry), value_ptr) { php_compact_var(eg_active_symbol_table, return_value, value_ptr, pos); } ZEND_HASH_FOREACH_END(); - if (Z_REFCOUNTED_P(entry)) { + if (Z_REFCOUNTED_P(entry)) { Z_UNPROTECT_RECURSION_P(entry); } } else { @@ -3179,8 +3265,7 @@ PHP_FUNCTION(array_push) zval *args, /* Function arguments array */ *stack, /* Input array */ new_var; /* Variable to be pushed */ - int i, /* Loop counter */ - argc; /* Number of function arguments */ + uint32_t argc; /* Number of function arguments */ ZEND_PARSE_PARAMETERS_START(1, -1) @@ -3189,7 +3274,7 @@ PHP_FUNCTION(array_push) ZEND_PARSE_PARAMETERS_END(); /* For each subsequent argument, make it a reference, increase refcount, and add it to the end of the array */ - for (i = 0; i < argc; i++) { + for (uint32_t i = 0; i < argc; i++) { ZVAL_COPY(&new_var, &args[i]); if (zend_hash_next_index_insert(Z_ARRVAL_P(stack), &new_var) == NULL) { @@ -3387,8 +3472,7 @@ PHP_FUNCTION(array_unshift) 
zval *args, /* Function arguments array */ *stack; /* Input stack */ HashTable new_hash; /* New hashtable for the stack */ - int argc; /* Number of function arguments */ - int i; + uint32_t argc; /* Number of function arguments */ zend_string *key; zval *value; @@ -3398,7 +3482,7 @@ PHP_FUNCTION(array_unshift) ZEND_PARSE_PARAMETERS_END(); zend_hash_init(&new_hash, zend_hash_num_elements(Z_ARRVAL_P(stack)) + argc, NULL, ZVAL_PTR_DTOR, 0); - for (i = 0; i < argc; i++) { + for (uint32_t i = 0; i < argc; i++) { Z_TRY_ADDREF(args[i]); zend_hash_next_index_insert_new(&new_hash, &args[i]); } @@ -3732,7 +3816,12 @@ PHPAPI int php_array_merge_recursive(HashTable *dest, HashTable *src) /* {{{ */ } } else { Z_TRY_ADDREF_P(src_zval); - zend_hash_next_index_insert(Z_ARRVAL_P(dest_zval), src_zval); + zval *zv = zend_hash_next_index_insert(Z_ARRVAL_P(dest_zval), src_zval); + if (EXPECTED(!zv)) { + Z_TRY_DELREF_P(src_zval); + zend_cannot_add_element(); + return 0; + } } zval_ptr_dtor(&tmp); } else { @@ -3741,6 +3830,10 @@ PHPAPI int php_array_merge_recursive(HashTable *dest, HashTable *src) /* {{{ */ } } else { zval *zv = zend_hash_next_index_insert(dest, src_entry); + if (UNEXPECTED(!zv)) { + zend_cannot_add_element(); + return 0; + } zval_add_ref(zv); } } ZEND_HASH_FOREACH_END(); @@ -3818,8 +3911,8 @@ PHPAPI int php_array_replace_recursive(HashTable *dest, HashTable *src) /* {{{ * dest_zval = dest_entry; ZVAL_DEREF(dest_zval); if (Z_IS_RECURSIVE_P(dest_zval) || - Z_IS_RECURSIVE_P(src_zval) || - (Z_ISREF_P(src_entry) && Z_ISREF_P(dest_entry) && Z_REF_P(src_entry) == Z_REF_P(dest_entry) && (Z_REFCOUNT_P(dest_entry) % 2))) { + Z_IS_RECURSIVE_P(src_zval) || + (Z_ISREF_P(src_entry) && Z_ISREF_P(dest_entry) && Z_REF_P(src_entry) == Z_REF_P(dest_entry) && (Z_REFCOUNT_P(dest_entry) % 2))) { zend_throw_error(NULL, "Recursion detected"); return 0; } @@ -3857,7 +3950,7 @@ static zend_always_inline void php_array_replace_wrapper(INTERNAL_FUNCTION_PARAM { zval *args = NULL; zval *arg; - int 
argc, i; + uint32_t argc, i; HashTable *dest; ZEND_PARSE_PARAMETERS_START(1, -1) @@ -3907,7 +4000,7 @@ static zend_always_inline void php_array_merge_wrapper(INTERNAL_FUNCTION_PARAMET { zval *args = NULL; zval *arg; - int argc, i; + uint32_t argc, i; zval *src_entry; HashTable *src, *dest; uint32_t count = 0; @@ -4717,7 +4810,7 @@ static int zval_user_compare(zval *a, zval *b) /* {{{ */ static void php_array_intersect_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_type) /* {{{ */ { - int argc, i; + uint32_t argc, i; zval *args; int (*intersect_data_compare_func)(zval *, zval *) = NULL; bool ok; @@ -4799,7 +4892,8 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int { zval *args = NULL; HashTable *hash; - int arr_argc, i, c = 0; + uint32_t arr_argc, i; + int c = 0; uint32_t idx; Bucket **lists, *list, **ptrs, *p; char *param_spec; @@ -5117,7 +5211,7 @@ PHP_FUNCTION(array_uintersect_uassoc) static void php_array_diff_key(INTERNAL_FUNCTION_PARAMETERS, int data_compare_type) /* {{{ */ { - int argc, i; + uint32_t argc, i; zval *args; int (*diff_data_compare_func)(zval *, zval *) = NULL; bool ok; @@ -5194,7 +5288,8 @@ static void php_array_diff(INTERNAL_FUNCTION_PARAMETERS, int behavior, int data_ { zval *args = NULL; HashTable *hash; - int arr_argc, i, c; + uint32_t arr_argc, i; + int c; uint32_t idx; Bucket **lists, *list, **ptrs, *p; char *param_spec; @@ -5460,7 +5555,7 @@ PHP_FUNCTION(array_diff_ukey) PHP_FUNCTION(array_diff) { zval *args; - int argc, i; + uint32_t argc, i; uint32_t num; HashTable exclude; zval *value; @@ -5661,15 +5756,15 @@ PHP_FUNCTION(array_multisort) zval* args; zval** arrays; Bucket** indirect; - uint32_t idx; + uint32_t idx; HashTable* hash; - int argc; - int array_size; - int num_arrays = 0; + uint32_t argc; + uint32_t array_size; + uint32_t num_arrays = 0; int parse_state[MULTISORT_LAST]; /* 0 - flag not allowed 1 - flag allowed */ int sort_order = PHP_SORT_ASC; int sort_type = PHP_SORT_REGULAR; - int i, k, 
n; + uint32_t i, k, n; bucket_compare_func_t *func; ZEND_PARSE_PARAMETERS_START(1, -1) @@ -5755,7 +5850,7 @@ PHP_FUNCTION(array_multisort) /* Make sure the arrays are of the same size. */ array_size = zend_hash_num_elements(Z_ARRVAL_P(arrays[0])); for (i = 1; i < num_arrays; i++) { - if (zend_hash_num_elements(Z_ARRVAL_P(arrays[i])) != (uint32_t)array_size) { + if (zend_hash_num_elements(Z_ARRVAL_P(arrays[i])) != array_size) { zend_value_error("Array sizes are inconsistent"); MULTISORT_ABORT; } @@ -5773,8 +5868,10 @@ PHP_FUNCTION(array_multisort) * of the input arrays + 1. The last column is UNDEF to indicate the end * of the row. It also stores the original position for stable sorting. */ indirect = (Bucket **)safe_emalloc(array_size, sizeof(Bucket *), 0); + /* Move num_arrays multiplication to size because it's essentially impossible to overflow. */ + Bucket *indirects = (Bucket *)safe_emalloc(array_size, sizeof(Bucket) * (num_arrays + 1), 0); for (i = 0; i < array_size; i++) { - indirect[i] = (Bucket *)safe_emalloc((num_arrays + 1), sizeof(Bucket), 0); + indirect[i] = indirects + (i * (num_arrays + 1)); } for (i = 0; i < num_arrays; i++) { k = 0; @@ -5803,6 +5900,9 @@ PHP_FUNCTION(array_multisort) /* Do the actual sort magic - bada-bim, bada-boom. */ zend_sort(indirect, array_size, sizeof(Bucket *), php_multisort_compare, (swap_func_t)array_bucket_p_sawp); + if (EG(exception)) { + goto clean_up; + } /* Restructure the arrays based on sorted indirect - this is mostly taken from zend_hash_sort() function. 
*/ for (i = 0; i < num_arrays; i++) { @@ -5815,14 +5915,14 @@ PHP_FUNCTION(array_multisort) ZVAL_COPY_VALUE(&hash->arPacked[k], &indirect[k][i].val); } } else { - int repack = 1; + bool repack = true; for (n = 0, k = 0; k < array_size; k++) { hash->arData[k] = indirect[k][i]; if (hash->arData[k].key == NULL) { hash->arData[k].h = n++; } else { - repack = 0; + repack = false; } } if (repack) { @@ -5832,15 +5932,13 @@ PHP_FUNCTION(array_multisort) } } } + RETVAL_TRUE; - /* Clean up. */ - for (i = 0; i < array_size; i++) { - efree(indirect[i]); - } +clean_up: + efree(indirects); efree(indirect); efree(func); efree(arrays); - RETURN_TRUE; } /* }}} */ diff --git a/ext/standard/assert.c b/ext/standard/assert.c index 0b43033dd4d30..e9a3ef3158f25 100644 --- a/ext/standard/assert.c +++ b/ext/standard/assert.c @@ -34,8 +34,6 @@ ZEND_DECLARE_MODULE_GLOBALS(assert) #define ASSERTG(v) ZEND_MODULE_GLOBALS_ACCESSOR(assert, v) -#define SAFE_STRING(s) ((s)?(s):"") - PHPAPI zend_class_entry *assertion_error_ce; static PHP_INI_MH(OnChangeCallback) /* {{{ */ @@ -151,9 +149,12 @@ PHP_FUNCTION(assert) zval args[4]; zval retval; uint32_t lineno = zend_get_executed_lineno(); - const char *filename = zend_get_executed_filename(); + zend_string *filename = zend_get_executed_filename_ex(); + if (UNEXPECTED(!filename)) { + filename = ZSTR_KNOWN(ZEND_STR_UNKNOWN_CAPITALIZED); + } - ZVAL_STRING(&args[0], SAFE_STRING(filename)); + ZVAL_STR(&args[0], filename); ZVAL_LONG(&args[1], lineno); ZVAL_NULL(&args[2]); @@ -166,7 +167,6 @@ PHP_FUNCTION(assert) call_user_function(NULL, NULL, &ASSERTG(callback), &retval, 3, args); } - zval_ptr_dtor(&args[0]); zval_ptr_dtor(&retval); } @@ -177,7 +177,7 @@ PHP_FUNCTION(assert) zend_exception_error(EG(exception), E_ERROR); } } else if (ASSERTG(warning)) { - php_error_docref(NULL, E_WARNING, "%s failed", description_str ? ZSTR_VAL(description_str) : "Assertion failed"); + php_error_docref(NULL, E_WARNING, "%s failed", description_str ? 
ZSTR_VAL(description_str) : "Assertion"); } if (ASSERTG(bail)) { @@ -195,7 +195,7 @@ PHP_FUNCTION(assert_options) zval *value = NULL; zend_long what; bool oldint; - int ac = ZEND_NUM_ARGS(); + uint32_t ac = ZEND_NUM_ARGS(); zend_string *key; ZEND_PARSE_PARAMETERS_START(1, 2) diff --git a/ext/standard/basic_functions.stub.php b/ext/standard/basic_functions.stub.php index 9124626b5c5b5..effb05ff9f982 100755 --- a/ext/standard/basic_functions.stub.php +++ b/ext/standard/basic_functions.stub.php @@ -1574,9 +1574,9 @@ function count(Countable|array $value, int $mode = COUNT_NORMAL): int {} /** @alias count */ function sizeof(Countable|array $value, int $mode = COUNT_NORMAL): int {} -function natsort(array &$array): bool {} +function natsort(array &$array): true {} -function natcasesort(array &$array): bool {} +function natcasesort(array &$array): true {} function asort(array &$array, int $flags = SORT_REGULAR): true {} @@ -1584,7 +1584,7 @@ function arsort(array &$array, int $flags = SORT_REGULAR): true {} function sort(array &$array, int $flags = SORT_REGULAR): true {} -function rsort(array &$array, int $flags = SORT_REGULAR): bool {} +function rsort(array &$array, int $flags = SORT_REGULAR): true {} function usort(array &$array, callable $callback): true {} @@ -1660,6 +1660,9 @@ function array_unshift(array &$array, mixed ...$values): int {} function array_splice(array &$array, int $offset, ?int $length = null, mixed $replacement = []): array {} +/** + * @compile-time-eval + */ function array_slice(array $array, int $offset, ?int $length = null, bool $preserve_keys = false): array {} /** @@ -1708,14 +1711,21 @@ function array_values(array $array): array {} /** * @return array + * @compile-time-eval * @refcount 1 */ function array_count_values(array $array): array {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function array_column(array $array, int|string|null $column_key, int|string|null $index_key = null): array {} -/** @refcount 1 */ +/** + 
* @compile-time-eval + * @refcount 1 + */ function array_reverse(array $array, bool $preserve_keys = false): array {} function array_pad(array $array, int $length, mixed $value): array {} @@ -1844,8 +1854,14 @@ function array_multisort(&$array, &...$rest): bool {} /** @return int|string|array */ function array_rand(array $array, int $num = 1): int|string|array {} +/** + * @compile-time-eval + */ function array_sum(array $array): int|float {} +/** + * @compile-time-eval + */ function array_product(array $array): int|float {} function array_reduce(array $array, callable $callback, mixed $initial = null): mixed {} @@ -1866,8 +1882,14 @@ function array_key_exists($key, array $array): bool {} */ function key_exists($key, array $array): bool {} +/** + * @compile-time-eval + */ function array_chunk(array $array, int $length, bool $preserve_keys = false): array {} +/** + * @compile-time-eval + */ function array_combine(array $keys, array $values): array {} /** @compile-time-eval */ @@ -2033,6 +2055,7 @@ function parse_ini_file(string $filename, bool $process_sections = false, int $s /** * @return array|false + * @compile-time-eval * @refcount 1 */ function parse_ini_string(string $ini_string, bool $process_sections = false, int $scanner_mode = INI_SCANNER_NORMAL): array|false {} @@ -2261,8 +2284,14 @@ function bin2hex(string $string): string {} */ function hex2bin(string $string): string|false {} +/** + * @compile-time-eval + */ function strspn(string $string, string $characters, int $offset = 0, ?int $length = null): int {} +/** + * @compile-time-eval + */ function strcspn(string $string, string $characters, int $offset = 0, ?int $length = null): int {} #ifdef HAVE_NL_LANGINFO @@ -2284,7 +2313,10 @@ function chop(string $string, string $characters = " \n\r\t\v\0"): string {} /** @compile-time-eval */ function ltrim(string $string, string $characters = " \n\r\t\v\0"): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function wordwrap(string 
$string, int $width = 75, string $break = "\n", bool $cut_long_words = false): string {} /** @@ -2302,7 +2334,10 @@ function implode(string|array $separator, ?array $array = null): string {} /** @alias implode */ function join(string|array $separator, ?array $array = null): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function strtok(string $string, ?string $token = null): string|false {} /** @compile-time-eval */ @@ -2323,7 +2358,10 @@ function dirname(string $path, int $levels = 1): string {} */ function pathinfo(string $path, int $flags = PATHINFO_ALL): array|string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function stristr(string $haystack, string $needle, bool $before_needle = false): string|false {} /** @@ -2347,7 +2385,10 @@ function strrpos(string $haystack, string $needle, int $offset = 0): int|false { /** @compile-time-eval */ function strripos(string $haystack, string $needle, int $offset = 0): int|false {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function strrchr(string $haystack, string $needle): string|false {} /** @compile-time-eval */ @@ -2359,16 +2400,25 @@ function str_starts_with(string $haystack, string $needle): bool {} /** @compile-time-eval */ function str_ends_with(string $haystack, string $needle): bool {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function chunk_split(string $string, int $length = 76, string $separator = "\r\n"): string {} /** @compile-time-eval */ function substr(string $string, int $offset, ?int $length = null): string {} -/** @return string|array */ +/** + * @return string|array + * @compile-time-eval + */ function substr_replace(array|string $string, array|string $replace, array|int $offset, array|int|null $length = null): string|array {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function quotemeta(string $string): string {} /** @compile-time-eval */ @@ -2392,6 +2442,9 @@ 
function lcfirst(string $string): string {} */ function ucwords(string $string, string $separators = " \t\r\n\f\v"): string {} +/** + * @compile-time-eval + */ function strtr(string $string, string|array $from, ?string $to = null): string {} /** @@ -2403,14 +2456,26 @@ function strrev(string $string): string {} /** @param float $percent */ function similar_text(string $string1, string $string2, &$percent = null): int {} +/** + * @compile-time-eval + */ function addcslashes(string $string, string $characters): string {} +/** + * @compile-time-eval + */ function addslashes(string $string): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function stripcslashes(string $string): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function stripslashes(string $string): string {} /** @@ -2430,9 +2495,15 @@ function str_ireplace(array|string $search, array|string $replace, string|array /** @refcount 1 */ function hebrev(string $string, int $max_chars_per_line = 0): string {} +/** + * @compile-time-eval + */ function nl2br(string $string, bool $use_xhtml = true): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function strip_tags(string $string, array|string|null $allowed_tags = null): string {} /** @@ -2455,6 +2526,7 @@ function str_repeat(string $string, int $times): string {} /** * @return array|string + * @compile-time-eval * @refcount 1 */ function count_chars(string $string, int $mode = 0): array|string {} @@ -2469,6 +2541,9 @@ function localeconv(): array {} function strnatcasecmp(string $string1, string $string2): int {} +/** + * @compile-time-eval + */ function substr_count(string $haystack, string $needle, int $offset = 0, ?int $length = null): int {} function str_pad(string $string, int $length, string $pad_string = " ", int $pad_type = STR_PAD_RIGHT): string {} @@ -2479,7 +2554,10 @@ function str_pad(string $string, int $length, string $pad_string = " ", int $pad */ function 
sscanf(string $string, string $format, mixed &...$vars): array|int|null {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function str_rot13(string $string): string {} /** @refcount 1 */ @@ -2497,18 +2575,26 @@ function str_word_count(string $string, int $format = 0, ?string $characters = n */ function str_split(string $string, int $length = 1): array {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function strpbrk(string $string, string $characters): string|false {} +/** + * @compile-time-eval + */ function substr_compare(string $haystack, string $needle, int $offset, ?int $length = null, bool $case_insensitive = false): int {} /** + * @compile-time-eval * @refcount 1 * @deprecated */ function utf8_encode(string $string): string {} /** + * @compile-time-eval * @refcount 1 * @deprecated */ @@ -2883,10 +2969,16 @@ function http_build_query(array|object $data, string $numeric_prefix = "", ?stri /* image.c */ -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function image_type_to_mime_type(int $image_type): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function image_type_to_extension(int $image_type, bool $include_dot = true): string|false {} /** @@ -3165,6 +3257,9 @@ function password_needs_rehash(string $hash, string|int|null $algo, array $optio function password_verify(#[\SensitiveParameter] string $password, string $hash): bool {} +/** + * @compile-time-eval + */ function password_algos(): array {} /* proc_open.c */ @@ -3193,10 +3288,16 @@ function proc_get_status($process): array {} /* quot_print.c */ -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function quoted_printable_decode(string $string): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function quoted_printable_encode(string $string): string {} /* soundex.c */ @@ -3527,6 +3628,7 @@ function uniqid(string $prefix = "", bool $more_entropy = false): string 
{} /** * @return int|string|array|null|false + * @compile-time-eval * @refcount 1 */ function parse_url(string $url, int $component = -1): int|string|array|null|false {} @@ -3592,10 +3694,16 @@ function stream_filter_register(string $filter_name, string $class): bool {} /* uuencode.c */ -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function convert_uuencode(string $string): string {} -/** @refcount 1 */ +/** + * @compile-time-eval + * @refcount 1 + */ function convert_uudecode(string $string): string|false {} /* var.c */ diff --git a/ext/standard/basic_functions_arginfo.h b/ext/standard/basic_functions_arginfo.h index bb46fb87090f6..5612ee21867e8 100644 --- a/ext/standard/basic_functions_arginfo.h +++ b/ext/standard/basic_functions_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 73f82e392f5adf146b9b8dfb39496b3ce8465115 */ + * Stub hash: 9cc9c0954bd7032d363ce9a531be621274b9a7e2 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_set_time_limit, 0, 1, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO(0, seconds, IS_LONG, 0) @@ -88,7 +88,7 @@ ZEND_END_ARG_INFO() #define arginfo_sizeof arginfo_count -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_natsort, 0, 1, _IS_BOOL, 0) +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_natsort, 0, 1, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) ZEND_END_ARG_INFO() @@ -100,10 +100,7 @@ ZEND_END_ARG_INFO() #define arginfo_sort arginfo_krsort -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_rsort, 0, 1, _IS_BOOL, 0) - ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, flags, IS_LONG, 0, "SORT_REGULAR") -ZEND_END_ARG_INFO() +#define arginfo_rsort arginfo_krsort ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_usort, 0, 2, IS_TRUE, 0) ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) @@ -189,9 +186,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_range, 0, 2, IS_ARRAY, 0) ZEND_ARG_TYPE_MASK(0, step, MAY_BE_LONG|MAY_BE_DOUBLE, "1") 
ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_shuffle, 0, 1, IS_TRUE, 0) - ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) -ZEND_END_ARG_INFO() +#define arginfo_shuffle arginfo_natsort ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_array_pop, 0, 1, IS_MIXED, 0) ZEND_ARG_TYPE_INFO(1, array, IS_ARRAY, 0) @@ -2881,7 +2876,7 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(array_shift, arginfo_array_shift) ZEND_FE(array_unshift, arginfo_array_unshift) ZEND_FE(array_splice, arginfo_array_splice) - ZEND_FE(array_slice, arginfo_array_slice) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_slice, arginfo_array_slice) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_merge, arginfo_array_merge) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_merge_recursive, arginfo_array_merge_recursive) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_replace, arginfo_array_replace) @@ -2890,9 +2885,9 @@ static const zend_function_entry ext_functions[] = { ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_key_first, arginfo_array_key_first) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_key_last, arginfo_array_key_last) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_values, arginfo_array_values) - ZEND_FE(array_count_values, arginfo_array_count_values) - ZEND_FE(array_column, arginfo_array_column) - ZEND_FE(array_reverse, arginfo_array_reverse) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_count_values, arginfo_array_count_values) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_column, arginfo_array_column) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_reverse, arginfo_array_reverse) ZEND_FE(array_pad, arginfo_array_pad) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_flip, arginfo_array_flip) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_change_key_case, arginfo_array_change_key_case) @@ -2915,15 +2910,15 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(array_udiff_uassoc, arginfo_array_udiff_uassoc) ZEND_FE(array_multisort, arginfo_array_multisort) ZEND_FE(array_rand, arginfo_array_rand) - ZEND_FE(array_sum, 
arginfo_array_sum) - ZEND_FE(array_product, arginfo_array_product) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_sum, arginfo_array_sum) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_product, arginfo_array_product) ZEND_FE(array_reduce, arginfo_array_reduce) ZEND_FE(array_filter, arginfo_array_filter) ZEND_FE(array_map, arginfo_array_map) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_key_exists, arginfo_array_key_exists) ZEND_FALIAS(key_exists, array_key_exists, arginfo_key_exists) - ZEND_FE(array_chunk, arginfo_array_chunk) - ZEND_FE(array_combine, arginfo_array_combine) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_chunk, arginfo_array_chunk) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_combine, arginfo_array_combine) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(array_is_list, arginfo_array_is_list) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(base64_encode, arginfo_base64_encode) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(base64_decode, arginfo_base64_decode) @@ -2987,7 +2982,7 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(is_uploaded_file, arginfo_is_uploaded_file) ZEND_FE(move_uploaded_file, arginfo_move_uploaded_file) ZEND_FE(parse_ini_file, arginfo_parse_ini_file) - ZEND_FE(parse_ini_string, arginfo_parse_ini_string) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(parse_ini_string, arginfo_parse_ini_string) #if ZEND_DEBUG ZEND_FE(config_get_hash, arginfo_config_get_hash) #endif @@ -3069,8 +3064,8 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(assert_options, arginfo_assert_options) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(bin2hex, arginfo_bin2hex) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(hex2bin, arginfo_hex2bin) - ZEND_FE(strspn, arginfo_strspn) - ZEND_FE(strcspn, arginfo_strcspn) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strspn, arginfo_strspn) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strcspn, arginfo_strcspn) #if defined(HAVE_NL_LANGINFO) ZEND_FE(nl_langinfo, arginfo_nl_langinfo) #endif @@ -3079,65 +3074,65 @@ static const zend_function_entry ext_functions[] = { 
ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(rtrim, arginfo_rtrim) ZEND_FALIAS(chop, rtrim, arginfo_chop) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(ltrim, arginfo_ltrim) - ZEND_FE(wordwrap, arginfo_wordwrap) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(wordwrap, arginfo_wordwrap) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(explode, arginfo_explode) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(implode, arginfo_implode) ZEND_FALIAS(join, implode, arginfo_join) - ZEND_FE(strtok, arginfo_strtok) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strtok, arginfo_strtok) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strtoupper, arginfo_strtoupper) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strtolower, arginfo_strtolower) ZEND_FE(basename, arginfo_basename) ZEND_FE(dirname, arginfo_dirname) ZEND_FE(pathinfo, arginfo_pathinfo) - ZEND_FE(stristr, arginfo_stristr) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(stristr, arginfo_stristr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strstr, arginfo_strstr) ZEND_FALIAS(strchr, strstr, arginfo_strchr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strpos, arginfo_strpos) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(stripos, arginfo_stripos) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strrpos, arginfo_strrpos) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strripos, arginfo_strripos) - ZEND_FE(strrchr, arginfo_strrchr) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strrchr, arginfo_strrchr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_contains, arginfo_str_contains) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_starts_with, arginfo_str_starts_with) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_ends_with, arginfo_str_ends_with) - ZEND_FE(chunk_split, arginfo_chunk_split) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(chunk_split, arginfo_chunk_split) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(substr, arginfo_substr) - ZEND_FE(substr_replace, arginfo_substr_replace) - ZEND_FE(quotemeta, arginfo_quotemeta) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(substr_replace, arginfo_substr_replace) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(quotemeta, arginfo_quotemeta) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(ord, arginfo_ord) 
ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(chr, arginfo_chr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(ucfirst, arginfo_ucfirst) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(lcfirst, arginfo_lcfirst) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(ucwords, arginfo_ucwords) - ZEND_FE(strtr, arginfo_strtr) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strtr, arginfo_strtr) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strrev, arginfo_strrev) ZEND_FE(similar_text, arginfo_similar_text) - ZEND_FE(addcslashes, arginfo_addcslashes) - ZEND_FE(addslashes, arginfo_addslashes) - ZEND_FE(stripcslashes, arginfo_stripcslashes) - ZEND_FE(stripslashes, arginfo_stripslashes) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(addcslashes, arginfo_addcslashes) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(addslashes, arginfo_addslashes) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(stripcslashes, arginfo_stripcslashes) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(stripslashes, arginfo_stripslashes) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_replace, arginfo_str_replace) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_ireplace, arginfo_str_ireplace) ZEND_FE(hebrev, arginfo_hebrev) - ZEND_FE(nl2br, arginfo_nl2br) - ZEND_FE(strip_tags, arginfo_strip_tags) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(nl2br, arginfo_nl2br) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strip_tags, arginfo_strip_tags) ZEND_FE(setlocale, arginfo_setlocale) ZEND_FE(parse_str, arginfo_parse_str) ZEND_FE(str_getcsv, arginfo_str_getcsv) ZEND_FE(str_repeat, arginfo_str_repeat) - ZEND_FE(count_chars, arginfo_count_chars) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(count_chars, arginfo_count_chars) ZEND_FE(strnatcmp, arginfo_strnatcmp) ZEND_FE(localeconv, arginfo_localeconv) ZEND_FE(strnatcasecmp, arginfo_strnatcasecmp) - ZEND_FE(substr_count, arginfo_substr_count) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(substr_count, arginfo_substr_count) ZEND_FE(str_pad, arginfo_str_pad) ZEND_FE(sscanf, arginfo_sscanf) - ZEND_FE(str_rot13, arginfo_str_rot13) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_rot13, arginfo_str_rot13) ZEND_FE(str_shuffle, 
arginfo_str_shuffle) ZEND_FE(str_word_count, arginfo_str_word_count) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(str_split, arginfo_str_split) - ZEND_FE(strpbrk, arginfo_strpbrk) - ZEND_FE(substr_compare, arginfo_substr_compare) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(strpbrk, arginfo_strpbrk) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(substr_compare, arginfo_substr_compare) ZEND_DEP_FE(utf8_encode, arginfo_utf8_encode) ZEND_DEP_FE(utf8_decode, arginfo_utf8_decode) ZEND_FE(opendir, arginfo_opendir) @@ -3250,8 +3245,8 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(fsockopen, arginfo_fsockopen) ZEND_FE(pfsockopen, arginfo_pfsockopen) ZEND_FE(http_build_query, arginfo_http_build_query) - ZEND_FE(image_type_to_mime_type, arginfo_image_type_to_mime_type) - ZEND_FE(image_type_to_extension, arginfo_image_type_to_extension) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(image_type_to_mime_type, arginfo_image_type_to_mime_type) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(image_type_to_extension, arginfo_image_type_to_extension) ZEND_FE(getimagesize, arginfo_getimagesize) ZEND_FE(getimagesizefromstring, arginfo_getimagesizefromstring) ZEND_FE(phpinfo, arginfo_phpinfo) @@ -3334,7 +3329,7 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(password_hash, arginfo_password_hash) ZEND_FE(password_needs_rehash, arginfo_password_needs_rehash) ZEND_FE(password_verify, arginfo_password_verify) - ZEND_FE(password_algos, arginfo_password_algos) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(password_algos, arginfo_password_algos) #if defined(PHP_CAN_SUPPORT_PROC_OPEN) ZEND_FE(proc_open, arginfo_proc_open) #endif @@ -3347,8 +3342,8 @@ static const zend_function_entry ext_functions[] = { #if defined(PHP_CAN_SUPPORT_PROC_OPEN) ZEND_FE(proc_get_status, arginfo_proc_get_status) #endif - ZEND_FE(quoted_printable_decode, arginfo_quoted_printable_decode) - ZEND_FE(quoted_printable_encode, arginfo_quoted_printable_encode) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(quoted_printable_decode, 
arginfo_quoted_printable_decode) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(quoted_printable_encode, arginfo_quoted_printable_encode) ZEND_FE(soundex, arginfo_soundex) ZEND_FE(stream_select, arginfo_stream_select) ZEND_FE(stream_context_create, arginfo_stream_context_create) @@ -3427,7 +3422,7 @@ static const zend_function_entry ext_functions[] = { #if defined(HAVE_GETTIMEOFDAY) ZEND_FE(uniqid, arginfo_uniqid) #endif - ZEND_FE(parse_url, arginfo_parse_url) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(parse_url, arginfo_parse_url) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(urlencode, arginfo_urlencode) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(urldecode, arginfo_urldecode) ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(rawurlencode, arginfo_rawurlencode) @@ -3439,8 +3434,8 @@ static const zend_function_entry ext_functions[] = { ZEND_FE(stream_bucket_new, arginfo_stream_bucket_new) ZEND_FE(stream_get_filters, arginfo_stream_get_filters) ZEND_FE(stream_filter_register, arginfo_stream_filter_register) - ZEND_FE(convert_uuencode, arginfo_convert_uuencode) - ZEND_FE(convert_uudecode, arginfo_convert_uudecode) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(convert_uuencode, arginfo_convert_uuencode) + ZEND_SUPPORTS_COMPILE_TIME_EVAL_FE(convert_uudecode, arginfo_convert_uudecode) ZEND_FE(var_dump, arginfo_var_dump) ZEND_FE(var_export, arginfo_var_export) ZEND_FE(debug_zval_dump, arginfo_debug_zval_dump) diff --git a/ext/standard/filestat.c b/ext/standard/filestat.c index a55f7b6fb8ce8..4fb712cabd985 100644 --- a/ext/standard/filestat.c +++ b/ext/standard/filestat.c @@ -889,7 +889,7 @@ PHPAPI void php_stat(zend_string *filename, int type, zval *return_value) case S_IFCHR: RETURN_STRING("char"); case S_IFDIR: RETURN_STRING("dir"); case S_IFBLK: RETURN_STRING("block"); - case S_IFREG: RETURN_STRING("file"); + case S_IFREG: RETURN_STR(ZSTR_KNOWN(ZEND_STR_FILE)); /* "file" */ #if defined(S_IFSOCK) && !defined(PHP_WIN32) case S_IFSOCK: RETURN_STRING("socket"); #endif diff --git a/ext/standard/head.c b/ext/standard/head.c index 
5bdae98dfce56..7d223c646f215 100644 --- a/ext/standard/head.c +++ b/ext/standard/head.c @@ -363,6 +363,18 @@ PHP_FUNCTION(http_response_code) if (response_code) { + if (SG(headers_sent) && !SG(request_info).no_headers) { + const char *output_start_filename = php_output_get_start_filename(); + int output_start_lineno = php_output_get_start_lineno(); + + if (output_start_filename) { + php_error_docref(NULL, E_WARNING, "Cannot set response code - headers already sent " + "(output started at %s:%d)", output_start_filename, output_start_lineno); + } else { + php_error_docref(NULL, E_WARNING, "Cannot set response code - headers already sent"); + } + RETURN_FALSE; + } zend_long old_response_code; old_response_code = SG(sapi_headers).http_response_code; diff --git a/ext/standard/http_fopen_wrapper.c b/ext/standard/http_fopen_wrapper.c index 89ea1220177f4..daaaa41b00f9b 100644 --- a/ext/standard/http_fopen_wrapper.c +++ b/ext/standard/http_fopen_wrapper.c @@ -79,6 +79,7 @@ #define HTTP_WRAPPER_HEADER_INIT 1 #define HTTP_WRAPPER_REDIRECTED 2 +#define HTTP_WRAPPER_KEEP_METHOD 4 static inline void strip_header(char *header_bag, char *lc_header_bag, const char *lc_header_name) @@ -140,6 +141,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char *user_headers = NULL; int header_init = ((flags & HTTP_WRAPPER_HEADER_INIT) != 0); int redirected = ((flags & HTTP_WRAPPER_REDIRECTED) != 0); + int redirect_keep_method = ((flags & HTTP_WRAPPER_KEEP_METHOD) != 0); bool follow_location = 1; php_stream_filter *transfer_encoding = NULL; int response_code; @@ -158,6 +160,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, return NULL; } + ZEND_ASSERT(resource->scheme); if (!zend_string_equals_literal_ci(resource->scheme, "http") && !zend_string_equals_literal_ci(resource->scheme, "https")) { if (!context || @@ -181,7 +184,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, return NULL; } - use_ssl = 
resource->scheme && (ZSTR_LEN(resource->scheme) > 4) && ZSTR_VAL(resource->scheme)[4] == 's'; + use_ssl = (ZSTR_LEN(resource->scheme) > 4) && ZSTR_VAL(resource->scheme)[4] == 's'; /* choose default ports */ if (use_ssl && resource->port == 0) resource->port = 443; @@ -363,8 +366,8 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, if (context && (tmpzval = php_stream_context_get_option(context, "http", "method")) != NULL) { if (Z_TYPE_P(tmpzval) == IS_STRING && Z_STRLEN_P(tmpzval) > 0) { /* As per the RFC, automatically redirected requests MUST NOT use other methods than - * GET and HEAD unless it can be confirmed by the user */ - if (!redirected + * GET and HEAD unless it can be confirmed by the user. */ + if (!redirected || redirect_keep_method || zend_string_equals_literal(Z_STR_P(tmpzval), "GET") || zend_string_equals_literal(Z_STR_P(tmpzval), "HEAD") ) { @@ -458,7 +461,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, zend_str_tolower(ZSTR_VAL(tmp), ZSTR_LEN(tmp)); t = ZSTR_VAL(tmp); - if (!header_init) { + if (!header_init && !redirect_keep_method) { /* strip POST headers on redirect */ strip_header(user_headers, t, "content-length:"); strip_header(user_headers, t, "content-type:"); @@ -606,7 +609,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, * see bug #44603 for details. Since Content-Type maybe part of user's headers we need to do this check first. 
*/ if ( - header_init && + (header_init || redirect_keep_method) && context && !(have_header & HTTP_HEADER_CONTENT_LENGTH) && (tmpzval = php_stream_context_get_option(context, "http", "content")) != NULL && @@ -624,7 +627,7 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, } /* Request content, such as for POST requests */ - if (header_init && context && + if ((header_init || redirect_keep_method) && context && (tmpzval = php_stream_context_get_option(context, "http", "content")) != NULL && Z_TYPE_P(tmpzval) == IS_STRING && Z_STRLEN_P(tmpzval) > 0) { if (!(have_header & HTTP_HEADER_CONTENT_LENGTH)) { @@ -913,9 +916,16 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, CHECK_FOR_CNTRL_CHARS(resource->pass); CHECK_FOR_CNTRL_CHARS(resource->path); } + int new_flags = HTTP_WRAPPER_REDIRECTED; + if (response_code == 307 || response_code == 308) { + /* RFC 7538 specifies that status code 308 does not allow changing the request method from POST to GET. + * RFC 7231 does the same for status code 307. + * To keep consistency between POST and PATCH requests, we'll also not change the request method from PATCH to GET, even though the RFC allows, but does not mandate, that change. */ + new_flags |= HTTP_WRAPPER_KEEP_METHOD; + } stream = php_stream_url_wrap_http_ex( wrapper, new_path, mode, options, opened_path, context, - --redirect_max, HTTP_WRAPPER_REDIRECTED, response_header STREAMS_CC); + --redirect_max, new_flags, response_header STREAMS_CC); } else { php_stream_wrapper_log_error(wrapper, options, "HTTP request failed! %s", tmp_line); } @@ -955,6 +965,13 @@ static php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, if (transfer_encoding) { php_stream_filter_append(&stream->readfilters, transfer_encoding); } + + /* It's possible that the server already sent in more data than just the headers.
+ * We account for this by adjusting the progress counter by the difference of + * already read header data and the body. */ + if (stream->writepos > stream->readpos) { + php_stream_notify_progress_increment(context, stream->writepos - stream->readpos, 0); + } } return stream; diff --git a/ext/standard/proc_open.c b/ext/standard/proc_open.c index 03fd0716bacf3..182860720c608 100644 --- a/ext/standard/proc_open.c +++ b/ext/standard/proc_open.c @@ -917,7 +917,7 @@ static zend_result set_proc_descriptor_from_array(zval *descitem, descriptorspec } else if (zend_string_equals_literal(ztype, "socket")) { /* Set descriptor to socketpair */ retval = set_proc_descriptor_to_socket(&descriptors[ndesc]); - } else if (zend_string_equals_literal(ztype, "file")) { + } else if (zend_string_equals(ztype, ZSTR_KNOWN(ZEND_STR_FILE))) { /* Set descriptor to file */ if ((zfile = get_string_parameter(descitem, 1, "file name parameter for 'file'")) == NULL) { goto finish; @@ -940,7 +940,7 @@ static zend_result set_proc_descriptor_from_array(zval *descitem, descriptorspec retval = redirect_proc_descriptor( &descriptors[ndesc], (int)Z_LVAL_P(ztarget), descriptors, ndesc, nindex); - } else if (zend_string_equals_literal(ztype, "null")) { + } else if (zend_string_equals(ztype, ZSTR_KNOWN(ZEND_STR_NULL_LOWERCASE))) { /* Set descriptor to blackhole (discard all data written) */ retval = set_proc_descriptor_to_blackhole(&descriptors[ndesc]); } else if (zend_string_equals_literal(ztype, "pty")) { diff --git a/ext/standard/tests/array/array_key_exists_variation1.phpt b/ext/standard/tests/array/array_key_exists_variation1.phpt index eb35d1bfae0c1..e3133cfc8eb1c 100644 --- a/ext/standard/tests/array/array_key_exists_variation1.phpt +++ b/ext/standard/tests/array/array_key_exists_variation1.phpt @@ -141,7 +141,7 @@ bool(true) bool(true) -- Iteration 17 -- -Cannot access offset of type object on array +Cannot access offset of type classA on array -- Iteration 18 -- bool(false) diff --git 
a/ext/standard/tests/array/bug68553.phpt b/ext/standard/tests/array/bug68553.phpt index 7325a68da5413..f16c94699d5af 100644 --- a/ext/standard/tests/array/bug68553.phpt +++ b/ext/standard/tests/array/bug68553.phpt @@ -79,5 +79,5 @@ array(8) { NULL } } -Cannot access offset of type object on array +Cannot access offset of type stdClass on array Cannot access offset of type array on array diff --git a/ext/standard/tests/array/max_int_float_optimisation.phpt b/ext/standard/tests/array/max_int_float_optimisation.phpt new file mode 100644 index 0000000000000..0f5df35d12a7c --- /dev/null +++ b/ext/standard/tests/array/max_int_float_optimisation.phpt @@ -0,0 +1,61 @@ +--TEST-- +Check max() optimisation for int and float types +--SKIPIF-- + +--FILE-- + +--EXPECT-- +Start as int optimisation: +int(10) +int(10) +int(10) +int(10) +int(10) +int(10) +string(2) "15" +Check that int not representable as float works: +int(-9223372036854775807) +float(1.8446744073709552E+19) +float(INF) +Start as float optimisation: +float(10.5) +float(10.5) +float(10.5) +float(10.5) +float(10.5) +float(10.5) +string(4) "15.5" +Check that int not representable as float works: +int(-9223372036854775807) +float(1.8446744073709552E+19) +float(INF) diff --git a/ext/standard/tests/array/min_int_float_optimisation.phpt b/ext/standard/tests/array/min_int_float_optimisation.phpt new file mode 100644 index 0000000000000..e383b833694c7 --- /dev/null +++ b/ext/standard/tests/array/min_int_float_optimisation.phpt @@ -0,0 +1,61 @@ +--TEST-- +Check min() optimisation for int and float types +--SKIPIF-- + +--FILE-- + +--EXPECT-- +Start as int optimisation: +int(2) +int(2) +int(2) +int(2) +int(2) +int(2) +string(1) "1" +Check that int not representable as float works: +int(9223372036854775806) +float(-1.8446744073709552E+19) +int(9223372036854775806) +Start as float optimisation: +float(2.5) +float(2.5) +float(2.5) +float(2.5) +float(2.5) +float(2.5) +string(3) "1.5" +Check that int not representable as float 
works: +int(9223372036854775806) +float(-1.8446744073709552E+19) +int(9223372036854775806) diff --git a/ext/standard/tests/assert/assert_closures_multiple.phpt b/ext/standard/tests/assert/assert_closures_multiple.phpt new file mode 100644 index 0000000000000..e63c355299f39 --- /dev/null +++ b/ext/standard/tests/assert/assert_closures_multiple.phpt @@ -0,0 +1,35 @@ +--TEST-- +assert() asserting multiple with callback +--INI-- +assert.active = 1 +assert.warning = 1 +assert.bail = 0 +assert.exception=1 +--FILE-- + +DONE +--EXPECT-- +DONE diff --git a/ext/standard/tests/assert/assert_warnings.phpt b/ext/standard/tests/assert/assert_warnings.phpt new file mode 100644 index 0000000000000..804575dbc1daf --- /dev/null +++ b/ext/standard/tests/assert/assert_warnings.phpt @@ -0,0 +1,13 @@ +--TEST-- +assert(): warnings with no descriptions. +--INI-- +assert.active = 1 +assert.warning = 1 +assert.bail = 0 +assert.exception=0 +--FILE-- + +--EXPECTF-- +Warning: assert(): Assertion failed in %s on line %d diff --git a/ext/standard/tests/general_functions/http_response_code.phpt b/ext/standard/tests/general_functions/http_response_code.phpt index ab290c3cefe19..8f8b87511a3b9 100644 --- a/ext/standard/tests/general_functions/http_response_code.phpt +++ b/ext/standard/tests/general_functions/http_response_code.phpt @@ -21,8 +21,17 @@ var_dump( // Get the new response code http_response_code() ); +echo "Now we've sent the headers\n"; +var_dump( + // This should fail + http_response_code(500) +); ?> ---EXPECT-- +--EXPECTF-- bool(false) bool(true) int(201) +Now we've sent the headers + +Warning: http_response_code(): Cannot set response code - headers already sent (output started at %s:%d) in %s on line %d +bool(false) diff --git a/ext/standard/tests/http/bug67430.phpt b/ext/standard/tests/http/bug67430.phpt index e72e419fc02ac..1a515537e6609 100644 --- a/ext/standard/tests/http/bug67430.phpt +++ b/ext/standard/tests/http/bug67430.phpt @@ -41,7 +41,7 @@ POST / HTTP/1.1 Host: %s:%d 
Connection: close -GET /foo HTTP/1.1 +POST /foo HTTP/1.1 Host: %s:%d Connection: close diff --git a/ext/standard/tests/http/gh11274.phpt b/ext/standard/tests/http/gh11274.phpt new file mode 100644 index 0000000000000..fc125bfc494cf --- /dev/null +++ b/ext/standard/tests/http/gh11274.phpt @@ -0,0 +1,62 @@ +--TEST-- +GH-11274 (POST/PATCH request via file_get_contents + stream_context_create switches to GET after a HTTP 308 redirect) +--INI-- +allow_url_fopen=1 +--CONFLICTS-- +server +--FILE-- + ['method' => 'POST', 'header' => 'Content-type: application/x-www-form-urlencoded', 'content' => http_build_query(['hello' => 'world'])]])); + echo file_get_contents("http://" . PHP_CLI_SERVER_ADDRESS . "/test$suffix", false, stream_context_create(['http' => ['method' => 'PATCH', 'header' => 'Content-type: application/x-www-form-urlencoded', 'content' => http_build_query(['hello' => 'world'])]])); + echo file_get_contents("http://" . PHP_CLI_SERVER_ADDRESS . "/test/$suffix", false, stream_context_create(['http' => ['method' => 'POST', 'header' => 'Content-type: application/x-www-form-urlencoded', 'content' => http_build_query(['hello' => 'world'])]])); + echo file_get_contents("http://" . PHP_CLI_SERVER_ADDRESS . 
"/test/$suffix", false, stream_context_create(['http' => ['method' => 'PATCH', 'header' => 'Content-type: application/x-www-form-urlencoded', 'content' => http_build_query(['hello' => 'world'])]])); +} +?> +--EXPECT-- +-- Testing unredirected request -- +method: POST; body: hello=world +method: PATCH; body: hello=world +method: POST; body: hello=world +method: PATCH; body: hello=world +-- Testing redirect status code 301 -- +method: GET; body: +method: GET; body: +method: GET; body: +method: GET; body: +-- Testing redirect status code 302 -- +method: GET; body: +method: GET; body: +method: GET; body: +method: GET; body: +-- Testing redirect status code 307 -- +method: POST; body: hello=world +method: PATCH; body: hello=world +method: POST; body: hello=world +method: PATCH; body: hello=world +-- Testing redirect status code 308 -- +method: POST; body: hello=world +method: PATCH; body: hello=world +method: POST; body: hello=world +method: PATCH; body: hello=world diff --git a/ext/standard/tests/http/gh8641.phpt b/ext/standard/tests/http/gh8641.phpt new file mode 100644 index 0000000000000..9ccedc443dfec --- /dev/null +++ b/ext/standard/tests/http/gh8641.phpt @@ -0,0 +1,34 @@ +--TEST-- +GH-8641 ([Stream] STREAM_NOTIFY_COMPLETED over HTTP never emitted) +--SKIPIF-- + +--INI-- +allow_url_fopen=1 +--FILE-- + "stream_notification_callback")); + +$responses = array( + "data://text/plain,HTTP/1.0 200 Ok\r\nContent-Length: 11\r\n\r\nHello world", +); + +['pid' => $pid, 'uri' => $uri] = http_server($responses, $output); + +$f = file_get_contents($uri, 0, $ctx); + +http_server_kill($pid); +var_dump($f); +?> +--EXPECTF-- +8 11 11 +string(11) "Hello world" diff --git a/ext/standard/tests/network/socket_get_status_basic.phpt b/ext/standard/tests/network/socket_get_status_basic.phpt index 32a8d9ce6fbb4..215d9551e0a5d 100644 --- a/ext/standard/tests/network/socket_get_status_basic.phpt +++ b/ext/standard/tests/network/socket_get_status_basic.phpt @@ -17,7 +17,7 @@ fclose($server); 
?> --EXPECTF-- -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -32,4 +32,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:%d" } diff --git a/ext/standard/tests/streams/gh10031.phpt b/ext/standard/tests/streams/gh10031.phpt new file mode 100644 index 0000000000000..aa3576dab51ad --- /dev/null +++ b/ext/standard/tests/streams/gh10031.phpt @@ -0,0 +1,52 @@ +--TEST-- +GH-10031 ([Stream] STREAM_NOTIFY_PROGRESS over HTTP emitted irregularly for last chunk of data) +--SKIPIF-- + +--INI-- +allow_url_fopen=1 +--CONFLICTS-- +server +--FILE-- + ['ignore_errors' => true,]]); +$lastBytesTransferred = 0; +stream_context_set_params($context, ['notification' => function ($code, $s, $m, $mc, $bytes_transferred, $bytes_max) +use (&$lastBytesTransferred) { + if ($code === STREAM_NOTIFY_FILE_SIZE_IS) echo "expected filesize=$bytes_max".PHP_EOL; + $lastBytesTransferred = $bytes_transferred; + @ob_flush(); +}]); + +$get = file_get_contents("http://".PHP_CLI_SERVER_ADDRESS, false, $context); + +echo "got filesize=" . strlen($get) . 
PHP_EOL; +var_dump($lastBytesTransferred); + +?> +--EXPECT-- +expected filesize=1000 +got filesize=1000 +int(1000) diff --git a/ext/standard/tests/streams/gh11418.phpt b/ext/standard/tests/streams/gh11418.phpt new file mode 100644 index 0000000000000..99f70ff4c6b24 --- /dev/null +++ b/ext/standard/tests/streams/gh11418.phpt @@ -0,0 +1,36 @@ +--TEST-- +GH-11418: fgets on a redis socket connection fails on PHP 8.3 +--FILE-- +run($clientCode, $serverCode); + +?> +--EXPECT-- +Hi Hello World diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_basic.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_basic.phpt index 66658bd5d0c33..b0a82616a9145 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_basic.phpt +++ b/ext/standard/tests/streams/stream_get_meta_data_socket_basic.phpt @@ -9,7 +9,7 @@ fclose($tcp_socket); ?> --EXPECTF-- -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -24,4 +24,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31330" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_variation1.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_variation1.phpt index 9db45e8c2f588..0601bd194ea16 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_variation1.phpt +++ b/ext/standard/tests/streams/stream_get_meta_data_socket_variation1.phpt @@ -38,7 +38,7 @@ var_dump(stream_get_meta_data($client)); ?> --EXPECTF-- Write some data: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -53,11 +53,13 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31331" } Read a line from the client, causing data to be buffered: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -72,11 +74,13 @@ array(7) { int(15) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31331" } Read 3 bytes of data from the client: -array(7) { +array(8) { 
["timed_out"]=> bool(false) ["blocked"]=> @@ -91,11 +95,13 @@ array(7) { int(12) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31331" } Close the server side socket and read the remaining data from the client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -110,4 +116,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31331" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_variation2.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_variation2.phpt index f6e6504536eeb..cd1ba6b41dd80 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_variation2.phpt +++ b/ext/standard/tests/streams/stream_get_meta_data_socket_variation2.phpt @@ -36,7 +36,7 @@ fclose($server); ?> --EXPECTF-- -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -51,11 +51,13 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31332" } Set a timeout on the client and attempt a read: -array(7) { +array(8) { ["timed_out"]=> bool(true) ["blocked"]=> @@ -70,11 +72,13 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31332" } Write some data from the server: -array(7) { +array(8) { ["timed_out"]=> bool(true) ["blocked"]=> @@ -89,11 +93,13 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31332" } Read some data from the client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -108,4 +114,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31332" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_variation3.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_variation3.phpt index ef1a22a3a1637..4413312b7eb46 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_variation3.phpt +++ 
b/ext/standard/tests/streams/stream_get_meta_data_socket_variation3.phpt @@ -31,7 +31,7 @@ fclose($server); ?> --EXPECTF-- -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -46,12 +46,14 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31333" } Set blocking to false: bool(true) -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -66,12 +68,14 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31333" } Set blocking to true: bool(true) -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -86,4 +90,6 @@ array(7) { int(0) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31333" } diff --git a/ext/standard/tests/streams/stream_get_meta_data_socket_variation4.phpt b/ext/standard/tests/streams/stream_get_meta_data_socket_variation4.phpt index cd83b1342c7bc..14e1a6b38769b 100644 --- a/ext/standard/tests/streams/stream_get_meta_data_socket_variation4.phpt +++ b/ext/standard/tests/streams/stream_get_meta_data_socket_variation4.phpt @@ -36,7 +36,7 @@ fclose($client); ?> --EXPECTF-- Write some data: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -51,11 +51,13 @@ array(7) { int(%i) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31334" } Read a line from the client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -70,11 +72,13 @@ array(7) { int(%i) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31334" } Close the server side socket and read the remaining data from the client: -array(7) { +array(8) { ["timed_out"]=> bool(false) ["blocked"]=> @@ -89,4 +93,6 @@ array(7) { int(%i) ["seekable"]=> bool(false) + ["uri"]=> + string(21) "tcp://127.0.0.1:31334" } diff --git a/ext/standard/type.c b/ext/standard/type.c index a564446bd8ba6..6cd72fc744ca4 100644 --- a/ext/standard/type.c +++ b/ext/standard/type.c @@ -100,31 +100,31 @@ PHP_FUNCTION(settype) } 
else { ptr = Z_REFVAL_P(var); } - if (zend_string_equals_literal_ci(type, "integer")) { + if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_INTEGER))) { convert_to_long(ptr); - } else if (zend_string_equals_literal_ci(type, "int")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_INT))) { convert_to_long(ptr); - } else if (zend_string_equals_literal_ci(type, "float")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_FLOAT))) { convert_to_double(ptr); - } else if (zend_string_equals_literal_ci(type, "double")) { /* deprecated */ + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_DOUBLE))) { /* deprecated */ convert_to_double(ptr); - } else if (zend_string_equals_literal_ci(type, "string")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_STRING))) { convert_to_string(ptr); - } else if (zend_string_equals_literal_ci(type, "array")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_ARRAY))) { convert_to_array(ptr); - } else if (zend_string_equals_literal_ci(type, "object")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_OBJECT))) { convert_to_object(ptr); - } else if (zend_string_equals_literal_ci(type, "bool")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_BOOL))) { convert_to_boolean(ptr); - } else if (zend_string_equals_literal_ci(type, "boolean")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_BOOLEAN))) { convert_to_boolean(ptr); - } else if (zend_string_equals_literal_ci(type, "null")) { + } else if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_NULL_LOWERCASE))) { convert_to_null(ptr); } else { if (ptr == &tmp) { zval_ptr_dtor(&tmp); } - if (zend_string_equals_literal_ci(type, "resource")) { + if (zend_string_equals_ci(type, ZSTR_KNOWN(ZEND_STR_RESOURCE))) { zend_value_error("Cannot convert to resource type"); } else { zend_argument_value_error(2, "must be a valid type"); diff --git a/ext/xml/tests/bug26614_libxml_gte2_11.phpt 
b/ext/xml/tests/bug26614_libxml_gte2_11.phpt new file mode 100644 index 0000000000000..9a81b67686d14 --- /dev/null +++ b/ext/xml/tests/bug26614_libxml_gte2_11.phpt @@ -0,0 +1,95 @@ +--TEST-- +Bug #26614 (CDATA sections skipped on line count) +--EXTENSIONS-- +xml +--SKIPIF-- += 2.11'); +?> +--FILE-- + + + +'; + +// Case 2: replace some characters so that we get comments instead +$xmls["Comment"] =' + + +'; + +// Case 3: replace even more characters so that only textual data is left +$xmls["Text"] =' + +-!-- ATA[ +multi +line +CDATA +block +--- +'; + +function startElement($parser, $name, $attrs) { + printf("<$name> at line %d, col %d (byte %d)\n", + xml_get_current_line_number($parser), + xml_get_current_column_number($parser), + xml_get_current_byte_index($parser)); +} + +function endElement($parser, $name) { + printf(" at line %d, col %d (byte %d)\n", + xml_get_current_line_number($parser), + xml_get_current_column_number($parser), + xml_get_current_byte_index($parser)); +} + +function characterData($parser, $data) { + // dummy +} + +foreach ($xmls as $desc => $xml) { + echo "$desc\n"; + $xml_parser = xml_parser_create(); + xml_set_element_handler($xml_parser, "startElement", "endElement"); + xml_set_character_data_handler($xml_parser, "characterData"); + if (!xml_parse($xml_parser, $xml, true)) + echo "Error: ".xml_error_string(xml_get_error_code($xml_parser))."\n"; + xml_parser_free($xml_parser); +} +?> +--EXPECTF-- +CDATA + at line 2, col %d (byte 50) + at line 9, col %d (byte 96) +Comment + at line 2, col %d (byte 50) + at line 9, col %d (byte 96) +Text + at line 2, col %d (byte 50) + at line 9, col %d (byte 96) diff --git a/ext/xml/tests/bug26614_libxml.phpt b/ext/xml/tests/bug26614_libxml_pre2_11.phpt similarity index 96% rename from ext/xml/tests/bug26614_libxml.phpt rename to ext/xml/tests/bug26614_libxml_pre2_11.phpt index 6acf2c44b2a66..c581a08e9b8fb 100644 --- a/ext/xml/tests/bug26614_libxml.phpt +++ b/ext/xml/tests/bug26614_libxml_pre2_11.phpt @@ -5,6 
+5,7 @@ xml --SKIPIF-- = 21100) die('skip libxml2 test variant for version < 2.11'); ?> --FILE-- lastwasopen) { zval *myval; /* check if the current tag already has a value - if yes append to that! */ - if ((myval = zend_hash_str_find(Z_ARRVAL_P(parser->ctag), "value", sizeof("value") - 1))) { + if ((myval = zend_hash_find(Z_ARRVAL_P(parser->ctag), ZSTR_KNOWN(ZEND_STR_VALUE)))) { size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), @@ -781,7 +781,7 @@ void _xml_characterDataHandler(void *userData, const XML_Char *s, int len) ZEND_HASH_REVERSE_FOREACH_VAL(Z_ARRVAL(parser->data), curtag) { if ((mytype = zend_hash_str_find(Z_ARRVAL_P(curtag),"type", sizeof("type") - 1))) { if (zend_string_equals_literal(Z_STR_P(mytype), "cdata")) { - if ((myval = zend_hash_str_find(Z_ARRVAL_P(curtag), "value", sizeof("value") - 1))) { + if ((myval = zend_hash_find(Z_ARRVAL_P(curtag), ZSTR_KNOWN(ZEND_STR_VALUE)))) { size_t newlen = Z_STRLEN_P(myval) + ZSTR_LEN(decoded_value); Z_STR_P(myval) = zend_string_extend(Z_STR_P(myval), newlen, 0); strncpy(Z_STRVAL_P(myval) + Z_STRLEN_P(myval) - ZSTR_LEN(decoded_value), diff --git a/ext/xmlreader/php_xmlreader.stub.php b/ext/xmlreader/php_xmlreader.stub.php index ed4cc393e27bb..26cbf69a2f0df 100644 --- a/ext/xmlreader/php_xmlreader.stub.php +++ b/ext/xmlreader/php_xmlreader.stub.php @@ -149,8 +149,8 @@ class XMLReader public string $xmlLang; - /** @return bool */ - public function close() {} // TODO make the return type void + /** @return true */ + public function close() {} // TODO make return type void /** @tentative-return-type */ public function getAttribute(string $name): ?string {} diff --git a/ext/xmlreader/php_xmlreader_arginfo.h b/ext/xmlreader/php_xmlreader_arginfo.h index c80006570ccac..7556579d0ee30 100644 --- a/ext/xmlreader/php_xmlreader_arginfo.h +++ 
b/ext/xmlreader/php_xmlreader_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 0102030bdd0ca8806bbd881ed3f6788d4b18c462 */ + * Stub hash: 4751b68b857ffbf53cab6d1aa88fe8f6120d4fc6 */ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_XMLReader_close, 0, 0, 0) ZEND_END_ARG_INFO() diff --git a/ext/xsl/php_xsl.stub.php b/ext/xsl/php_xsl.stub.php index 140c498523b42..52300c019ecc7 100644 --- a/ext/xsl/php_xsl.stub.php +++ b/ext/xsl/php_xsl.stub.php @@ -110,8 +110,8 @@ public function hasExsltSupport(): bool {} /** @tentative-return-type */ public function registerPHPFunctions(array|string|null $functions = null): void {} - /** @return bool */ - public function setProfiling(?string $filename) {} // TODO make the return type void + /** @return true */ + public function setProfiling(?string $filename) {} // TODO make return type void /** @tentative-return-type */ public function setSecurityPrefs(int $preferences): int {} diff --git a/ext/xsl/php_xsl_arginfo.h b/ext/xsl/php_xsl_arginfo.h index 546dc3229efa6..aabf6629cc17f 100644 --- a/ext/xsl/php_xsl_arginfo.h +++ b/ext/xsl/php_xsl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: a0615bef7b2211570d9da95a31857832a06360dd */ + * Stub hash: 7d747e7b5989c18169e67d9a9d70256583fffd8e */ ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_XSLTProcessor_importStylesheet, 0, 1, _IS_BOOL, 0) ZEND_ARG_TYPE_INFO(0, stylesheet, IS_OBJECT, 0) diff --git a/ext/xsl/xsltprocessor.c b/ext/xsl/xsltprocessor.c index 7f3d1e0e42170..5f1482ea31b67 100644 --- a/ext/xsl/xsltprocessor.c +++ b/ext/xsl/xsltprocessor.c @@ -289,7 +289,7 @@ static void xsl_ext_function_php(xmlXPathParserContextPtr ctxt, int nargs, int t zval_ptr_dtor(&retval); } zend_string_release_ex(callable, 0); - zval_ptr_dtor(&handler); + zval_ptr_dtor_nogc(&handler); if (fci.param_count > 0) { for (i = 0; i < nargs - 1; i++) { zval_ptr_dtor(&args[i]); diff --git a/ext/zend_test/tests/gh11423.phpt b/ext/zend_test/tests/gh11423.phpt new file mode 100644 index 0000000000000..3c153229b3976 --- /dev/null +++ b/ext/zend_test/tests/gh11423.phpt @@ -0,0 +1,29 @@ +--TEST-- +GH-11423 (internal constants have their namespace lowercased) +--EXTENSIONS-- +zend_test +--FILE-- +getConstants()); + +define('NS1\ns2\Const1','value1'); +var_dump(get_defined_constants(true)["user"]); + +?> +--EXPECT-- +array(4) { + ["ZEND_TEST_DEPRECATED"]=> + int(42) + ["ZEND_CONSTANT_A"]=> + string(6) "global" + ["zendtestns2\ZEND_CONSTANT_A"]=> + string(10) "namespaced" + ["zendtestns2\zendsubns\ZEND_CONSTANT_A"]=> + string(10) "namespaced" +} +array(1) { + ["ns1\ns2\Const1"]=> + string(6) "value1" +} diff --git a/ext/zend_test/tests/observer_basic_01.phpt b/ext/zend_test/tests/observer_basic_01.phpt index b208ae2341e85..4f6dea29ecc7f 100644 --- a/ext/zend_test/tests/observer_basic_01.phpt +++ b/ext/zend_test/tests/observer_basic_01.phpt @@ -5,6 +5,7 @@ zend_test --INI-- zend_test.observer.enabled=1 zend_test.observer.observe_all=1 +opcache.optimization_level=0x7FFFBFFF & ~0x0080 --FILE-- innerstream, PHP_STREAM_IS_MEMORY)) { zend_off_t pos = php_stream_tell(ts->innerstream); - + if (pos + count >= 
ts->smax) { zend_string *membuf = php_stream_memory_get_buffer(ts->innerstream); php_stream *file = php_stream_fopen_temporary_file(ts->tmpdir, "php", NULL); @@ -614,6 +614,8 @@ static php_stream * php_stream_url_wrap_rfc2397(php_stream_wrapper *wrapper, con int base64 = 0; zend_string *base64_comma = NULL; + ZEND_ASSERT(mode); + ZVAL_NULL(&meta); if (memcmp(path, "data:", 5)) { return NULL; @@ -729,7 +731,7 @@ static php_stream * php_stream_url_wrap_rfc2397(php_stream_wrapper *wrapper, con stream->ops = &php_stream_rfc2397_ops; ts = (php_stream_temp_data*)stream->abstract; assert(ts != NULL); - ts->mode = mode && mode[0] == 'r' && mode[1] != '+' ? TEMP_STREAM_READONLY : 0; + ts->mode = mode[0] == 'r' && mode[1] != '+' ? TEMP_STREAM_READONLY : 0; ZVAL_COPY_VALUE(&ts->meta, &meta); } if (base64_comma) { diff --git a/main/streams/php_stream_context.h b/main/streams/php_stream_context.h index c98f5420ac3e6..d4ebe29bc162e 100644 --- a/main/streams/php_stream_context.h +++ b/main/streams/php_stream_context.h @@ -94,6 +94,10 @@ END_EXTERN_C() php_stream_notification_notify((context), PHP_STREAM_NOTIFY_PROGRESS, PHP_STREAM_NOTIFY_SEVERITY_INFO, \ NULL, 0, (bsofar), (bmax), NULL); } } while(0) +#define php_stream_notify_completed(context) do { if ((context) && (context)->notifier) { \ + php_stream_notification_notify((context), PHP_STREAM_NOTIFY_COMPLETED, PHP_STREAM_NOTIFY_SEVERITY_INFO, \ + NULL, 0, (context)->notifier->progress, (context)->notifier->progress_max, NULL); } } while(0) + #define php_stream_notify_progress_init(context, sofar, bmax) do { if ((context) && (context)->notifier) { \ (context)->notifier->progress = (sofar); \ (context)->notifier->progress_max = (bmax); \ diff --git a/main/streams/streams.c b/main/streams/streams.c index f655faef10cbf..2a5178e294266 100644 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -542,6 +542,9 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) { /* allocate/fill the buffer */ + 
zend_result retval; + bool old_eof = stream->eof; + if (stream->readfilters.head) { size_t to_read_now = MIN(size, stream->chunk_size); char *chunk_buf; @@ -562,7 +565,8 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) justread = stream->ops->read(stream, chunk_buf, stream->chunk_size); if (justread < 0 && stream->writepos == stream->readpos) { efree(chunk_buf); - return FAILURE; + retval = FAILURE; + goto out_check_eof; } else if (justread > 0) { bucket = php_stream_bucket_new(stream, chunk_buf, justread, 0, 0); @@ -633,7 +637,8 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) * further reads should fail. */ stream->eof = 1; efree(chunk_buf); - return FAILURE; + retval = FAILURE; + goto out_is_eof; } if (justread <= 0) { @@ -643,7 +648,6 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) efree(chunk_buf); return SUCCESS; - } else { /* is there enough data in the buffer ? */ if (stream->writepos - stream->readpos < (zend_off_t)size) { @@ -670,18 +674,27 @@ PHPAPI zend_result _php_stream_fill_read_buffer(php_stream *stream, size_t size) stream->readbuflen - stream->writepos ); if (justread < 0) { - return FAILURE; + retval = FAILURE; + goto out_check_eof; } stream->writepos += justread; + retval = SUCCESS; + goto out_check_eof; } return SUCCESS; } + +out_check_eof: + if (old_eof != stream->eof) { +out_is_eof: + php_stream_notify_completed(PHP_STREAM_CONTEXT(stream)); + } + return retval; } PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) { - ssize_t toread = 0; - stream->didread = 0; + ssize_t toread = 0, didread = 0; while (size > 0) { @@ -700,7 +713,8 @@ PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) stream->readpos += toread; size -= toread; buf += toread; - stream->didread += toread; + didread += toread; + stream->has_buffered_data = 1; } /* ignore eof here; the underlying state might have changed */ @@ 
-713,14 +727,14 @@ PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) if (toread < 0) { /* Report an error if the read failed and we did not read any data * before that. Otherwise return the data we did read. */ - if (stream->didread == 0) { + if (didread == 0) { return toread; } break; } } else { if (php_stream_fill_read_buffer(stream, size) != SUCCESS) { - if (stream->didread == 0) { + if (didread == 0) { return -1; } break; @@ -737,9 +751,10 @@ PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) } } if (toread > 0) { - stream->didread += toread; + didread += toread; buf += toread; size -= toread; + stream->has_buffered_data = 1; } else { /* EOF, or temporary end of data (for non-blocking mode). */ break; @@ -753,11 +768,12 @@ PHPAPI ssize_t _php_stream_read(php_stream *stream, char *buf, size_t size) } } - if (stream->didread > 0) { - stream->position += stream->didread; + if (didread > 0) { + stream->position += didread; + stream->has_buffered_data = 0; } - return stream->didread; + return didread; } /* Like php_stream_read(), but reading into a zend_string buffer. This has some similarity @@ -1124,6 +1140,7 @@ PHPAPI zend_string *php_stream_get_record(php_stream *stream, size_t maxlen, con static ssize_t _php_stream_write_buffer(php_stream *stream, const char *buf, size_t count) { ssize_t didwrite = 0; + ssize_t retval; /* if we have a seekable stream we need to ensure that data is written at the * current stream->position. This means invalidating the read buffer and then @@ -1134,15 +1151,19 @@ static ssize_t _php_stream_write_buffer(php_stream *stream, const char *buf, siz stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position); } + bool old_eof = stream->eof; + while (count > 0) { ssize_t justwrote = stream->ops->write(stream, buf, count); if (justwrote <= 0) { /* If we already successfully wrote some bytes and a write error occurred * later, report the successfully written bytes. 
*/ if (didwrite == 0) { - return justwrote; + retval = justwrote; + goto out; } - return didwrite; + retval = didwrite; + goto out; } buf += justwrote; @@ -1151,7 +1172,13 @@ static ssize_t _php_stream_write_buffer(php_stream *stream, const char *buf, siz stream->position += justwrote; } - return didwrite; + retval = didwrite; + +out: + if (old_eof != stream->eof) { + php_stream_notify_completed(PHP_STREAM_CONTEXT(stream)); + } + return retval; } /* push some data through the write filter chain. @@ -2315,7 +2342,6 @@ PHPAPI php_stream_context *php_stream_context_alloc(void) php_stream_context *context; context = ecalloc(1, sizeof(php_stream_context)); - context->notifier = NULL; array_init(&context->options); context->res = zend_register_resource(context, php_le_stream_context()); diff --git a/main/streams/transports.c b/main/streams/transports.c index 1c9a83be2c882..38850a3b541a4 100644 --- a/main/streams/transports.c +++ b/main/streams/transports.c @@ -59,7 +59,7 @@ PHPAPI php_stream *_php_stream_xport_create(const char *name, size_t namelen, in { php_stream *stream = NULL; php_stream_transport_factory factory = NULL; - const char *p, *protocol = NULL; + const char *p, *protocol, *orig_path = NULL; size_t n = 0; bool failed = false; bool bailout = false; @@ -94,6 +94,7 @@ PHPAPI php_stream *_php_stream_xport_create(const char *name, size_t namelen, in } } + orig_path = name; for (p = name; isalnum((int)*p) || *p == '+' || *p == '-' || *p == '.'; p++) { n++; } @@ -135,6 +136,7 @@ PHPAPI php_stream *_php_stream_xport_create(const char *name, size_t namelen, in if (stream) { zend_try { php_stream_context_set(stream, context); + stream->orig_path = pestrdup(orig_path, persistent_id ? 
1 : 0); if ((flags & STREAM_XPORT_SERVER) == 0) { /* client */ diff --git a/main/streams/userspace.c b/main/streams/userspace.c index 33ec6c0990b13..165bd7da3ad98 100644 --- a/main/streams/userspace.c +++ b/main/streams/userspace.c @@ -469,7 +469,6 @@ PHP_FUNCTION(stream_wrapper_register) uwrap->wrapper.wops = &user_stream_wops; uwrap->wrapper.abstract = uwrap; uwrap->wrapper.is_url = ((flags & PHP_STREAM_IS_URL) != 0); - uwrap->resource = NULL; rsrc = zend_register_resource(uwrap, le_protocols); diff --git a/main/streams/xp_socket.c b/main/streams/xp_socket.c index 4ea0dc8e880bf..6c770d77aed95 100644 --- a/main/streams/xp_socket.c +++ b/main/streams/xp_socket.c @@ -168,7 +168,7 @@ static ssize_t php_sockop_read(php_stream *stream, char *buf, size_t count) /* Special handling for blocking read. */ if (sock->is_blocked) { /* Find out if there is any data buffered from the previous read. */ - bool has_buffered_data = stream->didread > 0; + bool has_buffered_data = stream->has_buffered_data; /* No need to wait if there is any data buffered or no timeout. 
*/ bool dont_wait = has_buffered_data || (sock->timeout.tv_sec == 0 && sock->timeout.tv_usec == 0); @@ -966,9 +966,5 @@ PHPAPI php_stream *php_stream_generic_socket_factory(const char *proto, size_t p return NULL; } - if (flags == 0) { - return stream; - } - return stream; } diff --git a/run-tests.php b/run-tests.php index 0d0b683becd79..a112e8f5347f0 100755 --- a/run-tests.php +++ b/run-tests.php @@ -2834,7 +2834,7 @@ function run_test(string $php, $file, array $env): string function error_may_be_retried(string $output): bool { - return preg_match('((timed out)|(connection refused))i', $output) === 1; + return preg_match('((timed out)|(connection refused)|(404: page not found)|(address already in use)|(mailbox already exists))i', $output) === 1; } function expectf_to_regex(?string $wanted): string diff --git a/sapi/cli/php_cli.c b/sapi/cli/php_cli.c index 5e84ae7379cfb..a5e3755be6594 100644 --- a/sapi/cli/php_cli.c +++ b/sapi/cli/php_cli.c @@ -574,7 +574,7 @@ static zend_result cli_seek_file_begin(zend_file_handle *file_handle, char *scri { FILE *fp = VCWD_FOPEN(script_file, "rb"); if (!fp) { - php_printf("Could not open input file: %s\n", script_file); + fprintf(stderr, "Could not open input file: %s\n", script_file); return FAILURE; } @@ -1076,7 +1076,7 @@ static int do_cli(int argc, char **argv) /* {{{ */ if (EG(exception)) { zval rv; - zval *msg = zend_read_property(zend_ce_exception, EG(exception), "message", sizeof("message")-1, 0, &rv); + zval *msg = zend_read_property_ex(zend_ce_exception, EG(exception), ZSTR_KNOWN(ZEND_STR_MESSAGE), /* silent */ false, &rv); zend_printf("Exception: %s\n", Z_STRVAL_P(msg)); zend_object_release(EG(exception)); EG(exception) = NULL; diff --git a/sapi/cli/php_cli_server.c b/sapi/cli/php_cli_server.c index 8ea04137d1229..9c71b835801eb 100644 --- a/sapi/cli/php_cli_server.c +++ b/sapi/cli/php_cli_server.c @@ -361,7 +361,7 @@ static void append_essential_headers(smart_str* buffer, php_cli_server_client *c zval *val; struct 
timeval tv = {0}; - if (NULL != (val = zend_hash_str_find(&client->request.headers, "host", sizeof("host")-1))) { + if (NULL != (val = zend_hash_find(&client->request.headers, ZSTR_KNOWN(ZEND_STR_HOST)))) { smart_str_appends_ex(buffer, "Host: ", persistent); smart_str_append_ex(buffer, Z_STR_P(val), persistent); smart_str_appends_ex(buffer, "\r\n", persistent); diff --git a/sapi/cli/ps_title.c b/sapi/cli/ps_title.c index 8ff7ef719e17f..8eb14963c682a 100644 --- a/sapi/cli/ps_title.c +++ b/sapi/cli/ps_title.c @@ -169,19 +169,18 @@ char** save_ps_args(int argc, char** argv) end_of_area = argv[i] + strlen(argv[i]); } + if (!is_contiguous_area) { + goto clobber_error; + } + /* * check for contiguous environ strings following argv */ - for (i = 0; is_contiguous_area && (environ[i] != NULL); i++) + for (i = 0; environ[i] != NULL; i++) { - if (end_of_area + 1 != environ[i]) { - is_contiguous_area = false; + if (end_of_area + 1 == environ[i]) { + end_of_area = environ[i] + strlen(environ[i]); } - end_of_area = environ[i] + strlen(environ[i]); - } - - if (!is_contiguous_area) { - goto clobber_error; } ps_buffer = argv[0]; diff --git a/sapi/cli/tests/007.phpt b/sapi/cli/tests/007.phpt index 2de5880a89f74..6f27586e27d96 100644 --- a/sapi/cli/tests/007.phpt +++ b/sapi/cli/tests/007.phpt @@ -45,8 +45,8 @@ string(81) " " -string(33) "Could not open input file: wrong -" +Could not open input file: wrong +NULL string(43) " " Done diff --git a/sapi/cli/tests/008.phpt b/sapi/cli/tests/008.phpt index 5edd40c84fefd..a8b205056300e 100644 --- a/sapi/cli/tests/008.phpt +++ b/sapi/cli/tests/008.phpt @@ -41,6 +41,6 @@ Stack trace: #0 {main} thrown in %s on line %d " -string(33) "Could not open input file: wrong -" +Could not open input file: wrong +NULL Done diff --git a/sapi/cli/tests/011.phpt b/sapi/cli/tests/011.phpt index bd61b260f5a94..d45cb94a08c71 100644 --- a/sapi/cli/tests/011.phpt +++ b/sapi/cli/tests/011.phpt @@ -50,8 +50,8 @@ echo "Done\n"; --EXPECTF-- string(%d) "No syntax 
errors detected in %s011.test.php " -string(40) "Could not open input file: some.unknown -" +Could not open input file: some.unknown +NULL string(%d) " Parse error: %s expecting %s{%s in %s on line %d Errors parsing %s011.test.php diff --git a/sapi/cli/tests/014.phpt b/sapi/cli/tests/014.phpt index 09f69faa521b3..734eef0c2ce95 100644 --- a/sapi/cli/tests/014.phpt +++ b/sapi/cli/tests/014.phpt @@ -40,6 +40,6 @@ string(1478) "
<?php
$test 
"var"//var
/* test class */
class test {
    private 
$var = array();

    public static function 
foo(Test $arg) {
        echo 
"hello";
        
var_dump($this);
    }
}

$o = new test;
?>
" -string(35) "Could not open input file: unknown -" +Could not open input file: unknown +NULL Done diff --git a/sapi/fpm/fpm/fpm_children.c b/sapi/fpm/fpm/fpm_children.c index 2f8e3dc4d0acc..1c9780e3de3c1 100644 --- a/sapi/fpm/fpm/fpm_children.c +++ b/sapi/fpm/fpm/fpm_children.c @@ -63,10 +63,27 @@ static void fpm_child_free(struct fpm_child_s *child) /* {{{ */ } /* }}} */ +static void fpm_postponed_child_free(struct fpm_event_s *ev, short which, void *arg) +{ + struct fpm_child_s *child = (struct fpm_child_s *) arg; + + if (child->fd_stdout != -1) { + fpm_event_del(&child->ev_stdout); + close(child->fd_stdout); + } + if (child->fd_stderr != -1) { + fpm_event_del(&child->ev_stderr); + close(child->fd_stderr); + } + + fpm_child_free((struct fpm_child_s *) child); +} + static void fpm_child_close(struct fpm_child_s *child, int in_event_loop) /* {{{ */ { if (child->fd_stdout != -1) { if (in_event_loop) { + child->postponed_free = true; fpm_event_fire(&child->ev_stdout); } if (child->fd_stdout != -1) { @@ -76,6 +93,7 @@ static void fpm_child_close(struct fpm_child_s *child, int in_event_loop) /* {{{ if (child->fd_stderr != -1) { if (in_event_loop) { + child->postponed_free = true; fpm_event_fire(&child->ev_stderr); } if (child->fd_stderr != -1) { @@ -83,7 +101,12 @@ static void fpm_child_close(struct fpm_child_s *child, int in_event_loop) /* {{{ } } - fpm_child_free(child); + if (in_event_loop && child->postponed_free) { + fpm_event_set_timer(&child->ev_free, 0, &fpm_postponed_child_free, child); + fpm_event_add(&child->ev_free, 1000); + } else { + fpm_child_free(child); + } } /* }}} */ diff --git a/sapi/fpm/fpm/fpm_children.h b/sapi/fpm/fpm/fpm_children.h index 679c34ba0383e..fe06eb3ba84cd 100644 --- a/sapi/fpm/fpm/fpm_children.h +++ b/sapi/fpm/fpm/fpm_children.h @@ -23,12 +23,13 @@ struct fpm_child_s { struct fpm_child_s *prev, *next; struct timeval started; struct fpm_worker_pool_s *wp; - struct fpm_event_s ev_stdout, ev_stderr; + struct fpm_event_s ev_stdout, 
ev_stderr, ev_free; int shm_slot_i; int fd_stdout, fd_stderr; void (*tracer)(struct fpm_child_s *); struct timeval slow_logged; - int idle_kill; + bool idle_kill; + bool postponed_free; pid_t pid; int scoreboard_i; struct zlog_stream *log_stream; diff --git a/sapi/fpm/fpm/fpm_conf.c b/sapi/fpm/fpm/fpm_conf.c index 8f7548a402c23..4823c039b059b 100644 --- a/sapi/fpm/fpm/fpm_conf.c +++ b/sapi/fpm/fpm/fpm_conf.c @@ -534,7 +534,7 @@ static char *fpm_conf_set_pm(zval *value, void **config, intptr_t offset) /* {{{ { zend_string *val = Z_STR_P(value); struct fpm_worker_pool_config_s *c = *config; - if (zend_string_equals_literal_ci(val, "static")) { + if (zend_string_equals_ci(val, ZSTR_KNOWN(ZEND_STR_STATIC))) { c->pm = PM_STYLE_STATIC; } else if (zend_string_equals_literal_ci(val, "dynamic")) { c->pm = PM_STYLE_DYNAMIC; diff --git a/sapi/fpm/fpm/fpm_main.c b/sapi/fpm/fpm/fpm_main.c index 7ef0372c08ef1..b3ae2f69cc556 100644 --- a/sapi/fpm/fpm/fpm_main.c +++ b/sapi/fpm/fpm/fpm_main.c @@ -1909,19 +1909,16 @@ consult the installation file that came with this distribution, or visit \n\ } } zend_catch { } zend_end_try(); - /* we want to serve more requests if this is fastcgi - * so cleanup and continue, request shutdown is - * handled later */ - - goto fastcgi_request_done; - } - - fpm_request_executing(); + /* We want to serve more requests if this is fastcgi so cleanup and continue, + * request shutdown is handled later. */ + } else { + fpm_request_executing(); - /* Reset exit status from the previous execution */ - EG(exit_status) = 0; + /* Reset exit status from the previous execution */ + EG(exit_status) = 0; - php_execute_script(&file_handle); + php_execute_script(&file_handle); + } /* Without opcache, or the first time with opcache, the file handle will be placed * in the CG(open_files) list by open_file_for_scanning(). 
Starting from the second diff --git a/sapi/fpm/fpm/fpm_php.c b/sapi/fpm/fpm/fpm_php.c index 92b189668206e..aadaafbb05630 100644 --- a/sapi/fpm/fpm/fpm_php.c +++ b/sapi/fpm/fpm/fpm_php.c @@ -252,13 +252,13 @@ int fpm_php_limit_extensions(char *path) /* {{{ */ } /* }}} */ -char* fpm_php_get_string_from_table(zend_string *table, char *key) /* {{{ */ +bool fpm_php_is_key_in_table(zend_string *table, const char *key, size_t key_len) /* {{{ */ { - zval *data, *tmp; + zval *data; zend_string *str; - if (!table || !key) { - return NULL; - } + + ZEND_ASSERT(table); + ZEND_ASSERT(key); /* inspired from ext/standard/info.c */ @@ -270,12 +270,12 @@ char* fpm_php_get_string_from_table(zend_string *table, char *key) /* {{{ */ return NULL; } - ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(data), str, tmp) { - if (str && !strncmp(ZSTR_VAL(str), key, ZSTR_LEN(str))) { - return Z_STRVAL_P(tmp); + ZEND_HASH_FOREACH_STR_KEY(Z_ARRVAL_P(data), str) { + if (str && zend_string_equals_cstr(str, key, key_len)) { + return true; } } ZEND_HASH_FOREACH_END(); - return NULL; + return false; } /* }}} */ diff --git a/sapi/fpm/fpm/fpm_php.h b/sapi/fpm/fpm/fpm_php.h index 01ed65799170d..d61857c5e0e11 100644 --- a/sapi/fpm/fpm/fpm_php.h +++ b/sapi/fpm/fpm/fpm_php.h @@ -41,6 +41,6 @@ void fpm_php_soft_quit(void); int fpm_php_init_main(void); int fpm_php_apply_defines_ex(struct key_value_s *kv, int mode); int fpm_php_limit_extensions(char *path); -char* fpm_php_get_string_from_table(zend_string *table, char *key); +bool fpm_php_is_key_in_table(zend_string *table, const char *key, size_t key_len); #endif diff --git a/sapi/fpm/fpm/fpm_process_ctl.c b/sapi/fpm/fpm/fpm_process_ctl.c index 48eb0003d4918..7a55d98b046fc 100644 --- a/sapi/fpm/fpm/fpm_process_ctl.c +++ b/sapi/fpm/fpm/fpm_process_ctl.c @@ -318,7 +318,7 @@ static void fpm_pctl_kill_idle_child(struct fpm_child_s *child) /* {{{ */ if (child->idle_kill) { fpm_pctl_kill(child->pid, FPM_PCTL_KILL); } else { - child->idle_kill = 1; + child->idle_kill = 
true; fpm_pctl_kill(child->pid, FPM_PCTL_QUIT); } } diff --git a/sapi/fpm/fpm/fpm_status.c b/sapi/fpm/fpm/fpm_status.c index e78cbeab1105a..20c5770768a73 100644 --- a/sapi/fpm/fpm/fpm_status.c +++ b/sapi/fpm/fpm/fpm_status.c @@ -13,7 +13,8 @@ #include "fpm_atomic.h" #include "fpm_conf.h" #include "fpm_php.h" -#include +#include "ext/standard/html.h" +#include "ext/json/php_json.h" static char *fpm_status_uri = NULL; static char *fpm_status_ping_uri = NULL; @@ -140,10 +141,10 @@ int fpm_status_handle_request(void) /* {{{ */ struct fpm_scoreboard_proc_s *proc; char *buffer, *time_format, time_buffer[64]; time_t now_epoch; - int full, encode, has_start_time; + int full, has_start_time; + bool encode_html, encode_json; char *short_syntax, *short_post; char *full_pre, *full_syntax, *full_post, *full_separator; - zend_string *_GET_str; if (!SG(request_info).request_uri) { return 0; @@ -168,14 +169,17 @@ int fpm_status_handle_request(void) /* {{{ */ /* STATUS */ if (fpm_status_uri && !strcmp(fpm_status_uri, SG(request_info).request_uri)) { + zend_string *_GET_str; + fpm_request_executing(); /* full status ? 
*/ _GET_str = ZSTR_INIT_LITERAL("_GET", 0); - full = (fpm_php_get_string_from_table(_GET_str, "full") != NULL); + full = fpm_php_is_key_in_table(_GET_str, ZEND_STRL("full")); short_syntax = short_post = NULL; full_separator = full_pre = full_syntax = full_post = NULL; - encode = 0; + encode_html = false; + encode_json = false; has_start_time = 1; scoreboard_p = fpm_scoreboard_get(); @@ -215,10 +219,10 @@ int fpm_status_handle_request(void) /* {{{ */ } /* HTML */ - if (fpm_php_get_string_from_table(_GET_str, "html")) { + if (fpm_php_is_key_in_table(_GET_str, ZEND_STRL("html"))) { sapi_add_header_ex(ZEND_STRL("Content-Type: text/html"), 1, 1); time_format = "%d/%b/%Y:%H:%M:%S %z"; - encode = 1; + encode_html = true; short_syntax = "\n" @@ -284,10 +288,10 @@ int fpm_status_handle_request(void) /* {{{ */ } /* XML */ - } else if (fpm_php_get_string_from_table(_GET_str, "xml")) { + } else if (fpm_php_is_key_in_table(_GET_str, ZEND_STRL("xml"))) { sapi_add_header_ex(ZEND_STRL("Content-Type: text/xml"), 1, 1); time_format = "%s"; - encode = 1; + encode_html = true; short_syntax = "\n" @@ -332,10 +336,12 @@ int fpm_status_handle_request(void) /* {{{ */ } /* JSON */ - } else if (fpm_php_get_string_from_table(_GET_str, "json")) { + } else if (fpm_php_is_key_in_table(_GET_str, ZEND_STRL("json"))) { sapi_add_header_ex(ZEND_STRL("Content-Type: application/json"), 1, 1); time_format = "%s"; + encode_json = true; + short_syntax = "{" "\"pool\":\"%s\"," @@ -379,7 +385,7 @@ int fpm_status_handle_request(void) /* {{{ */ } /* OpenMetrics */ - } else if (fpm_php_get_string_from_table(_GET_str, "openmetrics")) { + } else if (fpm_php_is_key_in_table(_GET_str, ZEND_STRL("openmetrics"))) { sapi_add_header_ex(ZEND_STRL("Content-Type: application/openmetrics-text; version=1.0.0; charset=utf-8"), 1, 1); time_format = "%s"; @@ -549,11 +555,24 @@ int fpm_status_handle_request(void) /* {{{ */ query_string = NULL; tmp_query_string = NULL; if (proc->query_string[0] != '\0') { - if (!encode) { - 
query_string = proc->query_string; + if (encode_html) { + tmp_query_string = php_escape_html_entities_ex( + (const unsigned char *) proc->query_string, + strlen(proc->query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, + NULL, /* double_encode */ 1, /* quiet */ 0); + } else if (encode_json) { + tmp_query_string = php_json_encode_string(proc->query_string, + strlen(proc->query_string), PHP_JSON_INVALID_UTF8_IGNORE); } else { - tmp_query_string = php_escape_html_entities_ex((const unsigned char *) proc->query_string, strlen(proc->query_string), 1, ENT_HTML_IGNORE_ERRORS & ENT_COMPAT, NULL, /* double_encode */ 1, /* quiet */ 0); + query_string = proc->query_string; + } + if (tmp_query_string) { query_string = ZSTR_VAL(tmp_query_string); + /* remove quotes around the string */ + if (encode_json && ZSTR_LEN(tmp_query_string) >= 2) { + query_string[ZSTR_LEN(tmp_query_string) - 1] = '\0'; + ++query_string; + } } } diff --git a/sapi/fpm/fpm/fpm_stdio.c b/sapi/fpm/fpm/fpm_stdio.c index a6c0793d9347e..a225d3357dd99 100644 --- a/sapi/fpm/fpm/fpm_stdio.c +++ b/sapi/fpm/fpm/fpm_stdio.c @@ -180,10 +180,7 @@ static void fpm_stdio_child_said(struct fpm_event_s *ev, short which, void *arg) if (!arg) { return; } - child = fpm_child_find((intptr_t) arg); - if (!child) { - return; - } + child = (struct fpm_child_s *) arg; is_stdout = (fd == child->fd_stdout); if (is_stdout) { @@ -276,6 +273,7 @@ static void fpm_stdio_child_said(struct fpm_event_s *ev, short which, void *arg) fpm_event_del(event); + child->postponed_free = true; if (is_stdout) { close(child->fd_stdout); child->fd_stdout = -1; @@ -329,10 +327,10 @@ int fpm_stdio_parent_use_pipes(struct fpm_child_s *child) /* {{{ */ child->fd_stdout = fd_stdout[0]; child->fd_stderr = fd_stderr[0]; - fpm_event_set(&child->ev_stdout, child->fd_stdout, FPM_EV_READ, fpm_stdio_child_said, (void *) (intptr_t) child->pid); + fpm_event_set(&child->ev_stdout, child->fd_stdout, FPM_EV_READ, fpm_stdio_child_said, child); 
fpm_event_add(&child->ev_stdout, 0); - fpm_event_set(&child->ev_stderr, child->fd_stderr, FPM_EV_READ, fpm_stdio_child_said, (void *) (intptr_t) child->pid); + fpm_event_set(&child->ev_stderr, child->fd_stderr, FPM_EV_READ, fpm_stdio_child_said, child); fpm_event_add(&child->ev_stderr, 0); return 0; } diff --git a/sapi/fpm/tests/bug64539-status-json-encoding.phpt b/sapi/fpm/tests/bug64539-status-json-encoding.phpt new file mode 100644 index 0000000000000..f5c856c11c8a5 --- /dev/null +++ b/sapi/fpm/tests/bug64539-status-json-encoding.phpt @@ -0,0 +1,48 @@ +--TEST-- +FPM: bug64539 - status json format escaping +--SKIPIF-- + +--FILE-- +start(); +$tester->expectLogStartNotices(); +$responses = $tester + ->multiRequest([ + ['query' => 'a=b"c'], + ['uri' => '/status', 'query' => 'full&json', 'delay' => 100000], + ]); +$responses[1]->expectJsonBodyPatternForStatusProcessField('request uri', '\?a=b"c$'); +$tester->terminate(); +$tester->expectLogTerminatingNotices(); +$tester->close(); + +?> +Done +--EXPECT-- +Done +--CLEAN-- + diff --git a/sapi/fpm/tests/log-suppress-output.phpt b/sapi/fpm/tests/log-suppress-output.phpt index 5a5e7bb9544ba..a507180e99227 100644 --- a/sapi/fpm/tests/log-suppress-output.phpt +++ b/sapi/fpm/tests/log-suppress-output.phpt @@ -38,7 +38,7 @@ function doTestCalls(FPM\Tester &$tester, bool $expectSuppressableEntries) $tester->request(query: 'test=output', uri: '/ping')->expectBody('pong', 'text/plain'); $tester->expectAccessLog("'GET /ping?test=output' 200", suppressable: false); - $tester->request(headers: ['X_ERROR' => 1])->expectBody('Not OK'); + $tester->request(headers: ['X_ERROR' => 1])->expectStatus('500 Internal Server Error')->expectBody('Not OK'); $tester->expectAccessLog("'GET /log-suppress-output.src.php' 500", suppressable: false); $tester->request()->expectBody('OK'); @@ -54,8 +54,8 @@ function doTestCalls(FPM\Tester &$tester, bool $expectSuppressableEntries) $src = <<getBody('application/json'); + $data = json_decode($rawData, 
true); + if (empty($data['processes']) || !is_array($data['processes'])) { + $this->error( + "The body data is not a valid status json containing processes field '$rawData'" + ); + } + foreach ($data['processes'] as $process) { + if (preg_match('|' . $pattern . '|', $process[$fieldName]) !== false) { + return $this; + } + } + + $this->error( + "No field $fieldName matched pattern $pattern for any process in status data '$rawData'" + ); + + return $this; + } + /** * @return Response */ @@ -229,18 +260,22 @@ class Response /** * Print raw body. + * + * @param string $contentType Expect body to have specified content type. */ - public function dumpBody() + public function dumpBody(string $contentType = 'text/html') { - var_dump($this->getBody()); + var_dump($this->getBody($contentType)); } /** * Print raw body. + * + * @param string $contentType Expect body to have specified content type. */ - public function printBody() + public function printBody(string $contentType = 'text/html') { - echo $this->getBody() . "\n"; + echo $this->getBody($contentType) . "\n"; } /** diff --git a/sapi/fpm/tests/tester.inc b/sapi/fpm/tests/tester.inc index 39c964761b282..a3f40ed9bcc40 100644 --- a/sapi/fpm/tests/tester.inc +++ b/sapi/fpm/tests/tester.inc @@ -826,6 +826,10 @@ class Tester $requestData['uri'] ?? null ); + if (isset($requestData['delay'])) { + usleep($requestData['delay']); + } + return [ 'client' => $client, 'requestId' => $client->async_request($params, false), diff --git a/sapi/fuzzer/fuzzer-sapi.c b/sapi/fuzzer/fuzzer-sapi.c index cbb09e08f2730..d0a9b553aa856 100644 --- a/sapi/fuzzer/fuzzer-sapi.c +++ b/sapi/fuzzer/fuzzer-sapi.c @@ -80,7 +80,7 @@ static void send_header(sapi_header_struct *sapi_header, void *server_context) { } -static char* read_cookies() +static char* read_cookies(void) { /* TODO: fuzz these! 
*/ return NULL; @@ -172,7 +172,7 @@ int fuzzer_init_php(const char *extra_ini) return SUCCESS; } -int fuzzer_request_startup() +int fuzzer_request_startup(void) { if (php_request_startup() == FAILURE) { php_module_shutdown(); @@ -188,7 +188,7 @@ int fuzzer_request_startup() return SUCCESS; } -void fuzzer_request_shutdown() +void fuzzer_request_shutdown(void) { zend_try { /* Destroy thrown exceptions. This does not happen as part of request shutdown. */ @@ -207,7 +207,7 @@ void fuzzer_request_shutdown() } /* Set up a dummy stack frame so that exceptions may be thrown. */ -void fuzzer_setup_dummy_frame() +void fuzzer_setup_dummy_frame(void) { static zend_execute_data execute_data; static zend_function func; diff --git a/sapi/litespeed/lsapi_main.c b/sapi/litespeed/lsapi_main.c index 46ad97f2bf335..9c2f4e1344232 100644 --- a/sapi/litespeed/lsapi_main.c +++ b/sapi/litespeed/lsapi_main.c @@ -1347,7 +1347,7 @@ static int cli_main( int argc, char * argv[] ) php_request_shutdown( NULL ); } } else { - php_printf("Could not open input file: %s.\n", *p); + fprintf(stderr, "Could not open input file: %s.\n", *p); } } else { cli_usage(); diff --git a/sapi/phpdbg/phpdbg.c b/sapi/phpdbg/phpdbg.c index ba70244c143f0..17b7513789ee7 100644 --- a/sapi/phpdbg/phpdbg.c +++ b/sapi/phpdbg/phpdbg.c @@ -436,7 +436,7 @@ static zend_always_inline bool phpdbg_is_ignored_opcode(uint8_t opcode) { || opcode == ZEND_DECLARE_CLASS_DELAYED || opcode == ZEND_DECLARE_ANON_CLASS || opcode == ZEND_FAST_RET || opcode == ZEND_TICKS || opcode == ZEND_EXT_STMT || opcode == ZEND_EXT_FCALL_BEGIN || opcode == ZEND_EXT_FCALL_END - || opcode == ZEND_BIND_GLOBAL + || opcode == ZEND_BIND_GLOBAL || opcode == ZEND_BIND_INIT_STATIC_OR_JMP ; } diff --git a/sapi/phpdbg/phpdbg_frame.c b/sapi/phpdbg/phpdbg_frame.c index 644668d8d14e5..ed0d9573436a5 100644 --- a/sapi/phpdbg/phpdbg_frame.c +++ b/sapi/phpdbg/phpdbg_frame.c @@ -171,24 +171,24 @@ static void phpdbg_dump_prototype(zval *tmp) /* {{{ */ { zval *funcname, 
*class, class_zv, *args, *argstmp; - funcname = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("function")); + funcname = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_FUNCTION)); - if ((class = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("object")))) { + if ((class = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_OBJECT)))) { ZVAL_NEW_STR(&class_zv, Z_OBJCE_P(class)->name); class = &class_zv; } else { - class = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("class")); + class = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_CLASS)); } if (class) { - zval *type = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("type")); + zval *type = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_TYPE)); phpdbg_out("%s%s%s(", Z_STRVAL_P(class), Z_STRVAL_P(type), Z_STRVAL_P(funcname)); } else { phpdbg_out("%s(", Z_STRVAL_P(funcname)); } - args = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("args")); + args = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_ARGS)); if (args) { const zend_function *func = NULL; @@ -287,8 +287,8 @@ void phpdbg_dump_backtrace(size_t num) /* {{{ */ phpdbg_out(" (internal function)\n"); } - file = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("file")); - line = zend_hash_str_find(Z_ARRVAL_P(tmp), ZEND_STRL("line")); + file = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_FILE)); + line = zend_hash_find(Z_ARRVAL_P(tmp), ZSTR_KNOWN(ZEND_STR_LINE)); zend_hash_move_forward_ex(Z_ARRVAL(zbacktrace), &position); } diff --git a/sapi/phpdbg/phpdbg_prompt.c b/sapi/phpdbg/phpdbg_prompt.c index f8041c660f266..39befbd64ec32 100644 --- a/sapi/phpdbg/phpdbg_prompt.c +++ b/sapi/phpdbg/phpdbg_prompt.c @@ -715,8 +715,8 @@ static inline void phpdbg_handle_exception(void) /* {{{ */ EG(exception) = NULL; zend_call_known_instance_method_with_0_params(ex->ce->__tostring, ex, &tmp); - file = zval_get_string(zend_read_property(zend_get_exception_base(ex), ex, ZEND_STRL("file"), 1, &rv)); - line = 
zval_get_long(zend_read_property(zend_get_exception_base(ex), ex, ZEND_STRL("line"), 1, &rv)); + file = zval_get_string(zend_read_property_ex(zend_get_exception_base(ex), ex, ZSTR_KNOWN(ZEND_STR_FILE), /* silent */ true, &rv)); + line = zval_get_long(zend_read_property_ex(zend_get_exception_base(ex), ex, ZSTR_KNOWN(ZEND_STR_LINE), /* silent */ true, &rv)); if (EG(exception)) { EG(exception) = NULL; @@ -724,7 +724,7 @@ static inline void phpdbg_handle_exception(void) /* {{{ */ } else { zend_update_property_string(zend_get_exception_base(ex), ex, ZEND_STRL("string"), Z_STRVAL(tmp)); zval_ptr_dtor(&tmp); - msg = zval_get_string(zend_read_property(zend_get_exception_base(ex), ex, ZEND_STRL("string"), 1, &rv)); + msg = zval_get_string(zend_read_property_ex(zend_get_exception_base(ex), ex, ZSTR_KNOWN(ZEND_STR_STRING), /* silent */ true, &rv)); } phpdbg_error("Uncaught %s in %s on line " ZEND_LONG_FMT, ZSTR_VAL(ex->ce->name), ZSTR_VAL(file), line); @@ -1695,9 +1695,9 @@ void phpdbg_execute_ex(zend_execute_data *execute_data) /* {{{ */ PHPDBG_G(handled_exception) = exception; zval rv; - zend_string *file = zval_get_string(zend_read_property(zend_get_exception_base(exception), exception, ZEND_STRL("file"), 1, &rv)); - zend_long line = zval_get_long(zend_read_property(zend_get_exception_base(exception), exception, ZEND_STRL("line"), 1, &rv)); - zend_string *msg = zval_get_string(zend_read_property(zend_get_exception_base(exception), exception, ZEND_STRL("message"), 1, &rv)); + zend_string *file = zval_get_string(zend_read_property_ex(zend_get_exception_base(exception), exception, ZSTR_KNOWN(ZEND_STR_FILE), /* silent */ true, &rv)); + zend_long line = zval_get_long(zend_read_property_ex(zend_get_exception_base(exception), exception, ZSTR_KNOWN(ZEND_STR_LINE), /* silent */ true, &rv)); + zend_string *msg = zval_get_string(zend_read_property_ex(zend_get_exception_base(exception), exception, ZSTR_KNOWN(ZEND_STR_MESSAGE), /* silent */ true, &rv)); phpdbg_error("Uncaught %s in %s 
on line " ZEND_LONG_FMT ": %.*s", ZSTR_VAL(exception->ce->name), ZSTR_VAL(file), line, diff --git a/sapi/phpdbg/phpdbg_watch.c b/sapi/phpdbg/phpdbg_watch.c index 0468d4614fd5d..d4af608ff762b 100644 --- a/sapi/phpdbg/phpdbg_watch.c +++ b/sapi/phpdbg/phpdbg_watch.c @@ -1365,7 +1365,7 @@ static int phpdbg_watchpoint_parse_symtables(char *input, size_t len, int (*call int ret; if (scope && len >= 5 && !memcmp("$this", input, 5)) { - zend_hash_str_add(EG(current_execute_data)->symbol_table, ZEND_STRL("this"), &EG(current_execute_data)->This); + zend_hash_add(EG(current_execute_data)->symbol_table, ZSTR_KNOWN(ZEND_STR_THIS), &EG(current_execute_data)->This); } if (callback == phpdbg_create_array_watchpoint) { diff --git a/sapi/phpdbg/tests/print_001.phpt b/sapi/phpdbg/tests/print_001.phpt index c25c5178fef4c..a981cb0001f67 100644 --- a/sapi/phpdbg/tests/print_001.phpt +++ b/sapi/phpdbg/tests/print_001.phpt @@ -29,7 +29,7 @@ Foo\Bar::Foo: ; (lines=5, args=1, vars=1, tmps=1) ; %s:5-7 L0005 0000 CV0($bar) = RECV 1 -L0006 0001 INIT_NS_FCALL_BY_NAME 1 string("Foo\var_dump") +L0006 0001 INIT_NS_FCALL_BY_NAME 1 string("Foo\\var_dump") L0006 0002 SEND_VAR_EX CV0($bar) 1 L0006 0003 DO_FCALL L0007 0004 RETURN null @@ -44,10 +44,10 @@ prompt> [Context %s (9 ops)] $_main: ; (lines=9, args=0, vars=0, tmps=4) ; %s:1-21 -L0018 0000 V0 = NEW 0 string("Foo\Bar") +L0018 0000 V0 = NEW 0 string("Foo\\Bar") L0018 0001 DO_FCALL L0018 0002 INIT_METHOD_CALL 1 V0 string("Foo") -L0018 0003 SEND_VAL_EX string("test") 1 +L0018 0003 SEND_VAL_EX string("test \"quotes\"") 1 L0018 0004 DO_FCALL L0019 0005 INIT_FCALL %d %d string("foo") L0019 0006 SEND_VAL string("test") 1 @@ -72,6 +72,6 @@ namespace { var_dump(strrev($baz)); } - (new \Foo\Bar)->Foo("test"); + (new \Foo\Bar)->Foo('test "quotes"'); foo("test"); } diff --git a/tests/classes/tostring_001.phpt b/tests/classes/tostring_001.phpt index ddbe4d152dde0..e9ef7b4175832 100644 --- a/tests/classes/tostring_001.phpt +++ 
b/tests/classes/tostring_001.phpt @@ -118,7 +118,7 @@ test2::__toString() Converted ====test7==== test2::__toString() -Cannot access offset of type object on array +Cannot access offset of type test2 on array ====test8==== test2::__toString() string(9) "Converted" diff --git a/tests/lang/static_basic_002.phpt b/tests/lang/static_basic_002.phpt index b6e9856e8222e..a8d700d7bd9c6 100644 --- a/tests/lang/static_basic_002.phpt +++ b/tests/lang/static_basic_002.phpt @@ -3,26 +3,9 @@ Multiple declarations of the same static variable --FILE-- ---EXPECT-- -int(5) -int(11) -int(14) +--EXPECTF-- +Fatal error: Duplicate declaration of static variable $a in %s on line %d