Skip to content

Commit 82cfbfc

Browse files
committed
Add str_extend()
This pre-allocates a large string for use with concatenation. Callers must keep the refcount at 1 if they want to benefit from this. Note that it is generally pointless to call str_extend("", $size) (i.e. to extend an empty string), because concatenation, for example, special-cases empty strings and simply uses the other operand. (This is also why there is no str_alloc($size): its result would be pointless, as it would be thrown away during the concat op.) Because zend_string_extend() now uses perealloc3(), the general case of appending a single byte in a loop also gets a very slight performance improvement of about 8%. In particular, zend_string_extend() will mostly hit the fast path of zend_mm_realloc_heap() for huge allocations. When using str_extend(), appending a single byte in a loop is 33% faster than the old baseline. The tested loop is: $str = str_extend("a", 1 << 26); for ($i = 0; $i < 1 << 25; ++$i) { $str .= "a"; }
1 parent 9762c44 commit 82cfbfc

File tree

7 files changed

+90
-11
lines changed

7 files changed

+90
-11
lines changed

Zend/zend_alloc.c

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,7 +1671,10 @@ static zend_never_inline void *zend_mm_realloc_huge(zend_mm_heap *heap, void *pt
16711671
return zend_mm_realloc_slow(heap, ptr, size, MIN(old_size, copy_size) ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
16721672
}
16731673

1674-
static zend_always_inline void *zend_mm_realloc_heap(zend_mm_heap *heap, void *ptr, size_t size, bool use_copy_size, size_t copy_size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
1674+
/* Modes accepted by the `mode` parameter of zend_mm_realloc_heap()
 * (behavior as far as the visible hunks of that function show):
 *  - EREALLOC_DEFAULT:  the copy_size argument is not consulted when moving
 *                       between small bins; `size` (truncation) or `old_size`
 *                       (growth) bytes are copied.
 *  - EREALLOC_COPY:     the number of bytes copied is additionally capped by
 *                       copy_size (MIN(..., copy_size)).
 *  - EREALLOC_NOSHRINK: copy is capped like EREALLOC_COPY, and an existing
 *                       block that already fits the request is kept: huge
 *                       blocks are returned unchanged when size <= old_size,
 *                       small-bin truncation is skipped, and large runs are
 *                       kept when new_size <= old_size. */
#define EREALLOC_DEFAULT 0
1675+
#define EREALLOC_COPY 1
1676+
#define EREALLOC_NOSHRINK 2
1677+
static zend_always_inline void *zend_mm_realloc_heap(zend_mm_heap *heap, void *ptr, size_t size, int mode, size_t copy_size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
16751678
{
16761679
size_t page_offset;
16771680
size_t old_size;
@@ -1686,6 +1689,12 @@ static zend_always_inline void *zend_mm_realloc_heap(zend_mm_heap *heap, void *p
16861689
if (EXPECTED(ptr == NULL)) {
16871690
return _zend_mm_alloc(heap, size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
16881691
} else {
1692+
if (mode == EREALLOC_NOSHRINK) {
1693+
old_size = zend_mm_get_huge_block_size(heap, ptr ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
1694+
if (EXPECTED(size <= old_size)) {
1695+
return ptr;
1696+
}
1697+
}
16891698
return zend_mm_realloc_huge(heap, ptr, size, copy_size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
16901699
}
16911700
} else {
@@ -1713,10 +1722,10 @@ static zend_always_inline void *zend_mm_realloc_heap(zend_mm_heap *heap, void *p
17131722
/* Check if requested size fits into current bin */
17141723
if (size <= old_size) {
17151724
/* Check if truncation is necessary */
1716-
if (old_bin_num > 0 && size < bin_data_size[old_bin_num - 1]) {
1725+
if (mode != EREALLOC_NOSHRINK && old_bin_num > 0 && size < bin_data_size[old_bin_num - 1]) {
17171726
/* truncation */
17181727
ret = zend_mm_alloc_small(heap, ZEND_MM_SMALL_SIZE_TO_BIN(size) ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
1719-
copy_size = use_copy_size ? MIN(size, copy_size) : size;
1728+
copy_size = mode != EREALLOC_DEFAULT ? MIN(size, copy_size) : size;
17201729
memcpy(ret, ptr, copy_size);
17211730
zend_mm_free_small(heap, ptr, old_bin_num);
17221731
} else {
@@ -1731,7 +1740,7 @@ static zend_always_inline void *zend_mm_realloc_heap(zend_mm_heap *heap, void *p
17311740
size_t orig_peak = heap->peak;
17321741
#endif
17331742
ret = zend_mm_alloc_small(heap, ZEND_MM_SMALL_SIZE_TO_BIN(size) ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
1734-
copy_size = use_copy_size ? MIN(old_size, copy_size) : old_size;
1743+
copy_size = mode != EREALLOC_DEFAULT ? MIN(old_size, copy_size) : old_size;
17351744
memcpy(ret, ptr, copy_size);
17361745
zend_mm_free_small(heap, ptr, old_bin_num);
17371746
#if ZEND_MM_STAT
@@ -1759,7 +1768,7 @@ static zend_always_inline void *zend_mm_realloc_heap(zend_mm_heap *heap, void *p
17591768
old_size = ZEND_MM_LRUN_PAGES(info) * ZEND_MM_PAGE_SIZE;
17601769
if (size > ZEND_MM_MAX_SMALL_SIZE && size <= ZEND_MM_MAX_LARGE_SIZE) {
17611770
new_size = ZEND_MM_ALIGNED_SIZE_EX(size, ZEND_MM_PAGE_SIZE);
1762-
if (new_size == old_size) {
1771+
if (mode == EREALLOC_NOSHRINK ? new_size <= old_size : (new_size == old_size)) {
17631772
#if ZEND_DEBUG
17641773
dbg = zend_mm_get_debug_info(heap, ptr);
17651774
dbg->size = real_size;
@@ -2579,12 +2588,17 @@ ZEND_API void ZEND_FASTCALL _zend_mm_free(zend_mm_heap *heap, void *ptr ZEND_FIL
25792588

25802589
void* ZEND_FASTCALL _zend_mm_realloc(zend_mm_heap *heap, void *ptr, size_t size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
25812590
{
2582-
return zend_mm_realloc_heap(heap, ptr, size, 0, size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
2591+
return zend_mm_realloc_heap(heap, ptr, size, EREALLOC_DEFAULT, size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
25832592
}
25842593

25852594
void* ZEND_FASTCALL _zend_mm_realloc2(zend_mm_heap *heap, void *ptr, size_t size, size_t copy_size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
25862595
{
2587-
return zend_mm_realloc_heap(heap, ptr, size, 1, copy_size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
2596+
return zend_mm_realloc_heap(heap, ptr, size, EREALLOC_COPY, copy_size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
2597+
}
2598+
2599+
/* Reallocates `ptr` on `heap` by calling zend_mm_realloc_heap() in
 * EREALLOC_NOSHRINK mode, forwarding `size` and `copy_size` unchanged. */
void* ZEND_FASTCALL _zend_mm_realloc3(zend_mm_heap *heap, void *ptr, size_t size, size_t copy_size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
2600+
{
2601+
return zend_mm_realloc_heap(heap, ptr, size, EREALLOC_NOSHRINK, copy_size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
25882602
}
25892603

25902604
ZEND_API size_t ZEND_FASTCALL _zend_mm_block_size(zend_mm_heap *heap, void *ptr ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
@@ -2801,7 +2815,7 @@ ZEND_API void* ZEND_FASTCALL _erealloc(void *ptr, size_t size ZEND_FILE_LINE_DC
28012815
return AG(mm_heap)->custom_heap._realloc(ptr, size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
28022816
}
28032817
#endif
2804-
return zend_mm_realloc_heap(AG(mm_heap), ptr, size, 0, size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
2818+
return zend_mm_realloc_heap(AG(mm_heap), ptr, size, EREALLOC_DEFAULT, size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
28052819
}
28062820

28072821
ZEND_API void* ZEND_FASTCALL _erealloc2(void *ptr, size_t size, size_t copy_size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
@@ -2811,7 +2825,17 @@ ZEND_API void* ZEND_FASTCALL _erealloc2(void *ptr, size_t size, size_t copy_size
28112825
return AG(mm_heap)->custom_heap._realloc(ptr, size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
28122826
}
28132827
#endif
2814-
return zend_mm_realloc_heap(AG(mm_heap), ptr, size, 1, copy_size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
2828+
return zend_mm_realloc_heap(AG(mm_heap), ptr, size, EREALLOC_COPY, copy_size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
2829+
}
2830+
2831+
/* Reallocates `ptr` on AG(mm_heap) by calling zend_mm_realloc_heap() in
 * EREALLOC_NOSHRINK mode with the given `copy_size`.
 * When ZEND_MM_CUSTOM is enabled and a custom heap is in use, the request is
 * forwarded to the custom heap's _realloc handler instead; that handler only
 * receives `ptr` and `size`, so `copy_size` and the no-shrink mode are not
 * passed on in that case. */
ZEND_API void* ZEND_FASTCALL _erealloc3(void *ptr, size_t size, size_t copy_size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)
2832+
{
2833+
#if ZEND_MM_CUSTOM
2834+
if (UNEXPECTED(AG(mm_heap)->use_custom_heap)) {
2835+
return AG(mm_heap)->custom_heap._realloc(ptr, size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
2836+
}
2837+
#endif
2838+
return zend_mm_realloc_heap(AG(mm_heap), ptr, size, EREALLOC_NOSHRINK, copy_size ZEND_FILE_LINE_RELAY_CC ZEND_FILE_LINE_ORIG_RELAY_CC);
28152839
}
28162840

28172841
ZEND_API size_t ZEND_FASTCALL _zend_mem_block_size(void *ptr ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC)

Zend/zend_alloc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ ZEND_API void ZEND_FASTCALL _efree(void *ptr ZEND_FILE_LINE_DC ZEND_FILE_LINE_
7171
ZEND_API ZEND_ATTRIBUTE_MALLOC void* ZEND_FASTCALL _ecalloc(size_t nmemb, size_t size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC) ZEND_ATTRIBUTE_ALLOC_SIZE2(1,2);
7272
ZEND_API void* ZEND_FASTCALL _erealloc(void *ptr, size_t size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC) ZEND_ATTRIBUTE_ALLOC_SIZE(2);
7373
ZEND_API void* ZEND_FASTCALL _erealloc2(void *ptr, size_t size, size_t copy_size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC) ZEND_ATTRIBUTE_ALLOC_SIZE(2);
74+
ZEND_API void* ZEND_FASTCALL _erealloc3(void *ptr, size_t size, size_t copy_size ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC) ZEND_ATTRIBUTE_ALLOC_SIZE(2);
7475
ZEND_API void* ZEND_FASTCALL _safe_erealloc(void *ptr, size_t nmemb, size_t size, size_t offset ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC);
7576
ZEND_API void* ZEND_FASTCALL _safe_realloc(void *ptr, size_t nmemb, size_t size, size_t offset);
7677
ZEND_API ZEND_ATTRIBUTE_MALLOC char* ZEND_FASTCALL _estrdup(const char *s ZEND_FILE_LINE_DC ZEND_FILE_LINE_ORIG_DC);
@@ -158,6 +159,7 @@ ZEND_API void ZEND_FASTCALL _efree_huge(void *, size_t size);
158159
#define ecalloc(nmemb, size) _ecalloc((nmemb), (size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
159160
#define erealloc(ptr, size) _erealloc((ptr), (size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
160161
#define erealloc2(ptr, size, copy_size) _erealloc2((ptr), (size), (copy_size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
162+
#define erealloc3(ptr, size, copy_size) _erealloc3((ptr), (size), (copy_size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
161163
#define safe_erealloc(ptr, nmemb, size, offset) _safe_erealloc((ptr), (nmemb), (size), (offset) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
162164
#define erealloc_recoverable(ptr, size) _erealloc((ptr), (size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
163165
#define erealloc2_recoverable(ptr, size, copy_size) _erealloc2((ptr), (size), (copy_size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC)
@@ -200,6 +202,7 @@ ZEND_API ZEND_ATTRIBUTE_MALLOC char * __zend_strdup(const char *s);
200202
#define pecalloc(nmemb, size, persistent) ((persistent)?__zend_calloc((nmemb), (size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC):ecalloc((nmemb), (size)))
201203
#define perealloc(ptr, size, persistent) ((persistent)?__zend_realloc((ptr), (size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC):erealloc((ptr), (size)))
202204
#define perealloc2(ptr, size, copy_size, persistent) ((persistent)?__zend_realloc((ptr), (size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC):erealloc2((ptr), (size), (copy_size)))
205+
#define perealloc3(ptr, size, copy_size, persistent) ((persistent)?__zend_realloc((ptr), (size) ZEND_FILE_LINE_CC ZEND_FILE_LINE_EMPTY_CC):erealloc3((ptr), (size), (copy_size)))
203206
#define safe_perealloc(ptr, nmemb, size, offset, persistent) ((persistent)?_safe_realloc((ptr), (nmemb), (size), (offset)):safe_erealloc((ptr), (nmemb), (size), (offset)))
204207
#define perealloc_recoverable(ptr, size, persistent) ((persistent)?realloc((ptr), (size)):erealloc_recoverable((ptr), (size)))
205208
/* NOTE(review): the expansion references `copy_size`, which is not a parameter of this macro; it can only compile where a variable named `copy_size` happens to be in scope at the call site. Looks like a missing `copy_size` parameter (compare perealloc2 above) — confirm before relying on this macro. */
#define perealloc2_recoverable(ptr, size, persistent) ((persistent)?realloc((ptr), (size)):erealloc2_recoverable((ptr), (size), (copy_size)))

Zend/zend_string.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ static zend_always_inline zend_string *zend_string_extend(zend_string *s, size_t
261261
ZEND_ASSERT(len >= ZSTR_LEN(s));
262262
if (!ZSTR_IS_INTERNED(s)) {
263263
if (EXPECTED(GC_REFCOUNT(s) == 1)) {
264-
ret = (zend_string *)perealloc(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), persistent);
264+
ret = (zend_string *)perealloc3(s, ZEND_MM_ALIGNED_SIZE(_ZSTR_STRUCT_SIZE(len)), _ZSTR_STRUCT_SIZE(ZSTR_LEN(s)), persistent);
265265
ZSTR_LEN(ret) = len;
266266
zend_string_forget_hash_val(ret);
267267
return ret;

ext/standard/basic_functions.stub.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2448,6 +2448,11 @@ function substr_replace(array|string $string, array|string $replace, array|int $
24482448
*/
24492449
function quotemeta(string $string): string {}
24502450

2451+
/**
2452+
* @compile-time-eval
2453+
*/
2454+
function str_extend(string $string, int $size): string {}
2455+
24512456
/** @compile-time-eval */
24522457
function ord(string $character): int {}
24532458

ext/standard/basic_functions_arginfo.h

Lines changed: 8 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ext/standard/string.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2643,6 +2643,28 @@ PHP_FUNCTION(quotemeta)
26432643
}
26442644
/* }}} */
26452645

2646+
PHP_FUNCTION(str_extend)
2647+
{
2648+
zend_string *str;
2649+
zend_long size;
2650+
2651+
ZEND_PARSE_PARAMETERS_START(2, 2)
2652+
Z_PARAM_STR(str)
2653+
Z_PARAM_LONG(size)
2654+
ZEND_PARSE_PARAMETERS_END();
2655+
2656+
size_t len = ZSTR_LEN(str);
2657+
if (len > size) {
2658+
zend_argument_value_error(2, "must not be smaller than the input string");
2659+
RETURN_THROWS();
2660+
}
2661+
2662+
ZVAL_UNDEF(ZEND_CALL_ARG(execute_data, 0)); // avoid copies, so that we may benefit from the RC=1 optimization
2663+
RETVAL_STR(zend_string_extend(str, size, 0));
2664+
Z_STRLEN_P(return_value) = len;
2665+
}
2666+
/* }}} */
2667+
26462668
/* {{{ Returns ASCII value of character
26472669
Warning: This function is special-cased by zend_compile.c and so is bypassed for constant string argument */
26482670
PHP_FUNCTION(ord)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
--TEST--
2+
Append to string allocated with str_extend()
3+
--FILE--
4+
<?php
5+
6+
$str = str_extend("a", 1 << 22);
7+
for ($i = 0; $i < 1 << 21; ++$i) {
8+
$str .= "a";
9+
}
10+
11+
var_dump(array_filter(count_chars($str)));
12+
13+
?>
14+
--EXPECT--
15+
array(1) {
16+
[97]=>
17+
int(2097153)
18+
}

0 commit comments

Comments
 (0)