Skip to content

Commit

Permalink
rb_str_buf_append: add a fast path for ENC_CODERANGE_VALID
Browse files Browse the repository at this point in the history
If the RHS has a valid coderange (ENC_CODERANGE_VALID), and both strings
have the same encoding, we can use the fast path.

However, we need to update the LHS coderange after appending if it is not already ENC_CODERANGE_VALID.

```
compare-ruby: ruby 3.2.0dev (2022-07-21T14:46:32Z master cdbb9b8) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-21T15:08:55Z string-concat-vali.. 61c61c564a) [arm64-darwin21]
warming up...

|                    |compare-ruby|built-ruby|
|:-------------------|-----------:|---------:|
|binary_concat_7bit  |    550.089k|  568.002k|
|                    |           -|     1.03x|
|utf8_concat_7bit    |    552.799k|  563.395k|
|                    |           -|     1.02x|
|utf8_concat_UTF8    |    329.423k|  410.754k|
|                    |           -|     1.25x|
```
  • Loading branch information
byroot committed Jul 25, 2022
1 parent 307835f commit 11a2772
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
26 changes: 13 additions & 13 deletions benchmark/string_concat.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
prelude: |
CHUNK = "a" * 64
BCHUNK = "a".b * 64
UCHUNK = "€" * 32
GC.disable # GC causes a lot of variance
benchmark:
binary_concat_utf8: |
binary_concat_7bit: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
Expand All @@ -13,17 +13,7 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
binary_concat_binary: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
utf8_concat_utf8: |
utf8_concat_7bit: |
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
Expand All @@ -33,3 +23,13 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
utf8_concat_UTF8: |
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
buffer << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK << UCHUNK
19 changes: 16 additions & 3 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -3329,9 +3329,22 @@ VALUE
rb_str_buf_append(VALUE str, VALUE str2)
{
int str2_cr = rb_enc_str_coderange(str2);
if (str2_cr == ENC_CODERANGE_7BIT && str_enc_fastpath(str)) {
str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
return str;

if (str_enc_fastpath(str)) {
switch (str2_cr) {
case ENC_CODERANGE_7BIT:
str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
return str;
case ENC_CODERANGE_VALID:
if (ENCODING_GET_INLINED(str) == ENCODING_GET_INLINED(str2)) {
str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2), true);
int str_cr = ENC_CODERANGE(str);
if (UNLIKELY(str_cr != ENC_CODERANGE_VALID)) {
ENC_CODERANGE_SET(str, RB_ENC_CODERANGE_AND(str_cr, str2_cr));
}
return str;
}
}
}

rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
Expand Down

0 comments on commit 11a2772

Please sign in to comment.