Skip to content

Commit

Permalink
string.c: use str_enc_fastpath in TERM_LEN
Browse files Browse the repository at this point in the history
Not having to fetch the rb_encoding saves a significant
amount of time.

Additionally, even when we have to fetch it, we can do
it faster using `ENCODING_GET` rather than `rb_enc_get`.

```
compare-ruby: ruby 3.2.0dev (2022-07-19T08:41:40Z master cb9fd92) [arm64-darwin21]
built-ruby: ruby 3.2.0dev (2022-07-21T11:16:16Z faster-buffer-conc.. 4f001f0748) [arm64-darwin21]
warming up...

|                      |compare-ruby|built-ruby|
|:---------------------|-----------:|---------:|
|binary_concat_utf8    |    510.580k|  565.600k|
|                      |           -|     1.11x|
|binary_concat_binary  |    512.653k|  571.483k|
|                      |           -|     1.11x|
|utf8_concat_utf8      |    511.396k|  566.879k|
|                      |           -|     1.11x|
```
  • Loading branch information
byroot committed Jul 21, 2022
1 parent 80672b2 commit f954c5d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 18 deletions.
7 changes: 4 additions & 3 deletions benchmark/string_concat.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
prelude: |
CHUNK = "a" * 64
BCHUNK = "a".b * 64
GC.disable # GC causes a lot of variance
benchmark:
binary_concat_utf8: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
Expand All @@ -11,7 +12,7 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
binary_concat_binary: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
Expand All @@ -21,7 +22,7 @@ benchmark:
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
utf8_concat_utf8: |
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
Expand All @@ -31,4 +32,4 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
30 changes: 15 additions & 15 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,21 @@ VALUE rb_cSymbol;
}\
} while (0)

/* Presumably the terminator length for `str`: the result of
 * rb_enc_mbminlen() applied to the encoding returned by rb_enc_get(str). */
#define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
/*
 * Tell whether `str` carries one of the three encodings that the
 * overwhelming majority of strings use: ASCII-8BIT, UTF-8 or US-ASCII.
 *
 * Returns true when the inlined encoding index of `str` matches one of
 * those, false for every other index.
 */
static inline bool
str_enc_fastpath(VALUE str)
{
    const int encindex = ENCODING_GET_INLINED(str);

    return encindex == ENCINDEX_ASCII_8BIT
        || encindex == ENCINDEX_UTF_8
        || encindex == ENCINDEX_US_ASCII;
}

/*
 * Presumably the terminator length for `str`.
 * Yields 1 when str_enc_fastpath(str) is true; otherwise yields
 * rb_enc_mbminlen() of the encoding looked up by index via ENCODING_GET(str).
 * NOTE: `str` appears twice in the expansion, so it may be evaluated more
 * than once; pass an argument without side effects.
 */
#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
#define TERM_FILL(ptr, termlen) do {\
char *const term_fill_ptr = (ptr);\
const int term_fill_len = (termlen);\
Expand Down Expand Up @@ -3311,20 +3325,6 @@ rb_str_buf_cat_ascii(VALUE str, const char *ptr)
}
}

/*
 * Tell whether `str` carries one of the three encodings that the
 * overwhelming majority of strings use: ASCII-8BIT, UTF-8 or US-ASCII.
 *
 * Returns true when the inlined encoding index of `str` matches one of
 * those, false for every other index.
 */
static inline bool
str_enc_fastpath(VALUE str)
{
    const int encindex = ENCODING_GET_INLINED(str);

    return encindex == ENCINDEX_ASCII_8BIT
        || encindex == ENCINDEX_UTF_8
        || encindex == ENCINDEX_US_ASCII;
}

VALUE
rb_str_buf_append(VALUE str, VALUE str2)
{
Expand Down

0 comments on commit f954c5d

Please sign in to comment.