Skip to content

Commit

Permalink
Use batch APIs to create Array and Hash objects
Browse files Browse the repository at this point in the history
Naively appending elements into RArray or RHash is inefficient because
it might cause multiple reallocations and rehashing.

So it's preferable to accumulate all the elements onto a stack, and
then use batch APIs to directly create right-sized containers.

Before:

```
== Parsing activitypub.json (58160 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json   779.000 i/100ms
                  oj   799.000 i/100ms
          Oj::Parser   953.000 i/100ms
           rapidjson   630.000 i/100ms
Calculating -------------------------------------
                json      7.989k (± 0.7%) i/s  (125.17 μs/i) -     40.508k in   5.070571s
                  oj      7.931k (± 1.8%) i/s  (126.09 μs/i) -     39.950k in   5.039171s
          Oj::Parser      9.624k (± 0.7%) i/s  (103.91 μs/i) -     48.603k in   5.050694s
           rapidjson      6.287k (± 0.3%) i/s  (159.05 μs/i) -     31.500k in   5.010181s

Comparison:
                json:     7989.2 i/s
          Oj::Parser:     9623.6 i/s - 1.20x  faster
                  oj:     7930.8 i/s - same-ish: difference falls within error
           rapidjson:     6287.3 i/s - 1.27x  slower

== Parsing twitter.json (567916 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json    66.000 i/100ms
                  oj    62.000 i/100ms
          Oj::Parser    78.000 i/100ms
           rapidjson    55.000 i/100ms
Calculating -------------------------------------
                json    673.530 (± 0.7%) i/s    (1.48 ms/i) -      3.432k in   5.095837s
                  oj    620.473 (± 0.5%) i/s    (1.61 ms/i) -      3.162k in   5.096259s
          Oj::Parser    767.687 (± 0.9%) i/s    (1.30 ms/i) -      3.900k in   5.080601s
           rapidjson    553.048 (± 1.1%) i/s    (1.81 ms/i) -      2.805k in   5.072525s

Comparison:
                json:      673.5 i/s
          Oj::Parser:      767.7 i/s - 1.14x  faster
                  oj:      620.5 i/s - 1.09x  slower
           rapidjson:      553.0 i/s - 1.22x  slower

== Parsing citm_catalog.json (1727030 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json    38.000 i/100ms
                  oj    34.000 i/100ms
          Oj::Parser    47.000 i/100ms
           rapidjson    38.000 i/100ms
Calculating -------------------------------------
                json    381.312 (± 0.5%) i/s    (2.62 ms/i) -      1.938k in   5.082614s
                  oj    328.735 (± 2.1%) i/s    (3.04 ms/i) -      1.666k in   5.070407s
          Oj::Parser    458.938 (± 0.9%) i/s    (2.18 ms/i) -      2.303k in   5.018529s
           rapidjson    376.744 (± 1.3%) i/s    (2.65 ms/i) -      1.900k in   5.044113s

Comparison:
                json:      381.3 i/s
          Oj::Parser:      458.9 i/s - 1.20x  faster
           rapidjson:      376.7 i/s - same-ish: difference falls within error
                  oj:      328.7 i/s - 1.16x  slower
```

After:

```
== Parsing activitypub.json (58160 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json   960.000 i/100ms
                  oj   796.000 i/100ms
          Oj::Parser   969.000 i/100ms
           rapidjson   636.000 i/100ms
Calculating -------------------------------------
                json      8.957k (± 0.5%) i/s  (111.65 μs/i) -     45.120k in   5.037777s
                  oj      7.966k (± 0.5%) i/s  (125.53 μs/i) -     40.596k in   5.096207s
          Oj::Parser      9.579k (± 0.3%) i/s  (104.39 μs/i) -     48.450k in   5.057822s
           rapidjson      6.261k (± 8.9%) i/s  (159.73 μs/i) -     31.800k in   5.182342s

Comparison:
                json:     8956.5 i/s
          Oj::Parser:     9579.3 i/s - 1.07x  faster
                  oj:     7966.2 i/s - 1.12x  slower
           rapidjson:     6260.6 i/s - 1.43x  slower

== Parsing twitter.json (567916 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json    82.000 i/100ms
                  oj    62.000 i/100ms
          Oj::Parser    77.000 i/100ms
           rapidjson    55.000 i/100ms
Calculating -------------------------------------
                json    803.998 (± 0.6%) i/s    (1.24 ms/i) -      4.100k in   5.099692s
                  oj    608.292 (± 0.8%) i/s    (1.64 ms/i) -      3.100k in   5.096566s
          Oj::Parser    760.206 (± 0.5%) i/s    (1.32 ms/i) -      3.850k in   5.064529s
           rapidjson    549.562 (± 0.5%) i/s    (1.82 ms/i) -      2.750k in   5.004166s

Comparison:
                json:      804.0 i/s
          Oj::Parser:      760.2 i/s - 1.06x  slower
                  oj:      608.3 i/s - 1.32x  slower
           rapidjson:      549.6 i/s - 1.46x  slower

== Parsing citm_catalog.json (1727030 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json    43.000 i/100ms
                  oj    34.000 i/100ms
          Oj::Parser    47.000 i/100ms
           rapidjson    36.000 i/100ms
Calculating -------------------------------------
                json    447.336 (± 0.9%) i/s    (2.24 ms/i) -      2.279k in   5.094945s
                  oj    336.266 (± 2.4%) i/s    (2.97 ms/i) -      1.700k in   5.058625s
          Oj::Parser    466.559 (± 1.3%) i/s    (2.14 ms/i) -      2.350k in   5.037637s
           rapidjson    392.039 (± 0.8%) i/s    (2.55 ms/i) -      1.980k in   5.050826s

Comparison:
                json:      447.3 i/s
          Oj::Parser:      466.6 i/s - 1.04x  faster
           rapidjson:      392.0 i/s - 1.14x  slower
                  oj:      336.3 i/s - 1.33x  slower
```
  • Loading branch information
byroot committed Nov 3, 2024
1 parent 87b063a commit d0d4c1d
Show file tree
Hide file tree
Showing 4 changed files with 539 additions and 166 deletions.
12 changes: 6 additions & 6 deletions ext/json/ext/fbuffer/fbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ typedef unsigned char _Bool;
#endif

enum fbuffer_type {
HEAP = 0,
STACK = 1,
FBUFFER_HEAP_ALLOCATED = 0,
FBUFFER_STACK_ALLOCATED = 1,
};

typedef struct FBufferStruct {
Expand Down Expand Up @@ -73,15 +73,15 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *
{
fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT;
if (stack_buffer) {
fb->type = STACK;
fb->type = FBUFFER_STACK_ALLOCATED;
fb->ptr = stack_buffer;
fb->capa = stack_buffer_size;
}
}

static void fbuffer_free(FBuffer *fb)
{
if (fb->ptr && fb->type == HEAP) {
if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) {
ruby_xfree(fb->ptr);
}
}
Expand All @@ -105,10 +105,10 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
for (required = fb->capa; requested > required - fb->len; required <<= 1);

if (required > fb->capa) {
if (fb->type == STACK) {
if (fb->type == FBUFFER_STACK_ALLOCATED) {
const char *old_buffer = fb->ptr;
fb->ptr = ALLOC_N(char, required);
fb->type = HEAP;
fb->type = FBUFFER_HEAP_ALLOCATED;
MEMCPY(fb->ptr, old_buffer, char, fb->len);
} else {
REALLOC_N(fb->ptr, char, required);
Expand Down
5 changes: 4 additions & 1 deletion ext/json/ext/parser/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
require 'mkmf'

have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0
have_func("rb_gc_mark_locations") # Missing on TruffleRuby
have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby
have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby

append_cflags("-std=c99")

create_makefile 'json/ext/parser'
Loading

0 comments on commit d0d4c1d

Please sign in to comment.