Skip to content

Commit 0f80cf6

Browse files
committed
Merge pull request #1931 from pguyot/w43/private-append
Implement support for private_append. These changes are made under both the "Apache 2.0" and the "GNU Lesser General Public License 2.1 or later" license terms (dual license). SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
2 parents da726c2 + 322301b commit 0f80cf6

File tree

14 files changed

+394
-161
lines changed

14 files changed

+394
-161
lines changed

libs/jit/src/jit.erl

Lines changed: 173 additions & 113 deletions
Large diffs are not rendered by default.

libs/jit/src/jit_aarch64.erl

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -941,7 +941,7 @@ if_block_cond(
941941
) when ?IS_GPR(Reg) ->
942942
% AND with mask
943943
OffsetBefore = StreamModule:offset(Stream0),
944-
State1 = and_(State0, Reg, Mask),
944+
{State1, Reg} = and_(State0, RegTuple, Mask),
945945
Stream1 = State1#state.stream,
946946
% Compare with value
947947
I2 = jit_aarch64_asm:cmp(Reg, Val),
@@ -1953,9 +1953,18 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA,
19531953
%% @param Val immediate value to AND
19541954
%% @return Updated backend state
19551955
%%-----------------------------------------------------------------------------
1956-
-spec and_(state(), aarch64_register(), integer()) -> state().
1957-
and_(State, Reg, Val) ->
1958-
op_imm(State, and_, Reg, Reg, Val).
1956+
and_(State, {free, Reg}, Val) ->
1957+
NewState = op_imm(State, and_, Reg, Reg, Val),
1958+
{NewState, Reg};
1959+
and_(
1960+
#state{available_regs = [ResultReg | T], used_regs = UR} = State,
1961+
Reg,
1962+
Val
1963+
) ->
1964+
NewState = op_imm(
1965+
State#state{available_regs = T, used_regs = [ResultReg | UR]}, and_, ResultReg, Reg, Val
1966+
),
1967+
{NewState, ResultReg}.
19591968

19601969
%%-----------------------------------------------------------------------------
19611970
%% @doc Perform bitwise OR of a register with an immediate value.

libs/jit/src/jit_armv6m.erl

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
-include_lib("jit.hrl").
7676

7777
-include("primitives.hrl").
78+
-include("term.hrl").
7879

7980
-define(ASSERT(Expr), true = Expr).
8081

@@ -1310,7 +1311,7 @@ if_block_cond(
13101311
I1 = jit_armv6m_asm:mov(Temp, Reg),
13111312
Stream1 = StreamModule:append(Stream0, I1),
13121313
State1 = State0#state{stream = Stream1},
1313-
State2 = and_(State1#state{available_regs = AT}, Temp, Mask),
1314+
{State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask),
13141315
Stream2 = State2#state.stream,
13151316
% Compare with value
13161317
I2 = jit_armv6m_asm:cmp(Temp, Val),
@@ -1329,7 +1330,7 @@ if_block_cond(
13291330
) when ?IS_GPR(Reg) ->
13301331
% AND with mask
13311332
OffsetBefore = StreamModule:offset(Stream0),
1332-
State1 = and_(State0, Reg, Mask),
1333+
{State1, Reg} = and_(State0, RegTuple, Mask),
13331334
Stream1 = State1#state.stream,
13341335
% Compare with value
13351336
I2 = jit_armv6m_asm:cmp(Reg, Val),
@@ -2517,34 +2518,34 @@ get_module_index(
25172518
%% JIT currently calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to
25182519
%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool
25192520
%% by using BICS for -4.
2520-
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) ->
2521+
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) ->
25212522
I1 = jit_armv6m_asm:lsls(Reg, Reg, 8),
25222523
I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8),
25232524
Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
2524-
State0#state{stream = Stream1};
2525+
{State0#state{stream = Stream1}, Reg};
25252526
and_(
25262527
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
2527-
Reg,
2528+
{free, Reg},
25282529
Val
25292530
) when Val < 0 andalso Val >= -256 ->
25302531
State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
25312532
Stream1 = State1#state.stream,
25322533
I = jit_armv6m_asm:bics(Reg, Temp),
25332534
Stream2 = StreamModule:append(Stream1, I),
2534-
State1#state{available_regs = [Temp | AT], stream = Stream2};
2535+
{State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
25352536
and_(
25362537
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
2537-
Reg,
2538+
{free, Reg},
25382539
Val
25392540
) ->
25402541
State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
25412542
Stream1 = State1#state.stream,
25422543
I = jit_armv6m_asm:ands(Reg, Temp),
25432544
Stream2 = StreamModule:append(Stream1, I),
2544-
State1#state{available_regs = [Temp | AT], stream = Stream2};
2545+
{State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg};
25452546
and_(
25462547
#state{stream_module = StreamModule, available_regs = []} = State0,
2547-
Reg,
2548+
{free, Reg},
25482549
Val
25492550
) when Val < 0 andalso Val >= -256 ->
25502551
% No available registers, use r0 as temp and save it to r12
@@ -2561,10 +2562,10 @@ and_(
25612562
% Restore r0 from r12
25622563
Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
25632564
Stream4 = StreamModule:append(Stream3, Restore),
2564-
State0#state{stream = Stream4};
2565+
{State0#state{stream = Stream4}, Reg};
25652566
and_(
25662567
#state{stream_module = StreamModule, available_regs = []} = State0,
2567-
Reg,
2568+
{free, Reg},
25682569
Val
25692570
) ->
25702571
% No available registers, use r0 as temp and save it to r12
@@ -2581,7 +2582,17 @@ and_(
25812582
% Restore r0 from r12
25822583
Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
25832584
Stream4 = StreamModule:append(Stream3, Restore),
2584-
State0#state{stream = Stream4}.
2585+
{State0#state{stream = Stream4}, Reg};
2586+
and_(
2587+
#state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} =
2588+
State0,
2589+
Reg,
2590+
?TERM_PRIMARY_CLEAR_MASK
2591+
) ->
2592+
I1 = jit_armv6m_asm:lsrs(ResultReg, Reg, 2),
2593+
I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2),
2594+
Stream1 = StreamModule:append(State0#state.stream, <<I1/binary, I2/binary>>),
2595+
{State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}.
25852596

25862597
or_(
25872598
#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,

libs/jit/src/jit_x86_64.erl

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1834,15 +1834,38 @@ get_module_index(
18341834
Reg
18351835
}.
18361836

1837-
and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
1837+
and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Val) when
1838+
?IS_GPR(Reg)
1839+
->
18381840
% 32 bits instructions on x86-64 zero the high 32 bits
18391841
I1 =
18401842
if
18411843
Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, Reg);
18421844
true -> jit_x86_64_asm:andq(Val, Reg)
18431845
end,
18441846
Stream1 = StreamModule:append(Stream0, I1),
1845-
State#state{stream = Stream1}.
1847+
{State#state{stream = Stream1}, Reg};
1848+
and_(
1849+
#state{
1850+
stream_module = StreamModule,
1851+
available_regs = [ResultReg | T],
1852+
used_regs = UR,
1853+
stream = Stream0
1854+
} = State,
1855+
Reg,
1856+
Val
1857+
) when
1858+
?IS_GPR(Reg)
1859+
->
1860+
I1 = jit_x86_64_asm:movq(Reg, ResultReg),
1861+
I2 =
1862+
if
1863+
Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, ResultReg);
1864+
true -> jit_x86_64_asm:andq(Val, ResultReg)
1865+
end,
1866+
Stream1 = StreamModule:append(Stream0, I1),
1867+
Stream2 = StreamModule:append(Stream1, I2),
1868+
{State#state{stream = Stream2, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.
18461869

18471870
or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
18481871
I1 = jit_x86_64_asm:orq(Val, Reg),

libs/jit/src/primitives.hrl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
-define(PRIM_BITSTRING_GET_UTF32, 69).
9393
-define(PRIM_TERM_COPY_MAP, 70).
9494
-define(PRIM_STACKTRACE_BUILD, 71).
95+
-define(PRIM_TERM_REUSE_BINARY, 72).
9596

9697
% Parameters to ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS
9798
% -define(MEMORY_NO_SHRINK, 0).

libs/jit/src/term.hrl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,5 @@
7474
-define(REFC_BINARY_MIN_64, 64).
7575
-define(TERM_BOXED_REFC_BINARY_SIZE, 6).
7676
-define(BINARY_HEADER_SIZE, 2).
77+
78+
-define(TERM_INVALID_TERM, 0).

src/libAtomVM/jit.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1301,6 +1301,12 @@ static term jit_term_create_empty_binary(Context *ctx, size_t len)
13011301
return term_create_empty_binary(len, &ctx->heap, ctx->global);
13021302
}
13031303

1304+
static term jit_term_reuse_binary(Context *ctx, term src, size_t len)
1305+
{
1306+
TRACE("jit_term_reuse_binary: src=0x%lx, len=%d\n", src, (int) len);
1307+
return term_reuse_binary(src, len, &ctx->heap, ctx->global);
1308+
}
1309+
13041310
static int jit_decode_flags_list(Context *ctx, JITState *jit_state, term flags)
13051311
{
13061312
int flags_value = 0;
@@ -1734,7 +1740,8 @@ const ModuleNativeInterface module_native_interface = {
17341740
jit_bitstring_get_utf16,
17351741
jit_bitstring_get_utf32,
17361742
term_copy_map,
1737-
jit_stacktrace_build
1743+
jit_stacktrace_build,
1744+
jit_term_reuse_binary
17381745
};
17391746

17401747
#endif

src/libAtomVM/jit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ struct ModuleNativeInterface
158158
term (*bitstring_get_utf32)(term src, int flags_value);
159159
term (*term_copy_map)(Context *ctx, term src);
160160
term (*stacktrace_build)(Context *ctx);
161+
term (*term_reuse_binary)(Context *ctx, term src, size_t len);
161162
};
162163

163164
extern const ModuleNativeInterface module_native_interface;

src/libAtomVM/opcodesswitch.h

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4074,6 +4074,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
40744074
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
40754075
}
40764076
term t = term_create_empty_binary(size_val, &ctx->heap, ctx->global);
4077+
if (UNLIKELY(term_is_invalid_term(t))) {
4078+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4079+
}
40774080

40784081
ctx->bs = t;
40794082
ctx->bs_offset = 0;
@@ -4122,6 +4125,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
41224125
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
41234126
}
41244127
term t = term_create_empty_binary(size_val / 8, &ctx->heap, ctx->global);
4128+
if (UNLIKELY(term_is_invalid_term(t))) {
4129+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4130+
}
41254131

41264132
ctx->bs = t;
41274133
ctx->bs_offset = 0;
@@ -4530,6 +4536,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
45304536
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
45314537
}
45324538
term t = term_create_empty_binary(0, &ctx->heap, ctx->global);
4539+
if (UNLIKELY(term_is_invalid_term(t))) {
4540+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4541+
}
45334542

45344543
ctx->bs = t;
45354544
ctx->bs_offset = 0;
@@ -4595,6 +4604,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
45954604
TRACE("bs_append/8, fail=%u size=" AVM_INT_FMT " unit=%u src=0x%" TERM_X_FMT " dreg=%c%i\n", (unsigned) fail, size_val, (unsigned) unit, src, T_DEST_REG(dreg));
45964605
src = x_regs[live];
45974606
term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global);
4607+
if (UNLIKELY(term_is_invalid_term(t))) {
4608+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4609+
}
45984610
memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size);
45994611

46004612
ctx->bs = t;
@@ -4641,8 +4653,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
46414653
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
46424654
}
46434655
DECODE_COMPACT_TERM(src, src_pc)
4644-
term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global);
4645-
memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size);
4656+
term t = term_reuse_binary(src, src_size + size_val / 8, &ctx->heap, ctx->global);
4657+
if (UNLIKELY(term_is_invalid_term(t))) {
4658+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
4659+
}
46464660

46474661
ctx->bs = t;
46484662
ctx->bs_offset = src_size * 8;
@@ -6736,6 +6750,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
67366750
// Verify parameters and compute binary size in first iteration
67376751
#ifdef IMPL_EXECUTE_LOOP
67386752
size_t binary_size = 0;
6753+
bool reuse_binary = false;
67396754
#endif
67406755
for (size_t j = 0; j < nb_segments; j++) {
67416756
term atom_type;
@@ -6824,6 +6839,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
68246839
// We only support src as a binary of bytes here.
68256840
segment_size = term_binary_size(src);
68266841
segment_unit = 8;
6842+
if (atom_type == PRIVATE_APPEND_ATOM && j == 0) {
6843+
reuse_binary = true;
6844+
}
68276845
} else {
68286846
VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail);
68296847
avm_int_t signed_size_value = term_to_int(size);
@@ -6864,7 +6882,16 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
68646882
if (UNLIKELY(memory_ensure_free_with_roots(ctx, alloc + term_binary_heap_size(binary_size / 8), live, x_regs, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
68656883
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
68666884
}
6867-
term t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global);
6885+
term t;
6886+
if (!reuse_binary) {
6887+
t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global);
6888+
if (UNLIKELY(term_is_invalid_term(t))) {
6889+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
6890+
}
6891+
} else {
6892+
// t will be created in the first segment (PRIVATE_APPEND case)
6893+
t = term_invalid_term();
6894+
}
68686895
size_t offset = 0;
68696896

68706897
for (size_t j = 0; j < nb_segments; j++) {
@@ -6968,9 +6995,17 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
69686995
TRACE("bs_create_bin/6: current offset (%d) is not evenly divisible by 8\n", (int) offset);
69696996
RAISE_ERROR(UNSUPPORTED_ATOM);
69706997
}
6998+
size_t src_size = term_binary_size(src);
6999+
if (reuse_binary && j == 0) {
7000+
t = term_reuse_binary(src, binary_size / 8, &ctx->heap, ctx->global);
7001+
if (UNLIKELY(term_is_invalid_term(t))) {
7002+
RAISE_ERROR(OUT_OF_MEMORY_ATOM);
7003+
}
7004+
segment_size = src_size * 8;
7005+
break;
7006+
}
69717007
uint8_t *dst = (uint8_t *) term_binary_data(t) + (offset / 8);
69727008
const uint8_t *bin = (const uint8_t *) term_binary_data(src);
6973-
size_t binary_size = term_binary_size(src);
69747009
if (size != ALL_ATOM) {
69757010
VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail);
69767011
avm_int_t signed_size_value = term_to_int(size);
@@ -6979,17 +7014,17 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
69797014
RAISE_ERROR(BADARG_ATOM);
69807015
}
69817016
size_value = (size_t) signed_size_value;
6982-
if (size_value > binary_size) {
7017+
if (size_value > src_size) {
69837018
if (fail == 0) {
69847019
RAISE_ERROR(BADARG_ATOM);
69857020
} else {
69867021
JUMP_TO_LABEL(mod, fail);
69877022
}
69887023
}
6989-
binary_size = size_value;
7024+
src_size = size_value;
69907025
}
6991-
memcpy(dst, bin, binary_size);
6992-
segment_size = binary_size * 8;
7026+
memcpy(dst, bin, src_size);
7027+
segment_size = src_size * 8;
69937028
break;
69947029
}
69957030
default:

0 commit comments

Comments
 (0)