Skip to content

Commit

Permalink
Merge pull request #1417 from pguyot/w52/add-externalterm_to_term_wit…
Browse files Browse the repository at this point in the history
…h_roots

Add externalterm to term with roots

These changes are made under both the "Apache 2.0" and the "GNU Lesser General
Public License 2.1 or later" license terms (dual license).

SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
  • Loading branch information
bettio committed Dec 28, 2024
2 parents 6e79ed1 + 8422b0a commit efd40ba
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 62 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `net:gethostname/0` on platforms with gethostname(3).
- Added `socket:getopt/2`
- Added `atomvm:subprocess/4` to perform pipe/fork/execve on POSIX platforms
- Added `externalterm_to_term_with_roots` to efficiently preserve roots when allocating memory for external terms.

### Changed

- Removed `externalterm_to_term_copy` added in [0.6.5] and introduced flags to `externalterm_to_term` to perform copy.

### Fixed

Expand Down
67 changes: 27 additions & 40 deletions src/libAtomVM/externalterm.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

#include "bitstring.h"
#include "defaultatoms.h"
#include "memory.h"
#include "term.h"
#include "unicode.h"
#include "utils.h"
Expand Down Expand Up @@ -81,20 +82,8 @@ static size_t compute_external_size(term t, GlobalContext *glb);
static int externalterm_from_term(uint8_t **buf, size_t *len, term t, GlobalContext *glb);
static int serialize_term(uint8_t *buf, term t, GlobalContext *glb);

/**
* @brief
* @param external_term buffer containing external term
* @param size size of the external_term
* @param ctx current context in which terms may be stored
* @param opts additional opts, such as ExternalTermToHeapFragment for storing parsed
* terms in a heap fragment.
* are stored in the context heap.
* @param bytes_read the number of bytes read off external_term in order to yield a term
* @param copy whether to copy binary data and atom strings (pass `true', unless `external_term' is a const binary and will not be deallocated)
* @return the parsed term
*/
static term externalterm_to_term_internal(const void *external_term, size_t size, Context *ctx,
ExternalTermOpts opts, size_t *bytes_read, bool copy)
term externalterm_to_term_with_roots(const void *external_term, size_t size, Context *ctx,
ExternalTermFlags flags, size_t *bytes_read, size_t num_roots, term *roots)
{
const uint8_t *external_term_buf = (const uint8_t *) external_term;

Expand All @@ -107,65 +96,63 @@ static term externalterm_to_term_internal(const void *external_term, size_t size
}

size_t eterm_size;
int heap_usage = calculate_heap_usage(external_term_buf + 1, size - 1, &eterm_size, copy);
int heap_usage = calculate_heap_usage(external_term_buf + 1, size - 1, &eterm_size, flags & ExternalTermCopy);
if (heap_usage == INVALID_TERM_SIZE) {
return term_invalid_term();
}

term result;
if (opts & ExternalTermToHeapFragment) {
if (flags & ExternalTermToHeapFragment) {
// We need to allocate fragments as reading external terms from modules
// is not accounted for by the compiler when it emits test_heap opcodes
Heap heap;
if (UNLIKELY(memory_init_heap(&heap, heap_usage) != MEMORY_GC_OK)) {
return term_invalid_term();
}
result = parse_external_terms(external_term_buf + 1, &eterm_size, copy, &heap, ctx->global);
result = parse_external_terms(external_term_buf + 1, &eterm_size, flags & ExternalTermCopy, &heap, ctx->global);
memory_heap_append_heap(&ctx->heap, &heap);
} else {
if (UNLIKELY(memory_ensure_free(ctx, heap_usage) != MEMORY_GC_OK)) {
if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_usage, num_roots, roots, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
fprintf(stderr, "Unable to ensure %zu free words in heap\n", eterm_size);
return term_invalid_term();
}
result = parse_external_terms(external_term_buf + 1, &eterm_size, copy, &ctx->heap, ctx->global);
result = parse_external_terms(external_term_buf + 1, &eterm_size, flags & ExternalTermCopy, &ctx->heap, ctx->global);
}
*bytes_read = eterm_size + 1;
return result;
}

term externalterm_to_term(const void *external_term, size_t size, Context *ctx, ExternalTermOpts opts)
{
size_t bytes_read = 0;
return externalterm_to_term_internal(external_term, size, ctx, opts, &bytes_read, false);
}

term externalterm_to_term_copy(const void *external_term, size_t size, Context *ctx, ExternalTermOpts opts)
term externalterm_to_term(const void *external_term, size_t size, Context *ctx, ExternalTermFlags flags)
{
size_t bytes_read = 0;
return externalterm_to_term_internal(external_term, size, ctx, opts, &bytes_read, true);
return externalterm_to_term_with_roots(external_term, size, ctx, flags, &bytes_read, 0, NULL);
}

enum ExternalTermResult externalterm_from_binary(Context *ctx, term *dst, term binary, size_t *bytes_read)
{
if (!term_is_binary(binary)) {
return EXTERNAL_TERM_BAD_ARG;
}
//
// Copy the binary data to a buffer (in case of GC)
//
size_t len = term_binary_size(binary);
const uint8_t *data = (const uint8_t *) term_binary_data(binary);
uint8_t *buf = malloc(len);
if (IS_NULL_PTR(buf)) {
fprintf(stderr, "Unable to allocate %zu bytes for binary buffer.\n", len);
return EXTERNAL_TERM_MALLOC;
term t;
if (term_is_heap_binary(binary)) {
// If the binary is a heap binary, we will copy its buffer. We cannot simply add it
// to roots because it may be moved and pointer will change between computation of size
// and decoding.
uint8_t *buf = malloc(len);
if (IS_NULL_PTR(buf)) {
fprintf(stderr, "Unable to allocate %zu bytes for binary buffer.\n", len);
return EXTERNAL_TERM_MALLOC;
}
memcpy(buf, data, len);
t = externalterm_to_term_with_roots(buf, len, ctx, ExternalTermCopy, bytes_read, 0, NULL);
free(buf);
} else {
// If the binary is not a heap binary, the pointer will not move during GC. So we don't
// need to copy data.
t = externalterm_to_term_with_roots(data, len, ctx, ExternalTermCopy, bytes_read, 1, &binary);
}
memcpy(buf, data, len);
//
// convert
//
term t = externalterm_to_term_internal(buf, len, ctx, false, bytes_read, true);
free(buf);
if (term_is_invalid_term(t)) {
return EXTERNAL_TERM_BAD_ARG;
} else {
Expand Down
45 changes: 25 additions & 20 deletions src/libAtomVM/externalterm.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ enum ExternalTermResult
typedef enum
{
ExternalTermNoOpts = 0,
ExternalTermToHeapFragment = 1
} ExternalTermOpts;
ExternalTermToHeapFragment = 1,
ExternalTermCopy = 2,
} ExternalTermFlags;

/**
* @brief Gets a term from external term data.
Expand All @@ -57,30 +58,34 @@ typedef enum
* @param external_term the external term that will be deserialized.
* @param size to allocate for term.
* @param ctx the context that owns the memory that will be allocated.
* @param opts if non-zero, use a heap fragment to store the generated
* terms. Otherwise, use the heap in the provided context. Note that when using the
* context heap, this function may call the GC, if there is insufficient space to
* store the generated terms.
* @param flags options to deserialize data.
* The following flags are supported:
* - `ExternalTermToHeapFragment' : use a heap fragment to store the generated
* terms (default is to use the heap in the provided context). Note that when
* using the context heap, this function may call the GC, if there is
* insufficient space to store the generated terms.
* - `ExternalTermCopy' : copy (binary and atom) data. Suitable when
* `external_term' pointer is not a const literal.
* @returns a term.
*/
term externalterm_to_term(
const void *external_term, size_t size, Context *ctx, ExternalTermOpts opts);
const void *external_term, size_t size, Context *ctx, ExternalTermFlags flags);

/**
* @brief Gets a term from external term data, and makes a copy of all data.
*
* @details Deserialize an external term from external format and returns a term.
* @param external_term the external term that will be deserialized.
* @param size to allocate for term.
* @param ctx the context that owns the memory that will be allocated.
* @param opts if non-zero, use a heap fragment to store the generated
* terms. Otherwise, use the heap in the provided context. Note that when using the
* context heap, this function may call the GC, if there is insufficient space to
* store the generated terms.
* @returns a term.
* @brief Create a term from external term data. This variant is meant to be used
* for distribution or in cases where the number of bytes read is important.
*
* @param external_term buffer containing external term
* @param size size of the external_term
* @param ctx current context in which terms may be stored
* @param flags additional flags (see above)
* @param bytes_read the number of bytes read off external_term in order to yield a term
* @param num_roots number of roots when invoking GC
* @param roots roots to preserve when invoking GC
* @return the parsed term
*/
term externalterm_to_term_copy(
const void *external_term, size_t size, Context *ctx, ExternalTermOpts opts);
term externalterm_to_term_with_roots(const void *external_term, size_t size, Context *ctx,
ExternalTermFlags flags, size_t *bytes_read, size_t num_roots, term *roots);

/**
* @brief Create a term from a binary.
Expand Down
23 changes: 21 additions & 2 deletions src/libAtomVM/term.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,9 +407,9 @@ static inline bool term_is_binary(term t)
}

/**
* @brief Checks if a term is a binary
* @brief Checks if a term is a refc binary
*
* @details Returns \c true if a term is a binary stored on the heap, otherwise \c false.
* @details Returns \c true if a term is a ref-counted binary, otherwise \c false.
* @param t the term that will be checked.
* @return \c true if check succeeds, \c false otherwise.
*/
Expand All @@ -425,6 +425,25 @@ static inline bool term_is_refc_binary(term t)
return false;
}

/**
* @brief Checks if a term is a heap binary
*
* @details Returns \c true if a term is a binary stored on the heap, otherwise \c false.
* @param t the term that will be checked.
* @return \c true if check succeeds, \c false otherwise.
*/
static inline bool term_is_heap_binary(term t)
{
/* boxed: 10 */
if (term_is_boxed(t)) {
const term *boxed_value = term_to_const_term_ptr(t);
int masked_value = boxed_value[0] & TERM_BOXED_TAG_MASK;
return masked_value == TERM_BOXED_HEAP_BINARY;
}

return false;
}

static inline bool term_refc_binary_is_const(term t)
{
const term *boxed_value = term_to_const_term_ptr(t);
Expand Down

0 comments on commit efd40ba

Please sign in to comment.