Skip to content

Commit

Permalink
Add support for sized deallocation.
Browse files Browse the repository at this point in the history
This adds a new `sdallocx` function to the external API, allowing the
size to be passed by the caller.  It avoids some extra reads in the
thread cache fast path.  In the case where stats are enabled, this
avoids the work of calculating the size from the pointer.

An assertion validates the size that's passed in, so enabling debugging
will allow users of the API to debug cases where an incorrect size is
passed in.

The performance win for a contrived microbenchmark doing an allocation
and immediately freeing it is ~10%.  It may have a different impact on a
real workload.

Closes #28
  • Loading branch information
thestinger authored and Jason Evans committed Sep 9, 2014
1 parent c3f8650 commit 4cfe551
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 5 deletions.
1 change: 1 addition & 0 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \
$(srcroot)test/unit/prof_accum_b.c
TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
$(srcroot)test/integration/allocated.c \
$(srcroot)test/integration/sdallocx.c \
$(srcroot)test/integration/mallocx.c \
$(srcroot)test/integration/MALLOCX_ARENA.c \
$(srcroot)test/integration/posix_memalign.c \
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ AC_PROG_RANLIB
AC_PATH_PROG([LD], [ld], [false], [$PATH])
AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH])

public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"
public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx sdallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size"

dnl Check for allocator-related functions that should be wrapped.
AC_CHECK_FUNC([memalign],
Expand Down
19 changes: 18 additions & 1 deletion doc/jemalloc.xml.in
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
<refname>xallocx</refname>
<refname>sallocx</refname>
<refname>dallocx</refname>
<refname>sdallocx</refname>
<refname>nallocx</refname>
<refname>mallctl</refname>
<refname>mallctlnametomib</refname>
Expand Down Expand Up @@ -120,6 +121,12 @@
<paramdef>void *<parameter>ptr</parameter></paramdef>
<paramdef>int <parameter>flags</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>void <function>sdallocx</function></funcdef>
<paramdef>void *<parameter>ptr</parameter></paramdef>
<paramdef>size_t <parameter>size</parameter></paramdef>
<paramdef>int <parameter>flags</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>size_t <function>nallocx</function></funcdef>
<paramdef>size_t <parameter>size</parameter></paramdef>
Expand Down Expand Up @@ -228,7 +235,8 @@
<function>rallocx<parameter/></function>,
<function>xallocx<parameter/></function>,
<function>sallocx<parameter/></function>,
<function>dallocx<parameter/></function>, and
<function>dallocx<parameter/></function>,
<function>sdallocx<parameter/></function>, and
<function>nallocx<parameter/></function> functions all have a
<parameter>flags</parameter> argument that can be used to specify
options. The functions only check the options that are contextually
Expand Down Expand Up @@ -312,6 +320,15 @@
memory referenced by <parameter>ptr</parameter> to be made available for
future allocations.</para>

<para>The <function>sdallocx<parameter/></function> function is an
extension of <function>dallocx<parameter/></function> with a
<parameter>size</parameter> parameter to allow the caller to pass in the
allocation size as an optimization. The minimum valid input size is the
original requested size of the allocation, and the maximum valid input
size is the corresponding value returned by
<function>nallocx<parameter/></function> or
<function>sallocx<parameter/></function>.</para>

<para>The <function>nallocx<parameter/></function> function allocates no
memory, but it performs the same size computation as the
<function>mallocx<parameter/></function> function, and returns the real
Expand Down
33 changes: 30 additions & 3 deletions include/jemalloc/internal/arena.h
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx);
void *arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
size_t arena_salloc(const void *ptr, bool demote);
void arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache);
void arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
Expand Down Expand Up @@ -1139,9 +1140,7 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache)
if ((mapbits & CHUNK_MAP_LARGE) == 0) {
/* Small allocation. */
if (try_tcache && (tcache = tcache_get(false)) != NULL) {
size_t binind;

binind = arena_ptr_small_binind_get(ptr, mapbits);
size_t binind = arena_ptr_small_binind_get(ptr, mapbits);
tcache_dalloc_small(tcache, ptr, binind);
} else
arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
Expand All @@ -1157,6 +1156,34 @@ arena_dalloc(arena_chunk_t *chunk, void *ptr, bool try_tcache)
arena_dalloc_large(chunk->arena, chunk, ptr);
}
}

/*
 * Sized variant of arena_dalloc(): release ptr, which lies inside chunk but
 * is not the chunk base, using the caller-supplied size to choose the
 * deallocation path.
 *
 * Sizes below PAGE take the small path; everything else takes the large path
 * (and must be page-aligned).  The thread cache is consulted only when
 * try_tcache is set, and for large regions only when size does not exceed
 * tcache_maxclass.
 *
 * NOTE(review): size is trusted as given; nothing in this function
 * cross-checks it against the chunk itself -- confirm callers validate it.
 */
JEMALLOC_ALWAYS_INLINE void
arena_sdalloc(arena_chunk_t *chunk, void *ptr, size_t size, bool try_tcache)
{
	tcache_t *tc = NULL;

	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	if (size >= PAGE) {
		/* Large allocation. */
		assert(((uintptr_t)ptr & PAGE_MASK) == 0);

		if (try_tcache && size <= tcache_maxclass)
			tc = tcache_get(false);
		if (tc != NULL)
			tcache_dalloc_large(tc, ptr, size);
		else
			arena_dalloc_large(chunk->arena, chunk, ptr);
		return;
	}

	/* Small allocation. */
	if (try_tcache)
		tc = tcache_get(false);
	if (tc != NULL) {
		/* The bin index comes straight from the supplied size. */
		tcache_dalloc_small(tc, ptr, small_size2bin(size));
	} else {
		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
		    LG_PAGE;

		arena_dalloc_small(chunk->arena, chunk, ptr, pageind);
	}
}
# endif /* JEMALLOC_ARENA_INLINE_C */
#endif

Expand Down
26 changes: 26 additions & 0 deletions include/jemalloc/internal/jemalloc_internal.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -634,8 +634,10 @@ size_t ivsalloc(const void *ptr, bool demote);
size_t u2rz(size_t usize);
size_t p2rz(const void *ptr);
void idalloct(void *ptr, bool try_tcache);
void isdalloct(void *ptr, size_t size, bool try_tcache);
void idalloc(void *ptr);
void iqalloc(void *ptr, bool try_tcache);
void isqalloc(void *ptr, size_t size, bool try_tcache);
void *iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
arena_t *arena);
Expand Down Expand Up @@ -787,6 +789,20 @@ idalloct(void *ptr, bool try_tcache)
huge_dalloc(ptr);
}

/*
 * Sized variant of idalloct(): free ptr, passing the caller-supplied size on
 * to the arena layer.
 *
 * A pointer that coincides with its chunk base is handed to huge_dalloc()
 * (size is not used on that path); any other pointer goes to
 * arena_sdalloc() together with size and try_tcache.
 */
JEMALLOC_ALWAYS_INLINE void
isdalloct(void *ptr, size_t size, bool try_tcache)
{
	arena_chunk_t *base;

	assert(ptr != NULL);

	base = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (base == ptr) {
		huge_dalloc(ptr);
		return;
	}
	arena_sdalloc(base, ptr, size, try_tcache);
}

JEMALLOC_ALWAYS_INLINE void
idalloc(void *ptr)
{
Expand All @@ -804,6 +820,16 @@ iqalloc(void *ptr, bool try_tcache)
idalloct(ptr, try_tcache);
}

JEMALLOC_ALWAYS_INLINE void
isqalloc(void *ptr, size_t size, bool try_tcache)
{

if (config_fill && opt_quarantine)
quarantine(ptr);
else
idalloct(ptr, try_tcache);
}

JEMALLOC_ALWAYS_INLINE void *
iralloct_realign(void *ptr, size_t oldsize, size_t size, size_t extra,
size_t alignment, bool zero, bool try_tcache_alloc, bool try_tcache_dalloc,
Expand Down
3 changes: 3 additions & 0 deletions include/jemalloc/internal/private_symbols.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ arena_ralloc_no_move
arena_redzone_corruption
arena_run_regind
arena_salloc
arena_sdalloc
arena_stats_merge
arena_tcache_fill_small
arenas
Expand Down Expand Up @@ -228,7 +229,9 @@ iralloc
iralloct
iralloct_realign
isalloc
isdalloct
isthreaded
isqalloc
ivsalloc
ixalloc
jemalloc_postfork_child
Expand Down
1 change: 1 addition & 0 deletions include/jemalloc/jemalloc_protos.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ JEMALLOC_EXPORT size_t @je_@xallocx(void *ptr, size_t size, size_t extra,
JEMALLOC_EXPORT size_t @je_@sallocx(const void *ptr, int flags)
JEMALLOC_ATTR(pure);
JEMALLOC_EXPORT void @je_@dallocx(void *ptr, int flags);
JEMALLOC_EXPORT void @je_@sdallocx(void *ptr, size_t size, int flags);
JEMALLOC_EXPORT size_t @je_@nallocx(size_t size, int flags)
JEMALLOC_ATTR(pure);

Expand Down
44 changes: 44 additions & 0 deletions src/jemalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,24 @@ ifree(void *ptr, bool try_tcache)
JEMALLOC_VALGRIND_FREE(ptr, rzsize);
}

JEMALLOC_INLINE_C void
isfree(void *ptr, size_t usize, bool try_tcache)
{
UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);

assert(ptr != NULL);
assert(malloc_initialized || IS_INITIALIZER);

if (config_prof && opt_prof)
prof_free(ptr, usize);
if (config_stats)
thread_allocated_tsd_get()->deallocated += usize;
if (config_valgrind && in_valgrind)
rzsize = p2rz(ptr);
isqalloc(ptr, usize, try_tcache);
JEMALLOC_VALGRIND_FREE(ptr, rzsize);
}

void *
je_realloc(void *ptr, size_t size)
{
Expand Down Expand Up @@ -1820,6 +1838,32 @@ je_dallocx(void *ptr, int flags)
ifree(ptr, try_tcache);
}

/*
 * Sized deallocation entry point.
 *
 * ptr   - non-NULL pointer to free.
 * size  - any value from the originally requested size up to the value
 *         nallocx()/sallocx() would report for the allocation.
 * flags - the same MALLOCX_* flags accepted by dallocx(); the alignment bits
 *         decide how size is rounded to a usable size.
 *
 * The supplied size is first rounded (s2u(), or sa2u() when an alignment is
 * specified) and only the rounded value is checked against isalloc().
 * Asserting on the raw size rejected valid calls that pass the requested
 * size rather than the usable size.
 */
void
je_sdallocx(void *ptr, size_t size, int flags)
{
	bool try_tcache;
	size_t usize;

	assert(ptr != NULL);
	assert(malloc_initialized || IS_INITIALIZER);

	if ((flags & MALLOCX_LG_ALIGN_MASK) == 0)
		usize = s2u(size);
	else
		usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags));
	/* Validate the rounded size, not the caller's raw input. */
	assert(usize == isalloc(ptr, config_prof));

	if ((flags & MALLOCX_ARENA_MASK) != 0) {
		unsigned arena_ind = MALLOCX_ARENA_GET(flags);
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);

		/*
		 * With an explicit arena, the thread cache is tried only if
		 * ptr is a chunk base or its chunk belongs to a different
		 * arena than the one named in flags.
		 */
		try_tcache = (chunk == ptr || chunk->arena !=
		    arenas[arena_ind]);
	} else
		try_tcache = true;

	UTRACE(ptr, 0, 0);
	isfree(ptr, usize, try_tcache);
}

size_t
je_nallocx(size_t size, int flags)
{
Expand Down
57 changes: 57 additions & 0 deletions test/integration/sdallocx.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "test/jemalloc_test.h"

#define MAXALIGN (((size_t)1) << 25)
#define NITER 4

/* Smoke test: allocate 64 bytes and release them with the same size. */
TEST_BEGIN(test_basic)
{
	void *p;

	p = mallocx(64, 0);
	sdallocx(p, 64, 0);
}
TEST_END

/*
 * Allocate with every power-of-two alignment from 8 up to MAXALIGN across a
 * range of request sizes, then free each region with sdallocx() using the
 * requested size and the same alignment flag.
 *
 * Each round allocates up to NITER regions, stopping early once the running
 * total of nallocx() sizes for the current alignment reaches 2 * MAXALIGN.
 */
TEST_BEGIN(test_alignment_and_size)
{
	void *ps[NITER];
	size_t alignment, sz, nsz;
	unsigned i;

	for (i = 0; i < NITER; i++)
		ps[i] = NULL;

	for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) {
		/* Constant for the whole pass over this alignment. */
		const size_t step = (alignment >> (LG_SIZEOF_PTR-1)) - 1;
		const int alloc_flags = MALLOCX_ALIGN(alignment) |
		    MALLOCX_ZERO;
		const int free_flags = MALLOCX_ALIGN(alignment);
		size_t total = 0;

		for (sz = 1; sz < 3 * alignment && sz < (1U << 31);
		    sz += step) {
			for (i = 0; i < NITER; i++) {
				nsz = nallocx(sz, alloc_flags);
				ps[i] = mallocx(sz, alloc_flags);
				total += nsz;
				if (total >= (MAXALIGN << 1))
					break;
			}
			/* Release whatever this round allocated. */
			for (i = 0; i < NITER; i++) {
				if (ps[i] == NULL)
					continue;
				sdallocx(ps[i], sz, free_flags);
				ps[i] = NULL;
			}
		}
	}
}
TEST_END

/* Run the sdallocx integration tests; the result of test() is returned. */
int
main(void)
{
	return test(test_basic, test_alignment_and_size);
}
20 changes: 20 additions & 0 deletions test/stress/microbench.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ malloc_dallocx(void)
dallocx(p, 0);
}

/*
 * Benchmark body: allocate one byte with malloc() and release it with
 * sdallocx(), passing the requested size.  Reports a test failure if the
 * allocation fails.
 */
static void
malloc_sdallocx(void)
{
	void *p = malloc(1);

	if (p != NULL) {
		sdallocx(p, 1, 0);
		return;
	}
	test_fail("Unexpected malloc() failure");
}

TEST_BEGIN(test_free_vs_dallocx)
{

Expand All @@ -80,6 +91,14 @@ TEST_BEGIN(test_free_vs_dallocx)
}
TEST_END

/* Compare malloc()+dallocx() against malloc()+sdallocx(). */
TEST_BEGIN(test_dallocx_vs_sdallocx)
{
	compare_funcs(
	    10*1000*1000,
	    100*1000*1000,
	    "dallocx", malloc_dallocx,
	    "sdallocx", malloc_sdallocx);
}
TEST_END

static void
malloc_mus_free(void)
{
Expand Down Expand Up @@ -135,6 +154,7 @@ main(void)
return (test(
test_malloc_vs_mallocx,
test_free_vs_dallocx,
test_dallocx_vs_sdallocx,
test_mus_vs_sallocx,
test_sallocx_vs_nallocx));
}

0 comments on commit 4cfe551

Please sign in to comment.