diff --git a/klib/mbedtls.c b/klib/mbedtls.c index 77c81d214..3be5f3ca8 100644 --- a/klib/mbedtls.c +++ b/klib/mbedtls.c @@ -295,13 +295,7 @@ void mbedtls_platform_zeroize( void *buf, size_t len ) void *mbedtls_calloc(size_t n, size_t s) { - /* To maintain the malloc/free interface with mcache, allocations must stay - within the range of objcaches and not fall back to parent allocs. */ size_t total = n * s; - if (total > U64_FROM_BIT(MAX_MCACHE_ORDER)) { - rprintf("%s: %ld bytes exceeds max alloc order\n", __func__, total); - return 0; - } void *p = allocate(tls.h, total); if (p != INVALID_ADDRESS) { runtime_memset(p, 0, total); diff --git a/platform/pc/service.c b/platform/pc/service.c index 971c191dc..cc4d74ac0 100644 --- a/platform/pc/service.c +++ b/platform/pc/service.c @@ -243,7 +243,7 @@ static void __attribute__((noinline)) init_service_new_stack() kernel_heaps kh = get_kernel_heaps(); early_init_debug("in init_service_new_stack"); init_page_tables((heap)heap_linear_backed(kh)); - bytes pagesize = is_low_memory_machine(kh) ? PAGESIZE : PAGESIZE_2M; + bytes pagesize = is_low_memory_machine() ? PAGESIZE : PAGESIZE_2M; init_tuples(locking_heap_wrapper(heap_general(kh), allocate_tagged_region(kh, tag_table_tuple, pagesize))); init_symbols(allocate_tagged_region(kh, tag_symbol, pagesize), heap_locked(kh)); diff --git a/platform/riscv-virt/service.c b/platform/riscv-virt/service.c index 17257140d..8f4ef9401 100644 --- a/platform/riscv-virt/service.c +++ b/platform/riscv-virt/service.c @@ -103,7 +103,7 @@ static void __attribute__((noinline)) init_service_new_stack(void) init_page_tables((heap)heap_linear_backed(kh)); /* mmu init complete; unmap temporary identity map */ unmap(PHYSMEM_BASE, INIT_IDENTITY_SIZE); - bytes pagesize = is_low_memory_machine(kh) ? PAGESIZE : PAGESIZE_2M; + bytes pagesize = is_low_memory_machine() ? 
PAGESIZE : PAGESIZE_2M; init_tuples(locking_heap_wrapper(heap_general(kh), allocate_tagged_region(kh, tag_table_tuple, pagesize))); init_symbols(allocate_tagged_region(kh, tag_symbol, pagesize), heap_locked(kh)); diff --git a/platform/virt/service.c b/platform/virt/service.c index ab2fdf633..eb5d6e597 100644 --- a/platform/virt/service.c +++ b/platform/virt/service.c @@ -254,7 +254,7 @@ static void __attribute__((noinline)) init_service_new_stack(void) init_debug("in init_service_new_stack\n"); kernel_heaps kh = get_kernel_heaps(); init_page_tables((heap)heap_linear_backed(kh)); - bytes pagesize = is_low_memory_machine(kh) ? PAGESIZE : PAGESIZE_2M; + bytes pagesize = is_low_memory_machine() ? PAGESIZE : PAGESIZE_2M; init_tuples(locking_heap_wrapper(heap_general(kh), allocate_tagged_region(kh, tag_table_tuple, pagesize))); init_symbols(allocate_tagged_region(kh, tag_symbol, pagesize), heap_locked(kh)); diff --git a/rules.mk b/rules.mk index 888a816ef..6b2e3af80 100644 --- a/rules.mk +++ b/rules.mk @@ -248,6 +248,8 @@ ifeq ($1,kernel.elf) $(call cmd,mvdis) endif else +LDFLAGS-$1.dbg= $$(LDFLAGS-$1) +LIBS-$1.dbg= $$(LIBS-$1) $$(PROG-$1).dbg: $$(OBJS-$1) @$(MKDIR) $$(dir $$@) $$(call cmd,ld) diff --git a/src/config.h b/src/config.h index 9612b1fc5..be4c83c09 100644 --- a/src/config.h +++ b/src/config.h @@ -69,6 +69,7 @@ #define USER_MEMORY_RESERVE (4 * MB) #define LOW_MEMORY_THRESHOLD (64 * MB) #define SG_FRAG_BYTE_THRESHOLD (128*KB) +#define PAGECACHE_LOWMEM_CONTIGUOUS_PAGESIZE (128*KB) /* don't go below this minimum amount of physical memory when inflating balloon */ #define BALLOON_MEMORY_MINIMUM (16 * MB) @@ -81,6 +82,7 @@ /* must be large enough for vendor code that use malloc/free interface */ #define MAX_MCACHE_ORDER 16 +#define MAX_LOWMEM_MCACHE_ORDER 11 /* ftrace buffer size */ #define DEFAULT_TRACE_ARRAY_SIZE (512ULL << 20) diff --git a/src/kernel/init.c b/src/kernel/init.c index 93969af55..ca3202e04 100644 --- a/src/kernel/init.c +++ b/src/kernel/init.c @@ 
-75,9 +75,10 @@ void init_kernel_heaps(void) #endif assert(heaps.linear_backed != INVALID_ADDRESS); - bytes pagesize = is_low_memory_machine(&heaps) ? - U64_FROM_BIT(MAX_MCACHE_ORDER + 1) : PAGESIZE_2M; - heaps.general = allocate_mcache(&bootstrap, (heap)heaps.linear_backed, 5, MAX_MCACHE_ORDER, + boolean is_lowmem = is_low_memory_machine(); + int max_mcache_order = is_lowmem ? MAX_LOWMEM_MCACHE_ORDER : MAX_MCACHE_ORDER; + bytes pagesize = is_lowmem ? U64_FROM_BIT(max_mcache_order + 1) : PAGESIZE_2M; + heaps.general = allocate_mcache(&bootstrap, (heap)heaps.linear_backed, 5, max_mcache_order, pagesize); assert(heaps.general != INVALID_ADDRESS); diff --git a/src/kernel/kernel.h b/src/kernel/kernel.h index 050c9d2dd..38921bbb8 100644 --- a/src/kernel/kernel.h +++ b/src/kernel/kernel.h @@ -808,9 +808,9 @@ boolean mm_register_mem_cleaner(mem_cleaner cleaner); kernel_heaps get_kernel_heaps(void); -static inline boolean is_low_memory_machine(kernel_heaps kh) +static inline boolean is_low_memory_machine(void) { - return (heap_total((heap)heap_physical(kh)) < LOW_MEMORY_THRESHOLD); + return (heap_total((heap)heap_physical(get_kernel_heaps())) < LOW_MEMORY_THRESHOLD); } struct filesystem *get_root_fs(void); diff --git a/src/kernel/pagecache.c b/src/kernel/pagecache.c index 6c4b3dc7c..7690e9048 100644 --- a/src/kernel/pagecache.c +++ b/src/kernel/pagecache.c @@ -1780,7 +1780,10 @@ void init_pagecache(heap general, heap contiguous, heap physical, u64 pagesize) assert(pagesize == U64_FROM_BIT(pc->page_order)); pc->h = general; #ifdef KERNEL - pc->contiguous = (heap)allocate_objcache(general, contiguous, PAGESIZE, PAGESIZE_2M, true); + pc->contiguous = (heap)allocate_objcache(general, contiguous, PAGESIZE, + is_low_memory_machine() ? 
+ PAGECACHE_LOWMEM_CONTIGUOUS_PAGESIZE : + PAGESIZE_2M, true); #else pc->contiguous = contiguous; #endif diff --git a/src/net/lwip.h b/src/net/lwip.h index a2705c887..16ba02057 100644 --- a/src/net/lwip.h +++ b/src/net/lwip.h @@ -17,6 +17,7 @@ #include #define MAX_LWIP_ALLOC_ORDER 16 +#define MAX_LOWMEM_LWIP_ALLOC_ORDER 11 status direct_connect(heap h, ip_addr_t *addr, u16 port, connection_handler ch); diff --git a/src/net/net.c b/src/net/net.c index 65b10c54d..f17541e5f 100644 --- a/src/net/net.c +++ b/src/net/net.c @@ -85,9 +85,6 @@ void lwip_debug(char * format, ...) void *lwip_allocate(u64 size) { - /* To maintain the malloc/free interface with mcache, allocations must stay - within the range of objcaches and not fall back to parent allocs. */ - assert(size <= U64_FROM_BIT(MAX_LWIP_ALLOC_ORDER)); void *p = allocate(lwip_heap, size); return ((p != INVALID_ADDRESS) ? p : 0); } @@ -388,9 +385,10 @@ void init_net(kernel_heaps kh) { heap h = heap_general(kh); heap backed = (heap)heap_linear_backed(kh); - bytes pagesize = is_low_memory_machine(kh) ? - U64_FROM_BIT(MAX_LWIP_ALLOC_ORDER + 1) : PAGESIZE_2M; - lwip_heap = allocate_mcache(h, backed, 5, MAX_LWIP_ALLOC_ORDER, pagesize); + boolean is_lowmem = is_low_memory_machine(); + int lwip_alloc_order = is_lowmem ? MAX_LOWMEM_LWIP_ALLOC_ORDER : MAX_LWIP_ALLOC_ORDER; + bytes pagesize = is_lowmem ? 
U64_FROM_BIT(lwip_alloc_order + 1) : PAGESIZE_2M; + lwip_heap = allocate_mcache(h, backed, 5, lwip_alloc_order, pagesize); assert(lwip_heap != INVALID_ADDRESS); lwip_heap = locking_heap_wrapper(h, lwip_heap); assert(lwip_heap != INVALID_ADDRESS); diff --git a/src/runtime/heap/mcache.c b/src/runtime/heap/mcache.c index 6bb66c9b6..920bdc5de 100644 --- a/src/runtime/heap/mcache.c +++ b/src/runtime/heap/mcache.c @@ -27,8 +27,18 @@ typedef struct mcache { u64 allocated; u64 parent_threshold; tuple mgmt; + table fallbacks; } *mcache; +/* Mix each set of address bits between PAGELOG and 23 for a more even + distribution among the four (default) buckets in the fallback table. */ +static key fallback_key(void *p) +{ + u64 a = u64_from_pointer(p); + return ((a >> 12) ^ (a >> 14)) ^ ((a >> 16) ^ (a >> 18)) ^ + ((a >> 20) ^ (a >> 22)); +} + u64 mcache_alloc(heap h, bytes b) { mcache m = (mcache)h; @@ -41,19 +51,29 @@ u64 mcache_alloc(heap h, bytes b) rputs(": "); #endif if (b > m->parent_threshold) { + if (!m->fallbacks) { + m->fallbacks = allocate_table(m->meta, fallback_key, pointer_equal); + if (m->fallbacks == INVALID_ADDRESS) { + rputs("mcache_alloc: failed to allocate fallbacks table\n"); + return INVALID_PHYSICAL; + } + } u64 size = pad(b, m->parent->pagesize); u64 a = allocate_u64(m->parent, size); if (a != INVALID_PHYSICAL) { m->allocated += size; + table_set(m->fallbacks, pointer_from_u64(a), pointer_from_u64(b)); + } #ifdef MCACHE_DEBUG - rputs("fallback to parent, size "); - print_u64(size); - rputs(", addr "); - print_u64(a); - rputs("\n"); + rputs("fallback to parent, size "); + print_u64(b); + rputs(", padded to "); + print_u64(size); + rputs(", addr "); + print_u64(a); + rputs("\n"); #endif - return a; - } + return a; } /* Could become a binary search if search set is large... 
*/ @@ -102,15 +122,52 @@ void mcache_dealloc(heap h, u64 a, bytes b) #endif mcache m = (mcache)h; + u64 size = 0; + /* The fallback table tracks allocations that fall back to the parent + heap. This allows use of a "malloc-style" interface to the mcache in + which the allocation size is not specified on a deallocate. The cost of + this is a table insertion when making a fallback allocation and a table + lookup/removal when deallocating a fallback allocation of a known size, + or on any deallocation (free) of an unknown size. */ if (b != -1ull && b > m->parent_threshold) { - u64 size = pad(b, m->parent->pagesize); + if (!m->fallbacks) { + rputs("mcache_dealloc: fallbacks table not allocated\n"); + return; + } + size = u64_from_pointer(table_remove(m->fallbacks, pointer_from_u64(a))); + if (!size) { + rputs("mcache_dealloc: address "); + print_u64(a); + rputs(" (size "); + print_u64(b); + rputs(") not found in fallback table\n"); + return; + } + if (size != b) { + rputs("mcache_dealloc: address "); + print_u64(a); + rputs(" (given size "); + print_u64(b); + rputs(") does not match alloc size ("); + print_u64(size); + rputs(")\n"); + } + size = pad(size, m->parent->pagesize); + } + if (b == -1ull && m->fallbacks) { + size = u64_from_pointer(table_remove(m->fallbacks, pointer_from_u64(a))); + if (size > 0) + size = pad(size, m->parent->pagesize); + } + + if (size) { #ifdef MCACHE_DEBUG rputs("dealloc size "); print_u64(b); rputs(", pagesize "); print_u64(m->parent->pagesize); - rputs(", parent alloc, padded size "); + rputs(", parent alloc size "); print_u64(size); rputs("\n"); #endif @@ -178,6 +235,19 @@ void destroy_mcache(heap h) if (o) o->destroy(o); } + if (m->fallbacks) { + table_foreach(m->fallbacks, p, size) { +#ifdef MCACHE_DEBUG + rputs(" dealloc fallback @ "); + print_u64(u64_from_pointer(p)); + rputs(", size "); + print_u64(u64_from_pointer(size)); + rputs("\n"); +#endif + deallocate(m->parent, p, pad(u64_from_pointer(size), m->parent->pagesize)); + } + 
deallocate_table(m->fallbacks); + } deallocate(m->meta, m, sizeof(struct mcache)); } @@ -291,6 +361,7 @@ heap allocate_mcache(heap meta, heap parent, int min_order, int max_order, bytes m->allocated = 0; m->parent_threshold = U64_FROM_BIT(max_order); m->mgmt = 0; + m->fallbacks = 0; for(int i = 0, order = min_order; order <= max_order; i++, order++) { u64 obj_size = U64_FROM_BIT(order); diff --git a/src/virtio/virtio_net.c b/src/virtio/virtio_net.c index c56b4729e..0c1843f15 100644 --- a/src/virtio/virtio_net.c +++ b/src/virtio/virtio_net.c @@ -79,7 +79,7 @@ typedef struct xpbuf struct pbuf_custom p; vnet vn; closure_struct(vnet_input, input); -} *xpbuf; +} __attribute__((aligned(8))) *xpbuf; closure_function(1, 1, void, tx_complete, @@ -277,6 +277,12 @@ static err_t virtioif_init(struct netif *netif) return ERR_OK; } +static inline u64 find_page_size(bytes each, int n) +{ + /* extra element to cover objcache meta */ + return 1ul << find_order(each * (n + 1)); +} + static void virtio_net_attach(vtdev dev) { //u32 badness = VIRTIO_F_BAD_FEATURE | VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM | @@ -294,11 +300,6 @@ static void virtio_net_attach(vtdev dev) sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr); vn->rxbuflen = pad(vn->net_header_len + sizeof(struct eth_hdr) + sizeof(struct eth_vlan_hdr) + 1500, 8); /* padding to make xpbuf structures aligned to 8 bytes */ - virtio_net_debug("%s: net_header_len %d, rxbuflen %d\n", __func__, vn->net_header_len, vn->rxbuflen); - vn->rxbuffers = allocate_objcache(h, (heap)contiguous, vn->rxbuflen + sizeof(struct xpbuf), - PAGESIZE_2M, true); - vn->txhandlers = allocate_objcache(h, (heap)contiguous, sizeof(closure_struct_type(tx_complete)), - PAGESIZE, true); mm_register_mem_cleaner(init_closure(&vn->mem_cleaner, vnet_mem_cleaner)); /* rx = 0, tx = 1, ctl = 2 by page 53 of http://docs.oasis-open.org/virtio/virtio/v1.0/cs01/virtio-v1.0-cs01.pdf */ @@ -306,12 +307,22 @@ static void virtio_net_attach(vtdev dev) 
virtio_alloc_virtqueue(dev, "virtio net tx", 1, &vn->txq); virtqueue_set_polling(vn->txq, true); virtio_alloc_virtqueue(dev, "virtio net rx", 0, &vn->rxq); - // just need vn->net_header_len contig bytes really + bytes rx_allocsize = vn->rxbuflen + sizeof(struct xpbuf); + bytes rxbuffers_pagesize = find_page_size(rx_allocsize, virtqueue_entries(vn->rxq)); + bytes tx_handler_size = sizeof(closure_struct_type(tx_complete)); + bytes tx_handler_pagesize = find_page_size(tx_handler_size, virtqueue_entries(vn->txq)); + virtio_net_debug("%s: net_header_len %d, rx_allocsize %d, rxbuffers_pagesize %d " + "tx_handler_size %d tx_handler_pagesize %d\n", __func__, vn->net_header_len, + rx_allocsize, rxbuffers_pagesize, tx_handler_size, tx_handler_pagesize); + vn->rxbuffers = allocate_objcache(h, (heap)contiguous, rx_allocsize, rxbuffers_pagesize, true); + assert(vn->rxbuffers != INVALID_ADDRESS); + vn->txhandlers = allocate_objcache(h, (heap)contiguous, tx_handler_size, tx_handler_pagesize, true); + assert(vn->txhandlers != INVALID_ADDRESS); vn->empty = alloc_map(contiguous, contiguous->h.pagesize, &vn->empty_phys); assert(vn->empty != INVALID_ADDRESS); - for (int i = 0; i < vn->net_header_len; i++) ((u8 *)vn->empty)[i] = 0; + for (int i = 0; i < vn->net_header_len; i++) + ((u8 *)vn->empty)[i] = 0; vn->n->state = vn; - // initialization complete vtdev_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK); netif_add(vn->n, 0, 0, 0, diff --git a/test/runtime/Makefile b/test/runtime/Makefile index 5e481f237..7563f5cda 100644 --- a/test/runtime/Makefile +++ b/test/runtime/Makefile @@ -1,4 +1,5 @@ # these are built for the target platform (Linux x86_64) +DEBUG_STRIP=y PROGRAMS= \ aio \ dup \