From 5a255f666cf9c872e37cfc85d0ab35f8be20881a Mon Sep 17 00:00:00 2001 From: Vijay Dhanraj Date: Wed, 8 Sep 2021 16:58:49 -0700 Subject: [PATCH 1/2] [Pal/LibOS] Refactor `PAL_TOPO_INFO` struct usage This commit removes topology related stuff from `PAL_CONTROL` struct and exposes it as a stand-alone struct to both PAL and LibOS. Currently `PAL_CONTROL` is used as a const ptr in LibOS which prevents us from modifying the objects during checkpoint-and-restore. Signed-off-by: Vijay Dhanraj --- LibOS/shim/include/shim_internal.h | 1 + LibOS/shim/src/fs/proc/info.c | 7 +- LibOS/shim/src/fs/sys/cache_info.c | 4 +- LibOS/shim/src/fs/sys/cpu_info.c | 10 +- LibOS/shim/src/fs/sys/fs.c | 119 +++++++++++++++++++++++- LibOS/shim/src/fs/sys/node_info.c | 7 +- LibOS/shim/src/shim_init.c | 3 + LibOS/shim/src/sys/shim_sched.c | 2 +- Pal/include/pal/pal.h | 3 +- Pal/include/pal_internal.h | 2 + Pal/src/db_main.c | 8 +- Pal/src/host/Linux-SGX/enclave_ocalls.c | 2 +- Pal/src/pal-symbols | 1 + 13 files changed, 150 insertions(+), 19 deletions(-) diff --git a/LibOS/shim/include/shim_internal.h b/LibOS/shim/include/shim_internal.h index b08afc38fc..339aa6abfc 100644 --- a/LibOS/shim/include/shim_internal.h +++ b/LibOS/shim/include/shim_internal.h @@ -26,6 +26,7 @@ void* shim_init(int argc, void* args); extern int g_log_level; extern const PAL_CONTROL* g_pal_control; +extern PAL_TOPO_INFO* g_topo_info; // TODO(mkow): We should make it cross-object-inlinable, ideally by enabling LTO, less ideally by // pasting it here and making `inline`, but our current linker scripts prevent both. 
diff --git a/LibOS/shim/src/fs/proc/info.c b/LibOS/shim/src/fs/proc/info.c index 0395b64923..0657de52b2 100644 --- a/LibOS/shim/src/fs/proc/info.c +++ b/LibOS/shim/src/fs/proc/info.c @@ -113,7 +113,8 @@ int proc_cpuinfo_load(struct shim_dentry* dent, char** out_data, size_t* out_siz size += ret; \ } while (0) - for (size_t n = 0; n < g_pal_control->cpu_info.online_logical_cores; n++) { + uint64_t online_logical_cores = g_topo_info->online_logical_cores.resource_count; + for (size_t n = 0; n < online_logical_cores; n++) { /* Below strings must match exactly the strings retrieved from /proc/cpuinfo * (see Linux's arch/x86/kernel/cpu/proc.c) */ ADD_INFO("processor\t: %lu\n", n); @@ -122,9 +123,9 @@ int proc_cpuinfo_load(struct shim_dentry* dent, char** out_data, size_t* out_siz ADD_INFO("model\t\t: %lu\n", g_pal_control->cpu_info.cpu_model); ADD_INFO("model name\t: %s\n", g_pal_control->cpu_info.cpu_brand); ADD_INFO("stepping\t: %lu\n", g_pal_control->cpu_info.cpu_stepping); - ADD_INFO("physical id\t: %d\n", g_pal_control->cpu_info.cpu_socket[n]); + ADD_INFO("physical id\t: %d\n", g_topo_info->core_topology[n].cpu_socket); ADD_INFO("core id\t\t: %lu\n", n); - ADD_INFO("cpu cores\t: %lu\n", g_pal_control->cpu_info.physical_cores_per_socket); + ADD_INFO("cpu cores\t: %lu\n", g_topo_info->physical_cores_per_socket); double bogomips = g_pal_control->cpu_info.cpu_bogomips; // Apparently graphene snprintf cannot into floats. 
ADD_INFO("bogomips\t: %lu.%02lu\n", (unsigned long)bogomips, diff --git a/LibOS/shim/src/fs/sys/cache_info.c b/LibOS/shim/src/fs/sys/cache_info.c index a75e06c34d..987bd45c35 100644 --- a/LibOS/shim/src/fs/sys/cache_info.c +++ b/LibOS/shim/src/fs/sys/cache_info.c @@ -26,8 +26,8 @@ int sys_cache_load(struct shim_dentry* dent, char** out_data, size_t* out_size) return ret; const char* name = dent->name; - PAL_CORE_CACHE_INFO* cache = &g_pal_control->topo_info.core_topology[cpu_num].cache[cache_num]; - const char* str; + PAL_CORE_CACHE_INFO* cache = &g_topo_info->core_topology[cpu_num].cache[cache_num]; + char str[PAL_SYSFS_MAP_FILESZ] = {'\0'}; if (strcmp(name, "shared_cpu_map") == 0) { str = cache->shared_cpu_map; } else if (strcmp(name, "level") == 0) { diff --git a/LibOS/shim/src/fs/sys/cpu_info.c b/LibOS/shim/src/fs/sys/cpu_info.c index cef6bf0e89..121282eec6 100644 --- a/LibOS/shim/src/fs/sys/cpu_info.c +++ b/LibOS/shim/src/fs/sys/cpu_info.c @@ -17,9 +17,11 @@ int sys_cpu_general_load(struct shim_dentry* dent, char** out_data, size_t* out_ const char* str; if (strcmp(name, "online") == 0) { - str = g_pal_control->topo_info.online_logical_cores; + ret = sys_convert_range_info_str(g_topo_info->online_logical_cores, str, + PAL_SYSFS_BUF_FILESZ, ","); } else if (strcmp(name, "possible") == 0) { - str = g_pal_control->topo_info.possible_logical_cores; + ret = sys_convert_range_info_str(g_topo_info->possible_logical_cores, str, + PAL_SYSFS_BUF_FILESZ, ","); } else { log_debug("unrecognized file: %s", name); return -ENOENT; @@ -36,8 +38,8 @@ int sys_cpu_load(struct shim_dentry* dent, char** out_data, size_t* out_size) { return ret; const char* name = dent->name; - PAL_CORE_TOPO_INFO* core_topology = &g_pal_control->topo_info.core_topology[cpu_num]; - const char* str; + PAL_CORE_TOPO_INFO* core_topology = &g_topo_info->core_topology[cpu_num]; + char str[PAL_SYSFS_MAP_FILESZ] = {'\0'}; if (strcmp(name, "online") == 0) { /* `cpu/cpuX/online` is not present for cpu0 */ if 
(cpu_num == 0) diff --git a/LibOS/shim/src/fs/sys/fs.c b/LibOS/shim/src/fs/sys/fs.c index f7907ce820..a2c509d218 100644 --- a/LibOS/shim/src/fs/sys/fs.c +++ b/LibOS/shim/src/fs/sys/fs.c @@ -14,6 +14,119 @@ #include "shim_fs_pseudo.h" #include "stat.h" +PAL_TOPO_INFO* g_topo_info = NULL; + +int sys_convert_int_to_str(PAL_NUM val, SIZE_QUALIFIER_T size_qual, char* str, int max_len) { + int ret = 0; + switch (size_qual) { + case KILO: + ret = snprintf(str, max_len, "%luK", val); + break; + case MEGA: + ret = snprintf(str, max_len, "%luM", val); + break; + case GIGA: + ret = snprintf(str, max_len, "%luG", val); + break; + default: + ret = snprintf(str, max_len, "%lu", val); + break; + } + return ret; +} + +int sys_convert_range_info_str(PAL_RES_RANGE_INFO res_range_info, char* str, int max_len, + const char* sep) { + if (res_range_info.range_count > INT64_MAX) + return -EINVAL; + int64_t range_cnt = (int64_t)res_range_info.range_count; + int offset = 0; + for (int64_t i = 0; i < range_cnt; i++) { + if (max_len - offset < 0) + return -ENOMEM; + + int ret; + char end_str[PAL_SYSFS_BUF_FILESZ] = {'\0'}; + if (res_range_info.ranges[i].end == UINT64_MAX) { + ret = snprintf(end_str, PAL_SYSFS_BUF_FILESZ, "%s", ""); + } else { + ret = snprintf(end_str, PAL_SYSFS_BUF_FILESZ, "-%lu", res_range_info.ranges[i].end); + } + + if (ret < 0) + return ret; + + ret = snprintf(str + offset, max_len - offset, "%lu%s%s", res_range_info.ranges[i].start, + end_str, (i + 1 == range_cnt) ? 
"\0" : sep); + if (ret < 0) + return ret; + offset += ret; + } + return 0; +} + +#define PRINTF_UINT32_HEX_PATTERN "%x%x%x%x%x%x%x%x%s" +#define PRINTF_UINT32_NIBBLE_TO_HEX(flag) \ + (flag & 0xf0000000) >> 28, \ + (flag & 0xf000000) >> 24, \ + (flag & 0xf00000) >> 20, \ + (flag & 0xf0000) >> 16, \ + (flag & 0xf000) >> 12, \ + (flag & 0xf00) >> 8, \ + (flag & 0xf0) >> 4, \ + (flag & 0xf) + +/* Writes the resource indices covered by `res_range_info` into `str` as comma-separated 8-hex-digit + * mask words, highest word first. Returns 0 on success, a negative error code otherwise. */ +int sys_convert_range_info_bitmap_str(PAL_RES_RANGE_INFO res_range_info, char* str, int max_len) { + if (g_topo_info->possible_logical_cores.resource_count > INT64_MAX) + return -EINVAL; + if (res_range_info.range_count > INT64_MAX) + return -EINVAL; + int ret = -EINVAL; /* result of every range-validation failure below */ + + /* Extract cpumask from the ranges */ + int64_t possible_cores = g_topo_info->possible_logical_cores.resource_count; + int64_t num_cpumask = BITS_TO_INT(possible_cores); + unsigned int* bitmap = (unsigned int*)calloc(num_cpumask, sizeof(unsigned int)); + if (!bitmap) + return -ENOMEM; + + for (int64_t i = 0; i < (int64_t)res_range_info.range_count; i++) { + uint64_t start = res_range_info.ranges[i].start; + uint64_t end = res_range_info.ranges[i].end; + if (end == UINT64_MAX) + end = start; + if (start > INT64_MAX || end > INT64_MAX) + goto out_bitmap; /* `bitmap` is allocated by now, so leave through the cleanup path */ + for (int64_t j = (int64_t)start; j <= (int64_t)end; j++) { + int64_t index = j / (sizeof(int) * BITS_IN_BYTE); + if (index >= num_cpumask) + goto out_bitmap; + bitmap[index] |= 1U << (j % (sizeof(int) * BITS_IN_BYTE)); + } + } + + /* Convert cpumask to strings */ + int offset = 0; + for (int64_t j = num_cpumask - 1; j >= 0; j-- ) { + if (max_len - offset < 0) { + ret = -ENOMEM; + goto out_bitmap; + } + ret = snprintf(str + offset, max_len - offset, PRINTF_UINT32_HEX_PATTERN, + PRINTF_UINT32_NIBBLE_TO_HEX(bitmap[j]), (j == 0) ? 
"\0" : ","); + if (ret < 0) + goto out_bitmap; + offset += ret; + } + ret = 0; + +out_bitmap: + free(bitmap); + return ret; +} + static int sys_resource(struct shim_dentry* parent, const char* name, unsigned int* out_num, readdir_callback_t callback, void* arg) { const char* parent_name = parent->name; @@ -22,13 +135,13 @@ static int sys_resource(struct shim_dentry* parent, const char* name, unsigned i const char* prefix; if (strcmp(parent_name, "node") == 0) { - pal_total = g_pal_control->topo_info.num_online_nodes; + pal_total = g_topo_info->nodes.resource_count; prefix = "node"; } else if (strcmp(parent_name, "cpu") == 0) { - pal_total = g_pal_control->cpu_info.online_logical_cores; + pal_total = g_topo_info->online_logical_cores.resource_count; prefix = "cpu"; } else if (strcmp(parent_name, "cache") == 0) { - pal_total = g_pal_control->topo_info.num_cache_index; + pal_total = g_topo_info->num_cache_index; prefix = "index"; } else { log_debug("unrecognized resource: %s", parent_name); diff --git a/LibOS/shim/src/fs/sys/node_info.c b/LibOS/shim/src/fs/sys/node_info.c index 32e596a44b..7d932843e2 100644 --- a/LibOS/shim/src/fs/sys/node_info.c +++ b/LibOS/shim/src/fs/sys/node_info.c @@ -15,7 +15,8 @@ int sys_node_general_load(struct shim_dentry* dent, char** out_data, size_t* out const char* name = dent->name; const char* str; if (strcmp(name, "online") == 0) { - str = g_pal_control->topo_info.online_nodes; + ret = sys_convert_range_info_str(g_topo_info->nodes, str, PAL_SYSFS_BUF_FILESZ, + ","); } else { log_debug("unrecognized file: %s", name); return -ENOENT; @@ -32,8 +33,8 @@ int sys_node_load(struct shim_dentry* dent, char** out_data, size_t* out_size) { return ret; const char* name = dent->name; - PAL_NUMA_TOPO_INFO* numa_topology = &g_pal_control->topo_info.numa_topology[node_num]; - const char* str = NULL; + PAL_NUMA_TOPO_INFO* numa_topology = &g_topo_info->numa_topology[node_num]; + char str[PAL_SYSFS_MAP_FILESZ] = {'\0'}; if (strcmp(name, "cpumap" ) == 0) 
{ str = numa_topology->cpumap; } else if (strcmp(name, "distance") == 0) { diff --git a/LibOS/shim/src/shim_init.c b/LibOS/shim/src/shim_init.c index ee409e47ee..2db24aaa76 100644 --- a/LibOS/shim/src/shim_init.c +++ b/LibOS/shim/src/shim_init.c @@ -427,6 +427,9 @@ noreturn void* shim_init(int argc, void* args) { RUN_INIT(receive_checkpoint_and_restore, &hdr); } else { g_process_ipc_ids.self_vmid = STARTING_VMID; + /* Updated via checkpoint-and-restore on child processes. */ + g_topo_info = DkGetPalTopologyInfo(); + assert(g_topo_info); } RUN_INIT(init_mount_root); diff --git a/LibOS/shim/src/sys/shim_sched.c b/LibOS/shim/src/sys/shim_sched.c index 8f7338747b..a33a437c07 100644 --- a/LibOS/shim/src/sys/shim_sched.c +++ b/LibOS/shim/src/sys/shim_sched.c @@ -175,7 +175,7 @@ long shim_do_sched_setaffinity(pid_t pid, unsigned int cpumask_size, unsigned lo long shim_do_sched_getaffinity(pid_t pid, unsigned int cpumask_size, unsigned long* user_mask_ptr) { int ret; - size_t cpu_cnt = g_pal_control->cpu_info.online_logical_cores; + size_t cpu_cnt = g_topo_info->online_logical_cores.resource_count; /* Check if user_mask_ptr is valid */ if (!is_user_memory_writable(user_mask_ptr, cpumask_size)) diff --git a/Pal/include/pal/pal.h b/Pal/include/pal/pal.h index f4a8d176c5..be2a342933 100644 --- a/Pal/include/pal/pal.h +++ b/Pal/include/pal/pal.h @@ -161,11 +161,12 @@ typedef struct PAL_CONTROL_ { PAL_CPU_INFO cpu_info; /*!< CPU information (only required ones) */ PAL_MEM_INFO mem_info; /*!< memory information (only required ones) */ - PAL_TOPO_INFO topo_info; /*!< Topology information (only required ones) */ } PAL_CONTROL; const PAL_CONTROL* DkGetPalControl(void); +PAL_TOPO_INFO* DkGetPalTopologyInfo(void); + /* * MEMORY ALLOCATION */ diff --git a/Pal/include/pal_internal.h b/Pal/include/pal_internal.h index c84febc803..7c5c4dcfce 100644 --- a/Pal/include/pal_internal.h +++ b/Pal/include/pal_internal.h @@ -131,6 +131,8 @@ extern struct pal_internal_state g_pal_state; extern 
PAL_CONTROL g_pal_control; +extern PAL_TOPO_INFO g_topo_info; + int add_preloaded_range(uintptr_t start, uintptr_t end, const char* comment); #define IS_ALLOC_ALIGNED(addr) IS_ALIGNED_POW2(addr, g_pal_state.alloc_align) diff --git a/Pal/src/db_main.c b/Pal/src/db_main.c index 5ee2469b5b..f4328b9a41 100644 --- a/Pal/src/db_main.c +++ b/Pal/src/db_main.c @@ -27,6 +27,12 @@ const PAL_CONTROL* DkGetPalControl(void) { return &g_pal_control; } +PAL_TOPO_INFO g_topo_info; + +PAL_TOPO_INFO* DkGetPalTopologyInfo(void) { + return &g_topo_info; +} + struct pal_internal_state g_pal_state; static void load_libraries(void) { @@ -442,7 +448,7 @@ noreturn void pal_main(uint64_t instance_id, /* current instance id */ } g_pal_control.mem_info.mem_total = _DkMemoryQuota(); - if (_DkGetTopologyInfo(&g_pal_control.topo_info) < 0) { + if (_DkGetTopologyInfo(&g_topo_info) < 0) { goto out_fail; } diff --git a/Pal/src/host/Linux-SGX/enclave_ocalls.c b/Pal/src/host/Linux-SGX/enclave_ocalls.c index b30b194d17..23eb616ae0 100644 --- a/Pal/src/host/Linux-SGX/enclave_ocalls.c +++ b/Pal/src/host/Linux-SGX/enclave_ocalls.c @@ -1881,7 +1881,7 @@ int ocall_sched_setaffinity(void* tcs, size_t cpumask_size, void* cpu_mask) { static bool is_cpumask_valid(void* cpu_mask, size_t cpumask_size) { size_t max_cpumask_bits = cpumask_size * BITS_IN_BYTE; - size_t valid_cpumask_bits = g_pal_control.cpu_info.online_logical_cores; + size_t valid_cpumask_bits = g_topo_info.online_logical_cores.resource_count; size_t invalid_bits = max_cpumask_bits - valid_cpumask_bits; if (invalid_bits == 0) diff --git a/Pal/src/pal-symbols b/Pal/src/pal-symbols index 9c66dbd9bf..b06168484a 100644 --- a/Pal/src/pal-symbols +++ b/Pal/src/pal-symbols @@ -45,3 +45,4 @@ DkAttestationQuote DkSetProtectedFilesKey DkDebugLog DkGetPalControl +DkGetPalTopologyInfo From f05518fda6ca9fd43a35b992d20c6d09ff444fbf Mon Sep 17 00:00:00 2001 From: Vijay Dhanraj Date: Wed, 8 Sep 2021 17:42:24 -0700 Subject: [PATCH 2/2] [Pal/LibOS] Add 
checkpoint-and-restore support for sysfs Signed-off-by: Vijay Dhanraj --- LibOS/shim/src/fs/sys/fs.c | 220 ++++++++++++++++++++++++++++++ LibOS/shim/src/sys/shim_clone.c | 1 + Pal/src/host/Linux-SGX/db_main.c | 14 +- Pal/src/host/Linux-SGX/sgx_main.c | 56 +------- 4 files changed, 235 insertions(+), 56 deletions(-) diff --git a/LibOS/shim/src/fs/sys/fs.c b/LibOS/shim/src/fs/sys/fs.c index a2c509d218..de3fb33f18 100644 --- a/LibOS/shim/src/fs/sys/fs.c +++ b/LibOS/shim/src/fs/sys/fs.c @@ -10,11 +10,15 @@ #include +#include "shim_checkpoint.h" #include "shim_fs.h" #include "shim_fs_pseudo.h" #include "stat.h" PAL_TOPO_INFO* g_topo_info = NULL; +int64_t g_num_cores_online; +int64_t g_num_nodes_online; +int64_t g_num_cache_lvls; int sys_convert_int_to_str(PAL_NUM val, SIZE_QUALIFIER_T size_qual, char* str, int max_len) { int ret = 0; @@ -276,3 +280,219 @@ int init_sysfs(void) { return 0; } + +BEGIN_CP_FUNC(numa_topology) { + __UNUSED(size); + assert(size == sizeof(PAL_NUMA_TOPO_INFO)); + + PAL_NUMA_TOPO_INFO* numa_topo = (PAL_NUMA_TOPO_INFO*)obj; + PAL_NUMA_TOPO_INFO* new_numa_topo = NULL; + + size_t off = GET_FROM_CP_MAP(obj); + if (!off) { + size_t numa_topo_sz = g_num_nodes_online * sizeof(PAL_NUMA_TOPO_INFO); + off = ADD_CP_OFFSET(numa_topo_sz); + ADD_TO_CP_MAP(obj, off); + new_numa_topo = (PAL_NUMA_TOPO_INFO*)(base + off); + memcpy(new_numa_topo, numa_topo, numa_topo_sz); + + for (int64_t idx = 0; idx < g_num_nodes_online; idx++) { + if (numa_topo[idx].cpumap.range_count > 0) { + size_t range_sz = numa_topo[idx].cpumap.range_count * sizeof(PAL_RANGE_INFO); + size_t toff = ADD_CP_OFFSET(range_sz); + new_numa_topo[idx].cpumap.ranges = (void*)(base + toff); + memcpy(new_numa_topo[idx].cpumap.ranges, numa_topo[idx].cpumap.ranges, range_sz); + } + + if (numa_topo[idx].distance.range_count > 0) { + size_t range_sz = numa_topo[idx].distance.range_count * sizeof(PAL_RANGE_INFO); + size_t toff = ADD_CP_OFFSET(range_sz); + new_numa_topo[idx].distance.ranges = (void*)(base + 
toff); + memcpy(new_numa_topo[idx].distance.ranges, numa_topo[idx].distance.ranges, range_sz); + } + } + ADD_CP_FUNC_ENTRY(off); + } else { + new_numa_topo = (PAL_NUMA_TOPO_INFO*)(base + off); + } + + if (objp) { + *objp = (void*)new_numa_topo; + } +} +END_CP_FUNC_NO_RS(numa_topology) + +BEGIN_CP_FUNC(cache) { + __UNUSED(size); + assert(size == sizeof(PAL_CORE_CACHE_INFO)); + + PAL_CORE_CACHE_INFO* cache = (PAL_CORE_CACHE_INFO*)obj; + PAL_CORE_CACHE_INFO* new_cache = NULL; + + size_t off = GET_FROM_CP_MAP(obj); + if (!off) { + size_t cache_topo_sz = g_num_cache_lvls * sizeof(PAL_CORE_CACHE_INFO); + off = ADD_CP_OFFSET(cache_topo_sz); + ADD_TO_CP_MAP(obj, off); + new_cache = (PAL_CORE_CACHE_INFO*)(base + off); + memcpy(new_cache, cache, cache_topo_sz); + + for (int64_t idx = 0; idx < g_num_cache_lvls; idx++) { + if (cache[idx].shared_cpu_map.range_count > 0) { + size_t range_sz = cache[idx].shared_cpu_map.range_count * sizeof(PAL_RANGE_INFO); + size_t toff = ADD_CP_OFFSET(range_sz); + new_cache[idx].shared_cpu_map.ranges = (void*)(base + toff); + memcpy(new_cache[idx].shared_cpu_map.ranges, cache[idx].shared_cpu_map.ranges, + range_sz); + } + } + ADD_CP_FUNC_ENTRY(off); + } else { + new_cache = (PAL_CORE_CACHE_INFO*)(base + off); + } + + if (objp) { + *objp = (void*)new_cache; + } +} +END_CP_FUNC_NO_RS(cache) + +BEGIN_CP_FUNC(core_topology) { + __UNUSED(size); + assert(size == sizeof(PAL_CORE_TOPO_INFO)); + + PAL_CORE_TOPO_INFO* core_topo = (PAL_CORE_TOPO_INFO*)obj; + PAL_CORE_TOPO_INFO* new_core_topo = NULL; + + size_t off = GET_FROM_CP_MAP(obj); + if (!off) { + size_t core_topo_sz = g_num_cores_online * sizeof(PAL_CORE_TOPO_INFO); + off = ADD_CP_OFFSET(core_topo_sz); + ADD_TO_CP_MAP(obj, off); + new_core_topo = (PAL_CORE_TOPO_INFO*)(base + off); + memcpy(new_core_topo, core_topo, core_topo_sz); + + for (int64_t idx = 0; idx < g_num_cores_online; idx++) { + if (core_topo[idx].core_siblings.range_count > 0) { + size_t range_sz = 
core_topo[idx].core_siblings.range_count * sizeof(PAL_RANGE_INFO); + size_t toff = ADD_CP_OFFSET(range_sz); + new_core_topo[idx].core_siblings.ranges = (void*)(base + toff); + memcpy(new_core_topo[idx].core_siblings.ranges, core_topo[idx].core_siblings.ranges, + range_sz); + } + + if (core_topo[idx].thread_siblings.range_count > 0) { + size_t range_sz = core_topo[idx].thread_siblings.range_count * sizeof(PAL_RANGE_INFO); + size_t toff = ADD_CP_OFFSET(range_sz); + new_core_topo[idx].thread_siblings.ranges = (void*)(base + toff); + memcpy(new_core_topo[idx].thread_siblings.ranges, + core_topo[idx].thread_siblings.ranges, range_sz); + } + + DO_CP(cache, core_topo[idx].cache, &new_core_topo[idx].cache); + } + ADD_CP_FUNC_ENTRY(off); + } else { + new_core_topo = (PAL_CORE_TOPO_INFO*)(base + off); + } + + if (objp) { + *objp = (void*)new_core_topo; + } +} +END_CP_FUNC_NO_RS(core_topology) + +BEGIN_CP_FUNC(topo_info) { + __UNUSED(size); + __UNUSED(objp); + assert(size == sizeof(PAL_TOPO_INFO)); + + PAL_TOPO_INFO* topo_info = (PAL_TOPO_INFO*)obj; + PAL_TOPO_INFO* new_topo_info = NULL; + + size_t off = GET_FROM_CP_MAP(obj); + if (!off) { + off = ADD_CP_OFFSET(sizeof(*topo_info)); + ADD_TO_CP_MAP(obj, off); + new_topo_info = (PAL_TOPO_INFO*)(base + off); + *new_topo_info = *topo_info; + + if (topo_info->online_logical_cores.range_count > 0) { + size_t range_sz = topo_info->online_logical_cores.range_count * sizeof(PAL_RANGE_INFO); + size_t toff = ADD_CP_OFFSET(range_sz); + new_topo_info->online_logical_cores.ranges = (void*)(base + toff); + memcpy(new_topo_info->online_logical_cores.ranges, + topo_info->online_logical_cores.ranges, range_sz); + } + g_num_cores_online = topo_info->online_logical_cores.resource_count; + g_num_cache_lvls = topo_info->num_cache_index; + + if (topo_info->possible_logical_cores.range_count > 0) { + size_t range_sz = topo_info->possible_logical_cores.range_count * sizeof(PAL_RANGE_INFO); + size_t toff = ADD_CP_OFFSET(range_sz); + 
new_topo_info->possible_logical_cores.ranges = (void*)(base + toff); + memcpy(new_topo_info->possible_logical_cores.ranges, + topo_info->possible_logical_cores.ranges, range_sz); + } + + if (topo_info->nodes.range_count > 0) { + size_t range_sz = topo_info->nodes.range_count * sizeof(PAL_RANGE_INFO); + size_t toff = ADD_CP_OFFSET(range_sz); + new_topo_info->nodes.ranges = (void*)(base + toff); + memcpy(new_topo_info->nodes.ranges, topo_info->nodes.ranges, range_sz); + } + g_num_nodes_online = topo_info->nodes.resource_count; + + DO_CP(core_topology, topo_info->core_topology, &new_topo_info->core_topology); + DO_CP(numa_topology, topo_info->numa_topology, &new_topo_info->numa_topology); + + ADD_CP_FUNC_ENTRY(off); + } else { + new_topo_info = (PAL_TOPO_INFO*)(base + off); + } + + if (objp) + *objp = (void*)new_topo_info; +} +END_CP_FUNC(topo_info) + +BEGIN_RS_FUNC(topo_info) { + __UNUSED(offset); + PAL_TOPO_INFO* topo_info = (void*)(base + GET_CP_FUNC_ENTRY()); + + if (topo_info->online_logical_cores.range_count > 0) { + CP_REBASE(topo_info->online_logical_cores.ranges); + } else { + assert(!topo_info->online_logical_cores.ranges); + } + + if (topo_info->possible_logical_cores.range_count > 0) { + CP_REBASE(topo_info->possible_logical_cores.ranges); + } else { + assert(!topo_info->possible_logical_cores.ranges); + } + + if (topo_info->nodes.range_count > 0) { + CP_REBASE(topo_info->nodes.ranges); + } else { + assert(!topo_info->nodes.ranges); + } + + CP_REBASE(topo_info->core_topology); + for (uint64_t idx = 0; idx < topo_info->online_logical_cores.resource_count; idx++) { + CP_REBASE(topo_info->core_topology[idx].core_siblings.ranges); + CP_REBASE(topo_info->core_topology[idx].thread_siblings.ranges); + CP_REBASE(topo_info->core_topology[idx].cache); + for (uint64_t lvl = 0; lvl < topo_info->num_cache_index; lvl++) { + CP_REBASE(topo_info->core_topology[idx].cache[lvl].shared_cpu_map.ranges); + } + } + CP_REBASE(topo_info->numa_topology); + for (uint64_t idx = 0; 
idx < topo_info->nodes.resource_count; idx++) { + CP_REBASE(topo_info->numa_topology[idx].cpumap.ranges); + CP_REBASE(topo_info->numa_topology[idx].distance.ranges); + } + + g_topo_info = topo_info; +} +END_RS_FUNC(topo_info) diff --git a/LibOS/shim/src/sys/shim_clone.c b/LibOS/shim/src/sys/shim_clone.c index 8aa20a583f..266f4979d3 100644 --- a/LibOS/shim/src/sys/shim_clone.c +++ b/LibOS/shim/src/sys/shim_clone.c @@ -115,6 +115,7 @@ static BEGIN_MIGRATION_DEF(fork, struct shim_process* process_description, DEFINE_MIGRATE(migratable, NULL, 0); DEFINE_MIGRATE(brk, NULL, 0); DEFINE_MIGRATE(loaded_elf_objects, NULL, 0); + DEFINE_MIGRATE(topo_info, g_topo_info, sizeof(*g_topo_info)); #ifdef DEBUG DEFINE_MIGRATE(gdb_map, NULL, 0); #endif diff --git a/Pal/src/host/Linux-SGX/db_main.c b/Pal/src/host/Linux-SGX/db_main.c index 6026359fa9..23f09edc27 100644 --- a/Pal/src/host/Linux-SGX/db_main.c +++ b/Pal/src/host/Linux-SGX/db_main.c @@ -644,11 +644,6 @@ noreturn void pal_linux_main(char* uptr_libpal_uri, size_t libpal_uri_len, char* init_tsc(); (void)get_tsc(); /* must be after `ready_for_exceptions=1` since it may generate SIGILL */ - /* Now that enclave memory is set up, parse and store host topology info into g_pal_sec struct */ - ret = parse_host_topo_info(&sec_info); - if (ret < 0) - ocall_exit(1, /*is_exitgroup=*/true); - /* initialize master key (used for pipes' encryption for all enclaves of an application); it * will be overwritten below in init_child_process() with inherited-from-parent master key if * this enclave is child */ @@ -668,6 +663,15 @@ noreturn void pal_linux_main(char* uptr_libpal_uri, size_t libpal_uri_len, char* } } + /* Now that enclave memory is set up, sanitize host topology info into g_pal_sec struct. + * Note: This is done only on the first process and the subsequent forked child/children use + * the sanitized info from the first process. 
*/ + if (!parent) { + ret = parse_host_topo_info(sec_info.topo_info); + if (ret < 0) + ocall_exit(1, /*is_exitgroup=*/true); + } + uint64_t manifest_size = GET_ENCLAVE_TLS(manifest_size); void* manifest_addr = g_enclave_top - ALIGN_UP_PTR_POW2(manifest_size, g_page_size); diff --git a/Pal/src/host/Linux-SGX/sgx_main.c b/Pal/src/host/Linux-SGX/sgx_main.c index 624b50e279..7221312353 100644 --- a/Pal/src/host/Linux-SGX/sgx_main.c +++ b/Pal/src/host/Linux-SGX/sgx_main.c @@ -897,59 +897,13 @@ static int load_enclave(struct pal_enclave* enclave, char* args, size_t args_siz pal_sec->uid = DO_SYSCALL(getuid); pal_sec->gid = DO_SYSCALL(getgid); - /* we cannot use CPUID(0xb) because it counts even disabled-by-BIOS cores (e.g. HT cores); - * instead extract info on total number of logical cores, number of physical cores, - * SMT support etc. by parsing sysfs pseudo-files */ - int online_logical_cores = get_hw_resource("/sys/devices/system/cpu/online", /*count=*/true); - if (online_logical_cores < 0) - return online_logical_cores; - pal_sec->online_logical_cores = online_logical_cores; - - int possible_logical_cores = get_hw_resource("/sys/devices/system/cpu/possible", - /*count=*/true); - if (possible_logical_cores < 0) - return possible_logical_cores; - pal_sec->possible_logical_cores = possible_logical_cores; - - /* TODO: correctly support offline cores */ - if (possible_logical_cores > 0 && possible_logical_cores > online_logical_cores) { - log_warning("some CPUs seem to be offline; Graphene doesn't take this into account " - "which may lead to subpar performance"); - } - - int core_siblings = get_hw_resource("/sys/devices/system/cpu/cpu0/topology/core_siblings_list", - /*count=*/true); - if (core_siblings < 0) - return core_siblings; - - int smt_siblings = get_hw_resource("/sys/devices/system/cpu/cpu0/topology/thread_siblings_list", - /*count=*/true); - if (smt_siblings < 0) - return smt_siblings; - pal_sec->physical_cores_per_socket = core_siblings / smt_siblings; - - 
/* array of "logical core -> socket" mappings */ - int* cpu_socket = (int*)malloc(online_logical_cores * sizeof(int)); - if (!cpu_socket) - return -ENOMEM; - - char filename[128]; - for (int idx = 0; idx < online_logical_cores; idx++) { - snprintf(filename, sizeof(filename), - "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", idx); - cpu_socket[idx] = get_hw_resource(filename, /*count=*/false); - if (cpu_socket[idx] < 0) { - log_error("Cannot read %s", filename); - ret = cpu_socket[idx]; - free(cpu_socket); + /* Only the first process reads topology info from the host. Subsequent forked child/children + * get this information from the first process */ + if (enclave->is_first_process) { + ret = get_topology_info(&pal_sec->topo_info); + if (ret < 0) return ret; - } } - pal_sec->cpu_socket = cpu_socket; - - ret = get_topology_info(&pal_sec->topo_info); - if (ret < 0) - return ret; #ifdef DEBUG size_t env_i = 0;