From 05635da687c2cea924558bce63055ab64e8e95e4 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 5 Dec 2017 17:36:12 -0700 Subject: [PATCH] rcache/grdma: try to prevent erroneous free error messages It is possible to have parts of an in-use registered region be passed to munmap or madvise. This does not necessarily mean the user has made an error but does mean the entire region should be invalidated. This commit checks that the munmap or madvise base matches the beginning of the cached region. If it does and the region is in-use then we print an error. There will certainly be false-negatives where a user unmaps something that really is in-use but that is preferrable to a false-positive. References #4509 Signed-off-by: Nathan Hjelm (cherry picked from commit d3fa1bbbb0ecd3428e52300752ea6869a1ffac46) Signed-off-by: Nathan Hjelm --- opal/mca/rcache/grdma/rcache_grdma_module.c | 24 +++++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/opal/mca/rcache/grdma/rcache_grdma_module.c b/opal/mca/rcache/grdma/rcache_grdma_module.c index 327c2845a02..67e7fa3a560 100644 --- a/opal/mca/rcache/grdma/rcache_grdma_module.c +++ b/opal/mca/rcache/grdma/rcache_grdma_module.c @@ -14,7 +14,7 @@ * Copyright (c) 2006 Voltaire. All rights reserved. * Copyright (c) 2007 Mellanox Technologies. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science @@ -422,20 +422,29 @@ static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache, return rc; } +struct gc_add_args_t { + void *base; + size_t size; +}; +typedef struct gc_add_args_t gc_add_args_t; + static int gc_add (mca_rcache_base_registration_t *grdma_reg, void *ctx) { mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) grdma_reg->rcache; - - /* unused */ - (void) ctx; + gc_add_args_t *args = (gc_add_args_t *) ctx; if (grdma_reg->flags & MCA_RCACHE_FLAGS_INVALID) { /* nothing more to do */ return OPAL_SUCCESS; } - if (grdma_reg->ref_count) { - /* attempted to remove an active registration */ + if (grdma_reg->ref_count && grdma_reg->base == args->base) { + /* attempted to remove an active registration. to handle cases where part of + * an active registration has been unmapped we check if the bases match. this + * *hopefully* will suppress erroneously emitted errors. if we can't suppress + * the erroneous error in all cases then this check and return should be removed + * entirely. we are not required to give an error for a user freeing a buffer + * that is in-use by MPI. Its just a nice to have. */ return OPAL_ERROR; } @@ -457,7 +466,8 @@ static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache, void *base, size_t size) { mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache; - return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, gc_add, NULL); + gc_add_args_t args = {.base = base, .size = size}; + return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, gc_add, &args); } /* Make sure this registration request is not stale. In other words, ensure