From a03bb5e60165b11be11f8c1e8e492a274a742011 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Sun, 18 Dec 2022 13:18:00 -0700 Subject: [PATCH] Fix infinite loop in file inferencing re: Issue https://github.com/Unidata/netcdf-c/issues/2573 The file type inferencer in libdispatch/dinference.c has a simple forward inference mechanism so that the occurrence of certain mode values in a URL fragment implies inclusion of additional mode values. This kind of inference is notorious for leading to cycles if not careful. Unfortunately, this occurred in the one in dinference.c. This was fixed by providing a more complicated, but more reliable inference mechanism. ## Misc. Other Changes * Found and fixed a couple of memory leaks. * There is a recent problem in building HDF4 support on github actions. Fixed by using the internal HDF4 xdr capability. * Some filter-related code was not being properly ifdef'd with ENABLE_NCZARRA_FILTERS. --- .github/workflows/run_tests_osx.yml | 2 +- .github/workflows/run_tests_ubuntu.yml | 6 +- .github/workflows/run_tests_win_cygwin.yml | 2 +- .github/workflows/run_tests_win_mingw.yml | 2 +- libdispatch/dfile.c | 23 +-- libdispatch/dinfermodel.c | 205 +++++++++++++++------ libdispatch/ncbytes.c | 4 +- libdispatch/nclist.c | 4 +- libnczarr/zsync.c | 11 +- libnczarr/zvar.c | 6 + 10 files changed, 178 insertions(+), 87 deletions(-) diff --git a/.github/workflows/run_tests_osx.yml b/.github/workflows/run_tests_osx.yml index fc73521c85..11864a8012 100644 --- a/.github/workflows/run_tests_osx.yml +++ b/.github/workflows/run_tests_osx.yml @@ -7,7 +7,7 @@ name: Run macOS-based netCDF Tests -on: [pull_request, workflow_dispatch] +on: [pull_request,workflow_dispatch] jobs: diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml index 618c13ed2f..550faed042 100644 --- a/.github/workflows/run_tests_ubuntu.yml +++ b/.github/workflows/run_tests_ubuntu.yml @@ -4,7 +4,7 @@ name: Run Ubuntu/Linux netCDF Tests -on: [pull_request, workflow_dispatch] +on: [pull_request,workflow_dispatch] jobs: @@ -42,7 +42,7 @@ jobs: wget https://support.hdfgroup.org/ftp/HDF/releases/HDF4.2.15/src/hdf-4.2.15.tar.bz2 tar -jxf hdf-4.2.15.tar.bz2 pushd hdf-4.2.15 - ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib + ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib --enable-hdf4-xdr make -j make install -j popd @@ -89,7 +89,7 @@ jobs: wget https://support.hdfgroup.org/ftp/HDF/releases/HDF4.2.15/src/hdf-4.2.15.tar.bz2 tar -jxf hdf-4.2.15.tar.bz2 pushd hdf-4.2.15 - CC=mpicc ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib --enable-parallel + CC=mpicc ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib --enable-parallel --enable-hdf4-xdr make -j make install -j popd diff --git a/.github/workflows/run_tests_win_cygwin.yml b/.github/workflows/run_tests_win_cygwin.yml index 361e6265b7..147216e768 100644 --- a/.github/workflows/run_tests_win_cygwin.yml +++ b/.github/workflows/run_tests_win_cygwin.yml @@ -1,6 +1,6 @@ name: Run Cygwin-based tests -on: [pull_request, workflow_dispatch] +on: [pull_request,workflow_dispatch] env: SHELLOPTS: igncr diff --git a/.github/workflows/run_tests_win_mingw.yml b/.github/workflows/run_tests_win_mingw.yml index 5ff67e1a16..d872128597 100644 --- a/.github/workflows/run_tests_win_mingw.yml +++ b/.github/workflows/run_tests_win_mingw.yml @@ -9,7 +9,7 @@ name: Run MSYS2, MinGW64-based Tests env: CPPFLAGS: "-D_BSD_SOURCE" -on: [pull_request, workflow_dispatch] +on: [pull_request,workflow_dispatch] jobs: diff --git a/libdispatch/dfile.c b/libdispatch/dfile.c index ae756a8f27..a53be47893 100644 --- a/libdispatch/dfile.c +++ b/libdispatch/dfile.c @@ -1839,19 +1839,16 @@ NC_create(const char *path0, int cmode, size_t initialsz, TRACE(nc_create); if(path0 == NULL) - return NC_EINVAL; + {stat = NC_EINVAL; goto done;} /* Check mode flag for sanity. */ - if ((stat = check_create_mode(cmode))) - return stat; + if ((stat = check_create_mode(cmode))) goto done; /* Initialize the library. The available dispatch tables * will depend on how netCDF was built * (with/without netCDF-4, DAP, CDMREMOTE). */ - if(!NC_initialized) - { - if ((stat = nc_initialize())) - return stat; + if(!NC_initialized) { + if ((stat = nc_initialize())) goto done; } { @@ -1863,10 +1860,7 @@ NC_create(const char *path0, int cmode, size_t initialsz, memset(&model,0,sizeof(model)); newpath = NULL; - if((stat = NC_infermodel(path,&cmode,1,useparallel,NULL,&model,&newpath))) { - nullfree(newpath); - goto done; - } + if((stat = NC_infermodel(path,&cmode,1,useparallel,NULL,&model,&newpath))) goto done; if(newpath) { nullfree(path); path = newpath; @@ -1918,7 +1912,7 @@ NC_create(const char *path0, int cmode, size_t initialsz, dispatcher = NC3_dispatch_table; break; default: - return NC_ENOTNC; + {stat = NC_ENOTNC; goto done;} } /* Create the NC* instance and insert its dispatcher and model */ @@ -1937,6 +1931,7 @@ NC_create(const char *path0, int cmode, size_t initialsz, } done: nullfree(path); + nullfree(newpath); return stat; } @@ -1980,12 +1975,12 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp, TRACE(nc_open); if(!NC_initialized) { stat = nc_initialize(); - if(stat) return stat; + if(stat) goto done; } /* Check inputs. */ if (!path0) - return NC_EINVAL; + {stat = NC_EINVAL; goto done;} /* Capture the inmemory related flags */ mmap = ((omode & NC_MMAP) == NC_MMAP); diff --git a/libdispatch/dinfermodel.c b/libdispatch/dinfermodel.c index 74fd55a4fc..ff3e8e9770 100644 --- a/libdispatch/dinfermodel.c +++ b/libdispatch/dinfermodel.c @@ -143,7 +143,15 @@ static const struct MACRODEF { {NULL,NULL,{NULL}} }; -/* Mode inferences: if mode contains key, then add the inference and infer again */ +/* +Mode inferences: if mode contains key value, then add the inferred value; +Warning: be careful how this list is constructed to avoid infinite inferences. +In order to (mostly) avoid that consequence, any attempt to +infer a value that is already present will be ignored. +This effectively means that the inference graph +must be a DAG and may not have cycles. +You have been warned. +*/ static const struct MODEINFER { char* key; char* inference; @@ -151,6 +159,7 @@ static const struct MODEINFER { {"zarr","nczarr"}, {"xarray","zarr"}, {"noxarray","nczarr"}, +{"noxarray","zarr"}, {NULL,NULL} }; @@ -202,6 +211,7 @@ static int processmacros(NClist** fraglistp); static char* envvlist2string(NClist* pairs, const char*); static void set_default_mode(int* cmodep); static int parseonchar(const char* s, int ch, NClist* segments); +static int mergelist(NClist** valuesp); static int openmagic(struct MagicFile* file); static int readmagic(struct MagicFile* file, long pos, char* magic); @@ -217,8 +227,9 @@ static int parsepair(const char* pair, char** keyp, char** valuep); static NClist* parsemode(const char* modeval); static const char* getmodekey(const NClist* envv); static int replacemode(NClist* envv, const char* newval); -static int inferone(const char* mode, NClist* newmodes); +static void infernext(NClist* current, NClist* next); static int negateone(const char* mode, NClist* modes); +static void cleanstringlist(NClist* strs, int caseinsensitive); /* If the path looks like a URL, then parse it, reformat it. @@ -416,28 +427,6 @@ envvlist2string(NClist* envv, const char* delim) return result; } -/* Convert a list into a comma'd string */ -static char* -list2string(NClist* list) -{ - int i; - NCbytes* buf = NULL; - char* result = NULL; - - if(list == NULL || nclistlength(list)==0) return strdup(""); - buf = ncbytesnew(); - for(i=0;i 0) ncbytescat(buf,","); - ncbytescat(buf,m); - } - result = ncbytesextract(buf); - ncbytesfree(buf); - if(result == NULL) result = strdup(""); - return result; -} - /* Given a mode= argument, fill in the impl */ static int processmodearg(const char* arg, NCmodel* model) @@ -504,9 +493,10 @@ processinferences(NClist* fraglenv) { int stat = NC_NOERR; const char* modeval = NULL; - NClist* modes = NULL; NClist* newmodes = nclistnew(); - int i,inferred = 0; + NClist* currentmodes = NULL; + NClist* nextmodes = nclistnew(); + int i; char* newmodeval = NULL; if(fraglenv == NULL || nclistlength(fraglenv) == 0) goto done; @@ -515,22 +505,53 @@ processinferences(NClist* fraglenv) if((modeval = getmodekey(fraglenv))==NULL) goto done; /* Get the mode as list */ - modes = parsemode(modeval); - - /* Repeatedly walk the mode list until no more new positive inferences */ - do { - for(i=0;ikey;tests++) { - if(strcasecmp(tests->key,mode)==0) { - /* Append the inferred mode; dups removed later */ - nclistpush(newmodes,strdup(tests->inference)); - changed = 1; + int i; + for(i=0;ikey;tests++) { + if(strcasecmp(tests->key,cur)==0) { + /* Append the inferred mode unless dup */ + if(!nclistmatch(next,tests->inference,1)) + nclistpush(next,strdup(tests->inference)); + } } } - return changed; } +/* +Given a list of strings, remove nulls and duplicates +*/ static int -mergekey(NClist** valuesp) +mergelist(NClist** valuesp) { int i,j; int stat = NC_NOERR; @@ -686,12 +714,12 @@ cleanfragments(NClist** fraglenvp) /* collect all unique keys */ collectallkeys(fraglenv,allkeys); - /* Collect all values for same key across all fragments */ + /* Collect all values for same key across all fragment pairs */ for(i=0;i 0) ncbytescat(buf,","); + ncbytescat(buf,m); + } + result = ncbytesextract(buf); + ncbytesfree(buf); + if(result == NULL) result = strdup(""); + return result; +} + +#if 0 +/* Given a comma separated string, remove duplicates; mostly used to cleanup mode list */ +static char* +cleancommalist(const char* commalist, int caseinsensitive) +{ + NClist* tmp = nclistnew(); + char* newlist = NULL; + if(commalist == NULL || strlen(commalist)==0) return nulldup(commalist); + (void)parseonchar(commalist,',',tmp);/* split on commas */ + cleanstringlist(tmp,caseinsensitive); + newlist = list2string(tmp); + nclistfreeall(tmp); + return newlist; +} +#endif + +/* Given a list of strings, remove nulls and duplicated */ +static void +cleanstringlist(NClist* strs, int caseinsensitive) +{ + int i,j; + if(nclistlength(strs) == 0) return; + /* Remove nulls */ + for(i=nclistlength(strs)-1;i>=0;i--) { + if(nclistget(strs,i)==NULL) nclistremove(strs,i); + } + /* Remove duplicates*/ + for(i=0;ii;j--) { + int match; + const char* candidate = nclistget(strs,j); + if(caseinsensitive) + match = (strcasecmp(value,candidate) == 0); + else + match = (strcmp(value,candidate) == 0); + if(match) {char* dup = nclistremove(strs,j); nullfree(dup);} + } + } +} + + /**************************************************/ /** * @internal Given an existing file, figure out its format and return @@ -1502,8 +1595,10 @@ printlist(NClist* list, const char* tag) { int i; fprintf(stderr,"%s:",tag); - for(i=0;ilength == 0) return ncbytesfail(); diff --git a/libdispatch/nclist.c b/libdispatch/nclist.c index 49f0dded45..b5f815864c 100644 --- a/libdispatch/nclist.c +++ b/libdispatch/nclist.c @@ -183,6 +183,7 @@ nclistremove(NClist* l, size_t i) return elem; } +/* Match on == */ int nclistcontains(NClist* l, void* elem) { @@ -193,7 +194,7 @@ nclistcontains(NClist* l, void* elem) return 0; } -/* Return 1/0 */ +/* Match on str(case)cmp */ int nclistmatch(NClist* l, const char* elem, int casesensitive) { @@ -230,7 +231,6 @@ nclistelemremove(NClist* l, void* elem) return found; } - /* Extends nclist to include a unique operator which remove duplicate values; NULL values removed return value is always 1. diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index b3a93ee767..f237cd2b61 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -1429,6 +1429,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) char* varpath = NULL; char* key = NULL; NCZ_FILE_INFO_T* zinfo = NULL; + NC_VAR_INFO_T* var = NULL; NCZ_VAR_INFO_T* zvar = NULL; NCZMAP* map = NULL; NCjson* jvar = NULL; @@ -1460,7 +1461,6 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) /* Load each var in turn */ for(i = 0; i < nclistlength(varnames); i++) { - NC_VAR_INFO_T* var; const char* varname = nclistget(varnames,i); if((stat = nc4_var_list_add2(grp, varname, &var))) goto done; @@ -1477,10 +1477,6 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) /* Indicate we do not have quantizer yet */ var->quantize_mode = -1; - /* Set filter list */ - assert(var->filters == NULL); - var->filters = (void*)nclistnew(); - /* Construct var path */ if((stat = NCZ_varkey(var,&varpath))) goto done; @@ -1697,9 +1693,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) object MUST contain a "id" key identifying the codec to be used. */ /* Do filters key before compressor key so final filter chain is in correct order */ { +#ifdef ENABLE_NCZARR_FILTERS if(var->filters == NULL) var->filters = (void*)nclistnew(); if(zvar->incompletefilters == NULL) zvar->incompletefilters = (void*)nclistnew(); -#ifdef ENABLE_NCZARR_FILTERS { int k; chainindex = 0; /* track location of filter in the chain */ if((stat = NCZ_filter_initialize())) goto done; @@ -1722,8 +1718,8 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) /* From V2 Spec: A JSON object identifying the primary compression codec and providing configuration parameters, or ``null`` if no compressor is to be used. */ { - if(var->filters == NULL) var->filters = (void*)nclistnew(); #ifdef ENABLE_NCZARR_FILTERS + if(var->filters == NULL) var->filters = (void*)nclistnew(); if((stat = NCZ_filter_initialize())) goto done; if((stat = NCJdictget(jvar,"compressor",&jfilter))) goto done; if(jfilter != NULL && NCJsort(jfilter) != NCJ_NULL) { @@ -1752,6 +1748,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) nullfree(shapes); shapes = NULL; if(formatv1) {NCJreclaim(jncvar); jncvar = NULL;} NCJreclaim(jvar); jvar = NULL; + var = NULL; } done: diff --git a/libnczarr/zvar.c b/libnczarr/zvar.c index 12014bae87..2a98646e36 100644 --- a/libnczarr/zvar.c +++ b/libnczarr/zvar.c @@ -391,9 +391,11 @@ NCZ_def_var(int ncid, const char *name, nc_type xtype, int ndims, var->meta_read = NC_TRUE; var->atts_read = NC_TRUE; +#ifdef ENABLE_NCZARR_FILTERS /* Set the filter list */ assert(var->filters == NULL); var->filters = (void*)nclistnew(); +#endif /* Point to the type, and increment its ref. count */ var->type_info = type; @@ -558,10 +560,12 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, /* Can't turn on parallel and deflate/fletcher32/szip/shuffle * before HDF5 1.10.3. */ +#ifdef ENABLE_NCZARR_FILTERS #ifndef HDF5_SUPPORTS_PAR_FILTERS if (h5->parallel == NC_TRUE) if (nclistlength(((NClist*)var->filters)) > 0 || fletcher32 || shuffle) {retval = NC_EINVAL; goto done;} +#endif #endif /* If the HDF5 dataset has already been created, then it is too @@ -628,8 +632,10 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, * no filters in use for this data. */ if (storage != NC_CHUNKED) { +#ifdef NCZARR_FILTERS if (nclistlength(((NClist*)var->filters)) > 0) {retval = NC_EINVAL; goto done;} +#endif for (d = 0; d < var->ndims; d++) if (var->dim[d]->unlimited) {retval = NC_EINVAL; goto done;}