From ec258bf314bb67c06b22714ccd1e82303bcd72c0 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Mon, 9 Aug 2021 17:05:02 -0600 Subject: [PATCH] Fix a number of bugs in the nczarr code. re: Issues https://github.com/Unidata/netcdf-c/issues/2063, https://github.com/Unidata/netcdf-c/issues/2062, https://github.com/Unidata/netcdf-c/issues/2061, https://github.com/Unidata/netcdf-c/issues/2059 1. Support "fill_value: null" (https://github.com/Unidata/netcdf-c/issues/2063). 2. Handle the dtype case "|u1" (https://github.com/Unidata/netcdf-c/issues/2062). 3. When writing a pure Zarr format file, some nczarr attributes inadvertently crept in (https://github.com/Unidata/netcdf-c/issues/2061). 4. If there is no fill value, then the .zarray fill_value key should have the value null rather than left out (https://github.com/Unidata/netcdf-c/issues/2059). Hat tip: Even Rouault --- .github/workflows/run_tests.yml | 2 +- libnczarr/zs3sdk.cpp | 4 +- libnczarr/zsync.c | 103 +++++++++++++++++++------------- libnczarr/zutil.c | 1 + 4 files changed, 65 insertions(+), 45 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index ad7ccb61ce..b575d1bce4 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -4,7 +4,7 @@ name: Run netCDF Tests -on: [pull_request] +on: [pull_request,push] jobs: diff --git a/libnczarr/zs3sdk.cpp b/libnczarr/zs3sdk.cpp index 35846716bf..0b0c2db4a3 100644 --- a/libnczarr/zs3sdk.cpp +++ b/libnczarr/zs3sdk.cpp @@ -78,7 +78,9 @@ NCZ_s3sdkcreateconfig(const char* host, const char* region, void** configp) if(region) config->region = region; if(host) config->endpointOverride = host; config->enableEndpointDiscovery = true; - config->followRedirects = true; +#if 0 + config->followRedirects = Aws::Client::FollowRedirectsPolicy::ALWAYS; +#endif if(configp) * configp = config; return ZUNTRACE(stat); } diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index 95f82679a0..1872180eb7 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -116,6 +116,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) int i,stat = NC_NOERR; NCZ_FILE_INFO_T* zinfo = NULL; char version[1024]; + int purezarr = 0; NCZMAP* map = NULL; char* fullpath = NULL; char* key = NULL; @@ -132,47 +133,50 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) zinfo = file->format_file_info; map = zinfo->map; + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + /* Construct grp key */ if((stat = NCZ_grpkey(grp,&fullpath))) goto done; - /* Create dimensions dict */ - if((stat = ncz_collect_dims(file,grp,&jdims))) goto done; + if(!purezarr) { + /* Create dimensions dict */ + if((stat = ncz_collect_dims(file,grp,&jdims))) goto done; - /* Create vars list */ - if((stat = NCJnew(NCJ_ARRAY,&jvars))) - goto done; - for(i=0; ivars); i++) { - NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,i); - if((stat = NCJaddstring(jvars,NCJ_STRING,var->hdr.name))) goto done; - } + /* Create vars list */ + if((stat = NCJnew(NCJ_ARRAY,&jvars))) + goto done; + for(i=0; ivars); i++) { + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,i); + if((stat = NCJaddstring(jvars,NCJ_STRING,var->hdr.name))) goto done; + } - /* Create subgroups list */ - if((stat = NCJnew(NCJ_ARRAY,&jsubgrps))) - goto done; - for(i=0; ichildren); i++) { - NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); - if((stat = NCJaddstring(jsubgrps,NCJ_STRING,g->hdr.name))) goto done; + /* Create subgroups list */ + if((stat = NCJnew(NCJ_ARRAY,&jsubgrps))) + goto done; + for(i=0; ichildren); i++) { + NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); + if((stat = NCJaddstring(jsubgrps,NCJ_STRING,g->hdr.name))) goto done; + } + /* Create the "_NCZARR_GROUP" dict */ + if((stat = NCJnew(NCJ_DICT,&json))) + goto done; + /* Insert the various dicts and arrays */ + if((stat = NCJinsert(json,"dims",jdims))) goto done; + jdims = NULL; /* avoid memory problems */ + if((stat = NCJinsert(json,"vars",jvars))) goto done; + jvars = NULL; /* avoid memory problems */ + if((stat = NCJinsert(json,"groups",jsubgrps))) goto done; + jsubgrps = NULL; /* avoid memory problems */ } - /* Create the "_NCZARR_GROUP" dict */ - if((stat = NCJnew(NCJ_DICT,&json))) - goto done; - /* Insert the various dicts and arrays */ - if((stat = NCJinsert(json,"dims",jdims))) goto done; - jdims = NULL; /* avoid memory problems */ - if((stat = NCJinsert(json,"vars",jvars))) goto done; - jvars = NULL; /* avoid memory problems */ - if((stat = NCJinsert(json,"groups",jsubgrps))) goto done; - jsubgrps = NULL; /* avoid memory problems */ - /* build ZGROUP contents */ if((stat = NCJnew(NCJ_DICT,&jgroup))) goto done; snprintf(version,sizeof(version),"%d",zinfo->zarr.zarr_version); if((stat = NCJaddstring(jgroup,NCJ_STRING,"zarr_format"))) goto done; if((stat = NCJaddstring(jgroup,NCJ_INT,version))) goto done; - if(grp->parent == NULL) { /* Root group */ + if(!purezarr && grp->parent == NULL) { /* Root group */ snprintf(version,sizeof(version),"%lu.%lu.%lu", zinfo->zarr.nczarr_version.major, zinfo->zarr.nczarr_version.minor, @@ -185,9 +189,11 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) jsuper = NULL; } - /* Insert the "_NCZARR_GROUP" dict */ - if((stat = NCJinsert(jgroup,NCZ_V2_GROUP,json))) goto done; - json = NULL; + if(!purezarr) { + /* Insert the "_NCZARR_GROUP" dict */ + if((stat = NCJinsert(jgroup,NCZ_V2_GROUP,json))) goto done; + json = NULL; + } /* build ZGROUP path */ if((stat = nczm_concat(fullpath,ZGROUP,&key))) @@ -329,7 +335,9 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) jtmp = NULL; /* fill_value key */ - if(!var->no_fill) { + if(var->no_fill) { + if((stat=NCJnew(NCJ_NULL,&jfill))) goto done; + } else {/*!var->no_fill*/ int fillsort; int atomictype = var->type_info->hdr.id; /* A scalar value providing the default value to use for uninitialized @@ -354,9 +362,9 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) jfill = jtmp; jtmp = NULL; } - if((stat = NCJinsert(jvar,"fill_value",jfill))) goto done; - jfill = NULL; } + if((stat = NCJinsert(jvar,"fill_value",jfill))) goto done; + jfill = NULL; /* order key */ if((stat = NCJaddstring(jvar,NCJ_STRING,"order"))) goto done; @@ -577,6 +585,7 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) char* content = NULL; char* dimpath = NULL; int isxarray = 0; + int isrootgroup = 0; LOG((3, "%s", __func__)); @@ -585,6 +594,12 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) if(zinfo->controls.flags & FLAG_XARRAYDIMS) isxarray = 1; + if(container->sort == NCVAR) { + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)container; + if(var->container && var->container->parent == NULL) + isrootgroup = 1; + } + if(!isxarray && ncindexsize(attlist) == 0) goto done; /* do nothing */ @@ -611,7 +626,7 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) } /* Construct container path */ - if(NCJsort(container) == NCGRP) + if(container->sort == NCGRP) stat = NCZ_grpkey((NC_GRP_INFO_T*)container,&fullpath); else stat = NCZ_varkey((NC_VAR_INFO_T*)container,&fullpath); @@ -622,8 +637,8 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist) if((stat = ncz_jsonize_atts(attlist,&jatts))) goto done; - if(NCJsort(container) == NCVAR) { - if(isxarray) { + if(container->sort == NCVAR) { + if(isrootgroup && isxarray) { NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)container; /* Insert the XARRAY _ARRAY_ATTRIBUTE attribute */ if((stat = NCJnew(NCJ_ARRAY,&jdimrefs))) @@ -740,7 +755,7 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis /* alway return (possibly empty) list of types */ atypes = nclistnew(); - if(NCJsort(container) == NCGRP) { + if(container->sort == NCGRP) { NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)container; /* Get grp's fullpath name */ if((stat = NCZ_grpkey(grp,&fullpath))) @@ -1184,7 +1199,7 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) zinfo = file->format_file_info; map = zinfo->map; - if(NCJsort(container) == NCGRP) + if(container->sort == NCGRP) attlist = ((NC_GRP_INFO_T*)container)->att; else attlist = ((NC_VAR_INFO_T*)container)->att; @@ -1210,7 +1225,7 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) if(ra != NULL) { /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL, flags & READONLYFLAG */ if(strcmp(NCJstring(key),NCPROPS)==0 - && NCJsort(container) == NCGRP + && container->sort == NCGRP && file->root_grp == (NC_GRP_INFO_T*)container) { /* Setup provenance */ if(NCJsort(value) != NCJ_STRING) @@ -1220,7 +1235,7 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) } /* case 2: name = _ARRAY_DIMENSIONS, sort==NCVAR, flags & HIDDENATTRFLAG */ if(strcmp(NCJstring(key),NC_XARRAY_DIMS)==0 - && NCJsort(container) == NCVAR + && container->sort == NCVAR && (ra->flags & HIDDENATTRFLAG)) { /* store for later */ NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)((NC_VAR_INFO_T*)container)->format_var_info; @@ -1249,13 +1264,13 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) } } /* If we have not read a _FillValue, then go ahead and create it */ - if(fillvalueatt == NULL && NCJsort(container) == NCVAR) { + if(fillvalueatt == NULL && container->sort == NCVAR) { if((stat = ncz_create_fillvalue((NC_VAR_INFO_T*)container))) goto done; } /* Remember that we have read the atts for this var or group. */ - if(NCJsort(container) == NCVAR) + if(container->sort == NCVAR) ((NC_VAR_INFO_T*)container)->atts_read = 1; else ((NC_GRP_INFO_T*)container)->atts_read = 1; @@ -1399,6 +1414,8 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) if((stat = ncz_gettype(file,grp,vtype,&var->type_info))) goto done; } else {stat = NC_EBADTYPE; goto done;} + if(endianness == NC_ENDIAN_NATIVE) + endianness = zinfo->native_endianness; if(endianness == NC_ENDIAN_LITTLE || endianness == NC_ENDIAN_BIG) { var->endianness = endianness; } else {stat = NC_EBADTYPE; goto done;} @@ -1466,7 +1483,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) /* fill_value */ { if((stat = NCJdictget(jvar,"fill_value",&jvalue))) goto done; - if(jvalue == NULL) + if(jvalue == NULL || NCJsort(jvalue) == NCJ_NULL) var->no_fill = 1; else { size_t fvlen; diff --git a/libnczarr/zutil.c b/libnczarr/zutil.c index 36c2a57557..5c79d8762f 100644 --- a/libnczarr/zutil.c +++ b/libnczarr/zutil.c @@ -514,6 +514,7 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) switch (dtype[0]) { case '<': endianness = NC_ENDIAN_LITTLE; break; case '>': endianness = NC_ENDIAN_BIG; break; + case '|': endianness = NC_ENDIAN_NATIVE; break; default: goto zerr; } /* Decode the type length */