Skip to content

Commit

Permalink
Fix JSON quoted string processing in libnczarr
Browse files Browse the repository at this point in the history
re: github issue Unidata#1982

The problem was that the libnczarr/zsjon.c handling of strings with
embedded double quotes was wrong; a one line fix.
Also added a test case.

Misc. other changes:

1. I Discovered, en passant, that the handling of 64 bit constants
had an error that was fixed.
2. cleanup of the constant conversion code to recurse on arrays of values.
  • Loading branch information
DennisHeimbigner committed May 6, 2021
1 parent e6bdee5 commit 91168e3
Show file tree
Hide file tree
Showing 15 changed files with 128 additions and 71 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ CMakeLists.txt.user
scan-build
.deps
.libs
*.zip
Makefile
.DS_Store
build-par
Expand Down
6 changes: 0 additions & 6 deletions libdap2/getvara.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,6 @@ movetor(NCDAPCOMMON* nccomm,
CDFnode* xnode = (CDFnode*)nclistget(path,depth);
OCdatanode reccontent = NULL;
OCdatanode dimcontent = NULL;
OCdatanode fieldcontent = NULL;
Dapodometer* odom = NULL;
int hasstringdim = 0;
DCEsegment* segment;
Expand Down Expand Up @@ -605,7 +604,6 @@ fprintf(stderr," segment=%s hasstringdim=%d\n",

done:
oc_data_free(conn,dimcontent);
oc_data_free(conn,fieldcontent);
oc_data_free(conn,reccontent);
if(ocstat != OC_NOERR) ncstat = ocerrtoncerr(ocstat);
if(odom) dapodom_free(odom);
Expand All @@ -627,8 +625,6 @@ movetofield(NCDAPCOMMON* nccomm,
size_t fieldindex,gridindex;
OClink conn = nccomm->oc.conn;
CDFnode* xnode = (CDFnode*)nclistget(path,depth);
OCdatanode reccontent = NULL;
OCdatanode dimcontent = NULL;
OCdatanode fieldcontent = NULL;
CDFnode* xnext;
int newdepth;
Expand Down Expand Up @@ -675,9 +671,7 @@ movetofield(NCDAPCOMMON* nccomm,
segments);

done:
oc_data_free(conn,dimcontent);
oc_data_free(conn,fieldcontent);
oc_data_free(conn,reccontent);
if(ocstat != OC_NOERR) ncstat = ocerrtoncerr(ocstat);
return THROW(ncstat);
}
Expand Down
2 changes: 0 additions & 2 deletions libdap2/ncd2dispatch.c
Original file line number Diff line number Diff line change
Expand Up @@ -1626,7 +1626,6 @@ getseqdimsize(NCDAPCOMMON* dapcomm, CDFnode* seq, size_t* sizep)
NCerror ncstat = NC_NOERR;
OCerror ocstat = OC_NOERR;
OClink conn = dapcomm->oc.conn;
OCdatanode rootcontent = NULL;
OCddsnode ocroot;
CDFnode* dxdroot;
CDFnode* xseq;
Expand Down Expand Up @@ -1685,7 +1684,6 @@ fprintf(stderr,"sequencesize: %s = %lu\n",seq->ocname,(unsigned long)seqsize);

fail:
ncbytesfree(seqcountconstraints);
oc_data_free(conn,rootcontent);
if(ocstat != OC_NOERR) ncstat = ocerrtoncerr(ocstat);
return ncstat;
}
Expand Down
4 changes: 2 additions & 2 deletions libnczarr/zarr.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ extern int ncz_unload_jatts(NCZ_FILE_INFO_T*, NC_OBJ* container, NCjson* jattrs,
extern int ncz_close_file(NC_FILE_INFO_T* file, int abort);

/* zcvt.c */
extern int NCZ_convert1(NCjson* jsrc, nc_type, char* memory0);
extern int NCZ_stringconvert1(nc_type typid, char* src, char** strp);
extern int NCZ_convert1(NCjson* jsrc, nc_type, unsigned char* memory0);
extern int NCZ_stringconvert1(nc_type typid, unsigned char* src, char** strp);
extern int NCZ_stringconvert(nc_type typid, size_t len, void* data0, NCjson** jdatap);

/* zsync.c */
Expand Down
4 changes: 2 additions & 2 deletions libnczarr/zcvt.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ struct ZCVT {
};

int
NCZ_convert1(NCjson* jsrc, nc_type dsttype, char* memory)
NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory)
{
int stat = NC_NOERR;
nc_type srctype;
Expand Down Expand Up @@ -234,7 +234,7 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, char* memory)
}

int
NCZ_stringconvert1(nc_type srctype, char* src, char** strp)
NCZ_stringconvert1(nc_type srctype, unsigned char* src, char** strp)
{
int stat = NC_NOERR;
struct ZCVT zcvt;
Expand Down
2 changes: 1 addition & 1 deletion libnczarr/zjson.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ NCJlex(NCJparser* parser)
start = parser->pos;
for(;;) {
c = *parser->pos++;
if(c == NCJ_ESCAPE) c++;
if(c == NCJ_ESCAPE) parser->pos++;
else if(c == NCJ_QUOTE || c == '\0') break;
}
if(c == '\0') {
Expand Down
109 changes: 68 additions & 41 deletions libnczarr/zsync.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ static int parsedimrefs(NC_FILE_INFO_T*, NClist* dimnames, size64_t* shape, NC_
static int decodeints(NCjson* jshape, size64_t* shapes);
static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap);
static int inferattrtype(NCjson* values, nc_type* typeidp);
static int mininttype(unsigned long long u64);
static int mininttype(unsigned long long u64, int negative);
static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims);

/**************************************************/
Expand Down Expand Up @@ -825,7 +825,7 @@ zconvert(nc_type typeid, size_t typelen, void* dst0, NCjson* src)
int stat = NC_NOERR;
int i;
size_t len;
char* dst = dst0; /* Work in char* space so we can do pointer arithmetic */
unsigned char* dst = dst0; /* Work in char* space so we can do pointer arithmetic */

switch (src->sort) {
case NCJ_ARRAY:
Expand Down Expand Up @@ -902,6 +902,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap)
void* data = NULL;
size_t typelen;
nc_type typeid = NC_NAT;
int reclaimvalues = 0;

/* Get assumed type */
if(typeidp) typeid = *typeidp;
Expand Down Expand Up @@ -941,56 +942,42 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap)
if(typeidp) *typeidp = typeid; /* return possibly inferred type */

done:
if(reclaimvalues) NCJreclaim(values); /* we created it */
nullfree(data);
return THROW(stat);
}

static int
inferattrtype(NCjson* values, nc_type* typeidp)
inferattrtype(NCjson* value, nc_type* typeidp)
{
nc_type typeid;
NCjson* j = NULL;
unsigned long long u64;
long long i64;
int negative = 0;

if(NCJlength(values) == 0) return NC_EINVAL;
switch (values->sort) {
case NCJ_ARRAY:
/* use the first value to decide */
if(NCJarrayith(values,0,&j)) return NC_EINVAL;
switch(j->sort) {
case NCJ_INT:
if(j->value[0] == '-') {
sscanf(j->value,"%lld",&i64);
u64 = (unsigned long long)i64;
} else
sscanf(j->value,"%llu",&u64);
typeid = mininttype(u64);
break;
case NCJ_DOUBLE:
typeid = NC_DOUBLE;
break;
case NCJ_BOOLEAN:
typeid = NC_UBYTE;
break;
default: return NC_EINTERNAL;
}
break;
/* Might be a singleton */
if(NCJlength(value) == 0) return NC_EINVAL;
if(value->sort == NCJ_ARRAY) {
if(NCJarrayith(value,0,&j)) return NC_EINVAL;
return inferattrtype(j,typeidp);
}
if(value->value)
negative = (value->value[0] == '-');
switch (value->sort) {
case NCJ_INT:
if(values->value[0] == '-') {
sscanf(values->value,"%lld",&i64);
u64 = (unsigned long long)i64;
} else
sscanf(values->value,"%llu",&u64);
typeid = mininttype(u64);
break;
if(negative) {
sscanf(value->value,"%lld",&i64);
u64 = (unsigned long long)i64;
} else
sscanf(value->value,"%llu",&u64);
typeid = mininttype(u64,negative);
break;
case NCJ_DOUBLE:
typeid = NC_DOUBLE;
break;
typeid = NC_DOUBLE;
break;
case NCJ_BOOLEAN:
typeid = NC_UBYTE;
break;
typeid = NC_UBYTE;
break;
case NCJ_STRING: /* requires special handling as an array of characters */
typeid = NC_CHAR;
break;
Expand All @@ -1002,10 +989,10 @@ inferattrtype(NCjson* values, nc_type* typeidp)
}

static int
mininttype(unsigned long long u64)
mininttype(unsigned long long u64, int negative)
{
long long i64 = (long long)u64; /* keep bit pattern */
if(u64 >= NC_MAX_INT64) return NC_UINT64;
if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64;
if(i64 < 0) {
if(i64 >= NC_MIN_BYTE) return NC_BYTE;
if(i64 >= NC_MIN_SHORT) return NC_SHORT;
Expand Down Expand Up @@ -1175,7 +1162,7 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container)

if(jattrs != NULL) {
/* Iterate over the attributes to create the in-memory attributes */
/* Watch for reading _FillValue and possibly _ARRAY_DIMENSIONS (xarray) */
/* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray) */
for(i=0;i<nclistlength(jattrs->contents);i+=2) {
NCjson* key = nclistget(jattrs->contents,i);
NCjson* value = nclistget(jattrs->contents,i+1);
Expand Down Expand Up @@ -2032,3 +2019,43 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra
NCJreclaim(jatts);
return THROW(stat);
}

#if 0
Not currently used
Special compatibility case:
if the value of the attribute is a dictionary,
or an array with non-atomic values, then
then stringify it and pretend it is of char type.
/* Return 1 if this json is not an
atomic value or an array of atomic values.
That is, it does not look like valid
attribute data.
*/
static int
iscomplexjson(NCjson* j)
{
int i;
switch(j->sort) {
case NCJ_ARRAY:
/* verify that the elements of the array are not complex */
for(i=0;i<nclistlength(j->contents);i++) {
switch (((NCjson*)nclistget(j->contents,i))->sort) {
case NCJ_DICT:
case NCJ_ARRAY:
case NCJ_UNDEF:
case NCJ_NULL:
return 1;
default: break;
}
}
return 0;
case NCJ_DICT:
case NCJ_UNDEF:
case NCJ_NULL:
break;
default:
return 0;
}
return 1;
}
#endif
2 changes: 1 addition & 1 deletion nczarr_test/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ ref_avail1.cdl ref_avail1.dmp ref_avail1.txt \
ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl ref_nczarr2zarr.cdl

# Interoperability files
EXTRA_DIST += power_901_constants.zip ref_power_901_constants.cdl
EXTRA_DIST += ref_power_901_constants.zip ref_power_901_constants.cdl ref_quotes.zip ref_quotes.cdl

CLEANFILES = ut_*.txt ut*.cdl tmp*.nc tmp*.cdl tmp*.txt tmp*.dmp tmp*.zip tmp*.nc

Expand Down
Binary file removed nczarr_test/power_901_constants.zip
Binary file not shown.
2 changes: 1 addition & 1 deletion nczarr_test/ref_power_901_constants.cdl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
netcdf power_901_constants {
netcdf ref_power_901_constants {
dimensions:
lat = 361 ;
lon = 576 ;
Expand Down
Binary file added nczarr_test/ref_power_901_constants.zip
Binary file not shown.
19 changes: 19 additions & 0 deletions nczarr_test/ref_quotes.cdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
netcdf ref_quotes {
dimensions:
time = 10 ;
lat = 20 ;
lon = 30 ;
variables:
float fractional_snow_cover(time, lat, lon) ;
fractional_snow_cover:ID = 68b ;
fractional_snow_cover:esa_cci_path = NaN ;
fractional_snow_cover:long_name = "Surface Fraction Covered by Snow" ;
fractional_snow_cover:orig_attrs = "{\'comment\': \'Grid cell fractional snow cover based on the Globsnow CCI product.\', \'long_name\': \'Surface fraction covered by snow.\', \'project_name\': \'GlobSnow\', \'references\': \'Luojus, Kari, et al. \"ESA DUE Globsnow-Global Snow Database for Climate Research.\" ESA Special Publication. Vol. 686. 2010.\', \'source_name\': \'MFSC\', \'standard_name\': \'surface_snow_area_fraction\', \'units\': \'percent\', \'url\': \'http://www.globsnow.info/\'}" ;
fractional_snow_cover:orig_version = "v2.0" ;
fractional_snow_cover:project_name = "GlobSnow" ;
fractional_snow_cover:time_coverage_end = "2013-01-05" ;
fractional_snow_cover:time_coverage_resolution = "P8D" ;
fractional_snow_cover:time_coverage_start = "2003-01-05" ;
fractional_snow_cover:units = "percent" ;
fractional_snow_cover:url = "http://www.globsnow.info/" ;
}
Binary file added nczarr_test/ref_quotes.zip
Binary file not shown.
19 changes: 11 additions & 8 deletions nczarr_test/run_interop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ testcasefile() {
fileargs ${execdir}/$ref "mode=$mode,$zext"
rm -f tmp_${ref}_${zext}.cdl
${NCDUMP} $flags $fileurl > tmp_${ref}_${zext}.cdl
diff -b ${srcdir}/ref_${ref}.cdl tmp_${ref}_${zext}.cdl
diff -b ${srcdir}/${ref}.cdl tmp_${ref}_${zext}.cdl
}

testcasezip() {
Expand All @@ -30,25 +30,28 @@ testcasezip() {
fileargs ${execdir}/$ref "mode=$mode,$zext"
rm -f tmp_${ref}_${zext}.cdl
${NCDUMP} $flags $fileurl > tmp_${ref}_${zext}.cdl
diff -b ${srcdir}/ref_${ref}.cdl tmp_${ref}_${zext}.cdl
diff -b ${srcdir}/${ref}.cdl tmp_${ref}_${zext}.cdl
}

testallcases() {
zext=$1
case "$zext" in
file)
# need to unpack
rm -fr power_901_constants power_901_constants.file
unzip ${srcdir}/power_901_constants.zip > /dev/null
mv power_901_constants power_901_constants.file
testcasefile power_901_constants zarr metaonly; # test xarray as default
rm -fr ref_power_901_constants ref_power_901_constants.file
unzip ${srcdir}/ref_power_901_constants.zip > /dev/null
mv ref_power_901_constants ref_power_901_constants.file
testcasefile ref_power_901_constants zarr metaonly; # test xarray as default
;;
zip)
# Move into position
if test "x$srcdir" != "x$execdir" ; then
cp ${srcdir}/power_901_constants.zip ${execdir}
cp ${srcdir}/ref_power_901_constants.zip ${execdir}
cp ${srcdir}/ref_quotes.zip ${execdir}
fi
testcasezip power_901_constants xarray metaonly
testcasezip ref_power_901_constants xarray metaonly
# Test large constant interoperability
testcasezip ref_quotes zarr metaonly
;;
*) echo "unimplemented kind: $1" ; exit 1;;
esac
Expand Down
Loading

0 comments on commit 91168e3

Please sign in to comment.