From dd4e71c15063794539ebeb234796fef7143cf29d Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Fri, 2 Mar 2018 16:55:58 -0700 Subject: [PATCH] 1. Solve issue raised on netcdfgroup mailing list: https://www.unidata.ucar.edu/mailing_lists/archives/netcdfgroup/2018/msg00020.html See docs/filters.md for a detailed description of the solution. Basically we add nccopy flags "-F none" and "-F :none" to suppress output compression as desired. 2. Add tests to nc_test4/tst_filter.sh to test out the solution. --- Makefile.am | 3 + cf | 5 +- docs/Doxyfile.in | 1 - docs/filters.md | 34 +++++- examples/C/hdf5plugins/Makefile.am | 2 +- nc_test4/hdf5plugins/Makefile.am | 4 +- nc_test4/tst_filter.sh | 83 +++++++++----- nc_test4/tst_rename.c | 1 - ncdump/Makefile.am | 4 + ncdump/nccopy.c | 167 +++++++++++++++++++++-------- 10 files changed, 221 insertions(+), 83 deletions(-) diff --git a/Makefile.am b/Makefile.am index f8c1e3eb89..fb7dc08885 100644 --- a/Makefile.am +++ b/Makefile.am @@ -7,6 +7,9 @@ # Ed Hartnett, Ward Fisher + + + # This directory stores libtool macros, put there by aclocal. 
ACLOCAL_AMFLAGS = -I m4 diff --git a/cf b/cf index a1a55b785e..84b259b1c1 100644 --- a/cf +++ b/cf @@ -1,6 +1,6 @@ #!/bin/bash #NB=1 -#DB=1 +DB=1 #X=-x FAST=1 @@ -124,7 +124,8 @@ FLAGS="$FLAGS --disable-parallel4" fi if test "x${DB}" = x1 ; then -FLAGS="$FLAGS --disable-shared --enable-static" +#FLAGS="$FLAGS --disable-shared --enable-static" +FLAGS="$FLAGS --enable-static" else FLAGS="$FLAGS --enable-shared" fi diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 98830f0c39..81ab12fc3f 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -750,7 +750,6 @@ INPUT = \ @abs_top_srcdir@/docs/install-fortran.md \ @abs_top_srcdir@/docs/types.dox \ @abs_top_srcdir@/docs/internal.dox \ - @abs_top_srcdir@/docs/indexing.dox \ @abs_top_srcdir@/docs/windows-binaries.md \ @abs_top_srcdir@/docs/guide.dox \ @abs_top_srcdir@/docs/OPeNDAP.dox \ diff --git a/docs/filters.md b/docs/filters.md index 0919206e21..499184125e 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -142,10 +142,34 @@ The "-F" option can be used repeatedly as long as the variable name part is different. A different filter id and parameters can be specified for each occurrence. -Note that if the input file has compressed variables, that fact -will be invisble to nccopy because it is handled within the -netcdf-c/hdf5 library code. This is true for any program that calls -the netcdf-c library. +As a rule, any input filter on an input variable will be applied +to the equivalent output variable -- assuming the output file type +is netcdf-4. It is, however, sometimes convenient to suppress +output compression either totally or on a per-variable basis. +Total suppression of output filters can be accomplished by specifying +a special case of "-F", namely this. +```` +nccopy -F "none" input.nc output.nc +```` +Suppression of output filtering for a specific variable can be accomplished +using this format. +```` +nccopy -F "var,none" input.nc output.nc +```` +where "var" is the fully qualified name of the variable. 
+ +The rules for all possible cases of the "-F" flag are defined +by this table. + + +
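+| -F none | -Fvar,...   | Input Filter | Applied Output Filter |
+|---------|-------------|--------------|-----------------------|
+| true    | unspecified | NA           | unfiltered            |
+| true    | -Fvar,none  | NA           | unfiltered            |
+| true    | -Fvar,...   | NA           | use output filter     |
+| false   | unspecified | defined      | use input filter      |
+| false   | -Fvar,none  | NA           | unfiltered            |
+| false   | -Fvar,...   | NA           | use output filter     |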
Parameter Encoding {#ParamEncode} ========== @@ -416,5 +440,5 @@ References {#References} 1. https://support.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf 2. https://support.hdfgroup.org/HDF5/doc/TechNotes/TechNote-HDF5-CompressionTroubleshooting.pdf -3. https://support.hdfgroup.org/services/filters.html +3. https://portal.hdfgroup.org/display/support/Contributions#Contributions-filters 4. https://support.hdfgroup.org/services/contributions.html#filters diff --git a/examples/C/hdf5plugins/Makefile.am b/examples/C/hdf5plugins/Makefile.am index ffd0667451..9b82d38dc4 100644 --- a/examples/C/hdf5plugins/Makefile.am +++ b/examples/C/hdf5plugins/Makefile.am @@ -21,7 +21,7 @@ DLLSRC=${PLUGINSRC} ${BZIP2SRC} lib_LTLIBRARIES = libbzip2.la libbzip2_la_SOURCES = ${DLLSRC} -libbzip2_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -no-undefined +libbzip2_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -no-undefined -rpath ${abs_builddir} endif #ENABLE_FILTER_TESTING EXTRA_DIST = CMakeLists.txt H5Zbzip2.c Makefile.am blocksort.c bzlib.c bzlib.h bzlib_private.h compress.c \ diff --git a/nc_test4/hdf5plugins/Makefile.am b/nc_test4/hdf5plugins/Makefile.am index 044e14ba60..6ec7552309 100644 --- a/nc_test4/hdf5plugins/Makefile.am +++ b/nc_test4/hdf5plugins/Makefile.am @@ -18,9 +18,9 @@ DLLSRC=${PLUGINSRC} ${BZIP2SRC} ${PLUGINHDRS} ${BZIP2HDRS} lib_LTLIBRARIES = libbzip2.la libmisc.la libbzip2_la_SOURCES = ${DLLSRC} -libbzip2_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -no-undefined +libbzip2_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -no-undefined -rpath ${abs_builddir} libmisc_la_SOURCES = H5Zmisc.c h5misc.h -libmisc_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -no-undefined +libmisc_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -no-undefined -rpath ${abs_builddir} endif #ENABLE_FILTER_TESTING diff --git a/nc_test4/tst_filter.sh b/nc_test4/tst_filter.sh 
index 9fde657aa4..1dc5df1eac 100755 --- a/nc_test4/tst_filter.sh +++ b/nc_test4/tst_filter.sh @@ -55,11 +55,11 @@ if ! test -f ${MISCPATH} ; then echo "Unable to locate ${MISCPATH}"; exit 1; fi if test "x$API" = x1 ; then echo "*** Testing dynamic filters using API" -rm -f ./bzip2.nc ./bzip2.dump ./tmp_tst_filter +rm -f ./bzip2.nc ./bzip2.dump ./tst_filter.txt ${execdir}/test_filter -${NCDUMP} -s bzip2.nc > ./tmp_tst_filter +${NCDUMP} -s bzip2.nc > ./tst_filter.txt # Remove irrelevant -s output -sclean ./tmp_tst_filter ./bzip2.dump +sclean ./tst_filter.txt ./bzip2.dump diff -b -w ${srcdir}/bzip2.cdl ./bzip2.dump echo "*** Pass: API dynamic filter" fi @@ -67,61 +67,88 @@ fi if test "x$MISC" = x1 ; then echo echo "*** Testing dynamic filters parameter passing" -rm -f ./testmisc.nc tmp_tst_filter tmp_tst_filter2 +rm -f ./testmisc.nc tst_filter.txt tst_filter2.txt ${execdir}/test_filter_misc # Verify the parameters via ncdump -${NCDUMP} -s testmisc.nc > ./tmp_tst_filter +${NCDUMP} -s testmisc.nc > ./tst_filter.txt # Extract the parameters -getfilterattr ./tmp_tst_filter ./tmp_tst_filter2 -rm -f ./tmp_tst_filter -trimleft ./tmp_tst_filter2 ./tmp_tst_filter -rm -f ./tmp_tst_filter2 -cat >./tmp_tst_filter2 <./tst_filter2.txt < ./tmp_tst_filter +${NCDUMP} -s bzip2.nc > ./tst_filter.txt # Remove irrelevant -s output -sclean ./tmp_tst_filter ./bzip2.dump +sclean ./tst_filter.txt ./bzip2.dump diff -b -w ${srcdir}/bzip2.cdl ./bzip2.dump echo "*** Pass: ncgen dynamic filter" fi if test "x$NCP" = x1 ; then echo "*** Testing dynamic filters using nccopy" -rm -f ./unfiltered.nc ./filtered.nc ./filtered.dump ./tmp_tst_filter +rm -f ./unfiltered.nc ./filtered.nc ./tmp.nc ./filtered.dump ./tst_filter.txt ${NCGEN} -4 -lb -o unfiltered.nc ${srcdir}/unfiltered.cdl +echo " *** Testing simple filter application" ${NCCOPY} -F "/g/var,307,9,4" unfiltered.nc filtered.nc -${NCDUMP} -s filtered.nc > ./tmp_tst_filter +${NCDUMP} -s filtered.nc > ./tst_filter.txt # Remove irrelevant -s output 
-sclean ./tmp_tst_filter ./filtered.dump +sclean ./tst_filter.txt ./filtered.dump diff -b -w ${srcdir}/filtered.cdl ./filtered.dump -echo "*** Pass: nccopy dynamic filter" +echo " *** Pass: nccopy simple filter" + +echo " *** Testing pass-thru of filters" +rm -f ./tst_filter.txt tst_filter2.txt ./tst_filter2.nc +${NCCOPY} ./filtered.nc ./tst_filter2.nc +${NCDUMP} -s tst_filter2.nc > ./tst_filter.txt +sed -e '/_Filter/p' -e d < ./tst_filter.txt >tst_filter2.txt +test -s tst_filter2.txt +echo " *** Pass: pass-thru of filters" + +echo " *** Testing -F none" +rm -f ./tst_filter.txt ./tst_filter2.txt ./tst_filter.nc +${NCCOPY} -F none ./filtered.nc ./tst_filter.nc +${NCDUMP} -s tst_filter.nc > ./tst_filter.txt +sed -e '/_Filter/p' -e d < ./tst_filter.txt >./tst_filter2.txt +test ! -s tst_filter2.txt +echo " *** Pass: -F none" + +echo " *** Testing -F var,none " +rm -f ./tst_filter.txt ./tst_filter.nc +${NCCOPY} -F "/g/var,none" ./filtered.nc ./tst_filter.nc +${NCDUMP} -s tst_filter.nc > ./tst_filter.txt +sed -e '/_Filter/p' -e d < ./tst_filter.txt >tst_filter2.txt +test ! 
-s tst_filter2.txt +echo " *** Pass: -F var,none" + +echo "*** Pass: all nccopy filter tests" fi if test "x$UNK" = x1 ; then echo "*** Testing access to filter info when filter dll is not available" -rm -f bzip2.nc ./tmp_tst_filter +rm -f bzip2.nc ./tst_filter.txt # build bzip2.nc ${NCGEN} -lb -4 -o bzip2.nc ${srcdir}/bzip2.cdl # dump and clean bzip2.nc header only when filter is avail -${NCDUMP} -hs bzip2.nc > ./tmp_tst_filter +${NCDUMP} -hs bzip2.nc > ./tst_filter.txt # Remove irrelevant -s output -sclean ./tmp_tst_filter bzip2.dump +sclean ./tst_filter.txt bzip2.dump # Now hide the filter code mv ${BZIP2PATH} ${BZIP2PATH}.save # dump and clean bzip2.nc header only when filter is not avail -rm -f ./tmp_tst_filter -${NCDUMP} -hs bzip2.nc > ./tmp_tst_filter +rm -f ./tst_filter.txt +${NCDUMP} -hs bzip2.nc > ./tst_filter.txt # Remove irrelevant -s output -sclean ./tmp_tst_filter bzip2x.dump +sclean ./tst_filter.txt bzip2x.dump # Restore the filter code mv ${BZIP2PATH}.save ${BZIP2PATH} diff -b -w ./bzip2.dump ./bzip2x.dump @@ -137,10 +164,10 @@ echo "*** Pass: ncgen dynamic filter" fi #cleanup -rm -f ./bzip*.nc ./unfiltered.nc ./filtered.nc ./tmp_tst_filter ./tmp_tst_filter2 *.dump bzip*hdr.* -rm -fr ./test_bzip2.c -rm -fr ./testmisc.nc - +rm -f ./bzip*.nc ./unfiltered.nc ./filtered.nc ./tst_filter.txt ./tst_filter2.txt *.dump bzip*hdr.* +rm -f ./test_bzip2.c +rm -f ./testmisc.nc +rm -f ./tst_filter2.nc echo "*** Pass: all selected tests passed" exit 0 diff --git a/nc_test4/tst_rename.c b/nc_test4/tst_rename.c index 4df3843714..83e79db2ba 100644 --- a/nc_test4/tst_rename.c +++ b/nc_test4/tst_rename.c @@ -178,7 +178,6 @@ main(int argc, char **argv) fprintf(stderr,"*** Test Charlie's test for renaming with one enddef..."); { int ncid, dimid, varid; - nc_set_log_level(5); /* Create a nice, simple file. This file will contain one * dataset, "lon", which is a dimscale. 
*/ diff --git a/ncdump/Makefile.am b/ncdump/Makefile.am index c827568971..58673ee6da 100644 --- a/ncdump/Makefile.am +++ b/ncdump/Makefile.am @@ -4,6 +4,10 @@ # Ed Hartnett, Dennis Heimbigner, Ward Fisher +#SH_LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver-verbose +#sh_LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver-verbose +#LOG_DRIVER = $(SHELL) $(top_srcdir)/test-driver-verbose + # Put together AM_CPPFLAGS and AM_LDFLAGS. include $(top_srcdir)/lib_flags.am LDADD = ${top_builddir}/liblib/libnetcdf.la diff --git a/ncdump/nccopy.c b/ncdump/nccopy.c index 2e5038966f..2ffd4ff9e8 100644 --- a/ncdump/nccopy.c +++ b/ncdump/nccopy.c @@ -61,6 +61,7 @@ typedef struct VarID { struct FilterSpec { char* fqn; + int nofilter; /* 1=> do not apply any filters to this variable */ unsigned int filterid; size_t nparams; unsigned int* params; @@ -68,6 +69,7 @@ struct FilterSpec { static int nfilterspecs = 0; /* Number of defined filter specs */ static struct FilterSpec filterspecs[MAX_FILTER_SPECS]; +static int suppressfilters = 0; /* 1 => do not apply any output filters unless specified */ #endif @@ -83,9 +85,11 @@ static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE; /* default from config static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS; /* default from config.h */ static int option_read_diskless = 0; /* default, don't read input into memory on open */ static int option_write_diskless = 0; /* default, don't write output to diskless file */ +#ifdef USE_NETCDF4 static int option_min_chunk_bytes = CHUNK_THRESHOLD; /* default, don't chunk variable if prod of * chunksizes of its dimensions is smaller * than this */ +#endif static int option_nlgrps = 0; /* Number of groups specified with -g * option on command line */ static char** option_lgrps = 0; /* list of group names specified with -g @@ -242,6 +246,9 @@ parsefilterspec(const char* optarg0, struct FilterSpec* spec) char* remainder = NULL; if(optarg0 == NULL || strlen(optarg0) == 0 || spec == NULL) return 0; + + 
memset(spec,0,sizeof(struct FilterSpec)); + optarg = strdup(optarg0); /* Collect the fqn, taking escapes into account */ @@ -253,7 +260,8 @@ parsefilterspec(const char* optarg0, struct FilterSpec* spec) else if(*p == '\0') {remainder = p; break;} /* else continue */ } - if(strlen(optarg) == 0) return 0; /* fqn does not exist */ + if(strlen(optarg) == 0) {stat = NC_EINVAL; goto done;} /* fqn does not exist */ + /* Make sure leading '/' is in place */ if(optarg[0]=='/') spec->fqn = strdup(optarg); @@ -263,6 +271,12 @@ parsefilterspec(const char* optarg0, struct FilterSpec* spec) strcat(spec->fqn,optarg); } + /* Check for special cases */ + if(strcmp(remainder,"none") == 0) { + spec->nofilter = 1; + goto done; + } + /* Collect the id+parameters */ if((stat = NC_parsefilterspec(remainder,&id,&nparams,¶ms)) == NC_NOERR) { if(spec != NULL) { @@ -271,6 +285,9 @@ parsefilterspec(const char* optarg0, struct FilterSpec* spec) spec->params = params; } } + +done: + if(optarg) free(optarg); return stat; } @@ -721,57 +738,101 @@ static int copy_var_filter(int igrp, int varid, int ogrp, int o_varid) { int stat = NC_NOERR; -#ifdef USE_NETCDF4 VarID vid = {igrp,varid}; VarID ovid = {ogrp,o_varid}; /* handle filter parameters, copying from input, overriding with command-line options */ - struct FilterSpec spec; - int i, found; + struct FilterSpec inspec, ospec, actualspec; + int i; char* ofqn = NULL; int format, oformat; + int inputdefined, outputdefined, unfiltered; /* Get file format of the input and output */ if((stat=nc_inq_format(vid.grpid,&format))) goto done; if((stat=nc_inq_format(ovid.grpid,&oformat))) goto done; if(oformat != NC_FORMAT_NETCDF4 && oformat != NC_FORMAT_NETCDF4_CLASSIC) - goto done; /* Can only use filter when output is netcdf4 */ + goto done; /* Can only use filter when output is some netcdf4 variant */ /* Compute the output vid's FQN */ if((stat = computeFQN(ovid,&ofqn))) goto done; - /* See if any filter spec is defined for this output variable */ - 
for(found=0,i=0;i 0) {/* input has a filter */ - spec.params = (unsigned int*)malloc(sizeof(unsigned int)*spec.nparams); - if((stat=nc_inq_var_filter(vid.grpid,vid.varid,&spec.filterid,&spec.nparams,spec.params))) - goto done; + + /* Clear the in and out specs */ + memset(&inspec,0,sizeof(inspec)); + memset(&ospec,0,sizeof(ospec)); + memset(&actualspec,0,sizeof(actualspec)); + + /* Is there a filter on the output variable */ + outputdefined = 0; /* default is no filter defined */ + /* Only bother to look if output is netcdf-4 variant */ + if(oformat == NC_FORMAT_NETCDF4 || oformat == NC_FORMAT_NETCDF4_CLASSIC) { + /* See if any output filter spec is defined for this output variable */ + for(i=0;i 0) {/* Apply filter */ -#ifdef USE_NETCDF4 - if((stat=nc_def_var_filter(ovid.grpid,ovid.varid,spec.filterid,spec.nparams,spec.params))) + + /* Rules for choosing output filter are as follows: + + global output input Actual Output + suppress filter filter filter + ----------------------------------------------- + true undefined NA unfiltered + true 'none' NA unfiltered + true defined NA use output filter + false undefined defined use input filter + false 'none' NA unfiltered + false defined NA use output filter + */ + + unfiltered = 0; + + if(suppressfilters && !outputdefined) /* row 1 */ + unfiltered = 1; + else if(suppressfilters && outputdefined && ospec.nofilter) /* row 2 */ + unfiltered = 1; + else if(suppressfilters && outputdefined) /* row 3 */ + actualspec = ospec; + else if(!suppressfilters && !outputdefined && inputdefined) /* row 4 */ + actualspec = inspec; + else if(!suppressfilters && outputdefined && ospec.nofilter) /* row 5 */ + unfiltered = 1; + else if(!suppressfilters && outputdefined) /* row 6 */ + actualspec = ospec; + + /* Apply actual filter spec if any */ + if(!unfiltered) { + if((stat=nc_def_var_filter(ovid.grpid,ovid.varid, + actualspec.filterid, + actualspec.nparams, + actualspec.params))) goto done; -#endif } done: /* Cleanup */ - if(spec.filterid 
> 0 && spec.nparams > 0 && spec.params != NULL) - free(spec.params); -#endif /*USE_NETCDF4*/ + if(ofqn != NULL) free(ofqn); + if(inspec.fqn) free(inspec.fqn); + if(inspec.params) free(inspec.params); + /* Note we do not clean actualspec because it is a copy of in|out spec */ return stat; } @@ -1342,8 +1403,10 @@ copy_data(int igrp, int ogrp) /* Count total number of dimensions in ncid and all its descendant subgroups */ int count_dims(int ncid) { - int numgrps; int ndims; +#ifdef USE_NETCDF4 + int numgrps; +#endif NC_CHECK(nc_inq_ndims(ncid, &ndims)); #ifdef USE_NETCDF4 NC_CHECK(nc_inq_grps(ncid, &numgrps, NULL)); @@ -1784,10 +1847,12 @@ usage(void) int main(int argc, char**argv) { + int exitcode = EXIT_SUCCESS; char* inputfile = NULL; char* outputfile = NULL; int c; #ifdef USE_NETCDF4 + int i; struct FilterSpec filterspec; #endif @@ -1970,14 +2035,20 @@ main(int argc, char**argv) break; case 'F': /* optional filter spec for a specified variable */ #ifdef USE_NETCDF4 - if(parsefilterspec(optarg,&filterspec) != NC_NOERR) - usage(); - if(nfilterspecs >= (MAX_FILTER_SPECS-1)) - error("too many -F filterspecs\n"); - filterspecs[nfilterspecs] = filterspec; - nfilterspecs++; - // Force output to be netcdf-4 - option_kind = NC_FORMAT_NETCDF4; + /* If the arg is "none" then suppress all filters + on output unless explicit */ + if(strcmp(optarg,"none")==0) { + suppressfilters = 1; + } else { + if(parsefilterspec(optarg,&filterspec) != NC_NOERR) + usage(); + if(nfilterspecs >= (MAX_FILTER_SPECS-1)) + error("too many -F filterspecs\n"); + filterspecs[nfilterspecs] = filterspec; + nfilterspecs++; + // Force output to be netcdf-4 + option_kind = NC_FORMAT_NETCDF4; + } #else error("-F requires netcdf-4"); #endif @@ -2001,7 +2072,7 @@ main(int argc, char**argv) #ifdef USE_NETCDF4 #ifdef DEBUGFILTER - { int i,j; + { int j; for(i=0;i