Hello,
I am new to HDF5 and I am currently comparing NetCDF4 and HDF5 compression.
My test consists of starting from a NetCDF3 file, converting it to NetCDF4 using cdo, and then repacking the resulting NetCDF4 file with h5repack.
I tried several h5repack options, but the repacked HDF5 file is always bigger than the NetCDF4 file.
For example:
h5repack -f SOFF=31,IN -f GZIP=9 -l CHUNK=20x10
I did not find much guidance on how to best use the h5repack options. Are there better options to achieve maximum compression with h5repack?
Here is a dump (h5dump, header and storage properties only) of the NetCDF4 file I am working with:
HDF5 “HYCOM2D4.nc” {
GROUP “/” {
ATTRIBUTE “CDI” {
DATATYPE H5T_STRING {
STRSIZE 65;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “CDO” {
DATATYPE H5T_STRING {
STRSIZE 65;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “Conventions” {
DATATYPE H5T_STRING {
STRSIZE 6;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “NCO” {
DATATYPE H5T_STRING {
STRSIZE 6;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “_NCProperties” {
DATATYPE H5T_STRING {
STRSIZE 55;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “area” {
DATATYPE H5T_STRING {
STRSIZE 10;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “bulletin_date” {
DATATYPE H5T_STRING {
STRSIZE 19;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “forcing” {
DATATYPE H5T_STRING {
STRSIZE 32;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “geospatial_lat_max” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “geospatial_lat_min” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “geospatial_lon_max” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “geospatial_lon_min” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “grid” {
DATATYPE H5T_STRING {
STRSIZE 11;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “history” {
DATATYPE H5T_STRING {
STRSIZE 62;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “instituion” {
DATATYPE H5T_STRING {
STRSIZE 21;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “reference” {
DATATYPE H5T_STRING {
STRSIZE 32;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “source” {
DATATYPE H5T_STRING {
STRSIZE 28;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “title” {
DATATYPE H5T_STRING {
STRSIZE 25;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
DATASET “X” {
DATATYPE H5T_IEEE_F32BE
DATASPACE SIMPLE { ( 1547 ) / ( 1547 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 0
OFFSET 18446744073709551615
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE H5D_FILL_VALUE_DEFAULT
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_LATE
}
ATTRIBUTE “CLASS” {
DATATYPE H5T_STRING {
STRSIZE 16;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “NAME” {
DATATYPE H5T_STRING {
STRSIZE 64;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “REFERENCE_LIST” {
DATATYPE H5T_COMPOUND {
H5T_REFERENCE { H5T_STD_REF_OBJECT } “dataset”;
H5T_STD_I32LE “dimension”;
}
DATASPACE SIMPLE { ( 4 ) / ( 4 ) }
}
ATTRIBUTE “_Netcdf4Dimid” {
DATATYPE H5T_STD_I32LE
DATASPACE SCALAR
}
}
DATASET “Y” {
DATATYPE H5T_IEEE_F32BE
DATASPACE SIMPLE { ( 1014 ) / ( 1014 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 0
OFFSET 18446744073709551615
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE H5D_FILL_VALUE_DEFAULT
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_LATE
}
ATTRIBUTE “CLASS” {
DATATYPE H5T_STRING {
STRSIZE 16;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “NAME” {
DATATYPE H5T_STRING {
STRSIZE 64;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “REFERENCE_LIST” {
DATATYPE H5T_COMPOUND {
H5T_REFERENCE { H5T_STD_REF_OBJECT } “dataset”;
H5T_STD_I32LE “dimension”;
}
DATASPACE SIMPLE { ( 4 ) / ( 4 ) }
}
ATTRIBUTE “_Netcdf4Dimid” {
DATATYPE H5T_STD_I32LE
DATASPACE SCALAR
}
}
DATASET “lat” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1014, 1547 ) / ( 1014, 1547 ) }
STORAGE_LAYOUT {
CHUNKED ( 1014, 1547 )
SIZE 2454927 (2.556:1 COMPRESSION)
}
FILTERS {
PREPROCESSING SHUFFLE
COMPRESSION DEFLATE { LEVEL 1 }
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE 9.96921e+36
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_INCR
}
ATTRIBUTE “DIMENSION_LIST” {
DATATYPE H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
DATASPACE SIMPLE { ( 2 ) / ( 2 ) }
}
ATTRIBUTE “_FillValue” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “long_name” {
DATATYPE H5T_STRING {
STRSIZE 8;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “missing_value” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “units” {
DATATYPE H5T_STRING {
STRSIZE 12;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
}
DATASET “lon” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1014, 1547 ) / ( 1014, 1547 ) }
STORAGE_LAYOUT {
CHUNKED ( 1014, 1547 )
SIZE 2266535 (2.768:1 COMPRESSION)
}
FILTERS {
PREPROCESSING SHUFFLE
COMPRESSION DEFLATE { LEVEL 1 }
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE 9.96921e+36
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_INCR
}
ATTRIBUTE “DIMENSION_LIST” {
DATATYPE H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
DATASPACE SIMPLE { ( 2 ) / ( 2 ) }
}
ATTRIBUTE “_FillValue” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “long_name” {
DATATYPE H5T_STRING {
STRSIZE 9;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “missing_value” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “units” {
DATATYPE H5T_STRING {
STRSIZE 11;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
}
DATASET “ssh” {
DATATYPE H5T_STD_I16LE
DATASPACE SIMPLE { ( 24, 1014, 1547 ) / ( H5S_UNLIMITED, 1014, 1547 ) }
STORAGE_LAYOUT {
CHUNKED ( 5, 493, 752 )
SIZE 15680798 (4.802:1 COMPRESSION)
}
FILTERS {
PREPROCESSING SHUFFLE
COMPRESSION DEFLATE { LEVEL 1 }
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE -32767
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_INCR
}
ATTRIBUTE “DIMENSION_LIST” {
DATATYPE H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
DATASPACE SIMPLE { ( 3 ) / ( 3 ) }
}
ATTRIBUTE “_FillValue” {
DATATYPE H5T_STD_I16LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “add_offset” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “coordinates” {
DATATYPE H5T_STRING {
STRSIZE 7;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “long_name” {
DATATYPE H5T_STRING {
STRSIZE 39;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “missing_value” {
DATATYPE H5T_STD_I16LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “scale_factor” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “standard_name” {
DATATYPE H5T_STRING {
STRSIZE 18;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “units” {
DATATYPE H5T_STRING {
STRSIZE 1;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
}
DATASET “surge” {
DATATYPE H5T_STD_I16LE
DATASPACE SIMPLE { ( 24, 1014, 1547 ) / ( H5S_UNLIMITED, 1014, 1547 ) }
STORAGE_LAYOUT {
CHUNKED ( 5, 493, 752 )
SIZE 14448195 (5.211:1 COMPRESSION)
}
FILTERS {
PREPROCESSING SHUFFLE
COMPRESSION DEFLATE { LEVEL 1 }
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE -32767
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_INCR
}
ATTRIBUTE “DIMENSION_LIST” {
DATATYPE H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
DATASPACE SIMPLE { ( 3 ) / ( 3 ) }
}
ATTRIBUTE “_FillValue” {
DATATYPE H5T_STD_I16LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “add_offset” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “coordinates” {
DATATYPE H5T_STRING {
STRSIZE 7;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “long_name” {
DATATYPE H5T_STRING {
STRSIZE 7;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “missing_value” {
DATATYPE H5T_STD_I16LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “scale_factor” {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
ATTRIBUTE “standard_name” {
DATATYPE H5T_STRING {
STRSIZE 5;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “units” {
DATATYPE H5T_STRING {
STRSIZE 1;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
}
DATASET “time” {
DATATYPE H5T_IEEE_F64LE
DATASPACE SIMPLE { ( 24 ) / ( H5S_UNLIMITED ) }
STORAGE_LAYOUT {
CHUNKED ( 512 )
SIZE 108 (1.778:1 COMPRESSION)
}
FILTERS {
PREPROCESSING SHUFFLE
COMPRESSION DEFLATE { LEVEL 1 }
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE 9.96921e+36
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_INCR
}
ATTRIBUTE “CLASS” {
DATATYPE H5T_STRING {
STRSIZE 16;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “NAME” {
DATATYPE H5T_STRING {
STRSIZE 5;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “REFERENCE_LIST” {
DATATYPE H5T_COMPOUND {
H5T_REFERENCE { H5T_STD_REF_OBJECT } “dataset”;
H5T_STD_I32LE “dimension”;
}
DATASPACE SIMPLE { ( 2 ) / ( 2 ) }
}
ATTRIBUTE “_Netcdf4Dimid” {
DATATYPE H5T_STD_I32LE
DATASPACE SCALAR
}
ATTRIBUTE “axis” {
DATATYPE H5T_STRING {
STRSIZE 1;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “calendar” {
DATATYPE H5T_STRING {
STRSIZE 8;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “long_name” {
DATATYPE H5T_STRING {
STRSIZE 4;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “standard_name” {
DATATYPE H5T_STRING {
STRSIZE 4;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE “units” {
DATATYPE H5T_STRING {
STRSIZE 30;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
}
}
}