Just to add a data point to the story. It appears that the Fletcher32 checksum is “the odd one out.” Here’s a sample program that will show that compression takes place for variable-length integer sequences.
#include "hdf5.h"
#include <stdio.h>
#include <stdlib.h>
/*
 * Creates "vlen.h5" and writes a 1-D dataset "dset" whose elements are
 * variable-length sequences of 32-bit integers, stored chunked with the
 * deflate filter enabled.
 *
 * Returns EXIT_SUCCESS when every HDF5 call (including the final close
 * calls) succeeds, EXIT_FAILURE otherwise. Handles are released in reverse
 * order of creation on every path via the label ladder at the end.
 */
int main(void)
{
    /* Initial dataset extent and chunk size, both in elements. */
    enum { NUM_ELEMS = 2048, CHUNK_ELEMS = 1024 };

    int retval = EXIT_SUCCESS;
    hid_t file, dspace, dtype, dcpl, dset;

    if ((file = H5Fcreate("vlen.h5", H5F_ACC_TRUNC, H5P_DEFAULT,
                          H5P_DEFAULT)) == H5I_INVALID_HID) {
        retval = EXIT_FAILURE;
        goto fail_file;
    }

    /* Element type: variable-length sequence of little-endian int32. */
    if ((dtype = H5Tvlen_create(H5T_STD_I32LE)) == H5I_INVALID_HID) {
        retval = EXIT_FAILURE;
        goto fail_dtype;
    }

    /* 1-D dataspace: NUM_ELEMS elements now, unlimited maximum extent. */
    if ((dspace = H5Screate_simple(1, (hsize_t[]){NUM_ELEMS},
                                   (hsize_t[]){H5S_UNLIMITED})) ==
        H5I_INVALID_HID) {
        retval = EXIT_FAILURE;
        goto fail_dspace;
    }

    if ((dcpl = H5Pcreate(H5P_DATASET_CREATE)) == H5I_INVALID_HID) {
        retval = EXIT_FAILURE;
        goto fail_dcpl;
    }

    /*
     * Chunks of CHUNK_ELEMS elements, deflate level 1.
     * dcpl already exists here, so failures jump to fail_dset, which is the
     * first label that closes dcpl without touching dset.
     */
    if (H5Pset_chunk(dcpl, 1, (hsize_t[]){CHUNK_ELEMS}) < 0 ||
        H5Pset_deflate(dcpl, 1) < 0
        /* Alternative filter, left disabled; swap it in for the deflate
         * call above to try the Fletcher32 checksum instead:
         *     H5Pset_fletcher32(dcpl) < 0
         */
        ) {
        retval = EXIT_FAILURE;
        goto fail_dset;
    }

    if ((dset = H5Dcreate(file, "dset", dtype, dspace, H5P_DEFAULT, dcpl,
                          H5P_DEFAULT)) == H5I_INVALID_HID) {
        retval = EXIT_FAILURE;
        goto fail_dset;
    }

    {
        int data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
        /* Start index into data[] for a sequence of length 1, 2, 3, 4. */
        size_t offset[] = {0, 1, 3, 6};
        hvl_t buf[NUM_ELEMS];
        size_t i;

        /*
         * Build an array that repeats this four-element pattern:
         *   { {0}, {1,2}, {3,4,5}, {6,7,8,9}, ... }
         * Every hvl_t points into data[]; nothing is heap-allocated.
         */
        for (i = 0; i < NUM_ELEMS; ++i) {
            size_t rem = i % 4;

            buf[i].len = 1 + rem;
            buf[i].p = data + offset[rem];
        }

        /*
         * NOTE(review): dtype (built on H5T_STD_I32LE) is also passed as the
         * memory type while the buffer holds plain `int`; this presumably
         * assumes a little-endian host with 32-bit int -- confirm.
         */
        if (H5Dwrite(dset, dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, buf) < 0) {
            retval = EXIT_FAILURE;
            goto fail_write;
        }
    }

    /*
     * Cleanup ladder. A successful run falls through every label. A failed
     * close is reported through the exit status rather than being ignored.
     */
fail_write:
    if (H5Dclose(dset) < 0) {
        retval = EXIT_FAILURE;
    }
fail_dset:
    if (H5Pclose(dcpl) < 0) {
        retval = EXIT_FAILURE;
    }
fail_dcpl:
    if (H5Sclose(dspace) < 0) {
        retval = EXIT_FAILURE;
    }
fail_dspace:
    if (H5Tclose(dtype) < 0) {
        retval = EXIT_FAILURE;
    }
fail_dtype:
    if (H5Fclose(file) < 0) {
        retval = EXIT_FAILURE;
    }
fail_file:
    return retval;
}
The output of `h5dump -pBH vlen.h5` looks like this:
HDF5 "vlen.h5" {
SUPER_BLOCK {
SUPERBLOCK_VERSION 0
FREELIST_VERSION 0
SYMBOLTABLE_VERSION 0
OBJECTHEADER_VERSION 0
OFFSET_SIZE 8
LENGTH_SIZE 8
BTREE_RANK 16
BTREE_LEAF 4
ISTORE_K 32
FILE_SPACE_STRATEGY H5F_FSPACE_STRATEGY_FSM_AGGR
FREE_SPACE_PERSIST FALSE
FREE_SPACE_SECTION_THRESHOLD 1
FILE_SPACE_PAGE_SIZE 4096
USER_BLOCK {
USERBLOCK_SIZE 0
}
}
GROUP "/" {
DATASET "dset" {
DATATYPE H5T_VLEN { H5T_STD_I32LE}
DATASPACE SIMPLE { ( 2048 ) / ( H5S_UNLIMITED ) }
STORAGE_LAYOUT {
CHUNKED ( 1024 )
SIZE 5772 (5.677:1 COMPRESSION)
}
FILTERS {
COMPRESSION DEFLATE { LEVEL 1 }
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_ALLOC
VALUE H5D_FILL_VALUE_DEFAULT
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_INCR
}
}
}
}
We are just compressing the `hvl_t` elements, which are pretty regular in this case.
For Fletcher32, the code fails with the error stack reported by `h5py`.
(This is with `develop`, which stands at HDF5 1.13.0-7.)
G.