Thanks, Barbara. For completeness, I'm including code below for reading three different types of strings using the HDF5 C API (callable from C++).
Here's my Python code for generating three test HDF5 files:
- variable-length UTF-8 string
- variable-length ASCII string
- fixed-length ASCII string
import h5py
import numpy as np

# Each file holds a single one-element dataset called "name" containing the
# string "0.1.0"; only the string datatype differs between the files.

# Variable-length UTF-8 string.
with h5py.File('utf8.h5', 'w') as f:
    dt = h5py.special_dtype(vlen=str)  # unicode
    dset = f.create_dataset("name", (1,), dtype=dt)
    dset[...] = "0.1.0"

# Variable-length ASCII string.
with h5py.File('ascii.h5', 'w') as f:
    dt = h5py.special_dtype(vlen=bytes)
    dset = f.create_dataset("name", (1,), dtype=dt)
    dset[...] = "0.1.0"

# Fixed-length (5-byte) ASCII string.
with h5py.File('ascii_fix.h5', 'w') as f:
    dset = f.create_dataset("name", (1,), dtype="S5")
    # np.bytes_ replaces np.string_, which was removed in NumPy 2.0.
    dset[0] = np.bytes_(b"0.1.0")
The resulting HDF5 files look like this (h5dump output):
HDF5 "utf8.h5" {
DATASET "name" {
   DATATYPE H5T_STRING {
      STRSIZE H5T_VARIABLE;
      STRPAD H5T_STR_NULLTERM;
      CSET H5T_CSET_UTF8;
      CTYPE H5T_C_S1;
   }
   DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
   DATA {
   (0): "0.1.0"
   }
}
}
HDF5 "ascii.h5" {
DATASET "name" {
   DATATYPE H5T_STRING {
      STRSIZE H5T_VARIABLE;
      STRPAD H5T_STR_NULLTERM;
      CSET H5T_CSET_ASCII;
      CTYPE H5T_C_S1;
   }
   DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
   DATA {
   (0): "0.1.0"
   }
}
}
HDF5 "ascii_fix.h5" {
DATASET "name" {
   DATATYPE H5T_STRING {
      STRSIZE 5;
      STRPAD H5T_STR_NULLPAD;
      CSET H5T_CSET_ASCII;
      CTYPE H5T_C_S1;
   }
   DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
   DATA {
   (0): "0.1.0"
   }
}
}
Here's the C++ code (using the HDF5 C API) for reading these files, one standalone program per file:
#include “hdf5.h”
int main( void )
{
hid_t file_id;
file_id = H5Fopen(“utf8.h5”,H5F_ACC_RDONLY,H5P_DEFAULT);
hid_t dset = H5Dopen(file_id, “name”, H5P_DEFAULT);
hid_t filetype = H5Dget_type(dset);
hid_t space = H5Dget_space(dset);
hsize_t dims[1] = {1};
int ndims = H5Sget_simple_extent_dims(space, dims, NULL);
char rdata = (char) malloc(dims[0]sizeof(char));
hid_t memtype = H5Tcopy(H5T_C_S1);
herr_t status = H5Tset_size(memtype, H5T_VARIABLE);
status = H5Tset_cset(memtype, H5T_CSET_UTF8); // Specify UTF8 here
status = H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
printf(“version: %s\n”, rdata[0]);
H5Fclose (file_id);
return 0;
}
#include “hdf5.h”
int main( void )
{
// Open PAL HDF5 file and read in header information
hid_t file_id;
file_id = H5Fopen(“ascii.h5”,H5F_ACC_RDONLY,H5P_DEFAULT);
if(file_id < 0){
exit(1);
}
hid_t dset = H5Dopen(file_id, “name”, H5P_DEFAULT);
hid_t filetype = H5Dget_type(dset);
hid_t space = H5Dget_space(dset);
hsize_t dims[1] = {1};
int ndims = H5Sget_simple_extent_dims(space, dims, NULL);
char rdata = (char) malloc(dims[0]sizeof(char));
hid_t memtype = H5Tcopy(H5T_C_S1);
herr_t status = H5Tset_size(memtype, H5T_VARIABLE);
status = H5Dread(dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
printf(“version: %s\n”, rdata[0]);
H5Fclose (file_id);
return 0;
}
#include “hdf5.h”
#include “hdf5_hl.h”
int main( void )
{
hid_t file_id;
char version[1024];
file_id = H5Fopen(“ascii_fix.h5”,H5F_ACC_RDONLY,H5P_DEFAULT);
herr_t status = H5LTread_dataset_string(file_id, “/name”, version);
printf(“version: %s\n”, version);
H5Fclose (file_id);
return 0;
}