Creating a compound datatype with no predefined struct

I am trying to write a set of fields to an H5Table without knowing what the fields are a priori.
Calculating the offsets here has been an issue, since HOFFSET/offsetof cannot be used due to absence of a predefined struct. I am calculating the offsets assuming contiguous allocation and no padding which may be erroneous.
Is there an alternative way to handle such a situation?

// Open the output file "vlen" with H5F_ACC_TRUNC and default property lists.
hid_t out_stream = H5Fcreate("vlen",H5F_ACC_TRUNC,H5P_DEFAULT,H5P_DEFAULT);
// Alternative string type (disabled): copy of H5T_C_S1 with size H5T_VARIABLE.
//vlen_string = H5Tcopy(H5T_C_S1);
//H5Tset_size(vlen_string,H5T_VARIABLE);
// vlen_string must be assigned before getH5Type(CVGTYPE::COMMENT) is called
// below, because that call returns this handle.
vlen_string = H5Tvlen_create(H5T_C_S1);
// Field names and their HDF5 types, index-aligned; only known at run time.
std::vector<std::string> field_names = {"FIELD_1","FIELD_2","FIELD_3"};
std::vector<hid_t> field_types = {getH5Type(CVGTYPE::INT),getH5Type(CVGTYPE::COMMENT),getH5Type(CVGTYPE::DOUBLE)};
std::vector<hsize_t> field_offsets = {0,0,0};
//Create Compound Datatype
// Offsets are a running sum of H5Tget_size() over the fields, i.e. the fields
// are laid out back-to-back with no padding; tot_size ends up as the sum of
// all member sizes and is used as the size of the compound type.
hsize_t tot_size = 0;
hsize_t offset = 0;
for(int i=0;i<field_names.size();i++){
    hsize_t type_size = H5Tget_size(field_types[i]);
    field_offsets[i]=offset;
    tot_size += type_size;
    offset += type_size;
}hid_t compound_dtype = H5Tcreate(H5T_COMPOUND,tot_size);
// Register every field in the compound type at its computed offset.
for(int i=0;i<field_names.size();i++){
    H5Tinsert(compound_dtype,field_names[i].c_str(),field_offsets[i],field_types[i]);
}
//Create Table
// The table is created empty (nrecords == 0) and with chunk_size == 0.
int nrecords = 0;
int chunk_size = 0;
// NOTE(review): sizeof(compound_dtype) is the size of the handle variable, not
// of the compound type built above. The replies in this thread identify this
// as the cause of the "member extends past end of compound type" error and
// recommend H5Tget_size(compound_dtype) instead.
hsize_t compound_dtype_size = sizeof(compound_dtype);
// H5TBmake_table takes the field names as an array of C strings; the pointers
// borrow the storage owned by field_names.
const char* field_names_c[field_names.size()];
for (size_t i = 0; i < field_names.size(); ++i) {
   field_names_c[i] = field_names[i].c_str();
}
H5TBmake_table("some_table",out_stream,"some_dataset_name",hsize_t(field_names.size()),nrecords,compound_dtype_size,field_names_c,field_offsets.data(),field_types.data(),chunk_size,NULL,0,NULL);

This throws the following error

HDF5-DIAG: Error detected in HDF5 (1.14.5):
  #000: /home/gsabhishek/Downloads/hdf5-1.14.5/src/H5Tcompound.c line 340 in H5Tinsert(): unable to insert member
    major: Datatype
    minor: Unable to insert object
  #001: /home/gsabhishek/Downloads/hdf5-1.14.5/src/H5Tcompound.c line 421 in H5T__insert(): member extends past end of compound type
    major: Datatype
    minor: Unable to insert object

Similar to this post, but I cannot use H5QL as of now.

The error message is clear: H5T__insert(): member extends past end of compound type. How about you check the invariant field_offsets[i] + H5Tget_size(field_types[i]) <= tot_size in the H5Tinsert loop?

G.

Thanks.
I checked the insert size

std::cout<<i<<"th Element Inserted :"<<field_offsets[i]+H5Tget_size(field_types[i])<<" Tot_size : "<<tot_size<<std::endl;
0 Element Inserted :4 Tot_size : 28
1 Element Inserted :20 Tot_size : 28
2 Element Inserted :28 Tot_size : 28

H5Tinsert does not throw an error; H5TBmake_table is the one throwing the error.

The problematic line is:
hsize_t compound_dtype_size = sizeof(compound_dtype);

The documentation states:

[in] type_size The size in bytes of the structure associated with the table; This value is obtained with sizeof().

Notice that it says “… the structure associated with the table,” which you said you don’t have.

Clearly, sizeof(compound_dtype) == sizeof(hid_t) == 8 < 28, which is the problem.

sizeof() can work only with a predefined struct. The documentation should be clearer on this.

What would work is hsize_t compound_dtype_size = H5Tget_size(compound_dtype);

OK?

G.

@gheber
Thanks. This fixed the issue.

A follow up issue I have is regarding the creation of a data buffer we pass to H5TBmake_table

I am trying to create a void pointer for the data. Since we have a variable length string, the size of the void pointer may not be equal to the compound_dtype_size and the offsets calculated during the creation of compound_dtype would also not be valid anymore.

So here I tried to calculate the new offsets and field sizes. The code runs fine, but when I open the table with hdfview I get an error and the fields are not populated.

// Sample column data: one int, one string and one double per record.
std::vector<int> data1 = {1,2,4};
std::vector<std::string> data2 = {"do","do re","do re mi"};
std::vector<double> data3 = {1.,4.,16.};
for(int n=0;n<3;n++){
  hsize_t start = n;
  // Pack record n as: int | raw string bytes (no terminator) | double.
  // The buffer length and the position of the double therefore depend on
  // data2[n].size() and differ from record to record.
  void *data = ::operator new( sizeof(int)+data2[n].size()+sizeof(double));
  memcpy(data, &data1[n], sizeof(int));
  memcpy((char *)data + sizeof(int), data2[n].c_str(), data2[n].size());
  memcpy((char *)data + sizeof(int) + data2[n].size(), &data3[n], sizeof(double));
  // Per-record sizes/offsets matching the packing above. The reply further
  // down in this thread points out that a variable-length field is a pointer
  // in the nominal structure, so a per-record field size is not how
  // structures work.
  std::vector<size_t> new_field_sizes={sizeof(int),data2[n].size(),sizeof(double)};
  std::vector<size_t> new_field_offsets={0,new_field_sizes[0],new_field_sizes[0]+new_field_sizes[1]};
  // NOTE(review): the fifth argument passed here is the compound_dtype handle
  // and the fourth is nrecords -- confirm both against the H5TBinsert_record
  // signature (record count and record size in bytes).
 H5TBinsert_record(out_stream,"some_dataset_name",start,nrecords,compound_dtype,new_field_offsets.data(),new_field_sizes.data(),data);
  ::operator delete(data);
}

What’s the datatype of FIELD_2?

It’s a variable length string.

I created the type as
hid_t vlen_string = H5Tvlen_create(H5T_C_S1);

I recommend you read the section on string handling in the HDF5 user guide and look at this example. The gist is that the API for variable-length strings works with pointers. In a structure/compound datatype, the corresponding field of the nominal structure is a pointer and not an array of characters. In your snippet, the field would potentially have a different size, data2[n].size(), for each string, which is not how structures work. OK?

G.

Thanks.
I got most of it working, but I seem to be missing something here and I am still unable to write the strings.

hid_t vlen_string;

/// Maps a CVGTYPE tag to the HDF5 datatype handle used for that kind of field.
///
/// @param type  Field kind to translate.
/// @return H5T_NATIVE_INT for INT, H5T_NATIVE_DOUBLE for DOUBLE, and the
///         file-level vlen_string handle for COMMENT (whatever value that
///         handle holds at the time of the call).
/// @throws std::runtime_error if the tag is none of the above.
hid_t getH5Type(const CVGTYPE &type){
    if(type==CVGTYPE::INT){
        return H5T_NATIVE_INT;
    }
    if(type==CVGTYPE::DOUBLE){
        return H5T_NATIVE_DOUBLE;
    }
    if(type==CVGTYPE::COMMENT){
        return vlen_string;
    }
    // No known tag matched.
    throw std::runtime_error("Invalid CVGType");
}

/// Returns the number of bytes one value of the given field kind occupies in
/// a record buffer.
///
/// @param type  Field kind to look up.
/// @return H5Tget_size() of the native int/double type for INT/DOUBLE, and
///         sizeof(hvl_t) for COMMENT.
/// @throws std::runtime_error if the tag is none of the above.
///
/// NOTE(review): the declared return type is hid_t although every returned
/// value is a byte count; kept as-is so existing callers are unaffected.
hid_t getH5TypeOffset(const CVGTYPE &type){
    if(type==CVGTYPE::INT){
        return H5Tget_size(H5T_NATIVE_INT);
    }
    if(type==CVGTYPE::DOUBLE){
        return H5Tget_size(H5T_NATIVE_DOUBLE);
    }
    if(type==CVGTYPE::COMMENT){
        return sizeof(hvl_t);
    }
    // No known tag matched.
    throw std::runtime_error("Invalid CVGType");
}

void test_vlen_compound_dtype(){
    hid_t out_stream = H5Fcreate("vlen_table.h5",H5F_ACC_TRUNC,H5P_DEFAULT,H5P_DEFAULT);
    vlen_string = H5Tvlen_create(H5T_C_S1);
    std::vector<std::string> field_names = {"FIELD_1","FIELD_2","FIELD_3"};
    std::vector<hid_t> field_types = {getH5Type(CVGTYPE::INT),getH5Type(CVGTYPE::COMMENT),getH5Type(CVGTYPE::DOUBLE)};
    std::vector<size_t> field_sizes = {0,0,0};
    std::vector<size_t> field_offsets = {0,0,0};
    //Create Compound Datatype
    hsize_t tot_size = 0;
    hsize_t offset = 0;
    for(int i=0;i<field_names.size();i++){
        hsize_t type_size = H5Tget_size(field_types[i]);
        field_offsets[i]=offset;
        field_sizes[i] = type_size;
        tot_size += type_size;
        offset += type_size;
    }
    hid_t compound_dtype = H5Tcreate(H5T_COMPOUND,tot_size);
    for(size_t i=0;i<field_names.size();i++){
        H5Tinsert(compound_dtype,field_names[i].c_str(),field_offsets[i],field_types[i]);
    }
    //Create Table
    hsize_t nfields = field_names.size();
    hsize_t nrecords = 0;
    hsize_t chunk_size = 1;
    const char* field_names_c[field_names.size()];
    for (size_t i = 0; i < field_names.size(); ++i) {
        field_names_c[i] = field_names[i].c_str();
    }
    H5TBmake_table("some_table",out_stream,"some_dataset_name",nfields,nrecords,tot_size,field_names_c,field_offsets.data(),field_types.data(),chunk_size,NULL,0,NULL);

    std::vector<int> data1 = {1,2,4};
    std::vector<std::string> data2 = {"do","do re","do re mi"};
    std::vector<double> data3 = {1.,4.,16.};
    for(int n=0;n<data1.size();n++){
        void* buffer = ::operator new(tot_size);
        memcpy((char*)buffer + field_offsets[0], &data1[n], field_sizes[0]);
        hvl_t vlen_data;
        vlen_data.len = data2[n].length();
        vlen_data.p = new char[vlen_data.len];
        std::strcpy(static_cast<char*>(vlen_data.p), data2[n].c_str());
        memcpy((char*)buffer + field_offsets[1], &vlen_data, field_sizes[1]);
        memcpy((char*)buffer + field_offsets[2], &data3[n], field_sizes[2]);
        std::cout<<static_cast<char*>(reinterpret_cast<hvl_t*>((char*)buffer + field_offsets[1])->p)<<std::endl;
        hsize_t start = n;
        hsize_t nrecs_to_insert = 1;
        H5TBappend_records(out_stream,"some_dataset_name",nrecs_to_insert,tot_size,field_offsets.data(),field_sizes.data(),buffer);
        delete[] static_cast<char*>(vlen_data.p);
        ::operator delete(buffer);
    }
    H5Tclose(compound_dtype);
    H5Fclose(out_stream);
    return
}

When I print out from the buffer using

std::cout<<static_cast<char*>(reinterpret_cast<hvl_t*>((char*)buffer + field_offsets[1])->p)<<std::endl;

I am able to output the strings.
But in the hdfview, I see Error

OK, I think you overshot a little: variable-length strings can be seen as special cases of variable-length sequences. However, the HDF5 library API treats them differently. Compare this vlen of integers example with the vlen string example. The main difference is that in the former example, you deal with hvl_t, whereas it is char* in the latter. OK?

G.

Thanks.
This finally worked!! Thanks a lot.
Here is the final code

void test_vlen_compound_dtype(){
    hid_t out_stream = H5Fcreate("vlen_table.h5",H5F_ACC_TRUNC,H5P_DEFAULT,H5P_DEFAULT);
    hid_t vlen_string = H5Tcopy(H5T_C_S1);
    H5Tset_size(vlen_string,H5T_VARIABLE);
    std::vector<std::string> field_names = {"FIELD_1","FIELD_2","FIELD_3"};
    std::vector<hid_t> field_types = {H5T_NATIVE_INT,vlen_string,H5T_NATIVE_DOUBLE};
    std::vector<size_t> field_sizes = {H5Tget_size(H5T_NATIVE_INT),sizeof(char*),H5Tget_size(H5T_NATIVE_DOUBLE)};
    std::vector<size_t> field_offsets = {0,0,0};
    //Create Compound Datatype
    hsize_t tot_size = 0;
    hsize_t offset = 0;
    for(int i=0;i<field_names.size();i++){
        hsize_t type_size = field_sizes[i];
        field_offsets[i]=offset;
        tot_size += type_size;
        offset += type_size;
    }
    hid_t compound_dtype = H5Tcreate(H5T_COMPOUND,tot_size);
    for(size_t i=0;i<field_names.size();i++){
        H5Tinsert(compound_dtype,field_names[i].c_str(),field_offsets[i],field_types[i]);
    }
    //Create Table
    hsize_t nfields = field_names.size();
    hsize_t nrecords = 0;
    hsize_t chunk_size = 1;
    const char* field_names_c[field_names.size()];
    for (size_t i = 0; i < field_names.size(); ++i) {
        field_names_c[i] = field_names[i].c_str();
    }
    H5TBmake_table("some_table",out_stream,"some_dataset_name",nfields,nrecords,tot_size,field_names_c,field_offsets.data(),field_types.data(),chunk_size,NULL,0,NULL);
    std::vector<int> data1 = {1,2,4};
    std::vector<std::string> data2 = {"do","do re","do re mi"};
    std::vector<double> data3 = {1.,4.,16.};
    void* buffer = ::operator new(tot_size);
    for(int n=0;n<data1.size();n++){
        memcpy((char*)buffer + field_offsets[0], &data1[n], field_sizes[0]);
        char* vlen_data = new char[data2[n].length()];
        std::strcpy(vlen_data, data2[n].c_str());
        //vlen_data[data2[n].length()] = '\0';
        memcpy((char*)buffer + field_offsets[1], &vlen_data, field_sizes[1]);
        memcpy((char*)buffer + field_offsets[2], &data3[n], field_sizes[2]);
        hsize_t start = n;
        hsize_t nrecs_to_insert = 1;
        H5TBappend_records(out_stream,"some_dataset_name",nrecs_to_insert,tot_size,field_offsets.data(),field_sizes.data(),buffer);
        delete[] vlen_data;
    }::operator delete(buffer);
    H5Tclose(compound_dtype);
    H5Tclose(vlen_string);
    H5Fclose(out_stream);
    return;
}

1 Like