Read/write compound containing `std::string` using native C hdf5 lib


#1

Hi,

I’m trying to read/write compound datatype that contains std::string instead of char * using C hdf5.
Using the example from hdf5 I slightly modified it on the following way:

#include <string>           // ADDED BY ME

#include "hdf5.h"
#include <stdio.h>
#include <stdlib.h>

#define FILE            "h5ex_t_cmpd.h5"
#define DATASET         "DS1"
#define DIM0            4

typedef struct {
    int     serial_no;
    std::string location;   // CHANGED FROM char* to std::string BY ME
    double  temperature;
    double  pressure;
} sensor_t;                                 /* Compound type */

int
main (void)
{
    hid_t       file, filetype, memtype, strtype, space, dset;
                                            /* Handles */
    herr_t      status;
    hsize_t     dims[1] = {DIM0};
    sensor_t    wdata[DIM0],                /* Write buffer */
                rdata[DIM0];                /* Read buffer */
    int         ndims,
                i;

    /*
     * Initialize data.
     */
    wdata[0].serial_no = 1153;
    wdata[0].location = "Exterior (static)";
    wdata[0].temperature = 53.23;
    wdata[0].pressure = 24.57;
    wdata[1].serial_no = 1184;
    wdata[1].location = "Intake";
    wdata[1].temperature = 55.12;
    wdata[1].pressure = 22.95;
    wdata[2].serial_no = 1027;
    wdata[2].location = "Intake manifold";
    wdata[2].temperature = 103.55;
    wdata[2].pressure = 31.23;
    wdata[3].serial_no = 1313;
    wdata[3].location = "Exhaust manifold";
    wdata[3].temperature = 1252.89;
    wdata[3].pressure = 84.11;

    /*
     * Create a new file using the default properties.
     */
    file = H5Fcreate (FILE, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);

    /*
     * Create variable-length string datatype.
     */
    strtype = H5Tcopy (H5T_C_S1);
    status = H5Tset_size (strtype, H5T_VARIABLE);

    /*
     * Create the compound datatype for memory.
     */
    memtype = H5Tcreate (H5T_COMPOUND, sizeof (sensor_t));
    status = H5Tinsert (memtype, "Serial number",
                HOFFSET (sensor_t, serial_no), H5T_NATIVE_INT);
    status = H5Tinsert (memtype, "Location", HOFFSET (sensor_t, location),
                strtype);
    status = H5Tinsert (memtype, "Temperature (F)",
                HOFFSET (sensor_t, temperature), H5T_NATIVE_DOUBLE);
    status = H5Tinsert (memtype, "Pressure (inHg)",
                HOFFSET (sensor_t, pressure), H5T_NATIVE_DOUBLE);

    /*
     * Create the compound datatype for the file.  Because the standard
     * types we are using for the file may have different sizes than
     * the corresponding native types, we must manually calculate the
     * offset of each member.
     */
    filetype = H5Tcreate (H5T_COMPOUND, 8 + sizeof (hvl_t) + 8 + 8);
    status = H5Tinsert (filetype, "Serial number", 0, H5T_STD_I64BE);
    status = H5Tinsert (filetype, "Location", 8, strtype);
    status = H5Tinsert (filetype, "Temperature (F)", 8 + sizeof (hvl_t),
                H5T_IEEE_F64BE);
    status = H5Tinsert (filetype, "Pressure (inHg)", 8 + sizeof (hvl_t) + 8,
                H5T_IEEE_F64BE);

    /*
     * Create dataspace.  Setting maximum size to NULL sets the maximum
     * size to be the current size.
     */
    space = H5Screate_simple (1, dims, NULL);

    /*
     * Create the dataset and write the compound data to it.
     */
    dset = H5Dcreate (file, DATASET, filetype, space, H5P_DEFAULT, H5P_DEFAULT,
                H5P_DEFAULT);
    status = H5Dwrite (dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, wdata);

    /*
     * Close and release resources.
     */
    status = H5Dclose (dset);
    status = H5Sclose (space);
    status = H5Tclose (filetype);
    status = H5Fclose (file);


    /*
     * Now we begin the read section of this example.  Here we assume
     * the dataset has the same name and rank, but can have any size.
     * Therefore we must allocate a new array to read in data using
     * malloc().  For simplicity, we do not rebuild memtype.
     */

    /*
     * Open file and dataset.
     */
    file = H5Fopen (FILE, H5F_ACC_RDONLY, H5P_DEFAULT);
    dset = H5Dopen (file, DATASET, H5P_DEFAULT);

    /*
     * Get dataspace and allocate memory for read buffer.
     */
    space = H5Dget_space (dset);
    ndims = H5Sget_simple_extent_dims (space, dims, NULL);
    // rdata = (sensor_t *) malloc (dims[0] * sizeof (sensor_t));   // COMMENTED BY ME (I explicitely set the array size in the beginning)

    /*
     * Read the data.
     */
    status = H5Dread (dset, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);

    /*
     * Output the data to the screen.
     */
    for (i=0; i<dims[0]; i++) {
        printf ("%s[%d]:\n", DATASET, i);
        printf ("Serial number   : %d\n", rdata[i].serial_no);
        printf ("Location        : %s\n", rdata[i].location);
        printf ("Temperature (F) : %f\n", rdata[i].temperature);
        printf ("Pressure (inHg) : %f\n\n", rdata[i].pressure);
    }

    /*
     * Close and release resources.  H5Dvlen_reclaim will automatically
     * traverse the structure and free any vlen data (strings in this
     * case).
     */
    status = H5Dvlen_reclaim (memtype, space, H5P_DEFAULT, rdata);
    free (rdata);
    status = H5Dclose (dset);
    status = H5Sclose (space);
    status = H5Tclose (memtype);
    status = H5Tclose (strtype);
    status = H5Fclose (file);

    return 0;
}

After build and run the code I can see that everything was correctly written to the file (even location member variable) using HDFViewer.

But when I read the dataset I can see that location member variable is empty.

Is there a way to overcome this and read the data directly to the string?


#2

Kerim, it’s just dumb luck. See libc++'s implementation of std::string Your write is working because the data layout for the “Short string mode” looks like hvl_t. Your luck runs out in the read direction.

#include <iostream>
#include <string>
#include <typeinfo>

int main()
{
  std::cout << "char* vs std::string: ";
  std::cout << ( typeid(char*)==typeid(std::string) ) << '\n';
  std::cout << sizeof(char*) << '\n';
  std::cout << sizeof(std::string) << '\n';
  return 0;
}

returns

char* vs std::string: 0
8
32

Best, G.


#3

Thank you very much for the information!
Now I see that I should not move in this direction. I think I will be forking with limited length char array instead