C++ Dataset: can't extend it

    H5File file(FILE_NAME, H5F_ACC_TRUNC);

    //
    // a 2x2 dataspace of strings
    //
    hsize_t str_dimsf[2];
    str_dimsf[0] = 2;
    str_dimsf[1] = 2;
    /*
    hsize_t maxdims[2];
    maxdims[0] = 20;
    maxdims[1] = 20; // H5S_UNLIMITED;
    */
    hsize_t maxdims = H5S_UNLIMITED;

    H5::DataSpace dataspace(2, str_dimsf); // passing maxdims here crashes, see note (1)

    const char *matrix[2][2];
    matrix[0][0] = "Astronomy";
    matrix[0][1] = "Biology";
    matrix[1][0] = "Physics";
    matrix[1][1] = "Chemistry";

    // Variable-length string type
    H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE);
    H5::DataSet str_dataset = file.createDataSet("MyDataset", datatype, dataspace);
    str_dataset.write(matrix, datatype);

    // Try to grow the dataset to 10x1 -- has no effect
    hsize_t size[2];
    size[0] = 10;
    size[1] = 1;
    herr_t status = H5Dextend(str_dataset.getId(), size);

    // str_dataset.extend(size); // the C++ wrapper always crashes, see note (2)

    // Querying the space, as suggested elsewhere, makes no difference
    H5D_space_status_t space_status;
    str_dataset.getSpaceStatus(space_status);
    H5::DataSpace file_space = str_dataset.getSpace();

    return;
  }

I have tried a piece of code with a string dataset and can't extend it no matter what I try.

Am I missing anything? In one of the posts someone suggested querying the dataspace to get the dataset updated; I tried that as well.

Note:
(1) H5::DataSpace dataspace(2, str_dimsf, maxdims); also crashes.
(2) str_dataset.extend(...) always crashes; I can't figure out why. The equivalent C call with the dataset id (H5Dextend) doesn't crash.

I'd appreciate any comments, suggestions, pointers.

thanks & warm regards
~ Rudresh

Rudresh, how are you? The basic problem is that the dataset created by

file.createDataSet("MyDataset", datatype, dataspace);

is not extendible. It has a contiguous storage layout, which fixes its logical extent at creation time. To be extendible, a dataset must be created with a chunked layout, and its maximum extents must be unlimited or large enough for your needs. (That is most likely also why str_dataset.extend() appears to crash: on failure, the C++ API throws an exception, which terminates the program if it goes unhandled.)
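For the string case above, that means a dataset creation property list with a chunk size, plus unlimited maximum dimensions on the dataspace. A minimal sketch (untested, with an assumed 2x2 chunk size):

hsize_t dims[2]     = {2, 2};
hsize_t max_dims[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
hsize_t chunk[2]    = {2, 2};                  // assumed chunk size

H5::DataSpace dataspace(2, dims, max_dims);
H5::DSetCreatPropList dcpl;
dcpl.setChunk(2, chunk);                       // chunked layout => extendible

H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE);
H5::DataSet str_dataset = file.createDataSet("MyDataset", datatype, dataspace, dcpl);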

Since I’m lazy and curious, I asked ChatGPT to write me an example. My instruction was this: "Using the C++ API, create an extendible chunked integer HDF5 dataset with a 2x2 chunk size and write a 4x4 array to it!"

Here is what it produced:

#include <iostream>
#include <H5Cpp.h>

using namespace H5;

void write_4x4_array_to_hdf5() {
    // Define the file and dataset properties
    const H5std_string FILE_NAME("extendible_chunked_integer_dataset.h5");
    const H5std_string DATASET_NAME("IntegerDataset");
    const hsize_t CHUNK_SIZE = 2;

    // Create the file
    H5File file(FILE_NAME, H5F_ACC_TRUNC);

    // Define the dataset dimensions
    hsize_t initial_dims[2] = {4, 4};
    hsize_t max_dims[2] = {H5S_UNLIMITED, H5S_UNLIMITED};
    hsize_t chunk_dims[2] = {CHUNK_SIZE, CHUNK_SIZE};

    // Create the dataspace
    DataSpace dataspace(2, initial_dims, max_dims);

    // Create the dataset creation property list and set the chunk size
    DSetCreatPropList dcpl;
    dcpl.setChunk(2, chunk_dims);

    // Create the dataset
    DataSet dataset = file.createDataSet(DATASET_NAME, PredType::NATIVE_INT, dataspace, dcpl);

    // Define and fill the 4x4 array
    int data[4][4] = {
        {0, 1, 2, 3},
        {4, 5, 6, 7},
        {8, 9, 10, 11},
        {12, 13, 14, 15}
    };

    // Write the data to the dataset
    dataset.write(data, PredType::NATIVE_INT);

    // Close the dataset, dataspace, and file
    dataset.close();
    dataspace.close();
    file.close();
}

int main() {
    try {
        write_4x4_array_to_hdf5();
    } catch (const FileIException& error) {
        error.printErrorStack();
        return -1;
    } catch (const DataSetIException& error) {
        error.printErrorStack();
        return -1;
    } catch (const DataSpaceIException& error) {
        error.printErrorStack();
        return -1;
    }
    std::cout << "Successfully created extendible chunked integer HDF5 dataset and wrote a 4x4 array to it!" << std::endl;
    return 0;
}

I did not try to compile and run this, but there’s a good chance it will. Can you give this a try and then modify it for your needs?

Best, G.


Hi @rudreshjoshi,

In case you are not restricted to a particular API, you may want to try HDFql, a high-level (declarative) language which relieves you of most HDF5 low-level details. Creating an extendible dataset in C++ using HDFql could be done as follows:

// declare a variable named 'matrix'
const char *matrix[2][2];

// fill-up 'matrix' with some values
matrix[0][0] = "Astronomy";
matrix[0][1] = "Biology";
matrix[1][0] = "Physics";
matrix[1][1] = "Chemistry";

// create an HDF5 file named 'test.h5' and use (i.e. open) it
HDFql::execute("CREATE AND USE FILE test.h5");

// register 'matrix' for subsequent use (by HDFql)
HDFql::variableTransientRegister(&matrix);

// create a dataset named 'dset' of two dimensions (the first being extendible) and write the values stored in 'matrix' in it
HDFql::execute("CREATE DATASET dset AS VARCHAR(2 TO UNLIMITED, 2) VALUES FROM MEMORY 0");

// alter (i.e. extend) first dimension of 'dset' with +3 (i.e. this dimension will have a size equal to 5)
HDFql::execute("ALTER DIMENSION dset TO +3");

Hope it helps!


Here’s ChatGPT’s take on that:

"Using the HDFql C++ API, create an extendible chunked integer dataset with a 2x2 chunk size and write a 4x4 array to it!"

#include <iostream>
#include <vector>
#include "HDFql.hpp"

int main() {
    // Step 1: Include the necessary header files
    // Already done at the beginning of the code

    // Step 2: Create an HDF5 file
    HDFql::execute("CREATE FILE my_file.h5");

    // Step 3: Create an extendible dataset using the create function with chunking and data type specified
    HDFql::execute("CREATE CHUNKED DATASET my_file.h5:/my_dataset AS INT(0:UNLIMITED, 0:UNLIMITED) CHUNK(2, 2)");

    // Step 4: Write a 4x4 array to the dataset
    std::vector<int> data = {1, 2, 3, 4,
                             5, 6, 7, 8,
                             9, 10, 11, 12,
                             13, 14, 15, 16};

    // Resize the dataset to 4x4
    HDFql::execute("ALTER DATASET my_file.h5:/my_dataset SET DIMENSION SIZES 4, 4");

    // Write the data to the dataset
    HDFql::variableRegister(data);
    HDFql::execute("INSERT INTO my_file.h5:/my_dataset VALUES FROM MEMORY " + std::to_string(HDFql::variableGetNumber()));

    // Step 5: Close the HDF5 file
    HDFql::execute("CLOSE FILE my_file.h5");

    return 0;
}

Correct?

G.


stream of scalars:

#include <h5cpp/all>

int main() {
	h5::fd_t fd = h5::create("example.h5", H5F_ACC_TRUNC);
	h5::pt_t pt = h5::create<size_t>(fd, "stream of scalars", h5::max_dims{H5S_UNLIMITED});
	
	for(auto value: {1,2,3,4,5})
		h5::append(pt, value);
}

stream of matrices/frames:

#include <armadillo>
#include <h5cpp/all>

int main() {
	h5::fd_t fd = h5::create("example.h5", H5F_ACC_TRUNC);

	size_t nrows = 2, ncols=5, nframes=3;
	h5::pt_t pt = h5::create<double>(fd, "stream of matrices",
			h5::max_dims{H5S_UNLIMITED,nrows,ncols}, h5::chunk{1,nrows,ncols} );
	arma::mat M(nrows,ncols);
    // do actual IO
	for( int i = 0; i < nframes; i++)
		h5::append(pt, M);
}

steven


Thanks a lot.
I am alright, how about you?

Yes, your code was helpful, and since I wanted to add string variables, I took some help from your code and comments, and some from ChatGPT. That almost did it, except that the write either wrote nothing or crashed. I had to add one piece of code, and then it worked. Thanks a lot for your help.

Here is the complete code, if someone wants to try it:

#include <iostream>
#include <H5Cpp.h>

const H5std_string FILE_NAME("chunked_dataset.h5");
const H5std_string DATASET_NAME("chunked_string_dataset");
const int ROWS = 10;
const int COLS = 5;

int main()
{
    try {
        // Create a new HDF5 file
        H5::H5File file(FILE_NAME, H5F_ACC_TRUNC);

        // Create the dataspace: ROWS x COLS, unlimited in the first dimension
        hsize_t dims[2] = {ROWS, COLS};
        hsize_t max_dims[2] = {H5S_UNLIMITED, COLS};
        H5::DataSpace dataspace(2, dims, max_dims);

        // Set the dataset creation properties: chunked layout (required for extending)
        H5::DSetCreatPropList cparms;
        cparms.setChunk(2, dims);

        // Variable-length string type
        H5::StrType datatype(H5::PredType::C_S1, H5T_VARIABLE);

        // Create the dataset
        H5::DataSet dataset = file.createDataSet(DATASET_NAME, datatype, dataspace, cparms);

        // One row of strings to write
        const char *matrix_ext[1][COLS];
        matrix_ext[0][0] = "This is str1";
        matrix_ext[0][1] = "This is str2";
        matrix_ext[0][2] = "This is str3";
        matrix_ext[0][3] = "This is str4";
        matrix_ext[0][4] = "This is str5";

        // Memory dataspace matching one row -- this was the missing piece:
        // the memory dataspace must match the selected hyperslab
        hsize_t dims3[2] = {1, COLS};
        H5::DataSpace mspace2(2, dims3, max_dims);

        // Select row 0 in the file dataspace and write
        hsize_t start[2] = {0, 0};
        hsize_t count[2] = {1, COLS};
        dataspace.selectHyperslab(H5S_SELECT_SET, count, start);
        dataset.write(matrix_ext, datatype, mspace2, dataspace);

        // Select row 1 and write again
        hsize_t start2[2] = {1, 0};
        hsize_t count2[2] = {1, COLS};
        dataspace.selectHyperslab(H5S_SELECT_SET, count2, start2);
        dataset.write(matrix_ext, datatype, mspace2, dataspace);

        // Extend the dataset to its final size (10x5)
        hsize_t final_dims[2] = {ROWS, COLS};
        dataset.extend(final_dims);

        // Close the dataset and file
        dataset.close();
        file.close();

        std::cout << "Dataset created successfully!" << std::endl;
    } catch (H5::Exception& error) {
        error.printErrorStack();
        return -1;
    }

    return 0;
}
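One note on the code above: because dims is already {ROWS, COLS}, the extend at the end is a no-op; the chunked layout is what makes a real extension possible. Growing beyond ROWS rows and writing into the new region would look roughly like this (untested sketch; it re-queries the file dataspace after the extend, as suggested earlier in the thread):

// hypothetical continuation: grow by one row and write into it
hsize_t bigger[2] = {ROWS + 1, COLS};
dataset.extend(bigger);

H5::DataSpace fspace = dataset.getSpace();     // re-query the enlarged file dataspace
hsize_t start3[2] = {ROWS, 0};
hsize_t count3[2] = {1, COLS};
fspace.selectHyperslab(H5S_SELECT_SET, count3, start3);
dataset.write(matrix_ext, datatype, mspace2, fspace);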

Thanks a lot. We might not be using HDFql, but I will keep it in mind; if there is a need, I will use it.


Thanks a lot, Steven. I wanted a dataset of strings (rows x columns), but I will keep this in mind in case it fits some future requirement.

Just for completeness, here’s ChatGPT’s response to "Using the H5CPP API, create an extendible chunked integer dataset with a 2x2 chunk size and write a 4x4 array to it!":

#include <iostream>
#include <h5cpp/all>
#include <Eigen/Dense>

int main() {
    // Create a 4x4 array
    Eigen::Matrix<int, 4, 4> data;
    data << 1, 2, 3, 4,
            5, 6, 7, 8,
            9, 10, 11, 12,
            13, 14, 15, 16;

    // Create a file and a dataset
    h5::fd_t file = h5::create("data.h5", H5F_ACC_TRUNC);
    h5::ds_t<int> dataset = h5::create<int>(file, "dataset",
                                             h5::max_dims{H5S_UNLIMITED, H5S_UNLIMITED},
                                             h5::chunk{2, 2});

    // Write the data to the dataset
    dataset = data;

    // Close the file and dataset
    dataset.close();
    file.close();

    std::cout << "Data written to data.h5 successfully!" << std::endl;

    return 0;
}

The close bits are superfluous because of RAII. The assignment dataset = data; looks cool(*) but is not (?) supported in the current implementation. @steven.varga is that correct?
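If it is not, my guess (untested, purely an assumption on my part) is that the explicit equivalent would be something like:

h5::write(dataset, data);   // hypothetical: explicit write in place of dataset = data;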

G.

(*) You made it look too much like h5py. :-)


Hi @gheber,

Thanks for sharing this!

ChatGPT got most of it right; at the very least it pointed in the right direction for creating an extendible chunked integer dataset with a 2x2 chunk size and writing a 4x4 array to it using HDFql in C++. For correctness, here is one solution for this use-case:

#include <iostream>
#include <vector>
#include "HDFql.hpp"

int main() {
    // Step 1: Include the necessary header files
    // Already done at the beginning of the code

    // Step 2: Create an HDF5 file
    HDFql::execute("CREATE FILE my_file.h5");

    // Step 3: Create an extendible dataset
    HDFql::execute("CREATE CHUNKED(2, 2) DATASET my_file.h5 my_dataset AS INT(0 TO UNLIMITED, 0 TO UNLIMITED)");

    // Step 4: Write a 4x4 array to the dataset
    std::vector<int> data = {1, 2, 3, 4,
                             5, 6, 7, 8,
                             9, 10, 11, 12,
                             13, 14, 15, 16};

    // Resize the dataset to 4x4
    HDFql::execute("ALTER DIMENSION my_file.h5 my_dataset TO 4, 4");

    // Write the data to the dataset
    HDFql::execute("INSERT INTO my_file.h5 my_dataset VALUES FROM MEMORY " + std::to_string(HDFql::variableTransientRegister(data)));

    return 0;
}

Hope it helps!
